def test_db_all_in_one_quotas_2(monkeypatch):
    """
    quotas[queue, project, job_type, user] = [int, int, float];
                                              |    |    |
                  maximum used resources -----+    |    |
                  maximum number of running jobs --+    |
                  maximum resources times (hours) ------+
    """
    # Per-user limit: at most one running job.
    create_quotas_rules_file('{"quotas": {"*,*,*,/": [-1, 1, -1]}}')

    # Submit and allocate an Advance Reservation
    ar_time = get_date()
    insert_and_sched_ar(ar_time + 100)

    # Submit other jobs
    insert_job(res=[(100, [('resource_id=1', "")])], properties="", user="******")
    insert_job(res=[(200, [('resource_id=1', "")])], properties="", user="******")

    sched_time = get_date()
    meta_schedule('internal')

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        delta = prediction.start_time - sched_time
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', delta)
        offsets.append(delta)

    # The quota serializes the two user jobs after the AR.
    assert offsets[1] - offsets[0] == 120
    assert offsets[2] - offsets[0] == 280
def test_db_all_in_one_ar_1(monkeypatch):
    """An advance reservation in the near future is accepted and scheduled."""
    job = insert_and_sched_ar(get_date() + 10)
    print(job.state, ' ', job.reservation)
    assert job.state == 'Waiting'
    assert job.reservation == 'Scheduled'
def test_db_all_in_one_quotas_1(monkeypatch):
    """
    quotas[queue, project, job_type, user] = [int, int, float];
                                              |    |    |
                  maximum used resources -----+    |    |
                  maximum number of running jobs --+    |
                  maximum resources times (hours) ------+
    """
    # One running job per user, plus a resource-hours cap on the queue.
    create_quotas_rules_file(
        '{"quotas": {"*,*,*,/": [-1, 1, -1], "/,*,*,*": [-1, -1, 0.55]}}')

    insert_job(res=[(100, [('resource_id=1', "")])], properties="", user="******")
    insert_job(res=[(200, [('resource_id=1', "")])], properties="", user="******")
    insert_job(res=[(200, [('resource_id=1', "")])], properties="", user="******")

    now = get_date()
    meta_schedule('internal')

    offsets = []
    query = db['GanttJobsPrediction'].query.order_by(GanttJobsPrediction.moldable_id)
    for prediction in query.all():
        delta = prediction.start_time - now
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', delta)
        offsets.append(delta)

    assert offsets == [0, 160, 420]
def frag_job(jid):
    """Record a frag (kill) request for job *jid* on behalf of the current user.

    Returns 0 when the request is registered, -2 when a frag request already
    exists for this job, and -1 when the job is unknown or the requesting user
    is not the job owner nor a privileged account.
    """
    env = os.environ
    requester = env['OARDO_USER'] if 'OARDO_USER' in env else env['USER']

    job = get_job(jid)
    # Only the job owner, 'oar' or 'root' may frag the job.
    if job is None or (requester != job.user and requester not in ('oar', 'root')):
        return -1

    if db.query(FragJob).filter(FragJob.job_id == jid).all():
        # Job already killed
        return -2

    frag = FragJob(job_id=jid, date=tools.get_date())
    db.add(frag)
    db.commit()
    add_new_event("FRAG_JOB_REQUEST", jid,
                  "User %s requested to frag the job %s" % (requester, str(jid)))
    return 0
def test_db_all_in_one_quotas_AR(monkeypatch):
    """An AR that exceeds the resource quota must be rejected (state Error)."""
    create_quotas_rules_file('{"quotas": {"*,*,*,*": [1, -1, -1]}}')
    job = insert_and_sched_ar(get_date() + 10)
    print(job.state, ' ', job.reservation)
    assert job.state == 'Error'
def test_db_all_in_one_AR_7(monkeypatch):
    """A timesharing AR submitted for the near future gets scheduled."""
    now = get_date()
    insert_job(res=[(60, [('resource_id=4', "")])],
               reservation='toSchedule',
               start_time=now + 10,
               info_type='localhost:4242',
               types=["timesharing=*,*"])

    meta_schedule('internal')

    job = db['Job'].query.one()
    assert job.state == 'Waiting'
    assert job.reservation == 'Scheduled'
def test_db_all_in_one_wakeup_node_energy_saving_internal_1(monkeypatch):
    """With internal energy saving, a job on absent-but-wakeable nodes waits."""
    config['ENERGY_SAVING_INTERNAL'] = 'yes'
    insert_job(res=[(60, [('resource_id=4', "")])], properties="")
    now = get_date()

    # Suspend nodes: mark them Absent but wakeable until now + 1000.
    db.query(Resource).update({Resource.state: 'Absent',
                               Resource.available_upto: now + 1000},
                              synchronize_session=False)
    db.commit()

    meta_schedule('internal')

    job = db['Job'].query.one()
    print(job.state)
    print(node_list)
    assert job.state == 'Waiting'
def test_db_moldable_2(monkeypatch):
    """A moldable job picks the alternative that lets it start with the other job."""
    now = get_date()
    insert_job(res=[(60, [('resource_id=3', "")])], properties="")
    insert_job(res=[(60, [('resource_id=4', "")]),
                    (70, [('resource_id=2', "")])],
               properties="")

    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        delta = prediction.start_time - now
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', delta)
        offsets.append(delta)

    # Both jobs start together thanks to the smaller moldable variant.
    assert offsets[0] == offsets[1]
def test_db_placeholder_2(monkeypatch):
    """A job whose allow= does not match the placeholder= cannot share the slot."""
    now = get_date()
    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               types=["placeholder=yop"])
    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               types=["allow=poy"])

    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        delta = prediction.start_time - now
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', delta)
        offsets.append(delta)

    # The mismatched allow tag forces the second job to a later start.
    assert offsets[0] != offsets[1]
def test_db_all_in_one_AR_4(monkeypatch):
    """An AR whose start time has just passed is launched."""
    now = get_date()
    job = insert_and_sched_ar(now + 10)

    # Move the reservation 20s into the past.
    past_start = now - 20
    db.query(GanttJobsPrediction).update(
        {GanttJobsPrediction.start_time: past_start},
        synchronize_session=False)
    db.commit()

    meta_schedule('internal')

    job = db['Job'].query.one()
    print('\n', job.id, job.state, ' ', job.reservation, job.start_time)
    assert job.state == 'toLaunch'
def test_db_metasched_ar_1(monkeypatch):
    """The default meta_schedule mode schedules a pending AR."""
    now = get_date()
    # sql_now = local_to_sql(now)

    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               reservation='toSchedule',
               start_time=(now + 10),
               info_type='localhost:4242')

    meta_schedule()

    job = db['Job'].query.one()
    print(job.state, ' ', job.reservation)
    assert job.state == 'Waiting'
    assert job.reservation == 'Scheduled'
def test_db_all_in_one_wakeup_node_1(monkeypatch):
    """Absent-but-wakeable nodes are asked to wake up; the job waits for them."""
    insert_job(res=[(60, [('resource_id=4', "")])], properties="")
    now = get_date()

    # Suspend nodes: Absent state, wakeable until now + 1000.
    db.query(Resource).update({Resource.state: 'Absent',
                               Resource.available_upto: now + 1000},
                              synchronize_session=False)
    db.commit()

    meta_schedule('internal')

    job = db['Job'].query.one()
    print(job.state)
    print(node_list)
    assert job.state == 'Waiting'
    assert node_list == [u'localhost0', u'localhost1']
def test_db_timesharing_2(monkeypatch):
    """timesharing=user,* jobs from different users must not overlap."""
    now = get_date()
    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               types=["timesharing=user,*"], user='******')
    insert_job(res=[(60, [('resource_id=4', "")])], properties="",
               types=["timesharing=user,*"], user='******')

    meta_schedule('internal')

    for job in db['Job'].query.all():
        print(job.state)

    offsets = []
    for prediction in db['GanttJobsPrediction'].query.all():
        delta = prediction.start_time - now
        print("moldable_id: ", prediction.moldable_id, ' start_time: ', delta)
        offsets.append(delta)

    assert offsets[0] != offsets[1]
def test_db_all_in_one_AR_6(monkeypatch):
    """An AR pushed partly into the past but still within walltime stays Waiting."""
    now = get_date()
    job = insert_and_sched_ar(now + 10, 600)

    # Shift the start 350s into the past (walltime is 600s, so it still fits).
    past_start = now - 350
    set_jobs_start_time(tuple([job.id]), past_start)
    db.query(GanttJobsPrediction).update(
        {GanttJobsPrediction.start_time: past_start},
        synchronize_session=False)
    # db.query(Resource).update({Resource.state: 'Suspected'}, synchronize_session=False)

    meta_schedule('internal')

    job = db['Job'].query.one()
    print('\n', job.id, job.state, ' ', job.reservation, job.start_time)
    assert job.state == 'Waiting'
def test_db_all_in_one_sleep_node_1(monkeypatch):
    """A job launches on available nodes while idle ones are put to sleep."""
    now = get_date()
    insert_job(res=[(60, [('resource_id=1', "")])], properties="")

    # Make all resources sleepable for a long while.
    db.query(Resource).update({Resource.available_upto: now + 50000},
                              synchronize_session=False)
    db.commit()

    meta_schedule('internal')

    job = db['Job'].query.one()
    print(job.state)
    print(node_list)
    assert job.state == 'toLaunch'
    # Halt order is not deterministic; accept both permutations.
    assert sorted(node_list) == [u'localhost1', u'localhost2']
def add_micheline_subjob(job_vars,
                         ssh_private_key, ssh_public_key,
                         array_id, array_index,
                         array_commands,
                         properties_applied_after_validation):
    """Insert one (sub)job of an array into the database.

    Writes the job row plus its challenge, moldable descriptions, resource
    groups/descriptions, types, dependencies and initial job_state_log entry.
    Returns (0, job_id).
    """
    # Estimate_job_nb_resources and incidentally test if properties and
    # resources request are coherent against avalaible resources
    date = get_date()
    properties = job_vars['properties']
    resource_request = job_vars['resource_request']
    resource_available, estimated_nb_resources = estimate_job_nb_resources(
        resource_request, properties)

    # Add admin properties to the job
    if properties_applied_after_validation:
        if properties:
            properties = '(' + properties + ') AND ' + properties_applied_after_validation
        else:
            properties = properties_applied_after_validation
    job_vars['properties'] = properties

    # TODO Verify the content of the ssh keys
    # TODO format job message
    # message = ''
    # my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime,
    #                                           $jobType, $reservationField, $queue_name, $project, $type_list, '');
    # TODO job_group

    # Default stdout/stderr names: OAR[.<name>].%jobid%.std{out,err};
    # otherwise substitute %jobname% in the user-supplied template.
    name = job_vars['name']
    stdout = job_vars['stdout']
    if not stdout:
        stdout = 'OAR'
        if name:
            stdout += '.' + name
        stdout += ".%jobid%.stdout"
    else:
        stdout = re.sub(r'%jobname%', name, stdout)
    job_vars['stdout'] = stdout

    stderr = job_vars['stderr']
    if not stderr:
        stderr = 'OAR'
        if name:
            stderr += '.' + name
        stderr += '.%jobid%.stderr'
    else:
        stderr = re.sub(r'%jobname%', name, stderr)
    # BUGFIX: was `stderr = job_vars['stderr']`, which discarded the value
    # computed above (compare the stdout handling).
    job_vars['stderr'] = stderr

    # Insert job
    kwargs = job_kwargs(job_vars, array_commands[0], date)
    kwargs['message'] = ''  # TODO message
    kwargs['array_index'] = array_index
    if array_id > 0:
        kwargs['array_id'] = array_id
    ins = Job.__table__.insert().values(**kwargs)
    result = db.session.execute(ins)
    job_id = result.inserted_primary_key[0]

    # A standalone job (or the first of an array) is its own array head.
    if array_id <= 0:
        db.query(Job).filter(Job.id == job_id).update({Job.array_id: job_id})
        db.commit()

    random_number = random.randint(1, 1000000000000)
    ins = Challenge.__table__.insert().values(
        {'job_id': job_id, 'challenge': random_number,
         'ssh_private_key': ssh_private_key, 'ssh_public_key': ssh_public_key})
    db.session.execute(ins)

    # Insert resources request in DB
    mld_jid_walltimes = []
    resource_desc_lst = []
    for moldable_instance in resource_request:
        resource_desc, walltime = moldable_instance
        if not walltime:
            # TODO add nullable=True on the walltime column in the model?
            walltime = 0
        mld_jid_walltimes.append(
            {'moldable_job_id': job_id, 'moldable_walltime': walltime})
        resource_desc_lst.append(resource_desc)

    # Insert MoldableJobDescription job_id and walltime
    result = db.session.execute(MoldableJobDescription.__table__.insert(),
                                mld_jid_walltimes)

    # Retrieve MoldableJobDescription.ids
    if len(mld_jid_walltimes) == 1:
        mld_ids = [result.inserted_primary_key[0]]
    else:
        r = db.query(MoldableJobDescription.id)\
              .filter(MoldableJobDescription.job_id == job_id).all()
        mld_ids = [e[0] for e in r]

    for mld_idx, resource_desc in enumerate(resource_desc_lst):
        # job_resource_groups
        mld_id_property = []
        res_lst = []
        moldable_id = mld_ids[mld_idx]
        for prop_res in resource_desc:
            prop = prop_res['property']
            res = prop_res['resources']
            mld_id_property.append({'res_group_moldable_id': moldable_id,
                                    'res_group_property': prop})
            res_lst.append(res)

        # Insert property for moldable
        # BUGFIX: capture the insert result; previously `result` still held
        # the MoldableJobDescription insert, so the single-group fast path
        # read the wrong primary key.
        result = db.session.execute(JobResourceGroup.__table__.insert(),
                                    mld_id_property)
        if len(mld_id_property) == 1:
            grp_ids = [result.inserted_primary_key[0]]
        else:
            r = db.query(JobResourceGroup.id)\
                  .filter(JobResourceGroup.moldable_id == moldable_id).all()
            grp_ids = [e[0] for e in r]

        # Insert job_resource_descriptions
        for grp_idx, res in enumerate(res_lst):
            res_description = []
            for idx, res_value in enumerate(res):
                res_description.append({'res_job_group_id': grp_ids[grp_idx],
                                        'res_job_resource_type': res_value['resource'],
                                        'res_job_value': res_value['value'],
                                        'res_job_order': idx})
            db.session.execute(JobResourceDescription.__table__.insert(),
                               res_description)

    # types of job
    types = job_vars['types']
    if types:
        ins = [{'job_id': job_id, 'type': typ} for typ in types]
        db.session.execute(JobType.__table__.insert(), ins)

    # TODO dependencies with min_start_shift and max_start_shift
    dependencies = job_vars['dependencies']
    if dependencies:
        ins = [{'job_id': job_id, 'job_id_required': dep} for dep in dependencies]
        db.session.execute(JobDependencie.__table__.insert(), ins)
        # foreach my $a (@{$anterior_ref}){
        #     if (my ($j,$min,$max) = $a =~ /^(\d+)(?:,([\[\]][-+]?\d+)?(?:,([\[\]][-+]?\d+)?)?)?$/) {
        #         $dbh->do("  INSERT INTO job_dependencies (job_id,job_id_required,min_start_shift,max_start_shift)
        #                     VALUES ($job_id,$j,'".(defined($min)?$min:"")."','".(defined($max)?$max:"")."')

    # Initial state: Waiting unless the job was submitted on hold.
    if not job_vars['hold']:
        req = db.insert(JobStateLog).values(
            {'job_id': job_id, 'job_state': 'Waiting', 'date_start': date})
        db.session.execute(req)
        db.commit()
        db.query(Job).filter(Job.id == job_id).update({Job.state: 'Waiting'})
        db.commit()
    else:
        req = db.insert(JobStateLog).values(
            {'job_id': job_id, 'job_state': 'Hold', 'date_start': date})
        db.session.execute(req)
        db.commit()

    return (0, job_id)
def set_job_state(jid, state):
    """Transition job *jid* to *state*, log the change and fire notifications.

    The UPDATE is guarded so that jobs already in Error/Terminated (or already
    in the wanted state) are left untouched; in that case only a warning is
    logged.
    """
    # TODO
    # TODO Later: notify_user
    # TODO Later: update_current_scheduler_priority
    result = db.query(Job).filter(Job.id == jid)\
               .filter(Job.state != 'Error')\
               .filter(Job.state != 'Terminated')\
               .filter(Job.state != state)\
               .update({Job.state: state})
    db.commit()
    if result == 1:  # OK for sqlite
        logger.debug(
            "Job state updated, job_id: " + str(jid) + ", wanted state: " + state)
        date = tools.get_date()
        # TODO: optimize job log
        # Close the currently-open state-log interval, then open a new one.
        db.query(JobStateLog).filter(JobStateLog.date_stop == 0)\
          .filter(JobStateLog.job_id == jid)\
          .update({JobStateLog.date_stop: date})
        db.commit()
        req = db.insert(JobStateLog).values(
            {'job_id': jid, 'job_state': state, 'date_start': date})
        db.session.execute(req)

        if state == "Terminated" or state == "Error" or state == "toLaunch" or \
           state == "Running" or state == "Suspended" or state == "Resuming":
            job = db.query(Job).filter(Job.id == jid).one()
            # BUGFIX: this branch tested `state == "Suspend"`, which can never
            # match here (the enclosing condition admits "Suspended"), so the
            # SUSPENDED notification was unreachable.
            if state == "Suspended":
                tools.notify_user(job, "SUSPENDED", "Job is suspended.")
            elif state == "Resuming":
                tools.notify_user(job, "RESUMING", "Job is resuming.")
            elif state == "Running":
                tools.notify_user(job, "RUNNING", "Job is running.")
            elif state == "toLaunch":
                update_current_scheduler_priority(job, "+2", "START")
            else:  # job is "Terminated" or ($state eq "Error")
                if job.stop_time < job.start_time:
                    db.query(Job).filter(Job.id == jid)\
                      .update({Job.stop_time: job.start_time})
                    db.commit()
                if job.assigned_moldable_job != "0":
                    # Update last_job_date field for resources used
                    update_scheduler_last_job_date(
                        date, int(job.assigned_moldable_job))
                if state == "Terminated":
                    tools.notify_user(job, "END", "Job stopped normally.")
                else:
                    # Verify if the job was suspended and if the resource
                    # property suspended is updated
                    if job.suspended == "YES":
                        r = get_current_resources_with_suspended_job()
                        if r != ():
                            db.query(Resource).filter(~Resource.id.in_(r))\
                              .update({Resource.suspended_jobs: 'NO'})
                        else:
                            db.query(Resource).update(
                                {Resource.suspended_jobs: 'NO'})
                        db.commit()
                    tools.notify_user(
                        job, "ERROR", "Job stopped abnormally or an OAR error occured.")
                update_current_scheduler_priority(job, "-2", "STOP")
                # Here we must not be asynchronously with the scheduler
                log_job(job)
                # $dbh is valid so these 2 variables must be defined
                nb_sent = tools.notify_almighty("ChState")
                if nb_sent == 0:
                    logger.warning("Not able to notify almighty to launch the job " +
                                   str(job.id) + " (socket error)")
    else:
        logger.warning("Job is already termindated or in error or wanted state, job_id: " +
                       str(jid) + ", wanted state: " + state)
def meta_schedule(mode='internal', plt=Platform()):
    """Run one meta-scheduling pass over all active queues.

    Initializes the gantt from running jobs, calls the per-queue scheduler
    (internal or external), handles reservations, launches ready jobs,
    manages energy saving (node halt/wakeup), processes Resuming/toError/
    toAckReservation/toLaunch jobs and returns an exit code (0, 1 or 2)
    used by the caller to decide on re-scheduling.

    NOTE(review): the mutable default `plt=Platform()` is shared across
    calls — presumably intentional here (single scheduler process); confirm.
    """
    exit_code = 0

    job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])

    if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
        if 'QUOTAS_FILE' not in config:
            config['QUOTAS_FILE'] = './quotas_conf.json'
        load_quotas_rules()

    tools.init_judas_notify_user()
    tools.create_almighty_socket()

    logger.debug(
        "Retrieve information for already scheduled reservations from \
        database before flush (keep assign resources)")

    # reservation ??.

    initial_time_sec = tools.get_date()  # time.time()
    initial_time_sql = local_to_sql(initial_time_sec)

    current_time_sec = initial_time_sec
    current_time_sql = initial_time_sql

    gantt_init_results = gantt_init_with_running_jobs(plt, initial_time_sec,
                                                      job_security_time)
    all_slot_sets, scheduled_jobs, besteffort_rid2jid = gantt_init_results
    resource_set = plt.resource_set()

    # Path for user of external schedulers
    if 'OARDIR' in os.environ:
        binpath = os.environ['OARDIR'] + '/'
    else:
        binpath = '/usr/local/lib/oar/'
        logger.warning(
            "OARDIR env variable must be defined, " + binpath + " is used by default")

    # Schedule queues by decreasing priority.
    for queue in db.query(Queue).order_by(text('priority DESC')).all():
        if queue.state == 'Active':
            logger.debug("Queue " + queue.name + ": Launching scheduler " +
                         queue.scheduler_policy + " at time " + initial_time_sql)

            if mode == 'external':  # pragma: no cover
                call_external_scheduler(binpath, scheduled_jobs, all_slot_sets,
                                        resource_set, job_security_time, queue,
                                        initial_time_sec, initial_time_sql)
            else:
                call_internal_scheduler(plt, scheduled_jobs, all_slot_sets,
                                        job_security_time, queue, initial_time_sec)

            handle_waiting_reservation_jobs(queue.name, resource_set,
                                            job_security_time, current_time_sec)

            # handle_new_AR_jobs
            check_reservation_jobs(
                plt, resource_set, queue.name, all_slot_sets, current_time_sec)

    jobs_to_launch, jobs_to_launch_lst, rid2jid_to_launch = get_gantt_jobs_to_launch(resource_set,
                                                                                     job_security_time,
                                                                                     current_time_sec)

    if check_besteffort_jobs_to_kill(jobs_to_launch, rid2jid_to_launch,
                                     current_time_sec, besteffort_rid2jid,
                                     resource_set) == 1:
        # We must kill some besteffort jobs
        tools.notify_almighty('ChState')
        exit_code = 2
    elif handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql) == 1:
        exit_code = 0

    # Update visu gantt tables
    update_gantt_visualization()

    # Manage dynamic node feature
    flag_hulot = False
    timeout_cmd = int(config['SCHEDULER_TIMEOUT'])

    if ((('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
         ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
          ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))) and
        (('SCHEDULER_NODE_MANAGER_SLEEP_TIME' in config) and
         ('SCHEDULER_NODE_MANAGER_IDLE_TIME' in config))):

        # Look at nodes that are unused for a duration
        idle_duration = int(config['SCHEDULER_NODE_MANAGER_IDLE_TIME'])
        sleep_duration = int(config['SCHEDULER_NODE_MANAGER_SLEEP_TIME'])

        idle_nodes = search_idle_nodes(current_time_sec)
        tmp_time = current_time_sec - idle_duration

        # NOTE(review): the loop variable shadows `idle_duration` above and is
        # compared against an absolute timestamp — presumably the dict maps
        # node -> last-activity date; confirm against search_idle_nodes.
        node_halt = []
        for node, idle_duration in iteritems(idle_nodes):
            if idle_duration < tmp_time:
                # Search if the node has enough time to sleep
                tmp = get_next_job_date_on_node(node)
                if (tmp is None) or (tmp - sleep_duration > current_time_sec):
                    # Search if node has not been woken up recently
                    wakeup_date = get_last_wake_up_date_of_node(node)
                    if (wakeup_date is None) or (wakeup_date < tmp_time):
                        node_halt.append(node)

        if node_halt != []:
            logger.debug("Powering off some nodes (energy saving): " + str(node_halt))
            # Using the built-in energy saving module to shut down nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('HALT', ' '.join(node_halt)):
                    logger.error("Communication problem with the energy saving module (Hulot)\n")
                flag_hulot = 1
            else:
                # Not using the built-in energy saving module to shut down nodes
                cmd = config['SCHEDULER_NODE_MANAGER_SLEEP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, node_halt):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd) +
                                 "s) while trying to poweroff some nodes")

    if (('SCHEDULER_NODE_MANAGER_SLEEP_CMD' in config) or
        ((config['ENERGY_SAVING_INTERNAL'] == 'yes') and
         ('ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD' in config))):
        # Get nodes which the scheduler wants to schedule jobs to,
        # but which are in the Absent state, to wake them up
        wakeup_time = int(config['SCHEDULER_NODE_MANAGER_WAKEUP_TIME'])
        nodes = get_gantt_hostname_to_wake_up(current_time_sec, wakeup_time)

        if nodes != []:
            logger.debug("Awaking some nodes: " + str(nodes))
            # Using the built-in energy saving module to wake up nodes
            if config['ENERGY_SAVING_INTERNAL'] == 'yes':
                if kao_tools.send_to_hulot('WAKEUP', ' '.join(nodes)):
                    logger.error("Communication problem with the energy saving module (Hulot)")
                flag_hulot = 1
            else:
                # Not using the built-in energy saving module to wake up nodes
                cmd = config['SCHEDULER_NODE_MANAGER_WAKE_UP_CMD']
                if kao_tools.fork_and_feed_stdin(cmd, timeout_cmd, nodes):
                    logger.error("Command " + cmd + "timeouted (" + str(timeout_cmd) +
                                 "s) while trying to wake-up some nodes ")

    # Send CHECK signal to Hulot if needed
    if not flag_hulot and (config['ENERGY_SAVING_INTERNAL'] == 'yes'):
        if kao_tools.send_to_hulot('CHECK', []):
            logger.error("Communication problem with the energy saving module (Hulot)")

    # Retrieve jobs according to their state and excluding job in 'Waiting' state.
    jobs_by_state = get_current_not_waiting_jobs()

    #
    # Search jobs to resume
    #

    #
    # TODO: TOFINISH
    #
    if 'Resuming' in jobs_by_state:
        logger.warn("Resuming job is NOT ENTIRELY IMPLEMENTED")
        for job in jobs_by_state['Resuming']:
            other_jobs = get_jobs_on_resuming_job_resources(job.id)
            # TODO : look for timesharing other jobs. What do we do?????
            if other_jobs == []:
                # We can resume the job
                logger.debug("[" + str(job.id) + "] Resuming job")
                if 'noop' in job.types:
                    resume_job_action(job.id)
                    logger.debug("[" + str(job.id) + "] Resume NOOP job OK")
                else:
                    script = config['JUST_BEFORE_RESUME_EXEC_FILE']
                    timeout = int(config['SUSPEND_RESUME_SCRIPT_TIMEOUT'])
                    if timeout is None:
                        timeout = kao_tools.get_default_suspend_resume_script_timeout()
                    skip = 0
                    logger.debug("[" + str(job.id) + "] Running post suspend script: `" +
                                 script + " " + str(job.id) + "'")
                    cmd_str = script + str(job.id)
                    return_code = -1
                    try:
                        return_code = call(cmd_str, shell=True, timeout=timeout)
                    except TimeoutExpired as e:
                        logger.error(str(e) + "[" + str(job.id) +
                                     "] Suspend script timeouted")
                        add_new_event('RESUME_SCRIPT_ERROR', job.id,
                                      "Suspend script timeouted")
                    if return_code != 0:
                        str_error = "[" + str(job.id) + "] Suspend script error, return code = "\
                                    + str(return_code)
                        logger.error(str_error)
                        add_new_event('RESUME_SCRIPT_ERROR', job.id, str_error)
                        frag_job(job.id)
                        tools.notify_almighty('Qdel')
                        skip = 1

                    cpuset_nodes = None
                    if 'JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD' in config:
                        cpuset_field = config['JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD']
                    else:
                        cpuset_field = ""
                    if cpuset_field and (skip == 0):
                        # TODO
                        cpuset_name = job.user + "_" + str(job.id)
                        cpuset_nodes = get_cpuset_values(cpuset_field,
                                                         job.assigned_moldable_id)
                        # TODO
                        suspend_data_hash = {'name': cpuset_name,
                                             'job_id': job.id,
                                             'oarexec_pid_file':
                                             kao_tools.get_oar_pid_file_name(job.id)}
                    if cpuset_nodes:
                        # TODO
                        taktuk_cmd = config['TAKTUK_CMD']
                        if 'SUSPEND_RESUME_FILE' in config:
                            suspend_file = config['SUSPEND_RESUME_FILE']
                        else:
                            # TODO
                            suspend_file = kao_tools.get_default_suspend_resume_file()

    #
    # TODO: TOFINISH
    #

    # Notify oarsub -I when they will be launched
    for j_info in get_gantt_waiting_interactive_prediction_date():
        job_id, job_info_type, job_start_time, job_message = j_info
        addr, port = job_info_type.split(':')
        new_start_prediction = local_to_sql(job_start_time)
        logger.debug("[" + str(job_id) + "] Notifying user of the start prediction: " +
                     new_start_prediction + "(" + job_message + ")")
        tools.notify_tcp_socket(addr, port, "[" + initial_time_sql + "] Start prediction: " +
                                new_start_prediction + " (" + job_message + ")")

    # Run the decisions
    # Process "toError" jobs
    if 'toError' in jobs_by_state:
        for job in jobs_by_state['toError']:
            addr, port = job.info_type.split(':')
            if job.type == 'INTERACTIVE' or\
               (job.type == 'PASSIVE' and job.reservation == 'Scheduled'):
                logger.debug("Notify oarsub job (num:" + str(job.id) + ") in error; jobInfo=" +
                             job.info_type)
                nb_sent1 = tools.notify_tcp_socket(addr, port, job.message + '\n')
                nb_sent2 = tools.notify_tcp_socket(addr, port, 'BAD JOB' + '\n')
                if (nb_sent1 == 0) or (nb_sent2 == 0):
                    logger.warn(
                        "Cannot open connection to oarsub client for" + str(job.id))
            logger.debug("Set job " + str(job.id) + " to state Error")
            set_job_state(job.id, 'Error')

    # Process toAckReservation jobs
    if 'toAckReservation' in jobs_by_state:
        for job in jobs_by_state['toAckReservation']:
            addr, port = job.info_type.split(':')
            logger.debug(
                "Treate job" + str(job.id) + " in toAckReservation state")

            nb_sent = tools.notify_tcp_socket(addr, port, 'GOOD RESERVATION' + '\n')

            if nb_sent == 0:
                logger.warn(
                    "Frag job " + str(job.id) + ", I cannot notify oarsub for the reservation")
                add_new_event('CANNOT_NOTIFY_OARSUB', str(
                    job.id), "Can not notify oarsub for the job " + str(job.id))

                # TODO ???
                # OAR::IO::lock_table / OAR::IO::unlock_table($base)
                frag_job(job.id)

                exit_code = 2
            else:
                logger.debug("Notify oarsub for a RESERVATION (idJob=" +
                             str(job.id) + ") --> OK; jobInfo=" + job.info_type)
                set_job_state(job.id, 'Waiting')
                if ((job.start_time - 1) <= current_time_sec) and (exit_code == 0):
                    exit_code = 1

    # Process toLaunch jobs
    if 'toLaunch' in jobs_by_state:
        for job in jobs_by_state['toLaunch']:
            notify_to_run_job(job.id)

    logger.debug("End of Meta Scheduler")
    return exit_code
def test_db_all_in_one_AR_2(monkeypatch):
    """An AR requested entirely in the past must be rejected (state Error)."""
    job = insert_and_sched_ar(get_date() - 1000)
    print(job.state, ' ', job.reservation)
    assert job.state == 'Error'
def add_micheline_simple_array_job(job_vars,
                                   ssh_private_key, ssh_public_key,
                                   array_id, array_index,
                                   array_commands,
                                   properties_applied_after_validation):
    """Insert a whole non-moldable array job (one row per command) in bulk.

    Returns (0, job_id_list) with the ids of all inserted jobs.
    """
    job_id_list = []
    date = get_date()

    # Check the jobs are no moldable
    resource_request = job_vars['resource_request']
    if len(resource_request) > 1:
        print_error('array jobs cannot be moldable')
        sub_exit(-30)

    # Estimate_job_nb_resources and incidentally test if properties and
    # resources request are coherent against avalaible resources
    properties = job_vars['properties']
    resource_available, estimated_nb_resources = estimate_job_nb_resources(
        resource_request, properties)

    # Add admin properties to the job
    if properties_applied_after_validation:
        if properties:
            properties = '(' + properties + ') AND ' + properties_applied_after_validation
        else:
            properties = properties_applied_after_validation
    job_vars['properties'] = properties

    # TODO format job message
    # my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime,
    #                                           $jobType, $reservationField, $queue_name, $project, $type_list, '');

    # Default stdout/stderr names: OAR[.<name>].%jobid%.std{out,err};
    # otherwise substitute %jobname% in the user-supplied template.
    name = job_vars['name']
    stdout = job_vars['stdout']
    if not stdout:
        stdout = 'OAR'
        if name:
            stdout += '.' + name
        stdout += ".%jobid%.stdout"
    else:
        stdout = re.sub(r'%jobname%', name, stdout)
    job_vars['stdout'] = stdout

    stderr = job_vars['stderr']
    if not stderr:
        stderr = 'OAR'
        if name:
            stderr += '.' + name
        stderr += '.%jobid%.stderr'
    else:
        stderr = re.sub(r'%jobname%', name, stderr)
    # BUGFIX: was `stderr = job_vars['stderr']`, which discarded the value
    # computed above (compare the stdout handling and add_micheline_subjob).
    job_vars['stderr'] = stderr

    # Insert first job
    kwargs = job_kwargs(job_vars, array_commands[0], date)
    kwargs['message'] = ''  # TODO message
    kwargs['array_index'] = array_index
    # print(kwargs)

    ins = Job.__table__.insert().values(**kwargs)
    result = db.session.execute(ins)
    first_job_id = result.inserted_primary_key[0]

    # Update array_id: the first job's id becomes the array head.
    array_id = first_job_id
    db.query(Job).filter(Job.id == first_job_id).update({Job.array_id: array_id})
    db.commit()

    # Insert remaining array jobs with array_id
    jobs_data = []
    kwargs['array_id'] = array_id
    for command in array_commands[1:]:
        job_data = kwargs.copy()
        job_data['command'] = command
        jobs_data.append(job_data)
    db.session.execute(Job.__table__.insert(), jobs_data)
    db.commit()

    # Retrieve job_ids thanks to array_id value
    result = db.query(Job.id).filter(Job.array_id == array_id).all()
    job_id_list = [r[0] for r in result]

    # Populate challenges and moldable_job_descriptions tables
    challenges = []
    moldable_job_descriptions = []

    walltime = resource_request[0][1]
    if not walltime:
        walltime = default_job_walltime

    for job_id in job_id_list:
        random_number = random.randint(1, 1000000000000)
        challenges.append({'job_id': job_id, 'challenge': random_number})
        moldable_job_descriptions.append({'moldable_job_id': job_id,
                                          'moldable_walltime': walltime})

    db.session.execute(Challenge.__table__.insert(), challenges)
    db.session.execute(MoldableJobDescription.__table__.insert(),
                       moldable_job_descriptions)
    db.commit()

    # Retrieve moldable_ids thanks to job_ids
    result = db.query(MoldableJobDescription.id)\
               .filter(MoldableJobDescription.job_id.in_(tuple(job_id_list)))\
               .order_by(MoldableJobDescription.id).all()
    moldable_ids = [r[0] for r in result]

    # Populate job_resource_groups table
    job_resource_groups = []
    resource_desc_lst = resource_request[0][0]

    for moldable_id in moldable_ids:
        for resource_desc in resource_desc_lst:
            prop = resource_desc['property']
            job_resource_groups.append({'res_group_moldable_id': moldable_id,
                                        'res_group_property': prop})

    db.session.execute(JobResourceGroup.__table__.insert(), job_resource_groups)
    db.commit()

    # Retrieve res_group_ids thanks to moldable_ids
    result = db.query(JobResourceGroup.id)\
               .filter(JobResourceGroup.moldable_id.in_(tuple(moldable_ids)))\
               .order_by(JobResourceGroup.id).all()
    res_group_ids = [r[0] for r in result]

    # Populate job_resource_descriptions table
    job_resource_descriptions = []
    k = 0
    for i in range(len(array_commands)):  # Nb jobs
        for resource_desc in resource_desc_lst:
            order = 0
            for res_val in resource_desc['resources']:
                job_resource_descriptions.append(
                    {'res_job_group_id': res_group_ids[k],
                     'res_job_resource_type': res_val['resource'],
                     'res_job_value': res_val['value'],
                     'res_job_order': order})
                order += 1
            k += 1

    db.session.execute(JobResourceDescription.__table__.insert(),
                       job_resource_descriptions)
    db.commit()

    # Populate job_types table
    types = job_vars['types']
    if types:
        job_types = []
        for job_id in job_id_list:
            for typ in types:
                job_types.append({'job_id': job_id, 'type': typ})
        db.session.execute(JobType.__table__.insert(), job_types)
        db.commit()

    # TODO Anterior job setting

    # Hold/Waiting management, job_state_log setting
    # Job is inserted with hold state first
    state_log = 'Hold'
    # BUGFIX: the condition was `if job_vars['hold']`, which marked held jobs
    # as Waiting; add_micheline_subjob uses `if not job_vars['hold']`.
    if not job_vars['hold']:
        state_log = 'Waiting'
    db.query(Job).filter(Job.array_id == array_id).update({Job.state: state_log})
    # BUGFIX: was `db.commit` (attribute access, never called), so the state
    # update was left uncommitted here.
    db.commit()

    # Update array_id field and set job to state if waiting and insert job_state_log
    job_state_logs = [{'job_id': job_id, 'job_state': state_log, 'date_start': date}
                      for job_id in job_id_list]
    db.session.execute(JobStateLog.__table__.insert(), job_state_logs)
    db.commit()

    return (0, job_id_list)