def handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql):
    """Prepare every job of ``jobs_to_launch_lst`` to be launched.

    A job whose ``reservation`` is ``'Scheduled'`` and whose ``start_time`` is
    already behind ``current_time_sec`` first gets its walltime shortened by
    the elapsed delay: the moldable max time and the gantt start time are
    updated, a ``REDUCE_RESERVATION_WALLTIME`` event is recorded and the
    ``W=h:m:s`` field of the job message is rewritten. Every job is then
    handed to ``prepare_job_to_be_launched``.

    :param jobs_to_launch_lst: iterable of job objects; the attributes read
        here are ``id``, ``walltime``, ``reservation``, ``start_time``,
        ``moldable_id`` and ``message``.
    :param current_time_sec: current time, compared and subtracted with
        ``job.start_time`` (presumably seconds -- confirm with the caller).
    :param current_time_sql: current time as a string, only used in logs.
    :return: ``1`` if at least one job was processed, ``0`` otherwise.
    """
    logger.debug("Begin processing jobs to launch (start time <= "
                 + current_time_sql + ")")

    return_code = 0

    for job in jobs_to_launch_lst:
        return_code = 1
        logger.debug("Set job " + str(job.id) + " state to toLaunch at "
                     + current_time_sql)

        #
        # Advance Reservation
        #
        walltime = job.walltime
        if (job.reservation == 'Scheduled') and (job.start_time < current_time_sec):
            # The reservation starts late: keep its original end date by
            # removing the elapsed delay from the walltime.
            max_time = walltime - (current_time_sec - job.start_time)

            set_moldable_job_max_time(job.moldable_id, max_time)
            set_gantt_job_start_time(job.moldable_id, current_time_sec)
            logger.warn("Reduce walltime of job " + str(job.id)
                        + " to " + str(max_time)
                        + " (was " + str(walltime) + ")")

            add_new_event('REDUCE_RESERVATION_WALLTIME', job.id,
                          "Change walltime from " + str(walltime) + " to "
                          + str(max_time))

            # Reflect the new walltime in the "W=..." field of the message.
            w_max_time = duration_to_sql(max_time)
            new_message = re.sub(r'W=\d+:\d+:\d+', 'W=' + w_max_time, job.message)

            # Only write the message back when the substitution changed it.
            if new_message != job.message:
                set_job_message(job.id, new_message)

        prepare_job_to_be_launched(job, current_time_sec)

    logger.debug("End processing of jobs to launch")

    return return_code
def check_reservation_jobs(plt, resource_set, queue_name, all_slot_sets, current_time_sec):
    """Process the new Advance Reservations (AR) waiting in ``queue_name``.

    Jobs returned by ``plt.get_waiting_jobs(queue_name, 'toSchedule')`` are
    examined one by one, using only their first moldable description:

    * a reservation whose end date is already reached is moved to
      ``'toError'`` and skipped;
    * otherwise resources are searched in the ``'default'`` slot set; when
      quotas are enabled in ``config`` they are checked as well;
    * a rejected reservation is moved to ``'toError'`` with a message telling
      whether quotas or a lack of resources caused the rejection;
    * a validated reservation gets ``moldable_id`` / ``res_set`` assigned and
      is moved to ``'toAckReservation'``.

    The validated reservations are finally saved with ``save_assigns``.

    :param plt: platform object providing ``get_waiting_jobs`` and
        ``get_data_jobs``.
    :param resource_set: resource description; ``hierarchy`` and
        ``default_resource_itvs`` are read here.
    :param queue_name: name of the queue, used for the job query and in logs.
    :param all_slot_sets: mapping of slot-set name to an object exposing
        ``slots``; only the ``'default'`` entry is used.
    :param current_time_sec: current time, compared with ``job.start_time``.
    :return: ``None``.
    """
    logger.debug("Queue " + queue_name + ": begin processing of new reservations")

    ar_jobs_scheduled = {}

    ar_jobs, ar_jids, nb_ar_jobs = plt.get_waiting_jobs(queue_name, 'toSchedule')
    logger.debug("nb_ar_jobs:" + str(nb_ar_jobs))

    if nb_ar_jobs > 0:
        job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])
        plt.get_data_jobs(ar_jobs, ar_jids, resource_set, job_security_time)

        logger.debug("Try and schedule new reservations")
        for jid in ar_jids:
            job = ar_jobs[jid]
            logger.debug("Find resource for Advance Reservation job:" + str(job.id))

            # It is a reservation, we take care only of the first moldable job
            moldable_id, walltime, hy_res_rqts = job.mld_res_rqts[0]

            # Test if reservation is too old
            if current_time_sec >= (job.start_time + walltime):
                logger.warn("[" + str(job.id)
                            + "] Canceling job: reservation is too old")
                set_job_message(job.id, "Reservation too old")
                set_job_state(job.id, 'toError')
                continue

            if job.start_time < current_time_sec:
                # TODO update to DB ????
                job.start_time = current_time_sec

            ss_name = 'default'

            # TODO container
            # if 'inner' in job.types:
            #    ss_name = job.types['inner']

            # TODO: test if container is an AR job

            slots = all_slot_sets[ss_name].slots

            # End of the searched window: the security time is not part of it.
            t_e = job.start_time + walltime - job_security_time
            sid_left, sid_right = get_encompassing_slots(slots, job.start_time, t_e)

            if job.ts or (job.ph == ALLOW):
                itvs_avail = intersec_ts_ph_itvs_slots(slots, sid_left, sid_right, job)
            else:
                itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)

            itvs = find_resource_hierarchies_job(itvs_avail, hy_res_rqts,
                                                 resource_set.hierarchy)

            quotas_ok = True
            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, resource_set.default_resource_itvs))
                quotas_ok, quotas_msg, rule, value = check_slots_quotas(
                    slots, sid_left, sid_right, job, nb_res, walltime)

            if not quotas_ok:
                # Rejected because of quotas: flag the job once and keep the
                # quota-specific message instead of letting the generic
                # "not enough resources" branch overwrite it.
                logger.info("Quotas limitaion reached, job:" + str(job.id)
                            + ", " + quotas_msg + ", rule: " + str(rule)
                            + ", value: " + str(value))
                set_job_state(job.id, 'toError')
                set_job_message(job.id,
                                "This advance reservation cannot run due to quotas")
            elif itvs == []:
                # Not enough resource available
                logger.warn("[" + str(job.id)
                            + "] advance reservation cannot be validated, not enough resources")
                set_job_state(job.id, 'toError')
                set_job_message(job.id, "This advance reservation cannot run")
            else:
                # The reservation can be scheduled
                logger.debug("[" + str(job.id) + "] advance reservation is validated")
                job.moldable_id = moldable_id
                job.res_set = itvs
                ar_jobs_scheduled[job.id] = job
                # if 'container' in job.types
                #    slot = Slot(1, 0, 0, job.res_set[:], job.start_time,
                #                job.start_time + job.walltime - job_security_time)
                # slot.show()
                #    slots_sets[job.id] = SlotSet(slot)

                set_job_state(job.id, 'toAckReservation')

            # NOTE(review): applied to every examined job, validated or not
            # (jobs that were too old are skipped above) -- confirm this
            # nesting against the original file layout.
            set_job_resa_state(job.id, 'Scheduled')

    # ar_jobs_scheduled is a dict: test its emptiness directly, comparing it
    # with [] is always true.
    if ar_jobs_scheduled:
        logger.debug("Save AR jobs' assignements in database")
        save_assigns(ar_jobs_scheduled, resource_set)

    logger.debug("Queue " + queue_name + ": end processing of new reservations")
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time, current_time_sec):
    """Handle accepted Advance Reservations (AR) that are still waiting.

    For each job returned by ``get_waiting_scheduled_AR_jobs``:

    * if its end date (``start_time + walltime``) is passed, the job is set
      to ``'Error'``;
    * if its start date is passed and none of its resources is available,
      its gantt start time is pushed to ``current_time_sec + 1``;
    * if its start date is passed and ``reservation_waiting_timeout`` is not
      exceeded yet, the start is pushed the same way while some resources
      are still missing;
    * once the timeout is exceeded, the missing resources are removed from
      the job, an event is recorded and the ``R=<n>`` field of the job
      message is updated.

    :param queue_name: name of the queue, used for the job query and in logs.
    :param resource_set: resource description; ``roid_itvs`` is read here to
        determine the currently available resources.
    :param job_security_time: forwarded to ``get_waiting_scheduled_AR_jobs``.
    :param current_time_sec: current time, compared with ``job.start_time``.
    :return: ``None``.
    """
    logger.debug("Queue " + queue_name
                 + ": begin processing accepted Advance Reservations")

    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set,
                                            job_security_time, current_time_sec)

    for job in ar_jobs:
        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if current_time_sec > (job.start_time + walltime):
            logger.warn("[" + str(job.id)
                        + "] set job state to Error: avdance reservation expired and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
            continue

        # Determine current available resources
        avail_res = intersec(resource_set.roid_itvs, job.res_set)

        if job.start_time >= current_time_sec:
            # Start date not passed yet: nothing to do for this job.
            continue

        if avail_res == []:
            # The AR job is waiting to be launched due to nodes' unavailabilities
            logger.warn("[%s] advance reservation is waiting because no resource is present"
                        % str(job.id))

            # Delay launching time
            set_gantt_job_start_time(moldable_id, current_time_sec + 1)
        elif (job.start_time + reservation_waiting_timeout) > current_time_sec:
            if not equal_itvs(avail_res, job.res_set):
                # The expected resources are not all available,
                # wait the specified timeout
                logger.warn("[" + str(job.id)
                            + "] advance reservation is waiting because not all "
                            "resources are available yet")
                set_gantt_job_start_time(moldable_id, current_time_sec + 1)
        else:
            # It's time to launch the AR job, remove missing resources.
            # NOTE(review): this branch also runs when no resource is
            # missing (empty difference) -- confirm that is intended.
            missing_resources_itvs = sub_intervals(job.res_set, avail_res)
            remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                      resource_set)
            logger.warn("[" + str(job.id)
                        + "] remove some resources assigned to this advance reservation, "
                        "because there are not Alive")

            add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                          job.id,
                          "[MetaSched] Reduce the number of resources for the job "
                          + str(job.id))

            # Reflect the reduced resource count in the "R=..." message field.
            nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
            new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
            if new_message != job.message:
                set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name
                 + ": end processing of reservations with missing resources")