def find_resource_n_h(itvs, hy, rqts, top, h, h_bottom):
    """Recursively find scattered resource blocks through the hierarchy.

    :param itvs: available resource intervals
    :param hy: hierarchy levels (sequence of lists of interval blocks)
    :param rqts: requested number of blocks per hierarchy level
    :param top: blocks of the current (top) hierarchy level to explore
    :param h: current hierarchy level index
    :param h_bottom: index of the deepest requested hierarchy level
    :return: accumulated intervals satisfying the request, or [] when the
             request cannot be satisfied.
    """
    # potential available blocks at this level
    avail_bks = keep_no_empty_scat_bks(itvs, top)
    l_avail_bks = len(avail_bks)

    if l_avail_bks < rqts[h]:
        # not enough scattered blocks
        return []

    if h == h_bottom - 2:
        # reached the last level of the requested hierarchy:
        # iterate on available blocks and extract rqts[h+1] sub-blocks
        # from the next (deepest) level
        itvs_acc = []
        i = 0
        nb_r = 0
        while (i < l_avail_bks) and (nb_r != rqts[h]):
            # compute each intersection exactly once (the original
            # comprehension evaluated intersec() twice per element:
            # once in the filter and once in the output expression)
            avail_sub_bks = [b for b in
                             (intersec(avail_bks[i], x) for x in hy[h + 1])
                             if b != []]
            r = extract_n_scattered_block_itv(itvs, avail_sub_bks, rqts[h + 1])
            if r != []:
                # win for this top block
                itvs_acc.extend(r)
                nb_r += 1
            i += 1
        if nb_r == rqts[h]:
            return itvs_acc
        else:
            return []
    else:
        # intermediate hierarchy level: recurse on each available block
        itvs_acc = []
        i = 0
        nb_r = 0
        while (i < l_avail_bks) and (nb_r != rqts[h]):
            r = find_resource_n_h(itvs, hy, rqts, [avail_bks[i]], h + 1, h_bottom)
            if r != []:
                # win for this top block
                itvs_acc.extend(r)
                nb_r += 1
            i += 1
        if nb_r == rqts[h]:
            return itvs_acc
        else:
            return []
def find_resource_n_h_local(itvs, hy, rqts, top, h, h_bottom):
    """Pick n = rqts[h+1] resources from the smallest available block (best fit).

    Blocks are the intersections of each entry of *top* with the available
    intervals *itvs*; candidates are scanned by increasing size and the first
    block large enough supplies exactly n resources (last interval truncated).

    ``hy`` and ``h_bottom`` are unused; the signature mirrors
    find_resource_n_h so both can be called interchangeably.

    :return: list of (begin, end) interval tuples totalling n resources,
             or [] when no single block is large enough.
    """
    n = rqts[h + 1]
    size_bks = []
    avail_bks = []
    for top_itvs in top:
        avail_itvs = intersec(top_itvs, itvs)
        avail_bks.append(avail_itvs)
        size_bks.append(itvs_size(avail_itvs))

    # candidate block ids ordered by increasing available size
    sorted_ids = sorted(range(len(avail_bks)), key=lambda k: size_bks[k])

    for idx in sorted_ids:
        # BUG FIX: test the size of the sorted candidate (size_bks[idx]);
        # the original tested size_bks[i] (the enumeration position),
        # which compared the wrong block's size.
        if size_bks[idx] >= n:
            res_itvs = []
            k = 0  # number of resources taken so far
            for itv in avail_bks[idx]:
                size_itv = itv[1] - itv[0] + 1
                if (k + size_itv) < n:
                    res_itvs.append(itv)
                    # BUG FIX: the original never incremented k, so the
                    # accumulation over several intervals returned a wrong
                    # number of resources.
                    k += size_itv
                else:
                    # truncate the last interval to take exactly n resources
                    res_itvs.append((itv[0], itv[0] + (n - k - 1)))
                    return res_itvs
    return []
def intersec_ts_ph_itvs_slots(slots, sid_left, sid_right, job):
    # Intersect the available intervals of every slot in the chain
    # [sid_left .. sid_right] (followed through .next links), augmenting each
    # slot's intervals with the ts ("*"/user/name-keyed) and ph intervals the
    # job may reuse — presumably time-sharing and placeholder; confirm against
    # the slot-set code.
    sid = sid_left
    itvs_acc = []
    while True:
        slot = slots[sid]
        itvs = slot.itvs
        if job.ts:
            # slot.ts_itvs is keyed [user][name]; "*" acts as a wildcard.
            # NOTE: elif semantics — the wildcard-user branch shadows the
            # specific-user branch, and within a user "*" shadows the
            # job-name entry; matches are not combined.
            if "*" in slot.ts_itvs:
                if "*" in slot.ts_itvs["*"]:
                    itvs = add_intervals(itvs, slot.ts_itvs["*"]["*"])
                elif job.name in slot.ts_itvs["*"]:
                    itvs = add_intervals(itvs, slot.ts_itvs["*"][job.name])
            elif job.user in slot.ts_itvs:
                if "*" in slot.ts_itvs[job.user]:
                    itvs = add_intervals(itvs, slot.ts_itvs[job.user]["*"])
                elif job.name in slot.ts_itvs[job.user]:
                    itvs = add_intervals(itvs, slot.ts_itvs[job.user][job.name])
        if job.ph == ALLOW:
            # job is allowed to use the named ph intervals of this slot
            if job.ph_name in slot.ph_itvs:
                itvs = add_intervals(itvs, slot.ph_itvs[job.ph_name])
        if not itvs_acc:
            # first slot considered: seed the accumulator
            itvs_acc = itvs
        else:
            itvs_acc = intersec(itvs_acc, itvs)
        if sid == sid_right:
            break
        sid = slots[sid].next
    return itvs_acc
def intersec_itvs_slots(slots, sid_left, sid_right):
    """Intersect the itvs of every slot in the chain from sid_left to
    sid_right inclusive, walking the linked slots through their .next ids."""
    current = sid_left
    acc = slots[current].itvs
    # fold each following slot's intervals into the accumulator until the
    # right boundary slot has been included
    while current != sid_right:
        current = slots[current].next
        acc = intersec(acc, slots[current].itvs)
    return acc
def update(self, job, prev_nb_res=0, prev_duration=0):
    """Add one job's consumption to the quotas counters.

    For every quota job type matching the job, each wildcarded
    (queue, project, type, user) counter entry is increased by:
      [0] the number of used resources,
      [1] one running job,
      [2] resources * duration (resource-seconds).

    :param job: job whose resources/walltime are accounted
    :param prev_nb_res: fallback resource count when job has no res_set
    :param prev_duration: fallback duration when job has no walltime
    """
    queue = job.queue_name
    project = job.project
    user = job.user

    if hasattr(job, 'res_set'):
        # BUG FIX: the cached resource count lives on the *job*; the
        # original tested hasattr(self, 'nb_res'), which never matches,
        # so job.nb_res was recomputed on every call.
        if not hasattr(job, 'nb_res'):
            job.nb_res = itvs_size(intersec(job.res_set,
                                            rs.default_resource_itvs))
        nb_resources = job.nb_res
    else:
        nb_resources = prev_nb_res

    if hasattr(job, 'walltime'):
        duration = job.walltime
    else:
        duration = prev_duration

    for t in quotas_job_types:
        if (t == '*') or (t in job.types):
            # every wildcard combination of (queue, project, type, user)
            for key in (('*', '*', t, '*'), ('*', '*', t, user),
                        ('*', project, t, '*'), (queue, '*', t, '*'),
                        (queue, project, t, user), (queue, project, t, '*'),
                        (queue, '*', t, user), ('*', project, t, user)):
                counter = self.counters[key]
                counter[0] += nb_resources              # used resources
                counter[1] += 1                         # running jobs
                counter[2] += nb_resources * duration   # resource * seconds
def find_contiguous_1h(itvs_avail, hy_res_rqts, hy, beginning):
    # NOT FOR PRODUCTION USE
    # Supports a single resource group and an ordered resource_id hierarchy
    # level only: returns the first contiguous run of n resources, as a
    # single-interval list, or [] when none fits.
    hy_level_nbs, constraints = hy_res_rqts[0]  # one resource group
    l_name, n = hy_level_nbs[0]                 # one hierarchy level

    contiguous = aggregate_itvs(intersec(constraints, itvs_avail))

    if l_name != "resource_id":
        return []
    for begin, end in contiguous:
        if (end - begin + 1) >= n:
            # first-fit: take the leading n resources of this interval
            return [(begin, begin + n - 1)]
    return []
def find_resource_hierarchies_job(itvs_slots, hy_res_rqts, hy):
    '''Find resources in interval for all resource subrequests of a moldable
    instance of a job.'''
    result = []
    for hy_level_nbs, constraints in hy_res_rqts:
        # split the (level_name, count) pairs into parallel lists
        hy_levels = [hy[l_name] for l_name, _ in hy_level_nbs]
        hy_nbs = [n for _, n in hy_level_nbs]

        cts_itvs_slots = intersec(constraints, itvs_slots)
        result.extend(find_resource_hierarchies_scattered(cts_itvs_slots,
                                                          hy_levels, hy_nbs))
    return result
def find_local(itvs_slots, hy_res_rqts, hy, beginning):
    """2 levels of hierarchy supported, with sorting by increasing
    blocks' size."""
    result = []
    for hy_level_nbs, constraints in hy_res_rqts:
        hy_levels = [hy[name] for name, _ in hy_level_nbs]
        hy_nbs = [nb for _, nb in hy_level_nbs]

        cts_slots = intersec(constraints, itvs_slots)
        found = find_resource_hierarchies_scattered_local(cts_slots,
                                                          hy_levels, hy_nbs)
        if not found:
            # one unsatisfiable subrequest invalidates the whole request
            return []
        result.extend(found)
    return result
def find_contiguous_sorted_1h(itvs_avail, hy_res_rqts, hy, beginning):
    # NOT FOR PRODUCTION USE
    # Supports a single resource group and an ordered resource_id hierarchy
    # level; candidate intervals are tried by increasing length (best fit).
    hy_level_nbs, constraints = hy_res_rqts[0]  # one resource group
    l_name, n = hy_level_nbs[0]                 # one hierarchy level

    candidates = aggregate_itvs(intersec(constraints, itvs_avail))
    # indices of candidates ordered by interval length, smallest first
    by_size = sorted(range(len(candidates)),
                     key=lambda k: candidates[k][1] - candidates[k][0])

    if l_name == "resource_id":
        for k in by_size:
            itv = candidates[k]
            if (itv[1] - itv[0] + 1) >= n:
                # smallest interval that fits: take its leading n resources
                return [(itv[0], itv[0] + n - 1)]
    return []
def check_reservation_jobs(plt, resource_set, queue_name, all_slot_sets, current_time_sec):
    """Processing of new Advance Reservations.

    For each waiting AR job of *queue_name* in the 'toSchedule' reservation
    state: cancel it if it is already too old, otherwise try to find
    resources in the encompassing slots (honoring quotas when enabled) and
    either validate ('toAckReservation'/'Scheduled') or reject ('toError')
    the reservation. Validated assignments are saved in database.
    """
    logger.debug("Queue " + queue_name + ": begin processing of new reservations")

    ar_jobs_scheduled = {}

    ar_jobs, ar_jids, nb_ar_jobs = plt.get_waiting_jobs(queue_name, 'toSchedule')
    logger.debug("nb_ar_jobs:" + str(nb_ar_jobs))

    if nb_ar_jobs > 0:
        job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])
        plt.get_data_jobs(ar_jobs, ar_jids, resource_set, job_security_time)

        logger.debug("Try and schedule new reservations")
        for jid in ar_jids:
            job = ar_jobs[jid]
            logger.debug("Find resource for Advance Reservation job:" + str(job.id))

            # It is a reservation: we take care only of the first moldable job
            moldable_id, walltime, hy_res_rqts = job.mld_res_rqts[0]

            # test if reservation is too old
            if current_time_sec >= (job.start_time + walltime):
                logger.warn("[" + str(job.id) + "] Canceling job: reservation is too old")
                set_job_message(job.id, "Reservation too old")
                set_job_state(job.id, 'toError')
                continue
            else:
                if job.start_time < current_time_sec:
                    # TODO update to DB ????
                    job.start_time = current_time_sec

            ss_name = 'default'
            # TODO container
            # if 'inner' in job.types:
            #     ss_name = job.types['inner']
            # TODO: test if container is an AR job

            slots = all_slot_sets[ss_name].slots
            t_e = job.start_time + walltime - job_security_time
            sid_left, sid_right = get_encompassing_slots(slots, job.start_time, t_e)

            if job.ts or (job.ph == ALLOW):
                itvs_avail = intersec_ts_ph_itvs_slots(slots, sid_left, sid_right, job)
            else:
                itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)

            itvs = find_resource_hierarchies_job(itvs_avail, hy_res_rqts,
                                                 resource_set.hierarchy)

            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, resource_set.default_resource_itvs))
                res = check_slots_quotas(slots, sid_left, sid_right, job,
                                         nb_res, walltime)
                (quotas_ok, quotas_msg, rule, value) = res
                if not quotas_ok:
                    itvs = []
                    logger.info("Quotas limitaion reached, job:" + str(job.id) +
                                ", " + quotas_msg + ", rule: " + str(rule) +
                                ", value: " + str(value))
                    set_job_state(job.id, 'toError')
                    set_job_message(job.id,
                                    "This advance reservation cannot run due to quotas")

            if itvs == []:
                # not enough resource available
                logger.warn("[" + str(job.id) +
                            "] advance reservation cannot be validated, not enough resources")
                set_job_state(job.id, 'toError')
                set_job_message(job.id, "This advance reservation cannot run")
            else:
                # The reservation can be scheduled
                logger.debug("[" + str(job.id) + "] advance reservation is validated")
                job.moldable_id = moldable_id
                job.res_set = itvs
                ar_jobs_scheduled[job.id] = job
                # if 'container' in job.types
                #     slot = Slot(1, 0, 0, job.res_set[:], job.start_time,
                #                 job.start_time + job.walltime - job_security_time)
                #     slot.show()
                #     slots_sets[job.id] = SlotSet(slot)
                set_job_state(job.id, 'toAckReservation')
                set_job_resa_state(job.id, 'Scheduled')

    # BUG FIX: ar_jobs_scheduled is a dict, so the original "!= []" test was
    # always True and save_assigns ran even with nothing scheduled; test
    # truthiness instead.
    if ar_jobs_scheduled:
        logger.debug("Save AR jobs' assignements in database")
        save_assigns(ar_jobs_scheduled, resource_set)

    logger.debug("Queue " + queue_name + ": end processing of new reservations")
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time, current_time_sec):
    """Handle accepted Advance Reservation (AR) jobs waiting to start.

    Expired reservations are set to Error. Otherwise, if the start time has
    passed: with no resource available the launch is delayed; within the
    waiting timeout the job keeps waiting for all its resources; past the
    timeout the missing (dead) resources are removed and the job message's
    resource count is updated.

    NOTE(review): relies on module-level ``reservation_waiting_timeout`` —
    presumably a configured grace period; confirm where it is set.
    """
    logger.debug("Queue " + queue_name +
                 ": begin processing accepted Advance Reservations")

    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set,
                                            job_security_time, current_time_sec)

    for job in ar_jobs:
        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if current_time_sec > (job.start_time + walltime):
            # BUG FIX: "avdance" typo corrected in the log message
            logger.warn("[" + str(job.id) +
                        "] set job state to Error: advance reservation expired and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
        else:
            # Determine current available ressources
            avail_res = intersec(resource_set.roid_itvs, job.res_set)

            # Test if the AR job is waiting to be launched due to nodes' unavailabilities
            if (avail_res == []) and (job.start_time < current_time_sec):
                logger.warn("[%s] advance reservation is waiting because no resource is present"
                            % str(job.id))
                # Delay launching time
                set_gantt_job_start_time(moldable_id, current_time_sec + 1)
            elif job.start_time < current_time_sec:
                if (job.start_time + reservation_waiting_timeout) > current_time_sec:
                    if not equal_itvs(avail_res, job.res_set):
                        # The expected ressources are not all available,
                        # wait the specified timeout
                        # (single-line string: the original backslash
                        # continuation embedded raw indentation in the log)
                        logger.warn("[" + str(job.id) +
                                    "] advance reservation is waiting because not all resources are available yet")
                        set_gantt_job_start_time(moldable_id, current_time_sec + 1)
                else:
                    # It's time to launch the AR job, remove missing ressources
                    missing_resources_itvs = sub_intervals(job.res_set, avail_res)
                    remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                              resource_set)
                    # BUG FIX: the closing "] " after the job id was missing
                    logger.warn("[" + str(job.id) +
                                "] remove some resources assigned to this advance reservation, because there are not Alive")

                    add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                                  job.id,
                                  "[MetaSched] Reduce the number of resources for the job "
                                  + str(job.id))

                    nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
                    # keep the "R=<n>" token of the job message in sync
                    new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
                    if new_message != job.message:
                        set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name +
                 ": end processing of reservations with missing resources")
def find_first_suitable_contiguous_slots(slots_set, job, res_rqt, hy, min_start_time):
    '''Find the first run of contiguous slots long enough for *walltime* in
    which the requested resource hierarchy can be satisfied (and quotas pass,
    when enabled).

    Returns (itvs, sid_left, sid_right): the assigned intervals plus the
    bounding slot ids, or ([], -1, -1) when the job cannot be scheduled.
    A per-job cache of starting slot ids is consulted/updated when
    min_start_time < 0 and job.key_cache is set.
    '''
    (mld_id, walltime, hy_res_rqts) = res_rqt
    itvs = []
    slots = slots_set.slots

    cache = slots_set.cache
    # flag to control cache update for considered entry
    no_cache = False
    # updated_cache = False

    sid_left = 1
    if min_start_time < 0:
        # to not always begin by the first slots ( O(n^2) )
        # TODO cache_by_container/inner + moldable + time_sharing(?)
        if job.key_cache and (job.key_cache[mld_id] in cache):
            sid_left = cache[job.key_cache[mld_id]]
            # print("cache hit...... ", sid_left)
        # else:
        #     print("cache miss :(")
    else:
        # satisfy job dependencies converted in min start_time:
        # skip slots that begin before the requested minimum start
        while slots[sid_left].b < min_start_time:
            sid_left = slots[sid_left].next
    # sid_left = 1  # TODO no cache

    sid_right = sid_left
    slot_e = slots[sid_right].e
    # print('first sid_left', sid_left)
    while True:
        # find next contiguous slots_time
        # print("A: job.id:", job.id, "sid_left:", sid_left, "sid_right:",)
        # sid_right
        # a slot id of 0 marks the end of the slot chain
        if sid_left != 0 and sid_right != 0:
            slot_b = slots[sid_left].b
        else:
            # TODO error
            # print("TODO error can't schedule job.id:", job.id)
            logger.info("can't schedule job with id:" + str(job.id) + ", due resources")
            return ([], -1, -1)

        # widen [sid_left..sid_right] until it spans at least walltime
        while ((slot_e - slot_b + 1) < walltime):
            sid_right = slots[sid_right].next
            if sid_right != 0:
                slot_e = slots[sid_right].e
            else:
                logger.info("can't schedule job with id:" + str(job.id) + ", due time")
                return ([], -1, -1)

        # if not updated_cache and (slots[sid_left].itvs != []):
        #     cache[walltime] = sid_left
        #     updated_cache = True

        if job.ts or (job.ph == ALLOW):
            itvs_avail = intersec_ts_ph_itvs_slots(slots, sid_left, sid_right, job)
        else:
            itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)

        # print("itvs_avail", itvs_avail, "h_res_req", hy_res_rqts, "hy", hy)
        if job.find:
            # beginning_slotset: the window starts at the very first slot of
            # the slot set
            beginning_slotset = True if (sid_left == 1) and (slots_set.begin == slots[1].b) else False
            # Use specialized find resource function
            itvs = job.find_func(itvs_avail, hy_res_rqts, hy, beginning_slotset,
                                 *job.find_args, **job.find_kwargs)
        else:
            itvs = find_resource_hierarchies_job(itvs_avail, hy_res_rqts, hy)

        if itvs != []:
            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, rs.default_resource_itvs))
                res = check_slots_quotas(slots, sid_left, sid_right, job, nb_res, walltime)
                (quotas_ok, quotas_msg, rule, value) = res
                if not quotas_ok:
                    logger.info("Quotas limitaion reached, job:" + str(job.id) +
                                ", " + quotas_msg + ", rule: " + str(rule) +
                                ", value: " + str(value))
                    # quotas limitation trigger therefore disable cache update
                    # for this entry
                    no_cache = True
                else:
                    break
            else:
                break

        # no fit in this window: slide the left boundary one slot forward
        sid_left = slots[sid_left].next

    if job.key_cache and (min_start_time < 0) and (not no_cache):
        # and (not job.deps):
        cache[job.key_cache[mld_id]] = sid_left
        # print("cache: update entry ", job.key_cache[mld_id], " with ", sid_left)
    # else:
    #     print("cache: not updated ", job.key_cache, min_start_time, job.deps)

    return (itvs, sid_left, sid_right)
def schedule_fifo_cycle(plt, queue="default", hierarchy_use=False):
    """Run one FIFO scheduling cycle for *queue* on platform *plt*.

    Waiting jobs are assigned, in order, to the resources left free by
    running jobs; the first job that cannot be satisfied stops the cycle
    (strict FIFO). Only the first moldable instance of each job is
    considered. Assignments are saved through plt.save_assigns.

    :param plt: platform object (time, jobs, resources, persistence)
    :param queue: queue name to schedule
    :param hierarchy_use: when True, match the full resource hierarchy;
        otherwise only resource_id counts are used
    """
    assigned_jobs = {}
    now = plt.get_time()
    logger.info("Begin scheduling....now: " + str(now) + ", queue: " + queue)
    #
    # Retrieve waiting jobs
    #
    waiting_jobs, waiting_jids, nb_waiting_jobs = plt.get_waiting_jobs(queue)

    if nb_waiting_jobs > 0:
        logger.info("nb_waiting_jobs:" + str(nb_waiting_jobs))
        for jid in waiting_jids:
            logger.debug("waiting_jid: " + str(jid))

        #
        # Determine Global Resource Intervals
        #
        resource_set = plt.resource_set()
        res_itvs = deepcopy(resource_set.roid_itvs)

        #
        # Get additional waiting jobs' data
        #
        job_security_time = int(config["SCHEDULER_JOB_SECURITY_TIME"])
        plt.get_data_jobs(waiting_jobs, waiting_jids, resource_set, job_security_time)

        #
        # Remove resources used by running job
        #
        for job in plt.get_scheduled_jobs(resource_set, job_security_time, now):
            if job.state == "Running":
                res_itvs = sub_intervals(res_itvs, job.res_itvs)

        #
        # Assign resource to jobs
        #
        for jid in waiting_jids:
            job = waiting_jobs[jid]

            # We consider only one instance of resources request (no support for moldable)
            (mld_id, walltime, hy_res_rqts) = job.mld_res_rqts[0]

            if hierarchy_use:
                # Assign resources which hierarchy support (uncomment)
                itvs = find_resource_hierarchies_job(res_itvs, hy_res_rqts,
                                                     resource_set.hierarchy)
            else:
                # OR assign resource by considering only resource_id (no hierarchy)
                # and only one type of resource
                (hy_level_nbs, constraints) = hy_res_rqts[0]
                (h_name, nb_asked_res) = hy_level_nbs[0]
                itvs_avail = intersec(constraints, res_itvs)
                ids_avail = itvs2ids(itvs_avail)

                if len(ids_avail) < nb_asked_res:
                    # not enough free resource ids for this job
                    itvs = []
                else:
                    # take the first nb_asked_res free ids
                    itvs = unordered_ids2itvs(ids_avail[:nb_asked_res])

            if (itvs != []):
                job.moldable_id = mld_id
                job.res_set = itvs
                assigned_jobs[job.id] = job
                # assigned resources are no longer free for later jobs
                res_itvs = sub_intervals(res_itvs, itvs)
            else:
                logger.debug("Not enough available resources, it's a FIFO scheduler, we stop here.")
                break

        #
        # Save assignement
        #
        logger.info("save assignement")
        plt.save_assigns(assigned_jobs, resource_set)
    else:
        logger.info("no waiting jobs")
def test_intersec():
    """intersec is idempotent: a set intersected with itself is unchanged."""
    itvs = [(1, 4), (6, 9)]
    result = intersec(itvs, itvs)
    assert result == itvs
def intersec_slots(slots):  # not used TO REMOVE?
    "Return intersection of intervals from a slot list"
    # explicit fold equivalent to the reduce form: the accumulator is
    # seeded with slots[0].itvs and every slot (slots[0] included, which
    # is harmless since intersec(x, x) == x) is intersected in turn
    acc = slots[0].itvs
    for slot in slots:
        acc = intersec(acc, slot.itvs)
    return acc
def estimate_job_nb_resources(resource_request, j_properties):
    '''returns an array with an estimation of the number of resources
    that can be used by a job:
    (resources_available, [(nbresources => int, walltime => int)])

    Each moldable instance of *resource_request* is evaluated against the
    current ResourceSet, combining the job-level SQL properties with the
    per-group property. Exits the process (sub_exit(-5)) when no instance
    can be satisfied.
    '''
    estimated_nb_resources = []
    resource_available = False
    resource_set = ResourceSet()
    resources_itvs = resource_set.roid_itvs

    for mld_idx, mld_resource_request in enumerate(resource_request):
        resource_desc, walltime = mld_resource_request

        if not walltime:
            walltime = default_job_walltime

        result = []

        for prop_res in resource_desc:
            jrg_grp_property = prop_res['property']
            resource_value_lst = prop_res['resources']

            #
            # determine resource constraints
            #
            if (not j_properties) and (not jrg_grp_property or
                                       (jrg_grp_property == "type = 'default'")):
                # no constraint at all: every resource is a candidate
                constraints = deepcopy(resource_set.roid_itvs)
            else:
                if not j_properties or not jrg_grp_property:
                    and_sql = ""
                else:
                    and_sql = " AND "

                sql_constraints = j_properties + and_sql + jrg_grp_property
                try:
                    request_constraints = db.query(Resource.id)\
                                            .filter(text(sql_constraints)).all()
                except exc.SQLAlchemyError as err:
                    # BUG FIX: report the caught exception instance; the
                    # original printed the sqlalchemy.exc *module* object
                    print_error('Bad resource SQL constraints request:',
                                sql_constraints)
                    print_error('SQLAlchemyError: ', err)
                    result = []
                    break

                roids = [resource_set.rid_i2o[int(y[0])]
                         for y in request_constraints]
                constraints = unordered_ids2itvs(roids)

            hy_levels = []
            hy_nbs = []
            for resource_value in resource_value_lst:
                res_name = resource_value['resource']
                value = resource_value['value']
                hy_levels.append(resource_set.hierarchy[res_name])
                hy_nbs.append(int(value))

            cts_resources_itvs = intersec(constraints, resources_itvs)
            res_itvs = find_resource_hierarchies_scattered(cts_resources_itvs,
                                                           hy_levels, hy_nbs)
            if res_itvs:
                result.extend(res_itvs)
            else:
                # one unsatisfiable group invalidates this moldable instance
                result = []
                break

        if result:
            resource_available = True
            estimated_nb_res = itvs_size(result)
            estimated_nb_resources.append((estimated_nb_res, walltime))
            print_info('Moldable instance: ', mld_idx,
                       ' Estimated nb resources: ', estimated_nb_res,
                       ' Walltime: ', walltime)

    if not resource_available:
        print_error("There are not enough resources for your request")
        sub_exit(-5)

    return (resource_available, estimated_nb_resources)