Beispiel #1
0
def handle_jobs_to_launch(jobs_to_launch_lst, current_time_sec, current_time_sql):
    logger.debug(
        "Begin processing jobs to launch (start time <= " + current_time_sql)

    return_code = 0

    for job in jobs_to_launch_lst:
        return_code = 1
        logger.debug("Set job " + str(job.id) + " state to toLaunch at " + current_time_sql)

        #
        # Advance Reservation
        #
        walltime = job.walltime
        if ((job.reservation == 'Scheduled') and (job.start_time < current_time_sec)):
            max_time = walltime - (current_time_sec - job.start_time)

            set_moldable_job_max_time(job.moldable_id, max_time)
            set_gantt_job_start_time(job.moldable_id, current_time_sec)
            logger.warn("Reduce walltime of job " + str(job.id) +
                        "to " + str(max_time) + "(was  " + str(walltime) + " )")

            add_new_event('REDUCE_RESERVATION_WALLTIME', job.id,
                          "Change walltime from " + str(walltime) + " to "
                          + str(max_time))

            w_max_time = duration_to_sql(max_time)
            new_message = re.sub(r'W=\d+:\d+:\d+', 'W=' + w_max_time, job.message)

            if new_message != job.message:
                set_job_message(job.id, new_message)

        prepare_job_to_be_launched(job, current_time_sec)

    logger.debug("End processing of jobs to launch")

    return return_code
Beispiel #2
0
def check_reservation_jobs(plt, resource_set, queue_name, all_slot_sets, current_time_sec):
    """Processing of new Advance Reservations"""

    logger.debug("Queue " + queue_name + ": begin processing of new reservations")

    ar_jobs_scheduled = {}

    ar_jobs, ar_jids, nb_ar_jobs = plt.get_waiting_jobs(
        queue_name, 'toSchedule')
    logger.debug("nb_ar_jobs:" + str(nb_ar_jobs))

    if nb_ar_jobs > 0:
        job_security_time = int(config['SCHEDULER_JOB_SECURITY_TIME'])
        plt.get_data_jobs(ar_jobs, ar_jids, resource_set, job_security_time)

        logger.debug("Try and schedule new reservations")
        for jid in ar_jids:
            job = ar_jobs[jid]
            logger.debug(
                "Find resource for Advance Reservation job:" + str(job.id))

            # It is a reservation, we take care only of the first moldable job
            moldable_id, walltime, hy_res_rqts = job.mld_res_rqts[0]

            # test if reservation is too old
            if current_time_sec >= (job.start_time + walltime):
                logger.warn(
                    "[" + str(job.id) + "] Canceling job: reservation is too old")
                set_job_message(job.id, "Reservation too old")
                set_job_state(job.id, 'toError')
                continue
            else:
                if job.start_time < current_time_sec:
                    # TODO update to DB ????
                    job.start_time = current_time_sec

            ss_name = 'default'

            # TODO container
            # if 'inner' in job.types:
            #    ss_name = job.types['inner']

            # TODO: test if container is an AR job

            slots = all_slot_sets[ss_name].slots

            t_e = job.start_time + walltime - job_security_time
            sid_left, sid_right = get_encompassing_slots(
                slots, job.start_time, t_e)

            if job.ts or (job.ph == ALLOW):
                itvs_avail = intersec_ts_ph_itvs_slots(
                    slots, sid_left, sid_right, job)
            else:
                itvs_avail = intersec_itvs_slots(slots, sid_left, sid_right)

            itvs = find_resource_hierarchies_job(
                itvs_avail, hy_res_rqts, resource_set.hierarchy)

            if ('QUOTAS' in config) and (config['QUOTAS'] == 'yes'):
                nb_res = itvs_size(intersec(itvs, resource_set.default_resource_itvs))
                res = check_slots_quotas(slots, sid_left, sid_right, job, nb_res, walltime)
                (quotas_ok, quotas_msg, rule, value) = res
                if not quotas_ok:
                    itvs = []
                    logger.info("Quotas limitaion reached, job:" + str(job.id) +
                                ", " + quotas_msg + ", rule: " + str(rule) +
                                ", value: " + str(value))
                    set_job_state(job.id, 'toError')
                    set_job_message(job.id,
                                    "This advance reservation cannot run due to quotas")

            if itvs == []:
                # not enough resource available
                logger.warn("[" + str(job.id) +
                            "] advance reservation cannot be validated, not enough resources")
                set_job_state(job.id, 'toError')
                set_job_message(job.id, "This advance reservation cannot run")
            else:
                # The reservation can be scheduled
                logger.debug(
                    "[" + str(job.id) + "] advance reservation is validated")
                job.moldable_id = moldable_id
                job.res_set = itvs
                ar_jobs_scheduled[job.id] = job
                # if 'container' in job.types
                #    slot = Slot(1, 0, 0, job.res_set[:], job.start_time,
                #                job.start_time + job.walltime - job_security_time)
                # slot.show()
                #    slots_sets[job.id] = SlotSet(slot)

                set_job_state(job.id, 'toAckReservation')

            set_job_resa_state(job.id, 'Scheduled')

    if ar_jobs_scheduled != []:
        logger.debug("Save AR jobs' assignements in database")
        save_assigns(ar_jobs_scheduled, resource_set)

    logger.debug("Queue " + queue_name + ": end processing of new reservations")
Beispiel #3
0
def handle_waiting_reservation_jobs(queue_name, resource_set, job_security_time, current_time_sec):

    logger.debug("Queue " + queue_name +
                 ": begin processing accepted Advance Reservations")

    ar_jobs = get_waiting_scheduled_AR_jobs(queue_name, resource_set, job_security_time, current_time_sec)

    for job in ar_jobs:

        moldable_id = job.moldable_id
        walltime = job.walltime

        # Test if AR job is expired and handle it
        if (current_time_sec > (job.start_time + walltime)):
            logger.warn("[" + str(job.id) +
                        "] set job state to Error: avdance reservation expired and couldn't be started")
            set_job_state(job.id, 'Error')
            set_job_message(job.id, "Reservation expired and couldn't be started.")
        else:

            # Determine current available ressources
            avail_res = intersec(resource_set.roid_itvs, job.res_set)

            # Test if the AR job is waiting to be launched due to nodes' unavailabilities
            if (avail_res == []) and (job.start_time < current_time_sec):
                logger.warn("[%s] advance reservation is waiting because no resource is present"
                            % str(job.id))

                # Delay launching time
                set_gantt_job_start_time(moldable_id, current_time_sec + 1)
            elif (job.start_time < current_time_sec):

                if (job.start_time + reservation_waiting_timeout) > current_time_sec:
                    if not equal_itvs(avail_res, job.res_set):
                        # The expected ressources are not all available,
                        # wait the specified timeout
                        logger.warn("[" + str(job.id) +
                                    "] advance reservation is waiting because not all \
                                    resources are available yet")
                        set_gantt_job_start_time(moldable_id, current_time_sec + 1)
                else:
                    # It's time to launch the AR job, remove missing ressources
                    missing_resources_itvs = sub_intervals(job.res_set, avail_res)
                    remove_gantt_resource_job(moldable_id, missing_resources_itvs,
                                              resource_set)
                    logger.warn("[" + str(job.id) +
                                "remove some resources assigned to this advance reservation, \
                                because there are not Alive")

                    add_new_event('SCHEDULER_REDUCE_NB_RESSOURCES_FOR_ADVANCE_RESERVATION',
                                  job.id,
                                  "[MetaSched] Reduce the number of resources for the job "
                                  + str(job.id))

                    nb_res = itvs_size(job.res_set) - itvs_size(missing_resources_itvs)
                    new_message = re.sub(r'R=\d+', 'R=' + str(nb_res), job.message)
                    if new_message != job.message:
                        set_job_message(job.id, new_message)

    logger.debug("Queue " + queue_name +
                 ": end processing of reservations with missing resources")