def _scontrol_show_stdout_to_dict_list(stdout_data, stderr_data, debug=False):
    '''
    Convert the 'scontrol show' stdout data to a list of dicts, one dict per
    non-empty output line.

    Each line is split on single spaces.  Every token of the exact form
    "keyword=value" (exactly one '=') becomes an entry in that line's dict.
    Tokens of any other shape are skipped; if debug is set, each skipped
    non-empty token is reported through log_debug().

    Arguments:
        stdout_data: text written to stdout by 'scontrol show', or None
        stderr_data: text written to stderr by 'scontrol show', or None
        debug: if True, log tokens which could not be converted

    Returns:
        A list of dicts.  The list is empty if stderr_data is non-empty,
        or if stdout_data is empty or None.
    '''
    # Any stderr output means the command failed, and a missing / empty
    # stdout leaves nothing to parse.
    if stderr_data or not stdout_data:
        return []

    stdout_dict_list = []

    for line in stdout_data.split(os.linesep):
        # Skip the empty strings produced by blank lines and by the
        # trailing line separator
        if not line:
            continue

        stdout_line_dict = {}

        for kv_pair in line.split(' '):
            kv = kv_pair.split('=')
            if len(kv) == 2:
                stdout_line_dict[kv[0]] = kv[1]
            elif debug and kv_pair:
                # Empty tokens (from repeated spaces) are not worth reporting
                log_debug('Failed to convert `%s`' % kv_pair)

        stdout_dict_list.append(stdout_line_dict)

    return stdout_dict_list
Beispiel #2
0
def _check_hil_command(env_dict):
    '''
    Return the HIL command named by the job, or None if there is none.

    The job name (env_dict['jobname']) is looked up in
    HIL_RESERVATION_COMMANDS.  If it is present, the job name itself is
    returned; otherwise a debug message is logged and None is returned.
    '''
    hil_cmd = env_dict['jobname']

    # Guard clause: anything outside the known command set is ignored
    if hil_cmd not in HIL_RESERVATION_COMMANDS:
        log_debug(
            'Jobname `%s` is not a HIL reservation command, nothing to do.' %
            hil_cmd)
        return None

    return hil_cmd
def get_object_data(what_obj, obj_id, debug=False):
    '''
    Get a list of dictionaries of information on the object, via
    'scontrol show <what_object> <object_id>'

    Arguments:
        what_obj: the kind of object to show, passed to
                  exec_scontrol_show_cmd() as the entity
        obj_id:   the ID of the object to show
        debug:    if True, enable debug logging here and in
                  exec_scontrol_show_cmd()

    Returns:
        The list of dicts produced by exec_scontrol_show_cmd().  Failures
        are not raised; when debug is set they are logged.
    '''
    # Forward the caller's debug flag rather than forcing it off, so that
    # debug=True also traces the underlying scontrol invocation.
    objdata_dict_list, _, stderr_data = exec_scontrol_show_cmd(
        what_obj, obj_id, debug=debug)

    if stderr_data and debug:
        log_debug('Failed to retrieve data for %s `%s`' %
                  (what_obj, obj_id))
        log_debug('  %s' % stderr_data)

    return objdata_dict_list
Beispiel #4
0
def main(argv=[]):
    '''
    HIL reservation monitor entry point.

    Initializes logging, fetches the current HIL reservations and, if there
    are any, passes them to _find_hil_release_reservations().
    The argv parameter is not used.
    '''
    log_init('hil_monitor', HIL_MONITOR_LOGFILE, logging.DEBUG)

    # Nothing to monitor unless at least one HIL reservation exists
    hil_reservations = _get_hil_reservations()
    if len(hil_reservations) == 0:
        return

    log_info('HIL Reservation Monitor', separator=True)
    log_debug('')

    release_reservations = _find_hil_release_reservations(hil_reservations)
def exec_scontrol_cmd(action, entity, entity_id=None, debug=True, **kwargs):
    '''
    Build an 'scontrol <action> <entity>' command and pass to an executor
    Specify single-line output ('-o') to support stdout postprocessing

    Arguments:
        action:    the scontrol subcommand, e.g. 'show'
        entity:    the entity to act on; omitted from the command if empty
        entity_id: the ID of the entity; omitted from the command if empty
        debug:     if True, log the command and its stdout / stderr
        kwargs:    each item is appended to the command as 'key=value'

    Returns:
        The (stdout_data, stderr_data) tuple from _exec_subprocess_cmd()
    '''
    cmd = [os.path.join(SLURM_INSTALL_DIR, 'scontrol'), action]

    if entity:
        cmd.append(entity)

    if entity_id:
        # Command arguments must be strings; tolerate e.g. an integer job ID
        cmd.append(str(entity_id))

    cmd.append('-o')

    # items() rather than iteritems(): works on both Python 2 and Python 3
    cmd.extend('%s=%s' % (k, v) for k, v in kwargs.items())

    if debug:
        log_debug('exec_scontrol_cmd(): Command  %s' % cmd)

    stdout_data, stderr_data = _exec_subprocess_cmd(cmd)

    if debug:
        log_debug('exec_scontrol_cmd(): Stdout  %s' % stdout_data)
        log_debug('exec_scontrol_cmd(): Stderr  %s' % stderr_data)

    return stdout_data, stderr_data
def _exec_subprocess_cmd(cmd):
    '''
    Run a command in a child process and block until it finishes.

    Returns a (stdout_data, stderr_data) tuple captured from the child.
    If starting or communicating with the child raises, the exception is
    logged and (None, 'error: Exception on Popen or communicate') is
    returned instead of propagating the exception.
    '''
    # Developer switch: set to True to trace every command and its output
    trace = False

    try:
        child = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout_data, stderr_data = child.communicate()
    except Exception as e:
        # Best effort: report the failure through the stderr return value
        stdout_data, stderr_data = (
            None, 'error: Exception on Popen or communicate')
        log_debug('Exception on Popen or communicate')
        log_debug('Exception: %s' % e)

    if trace:
        fn_name = _exec_subprocess_cmd.__name__
        log_debug('%s: cmd is %s' % (fn_name, cmd))
        log_debug('%s: stdout is %s' % (fn_name, stdout_data))
        log_debug('%s: stderr is %s' % (fn_name, stderr_data))

    return stdout_data, stderr_data
def exec_scontrol_show_cmd(entity, entity_id, debug=False, **kwargs):
    '''
    Run the 'scontrol show' command on the entity and ID
    Convert standard output data to a list of dictionaries, one per line

    Arguments:
        entity:    the kind of object to show, e.g. 'reservation' or 'job'
        entity_id: the ID of the object to show
        debug:     if True, log the command, its output, and stdout-reported
                   failures
        kwargs:    passed through to exec_scontrol_cmd()

    Returns:
        A (stdout_dict_list, stdout_data, stderr_data) tuple.
        On success stdout_dict_list holds one dict per output line.
        On failure stdout_dict_list is empty and stderr_data holds the
        error text.  If the error was reported on stdout, it is moved to
        stderr_data and stdout_data is returned as None.
    '''
    stdout_data, stderr_data = exec_scontrol_cmd('show',
                                                 entity,
                                                 entity_id,
                                                 debug=debug,
                                                 **kwargs)

    # Check for errors.
    # If anything in stderr, return it
    # Next, check if stdout includes various error strings - 'scontrol show'
    #     writes error output to stdout.
    #     Failure indications:
    #         Reservation:  stdout includes 'not found'
    #         Job: stdout includes 'Invalid job id'
    #     Copy stdout to stderr if found.
    # If stderr is empty, and stdout does not contain an error string,
    #     convert stdout to a list of dicts and return that

    stdout_dict_list = []

    entity_error_dict = {'reservation': 'not found', 'job': 'Invalid job id'}
    error_marker = entity_error_dict.get(entity)

    # Used in log messages only.  Formatting, rather than concatenation,
    # tolerates an entity of None.
    cmd = 'scontrol show %s' % (entity,)

    if stderr_data:
        log_debug('Command `%s` failed' % cmd)
        log_debug('  stderr: %s' % stderr_data)

    elif error_marker is not None and stdout_data and (error_marker
                                                        in stdout_data):
        # Move the error text over before logging, so that the log shows
        # the actual error rather than the (empty) original stderr
        stderr_data = stdout_data
        stdout_data = None
        if debug:
            log_debug('Command `%s` failed' % cmd)
            log_debug('  stderr: %s' % stderr_data)

    elif stdout_data:
        stdout_dict_list = _scontrol_show_stdout_to_dict_list(
            stdout_data, stderr_data)

    return stdout_dict_list, stdout_data, stderr_data
Beispiel #8
0
def main(argv=[]):
    '''
    HIL slurmctld prolog / epilog entry point.

    Exactly which action runs is selected by the parsed command line
    arguments (args.hil_prolog / args.hil_epilog):
        prolog: a 'hil_reserve' job triggers _hil_reserve_cmd()
        epilog: a 'hil_release' job triggers _hil_release_cmd()

    Returns None in all cases.  Returns early, after logging, if neither
    mode was requested, if the environment / job / partition data is
    missing, if the partition check fails, or if the job name is not a HIL
    command.  The argv parameter is not used.
    '''
    args = process_args()
    log_init('hil_slurmctld.prolog', HIL_SLURMCTLD_PROLOG_LOGFILE,
             logging.DEBUG)

    if not (args.hil_prolog or args.hil_epilog):
        log_debug('Must specify one of --hil_prolog or --hil_epilog',
                  separator=True)
        return

    # Collect prolog/epilog environment, job data, and partition data into
    # dictionaries, perform basic sanity checks.
    # The environment is validated first, because it supplies the keys
    # used to look up the partition and job.

    env_dict = _get_prolog_environment()
    if not env_dict:
        log_debug('One of pdata_dict, jobdata_dict, or env_dict is empty')
        return

    # Since data for one partition and one job is expected, select the first
    # dict in each list.  A failed lookup yields an empty list, so guard the
    # indexing instead of raising IndexError.
    pdata_dict_list = get_partition_data(env_dict['partition'])
    jobdata_dict_list = get_job_data(env_dict['job_id'])

    pdata_dict = pdata_dict_list[0] if pdata_dict_list else None
    jobdata_dict = jobdata_dict_list[0] if jobdata_dict_list else None

    if not pdata_dict or not jobdata_dict:
        log_debug('One of pdata_dict, jobdata_dict, or env_dict is empty')
        # Format the data into the message rather than passing it as a
        # second positional argument to the logger
        log_debug('Job data %s' % (jobdata_dict,))
        log_debug('P   data %s' % (pdata_dict,))
        return

    if not _check_hil_partition(env_dict, pdata_dict):
        return

    # Verify the command is a HIL command.  If so, process it.

    hil_cmd = _check_hil_command(env_dict)
    if not hil_cmd:
        return

    if args.hil_prolog:
        if hil_cmd == 'hil_reserve':
            log_info('HIL Slurmctld Prolog', separator=True)
            log_debug('Processing reserve request')
            _hil_reserve_cmd(env_dict, pdata_dict, jobdata_dict)

    elif args.hil_epilog:
        if hil_cmd == 'hil_release':
            log_info('HIL Slurmctld Epilog', separator=True)
            log_debug('Processing release request')
            _hil_release_cmd(env_dict, pdata_dict, jobdata_dict)
    return
Beispiel #9
0
def _parse_partition_maxtime(max_time_s):
    '''
    Convert a partition MaxTime string of the form '[days-]H:M:S' into a
    timedelta.  Returns None if the string cannot be parsed.
    '''
    d_hms = max_time_s.split('-')

    if len(d_hms) == 1:
        days = 0
        hms_s = d_hms[0]
    elif len(d_hms) == 2:
        # Days field is present
        try:
            days = int(d_hms[0])
        except ValueError:
            return None
        hms_s = d_hms[1]
    else:
        return None

    try:
        hms_dt = datetime.strptime(hms_s, SHOW_PARTITION_MAXTIME_HMS_FMT)
    except ValueError:
        return None

    return timedelta(days=days,
                     hours=hms_dt.hour,
                     minutes=hms_dt.minute,
                     seconds=hms_dt.second)


def _get_hil_reservation_times(env_dict, pdata_dict, jobdata_dict):
    '''
    Calculate the start time and end time of the reservation
    Start time:
        The job's StartTime, as reported in jobdata_dict
    End time:
        If the job has an end time, use that and extend it by the HIL grace
        period.
        If the job does not have an end time and has no time limit
        (TimeLimit UNLIMITED), set the reservation end time to the start
        time plus either the partition MaxTime, if defined, or the HIL
        default reservation duration.
        If the partition MaxTime cannot be parsed, or the job has a time
        limit but no end time (not supported yet), log the condition and
        use the HIL default reservation duration, so that an end time is
        always defined.

    Arguments:
        env_dict:     prolog environment (not used here)
        pdata_dict:   partition data; 'MaxTime' is read
        jobdata_dict: job data; 'StartTime', 'EndTime', 'TimeLimit' are read

    Returns:
        A (t_start_s, t_end_s) tuple of strings in RES_CREATE_TIME_FMT
    '''
    t_job_start_s = jobdata_dict['StartTime']
    t_job_end_s = jobdata_dict['EndTime']

    t_start_dt = datetime.strptime(t_job_start_s, SHOW_OBJ_TIME_FMT)
    default_duration = timedelta(seconds=HIL_RESERVATION_DEFAULT_DURATION)

    if 'Unknown' not in t_job_end_s:
        # Job has a defined end time.  Use it.
        log_debug('Using job end time for reservation')
        t_end_dt = datetime.strptime(t_job_end_s, SHOW_OBJ_TIME_FMT)
        t_end_dt += timedelta(seconds=HIL_RESERVATION_GRACE_PERIOD)

    elif 'UNLIMITED' not in jobdata_dict['TimeLimit']:
        # Job has a time limit, but no end time.
        # NOTE(review): deriving the end time from the job time limit is
        # not implemented.  Fall back to the HIL default duration rather
        # than failing on an undefined end time - confirm this is the
        # desired policy.
        log_debug('Job has a time limit! Unsupported!')
        t_end_dt = t_start_dt + default_duration

    else:
        # Job does not have a time limit. See if the partition has a max
        # time.  If so, use that. If not, use the HIL default duration.
        p_max_time_s = pdata_dict['MaxTime']
        log_debug('Partition MaxTime is %s' % p_max_time_s)

        if 'UNLIMITED' in p_max_time_s:
            # Partition does not have a max time, use HIL default.
            log_debug(
                'No job or partition time limit, using HIL default reservation duration'
            )
            t_end_dt = t_start_dt + default_duration

        else:
            # Partition has a max time, parse it.
            log_debug(
                'Using partition time limit to calculate reservation end time'
            )
            p_max_timedelta = _parse_partition_maxtime(p_max_time_s)
            if p_max_timedelta is None:
                log_error('Cannot parse partition MaxTime (`%s`)' %
                          p_max_time_s)
                p_max_timedelta = default_duration

            log_debug('%s' % p_max_timedelta)
            t_end_dt = t_start_dt + p_max_timedelta

    # We now have a defined reservation t_start and t_end in datetime format.
    # Convert to strings and return.
    t_start_s = t_start_dt.strftime(RES_CREATE_TIME_FMT)
    t_end_s = t_end_dt.strftime(RES_CREATE_TIME_FMT)

    return t_start_s, t_end_s