コード例 #1
0
ファイル: jobs.py プロジェクト: jjhursey/onramp
def _delete_job(job_state):
    """Remove every trace of the given job.

    A job whose state is 'Scheduled', 'Queued' or 'Running' is first
    cancelled through the configured batch scheduler. After that the job's
    state file and its run directory are deleted and the state object itself
    is emptied.

    Args:
        job_state (JobState): State object for the job to remove.
    """
    still_with_scheduler = job_state['state'] in ('Scheduled', 'Queued',
                                                  'Running')
    if still_with_scheduler:
        # Load the PCE config to find out which batch scheduler is in use.
        pce_cfg = ConfigObj(
            os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg'),
            configspec=os.path.join(pce_root, 'src', 'configspecs',
                                    'onramp_pce_config.cfgspec'))
        pce_cfg.validate(Validator())
        batch_scheduler = Scheduler(pce_cfg['cluster']['batch_scheduler'])
        cancel_result = batch_scheduler.cancel_job(
            job_state['scheduler_job_num'])
        _logger.debug('Cancel job output: %s' % cancel_result[1])

    # Remove the persisted state for the job.
    os.remove(os.path.join(_job_state_dir, str(job_state['job_id'])))

    # Remove the job's run directory; failures here are ignored.
    rel_run_dir = 'users/%s/%s_%d/%s' % (job_state['username'],
                                          job_state['mod_name'],
                                          job_state['mod_id'],
                                          job_state['run_name'])
    shutil.rmtree(os.path.join(pce_root, rel_run_dir), ignore_errors=True)

    # Finally wipe the in-memory state.
    job_state.clear()
コード例 #2
0
ファイル: jobs.py プロジェクト: jragatz/onramp
def _delete_job(job_state):
    """Cancel (when needed) and erase a job.

    The scheduler is asked to cancel the job only while the tracked state is
    one of 'Scheduled', 'Queued' or 'Running'. In every case the job's state
    file is removed, its run directory is removed (errors ignored), and the
    passed-in state object is cleared.

    Args:
        job_state (JobState): State object for the job to remove.
    """
    if job_state['state'] in ('Scheduled', 'Queued', 'Running'):
        config_path = os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg')
        spec_path = os.path.join(pce_root, 'src', 'configspecs',
                                 'onramp_pce_config.cfgspec')
        pce_config = ConfigObj(config_path, configspec=spec_path)
        pce_config.validate(Validator())
        sched = Scheduler(pce_config['cluster']['batch_scheduler'])
        outcome = sched.cancel_job(job_state['scheduler_job_num'])
        _logger.debug('Cancel job output: %s' % outcome[1])

    # State file first, then the run directory contents.
    state_path = os.path.join(_job_state_dir, str(job_state['job_id']))
    os.remove(state_path)

    dir_parts = (
        job_state['username'],
        job_state['mod_name'],
        job_state['mod_id'],
        job_state['run_name'],
    )
    job_run_dir = os.path.join(pce_root, 'users/%s/%s_%d/%s' % dir_parts)
    shutil.rmtree(job_run_dir, ignore_errors=True)

    job_state.clear()
コード例 #3
0
ファイル: jobs.py プロジェクト: ckbrehm/onramp
def job_run(job_id, job_state_file=None):
    """Write the batch script for a job and submit it to the batch scheduler.

    The scheduler type is read from onramp_pce_config.cfg. The job's run
    directory and run name are read from its JobState; the batch script is
    written to 'script.sh' inside the run directory and the scheduler is
    asked to schedule it. The outcome is recorded in the job state as either
    'Scheduled' or 'Schedule failed'. If the job turns out to be marked for
    deletion once the outcome is recorded, it is deleted instead.

    Args:
        job_id (int): Id of the job to run.
        job_state_file: Optional second argument handed to JobState.

    Returns:
        Tuple with 0th position being error code and 1st position being string
        indication of status: (0, 'Job scheduled') on success,
        (-2, 'Job <id> deleted') if the job was deleted, otherwise the
        scheduler's 'returncode' and 'msg'.
    """
    # Determine batch scheduler to use from config.
    cfg = ConfigObj(os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg'),
                    configspec=os.path.join(pce_root, 'src', 'configspecs',
                                            'onramp_pce_config.cfgspec'))
    cfg.validate(Validator())
    scheduler = Scheduler(cfg['cluster']['batch_scheduler'])

    ret_dir = os.getcwd()
    with JobState(job_id, job_state_file) as job_state:
        run_dir = job_state['run_dir']
        run_name = job_state['run_name']
    os.chdir(run_dir)

    try:
        # Write batch script (relative path: lands in run_dir).
        with open('script.sh', 'w') as f:
            f.write(scheduler.get_batch_script(run_name))

        # Schedule job.
        result = scheduler.schedule(run_dir)
    except Exception:
        # Do not leave the process stranded in run_dir when writing the
        # script or scheduling blows up.
        os.chdir(ret_dir)
        raise

    if result['status_code'] != 0:
        _logger.error(result['msg'])
        with JobState(job_id, job_state_file) as job_state:
            job_state['state'] = 'Schedule failed'
            job_state['error'] = result['msg']
            os.chdir(ret_dir)
            if job_state['_marked_for_del']:
                _delete_job(job_state)
                return (-2, 'Job %d deleted' % job_id)
        return (result['returncode'], result['msg'])

    with JobState(job_id, job_state_file) as job_state:
        job_state['state'] = 'Scheduled'
        job_state['error'] = None
        job_state['scheduler_job_num'] = result['job_num']
        os.chdir(ret_dir)
        if job_state['_marked_for_del']:
            _delete_job(job_state)
            return (-2, 'Job %d deleted' % job_id)

    return (0, 'Job scheduled')
コード例 #4
0
def job_run(job_id, job_state_file=None):
    """Generate a job's batch script and schedule it.

    Reads the batch scheduler type from onramp_pce_config.cfg, looks up the
    job's 'run_dir' and 'run_name' in its JobState, writes 'script.sh' into
    the run directory and asks the scheduler to schedule it. The job state is
    then updated to 'Scheduled' (with the scheduler job number) or
    'Schedule failed' (with the scheduler message). A job found to be marked
    for deletion at that point is deleted.

    Args:
        job_id (int): Id of the job to run.
        job_state_file: Optional second argument handed to JobState.

    Returns:
        Tuple with 0th position being error code and 1st position being string
        indication of status: (0, 'Job scheduled') on success,
        (-2, 'Job <id> deleted') if the job was deleted, otherwise the
        scheduler's 'returncode' and 'msg'.
    """
    # Determine batch scheduler to use from config.
    cfg = ConfigObj(os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg'),
                    configspec=os.path.join(pce_root, 'src', 'configspecs',
                                            'onramp_pce_config.cfgspec'))
    cfg.validate(Validator())
    scheduler = Scheduler(cfg['cluster']['batch_scheduler'])

    ret_dir = os.getcwd()
    with JobState(job_id, job_state_file) as job_state:
        run_dir = job_state['run_dir']
        run_name = job_state['run_name']
    os.chdir(run_dir)

    try:
        # Write batch script; the relative name resolves inside run_dir.
        with open('script.sh', 'w') as f:
            f.write(scheduler.get_batch_script(run_name))

        # Schedule job.
        result = scheduler.schedule(run_dir)
    except Exception:
        # Restore the caller's working directory before propagating, so a
        # failure here does not leak the chdir into the rest of the process.
        os.chdir(ret_dir)
        raise

    if result['status_code'] != 0:
        _logger.error(result['msg'])
        with JobState(job_id, job_state_file) as job_state:
            job_state['state'] = 'Schedule failed'
            job_state['error'] = result['msg']
            os.chdir(ret_dir)
            if job_state['_marked_for_del']:
                _delete_job(job_state)
                return (-2, 'Job %d deleted' % job_id)
        return (result['returncode'], result['msg'])

    with JobState(job_id, job_state_file) as job_state:
        job_state['state'] = 'Scheduled'
        job_state['error'] = None
        job_state['scheduler_job_num'] = result['job_num']
        os.chdir(ret_dir)
        if job_state['_marked_for_del']:
            _delete_job(job_state)
            return (-2, 'Job %d deleted' % job_id)

    return (0, 'Job scheduled')
コード例 #5
0
ファイル: jobs.py プロジェクト: ckbrehm/onramp
def _build_job(job_id, job_state_file=None):
    """Launch actions required to maintain job state and/or curate job results
    and return the state.

    When current job state (as a function of both PCE state tracking and
    scheduler output) warrants, initiate job postprocessing and/or status
    checking prior to building and returning state. Unless the job is in
    'Launch failed' or 'Setting up launch', the files matched by the
    'visible' globs of the run's config/onramp_metadata.cfg are listed under
    'visible_files'.

    Args:
        job_id (int): Id of the job to get state for.
        job_state_file: Optional second argument handed to JobState and
            forwarded to job_postprocess.

    Returns:
        OnRamp formatted dictionary containing job attrs. An empty dict is
        returned when no 'state' has been recorded for the job. When the job
        was marked for deletion, a copy of the state taken after
        _delete_job() has run is returned.
    """
    status_check_states = ['Scheduled', 'Queued', 'Running']
    with JobState(job_id, job_state_file) as job_state:
        _logger.debug('Building at %s' % time.time())
        if 'state' not in job_state.keys():
            _logger.debug('No state at %s' % time.time())
            _logger.debug('job_state keys: %s' % job_state.keys())
            return {}

        if job_state['state'] in status_check_states:
            # The job is with the scheduler: ask it for the current status.
            specfile = os.path.join(pce_root, 'src', 'configspecs',
                                    'onramp_pce_config.cfgspec')
            cfg = ConfigObj(os.path.join(pce_root, 'bin',
                                         'onramp_pce_config.cfg'),
                            configspec=specfile)
            cfg.validate(Validator())
            scheduler = Scheduler(cfg['cluster']['batch_scheduler'])
            sched_job_num = job_state['scheduler_job_num']
            job_status = scheduler.check_status(sched_job_num)

            # Bad.
            if job_status[0] != 0:
                _logger.debug('Bad job status: %s' % job_status[1])
                job_state['state'] = 'Run failed'
                job_state['error'] = job_status[1]
                if job_status[0] != -2:
                    job_state['state'] = job_status[1]
                if job_state['_marked_for_del']:
                    # FIXME: This might cause trouble. About to return {}.
                    _delete_job(job_state)
                return copy.deepcopy(job_state)

            # Good.
            if job_status[1] in ['Done', 'No info']:
                job_state['state'] = 'Postprocessing'
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)
                job_state['error'] = None
                job_state['mod_status_output'] = None
                # Postprocessing runs in a separate process.
                p = Process(target=job_postprocess,
                            args=(job_id, job_state_file))
                p.start()
            elif job_status[1] == 'Running':
                job_state['state'] = 'Running'
                job_state['error'] = None
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)
                run_dir = job_state['run_dir']
                mod_status_output = _get_module_status_output(run_dir)
                job_state['mod_status_output'] = mod_status_output
            elif job_status[1] == 'Queued':
                job_state['state'] = 'Queued'
                job_state['error'] = None
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)

        # Work on a copy from here on so the JobState can be released.
        job = copy.deepcopy(job_state)

    if job['state'] in ['Launch failed', 'Setting up launch']:
        return job

    # Build visible files.
    _logger.debug('job state: %s' % str(job))
    dir_args = (job['username'], job['mod_name'], job['mod_id'],
                job['run_name'])
    run_dir = os.path.join(pce_root, 'users/%s/%s_%d/%s' % dir_args)
    cfg_file = os.path.join(run_dir, 'config/onramp_metadata.cfg')
    try:
        conf = ConfigObj(cfg_file, file_error=True)
    except (IOError, SyntaxError):
        # Badly formed or non-existent config/onramp_metadata.cfg.
        _logger.debug('Bad metadata')
        _logger.debug(cfg_file)
        return job

    if 'onramp' in conf.keys() and 'visible' in conf['onramp'].keys():
        globs = conf['onramp']['visible']
        if isinstance(globs, basestring):
            # Globs is only a single string. Convert to list.
            globs = [globs]
    else:
        globs = []

    # The globs are relative to the run dir, so expand them from there. The
    # try/finally guarantees the working directory is restored even if
    # globbing or a size lookup raises (e.g. a file vanishing mid-listing).
    ret_dir = os.getcwd()
    os.chdir(run_dir)
    try:
        filenames = list(chain.from_iterable(map(glob.glob, globs)))

        prefix = os.path.join(pce_root, 'users') + '/'
        url_prefix = run_dir.split(prefix)[1]

        job['visible_files'] = [{
                'name': filename,
                'size': os.path.getsize(os.path.join(run_dir, filename)),
                'url': os.path.join('files',
                                    os.path.join(url_prefix, filename))
            } for filename in filenames
        ]
    finally:
        os.chdir(ret_dir)

    return job
コード例 #6
0
ファイル: jobs.py プロジェクト: jjhursey/onramp
def launch_job(job_id, mod_id, username, run_name, run_params):
    """Schedule job launch using system batch scheduler as configured in
    onramp_pce_config.cfg.

    Steps, in order: initialize the job state, check the module is ready,
    set up the user's run directory from the installed module, validate and
    store run params (if any), run bin/onramp_preprocess.py, write the batch
    script and schedule it. Job state is updated along the way; a job found
    to be marked for deletion at a state update is deleted.

    Args:
        job_id (int): Unique identifier for job.
        mod_id (int): Id for OnRamp educational module to run in this job.
        username (str): Username of user running the job.
        run_name (str): Human-readable label for this job run.
        run_params: Run parameters handed to ConfigObj and validated against
            the module's config/onramp_uioptions.cfgspec. Skipped when falsy.

    Returns:
        Tuple with 0th position being error code and 1st position being string
        indication of status.
    """
    accepted_states = ['Schedule failed', 'Launch failed', 'Preprocess failed']
    _logger.debug('PCE.tools.launch_job() called')

    # Initialize job state.
    with JobState(job_id) as job_state:
        if ('state' in job_state.keys()
            and job_state['state'] not in accepted_states):
            msg = 'Job launch already initiated'
            _logger.warn(msg)
            return (-1, msg)

        job_state['job_id'] = job_id
        job_state['mod_id'] = mod_id
        job_state['username'] = username
        job_state['run_name'] = run_name
        job_state['scheduler_job_num'] = None
        job_state['state'] = 'Setting up launch'
        job_state['error'] = None
        job_state['mod_status_output'] = None
        job_state['output'] = None
        job_state['visible_files'] = None
        job_state['mod_name'] = None
        job_state['_marked_for_del'] = False
        _logger.debug('Waiting on ModState at: %s' % time.time())
        with ModState(mod_id) as mod_state:
            _logger.debug('Done waiting on ModState at: %s' % time.time())
            if ('state' not in mod_state.keys()
                or mod_state['state'] != 'Module ready'):
                msg = 'Module not ready'
                job_state['state'] = 'Launch failed'
                job_state['error'] = msg
                _logger.warn(msg)
                _logger.warn('mod_state: %s' % str(mod_state))
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    return (-2, 'Job %d deleted' % job_id)
                return (-1, 'Module not ready')
            job_state['mod_name'] = mod_state['mod_name']
            proj_loc = mod_state['installed_path']
            mod_name = mod_state['mod_name']

    _logger.debug('Testing project location')
    if not os.path.isdir(proj_loc):
        # NOTE(review): the job state is left as 'Setting up launch' on this
        # path, which is not in accepted_states, so a later launch_job() for
        # this job_id is rejected as already initiated -- confirm intended.
        msg = 'Project location does not exist'
        _logger.error(msg)
        return (-1, msg)
    _logger.debug('Project location good')

    # Initialize dir structure.
    user_dir = os.path.join(os.path.join(pce_root, 'users'), username)
    user_mod_dir = os.path.join(user_dir, '%s_%d' % (mod_name, mod_id))
    run_dir = os.path.join(user_mod_dir, run_name)
    try:
        os.mkdir(user_dir)
    except OSError:
        # Thrown if dir already exists.
        pass
    try:
        os.mkdir(user_mod_dir)
    except OSError:
        # Thrown if dir already exists.
        pass
    # If a run_dir has already been setup with this run_name, it is used
    # (that is, not overwritten) for this launch. copytree() refuses to copy
    # into an existing directory with an OSError that `except shutil.Error`
    # does not catch, so the existing-dir case is checked explicitly.
    if os.path.isdir(run_dir):
        _logger.debug('Reusing existing run dir: %s' % run_dir)
    else:
        try:
            shutil.copytree(proj_loc, run_dir)
        except shutil.Error as e:
            # Best effort: keep going with whatever was copied, but leave a
            # trace of the problem.
            _logger.warn('Errors copying module to run dir: %s' % e)
    if run_params:
        _logger.debug('Handling run_params')
        spec = os.path.join(run_dir, 'config/onramp_uioptions.cfgspec')
        params = ConfigObj(run_params, configspec=spec)
        result = params.validate(Validator())
        # validate() returns True only when every value passed. A partial
        # failure comes back as a (truthy) dict, so compare against True.
        if result is not True:
            # NOTE(review): as above, the job state stays 'Setting up
            # launch' on this path -- confirm intended.
            msg = 'Runparams failed validation'
            _logger.warn(msg)
            return (-1, msg)
        with open(os.path.join(run_dir, 'onramp_runparams.cfg'), 'w') as f:
            params.write(f)

    ret_dir = os.getcwd()
    os.chdir(run_dir)

    # Preprocess.
    _logger.info('Calling bin/onramp_preprocess.py')
    with JobState(job_id) as job_state:
        job_state['state'] = 'Preprocessing'
        job_state['error'] = None

    try:
        result = check_output([os.path.join(pce_root, 'src/env/bin/python'),
                               'bin/onramp_preprocess.py'], stderr=STDOUT)
    except CalledProcessError as e:
        code = e.returncode
        # Map exit statuses above 127 back to negative values.
        if code > 127:
            code -= 256
        result = e.output
        msg = ('Preprocess exited with return status %d and output: %s'
               % (code, result))
        with JobState(job_id) as job_state:
            job_state['state'] = 'Preprocess failed'
            job_state['error'] = msg
            _logger.error(msg)
            os.chdir(ret_dir)
            if job_state['_marked_for_del']:
                _delete_job(job_state)
                return (-2, 'Job %d deleted' % job_id)
        return (-1, msg)
    finally:
        # Runs on success and on failure (including the returns above).
        module_log(run_dir, 'preprocess', result)

    # Determine batch scheduler to use from config.
    cfg = ConfigObj(os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg'),
                    configspec=os.path.join(pce_root, 'src', 'configspecs',
                                            'onramp_pce_config.cfgspec'))
    cfg.validate(Validator())
    scheduler = Scheduler(cfg['cluster']['batch_scheduler'])

    try:
        # Write batch script (relative path: lands in run_dir).
        with open('script.sh', 'w') as f:
            f.write(scheduler.get_batch_script(run_name))

        # Schedule job.
        result = scheduler.schedule(run_dir)
    except Exception:
        # Do not leave the process stranded in run_dir on failure.
        os.chdir(ret_dir)
        raise

    if result['status_code'] != 0:
        _logger.error(result['msg'])
        with JobState(job_id) as job_state:
            job_state['state'] = 'Schedule failed'
            job_state['error'] = result['msg']
            os.chdir(ret_dir)
            if job_state['_marked_for_del']:
                _delete_job(job_state)
                return (-2, 'Job %d deleted' % job_id)
        return (result['returncode'], result['msg'])

    with JobState(job_id) as job_state:
        job_state['state'] = 'Scheduled'
        job_state['error'] = None
        job_state['scheduler_job_num'] = result['job_num']
        os.chdir(ret_dir)
        if job_state['_marked_for_del']:
            _delete_job(job_state)
            return (-2, 'Job %d deleted' % job_id)

    return (0, 'Job scheduled')
コード例 #7
0
def _build_job(job_id, job_state_file=None):
    """Launch actions required to maintain job state and/or curate job results
    and return the state.

    When current job state (as a function of both PCE state tracking and
    scheduler output) warrants, initiate job postprocessing and/or status
    checking prior to building and returning state. Unless the job is in
    'Launch failed' or 'Setting up launch', the files matched by the
    'visible' globs of the run's config/onramp_metadata.cfg are listed under
    'visible_files'.

    Args:
        job_id (int): Id of the job to get state for.
        job_state_file: Optional second argument handed to JobState and
            forwarded to job_postprocess.

    Returns:
        OnRamp formatted dictionary containing job attrs. An empty dict is
        returned when no 'state' has been recorded for the job. When the job
        was marked for deletion, a copy of the state taken after
        _delete_job() has run is returned.
    """
    status_check_states = ['Scheduled', 'Queued', 'Running']
    with JobState(job_id, job_state_file) as job_state:
        _logger.debug('Building at %s' % time.time())
        if 'state' not in job_state.keys():
            _logger.debug('No state at %s' % time.time())
            _logger.debug('job_state keys: %s' % job_state.keys())
            return {}

        if job_state['state'] in status_check_states:
            # The job is with the scheduler: ask it for the current status.
            specfile = os.path.join(pce_root, 'src', 'configspecs',
                                    'onramp_pce_config.cfgspec')
            cfg = ConfigObj(os.path.join(pce_root, 'bin',
                                         'onramp_pce_config.cfg'),
                            configspec=specfile)
            cfg.validate(Validator())
            scheduler = Scheduler(cfg['cluster']['batch_scheduler'])
            sched_job_num = job_state['scheduler_job_num']
            job_status = scheduler.check_status(sched_job_num)

            # Bad.
            if job_status[0] != 0:
                _logger.debug('Bad job status: %s' % job_status[1])
                job_state['state'] = 'Run failed'
                job_state['error'] = job_status[1]
                if job_status[0] != -2:
                    job_state['state'] = job_status[1]
                if job_state['_marked_for_del']:
                    # FIXME: This might cause trouble. About to return {}.
                    _delete_job(job_state)
                return copy.deepcopy(job_state)

            # Good.
            if job_status[1] in ['Done', 'No info']:
                job_state['state'] = 'Postprocessing'
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)
                job_state['error'] = None
                job_state['mod_status_output'] = None
                # Postprocessing runs in a separate process.
                p = Process(target=job_postprocess,
                            args=(job_id, job_state_file))
                p.start()
            elif job_status[1] == 'Running':
                job_state['state'] = 'Running'
                job_state['error'] = None
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)
                run_dir = job_state['run_dir']
                mod_status_output = _get_module_status_output(run_dir)
                job_state['mod_status_output'] = mod_status_output
            elif job_status[1] == 'Queued':
                job_state['state'] = 'Queued'
                job_state['error'] = None
                if job_state['_marked_for_del']:
                    _delete_job(job_state)
                    # FIXME: This might cause trouble. About to return {}.
                    return copy.deepcopy(job_state)

        # Work on a copy from here on so the JobState can be released.
        job = copy.deepcopy(job_state)

    if job['state'] in ['Launch failed', 'Setting up launch']:
        return job

    # Build visible files.
    _logger.debug('job state: %s' % str(job))
    dir_args = (job['username'], job['mod_name'], job['mod_id'],
                job['run_name'])
    run_dir = os.path.join(pce_root, 'users/%s/%s_%d/%s' % dir_args)
    cfg_file = os.path.join(run_dir, 'config/onramp_metadata.cfg')
    try:
        conf = ConfigObj(cfg_file, file_error=True)
    except (IOError, SyntaxError):
        # Badly formed or non-existent config/onramp_metadata.cfg.
        _logger.debug('Bad metadata')
        _logger.debug(cfg_file)
        return job

    if 'onramp' in conf.keys() and 'visible' in conf['onramp'].keys():
        globs = conf['onramp']['visible']
        if isinstance(globs, basestring):
            # Globs is only a single string. Convert to list.
            globs = [globs]
    else:
        globs = []

    # The globs are relative to the run dir, so expand them from there. The
    # try/finally guarantees the working directory is restored even if
    # globbing or a size lookup raises (e.g. a file vanishing mid-listing).
    ret_dir = os.getcwd()
    os.chdir(run_dir)
    try:
        filenames = list(chain.from_iterable(map(glob.glob, globs)))

        prefix = os.path.join(pce_root, 'users') + '/'
        url_prefix = run_dir.split(prefix)[1]

        job['visible_files'] = [{
            'name': filename,
            'size': os.path.getsize(os.path.join(run_dir, filename)),
            'url': os.path.join('files', os.path.join(url_prefix, filename))
        } for filename in filenames]
    finally:
        os.chdir(ret_dir)

    return job
コード例 #8
0
ファイル: jobs.py プロジェクト: elise-baumgartner/onramp
def job_run(job_id, job_state_file=None):
    """Write the batch script for a job and submit it to the batch scheduler.

    The scheduler type is read from onramp_pce_config.cfg and the job's run
    directory and run name from its JobState. Optional 'np' and 'nodes'
    values from the [onramp] section of the run's onramp_runparams.cfg are
    forwarded to the batch script generator as 'numtasks' and 'num_nodes'.
    The outcome is recorded in the job state as 'Scheduled' or
    'Schedule failed'; a job found to be marked for deletion at that point
    is deleted.

    Args:
        job_id (int): Id of the job to run.
        job_state_file: Optional second argument handed to JobState.

    Returns:
        Tuple with 0th position being error code and 1st position being string
        indication of status: (0, 'Job scheduled') on success,
        (-2, 'Job <id> deleted') if the job was deleted, otherwise the
        scheduler's 'returncode' and 'msg'.
    """
    # Determine batch scheduler to use from config.
    cfg = ConfigObj(os.path.join(pce_root, 'bin', 'onramp_pce_config.cfg'),
                    configspec=os.path.join(pce_root, 'src', 'configspecs',
                                            'onramp_pce_config.cfgspec'))
    cfg.validate(Validator())
    scheduler = Scheduler(cfg['cluster']['batch_scheduler'])

    _logger.debug("in job_run: trying to launch using scheduler %s", cfg['cluster']['batch_scheduler'])
    # Remember where we came from; it is restored below on every path.
    ret_dir = os.getcwd()
    with JobState(job_id, job_state_file) as job_state:
        run_dir = job_state['run_dir']
        run_name = job_state['run_name']
    os.chdir(run_dir)

    try:
        # Load run params (file name is relative to run_dir).
        run_np = None
        run_nodes = None
        run_cfg = ConfigObj('onramp_runparams.cfg')
        if 'onramp' in run_cfg.keys():
            if 'np' in run_cfg['onramp']:
                run_np = run_cfg['onramp']['np']
            if 'nodes' in run_cfg['onramp']:
                run_nodes = run_cfg['onramp']['nodes']

        # %s rather than %d: either value may be None or a string.
        _logger.debug("in job_run: loaded params np: %s and nodes: %s", run_np, run_nodes)

        # Only pass the sizing options that were actually provided.
        script_kwargs = {}
        if run_np:
            script_kwargs['numtasks'] = run_np
        if run_nodes:
            script_kwargs['num_nodes'] = run_nodes

        # Write batch script.
        with open('script.sh', 'w') as f:
            f.write(scheduler.get_batch_script(run_name, **script_kwargs))

        # Schedule job.
        result = scheduler.schedule(run_dir)
    except Exception:
        # Do not leave the process stranded in run_dir on failure.
        os.chdir(ret_dir)
        raise

    if result['status_code'] != 0:
        _logger.error(result['msg'])
        with JobState(job_id, job_state_file) as job_state:
            job_state['state'] = 'Schedule failed'
            job_state['error'] = result['msg']
            os.chdir(ret_dir)
            if job_state['_marked_for_del']:
                _delete_job(job_state)
                return (-2, 'Job %d deleted' % job_id)
        return (result['returncode'], result['msg'])

    with JobState(job_id, job_state_file) as job_state:
        job_state['state'] = 'Scheduled'
        job_state['error'] = None
        job_state['scheduler_job_num'] = result['job_num']
        os.chdir(ret_dir)
        if job_state['_marked_for_del']:
            _delete_job(job_state)
            return (-2, 'Job %d deleted' % job_id)

    return (0, 'Job scheduled')