Beispiel #1
0
def cleanup_execution(
        config,
        credentials,
        skip,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """ Cleans execution working directory

    Recursively removes the directory stored in the instance runtime
    property ``workdir`` by running ``rm -r`` over an SSH connection.

    Args:
        config: mapping that provides the ``workload_manager`` type.
        credentials: SSH credentials; replaced by the ``credentials``
            runtime property of the instance when that one exists.
        skip: when truthy the function returns without doing anything.
        simulate: when truthy only a warning is logged.
        **kwargs: ignored.

    Raises:
        NonRecoverableError: when ``WorkloadManager.factory`` returns
            nothing for the configured workload manager type.
    """
    if skip:
        return

    ctx.logger.info('Cleaning up...')
    if simulate:
        ctx.logger.warning('clean up simulated.')
        return

    workdir = ctx.instance.runtime_properties['workdir']
    wm_type = config['workload_manager']
    # The manager object itself is not used here; the factory call only
    # validates that the configured type is known.
    if not WorkloadManager.factory(wm_type):
        raise NonRecoverableError(
            "Workload Manager '" +
            wm_type +
            "' not supported.")

    if 'credentials' in ctx.instance.runtime_properties:
        credentials = ctx.instance.runtime_properties['credentials']

    client = SshClient(credentials)
    try:
        client.execute_shell_command(
            'rm -r ' + workdir,
            wait_result=True)
    finally:
        # Always release the SSH connection, even if the command raised.
        client.close_connection()
    ctx.logger.info('..all clean.')
Beispiel #2
0
def configure_execution(
        config,
        credentials,
        base_dir,
        workdir_prefix,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """ Creates the working directory for the execution

    Connects over SSH, checks that a trivial command (``uname``) runs,
    asks the workload manager to create a new working directory and stores
    the results in the instance runtime properties ``login`` and
    ``workdir``.

    Args:
        config: mapping that provides the ``workload_manager`` type.
        credentials: SSH credentials; replaced by the ``credentials``
            runtime property of the instance when that one exists.
        base_dir: directory under which the working directory is created.
        workdir_prefix: prefix for the working directory; the blueprint id
            is used when it is the empty string.
        simulate: when truthy no connection is made and the runtime
            properties are filled with placeholder values.
        **kwargs: ignored.

    Raises:
        NonRecoverableError: when the workload manager type is unknown,
            the connection fails, the test command exits with a non-zero
            code, or the working directory could not be created.
    """
    ctx.logger.info('Connecting to workload manager..')
    if simulate:
        ctx.logger.info(' - [simulation]..')
        ctx.instance.runtime_properties['login'] = True
        ctx.instance.runtime_properties['workdir'] = "simulation"
        ctx.logger.warning('Workload manager connection simulated')
        return

    wm_type = config['workload_manager']
    ctx.logger.info(' - manager: {wm_type}'.format(wm_type=wm_type))

    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError(
            "Workload Manager '" +
            wm_type +
            "' not supported.")

    if 'credentials' in ctx.instance.runtime_properties:
        credentials = ctx.instance.runtime_properties['credentials']
    try:
        client = SshClient(credentials)
    except Exception as exp:
        raise NonRecoverableError(
            "Failed trying to connect to workload manager: " + str(exp))

    try:
        # TODO: use command according to wm
        _, exit_code = client.execute_shell_command(
            'uname',
            wait_result=True)

        # Compare by value: `is not 0` relies on small-int caching and is
        # a SyntaxWarning on recent Python versions.
        if exit_code != 0:
            raise NonRecoverableError(
                "Failed executing on the workload manager: exit code " +
                str(exit_code))

        # Reaching this point means the test command succeeded.
        ctx.instance.runtime_properties['login'] = True

        prefix = workdir_prefix
        if workdir_prefix == "":
            prefix = ctx.blueprint.id

        workdir = wm.create_new_workdir(client, base_dir, prefix, ctx.logger)
    finally:
        # Release the SSH connection on every path, including errors.
        client.close_connection()

    if workdir is None:
        raise NonRecoverableError(
            "failed to create the working directory, base dir: " +
            base_dir)
    ctx.instance.runtime_properties['workdir'] = workdir
    ctx.logger.info('..workload manager ready to be used on ' + workdir)
def send_job(job_options, **kwargs):  # pylint: disable=W0613
    """ Sends a job to the workload manager

    Reads ``simulate``, ``workdir``, ``workload_manager`` and
    ``credentials`` from the instance runtime properties, submits the job
    through the workload manager and records the job name in the
    ``job_name`` runtime property.

    Args:
        job_options: options forwarded untouched to ``submit_job``.
        **kwargs: must contain ``name``, the name of the job.

    Raises:
        NonRecoverableError: when the workload manager type is unknown or
            the job was not submitted.
    """
    ctx.logger.info('TASKS.PY::SEND_JOB L455')
    simulate = ctx.instance.runtime_properties['simulate']

    name = kwargs['name']
    is_singularity = 'hpc.nodes.SingularityJob' in ctx.node.type_hierarchy

    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = ctx.instance.runtime_properties['workload_manager']

        # Validate the manager type before opening any connection so that
        # nothing has to be closed on this error path.
        wm = WorkloadManager.factory(wm_type)
        if not wm:
            raise NonRecoverableError(
                "Workload Manager '" +
                wm_type +
                "' not supported.")

        # 'K8S' jobs are submitted without an SSH client.
        client = None
        if wm_type != 'K8S':
            client = SshClient(ctx.instance.runtime_properties['credentials'])

        context_vars = {
            'CFY_EXECUTION_ID': ctx.execution_id,
            'CFY_JOB_NAME': name
        }
        try:
            is_submitted = wm.submit_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir,
                                         context=context_vars)
        finally:
            # Release the SSH connection even if the submission raised.
            if client is not None:
                client.close_connection()
    else:
        ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
        is_submitted = True

    if is_submitted:
        ctx.logger.info('Job ' + name + ' (' + ctx.instance.id + ') sent.')
    else:
        ctx.logger.error(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')
        raise NonRecoverableError(
            'Job ' + name + ' (' + ctx.instance.id + ') not sent.')

    ctx.instance.runtime_properties['job_name'] = name

    ctx.logger.info('TASKS.PY::SEND_JOB *** END *** L517')
def cleanup_job(job_options, skip, **kwargs):  # pylint: disable=W0613
    """Clean the aux files of the job

    Best-effort operation: any exception raised while cleaning is logged
    and swallowed so that it never propagates to the caller.

    Args:
        job_options: options forwarded untouched to ``clean_job_aux_files``.
        skip: when truthy the function returns without cleaning.
        **kwargs: expected to contain ``name``, the name of the job.
    """
    # NOTE(review): the trace label says PRECONFIGURE_WM although this is
    # cleanup_job; it is kept as is in case logs are searched for it.
    ctx.logger.info('TASKS.PY::PRECONFIGURE_WM L503')
    if skip:
        return

    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, so no cleanup needed
        ctx.logger.warning('Job was not cleaned up as it was not configured.')
        # Stop here: without `simulate` the code below would only fail
        # with a NameError and log a misleading error.
        return

    try:
        name = kwargs['name']
        if not simulate:
            is_singularity = 'hpc.nodes.SingularityJob' in ctx.node.\
                type_hierarchy
            workdir = ctx.instance.runtime_properties['workdir']
            wm_type = ctx.instance.runtime_properties['workload_manager']

            # Validate the manager type before opening the connection.
            wm = WorkloadManager.factory(wm_type)
            if not wm:
                raise NonRecoverableError(
                    "Workload Manager '" +
                    wm_type +
                    "' not supported.")

            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                is_clean = wm.clean_job_aux_files(client,
                                                  name,
                                                  job_options,
                                                  is_singularity,
                                                  ctx.logger,
                                                  workdir=workdir)
            finally:
                # Release the SSH connection even if the cleanup raised.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_clean = True

        if is_clean:
            ctx.logger.info(
                'Job ' + name + ' (' + ctx.instance.id + ') cleaned.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not cleaned.')
    except Exception as exp:  # pylint: disable=W0703
        print(traceback.format_exc())
        # str(exp) instead of exp.message: the attribute does not exist
        # on Python 3 exceptions.
        ctx.logger.error(
            'Something happend when trying to clean up: ' + str(exp))

    ctx.logger.info('TASKS.PY::PRECONFIGURE_WM L573')
def stop_job(job_options, **kwargs):  # pylint: disable=W0613
    """ Stops a job in the workload manager

    Any exception raised while stopping the job is logged and swallowed.

    NOTE(review): this includes the NonRecoverableError raised below when
    the job could not be stopped, so that error never reaches the caller.
    The behaviour is preserved here; confirm whether it is intended.

    Args:
        job_options: options forwarded untouched to ``stop_job`` of the
            workload manager.
        **kwargs: expected to contain ``name``, the name of the job.
    """
    ctx.logger.info('TASKS.PY::STOP_JOB L557')
    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, no need to be stopped
        ctx.logger.warning('Job was not stopped as it was not configured.')
        # Stop here: without `simulate` the code below would only fail
        # with a NameError and log a misleading error.
        return

    try:
        name = kwargs['name']
        is_singularity = 'hpc.nodes.SingularityJob' in ctx.node.\
            type_hierarchy

        if not simulate:
            workdir = ctx.instance.runtime_properties['workdir']
            wm_type = ctx.instance.runtime_properties['workload_manager']

            # Validate the manager type before opening the connection.
            wm = WorkloadManager.factory(wm_type)
            if not wm:
                raise NonRecoverableError(
                    "Workload Manager '" +
                    wm_type +
                    "' not supported.")

            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                is_stopped = wm.stop_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir)
            finally:
                # Release the SSH connection even if stopping raised.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_stopped = True

        if is_stopped:
            ctx.logger.info(
                'Job ' + name + ' (' + ctx.instance.id + ') stopped.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not stopped.')
            raise NonRecoverableError('Job ' + name + ' (' + ctx.instance.id +
                                      ') not stopped.')
    except Exception as exp:  # pylint: disable=W0703
        print(traceback.format_exc())
        # str(exp) instead of exp.message: the attribute does not exist
        # on Python 3 exceptions.
        ctx.logger.error(
            'Something happend when trying to stop: ' + str(exp))

    ctx.logger.info('TASKS.PY::STOP_JOB L628')
Beispiel #6
0
def deploy_job(script,
               inputs,
               credentials,
               wm_type,
               workdir,
               name,
               logger,
               skip_cleanup):  # pylint: disable=W0613
    """ Exec a deployment job script that receives SSH credentials as input

    The script resource is written to the remote working directory as a
    shell script called ``name``, executed with ``inputs`` as arguments
    and, unless ``skip_cleanup`` is set, removed afterwards.

    Args:
        script: resource path resolved through ``ctx.get_resource``.
        inputs: iterable of arguments appended to the script call; values
            containing a space or newline are wrapped in double quotes
            unless they already start with one.
        credentials: SSH credentials used to open the connection.
        wm_type: workload manager type handed to the factory.
        workdir: remote directory in which the script is created and run.
        name: file name of the remote script.
        logger: logger used to report failures.
        skip_cleanup: when truthy the remote script is left in place.

    Returns:
        True when the script ran and exited with code 0, False otherwise.

    Raises:
        NonRecoverableError: when the workload manager type is unknown.
    """

    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError(
            "Workload Manager '" +
            wm_type +
            "' not supported.")

    # Execute the script and manage the output
    success = False
    client = SshClient(credentials)
    try:
        if wm._create_shell_script(client,
                                   name,
                                   ctx.get_resource(script),
                                   logger,
                                   workdir=workdir):
            call = "./" + name
            for dinput in inputs:
                str_input = str(dinput)
                # NOTE(review): quoting is minimal (no escaping of inner
                # quotes); inputs are assumed to be trusted.
                if ('\n' in str_input or ' ' in str_input) \
                        and str_input[0] != '"':
                    call += ' "' + str_input + '"'
                else:
                    call += ' ' + str_input
            _, exit_code = client.execute_shell_command(
                call,
                workdir=workdir,
                wait_result=True)
            # Compare by value: `is not 0` relies on small-int caching.
            if exit_code != 0:
                logger.warning(
                    "failed to deploy job: call '" + call + "', exit code " +
                    str(exit_code))
            else:
                success = True

            if not skip_cleanup:
                if not client.execute_shell_command(
                        "rm " + name,
                        workdir=workdir):
                    logger.warning("failed removing bootstrap script")
    finally:
        # Release the SSH connection even if a remote call raised.
        client.close_connection()

    return success
def cleanup_execution(
        config,
        credentials,
        skip,
        simulate,
        **kwargs):  # pylint: disable=W0613
    """ Cleans execution working directory

    For every workload manager except ``'K8S'`` the directory stored in
    the ``workdir`` runtime property is removed with ``rm -r`` over SSH.
    For ``'K8S'`` it is removed locally with ``os.rmdir``, and a failure
    there is only logged.

    Args:
        config: mapping that provides the ``workload_manager`` type.
        credentials: SSH credentials; replaced by the ``credentials``
            runtime property of the instance when that one exists.
        skip: when truthy the function returns without cleaning.
        simulate: when truthy only a warning is logged.
        **kwargs: ignored.

    Raises:
        NonRecoverableError: when ``WorkloadManager.factory`` returns
            nothing for the configured workload manager type.
    """
    ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION L148')
    if skip:
        ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION *** SKIP *** L154')
        return

    ctx.logger.info('Cleaning up...')
    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        wm_type = config['workload_manager']
        # The manager object itself is not used here; the factory call
        # only validates that the configured type is known.
        if not WorkloadManager.factory(wm_type):
            raise NonRecoverableError(
                "Workload Manager '" +
                wm_type +
                "' not supported.")

        if 'credentials' in ctx.instance.runtime_properties:
            credentials = ctx.instance.runtime_properties['credentials']

        if wm_type != 'K8S':
            client = SshClient(credentials)
            try:
                client.execute_shell_command(
                    'rm -r ' + workdir,
                    wait_result=True)
            finally:
                # Release the SSH connection even if the command raised.
                client.close_connection()
        else:
            # NOTE(review): os.rmdir only removes an empty directory,
            # unlike the recursive `rm -r` above - confirm this is wanted.
            try:
                os.rmdir(workdir)
            except Exception as exp:  # pylint: disable=W0703
                print(traceback.format_exc())
                # str(exp) instead of exp.message: the attribute does not
                # exist on Python 3 exceptions.
                ctx.logger.error(
                    'Something happend when trying to clean up: ' +
                    str(exp))

        ctx.logger.info('..all clean.')
    else:
        ctx.logger.warning('clean up simulated.')

    ctx.logger.info('TASKS.PY::CLEANUP_EXECUTION *** END *** L179')
Beispiel #8
0
 def __init__(self, methodName='runTest'):
     """Build the test case with a dedicated logger and a TORQUE manager."""
     super(TestTorque, self).__init__(methodName)
     # Logger named after the test class.
     self.logger = logging.getLogger('TestTorque')
     # Workload manager under test, obtained from the factory.
     self.wm = WorkloadManager.factory("TORQUE")
 def __init__(self, methodName='runTest'):
     """Build the test case with a dedicated logger and a SLURM manager."""
     super(TestSlurm, self).__init__(methodName)
     # Logger named after the test class.
     self.logger = logging.getLogger('TestSlurm')
     # Workload manager under test, obtained from the factory.
     self.wm = WorkloadManager.factory("SLURM")
Beispiel #10
0
 def __init__(self, methodName='runTest'):
     """Build the test case with a dedicated logger and a K8S manager."""
     super(TestK8s, self).__init__(methodName)
     # Logger named after the test class.
     self.logger = logging.getLogger('TestK8s')
     # Workload manager under test, obtained from the factory.
     self.wm = WorkloadManager.factory("K8S")