Example #1
0
    def stop_job(self,
                 ssh_client,
                 name,
                 job_options,
                 is_singularity,
                 logger,
                 workdir=None):
        """
        Cancels a job by running the cancellation call through SSH

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_options: dictionary
        @param job_options: dictionary with the job options; its 'type'
            entry decides what is handed to the cancellation call builder
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container (not read
            by this method)
        @param logger: logger handed to the ssh check and the call builder
        @param workdir: working directory forwarded to the shell command
        @return False if the ssh client check fails or no cancellation
            call could be built; otherwise the value returned by
            `execute_shell_command`
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        # SPARK jobs hand the ssh client to the call builder, every other
        # job type hands over its options.
        is_spark = job_options['type'] == "SPARK"
        builder_arg = ssh_client if is_spark else job_options
        cancel_call = self._build_job_cancellation_call(
            name, builder_arg, logger)
        if cancel_call is None:
            return False

        # The client is validated a second time right before it is used.
        if SshClient.check_ssh_client(ssh_client, logger):
            return ssh_client.execute_shell_command(cancel_call,
                                                    workdir=workdir)
        return False
Example #2
0
    def _upload_data(self):
        """
        Uploads the source file to CKAN as a resource of the dataset

        The dataset is created first when `dataset_info` holds no package
        id. A `curl` call against the CKAN `resource_update` or
        `resource_create` action is then built and executed through SSH
        with the credentials and workdir of `from_infra`. The outcome is
        only logged; nothing is returned.
        """
        if not self.dataset_info['package_id']:
            self._create_dataset()
        ssh_credentials = self.from_infra['credentials']

        filepath = self.dt_config['from_source']['filepath']
        workdir = self.from_infra['workdir']

        action = 'update' if self._resource_exists() else 'create'
        command = 'curl {0}/api/action/resource_{1}'.format(
            self.endpoint, action)
        command += ' --form upload=@{0}'.format(filepath)
        command += ' --form package_id={0}'.format(
            self.dataset_info['package_id'])

        # Only resource fields that carry a value are sent as form fields.
        for arg in self.ckan_resource:
            if self.ckan_resource[arg]:
                command += ' --form {0}={1}'.format(arg,
                                                    self.ckan_resource[arg])

        if self.apikey:
            command += " -H 'Authorization: {0}'".format(self.apikey)

        ssh_client = SshClient(ssh_credentials)
        try:
            # With wait_result=True the other callers of this client unpack
            # the result as (output, exit_code), and they always pass
            # workdir by keyword.
            output, exit_code = ssh_client.execute_shell_command(
                command, workdir=workdir, wait_result=True)
        finally:
            # Release the connection even if the command raises.
            ssh_client.close_connection()

        if exit_code != 0:
            self.logger.error(
                'There was a problem publishing the results in CKAN ({0}):\n{1}'
                .format(exit_code, output))
        else:
            self.logger.info('Data published in CKAN')
Example #3
0
    def get_states(self, credentials, job_names):
        """
        Queries `sacct` for the state of the given jobs and collects audits

        @param credentials: credentials used to open the ssh connection
        @param job_names: names of the jobs to query
        @return tuple (states, audits): `states` is what `_parse_states`
            builds from the sacct output (empty when the call fails);
            `audits` maps each job found in `states` and not 'PENDING' to
            the result of `get_job_metrics`
        """
        monitor_start_time_str = start_time_tostr(self.monitor_start_time,
                                                  self.timezone)

        call = "sacct -n -o JobName,State -X -P --name=" + \
            ','.join(job_names) + " -S " + monitor_start_time_str

        client = SshClient(credentials)
        try:
            output, exit_code = client.execute_shell_command(
                call, workdir=self.workdir, wait_result=True)
            states = {}
            if exit_code == 0:
                states = _parse_states(output, self.logger)
            else:
                self.logger.error("Failed to get job states: " + output)

            # Get job execution audits for monitoring metrics
            audits = {}
            for name in job_names:
                if name not in states:
                    self.logger.warning(
                        "Could not parse the state of job: " + name +
                        ". Parsed dict: " + str(states))
                elif states[name] != 'PENDING':
                    audits[name] = get_job_metrics(name,
                                                   client,
                                                   self.workdir,
                                                   monitor_start_time_str,
                                                   self.logger)
        finally:
            # The connection is shared with get_job_metrics, so it is only
            # released here, also when parsing or metrics retrieval raises.
            client.close_connection()

        return states, audits
Example #4
0
    def clean_job_aux_files(self,
                            ssh_client,
                            name,
                            job_options,
                            is_singularity,
                            logger,
                            workdir=None):
        """
        Removes the auxiliary script of a container job from the HPC

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job; "<name>.script" is the file removed
        @type job_options: dictionary
        @param job_options: dictionary with the job options (not read by
            this method)
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @param logger: logger handed to the ssh client check
        @param workdir: working directory forwarded to the shell command
        @return False if the ssh client check fails, True when the job is
            not a container job, otherwise the value returned by
            `execute_shell_command` for the `rm` call
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        # Only container jobs have a script file to delete.
        if not is_singularity:
            return True

        script_name = name + ".script"
        return ssh_client.execute_shell_command("rm " + script_name,
                                                workdir=workdir)
Example #5
0
    def get_states(self, credentials, job_names):
        """
        Reads the job states from the `croupier-monitor.dat` file

        @param credentials: credentials used to open the ssh connection
        @param job_names: names of the jobs; each one gets an empty audit
        @return tuple (states, audits): `states` is the result of
            `_parse_states` on the file content (empty when `cat` fails),
            `audits` maps every job name to an empty dictionary
        """
        client = SshClient(credentials)
        output, exit_code = client.execute_shell_command(
            "cat croupier-monitor.dat",
            workdir=self.workdir,
            wait_result=True)
        client.close_connection()

        states = self._parse_states(output) if exit_code == 0 else {}
        # No metrics are gathered here: every job gets an empty audit.
        audits = {job_name: {} for job_name in job_names}

        return states, audits
Example #6
0
    def get_states(self, workdir, credentials, job_names, logger):
        """
        Reads the job states from the `croupier-monitor.data` file

        @param workdir: directory in which the `cat` call is executed
        @param credentials: credentials used to open the ssh connection
        @param job_names: names of the jobs (not read by this method)
        @param logger: logger handed to `_parse_states`
        @return result of `_parse_states` on the file content, or an empty
            dictionary when the `cat` call exits with a non-zero code
        """
        # TODO set start time of consulting
        # (sacct only check current day)
        client = SshClient(credentials)
        output, exit_code = client.execute_shell_command(
            "cat croupier-monitor.data", workdir=workdir, wait_result=True)
        client.close_connection()

        if exit_code != 0:
            return {}
        return self._parse_states(output, logger)
Example #7
0
    def get_states(self, workdir, credentials, job_names, logger):
        """
        Queries `sacct` for the state of the given jobs

        @param workdir: directory in which the call is executed
        @param credentials: credentials used to open the ssh connection
        @param job_names: names of the jobs, joined with commas into the
            `--name` filter
        @param logger: logger used for the failure warning and handed to
            `_parse_states`
        @return result of `_parse_states` on the sacct output, or an empty
            dictionary when the call exits with a non-zero code
        """
        # TODO set start time of consulting
        # (sacct only check current day)
        name_filter = ','.join(job_names)
        call = "sacct -n -o JobName,State -X -P --name=" + name_filter

        client = SshClient(credentials)
        output, exit_code = client.execute_shell_command(
            call, workdir=workdir, wait_result=True)
        client.close_connection()

        if exit_code == 0:
            return self._parse_states(output, logger)

        logger.warning("Failed to get states")
        return {}
Example #8
0
    def get_states(self, workdir, credentials, job_names, logger):
        """
        Gets the job state from the frameworks endpoint on localhost:5050

        The ssh connection is attempted up to five times when the
        authentication fails. Only the first entry of `job_names` is
        handed to `_parse_frameworks_states`.

        @param workdir: directory in which the `curl` call is executed
        @param credentials: credentials used to open the ssh connection
        @param job_names: names of the jobs; only the first one is used
        @param logger: logger used for debug and warning messages
        @return result of `_parse_frameworks_states`, or an empty
            dictionary when there is no job name, the connection cannot
            be opened, the call fails or its output is not valid JSON
        """
        import time

        states = {}
        frameinfo = getframeinfo(currentframe())
        logger.debug("{2}: {0} - {1}".format(frameinfo.filename,
                                             frameinfo.lineno,
                                             frameinfo.function))
        if not job_names:
            # Nothing to look up; avoids indexing an empty list below.
            return states

        call = "curl http://{0}:`cat /security/secrets/{0}.mesos" + \
            "`@localhost:5050/frameworks"

        max_attempts = 5
        client = None
        for attempt in range(max_attempts):
            try:
                client = SshClient(credentials)
                # Stop retrying once connected, so that only a single
                # connection is ever opened.
                break
            except AuthenticationException as ae:
                logger.debug(ae)
                if attempt < max_attempts - 1:
                    time.sleep(5)

        if client is None:
            logger.warning(
                "failed to get states: authentication failed after "
                "{0} attempts".format(max_attempts))
            return states

        try:
            user = client._user
            call_format = call.format(user)
            logger.debug("{2}: cal_fmt: {0}, usr: {1}".format(
                call_format, user, frameinfo.function))

            output, exit_code = client.execute_shell_command(
                call_format, workdir=workdir, wait_result=True)
            if exit_code == 0:
                try:
                    json_output = json.loads(output)
                except ValueError as err:
                    # json.loads raises ValueError (or a subclass of it)
                    # when the output is not valid JSON.
                    logger.warning(
                        "failed to parse states from {0}: {1}".format(
                            call_format, err))
                else:
                    states = self._parse_frameworks_states(
                        json_output, job_names[0], logger)
            else:
                logger.warning("failed to get states from {0}".format(
                    call_format))

            logger.debug("{0}: job_state:{1}".format(frameinfo.function,
                                                     states))
        finally:
            client.close_connection()
        return states
Example #9
0
def configure_execution(config, credentials, base_dir, workdir_prefix,
                        simulate, **kwargs):  # pylint: disable=W0613
    """ Creates the working directory for the execution

    When not simulating, an ssh connection is opened, checked with a
    `uname` call, and the working directory is created through the
    configured infrastructure interface. The runtime properties 'login'
    and 'workdir' are set in both modes.

    @param config: dictionary holding the 'infrastructure_interface' key
    @param credentials: ssh credentials, overridden by the 'credentials'
        runtime property when that one is present
    @param base_dir: directory under which the workdir is created
    @param workdir_prefix: prefix of the workdir; the blueprint id is
        used when it is an empty string
    @param simulate: when true nothing is executed remotely
    @raise NonRecoverableError: if the interface is missing or not
        supported, the connection or the `uname` check fails, or the
        working directory cannot be created
    """
    ctx.logger.info('Connecting to infrastructure interface..')
    if simulate:
        ctx.logger.info(' - [simulation]..')
        ctx.instance.runtime_properties['login'] = True
        ctx.instance.runtime_properties['workdir'] = "simulation"
        ctx.logger.warning('Infrastructure Interface connection simulated')
        return

    if 'infrastructure_interface' not in config:
        raise NonRecoverableError(
            "'infrastructure_interface' key missing on config")
    interface_type = config['infrastructure_interface']
    ctx.logger.info(' - manager: {interface_type}'.format(
        interface_type=interface_type))

    wm = InfrastructureInterface.factory(interface_type)
    if not wm:
        raise NonRecoverableError("Infrastructure Interface '" +
                                  interface_type + "' not supported.")

    if 'credentials' in ctx.instance.runtime_properties:
        credentials = ctx.instance.runtime_properties['credentials']
    try:
        client = SshClient(credentials)
    except Exception as exp:
        raise NonRecoverableError(
            "Failed trying to connect to infrastructure interface: " +
            str(exp))

    try:
        # TODO: use command according to wm
        _, exit_code = client.execute_shell_command('uname',
                                                    wait_result=True)
        if exit_code != 0:
            raise NonRecoverableError(
                "Failed executing on the infrastructure: exit code " +
                str(exit_code))

        # Reaching this point means the check command succeeded.
        ctx.instance.runtime_properties['login'] = True

        prefix = workdir_prefix
        if workdir_prefix == "":
            prefix = ctx.blueprint.id

        workdir = wm.create_new_workdir(client, base_dir, prefix, ctx.logger)
    finally:
        # Release the connection on every path, including exceptions
        # raised by the remote calls above.
        client.close_connection()

    if workdir is None:
        raise NonRecoverableError(
            "failed to create the working directory, base dir: " +
            base_dir)
    ctx.instance.runtime_properties['workdir'] = workdir
    ctx.logger.info('..infrastructure ready to be used on ' + workdir)
Example #10
0
    def _get_states_detailed(self, job_names):
        """
        Get job states by job names

        This function uses `qstat` command to query PBSPro.
        Please don't launch this call very frequently. Polling it
        frequently, especially across all users on the cluster,
        will slow down response times and may bring
        scheduling to a crawl.

        It allows to a precise mapping of Torque states to
        Slurm states by taking into account `exit_code`.
        Unlike `get_states_tabular` it parses output on host
        and uses several SSH commands.

        @param job_names: names of the jobs to query
        @return tuple (job_states, audits) as built by
            `_parse_qstat_detailed`, or an empty dictionary when
            `qselect` finds no job id
        @raise SyntaxError: re-raised after logging when the `qstat`
            output cannot be parsed
        """
        # identify job ids
        # Read environment, required by some HPC (e.g. HLRS Hawk)
        read_environment = "source /etc/profile > /dev/null 2>&1; "
        call = read_environment + "echo {} | xargs -n 1 qselect -x -N".format(
            shlex_quote(' '.join(map(shlex_quote, job_names))))

        client = SshClient(self.credentials)
        try:
            output, _ = client.execute_shell_command(call,
                                                     workdir=self.workdir,
                                                     wait_result=True)
            job_ids = Pbspro._parse_qselect(output)
            if not job_ids:
                # NOTE(review): this path returns a bare dict while the
                # normal path returns a 2-tuple; kept as is because the
                # callers are not visible here.
                return {}

            # get detailed information about jobs
            call = read_environment + "qstat -x -f {}".format(' '.join(
                map(str, job_ids)))

            output, _ = client.execute_shell_command(call,
                                                     workdir=self.workdir,
                                                     wait_result=True)
        finally:
            # Also covers the early return above, which used to leave the
            # connection open.
            client.close_connection()

        try:
            job_states, audits = Pbspro._parse_qstat_detailed(output)
        except SyntaxError as e:
            self.logger.warning(
                "cannot parse state response for job ids=[{}]".format(','.join(
                    map(str, job_ids))))
            self.logger.warning(
                "{err}\n`qstat -x -f` output to parse:\n\\[\n{text}\n\\]".
                format(err=str(e), text=output))
            # TODO: think whether error ignoring is better
            #       for the correct lifecycle
            raise

        return job_states, audits
    def publish(self, ssh_client, logger, workdir=None):
        """
        Publish the local file in the external repository

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @param logger: logger handed to the ssh check and the call builder
        @param workdir: working directory forwarded to the shell command
        @return False if the ssh client check fails or no publish call
            could be built; otherwise the value returned by
            `execute_shell_command`, which is invoked without waiting
            for the result
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        publish_call = self._build_publish_call(logger)
        if publish_call is not None:
            return ssh_client.execute_shell_command(publish_call,
                                                    workdir=workdir,
                                                    wait_result=False)
        return False
Example #12
0
def cleanup_job(job_options, skip, **kwargs):  # pylint: disable=W0613
    """Clean the aux files of the job

    Errors raised while cleaning are logged, not propagated.

    @param job_options: dictionary with the job options, forwarded to
        `clean_job_aux_files`
    @param skip: when true nothing is done
    @param kwargs: must hold the job name under 'name'
    """
    if skip:
        return

    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, so no cleanup needed
        ctx.logger.warning('Job was not cleaned up as it was not configured.')
        # Without this return `simulate` would be unbound below.
        return

    try:
        name = kwargs['name']
        if not simulate:
            is_singularity = 'croupier.nodes.SingularityJob' in ctx.node.\
                type_hierarchy
            workdir = ctx.instance.runtime_properties['workdir']
            interface_type = ctx.instance.runtime_properties[
                'infrastructure_interface']

            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                wm = InfrastructureInterface.factory(interface_type)
                if not wm:
                    raise NonRecoverableError("Infrastructure Interface '" +
                                              interface_type +
                                              "' not supported.")
                is_clean = wm.clean_job_aux_files(client,
                                                  name,
                                                  job_options,
                                                  is_singularity,
                                                  ctx.logger,
                                                  workdir=workdir)
            finally:
                # Release the connection on every path.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_clean = True

        if is_clean:
            ctx.logger.info('Job ' + name + ' (' + ctx.instance.id +
                            ') cleaned.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not cleaned.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) works on every exception; `.message` is not available
        # on Python 3 exceptions.
        ctx.logger.error('Something happend when trying to clean up: ' +
                         str(exp))
Example #13
0
def stop_job(job_options, **kwargs):  # pylint: disable=W0613
    """ Stops a job in the infrastructure

    Errors raised while stopping, including the "not stopped" error
    raised below, are caught by the outer handler and only logged.

    @param job_options: dictionary with the job options, forwarded to
        the infrastructure interface `stop_job`
    @param kwargs: must hold the job name under 'name'
    """
    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, no need to be stopped
        ctx.logger.warning('Job was not stopped as it was not configured.')
        # Without this return `simulate` would be unbound below.
        return

    try:
        name = kwargs['name']
        is_singularity = 'croupier.nodes.SingularityJob' in ctx.node.\
            type_hierarchy

        if not simulate:
            workdir = ctx.instance.runtime_properties['workdir']
            interface_type = ctx.instance.runtime_properties[
                'infrastructure_interface']
            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                wm = InfrastructureInterface.factory(interface_type)
                if not wm:
                    raise NonRecoverableError("Infrastructure Interface '" +
                                              interface_type +
                                              "' not supported.")
                is_stopped = wm.stop_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir)
            finally:
                # Release the connection on every path.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
            is_stopped = True

        if is_stopped:
            ctx.logger.info('Job ' + name + ' (' + ctx.instance.id +
                            ') stopped.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not stopped.')
            # NOTE(review): this error is swallowed by the handler below;
            # confirm whether it is meant to reach the caller.
            raise NonRecoverableError('Job ' + name + ' (' + ctx.instance.id +
                                      ') not stopped.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) works on every exception; `.message` is not available
        # on Python 3 exceptions.
        ctx.logger.error('Something happend when trying to stop: ' +
                         str(exp))
Example #14
0
def send_job(job_options, **kwargs):  # pylint: disable=W0613
    """ Sends a job to the infrastructure interface

    On success the job name is stored in the 'job_name' runtime property.

    @param job_options: dictionary with the job options, forwarded to
        `submit_job`
    @param kwargs: must hold the job name under 'name'
    @raise NonRecoverableError: if the infrastructure interface is not
        supported or the job could not be submitted
    """
    simulate = ctx.instance.runtime_properties['simulate']

    name = kwargs['name']
    is_singularity = 'croupier.nodes.SingularityJob' in ctx.node.\
        type_hierarchy

    if not simulate:
        workdir = ctx.instance.runtime_properties['workdir']
        interface_type = ctx.instance.runtime_properties[
            'infrastructure_interface']
        client = SshClient(ctx.instance.runtime_properties['credentials'])
        try:
            wm = InfrastructureInterface.factory(interface_type)
            if not wm:
                raise NonRecoverableError("Infrastructure Interface '" +
                                          interface_type +
                                          "' not supported.")
            # Passed to submit_job as `context`.
            context_vars = {
                'CFY_EXECUTION_ID': ctx.execution_id,
                'CFY_JOB_NAME': name
            }
            is_submitted = wm.submit_job(client,
                                         name,
                                         job_options,
                                         is_singularity,
                                         ctx.logger,
                                         workdir=workdir,
                                         context=context_vars)
        finally:
            # Release the connection even if the submission raises.
            client.close_connection()
    else:
        ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')
        is_submitted = True

    if is_submitted:
        ctx.logger.info('Job ' + name + ' (' + ctx.instance.id + ') sent.')
    else:
        ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                         ') not sent.')
        raise NonRecoverableError('Job ' + name + ' (' + ctx.instance.id +
                                  ') not sent.')

    ctx.instance.runtime_properties['job_name'] = name
Example #15
0
def publish(publish_list, **kwargs):
    """ Publish the job outputs

    Every item of `publish_list` is published in order, stopping at the
    first one that fails. Errors raised while publishing, including the
    "not published" error raised below, are caught by the outer handler
    and only logged.

    @param publish_list: items handed one by one to
        `ExternalRepository.factory`
    @param kwargs: must hold the job name under 'name'
    """
    try:
        simulate = ctx.instance.runtime_properties['simulate']
    except KeyError:
        # The job wasn't configured properly, no need to publish
        ctx.logger.warning('Job outputs where not published as' +
                           ' the job was not configured properly.')
        return

    try:
        name = kwargs['name']
        published = True
        if not simulate:
            workdir = ctx.instance.runtime_properties['workdir']
            client = SshClient(ctx.instance.runtime_properties['credentials'])
            try:
                for publish_item in publish_list:
                    if not published:
                        break
                    exrep = ExternalRepository.factory(publish_item)
                    if not exrep:
                        raise NonRecoverableError(
                            "External repository '" +
                            publish_item['dataset']['type'] +
                            "' not supported.")
                    published = exrep.publish(client, ctx.logger, workdir)
            finally:
                # Release the connection on every path.
                client.close_connection()
        else:
            ctx.logger.warning('Instance ' + ctx.instance.id + ' simulated')

        if published:
            ctx.logger.info('Job ' + name + ' (' + ctx.instance.id +
                            ') published.')
        else:
            ctx.logger.error('Job ' + name + ' (' + ctx.instance.id +
                             ') not published.')
            # NOTE(review): this error is swallowed by the handler below;
            # confirm whether it is meant to reach the caller.
            raise NonRecoverableError('Job ' + name + ' (' + ctx.instance.id +
                                      ') not published.')
    except Exception as exp:
        print(traceback.format_exc())
        # str(exp) works on every exception; `.message` is not available
        # on Python 3 exceptions.
        ctx.logger.error('Cannot publish: ' + str(exp))
Example #16
0
def deploy_job(script, inputs, credentials, wm_type, workdir, name, logger,
               skip_cleanup):  # pylint: disable=W0613
    """ Exec a deployment job script that receives SSH credentials as input

    The script resource is written to the remote workdir as `name`,
    executed with `inputs` as arguments, and removed afterwards unless
    `skip_cleanup` is set.

    @param script: resource path of the script, read with
        `ctx.get_resource`
    @param inputs: values appended to the call as arguments
    @param credentials: credentials used to open the ssh connection
    @param wm_type: workload manager type handed to the factory
    @param workdir: directory in which the script is created and run
    @param name: file name of the remote script
    @param logger: logger used for the warnings
    @param skip_cleanup: when true the remote script is not removed
    @return True if the script ran with exit code 0, False otherwise
    @raise NonRecoverableError: if the workload manager is not supported
    """

    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError("Workload Manager '" + wm_type +
                                  "' not supported.")

    # Execute the script and manage the output
    success = False
    client = SshClient(credentials)
    try:
        if wm._create_shell_script(client,
                                   name,
                                   ctx.get_resource(script),
                                   logger,
                                   workdir=workdir):
            call = "./" + name
            for dinput in inputs:
                str_input = str(dinput)
                # Inputs holding spaces or newlines are wrapped in double
                # quotes unless they already start with one.
                if ('\n' in str_input or ' ' in str_input) and \
                        str_input[0] != '"':
                    call += ' "' + str_input + '"'
                else:
                    call += ' ' + str_input
            _, exit_code = client.execute_shell_command(call,
                                                        workdir=workdir,
                                                        wait_result=True)
            if exit_code != 0:
                logger.warning("failed to deploy job: call '" + call +
                               "', exit code " + str(exit_code))
            else:
                success = True

            if not skip_cleanup:
                if not client.execute_shell_command("rm " + name,
                                                    workdir=workdir):
                    logger.warning("failed removing bootstrap script")
    finally:
        # Release the connection even if a remote call raises.
        client.close_connection()

    return success
Example #17
0
def cleanup_execution(config, credentials, skip, simulate, **kwargs):  # pylint: disable=W0613
    """ Cleans execution working directory

    @param config: dictionary holding the 'workload_manager' key
    @param credentials: ssh credentials, overridden by the 'credentials'
        runtime property when that one is present
    @param skip: when true nothing is done
    @param simulate: when true nothing is executed remotely
    @raise NonRecoverableError: if the workload manager is not supported
    """
    if skip:
        return

    ctx.logger.info('Cleaning up...')
    if simulate:
        ctx.logger.warning('clean up simulated.')
        return

    workdir = ctx.instance.runtime_properties['workdir']
    wm_type = config['workload_manager']
    # The manager is only looked up to validate the configured type.
    wm = WorkloadManager.factory(wm_type)
    if not wm:
        raise NonRecoverableError("Workload Manager '" + wm_type +
                                  "' not supported.")

    if 'credentials' in ctx.instance.runtime_properties:
        credentials = ctx.instance.runtime_properties['credentials']
    client = SshClient(credentials)
    try:
        output, exit_code = client.execute_shell_command(
            'rm -r ' + workdir, wait_result=True)
    finally:
        # Release the connection even if the command raises.
        client.close_connection()

    if exit_code != 0:
        # Report the failure instead of claiming everything is clean.
        ctx.logger.warning("failed to remove working directory '" +
                           workdir + "', exit code " + str(exit_code) +
                           ":\n" + str(output))
    else:
        ctx.logger.info('..all clean.')
Example #18
0
    def submit_job(self,
                   ssh_client,
                   name,
                   job_settings,
                   is_singularity,
                   logger,
                   workdir=None,
                   context=None):
        """
        Submits a job to the HPC through the given ssh client

        A "<name>.script" file is generated on the remote side when the
        settings carry no 'script' entry or the job runs in a container;
        otherwise the settings are used as they are.

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @type logger: logger
        @param logger: Logger object to print log messages
        @type workdir: string
        @param workdir: Path of the working directory of the job
        @type context: dictionary of strings
        @param context: Dictionary containing context env vars
        @rtype bool
        @return True if the job was submitted, False if an error arise.
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        # Build script if there is no one, or Singularity
        if 'script' not in job_settings or is_singularity:
            script_name = name + ".script"

            # generate script content
            if is_singularity:
                script_body = self._build_container_script(
                    name, job_settings, logger)
            else:
                script_body = self._build_script(name, job_settings, logger)
            if script_body is None:
                return False

            script_created = self._create_shell_script(ssh_client,
                                                       script_name,
                                                       script_body,
                                                       logger,
                                                       workdir=workdir)
            if not script_created:
                return False

            # @TODO: use more general type names (e.g., BATCH/INLINE, etc)
            settings = {"script": script_name}

            # Carry over the options the generated script still needs.
            for option in ('arguments', 'scale'):
                if option in job_settings:
                    settings[option] = job_settings[option]
            if 'scale' in job_settings and \
                    'scale_max_in_parallel' in job_settings:
                settings['scale_max_in_parallel'] = \
                    job_settings['scale_max_in_parallel']
        else:
            settings = job_settings

        # build the call to submit the job
        submission = self._build_job_submission_call(name, settings)
        if 'error' in submission:
            logger.error("Couldn't build the call to send the job: " +
                         submission['error'])
            return False

        # prepare the scale env variables
        if 'scale_env_mapping_call' in submission:
            mapping_call = submission['scale_env_mapping_call']
            mapping_output, mapping_code = ssh_client.execute_shell_command(
                mapping_call, workdir=workdir, wait_result=True)
            if mapping_code != 0:
                logger.error("Scale env vars mapping '" +
                             mapping_call + "' failed with code " +
                             str(mapping_code) + ":\n" + mapping_output)
                return False

        # submit the job
        submit_call = submission['call']
        submit_output, submit_code = ssh_client.execute_shell_command(
            submit_call, env=context, workdir=workdir, wait_result=True)
        if submit_code == 0:
            return True

        logger.error("Job submission '" + submit_call +
                     "' exited with code " + str(submit_code) + ":\n" +
                     submit_output)
        return False
Example #19
0
    def submit_job(self,
                   ssh_client,
                   name,
                   job_settings,
                   is_singularity,
                   logger,
                   workdir=None,
                   context=None):
        """
        Sends a job to the HPC

        Container jobs get a "<name>.script" file generated on the remote
        side and are submitted as 'SBATCH'. Jobs of type 'SPARK' are
        launched without waiting for the result; every other type waits
        for the submission call to finish.

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @type logger: logger
        @param logger: Logger object to print log messages
        @type workdir: string
        @param workdir: Path of the working directory of the job
        @type context: dictionary of strings
        @param context: Dictionary containing context env vars
        @rtype bool
        @return True if the job was submitted, False if an error arise.
        """
        import time

        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        if is_singularity:
            # generate script content for singularity
            script_content = self._build_container_script(
                name, job_settings, logger)
            if script_content is None:
                return False

            if not self._create_shell_script(ssh_client,
                                             name + ".script",
                                             script_content,
                                             logger,
                                             workdir=workdir):
                return False

            # @TODO: use more general type names (e.g., BATCH/INLINE, etc)
            settings = {"type": "SBATCH", "command": name + ".script"}

            if 'scale' in job_settings:
                settings['scale'] = job_settings['scale']
                if 'scale_max_in_parallel' in job_settings:
                    settings['scale_max_in_parallel'] = \
                        job_settings['scale_max_in_parallel']
        else:
            settings = job_settings

        # build the call to submit the job
        response = self._build_job_submission_call(name, settings, logger)

        if 'error' in response:
            logger.error("Couldn't build the call to send the job: " +
                         response['error'])
            return False

        # prepare the scale env variables
        if 'scale_env_mapping_call' in response:
            scale_env_mapping_call = response['scale_env_mapping_call']
            output, exit_code = ssh_client.execute_shell_command(
                scale_env_mapping_call, workdir=workdir, wait_result=True)
            if exit_code != 0:
                logger.error("Scale env vars mapping '" +
                             scale_env_mapping_call + "' failed with code " +
                             str(exit_code) + ":\n" + output)
                return False

        # submit the job
        call = response['call']
        if settings['type'] == 'SPARK':
            # No output is collected when the result is not awaited.
            output = ""
            launched = ssh_client.execute_shell_command(call,
                                                        env=context,
                                                        workdir=workdir,
                                                        wait_result=False)
            # A falsy result means the launch failed. It must be mapped to
            # a non-zero code explicitly, because `False != 0` is False in
            # Python and the failure would otherwise pass the check below.
            exit_code = 0 if launched else 1
            logger.debug("Job execution with exit code : " + str(exit_code))
            if launched:
                # NOTE(review): the original slept 30 seconds after every
                # launch; presumably this lets the job start - confirm.
                time.sleep(30)
        else:
            output, exit_code = ssh_client.execute_shell_command(
                call, env=context, workdir=workdir, wait_result=True)

        if exit_code != 0:
            logger.error("Job submission '" + call + "' exited with code " +
                         str(exit_code) + ":\n" + output)
            return False

        # TODO: for SPARK jobs, parse the submission output to get the
        # framework ID and store it to manage the jobs in future.
        return True
Example #20
0
    def process_rsync_transfer(self, rsync_source_to_target):
        """
        Copies the source data into the target data by running rsync remotely

        An ssh session is opened on one side of the transfer and the rsync
        command is executed there against the other side:
          - rsync_source_to_target is truthy: the session uses the source
            infrastructure credentials and the data is pushed to the target
            endpoint, authenticating with the target credentials.
          - otherwise: the session uses the target infrastructure credentials
            and the data is pulled from the source endpoint, authenticating
            with the source credentials.

        The other side is authenticated either with user/password (rsync is
        run through /usr/bin/sshpass) or with user/private_key. In the latter
        case the key is written to a local temporary file, handed to
        ftp_client.sendKeyFile, and referenced by the rsync command as
        ~/<temporary file name>.

        @type rsync_source_to_target: bool
        @param rsync_source_to_target: True to push from source to target,
            False to pull from source into target
        @raise CommandExecutionError: if the configuration cannot be read,
            no usable credentials are found, a connection cannot be
            established or rsync exits with a non-zero code
        """
        ssh_client = None
        ftp_client = None

        try:
            ctx.logger.info('Processing rsync data transfer')

            dt_command = None
            # Password embedded in dt_command (if any), masked before logging
            password_in_command = None

            # Source DS
            from_source = self.dt_config['from_source']
            from_source_type = from_source['type']
            from_source_data_url = None
            if 'FileDataSource' in from_source_type:
                from_source_data_url = from_source['filepath']
            from_source_infra_endpoint = from_source['located_at']['endpoint']
            from_source_infra_credentials = from_source['located_at'][
                'credentials']

            # Target DS
            to_target = self.dt_config['to_target']
            to_target_type = to_target['type']
            to_target_data_url = None
            if 'FileDataSource' in to_target_type:
                to_target_data_url = to_target['filepath']
            to_target_infra_endpoint = to_target['located_at']['endpoint']
            to_target_infra_credentials = to_target['located_at'][
                'credentials']

            # The rsync command runs on the side we open the session with
            if rsync_source_to_target:
                credentials = from_source_infra_credentials
            else:
                credentials = to_target_infra_credentials

            ssh_client = SshClient(credentials)
            ftp_client = SFtpClient(credentials)

            if rsync_source_to_target:
                if "user" in to_target_infra_credentials and \
                        "password" in to_target_infra_credentials:
                    # NOTE rsync authentication with username/password
                    # requires sshpass, which is not installed on some HPC
                    # frontends
                    target_username = to_target_infra_credentials['user']
                    target_password = to_target_infra_credentials['password']
                    password_in_command = target_password
                    dt_command = 'rsync -ratlz --rsh="/usr/bin/sshpass -p {password} ssh -o StrictHostKeyChecking=no ' \
                                 '-o IdentitiesOnly=yes -l {username}" {ds_source}  {target_endpoint}:{ds_target}'\
                        .format(
                            username=target_username, password=target_password,
                            target_endpoint=to_target_infra_endpoint, ds_source=from_source_data_url,
                            ds_target=to_target_data_url
                        )
                elif "user" in to_target_infra_credentials and \
                        "private_key" in to_target_infra_credentials:
                    target_username = to_target_infra_credentials['user']
                    target_key = to_target_infra_credentials['private_key']
                    # Save key in a temporary file; it is removed locally
                    # when the with block is left
                    with tempfile.NamedTemporaryFile() as key_file:
                        key_file.write(bytes(target_key, 'utf-8'))
                        key_file.flush()
                        key_filepath = key_file.name
                        target_key_filepath = key_file.name.split('/')[-1]
                        # Transfer key_file
                        ftp_client.sendKeyFile(ssh_client, key_filepath,
                                               target_key_filepath)
                        dt_command = 'rsync -ratlz -e "ssh -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -i ~/{key_file}" {ds_source} ' \
                                     '{username}@{target_endpoint}:{ds_target}'.format(
                                        username=target_username, key_file=target_key_filepath,
                                        target_endpoint=to_target_infra_endpoint,
                                        ds_source=from_source_data_url, ds_target=to_target_data_url
                                        )
            else:
                if "user" in from_source_infra_credentials and \
                        "password" in from_source_infra_credentials:
                    # NOTE rsync authentication with username/password
                    # requires sshpass, which is not installed on some HPC
                    # frontends
                    source_username = from_source_infra_credentials['user']
                    source_password = from_source_infra_credentials['password']
                    password_in_command = source_password
                    dt_command = 'rsync -ratlz --rsh="/usr/bin/sshpass -p {password} ssh -o StrictHostKeyChecking=no ' \
                                 '-o IdentitiesOnly=yes -l {username}" {source_endpoint}:{ds_source} {ds_target}'\
                        .format(
                            username=source_username, password=source_password,
                            source_endpoint=from_source_infra_endpoint, ds_source=from_source_data_url,
                            ds_target=to_target_data_url
                        )
                # The credentials key is "user" (as read just below and as
                # checked in every other branch), not "username"
                elif "user" in from_source_infra_credentials and \
                        "private_key" in from_source_infra_credentials:
                    source_username = from_source_infra_credentials['user']
                    source_key = from_source_infra_credentials['private_key']
                    # Save key in a temporary file; it is removed locally
                    # when the with block is left
                    with tempfile.NamedTemporaryFile() as key_file:
                        key_file.write(bytes(source_key, 'utf-8'))
                        key_file.flush()
                        key_filepath = key_file.name
                        source_key_filepath = key_file.name.split('/')[-1]
                        # Transfer key_file
                        ftp_client.sendKeyFile(ssh_client, key_filepath,
                                               source_key_filepath)
                        dt_command = 'rsync -ratlz -e "ssh -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -i ~/{key_file}" ' \
                                     '{username}@{source_endpoint}:{ds_source} {ds_target}'.format(
                                        username=source_username, key_file=source_key_filepath,
                                        source_endpoint=from_source_infra_endpoint,
                                        ds_source=from_source_data_url, ds_target=to_target_data_url
                                        )

            # Neither user/password nor user/private_key was available:
            # there is nothing meaningful to execute
            if dt_command is None:
                raise CommandExecutionError(
                    "No usable credentials (user/password or "
                    "user/private_key) to build the rsync command")

            # Never write the plaintext password to the log
            logged_command = dt_command
            if password_in_command:
                logged_command = dt_command.replace(
                    'sshpass -p {0} '.format(password_in_command),
                    'sshpass -p **** ', 1)

            # Execute data transfer command
            ctx.logger.info(
                'rsync data transfer: executing command: {}'.format(
                    logged_command))
            exit_msg, exit_code = ssh_client.execute_shell_command(
                dt_command, wait_result=True)

            if exit_code != 0:
                raise CommandExecutionError(
                    "Failed executing rsync data transfer: exit code " +
                    str(exit_code) + " and msg: " + exit_msg)

        except Exception as exp:
            raise CommandExecutionError(
                "Failed trying to connect to data source infrastructure: " +
                str(exp)) from exp
        finally:
            # Either client is still None when the failure happened before
            # (or while) it was created
            if ftp_client is not None:
                ftp_client.close_connection()
            if ssh_client is not None:
                ssh_client.close_connection()
Example #21
0
    def process_http_transfer(self):
        """
        Downloads the source resource into the target infrastructure via http

        Builds equivalent wget and curl commands for the source URL and
        executes them through ssh in the target infrastructure workdir:
        wget is tried first and curl is used as a fallback when wget exits
        with a non-zero code.

        The URL is the source resource itself when it starts with 'http',
        otherwise the source endpoint and the resource joined by one '/'.
        Source credentials are passed either as user/password or as a
        custom header ('auth-header-label': 'auth-header').

        @raise CommandExecutionError: if both wget and curl fail
        @raise Exception: any other error is logged and re-raised unchanged
        """
        # Created late; kept as None so that cleanup is safe on early errors
        ssh_client = None

        try:
            ctx.logger.info(
                'Processing http data transfer from source {} to target {}'.
                format(self.dt_config['from_source']['name'],
                       self.dt_config['to_target']['name']))

            # Source DS
            resource = self.dt_config['from_source']['resource']
            endpoint = self.dt_config['from_source']['located_at']['endpoint']

            if resource.startswith('http'):
                url = resource
            else:
                # Join endpoint and resource with exactly one slash
                url = '{endpoint}/{resource}'.format(
                    endpoint=endpoint[:-1]
                    if endpoint.endswith('/') else endpoint,
                    resource=resource[1:]
                    if resource.startswith('/') else resource)

            # Target DS
            to_target_type = self.dt_config['to_target']['type']
            to_target_data_url = None
            if 'FileDataSource' in to_target_type:
                to_target_data_url = self.dt_config['to_target']['filepath']

            workdir = self.dt_config['to_target']['located_at']['workdir']

            to_target_infra_credentials = self.dt_config['to_target'][
                'located_at']['credentials']

            target_is_file = isFile(to_target_data_url)

            # Specifying target to copy using wget (and curl as fallback)
            if target_is_file:
                wget_command = 'wget {url} -O {ds_target}'.format(
                    url=url, ds_target=to_target_data_url)
                curl_command = 'curl {url} -o {ds_target}'.format(
                    url=url, ds_target=to_target_data_url)
            else:
                wget_command = 'wget {url} -P {ds_target}'.format(
                    url=url, ds_target=to_target_data_url)
                # '&&' (not '&'): curl must run after, and only if, the cd
                # succeeded, so that the file lands in the target directory
                curl_command = 'cd {ds_target} && curl -O {url}'.format(
                    url=url, ds_target=to_target_data_url)

            source_credentials = self.dt_config['from_source']['located_at'][
                'credentials']

            if 'user' in source_credentials and 'password' in source_credentials and \
                    source_credentials['user'] and source_credentials['password']:
                user = source_credentials['user']
                password = source_credentials['password']
                wget_command += ' --user {0} --password {1}'.format(
                    user, password)
                curl_command += ' -u {0}:{1}'.format(user, password)
            elif 'auth-header' in source_credentials and source_credentials[
                    'auth-header']:
                # Both wget and curl accept --header '<label>: <value>'
                auth_header = ' --header \'' + source_credentials[
                    'auth-header-label'] + ': ' + source_credentials[
                        'auth-header'] + '\''
                wget_command += auth_header
                curl_command += auth_header

            ssh_client = SshClient(to_target_infra_credentials)

            # Execute data transfer command
            exit_msg, exit_code = ssh_client.execute_shell_command(
                wget_command, workdir=workdir, wait_result=True)
            if exit_code != 0:
                error_msg = 'Could not download using wget, trying with curl (exit code: {0}, error:{1})\n'.format(
                    str(exit_code), exit_msg)
                ctx.logger.warning(error_msg)
                exit_msg, exit_code = ssh_client.execute_shell_command(
                    curl_command, workdir=workdir, wait_result=True)

                if exit_code != 0:
                    error_msg = 'Could not download using curl (exit code: {0}, error:{1})\n'.format(
                        str(exit_code), exit_msg)
                    raise CommandExecutionError(error_msg)
                else:
                    ctx.logger.info("Data downloaded successfully with curl")
            else:
                ctx.logger.info("Data downloaded successfully with wget")
        except Exception as exp:
            ctx.logger.error(
                "There was a problem executing the data transfer: " + str(exp))
            raise
        finally:
            if ssh_client is not None:
                ssh_client.close_connection()