Beispiel #1
0
    def _init_data_uri(self):
        """
        Create output data URI for the source context (local).

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # make sure the source data URI has a compatible scheme (local)
        if self._parsed_data_uris[self._source_context]['scheme'] != 'local':
            msg = 'invalid data uri scheme for this step: {}'.format(
                self._parsed_data_uris[self._source_context]['scheme']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # delete old folder if it exists and clean==True
        if (
                DataManager.exists(
                    parsed_uri=self._parsed_data_uris[self._source_context]
                )
                and self._clean
        ):
            if not DataManager.delete(
                    parsed_uri=self._parsed_data_uris[self._source_context]
            ):
                Log.a().warning(
                    'cannot delete existing data uri: %s',
                    self._parsed_data_uris[self._source_context]['chopped_uri']
                )

        # create folder
        if not DataManager.mkdir(
                parsed_uri=self._parsed_data_uris[self._source_context],
                recursive=True
        ):
            msg = 'cannot create data uri: {}'.format(
                self._parsed_data_uris[self._source_context]['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # create _log folder
        if not DataManager.mkdir(
                uri='{}/_log'.format(
                    self._parsed_data_uris[self._source_context]['chopped_uri']
                ),
                recursive=True
        ):
            msg = 'cannot create _log folder in data uri: {}/_log'.format(
                self._parsed_data_uris[self._source_context]['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        return True
    def upload_agave_test_data(self):
        """
        Upload Agave test data from workflow package.

        Args:
            self: class instance.

        Returns:
            None

        """
        if (not self._agave or not self._agave_params
                or not self._agave_params.get('agave')):
            Log.a().warning(
                'must provide agave parameters to upload test data')
            return False

        # create main test data URI
        parsed_base_test_uri = URIParser.parse('agave://{}/{}'.format(
            self._agave_params['agave']['deploymentSystem'],
            self._agave_params['agave']['testDataDir']))
        Log.some().info('creating base test data uri: %s',
                        parsed_base_test_uri['chopped_uri'])
        if not DataManager.mkdir(parsed_uri=parsed_base_test_uri,
                                 recursive=True,
                                 agave={
                                     'agave': self._agave,
                                     'agave_config': self._config['agave']
                                 }):
            Log.a().warning('cannot create base test data uri: %s',
                            parsed_base_test_uri['chopped_uri'])
            return False

        # upload test data
        parsed_local_test_uri = URIParser.parse(str(Path(self._path) / 'data'))
        parsed_agave_test_uri = URIParser.parse('{}/{}'.format(
            parsed_base_test_uri['chopped_uri'],
            Path(self._path).name))
        Log.some().info('copying test data from %s to %s',
                        parsed_local_test_uri['chopped_uri'],
                        parsed_agave_test_uri['chopped_uri'])
        if not DataManager.copy(parsed_src_uri=parsed_local_test_uri,
                                parsed_dest_uri=parsed_agave_test_uri,
                                local={},
                                agave={
                                    'agave': self._agave,
                                    'agave_config': self._config['agave']
                                }):
            Log.a().warning('cannot copy test data from %s to %s',
                            parsed_local_test_uri['chopped_uri'],
                            parsed_agave_test_uri['chopped_uri'])
            return False

        return True
Beispiel #3
0
    def _create_job_uris(self):
        """
        Create all work and output URIs.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # create work URIs. a work URI is required for each workflow context
        for context in {
                Contexts.mapping[exec_context]['data_scheme']
                for exec_context in self._exec_contexts
        }:
            if not DataManager.mkdir(
                    parsed_uri=self._parsed_job_work_uri[context],
                    recursive=True,
                    **{
                        context: self._workflow_context[context]\
                        .get_context_options()
                    }
            ):
                msg = 'cannot create job work uri for context: {}->{}'.format(
                    context, self._parsed_job_work_uri[context]['chopped_uri']
                )
                Log.an().error(msg)
                return self._fatal(msg)

        # create output URI. output URI scheme must be in the set of data contexts
        output_context = self._parsed_job_output_uri['scheme']
        if output_context not in self._data_contexts:
            msg = 'invalid output context: {}'.format(output_context)
            Log.an().error(msg)
            return self._fatal(msg)

        if not DataManager.mkdir(
                parsed_uri=self._parsed_job_output_uri,
                recursive=True,
                **{
                    output_context: self._workflow_context[output_context]\
                        .get_context_options()
                }
        ):
            msg = 'cannot create job output uri: {}'.format(
                self._parsed_job_output_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        return True
Beispiel #4
0
    def _create_job_uris(self):
        """
        Create all work and output URIs.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # create work URI for each context
        for context in self._job['work_uri']:

            if not DataManager.mkdir(
                    parsed_uri=self._parsed_job_work_uri[context],
                    recursive=True,
                    **{
                        context: self._workflow_context[context]\
                        .get_context_options()
                    }
            ):
                msg = 'cannot create job work uri for context: {}->{}'.format(
                    context, self._parsed_job_work_uri[context]['chopped_uri'])
                Log.an().error(msg)
                return self._fatal(msg)

        # create output URI
        output_context = self._parsed_job_output_uri['scheme']
        if output_context not in self._workflow_context:
            msg = 'invalid output context: {}'.format(output_context)
            Log.an().error(msg)
            return self._fatal(msg)

        if not DataManager.mkdir(
                parsed_uri=self._parsed_job_output_uri,
                recursive=True,
                **{
                    output_context: self._workflow_context[output_context]\
                        .get_context_options()
                }
        ):
            msg = 'cannot create job output uri: {}'.format(
                self._parsed_job_output_uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        return True
Beispiel #5
0
    def _get_map_uri_list(self):
        """
        Get the contents of the map URI (agave URI).

        Args:
            self: class instance.

        Returns:
            Array of base file names in the map URI. Returns False on
            exception.

        """
        # make sure map URI is compatible scheme (agave)
        if self._parsed_map_uri['scheme'] != 'agave':
            msg = 'invalid map uri scheme for this step: {}'.format(
                self._parsed_map_uri['scheme'])
            Log.an().error(msg)
            return self._fatal(msg)

        # list files from URI
        file_list = DataManager.list(parsed_uri=self._parsed_map_uri,
                                     agave=self._agave)
        if file_list is False:
            msg = 'cannot get contents of map uri: {}'\
                .format(self._parsed_map_uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        return file_list
Beispiel #6
0
    def _init_archive_uri(self):
        """
        Initialize and validate Agave job archive URI.

        Args:
            None.

        Returns:
            On success: True.
            On failure: False.

        """
        if 'agave' not in self._parsed_job_work_uri:
            Log.an().error('job work uri must include an agave context')
            return False

        # construct archive URI
        self._parsed_archive_uri = URIParser.parse('{}/_agave_jobs'.format(
            self._parsed_job_work_uri['agave']['chopped_uri']))
        if not self._parsed_archive_uri:
            Log.an().error('invalid job work uri: %s',
                           self._parsed_job_work_uri['agave'])
            return False

        # create URI
        if not DataManager.mkdir(parsed_uri=self._parsed_archive_uri,
                                 recursive=True,
                                 agave=self.get_context_options()):
            Log.an().error('cannot create agave archive uri: %s',
                           self._parsed_archive_uri['chopped_uri'])
            return False

        return True
Beispiel #7
0
    def _init_app_paths(self):
        """
        Add app paths to environment PATH for local workflows.

        The package path contains the workflow definition YAML file and shell
        scripts for calling individual apps used in a workflow.

        Args:
            None.

        Output:
            On success: True.
            On failure: False.

        """
        parsed_uri = URIParser.parse(self._workflow_path)
        if not parsed_uri:
            Log.an().error('invalid workflow path: %s', self._workflow_path)
            return False

        apps_uri = ('{}{}' if parsed_uri['folder'] == '/' else '{}/{}')\
            .format(parsed_uri['folder'], 'apps')
        parsed_apps_uri = URIParser.parse(
            ('{}{}' if parsed_uri['folder'] == '/' else '{}/{}')\
            .format(parsed_uri['folder'], 'apps')
        )
        if not parsed_apps_uri:
            Log.an().error('cannot construct apps uri: %s', apps_uri)
            return False

        if not DataManager.exists(parsed_uri=parsed_apps_uri):
            # no apps directory
            return True

        for app_dir in DataManager.list(parsed_uri=parsed_apps_uri):
            try:
                os.environ['PATH'] = '{}{}{}'.format(
                    os.path.join(parsed_apps_uri['chopped_path'], app_dir,
                                 'assets'), os.pathsep, os.environ['PATH'])

            except OSError as err:
                Log.an().error('workflow app pathmunge error [%s]', str(err))
                return False

        return True
Beispiel #8
0
    def stage(self, **kwargs):
        """
        Copy data to all contexts except 'final' from source URI. Source URI can be multiple
        locations, but only copy to the first element of dest URIs.

        Set _staged indicator to True on success.

        Args:
            self: class instance.
            **kwargs: additional arguments required by DataManager.copy().

        Returns:
            True or False.

        """
        for context in self._parsed_data_uris:
            if context != self._source_context:
                if self._clean:
                    # remove target URI first
                    pass

                for i, parsed_source_uri in enumerate(
                        self._parsed_data_uris[self._source_context]):

                    Log.some().debug(
                        'staging data: %s->%s to %s->%s', self._source_context,
                        parsed_source_uri['chopped_uri'], context,
                        self._parsed_data_uris[context][i]['chopped_uri'])

                    if context != 'final':
                        if not DataManager.copy(
                                parsed_src_uri=parsed_source_uri,
                                parsed_dest_uri=self._parsed_data_uris[context]
                            [i],
                                **kwargs):
                            msg = 'cannot stage data by copying from {} to {}'.format(
                                parsed_source_uri['chopped_uri'],
                                self._parsed_data_uris[context][i]
                                ['chopped_uri'])
                            Log.an().error(msg)
                            return self._fatal(msg)

        self._staged = True

        return True
Beispiel #9
0
    def stage(self, **kwargs):
        """
        Copy data to all contexts except 'final' from source URI.

        Set _staged indicator to True on success.

        Args:
            self: class instance.
            **kwargs: additional arguments required by DataManager.copy().

        Returns:
            True or False.

        """
        for context in self._parsed_data_uris:
            if context != self._source_context:
                if self._clean:
                    # remove target URI first
                    pass

                Log.some().debug('staging data: {}->{} to {}->{}'.format(
                    self._source_context, self._parsed_data_uris[
                        self._source_context]['chopped_uri'], context,
                    self._parsed_data_uris[context]['chopped_uri']))

                if context != 'final':
                    if not DataManager.copy(
                            parsed_src_uri=self._parsed_data_uris\
                                [self._source_context],
                            parsed_dest_uri=self._parsed_data_uris[context],
                            **kwargs
                    ):
                        msg = 'cannot stage data by copying from {} to {}'.format(
                            self._parsed_data_uris[self._source_context]\
                                ['chopped_uri'],
                            self._parsed_data_uris[context]['chopped_uri']
                        )
                        Log.an().error(msg)
                        return self._fatal(msg)

        self._staged = True

        return True
Beispiel #10
0
    def _get_map_uri_list(self):
        """
        Get the contents of the map URI (agave URI).

        Args:
            self: class instance.

        Returns:
            Array of base file names in the map URI. Returns False on
            exception.

        """
        combined_file_list = []
        for uri in self._parsed_map_uris:
            # make sure map URI is compatible scheme (agave)
            if uri['scheme'] != 'agave':
                msg = 'invalid map uri scheme for this step: {}'.format(
                    uri['scheme']
                )
                Log.an().error(msg)
                return self._fatal(msg)

            # get file list from URI
            file_list = DataManager.list(
                parsed_uri=uri,
                globstr=self._step['map']['glob'],
                agave=self._agave
            )
            if file_list is False:
                msg = 'cannot get contents of map uri: {}'\
                    .format(uri['chopped_uri'])
                Log.an().error(msg)
                return self._fatal(msg)

            for f in file_list:
                combined_file_list.append({
                    'chopped_uri': uri['chopped_uri'],
                    'filename': f
                })

        return combined_file_list
Beispiel #11
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(
            self._parsed_data_uris[self._source_context][0]\
                ['chopped_uri'],
            '_log'
        )

        # copy data for each map item
        for map_item in self._map:

            # copy step output
            if not self._agave['agave_wrapper'].files_import_from_agave(
                    self._parsed_data_uris[self._source_context][0]['authority'],
                    self._parsed_data_uris[self._source_context][0]\
                        ['chopped_path'],
                    map_item['template']['output'],
                    '{}/{}'.format(
                        map_item['run'][map_item['attempt']]['archive_uri'],
                        map_item['template']['output']
                    )
            ):
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # check for any agave log files (*.out and *.err files)
            agave_log_list = DataManager.list(
                uri=map_item['run'][map_item['attempt']]['archive_uri'],
                agave=self._agave
            )
            if agave_log_list is False:
                msg = 'cannot get agave log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each agave log file, the pattern is gf-{}-{}-{}.out or .err
            for item in agave_log_list:
                if re.match('^gf-\d*-.*\.(out|err)$', item):
                    if not self._agave['agave_wrapper'].files_import_from_agave(
                        self._parsed_data_uris[self._source_context][0]\
                            ['authority'],
                        '{}/{}'.format(
                            self._parsed_data_uris[self._source_context][0]\
                                ['chopped_path'],
                            '_log'
                        ),
                        item,
                        '{}/{}'.format(
                            map_item['run'][map_item['attempt']]\
                                ['archive_uri'],
                            item
                        )
                    ):
                        msg = 'cannot copy agave log item "{}"'.format(item)
                        Log.an().error(msg)
                        return self._fatal(msg)

            # check if anything is in the _log directory
            src_log_dir = '{}/{}'.format(
                map_item['run'][map_item['attempt']]['archive_uri'],
                '_log'
            )

            if DataManager.exists(
                uri=src_log_dir,
                agave=self._agave
            ):
                # create dest _log dir if it doesn't exist
                if not DataManager.exists(
                    uri=dest_log_dir,
                    agave=self._agave
                ):
                    if not DataManager.mkdir(
                        uri=dest_log_dir,
                        agave=self._agave
                    ):
                        msg = 'cannot create _log directory for step "{}"'\
                            .format(self._step['name'])
                        Log.an().error(msg)
                        return self._fatal(msg)

                # get list of all items in src_log_dir
                log_list = DataManager.list(
                    uri=src_log_dir,
                    agave=self._agave
                )
                if log_list is False:
                    msg = 'cannot get _log list for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

                # copy each list item
                for item in log_list:
                    if not self._agave['agave_wrapper'].files_import_from_agave(
                        self._parsed_data_uris[self._source_context][0]\
                            ['authority'],
                        '{}/{}'.format(
                            self._parsed_data_uris[self._source_context][0]\
                                ['chopped_path'],
                            '_log'
                        ),
                        item,
                        '{}/{}/{}'.format(
                            map_item['run'][map_item['attempt']]\
                                ['archive_uri'],
                            '_log',
                            item
                        )
                    ):
                        msg = 'cannot copy log item "{}"'.format(item)
                        Log.an().error(msg)
                        return self._fatal(msg)

        self._update_status_db('FINISHED', '')

        return True
Beispiel #12
0
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                if map_item['template'][input_key]:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(map_item['template'][input_key]),
                        safe='/:'
                    )
            else:
                if self._app['inputs'][input_key]['default']:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(self._app['inputs'][input_key]['default']),
                        safe='/:'
                    )

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                if self._app['parameters'][param_key]['type'] in ['int', 'long']:
                    parameters[param_key] = int(map_item['template'][param_key])
                elif self._app['parameters'][param_key]['type'] == ['float', 'double']:
                    parameters[param_key] = float(map_item['template'][param_key])
                else:
                    parameters[param_key] = str(map_item['template'][param_key])
            else:
                if self._app['parameters'][param_key]['default'] not in [None, '']:
                    parameters[param_key] \
                        = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # add execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            parameters['exec_init'] = self._step['execution']['parameters']['init']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(
            str(map_item['attempt']),
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
        )
        name = name[:62]+'..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'],
            name
        )
        app_template = {
            'name': name,
            'appId': self._app['implementation']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        # specify processors if 'slots' param given
        if 'slots' in self._step['execution']['parameters']:
            app_template['processorsPerNode'] = int(
                self._step['execution']['parameters']['slots']
            )
        # specify memory if 'mem' param given
        if 'mem' in self._step['execution']['parameters']:
            app_template['memoryPerNode'] = '{}'.format(
                self._step['execution']['parameters']['mem']
            )

        Log.some().debug(
                "[step.%s]: agave app template:\n%s",
                self._step['name'],
                pprint.pformat(app_template)
        )

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                agave=self._agave
        ):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                    agave=self._agave
            ):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'],
                    name
                )

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug(
            '[step.%s]: agave job id: %s -> %s',
            self._step['name'],
            map_item['template']['output'],
            job['id']
        )

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
Beispiel #13
0
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map)

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                inputs[input_key] = urllib.parse.quote(str(
                    map_item['template'][input_key] or ''),
                                                       safe='/:')
            else:
                inputs[input_key] = urllib.parse.quote(str(
                    self._app['inputs'][input_key]['default'] or ''),
                                                       safe='/:')

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                parameters[param_key] = map_item['template'][param_key]
            else:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(str(map_item['attempt']),
                                    slugify(self._step['name']),
                                    slugify(map_item['template']['output']))
        name = name[:62] + '..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'], name)
        app_template = {
            'name': name,
            'appId': self._app['definition']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        Log.some().debug("agave app template:\n%s",
                         pprint.pformat(app_template))

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri'] + '/' +
                name,
                agave=self._agave):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri'] +
                    '/' + name,
                    agave=self._agave):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'], name)

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug('agave job id: %s -> %s',
                         map_item['template']['output'], job['id'])

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
Beispiel #14
0
    def _init_context_uris(self):
        """
        Generate all context URIs for this workflow run.

        Context URIs are generated based on contexts given in
        _parsed_job_work_uri, and the "final" context for steps given in the
        _parsed_job_output_uri.

        Args:
            None.

        Returns:
            On failure: Raises WorkflowDAGException.

        """
        self._context_uris['inputs'] = {}
        self._context_uris['steps'] = {'final': {}}
        self._parsed_context_uris['inputs'] = {}
        self._parsed_context_uris['steps'] = {'final': {}}

        # init contexts in parsed_job_work_uri for inputs and steps
        for context in self._parsed_job_work_uri:

            self._context_uris['inputs'][context] = {}
            self._context_uris['steps'][context] = {}
            self._parsed_context_uris['inputs'][context] = {}
            self._parsed_context_uris['steps'][context] = {}

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]
                if node['type'] == 'input':
                    if node['source_context'] == context:
                        # use original input URI
                        parsed_uri = URIParser.parse(
                            self._workflow['inputs'][node['name']]['value'])
                        if not parsed_uri:
                            msg = 'invalid input uri: {}'.format(
                                self._workflow['inputs'][
                                    node['name']]['value'])
                            raise WorkflowDAGException(msg)

                        self._context_uris['inputs'][context][node['name']]\
                            = parsed_uri['chopped_uri']
                        self._parsed_context_uris['inputs'][context]\
                            [node['name']] = parsed_uri

                    else:
                        # switch context of input URI
                        new_base_uri = '{}/_input-{}'.format(
                            self._parsed_job_work_uri[context]['chopped_uri'],
                            slugify(node['name']))

                        # create new base URI
                        if not DataManager.mkdir(
                                uri=new_base_uri,
                                recursive=True,
                                **{context: self._context_options[context]}):
                            msg = 'cannot create new base uri for input: {}'\
                                .format(new_base_uri)
                            Log.an().error(msg)
                            raise WorkflowDAGException(msg)

                        # switch input URI base
                        switched_uri = URIParser.switch_context(
                            self._workflow['inputs'][node['name']]['value'],
                            new_base_uri)
                        if not switched_uri:
                            msg = (
                                'cannot switch input uri context to '
                                'new base URI: {}->{}'
                            ).format(
                                self._workflow['inputs'][node['name']]\
                                    ['value'],
                                new_base_uri
                            )
                            Log.an().error(msg)
                            raise WorkflowDAGException(msg)

                        self._context_uris['inputs'][context][node['name']]\
                            = switched_uri['chopped_uri']
                        self._parsed_context_uris['inputs'][context]\
                            [node['name']] = switched_uri

                else:  # node['type'] == 'step'
                    self._context_uris['steps'][context][node['name']]\
                        = '{}/{}'.format(
                            self._parsed_job_work_uri[context]['chopped_uri'],
                            slugify(node['name'])
                        )
                    self._parsed_context_uris['steps'][context][node['name']]\
                        = URIParser.parse(
                            self._context_uris['steps'][context][node['name']]
                        )

        # init final contexts for steps
        for node_name in self._topo_sort:

            node = self._graph.nodes[node_name]

            if node['type'] == 'step':
                self._context_uris['steps']['final'][node['name']]\
                    = '{}/{}'.format(
                        self._parsed_job_output_uri['chopped_uri'],
                        slugify(node['name'])
                    )
                self._parsed_context_uris['steps']['final'][node['name']]\
                    = URIParser.parse(
                        self._context_uris['steps']['final'][node['name']]
                    )
Beispiel #15
0
    def register_agave_app(self, agave, agave_config, agave_params, agave_publish):
        """
        Register app in Agave.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        Log.some().info('registering agave app %s', str(self._path))
        Log.some().info('app version: %s', self._config['version'])

        # compile agave app template
        if not TemplateCompiler.compile_template(
                self._path,
                'agave-app-def.json.j2',
                self._path / 'agave-app-def.json',
                version=self._config['version'],
                agave=agave_params['agave']
        ):
            Log.a().warning(
                'cannot compile agave app "%s" definition from template',
                self._app['name']
            )
            return False

        # create main apps URI
        parsed_agave_apps_uri = URIParser.parse(
            'agave://{}/{}'.format(
                agave_params['agave']['deploymentSystem'],
                agave_params['agave']['appsDir']
            )
        )
        Log.some().info(
            'creating main apps uri: %s',
            parsed_agave_apps_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_agave_apps_uri,
                recursive=True,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning('cannot create main agave apps uri')
            return False

        # delete app uri if it exists
        parsed_app_uri = URIParser.parse(
            'agave://{}/{}/{}'.format(
                agave_params['agave']['deploymentSystem'],
                agave_params['agave']['appsDir'],
                self._app['folder']
            )
        )
        Log.some().info(
            'deleting app uri if it exists: %s',
            parsed_app_uri['chopped_uri']
        )
        if not DataManager.delete(
                parsed_uri=parsed_app_uri,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            # log warning, but ignore.. deleting non-existant uri returns False
            Log.a().warning(
                'cannot delete app uri: %s', parsed_app_uri['chopped_uri']
            )

        # upload app assets
        parsed_assets_uri = URIParser.parse(str(self._path / 'assets'))
        Log.some().info(
            'copying app assets from %s to %s',
            parsed_assets_uri['chopped_uri'],
            parsed_app_uri['chopped_uri']
        )

        if not DataManager.copy(
                parsed_src_uri=parsed_assets_uri,
                parsed_dest_uri=parsed_app_uri,
                local={},
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot copy app assets from %s to %s',
                parsed_assets_uri['chopped_uri'],
                parsed_app_uri['chopped_uri']
            )
            return False

        # upload test script
        parsed_test_uri = URIParser.parse(
            '{}/{}'.format(
                parsed_app_uri['chopped_uri'],
                'test'
            )
        )
        Log.some().info(
            'creating test uri: %s', parsed_test_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_test_uri,
                recursive=True,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot create test uri: %s', parsed_test_uri['chopped_uri']
            )
            return False

        parsed_local_test_script = URIParser.parse(
            str(self._path / 'test' / 'test.sh')
        )
        parsed_agave_test_script = URIParser.parse(
            '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh')
        )
        Log.some().info(
            'copying test script from %s to %s',
            parsed_local_test_script['chopped_uri'],
            parsed_agave_test_script['chopped_uri']
        )
        if not DataManager.copy(
                parsed_src_uri=parsed_local_test_script,
                parsed_dest_uri=parsed_agave_test_script,
                local={},
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot copy test script from %s to %s',
                parsed_local_test_script['chopped_uri'],
                parsed_agave_test_script['chopped_uri']
            )
            return False

        # update existing app, or register new app
        Log.some().info('registering agave app')

        app_definition = self._yaml_to_dict(
            str(self._path / 'agave-app-def.json')
        )
        if not app_definition:
            Log.a().warning(
                'cannot load agave app definition: %s',
                str(self._path / 'agave-app-def.json')
            )
            return False

        agwrap = AgaveAppsAddUpdate(
            agave, agave_config
        )
        app_add_result = agwrap.call(app_definition)
        if not app_add_result:
            Log.a().warning(
                'cannot register agave app:\n%s', pprint.pformat(app_definition)
            )
            return False

        register_result = {}

        # publish app
        if agave_publish:
            Log.some().info('publishing agave app')

            agwrap = AgaveAppsPublish(
                agave, agave_config
            )
            app_publish_result = agwrap.call(app_add_result['id'])
            if not app_publish_result:
                Log.a().warning(
                    'cannot publish agave app: %s', app_add_result['id']
                )
                return False

            # return published id and revision
            register_result = {
                'id': app_publish_result['id'],
                'version': self._config['version'],
                'revision': 'u{}'.format(app_publish_result['revision'])
            }

        else:
            # return un-published id and blank revision
            register_result = {
                'id': app_add_result['id'],
                'version': self._config['version'],
                'revision': ''
            }

        return register_result
Beispiel #16
0
    def _get_map_uri_list(self):
        """
        Get the contents of the map URI (agave URI).

        Args:
            self: class instance.

        Returns:
            Array of base file names in the map URI. Returns False on
            exception.

        """
        combined_file_list = []
        for uri in self._parsed_map_uris:
            # make sure map URI is compatible scheme (agave)
            if uri['scheme'] != 'agave':
                msg = 'invalid map uri scheme for this step: {}'.format(
                    uri['scheme']
                )
                Log.an().error(msg)
                return self._fatal(msg)

            # get file list from URI
            file_list = DataManager.list(
                parsed_uri=uri,
                globstr=self._step['map']['glob'],
                agave=self._agave
            )
            if file_list is False:
                msg = 'cannot get contents of map uri: {}'\
                    .format(uri['chopped_uri'])
                Log.an().error(msg)
                return self._fatal(msg)

            if self._step['map']['inclusive']:
                # filter with glob
                if glob.globfilter(
                    [uri['name']],
                    self._step['map']['glob'],
                    flags=glob.EXTGLOB|glob.GLOBSTAR
                ):
                    combined_file_list.append({
                        'chopped_uri': '{}://{}{}'.format(
                            uri['scheme'],
                            uri['authority'],
                            uri['folder']
                        ),
                        'filename': uri['name']
                    })

            for f in file_list:
                if '/' in f:
                    # reparse uri to correctly represent recursive elements
                    new_uri = URIParser.parse('{}/{}'.format(uri['chopped_uri'], f))
                    combined_file_list.append({
                        'chopped_uri': '{}://{}{}'.format(
                            new_uri['scheme'],
                            new_uri['authority'],
                            new_uri['folder']
                        ),
                        'filename': new_uri['name']
                    })
                else:
                    combined_file_list.append({
                        'chopped_uri': uri['chopped_uri'],
                        'filename': f
                    })

        return combined_file_list
Beispiel #17
0
    def _init_context_uris(self):
        """
        Generate all context URIs for this workflow run.

        Context URIs are generated based on contexts given in
        _parsed_job_work_uri, and the "final" context for steps given in the
        _parsed_job_output_uri.

        Args:
            None.

        Returns:
            On failure: Raises WorkflowDAGException.

        """
        self._context_uris['inputs'] = {}
        self._context_uris['steps'] = {'final': {}}
        self._parsed_context_uris['inputs'] = {}
        self._parsed_context_uris['steps'] = {'final': {}}

        # init all data contexts
        for context in {
                Contexts.get_data_scheme_of_exec_context(con)
                for con in self._exec_contexts
        } | self._data_contexts:

            self._context_uris['inputs'][context] = {}
            self._parsed_context_uris['inputs'][context] = {}

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]
                if node['type'] == 'input':
                    if node['source_context'] == context:
                        # use original input URI
                        parsed_uri = URIParser.parse(
                            self._workflow['inputs'][node['name']]['value'])
                        if not parsed_uri:
                            msg = 'invalid input uri: {}'.format(
                                self._workflow['inputs'][
                                    node['name']]['value'])
                            raise WorkflowDAGException(msg)

                        self._context_uris['inputs'][context][node['name']]\
                            = parsed_uri['chopped_uri']
                        self._parsed_context_uris['inputs'][context]\
                            [node['name']] = parsed_uri

                    else:
                        # skip if _parsed_job_work_uri is not defined for this context
                        # this implies that there is no execution defined for that context,
                        # so no need to setup the data staging location at the work_uri
                        if context not in self._parsed_job_work_uri:
                            continue

                        # switch context of input URI
                        new_base_uri = '{}/_input-{}'.format(
                            self._parsed_job_work_uri[context]['chopped_uri'],
                            slugify(node['name'],
                                    regex_pattern=r'[^-a-z0-9_]+'))

                        # create new base URI
                        if not DataManager.mkdir(
                                uri=new_base_uri,
                                recursive=True,
                                **{context: self._context_options[context]}):
                            msg = 'cannot create new base uri for input: {}'\
                                .format(new_base_uri)
                            Log.an().error(msg)
                            raise WorkflowDAGException(msg)

                        # switch input URI base
                        switched_uri = URIParser.switch_context(
                            self._workflow['inputs'][node['name']]['value'],
                            new_base_uri)
                        if not switched_uri:
                            msg = (
                                'cannot switch input uri context to '
                                'new base URI: {}->{}'
                            ).format(
                                self._workflow['inputs'][node['name']]\
                                    ['value'],
                                new_base_uri
                            )
                            Log.an().error(msg)
                            raise WorkflowDAGException(msg)

                        self._context_uris['inputs'][context][node['name']]\
                            = switched_uri['chopped_uri']
                        self._parsed_context_uris['inputs'][context]\
                            [node['name']] = switched_uri

        for context in {
                Contexts.get_data_scheme_of_exec_context(con)
                for con in self._exec_contexts
        }:

            self._context_uris['steps'][context] = {}
            self._parsed_context_uris['steps'][context] = {}

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]
                if node['type'] == 'step':
                    self._context_uris['steps'][context][node['name']]\
                        = '{}/{}'.format(
                            self._parsed_job_work_uri[context]['chopped_uri'],
                            slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+')
                        )
                    self._parsed_context_uris['steps'][context][node['name']]\
                        = URIParser.parse(
                            self._context_uris['steps'][context][node['name']]
                        )

        # init final contexts for steps
        for node_name in self._topo_sort:

            node = self._graph.nodes[node_name]

            if node['type'] == 'step':
                self._context_uris['steps']['final'][node['name']]\
                    = '{}/{}'.format(
                        self._parsed_job_output_uri['chopped_uri'],
                        slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+')
                    )
                self._parsed_context_uris['steps']['final'][node['name']]\
                    = URIParser.parse(
                        self._context_uris['steps']['final'][node['name']]
                    )
Beispiel #18
0
    def stage_final(self, **kwargs):
        """
        Move data to final context from source URI.

        Set _staged_final indicator to True on success.

        Args:
            self: class instance.
            **kwargs: additional arguments required by DataManager.move().

        Returns:
            True or False.

        """
        for context in self._parsed_data_uris:
            if context != self._source_context:
                if self._clean:
                    # remove target URI first
                    pass

                Log.some().debug('staging final data: {}->{} to {}->{}'.format(
                    self._source_context, self._parsed_data_uris[
                        self._source_context]['chopped_uri'], context,
                    self._parsed_data_uris[context]['chopped_uri']))

                if context == 'final':
                    if (self._parsed_data_uris[self._source_context]['scheme']
                            == 'local'
                            and self._parsed_data_uris[context]['scheme']
                            == 'local'):
                        # move final data instead of copy, only for local-->local schemes
                        if not DataManager.move(
                                parsed_src_uri=self._parsed_data_uris\
                                    [self._source_context],
                                parsed_dest_uri=self._parsed_data_uris[context],
                                **kwargs
                        ):
                            msg = 'cannot stage final data by copying from {} to {}'.format(
                                self._parsed_data_uris[self._source_context]\
                                    ['chopped_uri'],
                                self._parsed_data_uris[context]['chopped_uri']
                            )
                            Log.an().error(msg)
                            return self._fatal(msg)

                    else:
                        if not DataManager.copy(
                                parsed_src_uri=self._parsed_data_uris\
                                    [self._source_context],
                                parsed_dest_uri=self._parsed_data_uris[context],
                                **kwargs
                        ):
                            msg = 'cannot stage final data by copying from {} to {}'.format(
                                self._parsed_data_uris[self._source_context]\
                                    ['chopped_uri'],
                                self._parsed_data_uris[context]['chopped_uri']
                            )
                            Log.an().error(msg)
                            return self._fatal(msg)

        self._staged_final = True

        return True
Beispiel #19
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(
            self._parsed_data_uris[self._source_context]\
                ['chopped_uri'],
            '_log'
        )

        # create instance of agave wrapper class for data import
        agwrap = AgaveFilesImportDataFromAgave(
            self._agave['agave'],
            self._config['agave']
        )

        # copy data for each map item
        for map_item in self._map:

            # copy step output
            if not agwrap.call(
                    self._parsed_data_uris[self._source_context]['authority'],
                    self._parsed_data_uris[self._source_context]\
                        ['chopped_path'],
                    map_item['template']['output'],
                    '{}/{}'.format(
                        map_item['run'][map_item['attempt']]['archive_uri'],
                        map_item['template']['output']
                    )
            ):
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)
           
            # check if anything is in the _log directory
            src_log_dir = '{}/{}'.format(
                map_item['run'][map_item['attempt']]['archive_uri'],
                '_log'
            )

            if DataManager.exists(
                uri=src_log_dir,
                agave={
                    'agave': self._agave['agave'],
                    'agave_config': self._config['agave']
                }
            ):
                # create dest _log dir if it doesn't exist
                if not DataManager.exists(
                    uri=dest_log_dir,
                    agave={
                        'agave': self._agave['agave'],
                        'agave_config': self._config['agave']
                    }
                ):
                    if not DataManager.mkdir(
                        uri=dest_log_dir,
                        agave={
                            'agave': self._agave['agave'],
                            'agave_config': self._config['agave']
                        }
                    ):
                        msg = 'cannot create _log directory for step "{}"'\
                            .format(self._step['name'])
                        Log.an().error(msg)
                        return self._fatal(msg)

                # get list of all items in src_log_dir
                log_list = DataManager.list(
                    uri=src_log_dir,
                    agave={
                        'agave': self._agave['agave'],
                        'agave_config': self._config['agave']
                    }
                )
                if not log_list:
                    msg = 'cannot get _log list for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

                # copy each list item
                for item in log_list:
                    if not agwrap.call(
                        self._parsed_data_uris[self._source_context]\
                            ['authority'],
                        '{}/{}'.format(
                            self._parsed_data_uris[self._source_context]\
                                ['chopped_path'],
                            '_log'
                        ),
                        item,
                        '{}/{}/{}'.format(
                            map_item['run'][map_item['attempt']]\
                                ['archive_uri'],
                            '_log',
                            item
                        )
                    ):
                        msg = 'cannot copy log item "{}"'.format(item)
                        Log.an().error(msg)
                        return self._fatal(msg)
 
        self._update_status_db('FINISHED', '')

        return True