Example #1
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        For every map item the following is imported into the data URI of the
        source context:
          - the step output, read from the archive URI of the current attempt;
          - Agave job log files (gf-*.out / gf-*.err) found directly in the
            archive URI, placed in the destination _log directory;
          - every item of the archive's _log directory, if that directory
            exists, also placed in the destination _log directory.

        Once all map items are processed the step status is set to FINISHED.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False (value returned through self._fatal()).

        """
        data_uri = self._parsed_data_uris[self._source_context]
        dest_authority = data_uri['authority']
        dest_path = data_uri['chopped_path']

        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(data_uri['chopped_uri'], '_log')
        dest_log_path = '{}/{}'.format(dest_path, '_log')

        # agave job logs follow the pattern gf-{}-{}-{}.out or .err; a raw
        # string is required so that \d and \. reach the regex engine instead
        # of being parsed as (invalid) string escape sequences
        agave_log_pattern = re.compile(r'^gf-\d*-.*\.(out|err)$')

        # copy data for each map item
        for map_item in self._map:

            import_file = self._agave['agave_wrapper'].files_import_from_agave
            archive_uri = map_item['run'][map_item['attempt']]['archive_uri']
            output_name = map_item['template']['output']

            # copy step output
            if not import_file(
                    dest_authority,
                    dest_path,
                    output_name,
                    '{}/{}'.format(archive_uri, output_name)
            ):
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # check for any agave log files (*.out and *.err files);
            # an empty listing is treated the same way as a failed listing
            agave_log_list = DataManager.list(
                uri=archive_uri, agave=self._agave)
            if not agave_log_list:
                msg = 'cannot get agave log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each agave log file into the destination _log directory
            # NOTE(review): dest_log_dir is only created further below, so it
            # presumably already exists at this point -- confirm
            for item in agave_log_list:
                if not agave_log_pattern.match(item):
                    continue

                if not import_file(
                        dest_authority,
                        dest_log_path,
                        item,
                        '{}/{}'.format(archive_uri, item)
                ):
                    msg = 'cannot copy agave log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

            # check if anything is in the _log directory
            src_log_dir = '{}/{}'.format(archive_uri, '_log')
            if not DataManager.exists(uri=src_log_dir, agave=self._agave):
                # nothing else to copy for this map item
                continue

            # create dest _log dir if it doesn't exist
            if not DataManager.exists(uri=dest_log_dir, agave=self._agave):
                if not DataManager.mkdir(uri=dest_log_dir, agave=self._agave):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir (empty listing is fatal)
            log_list = DataManager.list(uri=src_log_dir, agave=self._agave)
            if not log_list:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not import_file(
                        dest_authority,
                        dest_log_path,
                        item,
                        '{}/{}/{}'.format(archive_uri, '_log', item)
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        self._update_status_db('FINISHED', '')

        return True
Example #2
0
    def _init_data_uri(self):
        """
        Prepare the output data URI of the source context (agave).

        The first parsed data URI of the source context must use the agave
        scheme. If the location already exists and the clean flag is set, it
        is deleted first (a failed delete is only logged). Afterwards the data
        folder and its _log sub-folder are created.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False (value returned through self._fatal()).

        """
        data_uri = self._parsed_data_uris[self._source_context][0]
        scheme = data_uri['scheme']

        # only the agave scheme is compatible with this step
        if scheme != 'agave':
            msg = 'invalid data uri scheme for this step: {}'.format(scheme)
            Log.an().error(msg)
            return self._fatal(msg)

        # remove an existing folder, but only when clean==True
        already_exists = DataManager.exists(
            parsed_uri=data_uri, agave=self._agave
        )
        if already_exists and self._clean:
            deleted = DataManager.delete(
                parsed_uri=data_uri, agave=self._agave
            )
            if not deleted:
                # not fatal: folder creation below is still attempted
                Log.a().warning(
                    'cannot delete existing data uri: %s',
                    data_uri['chopped_uri']
                )

        # create the data folder itself
        folder_created = DataManager.mkdir(
            parsed_uri=data_uri, recursive=True, agave=self._agave
        )
        if not folder_created:
            msg = 'cannot create data uri: {}'.format(data_uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        # create the _log folder inside the data folder
        log_folder_created = DataManager.mkdir(
            uri='{}/_log'.format(data_uri['chopped_uri']),
            recursive=True,
            agave=self._agave
        )
        if not log_folder_created:
            msg = 'cannot create _log folder in data uri: {}/_log'.format(
                data_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        return True
Example #3
0
    def upload_agave_test_data(self):
        """
        Upload Agave test data from workflow package.

        Creates the base test data directory ("testDataDir") on the Agave
        deployment system, then copies the local "data" directory of the
        package into a sub-directory of it named after the package directory.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False (a warning is logged with the reason).

        """
        if (
                not self._agave_wrapper
                or not self._agave_params
                or not self._agave_params.get('agave')
        ):
            Log.a().warning('must provide agave parameters to upload test data')
            return False

        agave_params = self._agave_params['agave']

        # create main test data URI
        base_test_uri = 'agave://{}/{}'.format(
            agave_params['deploymentSystem'],
            agave_params['testDataDir']
        )
        parsed_base_test_uri = URIParser.parse(base_test_uri)
        if not parsed_base_test_uri:
            # a falsy parse result cannot be subscripted below
            Log.a().warning(
                'cannot parse base test data uri: %s', base_test_uri
            )
            return False

        Log.some().info(
            'creating base test data uri: %s',
            parsed_base_test_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_base_test_uri,
                recursive=True,
                agave={
                    'agave_wrapper': self._agave_wrapper
                }
        ):
            Log.a().warning(
                'cannot create base test data uri: %s',
                parsed_base_test_uri['chopped_uri']
            )
            return False

        # upload test data
        package_path = Path(self._path)

        local_test_uri = str(package_path / 'data')
        parsed_local_test_uri = URIParser.parse(local_test_uri)
        if not parsed_local_test_uri:
            Log.a().warning(
                'cannot parse local test data uri: %s', local_test_uri
            )
            return False

        agave_test_uri = '{}/{}'.format(
            parsed_base_test_uri['chopped_uri'],
            package_path.name
        )
        parsed_agave_test_uri = URIParser.parse(agave_test_uri)
        if not parsed_agave_test_uri:
            Log.a().warning(
                'cannot parse agave test data uri: %s', agave_test_uri
            )
            return False

        Log.some().info(
            'copying test data from %s to %s',
            parsed_local_test_uri['chopped_uri'],
            parsed_agave_test_uri['chopped_uri']
        )
        if not DataManager.copy(
                parsed_src_uri=parsed_local_test_uri,
                parsed_dest_uri=parsed_agave_test_uri,
                local={},
                agave={
                    'agave_wrapper': self._agave_wrapper
                }
        ):
            Log.a().warning(
                'cannot copy test data from %s to %s',
                parsed_local_test_uri['chopped_uri'],
                parsed_agave_test_uri['chopped_uri']
            )
            return False

        return True
Example #4
0
    def _init_context_uris(self):
        """
        Build the context URI tables for this workflow run.

        Fills two parallel structures: _context_uris (URI strings) and
        _parsed_context_uris (parsed URIs). Each has an 'inputs' section keyed
        by data context and a 'steps' section keyed by data context plus the
        special 'final' context, whose URIs are located under
        _parsed_job_output_uri. An input whose source context differs from the
        context being initialized is re-based under an "_input-<slug>" folder
        of that context's work URI; the folder is created here.

        Args:
            self: class instance.

        Returns:
            Nothing.
            On failure: Raises WorkflowDAGException.

        """
        slug_pattern = r'[^-a-z0-9_]+'

        def nodes_of_type(node_type):
            # yield graph nodes of the requested type, in topological order
            for node_name in self._topo_sort:
                node = self._graph.nodes[node_name]
                if node['type'] == node_type:
                    yield node

        input_uris = {}
        step_uris = {'final': {}}
        parsed_input_uris = {}
        parsed_step_uris = {'final': {}}
        self._context_uris['inputs'] = input_uris
        self._context_uris['steps'] = step_uris
        self._parsed_context_uris['inputs'] = parsed_input_uris
        self._parsed_context_uris['steps'] = parsed_step_uris

        # init all data contexts: data schemes of the execution contexts
        # together with the explicitly listed data contexts
        exec_data_contexts = {
            Contexts.get_data_scheme_of_exec_context(exec_context)
            for exec_context in self._exec_contexts
        }
        for context in exec_data_contexts | self._data_contexts:

            context_inputs = {}
            parsed_context_inputs = {}
            input_uris[context] = context_inputs
            parsed_input_uris[context] = parsed_context_inputs

            for node in nodes_of_type('input'):
                name = node['name']

                if node['source_context'] == context:
                    # input already lives in this context: use original URI
                    input_value = self._workflow['inputs'][name]['value']
                    parsed_uri = URIParser.parse(input_value)
                    if not parsed_uri:
                        msg = 'invalid input uri: {}'.format(input_value)
                        raise WorkflowDAGException(msg)

                    context_inputs[name] = parsed_uri['chopped_uri']
                    parsed_context_inputs[name] = parsed_uri
                    continue

                # no work URI for this context means no execution is defined
                # for it, so no staging location has to be set up
                if context not in self._parsed_job_work_uri:
                    continue

                # staging location of the input inside this context
                new_base_uri = '{}/_input-{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(name, regex_pattern=slug_pattern)
                )

                # create new base URI
                base_created = DataManager.mkdir(
                    uri=new_base_uri,
                    recursive=True,
                    **{context: self._context_options[context]}
                )
                if not base_created:
                    msg = 'cannot create new base uri for input: {}'\
                        .format(new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                # switch input URI base
                input_value = self._workflow['inputs'][name]['value']
                switched_uri = URIParser.switch_context(
                    input_value, new_base_uri
                )
                if not switched_uri:
                    msg = (
                        'cannot switch input uri context to '
                        'new base URI: {}->{}'
                    ).format(input_value, new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                context_inputs[name] = switched_uri['chopped_uri']
                parsed_context_inputs[name] = switched_uri

        # step URIs are generated only for the data schemes of the
        # execution contexts, below the work URI of each context
        exec_data_contexts = {
            Contexts.get_data_scheme_of_exec_context(exec_context)
            for exec_context in self._exec_contexts
        }
        for context in exec_data_contexts:

            context_steps = {}
            parsed_context_steps = {}
            step_uris[context] = context_steps
            parsed_step_uris[context] = parsed_context_steps

            for node in nodes_of_type('step'):
                name = node['name']
                step_uri = '{}/{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(name, regex_pattern=slug_pattern)
                )
                context_steps[name] = step_uri
                parsed_context_steps[name] = URIParser.parse(step_uri)

        # init final contexts for steps, below the job output URI
        final_steps = step_uris['final']
        parsed_final_steps = parsed_step_uris['final']
        for node in nodes_of_type('step'):
            name = node['name']
            final_uri = '{}/{}'.format(
                self._parsed_job_output_uri['chopped_uri'],
                slugify(name, regex_pattern=slug_pattern)
            )
            final_steps[name] = final_uri
            parsed_final_steps[name] = URIParser.parse(final_uri)
Example #5
0
    def register_agave_app(self, agave, agave_config, agave_params, agave_publish):
        """
        Register app in Agave.

        Performs, in order: compile agave-app-def.json from its template,
        make sure the main apps directory exists, delete a previously uploaded
        app directory (best effort), upload the app assets and the test
        script, register (add/update) the app and optionally publish it.

        Args:
            self: class instance
            agave: passed through to DataManager and to the Agave wrapper
                classes (presumably the Agave connection object).
            agave_config: passed through together with agave (presumably the
                Agave configuration).
            agave_params: dict whose 'agave' entry is handed to the app
                definition template and provides 'deploymentSystem' and
                'appsDir'.
            agave_publish: if true, publish the app after registering it.

        Returns:
            On success: dict with keys 'id', 'version' and 'revision';
                'revision' is 'u<revision>' for a published app and an empty
                string otherwise.
            On failure: False.

        """
        def agave_options():
            # fresh option dict for every DataManager call
            return {
                'agave': agave,
                'agave_config': agave_config
            }

        Log.some().info('registering agave app %s', str(self._path))
        Log.some().info('app version: %s', self._config['version'])

        app_def_path = self._path / 'agave-app-def.json'
        deployment_system = agave_params['agave']['deploymentSystem']
        apps_dir = agave_params['agave']['appsDir']

        # compile agave app template
        if not TemplateCompiler.compile_template(
                self._path,
                'agave-app-def.json.j2',
                app_def_path,
                version=self._config['version'],
                agave=agave_params['agave']
        ):
            Log.a().warning(
                'cannot compile agave app "%s" definition from template',
                self._app['name']
            )
            return False

        # create main apps URI
        parsed_agave_apps_uri = URIParser.parse(
            'agave://{}/{}'.format(deployment_system, apps_dir)
        )
        Log.some().info(
            'creating main apps uri: %s',
            parsed_agave_apps_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_agave_apps_uri,
                recursive=True,
                agave=agave_options()
        ):
            Log.a().warning('cannot create main agave apps uri')
            return False

        # delete app uri if it exists
        parsed_app_uri = URIParser.parse(
            'agave://{}/{}/{}'.format(
                deployment_system, apps_dir, self._app['folder']
            )
        )
        Log.some().info(
            'deleting app uri if it exists: %s',
            parsed_app_uri['chopped_uri']
        )
        if not DataManager.delete(
                parsed_uri=parsed_app_uri,
                agave=agave_options()
        ):
            # log warning, but ignore: deleting a non-existent uri
            # returns False
            Log.a().warning(
                'cannot delete app uri: %s', parsed_app_uri['chopped_uri']
            )

        # upload app assets
        parsed_assets_uri = URIParser.parse(str(self._path / 'assets'))
        Log.some().info(
            'copying app assets from %s to %s',
            parsed_assets_uri['chopped_uri'],
            parsed_app_uri['chopped_uri']
        )
        if not DataManager.copy(
                parsed_src_uri=parsed_assets_uri,
                parsed_dest_uri=parsed_app_uri,
                local={},
                agave=agave_options()
        ):
            Log.a().warning(
                'cannot copy app assets from %s to %s',
                parsed_assets_uri['chopped_uri'],
                parsed_app_uri['chopped_uri']
            )
            return False

        # upload test script
        parsed_test_uri = URIParser.parse(
            '{}/{}'.format(parsed_app_uri['chopped_uri'], 'test')
        )
        Log.some().info(
            'creating test uri: %s', parsed_test_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_test_uri,
                recursive=True,
                agave=agave_options()
        ):
            Log.a().warning(
                'cannot create test uri: %s', parsed_test_uri['chopped_uri']
            )
            return False

        parsed_local_test_script = URIParser.parse(
            str(self._path / 'test' / 'test.sh')
        )
        parsed_agave_test_script = URIParser.parse(
            '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh')
        )
        Log.some().info(
            'copying test script from %s to %s',
            parsed_local_test_script['chopped_uri'],
            parsed_agave_test_script['chopped_uri']
        )
        if not DataManager.copy(
                parsed_src_uri=parsed_local_test_script,
                parsed_dest_uri=parsed_agave_test_script,
                local={},
                agave=agave_options()
        ):
            Log.a().warning(
                'cannot copy test script from %s to %s',
                parsed_local_test_script['chopped_uri'],
                parsed_agave_test_script['chopped_uri']
            )
            return False

        # update existing app, or register new app
        Log.some().info('registering agave app')

        # the compiled definition is JSON, loaded through the YAML loader
        app_definition = self._yaml_to_dict(str(app_def_path))
        if not app_definition:
            Log.a().warning(
                'cannot load agave app definition: %s', str(app_def_path)
            )
            return False

        app_add_result = AgaveAppsAddUpdate(
            agave, agave_config
        ).call(app_definition)
        if not app_add_result:
            Log.a().warning(
                'cannot register agave app:\n%s', pprint.pformat(app_definition)
            )
            return False

        if not agave_publish:
            # return un-published id and blank revision
            return {
                'id': app_add_result['id'],
                'version': self._config['version'],
                'revision': ''
            }

        # publish app
        Log.some().info('publishing agave app')

        app_publish_result = AgaveAppsPublish(
            agave, agave_config
        ).call(app_add_result['id'])
        if not app_publish_result:
            Log.a().warning(
                'cannot publish agave app: %s', app_add_result['id']
            )
            return False

        # return published id and revision
        return {
            'id': app_publish_result['id'],
            'version': self._config['version'],
            'revision': 'u{}'.format(app_publish_result['revision'])
        }
Example #6
0
    def _init_context_uris(self):
        """
        Build the context URI tables for this workflow run.

        For every context found in _parsed_job_work_uri, an entry is created
        for each input and each step, in both _context_uris (URI strings) and
        _parsed_context_uris (parsed URIs). Inputs that originate from another
        context are re-based under an "_input-<slug>" folder of the context's
        work URI, which is created here. Step URIs for the "final" context are
        placed under _parsed_job_output_uri.

        Args:
            self: class instance.

        Returns:
            Nothing.
            On failure: Raises WorkflowDAGException.

        """
        input_uris = {}
        step_uris = {'final': {}}
        parsed_input_uris = {}
        parsed_step_uris = {'final': {}}
        self._context_uris['inputs'] = input_uris
        self._context_uris['steps'] = step_uris
        self._parsed_context_uris['inputs'] = parsed_input_uris
        self._parsed_context_uris['steps'] = parsed_step_uris

        # init contexts in parsed_job_work_uri for inputs and steps
        for context in self._parsed_job_work_uri:

            context_inputs = {}
            context_steps = {}
            parsed_context_inputs = {}
            parsed_context_steps = {}
            input_uris[context] = context_inputs
            step_uris[context] = context_steps
            parsed_input_uris[context] = parsed_context_inputs
            parsed_step_uris[context] = parsed_context_steps

            for node_name in self._topo_sort:

                node = self._graph.nodes[node_name]

                if node['type'] != 'input':
                    # every non-input node is handled as a step here
                    # NOTE(review): the final-context loop below checks for
                    # type 'step' explicitly -- confirm no other types exist
                    name = node['name']
                    step_uri = '{}/{}'.format(
                        self._parsed_job_work_uri[context]['chopped_uri'],
                        slugify(name)
                    )
                    context_steps[name] = step_uri
                    parsed_context_steps[name] = URIParser.parse(step_uri)
                    continue

                name = node['name']

                if node['source_context'] == context:
                    # input already lives in this context: use original URI
                    input_value = self._workflow['inputs'][name]['value']
                    parsed_uri = URIParser.parse(input_value)
                    if not parsed_uri:
                        msg = 'invalid input uri: {}'.format(input_value)
                        raise WorkflowDAGException(msg)

                    context_inputs[name] = parsed_uri['chopped_uri']
                    parsed_context_inputs[name] = parsed_uri
                    continue

                # input comes from another context: switch its base URI
                new_base_uri = '{}/_input-{}'.format(
                    self._parsed_job_work_uri[context]['chopped_uri'],
                    slugify(name)
                )

                # create new base URI
                base_created = DataManager.mkdir(
                    uri=new_base_uri,
                    recursive=True,
                    **{context: self._context_options[context]}
                )
                if not base_created:
                    msg = 'cannot create new base uri for input: {}'\
                        .format(new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                # switch input URI base
                input_value = self._workflow['inputs'][name]['value']
                switched_uri = URIParser.switch_context(
                    input_value, new_base_uri
                )
                if not switched_uri:
                    msg = (
                        'cannot switch input uri context to '
                        'new base URI: {}->{}'
                    ).format(input_value, new_base_uri)
                    Log.an().error(msg)
                    raise WorkflowDAGException(msg)

                context_inputs[name] = switched_uri['chopped_uri']
                parsed_context_inputs[name] = switched_uri

        # init final contexts for steps, below the job output URI
        final_steps = step_uris['final']
        parsed_final_steps = parsed_step_uris['final']
        for node_name in self._topo_sort:

            node = self._graph.nodes[node_name]
            if node['type'] != 'step':
                continue

            name = node['name']
            final_uri = '{}/{}'.format(
                self._parsed_job_output_uri['chopped_uri'],
                slugify(name)
            )
            final_steps[name] = final_uri
            parsed_final_steps[name] = URIParser.parse(final_uri)
Example #7
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        For each map item the step output is imported from the archive URI of
        the current attempt into the data URI of the source context. If the
        archive contains a _log directory, every item in it is imported into
        the destination _log directory, which is created when missing. When
        all map items are done the step status is set to FINISHED.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False (value returned through self._fatal()).

        """
        def agave_options():
            # fresh option dict for every DataManager call
            return {
                'agave': self._agave['agave'],
                'agave_config': self._config['agave']
            }

        data_uri = self._parsed_data_uris[self._source_context]

        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(data_uri['chopped_uri'], '_log')

        # agave wrapper instance used for all data imports
        importer = AgaveFilesImportDataFromAgave(
            self._agave['agave'],
            self._config['agave']
        )

        # copy data for each map item
        for map_item in self._map:

            # copy step output
            output_imported = importer.call(
                data_uri['authority'],
                data_uri['chopped_path'],
                map_item['template']['output'],
                '{}/{}'.format(
                    map_item['run'][map_item['attempt']]['archive_uri'],
                    map_item['template']['output']
                )
            )
            if not output_imported:
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # nothing more to do unless the archive has a _log directory
            archive_uri = map_item['run'][map_item['attempt']]['archive_uri']
            src_log_dir = '{}/{}'.format(archive_uri, '_log')
            if not DataManager.exists(uri=src_log_dir, agave=agave_options()):
                continue

            # create dest _log dir if it doesn't exist
            dest_exists = DataManager.exists(
                uri=dest_log_dir, agave=agave_options()
            )
            if not dest_exists:
                dest_created = DataManager.mkdir(
                    uri=dest_log_dir, agave=agave_options()
                )
                if not dest_created:
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir; an empty listing is
            # treated the same way as a failed listing
            log_list = DataManager.list(uri=src_log_dir, agave=agave_options())
            if not log_list:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item into the destination _log directory
            for item in log_list:
                item_imported = importer.call(
                    data_uri['authority'],
                    '{}/{}'.format(data_uri['chopped_path'], '_log'),
                    item,
                    '{}/{}/{}'.format(archive_uri, '_log', item)
                )
                if not item_imported:
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        self._update_status_db('FINISHED', '')

        return True