Esempio n. 1
0
    def _init_data_uri(self):
        """
        Prepare the output data URI for the source context (local).

        Checks that the data URI of the source context uses the 'local'
        scheme, removes a pre-existing folder when cleaning is enabled, and
        then creates the folder together with its '_log' sub-folder.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False (the value returned by self._fatal()).

        """
        target_uri = self._parsed_data_uris[self._source_context]

        # this step only knows how to handle the 'local' scheme
        scheme = target_uri['scheme']
        if scheme != 'local':
            msg = 'invalid data uri scheme for this step: {}'.format(scheme)
            Log.an().error(msg)
            return self._fatal(msg)

        # an existing folder is removed only when clean==True; a failed
        # delete is reported as a warning and does not abort initialization
        if DataManager.exists(parsed_uri=target_uri):
            if self._clean:
                removed = DataManager.delete(parsed_uri=target_uri)
                if not removed:
                    Log.a().warning(
                        'cannot delete existing data uri: %s',
                        target_uri['chopped_uri']
                    )

        # create the output folder
        created = DataManager.mkdir(parsed_uri=target_uri, recursive=True)
        if not created:
            msg = 'cannot create data uri: {}'.format(
                target_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # create the _log folder inside the output folder
        log_uri = '{}/_log'.format(target_uri['chopped_uri'])
        if not DataManager.mkdir(uri=log_uri, recursive=True):
            msg = 'cannot create _log folder in data uri: {}/_log'.format(
                target_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        return True
Esempio n. 2
0
    def _init_app_paths(self):
        """
        Add app paths to environment PATH for local workflows.

        The package path contains the workflow definition YAML file and shell
        scripts for calling individual apps used in a workflow. For every
        entry listed in the 'apps' folder next to the workflow, the entry's
        'assets' sub-folder is prepended to the PATH environment variable.

        Args:
            self: class instance.

        Returns:
            On success: True (including when there is no apps folder).
            On failure: False.

        """
        parsed_uri = URIParser.parse(self._workflow_path)
        if not parsed_uri:
            Log.an().error('invalid workflow path: %s', self._workflow_path)
            return False

        # avoid a double slash when the workflow folder is the root folder
        apps_uri = ('{}{}' if parsed_uri['folder'] == '/' else '{}/{}')\
            .format(parsed_uri['folder'], 'apps')
        parsed_apps_uri = URIParser.parse(apps_uri)
        if not parsed_apps_uri:
            Log.an().error('cannot construct apps uri: %s', apps_uri)
            return False

        if not DataManager.exists(parsed_uri=parsed_apps_uri):
            # no apps directory
            return True

        # a failed listing must not be iterated (it is not a list)
        app_dirs = DataManager.list(parsed_uri=parsed_apps_uri)
        if app_dirs is False or app_dirs is None:
            Log.an().error('cannot list apps uri: %s', apps_uri)
            return False

        for app_dir in app_dirs:
            assets_path = os.path.join(
                parsed_apps_uri['chopped_path'], app_dir, 'assets'
            )
            try:
                # PATH may be unset or empty; in that case do not leave a
                # trailing separator, which would add an empty PATH entry
                current_path = os.environ.get('PATH', '')
                if current_path:
                    os.environ['PATH'] = '{}{}{}'.format(
                        assets_path, os.pathsep, current_path
                    )
                else:
                    os.environ['PATH'] = assets_path

            except OSError as err:
                Log.an().error('workflow app pathmunge error [%s]', str(err))
                return False

        return True
Esempio n. 3
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        For every map item, the step output is imported from the item's
        archive URI, followed by any Agave job log files (gf-*.out and
        gf-*.err) and everything listed in the archive's _log directory.
        After all map items have been copied, the step status is updated to
        'FINISHED'.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # destination data URI: entry 0 of the source context's parsed URIs
        dest_uri = self._parsed_data_uris[self._source_context][0]

        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(dest_uri['chopped_uri'], '_log')
        dest_log_path = '{}/{}'.format(dest_uri['chopped_path'], '_log')

        agave_wrapper = self._agave['agave_wrapper']

        # agave log files follow the pattern gf-{}-{}-{}.out or .err; the
        # pattern is a raw string so that \d and \. are regex escapes rather
        # than (invalid) string escapes, and it is compiled once for all items
        agave_log_re = re.compile(r'^gf-\d*-.*\.(out|err)$')

        # copy data for each map item
        for map_item in self._map:
            archive_uri = map_item['run'][map_item['attempt']]['archive_uri']
            output_name = map_item['template']['output']

            # copy step output
            if not agave_wrapper.files_import_from_agave(
                    dest_uri['authority'],
                    dest_uri['chopped_path'],
                    output_name,
                    '{}/{}'.format(archive_uri, output_name)
            ):
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # check for any agave log files (*.out and *.err files)
            agave_log_list = DataManager.list(
                uri=archive_uri,
                agave=self._agave
            )
            if agave_log_list is False:
                msg = 'cannot get agave log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each agave log file into the destination _log folder
            for item in agave_log_list:
                if not agave_log_re.match(item):
                    continue

                if not agave_wrapper.files_import_from_agave(
                        dest_uri['authority'],
                        dest_log_path,
                        item,
                        '{}/{}'.format(archive_uri, item)
                ):
                    msg = 'cannot copy agave log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

            # check if anything is in the _log directory
            src_log_dir = '{}/{}'.format(archive_uri, '_log')
            if not DataManager.exists(uri=src_log_dir, agave=self._agave):
                continue

            # create dest _log dir if it doesn't exist
            if not DataManager.exists(uri=dest_log_dir, agave=self._agave):
                if not DataManager.mkdir(uri=dest_log_dir, agave=self._agave):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir
            log_list = DataManager.list(uri=src_log_dir, agave=self._agave)
            if log_list is False:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not agave_wrapper.files_import_from_agave(
                        dest_uri['authority'],
                        dest_log_path,
                        item,
                        '{}/{}/{}'.format(archive_uri, '_log', item)
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        self._update_status_db('FINISHED', '')

        return True
Esempio n. 4
0
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Builds the Agave job request from the app's inputs and parameters
        (template values take precedence over app defaults), removes any
        pre-existing archive folder of the same job name, submits the job and
        records the job ID, archive URI and PENDING status in the map item.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        template = map_item['template']

        # load default app inputs overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in template:
                input_value = template[input_key]
            else:
                input_value = self._app['inputs'][input_key]['default']

            if input_value:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(input_value),
                    safe='/:'
                )

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            param_def = self._app['parameters'][param_key]
            if param_key in template:
                # template values are converted to the declared parameter type
                try:
                    if param_def['type'] in ['int', 'long']:
                        parameters[param_key] = int(template[param_key])
                    elif param_def['type'] in ['float', 'double']:
                        # membership test: comparing the type string to a
                        # list with == would never be true
                        parameters[param_key] = float(template[param_key])
                    else:
                        parameters[param_key] = str(template[param_key])
                except (TypeError, ValueError) as err:
                    msg = 'invalid value for parameter "{}" in step "{}": {}'\
                        .format(param_key, self._step['name'], err)
                    Log.an().error(msg)
                    return self._fatal(msg)
            else:
                # defaults are passed through as-is, empty defaults skipped
                if param_def['default'] not in [None, '']:
                    parameters[param_key] = param_def['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # add execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            parameters['exec_init'] \
                = self._step['execution']['parameters']['init']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(
            str(map_item['attempt']),
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(template['output'], regex_pattern=r'[^-a-z0-9_]+')
        )
        # keep the job name within 64 characters
        name = name[:62]+'..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'],
            name
        )
        archive_uri = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_uri'],
            name
        )
        app_template = {
            'name': name,
            'appId': self._app['implementation']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        # specify processors if 'slots' param given
        if 'slots' in self._step['execution']['parameters']:
            app_template['processorsPerNode'] = int(
                self._step['execution']['parameters']['slots']
            )
        # specify memory if 'mem' param given
        if 'mem' in self._step['execution']['parameters']:
            app_template['memoryPerNode'] = '{}'.format(
                self._step['execution']['parameters']['mem']
            )

        Log.some().debug(
            "[step.%s]: agave app template:\n%s",
            self._step['name'],
            pprint.pformat(app_template)
        )

        # delete archive path if it exists; a failed delete is only a warning
        if DataManager.exists(uri=archive_uri, agave=self._agave):
            if not DataManager.delete(uri=archive_uri, agave=self._agave):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'],
                    name
                )

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug(
            '[step.%s]: agave job id: %s -> %s',
            self._step['name'],
            template['output'],
            job['id']
        )

        # record job info
        run_record = map_item['run'][map_item['attempt']]
        run_record['agave_job_id'] = job['id']
        run_record['archive_uri'] = archive_uri
        run_record['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        run_record['status'] = 'PENDING'

        return True
Esempio n. 5
0
    def _run_map(self, map_item):
        """
        Submit an Agave job for one map item and remember its job ID.

        The job request is assembled from the app's inputs and parameters,
        where values present in the map item's template replace the app
        defaults. An existing archive folder with the same job name is
        removed first (a failed removal only produces a warning). After a
        successful submit the job ID, archive URI and PENDING status are
        stored in the map item.

        Args:
            self: class instance.
            map_item: map item object (item of self._map)

        Returns:
            On success: True.
            On failure: False.

        """
        template = map_item['template']

        # inputs: template value wins over the app default; empty values
        # become empty strings, everything is URL-quoted
        inputs = {}
        for input_key, input_def in self._app['inputs'].items():
            raw_input = (
                template[input_key] if input_key in template
                else input_def['default']
            )
            inputs[input_key] = urllib.parse.quote(
                str(raw_input or ''), safe='/:'
            )

        # parameters: template value wins over the app default
        parameters = {
            param_key: (
                template[param_key] if param_key in template
                else param_def['default']
            )
            for param_key, param_def in self._app['parameters'].items()
        }

        # the execution method is passed along as a parameter
        parameters['exec_method'] = self._step['execution']['method']

        # job name, limited to 64 characters
        name = 'gf-{}-{}-{}'.format(
            str(map_item['attempt']),
            slugify(self._step['name']),
            slugify(template['output'])
        )
        if len(name) > 64:
            name = name[:62] + '..'

        archive = self._agave['parsed_archive_uri']
        app_template = {
            'name': name,
            'appId': self._app['definition']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': archive['authority'],
            'archivePath': '{}/{}'.format(archive['chopped_path'], name)
        }
        Log.some().debug(
            "agave app template:\n%s", pprint.pformat(app_template)
        )

        # remove a stale archive folder, if there is one
        archive_uri = '{}/{}'.format(archive['chopped_uri'], name)
        if DataManager.exists(uri=archive_uri, agave=self._agave):
            deleted = DataManager.delete(uri=archive_uri, agave=self._agave)
            if not deleted:
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    archive['chopped_uri'],
                    name
                )

        # submit the job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug(
            'agave job id: %s -> %s', template['output'], job['id']
        )

        # store job info and mark both the item and this attempt as pending
        attempt_record = map_item['run'][map_item['attempt']]
        attempt_record['agave_job_id'] = job['id']
        attempt_record['archive_uri'] = archive_uri
        attempt_record['hpc_job_id'] = ''
        map_item['status'] = 'PENDING'
        attempt_record['status'] = 'PENDING'

        return True
Esempio n. 6
0
    def clean_up(self):
        """
        Copy data from Agave archive location to step output location (data URI).

        For every map item, the step output is imported from the item's
        archive URI, followed by everything listed in the archive's _log
        directory (if that directory exists). After all map items have been
        copied, the step status is updated to 'FINISHED'.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        dest_uri = self._parsed_data_uris[self._source_context]

        # destination _log directory, common for all map items
        dest_log_dir = '{}/{}'.format(dest_uri['chopped_uri'], '_log')
        dest_log_path = '{}/{}'.format(dest_uri['chopped_path'], '_log')

        # agave context handed to every DataManager call
        agave_context = {
            'agave': self._agave['agave'],
            'agave_config': self._config['agave']
        }

        # create instance of agave wrapper class for data import
        agwrap = AgaveFilesImportDataFromAgave(
            self._agave['agave'],
            self._config['agave']
        )

        # copy data for each map item
        for map_item in self._map:
            archive_uri = map_item['run'][map_item['attempt']]['archive_uri']
            output_name = map_item['template']['output']

            # copy step output
            if not agwrap.call(
                    dest_uri['authority'],
                    dest_uri['chopped_path'],
                    output_name,
                    '{}/{}'.format(archive_uri, output_name)
            ):
                msg = 'agave import failed for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # check if anything is in the _log directory
            src_log_dir = '{}/{}'.format(archive_uri, '_log')
            if not DataManager.exists(uri=src_log_dir, agave=agave_context):
                continue

            # create dest _log dir if it doesn't exist
            if not DataManager.exists(uri=dest_log_dir, agave=agave_context):
                if not DataManager.mkdir(
                        uri=dest_log_dir,
                        agave=agave_context
                ):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir; only a failed listing is
            # an error, an existing but empty _log directory is not
            log_list = DataManager.list(uri=src_log_dir, agave=agave_context)
            if log_list is False or log_list is None:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not agwrap.call(
                        dest_uri['authority'],
                        dest_log_path,
                        item,
                        '{}/{}/{}'.format(archive_uri, '_log', item)
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        self._update_status_db('FINISHED', '')

        return True