Example #1
    def make_wrapper(self):
        """
        Generate the GeneFlow app wrapper script.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # make assets folder, if it doesn't already exist
        asset_path = Path(self._path / 'assets')
        asset_path.mkdir(exist_ok=True)

        Log.some().info(
            'compiling %s',
            str(asset_path / '{}.sh'.format(self._config['name']))
        )

        # compile jinja2 template
        if not TemplateCompiler.compile_template(
                None,
                'wrapper-script.sh.j2',
                str(asset_path / '{}.sh'.format(self._config['name'])),
                **self._config
        ):
            Log.an().error('cannot compile GeneFlow app wrapper script')
            return False

        return True
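
A minimal sketch of what a helper like TemplateCompiler.compile_template might do with Jinja2. The template directory handling and the meaning of the None first argument (presumably "use the default template location") are assumptions, not GeneFlow's actual implementation:

from pathlib import Path
from jinja2 import Environment, FileSystemLoader

def compile_template_sketch(template_dir, template_name, output_path, **context):
    """Render a Jinja2 template to a file; hypothetical stand-in for
    TemplateCompiler.compile_template."""
    env = Environment(loader=FileSystemLoader(str(template_dir or 'templates')))
    try:
        template = env.get_template(template_name)
        Path(output_path).write_text(template.render(**context))
    except Exception:
        return False
    return True
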
Example #2
    def make_test(self):
        """
        Generate the GeneFlow app test script.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # make test folder, if it doesn't already exist
        test_path = Path(self._path / 'test')
        test_path.mkdir(exist_ok=True)

        Log.some().info('compiling %s', str(test_path / 'test.sh'))

        # compile jinja2 template
        if not TemplateCompiler.compile_template(
                None,
                'test.sh.j2',
                str(test_path / 'test.sh'),
                **self._config
        ):
            Log.an().error('cannot compile GeneFlow app test script')
            return False

        return True
Example #3
    def _send_notifications(self, status):
        """
        Send a job status notification to each configured endpoint.

        Args:
            self: class instance.
            status: new job status string.

        Returns:
            Nothing.

        """
        # construct message
        msg_data = {
            'to': '',
            'from': '*****@*****.**',
            'subject': 'GeneFlow Job "{}": {}'.format(
                self._job['name'], status
            ),
            'content': (
                'Your GeneFlow job status has changed to {}'
                '\nJob Name: {}'
                '\nJob ID: {}'
            ).format(status, self._job['name'], self._job_id)
        }

        # use agave token as header if available
        if 'agave' in self._workflow_context:
            msg_headers = {
                'Authorization': 'Bearer {}'.format(
                    self._workflow_context['agave']\
                        .get_context_options()['agave_wrapper']\
                        ._agave.token.token_info.get('access_token')
                )
            }

        else:
            msg_headers = {}

        Log.some().info('message headers: %s', str(msg_headers))

        for notify in self._job['notifications']:
            Log.some().info(
                'sending notification(s) to %s @ %s',
                str(notify['to']),
                notify['url'],
            )

            to_list = notify['to']
            if isinstance(notify['to'], str):
                to_list = [notify['to']]

            for to_item in to_list:
                msg_data['to'] = to_item
                try:
                    response = requests.post(
                        notify['url'], data=msg_data, headers=msg_headers
                    )

                except requests.exceptions.RequestException as err:
                    Log.a().warning(
                        'cannot send notification to %s @ %s: %s',
                        to_item, notify['url'], str(err)
                    )
                    # request never completed; skip the status check
                    continue

                if response.status_code != 201:
                    Log.a().warning(
                        'cannot send notification to %s @ %s: %s',
                        to_item, notify['url'], response.text
                    )
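
From the loop above, each entry of self._job['notifications'] needs a 'url' and a 'to' that may be a single address or a list. A plausible job fragment, with illustrative values only:

job_fragment = {
    'name': 'my-job',
    'notifications': [
        # 'to' as a single address
        {'url': 'https://notify.example.org/api/messages',
         'to': 'user@example.org'},
        # 'to' as a list fans out to one POST per recipient
        {'url': 'https://notify.example.org/api/messages',
         'to': ['a@example.org', 'b@example.org']},
    ],
}
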
Example #4
    def connect(self):
        agave_connection_type = self._config.get('connection_type',
                                                 'impersonate')

        if agave_connection_type == 'impersonate':
            token_username = '******'.format(
                self._config['domain'], '/' if self._config['domain'] else '',
                self._config['token_username'])
            Log.some().debug('user impersonation: %s', token_username)
            self._agave = Agave(api_server=self._config['server'],
                                username=self._config['username'],
                                password=self._config['password'],
                                token_username=token_username,
                                client_name=self._config['client'],
                                api_key=self._config['key'],
                                api_secret=self._config['secret'],
                                verify=False)

        elif agave_connection_type == 'agave-cli':
            # get credentials from ~/.agave/current
            agave_clients = Agave._read_clients()
            agave_clients[0]['verify'] = False  # don't verify ssl
            self._agave = Agave(**agave_clients[0])
            # when using agave-cli, token_username must match the stored
            # credentials in the user's home directory; this can differ
            # from the job username
            self._config['token_username'] \
                = agave_clients[0]['username']

        else:
            Log.an().error('invalid agave connection type: %s',
                           agave_connection_type)
            return False

        return True
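
The 'impersonate' branch reads a fixed set of config keys. A sketch of the expected shape, with placeholder values (the masked token_username format string in the source is left as-is):

agave_config_sketch = {
    'connection_type': 'impersonate',   # or 'agave-cli'
    'server': 'https://agave.example.org',
    'username': 'service-account',
    'password': '...',
    'token_username': 'enduser',
    'client': 'client-name',
    'key': 'api-key',
    'secret': 'api-secret',
    'domain': 'EXAMPLE',
}
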
Example #5
    def files_import_from_agave(self, system_id, file_path, file_name,
                                url_to_ingest):
        """
        Wrap AgavePy import data file command.

        Args:
            self: class instance.
            system_id: Identifier for Agave storage system.
            file_path: Path where file is to be imported.
            file_name: Name of the imported file.
            url_to_ingest: Agave URL to be ingested.

        Returns:
            On success: True with no exceptions.
            On failure: Throws exception.

        """
        response = self._agave.files.importData(systemId=system_id,
                                                filePath=file_path,
                                                fileName=file_name,
                                                urlToIngest=urllib.parse.quote(
                                                    str(url_to_ingest or ''),
                                                    safe='/:'))
        async_response = AgaveAsyncResponse(self._agave, response)
        status = async_response.result()
        Log.some().debug('import %s: %s -> agave://%s/%s/%s', str(status),
                         url_to_ingest, system_id, file_path, file_name)
        if str(status) == 'FINISHED':
            return True

        # not finished; raise so the caller can retry
        raise Exception('agave import failed')
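
Because the method raises on any non-FINISHED status, callers are expected to retry it. A minimal sketch of such a caller, where the retry count and backoff are assumptions:

import time

def import_with_retry(wrapper, system_id, file_path, file_name, url, retries=3):
    """Retry the raising import call a few times before giving up."""
    for attempt in range(retries):
        try:
            return wrapper.files_import_from_agave(
                system_id, file_path, file_name, url)
        except Exception:
            time.sleep(2 ** attempt)  # simple exponential backoff
    raise Exception('agave import failed after {} attempts'.format(retries))
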
Example #6
    def make_agave(self):
        """
        Generate the GeneFlow Agave app definition.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        Log.some().info('compiling %s',
                        str(self._path / 'agave-app-def.json.j2'))

        if not TemplateCompiler.compile_template(
                None,
                'agave-app-def.json.j2.j2',
                str(self._path / 'agave-app-def.json.j2'),
                slugify_name=slugify(self._app['name'],
                                     regex_pattern=r'[^-a-z0-9_]+'),
                **self._app):
            Log.an().error(
                'cannot compile GeneFlow Agave app definition template')
            return False

        return True
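
The regex pattern passed to slugify keeps lowercase letters, digits, hyphens, and underscores, replacing everything else. Assuming python-slugify's signature, for example:

from slugify import slugify

# with the default pattern, underscores would become hyphens;
# regex_pattern=r'[^-a-z0-9_]+' should preserve them:
# 'BWA_Mem App 0.7' -> 'bwa_mem-app-0-7'
print(slugify('BWA_Mem App 0.7', regex_pattern=r'[^-a-z0-9_]+'))
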
Example #7
    def make_wrapper(self):
        """
        Generate the GeneFlow app wrapper script.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # make assets folder, if it doesn't already exist
        asset_path = Path(self._path / 'assets')
        asset_path.mkdir(exist_ok=True)

        script_path = str(asset_path / '{}.sh'.format(
            slugify(self._app['name'], regex_pattern=r'[^-a-z0-9_]+')))
        Log.some().info('compiling %s', script_path)

        # compile jinja2 template
        if not TemplateCompiler.compile_template(None, 'wrapper-script.sh.j2',
                                                 script_path, **self._app):
            Log.an().error('cannot compile GeneFlow app wrapper script')
            return False

        # make script executable by owner
        os.chmod(script_path, stat.S_IRWXU)

        return True
Example #8
    def update_def(self, agave):
        """
        Update GeneFlow app definition by adding the implementation section.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        Log.some().info('updating %s', str(self._path / 'app.yaml'))

        try:
            with open(str(self._path / 'app.yaml'), 'a') as app_yaml:
                app_yaml.write('\n\nimplementation:')
                if agave:
                    app_yaml.write('\n  agave:')
                    app_yaml.write('\n    agave_app_id: {}-{}-{}{}'.format(
                        agave['apps_prefix'],
                        slugify(self._app['name'],
                                regex_pattern=r'[^-a-z0-9_]+'),
                        self._app['agave_version'], agave['revision']))
                app_yaml.write('\n  local:')
                app_yaml.write('\n    script: {}.sh'.format(
                    slugify(self._app['name'], regex_pattern=r'[^-a-z0-9_]+')))
        except IOError as err:
            Log.an().error('cannot update GeneFlow app definition: %s', err)
            return False

        return True
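
For reference, the writes above append a block of this shape to app.yaml (angle-bracket fields stand for the computed values, not literals):

implementation:
  agave:
    agave_app_id: <apps_prefix>-<slugified-name>-<agave_version><revision>
  local:
    script: <slugified-name>.sh
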
Example #9
def main():
    """
    Geneflow CLI main entrypoint.

    Args:
        None.

    Returns:
        Nothing.

    """
    args, subparser = parse_args()
    if not args:
        sys.exit(1)

    # configure logging
    Log.config(args[0].log_level, args[0].log_file)

    # display GeneFlow version
    Log.some().info('GeneFlow %s', __version__)

    # call the appropriate command
    if not args[0].func(args=args[0], other_args=args[1], subparser=subparser):
        sys.exit(1)

    sys.exit(0)
Example #10
    def _call(self, systemId, filePath, fileName, urlToIngest):
        """
        Wrap AgavePy import data file command.

        Args:
            self: class instance
            systemId: Identifier for Agave storage system
            filePath: Path where file is to be imported
            fileName: Name of the imported file
            urlToIngest: Agave URL to be ingested

        Returns:
            On success: True.
            On failure: Throws exception.

        """
        response = self._agave.files.importData(systemId=systemId,
                                                filePath=filePath,
                                                fileName=fileName,
                                                urlToIngest=urlToIngest)
        async_response = AgaveAsyncResponse(self._agave, response)
        status = async_response.result()
        Log.some().info('import %s: %s -> agave://%s/%s/%s', str(status),
                        urlToIngest, systemId, filePath, fileName)
        if str(status) == 'FINISHED':
            return True

        # not finished; raise so the caller can retry
        raise Exception('agave import failed')
Example #11
    def _init_exec_context_set(self):
        """
        Initialize the set of execution contexts, which is specified by the
        execution.context job parameter.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # get explicit execution contexts from the job parameters
        self._exec_contexts = set(self._job['execution']['context'].values())

        # check validity of exec contexts
        for context in self._exec_contexts:
            if not Contexts.is_exec_context(context):
                msg = 'invalid exec context: {}'.format(context)
                Log.an().error(msg)
                return self._fatal(msg)

        Log.some().debug('execution contexts: %s', self._exec_contexts)

        return True
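
The contexts are the values of the execution.context mapping in the job definition. A plausible fragment, with illustrative keys and values:

job_fragment = {
    'execution': {
        'context': {
            'default': 'local',   # context per step, or a default
            'align': 'agave',
        }
    }
}
# set(job_fragment['execution']['context'].values()) == {'local', 'agave'}
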
Example #12
    def retry_failed(self, map_item):
        """
        Retry a failed or stopped Agave job.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            True if the failed/stopped job restarted successfully.
            False if the failed/stopped job was not restarted due to an error.

        """
        # retry job
        Log.some().info(
            '[step.%s]: retrying agave job (%s), attempt number %s',
            self._step['name'],
            map_item['template']['output'],
            map_item['attempt']+1
        )

        # add another run to list
        map_item['attempt'] += 1
        map_item['run'].append({})
        if not self._run_map(map_item):
            Log.a().warning(
                '[step.%s]: cannot retry agave job (%s), attempt number %s',
                self._step['name'],
                map_item['template']['output'],
                map_item['attempt']
            )
            return False

        return True
Example #13
    def upload_agave_test_data(self):
        """
        Upload Agave test data from workflow package.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        if (not self._agave or not self._agave_params
                or not self._agave_params.get('agave')):
            Log.a().warning(
                'must provide agave parameters to upload test data')
            return False

        # create main test data URI
        parsed_base_test_uri = URIParser.parse('agave://{}/{}'.format(
            self._agave_params['agave']['deploymentSystem'],
            self._agave_params['agave']['testDataDir']))
        Log.some().info('creating base test data uri: %s',
                        parsed_base_test_uri['chopped_uri'])
        if not DataManager.mkdir(parsed_uri=parsed_base_test_uri,
                                 recursive=True,
                                 agave={
                                     'agave': self._agave,
                                     'agave_config': self._config['agave']
                                 }):
            Log.a().warning('cannot create base test data uri: %s',
                            parsed_base_test_uri['chopped_uri'])
            return False

        # upload test data
        parsed_local_test_uri = URIParser.parse(str(Path(self._path) / 'data'))
        parsed_agave_test_uri = URIParser.parse('{}/{}'.format(
            parsed_base_test_uri['chopped_uri'],
            Path(self._path).name))
        Log.some().info('copying test data from %s to %s',
                        parsed_local_test_uri['chopped_uri'],
                        parsed_agave_test_uri['chopped_uri'])
        if not DataManager.copy(parsed_src_uri=parsed_local_test_uri,
                                parsed_dest_uri=parsed_agave_test_uri,
                                local={},
                                agave={
                                    'agave': self._agave,
                                    'agave_config': self._config['agave']
                                }):
            Log.a().warning('cannot copy test data from %s to %s',
                            parsed_local_test_uri['chopped_uri'],
                            parsed_agave_test_uri['chopped_uri'])
            return False

        return True
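
The method assumes _agave_params carries the deployment system and test data directory. A sketch of the expected shape, with placeholder values:

agave_params_sketch = {
    'agave': {
        'deploymentSystem': 'example-storage-system',
        'testDataDir': 'apps/test-data',
    }
}
# base test URI becomes: agave://example-storage-system/apps/test-data
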
Example #14
    def retry_failed(self):
        """
        Check if any jobs failed or stopped, and retry them if below the
        retry limit.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # check if any jobs failed or stopped
        for map_item in self._map:
            if (
                    map_item['status'] == 'FAILED'
                    or map_item['status'] == 'STOPPED'
            ):
                # retry the job, if not at limit
                if map_item['attempt'] >= self._config['agave']['job_retry']:
                    msg = (
                        'agave job failed ({}) for step "{}", '
                        'retries for map item "{}" reached limit of {}'
                    ).format(
                        map_item['run'][map_item['attempt']]['agave_job_id'],
                        self._step['name'],
                        map_item['filename'],
                        self._config['agave']['job_retry']
                    )
                    Log.an().error(msg)
                    return self._fatal(msg)

                # retry job
                Log.some().info(
                    (
                        'agave job failed (%s) for step "%s", '
                        'retrying map item "%s"'
                    ),
                    map_item['run'][map_item['attempt']]['agave_job_id'],
                    self._step['name'],
                    map_item['filename']
                )
                # add another run to list
                map_item['attempt'] += 1
                map_item['run'].append({})
                if not self._run_map(map_item):
                    msg = 'cannot re-run agave job for map item "{}"'\
                        .format(map_item['filename'])
                    Log.an().error(msg)
                    return self._fatal(msg)

        return True
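
Both the limit check and the retry index map_item['run'] by map_item['attempt'], so the run list must always hold an entry for the current attempt. A sketch of the implied structure, with illustrative values:

map_item_sketch = {
    'filename': 'sample1.fastq',
    'status': 'FAILED',
    'attempt': 0,                      # index into 'run'
    'template': {'output': 'sample1'},
    'run': [
        {'agave_job_id': 'job-123', 'status': 'FAILED', 'hpc_job_id': ''},
        # a retry appends {} here and increments 'attempt'
    ],
}
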
Example #15
    def _copy_asset(self, asset):
        """
        Copy app assets.

        Args:
            self: class instance
            asset: what to copy

        Returns:
            On success: True.
            On failure: False.

        """
        if not self._copy_prefix:
            Log.a().warning(
                'copy prefix must be specified when copying app assets'
            )
            return False

        if not asset.get('dst'):
            Log.a().warning('asset dst required for app %s', self._app['name'])
            return False

        if not asset.get('src'):
            Log.a().warning('asset src required for app %s', self._app['name'])
            return False

        # create asset destination
        asset_path = Path(self._path / asset['dst'])
        asset_path.mkdir(exist_ok=True)

        if 'zip' in asset:
            # create a tar.gz of src
            cmd = 'tar -czf "{}" --directory="{}" .'.format(
                str(Path(asset_path / '{}.tar.gz'.format(asset['zip']))),
                str(Path(self._copy_prefix) / asset['src'])
            )
            Log.some().info('zipping: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot zip asset src: %s', cmd)
                return False

            Log.some().info('tar stdout: %s', cmd_result)

        else:
            # move without creating tar.gz
            cmd = 'cp -R "{}" "{}"'.format(
                str(Path(self._copy_prefix) / asset['src']),
                str(asset_path)
            )
            Log.some().info('copying: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot copy asset src: %s', cmd)
                return False

            Log.some().info('copy stdout: %s', cmd_result)

        return True
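
The asset dict selects one of two shell commands. Assuming a copy prefix of /src and an app path of /app, the two branches amount to:

asset_zip = {'src': 'bin', 'dst': 'assets', 'zip': 'tools'}
# -> tar -czf "/app/assets/tools.tar.gz" --directory="/src/bin" .

asset_copy = {'src': 'bin', 'dst': 'assets'}
# -> cp -R "/src/bin" "/app/assets"
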
Example #16
def add_apps(args):
    """
    Add GeneFlow apps to database.

    Args:
        args.app_yaml: GeneFlow definition with apps.
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.

    Returns:
        On success: True.
        On failure: False.

    """
    app_yaml = args.app_yaml
    config_file = args.config_file
    environment = args.environment

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # import apps
    defs = data_source.import_apps_from_def(app_yaml)
    if not defs:
        Log.an().error('app definition load failed: %s', app_yaml)
        return False

    data_source.commit()

    # display new IDs
    for app in defs:
        Log.some().info('app loaded: %s -> %s', app, defs[app])

    return True
Example #17
def add_workflows(args, other_args, subparser=None):
    """
    Add GeneFlow workflows to database.

    Args:
        args.workflow_yaml: GeneFlow definition with workflows.
        args.config: GeneFlow config file path.
        args.environment: Config environment.

    Returns:
        On success: True.
        On failure: False.

    """
    workflow_yaml = args.workflow_yaml
    config = args.config
    environment = args.environment

    # load config file
    cfg = Config()
    if not cfg.load(config):
        Log.an().error('cannot load config file: %s', config)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # import workflow
    defs = data_source.import_workflows_from_def(workflow_yaml)
    if not defs:
        Log.an().error('workflow definition load failed: %s', workflow_yaml)
        return False

    data_source.commit()

    # display new IDs
    for workflow in defs:
        Log.some().info('workflow loaded: %s -> %s', workflow, defs[workflow])

    return True
Example #18
    def stage(self, **kwargs):
        """
        Copy data from the source URI to all contexts except 'final'. The
        source URI can comprise multiple locations; each location is copied
        to the corresponding element of the destination URIs.

        Set _staged indicator to True on success.

        Args:
            self: class instance.
            **kwargs: additional arguments required by DataManager.copy().

        Returns:
            True or False.

        """
        for context in self._parsed_data_uris:
            if context != self._source_context:
                if self._clean:
                    # remove target URI first (not yet implemented)
                    pass

                for i, parsed_source_uri in enumerate(
                        self._parsed_data_uris[self._source_context]):

                    Log.some().debug(
                        'staging data: %s->%s to %s->%s', self._source_context,
                        parsed_source_uri['chopped_uri'], context,
                        self._parsed_data_uris[context][i]['chopped_uri'])

                    if context != 'final':
                        if not DataManager.copy(
                                parsed_src_uri=parsed_source_uri,
                                parsed_dest_uri=self._parsed_data_uris[context][i],
                                **kwargs):
                            msg = (
                                'cannot stage data by copying from {} to {}'
                            ).format(
                                parsed_source_uri['chopped_uri'],
                                self._parsed_data_uris[context][i]['chopped_uri']
                            )
                            Log.an().error(msg)
                            return self._fatal(msg)

        self._staged = True

        return True
Example #19
    def _init_data_context_set(self):
        """
        Initialize set of data contexts, which is determined by inputs and output.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # check input URIs for data contexts
        for input_key in self._workflow['inputs']:
            parsed_uri = URIParser.parse(self._workflow['inputs'][input_key]['value'][0])
            if not parsed_uri:
                msg = 'invalid input uri: {}'.format(
                    self._workflow['inputs'][input_key]['value'][0]
                )
                Log.an().error(msg)
                return self._fatal(msg)

            self._data_contexts.add(parsed_uri['scheme'])

        # add output URI data context
        parsed_output_uri = URIParser.parse(self._job['output_uri'])
        if not parsed_output_uri:
            msg = 'invalid base of job output uri: {}'.format(
                self._job['output_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        self._data_contexts.add(parsed_output_uri['scheme'])

        # check validity of data contexts
        for context in self._data_contexts:
            if not Contexts.is_data_context(context):
                msg = 'invalid data context: {}'.format(context)
                Log.an().error(msg)
                return self._fatal(msg)

        Log.some().debug('data contexts: %s', self._data_contexts)

        return True
Example #20
    def checkpoint(self):
        """
        Check if step meets completion criteria, based on "checkpoint"
        execution parameter.

        Args:
            self: class instance.

        Returns:
            True if step meets completion criteria.
            False if it does not.

        """
        checkpoint = self._step['execution']['parameters'].get(
            'checkpoint', 'any')

        status = self.get_status()
        finished = [item == 'FINISHED' for item in status.values()]
        Log.some().info('[step.%s]: checkpoint: %s of %s job(s) finished',
                        self._step['name'], sum(finished), len(finished))

        # print summary of job result in debug mode
        for item in sorted(status):
            Log.some().debug('[step.%s]: checkpoint: %s -> %s',
                             self._step['name'], item, status[item])

        if checkpoint == 'all':
            # all jobs must be finished
            Log.some().info('[step.%s]: checkpoint: all jobs must finish',
                            self._step['name'])
            return all(finished)

        if checkpoint == 'none':
            # no jobs have to be finished
            Log.some().info(
                '[step.%s]: checkpoint: jobs do not have to finish',
                self._step['name'])
            return True

        # at least one job must be finished
        # default to 'any' if anything other than 'all', 'any', or 'none' is used
        Log.some().info('[step.%s]: checkpoint: at least one job must finish',
                        self._step['name'])
        return any(finished)
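
The three checkpoint modes reduce to all()/any()/True over the per-job finished flags. A standalone sketch of the same decision table:

def checkpoint_sketch(statuses, checkpoint='any'):
    """Mirror the completion rules above: 'all', 'none', else 'any'."""
    finished = [s == 'FINISHED' for s in statuses.values()]
    if checkpoint == 'all':
        return all(finished)
    if checkpoint == 'none':
        return True
    return any(finished)  # default: at least one job must finish

assert checkpoint_sketch({'a': 'FINISHED', 'b': 'FAILED'}) is True
assert checkpoint_sketch({'a': 'FINISHED', 'b': 'FAILED'}, 'all') is False
assert checkpoint_sketch({'a': 'FAILED'}, 'none') is True
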
Example #21
    def stage(self, **kwargs):
        """
        Copy data to all contexts except 'final' from source URI.

        Set _staged indicator to True on success.

        Args:
            self: class instance.
            **kwargs: additional arguments required by DataManager.copy().

        Returns:
            True or False.

        """
        for context in self._parsed_data_uris:
            if context != self._source_context:
                if self._clean:
                    # remove target URI first (not yet implemented)
                    pass

                Log.some().debug(
                    'staging data: %s->%s to %s->%s',
                    self._source_context,
                    self._parsed_data_uris[self._source_context]['chopped_uri'],
                    context,
                    self._parsed_data_uris[context]['chopped_uri'])

                if context != 'final':
                    if not DataManager.copy(
                            parsed_src_uri=self._parsed_data_uris\
                                [self._source_context],
                            parsed_dest_uri=self._parsed_data_uris[context],
                            **kwargs
                    ):
                        msg = 'cannot stage data by copying from {} to {}'.format(
                            self._parsed_data_uris[self._source_context]\
                                ['chopped_uri'],
                            self._parsed_data_uris[context]['chopped_uri']
                        )
                        Log.an().error(msg)
                        return self._fatal(msg)

        self._staged = True

        return True
Example #22
    def make_def(self):
        """
        Generate the GeneFlow app definition.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        Log.some().info('compiling %s', str(self._path / 'app.yaml.j2'))

        if not TemplateCompiler.compile_template(
                None, 'app.yaml.j2.j2', str(self._path / 'app.yaml.j2'),
                **self._config):
            Log.an().error('cannot compile GeneFlow app definition template')
            return False

        return True
Example #23
def run_workflow(job, config, log_level):
    """
    Run a GeneFlow workflow.

    Args:
        job: job dict describing run.
        config: GeneFlow configuration dict.
        log_level: logging level for this run.

    Returns:
        On success: Workflow job dict.
        On failure: False.

    """
    if job['log']:
        # reconfig log location for this run
        Log.config(log_level, job['log'])
    Log.some().info('job loaded: %s -> %s', job['name'], job['id'])

    # run job
    workflow = Workflow(job['id'], config)
    if not workflow.initialize():
        Log.an().error('workflow initialization failed: job_id=%s', job['id'])
        return False

    Log.some().info('running workflow:\n%s', str(workflow))

    if not workflow.run():
        Log.an().error('workflow run failed: job_id=%s', job['id'])
        return False

    Log.some().info('workflow complete:\n%s', str(workflow))

    return workflow.get_job()
Example #24
    def __init__(self, job, config, parsed_job_work_uri):
        """
        Instantiate LocalWorkflow class.
        """
        self._job = job
        self._config = config
        self._parsed_job_work_uri = parsed_job_work_uri

        # drmaa library for grid engine
        self._drmaa_session = drmaa.Session()
        Log.some().debug('DRMAA contact strings: {}'.format(
            self._drmaa_session.contact))
        Log.some().debug('DRMAA systems: {}'.format(
            self._drmaa_session.drmsInfo))
        Log.some().debug('DRMAA implementations: {}'.format(
            self._drmaa_session.drmaaImplementation))
        Log.some().debug('DRMAA version: {}'.format(
            self._drmaa_session.version))
Example #25
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map
        status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are still running
        for map_item in self._map:

            map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )

            # for status failures, set to 'UNKNOWN'
            if not map_item['status']:
                msg = 'cannot get job status for step "{}"'\
                    .format(self._step['name'])
                Log.a().warning(msg)
                map_item['status'] = 'UNKNOWN'

            # set status of run-attempt
            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            # check hpc job ids
            if map_item['run'][map_item['attempt']]['hpc_job_id']:
                # already have it
                continue

            # job id listed in history
            response = self._agave['agave_wrapper'].jobs_get_history(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )

            if not response:
                msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )
                Log.a().warning(msg)
                continue

            for item in response:
                if item['status'] == 'QUEUED':
                    match = re.match(
                        r'^HPC.*local job (\d*)$', item['description']
                    )
                    if match:
                        map_item['run'][map_item['attempt']]['hpc_job_id'] \
                            = match.group(1)

                        # log hpc job id
                        Log.some().debug(
                            '[step.%s]: hpc job id: %s -> %s',
                            self._step['name'],
                            map_item['template']['output'],
                            match.group(1)
                        )

                        break

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                # retry job if not at limit
                if not self.retry_failed(map_item):
                    Log.a().warning(
                        '[step.%s]: cannot retry failed agave job (%s)',
                        self._step['name'],
                        map_item['template']['output']
                    )

        self._update_status_db(self._status, '')

        return True
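
The HPC job id is scraped from the QUEUED entry of the Agave job history with the regex above. A quick check of the pattern, where the description wording is an assumption about Agave's actual message:

import re

description = 'HPC job successfully placed into queue as local job 456789'
match = re.match(r'^HPC.*local job (\d*)$', description)
assert match and match.group(1) == '456789'
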
Example #26
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs, overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                if map_item['template'][input_key]:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(map_item['template'][input_key]),
                        safe='/:'
                    )
            else:
                if self._app['inputs'][input_key]['default']:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(self._app['inputs'][input_key]['default']),
                        safe='/:'
                    )

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                if self._app['parameters'][param_key]['type'] in ['int', 'long']:
                    parameters[param_key] = int(map_item['template'][param_key])
                elif self._app['parameters'][param_key]['type'] in ['float', 'double']:
                    parameters[param_key] = float(map_item['template'][param_key])
                else:
                    parameters[param_key] = str(map_item['template'][param_key])
            else:
                if self._app['parameters'][param_key]['default'] not in [None, '']:
                    parameters[param_key] \
                        = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # add execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            parameters['exec_init'] = self._step['execution']['parameters']['init']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(
            str(map_item['attempt']),
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
        )
        name = name[:62]+'..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'],
            name
        )
        app_template = {
            'name': name,
            'appId': self._app['implementation']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        # specify processors if 'slots' param given
        if 'slots' in self._step['execution']['parameters']:
            app_template['processorsPerNode'] = int(
                self._step['execution']['parameters']['slots']
            )
        # specify memory if 'mem' param given
        if 'mem' in self._step['execution']['parameters']:
            app_template['memoryPerNode'] = '{}'.format(
                self._step['execution']['parameters']['mem']
            )

        Log.some().debug(
                "[step.%s]: agave app template:\n%s",
                self._step['name'],
                pprint.pformat(app_template)
        )

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                agave=self._agave
        ):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                    agave=self._agave
            ):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'],
                    name
                )

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug(
            '[step.%s]: agave job id: %s -> %s',
            self._step['name'],
            map_item['template']['output'],
            job['id']
        )

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
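
With the comparison fixed above (in, not ==, for the float/double case), the type coercion can be read as a small pure function. A sketch:

def coerce_param(value, param_type):
    """Coerce a template value to the app-declared parameter type."""
    if param_type in ['int', 'long']:
        return int(value)
    if param_type in ['float', 'double']:
        return float(value)
    return str(value)

assert coerce_param('4', 'int') == 4
assert coerce_param('0.5', 'double') == 0.5
assert coerce_param(4, 'string') == '4'
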
Example #27
    def run(self):
        """
        Run Workflow.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        self._update_status_db('RUNNING', '')

        for node_name in self._dag.get_topological_sort():
            node = self._dag.graph().nodes[node_name]
            if node['type'] == 'input':

                Log.some().debug('[%s]: staging input', node_name)
                if not node['node'].stage(
                        move_final=False,
                        **{
                            context: self._workflow_context[context]\
                                .get_context_options()\
                            for context in self._workflow_context
                        }
                ):
                    msg = 'staging failed for input {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

            else: # step node

                # Reinit connection to exec context
                if not self._re_init():
                    msg = 'cannot reinit exec context'
                    Log.an().error(msg)
                    return self._fatal(msg)

                Log.some().info(
                    '[%s]: app: %s:%s [%s]',
                    node_name,
                    node['node']._app['name'],
                    node['node']._app['version'],
                    node['node']._app['git']
                )

                Log.some().debug('[%s]: iterating map uri', node_name)
                if not node['node'].iterate_map_uri():
                    msg = 'iterate map uri failed for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

                # run jobs for step
                Log.some().info('[%s]: running', node_name)
                if not node['node'].run():
                    msg = 'run failed for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

                # poll until job(s) done
                while not node['node'].all_done():
                    node['node'].check_running_jobs()
                    time.sleep(self._config['run_poll_delay'])

                Log.some().debug('[%s]: all jobs complete', node_name)

                # check if step satisfies checkpoint of 'all', 'any', or 'none' job completion
                if not node['node'].checkpoint():
                    msg = 'failed checkpoint for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

                # cleanup jobs
                Log.some().debug('[%s]: cleaning', node_name)
                if not node['node'].clean_up():
                    msg = 'clean up failed for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

                # stage outputs (non-final)
                Log.some().debug('[%s]: staging output', node_name)
                if not node['node'].stage(
                        **{
                            context: self._workflow_context[context]\
                                .get_context_options()\
                            for context in self._workflow_context
                        }
                ):
                    msg = 'staging failed for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)


        # stage final outputs
        for node_name in self._dag.get_topological_sort():
            node = self._dag.graph().nodes[node_name]
            if node['type'] == 'step':

                Log.some().debug('[%s]: staging final output', node_name)
                if not node['node'].stage_final(
                        **{
                            context: self._workflow_context[context]\
                                .get_context_options()\
                            for context in self._workflow_context
                        }
                ):
                    msg = 'staging final output failed for step {}'.format(node_name)
                    Log.an().error(msg)
                    return self._fatal(msg)

                Log.some().info('[%s]: complete', node_name)


        self._update_status_db('FINISHED', '')

        return True
Example #28
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map)

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs, overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                inputs[input_key] = urllib.parse.quote(
                    str(map_item['template'][input_key] or ''), safe='/:')
            else:
                inputs[input_key] = urllib.parse.quote(
                    str(self._app['inputs'][input_key]['default'] or ''),
                    safe='/:')

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                parameters[param_key] = map_item['template'][param_key]
            else:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(str(map_item['attempt']),
                                    slugify(self._step['name']),
                                    slugify(map_item['template']['output']))
        name = name[:62] + '..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'], name)
        app_template = {
            'name': name,
            'appId': self._app['definition']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        Log.some().debug("agave app template:\n%s",
                         pprint.pformat(app_template))

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri'] + '/' +
                name,
                agave=self._agave):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri'] +
                    '/' + name,
                    agave=self._agave):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'], name)

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug('agave job id: %s -> %s',
                         map_item['template']['output'], job['id'])

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
Example #29
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map
        status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are still running
        for map_item in self._map:
            if map_item['status'] not in ['FINISHED', 'FAILED', 'PENDING']:

                map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )

                # for status failures, set to 'UNKNOWN'
                if not map_item['status']:
                    msg = 'cannot get job status for step "{}"'\
                        .format(self._step['name'])
                    Log.a().warning(msg)
                    map_item['status'] = 'UNKNOWN'

                if map_item['status'] in ['FINISHED', 'FAILED']:
                    # status changed to finished or failed
                    Log.some().debug(
                        '[step.%s]: exit status: %s -> %s',
                        self._step['name'],
                        map_item['template']['output'],
                        map_item['status']
                    )

                    # decrease num running procs
                    if self._num_running > 0:
                        self._num_running -= 1

            # check hpc job ids
            if (
                map_item['status'] != 'PENDING' \
                and not map_item['run'][map_item['attempt']].get('hpc_job_id', '')
            ):

                # job id listed in history
                response = self._agave['agave_wrapper'].jobs_get_history(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )

                if not response:
                    msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                        map_item['run'][map_item['attempt']]['agave_job_id']
                    )
                    Log.a().warning(msg)

                else:
                    for item in response:
                        if item['status'] == 'QUEUED':
                            match = re.match(
                                r'^HPC.*local job (\d*)$', item['description']
                            )
                            if match:
                                map_item['run'][map_item['attempt']]['hpc_job_id'] \
                                    = match.group(1)

                                # log hpc job id
                                Log.some().debug(
                                    '[step.%s]: hpc job id: %s -> %s',
                                    self._step['name'],
                                    map_item['template']['output'],
                                    match.group(1)
                                )

                                break

            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                if self._throttle_limit == 0 or self._num_running < self._throttle_limit:
                    # retry job if not at retry or throttle limit
                    if not self.retry_failed(map_item):
                        Log.a().warning(
                            '[step.%s]: cannot retry failed agave job (%s)',
                            self._step['name'],
                            map_item['template']['output']
                        )
                    else:
                        self._num_running += 1

        self._update_status_db(self._status, '')

        return True
Example #30
def run(args, other_args, subparser):
    """
    Run GeneFlow workflow engine.

    Args:
        args.workflow_path: workflow definition or package directory.
        args.job: path to job definition.

    Returns:
        On success: True.
        On failure: False.

    """
    # get absolute path to workflow
    workflow_path = resolve_workflow_path(args.workflow_path)
    if workflow_path:
        Log.some().info('workflow definition found: %s', workflow_path)
    else:
        Log.an().error('cannot find workflow definition: %s',
                       args.workflow_path)
        return False

    # setup environment
    env = Environment(workflow_path=workflow_path)
    if not env.initialize():
        Log.an().error('cannot initialize geneflow environment')
        return False

    # create default config file and SQLite db
    cfg = Config()
    cfg.default(env.get_sqlite_db_path())
    cfg.write(env.get_config_path())
    config_dict = cfg.config('local')

    # load workflow into db
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    defs = data_source.import_definition(workflow_path)
    if not defs:
        Log.an().error('workflow definition load failed: %s', workflow_path)
        return False

    if not defs['workflows']:
        Log.an().error('no workflows found in definition: %s', workflow_path)
        return False

    data_source.commit()

    for workflow in defs['workflows']:
        Log.some().info('workflow loaded: %s -> %s', workflow,
                        defs['workflows'][workflow])

    # get workflow definition back from database to ensure
    # that it's a valid definition
    workflow_id = next(iter(defs['workflows'].values()))
    workflow_dict = data_source.get_workflow_def_by_id(workflow_id)
    if not workflow_dict:
        Log.an().error(
            'cannot get workflow definition from data source: workflow_id=%s',
            workflow_id)
        return False

    ### define arg parsing methods
    def parse_dynamic_args(workflow_dict):
        """
        Parse dynamic args based on workflow dictionary as well as
        some static args.

        Args:
            other_args: List of remaining args from initial parse of
                workflow path.
            workflow_dict: Workflow dictionary

        Returns:
            On success: List of parsed arguments.
            On failure: False.

        """
        # parse dynamic args. these are determined from workflow definition
        dynamic_parser = argparse.ArgumentParser()

        dynamic_parser.add_argument('-j',
                                    '--job',
                                    type=str,
                                    default=None,
                                    dest='job_path',
                                    help='Job Definition(s)')
        for input_key in workflow_dict['inputs']:
            dynamic_parser.add_argument(
                '--in.{}'.format(input_key),
                dest='inputs.{}'.format(input_key),
                required=False,
                default=workflow_dict['inputs'][input_key]['default'],
                help=workflow_dict['inputs'][input_key]['label'])
        for param_key in workflow_dict['parameters']:
            dynamic_parser.add_argument(
                '--param.{}'.format(param_key),
                dest='parameters.{}'.format(param_key),
                required=False,
                default=workflow_dict['parameters'][param_key]['default'],
                help=workflow_dict['parameters'][param_key]['label'])
        dynamic_parser.add_argument('-o',
                                    '--output',
                                    type=str,
                                    default='~/geneflow-output',
                                    help='Output Folder')
        dynamic_parser.add_argument('-n',
                                    '--name',
                                    type=str,
                                    default='geneflow-job',
                                    help='Name of Job')
        dynamic_parser.add_argument('-w',
                                    '--work',
                                    nargs='+',
                                    type=str,
                                    default=[],
                                    help='Work Directory')
        dynamic_parser.add_argument('--exec-context',
                                    '--ec',
                                    nargs='+',
                                    type=str,
                                    dest='exec_context',
                                    default=[],
                                    help='Execution Contexts')
        dynamic_parser.add_argument('--exec-method',
                                    '--em',
                                    nargs='+',
                                    type=str,
                                    dest='exec_method',
                                    default=[],
                                    help='Execution Methods')
        dynamic_parser.add_argument('--exec-param',
                                    '--ep',
                                    nargs='+',
                                    type=str,
                                    dest='exec_param',
                                    default=[],
                                    help='Execution Parameters')

        dynamic_args = dynamic_parser.parse_known_args(other_args)

        return dynamic_args[0]

    if 'gooey' in sys.modules:

        @Gooey(program_name='GeneFlow: {}'.format(workflow_dict['name']),
               program_description=workflow_dict['description'],
               target='gf --log-level={} run {}'.format(
                   args.log_level, args.workflow_path),
               monospace_display=True)
        def parse_dynamic_args_gui(workflow_dict):
            """
            Parse dynamic args based on the workflow dictionary as well as
            some static args, displaying a Gooey GUI instead of the plain CLI.

            Args:
                workflow_dict: Workflow dictionary.

            Note:
                other_args (the remaining args from the initial parse of the
                workflow path) is captured from the enclosing scope, not
                passed as a parameter.

            Returns:
                On success: Namespace of parsed arguments.
                On failure: False.

            """
            # parse dynamic args. these are determined from workflow definition
            dynamic_parser = GooeyParser()
            input_group = dynamic_parser.add_argument_group(
                "Workflow Inputs",
                "Files or folders to be passed to the workflow")
            for input_key in workflow_dict['inputs']:
                widget = 'FileChooser'
                if workflow_dict['inputs'][input_key]['type'] == 'Directory':
                    widget = 'DirChooser'
                input_group.add_argument(
                    '--in.{}'.format(input_key),
                    dest='inputs.{}'.format(input_key),
                    required=False,
                    default=workflow_dict['inputs'][input_key]['default'],
                    help=workflow_dict['inputs'][input_key]['label'],
                    widget=widget)
            param_group = dynamic_parser.add_argument_group(
                "Workflow Parameters",
                "Number or string parameters to be passed to the workflow")
            for param_key in workflow_dict['parameters']:
                param_group.add_argument(
                    '--param.{}'.format(param_key),
                    dest='parameters.{}'.format(param_key),
                    required=False,
                    default=workflow_dict['parameters'][param_key]['default'],
                    help=workflow_dict['parameters'][param_key]['label'])
            job_group = dynamic_parser.add_argument_group(
                "Job Options", "Output/intermediate folders and job name")
            job_group.add_argument('-o',
                                   '--output',
                                   type=str,
                                   default='~/geneflow-output',
                                   help='Output Folder',
                                   widget='DirChooser')
            job_group.add_argument('-n',
                                   '--name',
                                   type=str,
                                   default='geneflow-job',
                                   help='Name of Job')
            job_group.add_argument('-w',
                                   '--work',
                                   nargs='+',
                                   type=str,
                                   default=[],
                                   help='Work Directory')
            exec_group = dynamic_parser.add_argument_group(
                "Execution Options", "Customize workflow execution")
            exec_group.add_argument('--exec-context',
                                    '--ec',
                                    nargs='+',
                                    type=str,
                                    dest='exec_context',
                                    default=[],
                                    help='Execution Contexts')
            exec_group.add_argument('--exec-method',
                                    '--em',
                                    nargs='+',
                                    type=str,
                                    dest='exec_method',
                                    default=[],
                                    help='Execution Methods')
            exec_group.add_argument('--exec-param',
                                    '--ep',
                                    nargs='+',
                                    type=str,
                                    dest='exec_param',
                                    default=[],
                                    help='Execution Parameters')

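            # GooeyParser subclasses argparse.ArgumentParser, so parse_args
            # behaves as usual once the form is submitted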
            dynamic_args = dynamic_parser.parse_args(other_args)

            return dynamic_args

    # get dynamic args
    if args.gui and 'gooey' in sys.modules:
        dynamic_args = parse_dynamic_args_gui(workflow_dict)
    else:
        dynamic_args = parse_dynamic_args(workflow_dict)

    # get absolute path to job file if provided
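    # (Path.absolute() anchors a relative path to the current working
    # directory but does not resolve symlinks or normalize '..')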
    job_path = None
    if dynamic_args.job_path:
        job_path = Path(dynamic_args.job_path).absolute()

    # load job definition if provided
    jobs_dict = {}
    gf_def = Definition()
    if job_path:
        if not gf_def.load(job_path):
            Log.an().error('cannot load job definition: %s', str(job_path))
            return False
        jobs_dict = gf_def.jobs()
    else:
        # create default definition
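        # (a single placeholder job; its fields are overridden by the CLI
        # options applied below)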
        jobs_dict = {
            'job': {
                'name': 'GeneFlow job',
                'output_uri': 'geneflow_output',
                'work_uri': {
                    'local': '~/.geneflow/work'
                }
            }
        }

    # override with known cli parameters
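    # (each modifier is a 'dotted.key=value' string applied to every job in
    # jobs_dict, e.g. 'output_uri=~/geneflow-output')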
    apply_job_modifiers(jobs_dict, [
        'name={}'.format(dynamic_args.name), 'output_uri={}'.format(
            dynamic_args.output)
    ])

    # insert workflow name into job, if not provided
    workflow_name = next(iter(defs['workflows']))
    for job in jobs_dict.values():
        if 'workflow_name' not in job:
            job['workflow_name'] = workflow_name

    # add inputs and parameters to job definition
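    # (e.g., for a hypothetical input named 'reads', the Namespace attribute
    # 'inputs.reads' with value '/data/reads.fq' becomes the modifier
    # 'inputs.reads=/data/reads.fq')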
    apply_job_modifiers(
        jobs_dict,
        [
            '{}={}'.format(dynamic_arg, getattr(dynamic_args, dynamic_arg))
            for dynamic_arg in vars(dynamic_args)
            if dynamic_arg.startswith(('inputs.', 'parameters.'))
        ]
    )

    # add work URIs to job definition
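    # (illustrative: '-w /scratch/gf' parses with scheme 'local' and becomes
    # the modifier 'work_uri.local=/scratch/gf'; one URI is kept per scheme,
    # so a later arg with the same scheme wins)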
    work_uris = {}
    for work_arg in dynamic_args.work:
        parsed_work_uri = URIParser.parse(work_arg)
        if not parsed_work_uri:
            # skip if invalid URI
            Log.a().warning('invalid work uri: %s', work_arg)
        else:
            work_uris[
                parsed_work_uri['scheme']] = parsed_work_uri['chopped_uri']

    apply_job_modifiers(jobs_dict, [
        'work_uri.{}={}'.format(context, work_uris[context])
        for context in work_uris
    ])

    # add execution options to job definition
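    # (each option is expected as 'context:value', e.g., illustrative:
    # '--exec-method local:singularity' -> 'execution.method.local=singularity';
    # the maxsplit of 1 keeps any ':' in the value intact)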
    apply_job_modifiers(jobs_dict, [
        'execution.context.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_context
    ] + [
        'execution.method.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_method
    ] + [
        'execution.parameters.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_param
    ])

    # get default values from workflow definition
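    # (back-fill so every job carries a complete set of inputs and parameters
    # even if the job file omitted some)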
    for job in jobs_dict.values():
        if 'inputs' not in job:
            job['inputs'] = {}
        if 'parameters' not in job:
            job['parameters'] = {}
        for input_key in workflow_dict['inputs']:
            if input_key not in job['inputs']:
                job['inputs'][input_key]\
                    = workflow_dict['inputs'][input_key]['default']
        for param_key in workflow_dict['parameters']:
            if param_key not in job['parameters']:
                job['parameters'][param_key]\
                    = workflow_dict['parameters'][param_key]['default']

    # expand URIs
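    # (only 'local' URIs are expanded, e.g. '~/geneflow-output' ->
    # '/home/<user>/geneflow-output'; remote schemes such as 'agave' are
    # left untouched)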
    for job in jobs_dict.values():
        # output URI
        parsed_uri = URIParser.parse(job['output_uri'])
        if not parsed_uri:
            Log.an().error('invalid output uri: %s', job['output_uri'])
            return False
        # expand relative path if local
        if parsed_uri['scheme'] == 'local':
            job['output_uri'] = str(
                Path(parsed_uri['chopped_path']).expanduser().resolve())
        # work URIs
        for context in job['work_uri']:
            parsed_uri = URIParser.parse(job['work_uri'][context])
            if not parsed_uri:
                Log.an().error(
                    'invalid work uri: %s', job['work_uri'][context])
                return False
            # expand relative path if local
            if parsed_uri['scheme'] == 'local':
                job['work_uri'][context] = str(
                    Path(parsed_uri['chopped_path']).expanduser().resolve())
        # input URIs
        for input_key in job['inputs']:
            parsed_uri = URIParser.parse(job['inputs'][input_key])
            if not parsed_uri:
                Log.an().error('invalid input uri: %s',
                               job['inputs'][input_key])
                return False
            # expand relative path if local
            if parsed_uri['scheme'] == 'local':
                job['inputs'][input_key] = str(
                    Path(parsed_uri['chopped_path']).expanduser().resolve())

    # import jobs into database
    job_ids = data_source.import_jobs_from_dict(jobs_dict)
    if job_ids is False:
        Log.an().error('cannot import jobs')
        return False
    data_source.commit()

    # create process pool to run up to 5 workflows in parallel
    pool = Pool(max(1, min(5, len(job_ids))))
    jobs = [{'name': job, 'id': job_ids[job], 'log': None} for job in job_ids]

    result = pool.map(
        partial(geneflow.cli.common.run_workflow,
                config=config_dict,
                log_level=args.log_level), jobs)

    pool.close()
    pool.join()
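
    # result holds one entry per job (True on success, False on failure),
    # in the same order as the jobs list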

    if not all(result):
        Log.an().error('some jobs failed')

    return result