Example #1
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if procs are running, finished, or failed
        for map_item in self._map:
            try:
                if ShellWrapper.is_running(
                        map_item['run'][map_item['attempt']]['proc']
                ):
                    map_item['status'] = 'RUNNING'
                else:
                    if map_item['run'][map_item['attempt']]['proc'].returncode:
                        map_item['status'] = 'FAILED'
                    else:
                        map_item['status'] = 'FINISHED'
                map_item['run'][map_item['attempt']]['status']\
                    = map_item['status']
            except (OSError, AttributeError) as err:
                Log.a().warning(
                    'process polling failed for map item "%s" [%s]',
                    map_item['filename'], str(err)
                )
                map_item['status'] = 'FAILED'

        self._update_status_db(self._status, '')

        return True
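An illustrative shape for a map item as consumed above (a sketch with hypothetical values; the real entries are created by _run_map):

# map_item = {
#     'filename': 'sample1.fastq',
#     'attempt': 0,
#     'status': 'RUNNING',
#     'run': [{'proc': <subprocess.Popen>, 'pid': 12345, 'status': 'RUNNING'}],
# }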
Example #2
def apply_job_modifiers(jobs_dict, job_mods):
    """Update the jobs_dict with the given modifiers."""
    for mod in job_mods:
        # split at '='; str.split cannot raise here, so no exception
        # handling is needed
        parts = mod.split('=')

        key = parts[0]
        if not key:
            Log.a().warning('empty job mod')
            continue  # skip mod

        if len(parts) == 1:
            # no value given; treat the key as a boolean switch
            val = True
        else:
            # re-join remaining parts so values containing '=' are preserved
            val = '='.join(parts[1:])

        # split key at .
        keys = key.split('.')

        # apply to all jobs
        for job in jobs_dict.values():
            set_dict_key_list(job, keys, val)
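A minimal usage sketch, assuming set_dict_key_list assigns a value along a nested key path; the job data and modifier strings are hypothetical:

jobs = {'job1': {'name': 'align', 'inputs': {}}}
apply_job_modifiers(jobs, ['inputs.reads=/data/r1.fq', 'no_mash'])
# 'inputs.reads' is split at '.' and applied to every job in jobs_dict;
# 'no_mash' has no '=', so it is set to True as a boolean switch.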
Example #3
    def _init_data_uri(self):
        """
        Create output data URI for the source context (local).

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        # make sure the source data URI has a compatible scheme (local)
        if self._parsed_data_uris[self._source_context]['scheme'] != 'local':
            msg = 'invalid data uri scheme for this step: {}'.format(
                self._parsed_data_uris[self._source_context]['scheme']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # delete old folder if it exists and clean==True
        if (
                DataManager.exists(
                    parsed_uri=self._parsed_data_uris[self._source_context]
                )
                and self._clean
        ):
            if not DataManager.delete(
                    parsed_uri=self._parsed_data_uris[self._source_context]
            ):
                Log.a().warning(
                    'cannot delete existing data uri: %s',
                    self._parsed_data_uris[self._source_context]['chopped_uri']
                )

        # create folder
        if not DataManager.mkdir(
                parsed_uri=self._parsed_data_uris[self._source_context],
                recursive=True
        ):
            msg = 'cannot create data uri: {}'.format(
                self._parsed_data_uris[self._source_context]['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # create _log folder
        if not DataManager.mkdir(
                uri='{}/_log'.format(
                    self._parsed_data_uris[self._source_context]['chopped_uri']
                ),
                recursive=True
        ):
            msg = 'cannot create _log folder in data uri: {}/_log'.format(
                self._parsed_data_uris[self._source_context]['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        return True
Example #4
    def _copy_asset(self, asset):
        """
        Copy app assets.

        Args:
            self: class instance
            asset: what to copy

        Returns:
            On success: True.
            On failure: False.

        """
        if not self._copy_prefix:
            Log.a().warning(
                'copy prefix must be specified when copying app assets'
            )
            return False

        if not asset.get('dst'):
            Log.a().warning('asset dst required for app %s', self._app['name'])
            return False

        if not asset.get('src'):
            Log.a().warning('asset src required for app %s', self._app['name'])
            return False

        # create asset destination
        asset_path = Path(self._path / asset['dst'])
        asset_path.mkdir(exist_ok=True)

        if 'zip' in asset:
            # create a tar.gz of src
            cmd = 'tar -czf "{}" --directory="{}" .'.format(
                str(Path(asset_path / '{}.tar.gz'.format(asset['zip']))),
                str(Path(self._copy_prefix) / asset['src'])
            )
            Log.some().info('zipping: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot zip asset src: %s', cmd)
                return False

            Log.some().info('tar stdout: %s', cmd_result)

        else:
            # copy without creating a tar.gz
            cmd = 'cp -R "{}" "{}"'.format(
                str(Path(self._copy_prefix) / asset['src']),
                str(asset_path)
            )
            Log.some().info('copying: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot copy asset src: %s', cmd)
                return False

            Log.some().info('copy stdout: %s', cmd_result)

        return True
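An illustrative asset entry as consumed by _copy_asset (field values are hypothetical):

# asset = {
#     'src': 'bin',      # source folder under the copy prefix
#     'dst': 'assets',   # destination folder under the app path
#     'zip': 'tools',    # optional: produce assets/tools.tar.gz instead of cp
# }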
Example #5
    def _send_notifications(self, status):
        """
        Send a status-change notification to all configured endpoints.

        Args:
            self: class instance.
            status: new job status.

        Returns:
            Nothing.

        """
        # construct message
        msg_data = {
            'to': '',
            'from': '*****@*****.**',
            'subject': 'GeneFlow Job "{}": {}'.format(
                self._job['name'], status
            ),
            'content': (
                'Your GeneFlow job status has changed to {}'
                '\nJob Name: {}'
                '\nJob ID: {}'
            ).format(status, self._job['name'], self._job_id)
        }

        # use agave token as header if available
        if 'agave' in self._workflow_context:
            msg_headers = {
                'Authorization':'Bearer {}'.format(
                    self._workflow_context['agave']\
                        .get_context_options()['agave_wrapper']\
                        ._agave.token.token_info.get('access_token')
                )
            }

        else:
            msg_headers = {}

        Log.some().info('message headers: %s', str(msg_headers))

        for notify in self._job['notifications']:
            Log.some().info(
                'sending notification(s) to %s @ %s',
                str(notify['to']),
                notify['url'],
            )

            to_list = notify['to']
            if isinstance(notify['to'], str):
                to_list = [notify['to']]

            for to_item in to_list:
                msg_data['to'] = to_item
                try:
                    response = requests.post(
                        notify['url'], data=msg_data, headers=msg_headers
                    )

                except requests.exceptions.RequestException as err:
                    Log.a().warning(
                        'cannot send notification to %s @ %s: %s',
                        to_item, notify['url'], str(err)
                    )
                    continue  # 'response' is unbound here; skip status check

                if response.status_code != 201:
                    Log.a().warning(
                        'cannot send notification to %s @ %s: %s',
                        to_item, notify['url'], response.text
                    )
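An illustrative notifications entry as iterated above (hypothetical values):

# self._job['notifications'] = [
#     {'to': ['user@example.com'], 'url': 'https://notify.example.org/api'},
# ]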
Example #6
    def retry_failed(self, map_item):
        """
        Retry a job.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            True if failed/stopped job restarted successfully
            False if failed/stopped job not restarted due to error

        """
        # retry job
        Log.some().info(
            '[step.%s]: retrying agave job (%s), attempt number %s',
            self._step['name'],
            map_item['template']['output'],
            map_item['attempt']+1
        )

        # add another run to list
        map_item['attempt'] += 1
        map_item['run'].append({})
        if not self._run_map(map_item):
            Log.a().warning(
                '[step.%s]: cannot retry agave job (%s), attempt number %s',
                self._step['name'],
                map_item['template']['output'],
                map_item['attempt']
            )
            return False

        return True
Example #7
    def compile_template(template_path, template_name, compiled_name,
                         **kwargs):
        """
        Compile a GeneFlow template file.

        Args:
            template_path: search path for templates. If omitted, the
                GeneFlow package path of data/templates is used.
            template_name: name of the template file, relative to the
                search path.
            compiled_name: full path of the compiled target file.
            kwargs: data to populate the template.

        Returns:
            On success: True.
            On failure: False.

        """
        # set default template path
        if not template_path:
            template_path = GF_PACKAGE_PATH / 'data/templates'

        # load template
        try:
            template_loader = jinja2.FileSystemLoader(
                searchpath=str(template_path))
            template_env = jinja2.Environment(loader=template_loader,
                                              trim_blocks=True,
                                              lstrip_blocks=True)
            template = template_env.get_template(template_name)

        except jinja2.TemplateSyntaxError as err:
            Log.an().warning('cannot load template, syntax error: %s [%s, %s]',
                             template_name, str(err), str(err.lineno))
            return False

        except jinja2.TemplateError as err:
            Log.an().warning('cannot load template: %s [%s]', template_name,
                             str(err))
            return False

        # compile and write
        try:
            with open(str(compiled_name), 'w') as compiled_file:
                compiled_file.write(template.render(**kwargs))

        except IOError as err:
            Log.an().warning('cannot write compiled template file: %s [%s]',
                             compiled_name, str(err))
            return False

        except jinja2.TemplateError as err:
            Log.a().warning('cannot compile template: %s [%s]', template_name,
                            str(err))
            return False

        return True
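A usage sketch, assuming TemplateCompiler and Log are in scope and a hypothetical template 'job.sh.j2' taking 'job_name' and 'threads' variables exists in the default search path (None for template_path falls back to the packaged data/templates):

if not TemplateCompiler.compile_template(
        None, 'job.sh.j2', '/tmp/job.sh', job_name='align', threads=4
):
    Log.a().warning('cannot compile job template')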
Example #8
    def __del__(self):
        """
        Disconnect from drmaa session when workflow class is deleted.

        Args:
            None.

        Returns:
            Nothing.

        """
        try:
            self._drmaa_session.exit()
        except drmaa.errors.DrmaaException as err:
            Log.a().warning('cannot exit drmaa session: [%s]', str(err))
Example #9
    def _re_init(self):
        """Reinitialize the drmaa session."""
        # exit existing session
        try:
            self._drmaa_session.exit()
        except drmaa.errors.DrmaaException as err:
            Log.a().warning('cannot exit drmaa session: [%s]', str(err))

        # initialize session again
        try:
            self._drmaa_session.initialize()
        except drmaa.errors.DrmaaException as err:
            Log.an().error('cannot initialize drmaa session: [%s]', str(err))
            return False

        return True
Example #10
def apply_job_modifiers(jobs_dict, job_mods):
    """Update the jobs_dict with the given modifiers."""
    for mod in job_mods:
        # split at =
        try:
            key, val = mod.split('=')
        except ValueError as err:
            Log.a().warning('job mod "%s" is malformed [%s]', mod, str(err))
            continue  # skip mod

        # split key at .
        keys = key.split('.')

        # apply to all jobs
        for job in jobs_dict.values():
            set_dict_key_list(job, keys, val)
Example #11
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are running, finished, or failed
        for map_item in self._map:
            if map_item['status'] not in ['FINISHED', 'FAILED']:
                # can only get job status if it has not already been disposed with "wait"
                status = self._gridengine['drmaa_session'].jobStatus(
                    map_item['run'][map_item['attempt']]['hpc_job_id'])
                map_item['status'] = self._job_status_map[status]

                if map_item['status'] in ['FINISHED', 'FAILED']:
                    # check exit status
                    job_info = self._gridengine['drmaa_session'].wait(
                        map_item['run'][map_item['attempt']]['hpc_job_id'],
                        self._gridengine['drmaa_session'].TIMEOUT_NO_WAIT)
                    Log.a().debug('[step.%s]: exit status: %s -> %s',
                                  self._step['name'],
                                  map_item['template']['output'],
                                  job_info.exitStatus)
                    if job_info.exitStatus > 0:
                        # job actually failed
                        map_item['status'] = 'FAILED'

            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                # retry job if not at limit
                if not self.retry_failed(map_item):
                    Log.a().warning(
                        '[step.%s]: cannot retry failed gridengine job (%s)',
                        self._step['name'], map_item['template']['output'])

        self._update_status_db(self._status, '')

        return True
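An illustrative DRMAA-to-GeneFlow mapping assumed for _job_status_map (a sketch: the keys are drmaa-python JobState constants, and the value strings are guesses based on the statuses used above):

# self._job_status_map = {
#     drmaa.JobState.QUEUED_ACTIVE: 'PENDING',
#     drmaa.JobState.RUNNING: 'RUNNING',
#     drmaa.JobState.DONE: 'FINISHED',
#     drmaa.JobState.FAILED: 'FAILED',
# }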
Example #12
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if procs are running, finished, or failed
        for map_item in self._map:
            if map_item['status'] in ['RUNNING', 'UNKNOWN']:
                try:
                    if not ShellWrapper.is_running(
                            map_item['run'][map_item['attempt']]['proc']):
                        returncode = map_item['run'][
                            map_item['attempt']]['proc'].returncode
                        if returncode:
                            map_item['status'] = 'FAILED'
                        else:
                            map_item['status'] = 'FINISHED'

                        Log.a().debug('[step.%s]: exit status: %s -> %s',
                                      self._step['name'],
                                      map_item['template']['output'],
                                      returncode)

                        # decrease num running procs
                        if self._num_running > 0:
                            self._num_running -= 1

                except (OSError, AttributeError) as err:
                    Log.a().warning(
                        'process polling failed for map item "%s" [%s]',
                        map_item['filename'], str(err))
                    map_item['status'] = 'UNKNOWN'

                map_item['run'][map_item['attempt']]['status']\
                    = map_item['status']

        self._update_status_db(self._status, '')

        return True
Example #13
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are running, finished, or failed
        for map_item in self._map:
            if map_item['status'] not in ['FINISHED', 'FAILED', 'PENDING']:
                try:
                    # can only get job status if it has not already been disposed with "wait"
                    status = self._slurm['drmaa_session'].jobStatus(
                        map_item['run'][map_item['attempt']]['hpc_job_id'])
                    map_item['status'] = self._job_status_map[status]

                except drmaa.errors.DrmCommunicationException as err:
                    msg = 'cannot get job status for step "{}" [{}]'\
                            .format(self._step['name'], str(err))
                    Log.a().warning(msg)
                    map_item['status'] = 'UNKNOWN'

                if map_item['status'] in ['FINISHED', 'FAILED']:
                    # check exit status
                    job_info = self._slurm['drmaa_session'].wait(
                        map_item['run'][map_item['attempt']]['hpc_job_id'],
                        self._slurm['drmaa_session'].TIMEOUT_NO_WAIT)
                    Log.a().debug('[step.%s]: exit status: %s -> %s',
                                  self._step['name'],
                                  map_item['template']['output'],
                                  job_info.exitStatus)
                    if job_info.exitStatus > 0:
                        # job actually failed
                        map_item['status'] = 'FAILED'

                    # decrease num running procs
                    if self._num_running > 0:
                        self._num_running -= 1

            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                if self._throttle_limit == 0 or self._num_running < self._throttle_limit:
                    # retry job if not at retry or throttle limit
                    if not self.retry_failed(map_item):
                        Log.a().warning(
                            '[step.%s]: cannot retry failed slurm job (%s)',
                            self._step['name'], map_item['template']['output'])
                    else:
                        self._num_running += 1

        self._update_status_db(self._status, '')

        return True
Example #14
    def upload_agave_test_data(self):
        """
        Upload Agave test data from workflow package.

        Args:
            self: class instance.

        Returns:
            On success: True.
            On failure: False.

        """
        if (not self._agave or not self._agave_params
                or not self._agave_params.get('agave')):
            Log.a().warning(
                'must provide agave parameters to upload test data')
            return False

        # create main test data URI
        parsed_base_test_uri = URIParser.parse('agave://{}/{}'.format(
            self._agave_params['agave']['deploymentSystem'],
            self._agave_params['agave']['testDataDir']))
        Log.some().info('creating base test data uri: %s',
                        parsed_base_test_uri['chopped_uri'])
        if not DataManager.mkdir(parsed_uri=parsed_base_test_uri,
                                 recursive=True,
                                 agave={
                                     'agave': self._agave,
                                     'agave_config': self._config['agave']
                                 }):
            Log.a().warning('cannot create base test data uri: %s',
                            parsed_base_test_uri['chopped_uri'])
            return False

        # upload test data
        parsed_local_test_uri = URIParser.parse(str(Path(self._path) / 'data'))
        parsed_agave_test_uri = URIParser.parse('{}/{}'.format(
            parsed_base_test_uri['chopped_uri'],
            Path(self._path).name))
        Log.some().info('copying test data from %s to %s',
                        parsed_local_test_uri['chopped_uri'],
                        parsed_agave_test_uri['chopped_uri'])
        if not DataManager.copy(parsed_src_uri=parsed_local_test_uri,
                                parsed_dest_uri=parsed_agave_test_uri,
                                local={},
                                agave={
                                    'agave': self._agave,
                                    'agave_config': self._config['agave']
                                }):
            Log.a().warning('cannot copy test data from %s to %s',
                            parsed_local_test_uri['chopped_uri'],
                            parsed_agave_test_uri['chopped_uri'])
            return False

        return True
Example #15
    def _update_status_db(self, status, msg):
        """
        Update workflow status in DB.

        Args:
            self: class instance
            status: Workflow status
            msg: Success, error or warning message

        Returns:
            On success: True.
            On failure: False.

        """
        try:
            data_source = DataSource(self._config['database'])
        except DataSourceException as err:
            msg = 'data source initialization error [{}]'.format(str(err))
            Log.an().error(msg)
            return False

        # set start time (if started, or errored immediately)
        if (
                status in ['RUNNING', 'ERROR']
                and self._status == 'PENDING'
        ):
            if not data_source.set_job_started(self._job_id):
                Log.a().warning('cannot set job start time in data source')
                data_source.rollback()

        # set finished time (even on error)
        if status in ['FINISHED', 'ERROR']:
            if not data_source.set_job_finished(self._job_id):
                Log.a().warning('cannot set job finish time in data source')
                data_source.rollback()

        # if state change, contact notification endpoint
        if status != self._status:
            if self._job['notifications']:
                self._send_notifications(status)

        # update database
        self._status = status
        if not data_source.update_job_status(self._job_id, status, msg):
            Log.a().warning('cannot update job status in data source')
            data_source.rollback()

        data_source.commit()
        return True
Example #16
    def switch_context(cls, uri, new_base_uri):
        """
        Change the context of uri to the new_base.

        new_base can have a different scheme and base URL. If uri has no 'name'
        (e.g., ends with /), then the new context URI is identical to the
        normalized new_base_uri.

        Args:
            uri: URI to change context.
            new_base_uri: base URI of the new context.

        Returns:
            On success: parsed URI in new context.
            On failure: False.

        """
        # validate URIs
        parsed_uri = cls.parse(uri)
        if not parsed_uri:
            Log.a().debug('invalid uri: %s', uri)
            return False

        parsed_new_base_uri = cls.parse(new_base_uri)
        if not parsed_new_base_uri:
            Log.a().debug('invalid new base uri: %s', new_base_uri)
            return False

        # construct URI in new context
        new_uri = '{}:{}{}{}'.format(
            parsed_new_base_uri['scheme'],
            ('//{}'.format(parsed_new_base_uri['authority'])
             if parsed_new_base_uri['authority'] else ''),
            parsed_new_base_uri['chopped_path'],
            ('{}' if parsed_new_base_uri['chopped_path'] == '/' else
             '/{}').format(parsed_uri['name']))

        # parse the new URI to validate
        parsed_new_uri = cls.parse(new_uri)
        if not parsed_new_uri:
            Log.a().debug('invalid new uri: %s', new_uri)
            return False

        return parsed_new_uri
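A usage sketch, assuming this classmethod lives on URIParser (as its parse calls suggest) and that URIParser is imported; the URIs are hypothetical:

new_uri = URIParser.switch_context(
    'local:/data/input/reads.fq', 'agave://storage/archive'
)
# new_uri['chopped_uri'] would be 'agave://storage/archive/reads.fq'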
Example #17
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                if map_item['template'][input_key]:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(map_item['template'][input_key]),
                        safe='/:'
                    )
            else:
                if self._app['inputs'][input_key]['default']:
                    # only include an input if the value is a non-empty string
                    inputs[input_key] = urllib.parse.quote(
                        str(self._app['inputs'][input_key]['default']),
                        safe='/:'
                    )

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                if self._app['parameters'][param_key]['type'] in ['int', 'long']:
                    parameters[param_key] = int(map_item['template'][param_key])
                elif self._app['parameters'][param_key]['type'] in ['float', 'double']:
                    parameters[param_key] = float(map_item['template'][param_key])
                else:
                    parameters[param_key] = str(map_item['template'][param_key])
            else:
                if self._app['parameters'][param_key]['default'] not in [None, '']:
                    parameters[param_key] \
                        = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # add execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            parameters['exec_init'] = self._step['execution']['parameters']['init']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(
            str(map_item['attempt']),
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
        )
        name = name[:62]+'..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'],
            name
        )
        app_template = {
            'name': name,
            'appId': self._app['implementation']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        # specify processors if 'slots' param given
        if 'slots' in self._step['execution']['parameters']:
            app_template['processorsPerNode'] = int(
                self._step['execution']['parameters']['slots']
            )
        # specify memory if 'mem' param given
        if 'mem' in self._step['execution']['parameters']:
            app_template['memoryPerNode'] = '{}'.format(
                self._step['execution']['parameters']['mem']
            )

        Log.some().debug(
                "[step.%s]: agave app template:\n%s",
                self._step['name'],
                pprint.pformat(app_template)
        )

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                agave=self._agave
        ):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                    agave=self._agave
            ):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'],
                    name
                )

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug(
            '[step.%s]: agave job id: %s -> %s',
            self._step['name'],
            map_item['template']['output'],
            job['id']
        )

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
Example #18
0
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map)

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                inputs[input_key] = urllib.parse.quote(str(
                    map_item['template'][input_key] or ''),
                                                       safe='/:')
            else:
                inputs[input_key] = urllib.parse.quote(str(
                    self._app['inputs'][input_key]['default'] or ''),
                                                       safe='/:')

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                parameters[param_key] = map_item['template'][param_key]
            else:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

        # add execution method as parameter
        parameters['exec_method'] = self._step['execution']['method']

        # construct agave app template
        name = 'gf-{}-{}-{}'.format(str(map_item['attempt']),
                                    slugify(self._step['name']),
                                    slugify(map_item['template']['output']))
        name = name[:62] + '..' if len(name) > 64 else name
        archive_path = '{}/{}'.format(
            self._agave['parsed_archive_uri']['chopped_path'], name)
        app_template = {
            'name': name,
            'appId': self._app['definition']['agave']['agave_app_id'],
            'archive': True,
            'inputs': inputs,
            'parameters': parameters,
            'archiveSystem': self._agave['parsed_archive_uri']['authority'],
            'archivePath': archive_path
        }
        Log.some().debug("agave app template:\n%s",
                         pprint.pformat(app_template))

        # delete archive path if it exists
        if DataManager.exists(
                uri=self._agave['parsed_archive_uri']['chopped_uri'] + '/' +
                name,
                agave=self._agave):
            if not DataManager.delete(
                    uri=self._agave['parsed_archive_uri']['chopped_uri'] +
                    '/' + name,
                    agave=self._agave):
                Log.a().warning(
                    'cannot delete archive uri: %s/%s',
                    self._agave['parsed_archive_uri']['chopped_uri'], name)

        # submit job
        job = self._agave['agave_wrapper'].jobs_submit(app_template)
        if not job:
            msg = 'agave jobs submit failed for "{}"'.format(
                app_template['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # log agave job id
        Log.some().debug('agave job id: %s -> %s',
                         map_item['template']['output'], job['id'])

        # record job info
        map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
        map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
            .format(
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )
        map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

        # set status of process
        map_item['status'] = 'PENDING'
        map_item['run'][map_item['attempt']]['status'] = 'PENDING'

        return True
Example #19
    def parse(cls, uri):
        """
        Parse a URI and return components.

        If the scheme is missing, it defaults to "local".

        Args:
            uri: A generic URI string.

        Returns:
            On success: A dict that contains "uri", "scheme", "authority", and
            "path", etc:
                {
                    "uri": original URI
                    "chopped_uri": normalized URI
                    "scheme":
                    "authority":
                    "path": full path
                    "chopped_path": normalized path
                    "folder": folder part of path (to last slash, not including
                        last slash
                    "name": folder/file name, part of path after last slash
                }

            On failure: False.

        """
        matched = re.match(cls.uri_regex, str(uri))
        if not matched:
            Log.a().debug('invalid uri: %s', uri)
            return False

        # extract scheme, e.g., local, agave, http, etc.
        scheme = matched.group(2)
        if not scheme:
            scheme = 'local'

        # authority can be '' (e.g., server, or storage system)
        authority = matched.group(4) if matched.group(4) else ''
        path = matched.group(5) if matched.group(5) else '/'

        # replace one or more consecutive slashes with single slash
        path = re.sub('/+', '/', path)

        # get folder and name from path
        matched = re.match(cls.path_regex, path)
        if not matched:
            Log.a().debug('invalid path of uri: %s', path)
            return False

        folder = matched.group(1) if matched.group(1) else matched.group(2)
        name = matched.group(3) if matched.group(3) else ''

        # "normalized" path without extra slashes
        chopped_path = (
            folder + name if folder in ('/', '') else folder + '/' + name
        ) if name else folder

        # "normalized" URI without extra slashes and with scheme
        chopped_uri = '{}{}{}'.format(
            '{}:'.format(scheme),
            ('//{}'.format(authority) if authority else ''), chopped_path)

        return {
            'uri': uri,  # original URI
            'chopped_uri': chopped_uri,
            'scheme': scheme,
            'authority': authority,
            'path': path,
            'chopped_path': chopped_path,
            'folder': folder,
            'name': name
        }
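A usage sketch (hypothetical URI), assuming this method lives on URIParser as the other examples suggest; note how repeated slashes are collapsed:

parsed = URIParser.parse('agave://storage//data///reads.fq')
# parsed['chopped_uri'] -> 'agave://storage/data/reads.fq'
# parsed['folder'] -> '/data'
# parsed['name'] -> 'reads.fq'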
Example #20
    def install_assets(self):
        """
        Install app assets.

        Args:
            self: class instance

        Returns:
            On success: True.
            On failure: False.

        """
        # set asset type
        default_asset = self._app_asset
        # if not set on CLI, use asset type specified in workflow apps-repo
        if not default_asset:
            default_asset = self._app.get('asset')
        # if not set in workflow apps-repo, use app default
        if not default_asset:
            default_asset = self._config.get('default_asset')

        Log.some().info('installing app asset type: %s', str(default_asset))
        if not default_asset:
            # no asset type specified, nothing left to do
            return True

        if 'assets' not in self._config:
            # app is not configured with any assets
            return True

        if default_asset not in self._config['assets']:
            # if asset type is not listed in config, display warning and
            # continue
            Log.a().warning(
                'unconfigured asset type specified: %s', str(default_asset)
            )
            return True

        assets = self._config['assets'][default_asset]

        # install all components for asset
        for asset in assets:
            Log.some().info('app asset:\n%s', pprint.pformat(asset))

            if 'type' not in asset:
                Log.a().warning('asset type missing for app "%s"', self._app['name'])
                continue

            if asset['type'] == 'copy':
                if not self._copy_asset(asset):
                    Log.a().warning(
                        'cannot copy assets for app "%s"', self._app['name']
                    )
                    continue

            elif asset['type'] == 'build':
                if not self._build_asset(asset):
                    Log.a().warning(
                        'cannot build assets for app "%s"', self._app['name']
                    )
                    continue

            else:
                Log.a().warning(
                    'invalid asset type "%s" for app "%s"',
                    asset['type'], self._app['name']
                )

        return True
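An illustrative 'assets' section of an app config as consumed above (hypothetical values):

# self._config['assets'] = {
#     'container': [
#         {'type': 'copy', 'src': 'containers', 'dst': 'assets'},
#         {'type': 'build', 'folder': 'tool-src', 'src': 'build/tool-src/bin',
#          'dst': 'assets'},
#     ],
# }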
Example #21
    def _build_asset(self, asset):
        """
        Build app assets.

        Args:
            self: class instance
            asset: what to build

        Returns:
            On success: True.
            On failure: False.

        """
        # make sure the build path exists
        build_path = self._path / 'build'
        build_path.mkdir(exist_ok=True)

        build_repo_path = None
        if not asset.get('folder'):
            Log.a().warning(
                'repo folder must be set when specifying a build asset'
            )
            return False

        # clone build repo
        build_repo_path = build_path / asset['folder']

        if asset.get('repo'):
            # if repo is set, clone and build it
            try:
                if asset.get('tag'):
                    Repo.clone_from(
                        asset['repo'], str(build_repo_path),
                        branch=asset['tag'], config='http.sslVerify=false'
                    )
                else:
                    Repo.clone_from(
                        asset['repo'], str(build_repo_path),
                        config='http.sslVerify=false'
                    )
            except GitError as err:
                Log.an().error(
                    'cannot clone git repo for build: %s [%s]',
                    asset['repo'], str(err)
                )
                return False

        # if repo is not set, packaged build scripts are included with the
        # workflow in the build_repo_path

        # build
        cmd = 'make -C "{}"'.format(str(build_repo_path))
        Log.some().info('build command: %s', cmd)
        cmd_result = ShellWrapper.invoke(cmd)
        if cmd_result is False:
            Log.a().warning('cannot build app: %s', cmd)
            return False

        Log.some().info('make stdout: %s', cmd_result)

        # move built assets
        # make sure asset folder exists
        if not asset.get('dst'):
            Log.a().warning('asset dst required for app %s', self._app['name'])
            return False

        if not asset.get('src'):
            Log.a().warning('asset src required for app %s', self._app['name'])
            return False

        # create asset destination
        asset_path = self._path / asset['dst']
        asset_path.mkdir(exist_ok=True)

        # set src path
        src_path = self._path / asset['src']

        if 'zip' in asset:
            # create a tar.gz of src
            cmd = 'tar -czf "{}" --directory="{}" .'.format(
                str(asset_path / '{}.tar.gz'.format(asset['zip'])),
                str(src_path)
            )
            Log.some().info('zipping: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot zip asset src: %s', cmd)
                return False

            Log.some().info('tar stdout: %s', cmd_result)

        else:
            # move without creating tar.gz
            cmd = 'mv "{}" "{}"'.format(str(src_path), str(asset_path))
            Log.some().info('moving: %s', cmd)
            cmd_result = ShellWrapper.invoke(cmd)
            if cmd_result is False:
                Log.a().warning('cannot move asset src: %s', cmd)
                return False

            Log.some().info('mv stdout: %s', cmd_result)

        return True
Example #22
    def load(self, yaml_path):
        """
        Load and validate GeneFlow definition from a multi-doc YAML file.

        Read a GeneFlow definition file, which can contain apps, workflows,
        and jobs. Loaded docs are appended to the _apps, _workflows, and _jobs
        arrays. Load may be called multiple times. Docs are only added if
        successfully validated.

        Args:
            yaml_path: path to GeneFlow YAML definition file.

        Returns:
            On success: True
            On failure: False.

        """
        # load multi-doc yaml file
        gf_def = self.load_yaml(yaml_path)
        if gf_def is False:
            Log.an().error('cannot load yaml file: %s', yaml_path)
            return False

        # iterate through yaml docs
        for gf_doc in gf_def:
            # class must be specified: app, workflow, or job
            if 'class' not in gf_doc:
                Log.a().error('unspecified document class')
                return False

            if gf_doc['class'] == 'app':
                if 'apps' in gf_doc:
                    # this is a list of apps
                    for app in gf_doc['apps']:
                        if not self.add_app(app):
                            Log.an().error('invalid app in definition: %s',
                                           yaml_path)
                            return False

                else:
                    # only one app
                    if not self.add_app(gf_doc):
                        Log.an().error('invalid app in definition: %s',
                                       yaml_path)
                        return False

            elif gf_doc['class'] == 'workflow':
                # only one workflow per yaml file allowed
                if not self.add_workflow(gf_doc):
                    Log.an().error('invalid workflow in definition: %s',
                                   yaml_path)
                    return False

            elif gf_doc['class'] == 'job':
                if 'jobs' in gf_doc:
                    # this is a list of jobs
                    for job in gf_doc['jobs']:
                        if not self.add_job(job):
                            Log.an().error('invalid job in definition: %s',
                                           yaml_path)
                            return False

                else:
                    # only one job
                    if not self.add_job(gf_doc):
                        Log.an().error('invalid job in definition: %s',
                                       yaml_path)
                        return False

            else:
                Log.a().error('invalid document class: %s', gf_doc['class'])
                return False

        return True
Example #23
    def _run_map(self, map_item):
        """
        Run a job for each map item and store the proc and PID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs, overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                inputs[input_key] = map_item['template'][input_key]
            else:
                if self._app['inputs'][input_key]['default']:
                    inputs[input_key] = self._app['inputs'][input_key]['default']

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                parameters[param_key] = map_item['template'][param_key]
            else:
                if self._app['parameters'][param_key]['default'] not in [None, '']:
                    parameters[param_key] \
                        = self._app['parameters'][param_key]['default']

        # construct shell command
        cmd = self._app['implementation']['local']['script']
        for input_key in inputs:
            if inputs[input_key]:
                cmd += ' --{}="{}"'.format(
                    input_key,
                    URIParser.parse(inputs[input_key])['chopped_path']
                )
        for param_key in parameters:
            if param_key == 'output':
                cmd += ' --output="{}/{}"'.format(
                    self._parsed_data_uris[self._source_context]\
                        ['chopped_path'],
                    parameters['output']
                )

            else:
                cmd += ' --{}="{}"'.format(
                    param_key, parameters[param_key]
                )

        # add execution method
        cmd += ' --exec_method="{}"'.format(self._step['execution']['method'])

        # specify execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            cmd += ' --exec_init="{}"'.format(self._step['execution']['parameters']['init'])

        # add stdout and stderr
        log_path = '{}/_log/gf-{}-{}-{}'.format(
            self._parsed_data_uris[self._source_context]['chopped_path'],
            map_item['attempt'],
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
        )
        cmd += ' > "{}.out" 2> "{}.err"'.format(log_path, log_path)

        Log.a().debug('command: %s', cmd)

        # launch process
        proc = ShellWrapper.spawn(cmd)
        if proc is False:
            msg = 'shell process error: {}'.format(cmd)
            Log.an().error(msg)
            return self._fatal(msg)

        # record job info
        map_item['run'][map_item['attempt']]['proc'] = proc
        map_item['run'][map_item['attempt']]['pid'] = proc.pid

        # set status of process
        map_item['status'] = 'RUNNING'
        map_item['run'][map_item['attempt']]['status'] = 'RUNNING'

        return True
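An illustrative command as constructed above (a sketch; all paths, option names, and values are hypothetical):

# /usr/local/bin/wrapper.sh --reads="/data/reads.fq" \
#     --output="/work/step1/output" --exec_method="auto" \
#     > "/work/step1/_log/gf-0-step1-output.out" \
#     2> "/work/step1/_log/gf-0-step1-output.err"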
Example #24
    def register_agave_app(self, agave, agave_config, agave_params, agave_publish):
        """
        Register app in Agave.

        Args:
            self: class instance.
            agave: Agave connection object.
            agave_config: Agave client configuration.
            agave_params: Agave registration parameters.
            agave_publish: if True, publish the app after registration.

        Returns:
            On success: dict with the registered app id, version, and
                revision.
            On failure: False.

        """
        Log.some().info('registering agave app %s', str(self._path))
        Log.some().info('app version: %s', self._config['version'])

        # compile agave app template
        if not TemplateCompiler.compile_template(
                self._path,
                'agave-app-def.json.j2',
                self._path / 'agave-app-def.json',
                version=self._config['version'],
                agave=agave_params['agave']
        ):
            Log.a().warning(
                'cannot compile agave app "%s" definition from template',
                self._app['name']
            )
            return False

        # create main apps URI
        parsed_agave_apps_uri = URIParser.parse(
            'agave://{}/{}'.format(
                agave_params['agave']['deploymentSystem'],
                agave_params['agave']['appsDir']
            )
        )
        Log.some().info(
            'creating main apps uri: %s',
            parsed_agave_apps_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_agave_apps_uri,
                recursive=True,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning('cannot create main agave apps uri')
            return False

        # delete app uri if it exists
        parsed_app_uri = URIParser.parse(
            'agave://{}/{}/{}'.format(
                agave_params['agave']['deploymentSystem'],
                agave_params['agave']['appsDir'],
                self._app['folder']
            )
        )
        Log.some().info(
            'deleting app uri if it exists: %s',
            parsed_app_uri['chopped_uri']
        )
        if not DataManager.delete(
                parsed_uri=parsed_app_uri,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            # log warning, but ignore; deleting a non-existent uri returns False
            Log.a().warning(
                'cannot delete app uri: %s', parsed_app_uri['chopped_uri']
            )

        # upload app assets
        parsed_assets_uri = URIParser.parse(str(self._path / 'assets'))
        Log.some().info(
            'copying app assets from %s to %s',
            parsed_assets_uri['chopped_uri'],
            parsed_app_uri['chopped_uri']
        )

        if not DataManager.copy(
                parsed_src_uri=parsed_assets_uri,
                parsed_dest_uri=parsed_app_uri,
                local={},
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot copy app assets from %s to %s',
                parsed_assets_uri['chopped_uri'],
                parsed_app_uri['chopped_uri']
            )
            return False

        # upload test script
        parsed_test_uri = URIParser.parse(
            '{}/{}'.format(
                parsed_app_uri['chopped_uri'],
                'test'
            )
        )
        Log.some().info(
            'creating test uri: %s', parsed_test_uri['chopped_uri']
        )
        if not DataManager.mkdir(
                parsed_uri=parsed_test_uri,
                recursive=True,
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot create test uri: %s', parsed_test_uri['chopped_uri']
            )
            return False

        parsed_local_test_script = URIParser.parse(
            str(self._path / 'test' / 'test.sh')
        )
        parsed_agave_test_script = URIParser.parse(
            '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh')
        )
        Log.some().info(
            'copying test script from %s to %s',
            parsed_local_test_script['chopped_uri'],
            parsed_agave_test_script['chopped_uri']
        )
        if not DataManager.copy(
                parsed_src_uri=parsed_local_test_script,
                parsed_dest_uri=parsed_agave_test_script,
                local={},
                agave={
                    'agave': agave,
                    'agave_config': agave_config
                }
        ):
            Log.a().warning(
                'cannot copy test script from %s to %s',
                parsed_local_test_script['chopped_uri'],
                parsed_agave_test_script['chopped_uri']
            )
            return False

        # update existing app, or register new app
        Log.some().info('registering agave app')

        app_definition = self._yaml_to_dict(
            str(self._path / 'agave-app-def.json')
        )
        if not app_definition:
            Log.a().warning(
                'cannot load agave app definition: %s',
                str(self._path / 'agave-app-def.json')
            )
            return False

        agwrap = AgaveAppsAddUpdate(
            agave, agave_config
        )
        app_add_result = agwrap.call(app_definition)
        if not app_add_result:
            Log.a().warning(
                'cannot register agave app:\n%s', pprint.pformat(app_definition)
            )
            return False

        register_result = {}

        # publish app
        if agave_publish:
            Log.some().info('publishing agave app')

            agwrap = AgaveAppsPublish(
                agave, agave_config
            )
            app_publish_result = agwrap.call(app_add_result['id'])
            if not app_publish_result:
                Log.a().warning(
                    'cannot publish agave app: %s', app_add_result['id']
                )
                return False

            # return published id and revision
            register_result = {
                'id': app_publish_result['id'],
                'version': self._config['version'],
                'revision': 'u{}'.format(app_publish_result['revision'])
            }

        else:
            # return un-published id and blank revision
            register_result = {
                'id': app_add_result['id'],
                'version': self._config['version'],
                'revision': ''
            }

        return register_result
Example #25
    def install_apps(self):
        """
        Install apps for the workflow package.

        Args:
            self: class instance.

        Returns:
            True.

        """
        apps_path = Path(self._path) / 'workflow' / 'apps'
        if self._clean:
            # remove apps folder
            if apps_path.is_dir():
                shutil.rmtree(str(apps_path))

        # create apps folder if not already there
        apps_path.mkdir(exist_ok=True)

        for app in self._apps_repo['apps']:
            if self._app_name == app['name'] or not self._app_name:

                Log.some().info('app:\n%s', pprint.pformat(app))

                repo_path = apps_path / app['folder']

                # create AppInstaller instance
                app_installer = AppInstaller(str(repo_path), app,
                                             self._app_asset,
                                             self._copy_prefix)

                # clone app into install location
                if not app_installer.clone_git_repo():
                    Log.an().error('cannot clone app to %s', str(repo_path))
                    # skip app
                    continue

                if not app_installer.load_config():
                    Log.an().error('cannot load app config.yaml')
                    # skip app
                    continue

                if self._make_apps:
                    if not app_installer.make():
                        Log.an().error('cannot compile app templates')
                        # skip app
                        continue

                if not app_installer.install_assets():
                    Log.an().error('cannot install app assets')
                    # skip app
                    continue

                # register in Agave
                if (self._agave and self._agave_params
                        and self._agave_params.get('agave')):
                    register_result = app_installer.register_agave_app(
                        self._agave, self._config['agave'], self._agave_params,
                        self._agave_publish)
                    if not register_result:
                        Log.a().warning('cannot register app "%s" in agave',
                                        app['name'])
                        # skip app
                        continue

                    Log.some().info('registered agave app:\n%s',
                                    pprint.pformat(register_result))

                    # compile jinja template for published app definition
                    if not TemplateCompiler.compile_template(
                            repo_path,
                            'app.yaml.j2',
                            repo_path / 'app.yaml',
                            agave=self._agave_params['agave'],
                            version=register_result['version'],
                            revision=register_result['revision']):
                        Log.a().warning(
                            'cannot compile app "%s" definition from template',
                            app['name'])
                        # skip app
                        continue

                else:

                    # compile jinja template for app definition
                    if not TemplateCompiler.compile_template(
                            repo_path, 'app.yaml.j2', repo_path / 'app.yaml'):
                        Log.a().warning(
                            'cannot compile app "%s" definition from template',
                            app['name'])
                        # skip app
                        continue

        return True
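
TemplateCompiler.compile_template is GeneFlow's own helper; the sketch below shows what an equivalent Jinja2 render step could look like, assuming app.yaml.j2 sits in the cloned repo directory (compile_app_template is hypothetical):

from pathlib import Path
from jinja2 import Environment, FileSystemLoader

def compile_app_template(repo_path, template_name, target_path, **context):
    """Render a Jinja2 template from repo_path into target_path."""
    env = Environment(loader=FileSystemLoader(str(repo_path)))
    template = env.get_template(template_name)
    Path(target_path).write_text(template.render(**context))

# e.g., for a published app:
# compile_app_template(repo_path, 'app.yaml.j2', repo_path / 'app.yaml',
#                      agave=agave_params['agave'], version='0.1', revision='u1')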
Example #26
0
    def _run_map(self, map_item):
        """
        Run a job for the given map item and store the job ID.

        Args:
            self: class instance.
            map_item: map item object (item of self._map).

        Returns:
            On success: True.
            On failure: False.

        """
        # load default app inputs, overwrite with template inputs
        inputs = {}
        for input_key in self._app['inputs']:
            if input_key in map_item['template']:
                inputs[input_key] = map_item['template'][input_key]
            else:
                if self._app['inputs'][input_key]['default']:
                    inputs[input_key] = self._app['inputs'][input_key][
                        'default']

        # load default app parameters, overwrite with template parameters
        parameters = {}
        for param_key in self._app['parameters']:
            if param_key in map_item['template']:
                parameters[param_key] = map_item['template'][param_key]
            else:
                if self._app['parameters'][param_key]['default'] not in [
                        None, ''
                ]:
                    parameters[param_key] \
                        = self._app['parameters'][param_key]['default']

        # get full path of wrapper script
        path = shutil.which(self._app['implementation']['local']['script'])
        if not path:
            msg = 'wrapper script not found in path: {}'.format(
                self._app['implementation']['local']['script'])
            Log.an().error(msg)
            return self._fatal(msg)

        # construct argument list for wrapper script
        args = [path]
        for input_key in inputs:
            if inputs[input_key]:
                args.append('--{}={}'.format(
                    input_key,
                    URIParser.parse(inputs[input_key])['chopped_path']))
        for param_key in parameters:
            if param_key == 'output':
                args.append('--output={}/{}'.format(
                    self._parsed_data_uris[self._source_context][0][
                        'chopped_path'],
                    parameters['output']
                ))

            else:
                args.append('--{}={}'.format(param_key, parameters[param_key]))

        # add execution method
        args.append('--exec_method={}'.format(
            self._step['execution']['method']))

        # specify execution init commands if 'init' param given
        if 'init' in self._step['execution']['parameters']:
            args.append('--exec_init={}'.format(
                self._step['execution']['parameters']['init']))

        Log.some().debug('[step.%s]: command: %s -> %s', self._step['name'],
                         map_item['template']['output'], ' '.join(args))

        # construct job name
        name = 'gf-{}-{}-{}'.format(
            map_item['attempt'],
            slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
            slugify(map_item['template']['output'],
                    regex_pattern=r'[^-a-z0-9_]+'))

        # construct paths for logging stdout and stderr
        log_path = '{}/_log/{}'.format(
            self._parsed_data_uris[self._source_context][0]['chopped_path'],
            name)

        # create and populate job template
        jt = self._slurm['drmaa_session'].createJobTemplate()
        jt.remoteCommand = '/bin/bash'
        jt.args = args
        jt.jobName = name
        jt.errorPath = ':{}.err'.format(log_path)
        jt.outputPath = ':{}.out'.format(log_path)

        # pass execution parameters to job template
        native_spec = ' --nodes=1 --ntasks=1'
        if 'queue' in self._step['execution']['parameters']:
            native_spec += ' -p {}'.format(
                self._step['execution']['parameters']['queue'])
        if 'slots' in self._step['execution']['parameters']:
            native_spec += ' --cpus-per-task={}'.format(
                self._step['execution']['parameters']['slots'])
        if 'other' in self._step['execution']['parameters']:
            native_spec += ' {}'.format(
                self._step['execution']['parameters']['other'])
        jt.nativeSpecification = native_spec

        # submit hpc job using drmaa library
        try:
            job_id = self._slurm['drmaa_session'].runJob(jt)

        except drmaa.DrmCommunicationException as err:
            msg = 'cannot submit slurm job for step "{}" [{}]'\
                    .format(self._step['name'], str(err))
            Log.a().warning(msg)

            # set to failed, but return True so that it's retried
            map_item['status'] = 'FAILED'
            map_item['run'][map_item['attempt']]['status'] = 'FAILED'

            return True

        self._slurm['drmaa_session'].deleteJobTemplate(jt)

        Log.some().debug(
            '[step.%s]: hpc job id: %s -> %s', self._step['name'],
            map_item['template']['output'], job_id)

        # record job info
        map_item['run'][map_item['attempt']]['hpc_job_id'] = job_id

        # set status of process
        map_item['status'] = 'QUEUED'
        map_item['run'][map_item['attempt']]['status'] = 'QUEUED'

        return True
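
The SLURM native specification assembled above is easy to factor out and verify in isolation; build_native_spec below is a hypothetical helper that mirrors the same rules:

def build_native_spec(exec_params):
    """Build a SLURM native specification string from execution parameters."""
    spec = ' --nodes=1 --ntasks=1'
    if 'queue' in exec_params:
        spec += ' -p {}'.format(exec_params['queue'])
    if 'slots' in exec_params:
        spec += ' --cpus-per-task={}'.format(exec_params['slots'])
    if 'other' in exec_params:
        spec += ' {}'.format(exec_params['other'])
    return spec

# build_native_spec({'queue': 'normal', 'slots': 4})
# -> ' --nodes=1 --ntasks=1 -p normal --cpus-per-task=4'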
Example #27
0
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update
        _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are still running
        for map_item in self._map:

            map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )

            # for status failures, set to 'UNKNOWN'
            if not map_item['status']:
                msg = 'cannot get job status for step "{}"'\
                    .format(self._step['name'])
                Log.a().warning(msg)
                map_item['status'] = 'UNKNOWN'

            # set status of run-attempt
            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            # check hpc job ids
            if map_item['run'][map_item['attempt']]['hpc_job_id']:
                # already have it
                continue

            # look for the hpc job id in the job history
            response = self._agave['agave_wrapper'].jobs_get_history(
                map_item['run'][map_item['attempt']]['agave_job_id']
            )

            if not response:
                msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )
                Log.a().warning(msg)
                continue

            for item in response:
                if item['status'] == 'QUEUED':
                    match = re.match(
                        r'^HPC.*local job (\d*)$', item['description']
                    )
                    if match:
                        map_item['run'][map_item['attempt']]['hpc_job_id'] \
                            = match.group(1)

                        # log hpc job id
                        Log.some().debug(
                            '[step.%s]: hpc job id: %s -> %s',
                            self._step['name'],
                            map_item['template']['output'],
                            match.group(1)
                        )

                        break

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                # retry job if not at limit
                if not self.retry_failed(map_item):
                    Log.a().warning(
                        '[step.%s]: cannot retry failed agave job (%s)',
                        self._step['name'],
                        map_item['template']['output']
                    )

        self._update_status_db(self._status, '')

        return True
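
The hpc job id is recovered by pattern-matching the free-text description of the QUEUED event in the Agave job history. A small illustration; the description string below is an assumption chosen to satisfy the regex, not a documented Agave format:

import re

history_item = {
    'status': 'QUEUED',
    'description': 'HPC job accepted by scheduler as local job 123456'
}

match = re.match(r'^HPC.*local job (\d*)$', history_item['description'])
if match:
    hpc_job_id = match.group(1)  # '123456'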
Example #28
0
    def check_running_jobs(self):
        """
        Check the status/progress of all map-reduce items and update
        _map status.

        Args:
            self: class instance.

        Returns:
            True.

        """
        # check if jobs are still running
        for map_item in self._map:
            if map_item['status'] not in ['FINISHED', 'FAILED', 'PENDING']:

                map_item['status'] = self._agave['agave_wrapper'].jobs_get_status(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )

                # for status failures, set to 'UNKNOWN'
                if not map_item['status']:
                    msg = 'cannot get job status for step "{}"'\
                        .format(self._step['name'])
                    Log.a().warning(msg)
                    map_item['status'] = 'UNKNOWN'

                if map_item['status'] in ['FINISHED', 'FAILED']:
                    # status changed to finished or failed
                    Log.some().debug(
                        '[step.%s]: exit status: %s -> %s',
                        self._step['name'],
                        map_item['template']['output'],
                        map_item['status']
                    )

                    # decrease num running procs
                    if self._num_running > 0:
                        self._num_running -= 1

            # check hpc job ids
            if (
                map_item['status'] != 'PENDING'
                and not map_item['run'][map_item['attempt']].get('hpc_job_id', '')
            ):

                # look for the hpc job id in the job history
                response = self._agave['agave_wrapper'].jobs_get_history(
                    map_item['run'][map_item['attempt']]['agave_job_id']
                )

                if not response:
                    msg = 'cannot get hpc job id for job: agave_job_id={}'.format(
                        map_item['run'][map_item['attempt']]['agave_job_id']
                    )
                    Log.a().warning(msg)

                else:
                    for item in response:
                        if item['status'] == 'QUEUED':
                            match = re.match(
                                r'^HPC.*local job (\d*)$', item['description']
                            )
                            if match:
                                map_item['run'][map_item['attempt']]['hpc_job_id'] \
                                    = match.group(1)

                                # log hpc job id
                                Log.some().debug(
                                    '[step.%s]: hpc job id: %s -> %s',
                                    self._step['name'],
                                    map_item['template']['output'],
                                    match.group(1)
                                )

                                break

            map_item['run'][map_item['attempt']]['status'] = map_item['status']

            if map_item['status'] == 'FAILED' and map_item['attempt'] < 5:
                if self._throttle_limit == 0 or self._num_running < self._throttle_limit:
                    # retry job if not at retry or throttle limit
                    if not self.retry_failed(map_item):
                        Log.a().warning(
                            '[step.%s]: cannot retry failed agave job (%s)',
                            self._step['name'],
                            map_item['template']['output']
                        )
                    else:
                        self._num_running += 1

        self._update_status_db(self._status, '')

        return True
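
Compared with the previous example, retries here are additionally gated by a throttle limit, where 0 means unlimited. The gate reduces to a one-line predicate (can_retry is a hypothetical helper, shown only to make the condition explicit):

def can_retry(num_running, throttle_limit):
    """Return True if a failed job may be resubmitted right now."""
    return throttle_limit == 0 or num_running < throttle_limit

assert can_retry(3, 0)       # a limit of 0 disables throttling
assert can_retry(4, 5)       # below the limit: retry and bump _num_running
assert not can_retry(5, 5)   # at the limit: wait for a running job to finish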
Example #29
0
def run(args, other_args, subparser):
    """
    Run GeneFlow workflow engine.

    Args:
        args.workflow_path: workflow definition or package directory.
        args.job: path to job definition.

    Returns:
        On success: True.
        On failure: False.

    """
    # get absolute path to workflow
    workflow_path = resolve_workflow_path(args.workflow_path)
    if workflow_path:
        Log.some().info('workflow definition found: %s', workflow_path)
    else:
        Log.an().error('cannot find workflow definition: %s',
                       args.workflow_path)
        return False

    # setup environment
    env = Environment(workflow_path=workflow_path)
    if not env.initialize():
        Log.an().error('cannot initialize geneflow environment')
        return False

    # create default config file and SQLite db
    cfg = Config()
    cfg.default(env.get_sqlite_db_path())
    cfg.write(env.get_config_path())
    config_dict = cfg.config('local')

    # load workflow into db
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    defs = data_source.import_definition(workflow_path)
    if not defs:
        Log.an().error('workflow definition load failed: %s', workflow_path)
        return False

    if not defs['workflows']:
        Log.an().error('workflow definition load failed: %s', workflow_path)
        return False

    data_source.commit()

    for workflow in defs['workflows']:
        Log.some().info('workflow loaded: %s -> %s', workflow,
                        defs['workflows'][workflow])

    # get workflow definition back from database to ensure
    # that it's a valid definition
    workflow_id = next(iter(defs['workflows'].values()))
    workflow_dict = data_source.get_workflow_def_by_id(workflow_id)
    if not workflow_dict:
        Log.an().error(
            'cannot get workflow definition from data source: workflow_id=%s',
            workflow_id)
        return False

    ### define arg parsing methods
    def parse_dynamic_args(workflow_dict):
        """
        Parse dynamic args based on the workflow dictionary as well as
        some static args.

        Args:
            workflow_dict: workflow dictionary. (other_args, the list of
                remaining args from the initial parse of the workflow
                path, is captured from the enclosing run() scope.)

        Returns:
            On success: namespace of parsed arguments.
            On failure: False.

        """
        # parse dynamic args. these are determined from workflow definition
        dynamic_parser = argparse.ArgumentParser()

        dynamic_parser.add_argument('-j',
                                    '--job',
                                    type=str,
                                    default=None,
                                    dest='job_path',
                                    help='Job Definition(s)')
        for input_key in workflow_dict['inputs']:
            dynamic_parser.add_argument(
                '--in.{}'.format(input_key),
                dest='inputs.{}'.format(input_key),
                required=False,
                default=workflow_dict['inputs'][input_key]['default'],
                help=workflow_dict['inputs'][input_key]['label'])
        for param_key in workflow_dict['parameters']:
            dynamic_parser.add_argument(
                '--param.{}'.format(param_key),
                dest='parameters.{}'.format(param_key),
                required=False,
                default=workflow_dict['parameters'][param_key]['default'],
                help=workflow_dict['parameters'][param_key]['label'])
        dynamic_parser.add_argument('-o',
                                    '--output',
                                    type=str,
                                    default='~/geneflow-output',
                                    help='Output Folder')
        dynamic_parser.add_argument('-n',
                                    '--name',
                                    type=str,
                                    default='geneflow-job',
                                    help='Name of Job')
        dynamic_parser.add_argument('-w',
                                    '--work',
                                    nargs='+',
                                    type=str,
                                    default=[],
                                    help='Work Directory')
        dynamic_parser.add_argument('--exec-context',
                                    '--ec',
                                    nargs='+',
                                    type=str,
                                    dest='exec_context',
                                    default=[],
                                    help='Execution Contexts')
        dynamic_parser.add_argument('--exec-method',
                                    '--em',
                                    nargs='+',
                                    type=str,
                                    dest='exec_method',
                                    default=[],
                                    help='Execution Methods')
        dynamic_parser.add_argument('--exec-param',
                                    '--ep',
                                    nargs='+',
                                    type=str,
                                    dest='exec_param',
                                    default=[],
                                    help='Execution Parameters')

        dynamic_args = dynamic_parser.parse_known_args(other_args)

        return dynamic_args[0]

    if 'gooey' in sys.modules:

        @Gooey(program_name='GeneFlow: {}'.format(workflow_dict['name']),
               program_description=workflow_dict['description'],
               target='gf --log-level={} run {}'.format(
                   args.log_level, args.workflow_path),
               monospace_display=True)
        def parse_dynamic_args_gui(workflow_dict):
            """
            Parse dynamic args based on the workflow dictionary as well as
            some static args, displayed in a GUI interface.

            Args:
                workflow_dict: workflow dictionary. (other_args, the list
                    of remaining args from the initial parse of the
                    workflow path, is captured from the enclosing run()
                    scope.)

            Returns:
                On success: namespace of parsed arguments.
                On failure: False.

            """
            # parse dynamic args. these are determined from workflow definition
            dynamic_parser = GooeyParser()
            input_group = dynamic_parser.add_argument_group(
                "Workflow Inputs",
                "Files or folders to be passed to the workflow")
            for input_key in workflow_dict['inputs']:
                widget = 'FileChooser'
                if workflow_dict['inputs'][input_key]['type'] == 'Directory':
                    widget = 'DirChooser'
                input_group.add_argument(
                    '--in.{}'.format(input_key),
                    dest='inputs.{}'.format(input_key),
                    required=False,
                    default=workflow_dict['inputs'][input_key]['default'],
                    help=workflow_dict['inputs'][input_key]['label'],
                    widget=widget)
            param_group = dynamic_parser.add_argument_group(
                "Workflow Parameters",
                "Number or string parameters to be passed to the workflow")
            for param_key in workflow_dict['parameters']:
                param_group.add_argument(
                    '--param.{}'.format(param_key),
                    dest='parameters.{}'.format(param_key),
                    required=False,
                    default=workflow_dict['parameters'][param_key]['default'],
                    help=workflow_dict['parameters'][param_key]['label'])
            job_group = dynamic_parser.add_argument_group(
                "Job Options", "Output/intermediate folders and job name")
            job_group.add_argument('-o',
                                   '--output',
                                   type=str,
                                   default='~/geneflow-output',
                                   help='Output Folder',
                                   widget='DirChooser')
            job_group.add_argument('-n',
                                   '--name',
                                   type=str,
                                   default='geneflow-job',
                                   help='Name of Job')
            job_group.add_argument('-w',
                                   '--work',
                                   nargs='+',
                                   type=str,
                                   default=[],
                                   help='Work Directory')
            exec_group = dynamic_parser.add_argument_group(
                "Execution Options", "Customize workflow execution")
            exec_group.add_argument('--exec-context',
                                    '--ec',
                                    nargs='+',
                                    type=str,
                                    dest='exec_context',
                                    default=[],
                                    help='Execution Contexts')
            exec_group.add_argument('--exec-method',
                                    '--em',
                                    nargs='+',
                                    type=str,
                                    dest='exec_method',
                                    default=[],
                                    help='Execution Methods')
            exec_group.add_argument('--exec-param',
                                    '--ep',
                                    nargs='+',
                                    type=str,
                                    dest='exec_param',
                                    default=[],
                                    help='Execution Parameters')

            dynamic_args = dynamic_parser.parse_args(other_args)

            return dynamic_args

    # get dynamic args
    if args.gui and 'gooey' in sys.modules:
        dynamic_args = parse_dynamic_args_gui(workflow_dict)
    else:
        dynamic_args = parse_dynamic_args(workflow_dict)

    # get absolute path to job file if provided
    job_path = None
    if dynamic_args.job_path:
        job_path = Path(dynamic_args.job_path).absolute()

    # load job definition if provided
    jobs_dict = {}
    gf_def = Definition()
    if job_path:
        if not gf_def.load(job_path):
            Log.an().error('job definition load failed')
            return False
        jobs_dict = gf_def.jobs()
    else:
        # create default definition
        jobs_dict = {
            'job': {
                'name': 'GeneFlow job',
                'output_uri': 'geneflow_output',
                'work_uri': {
                    'local': '~/.geneflow/work'
                }
            }
        }

    # override with known cli parameters
    apply_job_modifiers(jobs_dict, [
        'name={}'.format(dynamic_args.name), 'output_uri={}'.format(
            dynamic_args.output)
    ])

    # insert workflow name into job, if not provided
    workflow_name = next(iter(defs['workflows']))
    for job in jobs_dict.values():
        if 'workflow_name' not in job:
            job['workflow_name'] = workflow_name

    # add inputs and parameters to job definition
    apply_job_modifiers(
        jobs_dict,
        [
            '{}={}'.format(dynamic_arg, getattr(dynamic_args, dynamic_arg))
            for dynamic_arg in vars(dynamic_args)
            if dynamic_arg.startswith('inputs.')
            or dynamic_arg.startswith('parameters.')
        ]
    )

    # add work URIs to job definition
    work_uris = {}
    for work_arg in dynamic_args.work:
        parsed_work_uri = URIParser.parse(work_arg)
        if not parsed_work_uri:
            # skip if invalid URI
            Log.a().warning('invalid work uri: %s', work_arg)
        else:
            work_uris[
                parsed_work_uri['scheme']] = parsed_work_uri['chopped_uri']

    apply_job_modifiers(jobs_dict, [
        'work_uri.{}={}'.format(context, work_uris[context])
        for context in work_uris
    ])

    # add execution options to job definition
    apply_job_modifiers(jobs_dict, [
        'execution.context.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_context
    ] + [
        'execution.method.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_method
    ] + [
        'execution.parameters.{}={}'.format(*exec_arg.split(':', 1)[0:2])
        for exec_arg in dynamic_args.exec_param
    ])

    # get default values from workflow definition
    for job in jobs_dict.values():
        if 'inputs' not in job:
            job['inputs'] = {}
        if 'parameters' not in job:
            job['parameters'] = {}
        for input_key in workflow_dict['inputs']:
            if input_key not in job['inputs']:
                job['inputs'][input_key]\
                    = workflow_dict['inputs'][input_key]['default']
        for param_key in workflow_dict['parameters']:
            if param_key not in job['parameters']:
                job['parameters'][param_key]\
                    = workflow_dict['parameters'][param_key]['default']

    # expand URIs
    for job in jobs_dict.values():
        # output URI
        parsed_uri = URIParser.parse(job['output_uri'])
        if not parsed_uri:
            Log.an().error('invalid output uri: %s', job['output_uri'])
            return False
        # expand relative path if local
        if parsed_uri['scheme'] == 'local':
            job['output_uri'] = str(
                Path(parsed_uri['chopped_path']).expanduser().resolve())
        # work URIs
        for context in job['work_uri']:
            parsed_uri = URIParser.parse(job['work_uri'][context])
            if not parsed_uri:
                Log.an().error('invalid work uri: %s', job['work_uri'])
                return False
            # expand relative path if local
            if parsed_uri['scheme'] == 'local':
                job['work_uri'][context] = str(
                    Path(parsed_uri['chopped_path']).expanduser().resolve())
        # input URIs
        for input_key in job['inputs']:
            parsed_uri = URIParser.parse(job['inputs'][input_key])
            if not parsed_uri:
                Log.an().error('invalid input uri: %s',
                               job['inputs'][input_key])
                return False
            # expand relative path if local
            if parsed_uri['scheme'] == 'local':
                job['inputs'][input_key] = str(
                    Path(parsed_uri['chopped_path']).expanduser().resolve())

    # import jobs into database
    job_ids = data_source.import_jobs_from_dict(jobs_dict)
    if job_ids is False:
        Log.an().error('cannot import jobs')
        return False
    data_source.commit()

    # create process pool to run workflows in parallel
    pool = Pool(min(5, len(job_ids)))
    jobs = [{'name': job, 'id': job_ids[job], 'log': None} for job in job_ids]

    result = pool.map(
        partial(geneflow.cli.common.run_workflow,
                config=config_dict,
                log_level=args.log_level), jobs)

    pool.close()
    pool.join()

    if not all(result):
        Log.an().error('some jobs failed')

    return result
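
The core trick in parse_dynamic_args is generating argparse options from the workflow definition and stashing the values under dotted dest names ('inputs.*', 'parameters.*') so they can later be re-applied as job modifiers. A self-contained sketch with a made-up two-entry workflow dict:

import argparse

workflow = {
    'inputs': {'reads': {'default': None, 'label': 'FASTQ reads'}},
    'parameters': {'threads': {'default': 4, 'label': 'CPU threads'}}
}

parser = argparse.ArgumentParser()
for key, spec in workflow['inputs'].items():
    parser.add_argument('--in.{}'.format(key), dest='inputs.{}'.format(key),
                        default=spec['default'], help=spec['label'])
for key, spec in workflow['parameters'].items():
    parser.add_argument('--param.{}'.format(key),
                        dest='parameters.{}'.format(key),
                        default=spec['default'], help=spec['label'])

args, _ = parser.parse_known_args(['--in.reads', 'sample.fastq'])
print(getattr(args, 'inputs.reads'))        # sample.fastq
print(getattr(args, 'parameters.threads'))  # 4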
Example #30
0
def run_pending(args):
    """
    Run any jobs in database in the PENDING state.

    Args:
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.

    Returns:
        On success: True.
        On failure: False.

    """
    config_file = args.config_file
    environment = args.environment
    log_location = args.log_location

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # get pending jobs from database
    pending_jobs = data_source.get_pending_jobs()
    if pending_jobs is False:
        Log.an().error('cannot query for pending jobs')
        return False

    if not pending_jobs:
        # no jobs found
        return True

    Log.some().info('pending jobs found:\n%s', pprint.pformat(pending_jobs))

    # set job status to QUEUED to minimize the chance that another
    # process will try to run it
    for job in pending_jobs:
        if not data_source.update_job_status(job['id'], 'QUEUED', ''):
            Log.a().warning('cannot update job status in data source')
            data_source.rollback()
        data_source.commit()

    # create a process pool to run at most 5 jobs concurrently
    pool = Pool(min(5, len(pending_jobs)))
    jobs = [{
        'name': job['name'],
        'id': job['id'],
        'log': str(Path(log_location) / (job['id'] + '.log'))
    } for job in pending_jobs]

    result = pool.map(
        partial(geneflow.cli.common.run_workflow,
                config=config_dict,
                log_level=args.log_level), jobs)
    pool.close()
    pool.join()

    if not all(result):
        Log.an().error('some jobs failed')

    return result
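
Both run() and run_pending() fan jobs out with the same pool-and-partial pattern; below is a minimal standalone sketch, where run_one stands in for geneflow.cli.common.run_workflow:

from functools import partial
from multiprocessing import Pool

def run_one(job, config, log_level):
    """Run a single workflow job; return True on success."""
    # ... launch the workflow described by job ...
    return True

if __name__ == '__main__':
    jobs = [{'name': 'job1', 'id': 'abc123', 'log': None}]
    pool = Pool(min(5, len(jobs)))
    result = pool.map(partial(run_one, config={}, log_level='info'), jobs)
    pool.close()
    pool.join()
    print(all(result))  # False if any job failed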