def _init_data_uri(self):
    """
    Create output data URI for the source context (local).

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.

    """
    # all operations below act on the parsed URI of the source context
    src_uri = self._parsed_data_uris[self._source_context]

    # this step type only supports the 'local' scheme
    if src_uri['scheme'] != 'local':
        msg = 'invalid data uri scheme for this step: {}'.format(
            src_uri['scheme']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # remove a pre-existing output folder when clean==True; a failed
    # delete is only a warning because mkdir below may still succeed
    if DataManager.exists(parsed_uri=src_uri) and self._clean:
        if not DataManager.delete(parsed_uri=src_uri):
            Log.a().warning(
                'cannot delete existing data uri: %s',
                src_uri['chopped_uri']
            )

    # create the output folder
    if not DataManager.mkdir(parsed_uri=src_uri, recursive=True):
        msg = 'cannot create data uri: {}'.format(src_uri['chopped_uri'])
        Log.an().error(msg)
        return self._fatal(msg)

    # create the _log folder inside the output folder
    log_uri = '{}/_log'.format(src_uri['chopped_uri'])
    if not DataManager.mkdir(uri=log_uri, recursive=True):
        msg = 'cannot create _log folder in data uri: {}/_log'.format(
            src_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    return True
def _init_app_paths(self):
    """
    Add app paths to environment PATH for local workflows.

    The package path contains the workflow definition YAML file and shell
    scripts for calling individual apps used in a workflow.

    Args:
        None.

    Output:
        On success: True.
        On failure: False.

    """
    parsed_uri = URIParser.parse(self._workflow_path)
    if not parsed_uri:
        Log.an().error('invalid workflow path: %s', self._workflow_path)
        return False

    # the 'apps' directory sits alongside the workflow definition;
    # avoid a double slash when the workflow folder is the root '/'
    apps_uri = ('{}{}' if parsed_uri['folder'] == '/' else '{}/{}')\
        .format(parsed_uri['folder'], 'apps')

    # parse the constructed uri once (previously the expression was
    # duplicated verbatim inside the parse call)
    parsed_apps_uri = URIParser.parse(apps_uri)
    if not parsed_apps_uri:
        Log.an().error('cannot construct apps uri: %s', apps_uri)
        return False

    if not DataManager.exists(parsed_uri=parsed_apps_uri):
        # no apps directory; nothing to add to PATH
        return True

    # prepend each app's 'assets' directory to PATH so its scripts
    # can be invoked by name
    for app_dir in DataManager.list(parsed_uri=parsed_apps_uri):
        try:
            os.environ['PATH'] = '{}{}{}'.format(
                os.path.join(
                    parsed_apps_uri['chopped_path'], app_dir, 'assets'
                ),
                os.pathsep,
                os.environ['PATH']
            )
        except OSError as err:
            Log.an().error('workflow app pathmunge error [%s]', str(err))
            return False

    return True
def clean_up(self):
    """
    Copy data from Agave archive location to step output location (data URI).

    For each map item: import the step output from the Agave archive,
    copy any agave job log files (gf-*.out / gf-*.err), then mirror the
    archive's _log directory (if present) into the destination _log dir.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.

    """
    # parsed output URI for the source context, used for every copy
    src_uri = self._parsed_data_uris[self._source_context][0]

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(src_uri['chopped_uri'], '_log')

    # copy data for each map item
    for map_item in self._map:

        archive_uri = map_item['run'][map_item['attempt']]['archive_uri']

        # copy step output
        if not self._agave['agave_wrapper'].files_import_from_agave(
                src_uri['authority'],
                src_uri['chopped_path'],
                map_item['template']['output'],
                '{}/{}'.format(
                    archive_uri,
                    map_item['template']['output']
                )
        ):
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # check for any agave log files (*.out and *.err files);
        # DataManager.list returns False on failure, so compare
        # identity rather than truthiness (an empty list is valid)
        agave_log_list = DataManager.list(
            uri=archive_uri,
            agave=self._agave
        )
        if agave_log_list is False:
            msg = 'cannot get agave log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each agave log file, the pattern is gf-{}-{}-{}.out or .err
        # (raw string: '\d' and '\.' are invalid escapes in a plain string)
        for item in agave_log_list:
            if re.match(r'^gf-\d*-.*\.(out|err)$', item):
                if not self._agave['agave_wrapper'].files_import_from_agave(
                        src_uri['authority'],
                        '{}/{}'.format(src_uri['chopped_path'], '_log'),
                        item,
                        '{}/{}'.format(archive_uri, item)
                ):
                    msg = 'cannot copy agave log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        # check if anything is in the _log directory
        src_log_dir = '{}/{}'.format(archive_uri, '_log')

        if DataManager.exists(
                uri=src_log_dir,
                agave=self._agave
        ):
            # create dest _log dir if it doesn't exist
            if not DataManager.exists(
                    uri=dest_log_dir,
                    agave=self._agave
            ):
                if not DataManager.mkdir(
                        uri=dest_log_dir,
                        agave=self._agave
                ):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir
            log_list = DataManager.list(
                uri=src_log_dir,
                agave=self._agave
            )
            if log_list is False:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not self._agave['agave_wrapper'].files_import_from_agave(
                        src_uri['authority'],
                        '{}/{}'.format(src_uri['chopped_path'], '_log'),
                        item,
                        '{}/{}/{}'.format(archive_uri, '_log', item)
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map).

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            if map_item['template'][input_key]:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(map_item['template'][input_key]), safe='/:'
                )
        else:
            if self._app['inputs'][input_key]['default']:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(self._app['inputs'][input_key]['default']),
                    safe='/:'
                )

    # load default app parameters, overwrite with template parameters,
    # coercing each value to the type declared by the app
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            if self._app['parameters'][param_key]['type'] in ['int', 'long']:
                parameters[param_key] = int(map_item['template'][param_key])
            elif self._app['parameters'][param_key]['type'] in [
                    'float', 'double'
            ]:
                # fixed: was compared with '==' against the list, which
                # is always False for a string type, so float params
                # were silently stringified
                parameters[param_key] = float(map_item['template'][param_key])
            else:
                parameters[param_key] = str(map_item['template'][param_key])
        else:
            if self._app['parameters'][param_key]['default'] not in [None, '']:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

    # add execution method as parameter
    parameters['exec_method'] = self._step['execution']['method']

    # add execution init commands if 'init' param given
    if 'init' in self._step['execution']['parameters']:
        parameters['exec_init'] = self._step['execution']['parameters']['init']

    # construct agave app job name; truncate to agave's 64-char limit
    name = 'gf-{}-{}-{}'.format(
        str(map_item['attempt']),
        slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
        slugify(map_item['template']['output'], regex_pattern=r'[^-a-z0-9_]+')
    )
    name = name[:62]+'..' if len(name) > 64 else name
    archive_path = '{}/{}'.format(
        self._agave['parsed_archive_uri']['chopped_path'],
        name
    )

    app_template = {
        'name': name,
        'appId': self._app['implementation']['agave']['agave_app_id'],
        'archive': True,
        'inputs': inputs,
        'parameters': parameters,
        'archiveSystem': self._agave['parsed_archive_uri']['authority'],
        'archivePath': archive_path
    }

    # specify processors if 'slots' param given
    if 'slots' in self._step['execution']['parameters']:
        app_template['processorsPerNode'] = int(
            self._step['execution']['parameters']['slots']
        )

    # specify memory if 'mem' param given
    if 'mem' in self._step['execution']['parameters']:
        app_template['memoryPerNode'] = '{}'.format(
            self._step['execution']['parameters']['mem']
        )

    Log.some().debug(
        "[step.%s]: agave app template:\n%s",
        self._step['name'],
        pprint.pformat(app_template)
    )

    # delete archive path if it exists
    if DataManager.exists(
            uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
            agave=self._agave
    ):
        if not DataManager.delete(
                uri=self._agave['parsed_archive_uri']['chopped_uri']+'/'+name,
                agave=self._agave
        ):
            Log.a().warning(
                'cannot delete archive uri: %s/%s',
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )

    # submit job
    job = self._agave['agave_wrapper'].jobs_submit(app_template)
    if not job:
        msg = 'agave jobs submit failed for "{}"'.format(
            app_template['name']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # log agave job id
    Log.some().debug(
        '[step.%s]: agave job id: %s -> %s',
        self._step['name'],
        map_item['template']['output'],
        job['id']
    )

    # record job info
    map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
    map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
        .format(
            self._agave['parsed_archive_uri']['chopped_uri'],
            name
        )
    map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

    # set status of process
    map_item['status'] = 'PENDING'
    map_item['run'][map_item['attempt']]['status'] = 'PENDING'

    return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map)

    Returns:
        On success: True.
        On failure: False.

    """
    template = map_item['template']

    # merge app input defaults with template-provided inputs,
    # URL-quoting each value (None becomes the empty string)
    inputs = {}
    for key, spec in self._app['inputs'].items():
        raw = template[key] if key in template else spec['default']
        inputs[key] = urllib.parse.quote(str(raw or ''), safe='/:')

    # merge app parameter defaults with template-provided parameters
    parameters = {
        key: (template[key] if key in template else spec['default'])
        for key, spec in self._app['parameters'].items()
    }

    # execution method is passed through as a parameter
    parameters['exec_method'] = self._step['execution']['method']

    # agave job name, truncated to agave's 64-char limit
    name = 'gf-{}-{}-{}'.format(
        str(map_item['attempt']),
        slugify(self._step['name']),
        slugify(template['output'])
    )
    if len(name) > 64:
        name = name[:62] + '..'

    parsed_archive_uri = self._agave['parsed_archive_uri']
    archive_path = '{}/{}'.format(parsed_archive_uri['chopped_path'], name)

    app_template = {
        'name': name,
        'appId': self._app['definition']['agave']['agave_app_id'],
        'archive': True,
        'inputs': inputs,
        'parameters': parameters,
        'archiveSystem': parsed_archive_uri['authority'],
        'archivePath': archive_path
    }
    Log.some().debug(
        "agave app template:\n%s", pprint.pformat(app_template)
    )

    # remove any stale archive path; failure to delete is only a warning
    archive_item_uri = parsed_archive_uri['chopped_uri'] + '/' + name
    if DataManager.exists(uri=archive_item_uri, agave=self._agave):
        if not DataManager.delete(uri=archive_item_uri, agave=self._agave):
            Log.a().warning(
                'cannot delete archive uri: %s/%s',
                parsed_archive_uri['chopped_uri'],
                name
            )

    # submit the agave job
    job = self._agave['agave_wrapper'].jobs_submit(app_template)
    if not job:
        msg = 'agave jobs submit failed for "{}"'.format(
            app_template['name']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    Log.some().debug(
        'agave job id: %s -> %s', template['output'], job['id']
    )

    # record job info on the current attempt
    attempt_run = map_item['run'][map_item['attempt']]
    attempt_run['agave_job_id'] = job['id']
    attempt_run['archive_uri'] = '{}/{}'\
        .format(
            parsed_archive_uri['chopped_uri'],
            name
        )
    attempt_run['hpc_job_id'] = ''

    # mark the process as pending
    map_item['status'] = 'PENDING'
    attempt_run['status'] = 'PENDING'

    return True
def clean_up(self):
    """
    Copy data from Agave archive location to step output location (data URI).

    For each map item: import the step output from the Agave archive,
    then mirror the archive's _log directory (if present) into the
    destination _log directory.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.

    """
    # parsed output URI for the source context, used for every copy
    src_uri = self._parsed_data_uris[self._source_context]

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(src_uri['chopped_uri'], '_log')

    # create instance of agave wrapper class for data import
    agwrap = AgaveFilesImportDataFromAgave(
        self._agave['agave'],
        self._config['agave']
    )

    # agave connection info passed to every DataManager call
    agave_info = {
        'agave': self._agave['agave'],
        'agave_config': self._config['agave']
    }

    # copy data for each map item
    for map_item in self._map:

        archive_uri = map_item['run'][map_item['attempt']]['archive_uri']

        # copy step output
        if not agwrap.call(
                src_uri['authority'],
                src_uri['chopped_path'],
                map_item['template']['output'],
                '{}/{}'.format(
                    archive_uri,
                    map_item['template']['output']
                )
        ):
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # check if anything is in the _log directory
        src_log_dir = '{}/{}'.format(archive_uri, '_log')

        if DataManager.exists(uri=src_log_dir, agave=agave_info):
            # create dest _log dir if it doesn't exist
            if not DataManager.exists(uri=dest_log_dir, agave=agave_info):
                if not DataManager.mkdir(
                        uri=dest_log_dir, agave=agave_info
                ):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir; DataManager.list
            # returns False on failure, so compare identity -- an
            # empty _log directory is a valid (empty-list) result,
            # not an error (previously 'not log_list' aborted on it)
            log_list = DataManager.list(uri=src_log_dir, agave=agave_info)
            if log_list is False:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not agwrap.call(
                        src_uri['authority'],
                        '{}/{}'.format(src_uri['chopped_path'], '_log'),
                        item,
                        '{}/{}/{}'.format(archive_uri, '_log', item)
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True