def _init_data_uri(self):
    """
    Create output data URI for the source context (local).

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.

    """
    # all operations below act on the source-context URI
    source_uri = self._parsed_data_uris[self._source_context]

    # this step only supports a local source data URI
    if source_uri['scheme'] != 'local':
        msg = 'invalid data uri scheme for this step: {}'.format(
            source_uri['scheme']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # delete old folder if it exists and clean==True
    if DataManager.exists(parsed_uri=source_uri) and self._clean:
        if not DataManager.delete(parsed_uri=source_uri):
            # deletion failure is non-fatal; mkdir below may still succeed
            Log.a().warning(
                'cannot delete existing data uri: %s',
                source_uri['chopped_uri']
            )

    # create the output folder
    if not DataManager.mkdir(parsed_uri=source_uri, recursive=True):
        msg = 'cannot create data uri: {}'.format(
            source_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # create the _log subfolder alongside the output data
    if not DataManager.mkdir(
            uri='{}/_log'.format(source_uri['chopped_uri']),
            recursive=True
    ):
        msg = 'cannot create _log folder in data uri: {}/_log'.format(
            source_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    return True
def upload_agave_test_data(self):
    """
    Upload Agave test data from workflow package.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False (including when agave parameters are missing).

    """
    # agave connection and parameters are required for any upload
    if (not self._agave or not self._agave_params
            or not self._agave_params.get('agave')):
        Log.a().warning(
            'must provide agave parameters to upload test data')
        return False

    # create main test data URI
    parsed_base_test_uri = URIParser.parse('agave://{}/{}'.format(
        self._agave_params['agave']['deploymentSystem'],
        self._agave_params['agave']['testDataDir']))
    Log.some().info('creating base test data uri: %s',
                    parsed_base_test_uri['chopped_uri'])
    if not DataManager.mkdir(
            parsed_uri=parsed_base_test_uri,
            recursive=True,
            agave={
                'agave': self._agave,
                'agave_config': self._config['agave']
            }
    ):
        Log.a().warning('cannot create base test data uri: %s',
                        parsed_base_test_uri['chopped_uri'])
        return False

    # upload test data: local "<path>/data" dir is copied under the base
    # test URI, into a folder named after the workflow package
    parsed_local_test_uri = URIParser.parse(str(Path(self._path) / 'data'))
    parsed_agave_test_uri = URIParser.parse('{}/{}'.format(
        parsed_base_test_uri['chopped_uri'], Path(self._path).name))
    Log.some().info('copying test data from %s to %s',
                    parsed_local_test_uri['chopped_uri'],
                    parsed_agave_test_uri['chopped_uri'])
    if not DataManager.copy(
            parsed_src_uri=parsed_local_test_uri,
            parsed_dest_uri=parsed_agave_test_uri,
            local={},
            agave={
                'agave': self._agave,
                'agave_config': self._config['agave']
            }
    ):
        Log.a().warning('cannot copy test data from %s to %s',
                        parsed_local_test_uri['chopped_uri'],
                        parsed_agave_test_uri['chopped_uri'])
        return False

    return True
def _create_job_uris(self): """ Create all work and output URIs. Args: self: class instance Returns: On success: True. On failure: False. """ # create work URIs. a work URI is required for each workflow context for context in { Contexts.mapping[exec_context]['data_scheme'] for exec_context in self._exec_contexts }: if not DataManager.mkdir( parsed_uri=self._parsed_job_work_uri[context], recursive=True, **{ context: self._workflow_context[context]\ .get_context_options() } ): msg = 'cannot create job work uri for context: {}->{}'.format( context, self._parsed_job_work_uri[context]['chopped_uri'] ) Log.an().error(msg) return self._fatal(msg) # create output URI. output URI scheme must be in the set of data contexts output_context = self._parsed_job_output_uri['scheme'] if output_context not in self._data_contexts: msg = 'invalid output context: {}'.format(output_context) Log.an().error(msg) return self._fatal(msg) if not DataManager.mkdir( parsed_uri=self._parsed_job_output_uri, recursive=True, **{ output_context: self._workflow_context[output_context]\ .get_context_options() } ): msg = 'cannot create job output uri: {}'.format( self._parsed_job_output_uri['chopped_uri'] ) Log.an().error(msg) return self._fatal(msg) return True
def _create_job_uris(self):
    """
    Create all work and output URIs.

    Args:
        self: class instance

    Returns:
        On success: True.
        On failure: False.

    """
    # one work URI per context listed in the job definition
    for ctx in self._job['work_uri']:
        ctx_options = self._workflow_context[ctx].get_context_options()
        if not DataManager.mkdir(
                parsed_uri=self._parsed_job_work_uri[ctx],
                recursive=True,
                **{ctx: ctx_options}
        ):
            msg = 'cannot create job work uri for context: {}->{}'.format(
                ctx, self._parsed_job_work_uri[ctx]['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

    # the output URI's scheme identifies its context, which must be one
    # of the known workflow contexts
    output_context = self._parsed_job_output_uri['scheme']
    if output_context not in self._workflow_context:
        msg = 'invalid output context: {}'.format(output_context)
        Log.an().error(msg)
        return self._fatal(msg)

    output_options = \
        self._workflow_context[output_context].get_context_options()
    if not DataManager.mkdir(
            parsed_uri=self._parsed_job_output_uri,
            recursive=True,
            **{output_context: output_options}
    ):
        msg = 'cannot create job output uri: {}'.format(
            self._parsed_job_output_uri['chopped_uri'])
        Log.an().error(msg)
        return self._fatal(msg)

    return True
def _get_map_uri_list(self):
    """
    Get the contents of the map URI (agave URI).

    Args:
        self: class instance.

    Returns:
        Array of base file names in the map URI. Returns False on
        exception.

    """
    # only agave map URIs are valid for this step type
    scheme = self._parsed_map_uri['scheme']
    if scheme != 'agave':
        msg = 'invalid map uri scheme for this step: {}'.format(scheme)
        Log.an().error(msg)
        return self._fatal(msg)

    # fetch the listing for the map URI
    file_list = DataManager.list(
        parsed_uri=self._parsed_map_uri,
        agave=self._agave
    )
    # DataManager.list returns False (not an empty list) on failure
    if file_list is False:
        msg = 'cannot get contents of map uri: {}'\
            .format(self._parsed_map_uri['chopped_uri'])
        Log.an().error(msg)
        return self._fatal(msg)

    return file_list
def _init_archive_uri(self):
    """
    Initialize and validate Agave job archive URI.

    Args:
        None.

    Returns:
        On success: True.
        On failure: False.

    """
    # the archive location is anchored under the agave work URI
    if 'agave' not in self._parsed_job_work_uri:
        Log.an().error('job work uri must include an agave context')
        return False

    # construct archive URI as "_agave_jobs" under the agave work URI
    agave_work_uri = self._parsed_job_work_uri['agave']
    self._parsed_archive_uri = URIParser.parse(
        '{}/_agave_jobs'.format(agave_work_uri['chopped_uri'])
    )
    if not self._parsed_archive_uri:
        Log.an().error('invalid job work uri: %s', agave_work_uri)
        return False

    # create the archive folder
    if not DataManager.mkdir(
            parsed_uri=self._parsed_archive_uri,
            recursive=True,
            agave=self.get_context_options()
    ):
        Log.an().error('cannot create agave archive uri: %s',
                       self._parsed_archive_uri['chopped_uri'])
        return False

    return True
def _init_app_paths(self):
    """
    Add app paths to environment PATH for local workflows.

    The package path contains the workflow definition YAML file and shell
    scripts for calling individual apps used in a workflow.

    Args:
        None.

    Output:
        On success: True.
        On failure: False.

    """
    parsed_uri = URIParser.parse(self._workflow_path)
    if not parsed_uri:
        Log.an().error('invalid workflow path: %s', self._workflow_path)
        return False

    # construct the apps URI once and reuse it (the original computed the
    # identical expression twice)
    apps_uri = ('{}{}' if parsed_uri['folder'] == '/' else '{}/{}')\
        .format(parsed_uri['folder'], 'apps')
    parsed_apps_uri = URIParser.parse(apps_uri)
    if not parsed_apps_uri:
        Log.an().error('cannot construct apps uri: %s', apps_uri)
        return False

    if not DataManager.exists(parsed_uri=parsed_apps_uri):
        # no apps directory; nothing to add to PATH
        return True

    # DataManager.list returns False on failure; guard so we don't try to
    # iterate a bool
    app_dirs = DataManager.list(parsed_uri=parsed_apps_uri)
    if app_dirs is False:
        Log.an().error('cannot list apps uri: %s', apps_uri)
        return False

    # prepend each app's "assets" directory to PATH so app scripts are
    # callable by local workflow steps
    for app_dir in app_dirs:
        try:
            os.environ['PATH'] = '{}{}{}'.format(
                os.path.join(
                    parsed_apps_uri['chopped_path'], app_dir, 'assets'
                ),
                os.pathsep,
                os.environ['PATH']
            )
        except OSError as err:
            Log.an().error('workflow app pathmunge error [%s]', str(err))
            return False

    return True
def stage(self, **kwargs):
    """
    Copy data to all contexts except 'final' from source URI.

    Source URI can be multiple locations, but only copy to the first
    element of dest URIs. Set _staged indicator to True on success.

    Args:
        self: class instance.
        **kwargs: additional arguments required by DataManager.copy().

    Returns:
        True or False.

    """
    for context in self._parsed_data_uris:
        if context != self._source_context:
            if self._clean:
                # remove target URI first
                # NOTE(review): clean is currently a no-op stub here
                pass

            # source URIs are a list; dest URIs for each context are
            # index-aligned with the source list
            for i, parsed_source_uri in enumerate(
                    self._parsed_data_uris[self._source_context]
            ):
                Log.some().debug(
                    'staging data: %s->%s to %s->%s',
                    self._source_context,
                    parsed_source_uri['chopped_uri'],
                    context,
                    self._parsed_data_uris[context][i]['chopped_uri']
                )
                # the 'final' context is not staged by this method
                if context != 'final':
                    if not DataManager.copy(
                            parsed_src_uri=parsed_source_uri,
                            parsed_dest_uri=self._parsed_data_uris[context]
                            [i],
                            **kwargs
                    ):
                        msg = 'cannot stage data by copying from {} to {}'.format(
                            parsed_source_uri['chopped_uri'],
                            self._parsed_data_uris[context][i]
                            ['chopped_uri']
                        )
                        Log.an().error(msg)
                        return self._fatal(msg)

    self._staged = True

    return True
def stage(self, **kwargs):
    """
    Copy data to all contexts except 'final' from source URI.

    Set _staged indicator to True on success.

    Args:
        self: class instance.
        **kwargs: additional arguments required by DataManager.copy().

    Returns:
        True or False.

    """
    src_uri = self._parsed_data_uris[self._source_context]

    for context in self._parsed_data_uris:
        # data is already in place for the source context
        if context == self._source_context:
            continue

        if self._clean:
            # remove target URI first
            pass

        dest_uri = self._parsed_data_uris[context]
        Log.some().debug('staging data: {}->{} to {}->{}'.format(
            self._source_context,
            src_uri['chopped_uri'],
            context,
            dest_uri['chopped_uri']))

        # the 'final' context is not staged by this method
        if context == 'final':
            continue

        if not DataManager.copy(
                parsed_src_uri=src_uri,
                parsed_dest_uri=dest_uri,
                **kwargs
        ):
            msg = 'cannot stage data by copying from {} to {}'.format(
                src_uri['chopped_uri'],
                dest_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

    self._staged = True

    return True
def _get_map_uri_list(self):
    """
    Get the contents of the map URI (agave URI).

    Args:
        self: class instance.

    Returns:
        Array of base file names in the map URI. Returns False on
        exception.

    """
    combined_file_list = []
    for parsed_uri in self._parsed_map_uris:
        # only agave map URIs are valid for this step type
        if parsed_uri['scheme'] != 'agave':
            msg = 'invalid map uri scheme for this step: {}'.format(
                parsed_uri['scheme']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # list the URI contents, filtered by the step's map glob
        file_list = DataManager.list(
            parsed_uri=parsed_uri,
            globstr=self._step['map']['glob'],
            agave=self._agave
        )
        # DataManager.list returns False (not an empty list) on failure
        if file_list is False:
            msg = 'cannot get contents of map uri: {}'\
                .format(parsed_uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        combined_file_list.extend(
            {
                'chopped_uri': parsed_uri['chopped_uri'],
                'filename': filename
            }
            for filename in file_list
        )

    return combined_file_list
def clean_up(self):
    """
    Copy data from Agave archive location to step output location (data URI).

    For each map item, imports the step output, matching agave job log
    files (gf-*.out/.err), and the contents of the archive's _log
    directory into the source-context data URI.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.

    """
    # source-context data URI components, common for all map items
    src_data = self._parsed_data_uris[self._source_context][0]

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(
        src_data['chopped_uri'],
        '_log'
    )

    # agave log files match gf-{attempt}-{step}-{output}.out or .err.
    # raw string so \d and \. are regex escapes, not invalid string
    # escapes; compiled once instead of per item
    log_pattern = re.compile(r'^gf-\d*-.*\.(out|err)$')

    # copy data for each map item
    for map_item in self._map:

        run = map_item['run'][map_item['attempt']]

        # copy step output
        if not self._agave['agave_wrapper'].files_import_from_agave(
                src_data['authority'],
                src_data['chopped_path'],
                map_item['template']['output'],
                '{}/{}'.format(
                    run['archive_uri'],
                    map_item['template']['output']
                )
        ):
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # check for any agave log files (*.out and *.err files)
        agave_log_list = DataManager.list(
            uri=run['archive_uri'],
            agave=self._agave
        )
        if agave_log_list is False:
            msg = 'cannot get agave log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each agave log file into the _log directory
        for item in agave_log_list:
            if log_pattern.match(item):
                if not self._agave['agave_wrapper'].files_import_from_agave(
                        src_data['authority'],
                        '{}/{}'.format(
                            src_data['chopped_path'],
                            '_log'
                        ),
                        item,
                        '{}/{}'.format(
                            run['archive_uri'],
                            item
                        )
                ):
                    msg = 'cannot copy agave log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

        # check if anything is in the _log directory
        src_log_dir = '{}/{}'.format(
            run['archive_uri'],
            '_log'
        )

        if DataManager.exists(
                uri=src_log_dir,
                agave=self._agave
        ):
            # create dest _log dir if it doesn't exist
            if not DataManager.exists(
                    uri=dest_log_dir,
                    agave=self._agave
            ):
                if not DataManager.mkdir(
                        uri=dest_log_dir,
                        agave=self._agave
                ):
                    msg = 'cannot create _log directory for step "{}"'\
                        .format(self._step['name'])
                    Log.an().error(msg)
                    return self._fatal(msg)

            # get list of all items in src_log_dir
            log_list = DataManager.list(
                uri=src_log_dir,
                agave=self._agave
            )
            if log_list is False:
                msg = 'cannot get _log list for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

            # copy each list item
            for item in log_list:
                if not self._agave['agave_wrapper'].files_import_from_agave(
                        src_data['authority'],
                        '{}/{}'.format(
                            src_data['chopped_path'],
                            '_log'
                        ),
                        item,
                        '{}/{}/{}'.format(
                            run['archive_uri'],
                            '_log',
                            item
                        )
                ):
                    msg = 'cannot copy log item "{}"'.format(item)
                    Log.an().error(msg)
                    return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map).

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            if map_item['template'][input_key]:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(map_item['template'][input_key]), safe='/:'
                )
        else:
            if self._app['inputs'][input_key]['default']:
                # only include an input if the value is a non-empty string
                inputs[input_key] = urllib.parse.quote(
                    str(self._app['inputs'][input_key]['default']),
                    safe='/:'
                )

    # load default app parameters, overwrite with template parameters
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            param_type = self._app['parameters'][param_key]['type']
            if param_type in ('int', 'long'):
                parameters[param_key] = int(map_item['template'][param_key])
            elif param_type in ('float', 'double'):
                # bug fix: original used "== ['float', 'double']", which
                # compares a string to a list and is always False, so
                # float/double params fell through to the str branch
                parameters[param_key] \
                    = float(map_item['template'][param_key])
            else:
                parameters[param_key] = str(map_item['template'][param_key])
        else:
            if self._app['parameters'][param_key]['default'] not in [None, '']:
                parameters[param_key] \
                    = self._app['parameters'][param_key]['default']

    # add execution method as parameter
    parameters['exec_method'] = self._step['execution']['method']

    # add execution init commands if 'init' param given
    if 'init' in self._step['execution']['parameters']:
        parameters['exec_init'] \
            = self._step['execution']['parameters']['init']

    # construct agave job name; truncate to at most 64 chars
    name = 'gf-{}-{}-{}'.format(
        str(map_item['attempt']),
        slugify(self._step['name'], regex_pattern=r'[^-a-z0-9_]+'),
        slugify(
            map_item['template']['output'],
            regex_pattern=r'[^-a-z0-9_]+'
        )
    )
    name = name[:62]+'..' if len(name) > 64 else name

    archive_path = '{}/{}'.format(
        self._agave['parsed_archive_uri']['chopped_path'],
        name
    )

    # construct agave app template
    app_template = {
        'name': name,
        'appId': self._app['implementation']['agave']['agave_app_id'],
        'archive': True,
        'inputs': inputs,
        'parameters': parameters,
        'archiveSystem': self._agave['parsed_archive_uri']['authority'],
        'archivePath': archive_path
    }

    # specify processors if 'slots' param given
    if 'slots' in self._step['execution']['parameters']:
        app_template['processorsPerNode'] = int(
            self._step['execution']['parameters']['slots']
        )

    # specify memory if 'mem' param given
    if 'mem' in self._step['execution']['parameters']:
        app_template['memoryPerNode'] = '{}'.format(
            self._step['execution']['parameters']['mem']
        )

    Log.some().debug(
        "[step.%s]: agave app template:\n%s",
        self._step['name'],
        pprint.pformat(app_template)
    )

    # delete archive path if it exists
    archive_item_uri = \
        self._agave['parsed_archive_uri']['chopped_uri']+'/'+name
    if DataManager.exists(
            uri=archive_item_uri,
            agave=self._agave
    ):
        if not DataManager.delete(
                uri=archive_item_uri,
                agave=self._agave
        ):
            # non-fatal; job submission may still succeed
            Log.a().warning(
                'cannot delete archive uri: %s/%s',
                self._agave['parsed_archive_uri']['chopped_uri'],
                name
            )

    # submit job
    job = self._agave['agave_wrapper'].jobs_submit(app_template)
    if not job:
        msg = 'agave jobs submit failed for "{}"'.format(
            app_template['name']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # log agave job id
    Log.some().debug(
        '[step.%s]: agave job id: %s -> %s',
        self._step['name'],
        map_item['template']['output'],
        job['id']
    )

    # record job info
    map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
    map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
        .format(
            self._agave['parsed_archive_uri']['chopped_uri'],
            name
        )
    map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

    # set status of process
    map_item['status'] = 'PENDING'
    map_item['run'][map_item['attempt']]['status'] = 'PENDING'

    return True
def _run_map(self, map_item):
    """
    Run a job for each map item and store the job ID.

    Args:
        self: class instance.
        map_item: map item object (item of self._map)

    Returns:
        On success: True.
        On failure: False.

    """
    # load default app inputs, overwrite with template inputs.
    # values are URL-quoted (None becomes ''), '/' and ':' preserved
    inputs = {}
    for input_key in self._app['inputs']:
        if input_key in map_item['template']:
            inputs[input_key] = urllib.parse.quote(str(
                map_item['template'][input_key] or ''), safe='/:')
        else:
            inputs[input_key] = urllib.parse.quote(str(
                self._app['inputs'][input_key]['default'] or ''),
                safe='/:')

    # load default app parameters, overwrite with template parameters
    parameters = {}
    for param_key in self._app['parameters']:
        if param_key in map_item['template']:
            parameters[param_key] = map_item['template'][param_key]
        else:
            parameters[param_key] \
                = self._app['parameters'][param_key]['default']

    # add execution method as parameter
    parameters['exec_method'] = self._step['execution']['method']

    # construct agave job name; truncate to at most 64 chars
    name = 'gf-{}-{}-{}'.format(str(map_item['attempt']),
                                slugify(self._step['name']),
                                slugify(map_item['template']['output']))
    name = name[:62] + '..' if len(name) > 64 else name
    archive_path = '{}/{}'.format(
        self._agave['parsed_archive_uri']['chopped_path'], name)

    # construct agave app template
    app_template = {
        'name': name,
        'appId': self._app['definition']['agave']['agave_app_id'],
        'archive': True,
        'inputs': inputs,
        'parameters': parameters,
        'archiveSystem': self._agave['parsed_archive_uri']['authority'],
        'archivePath': archive_path
    }
    Log.some().debug("agave app template:\n%s",
                     pprint.pformat(app_template))

    # delete archive path if it exists; failure to delete is non-fatal
    if DataManager.exists(
            uri=self._agave['parsed_archive_uri']['chopped_uri']
            + '/' + name,
            agave=self._agave):
        if not DataManager.delete(
                uri=self._agave['parsed_archive_uri']['chopped_uri']
                + '/' + name,
                agave=self._agave):
            Log.a().warning(
                'cannot delete archive uri: %s/%s',
                self._agave['parsed_archive_uri']['chopped_uri'], name)

    # submit job
    job = self._agave['agave_wrapper'].jobs_submit(app_template)
    if not job:
        msg = 'agave jobs submit failed for "{}"'.format(
            app_template['name'])
        Log.an().error(msg)
        return self._fatal(msg)

    # log agave job id
    Log.some().debug('agave job id: %s -> %s',
                     map_item['template']['output'], job['id'])

    # record job info for the current attempt
    map_item['run'][map_item['attempt']]['agave_job_id'] = job['id']
    map_item['run'][map_item['attempt']]['archive_uri'] = '{}/{}'\
        .format(
            self._agave['parsed_archive_uri']['chopped_uri'], name
        )
    map_item['run'][map_item['attempt']]['hpc_job_id'] = ''

    # set status of process
    map_item['status'] = 'PENDING'
    map_item['run'][map_item['attempt']]['status'] = 'PENDING'

    return True
def _init_context_uris(self):
    """
    Generate all context URIs for this workflow run.

    Context URIs are generated based on contexts given in
    _parsed_job_work_uri, and the "final" context for steps given in the
    _parsed_job_output_uri.

    Args:
        None.

    Returns:
        On failure: Raises WorkflowDAGException.

    """
    self._context_uris['inputs'] = {}
    self._context_uris['steps'] = {'final': {}}
    self._parsed_context_uris['inputs'] = {}
    self._parsed_context_uris['steps'] = {'final': {}}

    # init contexts in parsed_job_work_uri for inputs and steps
    for context in self._parsed_job_work_uri:
        self._context_uris['inputs'][context] = {}
        self._context_uris['steps'][context] = {}
        self._parsed_context_uris['inputs'][context] = {}
        self._parsed_context_uris['steps'][context] = {}

        # walk the DAG nodes in topological order
        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]
            if node['type'] == 'input':

                if node['source_context'] == context:
                    # use original input URI
                    parsed_uri = URIParser.parse(
                        self._workflow['inputs'][node['name']]['value'])
                    if not parsed_uri:
                        msg = 'invalid input uri: {}'.format(
                            self._workflow['inputs'][
                                node['name']]['value'])
                        raise WorkflowDAGException(msg)

                    self._context_uris['inputs'][context][node['name']]\
                        = parsed_uri['chopped_uri']
                    self._parsed_context_uris['inputs'][context]\
                        [node['name']] = parsed_uri

                else:
                    # switch context of input URI: the input will be
                    # staged under a per-input folder in the work URI
                    new_base_uri = '{}/_input-{}'.format(
                        self._parsed_job_work_uri[context]['chopped_uri'],
                        slugify(node['name']))

                    # create new base URI
                    if not DataManager.mkdir(
                            uri=new_base_uri,
                            recursive=True,
                            **{context: self._context_options[context]}):
                        msg = 'cannot create new base uri for input: {}'\
                            .format(new_base_uri)
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                    # switch input URI base
                    switched_uri = URIParser.switch_context(
                        self._workflow['inputs'][node['name']]['value'],
                        new_base_uri)
                    if not switched_uri:
                        msg = (
                            'cannot switch input uri context to '
                            'new base URI: {}->{}'
                        ).format(
                            self._workflow['inputs'][node['name']]\
                                ['value'],
                            new_base_uri
                        )
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                    self._context_uris['inputs'][context][node['name']]\
                        = switched_uri['chopped_uri']
                    self._parsed_context_uris['inputs'][context]\
                        [node['name']] = switched_uri

            else: # node['type'] == 'step'
                # step URIs live under the work URI, named by step slug
                self._context_uris['steps'][context][node['name']]\
                    = '{}/{}'.format(
                        self._parsed_job_work_uri[context]['chopped_uri'],
                        slugify(node['name'])
                    )
                self._parsed_context_uris['steps'][context][node['name']]\
                    = URIParser.parse(
                        self._context_uris['steps'][context][node['name']]
                    )

    # init final contexts for steps, rooted at the job output URI
    for node_name in self._topo_sort:
        node = self._graph.nodes[node_name]
        if node['type'] == 'step':
            self._context_uris['steps']['final'][node['name']]\
                = '{}/{}'.format(
                    self._parsed_job_output_uri['chopped_uri'],
                    slugify(node['name'])
                )
            self._parsed_context_uris['steps']['final'][node['name']]\
                = URIParser.parse(
                    self._context_uris['steps']['final'][node['name']]
                )
def register_agave_app(self, agave, agave_config, agave_params,
                       agave_publish):
    """
    Register app in Agave.

    Compiles the agave app definition from its template, uploads app
    assets and the test script, then adds (and optionally publishes)
    the app.

    Args:
        self: class instance
        agave: agave connection object.
        agave_config: agave connection configuration.
        agave_params: dict of agave parameters, must include an 'agave'
            key with 'deploymentSystem' and 'appsDir'.
        agave_publish: if truthy, also publish the app after registering.

    Returns:
        On success: dict with registered app 'id', 'version', and
            'revision'.
        On failure: False.

    """
    Log.some().info('registering agave app %s', str(self._path))
    Log.some().info('app version: %s', self._config['version'])

    # compile agave app template
    if not TemplateCompiler.compile_template(
            self._path,
            'agave-app-def.json.j2',
            self._path / 'agave-app-def.json',
            version=self._config['version'],
            agave=agave_params['agave']
    ):
        Log.a().warning(
            'cannot compile agave app "%s" definition from template',
            self._app['name']
        )
        return False

    # create main apps URI
    parsed_agave_apps_uri = URIParser.parse(
        'agave://{}/{}'.format(
            agave_params['agave']['deploymentSystem'],
            agave_params['agave']['appsDir']
        )
    )
    Log.some().info(
        'creating main apps uri: %s',
        parsed_agave_apps_uri['chopped_uri']
    )
    if not DataManager.mkdir(
            parsed_uri=parsed_agave_apps_uri,
            recursive=True,
            agave={
                'agave': agave,
                'agave_config': agave_config
            }
    ):
        Log.a().warning('cannot create main agave apps uri')
        return False

    # delete app uri if it exists
    parsed_app_uri = URIParser.parse(
        'agave://{}/{}/{}'.format(
            agave_params['agave']['deploymentSystem'],
            agave_params['agave']['appsDir'],
            self._app['folder']
        )
    )
    Log.some().info(
        'deleting app uri if it exists: %s',
        parsed_app_uri['chopped_uri']
    )
    if not DataManager.delete(
            parsed_uri=parsed_app_uri,
            agave={
                'agave': agave,
                'agave_config': agave_config
            }
    ):
        # log warning, but ignore.. deleting non-existent uri returns
        # False
        Log.a().warning(
            'cannot delete app uri: %s', parsed_app_uri['chopped_uri']
        )

    # upload app assets
    parsed_assets_uri = URIParser.parse(str(self._path / 'assets'))
    Log.some().info(
        'copying app assets from %s to %s',
        parsed_assets_uri['chopped_uri'],
        parsed_app_uri['chopped_uri']
    )

    if not DataManager.copy(
            parsed_src_uri=parsed_assets_uri,
            parsed_dest_uri=parsed_app_uri,
            local={},
            agave={
                'agave': agave,
                'agave_config': agave_config
            }
    ):
        Log.a().warning(
            'cannot copy app assets from %s to %s',
            parsed_assets_uri['chopped_uri'],
            parsed_app_uri['chopped_uri']
        )
        return False

    # upload test script
    parsed_test_uri = URIParser.parse(
        '{}/{}'.format(
            parsed_app_uri['chopped_uri'],
            'test'
        )
    )
    Log.some().info(
        'creating test uri: %s', parsed_test_uri['chopped_uri']
    )
    if not DataManager.mkdir(
            parsed_uri=parsed_test_uri,
            recursive=True,
            agave={
                'agave': agave,
                'agave_config': agave_config
            }
    ):
        Log.a().warning(
            'cannot create test uri: %s', parsed_test_uri['chopped_uri']
        )
        return False

    parsed_local_test_script = URIParser.parse(
        str(self._path / 'test' / 'test.sh')
    )
    parsed_agave_test_script = URIParser.parse(
        '{}/{}'.format(parsed_test_uri['chopped_uri'], 'test.sh')
    )
    Log.some().info(
        'copying test script from %s to %s',
        parsed_local_test_script['chopped_uri'],
        parsed_agave_test_script['chopped_uri']
    )
    if not DataManager.copy(
            parsed_src_uri=parsed_local_test_script,
            parsed_dest_uri=parsed_agave_test_script,
            local={},
            agave={
                'agave': agave,
                'agave_config': agave_config
            }
    ):
        Log.a().warning(
            'cannot copy test script from %s to %s',
            parsed_local_test_script['chopped_uri'],
            parsed_agave_test_script['chopped_uri']
        )
        return False

    # update existing app, or register new app
    Log.some().info('registering agave app')

    app_definition = self._yaml_to_dict(
        str(self._path / 'agave-app-def.json')
    )
    if not app_definition:
        Log.a().warning(
            'cannot load agave app definition: %s',
            str(self._path / 'agave-app-def.json')
        )
        return False

    agwrap = AgaveAppsAddUpdate(
        agave, agave_config
    )
    app_add_result = agwrap.call(app_definition)
    if not app_add_result:
        Log.a().warning(
            'cannot register agave app:\n%s',
            pprint.pformat(app_definition)
        )
        return False

    register_result = {}

    # publish app
    if agave_publish:
        Log.some().info('publishing agave app')

        agwrap = AgaveAppsPublish(
            agave, agave_config
        )
        app_publish_result = agwrap.call(app_add_result['id'])
        if not app_publish_result:
            Log.a().warning(
                'cannot publish agave app: %s', app_add_result['id']
            )
            return False

        # return published id and revision
        register_result = {
            'id': app_publish_result['id'],
            'version': self._config['version'],
            'revision': 'u{}'.format(app_publish_result['revision'])
        }

    else:
        # return un-published id and blank revision
        register_result = {
            'id': app_add_result['id'],
            'version': self._config['version'],
            'revision': ''
        }

    return register_result
def _get_map_uri_list(self):
    """
    Get the contents of the map URI (agave URI).

    Args:
        self: class instance.

    Returns:
        Array of base file names in the map URI. Returns False on
        exception.

    """
    combined_file_list = []
    for uri in self._parsed_map_uris:
        # make sure map URI is compatible scheme (agave)
        if uri['scheme'] != 'agave':
            msg = 'invalid map uri scheme for this step: {}'.format(
                uri['scheme']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # get file list from URI, filtered by the step's map glob
        file_list = DataManager.list(
            parsed_uri=uri,
            globstr=self._step['map']['glob'],
            agave=self._agave
        )
        # DataManager.list returns False (not an empty list) on failure
        if file_list is False:
            msg = 'cannot get contents of map uri: {}'\
                .format(uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        if self._step['map']['inclusive']:
            # inclusive mode: the map URI itself is a candidate; include
            # it if its own name matches the glob
            if glob.globfilter(
                    [uri['name']],
                    self._step['map']['glob'],
                    flags=glob.EXTGLOB|glob.GLOBSTAR
            ):
                combined_file_list.append({
                    'chopped_uri': '{}://{}{}'.format(
                        uri['scheme'], uri['authority'], uri['folder']
                    ),
                    'filename': uri['name']
                })

        for f in file_list:
            if '/' in f:
                # reparse uri to correctly represent recursive elements
                new_uri = URIParser.parse(
                    '{}/{}'.format(uri['chopped_uri'], f)
                )
                combined_file_list.append({
                    'chopped_uri': '{}://{}{}'.format(
                        new_uri['scheme'],
                        new_uri['authority'],
                        new_uri['folder']
                    ),
                    'filename': new_uri['name']
                })
            else:
                # flat entry directly under the map URI
                combined_file_list.append({
                    'chopped_uri': uri['chopped_uri'],
                    'filename': f
                })

    return combined_file_list
def _init_context_uris(self):
    """
    Generate all context URIs for this workflow run.

    Context URIs are generated based on contexts given in
    _parsed_job_work_uri, and the "final" context for steps given in the
    _parsed_job_output_uri.

    Args:
        None.

    Returns:
        On failure: Raises WorkflowDAGException.

    """
    self._context_uris['inputs'] = {}
    self._context_uris['steps'] = {'final': {}}
    self._parsed_context_uris['inputs'] = {}
    self._parsed_context_uris['steps'] = {'final': {}}

    # init all data contexts: the union of the data schemes of the exec
    # contexts and the explicit data contexts
    for context in {
            Contexts.get_data_scheme_of_exec_context(con)
            for con in self._exec_contexts
    } | self._data_contexts:
        self._context_uris['inputs'][context] = {}
        self._parsed_context_uris['inputs'][context] = {}

        # walk the DAG nodes in topological order
        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]
            if node['type'] == 'input':

                if node['source_context'] == context:
                    # use original input URI
                    parsed_uri = URIParser.parse(
                        self._workflow['inputs'][node['name']]['value'])
                    if not parsed_uri:
                        msg = 'invalid input uri: {}'.format(
                            self._workflow['inputs'][
                                node['name']]['value'])
                        raise WorkflowDAGException(msg)

                    self._context_uris['inputs'][context][node['name']]\
                        = parsed_uri['chopped_uri']
                    self._parsed_context_uris['inputs'][context]\
                        [node['name']] = parsed_uri

                else:
                    # skip if _parsed_job_work_uri is not defined for
                    # this context. this implies that there is no
                    # execution defined for that context, so no need to
                    # setup the data staging location at the work_uri
                    if context not in self._parsed_job_work_uri:
                        continue

                    # switch context of input URI: the input will be
                    # staged under a per-input folder in the work URI
                    new_base_uri = '{}/_input-{}'.format(
                        self._parsed_job_work_uri[context]['chopped_uri'],
                        slugify(node['name'],
                                regex_pattern=r'[^-a-z0-9_]+'))

                    # create new base URI
                    if not DataManager.mkdir(
                            uri=new_base_uri,
                            recursive=True,
                            **{context: self._context_options[context]}):
                        msg = 'cannot create new base uri for input: {}'\
                            .format(new_base_uri)
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                    # switch input URI base
                    switched_uri = URIParser.switch_context(
                        self._workflow['inputs'][node['name']]['value'],
                        new_base_uri)
                    if not switched_uri:
                        msg = (
                            'cannot switch input uri context to '
                            'new base URI: {}->{}'
                        ).format(
                            self._workflow['inputs'][node['name']]\
                                ['value'],
                            new_base_uri
                        )
                        Log.an().error(msg)
                        raise WorkflowDAGException(msg)

                    self._context_uris['inputs'][context][node['name']]\
                        = switched_uri['chopped_uri']
                    self._parsed_context_uris['inputs'][context]\
                        [node['name']] = switched_uri

    # step work URIs are only needed for the data schemes of the exec
    # contexts
    for context in {
            Contexts.get_data_scheme_of_exec_context(con)
            for con in self._exec_contexts
    }:
        self._context_uris['steps'][context] = {}
        self._parsed_context_uris['steps'][context] = {}

        for node_name in self._topo_sort:
            node = self._graph.nodes[node_name]
            if node['type'] == 'step':
                # step URIs live under the work URI, named by step slug
                self._context_uris['steps'][context][node['name']]\
                    = '{}/{}'.format(
                        self._parsed_job_work_uri[context]['chopped_uri'],
                        slugify(node['name'],
                                regex_pattern=r'[^-a-z0-9_]+')
                    )
                self._parsed_context_uris['steps'][context][node['name']]\
                    = URIParser.parse(
                        self._context_uris['steps'][context][node['name']]
                    )

    # init final contexts for steps, rooted at the job output URI
    for node_name in self._topo_sort:
        node = self._graph.nodes[node_name]
        if node['type'] == 'step':
            self._context_uris['steps']['final'][node['name']]\
                = '{}/{}'.format(
                    self._parsed_job_output_uri['chopped_uri'],
                    slugify(node['name'], regex_pattern=r'[^-a-z0-9_]+')
                )
            self._parsed_context_uris['steps']['final'][node['name']]\
                = URIParser.parse(
                    self._context_uris['steps']['final'][node['name']]
                )
def stage_final(self, **kwargs):
    """
    Move or copy data to the final context from the source URI.

    Data is moved (rather than copied) only for local-->local schemes.
    Set _staged_final indicator to True on success.

    Args:
        self: class instance.
        **kwargs: additional arguments required by DataManager.move()
            or DataManager.copy().

    Returns:
        On success: True.
        On failure: False (via self._fatal).
    """
    for context in self._parsed_data_uris:
        if context == self._source_context:
            continue

        if self._clean:
            # TODO: remove target URI first before staging
            # (not yet implemented)
            pass

        Log.some().debug('staging final data: {}->{} to {}->{}'.format(
            self._source_context,
            self._parsed_data_uris[
                self._source_context]['chopped_uri'],
            context,
            self._parsed_data_uris[context]['chopped_uri']))

        # only the 'final' context receives data here; other contexts
        # are logged but skipped
        if context != 'final':
            continue

        # move final data instead of copy, only for local-->local schemes
        if (
                self._parsed_data_uris[self._source_context]['scheme']
                == 'local'
                and self._parsed_data_uris[context]['scheme'] == 'local'
        ):
            transfer = DataManager.move
            verb = 'moving'
        else:
            transfer = DataManager.copy
            verb = 'copying'

        if not transfer(
                parsed_src_uri=self._parsed_data_uris[
                    self._source_context],
                parsed_dest_uri=self._parsed_data_uris[context],
                **kwargs
        ):
            # bug fix: the move branch previously reported "by copying"
            # even when the data was moved
            msg = 'cannot stage final data by {} from {} to {}'.format(
                verb,
                self._parsed_data_uris[
                    self._source_context]['chopped_uri'],
                self._parsed_data_uris[context]['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

    self._staged_final = True

    return True
def clean_up(self):
    """
    Copy data from Agave archive location to step output location
    (data URI).

    For each map item, the step output is imported from the archive URI,
    and any files under the archive's _log directory are copied into a
    shared _log directory at the step's data URI.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.
    """
    source_uri = self._parsed_data_uris[self._source_context]
    src_authority = source_uri['authority']
    src_path = source_uri['chopped_path']

    # destination _log directory, common for all map items
    dest_log_dir = '{}/{}'.format(source_uri['chopped_uri'], '_log')

    # agave connection options shared by all DataManager calls below
    agave_opts = {
        'agave': self._agave['agave'],
        'agave_config': self._config['agave']
    }

    # create instance of agave wrapper class for data import
    agwrap = AgaveFilesImportDataFromAgave(
        self._agave['agave'], self._config['agave']
    )

    # copy data for each map item
    for map_item in self._map:

        run = map_item['run'][map_item['attempt']]
        output = map_item['template']['output']

        # copy step output from the archive location
        if not agwrap.call(
                src_authority,
                src_path,
                output,
                '{}/{}'.format(run['archive_uri'], output)
        ):
            msg = 'agave import failed for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # skip log handling when the archive has no _log directory
        src_log_dir = '{}/{}'.format(run['archive_uri'], '_log')
        if not DataManager.exists(uri=src_log_dir, agave=agave_opts):
            continue

        # create dest _log dir if it doesn't exist
        if not DataManager.exists(uri=dest_log_dir, agave=agave_opts):
            if not DataManager.mkdir(uri=dest_log_dir, agave=agave_opts):
                msg = 'cannot create _log directory for step "{}"'\
                    .format(self._step['name'])
                Log.an().error(msg)
                return self._fatal(msg)

        # get list of all items in src_log_dir
        log_list = DataManager.list(uri=src_log_dir, agave=agave_opts)
        if not log_list:
            msg = 'cannot get _log list for step "{}"'\
                .format(self._step['name'])
            Log.an().error(msg)
            return self._fatal(msg)

        # copy each list item into the shared _log directory
        for item in log_list:
            if not agwrap.call(
                    src_authority,
                    '{}/{}'.format(src_path, '_log'),
                    item,
                    '{}/{}/{}'.format(run['archive_uri'], '_log', item)
            ):
                msg = 'cannot copy log item "{}"'.format(item)
                Log.an().error(msg)
                return self._fatal(msg)

    self._update_status_db('FINISHED', '')

    return True