def initialize(self):
    """
    Initialize the AgaveStep class.

    Verify that the required agave objects are present, that the step's
    execution context is "agave", and that the app provides an "agave"
    implementation, then delegate to the parent initializer.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.
    """
    def fail(message):
        # log the error and propagate the fatal status to the caller
        Log.an().error(message)
        return self._fatal(message)

    if 'agave_wrapper' not in self._agave:
        return fail('agave_wrapper object required')

    if 'parsed_archive_uri' not in self._agave:
        return fail('agave parsed archive uri required')

    # the step context must be agave
    if self._step['execution']['context'] != 'agave':
        return fail(
            '"agave" step class can only be instantiated with a'
            ' step definition that has an "agave" execution context'
        )

    # the app must provide an agave implementation
    if 'agave' not in self._app['implementation']:
        return fail(
            '"agave" step class can only be instantiated with an app that'
            ' has an "agave" definition'
        )

    if not super(AgaveStep, self).initialize():
        return fail('cannot initialize workflow step')

    return True
def _validate_workflow_package(self): package_path = Path(self._path) if not Path(package_path).is_dir(): Log.an().error('workflow package path is not a directory: %s', package_path) return False self._workflow_yaml = Path(package_path / 'workflow.yaml') if not self._workflow_yaml.is_file(): Log.an().error('missing workflow.yaml file in workflow package') return False return True
def retry_failed(self):
    """
    Retry any map-reduce jobs that failed.

    Retrying is not yet implemented for local apps, so this call always
    reports a fatal error.

    Args:
        self: class instance.

    Returns:
        False.
    """
    message = 'retry not yet supported for local apps'
    Log.an().error(message)
    return self._fatal(message)
def spawn(command):
    """
    Spawn a shell process for a command and return the process handle.

    Args:
        command: The command string to execute.

    Returns:
        On success: the Popen object for the new process.
        On failure: False.
        (fixed docstring: the original claimed an exception is raised,
        but the OSError is caught, logged, and False is returned)
    """
    try:
        # NOTE(review): shell=True executes the string through the
        # shell; callers must never pass untrusted input as `command`
        return Popen(command, shell=True)
    except OSError as err:
        Log.an().error('spawn command failed: %s [%s]', command, str(err))
        return False
def _re_init():
    """Tear down and re-initialize the drmaa session."""
    # best-effort exit of the current session; failure is only a
    # warning since the session may already be defunct
    try:
        self._drmaa_session.exit()
    except drmaa.errors.DrmaaException as exc:
        Log.a().warning('cannot exit drmaa session: [%s]', str(exc))

    # start a fresh session; failure here is fatal for the reinit
    try:
        self._drmaa_session.initialize()
    except drmaa.errors.DrmaaException as exc:
        Log.an().error('cannot initialize drmaa session: [%s]', str(exc))
        return False

    return True
def initialize(self):
    """
    Initialize the WorkflowStep class.

    Create the source data URI and parse the step templates.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.
    """
    def fail(message):
        # log the error and propagate the fatal status to the caller
        Log.an().error(message)
        return self._fatal(message)

    # parse data uris in the StageableData class
    if not StageableData.initialize(self):
        return fail('cannot initialize data staging')

    # create data uri in the source context
    if not self._init_data_uri():
        return fail('cannot create data uris')

    # URIs for dependent steps must match the step dict and app context
    if not self._validate_depend_uris():
        return fail('validation failed for dependent step uris')

    # build template replacement list
    if not self._build_replace():
        return fail('cannot build replacement strings for templates')

    # parse map uri
    if not self._parse_map_uri():
        return fail('cannot parse map uri')

    return True
def _agave_connect(self):
    """
    Establish a connection to Agave.

    Two connection types are supported: "impersonate" (connect with
    configured service credentials on behalf of the job user) and
    "agave-cli" (reuse the credentials cached by the agave CLI in
    ~/.agave/current).

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.
    """
    connection_type = self._config['agave'].get(
        'connection_type', 'impersonate'
    )

    if connection_type == 'impersonate':
        # impersonation requires a job username to act on behalf of
        if not self._agave_username:
            Log.an().error('agave username required if impersonating')
            return False

        agave_conf = self._config['agave']
        self._agave = Agave(
            api_server=agave_conf['server'],
            username=agave_conf['username'],
            password=agave_conf['password'],
            token_username=self._agave_username,
            client_name=agave_conf['client'],
            api_key=agave_conf['key'],
            api_secret=agave_conf['secret'],
            verify=False
        )

    elif connection_type == 'agave-cli':
        # get credentials from ~/.agave/current
        agave_clients = Agave._read_clients()
        agave_clients[0]['verify'] = False  # don't verify ssl
        self._agave = Agave(**agave_clients[0])
        # when using agave-cli, token_username must match the stored
        # creds in the user's home directory, which can differ from
        # the job username
        self._agave_username = agave_clients[0]['username']

    else:
        Log.an().error(
            'invalid agave connection type: %s', connection_type
        )
        return False

    return True
def _update_status_db(self, status, msg):
    """
    Record a workflow status change in the database.

    Args:
        self: class instance
        status: Workflow status
        msg: Success, error or warning message

    Returns:
        On success: True.
        On failure: False.
    """
    try:
        data_source = DataSource(self._config['database'])
    except DataSourceException as err:
        msg = 'data source initialization error [{}]'.format(str(err))
        Log.an().error(msg)
        return False

    # record the start time on the first transition out of PENDING
    # (including a job that errors immediately)
    if status in ['RUNNING', 'ERROR'] and self._status == 'PENDING':
        if not data_source.set_job_started(self._job_id):
            Log.a().warning('cannot set job start time in data source')
            data_source.rollback()

    # record the finish time on any terminal state, even on error
    if status in ['FINISHED', 'ERROR']:
        if not data_source.set_job_finished(self._job_id):
            Log.a().warning('cannot set job finish time in data source')
            data_source.rollback()

    # notify the configured endpoints whenever the state changes
    if status != self._status:
        if self._job['notifications']:
            self._send_notifications(status)

    # persist the new status
    self._status = status
    if not data_source.update_job_status(self._job_id, status, msg):
        Log.a().warning('cannot update job status in data source')
        data_source.rollback()

    data_source.commit()

    return True
def _load_workflow_def(self):
    """
    Load and validate the geneflow workflow definition file.

    Read self._workflow_yaml and store the first workflow found in
    self._workflow.

    Returns:
        On success: True.
        On failure: False.
    """
    gf_def = Definition()
    if not gf_def.load(str(self._workflow_yaml)):
        Log.an().error(
            'invalid geneflow definition: %s', self._workflow_yaml
        )
        return False

    workflows = gf_def.workflows()
    # the file must define at least one workflow
    if not workflows:
        Log.an().error('no workflows in geneflow definition')
        return False

    # use the first workflow in the definition
    self._workflow = next(iter(workflows.values()))

    return True
def stage(self, **kwargs):
    """
    Copy data from the source context to all other contexts.

    The source URI can be a list of locations; each element is copied
    to the corresponding element of the destination context's URI
    list. Contexts named 'final' are logged but skipped. Sets the
    _staged indicator to True on success.

    Args:
        self: class instance.
        **kwargs: additional arguments required by DataManager.copy().

    Returns:
        True or False.
    """
    source_uris = self._parsed_data_uris[self._source_context]

    for context in self._parsed_data_uris:
        if context == self._source_context:
            continue

        if self._clean:
            # TODO: remove target URI before staging (not implemented)
            pass

        for i, src_uri in enumerate(source_uris):
            dest_uri = self._parsed_data_uris[context][i]

            Log.some().debug(
                'staging data: %s->%s to %s->%s',
                self._source_context,
                src_uri['chopped_uri'],
                context,
                dest_uri['chopped_uri']
            )

            # 'final' context is only logged, never copied to
            if context == 'final':
                continue

            if not DataManager.copy(
                    parsed_src_uri=src_uri,
                    parsed_dest_uri=dest_uri,
                    **kwargs
            ):
                msg = 'cannot stage data by copying from {} to {}'.format(
                    src_uri['chopped_uri'],
                    dest_uri['chopped_uri']
                )
                Log.an().error(msg)
                return self._fatal(msg)

    self._staged = True

    return True
def _yaml_to_dict(cls, path):
    """
    Read a yaml file and parse it into a dict.

    Args:
        cls: class reference.
        path: path of the yaml file to read.

    Returns:
        On success: dict of the yaml file contents.
        On failure: False.
    """
    # read yaml file
    # fix: the 'rU' mode was deprecated since Python 3 and removed in
    # Python 3.11 (raises ValueError); universal newlines are the
    # default for text-mode reads, so plain 'r' is equivalent
    try:
        with open(path, 'r') as yaml_file:
            yaml_data = yaml_file.read()
    except IOError as err:
        Log.an().warning('cannot read yaml file: %s [%s]', path, str(err))
        return False

    # convert to dict
    try:
        yaml_dict = yaml.safe_load(yaml_data)
    except yaml.YAMLError as err:
        Log.an().warning('invalid yaml file: %s [%s]', path, str(err))
        return False

    return yaml_dict
def _load_apps_repo(self): # read yaml file self._apps_repo = self._yaml_to_dict(self._apps_repo_path) # empty dict? if not self._apps_repo: Log.an().error('cannot load/parse apps repo file: %s', self._apps_repo_path) return False # make sure it's a list with at least 1 app if not self._apps_repo.get('apps'): Log.an().error( 'apps repo must have an "apps" section with at least one app') return False return True
def retry_failed(self):
    """
    Retry any agave jobs in the map that failed or stopped.

    Each failed/stopped map item is re-run unless it has already
    reached the configured retry limit.

    Args:
        self: class instance.

    Returns:
        If no failure: True.
        On failure: Error message (fatal).
    """
    retry_limit = self._config['agave']['job_retry']

    for map_item in self._map:
        if map_item['status'] not in ('FAILED', 'STOPPED'):
            continue

        agave_job_id = map_item['run'][map_item['attempt']]['agave_job_id']

        # give up if the retry limit has been reached
        if map_item['attempt'] >= retry_limit:
            msg = (
                'agave job failed ({}) for step "{}", '
                'retries for map item "{}" reached limit of {}'
            ).format(
                agave_job_id,
                self._step['name'],
                map_item['filename'],
                retry_limit
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # retry job
        Log.some().info(
            ('agave job failed (%s) for step "%s", '
             'retrying map item "%s"'),
            agave_job_id,
            self._step['name'],
            map_item['filename']
        )

        # record a new attempt and re-run the job
        map_item['attempt'] += 1
        map_item['run'].append({})
        if not self._run_map(map_item):
            msg = 'cannot re-run agave job for map item "{}"'\
                .format(map_item['filename'])
            Log.an().error(msg)
            return self._fatal(msg)

    return True
def stage(self, **kwargs):
    """
    Copy data from the source context to all other contexts.

    Contexts named 'final' are logged but skipped. Sets the _staged
    indicator to True on success.

    Args:
        self: class instance.
        **kwargs: additional arguments required by DataManager.copy().

    Returns:
        True or False.
    """
    src_uri = self._parsed_data_uris[self._source_context]

    for context in self._parsed_data_uris:
        if context == self._source_context:
            continue

        if self._clean:
            # TODO: remove target URI before staging (not implemented)
            pass

        dest_uri = self._parsed_data_uris[context]
        Log.some().debug('staging data: {}->{} to {}->{}'.format(
            self._source_context,
            src_uri['chopped_uri'],
            context,
            dest_uri['chopped_uri']
        ))

        # 'final' context is only logged, never copied to
        if context == 'final':
            continue

        if not DataManager.copy(
                parsed_src_uri=src_uri,
                parsed_dest_uri=dest_uri,
                **kwargs
        ):
            msg = 'cannot stage data by copying from {} to {}'.format(
                src_uri['chopped_uri'],
                dest_uri['chopped_uri']
            )
            Log.an().error(msg)
            return self._fatal(msg)

    self._staged = True

    return True
def initialize(self):
    """
    Initialize the GeneFlow AgaveWorkflow class.

    Initialization consists of connecting to Agave via the wrapper.

    Args:
        self: class instance

    Returns:
        On success: True.
        On failure: False.
    """
    if self._agave_wrapper.connect():
        return True

    Log.an().error('cannot connect to agave')
    return False
def get_data_uri(self, context):
    """
    Return the parsed data URI for a specific context.

    Args:
        self: class instance.
        context: context of requested data URI.

    Returns:
        On success: data URI for given context.
        On failure: False.
    """
    try:
        return self._parsed_data_uris[context]
    except KeyError:
        msg = 'invalid data uri context: {}'.format(context)
        Log.an().error(msg)
        return self._fatal(msg)
def _delete_agave(uri, agave): """ Delete agave file/folder specified by URI. Args: uri: parsed URI to delete. agave: dict that contains: agave_wrapper: Agave wrapper object. Returns: On success: True. On failure: False. """ if not agave['agave_wrapper'].files_delete(uri['authority'], uri['chopped_path']): Log.an().error('cannot delete uri: %s', uri['chopped_path']) return False return True
def init_data(self):
    """
    Initialize context-specific data by creating the Agave archive URI.

    Args:
        self: class instance

    Returns:
        On success: True.
        On failure: False.
    """
    if self._init_archive_uri():
        return True

    Log.an().error('cannot create archive uri')
    return False
def validate_workflow(cls, workflow_def):
    """
    Validate a workflow definition against the schema.

    Args:
        cls: class reference.
        workflow_def: workflow definition dict to validate.

    Returns:
        On success: validated definition with step numbering and step
            names populated.
        On failure: False.
    """
    validator = cerberus.Validator(WORKFLOW_SCHEMA[GF_VERSION])
    valid_def = validator.validated(workflow_def)
    if not valid_def:
        Log.an().error(
            'workflow validation error:\n%s',
            pprint.pformat(validator.errors)
        )
        return False

    numbered_def = cls.calculate_step_numbering(copy.deepcopy(valid_def))
    if not numbered_def:
        Log.an().error('invalid workflow step dependencies')
        return False

    # fix: set step names on the returned (numbered) definition. The
    # original set them on valid_def AFTER its deep copy had been
    # passed to calculate_step_numbering, so the returned dict never
    # contained the names.
    for step_name, step in numbered_def['steps'].items():
        step['name'] = step_name

    return numbered_def
def _load_context_classes(self):
    """
    Import context-specific step modules and cache their step classes.

    For each execution/data context, dynamically import the module
    geneflow.extend.<context>_step and store its <Context>Step class
    in the _context_classes dict.

    Args:
        None.

    Returns:
        On failure: Raises WorkflowDAGException.
    """
    def load_class(context):
        # resolve the module and class names for this context
        mod_name = '{}_step'.format(context)
        cls_name = '{}Step'.format(context.capitalize())

        try:
            step_mod = __import__(
                'geneflow.extend.{}'.format(mod_name),
                fromlist=[mod_name]
            )
        except ImportError as err:
            msg = 'cannot import context-specific step module: {} [{}]'\
                .format(mod_name, str(err))
            Log.an().error(msg)
            raise WorkflowDAGException(msg) from err

        try:
            return getattr(step_mod, cls_name)
        except AttributeError as err:
            msg = 'cannot import context-specific step class: {} [{}]'\
                .format(cls_name, str(err))
            Log.an().error(msg)
            raise WorkflowDAGException(msg) from err

    # currently all data contexts also correspond to an exec context,
    # but this may change in the future
    for context in self._exec_contexts | self._data_contexts:
        self._context_classes[context] = load_class(context)
def config(self, env=None):
    """
    Return the config for an environment, or the entire config.

    Args:
        env: environment section to return; if None (or empty),
            return the entire config.

    Returns:
        Dict of environment section or entire config. False if env
        doesn't exist in config.
    """
    if not env:
        # no section requested: hand back the whole config
        return self._config

    try:
        return self._config[env]
    except KeyError:
        Log.an().error('invalid config environment: %s', env)
        return False
def _mkdir_agave(uri, agave): """ Create agave directory specified by URI. Args: uri: parsed URI to create. agave: dict that contains: agave_wrapper: Agave wrapper object. Returns: On success: True. On failure: False. """ if not agave['agave_wrapper'].files_mkdir(uri['authority'], uri['folder'], uri['name']): Log.an().error('cannot create folder at uri: %s', uri['chopped_uri']) return False return True
def _get_map_uri_list(self):
    """
    List the contents of all map URIs (agave scheme).

    Args:
        self: class instance.

    Returns:
        On success: list of dicts with 'chopped_uri' and 'filename'
            for each file matching the map glob.
        On failure: False (fatal).
    """
    combined_file_list = []
    for uri in self._parsed_map_uris:
        # only the agave scheme is supported by this step type
        if uri['scheme'] != 'agave':
            msg = 'invalid map uri scheme for this step: {}'.format(
                uri['scheme']
            )
            Log.an().error(msg)
            return self._fatal(msg)

        # list files under the URI that match the map glob
        file_list = DataManager.list(
            parsed_uri=uri,
            globstr=self._step['map']['glob'],
            agave=self._agave
        )
        if file_list is False:
            msg = 'cannot get contents of map uri: {}'\
                .format(uri['chopped_uri'])
            Log.an().error(msg)
            return self._fatal(msg)

        combined_file_list.extend(
            {
                'chopped_uri': uri['chopped_uri'],
                'filename': name
            }
            for name in file_list
        )

    return combined_file_list
def run(self):
    """
    Launch an agave job for each pending map item.

    Honors the throttle limit: no new jobs are started once the
    number of running jobs reaches the limit. Job IDs are stored in
    the run detail of each map item by _run_map.

    Args:
        self: class instance.

    Returns:
        On success: True.
    """
    if (
            self._throttle_limit > 0
            and self._num_running >= self._throttle_limit
    ):
        # throttle limit already reached: start nothing new
        return True

    for map_item in self._map:
        if map_item['status'] != 'PENDING':
            continue

        if self._run_map(map_item):
            self._num_running += 1
            if (
                    self._throttle_limit > 0
                    and self._num_running >= self._throttle_limit
            ):
                # reached throttle limit
                break
        else:
            # mark the item failed; it may be retried later
            msg = 'cannot run agave job for map item "{}"'\
                .format(map_item['filename'])
            Log.an().error(msg)
            map_item['status'] = 'FAILED'
            map_item['run'][map_item['attempt']]['status']\
                = map_item['status']

    self._update_status_db('RUNNING', '')

    return True
def make_def(self):
    """
    Generate the GeneFlow app definition by compiling its template.

    Args:
        self: class instance

    Returns:
        On success: True.
        On failure: False.
    """
    target = str(self._path / 'app.yaml.j2')
    Log.some().info('compiling %s', target)

    compiled = TemplateCompiler.compile_template(
        None,
        'app.yaml.j2.j2',
        target,
        **self._config
    )
    if not compiled:
        Log.an().error('cannot compile GeneFlow app definition template')
        return False

    return True
def initialize(self):
    """
    Initialize the GridEngineWorkflow class.

    Initialization consists of starting the drmaa session used to
    submit and track grid engine jobs. (fixed docstring: the original
    claimed this class had "no additional functionality", but it does
    initialize a drmaa session.)

    Args:
        None.

    Returns:
        On success: True.
        On failure: False.
    """
    try:
        self._drmaa_session.initialize()
    except drmaa.errors.DrmaaException as err:
        Log.an().error('cannot initialize drmaa session: [%s]', str(err))
        return False

    return True
def _list_agave(uri, globstr, agave):
    """
    List the contents of an agave URI filtered by a glob pattern.

    Args:
        uri: parsed URI to list.
        globstr: glob pattern used to filter the listing.
        agave: dict that contains:
            agave_wrapper: Agave wrapper object.

    Returns:
        On success: a list of file paths relative to the URI path.
        On failure: False.
    """
    # recurse fully for '**' globs; otherwise recurse only as deep as
    # the glob pattern itself
    if '**' in globstr:
        depth = -1
    else:
        depth = globstr.count('/') + 1

    file_list = agave['agave_wrapper'].files_list(
        uri['authority'],
        uri['chopped_path'],
        depth=depth
    )

    if file_list is False:
        Log.an().error(
            'cannot get file list for uri: %s', uri['chopped_uri']
        )
        return False

    # strip the URI path prefix, then filter with the glob pattern
    prefix_len = len(uri['chopped_path']) + 1
    relative_paths = [
        str(entry['path'] + '/' + entry['name'])[prefix_len:]
        for entry in file_list
    ]

    return glob.globfilter(
        relative_paths,
        globstr,
        flags=glob.EXTGLOB | glob.GLOBSTAR
    )
def _init_data_uri(self):
    """
    Create the output data URI for the source context (local).

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False.
    """
    source_uri = self._parsed_data_uris[self._source_context]

    # the source data URI must use the local scheme
    if source_uri['scheme'] != 'local':
        msg = 'invalid data uri scheme for this step: {}'.format(
            source_uri['scheme']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # delete the old folder if it exists and clean==True; a failed
    # delete is only a warning
    if DataManager.exists(parsed_uri=source_uri) and self._clean:
        if not DataManager.delete(parsed_uri=source_uri):
            Log.a().warning(
                'cannot delete existing data uri: %s',
                source_uri['chopped_uri']
            )

    # create the output folder
    if not DataManager.mkdir(parsed_uri=source_uri, recursive=True):
        msg = 'cannot create data uri: {}'.format(
            source_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    # create the _log folder inside it
    if not DataManager.mkdir(
            uri='{}/_log'.format(source_uri['chopped_uri']),
            recursive=True
    ):
        msg = 'cannot create _log folder in data uri: {}/_log'.format(
            source_uri['chopped_uri']
        )
        Log.an().error(msg)
        return self._fatal(msg)

    return True
def _init_archive_uri(self):
    """
    Initialize and validate the Agave job archive URI.

    Builds <agave work uri>/_agave_jobs, validates it, and creates
    the directory.

    Args:
        None.

    Returns:
        On success: True.
        On failure: False.
    """
    # an agave context is required in the job work uri
    if 'agave' not in self._parsed_job_work_uri:
        Log.an().error('job work uri must include an agave context')
        return False

    # construct and validate the archive URI
    work_uri = self._parsed_job_work_uri['agave']
    self._parsed_archive_uri = URIParser.parse(
        '{}/_agave_jobs'.format(work_uri['chopped_uri'])
    )
    if not self._parsed_archive_uri:
        Log.an().error('invalid job work uri: %s', work_uri)
        return False

    # create the archive directory
    if not DataManager.mkdir(
            parsed_uri=self._parsed_archive_uri,
            recursive=True,
            agave=self.get_context_options()
    ):
        Log.an().error(
            'cannot create agave archive uri: %s',
            self._parsed_archive_uri['chopped_uri']
        )
        return False

    return True
def _init_workflow_context_data(self): """ Initialize data components of workflow contexts. Args: None. Returns: On success: True. On failure: False. """ for exec_context in self._exec_contexts: if not self._workflow_context[exec_context].init_data(): msg = ( 'cannot initialize data for workflow context: {}'\ .format(exec_context) ) Log.an().error(msg) return self._fatal(msg) return True