def __init__(self, name, data_uris, source_context, clean=False):
    """
    Create a WorkflowInput.

    Records the input name on the instance and then passes the data URIs,
    source context, and clean flag on to the StageableData constructor.

    Args:
        self: class instance.
        name: name of the input.
        data_uris: dict of all data URIs, including source. Formatted as:
            {context: URI, context: URI, ...}
        source_context: source context of data (e.g., local). This can
            be different from the URI scheme.
        clean: remove old data directories? (default = False)

    Returns:
        WorkflowInput object.

    """
    # the input name is stored before the base class constructor runs
    self._name = name

    # remaining arguments go, in order, to the StageableData base class
    StageableData.__init__(
        self,
        data_uris,
        source_context,
        clean
    )
def initialize(self):
    """
    Initialize the WorkflowStep class.

    Runs the StageableData initialization and then the step-specific
    setup stages, strictly in order. The first stage that returns a
    falsy value ends the sequence: its message is logged as an error
    and handed to self._fatal().

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: the return value of self._fatal(msg), which this
            method's contract documents as False.

    """
    # step-specific stages, in execution order, each paired with the
    # message reported when that stage fails
    stages = (
        # create data uri in the source context
        ('_init_data_uri', 'cannot create data uris'),
        # make sure URIs for dependent steps match step dict
        # and app context
        (
            '_validate_depend_uris',
            'validation failed for dependent step uris'
        ),
        # build template replacement list
        (
            '_build_replace',
            'cannot build replacement strings for templates'
        ),
        # parse map uri
        ('_parse_map_uri', 'cannot parse map uri'),
    )

    failure = None

    # parse data uris in StageableData class; nothing else runs if this
    # fails
    if not StageableData.initialize(self):
        failure = 'cannot initialize data staging'
    else:
        for method_name, message in stages:
            # each method is looked up only when its turn comes, so a
            # stage that is never reached is never touched
            if not getattr(self, method_name)():
                failure = message
                break

    if failure is None:
        return True

    Log.an().error(failure)
    return self._fatal(failure)
def initialize(self):
    """
    Initialize the WorkflowInput class.

    The only work done here is the StageableData base class
    initialization; its outcome decides the result of this call.

    Args:
        self: class instance.

    Returns:
        On success: True.
        On failure: False (after logging an error).

    """
    # parse data URIs in StageableData class
    staged = StageableData.initialize(self)
    if staged:
        return True

    # base class could not be initialized: report it and signal failure
    Log.an().error('cannot initialize data staging')
    return False
def __init__(
        self,
        job,
        step,
        app,
        inputs,
        parameters,
        config,
        depend_uris,
        data_uris,
        source_context,
        clean=False
):
    """
    Create a WorkflowStep and store its constructor arguments.

    Nothing is validated or parsed here: the arguments are saved on the
    instance, the status is set to 'PENDING', the map/reduce and
    template-replacement containers are created empty, and the
    StageableData constructor is called last.

    Args:
        self: class instance.
        job: workflow job definition.
        step: dict of the normalized database record for step:
            {
                step_id:
                name:
                number:
                letter:
                app_id:
                app_name:
                map:
                    uri:
                    regex:
                template: (dict from json)
                    item: value,
                    item: value,...
                depend: [step1, step2, ...]
            }
        app: dict of normalized record for app:
            {
                app_id:
                name:
                description:
                username:
                type:
                definition:
                inputs: (dict from json)
                parameters: (dict from json)
                public:
            }
        inputs: dict of workflow-level inputs (in the correct step
            context).
        parameters: dict of workflow-level parameters.
        config: workflow config info, including database connection.
        depend_uris: dict of URIs for each dependent step:
            {step-name: URI struct, step-name: URI struct, ...}
        data_uris: dict of all output data URIs, including source.
            Formatted as:
            {context: URI, context: URI, ...}
        source_context: source context of data (e.g., local). This can
            be different from the URI scheme.
        clean: remove old data directories? (default = False)

    Returns:
        WorkflowStep object.

    """
    # database records for job, step, and app
    self._job, self._step, self._app = job, step, app

    # every step starts out pending
    self._status = 'PENDING'

    # workflow-level inputs and parameters, plus workflow config info
    self._inputs, self._parameters = inputs, parameters
    self._config = config

    # parsed URIs of dependent steps
    self._depend_uris = depend_uris

    # remove old data URI during init?
    self._clean = clean

    # map/reduce state: all containers start empty
    self._map = []
    self._map_uri = ''
    self._parsed_map_uri = {}

    # template replacement strings, also empty at construction
    self._replace = {}

    # init StageableData base class (kept as the final step)
    StageableData.__init__(
        self,
        data_uris,
        source_context,
        clean
    )