def _get_timeout(self, job: Job) -> Optional[int]:
    """Resolve the configured time limit into seconds, or ``None``.

    An integer limit is taken as-is; a string limit is treated as a CWL
    expression and evaluated against the job's context.  Any other value
    (or a non-positive result) disables the timeout by returning ``None``.
    """
    limit = self.time_limit
    if isinstance(limit, int):
        seconds = limit
    elif isinstance(limit, Text):
        # String limits are CWL expressions: evaluate them in the job context
        seconds = int(
            eval_expression(
                expression=limit,
                context=utils.build_context(job),
                full_js=self.full_js,
                expression_lib=self.expression_lib))
    else:
        seconds = 0
    return None if seconds <= 0 else seconds
async def execute(self, job: Job) -> CWLCommandOutput:
    """Evaluate this command's CWL expression and wrap the result.

    Stages the initial work directory (if one is configured) before
    evaluation, and enforces the configured time limit on the expression.
    """
    ctx = utils.build_context(job)
    # Stage InitialWorkDirRequirement entries, if any, before evaluating
    if self.initial_work_dir is not None:
        await self._prepare_work_dir(job, ctx, self.initial_work_dir)
    logger.info('Evaluating expression for job {job}'.format(job=job.name))
    value = eval_expression(
        expression=self.expression,
        context=ctx,
        full_js=self.full_js,
        expression_lib=self.expression_lib,
        timeout=self._get_timeout(job))
    # Expression evaluation has no process exit code: report success directly
    return CWLCommandOutput(
        value=value,
        status=Status.COMPLETED,
        exit_code=0)
async def execute(self, job: Job) -> CommandOutput:
    """Evaluate per-input expressions, then test this step's condition.

    When the condition holds, the (possibly updated) inputs are returned
    with COMPLETED status; otherwise a null value is emitted for every
    input token with SKIPPED status.
    """
    context = utils.build_context(job)
    logger.info('Executing job {job}'.format(job=job.name))
    # Evaluate each input expression with `self` bound to that input's value
    evaluated = {}
    for name, expr in self.input_expressions.items():
        context = dict(context, self=context['inputs'][name])
        evaluated[name] = utils.eval_expression(
            expression=expr,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib)
    context['inputs'] = dict(context['inputs'], **evaluated)
    # NOTE(review): `_evaulate_condition` is misspelled, but the name must
    # match the method defined elsewhere in this class — do not "fix" here.
    if not self._evaulate_condition(context):
        # Condition not satisfied: skip, emitting None for every input token
        return CWLCommandOutput(
            value={token.name: None for token in job.inputs},
            status=Status.SKIPPED,
            exit_code=0)
    # Condition satisfied: propagate the updated inputs
    return CWLCommandOutput(
        value=context['inputs'],
        status=Status.COMPLETED,
        exit_code=0)
async def execute(self, job: Job) -> CWLCommandOutput:
    """Run the job's command line, locally or on a remote target.

    Builds the environment from the CWL `environment` expressions, stages
    the initial work directory, then either spawns a local subprocess
    (when the step has no target) or dispatches the command through the
    step's connector.  The exit code is mapped to a workflow Status via
    `success_codes` / `failure_codes`.
    """
    context = utils.build_context(job)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Job {job} inputs: {inputs}".format(
            job=job.name,
            inputs=json.dumps(context['inputs'], indent=4, sort_keys=True)))
    # Stage InitialWorkDirRequirement entries, if any, before execution
    if self.initial_work_dir is not None:
        await self._prepare_work_dir(job, context, self.initial_work_dir)
    cmd = self._get_executable_command(context)
    # Evaluate every environment value as a CWL expression and stringify it
    parsed_env = {
        k: str(
            eval_expression(expression=v,
                            context=context,
                            full_js=self.full_js,
                            expression_lib=self.expression_lib))
        for (k, v) in self.environment.items()
    }
    # CWL requires HOME/TMPDIR to point at the job's designated directories
    if 'HOME' not in parsed_env:
        parsed_env['HOME'] = job.output_directory
    if 'TMPDIR' not in parsed_env:
        parsed_env['TMPDIR'] = job.tmp_directory
    if self.step.target is None:
        # Local execution path
        if self.is_shell_command:
            cmd = ["/bin/sh", "-c", " ".join(cmd)]
        # Open streams
        stderr = self._get_stream(job, context, self.stderr, sys.stderr)
        stdin = self._get_stream(job, context, self.stdin, sys.stdin,
                                 is_input=True)
        # NOTE(review): stdout deliberately falls back to sys.stderr here
        # (the close logic below checks `is not sys.stderr`) — confirm this
        # default is intended rather than sys.stdout.
        stdout = self._get_stream(job, context, self.stdout, sys.stderr)
        # Execute command
        logger.info(
            'Executing job {job} into directory {outdir}: \n{command}'.format(
                job=job.name,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd)))
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=job.output_directory,
            env=parsed_env,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        # Wait for completion, bounded by the configured time limit
        # (the `error` half of communicate() is currently unused)
        result, error = await asyncio.wait_for(proc.communicate(),
                                               self._get_timeout(job))
        exit_code = proc.returncode
        # Close streams (only the ones opened above, not the process std*)
        if stdin is not sys.stdin:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        if stderr is not sys.stderr:
            stderr.close()
    else:
        # Remote execution path: delegate to the step's connector
        connector = self.step.get_connector()
        resources = job.get_resources()
        logger.info(
            'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'
            .format(
                job=job.name,
                resource=resources[0] if resources else None,
                outdir=job.output_directory,
                command=' \\\n\t'.join([
                    "/bin/sh", "-c", "\"{cmd}\"".format(cmd=" ".join(cmd))
                ] if self.is_shell_command else cmd)))
        if self.is_shell_command:
            # Base64-encode the command line so quoting survives the remote shell
            cmd = [
                "/bin/sh", "-c",
                "\"$(echo {command} | base64 -d)\"".format(
                    command=base64.b64encode(" ".join(cmd).encode(
                        'utf-8')).decode('utf-8'))
            ]
        # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
        if len(resources) > 1:
            available_resources = await connector.get_available_resources(
                self.step.target.service)
            hosts = {
                k: v.hostname
                for k, v in available_resources.items() if k in resources
            }
            parsed_env['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
        # Process streams: each is a CWL expression naming a file/stream
        stdin = eval_expression(expression=self.stdin,
                                context=context,
                                full_js=self.full_js,
                                expression_lib=self.expression_lib)
        stdout = eval_expression(expression=self.stdout,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib
                                 ) if self.stdout is not None else STDOUT
        # stderr defaults to wherever stdout goes when not specified
        stderr = eval_expression(expression=self.stderr,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib
                                 ) if self.stderr is not None else stdout
        # Execute remote command (bounded by the configured time limit)
        result, exit_code = await asyncio.wait_for(
            connector.run(resources[0] if resources else None,
                          cmd,
                          environment=parsed_env,
                          workdir=job.output_directory,
                          stdin=stdin,
                          stdout=stdout,
                          stderr=stderr,
                          capture_output=True,
                          job_name=job.name),
            self._get_timeout(job))
    # Handle exit codes: failure_codes wins over success_codes/zero
    if self.failure_codes is not None and exit_code in self.failure_codes:
        status = Status.FAILED
    elif (self.success_codes is not None
          and exit_code in self.success_codes) or exit_code == 0:
        status = Status.COMPLETED
        if result:
            logger.info(result)
    else:
        status = Status.FAILED
    return CWLCommandOutput(value=result,
                            status=status,
                            exit_code=exit_code)
async def _get_value_from_command(self, job: Job,
                                  command_output: CWLCommandOutput):
    """Derive this output port's token value from a finished command.

    Resolution order: a `cwl.output.json` file on any resource overrides
    the raw command output; a dict containing this port's name is used
    directly; otherwise the `glob` and/or `outputEval` bindings are applied;
    failing all of those, the raw command output (or the port default) is
    processed as-is.
    """
    context = utils.build_context(job)
    path_processor = get_path_processor(self.port.step)
    connector = job.step.get_connector()
    # A job with no assigned resources is treated as a single local one
    resources = job.get_resources() or [None]
    token_value = command_output.value if command_output.value is not None else self.default_value
    # Check if file `cwl.output.json` exists either locally on at least one resource
    cwl_output_path = path_processor.join(job.output_directory,
                                          'cwl.output.json')
    for resource in resources:
        if await remotepath.exists(connector, resource, cwl_output_path):
            # If file exists, use its contents as token value
            token_value = json.loads(await remotepath.read(
                connector, resource, cwl_output_path))
            break
    # If `token_value` is a dictionary, directly extract the token value from it
    if isinstance(token_value,
                  MutableMapping) and self.port.name in token_value:
        token = token_value[self.port.name]
        return await self._build_token_value(job, token)
    # Otherwise, generate the output object as described in `outputs` field
    if self.glob is not None:
        # Adjust glob path: evaluate it only if it looks like an expression
        if '$(' in self.glob or '${' in self.glob:
            globpath = utils.eval_expression(
                expression=self.glob,
                context=context,
                full_js=self.full_js,
                expression_lib=self.expression_lib)
        else:
            globpath = self.glob
        # Resolve glob on every resource concurrently; relative patterns
        # are anchored at the job's output directory
        resolve_tasks = []
        for resource in resources:
            if isinstance(globpath, MutableSequence):
                for path in globpath:
                    if not path_processor.isabs(path):
                        path = path_processor.join(job.output_directory,
                                                   path)
                    resolve_tasks.append(
                        _expand_glob(connector, resource, path))
            else:
                if not path_processor.isabs(globpath):
                    globpath = path_processor.join(job.output_directory,
                                                   globpath)
                resolve_tasks.append(
                    _expand_glob(connector, resource, globpath))
        paths = flatten_list(await asyncio.gather(*resolve_tasks))
        # Cannot glob outside the job output folder
        for path in paths:
            if not path.startswith(job.output_directory):
                raise WorkflowDefinitionException(
                    "Globs outside the job's output folder are not allowed")
        # Get token class (File/Directory) from paths
        class_tasks = [
            asyncio.create_task(_get_class_from_path(p, job)) for p in paths
        ]
        paths = [{
            'path': p,
            'class': c
        } for p, c in zip(paths, await asyncio.gather(*class_tasks))]
        # If evaluation is not needed, simply return paths as token value
        # (a single match is unwrapped; no match yields None)
        if self.output_eval is None:
            token_list = await self._build_token_value(job, paths)
            return token_list if len(
                token_list) > 1 else token_list[0] if len(
                    token_list) == 1 else None
        # Otherwise, fill context['self'] with glob data and proceed
        else:
            context['self'] = await self._build_token_value(job, paths)
    if self.output_eval is not None:
        # Fill context with exit code so `outputEval` can inspect it
        context['runtime']['exitCode'] = command_output.exit_code
        # Evaluate output
        token = utils.eval_expression(expression=self.output_eval,
                                      context=context,
                                      full_js=self.full_js,
                                      expression_lib=self.expression_lib)
        # Build token: a list of file maps is narrowed to path/class pairs
        if isinstance(token, MutableSequence):
            paths = [{
                'path': el['path'],
                'class': el['class']
            } for el in token]
            return await self._build_token_value(job, paths)
        else:
            return await self._build_token_value(job, token)
    # As the default value (no return path is met in previous code), simply process the command output
    return await self._build_token_value(job, token_value)
async def _build_token_value(
        self,
        job: Job,
        token_value: Any,
        load_contents: Optional[bool] = None,
        load_listing: Optional[LoadListing] = None) -> Any:
    """Normalise a raw command result into a CWL token value.

    File/Directory maps are expanded into full CWL file tokens (including
    secondary files and optional contents/listing); lists are processed
    element-wise and concurrently; ``None`` falls back to the port's
    default value; anything else is returned unchanged.

    :param job: the job whose output is being processed (may be None)
    :param token_value: the raw value to normalise
    :param load_contents: overrides the port's `loadContents` when not None
    :param load_listing: overrides the port's `loadListing` when truthy
    """
    if load_contents is None:
        load_contents = self.load_contents
    if token_value is None:
        return self.default_value
    elif isinstance(token_value, MutableSequence):
        value_tasks = []
        for t in token_value:
            value_tasks.append(
                asyncio.create_task(
                    # Fix: pass both overrides by keyword — previously
                    # `load_listing` was passed positionally into the
                    # `load_contents` slot and `load_contents` was dropped
                    self._build_token_value(
                        job,
                        t,
                        load_contents=load_contents,
                        load_listing=load_listing)))
        return await asyncio.gather(*value_tasks)
    elif (isinstance(token_value, MutableMapping) and token_value.get(
            'class', token_value.get('type')) in ['File', 'Directory']):
        step = job.step if job is not None else self.port.step
        # Get filepath
        filepath = get_path_from_token(token_value)
        if filepath is not None:
            # Process secondary files already present in the token value
            sf_map = {}
            if 'secondaryFiles' in token_value:
                sf_tasks = []
                for sf in token_value.get('secondaryFiles', []):
                    sf_path = get_path_from_token(sf)
                    path_processor = get_path_processor(step)
                    if not path_processor.isabs(sf_path):
                        # Fix: bind the join result — it was previously
                        # discarded, leaving relative paths unresolved
                        sf_path = path_processor.join(
                            path_processor.dirname(filepath), sf_path)
                    sf_tasks.append(
                        asyncio.create_task(
                            _get_file_token(step=step,
                                            job=job,
                                            token_class=sf['class'],
                                            filepath=sf_path,
                                            basename=sf.get('basename'),
                                            load_contents=load_contents,
                                            load_listing=load_listing
                                            or self.load_listing)))
                sf_map = {
                    get_path_from_token(sf): sf
                    for sf in await asyncio.gather(*sf_tasks)
                }
            # Compute the new token value
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class',
                                            token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
            # Compute new secondary files from port specification
            if self.secondary_files:
                context = utils.build_context(job)
                context['self'] = token_value
                sf_tasks, sf_specs = [], []
                for secondary_file in self.secondary_files:
                    # If pattern is an expression, evaluate it and process result
                    if ('$(' in secondary_file.pattern
                            or '${' in secondary_file.pattern):
                        sf_value = utils.eval_expression(
                            expression=secondary_file.pattern,
                            context=context,
                            full_js=self.full_js,
                            expression_lib=self.expression_lib)
                        if isinstance(sf_value, MutableSequence):
                            # One spec entry per task keeps sf_tasks/sf_specs
                            # aligned for the zip below
                            for sf in sf_value:
                                sf_tasks.append(
                                    asyncio.create_task(
                                        self._process_secondary_file(
                                            job=job,
                                            secondary_file=sf,
                                            token_value=token_value,
                                            from_expression=True,
                                            existing_sf=sf_map,
                                            load_contents=load_contents,
                                            load_listing=load_listing
                                            or self.load_listing)))
                                sf_specs.append(secondary_file)
                        else:
                            sf_tasks.append(
                                asyncio.create_task(
                                    self._process_secondary_file(
                                        job=job,
                                        secondary_file=sf_value,
                                        token_value=token_value,
                                        from_expression=True,
                                        existing_sf=sf_map,
                                        load_contents=load_contents,
                                        load_listing=load_listing
                                        or self.load_listing)))
                            sf_specs.append(secondary_file)
                    # Otherwise, simply process the pattern string
                    else:
                        sf_tasks.append(
                            asyncio.create_task(
                                self._process_secondary_file(
                                    job=job,
                                    secondary_file=secondary_file.pattern,
                                    token_value=token_value,
                                    from_expression=False,
                                    existing_sf=sf_map,
                                    load_contents=load_contents,
                                    load_listing=load_listing
                                    or self.load_listing)))
                        sf_specs.append(secondary_file)
                for sf_value, sf_spec in zip(
                        await asyncio.gather(*sf_tasks), sf_specs):
                    if sf_value is not None:
                        sf_map[get_path_from_token(sf_value)] = sf_value
                    elif sf_spec.required:
                        raise WorkflowExecutionException(
                            "Required secondary file {sf} not found".format(
                                sf=sf_spec.pattern))
            # Add all secondary files to the token
            if sf_map:
                token_value['secondaryFiles'] = list(sf_map.values())
        # If there is only a 'contents' field, create a file on the step's resource and build the token
        elif 'contents' in token_value:
            path_processor = get_path_processor(self.port.step)
            filepath = path_processor.join(
                job.output_directory,
                token_value.get('basename', random_name()))
            connector = job.step.get_connector()
            resources = ((job.get_resources() or [None])
                         if job is not None else [None])
            # Materialise the literal contents on every resource concurrently
            await asyncio.gather(*[
                asyncio.create_task(
                    remotepath.write(connector, res, filepath,
                                     token_value['contents']))
                for res in resources
            ])
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class',
                                            token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
    return token_value