Example #1
0
 def _get_timeout(self, job: Job) -> Optional[int]:
     if isinstance(self.time_limit, int):
         timeout = self.time_limit
     elif isinstance(self.time_limit, Text):
         context = utils.build_context(job)
         timeout = int(
             eval_expression(expression=self.time_limit,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib))
     else:
         timeout = 0
     return timeout if timeout > 0 else None
Example #2
0
 async def execute(self, job: Job) -> CWLCommandOutput:
     """Evaluate this command's expression for a job and wrap the result.

     Always reports COMPLETED status with a zero exit code, since a
     failed expression evaluation raises instead of returning.
     """
     ctx = utils.build_context(job)
     # Stage the initial working directory, if one is configured
     if self.initial_work_dir is not None:
         await self._prepare_work_dir(job, ctx, self.initial_work_dir)
     logger.info('Evaluating expression for job {job}'.format(job=job.name))
     value = eval_expression(
         expression=self.expression,
         context=ctx,
         full_js=self.full_js,
         expression_lib=self.expression_lib,
         timeout=self._get_timeout(job))
     return CWLCommandOutput(
         value=value,
         status=Status.COMPLETED,
         exit_code=0)
Example #3
0
 async def execute(self, job: Job) -> CommandOutput:
     """Evaluate input expressions for a job, then run or skip the step.

     The step condition decides whether the updated inputs are returned
     with COMPLETED status, or a None-valued input mapping with SKIPPED
     status.
     """
     context = utils.build_context(job)
     logger.info('Executing job {job}'.format(job=job.name))
     # Evaluate each configured input expression, exposing the current
     # input value as `self` in the evaluation context
     evaluated = {}
     for name, expr in self.input_expressions.items():
         context = {**context, 'self': context['inputs'][name]}
         evaluated[name] = utils.eval_expression(
             expression=expr,
             context=context,
             full_js=self.full_js,
             expression_lib=self.expression_lib)
     context['inputs'] = {**context['inputs'], **evaluated}
     # NOTE(review): `_evaulate_condition` looks like a typo for
     # `_evaluate_condition`, but the call must match the sibling
     # method's actual name
     if not self._evaulate_condition(context):
         # Condition not met: skip the job, yielding None for every input
         return CWLCommandOutput(
             value={t.name: None for t in job.inputs},
             status=Status.SKIPPED,
             exit_code=0)
     # Condition met: propagate the processed inputs
     return CWLCommandOutput(
         value=context['inputs'],
         status=Status.COMPLETED,
         exit_code=0)
Example #4
0
 async def execute(self, job: Job) -> CWLCommandOutput:
     """Execute the job's command, locally or through a remote connector.

     Builds the evaluation context, prepares the working directory and
     environment, then runs the command either as a local subprocess
     (when the step has no target) or via the step's connector. Returns
     a CWLCommandOutput carrying the captured output, a status derived
     from the exit code, and the exit code itself.
     """
     context = utils.build_context(job)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug("Job {job} inputs: {inputs}".format(
             job=job.name,
             inputs=json.dumps(context['inputs'], indent=4,
                               sort_keys=True)))
     # Stage the initial working directory, if one is configured
     if self.initial_work_dir is not None:
         await self._prepare_work_dir(job, context, self.initial_work_dir)
     cmd = self._get_executable_command(context)
     # Evaluate environment-variable expressions into plain strings
     parsed_env = {
         k: str(
             eval_expression(expression=v,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib))
         for (k, v) in self.environment.items()
     }
     # Default HOME and TMPDIR to the job's directories when unset
     if 'HOME' not in parsed_env:
         parsed_env['HOME'] = job.output_directory
     if 'TMPDIR' not in parsed_env:
         parsed_env['TMPDIR'] = job.tmp_directory
     if self.step.target is None:
         # Local execution path
         if self.is_shell_command:
             cmd = ["/bin/sh", "-c", " ".join(cmd)]
         # Open streams
         stderr = self._get_stream(job, context, self.stderr, sys.stderr)
         stdin = self._get_stream(job,
                                  context,
                                  self.stdin,
                                  sys.stdin,
                                  is_input=True)
         # NOTE(review): the fallback stream for stdout is sys.stderr
         # (not sys.stdout); this matches the close logic below, but
         # confirm it is intentional
         stdout = self._get_stream(job, context, self.stdout, sys.stderr)
         # Execute command
         logger.info(
             'Executing job {job} into directory {outdir}: \n{command}'.
             format(job=job.name,
                    outdir=job.output_directory,
                    command=' \\\n\t'.join(cmd)))
         proc = await asyncio.create_subprocess_exec(
             *cmd,
             cwd=job.output_directory,
             env=parsed_env,
             stdin=stdin,
             stdout=stdout,
             stderr=stderr)
         # Wait for completion, honouring the configured time limit
         result, error = await asyncio.wait_for(proc.communicate(),
                                                self._get_timeout(job))
         exit_code = proc.returncode
         # Close streams (only the ones opened by this method)
         if stdin is not sys.stdin:
             stdin.close()
         if stdout is not sys.stderr:
             stdout.close()
         if stderr is not sys.stderr:
             stderr.close()
     else:
         # Remote execution path, through the step's connector
         connector = self.step.get_connector()
         resources = job.get_resources()
         logger.info(
             'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'
             .format(
                 job=job.name,
                 resource=resources[0] if resources else None,
                 outdir=job.output_directory,
                 command=' \\\n\t'.join([
                     "/bin/sh", "-c", "\"{cmd}\"".format(cmd=" ".join(cmd))
                 ] if self.is_shell_command else cmd)))
         if self.is_shell_command:
             # Base64-encode the command line so that quoting survives
             # the remote shell unchanged
             cmd = [
                 "/bin/sh", "-c",
                 "\"$(echo {command} | base64 -d)\"".format(
                     command=base64.b64encode(" ".join(cmd).encode(
                         'utf-8')).decode('utf-8'))
             ]
         # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
         if len(resources) > 1:
             available_resources = await connector.get_available_resources(
                 self.step.target.service)
             hosts = {
                 k: v.hostname
                 for k, v in available_resources.items() if k in resources
             }
             parsed_env['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
         # Process streams: stdout defaults to STDOUT, stderr to stdout
         stdin = eval_expression(expression=self.stdin,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib)
         stdout = eval_expression(expression=self.stdout,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib
                                  ) if self.stdout is not None else STDOUT
         stderr = eval_expression(expression=self.stderr,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib
                                  ) if self.stderr is not None else stdout
         # Execute remote command, honouring the configured time limit
         result, exit_code = await asyncio.wait_for(
             connector.run(resources[0] if resources else None,
                           cmd,
                           environment=parsed_env,
                           workdir=job.output_directory,
                           stdin=stdin,
                           stdout=stdout,
                           stderr=stderr,
                           capture_output=True,
                           job_name=job.name), self._get_timeout(job))
     # Handle exit codes: failure_codes take precedence, then
     # success_codes or the conventional zero exit code
     if self.failure_codes is not None and exit_code in self.failure_codes:
         status = Status.FAILED
     elif (self.success_codes is not None
           and exit_code in self.success_codes) or exit_code == 0:
         status = Status.COMPLETED
         if result:
             logger.info(result)
     else:
         status = Status.FAILED
     return CWLCommandOutput(value=result,
                             status=status,
                             exit_code=exit_code)
Example #5
0
 async def _get_value_from_command(self, job: Job,
                                   command_output: CWLCommandOutput):
     """Derive this output port's token value from a command's output.

     Resolution order: a `cwl.output.json` file found on any resource
     wins; otherwise the port's `glob` and/or `output_eval`
     specifications are applied; failing those, the raw command output
     (or the port's default value) is processed directly.
     """
     context = utils.build_context(job)
     path_processor = get_path_processor(self.port.step)
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     # Fall back to the configured default when the command produced no value
     token_value = command_output.value if command_output.value is not None else self.default_value
     # Check if file `cwl.output.json` exists either locally on at least one resource
     cwl_output_path = path_processor.join(job.output_directory,
                                           'cwl.output.json')
     for resource in resources:
         if await remotepath.exists(connector, resource, cwl_output_path):
             # If file exists, use its contents as token value
             token_value = json.loads(await remotepath.read(
                 connector, resource, cwl_output_path))
             break
     # If `token_value` is a dictionary, directly extract the token value from it
     if isinstance(token_value,
                   MutableMapping) and self.port.name in token_value:
         token = token_value[self.port.name]
         return await self._build_token_value(job, token)
     # Otherwise, generate the output object as described in `outputs` field
     if self.glob is not None:
         # Adjust glob path: evaluate it first when it is an expression
         if '$(' in self.glob or '${' in self.glob:
             globpath = utils.eval_expression(
                 expression=self.glob,
                 context=context,
                 full_js=self.full_js,
                 expression_lib=self.expression_lib)
         else:
             globpath = self.glob
         # Resolve glob on every resource; relative patterns are
         # anchored at the job's output directory
         resolve_tasks = []
         for resource in resources:
             if isinstance(globpath, MutableSequence):
                 for path in globpath:
                     if not path_processor.isabs(path):
                         path = path_processor.join(job.output_directory,
                                                    path)
                     resolve_tasks.append(
                         _expand_glob(connector, resource, path))
             else:
                 if not path_processor.isabs(globpath):
                     globpath = path_processor.join(job.output_directory,
                                                    globpath)
                 resolve_tasks.append(
                     _expand_glob(connector, resource, globpath))
         paths = flatten_list(await asyncio.gather(*resolve_tasks))
         # Cannot glob outside the job output folder
         for path in paths:
             if not path.startswith(job.output_directory):
                 raise WorkflowDefinitionException(
                     "Globs outside the job's output folder are not allowed"
                 )
         # Get token class from paths (File or Directory, concurrently)
         class_tasks = [
             asyncio.create_task(_get_class_from_path(p, job))
             for p in paths
         ]
         paths = [{
             'path': p,
             'class': c
         } for p, c in zip(paths, await asyncio.gather(*class_tasks))]
         # If evaluation is not needed, simply return paths as token value
         if self.output_eval is None:
             token_list = await self._build_token_value(job, paths)
             # Unwrap single-element lists; an empty match yields None
             return token_list if len(
                 token_list) > 1 else token_list[0] if len(
                     token_list) == 1 else None
         # Otherwise, fill context['self'] with glob data and proceed
         else:
             context['self'] = await self._build_token_value(job, paths)
     if self.output_eval is not None:
         # Fill context with exit code
         context['runtime']['exitCode'] = command_output.exit_code
         # Evaluate output
         token = utils.eval_expression(expression=self.output_eval,
                                       context=context,
                                       full_js=self.full_js,
                                       expression_lib=self.expression_lib)
         # Build token
         if isinstance(token, MutableSequence):
             paths = [{
                 'path': el['path'],
                 'class': el['class']
             } for el in token]
             return await self._build_token_value(job, paths)
         else:
             return await self._build_token_value(job, token)
     # As the default value (no return path is met in previous code), simply process the command output
     return await self._build_token_value(job, token_value)
Example #6
0
 async def _build_token_value(
         self,
         job: Job,
         token_value: Any,
         load_contents: Optional[bool] = None,
         load_listing: Optional[LoadListing] = None) -> Any:
     """Normalize a raw token value into its final CWL representation.

     Lists are processed element-wise; File/Directory objects are
     expanded into full file tokens, resolving their secondary files
     and, when only a `contents` field is given, materializing the file
     on the step's resources. None falls back to the port's default
     value; any other value is returned unchanged.

     :param job: the job the token belongs to (may be None)
     :param token_value: the raw value to process
     :param load_contents: whether to load file contents (defaults to
         the port-level setting)
     :param load_listing: directory listing policy (defaults to the
         port-level setting)
     """
     if load_contents is None:
         load_contents = self.load_contents
     if token_value is None:
         return self.default_value
     elif isinstance(token_value, MutableSequence):
         # Process list elements concurrently
         value_tasks = []
         for t in token_value:
             value_tasks.append(
                 asyncio.create_task(
                     # Fix: pass both flags by keyword. Previously
                     # `load_listing` was passed in the third positional
                     # slot, i.e. into the `load_contents` parameter.
                     self._build_token_value(
                         job,
                         t,
                         load_contents=load_contents,
                         load_listing=load_listing)))
         return await asyncio.gather(*value_tasks)
     elif (isinstance(token_value, MutableMapping) and token_value.get(
             'class', token_value.get('type')) in ['File', 'Directory']):
         step = job.step if job is not None else self.port.step
         # Get filepath
         filepath = get_path_from_token(token_value)
         if filepath is not None:
             # Process secondary files already present in the token value
             sf_map = {}
             if 'secondaryFiles' in token_value:
                 sf_tasks = []
                 for sf in token_value.get('secondaryFiles', []):
                     sf_path = get_path_from_token(sf)
                     path_processor = get_path_processor(step)
                     if not path_processor.isabs(sf_path):
                         # Fix: keep the joined path. The join result
                         # was previously discarded, leaving relative
                         # secondary-file paths unresolved.
                         sf_path = path_processor.join(
                             path_processor.dirname(filepath), sf_path)
                     sf_tasks.append(
                         asyncio.create_task(
                             _get_file_token(step=step,
                                             job=job,
                                             token_class=sf['class'],
                                             filepath=sf_path,
                                             basename=sf.get('basename'),
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                 sf_map = {
                     get_path_from_token(sf): sf
                     for sf in await asyncio.gather(*sf_tasks)
                 }
             # Compute the new token value
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
             # Compute new secondary files from port specification
             if self.secondary_files:
                 context = utils.build_context(job)
                 context['self'] = token_value
                 sf_tasks, sf_specs = [], []
                 for secondary_file in self.secondary_files:
                     # If pattern is an expression, evaluate it and process result
                     if '$(' in secondary_file.pattern or '${' in secondary_file.pattern:
                         sf_value = utils.eval_expression(
                             expression=secondary_file.pattern,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib)
                         # An expression may yield one value or a list
                         if isinstance(sf_value, MutableSequence):
                             for sf in sf_value:
                                 sf_tasks.append(
                                     asyncio.create_task(
                                         self._process_secondary_file(
                                             job=job,
                                             secondary_file=sf,
                                             token_value=token_value,
                                             from_expression=True,
                                             existing_sf=sf_map,
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                                 sf_specs.append(secondary_file)
                         else:
                             sf_tasks.append(
                                 asyncio.create_task(
                                     self._process_secondary_file(
                                         job=job,
                                         secondary_file=sf_value,
                                         token_value=token_value,
                                         from_expression=True,
                                         existing_sf=sf_map,
                                         load_contents=load_contents,
                                         load_listing=load_listing
                                         or self.load_listing)))
                             sf_specs.append(secondary_file)
                     # Otherwise, simply process the pattern string
                     else:
                         sf_tasks.append(
                             asyncio.create_task(
                                 self._process_secondary_file(
                                     job=job,
                                     secondary_file=secondary_file.pattern,
                                     token_value=token_value,
                                     from_expression=False,
                                     existing_sf=sf_map,
                                     load_contents=load_contents,
                                     load_listing=load_listing
                                     or self.load_listing)))
                         sf_specs.append(secondary_file)
                 # Collect resolved secondary files; a missing required
                 # one is a workflow execution error
                 for sf_value, sf_spec in zip(
                         await asyncio.gather(*sf_tasks), sf_specs):
                     if sf_value is not None:
                         sf_map[get_path_from_token(sf_value)] = sf_value
                     elif sf_spec.required:
                         raise WorkflowExecutionException(
                             "Required secondary file {sf} not found".
                             format(sf=sf_spec.pattern))
             # Add all secondary files to the token
             if sf_map:
                 token_value['secondaryFiles'] = list(sf_map.values())
         # If there is only a 'contents' field, create a file on the step's resource and build the token
         elif 'contents' in token_value:
             path_processor = get_path_processor(self.port.step)
             filepath = path_processor.join(
                 job.output_directory,
                 token_value.get('basename', random_name()))
             # NOTE(review): this branch dereferences `job` directly,
             # so it assumes job is not None here — confirm callers
             connector = job.step.get_connector()
             resources = (job.get_resources()
                          or [None]) if job is not None else [None]
             # Write the contents on every resource concurrently
             await asyncio.gather(*[
                 asyncio.create_task(
                     remotepath.write(connector, res, filepath,
                                      token_value['contents']))
                 for res in resources
             ])
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
     return token_value