Example #1
 async def compute_token(self, job: Job,
                         command_output: JupyterCommandOutput) -> Token:
     path_processor = utils.get_path_processor(self.port.step)
     if self.value is not None:
         connector = job.step.get_connector() if job is not None else None
         resources = job.get_resources() or [None]
         if job.output_directory and not path_processor.isabs(self.value):
             pattern = path_processor.join(job.output_directory, self.value)
         else:
             pattern = self.value
         token_value = utils.flatten_list(await asyncio.gather(*[
             asyncio.create_task(
                 remotepath.resolve(
                     connector=connector, target=resource, pattern=pattern))
             for resource in resources
         ]))
         if len(token_value) == 1:
             token_value = token_value[0]
     else:
         token_value = command_output.user_ns.get(self.value_from)
     if job.output_directory:
         if isinstance(token_value, MutableSequence):
             token_value = [
                 path_processor.join(job.output_directory, t)
                 if not path_processor.isabs(t) else t for t in token_value
             ]
         else:
             if not path_processor.isabs(token_value):
                 token_value = path_processor.join(job.output_directory,
                                                   token_value)
     return Token(name=self.port.name,
                  value=token_value,
                  job=job.name,
                  tag=utils.get_tag(job.inputs))
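
The snippet above leans on two helpers whose implementations are not shown. A minimal sketch of what they plausibly do (assumed behavior, not the project's actual code): `utils.get_path_processor` picks the path module matching the step's execution target, and `utils.flatten_list` flattens one level of nesting.

import os
import posixpath
from typing import Any, List, MutableSequence

def get_path_processor(step) -> Any:
    # Assumption: steps deployed on a remote target manipulate POSIX
    # paths, while purely local steps use the host's os.path module.
    return posixpath if getattr(step, 'target', None) is not None else os.path

def flatten_list(hierarchical_list: List[Any]) -> List[Any]:
    # Flatten one level of nesting, keeping scalar items as they are.
    flat: List[Any] = []
    for item in hierarchical_list:
        if isinstance(item, MutableSequence):
            flat.extend(item)
        else:
            flat.append(item)
    return flat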
Example #2
 async def _deserialize_namespace(
         self, job: Job, output_serializers: MutableMapping[Text, Any],
         remote_path: Text) -> MutableMapping[Text, Any]:
     if remote_path:
         with TemporaryDirectory() as d:
             path_processor = get_path_processor(self.step)
             dest_path = os.path.join(d,
                                      path_processor.basename(remote_path))
             await self.step.context.data_manager.transfer_data(
                 src=remote_path, src_job=job, dst=dest_path, dst_job=None)
             with open(dest_path, 'rb') as f:
                 namespace = dill.load(f)
             for name, value in namespace.items():
                 if name in output_serializers:
                     intermediate_type = output_serializers[name].get(
                         'type', 'name')
                     if intermediate_type == 'file':
                         dest_path = os.path.join(
                             mkdtemp(),
                             path_processor.basename(namespace[name]))
                         await self.step.context.data_manager.transfer_data(
                             src=namespace[name],
                             src_job=job,
                             dst=dest_path,
                             dst_job=None)
                         namespace[name] = dest_path
             return {
                 k: executor.postload(compiler=self.compiler,
                                      name=k,
                                      value=v,
                                      serializer=output_serializers.get(k))
                 for k, v in namespace.items()
             }
     else:
         return {}
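
The call to `executor.postload` is the only part not shown. Given the serializer layout used above (a mapping that may carry a 'postload' hook), a plausible sketch of its contract, assumed rather than taken from the project:

import dill
from typing import Any, MutableMapping, Optional

def postload(compiler: Any,
             name: str,
             value: Any,
             serializer: Optional[MutableMapping[str, Any]]) -> Any:
    # Assumed contract: when the serializer defines a 'postload' hook
    # (a callable, possibly dill-serialized), apply it to the loaded
    # value; otherwise return the value unchanged.
    if serializer is None or 'postload' not in serializer:
        return value
    hook = serializer['postload']
    if isinstance(hook, bytes):
        hook = dill.loads(hook)
    return hook(compiler, name, value)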
Example #3
 async def collect_output(self, token: Token, output_dir: Text) -> Token:
     if isinstance(token.job, MutableSequence):
         return token.update(await asyncio.gather(*[
             asyncio.create_task(self.collect_output(t, output_dir))
             for t in token.value
         ]))
     elif isinstance(token.value, MutableSequence):
         token_list = await asyncio.gather(*[
             asyncio.create_task(
                 self.collect_output(token.update(t), output_dir))
             for t in token.value
         ])
         return token.update([t.value for t in token_list])
     context = self.port.step.context
     path_processor = utils.get_path_processor(self.port.step)
     src_path = token.value
     src_job = context.scheduler.get_job(token.job)
     dest_path = os.path.join(output_dir, path_processor.basename(src_path))
     # Transfer file to local destination
     await context.data_manager.transfer_data(
         src=src_path,
         src_job=src_job,
         dst=dest_path,
         dst_job=None,
         writable=True)
     # Update token
     return token.update(dest_path)
Example #4
 def _process_sf_path(self, pattern: Text, primary_path: Text) -> Text:
     if pattern.startswith('^'):
         path_processor = get_path_processor(self.port.step)
         return self._process_sf_path(
             pattern[1:],
             path_processor.splitext(primary_path)[0])
     else:
         return primary_path + pattern
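
The caret rule above follows CWL secondaryFiles semantics: each leading `^` strips one extension from the primary path before the remaining suffix is appended. A standalone, iterative sketch of the same logic over plain os.path:

import os.path

def process_sf_path(pattern: str, primary_path: str) -> str:
    # Each leading '^' removes one extension from the primary path;
    # whatever remains of the pattern is appended as a suffix.
    while pattern.startswith('^'):
        pattern = pattern[1:]
        primary_path = os.path.splitext(primary_path)[0]
    return primary_path + pattern

assert process_sf_path('.bai', '/data/a.bam') == '/data/a.bam.bai'
assert process_sf_path('^.bai', '/data/a.bam') == '/data/a.bai'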
Example #5
 async def _transfer_file(self, job: Job, path: Text) -> Text:
     path_processor = get_path_processor(self.step)
     dest_path = path_processor.join(job.input_directory,
                                     os.path.basename(path))
     await self.step.context.data_manager.transfer_data(src=path,
                                                        src_job=None,
                                                        dst=dest_path,
                                                        dst_job=job,
                                                        writable=True)
     return dest_path
Example #6
 async def _process_secondary_file(
     self, job: Job, secondary_file: Any, token_value: MutableMapping[Text,
                                                                      Any],
     from_expression: bool, existing_sf: MutableMapping[Text, Any],
     load_contents: bool, load_listing: Optional[LoadListing]
 ) -> Optional[MutableMapping[Text, Any]]:
     step = job.step if job is not None else self.port.step
     # If value is None, simply return None
     if secondary_file is None:
         return None
     # If value is a dictionary, build the corresponding file token
     elif isinstance(secondary_file, MutableMapping):
         connector = step.get_connector()
         filepath = utils.get_path_from_token(secondary_file)
         for resource in ((job.get_resources() or [None])
                          if job is not None else [None]):
             if await remotepath.exists(connector, resource, filepath):
                 return await _get_file_token(
                     step=step,
                     job=job,
                     token_class=secondary_file['class'],
                     filepath=filepath,
                     basename=secondary_file.get('basename'),
                     load_contents=load_contents,
                     load_listing=load_listing)
     # If value is a string
     else:
         # If value doesn't come from an expression, apply it to the primary path
         filepath = (
             secondary_file if from_expression else self._process_sf_path(
                 secondary_file, utils.get_path_from_token(token_value)))
         path_processor = get_path_processor(step)
         if not path_processor.isabs(filepath):
             filepath = path_processor.join(
                 path_processor.dirname(get_path_from_token(token_value)),
                 filepath)
         if filepath not in existing_sf:
             # Search file in job resources and build token value
             connector = step.get_connector()
             for resource in ((job.get_resources() or [None])
                              if job is not None else [None]):
                 if await remotepath.exists(connector, resource, filepath):
                     token_class = 'File' if await remotepath.isfile(
                         connector, resource, filepath) else 'Directory'
                     return await _get_file_token(
                         step=step,
                         job=job,
                         token_class=token_class,
                         filepath=filepath,
                         load_contents=load_contents,
                         load_listing=load_listing)
         else:
             return existing_sf[filepath]
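
`get_path_from_token` is assumed to extract a filesystem path from a CWL File/Directory object, preferring 'path' and falling back to 'location' with any file:// scheme stripped; a plausible sketch:

from typing import Any, MutableMapping, Optional

def get_path_from_token(token_value: MutableMapping[str, Any]) -> Optional[str]:
    # Prefer the explicit 'path' field; otherwise derive the path from
    # 'location', stripping a leading file:// scheme when present.
    path = token_value.get('path')
    if path is not None:
        return path
    location = token_value.get('location')
    if isinstance(location, str) and location.startswith('file://'):
        return location[7:]
    return location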
Example #7
 async def _transfer_file(self,
                          src_job: Optional[Job],
                          dest_job: Optional[Job],
                          src_path: Text,
                          dest_path: Optional[Text] = None,
                          writable: Optional[bool] = None) -> Text:
     if dest_path is None:
         if isinstance(self.port, InputPort) and src_job is not None:
             if src_path.startswith(src_job.output_directory):
                 path_processor = get_path_processor(
                     self.port.dependee.step)
                 relpath = path_processor.relpath(
                     path_processor.normpath(src_path),
                     src_job.output_directory)
                 path_processor = get_path_processor(self.port.step)
                 dest_path = path_processor.join(dest_job.input_directory,
                                                 relpath)
             else:
                 path_processor = get_path_processor(
                     self.port.dependee.step)
                 basename = path_processor.basename(
                     path_processor.normpath(src_path))
                 path_processor = get_path_processor(self.port.step)
                 dest_path = path_processor.join(dest_job.input_directory,
                                                 basename)
         else:
             path_processor = get_path_processor(self.port.step)
             dest_path = path_processor.join(
                 dest_job.input_directory,
                 os.path.basename(os.path.normpath(src_path)))
     await self.get_context().data_manager.transfer_data(
         src=src_path,
         src_job=src_job,
         dst=dest_path,
         dst_job=dest_job,
         writable=writable if writable is not None else self.writable)
     return dest_path
Example #8
async def _get_file_token(
        step: Step,
        job: Job,
        token_class: Text,
        filepath: Text,
        basename: Optional[Text] = None,
        load_contents: bool = False,
        load_listing: Optional[LoadListing] = None
) -> MutableMapping[Text, Any]:
    connector = step.get_connector()
    resources = (job.get_resources() or [None]) if job is not None else [None]
    path_processor = get_path_processor(step)
    basename = basename or path_processor.basename(filepath)
    location = ''.join(['file://', filepath])
    token = {
        'class': token_class,
        'location': location,
        'basename': basename,
        'path': filepath,
        'dirname': path_processor.dirname(filepath)
    }
    if token_class == 'File':
        token['nameroot'], token['nameext'] = path_processor.splitext(basename)
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['size'] = await remotepath.size(connector, resource,
                                                      filepath)
                if load_contents:
                    if token['size'] > CONTENT_LIMIT:
                        raise WorkflowExecutionException(
                            "Cannot read contents from files larger than {limit}kB"
                            .format(limit=CONTENT_LIMIT / 1024))
                    token['contents'] = await remotepath.head(
                        connector, resource, filepath, CONTENT_LIMIT)
                filepath = await remotepath.follow_symlink(
                    connector, resource, filepath)
                token['checksum'] = 'sha1${checksum}'.format(
                    checksum=await remotepath.checksum(connector, resource,
                                                       filepath))
                break
    elif token_class == 'Directory' and load_listing != LoadListing.no_listing:
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['listing'] = await _get_listing(
                    step, job, filepath, load_contents,
                    load_listing == LoadListing.deep_listing)
                break
    return token
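
For comparison, a local-only sketch that produces the same token shape with hashlib and os in place of the remotepath helpers (field names follow the CWL File object; the 64 KiB value for CONTENT_LIMIT is an assumption):

import hashlib
import os
from typing import Any, MutableMapping

CONTENT_LIMIT = 64 * 1024  # assumed cap, mirroring the CWL contents limit

def get_local_file_token(filepath: str,
                         load_contents: bool = False) -> MutableMapping[str, Any]:
    basename = os.path.basename(filepath)
    nameroot, nameext = os.path.splitext(basename)
    token = {
        'class': 'File',
        'location': 'file://' + filepath,
        'path': filepath,
        'basename': basename,
        'dirname': os.path.dirname(filepath),
        'nameroot': nameroot,
        'nameext': nameext,
        'size': os.path.getsize(filepath),
    }
    if load_contents:
        with open(filepath, 'rb') as f:
            token['contents'] = f.read(CONTENT_LIMIT).decode(errors='replace')
    # Follow symlinks before checksumming, as the remote version does
    with open(os.path.realpath(filepath), 'rb') as f:
        token['checksum'] = 'sha1$' + hashlib.sha1(f.read()).hexdigest()
    return token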
Example #9
 def _get_stream(self,
                 job: Job,
                 context: MutableMapping[Text, Any],
                 stream: Optional[Text],
                 default_stream: IO,
                 is_input: bool = False) -> IO:
     if isinstance(stream, str):
         stream = eval_expression(expression=stream,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib)
         path_processor = get_path_processor(self.step)
         if not path_processor.isabs(stream):
             basedir = job.input_directory if is_input else job.output_directory
             stream = path_processor.join(basedir, stream)
         return open(stream, "rb" if is_input else "wb")
     else:
         return default_stream
Example #10
 async def update_token(self, job: Job, token: Token) -> Token:
     if isinstance(token.job, MutableSequence):
         return token.update(await asyncio.gather(*[
             asyncio.create_task(self.update_token(job, t))
             for t in token.value
         ]))
     elif isinstance(token.value, MutableSequence):
         token_list = await asyncio.gather(*[
             asyncio.create_task(self.update_token(job, token.update(t)))
             for t in token.value
         ])
         return token.update([t.value for t in token_list])
     src_job = self.get_context().scheduler.get_job(token.job)
     path_processor = utils.get_path_processor(self.port.step)
     token_value = dill.loads(token.value) if isinstance(
         token.value, bytes) else token.value
     dest_path = path_processor.join(job.input_directory,
                                     os.path.basename(token_value))
     await self.port.step.context.data_manager.transfer_data(
         src=token_value, src_job=src_job, dst=dest_path, dst_job=job)
     return token.update(dill.dumps(dest_path))
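
Note that token values cross step boundaries here as dill payloads, so both sides must agree on the encoding; the round trip is simply:

import dill

payload = dill.dumps('/job/input/data.txt')  # bytes stored in the token
assert dill.loads(payload) == '/job/input/data.txt'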
Example #11
 async def execute(self, job: Job) -> CommandOutput:
     connector = self.step.get_connector()
     # Transfer executor file to remote resource
     executor_path = await self._transfer_file(job, executor.__file__)
     # Modify code, environment and namespaces according to inputs
     input_names = {}
     environment = {}
     for token in job.inputs:
         if token.value is not None:
             command_token = self.input_tokens[token.name]
             token_value = ([token.value] if isinstance(
                 self.step.input_ports[token.name], ScatterInputPort) else
                            token.value)
             if command_token.token_type in ('file', 'name'):
                 input_names[token.name] = token_value
             elif command_token.token_type == 'env':
                 environment[token.name] = token_value
     # List output names to be retrieved from remote context
     output_names = [
         name for name, p in self.step.output_ports.items()
         if name != executor.CELL_OUTPUT
     ]
     # Serialize AST nodes to remote resource
     code_path = await self._serialize_to_remote_file(job, self.ast_nodes)
     # Configure output file path
     path_processor = get_path_processor(self.step)
     output_path = path_processor.join(job.output_directory, random_name())
     # Extract serializers from command tokens
     input_serializers = {
         k: v.serializer
         for k, v in self.input_tokens.items() if v.serializer is not None
     }
     output_serializers = {
         k: v.serializer
         for k, v in self.output_tokens.items() if v.serializer is not None
     }
     # Serialize namespaces to remote resource
     user_ns_path = await self._serialize_namespace(
         input_serializers=input_serializers,
         job=job,
         namespace=input_names)
     # Create dictionaries of postload input serializers and predump output serializers
     postload_input_serializers = {
         k: {
             'postload': v['postload']
         }
         for k, v in input_serializers.items() if 'postload' in v
     }
     predump_output_serializers = {
         k: {
             'predump': v['predump']
         }
         for k, v in output_serializers.items() if 'predump' in v
     }
     # Parse command
     cmd = [self.interpreter, executor_path]
     if os.path.basename(self.interpreter) == 'ipython':
         cmd.append('--')
     if self.step.workdir:
         cmd.extend(["--workdir", self.step.workdir])
     if self.autoawait:
         cmd.append("--autoawait")
     cmd.extend(["--local-ns-file", user_ns_path])
     if postload_input_serializers:
         postload_serializers_path = await self._serialize_to_remote_file(
             job, postload_input_serializers)
         cmd.extend(
             ["--postload-input-serializers", postload_serializers_path])
     if predump_output_serializers:
         predump_serializers_path = await self._serialize_to_remote_file(
             job, predump_output_serializers)
         cmd.extend(
             ["--predump-output-serializers", predump_serializers_path])
     for name in output_names:
         cmd.extend(["--output-name", name])
     cmd.extend([code_path, output_path])
     # Execute command
     if connector is not None:
         resources = job.get_resources()
         logger.info(
             'Executing job {job} on resource {resource} in directory {outdir}:\n{command}'
             .format(
                 job=job.name,
                 resource=resources[0] if resources else None,
                 outdir=job.output_directory,
                 command=' \\\n\t'.join(cmd),
             ))
         # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
         if len(resources) > 1:
             available_resources = await connector.get_available_resources(
                 self.step.target.service)
             hosts = {
                 k: v.hostname
                 for k, v in available_resources.items() if k in resources
             }
             environment['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
         # Configure standard streams
         stdin = self.stdin
         stdout = self.stdout if self.stdout is not None else STDOUT
         stderr = self.stderr if self.stderr is not None else stdout
         # Execute command
         result, exit_code = await connector.run(
             resources[0] if resources else None,
             cmd,
             environment=environment,
             workdir=job.output_directory,
             stdin=stdin,
             stdout=stdout,
             stderr=stderr,
             capture_output=True,
             job_name=job.name)
     else:
         logger.info(
             'Executing job {job} in directory {outdir}:\n{command}'.
             format(job=job.name,
                    outdir=job.output_directory,
                    command=' \\\n\t'.join(cmd)))
         # Configure standard streams
         stdin = open(self.stdin, "rb") if self.stdin is not None else None
         stdout = open(self.stdout,
                       "wb") if self.stdout is not None else None
         stderr = open(self.stderr,
                       "wb") if self.stderr is not None else None
         # Execute command
         proc = await asyncio.create_subprocess_exec(
             *cmd,
             cwd=job.output_directory,
             env={
                 **os.environ,
                 **environment
             },
             stdin=stdin,
             stdout=stdout,
             stderr=stderr)
         result, error = await proc.communicate()
         exit_code = proc.returncode
         # Close streams
         if stdin is not None:
             stdin.close()
         if stdout is not None:
             stdout.close()
         if stderr is not None:
             stderr.close()
     # Retrieve outputs
     with TemporaryDirectory() as d:
         dest_path = os.path.join(d, path_processor.basename(output_path))
         await self.step.context.data_manager.transfer_data(src=output_path,
                                                            src_job=job,
                                                            dst=dest_path,
                                                            dst_job=None)
         with open(dest_path, mode='r') as f:
             json_output = json.load(f)
     # Infer status
     status = Status[json_output[executor.CELL_STATUS]]
     if status == Status.COMPLETED:
         command_stdout = json_output[executor.CELL_OUTPUT]
         # TODO: understand why we obtain a list here
         if isinstance(command_stdout, MutableSequence):
             command_stdout = command_stdout[0]
         user_ns = await self._deserialize_namespace(
             job=job,
             output_serializers=output_serializers,
             remote_path=json_output[executor.CELL_LOCAL_NS])
     else:
         command_stdout = json_output[executor.CELL_OUTPUT]
         user_ns = {}
     # Return the command output object
     return JupyterCommandOutput(value=command_stdout,
                                 status=status,
                                 user_ns=user_ns)
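
For orientation, the list assembled in `cmd` just before execution has roughly the following shape. The flag names come from the code above; every path and the output name below are invented placeholders:

cmd = [
    'ipython', '/remote/wd/executor.py', '--',
    '--workdir', '/remote/wd',
    '--autoawait',
    '--local-ns-file', '/remote/wd/user_ns.dill',
    '--postload-input-serializers', '/remote/wd/postload.dill',
    '--predump-output-serializers', '/remote/wd/predump.dill',
    '--output-name', 'result',
    '/remote/wd/code.dill', '/remote/wd/out/abcdef',
]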
Example #12
 async def _prepare_work_dir(self,
                             job: Job,
                             context: MutableMapping[Text, Any],
                             element: Any,
                             dest_path: Optional[Text] = None,
                             writable: bool = False) -> None:
     path_processor = get_path_processor(job.step)
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     # If current element is a string, it must be an expression
     if isinstance(element, Text):
         listing = eval_expression(expression=element,
                                   context=context,
                                   full_js=self.full_js,
                                   expression_lib=self.expression_lib)
     else:
         listing = element
     # If listing is a list, each of its elements must be processed independently
     if isinstance(listing, MutableSequence):
         await asyncio.gather(*[
             asyncio.create_task(
                 self._prepare_work_dir(job, context, el, dest_path,
                                        writable)) for el in listing
         ])
     # If listing is a dictionary, it could be a File, a Directory, a Dirent or some other object
     elif isinstance(listing, MutableMapping):
         # If it is a File or Directory element, put the corresponding file in the output directory
         if listing.get('class') in ('File', 'Directory'):
             src_path = utils.get_path_from_token(listing)
             src_found = False
             if src_path is not None:
                 if dest_path is None:
                     if src_path.startswith(job.input_directory):
                         relpath = path_processor.relpath(
                             src_path, job.input_directory)
                         dest_path = path_processor.join(
                             job.output_directory, relpath)
                     else:
                         basename = path_processor.basename(src_path)
                         dest_path = path_processor.join(
                             job.output_directory, basename)
                 for resource in resources:
                     if await remotepath.exists(connector, resource,
                                                src_path):
                         await self.step.context.data_manager.transfer_data(
                             src=src_path,
                             src_job=job,
                             dst=dest_path,
                             dst_job=job,
                             writable=writable)
                         src_found = True
                         break
             # If the source path does not exist, create a File or a Directory in the remote path
             if not src_found:
                 if dest_path is None:
                     dest_path = job.output_directory
                 if src_path is not None:
                     dest_path = path_processor.join(
                         dest_path, path_processor.basename(src_path))
                 if listing['class'] == 'Directory':
                     await remotepath.mkdir(connector, resources, dest_path)
                 else:
                     await self._write_remote_file(
                         job=job,
                         content=listing.get('contents', ''),
                         dest_path=dest_path,
                         writable=writable)
             # If `listing` is present, recursively process folder contents
             if 'listing' in listing:
                 if 'basename' in listing:
                     dest_path = path_processor.join(
                         dest_path, listing['basename'])
                     await remotepath.mkdir(connector, resources, dest_path)
                 await asyncio.gather(*[
                     asyncio.create_task(
                         self._prepare_work_dir(job, context, element,
                                                dest_path, writable))
                     for element in listing['listing']
                 ])
         # If it is a Dirent element, put or create the corresponding file according to the entryname field
         elif 'entry' in listing:
             entry = eval_expression(expression=listing['entry'],
                                     context=context,
                                     full_js=self.full_js,
                                     expression_lib=self.expression_lib,
                                     strip_whitespace=False)
             if 'entryname' in listing:
                 dest_path = eval_expression(
                     expression=listing['entryname'],
                     context=context,
                     full_js=self.full_js,
                     expression_lib=self.expression_lib)
                 if not path_processor.isabs(dest_path):
                     dest_path = path_processor.join(
                         job.output_directory, dest_path)
             writable = listing.get('writable', False)
             # If entry is a string, a new text file must be created with the string as the file contents
             if isinstance(entry, Text):
                 await self._write_remote_file(job, entry, dest_path,
                                               writable)
             # If entry is a list
             elif isinstance(entry, MutableSequence):
                 # If all elements are Files or Directories, each of them must be processed independently
                 if all('class' in t
                        and t['class'] in ['File', 'Directory']
                        for t in entry):
                     await self._prepare_work_dir(job, context, entry,
                                                  dest_path, writable)
                 # Otherwise, the content should be serialised to JSON
                 else:
                     await self._write_remote_file(job, json.dumps(entry),
                                                   dest_path, writable)
             # If entry is a dict
             elif isinstance(entry, MutableMapping):
                 # If it is a File or Directory, it must be put in the destination path
                 if 'class' in entry and entry['class'] in [
                         'File', 'Directory'
                 ]:
                     await self._prepare_work_dir(job, context, entry,
                                                  dest_path, writable)
                 # Otherwise, the content should be serialised to JSON
                 else:
                     await self._write_remote_file(job, json.dumps(entry),
                                                   dest_path, writable)
             # Any object other than a string should be serialised to JSON
             else:
                 await self._write_remote_file(job, json.dumps(entry),
                                               dest_path, writable)
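
The Dirent branch above consumes entries of roughly this shape after CWL parsing (values are illustrative):

# Illustrative Dirent as seen by _prepare_work_dir: 'entry' and
# 'entryname' are CWL expressions, 'writable' an optional flag.
dirent = {
    'entry': '$(inputs.config)',
    'entryname': 'config.json',
    'writable': True,
}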
Example #13
 async def _update_file_token(
         self,
         job: Job,
         src_job: Job,
         token_value: Any,
         load_listing: Optional[LoadListing] = None,
         writable: Optional[bool] = None) -> MutableMapping[Text, Any]:
     path_processor = get_path_processor(
         src_job.step) if src_job is not None else os.path
     if 'location' not in token_value and 'path' in token_value:
         token_value['location'] = token_value['path']
     if 'location' in token_value and token_value['location'] is not None:
         location = token_value['location']
         # Manage remote files
         scheme = urllib.parse.urlsplit(location).scheme
         if scheme in ['http', 'https']:
             location = await _download_file(job, location)
         elif scheme == 'file':
             location = location[7:]
         # If basename is explicitly stated in the token, use it as destination path
         dest_path = None
         if 'basename' in token_value:
             path_processor = get_path_processor(self.port.step)
             dest_path = path_processor.join(job.input_directory,
                                             token_value['basename'])
         # Check if source file exists
         src_connector = (src_job.step.get_connector()
                          if src_job is not None else None)
         src_resources = ((src_job.get_resources() or [None])
                          if src_job is not None else [None])
         src_found = False
         for src_resource in src_resources:
             if await remotepath.exists(src_connector, src_resource,
                                        location):
                 src_found = True
                 break
         # If the source path exists, transfer the file into the task's input folder
         if src_found:
             filepath = await self._transfer_file(src_job=src_job,
                                                  dest_job=job,
                                                  src_path=location,
                                                  dest_path=dest_path,
                                                  writable=writable)
         # Otherwise, keep the current path
         else:
             filepath = location
         new_token_value = {'class': token_value['class'], 'path': filepath}
         # If token contains secondary files, transfer them, too
         if 'secondaryFiles' in token_value:
             sf_tasks = []
             for sf in token_value['secondaryFiles']:
                 path = get_path_from_token(sf)
                 # If basename is explicitly stated in the token, use it as destination path
                 dest_path = None
                 if 'basename' in sf:
                     path_processor = get_path_processor(self.port.step)
                     dest_path = path_processor.join(
                         job.input_directory, sf['basename'])
                 sf_tasks.append(
                     asyncio.create_task(
                         self._transfer_file(src_job=src_job,
                                             dest_job=job,
                                             src_path=path,
                                             dest_path=dest_path)))
             sf_paths = await asyncio.gather(*sf_tasks)
             new_token_value['secondaryFiles'] = [{
                 'class': sf['class'],
                 'path': sf_path
             } for sf, sf_path in zip(token_value['secondaryFiles'],
                                      sf_paths)]
         # Build token
         token_value = await self._build_token_value(
             job=job,
             token_value=new_token_value,
             load_contents=self.load_contents or 'contents' in token_value,
             load_listing=load_listing)
         return token_value
     # If there is only a 'contents' field, simply build the token value
     elif 'contents' in token_value:
         return await self._build_token_value(job, token_value,
                                              load_listing=load_listing)
     # If there is only a 'listing' field, transfer all the listed files to the remote resource
     elif 'listing' in token_value:
         # Compute destination path
         dest_path = get_path_from_token(token_value)
         if dest_path is None and 'basename' in token_value:
             dest_path = path_processor.join(job.input_directory,
                                             token_value['basename'])
         # Copy each element of the listing into the destination folder
         tasks = []
         classes = []
         for element in cast(List, token_value['listing']):
             # Compute destination path
             if dest_path is not None:
                 basename = path_processor.basename(element['path'])
                 current_dest_path = path_processor.join(
                     dest_path, basename)
             else:
                 current_dest_path = None
             # Transfer element to the remote resource
             tasks.append(
                 asyncio.create_task(
                     self._transfer_file(src_job=src_job,
                                         dest_job=job,
                                         src_path=element['path'],
                                         dest_path=current_dest_path,
                                         writable=writable)))
             classes.append(element['class'])
         dest_paths = await asyncio.gather(*tasks)
         # Compute listing on remote resource
         listing_tasks = []
         for token_class, path in zip(classes, dest_paths):
             listing_tasks.append(
                 asyncio.create_task(
                     _get_file_token(step=self.port.step,
                                     job=job,
                                     token_class=token_class,
                                     filepath=path)))
         token_value['listing'] = await asyncio.gather(*listing_tasks)
     return token_value
Example #14
 async def _get_value_from_command(self, job: Job,
                                   command_output: CWLCommandOutput):
     context = utils.build_context(job)
     path_processor = get_path_processor(self.port.step)
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     token_value = (command_output.value
                    if command_output.value is not None else self.default_value)
     # Check if the file `cwl.output.json` exists either locally or on at least one resource
     cwl_output_path = path_processor.join(job.output_directory,
                                           'cwl.output.json')
     for resource in resources:
         if await remotepath.exists(connector, resource, cwl_output_path):
             # If file exists, use its contents as token value
             token_value = json.loads(await remotepath.read(
                 connector, resource, cwl_output_path))
             break
     # If `token_value` is a dictionary, directly extract the token value from it
     if isinstance(token_value,
                   MutableMapping) and self.port.name in token_value:
         token = token_value[self.port.name]
         return await self._build_token_value(job, token)
     # Otherwise, generate the output object as described in `outputs` field
     if self.glob is not None:
         # Adjust glob path
         if '$(' in self.glob or '${' in self.glob:
             globpath = utils.eval_expression(
                 expression=self.glob,
                 context=context,
                 full_js=self.full_js,
                 expression_lib=self.expression_lib)
         else:
             globpath = self.glob
         # Resolve glob
         resolve_tasks = []
         for resource in resources:
             if isinstance(globpath, MutableSequence):
                 for path in globpath:
                     if not path_processor.isabs(path):
                         path = path_processor.join(job.output_directory,
                                                    path)
                     resolve_tasks.append(
                         _expand_glob(connector, resource, path))
             else:
                 if not path_processor.isabs(globpath):
                     globpath = path_processor.join(job.output_directory,
                                                    globpath)
                 resolve_tasks.append(
                     _expand_glob(connector, resource, globpath))
         paths = flatten_list(await asyncio.gather(*resolve_tasks))
         # Cannot glob outside the job output folder
         for path in paths:
             if not path.startswith(job.output_directory):
                 raise WorkflowDefinitionException(
                     "Globs outside the job's output folder are not allowed"
                 )
         # Get token class from paths
         class_tasks = [
             asyncio.create_task(_get_class_from_path(p, job))
             for p in paths
         ]
         paths = [{
             'path': p,
             'class': c
         } for p, c in zip(paths, await asyncio.gather(*class_tasks))]
         # If evaluation is not needed, simply return paths as token value
         if self.output_eval is None:
             token_list = await self._build_token_value(job, paths)
             if len(token_list) > 1:
                 return token_list
             return token_list[0] if len(token_list) == 1 else None
         # Otherwise, fill context['self'] with glob data and proceed
         else:
             context['self'] = await self._build_token_value(job, paths)
     if self.output_eval is not None:
         # Fill context with exit code
         context['runtime']['exitCode'] = command_output.exit_code
         # Evaluate output
         token = utils.eval_expression(expression=self.output_eval,
                                       context=context,
                                       full_js=self.full_js,
                                       expression_lib=self.expression_lib)
         # Build token
         if isinstance(token, MutableSequence):
             paths = [{
                 'path': el['path'],
                 'class': el['class']
             } for el in token]
             return await self._build_token_value(job, paths)
         else:
             return await self._build_token_value(job, token)
     # If no previous branch returned a value, simply process the command output
     return await self._build_token_value(job, token_value)
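
`_expand_glob` is assumed to resolve a glob pattern on a given resource. A local-only sketch under that assumption (a real implementation would run the expansion through the connector):

import glob

async def _expand_glob(connector, resource, pattern):
    # Local-only sketch: with no connector, fall back to the standard
    # glob module; remote expansion would be connector-specific.
    if connector is None:
        return sorted(glob.glob(pattern))
    raise NotImplementedError('remote glob expansion requires a connector')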
Example #15
 async def _build_token_value(
         self,
         job: Job,
         token_value: Any,
         load_contents: Optional[bool] = None,
         load_listing: Optional[LoadListing] = None) -> Any:
     if load_contents is None:
         load_contents = self.load_contents
     if token_value is None:
         return self.default_value
     elif isinstance(token_value, MutableSequence):
         value_tasks = []
         for t in token_value:
             value_tasks.append(
                 asyncio.create_task(
                     self._build_token_value(job, t, load_contents,
                                             load_listing)))
         return await asyncio.gather(*value_tasks)
     elif (isinstance(token_value, MutableMapping) and token_value.get(
             'class', token_value.get('type')) in ['File', 'Directory']):
         step = job.step if job is not None else self.port.step
         # Get filepath
         filepath = get_path_from_token(token_value)
         if filepath is not None:
             # Process secondary files in token value
             sf_map = {}
             if 'secondaryFiles' in token_value:
                 sf_tasks = []
                 for sf in token_value.get('secondaryFiles', []):
                     sf_path = get_path_from_token(sf)
                     path_processor = get_path_processor(step)
                     if not path_processor.isabs(sf_path):
                         sf_path = path_processor.join(
                             path_processor.dirname(filepath), sf_path)
                     sf_tasks.append(
                         asyncio.create_task(
                             _get_file_token(step=step,
                                             job=job,
                                             token_class=sf['class'],
                                             filepath=sf_path,
                                             basename=sf.get('basename'),
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                 sf_map = {
                     get_path_from_token(sf): sf
                     for sf in await asyncio.gather(*sf_tasks)
                 }
             # Compute the new token value
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
             # Compute new secondary files from port specification
             if self.secondary_files:
                 context = utils.build_context(job)
                 context['self'] = token_value
                 sf_tasks, sf_specs = [], []
                 for secondary_file in self.secondary_files:
                     # If pattern is an expression, evaluate it and process result
                     if '$(' in secondary_file.pattern or '${' in secondary_file.pattern:
                         sf_value = utils.eval_expression(
                             expression=secondary_file.pattern,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib)
                         if isinstance(sf_value, MutableSequence):
                             for sf in sf_value:
                                 sf_tasks.append(
                                     asyncio.create_task(
                                         self._process_secondary_file(
                                             job=job,
                                             secondary_file=sf,
                                             token_value=token_value,
                                             from_expression=True,
                                             existing_sf=sf_map,
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                                 sf_specs.append(secondary_file)
                         else:
                             sf_tasks.append(
                                 asyncio.create_task(
                                     self._process_secondary_file(
                                         job=job,
                                         secondary_file=sf_value,
                                         token_value=token_value,
                                         from_expression=True,
                                         existing_sf=sf_map,
                                         load_contents=load_contents,
                                         load_listing=load_listing
                                         or self.load_listing)))
                             sf_specs.append(secondary_file)
                     # Otherwise, simply process the pattern string
                     else:
                         sf_tasks.append(
                             asyncio.create_task(
                                 self._process_secondary_file(
                                     job=job,
                                     secondary_file=secondary_file.pattern,
                                     token_value=token_value,
                                     from_expression=False,
                                     existing_sf=sf_map,
                                     load_contents=load_contents,
                                     load_listing=load_listing
                                     or self.load_listing)))
                         sf_specs.append(secondary_file)
                 for sf_value, sf_spec in zip(
                         await asyncio.gather(*sf_tasks), sf_specs):
                     if sf_value is not None:
                         sf_map[get_path_from_token(sf_value)] = sf_value
                     elif sf_spec.required:
                         raise WorkflowExecutionException(
                             "Required secondary file {sf} not found".
                             format(sf=sf_spec.pattern))
             # Add all secondary files to the token
             if sf_map:
                 token_value['secondaryFiles'] = list(sf_map.values())
         # If there is only a 'contents' field, create a file on the step's resource and build the token
         elif 'contents' in token_value:
             path_processor = get_path_processor(self.port.step)
             filepath = path_processor.join(
                 job.output_directory,
                 token_value.get('basename', random_name()))
             connector = job.step.get_connector()
             resources = ((job.get_resources() or [None])
                          if job is not None else [None])
             await asyncio.gather(*[
                 asyncio.create_task(
                     remotepath.write(connector, res, filepath,
                                      token_value['contents']))
                 for res in resources
             ])
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
     return token_value
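
LoadListing, referenced throughout these examples, is assumed to mirror the three loadListing values defined by CWL v1.1:

from enum import Enum, auto

class LoadListing(Enum):
    no_listing = auto()
    shallow_listing = auto()
    deep_listing = auto()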