async def weight_token(self, job: Job, token_value: Any) -> int:
    if token_value is None or self.port_type not in ['File', 'Directory']:
        return 0
    elif isinstance(token_value, MutableSequence):
        return sum(await asyncio.gather(*[
            asyncio.create_task(self.weight_token(job, t)) for t in token_value
        ]))
    elif 'size' in token_value:
        # Use the size stored in the token, adding the weights of all secondary files
        weight = token_value['size']
        if 'secondaryFiles' in token_value:
            sf_tasks = []
            for sf in token_value['secondaryFiles']:
                sf_tasks.append(asyncio.create_task(self.weight_token(job, sf)))
            weight += sum(await asyncio.gather(*sf_tasks))
        return weight
    else:
        # Otherwise, compute the size remotely on one of the job's resources
        if job is not None and job.get_resources():
            connector = job.step.get_connector()
            for resource in job.get_resources():
                return await remotepath.size(connector, resource, _get_paths(token_value))
            return 0
        else:
            return await remotepath.size(None, None, _get_paths(token_value))
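# A minimal, self-contained sketch (not part of the codebase) of how the weighting
# above behaves on a literal CWL-style token: the size of the primary file and the
# sizes of all secondary files are summed. The remote-size fallback is elided here,
# and the token below is hypothetical.
def _weight_token_sketch(token):
    if token is None:
        return 0
    if isinstance(token, list):
        return sum(_weight_token_sketch(t) for t in token)
    weight = token.get('size', 0)
    weight += sum(_weight_token_sketch(sf) for sf in token.get('secondaryFiles', []))
    return weight

assert _weight_token_sketch({
    'class': 'File', 'size': 1024,
    'secondaryFiles': [{'class': 'File', 'size': 512}]
}) == 1536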
async def _process_secondary_file(
        self,
        job: Job,
        secondary_file: Any,
        token_value: MutableMapping[Text, Any],
        from_expression: bool,
        existing_sf: MutableMapping[Text, Any],
        load_contents: bool,
        load_listing: Optional[LoadListing]) -> Optional[MutableMapping[Text, Any]]:
    step = job.step if job is not None else self.port.step
    # If value is None, simply return None
    if secondary_file is None:
        return None
    # If value is a dictionary, simply append it to the list
    elif isinstance(secondary_file, MutableMapping):
        connector = step.get_connector()
        filepath = utils.get_path_from_token(secondary_file)
        for resource in (job.get_resources() or [None]) if job is not None else [None]:
            if await remotepath.exists(connector, resource, filepath):
                return await _get_file_token(
                    step=step,
                    job=job,
                    token_class=secondary_file['class'],
                    filepath=filepath,
                    basename=secondary_file.get('basename'),
                    load_contents=load_contents,
                    load_listing=load_listing)
    # If value is a string
    else:
        # If value doesn't come from an expression, apply it to the primary path
        filepath = (secondary_file if from_expression else
                    self._process_sf_path(secondary_file, utils.get_path_from_token(token_value)))
        path_processor = get_path_processor(step)
        if not path_processor.isabs(filepath):
            filepath = path_processor.join(
                path_processor.dirname(get_path_from_token(token_value)), filepath)
        if filepath not in existing_sf:
            # Search file in job resources and build token value
            connector = step.get_connector()
            for resource in (job.get_resources() or [None]) if job is not None else [None]:
                if await remotepath.exists(connector, resource, filepath):
                    token_class = ('File' if await remotepath.isfile(connector, resource, filepath)
                                   else 'Directory')
                    return await _get_file_token(
                        step=step,
                        job=job,
                        token_class=token_class,
                        filepath=filepath,
                        load_contents=load_contents,
                        load_listing=load_listing)
        else:
            return existing_sf[filepath]
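# `_process_sf_path` is assumed to implement the standard CWL secondary-file
# pattern rules. A minimal sketch of those rules, for reference: each leading
# '^' strips one file extension from the primary path before the remainder of
# the pattern is appended (helper and paths below are illustrative).
def _sf_pattern_sketch(pattern, primary_path):
    while pattern.startswith('^'):
        pattern = pattern[1:]
        if '.' in primary_path:
            primary_path = primary_path.rsplit('.', 1)[0]
    return primary_path + pattern

assert _sf_pattern_sketch('.bai', '/data/sample.bam') == '/data/sample.bam.bai'
assert _sf_pattern_sketch('^.bai', '/data/sample.bam') == '/data/sample.bai'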
async def weight_token(self, job: Job, token_value: Any) -> int:
    if isinstance(token_value, MutableSequence):
        return sum(await asyncio.gather(*[
            asyncio.create_task(self.weight_token(job, v)) for v in token_value
        ]))
    if job is not None and job.get_resources():
        connector = job.step.get_connector()
        for resource in job.get_resources():
            return await remotepath.size(connector, resource, token_value)
        return 0
    else:
        return await remotepath.size(None, None, token_value) if token_value is not None else 0
async def _get_class_from_path(path: Text, job: Job) -> Text:
    connector = job.step.get_connector() if job is not None else None
    for resource in (job.get_resources() or [None]) if job is not None else [None]:
        t_path = await remotepath.follow_symlink(connector, resource, path)
        return 'File' if await remotepath.isfile(connector, resource, t_path) else 'Directory'
async def _get_listing(
        step: Step,
        job: Job,
        dirpath: Text,
        load_contents: bool,
        recursive: bool) -> MutableSequence[MutableMapping[Text, Any]]:
    listing_tokens = {}
    connector = step.get_connector()
    resources = job.get_resources() or [None]
    for resource in resources:
        directories = await remotepath.listdir(connector, resource, dirpath, FileType.DIRECTORY)
        for directory in directories:
            if directory not in listing_tokens:
                load_listing = LoadListing.deep_listing if recursive else LoadListing.no_listing
                listing_tokens[directory] = asyncio.create_task(_get_file_token(
                    step=step,
                    job=job,
                    token_class='Directory',
                    filepath=directory,
                    load_contents=load_contents,
                    load_listing=load_listing))
        files = await remotepath.listdir(connector, resource, dirpath, FileType.FILE)
        for file in files:
            if file not in listing_tokens:
                listing_tokens[file] = asyncio.create_task(_get_file_token(
                    step=step,
                    job=job,
                    token_class='File',
                    filepath=file,
                    load_contents=load_contents))
    return cast(MutableSequence[MutableMapping[Text, Any]],
                await asyncio.gather(*listing_tokens.values()))
async def compute_token(self, job: Job, command_output: JupyterCommandOutput) -> Token:
    path_processor = utils.get_path_processor(self.port.step)
    if self.value is not None:
        connector = job.step.get_connector() if job is not None else None
        resources = job.get_resources() or [None]
        if job.output_directory and not path_processor.isabs(self.value):
            pattern = path_processor.join(job.output_directory, self.value)
        else:
            pattern = self.value
        token_value = utils.flatten_list(await asyncio.gather(*[
            asyncio.create_task(remotepath.resolve(
                connector=connector,
                target=resource,
                pattern=pattern)) for resource in resources
        ]))
        if len(token_value) == 1:
            token_value = token_value[0]
    else:
        token_value = command_output.user_ns.get(self.value_from)
        if job.output_directory:
            if isinstance(token_value, MutableSequence):
                token_value = [
                    path_processor.join(job.output_directory, t)
                    if not path_processor.isabs(t) else t
                    for t in token_value
                ]
            elif not path_processor.isabs(token_value):
                token_value = path_processor.join(job.output_directory, token_value)
    return Token(
        name=self.port.name,
        value=token_value,
        job=job.name,
        tag=utils.get_tag(job.inputs))
async def _download_file(job: Job, url: Text) -> Text:
    connector = job.step.get_connector()
    resources = job.get_resources()
    try:
        return await remotepath.download(connector, resources, url, job.input_directory)
    except Exception as e:
        raise WorkflowExecutionException("Error downloading file from " + url) from e
async def _register_data(self, job: Job, path: Text):
    connector = job.step.get_connector()
    resources = job.get_resources() or [None]
    register_path_tasks = []
    for resource in resources:
        register_path_tasks.append(asyncio.create_task(
            self.port.step.context.data_manager.register_path(connector, resource, path)))
    await asyncio.gather(*register_path_tasks)
async def _get_file_token(
        step: Step,
        job: Job,
        token_class: Text,
        filepath: Text,
        basename: Optional[Text] = None,
        load_contents: bool = False,
        load_listing: Optional[LoadListing] = None) -> MutableMapping[Text, Any]:
    connector = step.get_connector()
    resources = (job.get_resources() or [None]) if job is not None else [None]
    path_processor = get_path_processor(step)
    basename = basename or path_processor.basename(filepath)
    location = ''.join(['file://', filepath])
    token = {
        'class': token_class,
        'location': location,
        'basename': basename,
        'path': filepath,
        'dirname': path_processor.dirname(filepath)
    }
    if token_class == 'File':
        token['nameroot'], token['nameext'] = path_processor.splitext(basename)
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['size'] = await remotepath.size(connector, resource, filepath)
                if load_contents:
                    if token['size'] > CONTENT_LIMIT:
                        raise WorkflowExecutionException(
                            "Cannot read contents from files larger than {limit}kB".format(
                                limit=CONTENT_LIMIT / 1024))
                    token['contents'] = await remotepath.head(
                        connector, resource, filepath, CONTENT_LIMIT)
                filepath = await remotepath.follow_symlink(connector, resource, filepath)
                token['checksum'] = 'sha1${checksum}'.format(
                    checksum=await remotepath.checksum(connector, resource, filepath))
                break
    elif token_class == 'Directory' and load_listing != LoadListing.no_listing:
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['listing'] = await _get_listing(
                    step, job, filepath, load_contents,
                    load_listing == LoadListing.deep_listing)
                break
    return token
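# For reference, a hypothetical token produced by `_get_file_token` for a regular
# file (all field values below are illustrative, not taken from a real run):
_example_file_token = {
    'class': 'File',
    'location': 'file:///data/sample.bam',
    'basename': 'sample.bam',
    'path': '/data/sample.bam',
    'dirname': '/data',
    'nameroot': 'sample',
    'nameext': '.bam',
    'size': 1048576,
    'checksum': 'sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709',
}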
async def _recover_path(self,
                        job: Job,
                        resources: MutableSequence[Text],
                        token: Token,
                        path: Text) -> Optional[Text]:
    context = self.get_context()
    connector = self.port.step.get_connector()
    job_resources = job.get_resources() or [None]
    # Check if path is already present in actual job's resources
    for resource in job_resources:
        if await remotepath.exists(connector, resource, path):
            return path
    # Otherwise, get the list of other file locations from the DataManager
    data_locations = set()
    for resource in resources:
        data_locations.update(context.data_manager.get_data_locations(
            resource, path, DataLocationType.PRIMARY))
    # Check if path is still present in original resources
    for location in data_locations:
        if location.resource in job_resources:
            if await remotepath.exists(connector, location.resource, path):
                return path
            else:
                context.data_manager.invalidate_location(location.resource, path)
    # Check if files are saved locally
    for location in data_locations:
        if location.resource == LOCAL_RESOURCE:
            return await self._transfer_file(None, job, location.path)
    # If not, check if files are stored elsewhere
    for location in data_locations:
        if location.resource not in job_resources and location.resource != LOCAL_RESOURCE:
            location_job = context.scheduler.get_job(location.job)
            location_connector = location_job.step.get_connector()
            available_resources = await location_connector.get_available_resources(
                location_job.step.target.service)
            if (location.resource in available_resources and
                    await remotepath.exists(location_connector, location.resource, location.path)):
                return await self._transfer_file(location_job, job, location.path)
            else:
                context.data_manager.invalidate_location(location.resource, location.path)
    # If the file has been lost, raise an exception
    message = "Failed to recover path {path} for token {token} from job {job}".format(
        path=path, token=token.name, job=token.job)
    logger.info(message)
    raise UnrecoverableTokenException(message, token)
def _register_data(self,
                   job: Job,
                   token_value: Union[MutableSequence[MutableMapping[Text, Any]],
                                      MutableMapping[Text, Any]]):
    context = self.get_context()
    # If `token_value` is a list, process every item independently
    if isinstance(token_value, MutableSequence):
        for t in token_value:
            self._register_data(job, t)
    # Otherwise, if the token value is a dictionary and it refers to a File or a
    # Directory, register the path
    elif (isinstance(token_value, MutableMapping) and
          token_value.get('class') in ['File', 'Directory']):
        # Extract paths from token
        paths = []
        if token_value.get('path') is not None:
            paths.append(token_value['path'])
        elif token_value.get('location') is not None:
            paths.append(token_value['location'])
        elif 'listing' in token_value:
            paths.extend([t['path'] if 'path' in t else t['location']
                          for t in token_value['listing']])
        if 'secondaryFiles' in token_value:
            for sf in token_value['secondaryFiles']:
                paths.append(get_path_from_token(sf))
        # Remove `file` protocol if present
        paths = [p[7:] if p.startswith('file://') else p for p in paths]
        # Register paths to the `DataManager`
        resources = job.get_resources() or [None]
        for path in paths:
            for resource in resources:
                context.data_manager.register_path(job, resource, path)
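# A small self-contained sketch (hypothetical helper, not part of the codebase)
# of the extraction order used above: an explicit 'path' wins over 'location',
# which wins over the paths contained in a directory 'listing'.
def _extract_paths_sketch(token):
    if token.get('path') is not None:
        paths = [token['path']]
    elif token.get('location') is not None:
        paths = [token['location']]
    elif 'listing' in token:
        paths = [t.get('path', t.get('location')) for t in token['listing']]
    else:
        paths = []
    return [p[len('file://'):] if p.startswith('file://') else p for p in paths]

assert _extract_paths_sketch({'class': 'File', 'location': 'file:///tmp/a.txt'}) == ['/tmp/a.txt']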
async def execute(self, job: Job) -> CommandOutput:
    connector = self.step.get_connector()
    # Transfer executor file to remote resource
    executor_path = await self._transfer_file(job, executor.__file__)
    # Modify code, environment and namespaces according to inputs
    input_names = {}
    environment = {}
    for token in job.inputs:
        if token.value is not None:
            command_token = self.input_tokens[token.name]
            token_value = ([token.value] if isinstance(
                self.step.input_ports[token.name], ScatterInputPort) else token.value)
            if command_token.token_type in ['file', 'name']:
                input_names[token.name] = token_value
            elif command_token.token_type == 'env':
                environment[token.name] = token_value
    # List output names to be retrieved from remote context
    output_names = [name for name, p in self.step.output_ports.items()
                    if name != executor.CELL_OUTPUT]
    # Serialize AST nodes to remote resource
    code_path = await self._serialize_to_remote_file(job, self.ast_nodes)
    # Configure output file path
    path_processor = get_path_processor(self.step)
    output_path = path_processor.join(job.output_directory, random_name())
    # Extract serializers from command tokens
    input_serializers = {
        k: v.serializer for k, v in self.input_tokens.items() if v.serializer is not None}
    output_serializers = {
        k: v.serializer for k, v in self.output_tokens.items() if v.serializer is not None}
    # Serialize namespaces to remote resource
    user_ns_path = await self._serialize_namespace(
        input_serializers=input_serializers,
        job=job,
        namespace=input_names)
    # Create dictionaries of postload input serializers and predump output serializers
    postload_input_serializers = {
        k: {'postload': v['postload']} for k, v in input_serializers.items() if 'postload' in v}
    predump_output_serializers = {
        k: {'predump': v['predump']} for k, v in output_serializers.items() if 'predump' in v}
    # Parse command
    cmd = [self.interpreter, executor_path]
    if os.path.basename(self.interpreter) == 'ipython':
        cmd.append('--')
    if self.step.workdir:
        cmd.extend(["--workdir", self.step.workdir])
    if self.autoawait:
        cmd.append("--autoawait")
    cmd.extend(["--local-ns-file", user_ns_path])
    if postload_input_serializers:
        postload_serializers_path = await self._serialize_to_remote_file(
            job, postload_input_serializers)
        cmd.extend(["--postload-input-serializers", postload_serializers_path])
    if predump_output_serializers:
        predump_serializers_path = await self._serialize_to_remote_file(
            job, predump_output_serializers)
        cmd.extend(["--predump-output-serializers", predump_serializers_path])
    for name in output_names:
        cmd.extend(["--output-name", name])
    cmd.extend([code_path, output_path])
    # Execute command
    if connector is not None:
        resources = job.get_resources()
        logger.info(
            'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'.format(
                job=job.name,
                resource=resources[0] if resources else None,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd)))
        # If the step is assigned to multiple resources, add the STREAMFLOW_HOSTS
        # environment variable
        if len(resources) > 1:
            available_resources = await connector.get_available_resources(
                self.step.target.service)
            hosts = {k: v.hostname for k, v in available_resources.items() if k in resources}
            environment['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
        # Configure standard streams
        stdin = self.stdin
        stdout = self.stdout if self.stdout is not None else STDOUT
        stderr = self.stderr if self.stderr is not None else stdout
        # Execute command
        result, exit_code = await connector.run(
            resources[0] if resources else None,
            cmd,
            environment=environment,
            workdir=job.output_directory,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            capture_output=True,
            job_name=job.name)
    else:
        logger.info(
            'Executing job {job} into directory {outdir}:\n{command}'.format(
                job=job.name,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd)))
        # Configure standard streams
        stdin = open(self.stdin, "rb") if self.stdin is not None else None
        stdout = open(self.stdout, "wb") if self.stdout is not None else None
        stderr = open(self.stderr, "wb") if self.stderr is not None else None
        # Execute command
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=job.output_directory,
            env={**os.environ, **environment},
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        result, error = await proc.communicate()
        exit_code = proc.returncode
        # Close streams
        if stdin is not None:
            stdin.close()
        if stdout is not None:
            stdout.close()
        if stderr is not None:
            stderr.close()
    # Retrieve outputs
    with TemporaryDirectory() as d:
        dest_path = os.path.join(d, path_processor.basename(output_path))
        await self.step.context.data_manager.transfer_data(
            src=output_path,
            src_job=job,
            dst=dest_path,
            dst_job=None)
        with open(dest_path, mode='r') as f:
            json_output = json.load(f)
    # Infer status
    status = Status[json_output[executor.CELL_STATUS]]
    if status == Status.COMPLETED:
        command_stdout = json_output[executor.CELL_OUTPUT]
        if isinstance(command_stdout, MutableSequence):
            # TODO: understand why we obtain a list here
            command_stdout = command_stdout[0]
        user_ns = await self._deserialize_namespace(
            job=job,
            output_serializers=output_serializers,
            remote_path=json_output[executor.CELL_LOCAL_NS])
    else:
        command_stdout = json_output[executor.CELL_OUTPUT]
        user_ns = {}
    # Return the command output object
    return JupyterCommandOutput(
        value=command_stdout,
        status=status,
        user_ns=user_ns)
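# Illustrative only: a command line that the assembly above could produce for an
# IPython interpreter (all paths below are hypothetical placeholders):
_example_cmd = [
    'ipython', '/tmp/streamflow/executor.py', '--',
    '--autoawait',
    '--local-ns-file', '/tmp/streamflow/user_ns.pickle',
    '--output-name', 'result',
    '/tmp/streamflow/code.pickle', '/tmp/streamflow/out/wxyz1234',
]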
async def execute(self, job: Job) -> CWLCommandOutput:
    context = utils.build_context(job)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Job {job} inputs: {inputs}".format(
            job=job.name,
            inputs=json.dumps(context['inputs'], indent=4, sort_keys=True)))
    if self.initial_work_dir is not None:
        await self._prepare_work_dir(job, context, self.initial_work_dir)
    cmd = self._get_executable_command(context)
    parsed_env = {
        k: str(eval_expression(
            expression=v,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib))
        for (k, v) in self.environment.items()
    }
    if 'HOME' not in parsed_env:
        parsed_env['HOME'] = job.output_directory
    if 'TMPDIR' not in parsed_env:
        parsed_env['TMPDIR'] = job.tmp_directory
    if self.step.target is None:
        if self.is_shell_command:
            cmd = ["/bin/sh", "-c", " ".join(cmd)]
        # Open streams
        stderr = self._get_stream(job, context, self.stderr, sys.stderr)
        stdin = self._get_stream(job, context, self.stdin, sys.stdin, is_input=True)
        stdout = self._get_stream(job, context, self.stdout, sys.stderr)
        # Execute command
        logger.info(
            'Executing job {job} into directory {outdir}:\n{command}'.format(
                job=job.name,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd)))
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=job.output_directory,
            env=parsed_env,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        result, error = await asyncio.wait_for(proc.communicate(), self._get_timeout(job))
        exit_code = proc.returncode
        # Close streams
        if stdin is not sys.stdin:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        if stderr is not sys.stderr:
            stderr.close()
    else:
        connector = self.step.get_connector()
        resources = job.get_resources()
        logger.info(
            'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'.format(
                job=job.name,
                resource=resources[0] if resources else None,
                outdir=job.output_directory,
                command=' \\\n\t'.join(
                    ["/bin/sh", "-c", "\"{cmd}\"".format(cmd=" ".join(cmd))]
                    if self.is_shell_command else cmd)))
        if self.is_shell_command:
            cmd = ["/bin/sh", "-c", "\"$(echo {command} | base64 -d)\"".format(
                command=base64.b64encode(" ".join(cmd).encode('utf-8')).decode('utf-8'))]
        # If the step is assigned to multiple resources, add the STREAMFLOW_HOSTS
        # environment variable
        if len(resources) > 1:
            available_resources = await connector.get_available_resources(
                self.step.target.service)
            hosts = {k: v.hostname for k, v in available_resources.items() if k in resources}
            parsed_env['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
        # Process streams
        stdin = eval_expression(
            expression=self.stdin,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib)
        stdout = eval_expression(
            expression=self.stdout,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib) if self.stdout is not None else STDOUT
        stderr = eval_expression(
            expression=self.stderr,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib) if self.stderr is not None else stdout
        # Execute remote command
        result, exit_code = await asyncio.wait_for(
            connector.run(
                resources[0] if resources else None,
                cmd,
                environment=parsed_env,
                workdir=job.output_directory,
                stdin=stdin,
                stdout=stdout,
                stderr=stderr,
                capture_output=True,
                job_name=job.name),
            self._get_timeout(job))
    # Handle exit codes
    if self.failure_codes is not None and exit_code in self.failure_codes:
        status = Status.FAILED
    elif (self.success_codes is not None and exit_code in self.success_codes) or exit_code == 0:
        status = Status.COMPLETED
        if result:
            logger.info(result)
    else:
        status = Status.FAILED
    return CWLCommandOutput(
        value=result,
        status=status,
        exit_code=exit_code)
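# A minimal sketch of the exit-code handling above: explicit failure codes win,
# then explicit success codes (or the conventional 0), otherwise the job failed.
def _infer_status_sketch(exit_code, success_codes=None, failure_codes=None):
    if failure_codes is not None and exit_code in failure_codes:
        return 'FAILED'
    if (success_codes is not None and exit_code in success_codes) or exit_code == 0:
        return 'COMPLETED'
    return 'FAILED'

assert _infer_status_sketch(0) == 'COMPLETED'
assert _infer_status_sketch(1, success_codes=[1]) == 'COMPLETED'
assert _infer_status_sketch(0, failure_codes=[0]) == 'FAILED'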
async def _prepare_work_dir(self,
                            job: Job,
                            context: MutableMapping[Text, Any],
                            element: Any,
                            dest_path: Optional[Text] = None,
                            writable: bool = False) -> None:
    path_processor = get_path_processor(job.step)
    connector = job.step.get_connector()
    resources = job.get_resources() or [None]
    # If the current element is a string, it must be an expression
    if isinstance(element, Text):
        listing = eval_expression(
            expression=element,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib)
    else:
        listing = element
    # If listing is a list, each of its elements must be processed independently
    if isinstance(listing, MutableSequence):
        await asyncio.gather(*[
            asyncio.create_task(self._prepare_work_dir(job, context, el, dest_path, writable))
            for el in listing
        ])
    # If listing is a dictionary, it could be a File, a Directory, a Dirent or some other object
    elif isinstance(listing, MutableMapping):
        # If it is a File or Directory element, put the corresponding file in the output directory
        if 'class' in listing and listing['class'] in ['File', 'Directory']:
            src_path = utils.get_path_from_token(listing)
            src_found = False
            if src_path is not None:
                if dest_path is None:
                    if src_path.startswith(job.input_directory):
                        relpath = path_processor.relpath(src_path, job.input_directory)
                        dest_path = path_processor.join(job.output_directory, relpath)
                    else:
                        basename = path_processor.basename(src_path)
                        dest_path = path_processor.join(job.output_directory, basename)
                for resource in resources:
                    if await remotepath.exists(connector, resource, src_path):
                        await self.step.context.data_manager.transfer_data(
                            src=src_path,
                            src_job=job,
                            dst=dest_path,
                            dst_job=job,
                            writable=writable)
                        src_found = True
                        break
            # If the source path does not exist, create a File or a Directory in the remote path
            if not src_found:
                if dest_path is None:
                    dest_path = job.output_directory
                    if src_path is not None:
                        dest_path = path_processor.join(
                            dest_path, path_processor.basename(src_path))
                if listing['class'] == 'Directory':
                    await remotepath.mkdir(connector, resources, dest_path)
                else:
                    await self._write_remote_file(
                        job=job,
                        content=listing['contents'] if 'contents' in listing else '',
                        dest_path=dest_path,
                        writable=writable)
            # If `listing` is present, recursively process folder contents
            if 'listing' in listing:
                if 'basename' in listing:
                    dest_path = path_processor.join(dest_path, listing['basename'])
                    await remotepath.mkdir(connector, resources, dest_path)
                await asyncio.gather(*[
                    asyncio.create_task(
                        self._prepare_work_dir(job, context, element, dest_path, writable))
                    for element in listing['listing']
                ])
        # If it is a Dirent element, put or create the corresponding file according
        # to the `entryname` field
        elif 'entry' in listing:
            entry = eval_expression(
                expression=listing['entry'],
                context=context,
                full_js=self.full_js,
                expression_lib=self.expression_lib,
                strip_whitespace=False)
            if 'entryname' in listing:
                dest_path = eval_expression(
                    expression=listing['entryname'],
                    context=context,
                    full_js=self.full_js,
                    expression_lib=self.expression_lib)
                if not path_processor.isabs(dest_path):
                    dest_path = path_processor.join(job.output_directory, dest_path)
            writable = listing['writable'] if 'writable' in listing else False
            # If entry is a string, a new text file must be created with the string
            # as the file contents
            if isinstance(entry, Text):
                await self._write_remote_file(job, entry, dest_path, writable)
            # If entry is a list
            elif isinstance(entry, MutableSequence):
                # If all elements are Files or Directories, each of them must be
                # processed independently
                if all('class' in t and t['class'] in ['File', 'Directory'] for t in entry):
                    await self._prepare_work_dir(job, context, entry, dest_path, writable)
                # Otherwise, the content should be serialised to JSON
                else:
                    await self._write_remote_file(job, json.dumps(entry), dest_path, writable)
            # If entry is a dict
            elif isinstance(entry, MutableMapping):
                # If it is a File or Directory, it must be put in the destination path
                if 'class' in entry and entry['class'] in ['File', 'Directory']:
                    await self._prepare_work_dir(job, context, entry, dest_path, writable)
                # Otherwise, the content should be serialised to JSON
                else:
                    await self._write_remote_file(job, json.dumps(entry), dest_path, writable)
            # Every object different from a string should be serialised to JSON
            else:
                await self._write_remote_file(job, json.dumps(entry), dest_path, writable)
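# An illustrative CWL `InitialWorkDirRequirement` listing that the logic above
# would process: a File is staged into the job's directory and the Dirent
# creates `config.json` there (values below are hypothetical).
_example_initial_work_dir = [
    {'class': 'File', 'path': '/data/reference.fa'},
    {'entryname': 'config.json', 'entry': '{"threads": 4}', 'writable': True},
]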
async def _build_token_value(self,
                             job: Job,
                             token_value: Any,
                             load_contents: Optional[bool] = None,
                             load_listing: Optional[LoadListing] = None) -> Any:
    if load_contents is None:
        load_contents = self.load_contents
    if token_value is None:
        return self.default_value
    elif isinstance(token_value, MutableSequence):
        value_tasks = []
        for t in token_value:
            value_tasks.append(asyncio.create_task(
                self._build_token_value(job, t, load_contents, load_listing)))
        return await asyncio.gather(*value_tasks)
    elif (isinstance(token_value, MutableMapping) and
          token_value.get('class', token_value.get('type')) in ['File', 'Directory']):
        step = job.step if job is not None else self.port.step
        # Get filepath
        filepath = get_path_from_token(token_value)
        if filepath is not None:
            # Process secondary files in token value
            sf_map = {}
            if 'secondaryFiles' in token_value:
                sf_tasks = []
                for sf in token_value.get('secondaryFiles', []):
                    sf_path = get_path_from_token(sf)
                    path_processor = get_path_processor(step)
                    if not path_processor.isabs(sf_path):
                        sf_path = path_processor.join(
                            path_processor.dirname(filepath), sf_path)
                    sf_tasks.append(asyncio.create_task(_get_file_token(
                        step=step,
                        job=job,
                        token_class=sf['class'],
                        filepath=sf_path,
                        basename=sf.get('basename'),
                        load_contents=load_contents,
                        load_listing=load_listing or self.load_listing)))
                sf_map = {get_path_from_token(sf): sf
                          for sf in await asyncio.gather(*sf_tasks)}
            # Compute the new token value
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class', token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
            # Compute new secondary files from port specification
            if self.secondary_files:
                context = utils.build_context(job)
                context['self'] = token_value
                sf_tasks, sf_specs = [], []
                for secondary_file in self.secondary_files:
                    # If the pattern is an expression, evaluate it and process the result
                    if '$(' in secondary_file.pattern or '${' in secondary_file.pattern:
                        sf_value = utils.eval_expression(
                            expression=secondary_file.pattern,
                            context=context,
                            full_js=self.full_js,
                            expression_lib=self.expression_lib)
                        if isinstance(sf_value, MutableSequence):
                            for sf in sf_value:
                                sf_tasks.append(asyncio.create_task(
                                    self._process_secondary_file(
                                        job=job,
                                        secondary_file=sf,
                                        token_value=token_value,
                                        from_expression=True,
                                        existing_sf=sf_map,
                                        load_contents=load_contents,
                                        load_listing=load_listing or self.load_listing)))
                                sf_specs.append(secondary_file)
                        else:
                            sf_tasks.append(asyncio.create_task(
                                self._process_secondary_file(
                                    job=job,
                                    secondary_file=sf_value,
                                    token_value=token_value,
                                    from_expression=True,
                                    existing_sf=sf_map,
                                    load_contents=load_contents,
                                    load_listing=load_listing or self.load_listing)))
                            sf_specs.append(secondary_file)
                    # Otherwise, simply process the pattern string
                    else:
                        sf_tasks.append(asyncio.create_task(
                            self._process_secondary_file(
                                job=job,
                                secondary_file=secondary_file.pattern,
                                token_value=token_value,
                                from_expression=False,
                                existing_sf=sf_map,
                                load_contents=load_contents,
                                load_listing=load_listing or self.load_listing)))
                        sf_specs.append(secondary_file)
                for sf_value, sf_spec in zip(await asyncio.gather(*sf_tasks), sf_specs):
                    if sf_value is not None:
                        sf_map[get_path_from_token(sf_value)] = sf_value
                    elif sf_spec.required:
                        raise WorkflowExecutionException(
                            "Required secondary file {sf} not found".format(
                                sf=sf_spec.pattern))
            # Add all secondary files to the token
            if sf_map:
                token_value['secondaryFiles'] = list(sf_map.values())
        # If there is only a 'contents' field, create a file on the step's resource
        # and build the token
        elif 'contents' in token_value:
            path_processor = get_path_processor(self.port.step)
            filepath = path_processor.join(
                job.output_directory, token_value.get('basename', random_name()))
            connector = job.step.get_connector()
            resources = (job.get_resources() or [None]) if job is not None else [None]
            await asyncio.gather(*[
                asyncio.create_task(remotepath.write(
                    connector, res, filepath, token_value['contents']))
                for res in resources
            ])
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class', token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
    return token_value
async def _get_value_from_command(self, job: Job, command_output: CWLCommandOutput):
    context = utils.build_context(job)
    path_processor = get_path_processor(self.port.step)
    connector = job.step.get_connector()
    resources = job.get_resources() or [None]
    token_value = (command_output.value
                   if command_output.value is not None else self.default_value)
    # Check if the `cwl.output.json` file exists, either locally or on at least one resource
    cwl_output_path = path_processor.join(job.output_directory, 'cwl.output.json')
    for resource in resources:
        if await remotepath.exists(connector, resource, cwl_output_path):
            # If the file exists, use its contents as token value
            token_value = json.loads(
                await remotepath.read(connector, resource, cwl_output_path))
            break
    # If `token_value` is a dictionary, directly extract the token value from it
    if isinstance(token_value, MutableMapping) and self.port.name in token_value:
        token = token_value[self.port.name]
        return await self._build_token_value(job, token)
    # Otherwise, generate the output object as described in the `outputs` field
    if self.glob is not None:
        # Adjust glob path
        if '$(' in self.glob or '${' in self.glob:
            globpath = utils.eval_expression(
                expression=self.glob,
                context=context,
                full_js=self.full_js,
                expression_lib=self.expression_lib)
        else:
            globpath = self.glob
        # Resolve glob
        resolve_tasks = []
        for resource in resources:
            if isinstance(globpath, MutableSequence):
                for path in globpath:
                    if not path_processor.isabs(path):
                        path = path_processor.join(job.output_directory, path)
                    resolve_tasks.append(_expand_glob(connector, resource, path))
            else:
                if not path_processor.isabs(globpath):
                    globpath = path_processor.join(job.output_directory, globpath)
                resolve_tasks.append(_expand_glob(connector, resource, globpath))
        paths = flatten_list(await asyncio.gather(*resolve_tasks))
        # Cannot glob outside the job output folder
        for path in paths:
            if not path.startswith(job.output_directory):
                raise WorkflowDefinitionException(
                    "Globs outside the job's output folder are not allowed")
        # Get token class from paths
        class_tasks = [asyncio.create_task(_get_class_from_path(p, job)) for p in paths]
        paths = [{'path': p, 'class': c}
                 for p, c in zip(paths, await asyncio.gather(*class_tasks))]
        # If evaluation is not needed, simply return paths as token value
        if self.output_eval is None:
            token_list = await self._build_token_value(job, paths)
            if len(token_list) > 1:
                return token_list
            return token_list[0] if len(token_list) == 1 else None
        # Otherwise, fill context['self'] with glob data and proceed
        else:
            context['self'] = await self._build_token_value(job, paths)
    if self.output_eval is not None:
        # Fill context with the exit code
        context['runtime']['exitCode'] = command_output.exit_code
        # Evaluate output
        token = utils.eval_expression(
            expression=self.output_eval,
            context=context,
            full_js=self.full_js,
            expression_lib=self.expression_lib)
        # Build token
        if isinstance(token, MutableSequence):
            paths = [{'path': el['path'], 'class': el['class']} for el in token]
            return await self._build_token_value(job, paths)
        else:
            return await self._build_token_value(job, token)
    # As a default (if no previous return path was taken), simply process the command output
    return await self._build_token_value(job, token_value)
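# An illustrative CWL `outputBinding` that the logic above evaluates: `glob`
# collects matching paths under the job's output directory and `outputEval`
# post-processes them (the binding below is hypothetical).
_example_output_binding = {
    'glob': '*.txt',
    'loadContents': True,
    'outputEval': '$(self[0].contents.trim())',
}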
async def _update_file_token(self,
                             job: Job,
                             src_job: Job,
                             token_value: Any,
                             load_listing: Optional[LoadListing] = None,
                             writable: Optional[bool] = None) -> MutableMapping[Text, Any]:
    path_processor = get_path_processor(src_job.step) if src_job is not None else os.path
    if 'location' not in token_value and 'path' in token_value:
        token_value['location'] = token_value['path']
    if 'location' in token_value and token_value['location'] is not None:
        location = token_value['location']
        # Manage remote files
        scheme = urllib.parse.urlsplit(location).scheme
        if scheme in ['http', 'https']:
            location = await _download_file(job, location)
        elif scheme == 'file':
            location = location[7:]
        # If basename is explicitly stated in the token, use it as the destination path
        dest_path = None
        if 'basename' in token_value:
            path_processor = get_path_processor(self.port.step)
            dest_path = path_processor.join(job.input_directory, token_value['basename'])
        # Check if the source file exists
        src_connector = src_job.step.get_connector() if src_job is not None else None
        src_resources = (src_job.get_resources() or [None]) if src_job is not None else [None]
        src_found = False
        for src_resource in src_resources:
            if await remotepath.exists(src_connector, src_resource, location):
                src_found = True
                break
        # If the source path exists, transfer the file into the task's input folder
        if src_found:
            filepath = await self._transfer_file(
                src_job=src_job,
                dest_job=job,
                src_path=location,
                dest_path=dest_path,
                writable=writable)
        # Otherwise, keep the current path
        else:
            filepath = location
        new_token_value = {'class': token_value['class'], 'path': filepath}
        # If the token contains secondary files, transfer them, too
        if 'secondaryFiles' in token_value:
            sf_tasks = []
            for sf in token_value['secondaryFiles']:
                path = get_path_from_token(sf)
                # If basename is explicitly stated in the token, use it as the destination path
                dest_path = None
                if 'basename' in sf:
                    path_processor = get_path_processor(self.port.step)
                    dest_path = path_processor.join(job.input_directory, sf['basename'])
                sf_tasks.append(asyncio.create_task(
                    self._transfer_file(
                        src_job=src_job,
                        dest_job=job,
                        src_path=path,
                        dest_path=dest_path)))
            sf_paths = await asyncio.gather(*sf_tasks)
            new_token_value['secondaryFiles'] = [
                {'class': sf['class'], 'path': sf_path}
                for sf, sf_path in zip(token_value['secondaryFiles'], sf_paths)
            ]
        # Build token
        token_value = await self._build_token_value(
            job=job,
            token_value=new_token_value,
            load_contents=self.load_contents or 'contents' in token_value,
            load_listing=load_listing)
        return token_value
    # If there is only a 'contents' field, simply build the token value
    elif 'contents' in token_value:
        return await self._build_token_value(job, token_value, load_listing=load_listing)
    # If there is only a 'listing' field, transfer all the listed files to the remote resource
    elif 'listing' in token_value:
        # Compute the destination path
        dest_path = get_path_from_token(token_value)
        if dest_path is None and 'basename' in token_value:
            dest_path = path_processor.join(job.input_directory, token_value['basename'])
        # Copy each element of the listing into the destination folder
        tasks = []
        classes = []
        for element in cast(List, token_value['listing']):
            # Compute the destination path
            if dest_path is not None:
                basename = path_processor.basename(element['path'])
                current_dest_path = path_processor.join(dest_path, basename)
            else:
                current_dest_path = None
            # Transfer the element to the remote resource
            tasks.append(asyncio.create_task(
                self._transfer_file(
                    src_job=src_job,
                    dest_job=job,
                    src_path=element['path'],
                    dest_path=current_dest_path,
                    writable=writable)))
            classes.append(element['class'])
        dest_paths = await asyncio.gather(*tasks)
        # Compute the listing on the remote resource
        listing_tasks = []
        for token_class, path in zip(classes, dest_paths):
            listing_tasks.append(asyncio.create_task(
                _get_file_token(
                    step=self.port.step,
                    job=job,
                    token_class=token_class,
                    filepath=path)))
        token_value['listing'] = await asyncio.gather(*listing_tasks)
        return token_value