async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect an output token's file to ``output_dir``.

    Scatter tokens (list-valued ``token.job``) and list-valued tokens are
    processed recursively and concurrently; a scalar token's file is
    transferred via the data manager and the token is updated with the
    destination path.
    """
    if isinstance(token.job, MutableSequence):
        # Scatter token: collect each inner token concurrently and wrap the
        # resulting Token objects back into the outer token.
        return token.update(await asyncio.gather(*[
            asyncio.create_task(self.collect_output(t, output_dir))
            for t in token.value
        ]))
    elif isinstance(token.value, MutableSequence):
        # List token: collect each element concurrently, then keep only the
        # collected values (not the wrapper tokens).
        token_list = await asyncio.gather(*[
            asyncio.create_task(
                self.collect_output(token.update(t), output_dir))
            for t in token.value
        ])
        return token.update([t.value for t in token_list])
    context = self.port.step.context
    path_processor = utils.get_path_processor(self.port.step)
    src_path = token.value
    src_job = context.scheduler.get_job(token.job)
    # Destination keeps the source file's basename under output_dir
    dest_path = os.path.join(output_dir, path_processor.basename(src_path))
    # Transfer file to local destination
    # NOTE(review): dst_job=None presumably targets the local node — confirm
    await self.port.step.context.data_manager.transfer_data(
        src=src_path,
        src_job=src_job,
        dst=dest_path,
        dst_job=None,
        writable=True)
    # Update token
    return token.update(dest_path)
async def update_token(self, job: Job, token: Token) -> Token:
    """Adapt an input token to the destination job.

    Applies the port's default value when the token is empty, resolves the
    port type when it is still 'Any', and routes File/Directory values
    through ``_update_file_token``.
    """
    if isinstance(token.job, MutableSequence):
        # Scatter token: delegate element-wise handling to the superclass
        return await super().update_token(job, token)
    if token.value is None and self.default_value is not None:
        token = token.update(await self._build_token_value(
            job, self.default_value))
    if self.port_type == 'Any' or self.port_type is None:
        # Prefer the concrete type declared by the dependee's processor;
        # otherwise infer the type from the token value itself
        if (isinstance(self.port, InputPort) and isinstance(
                self.port.dependee.token_processor, CWLTokenProcessor) and
                self.port.dependee.token_processor.port_type != 'Any' and
                self.port.dependee.token_processor.port_type is not None):
            self.port_type = self.port.dependee.token_processor.port_type
        else:
            self.port_type = utils.infer_type_from_token(token.value)
    if isinstance(token.value, MutableMapping) and token.value.get('class') in [
            'File', 'Directory'
    ]:
        context = self.get_context()
        src_job = context.scheduler.get_job(token.job)
        # NOTE(review): token.value is a MutableMapping here, so this
        # MutableSequence branch looks unreachable — confirm intent
        if isinstance(token.value, MutableSequence):
            elements = []
            for element in token.value:
                elements.append(await self._update_file_token(
                    job, src_job, element))
            return token.update(elements)
        elif token.value is not None:
            return token.update(await self._update_file_token(
                job, src_job, token.value))
        else:
            return token
    else:
        return token.update(await self._build_token_value(job, token.value))
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect scatter tokens recursively; any other token passes through."""
    if not isinstance(token.job, MutableSequence):
        return token
    # Scatter token: recurse on every element concurrently; raw values are
    # wrapped into tokens before recursion.
    tasks = []
    for element in token.value:
        inner = element if isinstance(element, Token) else token.update(element)
        tasks.append(
            asyncio.create_task(self.collect_output(inner, output_dir)))
    return token.update(await asyncio.gather(*tasks))
async def _recover_token(self, job: Job, resources: MutableSequence[Text],
                         token: Token) -> Token:
    """Recover a token's value, element-wise for list values."""
    if not isinstance(token.value, MutableSequence):
        return token.update(await self._recover_token_value(
            job, resources, token, token.value))
    # Recover elements one at a time to preserve the original ordering of
    # recovery operations.
    recovered = [
        await self._recover_token_value(job, resources, token, element)
        for element in token.value]
    return token.update(recovered)
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect every element of an array token through the item processor."""
    if isinstance(token.job, MutableSequence):
        return await super().collect_output(token, output_dir)
    self._check_list(token.value)
    tasks = [
        asyncio.create_task(
            self.processor.collect_output(token.update(value), output_dir))
        for value in token.value]
    collected = await asyncio.gather(*tasks)
    return token.update([t.value for t in collected])
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect a tuple-like token: element i goes through processor i."""
    if isinstance(token.job, MutableSequence):
        return await super().collect_output(token, output_dir)
    self._check_list(token.value)
    # Pair each processor with its positional value; surplus values on
    # either side are ignored (zip stops at the shorter sequence).
    tasks = [
        asyncio.create_task(
            proc.collect_output(token.update(value), output_dir))
        for proc, value in zip(self.processors, token.value)]
    return token.update([t.value for t in await asyncio.gather(*tasks)])
async def update_token(self, job: Job, token: Token) -> Token:
    """Update a tuple-like token: element i goes through processor i."""
    if isinstance(token.job, MutableSequence):
        return await super().update_token(job, token)
    self._check_list(token.value)
    # Positional pairing; zip bounds iteration by the shorter sequence,
    # matching the original index-guarded loop.
    tasks = [
        asyncio.create_task(proc.update_token(job, token.update(value)))
        for proc, value in zip(self.processors, token.value)]
    return token.update([t.value for t in await asyncio.gather(*tasks)])
async def compute_token(self, job: Job,
                        command_output: CommandOutput) -> Token:
    """Build a record token from a command output.

    Handles three shapes of ``command_output.value``:
    * a list — compute one token per element and collect the values;
    * a mapping — either pick the entry named after this port, or route
      each known field to its own processor;
    * anything else — pass the whole output to every field processor.
    """
    if isinstance(command_output.value, MutableSequence):
        # List output: compute a token per element concurrently
        token_value = [
            t.value for t in await asyncio.gather(*[
                asyncio.create_task(
                    self.compute_token(job, command_output.update(cv)))
                for cv in command_output.value
            ])
        ]
        return Token(name=self.port.name,
                     value=token_value,
                     job=job.name,
                     tag=get_tag(job.inputs))
    if isinstance(command_output.value, MutableMapping):
        if self.port.name in command_output.value:
            # The output explicitly names this port: recurse on that entry
            return await self.compute_token(
                job, command_output.update(
                    command_output.value[self.port.name]))
        else:
            # Route each field present in the output to its processor
            token_tasks = {}
            for key, processor in self.processors.items():
                if key in command_output.value:
                    partial_command = command_output.update(
                        command_output.value[key])
                    token_tasks[key] = asyncio.create_task(
                        processor.compute_token(job, partial_command))
            token_value = dict(
                zip(token_tasks.keys(), [
                    t.value
                    for t in await asyncio.gather(*token_tasks.values())
                ]))
            return Token(name=self.port.name,
                         value=token_value,
                         job=job.name,
                         tag=get_tag(job.inputs))
    else:
        # Scalar output: every field processor receives the whole output
        token_tasks = {}
        for key, processor in self.processors.items():
            token_tasks[key] = asyncio.create_task(
                processor.compute_token(job, command_output))
        token_value = dict(
            zip(token_tasks.keys(), [
                t.value for t in await asyncio.gather(*token_tasks.values())
            ]))
        return Token(name=self.port.name,
                     value=token_value,
                     job=job.name,
                     tag=get_tag(job.inputs))
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect a record token: each field is collected by its own processor.

    Bug fix: results used to be zipped against ``token.value.keys()`` even
    though tasks are created only for fields that have a processor, in
    processor-declaration order. A value dict with extra keys, or keys in a
    different order, would pair collected values with the wrong field names.
    The results are now zipped with the exact keys they were computed for
    (consistent with the sibling ``update_token`` implementation).
    """
    if isinstance(token.job, MutableSequence):
        return await super().collect_output(token, output_dir)
    self._check_dict(token.value)
    # Track the processed keys alongside the tasks so keys and results
    # stay aligned.
    output_keys = []
    output_tasks = []
    for key, processor in self.processors.items():
        if key in token.value:
            output_keys.append(key)
            output_tasks.append(
                asyncio.create_task(
                    processor.collect_output(token.update(token.value[key]),
                                             output_dir)))
    return token.update(
        dict(zip(output_keys,
                 [t.value for t in await asyncio.gather(*output_tasks)])))
async def update_token(self, job: Job, token: Token) -> Token:
    """Update each element of an array token via the item processor."""
    if isinstance(token.job, MutableSequence):
        # Scatter token: inner tokens are updated and kept as tokens
        tasks = [
            asyncio.create_task(self.processor.update_token(job, inner))
            for inner in token.value]
        return token.update(await asyncio.gather(*tasks))
    self._check_list(token.value)
    # Plain list: wrap each value into a token, update it, then unwrap
    tasks = [
        asyncio.create_task(
            self.processor.update_token(job, token.update(value)))
        for value in token.value]
    return token.update([t.value for t in await asyncio.gather(*tasks)])
async def compute_token(self, job: Job,
                        command_output: JupyterCommandOutput) -> Token:
    """Build an output token for a Jupyter cell, resolving file patterns.

    When ``self.value`` is set, it is treated as a (possibly relative)
    pattern resolved remotely on every job resource; otherwise the value is
    read from the notebook namespace via ``self.value_from`` and relative
    paths are anchored at the job output directory.
    """
    path_processor = utils.get_path_processor(self.port.step)
    if self.value is not None:
        connector = job.step.get_connector() if job is not None else None
        resources = job.get_resources() or [None]
        # Anchor relative patterns at the job output directory
        if job.output_directory and not path_processor.isabs(self.value):
            pattern = path_processor.join(job.output_directory, self.value)
        else:
            pattern = self.value
        # Resolve the pattern on every resource and flatten the matches;
        # a single match is unwrapped to a scalar
        token_value = utils.flatten_list(await asyncio.gather(*[
            asyncio.create_task(
                remotepath.resolve(
                    connector=connector,
                    target=resource,
                    pattern=pattern))
            for resource in resources
        ]))
        if len(token_value) == 1:
            token_value = token_value[0]
    else:
        token_value = command_output.user_ns.get(self.value_from)
        # NOTE(review): token_value may be None here when value_from is
        # absent from user_ns, which would make isabs() below fail — confirm
        if job.output_directory:
            if isinstance(token_value, MutableSequence):
                token_value = [
                    path_processor.join(job.output_directory, t)
                    if not path_processor.isabs(t) else t
                    for t in token_value
                ]
            else:
                if not path_processor.isabs(token_value):
                    token_value = path_processor.join(job.output_directory,
                                                      token_value)
    return Token(name=self.port.name,
                 value=token_value,
                 job=job.name,
                 tag=utils.get_tag(job.inputs))
def get_related_resources(self, token: Token) -> Set[Text]:
    """Union of resources related to each element of an array token."""
    self._check_list(token.value)
    resources = set()
    for value in token.value:
        element_token = token.update(value)
        resources.update(self.processor.get_related_resources(element_token))
    return resources
async def update_token(self, job: Job, token: Token) -> Token:
    """Update a record token field-by-field via the per-key processors."""
    if isinstance(token.job, MutableSequence):
        return await super().update_token(job, token)
    self._check_dict(token.value)
    # Schedule one update per field that has both a processor and a value
    pending = {
        key: asyncio.create_task(
            processor.update_token(job, token.update(token.value[key])))
        for key, processor in self.processors.items()
        if key in token.value}
    results = await asyncio.gather(*pending.values())
    return token.update(
        {key: t.value for key, t in zip(pending.keys(), results)})
async def recover_token(self, job: Job, resources: MutableSequence[Text],
                        token: Token) -> Token:
    """Recover inner tokens of a scatter token; others pass through."""
    if not isinstance(token.job, MutableSequence):
        return token
    tasks = [
        asyncio.create_task(self.recover_token(job, resources, inner))
        for inner in token.value]
    return token.update(await asyncio.gather(*tasks))
async def update_token(self, job: Job, token: Token) -> Token:
    """Update inner tokens of a scatter token; others pass through."""
    if not isinstance(token.job, MutableSequence):
        return token
    tasks = [
        asyncio.create_task(self.update_token(job, inner))
        for inner in token.value]
    return token.update(await asyncio.gather(*tasks))
def get_related_resources(self, token: Token) -> Set[Text]:
    """Union of related resources across all record fields with processors."""
    self._check_dict(token.value)
    resources = set()
    for key, processor in self.processors.items():
        if key not in token.value:
            continue
        field_token = token.update(token.value[key])
        resources.update(processor.get_related_resources(field_token))
    return resources
async def compute_token(self, job: Job,
                        command_output: CWLCommandOutput) -> Any:
    """Return an empty token for skipped steps; otherwise defer (None)."""
    if command_output.status != Status.SKIPPED:
        return None
    return Token(
        name=self.port.name,
        value=None,
        job=job.name,
        tag=get_tag(job.inputs))
def get_related_resources(self, token: Token) -> Set[Text]:
    """Union of related resources across positional processors."""
    self._check_list(token.value)
    resources = set()
    # zip bounds the iteration by the shorter sequence, matching the
    # original index-guarded loop
    for proc, value in zip(self.processors, token.value):
        resources.update(proc.get_related_resources(token.update(value)))
    return resources
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Deserialize a token value and post-load it through the executor."""
    if isinstance(token.job, MutableSequence):
        return await super().collect_output(token, output_dir)
    # Values are dill-serialized, either element-wise or as a whole
    if isinstance(token.value, MutableSequence):
        loaded = [dill.loads(v) for v in token.value]
    else:
        loaded = dill.loads(token.value)
    return token.update(
        executor.postload(compiler=self.compiler,
                          name=self.port.name,
                          value=loaded,
                          serializer=self.serializer))
async def compute_token(self, job: Job,
                        command_output: CommandOutput) -> Token:
    """Compute an array token, normalising its value to a list."""
    if isinstance(command_output.value, MutableSequence):
        inner_tokens = await asyncio.gather(*[
            asyncio.create_task(
                self.processor.compute_token(job,
                                             command_output.update(value)))
            for value in command_output.value])
        token = Token(name=self.port.name,
                      value=[t.value for t in inner_tokens],
                      job=job.name,
                      tag=get_tag(job.inputs))
    else:
        token = await self.processor.compute_token(job, command_output)
    # Normalise: None -> [], scalar -> [scalar], list stays as-is
    if token.value is None:
        token.value = []
    elif not isinstance(token.value, MutableSequence):
        token.value = [token.value]
    return token
async def _build_token(self, job_name: Text, token_value: Any,
                       count: int) -> Token:
    """Wrap a value into a Token tagged with the job tag plus an index."""
    job = self.step.context.scheduler.get_job(job_name)
    weight = await self.token_processor.weight_token(job, token_value)
    # The tag extends the parent job's tag with this token's ordinal
    parent_tag = get_tag(job.inputs if job is not None else [])
    return Token(
        name=self.name,
        value=token_value,
        job=job_name,
        tag=posixpath.join(parent_tag, str(count)),
        weight=weight)
async def update_token(self, job: Job, token: Token) -> Token:
    """Transfer a token's (possibly serialized) path into the job input dir.

    Scatter and list tokens are updated recursively in parallel; a scalar
    value is deserialized if it arrives as dill bytes, transferred to the
    destination job, and the new path is re-serialized with dill.
    """
    if isinstance(token.job, MutableSequence):
        # Scatter token: update inner tokens concurrently
        return token.update(await asyncio.gather(*[
            asyncio.create_task(self.update_token(job, t))
            for t in token.value
        ]))
    elif isinstance(token.value, MutableSequence):
        # List token: update each element and keep only the values
        token_list = await asyncio.gather(*[
            asyncio.create_task(self.update_token(job, token.update(t)))
            for t in token.value
        ])
        return token.update([t.value for t in token_list])
    src_job = self.get_context().scheduler.get_job(token.job)
    path_processor = utils.get_path_processor(self.port.step)
    # Values may arrive dill-serialized (bytes) from a previous step
    token_value = dill.loads(token.value) if isinstance(
        token.value, bytes) else token.value
    # NOTE(review): os.path.basename is used while the join goes through the
    # step's path processor — verify this is correct for non-POSIX targets
    dest_path = path_processor.join(job.input_directory,
                                    os.path.basename(token_value))
    await self.port.step.context.data_manager.transfer_data(
        src=token_value,
        src_job=src_job,
        dst=dest_path,
        dst_job=job)
    return token.update(dill.dumps(dest_path))
async def compute_token(self, job: Job,
                        command_output: CWLCommandOutput) -> Any:
    """Compute an output token, or None when the step was skipped."""
    if command_output.status == Status.SKIPPED:
        return None
    token_value = await self._get_value_from_command(job, command_output)
    self._register_data(job, token_value)
    weight = await self.weight_token(job, token_value)
    return Token(name=self.port.name,
                 value=token_value,
                 job=job.name,
                 tag=get_tag(job.inputs),
                 weight=weight)
async def get(self, consumer: Text) -> Token: outputs = await self._retrieve(consumer) # Check for termination if utils.check_termination(outputs): return TerminationToken(self.name) # Return token outputs = flatten_list(outputs) if self.merge_strategy is not None: outputs = self._merge(outputs) if isinstance(outputs, MutableSequence): return Token(name=self.name, job=[t.job for t in outputs], value=outputs, tag=get_tag(outputs), weight=sum([t.weight for t in outputs])) else: return outputs
async def compute_token(self, job: Job,
                        command_output: JupyterCommandOutput) -> Token:
    """Serialize the port's output value with dill and wrap it in a token."""
    # Resolve the raw value: explicit value first, then the notebook user
    # namespace, finally Python builtins
    if self.value is not None:
        raw_value = self.value
    elif self.value_from in command_output.user_ns:
        raw_value = command_output.user_ns[self.value_from]
    else:
        raw_value = builtins.__dict__.get(self.value_from)
    value = executor.predump(
        compiler=self.compiler,
        name=self.port.name,
        value=raw_value,
        serializer=self.serializer)
    # Serialize element-wise for sequences, as a whole otherwise
    if isinstance(value, MutableSequence):
        token_value = [dill.dumps(v, recurse=True) for v in value]
    else:
        token_value = dill.dumps(value, recurse=True)
    return Token(name=self.port.name,
                 value=token_value,
                 job=job.name,
                 tag=utils.get_tag(job.inputs))
def get_related_resources(self, token: Token) -> Set[Text]:
    """Return the set of resources holding data related to the token.

    Bug fix: the recursive branch for scatter/list tokens computed the
    union of the elements' resources but then fell through, discarding the
    result and calling ``scheduler.get_job`` with a list-valued job name.
    The union is now returned directly.
    """
    if isinstance(token.job, MutableSequence) or isinstance(
            token.value, MutableSequence):
        # Scatter or list token: union the resources of every element.
        # For scatter tokens the elements are already Token objects.
        resources = set()
        for t in token.value:
            resources.update(
                self.get_related_resources(t if isinstance(
                    token.job, MutableSequence) else token.update(t)))
        return resources
    context = self.port.step.context
    src_job = context.scheduler.get_job(token.job)
    # Start from the job's resources, then add every resource that holds a
    # registered data location for the token's path
    resources = set(src_job.get_resources() if src_job else [])
    data_locations = set()
    for resource in resources:
        data_locations.update(
            context.data_manager.get_data_locations(resource, token.value))
    resources.update({loc.resource for loc in data_locations})
    return resources
def put(self, token: Token):
    """Append a token to the port buffer.

    When a TerminationToken arrives, all buffered tokens are first
    collapsed into a single aggregate token (using ``merge_strategy`` when
    configured), then the termination marker is appended and the port is
    flagged as fireable.
    """
    if isinstance(token, TerminationToken):
        token_list = self.token
        if token_list:
            # Collapse the buffered tokens into one aggregate token
            self.token = [
                Token(name=self.name,
                      job=[t.job for t in token_list],
                      tag=_get_tag(token_list),
                      value=self.merge_strategy(token_list)
                      if self.merge_strategy else token_list)
            ]
            self.token.append(token)
        else:
            self.token = [token]
        self.fireable.set()
    else:
        self.token.append(token)
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Collect a CWL File/Directory output into ``output_dir``.

    Non-file ports and scatter tokens are delegated to the superclass. A
    synthetic "output collector" job rooted at ``output_dir`` serves as
    the destination for the file transfer.
    """
    if isinstance(token.job, MutableSequence) or self.port_type not in [
            'File', 'Directory'
    ]:
        return await super().collect_output(token, output_dir)
    if token.value is not None and self.port_type in ['File', 'Directory']:
        context = self.get_context()
        # Synthetic local job whose input directory is the output dir
        output_collector = BaseJob(name=random_name(),
                                   step=BaseStep(name=random_name(),
                                                 context=context),
                                   inputs=[],
                                   input_directory=output_dir)
        return token.update(await self._update_file_token(
            job=output_collector,
            src_job=context.scheduler.get_job(token.job),
            token_value=token.value,
            load_listing=LoadListing.deep_listing,
            writable=True))
    else:
        return token
async def _replace_token(job: Job,
                         token_processor: TokenProcessor,
                         old_token: Token,
                         new_token: Token):
    """Recursively replace entries of ``old_token`` matching ``new_token``.

    Scatter tokens are descended element-wise, pairing each element with
    the matching sub-processor for list/map token processors. When the job
    names match, the new token is adapted via ``update_token``; otherwise
    the old token is kept unchanged.
    """
    if isinstance(old_token.job, MutableSequence):
        token_value = []
        if isinstance(token_processor, ListTokenProcessor):
            # Positional pairing of elements with their processors
            for (t, tp) in zip(old_token.value, token_processor.processors):
                token_value.append(await _replace_token(job, tp, t, new_token))
        elif isinstance(token_processor, MapTokenProcessor):
            # Homogeneous list: every element shares the same processor
            for t in old_token.value:
                token_value.append(await _replace_token(
                    job, token_processor.processor, t, new_token))
        else:
            for t in old_token.value:
                token_value.append(await _replace_token(
                    job, token_processor, t, new_token))
        return old_token.update(token_value)
    elif new_token.job == old_token.job:
        return await token_processor.update_token(job, new_token)
    else:
        return old_token
async def compute_token(self, job: Job,
                        command_output: CommandOutput) -> Token:
    """Wrap the raw command output value into a token for this port."""
    return Token(
        name=self.port.name,
        job=job.name,
        tag=get_tag(job.inputs),
        value=command_output.value)