def ascwl(self, global_step_outputs=False):
    """Build a CWL ``Workflow`` from the tool nodes of the graph.

    Every entry of ``self._tool_nodes`` becomes one workflow step. Tool
    inputs that are not fed by another step are promoted to workflow
    inputs named ``input_<n>``.

    :param global_step_outputs: When true, every node reached by an
        outgoing edge of a step is exported as a workflow output;
        otherwise only ``self._output_keys`` are exported.
    :returns: The populated ``Workflow`` instance.
    """
    workflow = Workflow()
    step_ids = {}
    next_input = 1

    def _source_name(key):
        """Return ``<step id>/<edge id>`` of the producer of ``key``."""
        incoming = self.G.in_degree(key)
        if incoming == 0:
            # Nothing in the graph produces this node.
            return None

        assert incoming == 1

        producer, edge_data = next(iter(self.G.pred[key].items()))
        return '{0}/{1}'.format(step_ids[producer], edge_data['id'])

    def _relative_default(client, default):
        """Resolve the path of a ``File`` default against the client."""
        if not isinstance(default, File):
            return default
        resolved = (client.workflow_path / default.path).resolve()
        return attr.evolve(default, path=resolved)

    for position, (tool_key, tool_node) in enumerate(self._tool_nodes, 1):
        _, tool_path = tool_key
        tool = tool_node['tool']
        step_id = 'step_{0}'.format(position)
        step_ids[tool_key] = step_id

        # Map each incoming edge id to the step output that feeds it.
        ins = {}
        for source_key, _, edge_id in self.G.in_edges(tool_key, data='id'):
            ins[edge_id] = _source_name(source_key)

        outs = []
        for _, _, edge_id in self.G.out_edges(tool_key, data='id'):
            outs.append(edge_id)

        for tool_input in tool.inputs:
            if ins.get(tool_input.id) is not None:
                continue

            # Unconnected tool input: expose it on the workflow itself.
            workflow_input_id = 'input_{0}'.format(next_input)
            default = _relative_default(self.client, tool_input.default)
            parameter = InputParameter(
                id=workflow_input_id,
                type=tool_input.type,
                default=default,
            )
            workflow.inputs.append(parameter)
            next_input += 1
            ins[tool_input.id] = workflow_input_id

        workflow.add_step(
            run=self.client.path / tool_path,
            id=step_id,
            in_=ins,
            out=outs,
        )

    if global_step_outputs:
        exported = (
            target for _, target in self.G.out_edges(step_ids.keys())
        )
    else:
        exported = self._output_keys

    for position, output_key in enumerate(exported):
        parameter = WorkflowOutputParameter(
            id='output_{0}'.format(position),
            type='File',
            outputSource=_source_name(output_key),
        )
        workflow.outputs.append(parameter)

    return workflow
def ascwl(
    self,
    input_paths=None,
    output_paths=None,
    outputs=None,
    use_latest=True,
):
    """Serialize graph to CWL workflow.

    Starting from the requested ``outputs``, the process runs that
    produced them are collected, their inputs are followed backwards,
    and each collected process run becomes one workflow step.

    :param input_paths: Optional container of paths. A step input whose
        path is in it is not followed to the process run producing it.
    :param output_paths: Optional set of paths that may be exposed as
        workflow outputs. Defaults to the paths of ``outputs`` accepted
        by ``_safe_path``. The set is extended in place with the path of
        every entity generated by an included step.
    :param outputs: Set of nodes (each with ``commit`` and ``path``) to
        build the workflow for. ``None`` is treated as an empty set. The
        set is extended in place with entities generated by included
        steps.
    :param use_latest: When true and ``self.latest(plan)`` returns a
        value, the plan stored under that commit is used instead.
    :returns: The populated ``Workflow`` instance.
    :raises NotImplementedError: If a followed dependency is a
        ``Collection`` that has a parent.
    """
    if outputs is None:
        # ``None`` is the advertised default; without this guard the
        # iterations below would fail with ``TypeError``.
        outputs = set()

    if output_paths is None:
        output_paths = {
            node.path
            for node in outputs if _safe_path(node.path)
        }

    workflow = Workflow()

    processes = set()
    stack = []

    output_keys = {(node.commit, node.path) for node in outputs}
    nodes = {(node.commit, node.path): node for node in self.nodes}

    def connect_file_to_directory(node):
        """Return step connecting file to a directory."""
        process = attr.evolve(
            LINK_CWL,
            inputs={
                'input_directory': 'Directory',
                'filename': {
                    'type': 'string',
                    'default':
                        str(Path(node.path).relative_to(node.parent.path)),
                },
            }
        )
        process_run = ProcessRun(
            commit=node.commit,
            client=node.client,
            path=None,
            process=process,
            inputs={
                node.parent.path: Usage(
                    entity=node.parent,
                    role='input_directory',
                ),
            },
            outputs={
                node.path: 'output_file',
            },
        )

        # Register the generated nodes so that later lookups by
        # (commit, path) resolve to this synthetic process run.
        for generated in process_run.generated:
            nodes[(generated.commit, generated.path)] = generated

        return process_run

    # Seed the traversal with the process runs of the requested outputs.
    for node in self.nodes:
        if (node.commit, node.path) not in output_keys:
            continue

        process_run = None
        if isinstance(node, Entity) and not hasattr(node, 'activity'):
            process_run = connect_file_to_directory(node)

            stack.append(process_run)
            processes.add(process_run)

        else:
            assert hasattr(node, 'activity'), node
            assert isinstance(node.activity, ProcessRun)

            plan = node.activity.association.plan
            latest = self.latest(plan)
            if use_latest and latest:
                plan = nodes[(latest, plan.path)]

            process_run = plan.activity

            if process_run not in processes:
                stack.append(process_run)
                processes.add(process_run)

    # Walk the dependencies of every collected process run.
    while stack:
        action = stack.pop()

        if not hasattr(action, 'inputs'):
            continue

        for path, dependency in action.inputs.items():
            # Do not follow defined input paths.
            if input_paths and path in input_paths:
                continue

            node = nodes.get(
                (dependency.commit, dependency.path), dependency
            )

            if isinstance(node, Generation):
                process_run = node.activity
            elif isinstance(node, Collection) and node.parent:
                raise NotImplementedError('Can not connect subdirectory')
            elif isinstance(node, Entity) and node.parent:
                process_run = connect_file_to_directory(node)
            else:
                process_run = None

            # Skip existing commits
            if process_run and isinstance(process_run, ProcessRun):
                plan = process_run.association.plan
                latest = self.latest(plan)
                if process_run.path and use_latest and latest:
                    plan = nodes[(latest, plan.path)]

                process_run = plan.activity

                if process_run not in processes:
                    stack.append(process_run)
                    processes.add(process_run)

    steps = {
        tool: 'step_{0}'.format(tool_index)
        for tool_index, tool in enumerate(processes, 1)
    }

    def _source_name(commit, path):
        """Find source name for a node.

        Returns ``None`` when the node, its activity, the output id or
        the step can not be resolved.
        """
        try:
            process_run = nodes[(commit, path)].activity
            output_id = process_run.outputs[path]
            return '{0}/{1}'.format(steps[process_run], output_id)
        except (KeyError, AttributeError):
            return None

    def _relative_default(client, default):
        """Evolve ``File`` or ``Directory`` path."""
        if isinstance(default, PATH_TYPES):
            path = (client.workflow_path / default.path).resolve()
            return attr.evolve(default, path=path)
        return default

    input_index = 1

    for action, step_id in steps.items():
        tool = action.process

        # Connect inputs that are produced by another step.
        ins = {}
        for path, dependency in action.inputs.items():
            alias = _source_name(dependency.commit, path)
            if alias:
                ins[dependency.role] = alias

        outs = list(set(action.outputs.values()))

        # Everything generated by an included step becomes a candidate
        # workflow output.
        for generated in action.generated:
            if generated.entity.path not in output_paths:
                output_paths.add(generated.entity.path)
                outputs.add(generated.entity)

        # Promote unconnected tool inputs to workflow inputs.
        for input_ in tool.inputs:
            input_mapping = ins.get(input_.id)
            if input_mapping is None:
                input_id = 'input_{0}'.format(input_index)
                workflow.inputs.append(
                    InputParameter(
                        id=input_id,
                        type=input_.type,
                        default=_relative_default(
                            self.client, input_.default
                        ),
                    )
                )
                input_index += 1
                ins[input_.id] = input_id

        workflow.add_step(
            # Steps without a path embed the tool itself.
            run=(self.client.path / action.path) if action.path else tool,
            id=step_id,
            in_=ins,
            out=outs,
        )

    for index, node in enumerate(
        (node for node in outputs if node.path in output_paths)
    ):
        commit, path = node.commit, node.path
        # The index is taken before filtering, so ids of skipped outputs
        # are not reused and the emitted ids may have gaps.
        id_ = 'output_{0}'.format(index)
        process_run = nodes[(commit, path)].activity

        if process_run.process is None or process_run.path is None:
            continue

        output_id = process_run.outputs[path]
        type_ = next(
            output for output in process_run.process.outputs
            if output.id == output_id
        ).type
        # Any type other than 'Directory' is exported as 'File'.
        type_ = type_ if type_ == 'Directory' else 'File'
        output_source = _source_name(commit, path)

        if output_source is None:
            continue

        workflow.outputs.append(
            WorkflowOutputParameter(
                id=id_,
                type=type_,
                outputSource=output_source,
            )
        )

    return workflow
def ascwl(
    self,
    input_paths=None,
    output_paths=None,
    outputs=None,
    use_latest=True,
):
    """Serialize graph to CWL workflow.

    Starting from the requested ``outputs``, the process runs that
    produced them are collected, their inputs are followed backwards,
    and each collected process run becomes one workflow step.

    :param input_paths: Optional container of paths. A step input whose
        path is in it is not followed to the process run producing it.
    :param output_paths: Optional set of paths that may be exposed as
        workflow outputs. Defaults to the paths of ``outputs`` accepted
        by ``_safe_path``. The set is extended in place with the path of
        every entity generated by an included step.
    :param outputs: Set of nodes (each with ``commit`` and ``path``) to
        build the workflow for. ``None`` is treated as an empty set. The
        set is extended in place with entities generated by included
        steps.
    :param use_latest: When true and ``self.latest(process_run)``
        returns a value, the node stored under that commit is used
        instead.
    :returns: The populated ``Workflow`` instance.
    """
    if outputs is None:
        # ``None`` is the advertised default; without this guard the
        # iterations below would fail with ``TypeError``.
        outputs = set()

    if output_paths is None:
        output_paths = {
            node.path
            for node in outputs if _safe_path(node.path)
        }

    workflow = Workflow()

    processes = set()
    stack = []

    output_keys = {(node.commit, node.path) for node in outputs}
    nodes = {(node.commit, node.path): node for node in self.nodes}

    # Seed the traversal with the process runs of the requested outputs.
    for node in self.nodes:
        if (node.commit, node.path) not in output_keys:
            continue

        process_run = None
        if isinstance(node, ProcessRun):
            process_run = node
        elif isinstance(node.activity, ProcessRun):
            process_run = node.activity

        if process_run:
            latest = self.latest(process_run)
            if use_latest and latest:
                process_run = nodes[(latest, process_run.path)]

            if process_run not in processes:
                stack.append(process_run)
                processes.add(process_run)

    # Walk the dependencies of every collected process run.
    while stack:
        action = stack.pop()

        if not hasattr(action, 'inputs'):
            continue

        for path, dependency in action.inputs.items():
            # Do not follow defined input paths.
            if input_paths and path in input_paths:
                continue

            try:
                process_run = nodes[
                    (dependency.commit, dependency.path)
                ].activity
            except AttributeError:
                # The node has no ``activity``; nothing to follow.
                continue

            # Skip existing commits
            if process_run and isinstance(process_run, ProcessRun):
                latest = self.latest(process_run)
                if use_latest and latest:
                    process_run = nodes[(latest, process_run.path)]

                if process_run not in processes:
                    stack.append(process_run)
                    processes.add(process_run)

    steps = {
        tool: 'step_{0}'.format(tool_index)
        for tool_index, tool in enumerate(processes, 1)
    }

    def _source_name(commit, path):
        """Find source name for a node.

        Returns ``None`` when the node, its activity, the output id or
        the step can not be resolved.
        """
        try:
            process_run = nodes[(commit, path)].activity
            output_id = process_run.outputs[path]
            return '{0}/{1}'.format(steps[process_run], output_id)
        except (KeyError, AttributeError):
            return None

    def _relative_default(client, default):
        """Evolve ``File`` or ``Directory`` path."""
        if isinstance(default, PATH_TYPES):
            path = (client.workflow_path / default.path).resolve()
            return attr.evolve(default, path=path)
        return default

    input_index = 1

    for action, step_id in steps.items():
        tool = action.process

        # Connect inputs that are produced by another step.
        ins = {}
        for path, dependency in action.inputs.items():
            alias = _source_name(dependency.commit, path)
            if alias:
                ins[dependency.role] = alias

        outs = list(set(action.outputs.values()))

        # Everything generated by an included step becomes a candidate
        # workflow output.
        for generated in action.generated:
            if generated.entity.path not in output_paths:
                output_paths.add(generated.entity.path)
                outputs.add(generated.entity)

        # Promote unconnected tool inputs to workflow inputs.
        for input_ in tool.inputs:
            input_mapping = ins.get(input_.id)
            if input_mapping is None:
                input_id = 'input_{0}'.format(input_index)
                workflow.inputs.append(
                    InputParameter(
                        id=input_id,
                        type=input_.type,
                        default=_relative_default(
                            self.client, input_.default
                        ),
                    )
                )
                input_index += 1
                ins[input_.id] = input_id

        workflow.add_step(
            run=self.client.path / action.path,
            id=step_id,
            in_=ins,
            out=outs,
        )

    for index, node in enumerate(
        (node for node in outputs if node.path in output_paths)
    ):
        commit, path = node.commit, node.path
        # The index is taken before filtering, so ids of skipped outputs
        # are not reused and the emitted ids may have gaps.
        id_ = 'output_{0}'.format(index)
        process_run = nodes[(commit, path)].activity

        if process_run.process is None:
            continue

        output_id = process_run.outputs[path]
        type_ = next(
            output for output in process_run.process.outputs
            if output.id == output_id
        ).type
        # Any type other than 'Directory' is exported as 'File'.
        type_ = type_ if type_ == 'Directory' else 'File'
        output_source = _source_name(commit, path)

        if output_source is None:
            continue

        workflow.outputs.append(
            WorkflowOutputParameter(
                id=id_,
                type=type_,
                outputSource=output_source,
            )
        )

    return workflow