예제 #1
0
    def ascwl(self, global_step_outputs=False):
        """Serialize graph to CWL workflow.

        :param global_step_outputs: Make all step outputs global.
        """
        workflow = Workflow()

        input_index = 1
        steps = {}

        def _source_name(key):
            """Find source name for a node."""
            if self.G.in_degree(key) == 0:
                return None

            assert self.G.in_degree(key) == 1

            tool_key, node = list(self.G.pred[key].items())[0]
            return '{0}/{1}'.format(steps[tool_key], node['id'])

        def _relative_default(client, default):
            """Evolve ``File`` path."""
            if isinstance(default, File):
                path = (client.workflow_path / default.path).resolve()
                return attr.evolve(default, path=path)
            return default

        for tool_index, (key, node) in enumerate(self._tool_nodes, 1):
            _, path = key
            tool = node['tool']
            step_id = 'step_{0}'.format(tool_index)
            steps[key] = step_id

            ins = {
                edge_id: _source_name(target_id)
                for target_id, _, edge_id in self.G.in_edges(key, data='id')
            }
            outs = [
                edge_id for _, _, edge_id in self.G.out_edges(key, data='id')
            ]

            for input_ in tool.inputs:
                input_mapping = ins.get(input_.id)
                if input_mapping is None:
                    input_id = 'input_{0}'.format(input_index)
                    workflow.inputs.append(
                        InputParameter(
                            id=input_id,
                            type=input_.type,
                            default=_relative_default(self.client,
                                                      input_.default),
                        ))
                    input_index += 1
                    ins[input_.id] = input_id

            workflow.add_step(
                run=self.client.path / path,
                id=step_id,
                in_=ins,
                out=outs,
            )

        output_keys = (key for _, key in self.G.out_edges(steps.keys())
                       ) if global_step_outputs else self._output_keys

        for index, key in enumerate(output_keys):
            output_id = 'output_{0}'.format(index)
            workflow.outputs.append(
                WorkflowOutputParameter(
                    id=output_id,
                    type='File',
                    outputSource=_source_name(key),
                ))

        return workflow
예제 #2
0
    def ascwl(
        self,
        input_paths=None,
        output_paths=None,
        outputs=None,
        use_latest=True,
    ):
        """Serialize graph to CWL workflow.

        :param global_step_outputs: Make all step outputs global.
        """
        if output_paths is None:
            output_paths = {
                node.path
                for node in outputs if _safe_path(node.path)
            }

        workflow = Workflow()

        processes = set()
        stack = []

        output_keys = {(node.commit, node.path) for node in outputs}
        nodes = {(node.commit, node.path): node for node in self.nodes}

        def connect_file_to_directory(node):
            """Return step connecting file to a directory."""
            process = attr.evolve(
                LINK_CWL,
                inputs={
                    'input_directory': 'Directory',
                    'filename': {
                        'type':
                        'string',
                        'default':
                        str(Path(node.path).relative_to(node.parent.path)),
                    },
                })
            process_run = ProcessRun(
                commit=node.commit,
                client=node.client,
                path=None,
                process=process,
                inputs={
                    node.parent.path:
                    Usage(
                        entity=node.parent,
                        role='input_directory',
                    ),
                },
                outputs={
                    node.path: 'output_file',
                },
            )

            for generated in process_run.generated:
                nodes[(generated.commit, generated.path)] = generated

            return process_run

        for node in self.nodes:
            if (node.commit, node.path) not in output_keys:
                continue

            process_run = None
            if isinstance(node, Entity) and not hasattr(node, 'activity'):
                process_run = connect_file_to_directory(node)

                stack.append(process_run)
                processes.add(process_run)

            else:
                assert hasattr(node, 'activity'), node
                assert isinstance(node.activity, ProcessRun)

                plan = node.activity.association.plan
                latest = self.latest(plan)
                if use_latest and latest:
                    plan = nodes[(latest, plan.path)]

                process_run = plan.activity

                if process_run not in processes:
                    stack.append(process_run)
                    processes.add(process_run)

        while stack:
            action = stack.pop()

            if not hasattr(action, 'inputs'):
                continue

            for path, dependency in action.inputs.items():
                # Do not follow defined input paths.
                if input_paths and path in input_paths:
                    continue

                node = nodes.get((dependency.commit, dependency.path),
                                 dependency)

                if isinstance(node, Generation):
                    process_run = node.activity
                elif isinstance(node, Collection) and node.parent:
                    raise NotImplementedError('Can not connect subdirectory')
                elif isinstance(node, Entity) and node.parent:
                    process_run = connect_file_to_directory(node)
                else:
                    process_run = None

                # Skip existing commits
                if process_run and isinstance(process_run, ProcessRun):
                    plan = process_run.association.plan
                    latest = self.latest(plan)
                    if process_run.path and use_latest and latest:
                        plan = nodes[(latest, plan.path)]

                    process_run = plan.activity

                    if process_run not in processes:
                        stack.append(process_run)
                        processes.add(process_run)

        steps = {
            tool: 'step_{0}'.format(tool_index)
            for tool_index, tool in enumerate(processes, 1)
        }

        def _source_name(commit, path):
            """Find source name for a node."""
            try:
                process_run = nodes[(commit, path)].activity
                output_id = process_run.outputs[path]
                return '{0}/{1}'.format(steps[process_run], output_id)
            except (KeyError, AttributeError):
                pass

        def _relative_default(client, default):
            """Evolve ``File`` or ``Directory`` path."""
            if isinstance(default, PATH_TYPES):
                path = (client.workflow_path / default.path).resolve()
                return attr.evolve(default, path=path)
            return default

        input_index = 1

        for action, step_id in steps.items():
            tool = action.process

            ins = {}
            for path, dependency in action.inputs.items():
                alias = _source_name(dependency.commit, path)
                if alias:
                    ins[dependency.role] = alias

            outs = list(set(action.outputs.values()))

            for generated in action.generated:
                if generated.entity.path not in output_paths:
                    output_paths.add(generated.entity.path)
                    outputs.add(generated.entity)

            for input_ in tool.inputs:
                input_mapping = ins.get(input_.id)
                if input_mapping is None:
                    input_id = 'input_{0}'.format(input_index)
                    workflow.inputs.append(
                        InputParameter(
                            id=input_id,
                            type=input_.type,
                            default=_relative_default(self.client,
                                                      input_.default),
                        ))
                    input_index += 1
                    ins[input_.id] = input_id

            workflow.add_step(
                run=self.client.path / action.path if action.path else tool,
                id=step_id,
                in_=ins,
                out=outs,
            )

        for index, node in enumerate(
            (node for node in outputs if node.path in output_paths)):
            commit, path = node.commit, node.path
            id_ = 'output_{0}'.format(index)
            process_run = nodes[(commit, path)].activity

            if process_run.process is None or process_run.path is None:
                continue

            output_id = process_run.outputs[path]
            type_ = next(output for output in process_run.process.outputs
                         if output.id == output_id).type
            type_ = type_ if type_ == 'Directory' else 'File'
            output_source = _source_name(commit, path)

            if output_source is None:
                continue

            workflow.outputs.append(
                WorkflowOutputParameter(
                    id=id_,
                    type=type_,
                    outputSource=output_source,
                ))

        return workflow
예제 #3
0
    def ascwl(
        self,
        input_paths=None,
        output_paths=None,
        outputs=None,
        use_latest=True,
    ):
        """Serialize graph to CWL workflow.

        :param global_step_outputs: Make all step outputs global.
        """
        if output_paths is None:
            output_paths = {
                node.path
                for node in outputs if _safe_path(node.path)
            }

        workflow = Workflow()

        processes = set()
        stack = []

        output_keys = {(node.commit, node.path) for node in outputs}
        nodes = {(node.commit, node.path): node for node in self.nodes}

        for node in self.nodes:
            if (node.commit, node.path) not in output_keys:
                continue

            process_run = None
            if isinstance(node, ProcessRun):
                process_run = node
            elif isinstance(node.activity, ProcessRun):
                process_run = node.activity

            if process_run:
                latest = self.latest(process_run)
                if use_latest and latest:
                    process_run = nodes[(latest, process_run.path)]

                if process_run not in processes:
                    stack.append(process_run)
                    processes.add(process_run)

        while stack:
            action = stack.pop()

            if not hasattr(action, 'inputs'):
                continue

            for path, dependency in action.inputs.items():
                # Do not follow defined input paths.
                if input_paths and path in input_paths:
                    continue

                try:
                    process_run = nodes[(dependency.commit,
                                         dependency.path)].activity
                except AttributeError:
                    continue

                # Skip existing commits
                if process_run and isinstance(process_run, ProcessRun):
                    latest = self.latest(process_run)
                    if use_latest and latest:
                        process_run = nodes[(latest, process_run.path)]

                    if process_run not in processes:
                        stack.append(process_run)
                        processes.add(process_run)

        steps = {
            tool: 'step_{0}'.format(tool_index)
            for tool_index, tool in enumerate(processes, 1)
        }

        def _source_name(commit, path):
            """Find source name for a node."""
            try:
                process_run = nodes[(commit, path)].activity
                output_id = process_run.outputs[path]
                return '{0}/{1}'.format(steps[process_run], output_id)
            except (KeyError, AttributeError):
                pass

        def _relative_default(client, default):
            """Evolve ``File`` or ``Directory`` path."""
            if isinstance(default, PATH_TYPES):
                path = (client.workflow_path / default.path).resolve()
                return attr.evolve(default, path=path)
            return default

        input_index = 1

        for action, step_id in steps.items():
            tool = action.process

            ins = {}
            for path, dependency in action.inputs.items():
                alias = _source_name(dependency.commit, path)
                if alias:
                    ins[dependency.role] = alias

            outs = list(set(action.outputs.values()))

            for generated in action.generated:
                if generated.entity.path not in output_paths:
                    output_paths.add(generated.entity.path)
                    outputs.add(generated.entity)

            for input_ in tool.inputs:
                input_mapping = ins.get(input_.id)
                if input_mapping is None:
                    input_id = 'input_{0}'.format(input_index)
                    workflow.inputs.append(
                        InputParameter(
                            id=input_id,
                            type=input_.type,
                            default=_relative_default(self.client,
                                                      input_.default),
                        ))
                    input_index += 1
                    ins[input_.id] = input_id

            workflow.add_step(
                run=self.client.path / action.path,
                id=step_id,
                in_=ins,
                out=outs,
            )

        for index, node in enumerate(
            (node for node in outputs if node.path in output_paths)):
            commit, path = node.commit, node.path
            id_ = 'output_{0}'.format(index)
            process_run = nodes[(commit, path)].activity

            if process_run.process is None:
                continue

            output_id = process_run.outputs[path]
            type_ = next(output for output in process_run.process.outputs
                         if output.id == output_id).type
            type_ = type_ if type_ == 'Directory' else 'File'
            output_source = _source_name(commit, path)

            if output_source is None:
                continue

            workflow.outputs.append(
                WorkflowOutputParameter(
                    id=id_,
                    type=type_,
                    outputSource=output_source,
                ))

        return workflow