Beispiel #1
0
    def with_workflow_storage(self):
        """Yield a workflow storage.

        Creates an empty ``Workflow``, yields it to the caller to be
        populated, and on resumption serializes each recorded step as a
        separate CWL file under ``self.workflow_path``.
        """
        with self.lock:
            from renku.models.cwl._ascwl import ascwl
            from renku.models.cwl.workflow import Workflow

            workflow = Workflow()
            yield workflow

            workflow_path = self.workflow_path
            # Create the storage directory lazily: only when there is at
            # least one step to serialize (hoisted out of the loop -- the
            # check is loop-invariant).
            if workflow.steps and not workflow_path.exists():
                workflow_path.mkdir()

            for step in workflow.steps:
                # Unique, filesystem-safe file name derived from the
                # step's base command.
                step_name = '{0}_{1}.cwl'.format(
                    uuid.uuid4().hex,
                    secure_filename('_'.join(step.run.baseCommand)),
                )

                step_path = workflow_path / step_name
                with step_path.open('w') as step_file:
                    yaml.dump(
                        ascwl(
                            step.run,
                            # Drop missing values from the CWL dump.
                            filter=lambda _, x: x is not None,
                            basedir=workflow_path,
                        ),
                        stream=step_file,
                        default_flow_style=False)
Beispiel #2
0
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands.

    Builds the provenance graph for ``paths`` at ``revision``, derives a
    CWL workflow producing the selected outputs, stores it under the
    client's workflow path and re-executes it.
    """
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Starting points must not overlap the files being regenerated.
    assert not roots & output_paths, "--from collides with output paths"

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        )
    )

    # Make sure all inputs are pulled from a storage.
    client.pull_paths_from_storage(
        *(path for _, path in workflow.iter_input_files(client.workflow_path))
    )

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    # Drop missing values from the CWL dump.
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False
            )
        )

    # Execute the workflow and relocate all output files.
    from ._cwl import execute
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        output_file,
        output_paths=output_paths,
    )
Beispiel #3
0
def update(ctx, client, revision, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the graph status at ``revision``, selects outdated outputs
    (or the explicitly given ``paths``), prunes the graph to the nodes
    needed to regenerate them, then serializes the pruned graph as a CWL
    workflow and executes it.
    """
    graph = Graph(client)
    status = graph.build_status(revision=revision)
    # Fall back to every outdated path when none were given explicitly.
    paths = {graph.normalize_path(path) for path in paths} \
        if paths else status['outdated'].keys()
    outputs = {graph.add_file(path, revision=revision) for path in paths}

    if not outputs:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {path for _, path in outputs}

    # Get parents of all clean nodes
    import networkx as nx

    # Up-to-date paths that are NOT being regenerated; their ancestors
    # do not need to be re-run.
    clean_paths = set(status['up-to-date'].keys()) - output_paths
    # NOTE(review): graph nodes appear to be (commit, path) pairs --
    # confirm against Graph.
    clean_nodes = {(c, p) for (c, p) in graph.G if p in clean_paths}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is
    # reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    # Drop missing and empty-list values from the dump.
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False
            )
        )

    from ._cwl import execute
    execute(client, output_file, output_paths=output_paths)
Beispiel #4
0
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Resolve the base directory from the output file's name, falling
    # back to the current directory for file-like objects without one.
    basedir = os.path.dirname(getattr(output_file, 'name', '.')) or '.'
    serialized = ascwl(
        graph.ascwl(outputs=outputs),
        filter=lambda _, x: x is not None and x != [],
        basedir=basedir,
    )
    output_file.write(yaml.dump(serialized, default_flow_style=False))
Beispiel #5
0
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the graph at ``revision``, keeps only nodes that need an
    update, derives a CWL workflow regenerating them, pulls required
    inputs from external storage when enabled, then stores and executes
    the workflow.
    """
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    # Only files whose inputs changed need to be regenerated.
    outputs = {node for node in outputs if graph.need_update(node)}

    if not outputs:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    workflow = graph.ascwl(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=outputs,
    )

    # Don't compute paths if storage is disabled.
    if client.has_external_storage:
        # Make sure all inputs are pulled from a storage.
        paths_ = (
            path
            for _, path in workflow.iter_input_files(client.workflow_path)
        )
        client.pull_paths_from_storage(*paths_)

    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    # Drop missing values from the CWL dump.
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False
            )
        )

    from ._cwl import execute
    execute(client, output_file, output_paths=output_paths)
Beispiel #6
0
def rerun(ctx, client, revision, roots, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Adds the requested ``paths`` as outputs, prunes the graph to the
    nodes between the ``roots`` and those outputs, then serializes the
    pruned graph as a CWL workflow and executes it.
    """
    graph = Graph(client)
    outputs = {
        graph.add_file(graph.normalize_path(path), revision=revision)
        for path in paths
    }

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {path for _, path in outputs}

    # Get parents of all new roots
    import networkx as nx

    roots = {graph.normalize_path(root) for root in roots}
    # Starting points must not overlap the files being regenerated.
    assert not roots & output_paths, "--from collides with output paths"

    # Ancestors of the root nodes are considered clean and are pruned.
    clean_nodes = {(c, p) for (c, p) in graph.G if p in roots}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is
    # reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    # Drop missing and empty-list values from the dump.
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    from ._cwl import execute
    execute(client, output_file, output_paths=output_paths)
Beispiel #7
0
def rerun(ctx, client, revision, roots, siblings, inputs, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the provenance graph for ``paths`` at ``revision``, derives a
    CWL workflow from the ``roots`` to the selected outputs, stores it
    and executes it, relocating all produced output files.
    """
    graph = Graph(client)
    paths = [graph.normalize_path(path) for path in paths]
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Starting points must not overlap the files being regenerated.
    assert not roots & output_paths, "--from collides with output paths"

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        ))

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    # Drop missing values from the CWL dump.
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    # Execute the workflow and relocate all output files.
    from ._cwl import execute
    output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        output_file,
        output_paths=output_paths,
    )
Beispiel #8
0
def update(ctx, client, revision, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the graph status at ``revision``, selects outdated files (or
    the explicitly given ``paths``), prunes the graph to the nodes that
    need re-running, serializes it as a CWL workflow and runs it with
    ``cwl-runner``.
    """
    graph = Graph(client)

    status = graph.build_status(revision=revision)

    # Default to every outdated file when no explicit paths are given.
    if paths:
        outputs = {graph.add_file(path, revision=revision) for path in paths}
    else:
        outputs = {
            graph.add_file(path, revision=revision)
            for path in status['outdated']
        }

    # Get parents of all clean nodes
    clean_paths = status['up-to-date'].keys()
    # NOTE(review): graph nodes appear to be (commit, path) pairs --
    # confirm against Graph.
    clean_nodes = {(c, p) for (c, p) in graph.G if p in clean_paths}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is
    # reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    # Use the Path object's own open() for consistency with the rest of
    # this module.
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    # Drop missing and empty-list values from the dump.
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    # TODO remove existing outputs?
    call(['cwl-runner', str(output_file)])