def with_workflow_storage(self):
    """Yield a workflow storage and persist its steps afterwards.

    Creates an empty ``Workflow``, yields it to the caller under the
    repository lock, and once the caller has recorded its steps,
    serializes each step to its own CWL file under
    ``self.workflow_path``.
    """
    with self.lock:
        from renku.models.cwl._ascwl import ascwl
        from renku.models.cwl.workflow import Workflow

        workflow = Workflow()
        yield workflow

        # Create the workflow directory once, up front, instead of
        # re-checking on every iteration.  Guarded so an empty workflow
        # still creates no directory (matches the previous behavior).
        workflow_path = self.workflow_path
        if workflow.steps and not workflow_path.exists():
            workflow_path.mkdir()

        for step in workflow.steps:
            # Unique, filesystem-safe file name derived from the command.
            step_name = '{0}_{1}.cwl'.format(
                uuid.uuid4().hex,
                secure_filename('_'.join(step.run.baseCommand)),
            )
            step_path = workflow_path / step_name
            with step_path.open('w') as step_file:
                yaml.dump(
                    ascwl(
                        step.run,
                        filter=lambda _, x: x is not None,
                        basedir=workflow_path,
                    ),
                    stream=step_file,
                    default_flow_style=False,
                )
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands.

    Builds the dependency graph for ``paths`` at ``revision``, derives a
    CWL workflow for the requested outputs, stores it under the client's
    workflow directory and executes it.
    """
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Fixed typo in the message: "colides" -> "collides".
    assert not roots & output_paths, '--from collides with output paths'

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        )
    )

    # Make sure all inputs are pulled from a storage.
    client.pull_paths_from_storage(
        *(path for _, path in workflow.iter_input_files(client.workflow_path))
    )

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    # Execute the workflow and relocate all output files.
    from ._cwl import execute

    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        output_file,
        output_paths=output_paths,
    )
def update(ctx, client, revision, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Determines outdated paths at ``revision``, prunes the dependency
    graph to the subgraph that needs regeneration, stores the derived
    CWL workflow and executes it.
    """
    graph = Graph(client)
    status = graph.build_status(revision=revision)

    # Normalize both branches to a plain ``set`` -- previously the
    # else-branch leaked a ``dict_keys`` view.
    if paths:
        paths = {graph.normalize_path(path) for path in paths}
    else:
        paths = set(status['outdated'].keys())

    outputs = {graph.add_file(path, revision=revision) for path in paths}

    if not outputs:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {path for _, path in outputs}

    # Get parents of all clean nodes
    import networkx as nx

    clean_paths = set(status['up-to-date'].keys()) - output_paths
    clean_nodes = {(c, p) for (c, p) in graph.G if p in clean_paths}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    from ._cwl import execute
    execute(client, output_file, output_paths=output_paths)
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    built_outputs = graph.build(paths=paths, revision=revision)

    # Resolve the directory of the target file; fall back to the current
    # directory when the file object carries no name.
    base_directory = os.path.dirname(getattr(output_file, 'name', '.')) or '.'

    serializable = ascwl(
        graph.ascwl(outputs=built_outputs),
        filter=lambda _, x: x is not None and x != [],
        basedir=base_directory,
    )
    output_file.write(yaml.dump(serializable, default_flow_style=False))
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow."""
    graph = Graph(client)

    # Collect candidate nodes, then keep only those that are out of date.
    candidates = graph.build(
        revision=revision, can_be_cwl=no_output, paths=paths
    )
    outputs = {node for node in candidates if graph.need_update(node)}

    if not outputs:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Every remaining node in the graph counts as a clean input.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    import yaml

    workflow_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)

    workflow = graph.ascwl(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=outputs,
    )

    # Don't compute paths if storage is disabled.
    if client.has_external_storage:
        # Make sure all inputs are pulled from a storage.
        client.pull_paths_from_storage(
            *(
                path
                for _, path in workflow.iter_input_files(client.workflow_path)
            )
        )

    serialized = yaml.dump(
        ascwl(
            workflow,
            filter=lambda _, x: x is not None,
            basedir=client.workflow_path,
        ),
        default_flow_style=False,
    )
    with workflow_file.open('w') as stream:
        stream.write(serialized)

    from ._cwl import execute
    execute(client, workflow_file, output_paths=output_paths)
def rerun(ctx, client, revision, roots, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Rebuilds the requested ``paths`` starting from ``roots``: prunes the
    dependency graph to the affected subgraph, stores the derived CWL
    workflow and executes it.
    """
    graph = Graph(client)
    outputs = {
        graph.add_file(graph.normalize_path(path), revision=revision)
        for path in paths
    }

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {path for _, path in outputs}

    # Get parents of all new roots
    import networkx as nx

    roots = {graph.normalize_path(root) for root in roots}
    # Fixed typo in the message: "colides" -> "collides".
    assert not roots & output_paths, '--from collides with output paths'

    clean_nodes = {(c, p) for (c, p) in graph.G if p in roots}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    from ._cwl import execute
    execute(client, output_file, output_paths=output_paths)
def rerun(ctx, client, revision, roots, siblings, inputs, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the dependency graph for ``paths`` at ``revision``, derives a
    CWL workflow bounded by ``roots``, stores and executes it, then
    relocates the produced output files.
    """
    graph = Graph(client)
    paths = [graph.normalize_path(path) for path in paths]
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Fixed typo in the message: "colides" -> "collides".
    assert not roots & output_paths, '--from collides with output paths'

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        ))

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    # Execute the workflow and relocate all output files.
    from ._cwl import execute

    output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        output_file,
        output_paths=output_paths,
    )
def update(ctx, client, revision, paths):
    """Update existing files by rerunning their outdated workflow.

    Prunes the dependency graph to the outdated subgraph, stores the
    derived CWL workflow and runs it via ``cwl-runner``.
    """
    graph = Graph(client)
    status = graph.build_status(revision=revision)

    if not paths:
        outputs = {
            graph.add_file(path, revision=revision)
            for path in status['outdated']
        }
    else:
        outputs = {graph.add_file(path, revision=revision) for path in paths}

    # Get parents of all clean nodes
    clean_paths = status['up-to-date'].keys()
    clean_nodes = {(c, p) for (c, p) in graph.G if p in clean_paths}
    clean_parents = set()
    for key in clean_nodes:
        clean_parents |= nx.ancestors(graph.G, key)

    # Keep only nodes from which one of the requested outputs is reachable.
    subnodes = set()
    for key in outputs:
        if key in graph.G:
            subnodes |= nx.shortest_path_length(graph.G, target=key).keys()

    graph.G.remove_nodes_from(clean_parents)
    graph.G.remove_nodes_from([n for n in graph.G if n not in subnodes])

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    # Use ``Path.open`` for consistency with the other workflow writers
    # in this module (previously a bare ``open(output_file, 'w')``).
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    graph.ascwl(global_step_outputs=True),
                    filter=lambda _, x: x is not None and x != [],
                    basedir=client.workflow_path,
                ),
                default_flow_style=False,
            )
        )

    # TODO remove existing outputs?
    call(['cwl-runner', str(output_file)])