def inputs(ctx, client, revision, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.
    """
    from renku.core.models.provenance.activities import ProcessRun

    graph = Graph(client)
    paths = set(paths)
    nodes = graph.build(revision=revision)

    # Commits that produced the built nodes, and (commit, path) pairs that
    # match the requested paths (or everything when no paths were given).
    commits = {node.commit for node in nodes}
    candidates = {(node.commit, node.path)
                  for node in nodes if not paths or node.path in paths}

    input_paths = set()

    for commit in commits:
        # Not every commit has a recorded activity; skip those instead of
        # raising KeyError (same lookup style as the verbose variant of
        # this command).
        activity = graph.activities.get(commit)
        if activity is None:
            continue

        if isinstance(activity, ProcessRun):
            for usage in activity.qualified_usage:
                for entity in usage.entity.entities:
                    # Path of the consumed entity relative to the repo root.
                    path = str((usage.client.path /
                                entity.path).relative_to(client.path))
                    usage_key = (entity.commit, entity.path)

                    if path not in input_paths and usage_key in candidates:
                        input_paths.add(path)

    click.echo('\n'.join(graph._format_path(path) for path in input_paths))
    # Exit 1 when specific paths were requested but not all matched an input.
    ctx.exit(0 if not paths or len(input_paths) == len(paths) else 1)
def siblings(client, revision, paths):
    """Show siblings for given paths."""
    graph = Graph(client)
    nodes = graph.build(paths=paths, revision=revision)

    # Collect the matched nodes together with every sibling of each one.
    related = set(nodes)
    for node in nodes:
        related |= graph.siblings(node)

    # Print each distinct path once.
    for path in {node.path for node in related}:
        click.echo(graph._format_path(path))
def log(client, revision, format, no_output, strict, paths):
    """Show logs for a file."""
    graph = Graph(client)

    if not paths:
        # No explicit paths: derive them from the files changed in the
        # requested revision (or revision range).
        start, is_range, stop = revision.partition('..')
        if not is_range:
            stop = start
        elif not stop:
            stop = 'HEAD'

        tip = client.repo.rev_parse(stop)
        diff = tip.diff(tip.parents or NULL_TREE)
        paths = (str(client.path / change.a_path) for change in diff)

    # NOTE shall we warn when "not no_output and not paths"?
    graph.build(paths=paths, revision=revision, can_be_cwl=no_output)
    FORMATS[format](graph, strict=strict)
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Serialize the CWL description relative to the output file's directory
    # (falling back to the current directory for file-like objects without
    # a name).
    basedir = os.path.dirname(getattr(output_file, 'name', '.')) or '.'
    document = ascwl(
        graph.ascwl(outputs=outputs),
        filter=lambda _, x: x is not None and x != [],
        basedir=basedir,
    )
    output_file.write(yaml.dump(document, default_flow_style=False))
def _graph(client, revision, paths):
    # Prefer the newer provenance-graph implementation when it is available;
    # note that in that case ``revision`` and ``paths`` are not applied.
    if PG_AVAILABLE:
        provenance_graph = ProvenanceGraph.from_json(
            client.provenance_graph_path)
        provenance_graph.custom_bindings = [
            ("mls", "http://www.w3.org/ns/mls#"),
            ("oa", "http://www.w3.org/ns/oa#"),
            ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        ]
        return provenance_graph

    # Legacy path: build the renku graph and expose it as a conjunctive
    # RDF graph with the usual namespace prefixes bound.
    renku_graph = Graph(client)
    renku_graph.build(paths=paths, revision=revision)
    cg = _conjunctive_graph(renku_graph)

    namespaces = {
        "mls": "http://www.w3.org/ns/mls#",
        "prov": "http://www.w3.org/ns/prov#",
        "oa": "http://www.w3.org/ns/oa#",
        "schema": "http://schema.org/",
        "xsd": "http://www.w3.org/2001/XMLSchema#",
    }
    for prefix, uri in namespaces.items():
        cg.bind(prefix, uri)

    return cg
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Explicit check instead of ``assert`` (asserts are stripped under
    # ``python -O``); also fixes the "colides" typo in the message.
    if roots & output_paths:
        raise ValueError('--from collides with output paths')

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        ))

    # Don't compute paths if storage is disabled.
    if client.has_external_storage:
        # Make sure all inputs are pulled from a storage.
        paths_ = (
            path
            for _, path in workflow.iter_input_files(client.workflow_path))
        client.pull_paths_from_storage(*paths_)

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    with output_file.open('w') as f:
        f.write(
            yaml.dump(ascwl(
                workflow,
                filter=lambda _, x: x is not None,
                basedir=client.workflow_path,
            ), default_flow_style=False))

    # Execute the workflow and relocate all output files.
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        output_file,
        output_paths=output_paths,
    )
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow."""
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output,
                          paths=paths)
    # Keep only nodes whose inputs changed since they were generated.
    outputs = {node for node in outputs if graph.need_update(node)}
    if not outputs:
        click.secho("All files were generated from the latest inputs.",
                    fg="green")
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    workflow = graph.as_workflow(input_paths=input_paths,
                                 output_paths=output_paths, outputs=outputs,)
    wf, path = CWLConverter.convert(workflow, client)

    # Don't compute paths if storage is disabled.
    if client.check_external_storage():
        # Make sure all inputs are pulled from a storage.
        paths_ = (i.consumes.path for i in workflow.inputs)
        client.pull_paths_from_storage(*paths_)

    # Re-run the converted workflow; ``path`` is the converted CWL file.
    execute(client, path, output_paths=output_paths)

    # Stage and commit everything the workflow (re)produced.
    paths = [o.produces.path for o in workflow.outputs]
    client.repo.git.add(*paths)
    if client.repo.is_dirty():
        commit_msg = ("renku update: committing {} newly added files"
                      .format(len(paths)))
        committer = Actor("renku {0}".format(__version__), version_url)
        client.repo.index.commit(commit_msg, committer=committer,
                                 skip_hooks=True,)

    # Persist the run metadata under a fresh workflow file name and index it.
    workflow_name = "{0}_update.yaml".format(uuid.uuid4().hex)
    path = client.workflow_path / workflow_name
    workflow.update_id_and_label_from_commit_path(client,
                                                  client.repo.head.commit,
                                                  path)
    with with_reference(path):
        # WorkflowRun for composite runs, ProcessRun for a single step.
        cls = WorkflowRun if workflow.subprocesses else ProcessRun
        run = cls.from_run(run=workflow, client=client, path=path,
                           update_commits=True)
        run.to_yaml()
        client.add_to_activity_index(run)
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    workflow = graph.as_workflow(outputs=outputs)

    # BUG FIX: the original only called the converter when ``output_file``
    # was set, yet still referenced ``wf`` in the no-file branch, raising
    # NameError. Convert in both branches instead.
    if output_file:
        wf, path = CWLConverter.convert(workflow, client,
                                        path=Path(output_file))
    else:
        wf, path = CWLConverter.convert(workflow, client)
        # No target file given: print the CWL document to stdout.
        click.echo(wf.export_string())
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow."""
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output,
                          paths=paths)
    # Keep only nodes whose inputs changed since they were generated.
    outputs = {node for node in outputs if graph.need_update(node)}
    if not outputs:
        click.secho('All files were generated from the latest inputs.',
                    fg='green')
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    import yaml

    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    workflow = graph.ascwl(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=outputs,
    )

    # Don't compute paths if storage is disabled.
    if client.has_external_storage:
        # Make sure all inputs are pulled from a storage.
        paths_ = (
            path
            for _, path in workflow.iter_input_files(client.workflow_path))
        client.pull_paths_from_storage(*paths_)

    # Serialize the workflow to a uniquely named CWL file before execution.
    with output_file.open('w') as f:
        f.write(
            yaml.dump(ascwl(
                workflow,
                filter=lambda _, x: x is not None,
                basedir=client.workflow_path,
            ), default_flow_style=False))

    execute(client, output_file, output_paths=output_paths)
def outputs(ctx, client, revision, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.
    """
    graph = Graph(client)
    # Renamed from ``filter``: avoid shadowing the builtin (and match the
    # ``filter_`` naming used by the verbose variant of this command).
    filter_ = graph.build(paths=paths, revision=revision)
    output_paths = graph.output_paths

    click.echo('\n'.join(graph._format_path(path) for path in output_paths))

    if paths:
        # Exit non-zero when a requested path is not a known output.
        if not output_paths:
            ctx.exit(1)

        from renku.core.models.datastructures import DirectoryTree
        tree = DirectoryTree.from_list(item.path for item in filter_)

        for output in output_paths:
            if tree.get(output) is None:
                ctx.exit(1)
                return
def outputs(ctx, client, revision, verbose, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.
    """
    graph = Graph(client)
    filter_ = graph.build(paths=paths, revision=revision)

    # Map each output path to a Result record describing its generation.
    output_paths = {}
    for activity in graph.activities.values():
        if isinstance(activity, ProcessRun):
            for entity in activity.generated:
                # Only report entities the graph classifies as outputs.
                if entity.path not in graph.output_paths:
                    continue
                output_paths[entity.path] = Result(
                    path=entity.path, commit=entity.commit,
                    time=activity.ended_at_time, workflow=activity.path)

    if not verbose:
        click.echo("\n".join(graph._format_path(path)
                             for path in output_paths.keys()))
    else:
        # Verbose mode: tabulate records sorted by path (first field).
        records = list(output_paths.values())
        records.sort(key=lambda v: v[0])
        HEADERS["time"] = "generation time"
        click.echo(tabulate(collection=records, headers=HEADERS))

    if paths:
        # Exit non-zero when a requested path is not a known output.
        if not output_paths:
            ctx.exit(1)

        from renku.core.models.datastructures import DirectoryTree
        tree = DirectoryTree.from_list(item.path for item in filter_)

        for output in output_paths:
            if tree.get(output) is None:
                ctx.exit(1)
                return
def inputs(ctx, client, revision, verbose, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.
    """
    graph = Graph(client)
    paths = set(paths)
    nodes = graph.build(revision=revision)

    # For nodes that carry an activity, the relevant commit is the
    # activity's commit. (The original additionally unioned
    # ``{node.activity.commit ...}`` afterwards, but that set is a strict
    # subset of this one — a no-op, removed here.)
    commits = {
        node.activity.commit if hasattr(node, "activity") else node.commit
        for node in nodes
    }
    candidates = {(node.commit, node.path)
                  for node in nodes if not paths or node.path in paths}

    input_paths = {}

    for commit in commits:
        activity = graph.activities.get(commit)
        if not activity:
            continue

        if isinstance(activity, ProcessRun):
            for usage in activity.qualified_usage:
                for entity in usage.entity.entities:
                    # Path of the consumed entity relative to the repo root.
                    path = str((usage.client.path /
                                entity.path).relative_to(client.path))
                    usage_key = (entity.commit, entity.path)

                    if path not in input_paths and usage_key in candidates:
                        input_paths[path] = Result(
                            path=path, commit=entity.commit,
                            time=activity.started_at_time,
                            workflow=activity.path)

    if not verbose:
        click.echo("\n".join(graph._format_path(path)
                             for path in input_paths))
    else:
        # Verbose mode: tabulate records sorted by path (first field).
        records = list(input_paths.values())
        records.sort(key=lambda v: v[0])
        HEADERS["time"] = "usage time"
        click.echo(tabulate(collection=records, headers=HEADERS))

    # Exit 1 when specific paths were requested but not all matched an input.
    ctx.exit(0 if not paths or len(input_paths) == len(paths) else 1)
def siblings(client, revision, flat, verbose, paths):
    """Show siblings for given paths."""
    graph = Graph(client)
    nodes = graph.build(paths=paths, revision=revision)
    # Keep only entities that belong to a parent collection (or non-Entity
    # nodes).
    nodes = [n for n in nodes if not isinstance(n, Entity) or n.parent]

    # Start with each node in its own singleton group, then add each node's
    # sibling set.
    sibling_sets = {frozenset([n]) for n in set(nodes)}
    for node in nodes:
        try:
            sibling_sets.add(frozenset(graph.siblings(node)))
        except (errors.InvalidOutputPath):
            # ignore nodes that aren't outputs if no path was supplied
            if paths:
                raise
            else:
                sibling_sets.discard({node})

    # Merge overlapping sibling sets into disjoint groups: each candidate
    # absorbs every existing group it intersects.
    result_sets = []
    for candidate in sibling_sets:
        new_result = []

        for result in result_sets:
            if candidate & result:
                candidate |= result
            else:
                new_result.append(result)

        result_sets = new_result
        result_sets.append(candidate)

    result = [[sibling_name(graph, node, verbose) for node in r]
              for r in result_sets]

    if flat:
        # Flat output: unique names, one per line, no group separators.
        click.echo('\n'.join({n for r in result for n in r}))
    else:
        click.echo('\n---\n'.join('\n'.join(r) for r in result))
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs}

    # Normalize and check all starting paths.
    roots = {graph.normalize_path(root) for root in roots}
    # Starting points (``--from``) are inputs, not outputs to regenerate.
    output_paths -= roots
    outputs = [o for o in outputs if o.path not in roots]

    # Generate workflow and check inputs.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.as_workflow(
            input_paths=roots,
            output_paths=output_paths,
            outputs=outputs,
        )
    )

    wf, path = CWLConverter.convert(workflow, client)

    # Don't compute paths if storage is disabled.
    if client.check_external_storage():
        # Make sure all inputs are pulled from a storage.
        paths_ = (i.consumes.path for i in workflow.inputs)
        client.pull_paths_from_storage(*paths_)

    # Execute the workflow and relocate all output files.
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(
        client,
        path,
        output_paths=output_paths,
    )

    # Stage and commit everything the workflow (re)produced.
    paths = [o.produces.path for o in workflow.outputs]

    client.repo.git.add(*paths)

    if client.repo.is_dirty():
        commit_msg = ('renku rerun: '
                      'committing {} newly added files').format(len(paths))

        committer = Actor('renku {0}'.format(__version__), version_url)

        client.repo.index.commit(
            commit_msg,
            committer=committer,
            skip_hooks=True,
        )

    # Persist the run metadata under a fresh workflow file name and index it.
    workflow_name = '{0}_rerun.yaml'.format(uuid.uuid4().hex)
    path = client.workflow_path / workflow_name
    workflow.update_id_and_label_from_commit_path(
        client, client.repo.head.commit, path
    )

    with with_reference(path):
        run = WorkflowRun.from_run(workflow, client, path)
        run.to_yaml()
        client.add_to_activity_index(run)