def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Builds the dependency graph for ``revision``, keeps only the nodes that
    ``graph.need_update`` flags as stale, re-executes the generated workflow,
    commits any newly produced files, and records the run in the activity
    index.  Exits the process with status 0 when nothing is outdated.
    """
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    # Keep only nodes whose upstream inputs changed since they were generated.
    outputs = {node for node in outputs if graph.need_update(node)}
    if not outputs:
        click.secho("All files were generated from the latest inputs.", fg="green")
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    workflow = graph.as_workflow(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=outputs,
    )

    # Convert the in-memory workflow to an executable CWL file on disk.
    wf, path = CWLConverter.convert(workflow, client)

    # Don't compute paths if storage is disabled.
    if client.check_external_storage():
        # Make sure all inputs are pulled from a storage.
        paths_ = (i.consumes.path for i in workflow.inputs)
        client.pull_paths_from_storage(*paths_)

    execute(client, path, output_paths=output_paths)

    # NOTE: ``paths`` is rebound here from the CLI argument to the list of
    # regenerated output files — the original argument is no longer needed.
    paths = [o.produces.path for o in workflow.outputs]

    client.repo.git.add(*paths)

    if client.repo.is_dirty():
        commit_msg = "renku update: committing {} newly added files".format(len(paths))
        committer = Actor("renku {0}".format(__version__), version_url)
        client.repo.index.commit(
            commit_msg,
            committer=committer,
            skip_hooks=True,
        )

    # Persist provenance: write the update run as a YAML workflow file whose
    # ids/labels are derived from the commit just created above.
    workflow_name = "{0}_update.yaml".format(uuid.uuid4().hex)
    path = client.workflow_path / workflow_name
    workflow.update_id_and_label_from_commit_path(client, client.repo.head.commit, path)

    with with_reference(path):
        # A workflow with subprocesses becomes a WorkflowRun; otherwise a
        # single ProcessRun is recorded.
        cls = WorkflowRun if workflow.subprocesses else ProcessRun
        run = cls.from_run(run=workflow, client=client, path=path, update_commits=True)
        run.to_yaml()

    client.add_to_activity_index(run)
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow.

    Older CWL-file based variant: builds the graph for ``revision``, keeps
    only stale nodes, serializes the regeneration workflow to a ``.cwl``
    file, and executes it.  Exits the process with status 0 when nothing is
    outdated.
    """
    graph = Graph(client)
    outputs = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    # Keep only nodes whose upstream inputs changed since they were generated.
    outputs = {node for node in outputs if graph.need_update(node)}
    if not outputs:
        click.secho('All files were generated from the latest inputs.', fg='green')
        sys.exit(0)

    # Check or extend siblings of outputs.
    outputs = siblings(graph, outputs)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Get all clean nodes.
    input_paths = {node.path for node in graph.nodes} - output_paths

    # Store the generated workflow used for updating paths.
    import yaml

    # Random hex name avoids clashing with existing workflow files.
    output_file = client.workflow_path / '{0}.cwl'.format(uuid.uuid4().hex)
    workflow = graph.ascwl(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=outputs,
    )

    # Don't compute paths if storage is disabled.
    if client.has_external_storage:
        # Make sure all inputs are pulled from a storage.
        paths_ = (
            path for _, path in workflow.iter_input_files(client.workflow_path))
        client.pull_paths_from_storage(*paths_)

    # Serialize the workflow as CWL YAML, dropping None-valued attributes.
    with output_file.open('w') as f:
        f.write(
            yaml.dump(
                ascwl(
                    workflow,
                    filter=lambda _, x: x is not None,
                    basedir=client.workflow_path,
                ),
                default_flow_style=False))

    execute(client, output_file, output_paths=output_paths)
def watch(self, client, no_output=False):
    """Watch a Renku repository for changes to detect outputs.

    Generator used as a context manager around a tool run: yields the
    generated ``tool``, then — after the wrapped command has executed —
    inspects the git repository for untracked/modified files to guess the
    tool's outputs, validates them, and attaches detected requirements and
    plugin annotations to the tool.

    :param client: project client providing ``repo``, ``path`` and storage
        helpers.
    :param no_output: when ``True``, a run producing no outputs is allowed
        (``OutputsNotFound`` is not raised).
    :raises errors.UnmodifiedOutputs: if declared streams were not modified.
    :raises errors.OutputsNotFound: if no outputs were detected and
        ``no_output`` is falsy.
    """
    tool = self.generate_tool()
    repo = client.repo

    # Remove indirect files list if any
    self.delete_indirect_files_list()

    # NOTE consider to use git index instead
    # Snapshot of directories present BEFORE the run, so that directories
    # created by the tool can be recognized later.
    existing_directories = {
        str(p.relative_to(client.path))
        for p in client.path.glob('**/')
    }

    from renku.core.plugins.pluginmanager import get_plugin_manager
    pm = get_plugin_manager()
    pm.hook.pre_run(tool=tool)

    # Control returns to the caller here; the wrapped command runs now.
    yield tool

    if repo:
        # Include indirect inputs and outputs before further processing
        self.add_indirect_inputs()
        self.add_indirect_outputs()

        # Remove indirect files list if any
        self.delete_indirect_files_list()

        # List of all output paths.
        paths = []

        inputs = {input.id: input for input in self.inputs}
        outputs = list(tool.outputs)

        # Keep track of unmodified output files.
        unmodified = set()

        # Calculate possible output paths.
        # Capture newly created files through redirects.
        candidates = {file_ for file_ in repo.untracked_files}

        # Capture modified files through redirects.
        candidates |= {
            o.a_path
            for o in repo.index.diff(None) if not o.deleted_file
        }

        # Filter out paths outside the project (e.g. .renku internals).
        from renku.core.commands.graph import _safe_path
        candidates = {path for path in candidates if _safe_path(path)}

        for output, input, path in self.guess_outputs(candidates):
            outputs.append(output)
            paths.append(path)

            if input is not None:
                if input.id not in inputs:  # pragma: no cover
                    raise RuntimeError('Inconsistent input name.')

                inputs[input.id] = input

        # A declared stdout/stderr redirect that was neither created nor
        # modified (and is not an explicit output) means the run changed
        # nothing — flag it instead of recording it as an output.
        for stream_name in ('stdout', 'stderr'):
            stream = getattr(self, stream_name)
            if (
                stream and stream not in candidates and
                Path(os.path.abspath(stream)) not in self.explicit_outputs
            ):
                unmodified.add(stream)
            elif stream:
                paths.append(stream)

        if self.explicit_outputs:
            # Explicit outputs get ids continuing after the guessed ones.
            last_output_id = len(outputs)
            for output, input, path in self.find_explicit_outputs(
                last_output_id
            ):
                outputs.append(output)
                paths.append(path)

                if input is not None:
                    if input.id not in inputs:  # pragma: no cover
                        raise RuntimeError('Inconsistent input name.')

                    inputs[input.id] = input

        if unmodified:
            raise errors.UnmodifiedOutputs(repo, unmodified)

        if not no_output and not paths:
            raise errors.OutputsNotFound(repo, inputs.values())

        if client.has_external_storage:
            client.track_paths_in_storage(*paths)

        tool.inputs = list(inputs.values())
        tool.outputs = outputs

    # Requirement detection can be done anytime.
    from .process_requirements import InitialWorkDirRequirement, \
        InlineJavascriptRequirement
    initial_work_dir_requirement = InitialWorkDirRequirement.from_tool(
        tool,
        existing_directories=existing_directories,
        working_dir=self.working_dir
    )
    if initial_work_dir_requirement:
        tool.requirements.extend([
            InlineJavascriptRequirement(),
            initial_work_dir_requirement,
        ])

    # Let plugins attach annotations to the finished tool.
    results = pm.hook.cmdline_tool_annotations(tool=tool)
    tool.annotations = [a for r in results for a in r]
def watch(self, client, no_output=False):
    """Watch a Renku repository for changes to detect outputs.

    Generator used as a context manager around a tool run: yields ``self``,
    then — after the wrapped command has executed — inspects the git
    repository for untracked/modified files to guess outputs, validates
    them, tracks them in external storage (LFS) when enabled, commits the
    newly added files, and attaches plugin annotations.

    :param client: project client providing ``repo``, ``path``, config and
        storage helpers.
    :param no_output: when ``True``, a run producing no outputs is allowed
        (``OutputsNotFound`` is not raised).
    :raises errors.UnmodifiedOutputs: if declared streams were not modified.
    :raises errors.OutputsNotFound: if no outputs were detected and
        ``no_output`` is falsy.
    """
    client.check_external_storage()
    repo = client.repo

    # Remove indirect files list if any
    self.delete_indirect_files_list()

    from renku.core.plugins.pluginmanager import get_plugin_manager
    pm = get_plugin_manager()
    pm.hook.pre_run(tool=self)
    # Snapshot of directories present BEFORE the run, so that directories
    # created by the tool can be recognized later.
    self.existing_directories = {
        str(p.relative_to(client.path)) for p in client.path.glob('**/')
    }

    # Control returns to the caller here; the wrapped command runs now.
    yield self

    if repo:
        # Include indirect inputs and outputs before further processing
        self.add_indirect_inputs()
        self.add_indirect_outputs()

        # Remove indirect files list if any
        self.delete_indirect_files_list()

        # List of all output paths.
        paths = []

        inputs = {input.id: input for input in self.inputs}
        outputs = list(self.outputs)

        # Keep track of unmodified output files.
        unmodified = set()

        # Calculate possible output paths.
        # Capture newly created files through redirects.
        candidates = {file_ for file_ in repo.untracked_files}

        # Capture modified files through redirects.
        candidates |= {
            o.a_path
            for o in repo.index.diff(None) if not o.deleted_file
        }

        # Filter out paths outside the project (e.g. .renku internals).
        from renku.core.commands.graph import _safe_path
        candidates = {path for path in candidates if _safe_path(path)}

        for output, input, path in self.guess_outputs(candidates):
            outputs.append(output)
            paths.append(path)

            if input is not None:
                if input.id not in inputs:  # pragma: no cover
                    raise RuntimeError('Inconsistent input name.')

                inputs[input.id] = input

        # A declared stdout/stderr redirect that was neither created nor
        # modified (and is not an explicit output) means the run changed
        # nothing — flag it instead of recording it as an output.
        for stream_name in ('stdout', 'stderr'):
            stream = getattr(self, stream_name)
            if (
                stream and stream not in candidates and
                Path(os.path.abspath(stream)) not in self.explicit_outputs
            ):
                unmodified.add(stream)
            elif stream:
                paths.append(stream)

        if self.explicit_outputs:
            # Explicit outputs get ids continuing after the guessed ones.
            last_output_id = len(outputs)
            for output, input, path in self.find_explicit_outputs(
                last_output_id
            ):
                outputs.append(output)
                paths.append(path)

                if input is not None:
                    if input.id not in inputs:  # pragma: no cover
                        raise RuntimeError('Inconsistent input name.')

                    inputs[input.id] = input

                # remove outputs covered by explicit outputs
                output_glob = output.outputBinding.glob
                for input_id, input in inputs.items():
                    # ``type == 'string'`` guards the ``startswith`` call;
                    # presumably the default holds the path-like value —
                    # TODO confirm against CWL input model.
                    if (
                        input.type == 'string' and
                        input.default.startswith(output_glob)
                    ):
                        input_glob = '$(inputs.{})'.format(input_id)
                        # FIX: use a ``None`` default — a bare ``next()``
                        # raises StopIteration when no guessed output
                        # matches, which PEP 479 turns into a RuntimeError
                        # inside this generator instead of reaching the
                        # ``if existing_output`` check below.
                        existing_output = next(
                            (
                                o for o in outputs
                                if o.outputBinding.glob == input_glob
                            ),
                            None,
                        )
                        if existing_output:
                            outputs.remove(existing_output)

        if unmodified:
            raise errors.UnmodifiedOutputs(repo, unmodified)

        if not no_output and not paths:
            raise errors.OutputsNotFound(repo, inputs.values())

        if client.check_external_storage():
            lfs_paths = client.track_paths_in_storage(*paths)

            # Inform the user about LFS tracking unless disabled in config.
            show_message = client.get_value('renku', 'show_lfs_message')
            if (
                lfs_paths and
                (show_message is None or show_message == 'True')
            ):
                self.messages = (
                    INFO + 'Adding these files to Git LFS:\n' +
                    '\t{}'.format('\n\t'.join(lfs_paths)) +
                    '\nTo disable this message in the future, run:' +
                    '\n\trenku config show_lfs_message False'
                )

        repo.git.add(*paths)

        if repo.is_dirty():
            commit_msg = ('renku run: '
                          'committing {} newly added files').format(
                              len(paths)
                          )
            committer = Actor('renku {0}'.format(__version__), version_url)
            repo.index.commit(
                commit_msg,
                committer=committer,
                skip_hooks=True,
            )

        self._had_changes = True

        self.inputs = list(inputs.values())
        self.outputs = outputs

    # Let plugins attach annotations to the finished run.
    results = pm.hook.cmdline_tool_annotations(tool=self)
    self.annotations = [a for r in results for a in r]
def watch(self, client, no_output=False):
    """Watch a Renku repository for changes to detect outputs.

    Generator used as a context manager around a tool run: yields ``self``,
    then — after the wrapped command has executed — inspects the git
    repository for untracked/modified files (unless output detection is
    disabled), merges in explicit outputs, validates them, tracks them in
    external storage (LFS) when enabled, and commits the newly added files.

    :param client: project client providing ``repo``, ``path``, config and
        storage helpers.
    :param no_output: when ``True``, a run producing no outputs is allowed
        (``OutputsNotFound`` is not raised).
    :raises errors.UnmodifiedOutputs: if declared streams were not modified.
    :raises errors.OutputsNotFound: if no outputs were detected and
        ``no_output`` is falsy.
    """
    client.check_external_storage()
    repo = client.repo

    # Remove indirect files list if any
    self.delete_indirect_files_list()

    from renku.core.plugins.pluginmanager import get_plugin_manager
    pm = get_plugin_manager()
    pm.hook.pre_run(tool=self)
    # Snapshot of directories present BEFORE the run, so that directories
    # created by the tool can be recognized later.
    self.existing_directories = {
        str(p.relative_to(client.path)) for p in client.path.glob("**/")
    }

    # Control returns to the caller here; the wrapped command runs now.
    yield self

    if repo:
        # Include indirect inputs and outputs before further processing
        self.add_indirect_inputs()
        self.add_indirect_outputs()

        # Remove indirect files list if any
        self.delete_indirect_files_list()

        # List of all output paths.
        output_paths = []

        inputs = {input.id: input for input in self.inputs}
        outputs = list(self.outputs)

        # Keep track of unmodified output files.
        unmodified = set()

        candidates = set()

        if not self.no_output_detection:
            # Calculate possible output paths.
            # Capture newly created files through redirects.
            candidates |= {file_ for file_ in repo.untracked_files}

            # Capture modified files through redirects.
            candidates |= {
                o.a_path
                for o in repo.index.diff(None) if not o.deleted_file
            }

        # Include explicit outputs
        candidates |= {
            str(path.relative_to(self.working_dir))
            for path in self.explicit_outputs
        }

        # Filter out paths outside the project (e.g. .renku internals).
        from renku.core.commands.graph import _safe_path
        candidates = {path for path in candidates if _safe_path(path)}

        for output, input, path in self.guess_outputs(candidates):
            outputs.append(output)
            output_paths.append(path)

            if input is not None:
                if input.id not in inputs:  # pragma: no cover
                    raise RuntimeError("Inconsistent input name.")

                inputs[input.id] = input

        # A declared stdout/stderr redirect that was neither created nor
        # modified (and is not an explicit output) means the run changed
        # nothing — flag it instead of recording it as an output.
        for stream_name in ("stdout", "stderr"):
            stream = getattr(self, stream_name)
            if stream and stream not in candidates and Path(
                    os.path.abspath(stream)) not in self.explicit_outputs:
                unmodified.add(stream)
            elif stream:
                output_paths.append(stream)

        if unmodified:
            raise errors.UnmodifiedOutputs(repo, unmodified)

        if not no_output and not output_paths:
            raise errors.OutputsNotFound(repo, inputs.values())

        if client.check_external_storage():
            lfs_paths = client.track_paths_in_storage(*output_paths)

            # Inform the user about LFS tracking unless disabled in config.
            show_message = client.get_value("renku", "show_lfs_message")
            if lfs_paths and (show_message is None or show_message == "True"):
                self.messages = (
                    INFO + "Adding these files to Git LFS:\n" +
                    "\t{}".format("\n\t".join(lfs_paths)) +
                    "\nTo disable this message in the future, run:" +
                    "\n\trenku config show_lfs_message False")

        repo.git.add(*output_paths)

        if repo.is_dirty():
            commit_msg = ("renku run: "
                          "committing {} newly added files").format(
                              len(output_paths))
            committer = Actor("renku {0}".format(__version__), version_url)
            repo.index.commit(
                commit_msg,
                committer=committer,
                skip_hooks=True,
            )

        self._had_changes = True

        self.inputs = list(inputs.values())
        self.outputs = outputs

    # Let plugins attach annotations to the finished run.
    results = pm.hook.cmdline_tool_annotations(tool=self)
    self.annotations = [a for r in results for a in r]