def siblings(self, node):
    """Collect the set of siblings of ``node``.

    The queried node is itself a member of the returned set, so a node
    only has real siblings when the length of the result is greater
    than 1.

    Raises ``errors.InvalidOutputPath`` when no producing activity can be
    determined for the node, or when that activity is not a
    ``ProcessRun``.
    """
    if isinstance(node, Entity):
        container = node.parent
        if not container:
            # An entity without a parent forms a group of its own.
            return {node}
        # Siblings of the containing entity, minus the container itself,
        # are merged with the members of that container.
        inherited = self.siblings(container) - {container}
        return set(container.members) | inherited

    producer = None
    if isinstance(node, Generation):
        producer = node.activity
    elif isinstance(node, Usage):
        producer = self.activities[node.commit]
    elif isinstance(node, (Process, Run)):
        return {node}

    if producer is not None and isinstance(producer, ProcessRun):
        return set(producer.generated)

    message = (
        'The file "{0}" was not created by a renku command. \n\n'
        'Check the file history using: git log --follow "{0}"'
    ).format(node.path)
    raise errors.InvalidOutputPath(message)
def find_explicit_outputs(self, starting_output_id):
    """Yield explicit outputs and the command inputs they replace.

    Every yielded item is a ``(output, changed_input, output_path)``
    triple.  ``changed_input`` is ``None`` unless the output path equals
    the default of a path-typed input; in that case the input is evolved
    into a ``string`` input and the output glob refers to it.

    Output ids are numbered consecutively, beginning with
    ``starting_output_id``.  ``errors.InvalidOutputPath`` is raised when
    ``self.file_candidate`` returns ``None`` for an explicit output.
    """
    # Map the working-directory-relative default of each path input to it.
    path_inputs = {}
    for parameter in self.inputs:
        if parameter.type in PATH_OBJECTS:
            key = str(parameter.default.path.relative_to(self.working_dir))
            path_inputs[key] = parameter

    next_id = starting_output_id
    for path in self.explicit_outputs:
        if self.file_candidate(path) is None:
            raise errors.InvalidOutputPath(
                'The output file or directory does not exist.'
                '\n\n\t' + click.style(str(path), fg='yellow') + '\n\n'
            )

        output_path = str(path.relative_to(self.working_dir))
        kind = 'Directory' if path.is_dir() else 'File'

        if output_path in path_inputs:
            # change input type to note that it is also an output
            changed_input = attr.evolve(
                path_inputs[output_path], type='string', default=output_path
            )
            pattern = '$(inputs.{0})'.format(changed_input.id)
        else:
            changed_input = None
            pattern = str(output_path)

        yield (
            CommandOutputParameter(
                id='output_{0}'.format(next_id),
                type=kind,
                outputBinding=dict(glob=pattern),
            ),
            changed_input,
            output_path,
        )
        next_id += 1
def guess_outputs(self, paths):
    """Yield detected output and changed command input parameter.

    ``paths`` is an iterable of candidate output paths (presumably
    relative to the working directory -- they are joined onto
    ``self.working_dir`` below).  Every yielded item is a
    ``(CommandOutputParameter, changed_input, glob)`` triple, where
    ``changed_input`` is ``None`` unless the output matches the default
    value of one of ``self.inputs``; in that case the input is evolved
    into a ``string`` input and the output glob refers to it.

    Candidates listed in ``self.explicit_outputs`` and candidates equal
    to ``self.stdout`` / ``self.stderr`` are not yielded.

    Raises:
        errors.InvalidOutputPath: a directory used as input received
            outputs but also holds files that are not among ``paths``
            (and the directory is not an explicit output).
        ValueError: ``self.file_candidate`` returned ``None`` for a
            candidate path.
    """
    # TODO what to do with duplicate paths & inputs with same defaults
    paths = list(paths)
    tree = DirectoryTree.from_list(paths)

    input_candidates = {}
    conflicting_paths = {}

    for parameter in self.inputs:
        # Convert input defaults to paths relative to working directory.
        if parameter.type not in PATH_OBJECTS:
            try:
                path = self.directory / str(parameter.default)
                input_path = path.resolve().relative_to(self.working_dir)
            except FileNotFoundError:
                continue
        else:
            input_path = parameter.default.path.relative_to(
                self.working_dir
            )

        if input_path.is_dir() and tree.get(input_path):
            # The directory might exist before running the script
            subpaths = {
                str(input_path / subpath)
                for subpath in tree.get(input_path, default=[])
            }
            absolute_path = os.path.abspath(input_path)
            if Path(absolute_path) not in self.explicit_outputs:
                content = {
                    str(found)
                    for found in input_path.rglob('*')
                    if not found.is_dir() and found.name != '.gitkeep'
                }
                extra_paths = content - subpaths
                if extra_paths:
                    raise errors.InvalidOutputPath(
                        'The output directory "{0}" is not empty. \n\n'
                        'Delete existing files before running the '
                        'command:'
                        '\n (use "git rm <file>..." to remove them '
                        'first)'
                        '\n\n'.format(input_path) + '\n'.join(
                            '\t' + click.style(extra, fg='yellow')
                            for extra in extra_paths
                        ) + '\n\n'
                        'Once you have removed files that should be used '
                        'as outputs,\n'
                        'you can safely rerun the previous command.'
                    )

            # Remove files from the input directory
            paths = [path for path in paths if path not in subpaths]
            # Include input path in the paths to check
            paths.append(str(input_path))

            input_candidates[str(input_path)] = parameter
        elif parameter.type not in PATH_OBJECTS:
            # Input need to be changed if an output is detected
            input_candidates[str(input_path)] = parameter
        else:
            # Names that can not be outputs because they are already inputs
            conflicting_paths[str(input_path)] = parameter

    streams = {
        stream
        for stream in (getattr(self, name) for name in ('stdout', 'stderr'))
        if stream is not None
    }

    # TODO group by a common prefix

    # Drop explicit outputs by building a new list.  Deleting items while
    # enumerating the same list shifts the remaining elements and skips the
    # entry right after every removed one, leaving it unfiltered.
    paths = [
        path for path in paths
        if Path(os.path.abspath(path)) not in self.explicit_outputs
    ]

    for position, path in enumerate(paths):
        candidate = self.file_candidate(self.working_dir / path)

        if candidate is None:
            raise ValueError('Path "{0}" does not exist.'.format(path))

        glob_path = str(candidate.relative_to(self.working_dir))

        if glob_path in streams:
            continue

        new_input = None

        if glob_path in conflicting_paths:
            # it means that it is rewriting a file
            conflicting_input = conflicting_paths[glob_path]
            new_input = attr.evolve(
                conflicting_input, type='string', default=glob_path
            )
            input_candidates[glob_path] = new_input

            del conflicting_paths[glob_path]
            # TODO add warning ('Output already exists in inputs.')

        candidate_type = 'Directory' if candidate.is_dir() else 'File'

        if glob_path in input_candidates:
            matched_input = input_candidates[glob_path]

            if new_input is None:
                new_input = input_candidates[glob_path] = attr.evolve(
                    matched_input, type='string', default=glob_path
                )

            yield (
                CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(
                        glob='$(inputs.{0})'.format(matched_input.id),
                    ),
                ),
                new_input,
                glob_path,
            )
        else:
            yield (
                CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(glob=glob_path),
                ),
                None,
                glob_path,
            )
def guess_outputs(self, candidates):
    """Yield detected outputs and the command inputs they replace.

    ``candidates`` is an iterable of possible output paths.  Every
    yielded item is a ``(CommandOutputParameter, changed_input, glob)``
    triple; ``changed_input`` is ``None`` unless the output matches the
    default of one of ``self.inputs``, in which case that input is
    evolved into a ``string`` input and the output glob refers to it.

    Non-path inputs are ignored entirely when ``self.no_input_detection``
    is set.  Candidates equal to ``self.stdout`` / ``self.stderr`` are
    not yielded.

    Raises:
        errors.InvalidOutputPath: a directory used as input received
            outputs but also holds files that are not among
            ``candidates`` (and is not an explicit output).
        errors.UsageError: ``self.is_existing_path`` returned ``None``
            for a candidate.
    """
    # TODO what to do with duplicate paths & inputs with same defaults
    candidates = list(candidates)
    tree = DirectoryTree.from_list(candidates)

    input_candidates = {}
    conflicting_paths = {}

    for parameter in self.inputs:
        is_path_input = parameter.type in PATH_OBJECTS

        # Convert input defaults to paths relative to working directory.
        if is_path_input:
            input_path = parameter.default.path.relative_to(self.working_dir)
        else:
            if self.no_input_detection:
                continue
            # NOTE(review): ``relative_to`` raises ValueError (not
            # FileNotFoundError) for a path outside ``working_dir`` --
            # confirm whether that case should be skipped as well.
            try:
                joined = self.directory / str(parameter.default)
                input_path = Path(os.path.abspath(joined)).relative_to(
                    self.working_dir)
            except FileNotFoundError:
                continue

        input_key = str(input_path)

        if input_path.is_dir() and tree.get(input_path):
            # The directory might exist before running the script
            subpaths = set()
            for subpath in tree.get(input_path, default=[]):
                subpaths.add(str(input_path / subpath))

            absolute_path = os.path.abspath(input_path)
            if Path(absolute_path) not in self.explicit_outputs:
                content = {
                    str(found)
                    for found in input_path.rglob("*")
                    if not found.is_dir() and found.name != ".gitkeep"
                }
                preexisting_paths = content - subpaths
                if preexisting_paths:
                    header = (
                        'The output directory "{0}" is not empty. \n\n'
                        "Delete existing files before running the "
                        "command:"
                        '\n (use "git rm <file>..." to remove them '
                        "first)"
                        "\n\n"
                    ).format(input_path)
                    listing = "\n".join(
                        "\t" + click.style(existing, fg="yellow")
                        for existing in preexisting_paths
                    )
                    footer = (
                        "\n\n"
                        "Once you have removed files that should be used "
                        "as outputs,\n"
                        "you can safely rerun the previous command."
                    )
                    raise errors.InvalidOutputPath(header + listing + footer)

            # Remove files from the input directory
            candidates[:] = [
                item for item in candidates if item not in subpaths
            ]
            # Include input path in the candidates to check
            candidates.append(str(input_path))

            input_candidates[input_key] = parameter
        elif not is_path_input:
            # Input need to be changed if an output is detected
            input_candidates[input_key] = parameter
        else:
            # Names that can not be outputs because they are already inputs
            conflicting_paths[input_key] = parameter

    streams = {
        stream for stream in (self.stdout, self.stderr) if stream is not None
    }

    # TODO group by a common prefix

    for position, path in enumerate(candidates):
        candidate = self.is_existing_path(self.working_dir / path)

        if candidate is None:
            raise errors.UsageError(
                'Path "{0}" does not exist.'.format(path))

        pattern = str(candidate.relative_to(self.working_dir))

        if pattern in streams:
            continue

        changed_input = None

        if pattern in conflicting_paths:
            # it means that it is rewriting a file
            changed_input = attr.evolve(
                conflicting_paths.pop(pattern), type="string", default=pattern)
            input_candidates[pattern] = changed_input
            # TODO add warning ('Output already exists in inputs.')

        candidate_type = "Directory" if candidate.is_dir() else "File"

        if pattern in input_candidates:
            matched_input = input_candidates[pattern]
            if changed_input is None:
                changed_input = attr.evolve(
                    matched_input, type="string", default=pattern)
                input_candidates[pattern] = changed_input
            binding_glob = "$(inputs.{0})".format(matched_input.id)
        else:
            # No input refers to this path; the literal path is the glob.
            binding_glob = pattern

        yield (
            CommandOutputParameter(
                id="output_{0}".format(position),
                type=candidate_type,
                outputBinding=dict(glob=binding_glob),
            ),
            changed_input,
            pattern,
        )