Example #1
0
    def find_explicit_outputs(self, starting_output_id):
        """Yield a parameter triple for each explicitly declared output.

        Every yielded item is a tuple ``(output, changed_input, path)``:
        ``output`` is a ``CommandOutputParameter`` whose id is numbered
        consecutively from ``starting_output_id``, ``changed_input`` is the
        input parameter rewritten as a string when the output path is also a
        path input (otherwise ``None``), and ``path`` is the output path
        relative to the working directory.

        Raises ``errors.InvalidOutputPath`` when ``self.file_candidate``
        returns ``None`` for an explicit output.
        """
        # Index path-like inputs by their working-directory-relative path.
        path_inputs = {}
        for parameter in self.inputs:
            if parameter.type in PATH_OBJECTS:
                key = str(parameter.default.path.relative_to(self.working_dir))
                path_inputs[key] = parameter

        for offset, path in enumerate(self.explicit_outputs):
            if self.file_candidate(path) is None:
                raise errors.InvalidOutputPath(
                    'The output file or directory does not exist.'
                    '\n\n\t' + click.style(str(path), fg='yellow') + '\n\n')

            relative = str(path.relative_to(self.working_dir))
            kind = 'Directory' if path.is_dir() else 'File'

            if relative in path_inputs:
                # The path is both consumed and produced: turn the input into
                # a plain string and let the output glob reference it.
                changed_input = attr.evolve(
                    path_inputs[relative], type='string', default=relative)
                glob = '$(inputs.{0})'.format(changed_input.id)
            else:
                changed_input = None
                glob = str(relative)

            yield (
                CommandOutputParameter(
                    id='output_{0}'.format(starting_output_id + offset),
                    type=kind,
                    outputBinding=dict(glob=glob),
                ),
                changed_input,
                relative,
            )
Example #2
0
    def siblings(self, node):
        """Return siblings for a given node.

        The key is part of the result set, hence to check if the node has
        siblings you should check the length is greater than 1.

        For an ``Entity`` the result is the members of its parent merged
        with the parent's own siblings (excluding the parent itself); an
        entity without a parent has only itself as sibling. For a
        ``Generation`` or ``Usage`` the result is everything generated by
        the owning activity. A ``Process`` has only itself as sibling.

        Raises ``errors.InvalidOutputPath`` when the owning activity cannot
        be determined or is not a ``ProcessRun``.
        """
        parent = None

        if isinstance(node, Entity):
            if node.parent is None:
                # Base case of the recursion below. Without it the call
                # ``self.siblings(None)`` falls through to the error branch
                # and fails with AttributeError on ``node.path``.
                return {node}
            parent_siblings = self.siblings(node.parent) - {node.parent}
            return set(node.parent.members) | parent_siblings
        elif isinstance(node, Generation):
            parent = node.activity
        elif isinstance(node, Usage):
            # The activity is looked up by the commit recorded on the usage.
            parent = self.activities[node.commit]
        elif isinstance(node, Process):
            return {node}

        if parent is None or not isinstance(parent, ProcessRun):
            raise errors.InvalidOutputPath(
                'The file "{0}" was not created by a renku command. \n\n'
                'Check the file history using: git log --follow "{0}"'.format(
                    node.path))

        return set(parent.generated)
Example #3
0
    def guess_outputs(self, paths):
        """Yield detected output and changed command input parameter.

        Every yielded item is a tuple ``(output, changed_input, glob)``:
        ``output`` is a ``CommandOutputParameter``, ``changed_input`` is the
        input parameter rewritten as a string when the output path matches
        an input (otherwise ``None``), and ``glob`` is the output path
        relative to the working directory.

        :param paths: iterable of candidate output paths; they are compared
            against string paths relative to the working directory, so they
            are presumably given in that form (confirm against the caller).
        :raises errors.InvalidOutputPath: if a directory used as an input
            contains files that are not among the candidate paths and the
            directory is not an explicit output.
        :raises ValueError: if ``self.file_candidate`` returns ``None`` for
            a candidate path.
        """
        # TODO what to do with duplicate paths & inputs with same defauts
        paths = list(paths)
        tree = DirectoryTree.from_list(paths)

        # Inputs that may have to be rewritten when an output matches them.
        input_candidates = {}
        # Path inputs; an output with the same name overwrites an input file.
        conflicting_paths = {}

        for input_ in self.inputs:
            # Convert input defaults to paths relative to working directory.
            if input_.type not in PATH_OBJECTS:
                try:
                    path = self.directory / str(input_.default)
                    input_path = path.resolve().relative_to(self.working_dir)
                except FileNotFoundError:
                    continue
            else:
                input_path = input_.default.path.relative_to(self.working_dir)

            # NOTE(review): ``input_path`` is relative, so the filesystem
            # calls below resolve it against the process working directory,
            # presumably the same as ``self.working_dir`` - confirm.
            if input_path.is_dir() and tree.get(input_path):
                # The directory might exist before running the script
                subpaths = {
                    str(input_path / subpath)
                    for subpath in tree.get(input_path, default=[])
                }
                if input_path.resolve() not in self.explicit_outputs:
                    content = {
                        str(item)
                        for item in input_path.rglob('*')
                        if not item.is_dir() and item.name != '.gitkeep'
                    }
                    extra_paths = content - subpaths
                    if extra_paths:
                        raise errors.InvalidOutputPath(
                            'The output directory "{0}" is not empty. \n\n'
                            'Delete existing files before running the '
                            'command:'
                            '\n  (use "git rm <file>..." to remove them '
                            'first)'
                            '\n\n'.format(input_path) +
                            '\n'.join('\t' + click.style(extra, fg='yellow')
                                      for extra in extra_paths) + '\n\n'
                            'Once you have removed files that should be used '
                            'as outputs,\n'
                            'you can safely rerun the previous command.')

                # Remove files from the input directory
                paths = [path for path in paths if path not in subpaths]
                # Include input path in the paths to check
                paths.append(str(input_path))

                input_candidates[str(input_path)] = input_
            elif input_.type not in PATH_OBJECTS:
                # Input need to be changed if an output is detected
                input_candidates[str(input_path)] = input_
            else:
                # Names that can not be outputs because they are already inputs
                conflicting_paths[str(input_path)] = input_

        streams = {
            stream
            for stream in (getattr(self, name) for name in ('stdout', 'stderr'))
            if stream is not None
        }

        # TODO group by a common prefix

        # Drop explicit outputs by building a new list. Deleting entries
        # while enumerating the same list skips the element that follows
        # each deletion, so adjacent explicit outputs would be kept.
        paths = [
            path for path in paths
            if Path(path).resolve() not in self.explicit_outputs
        ]

        for position, path in enumerate(paths):
            candidate = self.file_candidate(self.working_dir / path)

            if candidate is None:
                raise ValueError('Path "{0}" does not exist.'.format(path))

            glob = str(candidate.relative_to(self.working_dir))

            if glob in streams:
                # Redirected stdout/stderr are not reported as outputs here.
                continue

            new_input = None

            if glob in conflicting_paths:
                # it means that it is rewriting a file
                input_ = conflicting_paths.pop(glob)
                new_input = attr.evolve(input_, type='string', default=glob)
                input_candidates[glob] = new_input
                # TODO add warning ('Output already exists in inputs.')

            candidate_type = 'Directory' if candidate.is_dir() else 'File'

            if glob in input_candidates:
                input_ = input_candidates[glob]

                if new_input is None:
                    # Store the rewritten input so later lookups see it.
                    new_input = input_candidates[glob] = attr.evolve(
                        input_, type='string', default=glob)

                yield (CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(glob='$(inputs.{0})'.format(
                        input_.id), ),
                ), new_input, glob)
            else:
                yield (CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(glob=glob, ),
                ), None, glob)