예제 #1
0
def test_show_inputs(tmpdir_factory, project, runner, run):
    """Test show inputs with submodules."""
    second_project = Path(str(tmpdir_factory.mktemp('second_project')))

    assert 0 == run(args=('init', str(second_project)))

    woop = second_project / 'woop'
    with woop.open('w') as fp:
        fp.write('woop')

    second_repo = git.Repo(str(second_project))
    second_repo.git.add('--all')
    second_repo.index.commit('Added woop file')

    assert 0 == run(args=('dataset', 'create', 'foo'))
    assert 0 == run(args=('dataset', 'add', 'foo', str(woop)))

    imported_woop = Path(project) / 'data' / 'foo' / woop.name
    assert imported_woop.exists()

    woop_wc = Path(project) / 'woop.wc'
    assert 0 == run(args=('run', 'wc'), stdin=imported_woop, stdout=woop_wc)

    result = runner.invoke(cli.cli, ['show', 'inputs'], catch_exceptions=False)
    assert {str(imported_woop.resolve().relative_to(Path(project).resolve()))
            } == set(result.output.strip().split('\n'))
예제 #2
0
    def add_file(self, path, revision='HEAD'):
        """Add a file node to the graph."""
        file_commits = list(self.client.git.iter_commits(revision, paths=path))

        if not file_commits:
            raise KeyError('Could not find a file {0} in range {1}'.format(
                path, revision))

        commit = file_commits[0]

        cwl = self.find_cwl(commit)
        if cwl is not None:
            file_key = self.add_node(commit, path)
            self.add_tool(commit, cwl, file_key=file_key)
            return file_key
        else:
            #: Does not have a parent CWL.
            root_node = self.add_node(commit, path)
            parent_commit, parent_path = root_node

            #: Capture information about the submodule in a submodule.
            root_submodule = self.G.nodes[root_node].get('submodule', [])

            #: Resolve Renku based submodules.
            original_path = Path(parent_path)
            if original_path.is_symlink() or str(original_path).startswith(
                    '.renku/vendors'):
                original_path = original_path.resolve()

                for submodule in Submodule.iter_items(
                        self.client.git, parent_commit=parent_commit):
                    try:
                        subpath = original_path.relative_to(
                            Path(submodule.path).resolve())
                        subgraph = Graph(client=LocalClient(
                            path=submodule.path))
                        subnode = subgraph.add_file(str(subpath),
                                                    revision=submodule.hexsha)

                        #: Extend node metadata.
                        for _, data in subgraph.G.nodes(data=True):
                            data['submodule'] = root_submodule + [
                                submodule.name
                            ]

                        #: Merge file node with it's symlinked version.
                        self.G = nx.contracted_nodes(
                            nx.compose(self.G, subgraph.G),
                            root_node,
                            subnode,
                        )  # TODO optionally it can be changed to an edge.
                        break
                    except ValueError:
                        continue

            return root_node
예제 #3
0
    def file_candidate(self, candidate, ignore=None):
        """Return a path instance if it exists in current directory."""
        if ignore and candidate in ignore:
            return

        candidate = Path(candidate)

        if not candidate.is_absolute():
            candidate = self.directory / candidate

        if candidate.exists():
            return candidate.resolve()
예제 #4
0
파일: move.py 프로젝트: gasnew/renku-python
def move(ctx, client, sources, destination):
    """Move files and check repository for potential problems."""
    from renku.api._git import _expand_directories

    dst = Path(destination)

    def fmt_path(path):
        """Format path as relative to the client path."""
        return str(Path(path).absolute().relative_to(client.path))

    files = {
        fmt_path(source): fmt_path(file_or_dir)
        for file_or_dir in sources
        for source in _expand_directories((file_or_dir, ))
    }

    def fmt_dst(path):
        """Build a destination path for a source path."""
        return str(dst / os.path.relpath(path, start=files[path]))

    destinations = {source: fmt_dst(source) for source in files}

    # 1. Check .gitignore.
    ignored = client.find_ignored_paths(*destinations.values())
    if ignored:
        click.echo(WARNING + 'Renamed files match .gitignore.\n')
        if click.confirm('Do you want to edit ".gitignore" now?',
                         default=False):
            click.edit(filename=str(client.path / '.gitignore'))

    # 2. Update dataset metadata files.
    with progressbar(
            client.datasets.items(),
            item_show_func=lambda item: str(item[1].short_id) if item else '',
            label='Updating dataset metadata',
            width=0,
    ) as bar:
        for (path, dataset) in bar:
            renames = {}

            for file_ in dataset.files:
                filepath = fmt_path(file_.path)

                if filepath in files:
                    renames[file_.path] = destinations[filepath]

            if renames:
                dataset = dataset.rename_files(
                    lambda key: renames.get(key, key))

                dataset.to_yaml()

    # 3. Manage .gitattributes for external storage.
    tracked = tuple()
    if client.has_external_storage:
        tracked = tuple(path
                        for path, attr in client.find_attr(*files).items()
                        if attr.get('filter') == 'lfs')
        client.untrack_paths_from_storage(*tracked)

        if client.find_attr(*tracked):
            click.echo(WARNING + 'There are custom .gitattributes.\n')
            if click.confirm('Do you want to edit ".gitattributes" now?',
                             default=False):
                click.edit(filename=str(client.path / '.gitattributes'))

    if tracked and client.has_external_storage:
        client.track_paths_in_storage(*(destinations[path]
                                        for path in tracked))

    # 4. Handle symlinks.
    dst.parent.mkdir(parents=True, exist_ok=True)

    for source, target in destinations.items():
        src = Path(source)
        if src.is_symlink():
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            Path(target).symlink_to(
                os.path.relpath(str(src.resolve()),
                                start=os.path.dirname(target)))
            src.unlink()
            del files[source]

    # Finally move the files.
    final_sources = list(set(files.values()))
    if final_sources:
        run(['git', 'mv'] + final_sources + [destination], check=True)
예제 #5
0
    def _add_from_git(self, dataset, path, url, target, **kwargs):
        """Process adding resources from another git repository.

        The submodules are placed in ``.renku/vendors`` and linked
        to the *path* specified by the user.
        """
        # create the submodule
        u = parse.urlparse(url)
        submodule_path = self.renku_path / 'vendors' / (u.netloc or 'local')

        # Respect the directory struture inside the source path.
        relative_to = kwargs.get('relative_to', None)

        if u.scheme in ('', 'file'):
            warnings.warn('Importing local git repository, use HTTPS')
            # determine where is the base repo path
            r = git.Repo(url, search_parent_directories=True)
            src_repo_path = Path(r.git_dir).parent
            submodule_name = os.path.basename(src_repo_path)
            submodule_path = submodule_path / str(src_repo_path).lstrip('/')

            # if repo path is a parent, rebase the paths and update url
            if src_repo_path != Path(u.path):
                top_target = Path(
                    u.path
                ).resolve().absolute().relative_to(src_repo_path)
                if target:
                    target = top_target / target
                else:
                    target = top_target
                url = src_repo_path.as_posix()
        elif u.scheme in ('http', 'https'):
            submodule_name = os.path.splitext(os.path.basename(u.path))[0]
            submodule_path = submodule_path.joinpath(
                os.path.dirname(u.path).lstrip('/'), submodule_name
            )
        else:
            raise NotImplementedError(
                'Scheme {} not supported'.format(u.scheme)
            )

        # FIXME: do a proper check that the repos are not the same
        if submodule_name not in (s.name for s in self.git.submodules):
            # new submodule to add
            self.git.create_submodule(
                name=submodule_name, path=submodule_path.as_posix(), url=url
            )

        src = submodule_path / (target or '')

        if target and relative_to:
            relative_to = Path(relative_to)
            if relative_to.is_absolute():
                assert u.scheme in {
                    '', 'file'
                }, ('Only relative paths can be used with URLs.')
                target = (Path(url).resolve().absolute() / target).relative_to(
                    relative_to.resolve()
                )
            else:
                # src already includes target so we do not have to append it
                target = src.relative_to(submodule_path / relative_to)

        # link the target into the data directory
        dst = self.path / path / submodule_name / (target or '')

        # if we have a directory, recurse
        if src.is_dir():
            files = {}
            dst.mkdir(parents=True, exist_ok=True)
            # FIXME get all files from submodule index
            for f in src.iterdir():
                try:
                    files.update(
                        self._add_from_git(
                            dataset,
                            path,
                            url,
                            target=f.relative_to(submodule_path),
                            **kwargs
                        )
                    )
                except ValueError:
                    pass  # skip files outside the relative path
            return files

        if not dst.parent.exists():
            dst.parent.mkdir(parents=True)

        os.symlink(os.path.relpath(src, dst.parent), dst)

        # grab all the authors from the commit history
        git_repo = git.Repo(submodule_path.absolute().as_posix())
        authors = []
        for commit in git_repo.iter_commits(paths=target):
            author = Author.from_commit(commit)
            if author not in authors:
                authors.append(author)

        dataset_path = self.path / self.datadir / dataset.name
        result = dst.relative_to(dataset_path).as_posix()

        if u.scheme in ('', 'file'):
            url = None
        else:
            url = '{}/{}'.format(url, target)

        return {
            result:
                DatasetFile(
                    path=result,
                    url=url,
                    authors=authors,
                    dataset=dataset.name,  # TODO detect original dataset
                )
        }
예제 #6
0
    def _add_from_git(self, dataset, path, url, target, **kwargs):
        """Process adding resources from another git repository.

        The submodules are placed in ``.renku/vendors`` and linked
        to the *path* specified by the user.
        """
        from git import Repo

        # create the submodule
        if url.startswith('git@'):
            url = 'git+ssh://' + url

        u = parse.urlparse(url)
        submodule_path = self.renku_path / 'vendors' / (u.netloc or 'local')

        # Respect the directory struture inside the source path.
        relative_to = kwargs.get('relative_to', None)

        if u.scheme in ('', 'file'):
            try:
                relative_url = Path(url).resolve().relative_to(self.path)
            except Exception:
                relative_url = None

            if relative_url:
                return [{
                    'path': url,
                    'url': url,
                    'creator': dataset.creator,
                    'dataset': dataset.name,
                    'parent': self
                }]

            warnings.warn('Importing local git repository, use HTTPS')
            # determine where is the base repo path
            r = Repo(url, search_parent_directories=True)
            src_repo_path = Path(r.git_dir).parent.resolve()
            submodule_name = src_repo_path.name
            submodule_path = submodule_path / str(src_repo_path).lstrip('/')

            # if repo path is a parent, rebase the paths and update url
            if src_repo_path != Path(u.path):
                top_target = Path(
                    u.path
                ).resolve().absolute().relative_to(src_repo_path)
                if target:
                    target = top_target / target
                else:
                    target = top_target
                url = src_repo_path.as_posix()
        elif u.scheme in {'http', 'https', 'git+https', 'git+ssh'}:
            submodule_name = os.path.splitext(os.path.basename(u.path))[0]
            submodule_path = submodule_path.joinpath(
                os.path.dirname(u.path).lstrip('/'), submodule_name
            )
        else:
            raise NotImplementedError(
                'Scheme {} not supported'.format(u.scheme)
            )

        # FIXME: do a proper check that the repos are not the same
        if submodule_name not in (s.name for s in self.repo.submodules):
            if u.scheme in {'http', 'https', 'git+https', 'git+ssh'}:
                url = self.get_relative_url(url)

            # Submodule in python git does some custom magic that does not
            # allow for relative URLs, so we call the git function directly
            self.repo.git.submodule([
                'add', '--force', '--name', submodule_name, url,
                submodule_path.relative_to(self.path).as_posix()
            ])

        src = submodule_path / (target or '')

        if target and relative_to:
            relative_to = Path(relative_to)
            if relative_to.is_absolute():
                assert u.scheme in {
                    '', 'file'
                }, 'Only relative paths can be used with URLs.'
                target = (Path(url).resolve().absolute() / target).relative_to(
                    relative_to.resolve()
                )
            else:
                # src already includes target so we do not have to append it
                target = src.relative_to(submodule_path / relative_to)

        # link the target into the data directory
        dst = self.path / path / (target or '')

        # if we have a directory, recurse
        if src.is_dir():
            files = []
            dst.mkdir(parents=True, exist_ok=True)
            # FIXME get all files from submodule index
            for f in src.iterdir():
                try:
                    files.extend(
                        self._add_from_git(
                            dataset,
                            path,
                            url,
                            target=f.relative_to(submodule_path),
                            **kwargs
                        )
                    )
                except ValueError:
                    pass  # skip files outside the relative path
            return files

        if not dst.parent.exists():
            dst.parent.mkdir(parents=True)

        os.symlink(os.path.relpath(str(src), str(dst.parent)), str(dst))

        # grab all the creators from the commit history
        git_repo = Repo(str(submodule_path.absolute()))
        creators = []
        for commit in git_repo.iter_commits(paths=target):
            creator = Creator.from_commit(commit)
            if creator not in creators:
                creators.append(creator)

        if u.scheme in ('', 'file'):
            url = None
        else:
            url = '{}/{}'.format(url, target)

        return [{
            'path': dst.relative_to(self.path),
            'url': url,
            'creator': creators,
            'dataset': dataset.name,
            'parent': self
        }]
예제 #7
0
 def full_path(self):
     """Return full path in the current reference frame."""
     path = Path(self.path)
     if self.client:
         return (self.client.path / path).resolve()
     return path.resolve()