Example #1
0
    def track_paths_in_storage(self, *paths):
        """Register paths with the external (git lfs) storage.

        A path is skipped when ``self.find_attr`` reports ``filter=lfs`` for
        it, or when it is a ``.ipynb`` file. A directory is registered with
        the pattern ``<directory>/**``; any other path is registered as is.
        If nothing is left to register, no command is run.

        :param paths: paths to consider for tracking.
        :raises BadParameter: if running the tracking command is interrupted
            or fails with an ``OSError``.
        """
        attributes = self.find_attr(*paths)
        patterns = []

        for raw_path in paths:
            # Already handled by the lfs filter in .gitattributes.
            if attributes.get(raw_path, {}).get('filter') == 'lfs':
                continue

            candidate = Path(raw_path)
            if candidate.is_dir():
                patterns.append(str(candidate / '**'))
                continue

            # TODO create configurable filter and follow .gitattributes
            if candidate.suffix == '.ipynb':
                continue

            patterns.append(str(candidate))

        if not patterns:
            return

        command = self._CMD_STORAGE_TRACK + patterns
        try:
            call(command, stdout=PIPE, stderr=STDOUT, cwd=str(self.path))
        except (KeyboardInterrupt, OSError) as e:
            raise BadParameter('Couldn\'t run \'git lfs\':\n{0}'.format(e))
Example #2
0
    def track_paths_in_storage(self, *paths):
        """Track paths in the external storage.

        Does nothing when ``self.use_external_storage`` is false. Otherwise a
        path is skipped when ``self.find_attr`` reports ``filter=lfs`` for it,
        or when it is a ``.ipynb`` file. A directory is registered with the
        pattern ``<directory>/**``; any other path is registered as is.

        :param paths: paths to consider for tracking.
        :raises errors.ExternalStorageNotInstalled: if external storage is
            requested but ``self.external_storage_installed`` is false.
        """
        if not self.use_external_storage:
            return

        if not self.external_storage_installed:
            raise errors.ExternalStorageNotInstalled(self.repo)

        track_paths = []
        attrs = self.find_attr(*paths)

        for path in paths:
            # Do not add files with filter=lfs in .gitattributes
            if attrs.get(path, {}).get('filter') == 'lfs':
                continue

            path = Path(path)
            if path.is_dir():
                track_paths.append(str(path / '**'))
            elif path.suffix != '.ipynb':
                # TODO create configurable filter and follow .gitattributes
                track_paths.append(str(path))

        # Nothing survived the filters: do not spawn the tracking command
        # with an empty pattern list.
        if not track_paths:
            return

        call(
            self._CMD_STORAGE_TRACK + track_paths,
            stdout=PIPE,
            stderr=STDOUT,
            cwd=str(self.path),
        )
Example #3
0
    def _add_from_url(self, dataset, path, url, nocopy=False, **kwargs):
        """Process an add from url and return the location on disk.

        Local directories are added recursively; local files are copied (or
        hard-linked with ``nocopy``); any other supported scheme is
        downloaded with ``requests``.

        :param dataset: dataset the data is added to; its ``name`` and
            ``authors`` are recorded on every resulting entry.
        :param path: directory (a ``pathlib.Path``) the data is placed in.
        :param url: local path, ``file`` URL or remote URL of the data.
        :param nocopy: create a hard link to a local file instead of a copy.
        :returns: dict mapping each added file's path, relative to the
            dataset directory, to its ``DatasetFile``.
        :raises NotImplementedError: if the URL scheme is not listed in
            ``Dataset.SUPPORTED_SCHEMES``.
        :raises Exception: if ``nocopy`` is set and the hard link cannot be
            created.
        :raises requests.HTTPError: if a remote download answers with an
            HTTP error status.
        """
        u = parse.urlparse(url)

        if u.scheme not in Dataset.SUPPORTED_SCHEMES:
            raise NotImplementedError('{} URLs are not supported'.format(
                u.scheme))

        dst = path.joinpath(os.path.basename(url)).absolute()

        if u.scheme in ('', 'file'):
            src = Path(u.path).absolute()

            # if we have a directory, recurse
            if src.is_dir():
                files = {}
                # exist_ok: adding the same directory again must not fail
                # just because the destination was created earlier.
                dst.mkdir(parents=True, exist_ok=True)
                for f in src.iterdir():
                    files.update(
                        self._add_from_url(dataset,
                                           dst,
                                           f.absolute().as_posix(),
                                           nocopy=nocopy))
                return files

            if nocopy:
                try:
                    os.link(str(src), str(dst))
                except Exception as e:
                    raise Exception('Could not create hard link '
                                    '- retry without nocopy.') from e
            else:
                shutil.copy(str(src), str(dst))
        else:
            response = requests.get(url)
            # Fail before writing anything: without this check the body of
            # a 4xx/5xx answer would be stored as if it were the data.
            response.raise_for_status()
            dst.write_bytes(response.content)

        # Strip the execute bits from the added file; read and write
        # permissions are left as they are.
        mode = dst.stat().st_mode & 0o777
        dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

        self.track_paths_in_storage(dst.relative_to(self.path))
        dataset_path = self.path / self.datadir / dataset.name
        result = dst.relative_to(dataset_path).as_posix()
        return {
            result:
            DatasetFile(
                path=result,
                url=url,
                authors=dataset.authors,
                dataset=dataset.name,
            )
        }
Example #4
0
def _expand_directories(paths):
    """Expand directory with all files it contains."""
    for path in paths:
        path_ = Path(path)
        if path_.is_dir():
            for expanded in path_.rglob('*'):
                yield str(expanded)
        else:
            yield path
Example #5
0
def get_project_config_path(path=None):
    """Return the project configuration folder, or ``None`` if it is absent.

    The folder is looked up as ``RENKU_HOME`` inside ``path``; the current
    directory is used when ``path`` is empty or not given.
    """
    base = Path(path or '.').absolute()
    config_dir = base / RENKU_HOME

    # ``is_dir()`` is already false for a missing path, so it also covers
    # the existence check.
    if not config_dir.is_dir():
        return None

    return str(config_dir)
Example #6
0
    def _add_from_url(self, dataset, path, url, link=False, **kwargs):
        """Process an add from url and return the location on disk.

        Local directories are added recursively; local files are copied (or
        hard-linked with ``link``); any other supported scheme is downloaded
        with ``requests``.

        :param dataset: dataset the data is added to; its ``name`` and
            ``creator`` are recorded on every resulting entry.
        :param path: directory (a ``pathlib.Path``) the data is placed in.
        :param url: local path, ``file`` URL or remote URL of the data.
        :param link: create a hard link to a local file instead of a copy.
        :param kwargs: ``relative_to`` (optional) is a directory; when given,
            the destination keeps the source's path relative to it instead
            of only the base name. Remaining keyword arguments are forwarded
            to the recursive calls.
        :returns: list with one dict per added file, holding ``path``
            (relative to ``self.path``), ``url``, ``creator``, ``dataset``
            and ``parent``.
        :raises NotImplementedError: if the URL scheme is not listed in
            ``Dataset.SUPPORTED_SCHEMES``.
        :raises Exception: if ``link`` is set and the hard link cannot be
            created.
        :raises requests.HTTPError: if a remote download answers with an
            HTTP error status.
        """
        u = parse.urlparse(url)

        if u.scheme not in Dataset.SUPPORTED_SCHEMES:
            raise NotImplementedError(
                '{} URLs are not supported'.format(u.scheme)
            )

        # Respect the directory structure inside the source path.
        relative_to = kwargs.pop('relative_to', None)
        if relative_to:
            dst_path = Path(u.path).resolve().absolute().relative_to(
                Path(relative_to).resolve().absolute()
            )
        else:
            dst_path = os.path.basename(u.path)

        dst = path.joinpath(dst_path).absolute()

        if u.scheme in ('', 'file'):
            src = Path(u.path).absolute()

            # if we have a directory, recurse
            if src.is_dir():
                files = []
                dst.mkdir(parents=True, exist_ok=True)
                for f in src.iterdir():
                    files.extend(
                        self._add_from_url(
                            dataset,
                            dst,
                            f.absolute().as_posix(),
                            link=link,
                            **kwargs
                        )
                    )
                return files

            # Make sure the parent directory exists.
            dst.parent.mkdir(parents=True, exist_ok=True)

            if link:
                try:
                    os.link(str(src), str(dst))
                except Exception as e:
                    raise Exception(
                        'Could not create hard link '
                        '- retry without --link.'
                    ) from e
            else:
                shutil.copy(str(src), str(dst))
        else:
            response = requests.get(url)
            # Fail before writing anything: without this check the body of
            # a 4xx/5xx answer would be stored as if it were the data.
            response.raise_for_status()
            dst.write_bytes(response.content)

        # Strip the execute bits from the added file; read and write
        # permissions are left as they are.
        mode = dst.stat().st_mode & 0o777
        dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

        self.track_paths_in_storage(str(dst.relative_to(self.path)))

        return [{
            'path': dst.relative_to(self.path),
            'url': url,
            'creator': dataset.creator,
            'dataset': dataset.name,
            'parent': self
        }]