Ejemplo n.º 1
0
    def create_dataset(
        self,
        short_name=None,
        title=None,
        description=None,
        creators=None,
        keywords=None,
    ):
        """Create a dataset and register a link reference for it.

        The metadata file is placed at
        ``self.renku_datasets_path / <uuid4> / self.METADATA`` and a
        ``datasets/<short_name>`` reference is pointed at it before the
        dataset is serialized with ``to_yaml()``.

        :param short_name: mandatory; must pass
            ``is_dataset_short_name_valid``.
        :param title: used as the dataset ``name``; a falsy value falls back
            to ``short_name``.
        :param description: forwarded to ``Dataset`` as is.
        :param creators: ``None`` is replaced by a single-element list built
            with ``Person.from_git(self.repo)``.
        :param keywords: a falsy value is replaced by an empty tuple.
        :returns: the tuple ``(dataset, metadata path, link reference)``.
        :raises errors.ParameterError: ``short_name`` is missing or invalid.
        :raises errors.DatasetExistsError: ``load_dataset`` finds a dataset
            for ``short_name`` or the metadata path already exists.
        """
        # --- argument validation -------------------------------------------
        if not short_name:
            raise errors.ParameterError('Dataset short_name must be provided.')

        if not is_dataset_short_name_valid(short_name):
            invalid_msg = 'Dataset short_name "{}" is not valid.'.format(
                short_name)
            raise errors.ParameterError(invalid_msg)

        already_there = self.load_dataset(short_name=short_name)
        if already_there:
            exists_msg = 'Dataset exists: "{}".'.format(short_name)
            raise errors.DatasetExistsError(exists_msg)

        title = title or short_name

        # --- metadata location ---------------------------------------------
        dataset_id = str(uuid.uuid4())
        metadata_path = (
            self.renku_datasets_path / dataset_id / self.METADATA
        )

        if metadata_path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(metadata_path))

        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        # --- defaults (resolved only after the directory exists) -----------
        creators = (
            [Person.from_git(self.repo)] if creators is None else creators
        )
        if not keywords:
            keywords = ()

        # --- build the dataset object --------------------------------------
        with with_reference(metadata_path):
            dataset = Dataset(
                client=self,
                identifier=dataset_id,
                short_name=short_name,
                name=title,
                description=description,
                creator=creators,
                keywords=keywords,
            )

        # --- reference + persistence ---------------------------------------
        reference_name = 'datasets/' + short_name
        link = LinkReference.create(client=self, name=reference_name)
        link.set_reference(metadata_path)

        # Store the dataset path relative to ``self.path`` before saving.
        current_location = Path(dataset.path)
        dataset.path = current_location.relative_to(self.path)
        dataset.to_yaml()

        return dataset, metadata_path, link
Ejemplo n.º 2
0
    def create_dataset(self,
                       name,
                       short_name=None,
                       description='',
                       creators=None):
        """Create a dataset and register a link reference for it.

        The metadata file is placed at
        ``self.renku_datasets_path / <uuid4> / self.METADATA`` and a
        ``datasets/<short_name>`` reference is pointed at it before the
        dataset is serialized with ``to_yaml()``.

        :param name: mandatory dataset name.
        :param short_name: when falsy it is derived with
            ``generate_default_short_name(name, None)``; the result must pass
            ``is_dataset_name_valid``.
        :param description: forwarded to ``Dataset`` as is.
        :param creators: ``None`` is replaced by a single-element list built
            with ``Person.from_git(self.repo)``.
        :returns: the tuple ``(dataset, metadata path, link reference)``.
        :raises errors.ParameterError: ``name`` is missing or the short name
            is invalid.
        :raises errors.DatasetExistsError: ``load_dataset`` finds a dataset
            for the short name or the metadata path already exists.
        """
        # --- argument validation -------------------------------------------
        if not name:
            raise errors.ParameterError('Dataset name must be provided.')

        short_name = short_name or generate_default_short_name(name, None)

        if not is_dataset_name_valid(short_name):
            invalid_msg = 'Dataset name "{}" is not valid.'.format(short_name)
            raise errors.ParameterError(invalid_msg)

        already_there = self.load_dataset(name=short_name)
        if already_there:
            exists_msg = 'Dataset exists: "{}".'.format(short_name)
            raise errors.DatasetExistsError(exists_msg)

        # --- metadata location ---------------------------------------------
        dataset_id = str(uuid.uuid4())
        metadata_path = (
            self.renku_datasets_path / dataset_id / self.METADATA
        )

        if metadata_path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(metadata_path))

        metadata_path.parent.mkdir(parents=True, exist_ok=True)

        # Default creator is resolved only after the directory exists.
        creators = (
            [Person.from_git(self.repo)] if creators is None else creators
        )

        # --- build the dataset object --------------------------------------
        with with_reference(metadata_path):
            dataset = Dataset(
                client=self,
                identifier=dataset_id,
                name=name,
                short_name=short_name,
                description=description,
                creator=creators,
            )

        # --- reference + persistence ---------------------------------------
        reference_name = 'datasets/' + short_name
        link = LinkReference.create(client=self, name=reference_name)
        link.set_reference(metadata_path)
        dataset.to_yaml()

        return dataset, metadata_path, link
Ejemplo n.º 3
0
    def with_dataset(self, short_name=None, create=False):
        """Yield an editable metadata object for a dataset.

        This is a generator that yields exactly once. The dataset is looked
        up with ``load_dataset``; with ``create=True`` a new dataset is made
        through ``create_dataset`` instead. The data directory
        ``self.path / self.datadir / <dataset.short_name>`` is ensured to
        exist before the dataset is yielded, and ``dataset.to_yaml()`` is
        called once the consumer resumes the generator without an error.

        If an ``Exception`` is raised while preparing the data directory or
        is thrown into the generator at the ``yield``, a dataset that was
        created by this call is rolled back (its link reference is deleted
        and its metadata directory removed) and the exception is re-raised.
        Datasets that already existed are never removed.

        :param short_name: short name of the dataset to load or create.
        :param create: when true the dataset must not exist yet and is
            created; when false it must already exist.
        :raises errors.DatasetNotFound: the dataset is missing and ``create``
            is false.
        :raises errors.DatasetExistsError: the dataset exists and ``create``
            is true.
        """
        dataset = self.load_dataset(short_name=short_name)
        clean_up_required = False

        if dataset is None:
            if not create:
                raise errors.DatasetNotFound(name=short_name)

            # Only a dataset created right here may be rolled back below.
            clean_up_required = True
            dataset, path, dataset_ref = self.create_dataset(
                short_name=short_name)
        elif create:
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        try:
            # Creating the data directory is part of the guarded section so
            # that a failure here (e.g. a file blocking the path) does not
            # leave a freshly created reference and metadata directory
            # behind.
            dataset_path = self.path / self.datadir / dataset.short_name
            dataset_path.mkdir(parents=True, exist_ok=True)

            yield dataset
        except Exception:
            # TODO use a general clean-up strategy
            # https://github.com/SwissDataScienceCenter/renku-python/issues/736
            if clean_up_required:
                dataset_ref.delete()
                shutil.rmtree(path.parent, ignore_errors=True)
            raise

        # Reached only when no exception occurred: persist the edits.
        dataset.to_yaml()