Esempio n. 1
0
def _construct_creators(creators, ignore_email=False):
    """Parse an iterable of creator strings/dicts into ``Person`` objects.

    :param creators: Iterable of strings or dicts describing people.
    :param ignore_email: Collect entries with a missing email as warnings
        instead of raising.
    :return: Tuple of (list of ``Person``, list of entries lacking an email).
    :raises errors.ParameterError: On an invalid container or entry type,
        a missing name, or (unless ignored) a missing email.
    """
    from collections.abc import Iterable

    creators = creators or ()

    # A plain string would iterate character-by-character; reject it.
    if isinstance(creators, str) or not isinstance(creators, Iterable):
        raise errors.ParameterError("Invalid type")

    message = 'A valid format is "Name <email> [affiliation]"'

    parsed = []
    missing_email = []
    for entry in creators:
        if isinstance(entry, str):
            candidate = Person.from_string(entry)
        elif isinstance(entry, dict):
            candidate = Person.from_dict(entry)
        else:
            raise errors.ParameterError("Invalid type")

        if not candidate.name:  # pragma: no cover
            raise errors.ParameterError(
                f'Name is invalid: "{entry}".\n{message}')

        if not candidate.email:
            if ignore_email:
                missing_email.append(entry)
            else:  # pragma: no cover
                raise errors.ParameterError(
                    f'Email is invalid: "{entry}".\n{message}')

        parsed.append(candidate)

    return parsed, missing_email
Esempio n. 2
0
def test_data_add(scheme, path, overwrite, error, client, data_file, directory_tree, dataset_responses):
    """Test data import."""
    with raises(error):
        # Resolve placeholder paths to the actual fixture locations.
        if path == "temp":
            path = str(data_file)
        elif path == "tempdir":
            path = str(directory_tree)

        url = "{}{}".format(scheme, path)

        with client.with_dataset("dataset", create=True) as d:
            d.creators = [Person(name="me", email="*****@*****.**", id="me_id")]
            client.add_data_to_dataset(d, [url], overwrite=overwrite)

        target_path = os.path.join(DATA_DIR, "dataset", "file")

        with open(target_path) as stream:
            assert stream.read() == "1234"

        assert d.find_file(target_path)

        # The imported file must not be executable by anyone.
        executable_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        assert not os.access(target_path, executable_bits)

        # For local schemes, removing and re-adding must restore the data.
        if scheme in ("", "file://"):
            shutil.rmtree("./data/dataset")
            with client.with_dataset("dataset") as d:
                d.creators = [Person(name="me", email="*****@*****.**", id="me_id")]
                client.add_data_to_dataset(d, [url], overwrite=True)
            assert os.path.exists(target_path)
Esempio n. 3
0
def create_dataset(
    client,
    name,
    short_name=None,
    description=None,
    creators=None,
    commit_message=None,
):
    """Create an empty dataset in the current repo.

    :raises: ``renku.core.errors.ParameterError``
    """
    if not creators:
        # Default to the author configured in the git repository.
        creators = [Person.from_git(client.repo)]
    elif hasattr(creators, '__iter__'):
        # Normalize homogeneous string/dict lists into Person objects.
        first = creators[0]
        if isinstance(first, str):
            creators = [Person.from_string(entry) for entry in creators]
        elif isinstance(first, dict):
            creators = [Person.from_dict(entry) for entry in creators]

    dataset, _, __ = client.create_dataset(
        name=name,
        short_name=short_name,
        description=description,
        creators=creators,
    )

    return dataset
Esempio n. 4
0
def _convert_creators(value):
    """Convert creators.

    Returns a list of ``Person`` for list or dict input; other types fall
    through and yield ``None``.
    """
    if isinstance(value, list):
        return [Person.from_jsonld(item) for item in value]

    if isinstance(value, dict):  # compatibility with previous versions
        return [Person.from_jsonld(value)]
Esempio n. 5
0
def test_creators_with_same_email(tmp_path):
    """Test creators with different names and same email address."""
    same_email = [Person(name="me", email="*****@*****.**"), Person(name="me2", email="*****@*****.**")]
    dataset = Dataset(name="dataset", creators=same_email)
    yaml_path = tmp_path / "dataset.yml"
    dataset.__reference__ = yaml_path
    dataset.to_yaml()

    # Creators sharing an email collapse to a single entry on round-trip.
    reloaded = Dataset.from_yaml(yaml_path)
    assert len(reloaded.creators) == 1
    assert reloaded.creators[0].name in ("me", "me2")
Esempio n. 6
0
    def __attrs_post_init__(self):
        """Initialize computed attributes."""
        if not self.creator and self.client:
            metadata_path = self.client.renku_metadata_path
            if metadata_path.exists():
                # Derive the creator from the first commit that introduced
                # the renku metadata file.
                first_commit = self.client.find_previous_commit(
                    metadata_path, return_first=True)
                self.creator = Person.from_commit(first_commit)
            else:
                # this assumes the project is being newly created
                self.creator = Person.from_git(self.client.repo)

        self._id = self.project_id
Esempio n. 7
0
def test_creator_parse(creators, data_file):
    """Test that different options for specifying creators work."""
    dataset = Dataset(name="dataset", creators=creators)
    expected = Person(name="me", email="*****@*****.**")
    assert expected in dataset.creators

    # A malformed email address must be rejected.
    with pytest.raises(ValueError):
        Person(name="me", email="meexample.com")

    # creators must be a set or list of dicts or Person
    with pytest.raises(ValueError):
        Dataset(name="dataset", creators=["name"])
Esempio n. 8
0
def test_creator_parse(creators, data_file):
    """Test that different options for specifying creators work."""
    dataset_file = DatasetFile(path='file', creator=creators)
    expected = Person(name='me', email='*****@*****.**')
    assert expected in dataset_file.creator

    # A malformed email address must be rejected.
    with pytest.raises(ValueError):
        Person(name='me', email='meexample.com')

    # creators must be a set or list of dicts or Person
    with pytest.raises(ValueError):
        DatasetFile(path='file', creator=['name'])
Esempio n. 9
0
def test_project_creator_deserialization(client, project):
    """Check that the correct creator is returned on deserialization."""
    from renku.core.models.provenance.agents import Person

    # modify the project metadata to change the creator
    project = client.project
    project.creator = Person(email='*****@*****.**', name='Johnny Doe')
    project.to_yaml()
    # Amend the commit with a different author so metadata and git disagree.
    client.repo.git.commit('-a', '--amend', '-C', 'HEAD', '--author',
                           'Johnny Doe <*****@*****.**>', '--no-verify')

    # the project creator should always be the one in the metadata
    assert '*****@*****.**' == client.project.creator.email
    assert 'Johnny Doe' == client.project.creator.name
    assert client.project.creator.label == client.project.creator.name

    # Remove the creator from metadata
    project = client.project
    project.creator = None
    project.to_yaml()
    client.repo.git.commit('-a', '--amend', '-C', 'HEAD', '--author',
                           'Jane Doe <*****@*****.**>', '--no-verify')

    # now the creator should be the one from the commit
    project = Project.from_yaml(client.renku_metadata_path, client=client)
    assert '*****@*****.**' == project.creator.email
    assert 'Jane Doe' == project.creator.name
    assert project.creator.label == project.creator.name
Esempio n. 10
0
def test_data_add_recursive(directory_tree, client):
    """Test recursive data imports."""
    with client.with_dataset("dataset", create=True) as dataset:
        dataset.creators = [Person(name="me", email="*****@*****.**", id="me_id")]
        source = directory_tree.join("dir2").strpath
        client.add_data_to_dataset(dataset, [source])

        # Imported files keep their originating directory name.
        parent = os.path.dirname(dataset.files[0].path)
        assert os.path.basename(parent) == "dir2"
Esempio n. 11
0
def create_dataset(
    client,
    short_name,
    title=None,
    description='',
    creators=None,
    keywords=None,
    commit_message=None
):
    """Create an empty dataset in the current repo.

    :raises: ``renku.core.errors.ParameterError``
    """
    if creators:
        creators, _ = _construct_creators(creators)
    else:
        # Fall back to the author configured in the git repository.
        creators = [Person.from_git(client.repo)]

    dataset, _, __ = client.create_dataset(short_name=short_name,
                                           title=title,
                                           description=description,
                                           creators=creators,
                                           keywords=keywords)

    return dataset
Esempio n. 12
0
    def init_repository(self, force=False):
        """Initialize an empty Renku repository."""
        from git import Repo
        from renku.core.models.provenance.agents import Person

        # Refuse to clobber an existing repository unless forced.
        if not force and self.repo is not None:
            raise errors.InvalidFileOperation(
                'Folder {0} already contains file. Use --force to overwrite'.
                format(self.repo.git_dir))

        # initialize repo
        self.repo = Repo.init(str(self.path.absolute()))

        # Fails early if no author information is configured in git.
        Person.from_git(self.repo)
Esempio n. 13
0
    def create_dataset(
        self,
        short_name=None,
        title=None,
        description=None,
        creators=None,
        keywords=None,
    ):
        """Create a dataset.

        :param short_name: Machine-friendly dataset identifier (required).
        :param title: Human-readable title; defaults to ``short_name``.
        :param description: Optional free-text description.
        :param creators: Creators list; defaults to the git repo author.
        :param keywords: Optional iterable of keywords.
        :raises errors.ParameterError: If ``short_name`` is missing/invalid.
        :raises errors.DatasetExistsError: If the dataset already exists.
        :return: Tuple of (dataset, metadata path, dataset reference).
        """
        if not short_name:
            raise errors.ParameterError('Dataset short_name must be provided.')

        if not is_dataset_short_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset short_name "{}" is not valid.'.format(short_name))

        if self.load_dataset(short_name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        if not title:
            title = short_name

        # A fresh UUID determines the metadata directory of the dataset.
        identifier = str(uuid.uuid4())

        path = self.renku_datasets_path / identifier / self.METADATA

        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))

        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        keywords = keywords or ()

        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                short_name=short_name,
                name=title,
                description=description,
                creator=creators,
                keywords=keywords,
            )

        # Create a named link so the dataset is addressable by short_name.
        dataset_ref = LinkReference.create(client=self,
                                           name='datasets/' + short_name)

        dataset_ref.set_reference(path)
        dataset.path = Path(dataset.path).relative_to(self.path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
Esempio n. 14
0
def test_data_add_recursive(directory_tree, client):
    """Test recursive data imports."""
    with client.with_dataset('dataset', create=True) as dataset:
        dataset.creator = [
            Person(name='me', email='*****@*****.**', id='me_id')
        ]
        source = directory_tree.join('dir2').strpath
        client.add_data_to_dataset(dataset, [source])

        # Imported files keep their originating directory name.
        parent = os.path.dirname(dataset.files[0].path)
        assert os.path.basename(parent) == 'dir2'
Esempio n. 15
0
    def __attrs_post_init__(self):
        """Initialize computed attributes."""
        if not self.creator and self.client:
            if self.client.renku_metadata_path.exists():
                # Derive the creator from the first commit that introduced
                # the renku metadata file.
                self.creator = Person.from_commit(
                    self.client.find_previous_commit(self.client.renku_metadata_path, return_first=True),
                )
            else:
                # this assumes the project is being newly created
                self.creator = Person.from_git(self.client.repo)

        try:
            self._id = self.project_id
        except ValueError:
            """Fallback to old behaviour."""
            # Keep a pre-existing id; otherwise borrow the client's project
            # id; if neither is available re-raise the original error.
            if self._id:
                pass
            elif self.client and self.client.is_project_set():
                self._id = self.client.project._id
            else:
                raise
Esempio n. 16
0
    def init_repository(self, force=False, user=None):
        """Initialize an empty Renku repository."""
        from git import Repo

        from renku.core.models.provenance.agents import Person

        # Refuse to clobber an existing repository unless forced.
        if not force and self.repo is not None:
            raise errors.InvalidFileOperation(
                "Folder {0} already contains file. Use --force to overwrite".
                format(self.repo.git_dir))

        # initialize repo and set user data
        self.repo = Repo.init(str(self.path.absolute()))
        if user:
            writer = self.repo.config_writer()
            for key, value in user.items():
                writer.set_value("user", key, value)
            writer.release()

        # Fails early if no author information is configured in git.
        Person.from_git(self.repo)
Esempio n. 17
0
def test_data_add(scheme, path, overwrite, error, client, data_file,
                  directory_tree, dataset_responses):
    """Test data import."""
    with raises(error):
        # Resolve placeholder paths to the actual fixture locations.
        if path == 'temp':
            path = str(data_file)
        elif path == 'tempdir':
            path = str(directory_tree)

        with client.with_dataset('dataset', create=True) as d:
            d.creator = [Person(name='me', email='*****@*****.**', id='me_id')]

            client.add_data_to_dataset(d, ['{}{}'.format(scheme, path)],
                                       overwrite=overwrite)

        target_path = os.path.join(DATA_DIR, 'dataset', 'file')

        with open(target_path) as f:
            assert f.read() == '1234'

        assert d.find_file(target_path)

        # check that the imported file is read-only
        assert not os.access(target_path,
                             stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        # check the linking
        # For local schemes, removing and re-adding must restore the data.
        if scheme in ('', 'file://'):
            shutil.rmtree('./data/dataset')
            with client.with_dataset('dataset') as d:
                d.creator = [
                    Person(name='me', email='*****@*****.**', id='me_id')
                ]
                client.add_data_to_dataset(d, ['{}{}'.format(scheme, path)],
                                           overwrite=True)
            assert os.path.exists(target_path)
Esempio n. 18
0
def dataset(client):
    """Create a dataset."""
    from renku.core.models.provenance.agents import Person

    with client.with_dataset("dataset", create=True) as dataset:
        me = Person(
            affiliation="xxx",
            email="*****@*****.**",
            id="me_id",
            name="me",
        )
        dataset.creators = [me]
    return dataset
Esempio n. 19
0
def dataset(client):
    """Create a dataset."""
    from renku.core.models.provenance.agents import Person

    with client.with_dataset('dataset', create=True) as dataset:
        me = Person(
            affiliation='xxx',
            email='*****@*****.**',
            id='me_id',
            name='me',
        )
        dataset.creator = [me]
    return dataset
Esempio n. 20
0
    def create_dataset(self,
                       name,
                       short_name=None,
                       description='',
                       creators=None):
        """Create a dataset.

        :param name: Human-readable dataset name (required).
        :param short_name: Machine-friendly identifier; generated from
            ``name`` when not given.
        :param description: Optional free-text description.
        :param creators: Creators list; defaults to the git repo author.
        :raises errors.ParameterError: If the name is missing or invalid.
        :raises errors.DatasetExistsError: If the dataset already exists.
        :return: Tuple of (dataset, metadata path, dataset reference).
        """
        if not name:
            raise errors.ParameterError('Dataset name must be provided.')

        if not short_name:
            short_name = generate_default_short_name(name, None)

        if not is_dataset_name_valid(short_name):
            raise errors.ParameterError(
                'Dataset name "{}" is not valid.'.format(short_name))

        if self.load_dataset(name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name))

        # A fresh UUID determines the metadata directory of the dataset.
        identifier = str(uuid.uuid4())

        path = self.renku_datasets_path / identifier / self.METADATA

        if path.exists():
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path))

        path.parent.mkdir(parents=True, exist_ok=True)

        if creators is None:
            creators = [Person.from_git(self.repo)]

        with with_reference(path):
            dataset = Dataset(client=self,
                              identifier=identifier,
                              name=name,
                              short_name=short_name,
                              description=description,
                              creator=creators)

        # Create a named link so the dataset is addressable by short_name.
        dataset_ref = LinkReference.create(client=self,
                                           name='datasets/' + short_name)

        dataset_ref.set_reference(path)
        dataset.to_yaml()

        return dataset, path, dataset_ref
Esempio n. 21
0
def test_construct_person(value, has_name, has_email, has_affiliation):
    """Test construct person from string."""
    person = Person.from_string(value)

    # Name falls back to the empty string; email/affiliation to None.
    assert person.name == ("John Doe" if has_name else "")

    if has_email:
        assert person.email == "*****@*****.**"
    else:
        assert person.email is None

    if has_affiliation:
        assert person.affiliation == "Some Affiliation"
    else:
        assert person.affiliation is None
Esempio n. 22
0
    def _handle_sentry(self):
        """Handle exceptions using Sentry.

        Reports the active exception to Sentry (tagging the event with the
        git user when available) and then re-raises it.
        """
        from sentry_sdk import capture_exception, configure_scope
        from sentry_sdk.utils import capture_internal_exceptions

        with configure_scope() as scope:
            with capture_internal_exceptions():
                # Attach the git user to the Sentry event.  Any failure in
                # this lookup is swallowed by capture_internal_exceptions so
                # the report still goes out without user info.
                from git import Repo
                from renku.core.commands import get_git_home
                from renku.core.models.provenance.agents import Person

                repo = Repo(get_git_home())
                user = Person.from_git(repo)

                scope.user = {'name': user.name, 'email': user.email}

            event_id = capture_exception()
            click.echo(
                _BUG + 'Recorded in Sentry with ID: {0}\n'.format(event_id),
                err=True,
            )
            # Re-raise the active exception for normal error handling.
            raise
Esempio n. 23
0
def test_project_shacl(project, client):
    """Test project metadata structure."""
    from renku.core.models.provenance.agents import Person

    fixtures_dir = Path(__file__).parent.parent.parent / 'fixtures'
    shacl_file = fixtures_dir / 'force_project_shacl.json'

    project = client.project
    project.creator = Person(email='*****@*****.**', name='Johnny Doe')

    jsonld = project.as_jsonld()
    rdf = pyld.jsonld.to_rdf(
        jsonld,
        options={
            'format': 'application/n-quads',
            'produceGeneralizedRdf': False
        })

    # The metadata must satisfy the project-specific SHACL shapes ...
    conforms, _, report = validate_graph(rdf, shacl_path=str(shacl_file))
    assert conforms is True, report

    # ... as well as the default shapes.
    conforms, _, report = validate_graph(rdf)
    assert conforms is True, report
Esempio n. 24
0
    def update_dataset_files(self, files, ref, delete=False):
        """Update files and dataset metadata according to their remotes.

        :param files: List of files to be updated
        :param ref: Git reference to check out in the remote repositories.
        :param delete: Indicates whether to delete files or not

        :return: List of files that should be deleted
        """
        from renku import LocalClient

        # Cache prepared repos so each remote URL is cloned only once.
        visited_repos = {}
        updated_files = []
        deleted_files = []

        for file_ in files:
            # Only files imported from another repository can be updated.
            if not file_.based_on:
                continue

            file_.based_on = DatasetFile.from_jsonld(file_.based_on)
            based_on = file_.based_on
            url = based_on.url
            if url in visited_repos:
                repo, repo_path, remote_client = visited_repos[url]
            else:
                repo, repo_path = self.prepare_git_repo(url, ref)
                remote_client = LocalClient(repo_path)
                visited_repos[url] = repo, repo_path, remote_client

            remote_file = self._fetch_file_metadata(remote_client,
                                                    based_on.path)

            if not remote_file:
                # No Renku metadata in the remote; build it from git history.
                try:
                    remote_file = DatasetFile.from_revision(
                        remote_client,
                        path=based_on.path,
                        url=url,
                        added=based_on.added)
                except KeyError:
                    raise errors.ParameterError(
                        'Cannot find file {} in the repo {}'.format(
                            based_on.url, url))

            # Only act when the remote commit differs from the recorded one.
            commit_sha = self._get_commit_sha_from_label(based_on)
            remote_commit_sha = self._get_commit_sha_from_label(remote_file)
            if commit_sha != remote_commit_sha:
                src = Path(repo.working_dir) / based_on.path
                dst = self.renku_path.parent / file_.path

                if src.exists():
                    # Fetch file if it is tracked by Git LFS
                    self._fetch_lfs_files(repo_path, {based_on.path})
                    if remote_client._is_external_file(src):
                        self.remove_file(dst)
                        self._create_external_file(src.resolve(), dst)
                    else:
                        shutil.copy(src, dst)
                    file_.based_on.commit = remote_file.commit
                    file_.based_on._label = remote_file._label
                    updated_files.append(file_)
                else:
                    # File was removed or renamed
                    if delete:
                        self.remove_file(dst)
                    deleted_files.append(file_)

        if not updated_files and (not delete or not deleted_files):
            # Nothing to commit or update
            return deleted_files

        # Commit changes in files

        file_paths = {str(f.path) for f in updated_files + deleted_files}
        # Force-add to include possible ignored files that are in datasets
        self.repo.git.add(*(file_paths), force=True)
        self.repo.index.commit(
            'renku dataset: updated {} files and deleted {} files'.format(
                len(updated_files), len(deleted_files)))

        # Update datasets' metadata

        modified_datasets = {}

        for file_ in updated_files:
            # Re-create list of creators
            creators = []
            # grab all the creators from the commit history
            for commit in repo.iter_commits(paths=file_.path):
                creator = Person.from_commit(commit)
                if creator not in creators:
                    creators.append(creator)

            new_file = DatasetFile.from_revision(self,
                                                 path=file_.path,
                                                 based_on=file_.based_on,
                                                 creator=creators)
            file_.dataset.update_files([new_file])
            modified_datasets[file_.dataset.name] = file_.dataset

        if delete:
            for file_ in deleted_files:
                file_.dataset.unlink_file(file_.path)
                modified_datasets[file_.dataset.name] = file_.dataset

        # Persist metadata once per touched dataset.
        for dataset in modified_datasets.values():
            dataset.to_yaml()

        return deleted_files
Esempio n. 25
0
    def _add_from_git(self, dataset, url, sources, destination, ref):
        """Process adding resources from another git repository.

        :param dataset: Target dataset.
        :param url: URL of the source git repository.
        :param sources: Paths inside the source repository to add.
        :param destination: Destination path in this repository.
        :param ref: Git reference to check out in the source repository.
        :raises errors.ParameterError: If a source does not match any file
            or multiple files target a single-file destination.
        :return: List of dicts describing each file to add (path, url,
            creators, provenance and the copy/symlink operation to perform).
        """
        from renku import LocalClient

        u = parse.urlparse(url)

        sources = self._resolve_paths(u.path, sources)

        # Get all files from repo that match sources
        repo, repo_path = self.prepare_git_repo(url, ref)
        files = set()
        used_sources = set()
        for file in repo.head.commit.tree.traverse():
            path = file.path
            result = self._get_src_and_dst(path, repo_path, sources,
                                           destination, used_sources)

            if result:
                files.add(result)

        # Any source pattern that matched nothing is a user error.
        unused_sources = set(sources.keys()) - used_sources
        if unused_sources:
            unused_sources = {str(s) for s in unused_sources}
            raise errors.ParameterError('No such file or directory',
                                        param_hint=unused_sources)

        if destination.exists() and not destination.is_dir():
            if len(files) > 1:
                raise errors.ParameterError(
                    'Cannot copy multiple files or directories to a file')

        # Create metadata and move files to dataset
        results = []
        remote_client = LocalClient(repo_path)

        # Pull files from LFS
        paths = set()
        for path, src, _ in files:
            if src.is_dir():
                continue
            if src.is_symlink():
                # Resolve symlinks to the real repository-relative path.
                try:
                    path = str(src.resolve().relative_to(repo_path))
                except ValueError:  # External file
                    pass
            paths.add(path)
        self._fetch_lfs_files(repo_path, paths)

        # Fetch metadata from Renku if any
        paths = {f[0] for f in files}
        metadata = self._fetch_files_metadata(remote_client, paths)

        for path, src, dst in files:
            if not src.is_dir():
                # Use original metadata if it exists
                based_on = metadata.get(path)
                if based_on:
                    based_on.url = url
                    based_on.based_on = None
                    creators = based_on.creator
                else:
                    creators = []
                    # grab all the creators from the commit history
                    for commit in repo.iter_commits(paths=path):
                        creator = Person.from_commit(commit)
                        if creator not in creators:
                            creators.append(creator)

                    based_on = DatasetFile.from_revision(remote_client,
                                                         path=path,
                                                         url=url,
                                                         creator=creators)

                path_in_dst_repo = dst.relative_to(self.path)

                # External files are symlinked; regular files are copied.
                if remote_client._is_external_file(src):
                    operation = (src.resolve(), dst, 'symlink')
                else:
                    operation = (src, dst, 'copy')

                results.append({
                    'path': path_in_dst_repo,
                    'url': remove_credentials(url),
                    'creator': creators,
                    'parent': self,
                    'based_on': based_on,
                    'operation': operation
                })

        return results
Esempio n. 26
0
    def _add_from_git(self, dataset, url, sources, destination, ref):
        """Process adding resources from another git repository.

        :param dataset: Target dataset.
        :param url: URL of the source git repository.
        :param sources: Paths inside the source repository to add.
        :param destination: Destination path in this repository.
        :raises errors.ParameterError: If a source matches no file.
        :return: List of dicts describing each copied file (path, url,
            creators and provenance); files are copied as a side effect.
        """
        from renku import LocalClient

        u = parse.urlparse(url)

        sources = self._resolve_paths(u.path, sources)

        # Get all files from repo that match sources
        repo, repo_path = self._prepare_git_repo(url, ref)
        copied_sources = set()
        files = set()
        for file in repo.head.commit.tree.traverse():
            path = file.path
            result = self._get_src_and_dst(path, repo_path, sources,
                                           destination)

            if result:
                files.add(result)
                source = result[3]
                copied_sources.add(source)

        # Any source pattern that matched nothing is a user error.
        uncopied_sources = sources - copied_sources
        if uncopied_sources:
            uncopied_sources = {str(s) for s in uncopied_sources}
            raise errors.ParameterError('No such file or directory',
                                        param_hint=uncopied_sources)

        # Create metadata and move files to dataset
        results = []
        remote_client = LocalClient(repo_path)

        # Pull files from LFS
        paths = set()
        for path, src, _, __ in files:
            if src.is_dir():
                continue
            if src.is_symlink():
                # Resolve symlinks to the real repository-relative path.
                path = str(src.resolve().relative_to(repo_path))
            paths.add(path)
        self._fetch_lfs_files(repo_path, paths)

        # Fetch metadata from Renku if any
        paths = {f[0] for f in files}
        metadata = self._fetch_files_metadata(remote_client, paths)

        for path, src, dst, _ in files:
            if not src.is_dir():
                # Use original metadata if it exists
                based_on = metadata.get(path)
                if based_on:
                    based_on.url = url
                    based_on.based_on = None
                    creators = based_on.creator
                else:
                    creators = []
                    # grab all the creators from the commit history
                    for commit in repo.iter_commits(paths=path):
                        creator = Person.from_commit(commit)
                        if creator not in creators:
                            creators.append(creator)

                    based_on = DatasetFile.from_revision(remote_client,
                                                         path=path,
                                                         url=url)

                path_in_dst_repo = dst.relative_to(self.path)

                results.append({
                    'path': path_in_dst_repo,
                    'url': remove_credentials(url),
                    'creator': creators,
                    'parent': self,
                    'based_on': based_on
                })

                # Copy immediately (unlike newer versions that defer this).
                dst.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy(str(src), str(dst))

        return results
Esempio n. 27
0
        assert os.path.basename(os.path.dirname(
            dataset.files[0].path)) == 'dir2'


def test_git_repo_import(client, dataset, tmpdir, data_repository):
    """Test an import from a git repository."""
    # add data from local repo
    source = os.path.join(os.path.dirname(data_repository.git_dir), 'dir2')
    client.add_data_to_dataset(dataset, [source])
    assert os.stat('data/dataset/dir2/file2')
    assert dataset.files[0].path.endswith('dir2/file2')


@pytest.mark.parametrize('creators', [
    [Person(name='me', email='*****@*****.**')],
    [{
        'name': 'me',
        'email': '*****@*****.**',
    }],
])
def test_creator_parse(creators, data_file):
    """Test that different options for specifying creators work."""
    dataset_file = DatasetFile(path='file', creator=creators)
    expected = Person(name='me', email='*****@*****.**')
    assert expected in dataset_file.creator

    # A malformed email address must be rejected.
    with pytest.raises(ValueError):
        Person(name='me', email='meexample.com')
Esempio n. 28
0

def test_git_repo_import(client, dataset, tmpdir, data_repository):
    """Test an import from a git repository."""
    # add data from local repo
    source = os.path.join(os.path.dirname(data_repository.git_dir), "dir2")
    client.add_data_to_dataset(dataset, [source])

    assert os.stat(os.path.join(DATA_DIR, "dataset", "dir2", "file2"))
    assert dataset.files[0].path.endswith(os.path.join("dir2", "file2"))


@pytest.mark.parametrize(
    "creators",
    [
        [Person(name="me", email="*****@*****.**")],
        [{"http://schema.org/name": "me", "http://schema.org/email": "*****@*****.**",}],
    ],
)
def test_creator_parse(creators, data_file):
    """Test that different options for specifying creators work."""
    dataset = Dataset(name="dataset", creators=creators)
    creator = Person(name="me", email="*****@*****.**")
    assert creator in dataset.creators

    # email check
    with pytest.raises(ValueError):
        Person(name="me", email="meexample.com")

    # creators must be a set or list of dicts or Person
    with pytest.raises(ValueError):