Esempio n. 1
0
    def test_find(self, client, created_datasets, strs):
        """Check that ``dataset.versions.find()`` filters versions by tag.

        Essentially copied from test_dataset.py.
        """
        tag1, tag2, tag3 = strs[:3]
        dataset = client.create_dataset()
        created_datasets.append(dataset)

        # Tags for the four versions, listed in creation order.
        tag_sets = [[tag1], [tag1], [tag1, tag2], [tag2, tag3]]
        first, second, third, fourth = [
            dataset.create_version(Path("conftest.py"), tags=tag_set)
            for tag_set in tag_sets
        ]

        # Each tag paired with the versions that were created carrying it.
        expectations = [
            (tag1, [first, second, third]),
            (tag2, [third, fourth]),
            (tag3, [fourth]),
        ]
        for tag, expected_versions in expectations:
            found = dataset.versions.find("tags ~= {}".format(tag))
            assert len(found) == len(expected_versions)
            found_ids = {version.id for version in found}
            assert found_ids == {version.id for version in expected_versions}
Esempio n. 2
0
    def test_run_log_dataset_version(self, client_2, client_3, organization,
                                     created_entities):
        """Log someone else's dataset version to my run."""
        # Both clients join the organization, then switch to its workspace.
        members = (client_2, client_3)
        for member in members:
            organization.add_member(member._conn.email)
        for member in members:
            member.set_workspace(organization.name)

        project = client_2.create_project()
        created_entities.append(project)
        run = client_2.create_experiment_run()

        # private dataset version: logging it to client_2's run must fail
        private_dataset = client_3.create_dataset(visibility=Private())
        created_entities.append(private_dataset)
        private_version = private_dataset.create_version(Path(__file__))
        with pytest.raises(requests.HTTPError,
                           match="Access Denied|Forbidden"):
            run.log_dataset_version("train", private_version)

        # org dataset version: created without an explicit visibility
        org_dataset = client_3.create_dataset()
        created_entities.append(org_dataset)
        org_version = org_dataset.create_version(Path(__file__))
        run.log_dataset_version("train", org_version)
        logged_version = run.get_dataset_version("train")
        assert logged_version.id == org_version.id
Esempio n. 3
0
    def test_create_get_path(self, client, dataset):
        """Path versions can be fetched by ID; the newest one is the latest."""
        dir_version = dataset.create_version(Path(["modelapi_hypothesis/"]))
        file_version = dataset.create_version(
            Path(["modelapi_hypothesis/api_generator.py"]))

        # Fetch by ID, passed positionally and then by keyword.
        fetched_dir_version = client.get_dataset_version(dir_version.id)
        assert dir_version.id == fetched_dir_version.id
        fetched_file_version = client.get_dataset_version(id=file_version.id)
        assert file_version.id == fetched_file_version.id

        latest_version = dataset.get_latest_version()
        assert file_version.id == latest_version.id
Esempio n. 4
0
    def test_create_get_path(self, client, created_datasets):
        """Path versions can be fetched by ID; the newest one is the latest."""
        dataset_name = verta._internal_utils._utils.generate_default_name()
        dataset = client.set_dataset(dataset_name)
        created_datasets.append(dataset)

        dir_version = dataset.create_version(Path(["modelapi_hypothesis/"]))
        file_version = dataset.create_version(
            Path(["modelapi_hypothesis/api_generator.py"]))

        # Fetch by ID, passed positionally and then by keyword.
        fetched_dir_version = client.get_dataset_version(dir_version.id)
        assert dir_version.id == fetched_dir_version.id
        fetched_file_version = client.get_dataset_version(id=file_version.id)
        assert file_version.id == fetched_file_version.id

        latest_version = dataset.get_latest_version()
        assert file_version.id == latest_version.id
Esempio n. 5
0
    def test_attributes(self, client, created_datasets):
        """Exercise setting, reading, and deleting dataset version attributes."""
        Attr = collections.namedtuple('Attr', ['key', 'value'])
        dict_attr = Attr('key1', {'a': 1})
        list_attr = Attr('key2', ['a', 1])
        str_attr = Attr('key3', 'a')
        int_attr = Attr('key4', 1)

        def as_dict(*attrs):
            # Each Attr is a (key, value) pair, so dict() accepts them as-is.
            return dict(attrs)

        dataset = client.create_dataset()
        created_datasets.append(dataset)

        version = dataset.create_version(Path("conftest.py"),
                                         attrs=as_dict(dict_attr))
        assert version.get_attributes() == as_dict(dict_attr)

        version.add_attribute(list_attr.key, list_attr.value)
        assert version.get_attributes() == as_dict(dict_attr, list_attr)

        version.add_attributes(as_dict(str_attr, int_attr))
        assert version.get_attributes() == as_dict(dict_attr, list_attr,
                                                   str_attr, int_attr)

        version.del_attribute(str_attr.key)
        version.del_attribute(str_attr.key)  # no error if nonexistent
        remaining = as_dict(dict_attr, list_attr, int_attr)
        assert version.get_attributes() == remaining

        # A freshly fetched copy of the version reports the same attributes.
        refetched = dataset.get_version(id=version.id)
        assert refetched.get_attributes() == remaining

        for attr in (dict_attr, list_attr, int_attr):
            assert version.get_attribute(attr.key) == attr.value

        # overwrite
        # NOTE(review): this step acts on `dataset`, not `version`, and the
        # dataset never had this key set above -- possibly a copy-paste slip
        # from test_dataset.py; confirm whether `version` was intended.
        replacement = 'b'
        dataset.add_attribute(dict_attr.key, replacement)
        assert dataset.get_attribute(dict_attr.key) == replacement
Esempio n. 6
0
    def test_dataset_and_parent_ids(self, client, dataset, with_boto3):
        """Versions report their dataset's ID and the preceding version as parent."""
        path_version = dataset.create_version(Path("conftest.py"))
        s3_version = dataset.create_version(
            S3("s3://verta-starter/census-train.csv"))

        # Both versions belong to the same dataset.
        assert path_version.dataset_id == s3_version.dataset_id
        assert s3_version.dataset_id == dataset.id

        # The second version's parent is the first one.
        assert s3_version.parent_id == path_version.id
Esempio n. 7
0
    def test_get_latest_printing(self, client, dataset, capsys):
        """``get_latest_version()`` prints the ID of the version it fetched."""
        created_version = dataset.create_version(
            Path(["modelapi_hypothesis/"]))
        dataset.get_latest_version()

        stdout = capsys.readouterr().out
        expected_message = "got existing dataset version: {}".format(
            created_version.id)
        assert expected_message in stdout
Esempio n. 8
0
    def test_creation_updates_dataset_timestamp(self, client, dataset):
        """Version creation should update its dataset's time_updated field."""
        previous_timestamp = dataset._msg.time_updated

        new_version = dataset.create_version(Path(["conftest.py"]))

        time.sleep(60)  # wait for reconciler
        dataset._fetch_with_no_cache()  # refresh the dataset's message

        current_timestamp = dataset._msg.time_updated
        assert current_timestamp > previous_timestamp
        assert current_timestamp == new_version._msg.time_logged
Esempio n. 9
0
    def test_dataset_versions(self, client, organization, created_entities):
        """``dataset.versions`` yields exactly the versions that were created."""
        dataset = client.create_dataset(workspace=organization.name)
        created_entities.append(dataset)

        # One version per entry, using the first two entries of the
        # current directory listing.
        expected_ids = set()
        for filename in os.listdir(".")[:2]:
            new_version = dataset.create_version(Path(filename))
            expected_ids.add(new_version.id)

        listed_ids = {version.id for version in dataset.versions}
        assert listed_ids == expected_ids
    def test_dataset_and_parent_ids(self, client, created_entities,
                                    with_boto3):
        """Versions report their dataset's ID and the preceding version as parent."""
        dataset = client.create_dataset()
        created_entities.append(dataset)

        path_version = dataset.create_version(Path("conftest.py"))
        s3_version = dataset.create_version(
            S3("s3://verta-starter/census-train.csv"))

        # Both versions belong to the same dataset.
        assert path_version.dataset_id == s3_version.dataset_id
        assert s3_version.dataset_id == dataset.id

        # The second version's parent is the first one.
        assert s3_version.parent_id == path_version.id
Esempio n. 11
0
    def test_description(self, client, dataset, strs):
        """A version's description can be set at creation and replaced later."""
        initial_desc, updated_desc = strs[:2]

        version = dataset.create_version(Path("conftest.py"),
                                         desc=initial_desc)
        assert version.get_description() == initial_desc

        version.set_description(updated_desc)
        assert version.get_description() == updated_desc

        # A freshly fetched copy of the version reports the new description.
        refetched = dataset.get_version(id=version.id)
        assert refetched.get_description() == updated_desc
Esempio n. 12
0
    def test_path(self, client, dataset):
        """Round-trip a local file through a version with MDB versioning on.

        Writes random bytes to a file, creates a version from it with
        ``enable_mdb_versioning=True``, downloads the content again, and
        checks that the downloaded bytes match what was written.

        The source file and the downloaded copy are removed afterwards so
        the test does not leave artifacts in the working directory.
        """
        filename = "tiny1.bin"
        FILE_CONTENTS = os.urandom(2**16)
        # Every path this test may have written; cleaned up in ``finally``.
        written_paths = {filename}
        try:
            with open(filename, 'wb') as f:
                f.write(FILE_CONTENTS)

            content = Path(filename, enable_mdb_versioning=True)
            version = dataset.create_version(content)

            downloaded_filename = version.get_content().download(filename)
            written_paths.add(downloaded_filename)
            with open(downloaded_filename, 'rb') as f:
                assert f.read() == FILE_CONTENTS
        finally:
            for path in written_paths:
                # The two names may refer to the same file, or a file may
                # never have been written if an earlier step failed.
                if os.path.isfile(path):
                    os.remove(path)
Esempio n. 13
0
    def test_get_latest_printing(self, client, created_datasets, capsys):
        """``get_latest_version()`` prints the ID of the version it fetched."""
        dataset_name = verta._internal_utils._utils.generate_default_name()
        dataset = client.set_dataset(dataset_name)
        created_datasets.append(dataset)

        created_version = dataset.create_version(
            Path(["modelapi_hypothesis/"]))
        dataset.get_latest_version()

        stdout = capsys.readouterr().out
        expected_message = "got existing dataset version: {}".format(
            created_version.id)
        assert expected_message in stdout
Esempio n. 14
0
    def test_description(self, client, created_datasets, strs):
        """A version's description can be set at creation and replaced later."""
        initial_desc, updated_desc = strs[:2]
        dataset = client.create_dataset()
        created_datasets.append(dataset)

        version = dataset.create_version(Path("conftest.py"),
                                         desc=initial_desc)
        assert version.get_description() == initial_desc

        version.set_description(updated_desc)
        assert version.get_description() == updated_desc

        # A freshly fetched copy of the version reports the new description.
        refetched = dataset.get_version(id=version.id)
        assert refetched.get_description() == updated_desc
Esempio n. 15
0
    def test_tags(self, client, dataset, strs):
        """Exercise adding, reading, and deleting tags on a dataset version."""
        tag1, tag2, tag3, tag4 = strs[:4]

        def current_tags(entity):
            # Compared as sets, so the order of returned tags is irrelevant.
            return set(entity.get_tags())

        version = dataset.create_version(Path("conftest.py"), tags=[tag1])
        assert current_tags(version) == {tag1}

        version.add_tag(tag2)
        assert current_tags(version) == {tag1, tag2}

        version.add_tags([tag3, tag4])
        assert current_tags(version) == {tag1, tag2, tag3, tag4}

        version.del_tag(tag3)
        version.del_tag(tag3)  # no error if nonexistent
        assert current_tags(version) == {tag1, tag2, tag4}

        # A freshly fetched copy of the version reports the same tags.
        refetched = dataset.get_version(id=version.id)
        assert current_tags(refetched) == {tag1, tag2, tag4}
Esempio n. 16
0
    def test_repr(self, client, dataset):
        """Check that ``repr()`` of a version contains its key fields.

        Essentially copied from test_dataset.py.
        """
        description = "this is a cool version"
        tags = [u"tag1", u"tag2"]
        version = dataset.create_version(
            Path("conftest.py"),
            desc=description,
            tags=tags,
        )

        str_repr = repr(version)

        expected_fragments = [
            "version: {}".format(version.version),
            "id: {}".format(version.id),
            "time created",
            "time updated",
            "description: {}".format(description),
            "tags: {}".format(tags),
        ]
        for fragment in expected_fragments:
            assert fragment in str_repr

        # check content: every line of the content's own repr must appear
        # in the text that follows the last "content:" marker
        content = version.get_content()
        content_section = str_repr.split("content:")[-1]
        for repr_line in repr(content).splitlines():
            assert repr_line in content_section
Esempio n. 17
0
    def test_log_get(self, client, experiment_run, created_datasets, strs,
                     with_boto3):
        """Tests ExperimentRun.log_dataset_version() and ExperimentRun.get_dataset_version().

        Logs a path-based and an S3-based dataset version to the run under
        two different keys, then checks that each key returns a version with
        the same ID and the same content components as the one logged.
        """
        key1, key2 = strs[:2]
        dataset = client.create_dataset()
        # Register the dataset with the fixture, as the sibling tests do.
        created_datasets.append(dataset)

        version1 = dataset.create_version(Path("conftest.py"))
        version2 = dataset.create_version(
            S3("s3://verta-starter/census-train.csv"))

        experiment_run.log_dataset_version(key1, version1)
        experiment_run.log_dataset_version(key2, version2)

        def sorted_components(dataset_version):
            # Sort by path so the comparison ignores listing order.
            return sorted(dataset_version.get_content().list_components(),
                          key=lambda component: component.path)

        for key, version in [(key1, version1), (key2, version2)]:
            retrieved_version = experiment_run.get_dataset_version(key)
            assert retrieved_version.id == version.id
            assert (sorted_components(retrieved_version)
                    == sorted_components(version))