Example #1
0
    def test_dataset_and_parent_ids(self, client, dataset, with_boto3):
        """Check the dataset and parent IDs of two consecutive versions.

        One version is created from ``Path`` content and a second from ``S3``
        content on the same dataset; both must report the dataset's ID, and
        the second must report the first as its parent.
        """
        first = dataset.create_version(Path("conftest.py"))

        s3_content = S3("s3://verta-starter/census-train.csv")
        second = dataset.create_version(s3_content)

        # every version carries the ID of the dataset it was created on
        assert first.dataset_id == second.dataset_id == dataset.id
        # the second version's parent is the version created just before it
        assert second.parent_id == first.id
Example #2
0
    def test_create_get_s3(self, client, dataset, with_boto3):
        """Create a version from ``S3`` content and fetch it back by ID."""
        pytest.importorskip("boto3")

        s3_locations = [
            "s3://verta-starter/",
            "s3://verta-starter/census-test.csv",
        ]
        created = dataset.create_version(S3(s3_locations))

        # look the version up again through the dataset and compare IDs
        fetched = dataset.get_version(id=created.id)
        assert created.id == fetched.id
Example #3
0
    def test_create_get_s3(self, client, created_datasets, with_boto3):
        """Create a version from ``S3`` content on a new dataset and re-fetch it.

        The dataset is set on the client under a generated default name and
        appended to ``created_datasets`` (presumably so the fixture can clean
        it up afterwards -- confirm against the fixture definition).
        """
        pytest.importorskip("boto3")

        dataset = client.set_dataset(
            verta._internal_utils._utils.generate_default_name())
        created_datasets.append(dataset)

        s3_locations = [
            "s3://verta-starter/",
            "s3://verta-starter/census-test.csv",
        ]
        created = dataset.create_version(S3(s3_locations))

        # look the version up again through the dataset and compare IDs
        fetched = dataset.get_version(id=created.id)
        assert created.id == fetched.id
    def test_dataset_and_parent_ids(self, client, created_entities,
                                    with_boto3):
        """Check the dataset and parent IDs of two consecutive versions.

        A dataset is created through the client and appended to
        ``created_entities`` (presumably for fixture cleanup -- confirm
        against the fixture definition). One version is then created from
        ``Path`` content and a second from ``S3`` content.
        """
        dataset = client.create_dataset()
        created_entities.append(dataset)

        first = dataset.create_version(Path("conftest.py"))

        s3_content = S3("s3://verta-starter/census-train.csv")
        second = dataset.create_version(s3_content)

        # every version carries the ID of the dataset it was created on
        assert first.dataset_id == second.dataset_id == dataset.id
        # the second version's parent is the version created just before it
        assert second.parent_id == first.id
Example #5
0
    def test_s3(self, client, dataset):
        """Check that a versioned S3 object downloads with identical bytes.

        The object is first downloaded directly with boto3 to obtain
        reference contents. A dataset version is then created from ``S3``
        content with ``enable_mdb_versioning=True``, and the file obtained
        through ``version.get_content().download()`` is compared against the
        reference bytes.

        Both files written to disk are removed again, whether the test
        passes or fails, so repeated runs do not leave files behind.
        """
        s3 = pytest.importorskip("boto3").client('s3')

        filename = "tiny1.bin"
        bucket = "verta-versioned-bucket"
        key = "tiny-files/{}".format(filename)
        s3_key = "s3://{}/{}".format(bucket, key)

        # get file contents directly from S3 for reference
        try:
            s3.download_file(bucket, key, filename)
            with open(filename, 'rb') as f:
                FILE_CONTENTS = f.read()
        finally:
            # remove the reference copy even if the download or read failed
            if os.path.exists(filename):
                os.remove(filename)

        content = S3(s3_key, enable_mdb_versioning=True)
        version = dataset.create_version(content)

        downloaded_filename = version.get_content().download(s3_key)
        try:
            with open(downloaded_filename, 'rb') as f:
                assert f.read() == FILE_CONTENTS
        finally:
            # guard with isfile() so a cleanup error cannot mask the real
            # failure if the download did not produce a regular file
            if os.path.isfile(downloaded_filename):
                os.remove(downloaded_filename)
Example #6
0
    def test_log_get(self, client, experiment_run, created_datasets, strs,
                     with_boto3):
        """Tests ExperimentRun.log_dataset_version() and ExperimentRun.get_dataset_version().

        Two dataset versions (one from ``Path`` content, one from ``S3``
        content) are logged to the run under separate keys. Each is then
        retrieved by key and must match the logged version in both ID and
        content components.
        """
        key1, key2 = strs[:2]
        dataset = client.create_dataset()
        # Hand the new dataset to the fixture, as the sibling tests that
        # create their own dataset do; presumably this is what lets the
        # fixture clean it up -- confirm against the fixture definition.
        created_datasets.append(dataset)

        version1 = dataset.create_version(Path("conftest.py"))
        version2 = dataset.create_version(
            S3("s3://verta-starter/census-train.csv"))

        experiment_run.log_dataset_version(key1, version1)
        experiment_run.log_dataset_version(key2, version2)

        for key, version in [(key1, version1), (key2, version2)]:
            retrieved_version = experiment_run.get_dataset_version(key)
            assert retrieved_version.id == version.id

            # sort both component lists by path so the comparison does not
            # depend on the order in which list_components() returns them
            retrieved_components = sorted(
                retrieved_version.get_content().list_components(),
                key=lambda component: component.path)
            components = sorted(version.get_content().list_components(),
                                key=lambda component: component.path)
            assert retrieved_components == components