def test_dataset_and_parent_ids(self, client, dataset, with_boto3):
    """Check that successive versions share the dataset id and are chained.

    Two versions are created on the same dataset, one from a local path and
    one from an S3 location; the second must name the first as its parent.
    """
    first = dataset.create_version(Path("conftest.py"))

    s3_content = S3("s3://verta-starter/census-train.csv")
    second = dataset.create_version(s3_content)

    # Both versions must report the dataset they were created from.
    assert first.dataset_id == second.dataset_id == dataset.id
    # The later version must point back at the earlier one.
    assert second.parent_id == first.id
def test_create_get_s3(self, client, dataset, with_boto3):
    """Create a version from S3 locations and fetch it back by id."""
    pytest.importorskip("boto3")

    s3_locations = [
        "s3://verta-starter/",
        "s3://verta-starter/census-test.csv",
    ]
    created = dataset.create_version(S3(s3_locations))

    # Looking the version up by its id must yield a version with that id.
    fetched = dataset.get_version(id=created.id)
    assert created.id == fetched.id
def test_create_get_s3(self, client, created_datasets, with_boto3):
    """Create a version from S3 locations on a new dataset and fetch it by id."""
    pytest.importorskip("boto3")

    # Set up a dataset under a generated name and hand it to the fixture.
    dataset_name = verta._internal_utils._utils.generate_default_name()
    dataset = client.set_dataset(dataset_name)
    created_datasets.append(dataset)

    s3_locations = [
        "s3://verta-starter/",
        "s3://verta-starter/census-test.csv",
    ]
    created = dataset.create_version(S3(s3_locations))

    # Looking the version up by its id must yield a version with that id.
    fetched = dataset.get_version(id=created.id)
    assert created.id == fetched.id
def test_dataset_and_parent_ids(self, client, created_entities, with_boto3):
    """Check that successive versions share the dataset id and are chained.

    A new dataset is created through the client, then given one version from
    a local path and one from an S3 location; the second version must name
    the first as its parent.
    """
    dataset = client.create_dataset()
    # Hand the dataset to the fixture (presumably for teardown cleanup).
    created_entities.append(dataset)

    first = dataset.create_version(Path("conftest.py"))

    s3_content = S3("s3://verta-starter/census-train.csv")
    second = dataset.create_version(s3_content)

    # Both versions must report the dataset they were created from.
    assert first.dataset_id == second.dataset_id == dataset.id
    # The later version must point back at the earlier one.
    assert second.parent_id == first.id
def test_s3(self, client, dataset):
    """Check that content downloaded from a version matches the S3 object.

    The object is first fetched directly with boto3 to obtain reference
    bytes, then a version is created with ``enable_mdb_versioning=True`` and
    its content is downloaded and compared against the reference.

    Both files written to disk are removed even if the test fails, so a
    failing run leaves nothing behind in the working directory.
    """
    s3 = pytest.importorskip("boto3").client('s3')

    filename = "tiny1.bin"
    bucket = "verta-versioned-bucket"
    key = "tiny-files/{}".format(filename)
    s3_key = "s3://{}/{}".format(bucket, key)

    # get file contents directly from S3 for reference
    try:
        s3.download_file(bucket, key, filename)
        with open(filename, 'rb') as f:
            expected_contents = f.read()
    finally:
        # The download may have failed before the file was created.
        if os.path.isfile(filename):
            os.remove(filename)

    content = S3(s3_key, enable_mdb_versioning=True)
    version = dataset.create_version(content)

    downloaded_filename = version.get_content().download(s3_key)
    try:
        with open(downloaded_filename, 'rb') as f:
            assert f.read() == expected_contents
    finally:
        # Guarded so a missing file does not mask the original failure.
        if os.path.isfile(downloaded_filename):
            os.remove(downloaded_filename)
def test_log_get(self, client, experiment_run, created_datasets, strs, with_boto3):
    """Tests ExperimentRun.log_dataset_version() and ExperimentRun.get_dataset_version().

    Two versions (one from a local path, one from S3) are logged to the run
    under distinct keys; each must be retrievable by its key with the same
    id and the same components.
    """
    def sorted_components(dataset_version):
        # Sort by path so the comparison does not depend on listing order.
        return sorted(
            dataset_version.get_content().list_components(),
            key=lambda component: component.path,
        )

    key1, key2 = strs[:2]

    dataset = client.create_dataset()
    # Register the dataset with the requested fixture, which was previously
    # left unused here; presumably it handles cleanup -- confirm in conftest.
    created_datasets.append(dataset)

    version1 = dataset.create_version(Path("conftest.py"))
    version2 = dataset.create_version(
        S3("s3://verta-starter/census-train.csv"))

    experiment_run.log_dataset_version(key1, version1)
    experiment_run.log_dataset_version(key2, version2)

    for key, version in [(key1, version1), (key2, version2)]:
        retrieved_version = experiment_run.get_dataset_version(key)
        assert retrieved_version.id == version.id

        retrieved_components = sorted_components(retrieved_version)
        components = sorted_components(version)
        assert retrieved_components == components