def test_generating_signed_url_from_dataset_and_recreating_dataset_from_it(self):
    """Test that a signed URL generated for an uploaded dataset can be used to reconstruct the dataset, including
    its metadata (tags) and the contents and metadata of all its files.
    """
    with tempfile.TemporaryDirectory() as workspace:
        source_dataset_path = os.path.join(workspace, "my-dataset-to-sign")

        # Create a single tagged datafile inside the dataset directory.
        with Datafile(path=os.path.join(source_dataset_path, "my-file.dat"), mode="w") as (datafile, f):
            f.write("hello")
            datafile.tags = {"my": "metadata"}

        source_dataset = Dataset(path=source_dataset_path, tags={"hello": "world"})
        source_dataset.upload(storage.path.generate_gs_path(TEST_BUCKET_NAME, "my-dataset-to-sign"))

        # Avoid needing real service-account credentials by mocking signed URL generation.
        with patch("google.cloud.storage.blob.Blob.generate_signed_url", new=mock_generate_signed_url):
            signed_url = source_dataset.generate_signed_url()

        # Recreate the dataset purely from the signed URL and check its metadata survived the round trip.
        recreated_dataset = Dataset(path=signed_url)
        self.assertEqual(recreated_dataset.tags, {"hello": "world"})

        # Check the single file's contents and metadata also survived.
        with recreated_dataset.files.one() as (recreated_datafile, f):
            self.assertEqual(f.read(), "hello")
            self.assertEqual(recreated_datafile.name, "my-file.dat")
            self.assertEqual(recreated_datafile.extension, "dat")
def test_upload_with_nested_dataset_preserves_nested_structure(self):
    """Test that uploading a dataset whose datafiles sit in a nested directory structure preserves that structure
    in the cloud.
    """
    with tempfile.TemporaryDirectory() as root_directory:
        local_paths = self._create_files_and_nested_subdirectories(root_directory)

        nested_dataset = Dataset(path=root_directory, recursive=True)
        destination = storage.path.generate_gs_path(TEST_BUCKET_NAME, "my-dataset")
        nested_dataset.upload(cloud_path=destination)

        # Ignore octue metadata files and the signed-metadata directory when listing uploaded blobs.
        def is_data_blob(blob):
            return not blob.name.endswith(".octue") and SIGNED_METADATA_DIRECTORY not in blob.name

        cloud_relative_paths = set()

        for blob in GoogleCloudStorageClient().scandir(destination, filter=is_data_blob):
            cloud_relative_paths.add(blob.name.split(nested_dataset.name)[-1].strip("/"))

        # The paths relative to the dataset directory should be the same in the cloud as they are locally.
        local_relative_paths = set()

        for path in local_paths:
            relative_path = path.split(root_directory)[-1].strip(os.path.sep)
            local_relative_paths.add(relative_path.replace(os.path.sep, "/"))

        self.assertEqual(cloud_relative_paths, local_relative_paths)
def test_upload_works_with_implicit_cloud_location_if_cloud_location_previously_provided(self):
    """Test that `Dataset.upload` succeeds without an explicit cloud location when the dataset already knows its
    cloud location from an earlier operation.
    """
    # The helper creates the dataset in the cloud, so its cloud location is already recorded.
    existing_dataset_path = self._create_nested_cloud_dataset()

    # Re-uploading with no arguments should reuse the previously provided location rather than raise.
    Dataset(path=existing_dataset_path, recursive=True).upload()