Esempio n. 1
0
    def test_generating_signed_url_from_dataset_and_recreating_dataset_from_it(self):
        """Check that a dataset built from another dataset's signed URL exposes the original dataset's tags and its
        single datafile (contents, name, and extension).
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            local_dataset_directory = os.path.join(temporary_directory, "my-dataset-to-sign")
            local_datafile_path = os.path.join(local_dataset_directory, "my-file.dat")

            # Write one tagged datafile into the local dataset directory.
            with Datafile(path=local_datafile_path, mode="w") as (datafile, f):
                f.write("hello")
                datafile.tags = {"my": "metadata"}

            dataset = Dataset(path=local_dataset_directory, tags={"hello": "world"})
            dataset.upload(storage.path.generate_gs_path(TEST_BUCKET_NAME, "my-dataset-to-sign"))

        # Swap the Google blob signing method for the test double while the URL is generated.
        signing_patch = patch(
            "google.cloud.storage.blob.Blob.generate_signed_url",
            new=mock_generate_signed_url,
        )

        with signing_patch:
            signed_url = dataset.generate_signed_url()

        # The signed URL is the only input used to build the second dataset.
        downloaded_dataset = Dataset(path=signed_url)
        self.assertEqual(downloaded_dataset.tags, {"hello": "world"})

        with downloaded_dataset.files.one() as (downloaded_datafile, f):
            downloaded_contents = f.read()
            self.assertEqual(downloaded_contents, "hello")

        self.assertEqual(downloaded_datafile.name, "my-file.dat")
        self.assertEqual(downloaded_datafile.extension, "dat")
Esempio n. 2
0
    def test_upload_with_nested_dataset_preserves_nested_structure(self):
        """Check that datafiles uploaded from a nested local directory tree end up at the same relative paths in the
        cloud as they have locally.
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            local_paths = self._create_files_and_nested_subdirectories(temporary_directory)
            dataset = Dataset(path=temporary_directory, recursive=True)

            upload_path = storage.path.generate_gs_path(TEST_BUCKET_NAME, "my-dataset")
            dataset.upload(cloud_path=upload_path)

        def is_datafile_blob(blob):
            """Reject blobs whose name ends in ".octue" or contains the signed metadata directory name."""
            return not (blob.name.endswith(".octue") or SIGNED_METADATA_DIRECTORY in blob.name)

        # Cloud side: keep the part of each blob name that follows the dataset name, without surrounding slashes.
        cloud_datafile_relative_paths = set()

        for blob in GoogleCloudStorageClient().scandir(upload_path, filter=is_datafile_blob):
            path_after_dataset_name = blob.name.split(dataset.name)[-1]
            cloud_datafile_relative_paths.add(path_after_dataset_name.strip("/"))

        # Local side: keep the part of each path that follows the temporary directory, using "/" as the separator so
        # it can be compared with the blob names.
        local_datafile_relative_paths = set()

        for path in local_paths:
            path_after_root = path.split(temporary_directory)[-1].strip(os.path.sep)
            local_datafile_relative_paths.add(path_after_root.replace(os.path.sep, "/"))

        self.assertEqual(cloud_datafile_relative_paths, local_datafile_relative_paths)
Esempio n. 3
0
 def test_upload_works_with_implicit_cloud_location_if_cloud_location_previously_provided(self):
     """Check that `upload` can be called without a cloud path on a dataset constructed from the path returned by
     `_create_nested_cloud_dataset`. The test makes no assertions; it passes if no exception is raised.
     """
     nested_dataset = Dataset(path=self._create_nested_cloud_dataset(), recursive=True)

     # No `cloud_path` argument is given here on purpose.
     nested_dataset.upload()