Example #1
    def test_generating_signed_url_from_dataset_and_recreating_dataset_from_it(
            self):
        """Test that a signed URL can be generated for a dataset that can be used to recreate/get it, its metadata, and
        all its files.
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset_local_path = os.path.join(temporary_directory,
                                              "my-dataset-to-sign")

            with Datafile(path=os.path.join(dataset_local_path, "my-file.dat"),
                          mode="w") as (datafile, f):
                f.write("hello")
                datafile.tags = {"my": "metadata"}

            dataset = Dataset(path=dataset_local_path, tags={"hello": "world"})
            dataset.upload(
                storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                              "my-dataset-to-sign"))

        with patch("google.cloud.storage.blob.Blob.generate_signed_url",
                   new=mock_generate_signed_url):
            signed_url = dataset.generate_signed_url()

        downloaded_dataset = Dataset(path=signed_url)
        self.assertEqual(downloaded_dataset.tags, {"hello": "world"})

        with downloaded_dataset.files.one() as (downloaded_datafile, f):
            self.assertEqual(f.read(), "hello")

        self.assertEqual(downloaded_datafile.name, "my-file.dat")
        self.assertEqual(downloaded_datafile.extension, "dat")
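The `mock_generate_signed_url` patched in above isn't defined in these examples. Real URL signing needs service-account credentials that the local storage emulator used by these tests doesn't have, so a stand-in along the following lines is plausible; the body is an assumption, not the test suite's actual mock.

def mock_generate_signed_url(blob, *args, **kwargs):
    """Stand in for `google.cloud.storage.blob.Blob.generate_signed_url`.

    Since the mock replaces an instance method, it receives the blob as its
    first argument. Returning the blob's plain public URL (assumed behaviour)
    avoids signing while still yielding a URL the dataset can be read from.
    """
    return blob.public_url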
Example #2
    def test_serialisation_and_deserialisation(self):
        """Test that manifests can be serialised and deserialised."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            datasets = {
                "my_dataset_0": Dataset(
                    path=os.path.join(temporary_directory, "my_dataset_0"),
                    files=[Datafile(path=os.path.join(temporary_directory, "my_dataset_0", "my_file_0.txt"))],
                ),
                "my_dataset_1": Dataset(
                    path=os.path.join(temporary_directory, "my_dataset_1"),
                    files=[Datafile(path=os.path.join(temporary_directory, "my_dataset_1", "my_file_1.txt"))],
                ),
            }

            for dataset in datasets.values():
                dataset.update_local_metadata()

            manifest = Manifest(datasets=datasets,
                                id="7e0025cd-bd68-4de6-b48d-2643ebd5effd",
                                name="my-manifest")

            serialised_manifest = manifest.to_primitive()

            self.assertEqual(
                serialised_manifest,
                {
                    "id": manifest.id,
                    "name": "my-manifest",
                    "datasets": {
                        "my_dataset_0":
                        os.path.join(temporary_directory, "my_dataset_0"),
                        "my_dataset_1":
                        os.path.join(temporary_directory, "my_dataset_1"),
                    },
                },
            )

            deserialised_manifest = Manifest.deserialise(serialised_manifest)

        self.assertEqual(manifest.name, deserialised_manifest.name)
        self.assertEqual(manifest.id, deserialised_manifest.id)

        for key in manifest.datasets.keys():
            self.assertEqual(manifest.datasets[key].name,
                             deserialised_manifest.datasets[key].name)
            self.assertEqual(manifest.datasets[key].id,
                             deserialised_manifest.datasets[key].id)
            self.assertEqual(manifest.datasets[key].path,
                             deserialised_manifest.datasets[key].path)
Example #3
    def test_metadata_hash_is_same_for_different_datasets_with_the_same_metadata(self):
        """Test that the metadata hash is the same for datasets with different files but the same metadata."""
        first_dataset = Dataset(labels={"a", "b", "c"})
        second_dataset = Dataset(files={Datafile(path="blah", hypothetical=True)}, labels={"a", "b", "c"})
        self.assertEqual(first_dataset.metadata_hash_value, second_dataset.metadata_hash_value)
Example #4
    def test_exists_in_cloud(self):
        """Test whether all files of a dataset are in the cloud or not can be determined."""
        self.assertFalse(self.create_valid_dataset().all_files_are_in_cloud)

        with tempfile.TemporaryDirectory() as temporary_directory:
            self.assertTrue(
                Dataset(path=temporary_directory).all_files_are_in_cloud)

        files = [
            Datafile(path="gs://hello/file.txt"),
            Datafile(path="gs://goodbye/file.csv")
        ]
        self.assertTrue(Dataset(files=files).all_files_are_in_cloud)
Example #5
    def test_exiting_context_manager_of_cloud_dataset_updates_cloud_metadata(
            self):
        """Test that cloud metadata for a cloud dataset is updated on exit of the dataset context manager."""
        dataset_path = self._create_nested_cloud_dataset()
        dataset = Dataset(path=dataset_path, recursive=True)

        with dataset:
            dataset.tags = {"cat": "dog"}
            dataset.labels = {"animals"}

        reloaded_dataset = Dataset(path=dataset_path)
        self.assertEqual(reloaded_dataset.id, dataset.id)
        self.assertEqual(reloaded_dataset.tags, {"cat": "dog"})
        self.assertEqual(reloaded_dataset.labels, {"animals"})
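The `_create_nested_cloud_dataset` helper used here (and in Examples #6, #11, #14, #15, and #22) isn't shown. A minimal sketch consistent with the file names and contents asserted in Examples #14 and #15 might look like this; the default dataset name is an assumption.

    def _create_nested_cloud_dataset(self, dataset_name="nested_dataset"):
        """Upload a small nested file tree to the test bucket and return the dataset's cloud path."""
        client = GoogleCloudStorageClient()

        # File contents are taken from the assertions in Examples #14 and #15.
        files = {
            ("file_0.txt",): "[1, 2, 3]",
            ("file_1.txt",): "[4, 5, 6]",
            ("sub-directory", "sub_file.txt"): "['a', 'b', 'c']",
            ("sub-directory", "sub-sub-directory", "sub_sub_file.txt"): "['blah', 'b', 'c']",
        }

        for path_parts, contents in files.items():
            client.upload_from_string(
                string=contents,
                cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME, dataset_name, *path_parts),
            )

        return storage.path.generate_gs_path(TEST_BUCKET_NAME, dataset_name)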
Example #6
    def test_update_metadata_with_cloud_dataset(self):
        """Test the `update_metadata` method with a cloud dataset."""
        dataset_path = self._create_nested_cloud_dataset()
        dataset = Dataset(path=dataset_path)

        # Update the instance metadata but don't update the cloud stored metadata.
        dataset.tags["hello"] = "world"

        # Check the instance metadata hasn't been stored in the cloud.
        self.assertEqual(Dataset(path=dataset.path).tags, {})

        # Update the cloud stored metadata and check it.
        dataset.update_metadata()
        self.assertEqual(Dataset(path=dataset.path).tags, {"hello": "world"})
Example #7
    def test_stored_metadata_ignored_if_hypothetical_is_true(self):
        """Test that instantiation metadata is used instead of stored metadata if `hypothetical` is `True`."""
        cloud_path = storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                   "existing_dataset")

        # Create a dataset in the cloud and set some metadata on it.
        with Dataset(path=cloud_path) as dataset:
            dataset.tags = {"existing": True}

        # Load it separately from the cloud object and check that the instantiation metadata is used instead of the
        # stored metadata.
        reloaded_dataset = Dataset(path=cloud_path,
                                   tags={"new": "tag"},
                                   hypothetical=True)
        self.assertEqual(reloaded_dataset.tags, {"new": "tag"})
Example #8
    def test_update_metadata_with_local_dataset(self):
        """Test the `update_metadata` method with a local dataset."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset = Dataset(path=temporary_directory)

            # Update the instance metadata but don't update the local stored metadata.
            dataset.tags["hello"] = "world"

            # Check the instance metadata hasn't been stored locally.
            self.assertEqual(Dataset(path=temporary_directory).tags, {})

            # Update the local stored metadata and check it.
            dataset.update_metadata()
            self.assertEqual(
                Dataset(path=temporary_directory).tags, {"hello": "world"})
Example #9
    def test_download(self):
        """Test that all files in a dataset can be downloaded with one command."""
        storage_client = GoogleCloudStorageClient()

        dataset_name = "another-dataset"
        storage_client.upload_from_string(
            string=json.dumps([1, 2, 3]),
            cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                     dataset_name,
                                                     "file_0.txt"),
        )
        storage_client.upload_from_string(
            string=json.dumps([4, 5, 6]),
            cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                     dataset_name,
                                                     "file_1.txt"),
        )

        dataset = Dataset(path=f"gs://{TEST_BUCKET_NAME}/{dataset_name}")

        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset.download(local_directory=temporary_directory)

            with open(os.path.join(temporary_directory, "file_0.txt")) as f:
                self.assertEqual(f.read(), "[1, 2, 3]")

            with open(os.path.join(temporary_directory, "file_1.txt")) as f:
                self.assertEqual(f.read(), "[4, 5, 6]")
Example #10
    def test_upload_with_nested_dataset_preserves_nested_structure(self):
        """Test that uploading a dataset containing datafiles in a nested directory structure to the cloud preserves
        this structure in the cloud.
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            local_paths = self._create_files_and_nested_subdirectories(
                temporary_directory)
            dataset = Dataset(path=temporary_directory, recursive=True)

            upload_path = storage.path.generate_gs_path(
                TEST_BUCKET_NAME, "my-dataset")
            dataset.upload(cloud_path=upload_path)

        cloud_datafile_relative_paths = {
            blob.name.split(dataset.name)[-1].strip("/")
            for blob in GoogleCloudStorageClient().scandir(
                upload_path,
                filter=lambda blob: not blob.name.endswith(".octue") and
                SIGNED_METADATA_DIRECTORY not in blob.name,
            )
        }

        # Check that the paths relative to the dataset directory are the same in the cloud as they are locally.
        local_datafile_relative_paths = {
            path.split(temporary_directory)[-1].strip(os.path.sep).replace(os.path.sep, "/")
            for path in local_paths
        }

        self.assertEqual(cloud_datafile_relative_paths,
                         local_datafile_relative_paths)
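The `_create_files_and_nested_subdirectories` helper (also used in Examples #21, #28, and #30) isn't shown either. It plausibly mirrors the cloud helper sketched under Example #5, creating the file tree locally and returning all the file paths; the file names and contents below are assumptions.

    def _create_files_and_nested_subdirectories(self, directory_path):
        """Create files in the given directory and in nested subdirectories, returning their paths."""
        paths = [
            os.path.join(directory_path, "file_0.txt"),
            os.path.join(directory_path, "file_1.txt"),
            os.path.join(directory_path, "sub-directory", "sub_file.txt"),
            os.path.join(directory_path, "sub-directory", "sub-sub-directory", "sub_sub_file.txt"),
        ]

        # Creating the deepest directory also creates the intermediate one.
        os.makedirs(os.path.join(directory_path, "sub-directory", "sub-sub-directory"))

        for path in paths:
            with open(path, "w") as f:
                f.write("some data")

        return paths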
Example #11
    def test_update_cloud_metadata(self):
        """Test that metadata for a cloud dataset can be stored in the cloud and used on re-instantiation of the same
        dataset.
        """
        dataset_path = self._create_nested_cloud_dataset()
        dataset = Dataset(path=dataset_path)
        self.assertEqual(dataset.tags, {})

        dataset.tags = {"some": "tags"}
        dataset.update_cloud_metadata()

        dataset_reloaded = Dataset(path=dataset_path)
        self.assertEqual(dataset.id, dataset_reloaded.id)
        self.assertEqual(dataset.tags, dataset_reloaded.tags)
        self.assertEqual(dataset.labels, dataset_reloaded.labels)
        self.assertEqual(dataset.hash_value, dataset_reloaded.hash_value)
Example #12
    def test_from_cloud(self):
        """Test that a Dataset in cloud storage can be accessed via a cloud path."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset = create_dataset_with_two_files(temporary_directory)
            dataset.tags = {"a": "b", "c": 1}

            cloud_path = storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                       "a_directory",
                                                       dataset.name)
            dataset.upload(cloud_path)
            persisted_dataset = Dataset(path=cloud_path)

            self.assertEqual(
                persisted_dataset.path,
                f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}")
            self.assertEqual(persisted_dataset.id, dataset.id)
            self.assertEqual(persisted_dataset.name, dataset.name)
            self.assertEqual(persisted_dataset.hash_value, dataset.hash_value)
            self.assertEqual(persisted_dataset.tags, dataset.tags)
            self.assertEqual(persisted_dataset.labels, dataset.labels)
            self.assertEqual(
                {file.name for file in persisted_dataset.files},
                {file.name for file in dataset.files},
            )

            for file in persisted_dataset:
                self.assertEqual(
                    file.cloud_path,
                    f"gs://{TEST_BUCKET_NAME}/a_directory/{dataset.name}/{file.name}"
                )
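The module-level `create_dataset_with_two_files` factory isn't shown. A sketch consistent with how it's used here might be as follows; the file names and contents are assumptions.

def create_dataset_with_two_files(dataset_directory_path):
    """Create and return a local dataset containing two datafiles in the given directory."""
    paths = [os.path.join(dataset_directory_path, name) for name in ("file_1.txt", "file_2.txt")]

    for path in paths:
        with open(path, "w") as f:
            f.write("some data")

    return Dataset(path=dataset_directory_path, files=[Datafile(path=path) for path in paths])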
Example #13
    def test_get_file_by_label(self):
        """Ensure files can be accessed by label from the dataset."""
        files = [
            Datafile(path="path-within-dataset/a_my_file.csv",
                     labels="one a b3 all"),
            Datafile(path="path-within-dataset/a_your_file.csv",
                     labels="two a2 b3 all"),
            Datafile(path="path-within-dataset/a_your_file.csv",
                     labels="three all"),
        ]

        resource = Dataset(files=files)

        # Check working for single result
        self.assertEqual(
            resource.get_file_by_label("three").labels, files[2].labels)

        # Check raises for too many results
        with self.assertRaises(
                exceptions.UnexpectedNumberOfResultsException) as e:
            resource.get_file_by_label("all")

        self.assertIn("More than one result found", e.exception.args[0])

        # Check raises for no result
        with self.assertRaises(
                exceptions.UnexpectedNumberOfResultsException) as e:
            resource.get_file_by_label("billyjeanisnotmylover")

        self.assertIn(
            "No results found for filters {'labels__contains': 'billyjeanisnotmylover'}",
            e.exception.args[0])
Example #14
    def test_download_from_nested_dataset(self):
        """Test that all files in a nested dataset can be downloaded with one command."""
        dataset_path = self._create_nested_cloud_dataset()

        dataset = Dataset(path=dataset_path, recursive=True)

        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset.download(local_directory=temporary_directory)

            with open(os.path.join(temporary_directory, "file_0.txt")) as f:
                self.assertEqual(f.read(), "[1, 2, 3]")

            with open(os.path.join(temporary_directory, "file_1.txt")) as f:
                self.assertEqual(f.read(), "[4, 5, 6]")

            with open(
                    os.path.join(temporary_directory, "sub-directory",
                                 "sub_file.txt")) as f:
                self.assertEqual(f.read(), "['a', 'b', 'c']")

            with open(
                    os.path.join(temporary_directory, "sub-directory",
                                 "sub-sub-directory",
                                 "sub_sub_file.txt")) as f:
                self.assertEqual(f.read(), "['blah', 'b', 'c']")
Example #15
    def test_download_from_nested_dataset_with_no_local_directory_given(self):
        """Test that, when downloading all files from a nested dataset and no local directory is given, the dataset
        structure is preserved in the temporary directory used.
        """
        dataset_path = self._create_nested_cloud_dataset()

        dataset = Dataset(path=dataset_path, recursive=True)

        # Mock the temporary directory created in `Dataset.download_all_files` so we can access it for the test.
        temporary_directory = tempfile.TemporaryDirectory()

        with patch("tempfile.TemporaryDirectory",
                   return_value=temporary_directory):
            dataset.download()

        with open(os.path.join(temporary_directory.name, "file_0.txt")) as f:
            self.assertEqual(f.read(), "[1, 2, 3]")

        with open(os.path.join(temporary_directory.name, "file_1.txt")) as f:
            self.assertEqual(f.read(), "[4, 5, 6]")

        with open(
                os.path.join(temporary_directory.name, "sub-directory",
                             "sub_file.txt")) as f:
            self.assertEqual(f.read(), "['a', 'b', 'c']")

        with open(
                os.path.join(temporary_directory.name, "sub-directory",
                             "sub-sub-directory", "sub_sub_file.txt")) as f:
            self.assertEqual(f.read(), "['blah', 'b', 'c']")
Example #16
    def test_instantiation_metadata_used_if_not_hypothetical_but_no_stored_metadata(self):
        """Test that instantiation metadata is used if `hypothetical` is `False` but there's no stored metadata."""
        cloud_path = storage.path.generate_gs_path(TEST_BUCKET_NAME, "non_existing_dataset")
        dataset = Dataset(path=cloud_path, tags={"new": "tag"})
        self.assertEqual(dataset.tags, {"new": "tag"})
Example #17
    def test_instantiating_from_serialised_cloud_datasets_with_no_dataset_json_file(
            self):
        """Test that a Manifest can be instantiated from a serialized cloud dataset with no `dataset.json` file. This
        simulates what happens when such a cloud dataset is referred to in a manifest received by a child service.
        """
        GoogleCloudStorageClient().upload_from_string(
            "[1, 2, 3]",
            cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                     "my_dataset",
                                                     "file_0.txt"),
        )

        GoogleCloudStorageClient().upload_from_string(
            "[4, 5, 6]",
            cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                     "my_dataset",
                                                     "file_1.txt"),
        )

        serialised_cloud_dataset = Dataset(
            path=f"gs://{TEST_BUCKET_NAME}/my_dataset").to_primitive()

        manifest = Manifest(datasets={"my_dataset": serialised_cloud_dataset})
        self.assertEqual(len(manifest.datasets), 1)
        self.assertEqual(manifest.datasets["my_dataset"].path,
                         f"gs://{TEST_BUCKET_NAME}/my_dataset")
        self.assertEqual(len(manifest.datasets["my_dataset"].files), 2)
Example #18
    def test_finalise_with_upload(self):
        """Test that the `finalise` method can be used to upload the output manifest's datasets to a cloud location
        and that it updates the manifest with signed URLs for accessing them.
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset_path = os.path.join(temporary_directory, "the_dataset")

            with Datafile(path=os.path.join(dataset_path, "my_file.dat"), mode="w") as (datafile, f):
                f.write("hello")

            output_manifest = Manifest(
                datasets={
                    "the_dataset": Dataset(
                        path=dataset_path, files={datafile.local_path}, labels={"one", "two", "three"}
                    )
                }
            )

            analysis = Analysis(
                twine={
                    "output_values_schema": {"type": "object", "properties": {"blah": {"type": "integer"}}},
                    "output_manifest": {"datasets": {"the_dataset": {"purpose": "testing"}}},
                },
                output_values={"blah": 3},
                output_manifest=output_manifest,
            )

            with patch("google.cloud.storage.blob.Blob.generate_signed_url", new=mock_generate_signed_url):
                analysis.finalise(upload_output_datasets_to=f"gs://{TEST_BUCKET_NAME}/datasets")

        signed_url_for_dataset = analysis.output_manifest.datasets["the_dataset"].path
        self.assertTrue(storage.path.is_url(signed_url_for_dataset))

        self.assertTrue(
            signed_url_for_dataset.startswith(
                f"{self.test_result_modifier.storage_emulator_host}/{TEST_BUCKET_NAME}/datasets/the_dataset"
            )
        )

        downloaded_dataset = Dataset(path=signed_url_for_dataset)
        self.assertEqual(downloaded_dataset.name, "the_dataset")
        self.assertEqual(len(downloaded_dataset.files), 1)
        self.assertEqual(downloaded_dataset.labels, {"one", "two", "three"})

        with downloaded_dataset.files.one() as (downloaded_datafile, f):
            self.assertEqual(f.read(), "hello")
Example #19
    def test_filter_name_filters_exclude_path(self):
        """Ensure that filters applied to the name do not catch terms in the rest of the path."""
        resource = Dataset(files=[
            Datafile(path="first-path-within-dataset/a_test_file.csv"),
            Datafile(path="second-path-within-dataset/a_test_file.txt"),
        ])
        files = resource.files.filter(name__icontains="second")
        self.assertEqual(0, len(files))
Example #20
    def test_cannot_add_non_datafiles(self):
        """Ensures that exception will be raised if adding a non-datafile object"""
        class NotADatafile:
            pass

        resource = Dataset()
        with self.assertRaises(exceptions.InvalidInputException):
            resource.add(NotADatafile())
Example #21
    def test_exiting_context_manager_of_local_dataset_updates_local_metadata(
            self):
        """Test that local metadata for a local dataset is updated on exit of the dataset context manager."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            self._create_files_and_nested_subdirectories(temporary_directory)

            dataset = Dataset(path=temporary_directory, recursive=True)

            with dataset:
                dataset.tags = {"cat": "dog"}
                dataset.labels = {"animals"}

            reloaded_dataset = Dataset(path=temporary_directory,
                                       recursive=True)
            self.assertEqual(reloaded_dataset.id, dataset.id)
            self.assertEqual(reloaded_dataset.tags, {"cat": "dog"})
            self.assertEqual(reloaded_dataset.labels, {"animals"})
Example #22
    def test_upload_works_with_implicit_cloud_location_if_cloud_location_previously_provided(self):
        """Test that `Dataset.upload` works with an implicit cloud location if the cloud location has previously
        been provided.
        """
        dataset_path = self._create_nested_cloud_dataset()
        dataset = Dataset(path=dataset_path, recursive=True)
        dataset.upload()
Example #23
    def test_error_raised_if_attempting_to_generate_signed_url_for_local_dataset(
            self):
        """Test that an error is raised if trying to generate a signed URL for a local dataset."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            dataset = Dataset(path=temporary_directory,
                              tags={"hello": "world"})

            with self.assertRaises(exceptions.CloudLocationNotSpecified):
                dataset.generate_signed_url()
Example #24
    def test_stored_metadata_has_priority_over_instantiation_metadata_if_not_hypothetical(
            self):
        """Test that stored metadata is used instead of instantiation metadata if `hypothetical` is `False`."""
        cloud_path = storage.path.generate_gs_path(TEST_BUCKET_NAME,
                                                   "existing_dataset")

        # Create a dataset in the cloud and set some metadata on it.
        with Dataset(path=cloud_path) as dataset:
            dataset.tags = {"existing": True}

        # Load it separately from the cloud object and check that the stored metadata is used instead of the
        # instantiation metadata.
        with self.assertLogs() as logging_context:
            reloaded_dataset = Dataset(path=cloud_path, tags={"new": "tag"})

        self.assertEqual(reloaded_dataset.tags, {"existing": True})
        self.assertIn(
            "Overriding metadata given at instantiation with stored metadata",
            logging_context.output[0])
Example #25
    def test_filter_name_filters_include_extension(self):
        """Ensures that filters applied to the name will catch terms in the extension"""
        files = [
            Datafile(path="path-within-dataset/a_test_file.csv"),
            Datafile(path="path-within-dataset/a_test_file.txt"),
        ]

        self.assertEqual(
            Dataset(files=files).files.filter(name__icontains="txt").pop().path,
            FilterSet({files[1]}).pop().local_path,
        )
Example #26
    def test_all_datasets_are_in_cloud(self):
        """Test whether all files of all datasets in a manifest are in the cloud or not can be determined."""
        self.assertFalse(
            self.create_valid_manifest().all_datasets_are_in_cloud)
        self.assertTrue(Manifest().all_datasets_are_in_cloud)

        files = [
            Datafile(path="gs://hello/file.txt"),
            Datafile(path="gs://goodbye/file.csv")
        ]
        manifest = Manifest(datasets={"my_dataset": Dataset(files=files)})
        self.assertTrue(manifest.all_datasets_are_in_cloud)
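`create_valid_manifest` isn't shown. Given `create_valid_dataset` in Example #27 below (possibly defined on a shared test base class), a plausible counterpart is the following; the dataset key is an assumption.

    def create_valid_manifest(self, **kwargs):
        """Create a manifest wrapping a valid local dataset (see `create_valid_dataset` in Example #27)."""
        return Manifest(datasets={"my_dataset": self.create_valid_dataset()}, **kwargs)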
Example #27
    def create_valid_dataset(self, **kwargs):
        """Create a valid dataset with two valid datafiles (they're the same file in this case)."""
        path = os.path.join(self.data_path, "basic_files", "configuration", "test-dataset")

        return Dataset(
            path=path,
            files=[
                Datafile(path=os.path.join(path, "path-within-dataset", "a_test_file.csv")),
                Datafile(path=os.path.join(path, "path-within-dataset", "another_test_file.csv")),
            ],
            **kwargs
        )
Example #28
    def test_from_local_directory_recursively(self):
        """Test that a dataset can be instantiated from a local nested directory including its subdirectories."""
        with tempfile.TemporaryDirectory() as temporary_directory:
            paths = self._create_files_and_nested_subdirectories(
                temporary_directory)
            dataset = Dataset(path=temporary_directory, recursive=True)

            # Check that all the files from the directory are present in the dataset.
            datafile_paths = {datafile.local_path for datafile in dataset.files}
            self.assertEqual(datafile_paths, set(paths))
Example #29
    def test_filter_catches_single_underscore_mistake(self):
        """Ensure that if the filter name contains only single underscores, an error is raised."""
        resource = Dataset(files=[
            Datafile(path="path-within-dataset/A_Test_file.csv"),
            Datafile(path="path-within-dataset/a_test_file.txt"),
        ])

        with self.assertRaises(exceptions.InvalidInputException) as e:
            resource.files.filter(name_icontains="Test")

        self.assertIn(
            "Invalid filter name 'name_icontains'. Filter names should be in the form",
            e.exception.args[0])
Example #30
    def test_update_local_metadata(self):
        """Test that metadata for a local dataset can be stored locally and used on re-instantiation of the same
        dataset.
        """
        with tempfile.TemporaryDirectory() as temporary_directory:
            self._create_files_and_nested_subdirectories(temporary_directory)

            dataset = Dataset(
                path=temporary_directory,
                recursive=True,
                id="69253db4-7972-42de-8ccc-61336a28cd50",
                tags={"cat": "dog"},
                labels=["animals"],
            )

            dataset.update_local_metadata()

            dataset_reloaded = Dataset(path=temporary_directory,
                                       recursive=True)
            self.assertEqual(dataset.id, dataset_reloaded.id)
            self.assertEqual(dataset.tags, dataset_reloaded.tags)
            self.assertEqual(dataset.labels, dataset_reloaded.labels)
            self.assertEqual(dataset.hash_value, dataset_reloaded.hash_value)