Ejemplo n.º 1
0
    def test_serialisation_and_deserialisation(self):
        """Check a dataset's primitive form and that it round-trips.

        A dataset is created with a fixed ID, labels given as ``["b", "a"]``
        and two tags. Its ``to_primitive()`` output is compared against a
        fully spelled-out dictionary (in which the labels appear as
        ``["a", "b"]``), and the primitive is then passed to
        ``Dataset.deserialise`` to confirm that the ID, path, name, labels
        and tags match those of the original dataset.
        """
        dataset_id = "e376fb31-8f66-414d-b99f-b43395cebbf1"
        dataset = self.create_valid_dataset(
            id=dataset_id,
            labels=["b", "a"],
            tags={"a": 1, "b": 2},
        )

        # Directory the dataset is expected to report as its path; both
        # expected file paths live beneath it.
        expected_dataset_path = os.path.join(
            REPOSITORY_ROOT,
            "tests",
            "data",
            "basic_files",
            "configuration",
            "test-dataset",
        )

        expected_primitive = {
            "name": "test-dataset",
            "labels": ["a", "b"],
            "tags": {"a": 1, "b": 2},
            "id": dataset_id,
            "path": expected_dataset_path,
            "files": [
                os.path.join(
                    expected_dataset_path,
                    "path-within-dataset",
                    file_name,
                )
                for file_name in ("a_test_file.csv", "another_test_file.csv")
            ],
        }

        serialised_dataset = dataset.to_primitive()
        self.assertEqual(serialised_dataset, expected_primitive)

        # Round trip: the rebuilt dataset must agree with the original on
        # each of these attributes.
        deserialised_dataset = Dataset.deserialise(serialised_dataset)

        for attribute in ("id", "path", "name", "labels", "tags"):
            self.assertEqual(
                getattr(dataset, attribute),
                getattr(deserialised_dataset, attribute),
            )
Ejemplo n.º 2
0
    def test_from_cloud_with_no_metadata_file(self):
        """Check a cloud directory can be loaded as a dataset and round-trips.

        Two text files are uploaded under ``my_dataset`` in the test bucket,
        and a ``Dataset`` is instantiated from that ``gs://`` path. The test
        asserts the dataset's path, name, file names and each file's cloud
        path, then confirms that serialising and deserialising the dataset
        preserves its ID, name, path and hash value.

        NOTE(review): the previous docstring also said a metadata file is
        uploaded afterwards; no assertion in this test checks that.
        """
        dataset_path = f"gs://{TEST_BUCKET_NAME}/my_dataset"
        uploads = (
            ("file_0.txt", "[1, 2, 3]"),
            ("file_1.txt", "[4, 5, 6]"),
        )

        cloud_storage_client = GoogleCloudStorageClient()

        # Populate the cloud directory with the two files.
        for file_name, contents in uploads:
            cloud_storage_client.upload_from_string(
                contents,
                cloud_path=storage.path.generate_gs_path(
                    TEST_BUCKET_NAME,
                    "my_dataset",
                    file_name,
                ),
            )

        cloud_dataset = Dataset(path=dataset_path)

        self.assertEqual(cloud_dataset.path, dataset_path)
        self.assertEqual(cloud_dataset.name, "my_dataset")

        found_names = {datafile.name for datafile in cloud_dataset.files}
        self.assertEqual(found_names, {"file_0.txt", "file_1.txt"})

        # Each file should report a cloud path directly under the dataset.
        for datafile in cloud_dataset:
            self.assertEqual(
                datafile.cloud_path,
                f"{dataset_path}/{datafile.name}",
            )

        # Test serialisation doesn't lose any information.
        primitive = cloud_dataset.to_primitive()
        deserialised_dataset = Dataset.deserialise(primitive)

        for attribute in ("id", "name", "path", "hash_value"):
            self.assertEqual(
                getattr(deserialised_dataset, attribute),
                getattr(cloud_dataset, attribute),
            )