Example #1
0
    def test_error_raised_if_datasets_are_missing_from_manifest(self):
        """Check that validation fails when the manifest lacks a dataset the twine declares.

        The twine declares the datasets "cat" and "dog", whereas the manifest only provides "my-dataset" and "dog",
        so validating it is expected to raise `InvalidManifestContents` naming the absent "cat" dataset.
        """
        manifest_without_cat = {
            "id": "30d2c75c-a7b9-4f16-8627-9c8d5cc04bf4",
            "datasets": {"my-dataset": "gs://my-bucket/my_dataset", "dog": "gs://dog-house/dog"},
        }

        twine_source = """
            {
                "input_manifest": {
                    "datasets": {
                        "cat": {
                            "purpose": "blah"
                        },
                        "dog": {
                            "purpose": "blah"
                        }
                    }
                }
            }
        """

        validator = Twine(source=twine_source)

        with self.assertRaises(exceptions.InvalidManifestContents) as raised:
            validator.validate_input_manifest(source=manifest_without_cat)

        expected_message = "A dataset named 'cat' is expected in the input_manifest but is missing."
        self.assertEqual(raised.exception.message, expected_message)
Example #2
0
    def test_missing_manifest_files(self):
        """Check that validating a manifest from a non-existent file raises the strand-specific "not found" error.

        The same missing path is passed to the configuration, input and output manifest validators in turn, and each
        is expected to raise its own `...FileNotFound` exception.
        """
        missing_path = os.path.join(self.path, "not_a_file.json")
        validator = Twine(source=self.VALID_MANIFEST_STRAND)

        # Configuration manifest strand.
        with self.assertRaises(exceptions.ConfigurationManifestFileNotFound):
            validator.validate_configuration_manifest(source=missing_path)

        # Input manifest strand.
        with self.assertRaises(exceptions.InputManifestFileNotFound):
            validator.validate_input_manifest(source=missing_path)

        # Output manifest strand.
        with self.assertRaises(exceptions.OutputManifestFileNotFound):
            validator.validate_output_manifest(source=missing_path)
Example #3
0
    def test_error_raised_if_multiple_datasets_have_same_name(self):
        """Check that a `KeyError` is raised when the input manifest repeats a dataset name.

        The manifest JSON below contains the key "met_mast_data" twice under "datasets" (with different dataset IDs),
        while the twine declares that dataset once.
        """
        manifest_with_duplicate = """
            {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e19",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": [],
                        "files": []
                    },
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e18",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": [],
                        "files": []
                    }
                }
            }
        """

        twine_source = """
            {
                "input_manifest": {
                    "datasets": {
                        "met_mast_data": {
                            "purpose": "A dataset containing meteorological mast data"
                        }
                    }
                }
            }
        """

        validator = Twine(source=twine_source)

        with self.assertRaises(KeyError):
            validator.validate_input_manifest(source=manifest_with_duplicate)
Example #4
0
    def test_valid_manifest_files(self):
        """Check that well-formed configuration, input and output manifests validate without raising.

        Each manifest is validated straight after it is defined, in the order configuration, input, output. The input
        manifest mixes a fully specified dataset with one given only as a `gs://` URI string.
        """
        validator = Twine(source=self.VALID_MANIFEST_STRAND)

        # Configuration manifest: one dataset holding two files.
        configuration_manifest = """
            {
                "id": "3ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "configuration_files_data": {
                        "id": "34ad7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "configuration_files_data",
                        "tags": {},
                        "labels": ["the", "config", "labels"],
                        "files": [
                            {
                                "path": "configuration/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "configuration/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    }
                }
            }
        """
        validator.validate_configuration_manifest(source=configuration_manifest)

        # Input manifest: one fully specified dataset plus one dataset given as a URI string.
        input_manifest = """
            {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": ["met", "mast", "wind"],
                        "files": [
                            {
                                "path": "input/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "input/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    },
                    "scada_data": "gs://my-bucket/scada-data"
                }
            }
        """
        validator.validate_input_manifest(source=input_manifest)

        # Output manifest: one dataset holding two files.
        output_manifest = """
            {
                "id": "2ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "output_files_data": {
                        "id": "1ead7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "output_files_data",
                        "tags": {},
                        "labels": ["the", "output", "labels"],
                        "files": [
                            {
                                "path": "input/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "input/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    }
                }
            }
        """
        validator.validate_output_manifest(source=output_manifest)