Ejemplo n.º 1
0
    def test_error_raised_when_required_tags_missing_for_validate_input_manifest(self):
        """Check that `InvalidManifestContents` is raised on running when a datafile in the input manifest's dataset
        has none of the tags asked for by the dataset's file tags template.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            dataset_dir = os.path.join(tmp_dir, "met_mast_data")
            untagged_file_path = os.path.join(dataset_dir, "my_file_0.txt")

            # Write a single datafile into the dataset without giving it any tags.
            with Datafile(untagged_file_path, mode="w") as (_, file_handle):
                file_handle.write("hello")

            manifest = {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {"met_mast_data": dataset_dir},
            }

            runner = Runner(app_src=app, twine=self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE)

            with self.assertRaises(twined.exceptions.InvalidManifestContents):
                runner.run(input_manifest=manifest)
Ejemplo n.º 2
0
    def test_exception_raised_when_strand_data_missing(self):
        """Check that running without input values raises a `TwineValueException` naming the `input_values` strand
        when the twine defines an input values schema.
        """
        twine = """{
                "configuration_values_schema": {
                    "type": "object",
                    "properties": {
                        "n_iterations": {
                            "type": "integer"
                        }
                    }
                },
                "input_values_schema": {
                    "type": "object",
                    "properties": {
                        "height": {
                            "type": "integer"
                        }
                    },
                    "required": ["height"]
                }
            }"""

        runner = Runner(app_src=mock_app, twine=twine, configuration_values={"n_iterations": 5})

        # No input values are passed to `run`, even though the twine defines a schema for them.
        with self.assertRaises(twined.exceptions.TwineValueException) as raised:
            runner.run()

        expected_fragment = "The 'input_values' strand is defined in the twine, but no data is provided in sources"
        self.assertIn(expected_fragment, raised.exception.args[0])
Ejemplo n.º 3
0
    def test_validate_input_manifest_with_required_tags_for_remote_tag_template_schema(self):
        """Check that a file tags template given as a `$ref` to a remote schema can be used to validate the tags of
        the datafiles in an input manifest. The remote schema is served by a local stub rather than fetched.
        """
        schema_url = "https://refs.schema.octue.com/octue/my-file-type-tag-template/0.0.0.json"

        met_mast_dataset_spec = {
            "purpose": "A dataset containing meteorological mast data",
            "file_tags_template": {"$ref": schema_url},
        }

        twine = {
            "input_manifest": {"datasets": {"met_mast_data": met_mast_dataset_spec}},
            "output_values_schema": {},
        }

        # The schema the stub hands back in place of the document at `schema_url`.
        remote_schema = {
            "type": "object",
            "properties": {
                "manufacturer": {"type": "string"},
                "height": {"type": "number"},
                "is_recycled": {"type": "boolean"},
            },
            "required": ["manufacturer", "height", "is_recycled"],
        }

        runner = Runner(app_src=app, twine=twine)
        unpatched_resolve_from_url = copy.copy(RefResolver.resolve_from_url)

        def resolve_from_url_stub(instance, url):
            """Return the stub schema for the schema URL and defer to the unpatched resolver for any other URL.

            :param jsonschema.validators.RefResolver instance:
            :param str url:
            :return mixed:
            """
            if url != schema_url:
                return unpatched_resolve_from_url(instance, url)

            return remote_schema

        with patch(
            "jsonschema.validators.RefResolver.resolve_from_url",
            new=resolve_from_url_stub,
        ), tempfile.TemporaryDirectory() as tmp_dir:
            manifest = self._make_serialised_input_manifest_with_correct_dataset_file_tags(tmp_dir)
            runner.run(input_manifest=manifest)
Ejemplo n.º 4
0
    def test_using_manifests_template(self):
        """Check that the `using-manifests` template app runs and produces the expected output manifest."""
        self.set_template("template-using-manifests")

        configuration_values_path = os.path.join("data", "configuration", "values.json")
        input_manifest_path = os.path.join("data", "input", "manifest.json")

        runner = Runner(
            app_src=self.template_path,
            twine=self.template_twine,
            configuration_values=configuration_values_path,
        )

        with patch("google.cloud.storage.blob.Blob.generate_signed_url", new=mock_generate_signed_url):
            analysis = runner.run(input_manifest=input_manifest_path)

        # Round-trip the output manifest through its serialised form to check that the signed URLs for the dataset
        # and its files can be used to reinstantiate it.
        serialised_output_manifest = analysis.output_manifest.to_primitive()
        restored_manifest = Manifest.deserialise(serialised_output_manifest)
        cleaned_dataset = restored_manifest.datasets["cleaned_met_mast_data"]

        self.assertEqual(cleaned_dataset.labels, {"mast", "cleaned", "met"})

        cloud_path = cleaned_dataset.files.one().cloud_path

        self.assertEqual(
            urlparse(cloud_path).path,
            f"/{TEST_BUCKET_NAME}/output/test_using_manifests_analysis/cleaned_met_mast_data/cleaned.csv",
        )
Ejemplo n.º 5
0
    def test_run_output_values_validation(self):
        """Check that finalising an analysis raises a `TwineValueException` for output values that don't satisfy the
        twine's output values schema, and succeeds for output values that do.
        """
        twine = """
            {
                "output_values_schema": {
                    "type": "object",
                    "required": ["n_iterations"],
                    "properties": {
                        "n_iterations": {
                            "type": "integer"
                        }
                    }
                }
            }
        """

        def app_setting_required_output(analysis):
            analysis.output_values["n_iterations"] = 10

        # The run using `mock_app` is expected to fail output validation on finalising.
        runner_with_incorrect_output = Runner(app_src=mock_app, twine=twine)

        with self.assertRaises(twined.exceptions.TwineValueException):
            runner_with_incorrect_output.run().finalise()

        # An app that sets the required integer output should finalise without error.
        runner_with_valid_output = Runner(app_src=app_setting_required_output, twine=twine)
        runner_with_valid_output.run().finalise()
Ejemplo n.º 6
0
    def test_invalid_app_directory(self):
        """Ensure an error containing the searched location is raised if the app source can't be found."""
        runner = Runner(app_src="..", twine="{}")

        with self.assertRaises(ModuleNotFoundError) as error_context:
            runner.run()

        # These checks have to sit outside the `assertRaises` block: anything placed after the raising call inside
        # the block is never executed. The raised exception is exposed as `error_context.exception`.
        message = str(error_context.exception)
        self.assertIn("No module named 'app'", message)
        self.assertIn(os.path.abspath(runner.app_source), message)
Ejemplo n.º 7
0
    def test_fractal_template_with_default_configuration(self):
        """Check that the `fractal` template app runs when given its default configuration values file."""
        self.set_template("template-fractal")
        default_configuration_path = os.path.join("data", "configuration", "configuration_values.json")

        Runner(
            app_src=self.template_path,
            twine=self.template_twine,
            configuration_values=default_configuration_path,
        ).run()
Ejemplo n.º 8
0
    def test_validate_input_manifest_with_required_tags(self):
        """Check that a run succeeds when the input manifest's datafiles carry tags that meet the requirements of the
        dataset's file tags template.
        """
        twine = self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE
        runner = Runner(app_src=app, twine=twine)

        with tempfile.TemporaryDirectory() as tmp_dir:
            manifest = self._make_serialised_input_manifest_with_correct_dataset_file_tags(tmp_dir)
            runner.run(input_manifest=manifest)
Ejemplo n.º 9
0
    def test_validate_input_manifest_raises_error_if_required_tags_are_not_of_required_type(self):
        """Check that `InvalidManifestContents` is raised on running when a datafile has the tags required by the
        dataset's file tags template but with values of the wrong type.
        """
        # Each set of tags is intended to break the template's type requirements in a different way.
        wrongly_typed_tag_sets = (
            {"manufacturer": "Vestas", "height": 350, "is_recycled": False, "number_of_blades": "3"},
            {"manufacturer": "Vestas", "height": 350, "is_recycled": "no", "number_of_blades": 3},
            {"manufacturer": False, "height": 350, "is_recycled": "false", "number_of_blades": 3},
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            dataset_dir = os.path.join(tmp_dir, "met_mast_data")
            datafile_path = os.path.join(dataset_dir, "my_file_0.txt")

            manifest = {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {"met_mast_data": dataset_dir},
            }

            runner = Runner(app_src=app, twine=self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE)

            for tags in wrongly_typed_tag_sets:
                with self.subTest(tags=tags):

                    # (Re)write the dataset's only datafile with the tags under test.
                    with Datafile(path=datafile_path, tags=tags, mode="w") as (_, file_handle):
                        file_handle.write("hello")

                    with self.assertRaises(twined.exceptions.InvalidManifestContents):
                        runner.run(input_manifest=manifest)
Ejemplo n.º 10
0
 def test_app_can_be_provided_as_a_module_containing_function_named_run(self):
     """Check that a module containing a function named "run" can be given to the runner as the app source."""
     twine = {"output_values_schema": {}}
     runner = Runner(app_src=app, twine=twine)

     analysis = runner.run()

     self.assertEqual(analysis.output_values, {"width": 3})
Ejemplo n.º 11
0
def asker(analysis_id, **kwargs):
    """Run the application for an analysis, reporting progress to the analysis's group via `ReelMessage`s.

    A "started" message is sent first. The app at `settings.APPLICATION_PATH` is then run; on success, a "complete"
    message carrying each output strand serialised to JSON is sent. On any failure, an "error" message carrying a
    hint taken from the exception is sent and the exception is re-raised.

    :param analysis_id: identifier of the analysis; used as the value of each message and to build the group name
    :param kwargs: keyword arguments forwarded to `Runner.run`
    :raise Exception: whatever was raised while running the analysis or sending its outputs
    :return None:
    """
    logger.debug("APPLICATION PATH: %s", settings.APPLICATION_PATH)

    analysis_group_name = f"analysis-{analysis_id}"

    ReelMessage(action="ask", status="started", value=analysis_id).group_send(analysis_group_name)

    # TODO get configuration_values and configuration_manifest out at server start
    logger.debug("SENT MESSAGE TO %s", analysis_group_name)

    try:
        runner = Runner(
            twine=os.path.join(settings.APPLICATION_PATH, "twine.json"),
            configuration_values='{"analysis_program": "kuethe_chow"}',
        )
        # TODO get a log handler and add it to the run
        # TODO get a monitor handler and add it to the run
        analysis = runner.run(app_src=settings.APPLICATION_PATH, **kwargs)
        logger.debug("DONE WITH ANALYSIS")

        logger.debug("REELING OUTPUTS")
        # TODO fix https://github.com/octue/octue-sdk-python/issues/19 then you can remove this horrifying thing
        # A separate dictionary is used for the outputs so the caller's `kwargs` aren't rebound part-way through.
        outputs = {}

        for strand in OUTPUT_STRANDS:
            value = getattr(analysis, strand, None)

            # Strands that are absent or `None` are passed through as `None` rather than serialised.
            if value is not None:
                value = json.dumps(value, cls=OctueJSONEncoder)

            outputs[strand] = value

        # Create the completion message
        ReelMessage(action="ask", status="complete", value=analysis_id, **outputs).group_send(analysis_group_name)

    except Exception as e:
        logger.debug("RAISING REEL ERROR")
        # TODO Except TwinedExceptions and always forward to the client, but any other exceptions only forward if user has admin privilege
        # Exceptions raised without arguments have empty `args`; fall back to the string form so that building the
        # error message can't itself fail with an `IndexError` and mask the real error.
        hints = e.args[0] if e.args else str(e)

        ReelMessage(action="ask", status="error", value=analysis_id, hints=hints).group_send(analysis_group_name)

        # A bare `raise` re-raises the active exception with its original traceback.
        raise
Ejemplo n.º 12
0
    def test_run_with_configuration_passes(self):
        """Check that a run can be made when the twine defines only a configuration values schema and configuration
        values are given.
        """
        twine = """{
            "configuration_values_schema": {
                "type": "object",
                "properties": {
                    "n_iterations": {
                        "type": "integer"
                    }
                }
            }
        }"""

        runner = Runner(app_src=mock_app, twine=twine, configuration_values="{}")
        runner.run()
Ejemplo n.º 13
0
    def test_runner_with_credentials(self):
        """Check that a run works with a credential named in the twine and present in the environment, and that the
        credential is still in the environment afterwards.
        """
        twine = """
                    {
                        "credentials": [
                            {
                                "name": "LOCAL_CREDENTIAL",
                                "purpose": "Token for accessing a 3rd party API service"
                            }
                        ]
                    }
                """

        # The credential only exists in the environment for the duration of this block.
        with patch.dict(os.environ, {"LOCAL_CREDENTIAL": "my-secret"}):
            Runner(app_src=mock_app, twine=twine).run()
            self.assertEqual(os.environ["LOCAL_CREDENTIAL"], "my-secret")
Ejemplo n.º 14
0
    def test_app(self):
        """Check that the app accepts input values in the expected format and that the resulting analysis has the
        expected output values and an output manifest whose dataset file can be read.
        """
        runner = Runner(app_src=REPOSITORY_ROOT, twine=TWINE_PATH)
        input_values = {"n_iterations": 3}

        with patch("google.cloud.storage.blob.Blob.generate_signed_url", new=mock_generate_signed_url):
            analysis = runner.run(input_values=input_values)

        self.assertEqual(analysis.output_values, [1, 2, 3, 4, 5])

        # Round-trip the output manifest through its serialised form to check that the signed URLs for the dataset
        # and its files can be used to reinstantiate it.
        serialised_output_manifest = analysis.output_manifest.to_primitive()
        restored_manifest = Manifest.deserialise(serialised_output_manifest)

        # The output dataset's single file should be accessible and have the expected contents.
        example_file = restored_manifest.datasets["example_dataset"].files.one()

        with example_file as (_, file_handle):
            self.assertEqual(file_handle.read(), "This is some example service output.")
Ejemplo n.º 15
0
    def test_app_can_be_provided_as_path_to_module_containing_class_named_app(self):
        """Check that the app source can be a path to a module containing a class named "App"."""
        app_module_path = os.path.join(TESTS_DIR, "test_app_modules", "app_class")
        runner = Runner(app_src=app_module_path, twine={"output_values_schema": {}})

        analysis = runner.run()

        self.assertEqual(analysis.output_values, "App as a class works!")
Ejemplo n.º 16
0
    def test_output_manifest_is_not_none(self):
        """Check that an analysis has an output manifest (i.e. not `None`) when the twine defines one."""
        twine = """
                {
                    "output_manifest": {
                        "datasets": {
                            "open_foam_result": {
                                "purpose": "A dataset containing solution fields of an openfoam case."
                            },
                            "airfoil_cp_values": {
                                "purpose": "A file containing cp values"
                            }
                        }
                    }
                }
            """

        analysis = Runner(app_src=mock_app, twine=twine).run()

        self.assertIsNotNone(analysis.output_manifest)
Ejemplo n.º 17
0
    def test_runner_with_google_secret_credentials(self):
        """Check that a credential present locally is left in place and that one missing locally is populated into
        the environment from (a mocked) Google Cloud Secret Manager.
        """
        twine = """
                    {
                        "credentials": [
                            {
                                "name": "LOCAL_CREDENTIAL",
                                "purpose": "Token for accessing a 3rd party API service"
                            },
                            {
                                "name": "CLOUD_CREDENTIALS",
                                "purpose": "Token for accessing another 3rd party API service"
                            }
                        ]
                    }
                """

        access_secret_version_target = (
            "google.cloud.secretmanager_v1.services.secret_manager_service.client.SecretManagerServiceClient"
            ".access_secret_version"
        )

        class MockAccessSecretVersionResponse:
            # Stand-in for the secret manager's response; only `payload.data` is provided.
            payload = Mock(data=b"My precious!")

        # Only the local credential is put in the environment up front.
        with patch.dict(os.environ, {"LOCAL_CREDENTIAL": "my-secret"}):
            runner = Runner(app_src=mock_app, twine=twine)

            with patch(access_secret_version_target, return_value=MockAccessSecretVersionResponse()):
                runner.run()

            # The local secret should be untouched and the cloud secret should now be in the environment too.
            self.assertEqual(os.environ["LOCAL_CREDENTIAL"], "my-secret")
            self.assertEqual(os.environ["CLOUD_CREDENTIALS"], "My precious!")
Ejemplo n.º 18
0
        def create_exception_logging_run_function():
            """Create a run function for an app that raises an `OSError`, catches it, and logs it with
            `logger.exception`.

            :return callable: the bound `run` method of the runner wrapping the app
            """
            twine = '{"input_values_schema": {"type": "object", "required": []}}'

            def mock_app(analysis):
                try:
                    raise OSError("This is an OSError.")
                except OSError:
                    logger.exception("An example exception to log and forward to the parent.")

            runner = Runner(app_src=mock_app, twine=twine)
            return runner.run
Ejemplo n.º 19
0
        def create_run_function_with_monitoring():
            """Create a run function for an app that sends two monitor messages, using a twine that defines a
            monitor message schema.

            :return callable: the bound `run` method of the runner wrapping the app
            """
            twine = """
                {
                    "input_values_schema": {"type": "object", "required": []},
                    "monitor_message_schema": {
                        "type": "object",
                        "properties": {"status": {"type": "string"}},
                        "required": ["status"]
                    }
                }
            """

            def mock_app(analysis):
                first_message = {"status": "my first monitor message"}
                analysis.send_monitor_message(first_message)

                second_message = {"status": "my second monitor message"}
                analysis.send_monitor_message(second_message)

            runner = Runner(app_src=mock_app, twine=twine)
            return runner.run
Ejemplo n.º 20
0
    def test_analysis_not_re_finalised_by_runner_if_finalised_in_app(self):
        """Check that the runner doesn't call `Analysis.finalise` on an analysis that is already marked as finalised
        by the time the app returns.
        """
        def app_marking_analysis_finalised(analysis):
            analysis.output_values = {"hello": "world"}
            self.assertFalse(analysis.finalised)

            # Set the flag directly instead of calling `finalise` so that the mocked `Analysis.finalise` can still be
            # used to count calls.
            analysis._finalised = True

        twine = {"output_values_schema": {}}

        with patch("octue.resources.analysis.Analysis.finalise") as finalise_mock:
            analysis = Runner(app_src=app_marking_analysis_finalised, twine=twine).run()

        self.assertEqual(finalise_mock.call_count, 0)
        self.assertTrue(analysis.finalised)
Ejemplo n.º 21
0
    def test_instantiation_without_configuration_fails(self):
        """Check that instantiating a runner without configuration values raises a `TwineValueException` naming the
        `configuration_values` strand when the twine defines a configuration values schema.
        """
        twine = """{
                "configuration_values_schema": {
                    "type": "object",
                    "properties": {
                        "n_iterations": {
                            "type": "integer"
                        }
                    }
                }
            }"""

        with self.assertRaises(twined.exceptions.TwineValueException) as raised:
            Runner(app_src=".", twine=twine)

        expected_fragment = (
            "The 'configuration_values' strand is defined in the twine, but no data is provided in sources"
        )
        self.assertIn(expected_fragment, raised.exception.args[0])
Ejemplo n.º 22
0
def create_run_function():
    """Build a run function for an app that logs at the start and end of the analysis and sets a simple output value.

    :return callable: the bound `run` method of the runner wrapping the app
    """
    twine = """
        {
            "input_values_schema": {
                "type": "object",
                "required": []
            },
            "output_values_schema": {}
        }
    """

    def mock_app(analysis):
        logger.info("Starting analysis.")
        analysis.output_values = "Hello! It worked!"
        analysis.output_manifest = None
        logger.info("Finished analysis.")

    runner = Runner(app_src=mock_app, twine=twine)
    return runner.run
Ejemplo n.º 23
0
 def test_valid_output_location(self):
     """Check that a runner can be instantiated with a cloud path as its output location without an error."""
     cloud_output_location = "gs://my-bucket/blah"
     Runner(".", twine="{}", output_location=cloud_output_location)
Ejemplo n.º 24
0
 def test_error_raised_if_output_location_invalid(self):
     """Check that `InvalidInputException` is raised on instantiating a runner with an invalid output location."""
     invalid_output_location = "not_a_cloud_path"

     with self.assertRaises(exceptions.InvalidInputException):
         Runner(".", twine="{}", output_location=invalid_output_location)
Ejemplo n.º 25
0
    def test_validate_input_manifest_with_required_tags_in_several_datasets(self):
        """Check that a run succeeds when each of two datasets has its own file tags template and each dataset's
        datafile carries tags matching its own template.
        """
        first_dataset_tags_template = {
            "type": "object",
            "properties": {
                "manufacturer": {"type": "string"},
                "height": {"type": "number"},
            },
        }

        second_dataset_tags_template = {
            "type": "object",
            "properties": {
                "is_recycled": {"type": "boolean"},
                "number_of_blades": {"type": "number"},
            },
        }

        twine = {
            "input_manifest": {
                "datasets": {
                    "first_dataset": {
                        "purpose": "A dataset containing meteorological mast data",
                        "file_tags_template": first_dataset_tags_template,
                    },
                    "second_dataset": {"file_tags_template": second_dataset_tags_template},
                }
            },
            "output_values_schema": {},
        }

        with tempfile.TemporaryDirectory() as tmp_dir:
            first_dataset_dir = os.path.join(tmp_dir, "first_dataset")
            second_dataset_dir = os.path.join(tmp_dir, "second_dataset")

            manifest = {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "first_dataset": first_dataset_dir,
                    "second_dataset": second_dataset_dir,
                },
            }

            # One datafile per dataset, each tagged according to its own dataset's template.
            datafiles_to_write = (
                (os.path.join(first_dataset_dir, "file_0.csv"), {"manufacturer": "vestas", "height": 503.7}),
                (os.path.join(second_dataset_dir, "file_1.csv"), {"is_recycled": True, "number_of_blades": 3}),
            )

            for datafile_path, tags in datafiles_to_write:
                with Datafile(path=datafile_path, tags=tags, mode="w") as (_, file_handle):
                    file_handle.write("hello")

            runner = Runner(app_src=app, twine=twine)
            runner.run(input_manifest=manifest)
Ejemplo n.º 26
0
 def test_app_can_be_provided_as_a_class(self):
     """Check that a class can be given to the runner as the app source."""
     twine = {"output_values_schema": {}}
     runner = Runner(app_src=App, twine=twine)

     analysis = runner.run()

     self.assertEqual(analysis.output_values, "App as a class works!")
Ejemplo n.º 27
0
    def test_child_services_template(self):
        """Ensure the child services template works correctly (i.e. that children can be accessed by a parent and data
        collected from them). This template has a parent app and two children - an elevation app and wind speed app. The
        parent sends coordinates to both children, receiving the elevation and wind speed from them at these locations.

        Each child is started in its own subprocess via the CLI's `start` command, using a copy of its `octue.yaml`
        in which the service name is replaced by a freshly generated unique ID. The parent is then run in-process
        with its children pointed at those IDs.
        """
        cli_path = os.path.join(REPOSITORY_ROOT, "octue", "cli.py")
        self.set_template("template-child-services")

        elevation_service_path = os.path.join(self.template_path,
                                              "elevation_service")
        # A random UUID suffix makes the service name unique to this test run.
        elevation_service_id = f"elevation-service-{uuid.uuid4()}"

        # NOTE(review): the temporary file is reopened by name below while it is still open; the `tempfile`
        # documentation says this works on Unix but not on Windows.
        with tempfile.NamedTemporaryFile() as elevation_service_configuration:
            # Load the template's service configuration and swap in the unique service name.
            with open(
                    os.path.join(self.template_path, "elevation_service",
                                 "octue.yaml")) as f:
                config = yaml.load(f, Loader=yaml.SafeLoader)
                config["services"][0]["name"] = elevation_service_id

            # Write the modified configuration to the temporary file for the subprocess to read.
            with open(elevation_service_configuration.name, "w") as f:
                yaml.dump(config, f)

            # Start the elevation service from its own directory using the modified configuration.
            elevation_process = subprocess.Popen(
                [
                    sys.executable,
                    cli_path,
                    "start",
                    f"--service-config={elevation_service_configuration.name}",
                    "--rm",
                ],
                cwd=elevation_service_path,
            )

            wind_speed_service_path = os.path.join(self.template_path,
                                                   "wind_speed_service")
            wind_speed_service_id = f"wind-speed-service-{uuid.uuid4()}"

            # Same procedure as above for the wind speed service. This is nested inside the first temporary file's
            # context so that both configuration files exist while the services run.
            with tempfile.NamedTemporaryFile(
            ) as wind_speed_service_configuration:
                with open(
                        os.path.join(self.template_path, "wind_speed_service",
                                     "octue.yaml")) as f:
                    config = yaml.load(f, Loader=yaml.SafeLoader)
                    config["services"][0]["name"] = wind_speed_service_id

                with open(wind_speed_service_configuration.name, "w") as f:
                    yaml.dump(config, f)

                wind_speed_process = subprocess.Popen(
                    [
                        sys.executable,
                        cli_path,
                        "start",
                        f"--service-config={wind_speed_service_configuration.name}",
                        "--rm",
                    ],
                    cwd=wind_speed_service_path,
                )

                parent_service_path = os.path.join(self.template_path,
                                                   "parent_service")

                # Point the parent's children at the unique service IDs generated above.
                # NOTE(review): the indices assume the wind speed child is listed first and the elevation child
                # second in `app_configuration.json` - confirm against the template.
                with open(
                        os.path.join(parent_service_path,
                                     "app_configuration.json")) as f:
                    children = json.load(f)["children"]
                    children[0]["id"] = wind_speed_service_id
                    children[1]["id"] = elevation_service_id

                # NOTE(review): `ProcessesContextManager` presumably stops the child processes on exit - verify.
                # The processes are started before this context is entered, so an error raised between the `Popen`
                # calls and this line would not pass through it.
                with ProcessesContextManager(processes=(elevation_process,
                                                        wind_speed_process)):

                    runner = Runner(
                        app_src=parent_service_path,
                        twine=os.path.join(parent_service_path, "twine.json"),
                        children=children,
                        service_id="template-child-services/parent-service",
                    )

                    analysis = runner.run(input_values=os.path.join(
                        parent_service_path, "data", "input", "values.json"), )

        # The parent's output should contain the data collected from both children.
        self.assertTrue("elevations" in analysis.output_values)
        self.assertTrue("wind_speeds" in analysis.output_values)
Ejemplo n.º 28
0
 def test_instantiate_runner(self):
     """Check that a runner can be instantiated with an empty twine and that the result is a `Runner`."""
     runner = Runner(app_src=".", twine="{}")
     runner_class_name = type(runner).__name__
     self.assertEqual(runner_class_name, "Runner")
Ejemplo n.º 29
0
 def test_analysis_finalised_by_runner_if_not_finalised_in_app(self):
     """Check that the analysis returned by the runner is marked as finalised after a run with the `App` class."""
     twine = {"output_values_schema": {}}
     runner = Runner(app_src=App, twine=twine)

     analysis = runner.run()

     self.assertTrue(analysis.finalised)