Beispiel #1
0
    def __init__(self, twine, handle_monitor_message=None, **kwargs):
        """Set up the analysis from a twine plus any strand data given as keyword arguments.

        Every name in ``ALL_STRANDS`` is popped from ``kwargs`` (defaulting to ``None``), as is ``output_location``;
        whatever remains is passed on to the parent initialiser.

        :param twine: a ``Twine`` instance, or a source from which one is built via ``Twine(source=twine)``
        :param callable|None handle_monitor_message: stored for use when sending monitor messages
        :param kwargs: strand data, ``output_location``, and any arguments for the parent classes
        :return None:
        """
        self.twine = twine if isinstance(twine, Twine) else Twine(source=twine)
        self._handle_monitor_message = handle_monitor_message

        # Remove all strand arguments from kwargs so only non-strand arguments are left over.
        strands = {}
        for strand_name in ALL_STRANDS:
            strands[strand_name] = kwargs.pop(strand_name, None)

        # Values strands, then manifest strands, then the children strand.
        for attribute_name in (
            "configuration_values",
            "input_values",
            "output_values",
            "configuration_manifest",
            "input_manifest",
            "output_manifest",
            "children",
        ):
            setattr(self, attribute_name, strands.get(attribute_name))

        # Not a strand, but still specific to the analysis rather than its parent classes.
        self.output_location = kwargs.pop("output_location", None)

        self._calculate_strand_hashes(strands=strands)
        self._finalised = False
        super().__init__(**kwargs)
Beispiel #2
0
    def test_error_raised_if_datasets_are_missing_from_manifest(self):
        """Check that validating an input manifest lacking the "cat" dataset named in the twine raises an error with
        the expected message.
        """
        twine_source = """
            {
                "input_manifest": {
                    "datasets": {
                        "cat": {
                            "purpose": "blah"
                        },
                        "dog": {
                            "purpose": "blah"
                        }
                    }
                }
            }
        """

        # The manifest provides "dog" and an unrelated dataset, but not "cat".
        manifest_without_cat = {
            "id": "30d2c75c-a7b9-4f16-8627-9c8d5cc04bf4",
            "datasets": {
                "my-dataset": "gs://my-bucket/my_dataset",
                "dog": "gs://dog-house/dog",
            },
        }

        parsed_twine = Twine(source=twine_source)

        with self.assertRaises(exceptions.InvalidManifestContents) as raised:
            parsed_twine.validate_input_manifest(source=manifest_without_cat)

        expected_message = "A dataset named 'cat' is expected in the input_manifest but is missing."
        self.assertEqual(raised.exception.message, expected_message)
Beispiel #3
0
 def test_valid(self):
     """Check that the class's valid child value validates against the class's valid twine with children without
     raising.
     """
     Twine(source=self.VALID_TWINE_WITH_CHILDREN).validate_children(source=self.VALID_CHILD_VALUE)
Beispiel #4
0
 def test_invalid_strand(self):
     """Check that validating data against an unrecognised strand name raises `UnknownStrand`.

     Note: this exercises private methods (`_load_json` and `_validate_against_schema`) directly.
     """
     schema_twine = Twine(source=VALID_SCHEMA_TWINE)
     configuration_data = schema_twine._load_json("configuration", source=self.VALID_CONFIGURATION_VALUE)

     with self.assertRaises(exceptions.UnknownStrand):
         schema_twine._validate_against_schema("not_a_strand_name", configuration_data)
Beispiel #5
0
 def test_credentials(self):
     """Check that a credential value supplied by the environment is still in place after credentials validation
     (presumably the twine declares a default for it - confirm against `VALID_CREDENTIALS_TWINE`).
     """
     credentials_twine = Twine(source=self.VALID_CREDENTIALS_TWINE)

     patched_environment = {
         "SECRET_THE_FIRST": "a value",
         "SECRET_THE_SECOND": "another value",
         "SECRET_THE_THIRD": "value",
     }

     with mock.patch.dict(os.environ, patched_environment):
         credentials_twine.validate_credentials()
         self.assertEqual(os.environ["SECRET_THE_THIRD"], "value")
Beispiel #6
0
    def test_valid_values_files(self):
        """Check that configuration values written to a file on disk, and input/output values given as JSON strings,
        all validate without raising.
        """
        schema_twine = Twine(source=VALID_SCHEMA_TWINE)

        with TemporaryDirectory() as temporary_directory:
            configuration_path = self._write_json_string_to_file(
                self.VALID_CONFIGURATION_VALUE,
                temporary_directory,
            )

            schema_twine.validate_configuration_values(source=configuration_path)
            schema_twine.validate_input_values(source="""{"height": 40}""")
            schema_twine.validate_output_values(source="""{"width": 36}""")
Beispiel #7
0
 def test_valid_children(self):
     """Check that a twine declaring a single child is instantiated with exactly one entry in `children`."""
     single_child_twine = """
         {
             "children": [{"key": "gis", "purpose": "The purpose.", "notes": "Some notes.", "filters": "tags:gis"}]
         }
     """
     parsed_twine = Twine(source=single_child_twine)
     self.assertEqual(len(parsed_twine.children), 1)
Beispiel #8
0
    def test_non_existent_attributes_cannot_be_retrieved(self):
        """Check that accessing an attribute that doesn't exist on an Analysis raises `AttributeError` rather than
        returning a value. See https://github.com/octue/octue-sdk-python/issues/45 for the reasoning behind this.
        """
        empty_twine = Twine(source="{}")
        analysis = Analysis(twine=empty_twine)

        with self.assertRaises(AttributeError):
            getattr(analysis, "furry_purry_cat")
Beispiel #9
0
    def test_valid_with_extra_values(self):
        """Check that configuration values containing a field beyond `n_iterations` validate without raising."""
        configuration_with_additional_field = """
            {
                "n_iterations": 1,
                "another_field": "may or may not be quietly ignored"
            }
        """

        schema_twine = Twine(source=VALID_SCHEMA_TWINE)
        schema_twine.validate_configuration_values(source=configuration_with_additional_field)
Beispiel #10
0
    def test_extra_key_validation_on_valid_twine(self):
        """Check that a child carrying an extra property validates without raising against the class's valid twine
        with children.
        """
        # TODO review this behaviour - possibly should raise an error but allow for a user specified extra_data property
        child_with_extra_property = """
            [
                {
                    "key": "gis",
                    "id": "some-id",
                    "backend": {
                        "name": "GCPPubSubBackend",
                        "project_name": "my-project"
                    },
                    "some_extra_property": "should not be a problem if present"
                }
            ]
        """

        Twine(source=self.VALID_TWINE_WITH_CHILDREN).validate_children(source=child_with_extra_property)
Beispiel #11
0
    def test_extra_key_validation_on_empty_twine(self):
        """Check that validating these children values (one of which carries an extra key) against an empty twine
        raises `InvalidValuesContents`.
        """
        children_with_extra_key = """
            [
                {"key": "gis", "id": "id", "uri_env_name": "VAR_NAME", "an_extra_key": "not a problem if present"},
                {"key": "some_weird_other_child", "id": "some-other-id", "uri_env_name": "SOME_ENV_VAR_NAME"}
            ]
        """

        empty_twine = Twine()

        with self.assertRaises(exceptions.InvalidValuesContents):
            empty_twine.validate_children(source=children_with_extra_key)
Beispiel #12
0
    def test_missing_values_files(self):
        """Check that validating each values strand from a path to a non-existent file raises that strand's
        "file not found" exception.
        """
        schema_twine = Twine(source=VALID_SCHEMA_TWINE)
        missing_file = os.path.join(self.path, "not_a_file.json")

        expectations = (
            (exceptions.ConfigurationValuesFileNotFound, schema_twine.validate_configuration_values),
            (exceptions.InputValuesFileNotFound, schema_twine.validate_input_values),
            (exceptions.OutputValuesFileNotFound, schema_twine.validate_output_values),
        )

        for expected_exception, validate in expectations:
            with self.assertRaises(expected_exception):
                validate(source=missing_file)
Beispiel #13
0
    def test_missing_manifest_files(self):
        """Check that validating each manifest strand from a path to a non-existent file raises that strand's
        "file not found" exception.
        """
        manifest_twine = Twine(source=self.VALID_MANIFEST_STRAND)
        missing_file = os.path.join(self.path, "not_a_file.json")

        expectations = (
            (exceptions.ConfigurationManifestFileNotFound, manifest_twine.validate_configuration_manifest),
            (exceptions.InputManifestFileNotFound, manifest_twine.validate_input_manifest),
            (exceptions.OutputManifestFileNotFound, manifest_twine.validate_output_manifest),
        )

        for expected_exception, validate in expectations:
            with self.assertRaises(expected_exception):
                validate(source=missing_file)
Beispiel #14
0
    def __init__(
        self,
        app_src,
        twine="twine.json",
        configuration_values=None,
        configuration_manifest=None,
        children=None,
        output_location=None,
        project_name=None,
        service_id=None,
    ):
        """Store the app source and children, check the output location, load the twine, and validate the
        configuration against it.

        :param app_src: stored as `app_source`
        :param twine: a `Twine` instance, or a source from which one is built via `Twine(source=twine)`
        :param configuration_values: configuration values to validate against the twine
        :param configuration_manifest: configuration manifest to validate against the twine
        :param children: stored as `children`
        :param str|None output_location: if given, must look like a Google Cloud Storage path (`gs://...`)
        :param project_name: stored as `_project_name`
        :param service_id: stored as `service_id`
        :raise exceptions.InvalidInputException: if an output location is given that isn't a `gs://` path
        :return None:
        """
        self.app_source = app_src
        self.children = children

        # Only a truthy output location is checked; `None` or an empty string is stored as given.
        if output_location:
            looks_like_cloud_path = re.match(r"^gs://[a-z\d][a-z\d_./-]*$", output_location)

            if not looks_like_cloud_path:
                raise exceptions.InvalidInputException(
                    "The output location must be a Google Cloud Storage path e.g. 'gs://bucket-name/output_directory'."
                )

        self.output_location = output_location

        # Accept either a ready-made twine or anything a twine can be constructed from.
        self.twine = twine if isinstance(twine, Twine) else Twine(source=twine)
        logger.debug("Parsed twine with strands %r", self.twine.available_strands)

        validation_arguments = {
            "configuration_values": configuration_values,
            "configuration_manifest": configuration_manifest,
            "cls": CLASS_MAP,
        }
        self.configuration = self.twine.validate(**validation_arguments)
        logger.debug("Configuration validated.")

        self.service_id = service_id
        self._project_name = project_name
Beispiel #15
0
    def test_invalid_children_no_key(self):
        """Check that instantiating a twine whose child entry has no `key` field raises `InvalidTwine`."""
        twine_with_keyless_child = """
            {
                "children": [{"purpose": "The purpose.", "notes": "Here are some notes.", "filters": "tags:gis"}]
            }
        """

        with self.assertRaises(exceptions.InvalidTwine):
            Twine(source=twine_with_keyless_child)
Beispiel #16
0
    def test_error_raised_if_multiple_datasets_have_same_name(self):
        """Check that validating an input manifest whose JSON repeats the same dataset name raises `KeyError`."""
        twine_source = """
            {
                "input_manifest": {
                    "datasets": {
                        "met_mast_data": {
                            "purpose": "A dataset containing meteorological mast data"
                        }
                    }
                }
            }
        """

        # The "met_mast_data" key appears twice in the JSON below, with a different dataset ID each time.
        manifest_with_duplicated_name = """
            {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e19",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": [],
                        "files": []
                    },
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e18",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": [],
                        "files": []
                    }
                }
            }
        """

        parsed_twine = Twine(source=twine_source)

        with self.assertRaises(KeyError):
            parsed_twine.validate_input_manifest(source=manifest_with_duplicated_name)
Beispiel #17
0
    def test_invalid_env_name(self):
        """Check that a child whose `uri_env_name` is not in ALL_CAPS_SNAKE_CASE fails children validation."""
        child_with_bad_env_name = """
            [
                {
                    "key": "gis",
                    "id": "some-id",
                    "uri_env_name": "an environment variable not in CAPS_CASE is invalid per the credentials spec"
                }
            ]
        """

        empty_twine = Twine()

        with self.assertRaises(exceptions.InvalidValuesContents):
            empty_twine.validate_children(source=child_with_bad_env_name)
Beispiel #18
0
    def test_fails_on_dict(self):
        """Check that instantiating a twine whose `credentials` entry is a dict rather than an array raises
        `InvalidTwine`.
        """
        twine_with_credentials_as_dict = """
            {
                "credentials": {
                    "name": "MY_API_SECRET_KEY",
                    "purpose": "Token for accessing a 3rd party API service"
                }
            }
        """

        with self.assertRaises(exceptions.InvalidTwine):
            Twine(source=twine_with_credentials_as_dict)
Beispiel #19
0
    def test_fails_on_name_whitespace(self):
        """Check that instantiating a twine with a credential whose name contains spaces raises `InvalidTwine`."""
        twine_with_spaces_in_credential_name = """
            {
                "credentials": [
                    {
                        "name": "MY NAME SHOULD NOT HAVE WHITESPACE",
                        "purpose": "Token for accessing a 3rd party API service"
                    }
                ]
            }
        """

        with self.assertRaises(exceptions.InvalidTwine):
            Twine(source=twine_with_spaces_in_credential_name)
Beispiel #20
0
    def test_fails_on_no_name(self):
        """Check that instantiating a twine with a credential that has no `name` field raises `InvalidTwine`."""
        twine_with_nameless_credential = """
            {
                "credentials": [
                    {
                        "purpose": "credentials without a name should be invalid"
                    }
                ]
            }
        """

        with self.assertRaises(exceptions.InvalidTwine):
            Twine(source=twine_with_nameless_credential)
Beispiel #21
0
    def test_fails_on_lowercase_name(self):
        """Check that instantiating a twine with a credential whose name is in lowercase raises `InvalidTwine`."""
        twine_with_lowercase_credential_name = """
            {
                "credentials": [
                    {
                        "name": "my_secrets_should_be_uppercase",
                        "purpose": "Token for accessing a 3rd party API service"
                    }
                ]
            }
        """

        with self.assertRaises(exceptions.InvalidTwine):
            Twine(source=twine_with_lowercase_credential_name)
Beispiel #22
0
def start(service_config, timeout, rm):
    """Start an Octue service or digital twin locally as a child so it can be asked questions by other Octue services."""
    # The docstring above is kept verbatim in case it is surfaced as command help text.
    service_configuration, app_configuration = load_service_and_app_configuration(service_config)

    runner = Runner(
        app_src=service_configuration.app_source_path,
        twine=Twine(source=service_configuration.twine_path),
        configuration_values=app_configuration.configuration_values,
        configuration_manifest=app_configuration.configuration_manifest,
        children=app_configuration.children,
        output_location=app_configuration.output_location,
        service_id=service_configuration.service_id,
    )

    # Fix the logging arguments up front so the service only has to supply the per-question arguments.
    run_function = functools.partial(
        runner.run,
        analysis_log_level=global_cli_context["log_level"],
        analysis_log_handler=global_cli_context["log_handler"],
    )

    backend_settings = (app_configuration.configuration_values or {}).get("backend")

    if not backend_settings:
        # If no backend details are provided, use Google Pub/Sub with the default project.
        _, project_name = auth.default()
        backend_class = service_backends.get_backend()
        backend = backend_class(project_name=project_name)
    else:
        # Work on a copy so popping the name doesn't alter the app's configuration values.
        backend_settings = copy.deepcopy(backend_settings)
        backend_class = service_backends.get_backend(backend_settings.pop("name"))
        backend = backend_class(**backend_settings)

    service = Service(
        service_id=service_configuration.service_id,
        backend=backend,
        run_function=run_function,
    )

    service.serve(timeout=timeout, delete_topic_and_subscription_on_exit=rm)
Beispiel #23
0
    def test_missing_optional_datasets_do_not_raise_error(self):
        """Check that an input manifest omitting a dataset the twine marks as optional ("cat") validates without
        raising.
        """
        twine_source = """
            {
                "input_manifest": {
                    "datasets": {
                        "cat": {
                            "purpose": "blah",
                            "optional": true
                        },
                        "dog": {
                            "purpose": "blah"
                        }
                    }
                }
            }
        """

        # Only the non-optional "dog" dataset is provided.
        manifest_without_optional_dataset = {
            "id": "30d2c75c-a7b9-4f16-8627-9c8d5cc04bf4",
            "datasets": {
                "dog": "gs://dog-house/dog",
            },
        }

        parsed_twine = Twine(source=twine_source)
        parsed_twine.validate_input_manifest(source=manifest_without_optional_dataset)
Beispiel #24
0
    def test_strand_not_found(self):
        """Check that validating output values against a twine that only defines a configuration values schema
        raises `StrandNotFound`.
        """
        twine_without_output_schema = """
           {
                "configuration_values_schema": {
                    "$schema": "https://json-schema.org/draft/2020-12/schema",
                    "title": "The example configuration form",
                    "description": "The configuration strand of an example twine",
                    "type": "object",
                    "properties": {
                        "n_iterations": {
                            "description": "An example of an integer configuration variable, called 'n_iterations'.",
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 10,
                            "default": 5
                        }
                    }
                }
            }
        """

        with self.assertRaises(exceptions.StrandNotFound):
            parsed_twine = Twine(source=twine_without_output_schema)
            parsed_twine.validate_output_values(source="{}")
Beispiel #25
0
def run(service_config, input_dir, output_file, output_manifest_file,
        monitor_messages_file):
    """Run an analysis on the given input data using an Octue service or digital twin locally. The output values are
    printed to `stdout`. If an output manifest is produced, it will be saved locally (see the `--output-manifest-file`
    option).
    """
    # NOTE: the docstring above is left untouched in case it is surfaced as command help text. Parameters:
    # - service_config: passed to `load_service_and_app_configuration` to get the service and app configurations
    # - input_dir: directory checked for the input values file and the input manifest file
    # - output_file: if truthy (and there are truthy output values), the output values are written here as JSON
    # - output_manifest_file: where any output manifest is written; falls back to `output_manifest_<id>.json`
    # - monitor_messages_file: if truthy, monitor messages are handed to `_add_monitor_message_to_file` for this path
    # Always returns 0.

    def _ensure_parent_directory_exists(path):
        """Create the directory containing `path`, unless the path has no directory component."""
        directory = os.path.dirname(path)

        # A bare filename has an empty dirname and `os.makedirs("")` raises, so only create real directories.
        if directory:
            os.makedirs(directory, exist_ok=True)

    service_configuration, app_configuration = load_service_and_app_configuration(
        service_config)

    input_values_path = os.path.join(input_dir, VALUES_FILENAME)
    input_manifest_path = os.path.join(input_dir, MANIFEST_FILENAME)

    # Only pass on the inputs that actually exist on disk.
    input_values = input_values_path if os.path.exists(input_values_path) else None
    input_manifest = input_manifest_path if os.path.exists(input_manifest_path) else None

    runner = Runner(
        app_src=service_configuration.app_source_path,
        twine=Twine(source=service_configuration.twine_path),
        configuration_values=app_configuration.configuration_values,
        configuration_manifest=app_configuration.configuration_manifest,
        children=app_configuration.children,
        output_location=app_configuration.output_location,
    )

    if monitor_messages_file:
        _ensure_parent_directory_exists(monitor_messages_file)

        def monitor_message_handler(message):
            return _add_monitor_message_to_file(monitor_messages_file, message)

    else:
        monitor_message_handler = None

    analysis = runner.run(
        analysis_id=global_cli_context["analysis_id"],
        input_values=input_values,
        input_manifest=input_manifest,
        analysis_log_level=global_cli_context["log_level"],
        analysis_log_handler=global_cli_context["log_handler"],
        handle_monitor_message=monitor_message_handler,
    )

    click.echo(json.dumps(analysis.output_values))

    if analysis.output_values and output_file:
        _ensure_parent_directory_exists(output_file)

        with open(output_file, "w") as f:
            json.dump(analysis.output_values,
                      f,
                      cls=OctueJSONEncoder,
                      indent=4)

    if analysis.output_manifest:
        # Resolve the fallback filename before touching the path so a missing `output_manifest_file` can't break
        # the directory creation.
        output_manifest_path = output_manifest_file or f"output_manifest_{analysis.id}.json"
        _ensure_parent_directory_exists(output_manifest_path)

        with open(output_manifest_path, "w") as f:
            json.dump(analysis.output_manifest.to_primitive(),
                      f,
                      cls=OctueJSONEncoder,
                      indent=4)

    return 0
Beispiel #26
0
    def test_valid_manifest_files(self):
        """Check that valid configuration, input, and output manifests given as JSON strings all validate against
        the class's manifest twine without raising.
        """
        configuration_manifest_json = """
            {
                "id": "3ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "configuration_files_data": {
                        "id": "34ad7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "configuration_files_data",
                        "tags": {},
                        "labels": ["the", "config", "labels"],
                        "files": [
                            {
                                "path": "configuration/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "configuration/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    }
                }
            }
        """

        # The input manifest mixes a fully-specified dataset with one given only as a cloud path.
        input_manifest_json = """
            {
                "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "met_mast_data": {
                        "id": "7ead7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "met_mast_data",
                        "tags": {},
                        "labels": ["met", "mast", "wind"],
                        "files": [
                            {
                                "path": "input/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "input/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    },
                    "scada_data": "gs://my-bucket/scada-data"
                }
            }
        """

        output_manifest_json = """
            {
                "id": "2ead7669-8162-4f64-8cd5-4abe92509e17",
                "datasets": {
                    "output_files_data": {
                        "id": "1ead7669-8162-4f64-8cd5-4abe92509e17",
                        "name": "output_files_data",
                        "tags": {},
                        "labels": ["the", "output", "labels"],
                        "files": [
                            {
                                "path": "input/datasets/7ead7669/file_1.csv",
                                "cluster": 0,
                                "sequence": 0,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
                                "last_modified": "2019-02-28T22:40:30.533005Z",
                                "name": "file_1.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "somesha"
                            },
                            {
                                "path": "input/datasets/7ead7669/file_2.csv",
                                "cluster": 0,
                                "sequence": 1,
                                "extension": "csv",
                                "tags": {},
                                "labels": [],
                                "posix_timestamp": 0,
                                "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
                                "last_modified": "2019-02-28T22:40:40.633001Z",
                                "name": "file_2.csv",
                                "size_bytes": 59684813,
                                "sha-512/256": "someothersha"
                            }
                        ]
                    }
                }
            }
        """

        manifest_twine = Twine(source=self.VALID_MANIFEST_STRAND)
        manifest_twine.validate_configuration_manifest(source=configuration_manifest_json)
        manifest_twine.validate_input_manifest(source=input_manifest_json)
        manifest_twine.validate_output_manifest(source=output_manifest_json)
Beispiel #27
0
class Analysis(Identifiable, Serialisable, Labelable, Taggable):
    """A class representing a scientific or computational analysis. It holds references to all configuration, input, and
    output data, logs, connections to child services, credentials, etc. It's essentially the "Internal API" for your
    service - a single point of contact where you can get or update anything you need.

    An ``Analysis`` instance is automatically provided to the app in an Octue service when a question is received. Its
    attributes include every strand that can be added to a ``Twine``, although only the strands specified in the
    service's twine will be non-``None``. Incoming data is validated before it's added to the analysis.

    All input and configuration attributes are hashed using a `BLAKE3 hash <https://github.com/BLAKE3-team/BLAKE3>`_ so
    the inputs and configuration that produced a given output in your app can always be verified. These hashes exist on
    the following attributes:

    -   ``input_values_hash``
    -   ``input_manifest_hash``
    -   ``configuration_values_hash``
    -   ``configuration_manifest_hash``

    If a strand is ``None``, so will its corresponding hash attribute be. The hash of a datafile is the hash of its
    file, while the hash of a manifest or dataset is the cumulative hash of the files it refers to.

    :param twined.Twine|dict|str twine: the twine, dictionary defining a twine, or path to "twine.json" file defining the service's data interface
    :param callable|None handle_monitor_message: an optional function for sending monitor messages to the parent that requested the analysis
    :param any configuration_values: the configuration values for the analysis - this can be expressed as a python primitive (e.g. dict), a path to a JSON file, or a JSON string.
    :param octue.resources.manifest.Manifest configuration_manifest: a manifest of configuration datasets for the analysis if required
    :param any input_values: the input values for the analysis - this can be expressed as a python primitive (e.g. dict), a path to a JSON file, or a JSON string.
    :param octue.resources.manifest.Manifest input_manifest: a manifest of input datasets for the analysis if required
    :param any output_values: any output values the analysis produces
    :param octue.resources.manifest.Manifest output_manifest: a manifest of output dataset from the analysis if it produces any
    :param dict children: a mapping of string key to ``Child`` instance for all the children used by the service
    :param str id: Optional UUID for the analysis
    :return None:
    """

    def __init__(self, twine, handle_monitor_message=None, **kwargs):
        if isinstance(twine, Twine):
            self.twine = twine
        else:
            self.twine = Twine(source=twine)

        self._handle_monitor_message = handle_monitor_message

        # Pop every strand out of kwargs (defaulting to `None`) so only non-strand arguments reach the parent classes.
        strand_kwargs = {name: kwargs.pop(name, None) for name in ALL_STRANDS}

        # Values strands.
        self.configuration_values = strand_kwargs.get("configuration_values")
        self.input_values = strand_kwargs.get("input_values")
        self.output_values = strand_kwargs.get("output_values")

        # Manifest strands.
        self.configuration_manifest = strand_kwargs.get("configuration_manifest")
        self.input_manifest = strand_kwargs.get("input_manifest")
        self.output_manifest = strand_kwargs.get("output_manifest")

        # Other strands.
        self.children = strand_kwargs.get("children")

        # Non-strands.
        self.output_location = kwargs.pop("output_location", None)

        self._calculate_strand_hashes(strands=strand_kwargs)
        self._finalised = False
        super().__init__(**kwargs)

    @property
    def finalised(self):
        """Check whether the analysis has been finalised (i.e. whether its outputs have been validated and, if an output
        manifest is produced, its datasets uploaded).

        :return bool:
        """
        return self._finalised

    def send_monitor_message(self, data):
        """Send a monitor message to the parent that requested the analysis. The message is validated against the
        twine first; if no handler was given at instantiation, a warning is logged and nothing is sent.

        :param any data: any JSON-compatible data structure
        :raise InvalidMonitorMessage: if the message fails validation against the twine
        :return None:
        """
        try:
            self.twine.validate_monitor_message(source=data)
        except twined.exceptions.InvalidValuesContents as e:
            # Chain explicitly so the underlying validation error is recorded as the cause.
            raise InvalidMonitorMessage(e) from e

        if self._handle_monitor_message is None:
            logger.warning(
                "Attempted to send a monitor message but no handler is specified."
            )
            return

        self._handle_monitor_message(data)

    def finalise(self, upload_output_datasets_to=None):
        """Validate the output values and output manifest, optionally uploading the output manifest's datasets to the
        cloud and updating its dataset paths to signed URLs.

        :param str|None upload_output_datasets_to: if provided, upload any output datasets to this cloud directory and update the output manifest with their locations
        :return None:
        """
        serialised_strands = {"output_values": None, "output_manifest": None}

        if self.output_values:
            serialised_strands["output_values"] = json.dumps(
                self.output_values, cls=OctueJSONEncoder)

        if self.output_manifest:
            serialised_strands[
                "output_manifest"] = self.output_manifest.to_primitive()

        self.twine.validate(**serialised_strands)
        self._finalised = True
        logger.info(
            "Validated output values and output manifest against the twine.")

        # `output_manifest` is always set in `__init__` (possibly to `None`), so checking only that the attribute
        # exists would let a `None` manifest through and fail on `.datasets` below.
        output_manifest = getattr(self, "output_manifest", None)

        if not upload_output_datasets_to or output_manifest is None:
            return

        for name, dataset in output_manifest.datasets.items():
            dataset.upload(
                cloud_path=storage.path.join(upload_output_datasets_to, name))

        output_manifest.use_signed_urls_for_datasets()

        logger.info("Uploaded output datasets to %r.",
                    upload_output_datasets_to)

    def _calculate_strand_hashes(self, strands):
        """Calculate the hashes of the strands specified in the HASH_FUNCTIONS constant and store each one on the
        `<strand_name>_hash` attribute. A strand whose data is `None` gets a hash of `None`.

        :param dict strands: strand names mapped to strand data
        :return None:
        """
        for strand_name, strand_data in strands.items():
            # Strands without a hash function don't get a hash attribute at all.
            if strand_name not in HASH_FUNCTIONS:
                continue

            if strand_data is None:
                strand_hash = None
            else:
                strand_hash = HASH_FUNCTIONS[strand_name](strand_data)

            setattr(self, f"{strand_name}_hash", strand_hash)
Beispiel #28
0
    def test_backend_cannot_be_empty(self):
        """Check that a child whose `backend` field is an empty object fails children validation."""
        child_with_empty_backend = """[{"key": "gis", "id": "some-id", "backend": {}}]"""
        empty_twine = Twine()

        with self.assertRaises(exceptions.InvalidValuesContents):
            empty_twine.validate_children(source=child_with_empty_backend)
Beispiel #29
0
 def test_extra_children(self):
     """Check that validating the class's valid child value against an empty twine raises
     `InvalidValuesContents`.
     """
     empty_twine = Twine()

     with self.assertRaises(exceptions.InvalidValuesContents):
         empty_twine.validate_children(source=self.VALID_CHILD_VALUE)
Beispiel #30
0
 def test_missing_children(self):
     """Check that validating an empty list of children against a twine that declares children raises
     `InvalidValuesContents`.
     """
     no_children = []

     with self.assertRaises(exceptions.InvalidValuesContents):
         Twine(source=self.VALID_TWINE_WITH_CHILDREN).validate_children(source=no_children)