def test_exception_raised_when_strand_data_missing(self):
    """Ensure an error is raised when a strand defined in the twine has no data provided for it."""
    runner = Runner(
        app_src=mock_app,
        twine="""{
            "configuration_values_schema": {
                "type": "object",
                "properties": {
                    "n_iterations": {
                        "type": "integer"
                    }
                }
            },
            "input_values_schema": {
                "type": "object",
                "properties": {
                    "height": {
                        "type": "integer"
                    }
                },
                "required": ["height"]
            }
        }""",
        configuration_values={"n_iterations": 5},
    )

    with self.assertRaises(twined.exceptions.TwineValueException) as error:
        runner.run()

    self.assertIn(
        "The 'input_values' strand is defined in the twine, but no data is provided in sources",
        error.exception.args[0],
    )

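# For reference, a minimal sketch of what the `mock_app` used throughout these tests might look like. Its actual
# definition lives elsewhere in the test suite, so the body below is an assumption - but, as `test_run_output_values_validation`
# shows, any callable accepting the analysis object satisfies the `app_src` parameter.
def mock_app(analysis):
    """A hypothetical no-op app that accepts the analysis object and does nothing with it."""
    pass
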
def test_error_raised_when_required_tags_missing_for_validate_input_manifest(self):
    """Test that an error is raised when required tags from the file tags template for a dataset are missing when
    validating the input manifest.
    """
    with tempfile.TemporaryDirectory() as temporary_directory:
        dataset_path = os.path.join(temporary_directory, "met_mast_data")

        # Make a datafile with no tags.
        with Datafile(os.path.join(dataset_path, "my_file_0.txt"), mode="w") as (datafile, f):
            f.write("hello")

        input_manifest = {
            "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
            "datasets": {
                "met_mast_data": dataset_path,
            },
        }

        runner = Runner(app_src=app, twine=self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE)

        with self.assertRaises(twined.exceptions.InvalidManifestContents):
            runner.run(input_manifest=input_manifest)

def test_validate_input_manifest_with_required_tags_for_remote_tag_template_schema(self):
    """Test that a remote tag template can be used for validating tags on the datafiles in a manifest."""
    schema_url = "https://refs.schema.octue.com/octue/my-file-type-tag-template/0.0.0.json"

    twine_with_input_manifest_with_remote_tag_template = {
        "input_manifest": {
            "datasets": {
                "met_mast_data": {
                    "purpose": "A dataset containing meteorological mast data",
                    "file_tags_template": {"$ref": schema_url},
                }
            }
        },
        "output_values_schema": {},
    }

    remote_schema = {
        "type": "object",
        "properties": {
            "manufacturer": {"type": "string"},
            "height": {"type": "number"},
            "is_recycled": {"type": "boolean"},
        },
        "required": ["manufacturer", "height", "is_recycled"],
    }

    runner = Runner(app_src=app, twine=twine_with_input_manifest_with_remote_tag_template)

    original_resolve_from_url = copy.copy(RefResolver.resolve_from_url)

    def patch_if_url_is_schema_url(instance, url):
        """Patch the jsonschema validator `RefResolver.resolve_from_url` if the url is the schema URL, otherwise
        leave it unpatched.

        :param jsonschema.validators.RefResolver instance:
        :param str url:
        :return mixed:
        """
        if url == schema_url:
            return remote_schema
        else:
            return original_resolve_from_url(instance, url)

    with patch("jsonschema.validators.RefResolver.resolve_from_url", new=patch_if_url_is_schema_url):
        with tempfile.TemporaryDirectory() as temporary_directory:
            input_manifest = self._make_serialised_input_manifest_with_correct_dataset_file_tags(temporary_directory)
            runner.run(input_manifest=input_manifest)

def test_run_output_values_validation(self):
    """Ensure that an analysis's output values are validated against the twine's output values schema on
    finalisation.
    """
    twine = """
        {
            "output_values_schema": {
                "type": "object",
                "required": ["n_iterations"],
                "properties": {
                    "n_iterations": {
                        "type": "integer"
                    }
                }
            }
        }
    """
    runner = Runner(app_src=mock_app, twine=twine)

    # Test for failure with an incorrect output.
    with self.assertRaises(twined.exceptions.TwineValueException):
        runner.run().finalise()

    # Test for success with a valid output.
    def fcn(analysis):
        analysis.output_values["n_iterations"] = 10

    Runner(app_src=fcn, twine=twine).run().finalise()

def test_invalid_app_directory(self):
    """Ensure an error containing the searched location is raised if the app source can't be found."""
    runner = Runner(app_src="..", twine="{}")

    with self.assertRaises(ModuleNotFoundError) as e:
        runner.run()

    # The assertions must sit outside the `assertRaises` block (anything after the raising call inside it never
    # runs), and the raised exception is accessed via the context manager's `exception` attribute.
    self.assertTrue("No module named 'app'" in e.exception.msg)
    self.assertTrue(os.path.abspath(runner.app_source) in e.exception.msg)

def test_validate_input_manifest_with_required_tags(self):
    """Test that validating an input manifest with required tags from the file tags template for a dataset works
    for tags meeting the requirements.
    """
    runner = Runner(app_src=app, twine=self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE)

    with tempfile.TemporaryDirectory() as temporary_directory:
        input_manifest = self._make_serialised_input_manifest_with_correct_dataset_file_tags(temporary_directory)
        runner.run(input_manifest=input_manifest)

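# A hedged sketch of the `_make_serialised_input_manifest_with_correct_dataset_file_tags` helper used above and in
# the remote tag template test. Its real implementation lives elsewhere in this test class; the tag values below are
# assumptions chosen to satisfy the file tags template exercised by these tests.
def _make_serialised_input_manifest_with_correct_dataset_file_tags(self, temporary_directory):
    """Make a serialised input manifest whose dataset contains a datafile with tags meeting the template's
    requirements.
    """
    dataset_path = os.path.join(temporary_directory, "met_mast_data")

    # Make a datafile with tags of the types the template requires.
    with Datafile(
        path=os.path.join(dataset_path, "my_file_0.txt"),
        tags={"manufacturer": "Vestas", "height": 350, "is_recycled": False},
        mode="w",
    ) as (datafile, f):
        f.write("hello")

    return {
        "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
        "datasets": {"met_mast_data": dataset_path},
    }
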
def test_fractal_template_with_default_configuration(self):
    """Ensure the `fractal` app can be configured with its default configuration and run."""
    self.set_template("template-fractal")

    runner = Runner(
        app_src=self.template_path,
        twine=self.template_twine,
        configuration_values=os.path.join("data", "configuration", "configuration_values.json"),
    )

    runner.run()

def test_validate_input_manifest_raises_error_if_required_tags_are_not_of_required_type(self):
    """Test that an error is raised if the required tags from the file tags template for a dataset are present but
    are not of the required type when validating an input manifest.
    """
    with tempfile.TemporaryDirectory() as temporary_directory:
        dataset_path = os.path.join(temporary_directory, "met_mast_data")

        input_manifest = {
            "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
            "datasets": {
                "met_mast_data": dataset_path,
            },
        }

        runner = Runner(app_src=app, twine=self.TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE)

        for tags in (
            {"manufacturer": "Vestas", "height": 350, "is_recycled": False, "number_of_blades": "3"},
            {"manufacturer": "Vestas", "height": 350, "is_recycled": "no", "number_of_blades": 3},
            {"manufacturer": False, "height": 350, "is_recycled": "false", "number_of_blades": 3},
        ):
            with self.subTest(tags=tags):
                # Make a datafile with the given tags.
                with Datafile(
                    path=os.path.join(dataset_path, "my_file_0.txt"),
                    tags=tags,
                    mode="w",
                ) as (datafile, f):
                    f.write("hello")

                with self.assertRaises(twined.exceptions.InvalidManifestContents):
                    runner.run(input_manifest=input_manifest)

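# For context, a plausible reconstruction of the `TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE` class attribute
# referenced by several of these tests. This is an assumption modelled on the inline remote tag template twine above
# and on the tag names and types the tests exercise; the real constant is defined elsewhere in this test class.
TWINE_WITH_INPUT_MANIFEST_STRAND_WITH_TAG_TEMPLATE = {
    "input_manifest": {
        "datasets": {
            "met_mast_data": {
                "purpose": "A dataset containing meteorological mast data",
                "file_tags_template": {
                    "type": "object",
                    "properties": {
                        "manufacturer": {"type": "string"},
                        "height": {"type": "number"},
                        "is_recycled": {"type": "boolean"},
                    },
                    "required": ["manufacturer", "height", "is_recycled"],
                },
            }
        }
    },
    "output_values_schema": {},
}
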
def test_using_manifests_template(self):
    """Ensure the `using-manifests` template app works correctly."""
    self.set_template("template-using-manifests")

    runner = Runner(
        app_src=self.template_path,
        twine=self.template_twine,
        configuration_values=os.path.join("data", "configuration", "values.json"),
    )

    with patch("google.cloud.storage.blob.Blob.generate_signed_url", new=mock_generate_signed_url):
        analysis = runner.run(input_manifest=os.path.join("data", "input", "manifest.json"))

    # Test that the signed URLs for the dataset and its files work and can be used to reinstantiate the output
    # manifest after serialisation.
    downloaded_output_manifest = Manifest.deserialise(analysis.output_manifest.to_primitive())

    self.assertEqual(
        downloaded_output_manifest.datasets["cleaned_met_mast_data"].labels,
        {"mast", "cleaned", "met"},
    )

    self.assertEqual(
        urlparse(downloaded_output_manifest.datasets["cleaned_met_mast_data"].files.one().cloud_path).path,
        f"/{TEST_BUCKET_NAME}/output/test_using_manifests_analysis/cleaned_met_mast_data/cleaned.csv",
    )

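# A hedged sketch of the `mock_generate_signed_url` patch used here and in `test_app` below. The real helper is
# imported from the test utilities; this assumed version just builds a deterministic URL from the blob's bucket and
# name so that no real signing credentials are needed.
def mock_generate_signed_url(blob, *args, **kwargs):
    """Return a fake signed URL for the given blob without contacting Google Cloud."""
    return f"https://storage.googleapis.com/{blob.bucket.name}/{blob.name}?signature=mocked"
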
def test_run_with_configuration_passes(self):
    """Ensure that runs can be made with configuration only."""
    runner = Runner(
        app_src=mock_app,
        twine="""{
            "configuration_values_schema": {
                "type": "object",
                "properties": {
                    "n_iterations": {
                        "type": "integer"
                    }
                }
            }
        }""",
        configuration_values="{}",
    )

    runner.run()

def test_runner_with_credentials(self):
    """Test that credentials can be used with Runner."""
    with patch.dict(os.environ, {"LOCAL_CREDENTIAL": "my-secret"}):
        runner = Runner(
            app_src=mock_app,
            twine="""
                {
                    "credentials": [
                        {
                            "name": "LOCAL_CREDENTIAL",
                            "purpose": "Token for accessing a 3rd party API service"
                        }
                    ]
                }
            """,
        )

        runner.run()
        self.assertEqual(os.environ["LOCAL_CREDENTIAL"], "my-secret")

def test_runner_with_google_secret_credentials(self):
    """Test that credentials can be found locally and populated into the environment from Google Cloud Secret
    Manager.
    """
    with patch.dict(os.environ, {"LOCAL_CREDENTIAL": "my-secret"}):
        runner = Runner(
            app_src=mock_app,
            twine="""
                {
                    "credentials": [
                        {
                            "name": "LOCAL_CREDENTIAL",
                            "purpose": "Token for accessing a 3rd party API service"
                        },
                        {
                            "name": "CLOUD_CREDENTIALS",
                            "purpose": "Token for accessing another 3rd party API service"
                        }
                    ]
                }
            """,
        )

        class MockAccessSecretVersionResponse:
            payload = Mock()
            payload.data = b"My precious!"

        with patch(
            "google.cloud.secretmanager_v1.services.secret_manager_service.client.SecretManagerServiceClient"
            ".access_secret_version",
            return_value=MockAccessSecretVersionResponse(),
        ):
            runner.run()

        # Check that the first secret is still present and that the Google Cloud secret is now in the environment.
        self.assertEqual(os.environ["LOCAL_CREDENTIAL"], "my-secret")
        self.assertEqual(os.environ["CLOUD_CREDENTIALS"], "My precious!")

def asker(analysis_id, **kwargs):
    logger.debug("APPLICATION PATH: %s", settings.APPLICATION_PATH)
    analysis_group_name = f"analysis-{analysis_id}"
    ReelMessage(action="ask", status="started", value=analysis_id).group_send(analysis_group_name)
    # TODO get configuration_values and configuration_manifest out at server start
    logger.debug("SENT MESSAGE TO %s", analysis_group_name)

    try:
        runner = Runner(
            twine=os.path.join(settings.APPLICATION_PATH, "twine.json"),
            configuration_values='{"analysis_program": "kuethe_chow"}',
        )
        # TODO get a log handler and add it to the run
        # TODO get a monitor handler and add it to the run
        analysis = runner.run(app_src=settings.APPLICATION_PATH, **kwargs)

        print("\n\n\nDONE WITH ANALYSIS\n\n\n")
        print("\n\n\nREELING OUTPUTS\n\n\n")

        # TODO fix https://github.com/octue/octue-sdk-python/issues/19 then you can remove this horrifying thing
        kwargs = {}
        for k in OUTPUT_STRANDS:
            att = getattr(analysis, k, None)
            if att is not None:
                att = json.dumps(att, cls=OctueJSONEncoder)
            kwargs[k] = att

        # print('KWARGS', kwargs)
        # print('\n\n\nREELED OUTPUTS\n\n\n')

        # Create the completion message
        ReelMessage(action="ask", status="complete", value=analysis_id, **kwargs).group_send(analysis_group_name)

    except Exception as e:
        print("\n\n\nRAISING REEL ERROR\n\n\n")
        # TODO Except TwinedExceptions and always forward to the client, but any other exceptions only forward if
        #  the user has admin privilege
        ReelMessage(action="ask", status="error", value=analysis_id, hints=e.args[0]).group_send(analysis_group_name)
        raise e

def test_app(self):
    """Test that the app takes in input in the correct format and returns an analysis with the correct output
    values.
    """
    runner = Runner(app_src=REPOSITORY_ROOT, twine=TWINE_PATH)

    with patch("google.cloud.storage.blob.Blob.generate_signed_url", mock_generate_signed_url):
        analysis = runner.run(input_values={"n_iterations": 3})

    # Check the output values.
    self.assertEqual(analysis.output_values, [1, 2, 3, 4, 5])

    # Test that the signed URLs for the dataset and its files work and can be used to reinstantiate the output
    # manifest after serialisation.
    downloaded_output_manifest = Manifest.deserialise(analysis.output_manifest.to_primitive())

    # Check that the output dataset and its files can be accessed.
    with downloaded_output_manifest.datasets["example_dataset"].files.one() as (datafile, f):
        self.assertEqual(f.read(), "This is some example service output.")

def test_output_manifest_is_not_none(self):
    """Ensure the output manifest of an analysis is not None if an output manifest is defined in the twine."""
    runner = Runner(
        app_src=mock_app,
        twine="""
            {
                "output_manifest": {
                    "datasets": {
                        "open_foam_result": {
                            "purpose": "A dataset containing solution fields of an openfoam case."
                        },
                        "airfoil_cp_values": {
                            "purpose": "A file containing cp values"
                        }
                    }
                }
            }
        """,
    )

    analysis = runner.run()
    self.assertIsNotNone(analysis.output_manifest)

def test_validate_input_manifest_with_required_tags_in_several_datasets(self):
    """Test that required tags for different datasets' file tags templates are validated separately and correctly
    for each dataset.
    """
    twine_with_input_manifest_with_required_tags_for_multiple_datasets = {
        "input_manifest": {
            "datasets": {
                "first_dataset": {
                    "purpose": "A dataset containing meteorological mast data",
                    "file_tags_template": {
                        "type": "object",
                        "properties": {
                            "manufacturer": {"type": "string"},
                            "height": {"type": "number"},
                        },
                    },
                },
                "second_dataset": {
                    "file_tags_template": {
                        "type": "object",
                        "properties": {
                            "is_recycled": {"type": "boolean"},
                            "number_of_blades": {"type": "number"},
                        },
                    }
                },
            }
        },
        "output_values_schema": {},
    }

    with tempfile.TemporaryDirectory() as temporary_directory:
        dataset_paths = (
            os.path.join(temporary_directory, "first_dataset"),
            os.path.join(temporary_directory, "second_dataset"),
        )

        input_manifest = {
            "id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
            "datasets": {
                "first_dataset": dataset_paths[0],
                "second_dataset": dataset_paths[1],
            },
        }

        with Datafile(
            path=os.path.join(dataset_paths[0], "file_0.csv"),
            tags={"manufacturer": "vestas", "height": 503.7},
            mode="w",
        ) as (datafile, f):
            f.write("hello")

        with Datafile(
            path=os.path.join(dataset_paths[1], "file_1.csv"),
            tags={"is_recycled": True, "number_of_blades": 3},
            mode="w",
        ) as (datafile, f):
            f.write("hello")

        runner = Runner(app_src=app, twine=twine_with_input_manifest_with_required_tags_for_multiple_datasets)
        runner.run(input_manifest=input_manifest)

def test_child_services_template(self):
    """Ensure the child services template works correctly (i.e. that children can be accessed by a parent and data
    collected from them). This template has a parent app and two children - an elevation app and a wind speed app.
    The parent sends coordinates to both children, receiving the elevation and wind speed from them at these
    locations.
    """
    cli_path = os.path.join(REPOSITORY_ROOT, "octue", "cli.py")
    self.set_template("template-child-services")

    elevation_service_path = os.path.join(self.template_path, "elevation_service")
    elevation_service_id = f"elevation-service-{uuid.uuid4()}"

    with tempfile.NamedTemporaryFile() as elevation_service_configuration:
        with open(os.path.join(self.template_path, "elevation_service", "octue.yaml")) as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)

        config["services"][0]["name"] = elevation_service_id

        with open(elevation_service_configuration.name, "w") as f:
            yaml.dump(config, f)

        elevation_process = subprocess.Popen(
            [
                sys.executable,
                cli_path,
                "start",
                f"--service-config={elevation_service_configuration.name}",
                "--rm",
            ],
            cwd=elevation_service_path,
        )

    wind_speed_service_path = os.path.join(self.template_path, "wind_speed_service")
    wind_speed_service_id = f"wind-speed-service-{uuid.uuid4()}"

    with tempfile.NamedTemporaryFile() as wind_speed_service_configuration:
        with open(os.path.join(self.template_path, "wind_speed_service", "octue.yaml")) as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)

        config["services"][0]["name"] = wind_speed_service_id

        with open(wind_speed_service_configuration.name, "w") as f:
            yaml.dump(config, f)

        wind_speed_process = subprocess.Popen(
            [
                sys.executable,
                cli_path,
                "start",
                f"--service-config={wind_speed_service_configuration.name}",
                "--rm",
            ],
            cwd=wind_speed_service_path,
        )

    parent_service_path = os.path.join(self.template_path, "parent_service")

    with open(os.path.join(parent_service_path, "app_configuration.json")) as f:
        children = json.load(f)["children"]

    children[0]["id"] = wind_speed_service_id
    children[1]["id"] = elevation_service_id

    with ProcessesContextManager(processes=(elevation_process, wind_speed_process)):
        runner = Runner(
            app_src=parent_service_path,
            twine=os.path.join(parent_service_path, "twine.json"),
            children=children,
            service_id="template-child-services/parent-service",
        )

        analysis = runner.run(input_values=os.path.join(parent_service_path, "data", "input", "values.json"))

    self.assertTrue("elevations" in analysis.output_values)
    self.assertTrue("wind_speeds" in analysis.output_values)
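

# A minimal sketch, under stated assumptions, of the `ProcessesContextManager` used above: a context manager that
# terminates the given subprocesses on exit so the child services don't outlive the test. The real implementation is
# imported from the test utilities and may differ.
class ProcessesContextManager:
    """A hypothetical context manager that terminates its subprocesses on exit."""

    def __init__(self, processes):
        self.processes = processes

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Terminate every managed subprocess, whether or not an exception occurred.
        for process in self.processes:
            process.terminate()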