def test_validate_workflow_no_input(self):
    """Loading the WORKFLOW_NO_INPUT sample must fail validation.

    Delegates to ``generic_schema_validator``, which asserts that a
    ``ValidationError`` is raised and that its message contains "input".
    """
    MLSchema.populate_registry()
    self.generic_schema_validator(
        test_schema=SampleSchema.TEST.WORKFLOW_STEP,
        test_submission=SampleSubmissions.UNIT_TESTS.WORKFLOW_NO_INPUT,
        exception_type=ValidationError,
        exception_string="input",
    )
def test_validate_workflow_bad_semver(self):
    """Loading the WORKFLOW_BAD_SEMVER sample must fail validation.

    Delegates to ``generic_schema_validator``, which asserts that a
    ``ValidationError`` is raised and that its message contains "semver".
    """
    MLSchema.populate_registry()
    self.generic_schema_validator(
        test_schema=SampleSchema.TEST.WORKFLOW_STEP,
        test_submission=SampleSubmissions.UNIT_TESTS.WORKFLOW_BAD_SEMVER,
        exception_type=ValidationError,
        exception_string="semver",
    )
def test_validate_workflow_valid(self):
    """Loading the WORKFLOW_VALID sample succeeds and yields five entries
    under ``steps.step_name``.
    """
    MLSchema.populate_registry()
    loaded_workflow = self.generic_schema_validator(
        test_schema=SampleSchema.TEST.WORKFLOW_STEP,
        test_submission=SampleSubmissions.UNIT_TESTS.WORKFLOW_VALID,
        exception_type=None,
        exception_string=None,
    )
    step_entry = loaded_workflow["steps"]["step_name"]
    self.assertEqual(len(step_entry), 5)
def test_merge_two_dicts_with_invalid_base(self):
    """Creating a schema whose base_type is unregistered raises RegistryError."""
    # Make sure the base schema is absent from the marshmallow registry.
    # A missing entry is fine: the only goal is that it is not registered
    # when the DATAPATH schema is created below.
    marshmallow.class_registry._registry.pop("0_0_1_base", None)

    # Should not work - the schema declares a base_type that has not been
    # registered.
    with self.assertRaises(RegistryError):
        MLSchema.create_schema(SampleSchema.SCHEMAS.DATAPATH)
def generic_schema_validator(
    self, test_schema, test_submission, exception_type=None, exception_string=None
) -> MLObject:
    """Create a schema from ``test_schema`` and load ``test_submission`` into it.

    Args:
        test_schema: Schema text; wrapped with
            ``wrap_schema_with_mlschema_info`` before being passed to
            ``MLSchema.create_schema``.
        test_submission: Submission text, or ``None`` to only exercise
            schema creation. Wrapped with
            ``wrap_submission_with_mlschema_info`` and converted with
            ``convert_yaml_to_dict`` before loading.
        exception_type: Exception class expected from schema creation or
            from loading the submission; ``None`` when no failure is expected.
        exception_string: Substring that must appear in the message of the
            expected exception. When ``None`` the message is printed instead.

    Returns:
        The result of ``instantiated_schema.load(...)`` when no exception was
        expected and a submission was supplied; ``None`` otherwise.
    """
    error_string = None
    instantiated_schema = None

    try:
        instantiated_schema = MLSchema.create_schema(
            self.wrap_schema_with_mlschema_info(test_schema)
        )
    except Exception as e:
        if exception_type is None:
            # No failure was expected. Re-raise the real error instead of
            # letting isinstance(e, None) replace it with a TypeError.
            raise
        self.assertIsInstance(e, exception_type)
        error_string = str(e)

    # Only attempt to load when schema creation succeeded; otherwise there
    # is no schema object to load into.
    if error_string is None and test_submission is not None:
        yaml_submission = convert_yaml_to_dict(
            self.wrap_submission_with_mlschema_info(test_submission)
        )

        if exception_type is None:
            return instantiated_schema.load(yaml_submission)

        with self.assertRaises(exception_type) as context:
            instantiated_schema.load(yaml_submission)
        error_string = str(context.exception)

    if error_string is None:
        # Nothing was loaded and nothing failed.
        if exception_type is not None:
            self.fail(
                "Expected %s but schema creation succeeded and no "
                "submission was supplied." % exception_type.__name__
            )
        return None

    # An expected exception was raised: check (or just show) its message.
    if exception_string is not None:
        self.assertIn(exception_string, error_string)
    else:
        print(error_string)  # No expected text given, print it out

    return None
def test_validate_constraints_constraint_valid_modulo(self):
    """The OPERATOR_VALID_MODULO_2 schema declares ``num`` as an Integer field."""
    wrapped_schema = self.wrap_schema_with_mlschema_info(
        SampleSchema.TEST.OPERATOR_VALID_MODULO_2
    )
    modulo_schema = MLSchema.create_schema(wrapped_schema)
    num_field = modulo_schema.declared_fields["num"]
    self.assertIsInstance(num_field, fields.Integer)
def test_cascading_inheritence(self):
    """Populate a 0.0.1 ``data_version_control`` MLObject and validate it."""
    MLSchema.populate_registry()

    dvc_object = MLObject()
    dvc_object.set_type("0.0.1", "data_version_control")

    # Run identification and timestamp.
    dvc_object.run_id = uuid.uuid4()
    dvc_object.step_id = uuid.uuid4()
    dvc_object.run_date = datetime.datetime.now()

    # Data store and connection settings (placeholder credentials).
    dvc_object.data_store = "I_am_a_datastore"
    dvc_object.storage_connection_type = "AWS_BLOB"
    dvc_object.connection.endpoint = "con_endpoint"
    dvc_object.connection.access_key_id = "AKIAIOSFODNN7EXAMPLE"
    dvc_object.connection.secret_access_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"

    dvc_object.dvc_hash = "923caceea54b38177505632f5612cc569a49b22246e346a7"

    # NOTE(review): the result of validate() is not inspected here; confirm
    # that validate() raises on failure, otherwise this test asserts nothing.
    dvc_object.validate()
# Builds a schema (registered as "0_0_1_datapath") from inline YAML that has a
# nested "connection" field plus a top-level "one_more_field", loads a matching
# submission, and checks that connection.endpoint and one_more_field equal the
# values in the parsed submission. It then removes "endpoint" and, separately,
# "one_more_field" from fresh copies of the submission and expects
# ValidationError from load() in both cases.
# NOTE(review): nested_schema.load() is given the raw YAML string
# (connection_submission), not connection_submission_dict - confirm load()
# accepts YAML text.
# NOTE(review): the line breaks/indentation of the YAML literals are not
# visible in this view; the literals are left untouched on purpose.
def test_create_nested_schema(self): connection_text = """ mlspec_schema_version: # Identifies the version of this schema meta: 0.0.1 mlspec_schema_type: # Identifies the type of this schema meta: datapath # Connection to datapath schema_version: type: semver required: True schema_type: type: string required: True connection: type: nested schema: # URI for the location of the data store endpoint: type: URI required: True one_more_field: type: String required: True""" nested_schema = MLSchema.create_schema(connection_text, "0_0_1_datapath") connection_submission = """ schema_version: 0.0.1 schema_type: datapath connection: endpoint: S3://mybucket/puppy.jpg one_more_field: foobaz """ connection_submission_dict = convert_yaml_to_dict(connection_submission) nested_object = nested_schema.load(connection_submission) self.assertTrue( nested_object["connection"]["endpoint"] == connection_submission_dict["connection"]["endpoint"] ) self.assertTrue( nested_object["one_more_field"] == connection_submission_dict["one_more_field"] ) nested_missing_endpoint_dict = convert_yaml_to_dict(connection_submission) nested_missing_endpoint_dict["connection"].pop("endpoint", None) with self.assertRaises(ValidationError): nested_schema.load(nested_missing_endpoint_dict) missing_extra_dict = convert_yaml_to_dict(connection_submission) missing_extra_dict.pop("one_more_field", None) with self.assertRaises(ValidationError): nested_schema.load(missing_extra_dict)
# Expects MLSchema.create_schema() to raise AttributeError when given the
# inline YAML below.
# NOTE(review): per the test name the trigger is the indentation inside the
# literal; that whitespace is not visible in this view, so the literal must be
# kept exactly as it is in the file.
def test_incorrectly_indented_yaml(self): bad_yaml_string = """ mlspec_schema_version: # Identifies the version of this schema meta: 0.0.1 mlspec_schema_type: # Identifies the type of this schema meta: datapath connection: type: nested schema: # URI for the location of the data store endpoint: type: URI required: True""" with self.assertRaises(AttributeError): MLSchema.create_schema(bad_yaml_string)
def test_load_full_base_schema(self):
    """Load the full BASE submission, then check ``run_date`` is required.

    The loaded ``run_date`` must match the submitted one (compared via
    ``isoformat()``); after removing ``run_date`` from the submission,
    loading must raise ``ValidationError``.
    """
    base_schema = MLSchema.create_schema(SampleSchema.SCHEMAS.BASE)
    submission_dict = convert_yaml_to_dict(
        SampleSubmissions.FULL_SUBMISSIONS.BASE
    )

    loaded_submission = base_schema.load(submission_dict)
    # Use the TestCase assertion rather than a bare ``assert`` so the check
    # still runs under ``python -O`` and reports both values on failure.
    self.assertEqual(
        loaded_submission["run_date"].isoformat(),
        submission_dict["run_date"].isoformat(),
    )

    submission_dict.pop("run_date", None)
    with self.assertRaises(ValidationError):
        base_schema.load(submission_dict)