def test_extract_create_entities__success(self):
    '''
    Extract entities from the example payload with a matching schema.

    Checks that no extraction errors are reported, that at least one
    entity is returned, that every entity is tied to a known schema name,
    is marked as "Publishable" and exposes the same "id" as its payload,
    and that the generated ids are stored in the enrichment section
    under the key of the mapping that produced them.
    '''

    mapping_id = '#testing#'
    schemas = {'Person': EXAMPLE_SCHEMA}

    submission_data, entities = extractor.extract_create_entities(
        EXAMPLE_SOURCE_DATA,
        EXAMPLE_MAPPING,
        schemas,
        mapping_id,
    )

    errors = submission_data.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
    self.assertEqual(len(errors), 0)
    self.assertGreater(len(entities), 0)

    for extracted in entities:
        self.assertIn(extracted.schemadecorator_name, schemas)
        self.assertEqual(extracted.status, 'Publishable')
        self.assertEqual(extracted.id, extracted.payload['id'])

    # generated "id" saved for re-extractions
    expected_enrichment = {
        f'mapping:{mapping_id}': {
            'Person': {
                'id': [extracted.id for extracted in entities],
            },
        },
    }
    self.assertEqual(
        submission_data[extractor.ENTITY_EXTRACTION_ENRICHMENT],
        expected_enrichment,
    )
def test_extract_create_entities__validation_error(self):
    '''
    Extract entities against a schema whose field types do not match.

    Checks that validation errors are accumulated (two per person in the
    example payload) and that no entity is returned.
    '''

    # This schema shares the field names `id` and `name` with
    # EXAMPLE_SCHEMA. The field types differ though, so every extracted
    # person is expected to fail validation once per mismatching field.
    mismatching_id = {'name': 'id', 'type': 'int'}  # error 1
    mismatching_name = {  # error 2
        'name': 'name',
        'type': {
            'type': 'enum',
            'name': 'Name',
            'symbols': ['John', 'Jane'],
        },
    }
    errors_per_person = 2

    schemas = {
        'Person': {
            'type': 'record',
            'name': 'Test',
            'fields': [mismatching_id, mismatching_name],
        },
    }

    submission_data, entities = extractor.extract_create_entities(
        EXAMPLE_SOURCE_DATA,
        EXAMPLE_MAPPING,
        schemas,
        '#testing#',
    )

    errors = submission_data.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
    people = EXAMPLE_SOURCE_DATA['data']['people']
    expected_total = len(people) * errors_per_person

    self.assertEqual(len(errors), expected_total)
    self.assertEqual(len(entities), 0)
def test_extract_create_entities__no_requirements(self):
    '''
    Run the extraction with an empty mapping and no schemas.

    With neither paths nor entity references in the mapping there is
    nothing to extract: no errors and no entities are expected.
    '''

    empty_mapping = {'mapping': [], 'entities': {}}
    no_schemas = {}

    submission_data, entities = extractor.extract_create_entities(
        EXAMPLE_SOURCE_DATA,
        empty_mapping,
        no_schemas,
        '#testing#',
    )

    errors = submission_data.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
    self.assertEqual(len(errors), 0)
    self.assertEqual(len(entities), 0)
def test_extract_create_entities__error_not_a_uuid(self):
    '''
    Map a payload "id" that is not a UUID onto the entity "id".

    Checks that no entity is returned and that exactly one extraction
    error is reported whose description mentions the invalid uuid.
    '''

    payload = {'id': 'not-a-uuid', 'a': 1}
    mapping = {
        'entities': {'Test': str(uuid.uuid4())},
        'mapping': [
            ['$.id', 'Test.id'],
            ['$.a', 'Test.b'],
        ],
    }
    schemas = {
        'Test': {
            'type': 'record',
            'name': 'Test',
            'fields': [
                {'name': 'id', 'type': 'string'},
                {'name': 'b', 'type': 'int'},
            ],
        },
    }

    submission_data, entities = extractor.extract_create_entities(
        payload,
        mapping,
        schemas,
        '#testing#',
    )
    errors = submission_data.get(extractor.ENTITY_EXTRACTION_ERRORS, [])

    self.assertEqual(len(entities), 0)
    self.assertEqual(len(errors), 1)

    first_error = errors[0]
    self.assertIn('is not a valid uuid', first_error['description'])
def test_extract_create_entities__multiple(self):
    '''
    Assert that different mappings don't share IDs.

    The same payload is extracted twice, once per mapping (parents and
    children). The entity ids must differ between both runs, and the
    payload enrichment must hold a separate "Person" entry per mapping.
    '''

    payload = {
        'parents': [{'name': 'Father'}, {'name': 'Mother'}],
        'children': [{'name': 'Boy'}, {'name': 'Girl'}],
    }
    schemas = {
        'Person': {
            'name': 'Person',
            'type': 'record',
            'fields': [
                {'name': 'id', 'type': 'string'},
                {'name': 'name', 'type': ['null', 'string']},
            ],
        },
    }

    def person_mapping(names_path):
        # Both mappings are identical except for the path of the names.
        return {
            'entities': {'Person': '1'},
            'mapping': [
                ['#!uuid', 'Person.id'],
                [names_path, 'Person.name'],
            ],
        }

    __, parents = extractor.extract_create_entities(
        payload,
        person_mapping('parents[*].name'),
        schemas,
        '#testing-parent#',
    )
    __, children = extractor.extract_create_entities(
        payload,
        person_mapping('children[*].name'),
        schemas,
        '#testing-child#',
    )

    parent_ids = [parent.id for parent in parents]
    child_ids = [child.id for child in children]
    self.assertNotEqual(parent_ids, child_ids)

    # the enrichment is read from the payload handed to the extractor
    enrichment = payload[extractor.ENTITY_EXTRACTION_ENRICHMENT]
    parent_key = 'mapping:#testing-parent#'
    child_key = 'mapping:#testing-child#'

    self.assertIn('Person', enrichment[parent_key])
    self.assertIn('Person', enrichment[child_key])
    self.assertNotEqual(
        enrichment[parent_key]['Person'],
        enrichment[child_key]['Person'],
    )
def test_extract_create_entities__old_enrichment(self):
    '''
    Assert that old enrichment formats are migrated to new format.

    The payload starts with enrichment entries keyed directly by entity
    name. After the extraction, the entries of the entities handled by
    the mapping ("Parent" and "Child") must appear under the mapping key
    with their ids unchanged, while the entry of an entity that the
    mapping does not handle ("Another") must stay where it was.
    '''

    legacy_enrichment = {
        entity_name: {'id': [str(uuid.uuid4()) for _ in range(2)]}
        for entity_name in ('Parent', 'Child', 'Another')
    }

    payload = {
        'parents': [{'name': 'Father'}, {'name': 'Mother'}],
        'children': [{'name': 'Boy'}, {'name': 'Girl'}],
        # shallow copy: the top level dict is not shared with the payload
        extractor.ENTITY_EXTRACTION_ENRICHMENT: dict(legacy_enrichment),
    }
    mapping = {
        'entities': {'Parent': '1', 'Child': '2'},
        'mapping': [
            ['#!uuid', 'Parent.id'],
            ['parents[*].name', 'Parent.name'],
            ['#!uuid', 'Child.id'],
            ['children[*].name', 'Child.name'],
        ],
    }
    person_schema = {
        'name': 'Person',
        'type': 'record',
        'fields': [
            {'name': 'id', 'type': 'string'},
            {'name': 'name', 'type': ['null', 'string']},
        ],
    }
    schemas = {'Parent': person_schema, 'Child': person_schema}
    mapping_id = '#testing#'

    submission_data, entities = extractor.extract_create_entities(
        payload,
        mapping,
        schemas,
        mapping_id,
    )

    migrated = submission_data[extractor.ENTITY_EXTRACTION_ENRICHMENT]
    mapping_key = f'mapping:{mapping_id}'

    # entities handled by the mapping are moved below the mapping key
    for entity_name in ('Parent', 'Child'):
        self.assertNotIn(entity_name, migrated)
        self.assertEqual(
            migrated[mapping_key][entity_name],
            legacy_enrichment[entity_name],
        )

    # entities not handled by the mapping are left untouched
    self.assertIn('Another', migrated, 'no migrated yet')
    self.assertEqual(migrated['Another'], legacy_enrichment['Another'])
def test_extract_create_entities__enrichment(self):
    '''
    Assert that in case of re-extraction the extracted entities have
    the same ids as the time before for the same mappings.

    The same payload object is extracted three times:

    1. first round with mapping id "#testing#": four entities and no
       errors are expected, and the generated ids are stored in the
       enrichment under the mapping key.
    2. second round with the same mapping id: entities and enrichment
       must be equal to the first round ones.
    3. third round with mapping id "#another#": the entities must differ
       from the first round ones, the new mapping must get its own
       enrichment entry and the ids stored for "#testing#" must remain
       unchanged.
    '''

    submission_payload = {
        'parents': [{'name': 'Father'}, {'name': 'Mother'}],
        'children': [{'name': 'Boy'}, {'name': 'Girl'}],
    }
    mapping_definition = {
        'entities': {
            'Parent': '1',
            'Child': '2',
        },
        'mapping': [
            ['#!uuid', 'Parent.id'],
            ['parents[*].name', 'Parent.name'],
            ['#!uuid', 'Child.id'],
            ['children[*].name', 'Child.name'],
        ],
    }
    person_schema = {
        'name': 'Person',
        'type': 'record',
        'fields': [
            {'name': 'id', 'type': 'string'},
            {'name': 'name', 'type': ['null', 'string']},
        ],
    }
    schemas = {'Parent': person_schema, 'Child': person_schema}
    mapping_id = '#testing#'
    mapping_key = f'mapping:{mapping_id}'

    # first round
    submission_data, entities = extractor.extract_create_entities(
        submission_payload,
        mapping_definition,
        schemas,
        mapping_id,
    )
    submission_errors = submission_data.get(
        extractor.ENTITY_EXTRACTION_ERRORS, [])
    self.assertEqual(len(submission_errors), 0)
    self.assertEqual(len(entities), 4)

    # generated "id" saved for re-extractions
    enrichment = submission_data[extractor.ENTITY_EXTRACTION_ENRICHMENT]
    self.assertEqual(len(enrichment[mapping_key]['Parent']['id']), 2)
    self.assertEqual(len(enrichment[mapping_key]['Child']['id']), 2)
    self.assertNotEqual(
        enrichment[mapping_key]['Parent']['id'],
        enrichment[mapping_key]['Child']['id'],
    )

    # second round
    submission_data_2, entities_2 = extractor.extract_create_entities(
        submission_payload,
        mapping_definition,
        schemas,
        mapping_id,
    )
    self.assertEqual(entities, entities_2)
    enrichment_2 = submission_data_2[
        extractor.ENTITY_EXTRACTION_ENRICHMENT]
    self.assertEqual(enrichment, enrichment_2)

    # Copy the ids before the next round: the extraction results may
    # share objects with the payload, and comparing an object with
    # itself would prove nothing.
    parent_ids = list(enrichment[mapping_key]['Parent']['id'])
    child_ids = list(enrichment[mapping_key]['Child']['id'])

    # third round, different mapping
    submission_data_3, entities_3 = extractor.extract_create_entities(
        submission_payload,
        mapping_definition,
        schemas,
        '#another#',
    )
    self.assertNotEqual(entities, entities_3)

    # Inspect the outcome of the third round itself, not the outcome of
    # a previous round.
    enrichment_3 = submission_data_3[
        extractor.ENTITY_EXTRACTION_ENRICHMENT]
    self.assertIn('mapping:#another#', enrichment_3)
    self.assertIn(mapping_key, enrichment_3)
    self.assertEqual(enrichment_3[mapping_key]['Parent']['id'], parent_ids)
    self.assertEqual(enrichment_3[mapping_key]['Child']['id'], child_ids)