def test_extract_create_entities__success(self):
        '''
        Happy path: extracting with the example payload, mapping and schema
        must report no errors, yield at least one entity, and store the
        generated ids in the enrichment section of the returned data.
        '''
        mapping_id = '#testing#'
        known_schemas = {'Person': EXAMPLE_SCHEMA}

        result, extracted = extractor.extract_create_entities(
            EXAMPLE_SOURCE_DATA,
            EXAMPLE_MAPPING,
            known_schemas,
            mapping_id,
        )

        errors = result.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
        self.assertEqual(len(errors), 0)
        self.assertGreater(len(extracted), 0)

        # every entity belongs to a known schema, is publishable and
        # exposes the same id as the one inside its payload
        for item in extracted:
            self.assertIn(item.schemadecorator_name, known_schemas)
            self.assertEqual(item.status, 'Publishable')
            self.assertEqual(item.id, item.payload['id'])

        # generated "id" saved for re-extractions
        actual_enrichment = result[extractor.ENTITY_EXTRACTION_ENRICHMENT]
        expected_enrichment = {
            f'mapping:{mapping_id}': {
                'Person': {'id': [item.id for item in extracted]},
            },
        }
        self.assertEqual(actual_enrichment, expected_enrichment)
 def test_extract_create_entities__validation_error(self):
     '''
     Extraction against a schema that the example data cannot satisfy
     must accumulate validation errors (two per person in the example
     data) and must not produce any entity.
     '''
     # Like EXAMPLE_SCHEMA, this schema declares the fields `id` and
     # `name`, but with different types, so each of the two fields is
     # expected to trigger a validation error during extraction.
     id_field = {'name': 'id', 'type': 'int'}  # error 1
     name_field = {  # error 2
         'name': 'name',
         'type': {
             'type': 'enum',
             'name': 'Name',
             'symbols': ['John', 'Jane'],
         },
     }
     mismatched_schema = {
         'type': 'record',
         'name': 'Test',
         'fields': [id_field, name_field],
     }
     errors_per_person = 2

     result, extracted = extractor.extract_create_entities(
         EXAMPLE_SOURCE_DATA,
         EXAMPLE_MAPPING,
         {'Person': mismatched_schema},
         '#testing#',
     )

     errors = result.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
     people = EXAMPLE_SOURCE_DATA['data']['people']
     self.assertEqual(len(errors), len(people) * errors_per_person)
     self.assertEqual(len(extracted), 0)
 def test_extract_create_entities__no_requirements(self):
     '''
     With an empty mapping (no paths, no entity references) and no
     schemas, the extraction is expected to return neither errors nor
     entities.
     '''
     empty_mapping = {'mapping': [], 'entities': {}}

     result, extracted = extractor.extract_create_entities(
         EXAMPLE_SOURCE_DATA,
         empty_mapping,
         {},
         '#testing#',
     )

     errors = result.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
     self.assertEqual(len(errors), 0)
     self.assertEqual(len(extracted), 0)
 def test_extract_create_entities__error_not_a_uuid(self):
     '''
     When the value mapped to the entity `id` is not a valid UUID, no
     entity is expected and exactly one error is reported, whose
     description mentions the invalid uuid.
     '''
     payload = {'id': 'not-a-uuid', 'a': 1}
     # the schema itself accepts any string as `id`
     test_schema = {
         'type': 'record',
         'name': 'Test',
         'fields': [
             {'name': 'id', 'type': 'string'},
             {'name': 'b', 'type': 'int'},
         ],
     }
     mapping = {
         'entities': {'Test': str(uuid.uuid4())},
         'mapping': [
             ['$.id', 'Test.id'],
             ['$.a', 'Test.b'],
         ],
     }

     result, extracted = extractor.extract_create_entities(
         payload,
         mapping,
         {'Test': test_schema},
         '#testing#',
     )

     errors = result.get(extractor.ENTITY_EXTRACTION_ERRORS, [])
     self.assertEqual(len(extracted), 0)
     self.assertEqual(len(errors), 1)
     first_error = errors[0]
     self.assertIn('is not a valid uuid', first_error['description'])
    def test_extract_create_entities__multiple(self):
        '''
        Two different mappings applied to the same submission must not
        share generated ids: parents and children are both extracted as
        `Person` entities, each under its own mapping id.
        '''
        payload = {
            'parents': [{'name': 'Father'}, {'name': 'Mother'}],
            'children': [{'name': 'Boy'}, {'name': 'Girl'}],
        }
        schemas = {
            'Person': {
                'name': 'Person',
                'type': 'record',
                'fields': [
                    {'name': 'id', 'type': 'string'},
                    {'name': 'name', 'type': ['null', 'string']},
                ],
            },
        }

        def person_mapping(names_path):
            # Mapping that creates one `Person` per name found at the path.
            return {
                'entities': {'Person': '1'},
                'mapping': [
                    ['#!uuid', 'Person.id'],
                    [names_path, 'Person.name'],
                ],
            }

        parent_mapping = person_mapping('parents[*].name')
        child_mapping = person_mapping('children[*].name')

        _, parents = extractor.extract_create_entities(
            payload,
            parent_mapping,
            schemas,
            '#testing-parent#',
        )
        _, children = extractor.extract_create_entities(
            payload,
            child_mapping,
            schemas,
            '#testing-child#',
        )

        parent_ids = [parent.id for parent in parents]
        child_ids = [child.id for child in children]
        self.assertNotEqual(parent_ids, child_ids)

        # The enrichment is looked up on the payload handed to the
        # extractor (which was built without that key), not on the
        # returned data, so this relies on the extractor writing the
        # enrichment into the payload it receives.
        enrichment = payload[extractor.ENTITY_EXTRACTION_ENRICHMENT]
        parent_entry = enrichment['mapping:#testing-parent#']
        self.assertIn('Person', parent_entry)
        child_entry = enrichment['mapping:#testing-child#']
        self.assertIn('Person', child_entry)
        self.assertNotEqual(parent_entry['Person'], child_entry['Person'])
    def test_extract_create_entities__old_enrichment(self):
        '''
        Enrichment stored in the old layout (keyed directly by entity name)
        is expected to be moved under the `mapping:<id>` key for the
        entities of the current mapping, while entries for entity names
        not in the mapping stay at the top level untouched.
        '''
        def two_ids():
            # Old-style enrichment entry holding two random ids.
            return {'id': [str(uuid.uuid4()), str(uuid.uuid4())]}

        legacy = {
            'Parent': two_ids(),
            'Child': two_ids(),
            'Another': two_ids(),
        }
        payload = {
            'parents': [{'name': 'Father'}, {'name': 'Mother'}],
            'children': [{'name': 'Boy'}, {'name': 'Girl'}],
            # top-level copy, so `legacy` keeps all its keys even if the
            # payload's enrichment is rearranged
            extractor.ENTITY_EXTRACTION_ENRICHMENT: dict(legacy),
        }
        mapping = {
            'entities': {'Parent': '1', 'Child': '2'},
            'mapping': [
                ['#!uuid', 'Parent.id'],
                ['parents[*].name', 'Parent.name'],
                ['#!uuid', 'Child.id'],
                ['children[*].name', 'Child.name'],
            ],
        }
        person = {
            'name': 'Person',
            'type': 'record',
            'fields': [
                {'name': 'id', 'type': 'string'},
                {'name': 'name', 'type': ['null', 'string']},
            ],
        }
        mapping_id = '#testing#'

        result, _ = extractor.extract_create_entities(
            payload,
            mapping,
            {'Parent': person, 'Child': person},
            mapping_id,
        )

        migrated = result[extractor.ENTITY_EXTRACTION_ENRICHMENT]
        mapping_key = f'mapping:{mapping_id}'
        # entities of this mapping: gone from the top level, kept as-is
        # under the mapping key
        for entity_name in ('Parent', 'Child'):
            self.assertNotIn(entity_name, migrated)
            self.assertEqual(migrated[mapping_key][entity_name],
                             legacy[entity_name])
        # entity outside of this mapping: still in the old place
        self.assertIn('Another', migrated, 'no migrated yet')
        self.assertEqual(migrated['Another'], legacy['Another'])
    def test_extract_create_entities__enrichment(self):
        '''
        Assert that in case of re-extraction the extracted entities are
        the same as the time before for the same mapping, and that a
        different mapping id yields different entities.
        '''
        submission_payload = {
            'parents': [{'name': 'Father'}, {'name': 'Mother'}],
            'children': [{'name': 'Boy'}, {'name': 'Girl'}],
        }
        mapping_definition = {
            'entities': {
                'Parent': '1',
                'Child': '2',
            },
            'mapping': [
                ['#!uuid', 'Parent.id'],
                ['parents[*].name', 'Parent.name'],
                ['#!uuid', 'Child.id'],
                ['children[*].name', 'Child.name'],
            ],
        }
        person_schema = {
            'name': 'Person',
            'type': 'record',
            'fields': [
                {'name': 'id', 'type': 'string'},
                {'name': 'name', 'type': ['null', 'string']},
            ],
        }
        schemas = {'Parent': person_schema, 'Child': person_schema}

        mapping_id = '#testing#'
        mapping_key = f'mapping:{mapping_id}'

        # first round
        submission_data, entities = extractor.extract_create_entities(
            submission_payload,
            mapping_definition,
            schemas,
            mapping_id,
        )

        submission_errors = submission_data.get(
            extractor.ENTITY_EXTRACTION_ERRORS, [])
        self.assertEqual(len(submission_errors), 0)
        self.assertEqual(len(entities), 4)

        # generated "id" saved for re-extractions
        enrichment = submission_data[extractor.ENTITY_EXTRACTION_ENRICHMENT]
        parent_ids = enrichment[mapping_key]['Parent']['id']
        child_ids = enrichment[mapping_key]['Child']['id']
        self.assertEqual(len(parent_ids), 2)
        self.assertEqual(len(child_ids), 2)
        self.assertNotEqual(parent_ids, child_ids)

        # second round, same payload and same mapping
        submission_data_2, entities_2 = extractor.extract_create_entities(
            submission_payload,
            mapping_definition,
            schemas,
            mapping_id,
        )
        self.assertEqual(entities, entities_2)
        enrichment_2 = submission_data_2[
            extractor.ENTITY_EXTRACTION_ENRICHMENT]
        self.assertEqual(enrichment, enrichment_2)

        # third round, different mapping
        submission_data_3, entities_3 = extractor.extract_create_entities(
            submission_payload,
            mapping_definition,
            schemas,
            '#another#',
        )
        self.assertNotEqual(entities, entities_3)
        # Read the enrichment from the third round's own result, so that
        # this check does not simply repeat the second-round comparison.
        # NOTE(review): the equality below presumes that `enrichment` and
        # the third-round enrichment reflect the same accumulated state;
        # confirm against the extractor implementation.
        enrichment_3 = submission_data_3[
            extractor.ENTITY_EXTRACTION_ENRICHMENT]
        self.assertEqual(enrichment, enrichment_3)