예제 #1
0
    def test_local_csv_load(self):
        """Map a local CSV fixture, covering invalid mappings and row filters."""
        csv_path = os.path.join(self.fixture_path, 'experts.csv')
        mapping = {'csv_url': 'file://' + csv_path}

        # A mapping that only names the source is expected to be rejected.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        columns = ('name', 'nationality', 'gender')
        mapping['entities'] = {
            'expert': {
                'schema': 'Person',
                'properties': {col: {'column': col} for col in columns},
            }
        }

        # Still expected to be rejected at this point: no key is set yet.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        mapping['entities']['expert']['key'] = 'name'
        total = len(list(model.map_entities(mapping)))
        assert total == 14, total

        # Each added filter should narrow the set of mapped entities.
        mapping['filters'] = {'gender': 'male'}
        total = len(list(model.map_entities(mapping)))
        assert total == 10, total

        mapping['filters_not'] = {'nationality': 'Portugal'}
        total = len(list(model.map_entities(mapping)))
        assert total == 7, total
예제 #2
0
    def test_key_column(self):
        """Check entity IDs when ``id_column`` is configured for the entity."""
        fixture = os.path.join(self.fixture_path, "experts.csv")
        expert = {
            "schema": "Person",
            "key": "id",
            "id_column": "id",
            "properties": {
                "name": {"column": "name"},
                "nationality": {"column": "nationality"},
                "gender": {"column": "gender"},
            },
        }
        mapping = {
            "csv_url": "file://" + fixture,
            "entities": {"expert": expert},
        }

        # Setting both "key" and "id_column" is expected to be rejected.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        del mapping["entities"]["expert"]["key"]

        results = list(model.map_entities(mapping))
        self.assertEqual(len(results), 14)
        first, last = results[0], results[-1]
        self.assertEqual(first.id, "1")
        self.assertEqual(last.id, "42")
예제 #3
0
    def test_key_column(self):
        """Check entity IDs when ``id_column`` is configured for the entity."""
        source = 'file://' + os.path.join(self.fixture_path, 'experts.csv')
        property_columns = ('name', 'nationality', 'gender')
        mapping = {
            'csv_url': source,
            'entities': {
                'expert': {
                    'schema': 'Person',
                    'key': 'id',
                    'id_column': 'id',
                    'properties': {
                        prop: {'column': prop} for prop in property_columns
                    },
                },
            },
        }

        # Combining "key" with "id_column" is expected to raise.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        del mapping['entities']['expert']['key']

        mapped = list(model.map_entities(mapping))
        self.assertEqual(len(mapped), 14)
        self.assertEqual(mapped[0].id, '1')
        self.assertEqual(mapped[-1].id, '42')
예제 #4
0
    def test_mapping_with_literal_keys(self):
        """Entities keyed on the same column but with different
        ``key_literal`` values must end up with different IDs."""

        def entity_spec(schema, literal, name_column):
            # Both entities share the "id" key column; only the literal,
            # schema and source column for the name differ.
            return {
                "schema": schema,
                "key": "id",
                "key_literal": literal,
                "properties": {"name": {"column": name_column}},
            }

        mapping = {
            "csv_url": 'file://' + os.path.join(self.fixture_path, 'links.csv'),
            "entities": {
                "director": entity_spec("Person", "person", "name"),
                "company": entity_spec("LegalEntity", "legalentity", "comp_name"),
            },
        }

        entities = list(model.map_entities(mapping))
        assert len(entities) == 2, len(entities)
        first, second = entities
        assert first.id != second.id, entities
예제 #5
0
    def test_mapping_join(self):
        """Check the ``address`` value built from several joined columns."""
        source = "file://" + os.path.join(self.fixture_path, "links.csv")
        address = {"join": ", ", "columns": ["house_number", "town", "zip"]}
        director = {
            "schema": "Person",
            "key": "id",
            "key_literal": "person",
            "properties": {"name": {"column": "name"}, "address": address},
        }
        mapping = {"csv_url": source, "entities": {"director": director}}

        mapped = list(model.map_entities(mapping))
        assert len(mapped) == 1, len(mapped)
        entity = mapped[0]
        expected = ["64, The Desert, 01234"]
        assert entity.get("address") == expected, entity.to_dict()
예제 #6
0
 def test_http_csv_load(self):
     """Map the experts CSV when it is served from an HTTP URL."""
     fixture = os.path.join(self.fixture_path, "experts.csv")
     with open(fixture, "r") as fh:
         payload = fh.read()

     # Register a canned GET response carrying the fixture contents.
     url = "http://pets.com/experts.csv"
     responses.add(
         responses.GET, url, body=payload, status=200, content_type="text/csv"
     )

     expert = {
         "schema": "Person",
         "key": "name",
         "properties": {
             "name": {"column": "name"},
             "nationality": {"column": "nationality"},
             "gender": {"column": "gender"},
         },
     }
     mapping = {"csv_url": url, "entities": {"expert": expert}}

     count = len(list(model.map_entities(mapping)))
     assert count == 14, count
예제 #7
0
 def test_http_csv_load(self):
     """Map the experts CSV when it is served from an HTTP URL."""
     csv_file = os.path.join(self.fixture_path, 'experts.csv')
     with open(csv_file, 'r') as handle:
         body = handle.read()

     # Register a canned GET response carrying the fixture contents.
     url = 'http://pets.com/experts.csv'
     responses.add(
         responses.GET,
         url,
         body=body,
         status=200,
         content_type='text/csv',
     )

     fields = ('name', 'nationality', 'gender')
     mapping = {
         'csv_url': url,
         'entities': {
             'expert': {
                 'schema': 'Person',
                 'key': 'name',
                 'properties': {field: {'column': field} for field in fields},
             },
         },
     }

     mapped = list(model.map_entities(mapping))
     assert len(mapped) == 14, len(mapped)
예제 #8
0
    def test_mapping_join(self):
        """Check the ``address`` value built from several joined columns."""
        links_csv = os.path.join(self.fixture_path, 'links.csv')
        properties = {
            "name": {"column": "name"},
            "address": {
                "join": ", ",
                "columns": ["house_number", "town", "zip"],
            },
        }
        mapping = {
            "csv_url": 'file://' + links_csv,
            "entities": {
                "director": {
                    "schema": "Person",
                    "key": "id",
                    "key_literal": "person",
                    "properties": properties,
                },
            },
        }

        entities = list(model.map_entities(mapping))
        assert len(entities) == 1, len(entities)
        expected_address = ['64, The Desert, 01234']
        assert entities[0].get('address') == expected_address, entities
예제 #9
0
    def test_mappings_with_links_slavery(self):
        """An Ownership mapping whose ``asset`` refers to the Person entity
        is expected to raise ``InvalidMapping``."""
        csv_url = 'file://' + os.path.join(self.fixture_path, 'links.csv')
        owner = {
            "schema": "LegalEntity",
            "key": "le_id",
            "properties": {"name": {"column": "le_name"}},
        }
        person = {
            "schema": "Person",
            "key": "person_id",
            "properties": {"name": {"column": "name"}},
        }
        # NOTE(review): presumably rejected because a Person is not a valid
        # "asset" for Ownership -- confirm against the schema definitions.
        ownership = {
            "schema": "Ownership",
            "keys": ["person_id", "le_id"],
            "properties": {
                "owner": {"entity": "owner"},
                "asset": {"entity": "person"},
                "percentage": {"column": "percentage"},
            },
        }
        mapping_slavery = {
            "csv_url": csv_url,
            "entities": {
                "owner": owner,
                "person": person,
                "ownership": ownership,
            },
        }

        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping_slavery))
예제 #10
0
 def test_mappings_with_links(self):
     """A director, a company and the Directorship linking them should
     yield three entities from the links fixture."""
     csv_url = 'file://' + os.path.join(self.fixture_path, 'links.csv')
     director = {
         "schema": "Person",
         "key": "id",
         "properties": {"name": {"column": "name"}},
     }
     company = {
         "schema": "Company",
         "key": "comp_id",
         "properties": {"name": {"column": "comp_name"}},
     }
     # The link entity refers to the two entities above by their mapping names.
     directorship = {
         "schema": "Directorship",
         "keys": ["comp_id", "id"],
         "properties": {
             "director": {"entity": "director"},
             "organization": {"entity": "company"},
             "role": {"column": "role"},
         },
     }
     mapping_director = {
         "csv_url": csv_url,
         "entities": {
             "director": director,
             "company": company,
             "directorship": directorship,
         },
     }

     mapped = list(model.map_entities(mapping_director))
     assert len(mapped) == 3, len(mapped)
예제 #11
0
    def test_mapping_split(self):
        """Check the ``notes`` values obtained from a column mapped with
        a ``split`` separator."""
        csv_url = 'file://' + os.path.join(self.fixture_path, 'links.csv')
        properties = {
            "name": {"column": "name"},
            "notes": {"split": "; ", "column": "fave_colours"},
        }
        mapping = {
            "csv_url": csv_url,
            "entities": {
                "director": {
                    "schema": "Person",
                    "key": "id",
                    "key_literal": "person",
                    "properties": properties,
                },
            },
        }

        mapped = list(model.map_entities(mapping))
        assert len(mapped) == 1, len(mapped)
        # Order of the values is not significant, only their multiset.
        notes = mapped[0].get('notes')
        self.assertCountEqual(notes, ['brown', 'black', 'blue'])
예제 #12
0
    def test_key_column_from_sql(self):
        """Map the kek fixture with ``id_column`` replacing ``keys`` on the
        company entity."""
        # NOTE(review): this edits self.kek_mapping in place -- assumes the
        # fixture is rebuilt for every test; confirm in the test setup.
        mapping = self.kek_mapping
        company = mapping["entities"]["company"]
        del company["keys"]
        company["id_column"] = "comp.id"

        compiled = model.make_mapping(mapping)
        assert len(compiled.source) == 2904, len(compiled.source)
        assert len(compiled.entities) == 3, compiled.entities
        assert len(compiled.refs) == 7, compiled.refs

        first = list(model.map_entities(mapping))[0]
        self.assertGreaterEqual(int(first.id), 3000)  # FIXME?
예제 #13
0
def run_mapping(mapping_yaml):
    """Map every query in the given config and write the entities to stdout.

    The config loaded from ``mapping_yaml`` is iterated per dataset; each
    dataset name is passed to ``model.map_entities`` as ``key_prefix``.
    A ``BrokenPipeError`` raised while mapping or writing ends the run
    silently.
    """
    config = load_config_file(mapping_yaml)
    out = click.get_text_stream('stdout')
    try:
        for dataset, meta in config.items():
            queries = dict_list(meta, 'queries', 'query')
            for query in queries:
                for entity in model.map_entities(query, key_prefix=dataset):
                    write_object(out, entity)
    except BrokenPipeError:
        # Deliberately ignored: stop quietly instead of surfacing an error.
        pass
예제 #14
0
    def test_key_column_from_sql(self):
        """Map the kek fixture with ``id_column`` replacing ``keys`` on the
        company entity."""
        # NOTE(review): this edits self.kek_mapping in place -- assumes the
        # fixture is rebuilt for every test; confirm in the test setup.
        mapping = self.kek_mapping
        company_spec = mapping['entities']['company']
        del company_spec['keys']
        company_spec['id_column'] = 'comp.id'

        built = model.make_mapping(mapping)
        assert len(built.source) == 2904, len(built.source)
        assert len(built.entities) == 3, built.entities
        assert len(built.refs) == 7, built.refs

        results = list(model.map_entities(mapping))
        lowest_expected_id = 3000
        self.assertGreaterEqual(int(results[0].id), lowest_expected_id)  # FIXME?
예제 #15
0
def run_mapping(outfile, mapping_yaml):
    """Map every query in a mapping file and write the entities to ``outfile``.

    Args:
        outfile: Destination handed to ``write_object`` for each entity.
        mapping_yaml: Mapping file location, passed to ``load_mapping_file``.

    Raises:
        click.Abort: If a ``BrokenPipeError`` is raised while mapping or
            writing.
        click.ClickException: If any other exception is raised while mapping
            or writing; the original exception is attached as ``__cause__``.

    Errors from loading the mapping file itself are not wrapped, because the
    load happens outside the ``try`` block.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        for dataset, meta in config.items():
            for mapping in keys_values(meta, 'queries', 'query'):
                # The dataset name is used as key prefix for every entity.
                entities = model.map_entities(mapping, key_prefix=dataset)
                for entity in entities:
                    write_object(outfile, entity)
    except BrokenPipeError as exc:
        raise click.Abort() from exc
    except Exception as exc:
        # Chain explicitly so the root cause stays attached to the CLI error.
        raise click.ClickException(str(exc)) from exc
예제 #16
0
def run_mapping(mapping_yaml):
    """Map every query in a mapping file, passing each entity and the stdout
    text stream to ``read_entity``.

    Args:
        mapping_yaml: Mapping file location, passed to ``load_mapping_file``.

    Raises:
        click.Abort: If a ``BrokenPipeError`` is raised while mapping or
            emitting entities.
        click.ClickException: If any other exception is raised in that loop;
            the original exception is attached as ``__cause__``.

    Errors from loading the mapping file itself are not wrapped, because the
    load happens outside the ``try`` block.
    """
    config = load_mapping_file(mapping_yaml)
    stream = click.get_text_stream('stdout')
    try:
        for dataset, meta in config.items():
            for mapping in keys_values(meta, 'queries', 'query'):
                # The dataset name is used as key prefix for every entity.
                entities = model.map_entities(mapping, key_prefix=dataset)
                for entity in entities:
                    # NOTE(review): a helper named read_entity is handed the
                    # stdout stream here -- confirm a writer was not intended.
                    read_entity(stream, entity)
    except BrokenPipeError as exc:
        raise click.Abort() from exc
    except Exception as exc:
        # Chain explicitly so the root cause stays attached to the CLI error.
        raise click.ClickException(str(exc)) from exc
예제 #17
0
def run_mapping(outfile, mapping_yaml, sign=True):
    """Map every query in a mapping file and write the entities to ``outfile``.

    Args:
        outfile: Destination handed to ``write_object`` for each entity.
        mapping_yaml: Mapping file location, passed to ``load_mapping_file``.
        sign: When true, each entity is passed through the dataset's
            ``Namespace.apply`` before being written.

    Raises:
        click.Abort: If a ``BrokenPipeError`` is raised while mapping or
            writing.
        click.ClickException: If any other exception is raised while mapping
            or writing; the original exception is attached as ``__cause__``.

    Errors from loading the mapping file itself are not wrapped, because the
    load happens outside the ``try`` block.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        for dataset, meta in config.items():
            # One namespace per dataset, built from the dataset name.
            ns = Namespace(dataset)
            for mapping in keys_values(meta, "queries", "query"):
                entities = model.map_entities(mapping, key_prefix=dataset)
                for entity in entities:
                    if sign:
                        entity = ns.apply(entity)
                    write_object(outfile, entity)
    except BrokenPipeError as exc:
        raise click.Abort() from exc
    except Exception as exc:
        # Chain explicitly so the root cause stays attached to the CLI error.
        raise click.ClickException(str(exc)) from exc
예제 #18
0
def run_mapping(outfile: Path, mapping_yaml: Path, sign: bool = True) -> None:
    """Map every query in a mapping file and write the entities to ``outfile``.

    Args:
        outfile: Output path, opened through ``path_writer``.
        mapping_yaml: Mapping file location, passed to ``load_mapping_file``.
        sign: When true, each entity is passed through the dataset's
            ``Namespace.apply`` before being written.

    Raises:
        click.Abort: If a ``BrokenPipeError`` is raised while opening the
            output, mapping or writing.
        click.ClickException: If any other exception is raised in that
            section; the original exception is attached as ``__cause__``.

    Errors from loading the mapping file itself are not wrapped, because the
    load happens outside the ``try`` block.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        with path_writer(outfile) as outfh:
            for dataset, meta in config.items():
                # One namespace per dataset, built from the dataset name.
                ns = Namespace(dataset)
                for mapping in keys_values(meta, "queries", "query"):
                    entities = model.map_entities(mapping, key_prefix=dataset)
                    for entity in entities:
                        if sign:
                            entity = ns.apply(entity)
                        write_entity(outfh, entity)
    except BrokenPipeError as exc:
        raise click.Abort() from exc
    except Exception as exc:
        # Chain explicitly so the root cause stays attached to the CLI error.
        raise click.ClickException(str(exc)) from exc
예제 #19
0
    def test_local_csv_load(self):
        """Map a local CSV fixture, covering invalid mappings and row filters."""
        fixture = os.path.join(self.fixture_path, "experts.csv")
        mapping = {"csv_url": "file://" + fixture}

        # A mapping that only names the source is expected to be rejected.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        fields = ("name", "nationality", "gender")
        mapping["entities"] = {
            "expert": {
                "schema": "Person",
                "properties": {field: {"column": field} for field in fields},
            }
        }

        # Still expected to be rejected at this point: no key is set yet.
        with self.assertRaises(InvalidMapping):
            list(model.map_entities(mapping))

        mapping["entities"]["expert"]["key"] = "name"
        count = len(list(model.map_entities(mapping)))
        assert count == 14, count

        # Each filter step below is expected to narrow the result further.
        mapping["filters"] = {"gender": "male"}
        count = len(list(model.map_entities(mapping)))
        assert count == 10, count

        mapping["filters_not"] = {"nationality": "Portugal"}
        count = len(list(model.map_entities(mapping)))
        assert count == 7, count

        # A list of values is also accepted for a negative filter.
        mapping["filters_not"] = {"nationality": ["Portugal", "Spain"]}
        count = len(list(model.map_entities(mapping)))
        assert count == 5, count
예제 #20
0
 def test_kek_sqlite(self):
     """Map the kek mapping fixture and check total and distinct ID counts."""
     mapped = list(model.map_entities(self.kek_mapping))
     assert len(mapped) == 8712, len(mapped)
     # Fewer distinct IDs than entities are expected.
     unique_ids = {item.id for item in mapped}
     assert len(unique_ids) == 5607, len(unique_ids)