def test_local_csv_load(self):
    """Map the ``experts.csv`` fixture through a ``file://`` URL.

    Incomplete mappings (no entities at all, then an entity definition
    without a key) are expected to raise ``InvalidMapping``. Once the
    mapping is complete, ``filters`` and ``filters_not`` are expected to
    reduce the number of mapped entities.
    """
    fixture = os.path.join(self.fixture_path, "experts.csv")
    spec = {"csv_url": "file://" + fixture}

    # A source without any entity definitions is rejected.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    columns = ("name", "nationality", "gender")
    spec["entities"] = {
        "expert": {
            "schema": "Person",
            "properties": {column: {"column": column} for column in columns},
        }
    }
    # Still rejected: the entity definition has no key yet.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    spec["entities"]["expert"]["key"] = "name"
    total = len(list(model.map_entities(spec)))
    assert total == 14, total

    spec["filters"] = {"gender": "male"}
    total = len(list(model.map_entities(spec)))
    assert total == 10, total

    spec["filters_not"] = {"nationality": "Portugal"}
    total = len(list(model.map_entities(spec)))
    assert total == 7, total
def test_key_column(self):
    """Exercise the ``id_column`` option on the ``experts.csv`` fixture.

    A definition that declares both ``key`` and ``id_column`` is expected
    to raise ``InvalidMapping``. With only ``id_column`` left, 14 entities
    are expected, the first and last carrying the IDs "1" and "42".
    """
    fixture = os.path.join(self.fixture_path, "experts.csv")
    expert = {
        "schema": "Person",
        "key": "id",
        "id_column": "id",
        "properties": {
            column: {"column": column}
            for column in ("name", "nationality", "gender")
        },
    }
    spec = {"csv_url": "file://" + fixture, "entities": {"expert": expert}}

    # `key`/`keys` and `id_column` must not be combined.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    del spec["entities"]["expert"]["key"]
    results = list(model.map_entities(spec))
    self.assertEqual(len(results), 14)
    self.assertEqual(results[0].id, "1")
    self.assertEqual(results[-1].id, "42")
def test_key_column(self):
    """Check ``id_column`` handling for the ``experts.csv`` fixture.

    Combining ``key`` with ``id_column`` is expected to raise
    ``InvalidMapping``; after removing ``key`` the mapping should yield
    14 entities whose first and last IDs are '1' and '42'.
    """
    source = 'file://' + os.path.join(self.fixture_path, 'experts.csv')
    properties = {}
    for column in ('name', 'nationality', 'gender'):
        properties[column] = {'column': column}
    spec = {
        'csv_url': source,
        'entities': {
            'expert': {
                'schema': 'Person',
                'key': 'id',
                'id_column': 'id',
                'properties': properties,
            },
        },
    }

    # Either key/keys or id_column may be given, never both.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    del spec['entities']['expert']['key']
    mapped = list(model.map_entities(spec))
    self.assertEqual(len(mapped), 14)
    first, last = mapped[0], mapped[-1]
    self.assertEqual(first.id, '1')
    self.assertEqual(last.id, '42')
def test_mapping_with_literal_keys(self):
    """Entities sharing a key column but not a ``key_literal`` get distinct IDs.

    Both definitions key on the ``id`` column of ``links.csv``; the test
    expects exactly two entities and requires their IDs to differ.
    """
    source = "file://" + os.path.join(self.fixture_path, "links.csv")

    def entity_spec(schema, literal, name_column):
        # Every definition uses the same key column; only the literal,
        # the schema and the name column vary.
        return {
            "schema": schema,
            "key": "id",
            "key_literal": literal,
            "properties": {"name": {"column": name_column}},
        }

    spec = {
        "csv_url": source,
        "entities": {
            "director": entity_spec("Person", "person", "name"),
            "company": entity_spec("LegalEntity", "legalentity", "comp_name"),
        },
    }
    entities = list(model.map_entities(spec))
    assert len(entities) == 2, len(entities)
    first_id, second_id = entities[0].id, entities[1].id
    assert first_id != second_id, entities
def test_mapping_join(self):
    """Join several columns of ``links.csv`` into one ``address`` value.

    The ``join`` option is given the separator ", " and three columns;
    the single mapped entity is expected to carry the combined string.
    """
    source = "file://" + os.path.join(self.fixture_path, "links.csv")
    address = {"join": ", ", "columns": ["house_number", "town", "zip"]}
    director = {
        "schema": "Person",
        "key": "id",
        "key_literal": "person",
        "properties": {"name": {"column": "name"}, "address": address},
    }
    spec = {"csv_url": source, "entities": {"director": director}}

    results = list(model.map_entities(spec))
    assert len(results) == 1, len(results)
    person = results[0]
    expected = ["64, The Desert, 01234"]
    assert person.get("address") == expected, person.to_dict()
def test_http_csv_load(self):
    """Map ``experts.csv`` when it is served from a registered HTTP URL.

    The fixture content is registered with ``responses`` as the body of a
    GET on the URL, and the mapping is expected to yield 14 entities.
    """
    fixture = os.path.join(self.fixture_path, "experts.csv")
    with open(fixture, "r") as handle:
        payload = handle.read()

    remote = "http://pets.com/experts.csv"
    responses.add(
        responses.GET,
        remote,
        body=payload,
        status=200,
        content_type="text/csv",
    )

    columns = ("name", "nationality", "gender")
    spec = {
        "csv_url": remote,
        "entities": {
            "expert": {
                "schema": "Person",
                "key": "name",
                "properties": {column: {"column": column} for column in columns},
            }
        },
    }
    total = len(list(model.map_entities(spec)))
    assert total == 14, total
def test_http_csv_load(self):
    """Load the experts CSV over a registered HTTP URL and map it.

    The local fixture is read and registered via ``responses.add`` for a
    GET on the URL; 14 mapped entities are expected.
    """
    with open(os.path.join(self.fixture_path, 'experts.csv'), 'r') as source:
        csv_text = source.read()

    endpoint = 'http://pets.com/experts.csv'
    responses.add(responses.GET, endpoint, body=csv_text,
                  status=200, content_type='text/csv')

    properties = {}
    for column in ('name', 'nationality', 'gender'):
        properties[column] = {'column': column}
    expert = {'schema': 'Person', 'key': 'name', 'properties': properties}
    spec = {'csv_url': endpoint, 'entities': {'expert': expert}}

    mapped = list(model.map_entities(spec))
    assert len(mapped) == 14, len(mapped)
def test_mapping_join(self):
    """Check that ``join`` merges several columns into a single value.

    Three columns of ``links.csv`` are joined with ", " into the
    ``address`` property of the one entity the mapping should produce.
    """
    source = 'file://' + os.path.join(self.fixture_path, 'links.csv')
    properties = {
        "name": {"column": "name"},
        "address": {
            "join": ", ",
            "columns": ["house_number", "town", "zip"],
        },
    }
    spec = {
        "csv_url": source,
        "entities": {
            "director": {
                "schema": "Person",
                "key": "id",
                "key_literal": "person",
                "properties": properties,
            },
        },
    }
    entities = list(model.map_entities(spec))
    assert len(entities) == 1, len(entities)
    address = entities[0].get('address')
    assert address == ['64, The Desert, 01234'], entities
def test_mappings_with_links_slavery(self):
    """Reject an ``Ownership`` whose ``asset`` refers to a ``Person`` entity.

    The mapping links a ``LegalEntity`` owner to a ``Person`` asset;
    mapping it is expected to raise ``InvalidMapping``.
    """
    source = 'file://' + os.path.join(self.fixture_path, 'links.csv')
    owner = {
        "schema": "LegalEntity",
        "key": "le_id",
        "properties": {"name": {"column": "le_name"}},
    }
    person = {
        "schema": "Person",
        "key": "person_id",
        "properties": {"name": {"column": "name"}},
    }
    ownership = {
        "schema": "Ownership",
        "keys": ["person_id", "le_id"],
        "properties": {
            "owner": {"entity": "owner"},
            "asset": {"entity": "person"},
            "percentage": {"column": "percentage"},
        },
    }
    mapping_slavery = {
        "csv_url": source,
        "entities": {
            "owner": owner,
            "person": person,
            "ownership": ownership,
        },
    }
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(mapping_slavery))
def test_mappings_with_links(self):
    """Map a director, a company and the ``Directorship`` linking them.

    The ``Directorship`` definition refers to the two other entities by
    name; three entities are expected from ``links.csv`` in total.
    """
    source = 'file://' + os.path.join(self.fixture_path, 'links.csv')
    director = {
        "schema": "Person",
        "key": "id",
        "properties": {"name": {"column": "name"}},
    }
    company = {
        "schema": "Company",
        "key": "comp_id",
        "properties": {"name": {"column": "comp_name"}},
    }
    directorship = {
        "schema": "Directorship",
        "keys": ["comp_id", "id"],
        "properties": {
            "director": {"entity": "director"},
            "organization": {"entity": "company"},
            "role": {"column": "role"},
        },
    }
    mapping_director = {
        "csv_url": source,
        "entities": {
            "director": director,
            "company": company,
            "directorship": directorship,
        },
    }
    total = len(list(model.map_entities(mapping_director)))
    assert total == 3, total
def test_mapping_split(self):
    """Check that ``split`` breaks one column into several property values.

    The ``fave_colours`` column of ``links.csv`` is split on "; " into
    the ``notes`` property, which is compared without regard to order.
    """
    source = 'file://' + os.path.join(self.fixture_path, 'links.csv')
    director = {
        "schema": "Person",
        "key": "id",
        "key_literal": "person",
        "properties": {
            "name": {"column": "name"},
            "notes": {"split": "; ", "column": "fave_colours"},
        },
    }
    spec = {"csv_url": source, "entities": {"director": director}}

    results = list(model.map_entities(spec))
    assert len(results) == 1, len(results)
    notes = results[0].get('notes')
    self.assertCountEqual(notes, ['brown', 'black', 'blue'])
def test_key_column_from_sql(self):
    """Use ``id_column`` instead of ``keys`` on the ``kek_mapping`` fixture.

    The company definition's ``keys`` are swapped for an ``id_column`` of
    "comp.id". The parsed mapping is checked for its source length,
    entity count and reference count, and the first mapped entity's ID
    must parse as an integer of at least 3000.

    Note that ``self.kek_mapping`` is edited in place.
    """
    spec = self.kek_mapping
    company = spec["entities"]["company"]
    del company["keys"]
    company["id_column"] = "comp.id"

    parsed = model.make_mapping(spec)
    assert len(parsed.source) == 2904, len(parsed.source)
    assert len(parsed.entities) == 3, parsed.entities
    assert len(parsed.refs) == 7, parsed.refs

    results = list(model.map_entities(spec))
    first_id = int(results[0].id)
    self.assertGreaterEqual(first_id, 3000)  # FIXME?
def run_mapping(mapping_yaml):
    """Run every query mapping in a config file and write entities to stdout.

    The config is loaded with ``load_config_file``. For each dataset, the
    mappings listed under ``queries`` / ``query`` are passed to
    ``model.map_entities`` with the dataset name as ``key_prefix``, and
    each resulting entity is written with ``write_object``.

    Args:
        mapping_yaml: Mapping config to load.
    """
    datasets = load_config_file(mapping_yaml)
    out = click.get_text_stream("stdout")
    try:
        for prefix, section in datasets.items():
            for query in dict_list(section, "queries", "query"):
                for item in model.map_entities(query, key_prefix=prefix):
                    write_object(out, item)
    except BrokenPipeError:
        # Best effort: a closed output pipe ends the run quietly.
        return
def test_key_column_from_sql(self):
    """Check ``id_column`` on the ``kek_mapping`` fixture's company entity.

    ``keys`` is removed from the company definition and replaced by an
    ``id_column`` of 'comp.id'. The test then checks the parsed mapping's
    source length, entity count and reference count, and that the first
    mapped entity has an integer ID of at least 3000.

    The fixture dict held on ``self`` is modified directly.
    """
    spec = self.kek_mapping
    del spec['entities']['company']['keys']
    spec['entities']['company']['id_column'] = 'comp.id'

    parsed = model.make_mapping(spec)
    source_size = len(parsed.source)
    assert source_size == 2904, source_size
    assert len(parsed.entities) == 3, parsed.entities
    assert len(parsed.refs) == 7, parsed.refs

    mapped = list(model.map_entities(spec))
    leading = mapped[0]
    self.assertGreaterEqual(int(leading.id), 3000)  # FIXME?
def run_mapping(outfile, mapping_yaml):
    """Run every query mapping in a mapping file and write the entities.

    Args:
        outfile: Destination passed to ``write_object`` for each entity.
        mapping_yaml: Mapping file, loaded with ``load_mapping_file``.

    Raises:
        click.Abort: If writing fails with ``BrokenPipeError``.
        click.ClickException: For any other exception, carrying its
            message.
    """
    datasets = load_mapping_file(mapping_yaml)

    def mapped_entities():
        # Flatten datasets -> query mappings -> entities into one stream;
        # the dataset name is used as the key prefix.
        for prefix, section in datasets.items():
            for query in keys_values(section, 'queries', 'query'):
                yield from model.map_entities(query, key_prefix=prefix)

    try:
        for item in mapped_entities():
            write_object(outfile, item)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
def run_mapping(mapping_yaml):
    """Run every query mapping in a mapping file and write entities to stdout.

    The file is loaded with ``load_mapping_file``. For each dataset, the
    mappings listed under ``queries`` / ``query`` are passed to
    ``model.map_entities`` with the dataset name as ``key_prefix``, and
    each resulting entity is written to the stdout text stream.

    Args:
        mapping_yaml: Mapping file to load.

    Raises:
        click.Abort: If writing fails with ``BrokenPipeError``.
        click.ClickException: For any other exception, carrying its
            message; the original exception is chained as the cause.
    """
    config = load_mapping_file(mapping_yaml)
    stream = click.get_text_stream('stdout')
    try:
        for dataset, meta in config.items():
            for mapping in keys_values(meta, 'queries', 'query'):
                entities = model.map_entities(mapping, key_prefix=dataset)
                for entity in entities:
                    # The entity has to be written out; this previously
                    # called `read_entity` on the output stream.
                    write_object(stream, entity)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc)) from exc
def run_mapping(outfile, mapping_yaml, sign=True):
    """Run every query mapping in a mapping file and write the entities.

    Args:
        outfile: Destination passed to ``write_object`` for each entity.
        mapping_yaml: Mapping file, loaded with ``load_mapping_file``.
        sign: When true, each entity is passed through the ``apply``
            method of a ``Namespace`` built from its dataset name before
            it is written.

    Raises:
        click.Abort: If writing fails with ``BrokenPipeError``.
        click.ClickException: For any other exception, carrying its
            message.
    """
    datasets = load_mapping_file(mapping_yaml)
    try:
        for name, section in datasets.items():
            # One namespace per dataset, shared by all of its queries.
            namespace = Namespace(name)
            for query in keys_values(section, "queries", "query"):
                for item in model.map_entities(query, key_prefix=name):
                    signed = namespace.apply(item) if sign else item
                    write_object(outfile, signed)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
def run_mapping(outfile: Path, mapping_yaml: Path, sign: bool = True) -> None:
    """Run every query mapping in a mapping file and write the entities.

    Args:
        outfile: Output path, opened through ``path_writer``.
        mapping_yaml: Mapping file, loaded with ``load_mapping_file``.
        sign: When true, each entity is passed through the ``apply``
            method of a ``Namespace`` built from its dataset name before
            it is written.

    Raises:
        click.Abort: If writing fails with ``BrokenPipeError``.
        click.ClickException: For any other exception, carrying its
            message.
    """
    datasets = load_mapping_file(mapping_yaml)
    try:
        with path_writer(outfile) as sink:
            for name, section in datasets.items():
                # One namespace per dataset, shared by all of its queries.
                namespace = Namespace(name)
                queries = keys_values(section, "queries", "query")
                for query in queries:
                    for item in model.map_entities(query, key_prefix=name):
                        signed = namespace.apply(item) if sign else item
                        write_entity(sink, signed)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
def test_local_csv_load(self):
    """Map the ``experts.csv`` fixture through a ``file://`` URL.

    Mappings with no entities, or with an entity definition that lacks a
    key, are expected to raise ``InvalidMapping``. The completed mapping
    is then re-run with progressively stricter ``filters`` and
    ``filters_not`` settings, checking the entity count at each step.
    """
    source = "file://" + os.path.join(self.fixture_path, "experts.csv")
    spec = {"csv_url": source}

    # A source without any entity definitions is rejected.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    properties = {
        column: {"column": column}
        for column in ("name", "nationality", "gender")
    }
    spec["entities"] = {
        "expert": {"schema": "Person", "properties": properties},
    }
    # Still rejected: the entity definition has no key yet.
    with self.assertRaises(InvalidMapping):
        list(model.map_entities(spec))

    spec["entities"]["expert"]["key"] = "name"

    # Each step optionally sets one mapping option, then re-runs the
    # mapping; options accumulate (or are overwritten) across steps.
    steps = (
        (None, None, 14),
        ("filters", {"gender": "male"}, 10),
        ("filters_not", {"nationality": "Portugal"}, 7),
        ("filters_not", {"nationality": ["Portugal", "Spain"]}, 5),
    )
    for option, value, expected in steps:
        if option is not None:
            spec[option] = value
        found = len(list(model.map_entities(spec)))
        assert found == expected, found
def test_kek_sqlite(self):
    """Map the ``kek_mapping`` fixture and check total and distinct-ID counts.

    8712 entities are expected overall, sharing 5607 distinct IDs.
    """
    mapped = list(model.map_entities(self.kek_mapping))
    total = len(mapped)
    assert total == 8712, total

    unique_ids = {item.id for item in mapped}
    distinct = len(unique_ids)
    assert distinct == 5607, distinct