def filter(self, entity: dict):
    # Entities without a vot identificatie but with a city are only kept
    # when the city does not start with 'amsterdam' or 'weesp'
    if not get_entity_value(entity, self.vot_identificatie) and get_entity_value(entity, self.city):
        return not get_entity_value(entity, self.city).lower().startswith(('amsterdam', 'weesp'))
    return True
def filter(self, entity: dict):
    # Only pass an entity the first time its field value is seen
    value = get_entity_value(entity, self.field)
    if value in self.values:
        return False
    self.values.add(value)
    return True
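# A minimal sketch of how the enclosing filter class might be set up; the class
# name UniqueFilter and its constructor are assumptions for illustration, not
# taken from the source. The filter method above relies on a `field` to look up
# and a `values` set that accumulates values seen across calls.
class UniqueFilter(EntityFilter):  # hypothetical name

    def __init__(self, field: str):
        self.field = field
        self.values = set()  # values seen so far; entities repeating one are dropped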
def test_get_entity_value_nested(self, mock_split_ref, mock_nested_get):
    entity = {'some': 'entity'}
    lookup_key = 'some.key'
    mock_split_ref.return_value = ['some', 'key']

    res = get_entity_value(entity, lookup_key)

    self.assertEqual(mock_nested_get.return_value, res)
    mock_split_ref.assert_called_with('some.key')
    mock_nested_get.assert_called_with(entity, ['some', 'key'])
def csv_exporter(api, file, format=None, append=False, filter: EntityFilter = None):
    """CSV Exporter

    Exports the output of the API to a semicolon-delimited CSV file.

    Format is a dictionary which can have the following attributes:

    columns: A list of attributes which can be mapped 1-on-1 with the API output and CSV column name

        Example: ['identificatie', 'code', 'naam']

    reference: Can be found in the _embedded block of the HAL JSON output. Reference will contain
               a dictionary of API attributes with information on how to map them to CSV columns.

        Example:
            ligtInBuurt: {
                'ref': 'GBD.SDL',  -- The abbreviations for this catalog and collection
                'ref_name': 'ligtIn',  -- A description of the relation used in the CSV column name
                'columns': ['identificatie', 'naam'],  -- The columns to be taken from this _embedded reference
            }

    mapping: A dictionary of mappings between API output and CSV columns. This is currently being
             used for the state endpoints as these aren't according to HAL JSON specs yet.

        Example: 'ligtIn:GBD.SDL.identificatie': 'gebieden:stadsdelenIdentificatie'

    :param filter: an optional EntityFilter; entities for which filter.filter() returns False are skipped
    :param api: the API wrapper which can be iterated through
    :param file: the local file to write to
    :param format: format definition, see above for examples
    :param append: the file the result of this export will be appended to, or False
    :return: the number of rows written
    """
    row_count = 0
    mapping = build_mapping_from_format(format)
    fieldnames = [*mapping.keys()]

    if append:
        _ensure_fieldnames_match_existing_file(fieldnames, append)

    with open(file, 'a' if append else 'w', encoding='utf-8-sig') as fp, \
            ProgressTicker("Export entities", 10000) as progress:
        # Get the fieldnames from the mapping
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter=';')

        if not append:
            writer.writeheader()

        for entity in api:
            if filter and not filter.filter(entity):
                continue

            row = {}
            for attribute_name, lookup_key in mapping.items():
                row[attribute_name] = get_entity_value(entity, lookup_key)

            writer.writerow(row)
            row_count += 1
            progress.tick()

    return row_count
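# A hedged usage sketch of csv_exporter. The format dictionary is modelled on
# the 'columns' example from the docstring above; `meetbouten_api` is a
# hypothetical iterable API wrapper and the output path is illustrative only.
example_format = {
    'columns': ['identificatie', 'code', 'naam'],
}

# Optionally pass a filter such as the hypothetical UniqueFilter sketched above
# to drop duplicate identificaties before they reach the CSV.
row_count = csv_exporter(
    meetbouten_api,
    '/tmp/example.csv',
    format=example_format,
    filter=UniqueFilter('identificatie'))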
def test_get_entity_value_dict_key(self, mock_dict):
    entity = {'some': 'entity'}
    lookup_key = {'some': 'dict lookup key'}

    self.assertEqual(mock_dict.return_value, get_entity_value(entity, lookup_key))
    mock_dict.assert_called_with(entity, lookup_key)
def test_get_entity_value_bool(self, mock_split_ref):
    mock_split_ref.return_value = 'key'

    # Boolean values are exported as 'J' (True) and 'N' (False)
    self.assertEqual('J', get_entity_value({'key': True}, 'key'))
    self.assertEqual('N', get_entity_value({'key': False}, 'key'))
def test_get_entity_value_none_key(self):
    self.assertIsNone(get_entity_value({}, None))
def test_get_entity_value(self):
    entity = {'key': 'value'}
    lookup_key = 'key'
    self.assertEqual('value', get_entity_value(entity, lookup_key))
def esri_exporter(api, file, format=None, append=False, filter: EntityFilter = None):
    """ESRI Exporter

    This function will transform the output of an API to ESRI shape files. The result will be
    4 files (.shp, .dbf, .shx and .prj), which all contain some required data.

    It uses the Python bindings to the GDAL library.

    :param api: The encapsulated API as an iterator
    :param file: The main file (.shp) to write to
    :param format: The mapping of the API output to ESRI fields as defined in the export config.
                   The max length of an ESRI fieldname is 10 characters.
    :param append: not supported by this exporter; must be False
    :param filter: an optional EntityFilter; entities for which filter.filter() returns False are skipped
    """
    if append:
        raise NotImplementedError("Appending not implemented for this exporter")

    row_count = 0
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dstfile = driver.CreateDataSource(file)

    # Set spatialref to RD
    spatialref = osr.SpatialReference()
    spatialref.ImportFromEPSG(28992)

    geometry_field = format['geometrie'] if 'geometrie' in format else 'geometrie'

    with ProgressTicker("Export entities", 10000) as progress:
        # Get records from the API and build the esri file
        for entity in api:
            if filter and not filter.filter(entity):
                continue

            entity_geometry = get_entity_value(entity, geometry_field)

            # On the first entity determine the type of shapefile we need to export
            if row_count == 0:
                # Please note that it will fail if a file with the same name already exists
                geometry_type = _get_geometry_type(entity_geometry)

                # Auto-reduce field sizes, encode data to utf-8
                # see https://gdal.org/drivers/vector/shapefile.html#layer-creation-options
                dstlayer = dstfile.CreateLayer(
                    "layer",
                    spatialref,
                    geom_type=geometry_type,
                    options=['RESIZE=YES', f'ENCODING={ENCODING}'])

                # Add all field definitions, but skip geometrie
                all_fields = {k: v for k, v in format.items() if k != geometry_field}
                add_field_definitions(dstlayer, all_fields.keys())

            feature = ogr.Feature(dstlayer.GetLayerDefn())
            if entity_geometry:
                feature.SetGeometry(create_geometry(entity_geometry))

            for attribute_name, source in all_fields.items():
                mapping = split_field_reference(source)
                value = get_entity_value(entity, mapping)

                # ESRI expects an empty string when value is None
                value = '' if value is None else value

                feature.SetField(attribute_name, value)

            dstlayer.CreateFeature(feature)
            feature.Destroy()
            row_count += 1
            progress.tick()

    # When no rows are returned no layer has been made, so create it afterwards to make sure files exist
    dstlayer = dstfile.CreateLayer("layer", spatialref, geom_type=ogr.wkbPolygon) if row_count == 0 else dstlayer

    dstfile.Destroy()
    _create_cpg(file)

    return row_count
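# A hedged usage sketch of esri_exporter. The format dict and `gebieden_api`
# wrapper are assumptions for illustration; per the docstring, the 'geometrie'
# key names the geometry source field and the remaining keys become ESRI field
# names, which must stay within the 10-character limit.
example_format = {
    'geometrie': 'geometrie',  # source field for the shape geometry
    'id': 'identificatie',     # mapped via split_field_reference
    'naam': 'naam',
}

row_count = esri_exporter(gebieden_api, '/tmp/example.shp', format=example_format)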
def filter(self, entity: dict):
    # Keep the entity when at least one of the configured fields has a non-empty value
    return any(get_entity_value(entity, field) for field in self.fields)