Ejemplo n.º 1
0
        def filter(self, entity: dict):
            """Keep entities unless they lack a VOT id and lie in Amsterdam/Weesp.

            Returns False only when the entity has no value for
            self.vot_identificatie AND its city value starts with
            'amsterdam' or 'weesp' (case-insensitive); True otherwise.
            """
            # An entity with a VOT identificatie always passes.
            if get_entity_value(entity, self.vot_identificatie):
                return True

            city = get_entity_value(entity, self.city)
            if not city:
                # No city value to judge by: keep the entity.
                return True

            # Drop entities whose city is Amsterdam or Weesp.
            return not city.lower().startswith(('amsterdam', 'weesp'))
Ejemplo n.º 2
0
 def filter(self, entity: dict):
     """Deduplicate entities on self.field.

     Passes an entity only the first time its field value is seen;
     as a side effect the value is recorded in self.values.
     """
     seen = self.values
     value = get_entity_value(entity, self.field)

     if value not in seen:
         seen.add(value)
         return True

     # Value already exported earlier: filter this entity out.
     return False
Ejemplo n.º 3
0
    def test_get_entity_value_nested(self, mock_split_ref, mock_nested_get):
        """A dotted key is split and the parts are resolved via nested_get."""
        mock_split_ref.return_value = ['some', 'key']
        entity = {'some': 'entity'}

        result = get_entity_value(entity, 'some.key')

        self.assertEqual(mock_nested_get.return_value, result)
        mock_split_ref.assert_called_with('some.key')
        mock_nested_get.assert_called_with(entity, ['some', 'key'])
Ejemplo n.º 4
0
def csv_exporter(api,
                 file,
                 format=None,
                 append=False,
                 filter: EntityFilter = None):
    """CSV Exporter

    Writes the output of the API to a ';'-delimited csv file.

    The format definition is a dictionary that may contain:

    columns: attributes mapped 1-on-1 between the API output and a csv column

        Example: ['identificatie', 'code', 'naam']

    reference: found in the _embedded block of the HAL JSON output; maps API
               attributes of an embedded reference onto csv columns.

        Example:
            ligtInBuurt: {
                'ref': 'GBD.SDL',   -- The abbreviations for this catalog and collection
                'ref_name': 'ligtIn',  -- A description of the relation used in the csv column name
                'columns': ['identificatie', 'naam'],  -- The columns to be taken from this _embedded reference
            }

    mapping: explicit mappings between API output and csv columns; currently
             used for the state endpoints, which don't follow HAL JSON yet.

        Example: 'ligtIn:GBD.SDL.identificatie': 'gebieden:stadsdelenIdentificatie',

    :param api: the API wrapper which can be iterated through
    :param file: the local file to write to
    :param format: format definition, see above for examples
    :param append: the file this export will be appended to, or False
    :param filter: optional EntityFilter; entities it rejects are skipped
    :return: the number of rows written
    """
    mapping = build_mapping_from_format(format)
    # Column order follows the mapping order.
    fieldnames = list(mapping.keys())

    if append:
        _ensure_fieldnames_match_existing_file(fieldnames, append)

    row_count = 0
    mode = 'a' if append else 'w'

    with open(file, mode, encoding='utf-8-sig') as fp, \
            ProgressTicker("Export entities", 10000) as progress:
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter=';')

        # The header already exists when appending to an earlier export.
        if not append:
            writer.writeheader()

        for entity in api:
            if filter and not filter.filter(entity):
                continue

            writer.writerow({
                attribute_name: get_entity_value(entity, lookup_key)
                for attribute_name, lookup_key in mapping.items()
            })
            row_count += 1
            progress.tick()

    return row_count
Ejemplo n.º 5
0
 def test_get_entity_value_dict_key(self, mock_dict):
     """A dict lookup key is delegated to the dict-key handler."""
     entity = {'some': 'entity'}
     lookup_key = {'some': 'dict lookup key'}

     result = get_entity_value(entity, lookup_key)

     self.assertEqual(mock_dict.return_value, result)
     mock_dict.assert_called_with(entity, lookup_key)
Ejemplo n.º 6
0
    def test_get_entity_value_bool(self, mock_split_ref):
        """Boolean values are rendered as 'J' (True) and 'N' (False)."""
        mock_split_ref.return_value = 'key'

        for raw, expected in ((True, 'J'), (False, 'N')):
            self.assertEqual(expected, get_entity_value({'key': raw}, 'key'))
Ejemplo n.º 7
0
 def test_get_entity_value_none_key(self):
     """A None lookup key resolves to None."""
     result = get_entity_value({}, None)
     self.assertIsNone(result)
Ejemplo n.º 8
0
 def test_get_entity_value(self):
     """A plain string key returns the matching entity value."""
     self.assertEqual('value', get_entity_value({'key': 'value'}, 'key'))
Ejemplo n.º 9
0
def esri_exporter(api,
                  file,
                  format=None,
                  append=False,
                  filter: EntityFilter = None):
    """ESRI Exporter

    This function will transform the output of an API to ESRI shape files. The
    result will be 4 files (.shp, .dbf, .shx and .prj), which all contain some
    required data, plus a .cpg file declaring the encoding.

    It uses the python bindings to the GDAL library.

    :param api: The encapsulated API as an iterator
    :param file: The main file (.shp) to write to
    :param format: The mapping of the API output to ESRI fields as defined in the
    export config. The max length of an esri fieldname is 10 characters.
    :param append: not supported by this exporter; must be falsy
    :param filter: optional EntityFilter; entities it rejects are skipped
    :return: the number of features written
    :raises NotImplementedError: when append is truthy
    """
    if append:
        raise NotImplementedError(
            "Appending not implemented for this exporter")

    row_count = 0
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dstfile = driver.CreateDataSource(file)

    # Set spatialref to RD (EPSG:28992, the Dutch national grid)
    spatialref = osr.SpatialReference()
    spatialref.ImportFromEPSG(28992)

    # The geometry attribute may be remapped in the format; default 'geometrie'
    geometry_field = format.get('geometrie', 'geometrie')

    with ProgressTicker("Export entities", 10000) as progress:
        # Get records from the API and build the esri file
        for entity in api:
            if filter and not filter.filter(entity):
                continue

            entity_geometry = get_entity_value(entity, geometry_field)

            # On the first entity determine the type of shapefile we need to export
            if row_count == 0:
                # Please note that it will fail if a file with the same name already exists
                geometry_type = _get_geometry_type(entity_geometry)

                # Auto-reduce field sizes, encode data to utf-8
                # see https://gdal.org/drivers/vector/shapefile.html#layer-creation-options
                dstlayer = dstfile.CreateLayer(
                    "layer",
                    spatialref,
                    geom_type=geometry_type,
                    options=['RESIZE=YES', f'ENCODING={ENCODING}'])

                # Add all field definitions, but skip geometrie.
                # Fixed: use '!=' instead of 'is not' — identity comparison of
                # strings depends on interning and could leak the geometry
                # column into the attribute fields.
                all_fields = {
                    k: v
                    for k, v in format.items() if k != geometry_field
                }
                add_field_definitions(dstlayer, all_fields.keys())

            feature = ogr.Feature(dstlayer.GetLayerDefn())
            if entity_geometry:
                feature.SetGeometry(create_geometry(entity_geometry))

            for attribute_name, source in all_fields.items():
                mapping = split_field_reference(source)
                value = get_entity_value(entity, mapping)

                # Esri expects an empty string when value is None
                value = '' if value is None else value

                feature.SetField(attribute_name, value)

            dstlayer.CreateFeature(feature)

            feature.Destroy()
            row_count += 1
            progress.tick()

    # When no rows are returned no layer has been made, so create it afterwards
    # to make sure the expected files exist on disk
    if row_count == 0:
        dstlayer = dstfile.CreateLayer(
            "layer", spatialref, geom_type=ogr.wkbPolygon)

    dstfile.Destroy()
    _create_cpg(file)

    return row_count
Ejemplo n.º 10
0
 def filter(self, entity: dict):
     """Pass the entity when at least one configured field has a truthy value.

     :param entity: the entity (dict) to inspect
     :return: True if any field in self.fields resolves to a truthy value
     """
     # Generator expression instead of a list comprehension: any() can
     # short-circuit on the first truthy value without materializing a list.
     return any(get_entity_value(entity, field) for field in self.fields)