Example #1
def shapefilereader(target):
    """Function to convert zipped shapefiles from the web or on disk into geopandas dataframes
    
    Parameters
    ----------
    target : str
        Path to a zipped shapefile on disk, or a URL from which to download it.
    
    Returns
    -------
    geopandas.GeoDataFrame
        Data frame holding the shapefile's features and geometries.
    
    """

    # Detect whether the target is a web URL or a file on local disk
    r = re.compile('^(http|https)://', re.I)
    if r.search(target):
        request = requests.get(target)
        # gdal/ogr requires a .zip extension for in-memory zip files
        target = '/vsimem/{}.zip'.format(uuid.uuid4().hex)
        gdal.FileFromMemBuffer(target, bytes(request.content))

    with fiona.Collection(target, vsi='zip') as f:
        return gpd.GeoDataFrame.from_features(f, crs=f.crs)
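
A minimal usage sketch; the URL and path below are placeholders, not real datasets:

# hypothetical usage of shapefilereader
gdf = shapefilereader('https://example.com/boundaries.zip')   # from the web
gdf_local = shapefilereader('/data/boundaries.zip')           # from disk
print(gdf.crs, len(gdf))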
Example #2
def test_collection_zip_http():
    ds = fiona.Collection(
        "https://raw.githubusercontent.com/Toblerity/Fiona/master/tests/data/coutwildrnp.zip",
        vsi="zip+https",
    )
    assert ds.path == "/vsizip/vsicurl/https://raw.githubusercontent.com/Toblerity/Fiona/master/tests/data/coutwildrnp.zip"
    assert len(ds) == 67
Example #3
    def from_kml(self, filename, kmz=False, variables=None):
        """
        Read Keyhole Markup Language (.kml) files.
        """
        # set filename
        self.case_insensitive_filename(filename)
        # if input file is compressed
        if kmz:
            # decompress and parse KMZ file
            z = zipfile.ZipFile(self.filename, 'r')
            kml_file, = [s for s in z.namelist() if re.search(r'\.(kml)$', s)]
            # need to use osgeo virtual file system to add suffix to mmap name
            mmap_name = "/vsimem/{0}".format(kml_file)
            osgeo.gdal.FileFromMemBuffer(mmap_name, z.read(kml_file))
            with fiona.Collection(mmap_name, driver='LIBKML') as f:
                kml = geopandas.GeoDataFrame.from_features(f, crs=f.crs)
        else:
            kml = geopandas.read_file(self.filename)

        # convert x,y from the polygon projection to the output EPSG
        crs1 = pyproj.CRS.from_string(kml.crs['init'])
        crs2 = pyproj.CRS.from_string("epsg:{0:d}".format(self.epsg))
        transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)

        # list of polygons
        poly_list = []

        # find features of interest
        geometries = ('LineString', 'Polygon')
        f = [
            f for f in kml.iterfeatures()
            if f['geometry']['type'] in geometries
        ]
        # reduce to variables of interest if specified
        f = [ft for ft in f if ft['id'] in variables] if variables else f

        # for each line string or polygon feature
        for feature in f:
            # extract coordinates for feature
            coords = np.squeeze(feature['geometry']['coordinates'])
            # convert points to EPSG
            xi, yi = transformer.transform(coords[:, 0], coords[:, 1])
            # create polygon from coordinate set
            poly_obj = shapely.geometry.Polygon(np.c_[xi, yi])
            # cannot have overlapping exterior or interior rings
            if (not poly_obj.is_valid):
                poly_obj = poly_obj.buffer(0)
            poly_list.append(poly_obj)
        # combine the polygons into a shapely multipolygon, store it on the
        # object, and return self
        self.feature = shapely.geometry.MultiPolygon(poly_list)
        self.shape = (len(self.feature), )
        return self
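
The pyproj Transformer pattern used above, as a self-contained sketch; the coordinates are made-up example points:

import numpy as np
import pyproj

# reproject example lon/lat points (EPSG:4326) to Web Mercator (EPSG:3857)
transformer = pyproj.Transformer.from_crs("epsg:4326", "epsg:3857",
                                          always_xy=True)
x, y = transformer.transform(np.array([4.9, 5.1]), np.array([52.3, 52.4]))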
Example #4
def test_encoding_option_warning(tmpdir, caplog):
    """There is no ENCODING creation option log warning for GeoJSON"""
    fiona.Collection(str(tmpdir.join("test.geojson")),
                     "w",
                     driver="GeoJSON",
                     crs="epsg:4326",
                     schema={
                         "geometry": "Point",
                         "properties": {
                             "foo": "int"
                         }
                     })
    assert not caplog.text
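
For context, a minimal sketch of writing a feature with the same schema and reading it back (fiona 1.x style; the temp path is arbitrary):

import fiona

schema = {"geometry": "Point", "properties": {"foo": "int"}}
with fiona.open("/tmp/test.geojson", "w", driver="GeoJSON",
                crs="epsg:4326", schema=schema) as dst:
    dst.write({"geometry": {"type": "Point", "coordinates": (5.0, 52.0)},
               "properties": {"foo": 1}})

with fiona.open("/tmp/test.geojson") as src:
    print(len(src))  # 1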
Example #5
def wfs2gdf(url):
    response = requests.get(url)

    try:
        vsif = fiona.ogrext.buffer_to_virtual_file(bytes(response.content))
        vsiz = vsif + '.gml'
        gdal.Rename(vsif, vsiz)
        fc = fiona.Collection(vsiz)

        return gpd.GeoDataFrame.from_features([feature for feature in fc],
                                              crs='epsg:28992')
    except Exception:
        print(response.text)
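
The virtual-file trick above, as a standalone sketch: OGR infers the driver from the file suffix, so the anonymous in-memory buffer is renamed to carry a .gml extension. Here `data` is assumed to hold the raw GML bytes of a WFS response:

import fiona
import fiona.ogrext
from osgeo import gdal

vsif = fiona.ogrext.buffer_to_virtual_file(data)  # data: bytes, assumed
gdal.Rename(vsif, vsif + '.gml')  # suffix tells OGR which driver to use
with fiona.Collection(vsif + '.gml') as fc:
    features = list(fc)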
Example #6
def dump_elbs(year=2016):
    storage_key = settings['azure']['pcs_storage_key']
    account = az.CloudStorageAccount(account_name='pcslive', account_key=storage_key)
    blob_service = account.create_block_blob_service()

    year_ids = elb_repo.get_elb_harvest_year_ids(year=year)  # use the year argument, not a hardcoded 2016

    if not os.path.exists('data/elbs'):
        os.mkdir('data/elbs')

    for idx, elb_year_id in enumerate(year_ids):
        print("downloading elb GIS cells.  idx, yearid: ({} of {}), {}".format(idx, len(year_ids), elb_year_id))

        crop = gis_repo.get_pps_crop(elb_year_id)
        if 'Corn' not in crop:
            print("skipping non-corn crop: {}".format(crop))
            continue

        # use the harvest layers
        elb_source_layers = [
            b.name
            for b in list(blob_service.list_blobs('sourcelayers', str(elb_year_id)))
            if any(x in b.name for x in ['_13_', '_14_', '_15_'])]
        elb_harvest_source_layer_name = elb_source_layers[0] if len(elb_source_layers) > 0 else None

        if elb_harvest_source_layer_name is None:
            print("ELB has no harvest layer: {}".format(elb_year_id))
            continue

        blob_zip = blob_service.get_blob_to_bytes('sourcelayers', elb_harvest_source_layer_name)

        vsiz = '/vsimem/{}.zip'.format(uuid.uuid4().hex)  # gdal/ogr requires a .zip extension
        FileFromMemBuffer(vsiz, bytes(blob_zip.content))
        with fiona.Collection(vsiz, vsi='zip') as f:
            shp = GeoDataFrame.from_features(f, crs={'init': 'epsg:4326'})

        elb_points = GeoDataFrame(shp.loc[shp['ELB_ID'] > 0])
        elb_centroids = list(elb_points.centroid)

        pps = gis_repo.processed_layer_shapes_by_year_id(elb_year_id)
        # get pps cells that have an elb
        pps_elb_cells = DataFrame(
            pps.loc[pps['geometry'].apply(lambda x: any(x.intersects(c) for c in elb_centroids))])
        pps_elb_cells.drop(['geometry'], inplace=True, axis=1)

        # load weather record
        wx = gis_repo.weather_by_year_id(elb_year_id)
        pps_elb_cells = pandas.concat([
            pps_elb_cells,
            pandas.DataFrame([wx.values], index=pps_elb_cells.index, columns=wx.keys())], axis=1)

        pps_elb_cells.to_pickle(f'data/elbs/{elb_year_id}_elb.pickle.gz', compression='gzip')
Example #7
def pp_hook(d):
    if '_module' in d.keys() and '_class' in d.keys():
        class_name = d.pop('_class')
        module_name = d.pop('_module')
        obj = d.pop('_object')

        keys = copy.deepcopy(list(d.keys()))
        for key in keys:
            if isinstance(d[key], dict):
                d[key] = pp_hook(d[key])

        if class_name == 'Series':
            return pd.read_json(obj, precise_float=True, **d)
        elif class_name == "DataFrame":
            df = pd.read_json(obj, precise_float=True, **d)
            try:
                df.set_index(df.index.astype(numpy.int64), inplace=True)
            except (ValueError, TypeError, AttributeError):
                logger.debug("failed setting int64 index")
            return df
        elif GEOPANDAS_INSTALLED and class_name == 'GeoDataFrame':
            df = gpd.GeoDataFrame.from_features(fiona.Collection(obj),
                                                crs=d['crs'])
            if "id" in df:
                df.set_index(df['id'].values.astype(numpy.int64), inplace=True)
            # coords column is not handled properly when using from_features
            if 'coords' in df:
                # df['coords'] = df.coords.apply(json.loads)
                valid_coords = ~pd.isnull(df.coords)
                df.loc[valid_coords,
                       'coords'] = df.loc[valid_coords,
                                          "coords"].apply(json.loads)
            df = df.reindex(columns=d['columns'])
            return df
        elif SHAPELY_INSTALLED and module_name == "shapely":
            return shapely.geometry.shape(obj)
        elif class_name == "pandapowerNet":
            from pandapower import from_json_string
            return from_json_string(obj)
        elif module_name == "networkx":
            return json_graph.adjacency_graph(obj,
                                              attrs={
                                                  'id': 'json_id',
                                                  'key': 'json_key'
                                              })
        else:
            module = importlib.import_module(module_name)
            class_ = getattr(module, class_name)
            return class_(obj, **d)
    else:
        return d
Example #8
    def GeoDataFrame(self):
        df = geopandas.GeoDataFrame.from_features(
            fiona.Collection(self.obj),
            crs=self.d['crs']).astype(self.d['dtype'])
        if "id" in df:
            df.set_index(df['id'].values.astype(numpy.int64), inplace=True)
        # coords column is not handled properly when using from_features
        if 'coords' in df:
            # df['coords'] = df.coords.apply(json.loads)
            valid_coords = ~pd.isnull(df.coords)
            df.loc[valid_coords, 'coords'] = \
                df.loc[valid_coords, 'coords'].apply(json.loads)
        df = df.reindex(columns=self.d['columns'])
        return df
Example #9
def wfs2gdf(url, crs, output_type, timeout):
    response = requests.get(url, timeout=timeout)
    try:
        if output_type == 'application/json':
            features = response.json()['features']
            return gpd.GeoDataFrame.from_features(features)
        else:
            vsif = fiona.ogrext.buffer_to_virtual_file(bytes(response.content))
            vsiz = vsif + '.gml'
            gdal.Rename(vsif, vsiz)
            fc = fiona.Collection(vsiz)

            return gpd.GeoDataFrame.from_features([feature for feature in fc], crs=crs)
    except Exception:
        print(response.text)
Example #10
def read_kml_file(input_file, EPSG=4326, KMZ=False, VARIABLES=None):
    #-- if input file is compressed
    if KMZ:
        #-- decompress and parse KMZ file
        kmz = zipfile.ZipFile(os.path.expanduser(input_file), 'r')
        kml_file, = [s for s in kmz.namelist() if re.search(r'\.(kml)$', s)]
        #-- need to use osgeo virtual file system to add suffix to mmap name
        mmap_name = "/vsimem/{0}".format(kml_file)
        osgeo.gdal.FileFromMemBuffer(mmap_name, kmz.read(kml_file))
        with fiona.Collection(mmap_name, driver='LIBKML') as f:
            kml = geopandas.GeoDataFrame.from_features(f, crs=f.crs)
    else:
        kml = geopandas.read_file(os.path.expanduser(input_file))

    #-- convert projection to EPSG
    proj1 = pyproj.Proj("+init={0}".format(kml.crs['init']))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(EPSG))
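    #-- note: the "+init=" syntax above and the pyproj.transform call below
    #-- are deprecated in pyproj 2.x and removed in 3.x; Example #3 shows
    #-- the equivalent pyproj.Transformer-based approach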

    #-- list of polygons
    poly_list = []

    #-- find features of interest
    geometries = ('LineString', 'Polygon')
    f = [f for f in kml.iterfeatures() if f['geometry']['type'] in geometries]
    #-- reduce to variables of interest if specified
    f = [ft for ft in f if ft['id'] in VARIABLES] if VARIABLES else f

    #-- for each line string or polygon feature
    for feature in f:
        #-- extract coordinates for feature
        coords = np.squeeze(feature['geometry']['coordinates'])
        #-- convert points to latitude/longitude
        lon, lat = pyproj.transform(proj1, proj2, coords[:, 0], coords[:, 1])
        #-- create polygon from coordinate set
        poly_obj = Polygon(list(zip(lon, lat)))
        #-- Valid Polygon cannot have overlapping exterior or interior rings
        if (not poly_obj.is_valid):
            poly_obj = poly_obj.buffer(0)
        poly_list.append(poly_obj)
    #-- create shapely multipolygon object
    mpoly_obj = MultiPolygon(poly_list)
    #-- return the polygon object
    return mpoly_obj
Example #11
def pp_hook(d):
    if '_module' in d.keys() and '_class' in d.keys():
        class_name = d.pop('_class')
        module_name = d.pop('_module')
        obj = d.pop('_object')

        keys = copy.deepcopy(list(d.keys()))
        for key in keys:
            if isinstance(d[key], dict):
                d[key] = pp_hook(d[key])

        if class_name == 'Series':
            return pd.read_json(obj, **d)
        elif class_name == "DataFrame":
            df = pd.read_json(obj, **d)
            try:
                df.set_index(df.index.astype(numpy.int64), inplace=True)
            except (ValueError, TypeError, AttributeError):
                logger.debug("failed setting int64 index")
            return df
        elif GEOPANDAS_INSTALLED and class_name == 'GeoDataFrame':
            df = gpd.GeoDataFrame.from_features(fiona.Collection(obj), crs=d['crs'])
            df.set_index(df['id'].values.astype(numpy.int64), inplace=True)
            # coords column is not handled properly when using from_features
            if 'coords' in df:
                df['coords'] = df.coords.apply(json.loads)
            df = df.reindex(columns=d['columns'])
            return df
        elif class_name == "pandapowerNet":
            from pandapower import from_json_string
            return from_json_string(obj)
        else:
            module = importlib.import_module(module_name)
            class_ = getattr(module, class_name)
            return class_(obj, **d)
    else:
        return d
Example #12
def serialize_as_geodataframe(topo_object, url=False):
    """
    Convert a topology dictionary or string into a GeoDataFrame.

    Parameters
    ----------
    topo_object : dict or str
        a complete object representing a topojson-encoded file as a dict,
        a str object, or a str URL

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame parsed from the topojson object
    """
    import fiona
    import geopandas
    import json

    # parse the object as byte string
    if isinstance(topo_object, dict):
        bytes_topo = str.encode(json.dumps(topo_object))
    elif url is True:
        import requests

        request = requests.get(topo_object)
        bytes_topo = bytes(request.content)
    else:
        bytes_topo = str.encode(topo_object)
    # into an in-memory file
    vsimem = fiona.ogrext.buffer_to_virtual_file(bytes_topo)

    # read the features from a fiona collection into a GeoDataFrame
    with fiona.Collection(vsimem, driver="TopoJSON") as f:
        gdf = geopandas.GeoDataFrame.from_features(f, crs=f.crs)
    return gdf
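
A hypothetical call, assuming `topo` already holds a valid TopoJSON dict and GDAL/OGR ships the TopoJSON driver:

# hypothetical usage; `topo` is assumed to be a valid TopoJSON dict
gdf = serialize_as_geodataframe(topo)
# or fetch a TopoJSON document from a URL (placeholder address)
gdf = serialize_as_geodataframe('https://example.com/data.topojson', url=True)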
Example #13
    def _handler(self, request, response):

        level = 12  # request.inputs['level'][0].data
        lakes = True  # request.inputs['lakes'][0].data
        collect_upstream = request.inputs['aggregate_upstream'][0].data
        lonlat = request.inputs['location'][0].data

        # shape_description = 'hydrobasins_{}na_lev{}'.format('lake_' if lakes else '', level)
        # table = DATA / 'hybas_{}na_lev{:02}.csv'.format('lake_' if lakes else '', level)
        # shape_url = TESTDATA[shape_description]

        # extensions = ['.gml', '.shp', '.gpkg', '.geojson', '.json']
        # shp = single_file_check(archive_sniffer(shape_url, working_dir=self.workdir, extensions=extensions))

        lon, lat = parse_lonlat(lonlat)
        bbox = (lon, lat, lon, lat)

        shape_url = tempfile.NamedTemporaryFile(prefix='hybas_',
                                                suffix='.gml',
                                                delete=False,
                                                dir=self.workdir).name

        hybas_gml = gis.get_hydrobasins_location_wfs(bbox,
                                                     lakes=lakes,
                                                     level=level)

        with open(shape_url, 'w') as f:
            f.write(hybas_gml)

        response.update_status('Found downstream watershed',
                               status_percentage=10)

        extensions = ['.gml', '.shp', '.gpkg', '.geojson', '.json']
        shp = single_file_check(
            archive_sniffer(shape_url,
                            working_dir=self.workdir,
                            extensions=extensions))

        shape_crs = crs_sniffer(shp)

        with fiona.Collection(shp, 'r', crs=shape_crs) as src:

            # Find HYBAS_ID
            feat = next(src)
            hybas_id = feat['properties']['HYBAS_ID']
            gml_id = feat['properties']['gml_id']

            if collect_upstream:

                main_bas = feat['properties']['MAIN_BAS']

                if lakes is False or level != 12:
                    raise InvalidParameterValue(
                        "Set lakes to True and level to 12.")

                # Collect features from GeoServer
                response.update_status('Collecting relevant features',
                                       status_percentage=70)

                region = tempfile.NamedTemporaryFile(prefix='hybas_',
                                                     suffix='.json',
                                                     delete=False,
                                                     dir=self.workdir).name
                region_url = gis.get_hydrobasins_attributes_wfs(
                    attribute='MAIN_BAS',
                    value=main_bas,
                    lakes=lakes,
                    level=level)

                # Read table of relevant features sharing main basin
                df = gpd.read_file(region_url)
                df.to_file(region, driver='GeoJSON')

                # TODO: Load and keep this data in memory; Figure out how to better handle encoding and column names.
                # Identify upstream sub-basins and write to a new file
                up = gis.hydrobasins_upstream_ids(hybas_id, df)
                upfile = tempfile.NamedTemporaryFile(prefix='hybas_',
                                                     suffix='.json',
                                                     delete=False,
                                                     dir=self.workdir).name
                up.to_file(upfile, driver='GeoJSON')

                # Aggregate upstream features into a single geometry.
                gdf = gpd.read_file(upfile)
                agg = gis.hydrobasins_aggregate(gdf)

                feat = json.loads(agg.to_json())['features'][0]
                response.outputs['feature'].data = json.dumps(feat)
                response.outputs['upstream_ids'].data = json.dumps(
                    up['id'].tolist())

            else:
                response.outputs['feature'].data = json.dumps(feat)
                response.outputs['upstream_ids'].data = json.dumps([
                    gml_id,
                ])

        return response
Example #14
def test_collection_zip_http():
    ds = fiona.Collection(
        'http://raw.githubusercontent.com/OSGeo/gdal/master/autotest/ogr/data/poly.zip',
        vsi='zip+http')
    assert ds.path == '/vsizip/vsicurl/http://raw.githubusercontent.com/OSGeo/gdal/master/autotest/ogr/data/poly.zip'
    assert len(ds) == 10
Example #15
        if any(x in b.name for x in ['_13_', '_14_', '_15_'])
    ]
    elb_harvest_source_layer_name = elb_source_layers[0] if len(
        elb_source_layers) > 0 else None

    if elb_harvest_source_layer_name is None:
        print("ELB has no indexed layer: {}".format(year_id))
        continue

    blob_zip = blob_service.get_blob_to_bytes('sourcelayers',
                                              elb_harvest_source_layer_name)

    vsiz = '/vsimem/{}.zip'.format(
        uuid.uuid4().hex)  # gdal/ogr requires a .zip extension
    FileFromMemBuffer(vsiz, bytes(blob_zip.content))
    with fiona.Collection(vsiz, vsi='zip') as f:
        shp = GeoDataFrame.from_features(f, crs={'init': 'epsg:4326'})

    elb_points = GeoDataFrame(shp.loc[shp['ELB_ID'] > 0])
    elb_centroids = list(elb_points.centroid)

    pps = gis_repo.processed_layer_shapes_by_year_id(year_id)
    # get pps cells that have an elb
    pps_elb_cells = pandas.DataFrame(pps.loc[pps['geometry'].apply(
        lambda x: any(x.intersects(c) for c in elb_centroids))])
    pps_elb_cells.drop(['geometry'], inplace=True, axis=1)

    # load weather record
    wx = gis_repo.weather_by_year_id(year_id)
    pps_elb_cells = pandas.concat([
        pps_elb_cells,
Example #16
def get_bgt(extent, layer="waterdeel", cut_by_extent=True):
    """
    Get geometries within an extent or polygon from the Basis Registratie Grootschalige Topografie (BGT)

    Parameters
    ----------
    extent : list or tuple of length 4 or shapely Polygon
        The extent (xmin, xmax, ymin, ymax) or polygon for which shapes are
        requested.
    layer : string, optional
        The layer for which shapes are requested. The default is "waterdeel".
    cut_by_extent : bool, optional
        Only return the intersection with the extent if True. The default is True.

    Returns
    -------
    gdf : GeoPandas GeoDataFrame
        A GeoDataFrame containing all geometries and properties.

    """

    api_url = 'https://api.pdok.nl'
    url = '{}/lv/bgt/download/v1_0/full/custom'.format(api_url)
    body = {"format": "citygml", "featuretypes": [layer]}

    if isinstance(extent, Polygon):
        polygon = extent
    else:
        polygon = extent2polygon(extent)

    body['geofilter'] = polygon.to_wkt()

    headers = {'content-type': 'application/json'}

    response = requests.post(url, headers=headers, data=json.dumps(body))

    # check api-status, if completed, download
    if response.status_code in range(200, 300):
        running = True
        href = response.json()["_links"]["status"]["href"]
        url = '{}{}'.format(api_url, href)

        while running:
            response = requests.get(url)
            if response.status_code in range(200, 300):
                status = response.json()['status']
                if status == "COMPLETED":
                    running = False
                else:
                    time.sleep(2)
            else:
                running = False
    else:
        msg = 'Download of bgt-data failed: {}'.format(response.text)
        raise Exception(msg)

    href = response.json()["_links"]["download"]["href"]
    response = requests.get('{}{}'.format(api_url, href))

    vsif = fiona.ogrext.buffer_to_virtual_file(bytes(response.content))
    vsiz = vsif + '.zip'
    gdal.Rename(vsif, vsiz)

    fc = fiona.Collection(vsiz, vsi='zip')
    gdf = gpd.GeoDataFrame.from_features([feature for feature in fc],
                                         crs='epsg:28992')

    # remove duplicate (superseded) features by dropping those with an eindRegistratie
    gdf = gdf[gdf['eindRegistratie'].isna()]

    # re-order columns
    columns = [col for col in gdf.columns if col != 'geometry'] + ['geometry']
    gdf = gdf[columns]

    if cut_by_extent:
        gdf.geometry = gdf.intersection(polygon)
        gdf = gdf[~gdf.is_empty]

    return gdf
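
A hypothetical call; the extent is a made-up area in EPSG:28992 (RD New), the CRS the function assumes:

# hypothetical usage: water polygons for an (xmin, xmax, ymin, ymax) extent
gdf = get_bgt((116000, 120000, 438000, 442000), layer='waterdeel')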
Example #17
def test_collection_zip_http():
    ds = fiona.Collection(
        'http://svn.osgeo.org/gdal/trunk/autotest/ogr/data/poly.zip',
        vsi='zip+http')
    assert ds.path == '/vsizip/vsicurl/http://svn.osgeo.org/gdal/trunk/autotest/ogr/data/poly.zip'
    assert len(ds) == 10
Example #18
def ppipes_hook(d, net=None):
    if '_module' in d and '_class' in d:
        if "_object" in d:
            obj = d.pop('_object')
        elif "_state" in d:
            obj = d['_state']
            if d['has_net']:
                obj['net'] = 'net'
            if '_init' in obj:
                del obj['_init']
            return obj  # backwards compatibility
        else:
            obj = {key: val for key, val in d.items() if key not in ['_module', '_class']}
        class_name = d.pop('_class')
        module_name = d.pop('_module')

        if class_name == 'Series':
            return pd.read_json(obj, precise_float=True, **d)
        elif class_name == "DataFrame":
            df = pd.read_json(obj, precise_float=True, **d)
            try:
                df.set_index(df.index.astype(numpy.int64), inplace=True)
            except (ValueError, TypeError, AttributeError):
                logger.debug("failed setting int64 index")
            # recreate jsoned objects
            for col in ('object', 'controller'):  # "controller" for backwards compatibility
                if col in df.columns:
                    df[col] = df[col].apply(ppipes_hook, args=(net,))
            return df
        elif GEOPANDAS_INSTALLED and class_name == 'GeoDataFrame':
            df = geopandas.GeoDataFrame.from_features(fiona.Collection(obj), crs=d['crs'])
            if "id" in df:
                df.set_index(df['id'].values.astype(numpy.int64), inplace=True)
            # coords column is not handled properly when using from_features
            if 'coords' in df:
                # df['coords'] = df.coords.apply(json.loads)
                valid_coords = ~pd.isnull(df.coords)
                df.loc[valid_coords, 'coords'] = df.loc[valid_coords, "coords"].apply(json.loads)
            df = df.reindex(columns=d['columns'])
            return df
        elif SHAPELY_INSTALLED and module_name == "shapely":
            return shapely.geometry.shape(obj)
        elif class_name == "pandapipesNet":
            net = create_fluid_network(add_stdtypes=False)
            net.update(obj)
            return net
        elif class_name == "pandapowerNet":
            if isinstance(obj, str):  # backwards compatibility
                from pandapower import from_json_string
                return from_json_string(obj)
            else:
                # net = create_empty_network()
                net.update(obj)
                return net
        elif module_name == "networkx":
            return json_graph.adjacency_graph(obj, attrs={'id': 'json_id', 'key': 'json_key'})
        else:
            module = importlib.import_module(module_name)
            if class_name == "method":
                logger.warning('Deserializing of method not tested. This might fail...')
                func = getattr(module, obj)  # doesn't always work
                return func
            elif class_name == "function":
                class_ = getattr(module, obj)  # works
                return class_
            class_ = getattr(module, class_name)
            if isclass(class_) and issubclass(class_, JSONSerializableClass):
                if isinstance(obj, str):
                    obj = json.loads(obj, cls=PPJSONDecoder)  # backwards compatibility
                return class_.from_dict(obj, net)
            if isclass(class_) and issubclass(class_, Component):
                return class_
            else:
                # for non-pp objects, e.g. tuple
                return class_(obj, **d)
    else:
        return d
Example #19
def to_gdf(mask_poly, layer="waterdeel", bronhouders=None, end_registration='now', log_level="INFO", service='default'):

    logging.basicConfig(level=os.environ.get("LOGLEVEL", log_level))

    if end_registration == 'now':
        end_registration = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    if service == 'default':
        api_url = 'https://download.pdok.io'
        url = f'{api_url}/lv/bgt/api/v1/full/custom'
    elif service == 'website':
        api_url = 'https://api.pdok.nl'
        url = f'{api_url}/lv/bgt/download/v1_0/full/custom'

    body = {"format": "gmllight",
            "featuretypes": [layer]}

    if mask_poly.type == 'MultiPolygon':
        xmin, ymin, xmax, ymax = mask_poly.bounds
        body["geofilter"] = "POLYGON(({}))".format(
            ",".join(['{} {}'.format(coords[0], coords[1]) for coords in
                      [[xmin, ymax], [xmax, ymax], [xmax, ymin], [xmin, ymin], [xmin, ymax]]]))

    else:
        body["geofilter"] = "POLYGON(({}))".format(
            ",".join(['{} {}'.format(coords[0], coords[1]) for coords in
                      mask_poly.exterior.coords])
        )

    headers = {'content-type': 'application/json'}

    response = requests.post(url, headers=headers, data=json.dumps(body))

    # check api-status, if completed, download
    if response.status_code in range(200, 300):
        running = True
        url = '{}{}'.format(api_url, response.json()["_links"]["status"]["href"])

        while running:
            response = requests.get(url)
            if response.status_code in range(200, 300):
                logging.info('{}% ({})'.format(response.json()["progress"],
                                               response.json()['status']))
                status = response.json()['status']
                if status == "COMPLETED":
                    running = False
                else:
                    time.sleep(2)
            else:
                logging.error(response.text)
                logging.error(url)
                running = False
    else:
        logging.error(response.text)
        logging.error(url)

    logging.info('converting to gdf')
    response = requests.get('{}{}'.format(
        api_url, response.json()["_links"]["download"]["href"]))

    vsif = fiona.ogrext.buffer_to_virtual_file(bytes(response.content))
    vsiz = vsif + '.zip'
    gdal.Rename(vsif, vsiz)

    fc = fiona.Collection(vsiz, vsi='zip')
    gdf = gpd.GeoDataFrame.from_features(
        [feature for feature in fc], crs='epsg:28992')

    # keep current features (eindRegistratie is NaN, hence != itself)
    # and features whose registration ended after end_registration
    gdf = gdf[(gdf['eindRegistratie'] != gdf['eindRegistratie'])
              | (gdf['eindRegistratie'] > end_registration)]

    # select polygons of specific bronhouders
    if bronhouders is not None:
        gdf = gdf[gdf['bronhouder'].isin(bronhouders)]

    # select polygons within polygon mask
    gdf = gdf[gdf.intersects(mask_poly)]

    # re-order columns
    columns = [col for col in gdf.columns if col != 'geometry'] + ['geometry']
    gdf = gdf[columns]

    return gdf
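
A hypothetical call with a made-up square mask in EPSG:28992:

from shapely.geometry import box

# hypothetical usage: current 'waterdeel' features inside the mask polygon
mask = box(116000, 438000, 120000, 442000)
gdf = to_gdf(mask, layer='waterdeel')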
Example #20
    def get_features(self, layer, poly=None, object_filter='', object_id=None):
        ''' Download features from a layer within a shapely polygon.

            Parameters:
            layer: integer layer number
            poly: shapely Polygon object used as a boundary
            object_filter: optional filter clause appended to the query's where-statement
            object_id: name of the layer's unique ID field (auto-detected when None)
        '''

        if object_id is None:
            properties = requests.get(
                ('{url}/{layer}/?f=pjson').format(url=self.url, layer=layer)).json()

            if "uniqueIdField" in list(properties.keys()):
                object_id = properties["uniqueIdField"]["name"]
            else:
                if "fields" in list(properties.keys()):
                    field = [field['name'] for field in properties["fields"]
                             if field['name'].lower() == "objectid"]
                    if len(field) == 1:
                        object_id = field[0]
            if object_id is None:
                logging.error(
                    'Processing data from the following url failed: {url}/{layer}/?f=pjson'.format(url=self.url, layer=layer))
                logging.error(('ArcREST Layer has no Unique ID Field, script defaulted to {object_id}. '
                               'Please specify a correct object_id for this layer & administration').format(object_id=object_id))
                sys.exit()

        xmin, ymin, xmax, ymax = poly.bounds
        try:
            if not object_filter == '':
                object_filter = ' and {}'.format(object_filter)
            url = ('{url}/{layer}/query?'
                   'where={object_id}>=0{object_filter}'
                   '&geometry={xmin},{ymin},{xmax},{ymax}'
                   '&geometryType=esriGeometryEnvelope'
                   '&f=json'
                   '&inSR={epsg}'
                   '&returnIdsOnly=true').format(url=self.url,
                                                 layer=layer,
                                                 object_id=object_id,
                                                 xmin=xmin,
                                                 ymin=ymin,
                                                 xmax=xmax,
                                                 ymax=ymax,
                                                 epsg=self.epsg,
                                                 object_filter=object_filter)

            response = requests.get(url)
            if response.status_code == 200:
                if 'objectIds' in list(response.json().keys()):
                    object_ids = response.json()['objectIds']
                else:
                    object_ids = response.json()['properties']['objectIds']
                if not object_ids:  # None or an empty list
                    return gpd.GeoDataFrame()
                else:
                    object_ids.sort()

                    # number of paged requests needed (ceiling division);
                    # round(x + 0.5) miscounts when x is a whole number
                    downloads = -(-len(object_ids) // self.maxRecordCount)
                    gdf_list = []
                    for download in range(downloads):
                        min_object = download * self.maxRecordCount
                        max_object = min(
                            min_object + self.maxRecordCount - 1, len(object_ids) - 1)
                        url = ('{url}/{layer}/query?'
                               'where={min_objects}<={object_id} and {max_objects}>={object_id}{object_filter}'
                               '&outFields=*'
                               '&geometry={xmin},{ymin},{xmax},{ymax}'
                               '&geometryType=esriGeometryEnvelope'
                               '&inSR={epsg}'
                               '&outSR={epsg}&f={output_format}').format(url=self.url,
                                                                         layer=layer,
                                                                         object_id=object_id,
                                                                         min_objects=object_ids[min_object],
                                                                         max_objects=object_ids[max_object],
                                                                         object_filter=object_filter,
                                                                         xmin=xmin,
                                                                         ymin=ymin,
                                                                         xmax=xmax,
                                                                         ymax=ymax,
                                                                         epsg=self.epsg,
                                                                         output_format=self.format)
                        response = requests.post(url)
                        gdf = gpd.GeoDataFrame()
                        if len(response.json()['features']) > 0:
                            if self.format == 'json':
                                logging.warning(
                                    'reading ESRI-json format (GeoJSON is preferred)')
                                vsif = fiona.ogrext.buffer_to_virtual_file(
                                    bytes(response.content))
                                vsiz = vsif + '.json'
                                gdal.Rename(vsif, vsiz)
                                fc = fiona.Collection(vsiz)
                                gdf = gpd.GeoDataFrame.from_features(
                                    [feature for feature in fc], crs=self.crs)
                                columns = [
                                    col for col in gdf.columns if not col == 'geometry'] + ['geometry']
                                gdf = gdf[columns]
                            else:
                                features = response.json()['features']
                                gdf = gpd.GeoDataFrame.from_features(features)
                        else:
                            logging.warning(
                                'no features returned for url: {}'.format(url))

                        if len(gdf) > 0:
                            gdf.crs = self.crs
                            gdf = gdf[gdf.intersects(poly)]
                            gdf_list += [gdf]

                    if not gdf.empty:
                        if len(gdf_list) > 1:
                            gdf = gpd.GeoDataFrame(
                                pd.concat(gdf_list, ignore_index=True))

                        else:
                            gdf = gdf_list[0]

                    layer_name = [lay['name']
                                  for lay in self.layers if lay['id'] == layer][0]
                    gdf['layer_name'] = layer_name

                    return gdf

        except Exception as e:
            logging.error(
                'Processing data from the following url failed: {} with error {}'.format(url, e))
            raise e
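
A hypothetical call, assuming `rest` is an instance of the ArcREST client class this method belongs to (with url, epsg, crs, format, maxRecordCount and layers attributes configured) and `aoi` is a shapely Polygon in that CRS:

# hypothetical usage of get_features
gdf = rest.get_features(layer=0, poly=aoi)
print(len(gdf), gdf['layer_name'].unique())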