Example #1
0
def download_osm_features(place, osm_type, tag, values=None, by_poly=True, timeout=180):
    """ Download OSM features within given place

    :param place: single place name query (e.g: "London", "Bonn", etc.)
    :param osm_type: OSM geometry type str ('node', 'way', 'relation')
    :param tag: OSM tag to query
    :param values: str/list of possible values for the provided OSM tag
    :param by_poly: if True, retrieve features within polygon's list of coordinates, otherwise use bounds
    :param timeout: value forwarded unchanged to ``ql_query`` (presumably the Overpass
        query timeout in seconds -- confirm against ``ql_query``)
    :return: list holding one ``overpass_request`` result per query sent (one per polygon
        coordinate string when ``by_poly`` is True, a single one for the bounds otherwise),
        or None when no geometry can be read from the geocoding result
    """
    gdf_geometry = geocode_to_gdf(place)

    try:
        # Positional access: the first row is wanted whatever its index label is.
        geometry = gdf_geometry.geometry.iloc[0]
    except (AttributeError, IndexError):  # no geometry column / no rows at all
        return None

    # Only the spatial filter differs between the two modes; the request itself is shared.
    if by_poly:
        spatial_filters = [{'polygon_coord': poly_coord_str}
                           for poly_coord_str in get_polygons_coordinates(geometry)]
    else:
        spatial_filters = [{'bounds': geometry.bounds}]

    responses = []
    for spatial_filter in spatial_filters:
        query = ql_query(osm_type, tag, values, timeout=timeout, **spatial_filter)
        responses.append(overpass_request(data={'data': query}))

    return responses
Example #2
0
def build_index(input_filename="../data.yaml", index_filename="rtree"):
    """Geocode every registrar listed in a YAML file and index their bounding boxes.

    Each entry of the YAML document is passed through ``clean_registrar_dict``,
    geocoded by its ``"osm_name"`` and extended with an ``"id"`` (its position
    in the file) and a ``"geometry"`` (first geocoding result).

    :param input_filename: path of the YAML file to read
    :param index_filename: base name handed to ``BetterPicklingIndex``; existing
        ``<name>.idx`` / ``<name>.dat`` files are removed first. When None the
        index is built from the entries alone.
    :return: the ``BetterPicklingIndex`` built from ``(id, bbox, registrar)``
        tuples, with bbox ordered (west, south, east, north)
    :raises Exception: when geocoding returns no rows for a registrar
    """
    with open(input_filename) as yaml_file:
        registrars = yaml.safe_load(yaml_file)

    entries = []
    for position, raw_registrar in enumerate(registrars):
        record = clean_registrar_dict(raw_registrar)
        located = osmnx.geocode_to_gdf(record["osm_name"])
        if not len(located):
            raise Exception("Couldn't find geometry for {}"
                            .format(record["osm_name"]))

        shape = located.geometry[0]
        north = located.bbox_north[0]
        south = located.bbox_south[0]
        east = located.bbox_east[0]
        west = located.bbox_west[0]

        record["id"] = position
        record["geometry"] = shape

        entries.append((position, (west, south, east, north), record))

    if index_filename is None:
        return BetterPicklingIndex(entries)

    # Drop index files left over from a previous build before recreating them.
    for suffix in ("idx", "dat"):
        stale_path = "{}.{}".format(index_filename, suffix)
        if os.path.isfile(stale_path):
            os.remove(stale_path)
    return BetterPicklingIndex(index_filename, entries)
Example #3
0
def geography():
    """Set up the data required for working with geography.

    - stores the municipalities of the configured region in the database;
    - saves the region map as GeoJSON;
    - saves the graph of geographic names.

    All existing ``District`` and ``ArticleDistrict`` rows are deleted first.

    :return: True when all data was collected; False (after logging an error)
        when the configured region name is not in the downloaded index or no
        graph link contains the configured region code
    """
    District.objects.all().delete()
    ArticleDistrict.objects.all().delete()
    # Download the file with the regions
    logger.info('Скачиваем данные по регионам России')
    file_path = os.path.join(settings.ML_MODELS, 'federal_subjects_index.csv')
    wget.download(federal_subjects_index_path, file_path)
    config = configparser.ConfigParser()
    config.read(settings.CONFIG_INI_PATH)
    federal_subjects_index = pd.read_csv(file_path)
    region = config['REGION']['NAME']
    region_rows = federal_subjects_index[federal_subjects_index['name'] == region]
    if len(region_rows) == 0:
        # Adjacent literals instead of backslash continuations, so the source
        # indentation does not end up inside the logged message.
        logger.error('Данных для региона с данным кодом не найдено. '
                     'Проверьте правильность написания региона, либо обратитесь за '
                     'помощью к разработчику: https://tlgg.ru/blanchefort')
        return False
    # Take the scalar explicitly: calling int() on a Series is deprecated in
    # pandas and raises when more than one row matches.
    idx = int(region_rows['idx'].iloc[0])
    _, district_ids = osm_get_info(idx)
    district_names = []
    extended_names = []
    for district_id in district_ids:
        dist_name, _ = osm_get_info(district_id)
        district_names.append(dist_name)
        extended_names.append(dist_name + ', ' + region + ', Россия')
    logger.info('Сохраняем муниципалитеты в базу данных')
    District.objects.create(name='region')
    for dist_name in district_names:
        District.objects.create(name=dist_name)

    logger.info('Формируем карту всего региона')
    region_geo = ox.geocode_to_gdf(extended_names)
    region_geo['russian_name'] = district_names
    file_path = os.path.join(settings.ML_MODELS, 'region.geojson')
    region_geo.to_file(file_path, driver='GeoJSON')
    # Address graph of the region
    # TODO: Rebuild the graph
    fname = f"fias_{config['REGION']['CODE']}"
    # First link whose text contains the region-specific file name, if any.
    selected_file = next(
        (flink for flink in freyr_region_graphs if fname in flink), None)
    if selected_file is None:
        logger.error('Данных для региона с данным кодом не найдено. '
                     'Проверьте правильность написания кода региона, '
                     'либо обратитесь за помощью к разработчику: https://tlgg.ru/blanchefort')
        return False
    logger.info('Сохраняем граф адресов региона')
    file_path = os.path.join(settings.ML_MODELS, 'geograph.edgelist')
    wget.download(selected_file, file_path)

    logger.info('Геоданные для региона полностью собраны!')
    return True
Example #4
0
def get_boundries_osmnx(loc_name):
    """Geocode ``loc_name`` and return its outer boundary line and bounding box.

    :param loc_name: place query passed to ``ox.geocode_to_gdf``; also used as
        the first argument of the returned ``Line``
    :return: tuple ``(boundry_line, bbox)``. ``boundry_line`` is a ``Line``
        built from the transposed exterior-ring coordinates of the first
        geocoding result; ``bbox`` is the array
        ``[bbox_north, bbox_south, bbox_east, bbox_west]`` of that result.
    """
    poly = ox.geocode_to_gdf(loc_name)
    # Read the ring through ``.coords``: converting the shapely geometry object
    # itself with ``np.array`` depends on an array interface that Shapely 2
    # no longer provides.
    exterior_ring = poly.geometry.exterior.iloc[0]
    boundry = np.array(exterior_ring.coords).T
    bbox = np.array(
        poly[["bbox_north", "bbox_south", "bbox_east", "bbox_west"]])[0]
    boundry_line = Line(loc_name, boundry, color="r", type="boundry")
    return boundry_line, bbox
Example #5
0
    def get_osm_polygon(self, name, select=1, buffer_dist=20):
        """Geocode ``name`` and return the geometry of the first row found.

        :param name: query passed to ``osmnx.geocode_to_gdf``
        :param select: forwarded as ``which_result``
        :param buffer_dist: forwarded as ``buffer_dist``
        :return: first entry of the resulting GeoDataFrame's geometry values
        """
        log.info('searching for query=%s, which_result=%s', name, select)

        geocode_options = {'buffer_dist': buffer_dist, 'which_result': select}
        matches = osmnx.geocode_to_gdf(name, **geocode_options)
        log.info('gdf=%s', matches)

        return matches.geometry.values[0]
def main(tiles_shp_filepath, nominatim_query, output_filepath, op):
    """Write the file names of the tiles related to a Nominatim boundary.

    :param tiles_shp_filepath: file read with ``gpd.read_file``; must provide a
        ``geometry`` and a ``location`` column
    :param nominatim_query: query geocoded with ``ox.geocode_to_gdf``
    :param output_filepath: CSV file the selected ``location`` values are
        written to (without header)
    :param op: name of the method looked up on the tiles' geometry column and
        called with the boundary geometry to select tiles
    """
    logger = logging.getLogger(__name__)

    tiles_gdf = gpd.read_file(tiles_shp_filepath)

    # fetch the boundary and express it in the CRS of the tiles
    logger.info("Querying Nominatim for boundaries for `%s`", nominatim_query)
    boundary_gser = ox.geocode_to_gdf(nominatim_query)["geometry"]
    geom = boundary_gser.to_crs(tiles_gdf.crs).iloc[0]

    # keep the file name of every tile whose geometry satisfies `op` against
    # the municipal boundaries
    tile_geometries = tiles_gdf["geometry"]
    selected = getattr(tile_geometries, op)(geom)
    tile_filename_ser = tiles_gdf[selected]["location"]
    logger.info("Found %d intersecting tiles", len(tile_filename_ser))

    tile_filename_ser.to_csv(output_filepath, header=False)
    logger.info("Dumped list of intersecting tiles to %s", output_filepath)
Example #7
0
def main():
    """Plot the geocoded areas listed in ``data.yaml`` on top of the US states.

    Reads the ``osm_name`` of every entry in ``data.yaml``, geocodes them in
    one call and shows them over the state outlines from the TIGER shapefile.
    """
    with open("data.yaml") as yaml_file:
        registrars = yaml.safe_load(yaml_file)

    osm_names = []
    for registrar in registrars:
        osm_names.append(registrar["osm_name"])
    registrar_areas = osmnx.geocode_to_gdf(osm_names)

    _, axes = plt.subplots()
    axes.set_aspect("equal")

    # state outlines, downloaded from TIGER
    state_outlines = geopandas.read_file("tiger_files/tl_2019_us_state.shp")
    state_outlines.plot(ax=axes, color="white", edgecolor="gray")

    registrar_areas.plot(ax=axes)

    plt.show()
Example #8
0
def setup(db_name: str = DB_NAME):
    """Load the Downingtown boundary, a buffered copy and its OSM network into the database.

    Imports four tables (``boundary``, ``boundary_5mi_buffer``, ``osm_edges``,
    ``osm_nodes``), reprojects the two OSM tables and adds a uuid column to
    ``osm_edges``.

    :param db_name: name handed to ``db_connection``
    """
    db = db_connection(db_name)

    # Boundary of Downingtown PA, moved to EPSG:26918
    place_query = {"city": "Downingtown", "state": "PA"}
    gdf_bounds = ox.geocode_to_gdf(place_query)
    gdf_bounds = gdf_bounds.to_crs("EPSG:26918")

    # Buffer distance: five miles expressed in meters
    meters_per_mile = 1609.34
    buffer_distance = meters_per_mile * 5

    gdf_bounds_buffer = gdf_bounds.copy()
    gdf_bounds_buffer["geometry"] = gdf_bounds.geometry.buffer(buffer_distance)

    # All OSM road features inside the buffered boundary
    buffered_polygon = gdf_bounds_buffer.to_crs("EPSG:4326").geometry[0]
    road_graph = ox.graph_from_polygon(buffered_polygon).to_undirected()
    nodes, edges = ox.graph_to_gdfs(road_graph)

    tables = (
        (gdf_bounds, "boundary"),
        (gdf_bounds_buffer, "boundary_5mi_buffer"),
        (edges, "osm_edges"),
        (nodes, "osm_nodes"),
    )
    for frame, table_name in tables:
        db.import_geodataframe(frame, table_name)

    # Reproject from 4326 to 26918 to facilitate analysis queries
    db.table_reproject_spatial_data("osm_edges", 4326, 26918, "LINESTRING")
    db.table_reproject_spatial_data("osm_nodes", 4326, 26918, "POINT")

    # Give every edge a uuid
    make_id_query = """
        CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
        alter table osm_edges add column osmuuid uuid;
        update osm_edges set osmuuid = uuid_generate_v4();
    """
    db.execute(make_id_query)
Example #9
0
def clip_by_nominatim(ldf, query, **geocode_to_gdf_kws):
    """Clip ``ldf`` to the geometry that Nominatim returns for ``query``.

    :param ldf: object to clip; passed to ``clip_by_geometry`` and sliced with
        ``ldf[0:0]`` to build the empty result
    :param query: query passed to ``ox.geocode_to_gdf``
    :param geocode_to_gdf_kws: extra keyword arguments forwarded to
        ``ox.geocode_to_gdf``
    :return: the result of ``clip_by_geometry`` for the first geocoded
        geometry; ``ldf[0:0]`` (after a warning) when a ``KeyError`` is raised
        while geocoding or clipping; None (after a warning) when ``ox`` is
        falsy, i.e. osmnx is not available
    """
    if not ox:
        # warn about missing dependences
        logging.warning(
            "The clip_by_nominatim module requires the osmnx package. "
            "The easiest way to install it is as in\n"
            "conda install -c conda-forge osmnx\n"
            "See https://github.com/gboeing/osmnx for more information "
            "about installing osmnx")
        return None

    try:
        geometry = ox.geocode_to_gdf(
            query, **geocode_to_gdf_kws)["geometry"].iloc[0]
        return clip_by_geometry(ldf,
                                geometry,
                                geometry_crs=ox.settings.default_crs)
    except KeyError:
        logging.warning(
            "OSM returned no results (or fewer than which_result) for "
            'query "{}".\n Returning empty SLSDataFrame'.format(query))
        return ldf[0:0]
Example #10
0
    def load_data_from_city(self,
                            city_name: str,
                            city_elements: dict = None,
                            city_limits: bool = True):
        """
        Geocode a city and load its data onto the instance.

        city_name: (str) query passed to osmnx.geocode_to_gdf; stored as self.city_name
        city_elements: (dict, optional) entries merged into self.city_dict for the
            duration of this call only; the previous content is put back afterwards
        city_limits: (bool) if True the data is loaded with
            self._get_city_data_within_city_limits(), otherwise with
            self._get_city_data_within_rectangle(); stored as self.city_limits

        Also sets self.query_type to 'city', calls self._reset_data(), and stores
        the geocoding result in self.city_area and its total bounds in
        self.west, self.south, self.east, self.north.
        """
        self.city_name = city_name
        self.city_limits = city_limits
        self.query_type = 'city'

        self._reset_data()

        if city_elements is None:
            city_elements = {}
        city_dict_copy = self.city_dict.copy()
        self.city_dict.update(city_elements)

        try:
            self.city_area = ox.geocode_to_gdf(self.city_name)
            self.west, self.south, self.east, self.north = self.city_area.total_bounds

            if self.city_limits is True:
                self._get_city_data_within_city_limits()
            else:
                self._get_city_data_within_rectangle()
        finally:
            # reset self.city_dict to default, also when geocoding or loading
            # fails, so the per-call overrides never leak into later calls
            self.city_dict = city_dict_copy
Example #11
0
def osm_gdf_from_geocode(
    query,
    which_result=None,
    by_osmid=False,
    buffer_dist=None,
):
    """Retrieves place(s) by name or ID from the Nominatim API as a GeoDataFrame.

    Args:
        query (str | dict | list): Query string(s) or structured dict(s) to geocode.
        which_result (int, optional): Which geocoding result to use. If None, auto-select the first (Multi)Polygon or raise an error if OSM doesn't return one. To get the top match regardless of geometry type, set which_result=1. Defaults to None.
        by_osmid (bool, optional): If True, handle query as an OSM ID for lookup rather than text search. Defaults to False.
        buffer_dist (float, optional): Distance to buffer around the place geometry, in meters. Only forwarded to osmnx when it is not None. Defaults to None.

    Returns:
        GeoDataFrame: A GeoPandas GeoDataFrame.
    """

    check_package("osmnx", "https://osmnx.readthedocs.io/en/stable/")

    import osmnx as ox

    # Forward by keyword rather than by position, and leave `buffer_dist` out
    # unless the caller set it: newer OSMnx releases take these options as
    # keyword-only and no longer have a `buffer_dist` parameter.
    geocode_kwargs = {"which_result": which_result, "by_osmid": by_osmid}
    if buffer_dist is not None:
        geocode_kwargs["buffer_dist"] = buffer_dist

    return ox.geocode_to_gdf(query, **geocode_kwargs)
Example #12
0
def poly_from_osm_cityid(osmid):
    """Look up ``"R<osmid>"`` by OSM id and return its geometry and display name.

    :param osmid: id appended to the ``"R"`` prefix to form the lookup query
    :return: tuple ``(boundaries, name)`` taken from the row labelled 0 of the
        geocoding result (the geometry is a shapely polygon according to the
        original author's note)
    """
    lookup_key = f"R{osmid!s}"
    admin_area = ox.geocode_to_gdf(lookup_key, by_osmid=True)
    return admin_area.geometry[0], admin_area.display_name[0]
Example #13
0
from pandas import read_csv
from sklearn.impute import SimpleImputer
import gdal
import math
import inspect
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import rasterio
import rasterio.mask
import geopandas as gpd

# Root folders of the project files and of the raster inputs.
filedir = '/data/projects/mobiair'
ras_rootdir = '/data/gghdc/gap/2021/output/areas/'

import osmnx as ox
# Geocoded GeoDataFrames of the two cities.
wuhan = ox.geocode_to_gdf('武汉, China')
utrecht = ox.geocode_to_gdf('Utrecht')

# If we just do random sampling in space-time, we get a quite high R2.
# But that is because, if location 1 at time t1 (L1 t1) is in the training
# set, then L1 t2 is going to be small. So it is not a reliable accuracy
# assessment.
# Most importantly, for the location we want to predict, we don't know the
# entire time series.
spreadurl = 'https://raw.githubusercontent.com/mengluchu/mobiair/master/mapping_data/DENL17_hr_spread.csv'
res = 25
# Use the `read_csv` name imported from pandas at the top of the script; no
# `pd` alias is imported in the visible import block.
ap = read_csv(spreadurl)

# when working at 100m only: drop the columns whose name ends in _25 or _50
if res == 100:
    ap = ap.drop(ap.filter(regex='_25$|_50$').columns, axis=1)
ap.shape
Example #14
0
def test_geocode_to_gdf():
    """Geocode ``place1`` (top result, buffer_dist=100) and project it to epsg:3395."""
    # test loading spatial boundaries and plotting
    geocode_options = {"which_result": 1, "buffer_dist": 100}
    boundary = ox.geocode_to_gdf(place1, **geocode_options)
    boundary_projected = ox.project_gdf(boundary, to_crs="epsg:3395")
Example #15
0
def main(intersecting_tiles_csv_filepath, tiles_dir, output_filepath,
         resample_factor, keep_raw, raw_dir, nominatim_query,
         exclude_nominatim_query, crs):
    """Download raw tiles, downscale and subdivide them, and list the results.

    Every raw tile is read at ``1 / resample_factor`` of its width and height
    (average resampling), split into the windows yielded by
    ``_get_window_transform`` and each window is written as its own raster.

    :param intersecting_tiles_csv_filepath: header-less CSV; its first column
        is used as index and the following column holds the tile file names
    :param tiles_dir: passed to ``_get_output_tile_filepath`` to build the
        output tile paths
    :param output_filepath: CSV file the list of output tiles is written to
        (without header)
    :param resample_factor: divisor applied to the raster width and height;
        ``RESAMPLE_FACTOR`` when None
    :param keep_raw: when falsy, each raw tile is deleted once processed
    :param raw_dir: folder of the raw tiles, ``'data/raw/tiles'`` when None;
        created (with its parents) if it does not exist
    :param nominatim_query: when truthy, output tiles that do not intersect
        the geocoded geometry are deleted from disk and left out of the list
    :param exclude_nominatim_query: when truthy, the geometry geocoded for it
        is subtracted from the one of ``nominatim_query``
    :param crs: CRS used for the geocoded geometries and the tile boxes;
        ``settings.CRS`` when falsy
    """
    logger = logging.getLogger(__name__)

    if resample_factor is None:
        resample_factor = RESAMPLE_FACTOR

    if raw_dir is None:
        raw_dir = 'data/raw/tiles'
    # `makedirs` creates missing parent folders too (a plain `mkdir` of the
    # nested default path fails without them) and is a no-op when the folder
    # is already there.
    os.makedirs(raw_dir, exist_ok=True)

    tile_filenames = pd.read_csv(intersecting_tiles_csv_filepath,
                                 index_col=0,
                                 header=None).iloc[:, 0]
    output_tiles = []
    for tile_filename in tqdm.tqdm(tile_filenames):
        raw_tile_filepath = path.join(raw_dir, tile_filename)
        tile_basename, tile_ext = path.splitext(tile_filename)
        # download the raw tile only when it is not available locally
        if not path.exists(raw_tile_filepath):
            request.urlretrieve(BASE_URI + tile_filename, raw_tile_filepath)
        with rio.open(raw_tile_filepath) as src:
            interim_width = src.width // resample_factor
            interim_height = src.height // resample_factor
            data = src.read(out_shape=(src.count, interim_height,
                                       interim_width),
                            resampling=Resampling.average)
            t = src.transform
            # same origin and rotation terms, pixel size scaled by the factor
            interim_transform = affine.Affine(t.a * resample_factor, t.b, t.c,
                                              t.d, t.e * resample_factor, t.f)
            for i, (dst_window, dst_transform) in enumerate(
                    _get_window_transform(interim_width, interim_height,
                                          interim_transform,
                                          NUM_TILE_SUBDIVISIONS)):
                profile = src.profile.copy()
                profile.update(width=dst_window.width,
                               height=dst_window.height,
                               transform=dst_transform,
                               crs=settings.CRS)
                row_off, col_off = dst_window.row_off, dst_window.col_off
                tile_filepath = _get_output_tile_filepath(
                    tiles_dir, tile_basename, i, tile_ext)
                with rio.open(tile_filepath, 'w', **profile) as dst:
                    for channel in range(src.count):
                        # raster bands are written 1-based
                        dst.write(
                            data[channel][row_off:row_off + dst_window.height,
                                          col_off:col_off + dst_window.width],
                            channel + 1)

                output_tiles.append(tile_filepath)

        # if raw tiles are not to be preserved, remove them at the end of each
        # iteration (with original 10cm resolution, they can take a lot of
        # local storage); the raw folder itself is not removed
        if not keep_raw:
            os.remove(raw_tile_filepath)

    if nominatim_query:
        # get only the tiles that intersect the extent of the result of the
        # nominatim query
        logger.info("Querying Nominatim for boundaries for `%s`",
                    nominatim_query)
        gser = ox.geocode_to_gdf(nominatim_query)['geometry']
        if not crs:
            crs = settings.CRS
        geom = gser.to_crs(crs).iloc[0]
        if exclude_nominatim_query:
            logger.info("Querying Nominatim for boundaries for `%s`",
                        exclude_nominatim_query)
            exclude_geom = ox.geocode_to_gdf(
                exclude_nominatim_query)['geometry'].to_crs(crs).iloc[0]
            geom = geom.difference(exclude_geom)

        def bbox_geom_from_tile(tile_filepath):
            # bounding box of a written tile as a shapely box
            with rio.open(tile_filepath) as src:
                return geometry.box(*src.bounds)

        tiles_gdf = gpd.GeoDataFrame(output_tiles,
                                     columns=['img_filepath'],
                                     geometry=list(
                                         map(bbox_geom_from_tile,
                                             output_tiles)),
                                     crs=crs)
        # NOTE(review): recent geopandas releases call this keyword
        # `predicate` instead of `op` -- confirm against the pinned version.
        output_tiles_ser = gpd.sjoin(tiles_gdf,
                                     gpd.GeoDataFrame(geometry=[geom],
                                                      crs=crs),
                                     op='intersects',
                                     how='inner')['img_filepath']

        # delete from disk every tile that did not make it through the join
        tiles_to_rm_ser = tiles_gdf['img_filepath'].loc[
            ~tiles_gdf.index.isin(output_tiles_ser.index)]
        for img_filepath in tiles_to_rm_ser:
            os.remove(img_filepath)
        logger.info(
            "removed %d tiles that do not intersect with the extent of %s",
            len(tiles_to_rm_ser), nominatim_query)
    else:
        # just create a pandas series anyway to use the `to_csv` method below
        output_tiles_ser = pd.Series(output_tiles)

    output_tiles_ser.to_csv(output_filepath, header=False)
    logger.info("Dumped list of downscaled tiles to %s", output_filepath)
Example #16
0
    input_key = str(
        input(
            'Input category key:\n- for list of keys input "options"\n- for networks input "network"\n- for complex input, input "complex"\n'
        ))

# Complex input: a JSON object mapping category keys to one value or a list of values.
if input_key == 'complex':
    input_c = input(
        'Complex input example:\n{"amenity":["pub","restaurant","hospital"],"tourism":"hotel"}\nFor complex inputs for network categories, use network option for category key input\n'
    )
    input_c = json.loads(input_c)

# Area of interest: a geocoded place name (choice 1) or a bounding box typed by hand.
if input_choice == 1:
    place_name = input('Input place name: \n (for example Winchester USA):\n')
    place = ox.geocode_to_gdf(place_name)
    #place.plot()
else:
    put = input(
        'input coordinates of a bbox in format [left,top,right,bottom]:\n')
    # The prompt shows the coordinates wrapped in square brackets, so accept
    # them: drop surrounding whitespace and brackets before splitting.
    spliter = put.strip().strip('[]').split(',')
    bbox = [float(coordinate) for coordinate in spliter]
#network section
if input_key == 'network':
    ntype = input('Input network type:\nfor example "drive"')
    customf = input(
        'Input custom filter parameters\nfor example ["highway"="motorway"] or\n["highway"~"cycleway"]["bicycle"!~"no"]\n'
    )
Example #17
0
def bikeability(place, scale='city', data=False):
    ''' A function that would calculate bikeability value for a given
    place of interest. 

    Parameters
    place: the place of interest e.g "Freiburg, Germany" datatype = string
    Scale: can be either "grid" or "city" default is "city" datatype = string
    data: if True output returns a dataframe along with the standard dictionary 
    output, datatype = boolean

    Returns the average_index for bikeability(number between 0 and 100) and some
    summary statistics of index, datatype = dictionary or dataframe and dictionary
    if data is set as True.
    
    Usage example
    a = bikeability('Freiburg, Germany', scale ='grid', data = False) ... for grid scale approach
    a,b = bikeability('Freiburg, Germany', scale ='grid', data = True)
    a =bikeability('Freiburg, Germany', scale = 'city')... for city scale approach
    a,b =bikeability('Freiburg, Germany', scale = 'city', data = True)
    '''

    if scale != 'grid':

        place = place

        # Create and set osmnx to select important tags
        useful_tags_way = [
            'bridge', 'length', 'oneway', 'lanes', 'ref', 'name', 'highway',
            'maxspeed', 'service', 'access', 'area', 'cycleway', 'landuse',
            'width', 'est_width', 'junction', 'surface'
        ]

        ox.utils.config(useful_tags_way=useful_tags_way
                        )  # = useful_tags_path  change here1

        # Create basic city graph
        place_name = place
        graph = ox.graph_from_place(place_name,
                                    network_type='all',
                                    retain_all=True)

        # # Calculate and add edge closeness centrality(connectedness)
        centrality = nx.degree_centrality(nx.line_graph(graph))
        nx.set_edge_attributes(graph, centrality, 'centrality')

        # Extract nodes and edges to geopandas from graph
        #edges = ox.graph_to_gdfs(graph, nodes=False)
        try:
            edges = ox.graph_to_gdfs(graph, nodes=False)
            pass
        except Exception as e:
            print('{} at {}'.format(e, place))

        # Remove unwanted columns and add weight variable
        cols = [
            'highway', 'cycleway', 'surface', 'maxspeed', 'length', 'lanes',
            'oneway', 'width', 'centrality', 'geometry'
        ]

        try:
            df = edges.loc[:, cols]
        except KeyError as e:
            print(e)

        # Set appropriate data types

        df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                       errors='coerce',
                                       downcast='integer')
        df['lanes'] = pd.to_numeric(df['lanes'],
                                    errors='coerce',
                                    downcast='integer')
        df['width'] = pd.to_numeric(df['width'],
                                    errors='coerce',
                                    downcast='unsigned')
        df['highway'] = df['highway'].astype(str)
        df['surface'] = df['surface'].astype(str)
        df['oneway'] = df['oneway'].astype(int)
        df['cycleway'] = df['cycleway'].astype(str)

        # Dataframe cleaning and preprocessing
        # highway column
        df['highway'] = df['highway'].str.replace(r'[^\w\s-]', '', regex=True)
        highway_cols = (pd.DataFrame(df.highway.str.split(' ', expand=True)))
        highway_map = ({
            'service': 6,
            'None': np.nan,
            'residential': 8,
            'unclassified': 7,
            'footway': 7,
            'track': 5,
            'tertiary': 6,
            'living_street': 9,
            'path': 5,
            'pedestrian': 7,
            'secondary': 5,
            'primary': 2,
            'steps': 2,
            'cycleway': 10,
            'rest_area': 5,
            'primary_link': 2,
            'ferry': 1,
            'construction': 2,
            'byway': 8,
            'bridleway': 6,
            'trunk': 2,
            'trunk_link': 2,
            'motorway': 1,
            'motorway_link': 1
        })
        for column in highway_cols:
            highway_cols[column] = highway_cols[column].map(highway_map)
        highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
        df['highway'] = round(highway_cols['mean'])

        # cycleway column
        df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]',
                                                    '',
                                                    regex=True)
        cycleway_cols = (pd.DataFrame(df.cycleway.str.split(' ', expand=True)))
        cycleway_map = ({
            'opposite': 9,
            'lane': 9,
            'share_busway': 8,
            'shared_lane': 8,
            'segregated': 10,
            'no': 1,
            'opposite_lane': 9,
            'crossing': 10,
            'track': 10,
            'designated': 10,
            'opposite_share_busway': 8,
            'seperate': 10,
            'shoulder': 8
        })
        for column in cycleway_cols:
            cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
        cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
        df['cycleway'] = round(cycleway_cols['mean'])

        # surface column
        df['surface'] = df['surface'].str.replace(r'[^\w\s-]', '', regex=True)
        surface_cols = (pd.DataFrame(df.surface.str.split(' ', expand=True)))
        surface_map = ({
            'asphalt': 10,
            'paved': 10,
            'cobblestone': 5,
            'fine_gravel': 9,
            'ground': 7,
            'sett': 6,
            'gravel': 7,
            'metal': 6,
            'compacted': 10,
            'dirt': 6,
            'paving_stones': 7,
            'grass_paver': 5,
            'unpaved': 8,
            'pebblestone': 9,
            'concrete': 10,
            'grass': 5,
            'mud': 1
        })
        for column in surface_cols:
            surface_cols[column] = surface_cols[column].map(surface_map)
        surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
        df['surface'] = round(surface_cols['mean'])

        # maxspeed column
        df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
        df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
        maxspeed_map = ({
            20: 10,
            30: 9,
            40: 8,
            50: 7,
            60: 6,
            70: 5,
            80: 4,
            90: 3,
            100: 2,
            110: 1
        })
        df['maxspeed'] = df['maxspeed'].map(maxspeed_map)

        # lanes column
        df.loc[df['lanes'] > 8, 'lanes'] = 8
        lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
        df['lanes'] = df['lanes'].map(lanes_map)

        # oneway column
        oneway_map = {0: 5, 1: 10, -1: 5}
        df['oneway'] = df['oneway'].map(oneway_map)

        # width column
        df.loc[df['width'] < 2, 'width'] = 1
        df.loc[df['width'] > 6, 'width'] = 6
        df['width'] = round(df['width'])
        width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
        df['width'] = df['width'].map(width_map)

        # normalize centrality column (between o and 10)
        df['centrality'] = (
            (df['centrality'] - np.min(df['centrality'])) /
            (np.max(df['centrality']) - np.min(df['centrality']))) * 10

        # Switch to new df for calculation
        d_frame = df.copy(deep=True)

        # Multiply variables by weights
        d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
        d_frame['surface'] = d_frame['surface'] * 0.108695652
        d_frame['highway'] = d_frame['highway'] * 0.167701863
        d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
        d_frame['lanes'] = d_frame['lanes'] * 0.108695652
        d_frame['centrality'] = d_frame['centrality'] * 0.071428571
        d_frame['width'] = d_frame['width'] * 0.086956522
        d_frame['oneway'] = d_frame['oneway'] * 0.059006211

        # Normalize variables between 0 and 1
        d_frame['index'] = (np.nanmean(d_frame[[
            'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
            'oneway', 'centrality'
        ]],
                                       axis=1,
                                       dtype='float64')) * 80

        # Final statistics index of city
        mean_index = np.average(d_frame['index'], weights=d_frame['length'])
        max_index = d_frame['index'].max()
        min_index = d_frame['index'].min()
        std_index = d_frame['index'].std()

        # Plot result
        #d_frame.plot(column = 'index',legend = True)

        # Result dictionary
        result = ({
            'place': place,
            'average_index': mean_index,
            'max_index': max_index,
            'min_index': min_index,
            'std_index': std_index
        })

    else:
        #Get bounding box for place
        place_name = place
        area = ox.geocode_to_gdf(place_name)  # graph first
        xmin, ymin, xmax, ymax = area.total_bounds

        #divide into grids x = lon, y = lat
        height = 0.041667
        width = 0.041667
        rows = int(np.ceil((ymax - ymin) / height))
        cols = int(np.ceil((xmax - xmin) / width))
        XleftOrigin = xmin
        XrightOrigin = xmin + width
        YtopOrigin = ymax
        YbottomOrigin = ymax - height
        polygons = []
        for i in range(cols):
            Ytop = YtopOrigin
            Ybottom = YbottomOrigin
            for j in range(rows):
                polygons.append(
                    Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                             (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
                Ytop = Ytop - height
                Ybottom = Ybottom - height
            XleftOrigin = XleftOrigin + width
            XrightOrigin = XrightOrigin + width

        #Ensure the grids are within the polygon
        grid_list = []
        for i in range(len(polygons)):
            p = Point(polygons[i].centroid.x, polygons[i].centroid.y)
            geome = shape(polygons[i])
            q = gpd.GeoDataFrame({'geometry': geome}, index=[0])
            q = q.set_crs("EPSG:4326")
            if area.geometry.iloc[0].contains(polygons[i]) == True:
                grid_list.append(q)
            #elif p.within(area.geometry.iloc[0]) == True and area.geometry.iloc[0].contains(polygons[i])== False:
            elif area.geometry.iloc[0].intersects(polygons[i]):
                #grid_list.append(polygons[i])
                clip = gpd.clip(area, q)
                grid_list.append(clip)

        #Initialize important variables
        dflist = []
        exception_grids = []
        dfs = []

        for i in tqdm(range(len(grid_list))):

            #graph
            useful_tags_way = [
                'bridge', 'length', 'oneway', 'lanes', 'ref', 'name',
                'highway', 'maxspeed', 'surface', 'area', 'landuse', 'width',
                'est_width', 'junction', 'cycleway'
            ]
            ox.utils.config(useful_tags_way=useful_tags_way
                            )  # = =useful_tags_path change 2

            try:
                box_graph = ox.graph_from_polygon(
                    grid_list[i].geometry.iloc[0],
                    network_type='bike',
                    retain_all=True)
                pass
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Calculate and add edge closeness centrality(connectedness)
            centrality = nx.degree_centrality(nx.line_graph(box_graph))
            nx.set_edge_attributes(box_graph, centrality, 'centrality')

            # Extract nodes and edges to geopandas from graph
            try:
                edges = ox.graph_to_gdfs(box_graph, nodes=False)
                pass
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Select only the important variables
            cols = [
                'highway', 'cycleway', 'surface', 'maxspeed', 'length',
                'lanes', 'oneway', 'width', 'centrality', 'geometry'
            ]
            try:
                df = edges.loc[:, cols]
                pass
            except KeyError as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue

            # Set appropriate data types
            df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                           errors='coerce',
                                           downcast='integer')
            df['lanes'] = pd.to_numeric(df['lanes'],
                                        errors='coerce',
                                        downcast='integer')
            df['width'] = pd.to_numeric(df['width'],
                                        errors='coerce',
                                        downcast='unsigned')
            df['highway'] = df['highway'].astype(str)
            df['surface'] = df['surface'].astype(str)
            df['oneway'] = df['oneway'].astype(int)
            df['cycleway'] = df['cycleway'].astype(str)

            # Dataframe cleaning and preprocessing
            # highway column
            df['highway'] = df['highway'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)
            highway_cols = (pd.DataFrame(df.highway.str.split(' ',
                                                              expand=True)))
            highway_map = ({
                'service': 6,
                'None': np.nan,
                'residential': 8,
                'unclassified': 7,
                'footway': 7,
                'track': 5,
                'tertiary_link': 6,
                'tertiary': 6,
                'living_street': 9,
                'path': 5,
                'pedestrian': 7,
                'secondary': 5,
                'secondary_link': 5,
                'primary': 2,
                'steps': 2,
                'cycleway': 10,
                'rest_area': 5,
                'primary_link': 2,
                'ferry': 1,
                'construction': 2,
                'byway': 8,
                'bridleway': 6,
                'trunk': 2,
                'trunk_link': 2,
                'motorway': 1,
                'motorway_link': 1
            })
            for column in highway_cols:
                highway_cols[column] = highway_cols[column].map(highway_map)
            highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
            df['highway'] = round(highway_cols['mean'])

            #cycleway column
            df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]',
                                                        '',
                                                        regex=True)
            cycleway_cols = (pd.DataFrame(
                df.cycleway.str.split(' ', expand=True)))
            cycleway_map = ({
                'opposite': 9,
                'lane': 9,
                'share_busway': 8,
                'shared_lane': 8,
                'segregated': 10,
                'no': 1,
                'opposite_lane': 9,
                'crossing': 10,
                'track': 10,
                'designated': 10,
                'opposite_share_busway': 8,
                'seperate': 10,
                'shoulder': 8
            })
            for column in cycleway_cols:
                cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
            cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
            df['cycleway'] = round(cycleway_cols['mean'])

            # surface column
            df['surface'] = df['surface'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)  #''
            surface_cols = (pd.DataFrame(df.surface.str.split(' ',
                                                              expand=True)))
            surface_map = ({
                'asphalt': 10,
                'paved': 10,
                'cobblestone': 3,
                'fine_gravel': 9,
                'ground': 6,
                'sett': 4,
                'gravel': 7,
                'metal': 7,
                'compacted': 9,
                'dirt': 6,
                'paving_stones': 7,
                'grass_paver': 4,
                'unpaved': 7,
                'pebblestone': 7,
                'concrete': 10,
                'grass': 5,
                'mud': 2,
                'sand': 5,
                'wood': 4,
                'earth': 6,
                'woodchips': 3,
                'snow': 2,
                'ice': 2,
                'salt': 2
            })
            for column in surface_cols:
                surface_cols[column] = surface_cols[column].map(surface_map)
            surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
            df['surface'] = round(surface_cols['mean'])

            # maxspeed column
            df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
            df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
            df['maxspeed'] = round(df['maxspeed'], -1)
            maxspeed_map = ({
                20: 10,
                30: 9,
                40: 8,
                50: 7,
                60: 6,
                70: 5,
                80: 4,
                90: 3,
                100: 2,
                110: 1
            })
            df['maxspeed'] = df['maxspeed'].map(maxspeed_map)

            # lanes column
            df.loc[df['lanes'] > 8, 'lanes'] = 8
            lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
            df['lanes'] = df['lanes'].map(lanes_map)

            # oneway column
            oneway_map = {0: 5, 1: 10, -1: 5}
            df['oneway'] = df['oneway'].map(oneway_map)

            # width column
            df.loc[df['width'] < 2, 'width'] = 1
            df.loc[df['width'] > 6, 'width'] = 6
            df['width'] = round(df['width'])
            width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
            df['width'] = df['width'].map(width_map)

            # normalize centrality column (between o and 10)
            df['centrality'] = (
                (df['centrality'] - np.min(df['centrality'])) /
                (np.max(df['centrality']) - np.min(df['centrality']))) * 10

            #Switch to new df for calculation
            d_frame = df.copy(deep=True)

            # Multiply variables by weights
            d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
            d_frame['surface'] = d_frame['surface'] * 0.108695652
            d_frame['highway'] = d_frame['highway'] * 0.167701863
            d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
            d_frame['lanes'] = d_frame['lanes'] * 0.108695652
            d_frame['centrality'] = d_frame['centrality'] * 0.071428571
            d_frame['width'] = d_frame['width'] * 0.086956522
            d_frame['oneway'] = d_frame['oneway'] * 0.059006211

            d_frame['index'] = (np.nanmean(d_frame[[
                'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
                'oneway', 'centrality'
            ]],
                                           axis=1,
                                           dtype='float64')) * 80

            d_frame['grid_index'] = np.average(d_frame['index'],
                                               weights=d_frame['length'])
            dflist.append(d_frame)
            dfs.append(df)

        #Final statistics index of city in dictionary
        df_indexes = pd.concat(dflist)
        result = ({
            'place':
            place_name,
            'average_index':
            np.average(df_indexes['index'], weights=df_indexes['length']),
            'max_index':
            df_indexes['index'].max(),
            'min_index':
            df_indexes['index'].min(),
            'std_index':
            df_indexes['index'].std(),
            'grids':
            len(grid_list),
            'nsegments':
            len(df_indexes),
            'unused_grids':
            len(exception_grids)
        })

    if data == False:
        return (result)
    else:
        return (d_frame, result)
Example #18
0
Remember, our variogram defines the spatial autocorrelation of the data (i.e., how the locations in our region affect one another). Once we have a variogram model, we can use it to estimate the weights in our kriging model. I won't go into detail on how this is done, but there is a neat walkthrough in the [scikit-gstat docs here](https://scikit-gstat.readthedocs.io/en/latest/userguide/kriging.html).

Anyway, I'll briefly use the [pykrige](https://github.com/GeoStat-Framework/PyKrige) library to do some kriging so you can get an idea of what it looks like:

# Build an ordinary kriging model from the PM 2.5 observations, using a
# spherical variogram model.
krig = OrdinaryKriging(x=gpm25["Easting"], y=gpm25["Northing"], z=gpm25["PM_25"], variogram_model="spherical")
# Evaluate the model on the grid spanned by gridx/gridy. `z` is what gets
# plotted below; `ss` is not used in this snippet (presumably the kriging
# variance -- confirm against the pykrige docs).
z, ss = krig.execute("grid", gridx, gridy)
plt.imshow(z);

Now let's convert our raster back to polygons so we can map it. I'm also going to load in a polygon of BC using `osmnx` to clip my data so it fits nicely on my map this time:

# Turn the interpolated grid back into polygons plus one value per polygon.
# NOTE(review): pixel2poly is defined elsewhere; its exact output is assumed here.
polygons, values = pixel2poly(gridx, gridy, z, resolution)
# Wrap the polygons in a GeoDataFrame declared as EPSG:3347, then reproject
# to EPSG:4326 for mapping.
pm25_model = (gpd.GeoDataFrame({"PM_25_modelled": values}, geometry=polygons, crs="EPSG:3347")
                 .to_crs("EPSG:4326")
                 )
# Geocode the British Columbia boundary and use it as the clip mask so the
# modelled surface only covers the province.
bc = ox.geocode_to_gdf("British Columbia, Canada")
pm25_model = gpd.clip(pm25_model, bc)

# Choropleth of the modelled values, one region per GeoDataFrame row
# (rows are matched to geometries through the index).
fig = px.choropleth_mapbox(pm25_model, geojson=pm25_model.geometry, locations=pm25_model.index,
                           color="PM_25_modelled", color_continuous_scale="RdYlGn_r",
                           center={"lat": 52.261, "lon": -123.246}, zoom=3.5,
                           mapbox_style="carto-positron")
fig.update_layout(margin=dict(l=0, r=0, t=30, b=10))
# Zero-width outlines so polygon borders do not obscure the colour fill.
fig.update_traces(marker_line_width=0)

I used an "ordinary kriging" interpolation above which is the simplest implementation of kriging. There are many other forms of kriging too that can account for underlying trends in the data ("universal kriging"), or even use a regression or classification model to make use of additional explanatory variables. `pykrige` [supports most variations](https://geostat-framework.readthedocs.io/projects/pykrige/en/stable/examples/index.html). In particular for the latter, `pykrige` can accept `sklearn` models which is useful!

### 2.3. Areal interpolation

Areal interpolation is concerned with mapping data from one polygonal representation to another. Imagine I want to map the air pollution polygons I just made to FSA polygons (recall FSA is "forward sortation area", which are groups of postcodes). The most intuitive way to do this is to distribute values based on area proportions, hence "areal interpolation".
Example #19
0
def get_perimeter(query, by_osmid=False):
    """Geocode ``query`` with OSMnx and return the resulting GeoDataFrame.

    :param query: value forwarded unchanged to ``ox.geocode_to_gdf``
    :param by_osmid: forwarded as the ``by_osmid`` keyword (default ``False``)
    :return: whatever ``ox.geocode_to_gdf`` returns for the query
    """
    perimeter = ox.geocode_to_gdf(query, by_osmid=by_osmid)
    return perimeter
Example #20
0
import pandas as pd 
import geopandas as gpd
import numpy as np
import os
from shapely.geometry import Point
from rasterstats import zonal_stats, point_query
import pyproj
from shapely.ops import transform
import modelutils as m
import rasterio
from matplotlib import pyplot as plt
from rasterio.plot import show_hist
from math import modf
import osmnx as ox
from scipy import signal 
# Geocode the two study areas to GeoDataFrames (executed at import time).
wuhan = ox.geocode_to_gdf('武汉, China')
utrecht = ox.geocode_to_gdf('Utrecht province') 
# Quick visual check of the Utrecht result.
utrecht.plot() 
# Hard-coded, machine-specific locations; preddir is the "prediction/"
# sub-folder of filedir.
filedir = "/Users/menglu/Documents/GitHub/mobiair/"
preddir =f"{filedir}prediction/"

savedir = "/Volumes/Meng_Mac/mobi_result/Uni/" # each profile a savedir. 
                   
def wgs2laea(p):
    """Reproject a geometry from WGS84 (EPSG:4326) to a custom LAEA projection.

    The target CRS is the Lambert azimuthal equal-area projection described by
    the proj string below (origin at lat 51, lon 9.5; GRS80 ellipsoid; metres).
    The transformer is created with ``always_xy=True``.

    :param p: geometry to reproject; passed to ``shapely.ops.transform``
    :return: the geometry returned by ``shapely.ops.transform``
    """
    source_crs = pyproj.CRS('EPSG:4326')
    target_crs = pyproj.CRS('+proj=laea +lat_0=51 +lon_0=9.5 +x_0=0 +y_0=0 +ellps=GRS80 +units=m +no_defs')
    to_laea = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True)
    return transform(to_laea.transform, p)

def plot_raster ():
Example #21
0
cities.plot(ax=ax, markersize=180, edgecolor="0.2")
plt.title("Big cities in B.C.");

### 2.4. Loading from Open Street Map

So we can read vector data from a file and we can create our own, but let's see what real power feels like!

![](img/real-power.gif)

Often it will be helpful to obtain data from an online source using an API. The most relevant "online source" here is [OpenStreetMap (OSM)](https://www.openstreetmap.org/), which is like the Wikipedia of geospatial data (think world map, road networks, bike networks, building heights, sandy coastlines, you name it). There are plenty of Python APIs for getting data from OSM but by far the best I've come across is [osmnx](https://github.com/gboeing/osmnx/tree/master):

```console
conda install -c conda-forge osmnx
```

`osmnx` provides an easy-to-use API to query OSM data. I usually import it with the alias `ox`. Let's get a polygon of Vancouver now using the function `ox.geocode_to_gdf()`:

>By default `osmnx` caches responses locally in a folder `cache` so that you can quickly access data again without needing to call the API. You can turn this behaviour off if you wish.

import osmnx as ox

# Geocode the place name to a GeoDataFrame and plot its geometry.
vancouver = ox.geocode_to_gdf("Vancouver, Canada")
vancouver.plot(edgecolor="0.2")
plt.title("Vancouver");

It's certainly Vancouver, but it looks a bit blocky. It might be a bit low resolution, or someone just decided this was the best way to encapsulate "Vancouver" on OSM. Either way, let's use this polygon to "clip" a section of our higher-resolution provinces data which we downloaded earlier (and which is the official shapefile downloaded from [statcan](https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm)).

This is the first geometric wrangling operation we'll see. I'll show some more later, but think of "clipping" as pressing a cookie cutter (the Vancouver polygon above) into a layer of cookie dough (our high-resolution provinces data) to get a shape out:

# Clip the provinces layer `bc` using the `vancouver` GeoDataFrame as the mask.
van_bc = gpd.clip(bc, vancouver)
van_bc.plot(edgecolor="0.2")
Example #22
0
import osmnx as ox

# Geocode Munich to a GeoDataFrame.
munich = ox.geocode_to_gdf("Munich, Germany")
# Project the GeoDataFrame before plotting (no target CRS is passed, so the
# choice is left to ox.project_gdf -- confirm its default in the OSMnx docs).
ax = ox.project_gdf(munich).plot()
# Hide the axes; the return value is assigned to `_` and not used.
_ = ax.axis("off")
def setup_osm(place_name):
    """Fetch the street graph, buildings, boundary and street edges for a place.

    All data is requested through OSMnx using the same place query.

    :param place_name: place query passed to each OSMnx call
    :return: tuple ``(graph, buildings, area, edges)`` where ``graph`` is the
        result of ``ox.graph_from_place``, ``buildings`` the geometries tagged
        ``building``, ``area`` the geocoded place GeoDataFrame and ``edges``
        the edge GeoDataFrame of ``graph``
    """
    graph = ox.graph_from_place(place_name)
    buildings = ox.geometries_from_place(place_name, tags={'building': True})
    area = ox.geocode_to_gdf(place_name)
    # Only the edges are returned, so skip building the node GeoDataFrame
    # instead of creating it and discarding it.
    edges = ox.graph_to_gdfs(graph, nodes=False)
    return graph, buildings, area, edges
Example #24
0
# test.to_file("/home/benjamin/srtm_corse.tif")

# test = DigitalElevationModel("/home/benjamin/dem_test.tif").clip((13, 42, 15, 45)).to_file(
#     "/home/benjamin/dem_clip_test.tif")
# pop = Raster("/home/benjamin/Documents/PRO/PRODUITS/POPULATION_DENSITY/001_DONNEES/COTE_D_IVOIRE"
#              "/population_civ_2019-07-01_geotiff/population_civ_2019-07-01.tif")
from pyrasta.tools.stats import _zonal_stats

# Load a DEM raster from a hard-coded local path.
dem = DigitalElevationModel(
    "/home/benjamin/Documents/PRO/PRODUITS/TESTS/dem_ci.tif")
# Reproject to CRS code 32630, derive a slope raster and write it to disk.
# NOTE(review): "degree" presumably selects the slope unit -- confirm against
# the pyrasta documentation.
test = dem.to_crs(32630).slope("degree")
test.to_file("/home/benjamin/dem_slope_ci.tif")

# Geocode the country boundary with a structured (dict) query, wrap it in a
# PolygonLayer, reproject to the same CRS code as above and clean the geometry.
country = PolygonLayer.from_gpd(
    ox.geocode_to_gdf(
        dict(country="Cote d'Ivoire", admin_level=2,
             type="boundary"))).to_crs(32630).clean_geometry()

# dem = from_cgiar_online_database(country.total_bounds)
# dem.to_file("/home/benjamin/dem_ci.tif")

# Split the country layer using the "hexana" method; the first argument is
# 1/100 of the first geometry's area (presumably the target cell size --
# confirm against PolygonLayer.split).
honeycomb = country.split(country.area[0] / 100,
                          method="hexana",
                          show_progressbar=True)

# Bring the cells back into the DEM's CRS before saving.
honeycomb = honeycomb.to_crs(dem.crs)
honeycomb.to_file("/home/benjamin/Documents/PRO/PRODUITS/TESTS/honeycomb.shp")
# NOTE(review): the ID column is added after to_file(), so it is presumably
# not part of the shapefile written on the previous line -- confirm intent.
honeycomb["ID"] = honeycomb.index

# test = dem.clip(mask=honeycomb[[15]], all_touched=True)
# test.to_file("/home/benjamin/pop.tif")