Example #1
def test_logging():
    # test OSMnx's logger
    ox.log("test a fake default message")
    ox.log("test a fake debug", level=lg.DEBUG)
    ox.log("test a fake info", level=lg.INFO)
    ox.log("test a fake warning", level=lg.WARNING)
    ox.log("test a fake error", level=lg.ERROR)

    ox.citation()
    ox.ts(style="date")
    ox.ts(style="time")
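These log calls only produce visible output once logging is enabled. A minimal sketch, reusing the older ox.config settings API that appears in Example #14 below:

import logging as lg
import osmnx as ox

# enable console and file logging so subsequent ox.log calls are emitted
ox.config(log_console=True, log_file=True, use_cache=True)

ox.log('now visible on the console and in the log file', level=lg.INFO)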
Example #2
def test_logging():

    # test OSMnx's logger
    import logging as lg
    ox.log('test a fake debug', level=lg.DEBUG)
    ox.log('test a fake info', level=lg.INFO)
    ox.log('test a fake warning', level=lg.WARNING)
    ox.log('test a fake error', level=lg.ERROR)
Example #3
def test_logging():

    # test OSMnx's logger
    import logging as lg
    ox.log('test a fake debug', level=lg.DEBUG)
    ox.log('test a fake info', level=lg.INFO)
    ox.log('test a fake warning', level=lg.WARNING)
    ox.log('test a fake error', level=lg.ERROR)

    ox.citation()
Example #4
def get_graph(row):

    global count_failed
    global count_success
    global count_already
    global count_small
    global failed_list

    # graph name = country + country iso + uc + uc id
    # build the name before the try block so the except handler below can reference it
    graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                      row['UC_NM_MN'], row['ID_HDC_G0'])

    try:
        graphml_folder = '{}/{}-{}'.format(output_graphml_path,
                                           row['CTR_MN_NM'], row['CTR_MN_ISO'])
        graphml_file = '{}-{}.graphml'.format(row['UC_NM_MN'],
                                              row['ID_HDC_G0'])

        filepath = os.path.join(graphml_folder, graphml_file)
        if not os.path.exists(filepath):

            # get graph
            print(ox.ts(), graph_name)
            G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                      network_type=network_type,
                                      retain_all=retain_all,
                                      simplify=simplify,
                                      truncate_by_edge=truncate_by_edge)

            # don't save graphs if they have fewer than 3 nodes
            if len(G) > 2:
                ox.save_graphml(G, filepath=filepath)
                count_success = count_success + 1
            else:
                count_small = count_small + 1
        else:
            count_already = count_already + 1

    except Exception as e:
        count_failed = count_failed + 1
        failed_list.append(graph_name)
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)
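A hedged usage sketch for get_graph: it is written to be applied row-wise to a GeoDataFrame of urban centers. The ucs_gdf name and the module-level counters are assumptions here, not part of the original code.

# module-level state that get_graph mutates via `global`
count_failed = count_success = count_already = count_small = 0
failed_list = []

# apply row-wise over the urban centers GeoDataFrame (name `ucs_gdf` assumed)
ucs_gdf.apply(get_graph, axis=1)
print(count_success, 'saved;', count_small, 'too small;',
      count_already, 'already on disk;', count_failed, 'failed')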
Example #5
def request_url(url, pause_duration=pause_duration):

    # check if this request is already in the cache (if ox.settings.use_cache=True)
    cached_response_json = ox.downloader._retrieve_from_cache(url)
    if cached_response_json is not None:
        response_json = cached_response_json
        ox.log('Got node elevations from cache')
    else:
        try:
            # request the elevations from the API
            ox.log('Requesting node elevations from API: {}'.format(url))
            time.sleep(pause_duration)

            # convert GET to POST to work around apache url length limits
            params = dict()
            endpoint, url_params = url.split('?')
            for chunk in url_params.split('&'):
                key, value = chunk.split('=')
                params[key] = value

            response = requests.post(endpoint, data=params, timeout=120)
            assert response.ok
            response_json = response.json()
            assert 'geonames' in response_json
            ox.downloader._save_to_cache(url, response_json, response.status_code)

        except Exception as e:
            ox.log(e)
            print(e)
            # 'response' is only bound if the POST returned, so guard the details
            if 'response' in locals():
                print('Error - server responded with {}: {}. {}'.format(
                    response.status_code, response.reason, response.text))
            # re-raise so the caller does not hit an unbound response_json below
            raise

    return response_json
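The manual split('?') / split('&') parsing above breaks on percent-encoded values. A sketch of a sturdier equivalent using only the standard library; the helper name url_to_post_params is hypothetical:

from urllib.parse import urlsplit, parse_qsl

def url_to_post_params(url):
    # separate the endpoint from the query string, decoding any
    # percent-encoded key=value pairs that naive str.split would mangle
    parts = urlsplit(url)
    endpoint = '{}://{}{}'.format(parts.scheme, parts.netloc, parts.path)
    params = dict(parse_qsl(parts.query))
    return endpoint, params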
Example #6
def get_Y_X_features_population_data(cities_selection=None, cities_skip=None):
    """
        Returns the Y and X arrays for training/testing population downscaling estimates.
        It gathers either a selection of cities or all stored cities but a selected list to skip

        Y contains vectors with the correspondent population densities
        X contains vectors with normalized urban features
        X_columns columns referring to X values
        Numpy arrays are previously stored

        Parameters
        ----------
        cities_selection : string
                list of cities to select
        cities_skip : string
                list of cities to skip (retrieve the rest)

        Returns
        ----------
        np.array, np.array, np.array
                Y vector, X vector, X column names vector
        """
    arr_X, arr_Y = [], []

    # Get the complete training-testing dataset
    for Y_X_data_city in os.listdir("data/training"):
        # Only if it contains a valid extension
        if ".npz" not in Y_X_data_city:
            continue

        # Get city's name
        city_ref = Y_X_data_city.replace("_X_Y.npz", "")

        # Only retrieve data from cities_selection (if given)
        if (cities_selection is not None) and (
                city_ref not in cities_selection):
            log("Skipping city: " + str(city_ref))
            continue

        # Skip cities listed in cities_skip (if given)
        if (cities_skip is not None) and (city_ref in cities_skip):
            log("Skipping city: " + str(city_ref))
            continue

        log("Retrieving data for city: " + str(city_ref))

        # Get stored data
        city_Y, city_X, city_X_cols = get_training_testing_data(city_ref)
        # Append values
        arr_Y.append(city_Y)
        arr_X.append(city_X)

    # Assumption: all generated testing-training data contain the same X columns
    return np.concatenate(arr_Y), np.concatenate(arr_X), city_X_cols
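A usage sketch, assuming .npz files produced by get_training_testing_data already sit under data/training; the city names are illustrative:

Y, X, X_cols = get_Y_X_features_population_data(
    cities_selection=['Lyon', 'Grenoble'])
print(Y.shape, X.shape, len(X_cols))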
Example #7
def request_url(url, pause_duration=pause_duration):

    # check if this request is already in the cache (if ox.settings.use_cache=True)
    cached_response_json = ox.downloader._retrieve_from_cache(url)
    if cached_response_json is not None:
        response_json = cached_response_json
        ox.log('Got node elevations from cache')
    else:
        try:
            # request the elevations from the API
            ox.log('Requesting node elevations from API: {}'.format(url))
            time.sleep(pause_duration)
            response = requests.get(url, timeout=120)
            assert response.ok
            response_json = response.json()
            ox.downloader._save_to_cache(url, response_json,
                                         response.status_code)
        except Exception as e:
            ox.log(e)
            # 'response' is only bound if the GET returned, so guard the details
            if 'response' in locals():
                print('Error - server responded with {}: {}'.format(
                    response.status_code, response.reason))
            # re-raise so the caller does not hit an unbound response_json below
            raise

    return response_json['results']
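Neither request_url variant retries transient failures. A minimal backoff sketch; the helper and its parameters are illustrative, not part of the original code:

import time
import requests

def get_with_backoff(url, attempts=3, base_pause=1.0):
    # retry transient HTTP failures with exponential backoff
    for n in range(attempts):
        try:
            response = requests.get(url, timeout=120)
            response.raise_for_status()
            return response.json()
        except requests.RequestException:
            if n == attempts - 1:
                raise
            time.sleep(base_pause * 2 ** n)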
Example #8
def generateGraphPlot(self, graph):
    osmnx.log('Generating graph plot colored by elevation')
    # map each node's 'elevation' attribute to a color
    nc = osmnx.plot.get_node_colors_by_attr(graph,
                                            'elevation',
                                            cmap='plasma')
    # pass the colors in, otherwise nc is computed but never used
    osmnx.plot_graph(graph, node_color=nc, node_size=5,
                     edge_color='#333333', bgcolor='k')
Example #9
def get_training_testing_data(city_ref, df_insee_urban_features=None):
    """
	Returns the Y and X arrays for training/testing population downscaling estimates.

	Y contains vectors with the corresponding population densities
	X contains vectors with normalized urban features
	X_columns columns referring to X values
	Numpy arrays are stored locally

	Parameters
	----------
	city_ref : string
		city reference name
	df_insee_urban_features : geopandas.GeoDataFrame
		grid-cells with population count data and calculated urban features

	Returns
	----------
	np.array, np.array, np.array
		Y vector, X vector, X column names vector
	"""
    # Population extract exists?
    if (os.path.exists(get_population_training_validating_filename(city_ref))):
        log("Urban population training+validation data/features exist for input city: "
            + city_ref)
        # Read from Numpy.Arrays
        data = np.load(get_population_training_validating_filename(city_ref))
        # Return the stored arrays
        return data["Y"], data["X"], data["X_columns"]

    log("Calculating urban training+validation data/features for city: " +
        city_ref)
    start = time.time()

    # Select columns to normalize
    columns_to_normalise = [
        col for col in df_insee_urban_features.columns if "num_" in col
        or "m2_" in col or "dispersion" in col or "accessibility" in col
    ]
    # Normalize selected columns
    df_insee_urban_features.loc[:, columns_to_normalise] = (
        df_insee_urban_features.loc[:, columns_to_normalise].apply(
            lambda x: x / x.max(), axis=0))

    # By default, idINSPIRE for created squares (0 population count) is 0.
    # Change it to the 'CRS' string to stay coherent with the squares
    # aggregation procedure (string matching)
    df_insee_urban_features.loc[df_insee_urban_features.idINSPIRE == 0,
                                "idINSPIRE"] = "CRS"

    # Aggregate 5x5 squares: Get all possible aggregations (step of 200 meters = length of individual square)
    aggregated_df_insee_urban_features = get_aggregated_squares(
        ox.project_gdf(df_insee_urban_features, to_crs="+init=epsg:3035"),
        step=200.,
        conserve_squares_info=True)

    # X values: Vector <x1,x2, ... , xn> with normalized urban features
    X_values = []
    # Y values: Vector <y1, y2, ... , ym> with normalized population densities. m=25
    Y_values = []

    # For each <Indices> combination, create a X and Y vector
    for idx in aggregated_df_insee_urban_features.indices:
        # Extract the urban features in the given 'indices' order (Fill to 0 for non-existent squares)
        square_info = df_insee_urban_features.reindex(idx).fillna(0)
        # Y input (Ground truth): Population densities
        population_densities = (square_info["pop_count"] /
                                square_info["pop_count"].sum()).values

        # If the population count sums to 0, the densities are all NaN: skip
        if all(pd.isna(population_densities)):
            continue

        # X input: Normalized urban features
        urban_features = square_info[[
            col for col in square_info.columns
            if col not in ['idINSPIRE', 'geometry', 'pop_count']
        ]].values

        # Append X, Y
        X_values.append(urban_features)
        Y_values.append(population_densities)

    # Get the columns order referenced in each X vector
    X_values_columns = df_insee_urban_features[[
        col for col in square_info.columns
        if col not in ['idINSPIRE', 'geometry', 'pop_count']
    ]].columns
    X_values_columns = np.array(X_values_columns)

    # To Numpy Array
    X_values = np.array(X_values)
    Y_values = np.array(Y_values)

    # Save to file
    np.savez(get_population_training_validating_filename(city_ref),
             X=X_values,
             Y=Y_values,
             X_columns=X_values_columns)

    log("Done: urban training+validation data/features. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))

    return Y_values, X_values, X_values_columns
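The per-column max normalization near the top of this function can be checked in isolation. A self-contained sketch with toy data; the column names are illustrative:

import pandas as pd

df = pd.DataFrame({'num_a': [1.0, 2.0, 4.0], 'm2_b': [10.0, 5.0, 20.0]})
cols = ['num_a', 'm2_b']
# dividing by the column-wise max is equivalent to apply(lambda x: x / x.max(), axis=0)
df[cols] = df[cols] / df[cols].max()
print(df)  # each column now peaks at 1.0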
Example #10
def compute_full_urban_features(
        city_ref,
        df_osm_built=None,
        df_osm_pois=None,
        df_insee=None,
        data_source=None,
        landusemix_args={
            'walkable_distance': 600,
            'compute_activity_types_kde': True,
            'weighted_kde': True,
            'pois_weight': 9,
            'log_weighted': True
        },
        dispersion_args={
            "radius_search": 750,
            "use_median": True,
            "K_nearest": 50
        },
        kwargs={"max_dispersion": 15}):
    """
	Computes a set of urban features for each square where population count data exists

	Parameters
	----------
	city_ref : string
		city reference name
	df_osm_built : geopandas.GeoDataFrame
		input buildings
	df_osm_pois : geopandas.GeoDataFrame
		input points of interest
	df_insee : geopandas.GeoDataFrame
		grid-cells with population count where urban features will be calculated
	data_source : str
		define the type of population data for its retrieval in case it was stored
	landusemix_args : dict
		keyword arguments driving the land use mix calculation
	dispersion_args : dict
		keyword arguments driving the dispersion calculation
	kwargs : dict
		keyword arguments to guide the process

	Returns
	----------
	geopandas.GeoDataFrame
		geometry with updated urban features
	"""

    # Population extract exists?
    if (os.path.exists(
            get_population_urban_features_filename(city_ref, data_source))):
        log("Urban features from population gridded data exist for input city: "
            + city_ref)
        # Read from GeoJSON (default projection coordinates)
        df_insee_urban_features_4326 = gpd.read_file(
            get_population_urban_features_filename(city_ref, data_source))
        # Project to UTM coordinates
        return ox.project_gdf(df_insee_urban_features_4326)

    # Required arguments
    assert df_osm_built is not None
    assert df_osm_pois is not None
    assert df_insee is not None

    # Get population count data with filled empty squares (null population)
    df_insee_urban_features = get_population_df_filled_empty_squares(df_insee)
    # Set crs
    crs_proj = df_insee.crs
    df_insee_urban_features.crs = crs_proj

    ##################
    ### Urban features
    ##################
    # Compute the urban features for each square
    log("Calculating urban features")
    start = time.time()

    # Conserve building geometries
    df_osm_built['geom_building'] = df_osm_built['geometry']

    # Spatial join: grid-cell i - building j for all intersections
    df_insee_urban_features = gpd.sjoin(df_insee_urban_features,
                                        df_osm_built,
                                        op='intersects',
                                        how='left')

    # When a grid-cell i does not intersect any building: NaN values
    null_idx = df_insee_urban_features.loc[
        df_insee_urban_features['geom_building'].isnull()].index
    # Replace NaN for urban features calculation
    min_polygon = Polygon([(0, 0), (0, np.finfo(float).eps),
                           (np.finfo(float).eps, np.finfo(float).eps)])
    df_insee_urban_features.loc[
        null_idx, 'geom_building'] = df_insee_urban_features.loc[
            null_idx, 'geom_building'].apply(lambda x: min_polygon)
    df_insee_urban_features.loc[
        null_idx, 'landuses_m2'] = len(null_idx) * [{
            'residential': 0,
            'activity': 0
        }]
    df_insee_urban_features.loc[null_idx,
                                'building_levels'] = len(null_idx) * [0]

    ### Pre-calculation of urban features

    # Apply percentage of building presence within square: 1 if fully contained, 0.5 if half the building contained, ...
    df_insee_urban_features['building_ratio'] = df_insee_urban_features.apply(
        lambda x: x.geom_building.intersection(x.geometry
                                               ).area / x.geom_building.area,
        axis=1)

    df_insee_urban_features[
        'm2_total_residential'] = df_insee_urban_features.apply(
            lambda x: x.building_ratio * x.landuses_m2['residential'], axis=1)
    df_insee_urban_features[
        'm2_total_activity'] = df_insee_urban_features.apply(
            lambda x: x.building_ratio * x.landuses_m2['activity'], axis=1)

    df_insee_urban_features['m2_footprint_residential'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['residential']),
        'm2_footprint_residential'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin([
                'residential'
            ])].apply(lambda x: x.building_ratio * x.geom_building.area,
                      axis=1)
    df_insee_urban_features['m2_footprint_activity'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['activity']),
        'm2_footprint_activity'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['activity'])].apply(
                lambda x: x.building_ratio * x.geom_building.area, axis=1)
    df_insee_urban_features['m2_footprint_mixed'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['mixed']),
        'm2_footprint_mixed'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['mixed'])].apply(
                lambda x: x.building_ratio * x.geom_building.area, axis=1)

    df_insee_urban_features['num_built_activity'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['activity']),
        'num_built_activity'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['activity'
                                                         ])].building_ratio
    df_insee_urban_features['num_built_residential'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['residential']),
        'num_built_residential'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['residential'
                                                         ])].building_ratio
    df_insee_urban_features['num_built_mixed'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['mixed']),
        'num_built_mixed'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['mixed'
                                                         ])].building_ratio

    df_insee_urban_features['num_levels'] = df_insee_urban_features.apply(
        lambda x: x.building_ratio * x.building_levels, axis=1)
    df_insee_urban_features['num_buildings'] = df_insee_urban_features[
        'building_ratio']

    df_insee_urban_features['built_up_m2'] = df_insee_urban_features.apply(
        lambda x: x.geom_building.area * x.building_ratio, axis=1)

    ### Urban features aggregation functions
    urban_features_aggregation = {}
    urban_features_aggregation['idINSPIRE'] = lambda x: x.head(1)
    urban_features_aggregation['pop_count'] = lambda x: x.head(1)
    urban_features_aggregation['geometry'] = lambda x: x.head(1)

    urban_features_aggregation['m2_total_residential'] = 'sum'
    urban_features_aggregation['m2_total_activity'] = 'sum'

    urban_features_aggregation['m2_footprint_residential'] = 'sum'
    urban_features_aggregation['m2_footprint_activity'] = 'sum'
    urban_features_aggregation['m2_footprint_mixed'] = 'sum'

    urban_features_aggregation['num_built_activity'] = 'sum'
    urban_features_aggregation['num_built_residential'] = 'sum'
    urban_features_aggregation['num_built_mixed'] = 'sum'

    urban_features_aggregation['num_levels'] = 'sum'
    urban_features_aggregation['num_buildings'] = 'sum'

    urban_features_aggregation['built_up_m2'] = 'sum'

    # Apply aggregate functions
    df_insee_urban_features = df_insee_urban_features.groupby(
        df_insee_urban_features.index).agg(urban_features_aggregation)

    # Calculate built up relation (relative to the area of the grid-cell geometry)
    df_insee_urban_features[
        'built_up_relation'] = df_insee_urban_features.apply(
            lambda x: x.built_up_m2 / x.geometry.area, axis=1)
    df_insee_urban_features.drop('built_up_m2', axis=1, inplace=True)

    # To geopandas.GeoDataFrame and set crs
    df_insee_urban_features = gpd.GeoDataFrame(df_insee_urban_features)
    df_insee_urban_features.crs = crs_proj

    # POIs
    df_osm_pois_selection = df_osm_pois[df_osm_pois.classification.isin(
        ["activity", "mixed"])]
    gpd_intersection_pois = gpd.sjoin(df_insee_urban_features,
                                      df_osm_pois_selection,
                                      op='intersects',
                                      how='left')
    # Number of activity/mixed POIs
    df_insee_urban_features[
        'num_activity_pois'] = gpd_intersection_pois.groupby(
            gpd_intersection_pois.index).agg({'osm_id': 'count'})

    ##################
    ### Sprawling indices
    ##################
    df_insee_urban_features[
        'geometry_squares'] = df_insee_urban_features.geometry
    df_insee_urban_features[
        'geometry'] = df_insee_urban_features.geometry.centroid
    # compute_grid_accessibility(df_insee_urban_features, graph,
    #                            df_osm_built, df_osm_pois)

    # Compute land uses mix + densities estimation
    compute_grid_landusemix(df_insee_urban_features, df_osm_built, df_osm_pois,
                            landusemix_args)
    # Dispersion indices
    compute_grid_dispersion(df_insee_urban_features, df_osm_built,
                            dispersion_args)

    if (kwargs.get("max_dispersion")):  # Set max bounds for dispersion values
        df_insee_urban_features.loc[
            df_insee_urban_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion"] = kwargs.get("max_dispersion")

    # Set back original geometries
    df_insee_urban_features[
        'geometry'] = df_insee_urban_features.geometry_squares
    df_insee_urban_features.drop('geometry_squares', axis=1, inplace=True)

    # Fill NaN sprawl indices with 0
    df_insee_urban_features.fillna(0, inplace=True)

    # Save to GeoJSON file (GeoJSON does not conserve the projection, so use EPSG 4326)
    ox.project_gdf(df_insee_urban_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver='GeoJSON')

    elapsed_time = int(time.time() - start)
    log("Done: Urban features calculation. Elapsed time (H:M:S): " +
        '{:02d}:{:02d}:{:02d}'.format(elapsed_time // 3600, (
            elapsed_time % 3600 // 60), elapsed_time % 60))

    return df_insee_urban_features
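A self-contained sketch of the building_ratio idea used above (the share of a building footprint falling inside a grid cell), with illustrative geometries:

from shapely.geometry import box

cell = box(0, 0, 200, 200)          # one 200 m x 200 m grid cell
building = box(150, 150, 250, 250)  # footprint straddling the cell edge
ratio = building.intersection(cell).area / building.area
print(ratio)  # 0.25: a quarter of the footprint lies inside the cell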
Example #11
def process_spatial_indices(
    city_ref=None,
    region_args={
        "polygon": None,
        "place": None,
        "which_result": 1,
        "point": None,
        "address": None,
        "distance": None,
        "north": None,
        "south": None,
        "east": None,
        "west": None,
    },
    grid_step=100,
    process_osm_args={
        "retrieve_graph": True,
        "default_height": 3,
        "meters_per_level": 3,
        "associate_landuses_m2": True,
        "minimum_m2_building_area": 9,
        "date": None,
    },
    dispersion_args={
        "radius_search": 750,
        "use_median": False,
        "K_nearest": 50,
    },
    landusemix_args={
        "walkable_distance": 600,
        "compute_activity_types_kde": True,
        "weighted_kde": True,
        "pois_weight": 9,
        "log_weighted": True,
    },
    accessibility_args={
        "fixed_distance": True,
        "fixed_activities": False,
        "max_edge_length": 200,
        "max_node_distance": 250,
        "fixed_distance_max_travel_distance": 2000,
        "fixed_distance_max_num_activities": 250,
        "fixed_activities_min_number": 20,
    },
    indices_computation={
        "dispersion": True,
        "landusemix": True,
        "accessibility": True,
    },
):
    """
        Process sprawling indices for an input region of interest
        1) OSM data is retrieved and processed.
                If the city name has already been processed, locally stored
    data will be loaded
        2) A regular grid is created where indices will be calculated
        3) Sprawling indices are calculated and returned

        Parameters
        ----------
        city_ref : str
                Name of input city / region
        grid_step : int
                step to sample the regular grid in meters
        region_args : dict
                contains the information to retrieve the region of interest as
        the following:
                        polygon : shapely Polygon or MultiPolygon
                                geographic shape to fetch the land use
        footprints within
                        place : string or dict
                                query string or structured query dict to
        geocode/download
                        which_result : int
                                result number to retrieve from geocode/download
        when using query string
                        point : tuple
                                the (lat, lon) central point around which to
        construct the region
                        address : string
                                the address to geocode and use as the central
        point around which to construct the region
                        distance : int
                                retain only those nodes within this many meters
        of the center of the region
                        north : float
                                northern latitude of bounding box
                        south : float
                                southern latitude of bounding box
                        east : float
                                eastern longitude of bounding box
                        west : float
                                western longitude of bounding box
        process_osm_args : dict
                additional arguments to drive the OSM data extraction process:
                        retrieve_graph : boolean
                                that determines if the street network for input
        city has to be retrieved and stored
                        default_height : float
                                height of buildings under missing data
                        meters_per_level : float
                                buildings number of levels assumed under
        missing data
                        associate_landuses_m2 : boolean
                                compute the total square meter for each land use
                        minimum_m2_building_area : float
                                minimum area to be considered a building
        (otherwise filtered)
                        date : datetime.datetime
                                query the database at a certain timestamp
        dispersion_args : dict
                arguments to drive the dispersion indices calculation
                        radius_search: int
                                circle radius to consider the dispersion
        calculation at a local point
                        use_median : bool
                                denotes whether the median or mean should be
        used to calculate the indices
                        K_nearest : int
                                number of neighboring buildings to consider in
        evaluation
        landusemix_args : dict
                arguments to drive the land use mix indices calculation
                        walkable_distance : int
                                the bandwidth assumption for Kernel Density
        Estimation calculations (meters)
                        compute_activity_types_kde : bool
                                determines if the densities for each activity
        type should be computed
                        weighted_kde : bool
                                use Weighted Kernel Density Estimation or
        classic version
                        pois_weight : int
                                Points of interest weight equivalence with
        buildings (squared meter)
                        log_weighted : bool
                                apply natural logarithmic function to surface
        weights
        accessibility_args : dict
                arguments to drive the accessibility indices calculation
                        fixed_distance : bool
                                denotes the cumulative opportunities access to
        activity land uses given a fixed maximum distance to travel
                        fixed_activities : bool
                                represents the distance needed to travel in
        order to reach a certain number of activity land uses
                        max_edge_length: int
                                maximum length, in meters, to tolerate an edge
        in a graph (otherwise, divide edge)
                        max_node_distance: int
                                maximum distance tolerated from input point to
        closest graph node in order to calculate accessibility values
                        fixed_distance_max_travel_distance: int
                                (fixed distance) maximum distance tolerated
        (cut&branch) when searching for the activities
                        fixed_distance_max_num_activities: int
                                (fixed distance) cut iteration if the number of
        activities exceeds a threshold
                        fixed_activities_min_number: int
                                (fixed activities) minimum number of activities
        required
        indices_computation : dict
                determines what sprawling indices should be computed

        Returns
        ----------
        gpd.GeoDataFrame
                returns the regular grid with the indicated sprawling indices
        """
    try:
        # Process OSM data
        df_osm_built, df_osm_building_parts, df_osm_pois = get_processed_osm_data(
            city_ref=city_ref,
            region_args=region_args,
            kwargs=process_osm_args)
        # Get route graph
        G = get_route_graph(city_ref)

        if not (indices_computation.get("accessibility")
                or indices_computation.get("landusemix")
                or indices_computation.get("dispersion")):
            log("Not computing any spatial indices")
            return None

        # Get indices grid
        df_indices = get_indices_grid(df_osm_built, df_osm_building_parts,
                                      df_osm_pois, grid_step)

        # Compute sprawling indices
        if indices_computation.get("accessibility"):
            compute_grid_accessibility(df_indices, G, df_osm_built,
                                       df_osm_pois, accessibility_args)
        if indices_computation.get("landusemix"):
            compute_grid_landusemix(df_indices, df_osm_built, df_osm_pois,
                                    landusemix_args)
        if indices_computation.get("dispersion"):
            compute_grid_dispersion(df_indices, df_osm_built, dispersion_args)

        return df_indices

    except Exception as e:
        log("Could not compute the spatial indices. An exception occurred: " +
            str(e))
        return None
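A hedged usage sketch for process_spatial_indices; the place query and grid step are illustrative, and every other argument falls back to the defaults above:

region = {'polygon': None, 'place': 'Lyon, France', 'which_result': 1,
          'point': None, 'address': None, 'distance': None,
          'north': None, 'south': None, 'east': None, 'west': None}
df_indices = process_spatial_indices(city_ref='Lyon',
                                     region_args=region,
                                     grid_step=200)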
Example #12
def get_extract_population_data(
    city_ref,
    data_source,
    pop_shapefile=None,
    pop_data_file=None,
    to_crs={"init": "epsg:4326"},
    polygons_gdf=None,
):
    """Get data population extract of desired data source for input city,
    calculating the convex hull of input buildings geodataframe

    The population data frame is projected to the desired coordinate reference
    system

    Stores the extracted shapefile

    Returns the stored population data for input 'data source' and 'city
    reference' if it was previously stored

        Parameters
        ----------
        city_ref : string
                name of input city
        data_source : string
                desired population data source
        pop_shapefile : string
                path of population count shapefile
        pop_data_file : string
                path of population data additional file (required for INSEE format)
        to_crs : dict
                desired coordinate reference system
        polygons_gdf : geopandas.GeoDataFrame
                polygons (e.g. buildings) for input region of interest which
        will determine the shape to extract

        Returns
        ----------
        geopandas.GeoDataFrame
                returns the extracted population data
        """
    # Input data source type given?
    assert data_source in DATA_SOURCES

    # Population extract exists?
    if os.path.exists(get_population_extract_filename(city_ref, data_source)):
        log("Population extract exists for input city: " + city_ref)
        return gpd.read_file(
            get_population_extract_filename(city_ref, data_source))

    # Input shape given?
    assert polygons_gdf is not None
    # Input population shapefile given?
    assert pop_shapefile is not None
    # All input files given?
    assert not ((data_source == "insee") and (pop_data_file is None))

    # Get buildings convex hull
    polygon = GeometryCollection(
        polygons_gdf.geometry.values.tolist()).convex_hull
    # Convert to geo-dataframe with defined CRS
    poly_gdf = gpd.GeoDataFrame([polygon],
                                columns=["geometry"],
                                crs=polygons_gdf.crs)

    # Compute extract
    df_pop = get_population_df(pop_shapefile, pop_data_file, data_source,
                               to_crs, poly_gdf)

    # Save to shapefile
    df_pop.to_file(
        get_population_extract_filename(city_ref, data_source),
        driver="ESRI Shapefile",
    )
    return df_pop
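The convex-hull extraction above reduces to a one-liner in shapely; a minimal sketch with illustrative points:

from shapely.geometry import GeometryCollection, Point

geoms = [Point(0, 0), Point(1, 0), Point(0.5, 1), Point(0.5, 0.3)]
hull = GeometryCollection(geoms).convex_hull  # triangle covering all inputs
print(hull.wkt)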
Example #13
def compute_grid_accessibility(
    df_indices,
    G,
    df_osm_built,
    df_osm_pois,
    kw_args={
        "fixed_distance": True,
        "fixed_activities": False,
        "max_edge_length": 200,
        "max_node_distance": 250,
        "fixed_distance_max_travel_distance": 2000,
        "fixed_distance_max_num_activities": 250,
        "fixed_activities_min_number": 20,
        "fixed_activities_max_travel_distance": 5000
    },
):
    """
        Calculate accessibility values at point_ref

        Parameters
        ----------
        df_indices : geopandas.GeoDataFrame
                data frame containing the (x,y) reference points to calculate
        indices
        G : networkx multidigraph
                input graph to calculate accessibility
        df_osm_built : geopandas.GeoDataFrame
                data frame containing the building's geometries and
        corresponding land uses
        df_osm_pois : geopandas.GeoDataFrame
                data frame containing the points' of interest geometries
        kw_args: dict
                additional keyword arguments for the indices calculation
        fixed_distance : bool
                denotes the cumulative opportunities access to activity land
        uses given a fixed maximum distance to travel
        fixed_activities : bool
                represents the distance needed to travel in order to reach a
        certain number of activity land uses
        max_edge_length: int
                maximum length, in meters, to tolerate an edge in a graph
        (otherwise, divide edge)
        max_node_distance: int
                maximum distance tolerated from input point to closest graph
        node in order to calculate accessibility values
        fixed_distance_max_travel_distance: int
                (fixed distance) maximum distance tolerated (cut&branch) when
        searching for the activities
        fixed_distance_max_num_activities: int
                (fixed distance) cut iteration if the number of activities
        exceeds a threshold
        fixed_activities_min_number: int
                (fixed activities) minimum number of activities required
        fixed_activities_max_travel_distance : int
                (fixed activities) maximum distance tolerated (cut&branch) when
        searching for the activities

        Returns
        ----------
        int
                number of activities found within a radius distance using the
        street network
        """
    log("Calculating accessibility indices")
    start = time.time()

    # Assert that only one option is set
    assert kw_args["fixed_distance"] ^ kw_args["fixed_activities"]

    # Arguments to pandas.Series
    kw_arguments = pd.Series(kw_args)

    ##############
    # Prepare input data for indices calculation in parallel call
    ##############
    # Temporary folder to pickle data
    if not os.path.exists("temp"):
        os.makedirs("temp")
    # Number of CPU cores on your system
    num_cores = cpu_count()
    # Prepare input data: As many chunks of data as cores
    prepare_data(
        G, df_osm_built, df_osm_pois, df_indices, num_cores, kw_arguments
    )

    # This command could have multiple commands separated by a new line \n
    parallel_code = os.path.realpath(__file__).replace(".py", "_parallel.py")
    command_call = (
        "python "
        + parallel_code
        + " temp/graph.gpickle temp/points_NUM_CHUNK.pkl temp/arguments.pkl"
    )

    ##############
    # Verify amount of memory used per subprocess
    ##############
    p = subprocess.Popen(
        command_call.replace("NUM_CHUNK", str(0)) + " memory_test",
        stdout=subprocess.PIPE,
        shell=True,
    )
    output, err = p.communicate()
    p.wait()

    # Max number of subprocess allocations given its memory consumption
    numbers = [
        numb
        for numb in str(output)
        if numb in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    ]
    max_processes = int("".join(numbers))
    log(
        "Maximum number of processes to allocate (considering memory availability): "
        + str(max_processes)
    )
    log("Number of available cores: " + str(num_cores))

    ##############
    # Set chunks to run in parallel: If more core than allowed processes, divide chunks to run at most X processes
    ##############
    if num_cores > max_processes:
        # run parallel chunks in batches, to avoid memory swap
        chunks_run = np.array_split(list(range(num_cores)), max_processes)
        chunks_run = np.array_split(list(range(num_cores)), max_processes)
    else:  # Run all chunks in parallel
        chunks_run = [list(range(num_cores))]

    # Parallel implementation
    for chunk in chunks_run:  # Run full chunk
        Ps_i = []
        for i in chunk:  # Run each index
            p = subprocess.Popen(
                command_call.replace("NUM_CHUNK", str(i)),
                stdout=subprocess.PIPE,
                shell=True,
            )
            Ps_i.append(p)

        # Get the output
        Output_errs = [p.communicate() for p in Ps_i]

        # This makes the wait possible
        Ps_status = [p.wait() for p in Ps_i]

        # Output for chunk
        for output, err in Output_errs:
            log(str(output))

    # Associate data by concatenating the chunk results
    index_column = "accessibility"
    df_indices[index_column] = pd.concat(
        [
            pd.read_pickle(
                "temp/indices_NUM_CHUNK.pkl".replace("NUM_CHUNK", str(i))
            )
            for i in range(num_cores)
        ],
        ignore_index=True,
    ).accessibility

    # Delete temporary folder
    shutil.rmtree("temp")

    log(
        "Done: Accessibility indices. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start))
    )
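The chunking step above caps concurrency at max_processes; a standalone sketch of the same np.array_split logic with illustrative counts:

import numpy as np

num_cores, max_processes = 8, 3
chunks = np.array_split(list(range(num_cores)), max_processes)
print([list(c) for c in chunks])  # [[0, 1, 2], [3, 4, 5], [6, 7]]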
Example #14
import matplotlib as mpl
mpl.use('Agg')  # use agg backend so you don't need a display on travis-ci

import os, shutil
if os.path.exists('.temp'):
    shutil.rmtree('.temp')

import osmnx as ox, logging as lg
ox.config(log_console=True,
          log_file=True,
          use_cache=True,
          data_folder='.temp/data',
          logs_folder='.temp/logs',
          imgs_folder='.temp/imgs',
          cache_folder='.temp/cache')

ox.log('test debug', level=lg.DEBUG)
ox.log('test info', level=lg.INFO)
ox.log('test warning', level=lg.WARNING)
ox.log('test error', level=lg.ERROR)


def test_imports():

    import json, math, sys, os, io, ast, unicodedata, hashlib, re, random, time, warnings, datetime as dt, logging as lg
    from collections import OrderedDict, Counter
    from itertools import groupby, chain
    from dateutil import parser as date_parser
    import requests, numpy as np, pandas as pd, geopandas as gpd, networkx as nx, matplotlib.pyplot as plt, matplotlib.cm as cm
    from matplotlib.collections import LineCollection
    from shapely.geometry import Point, LineString, Polygon, MultiPolygon
    from shapely import wkt
Example #15
def compute_full_urban_features(city_ref,
                                df_osm_built=None,
                                df_osm_pois=None,
                                graph=None,
                                df_insee=None,
                                data_source=None,
                                kwargs={"max_dispersion": 15}):
    """
	Computes a set of urban features for each square where population count data exists

	Parameters
	----------
	city_ref : string
		city reference name
	df_osm_built : geopandas.GeoDataFrame
		input buildings
	df_osm_pois : geopandas.GeoDataFrame
		input points of interest
	graph : 
	x_square : geopandas.GeoSeries
		geometry square where urban features will be calculated

	Returns
	----------
	geopandas.GeoSeries
		geometry with updated urban features
	"""

    # Population extract exists?
    if (os.path.exists(
            get_population_urban_features_filename(city_ref, data_source))):
        log("Urban features from population gridded data exist for input city: "
            + city_ref)
        # Read from GeoJSON (default projection coordinates)
        df_insee_urban_features_4326 = gpd.read_file(
            get_population_urban_features_filename(city_ref, data_source))
        # Project to UTM coordinates
        return ox.project_gdf(df_insee_urban_features_4326)

    # Required arguments
    assert df_osm_built is not None
    assert df_osm_pois is not None
    assert graph is not None
    assert df_insee is not None

    # Copy data frame in order to modify it
    #df_insee_urban_features = df_insee.copy()
    # Data frame + creation of empty squares with 0 count population
    df_insee_urban_features = get_population_df_filled_empty_squares(df_insee)
    ##################
    ### Sprawling indices
    ##################
    df_insee_urban_features[
        'geometry_squares'] = df_insee_urban_features.geometry
    df_insee_urban_features[
        'geometry'] = df_insee_urban_features.geometry.centroid
    # compute_grid_accessibility(df_insee_urban_features, graph,
    #                            df_osm_built, df_osm_pois)

    # Compute land uses mix + densities estimation
    compute_grid_landusemix(df_insee_urban_features, df_osm_built, df_osm_pois)
    # Dispersion indices
    compute_grid_dispersion(df_insee_urban_features, df_osm_built)

    if (kwargs.get("max_dispersion")):  # Set max bounds for dispersion values
        df_insee_urban_features.loc[
            df_insee_urban_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion"] = kwargs.get("max_dispersion")

    # Set back original geometries
    df_insee_urban_features[
        'geometry'] = df_insee_urban_features.geometry_squares
    df_insee_urban_features.drop('geometry_squares', axis=1, inplace=True)

    ##################
    ### Additional urban features
    ##################
    # Compute the urban features for each square
    log("Calculating urban features")
    start = time.time()

    df_insee_urban_features = df_insee_urban_features.apply(
        lambda x: compute_urban_features(df_osm_built, df_osm_pois, x), axis=1)
    # FillNA, set CRS
    df_insee_urban_features.fillna(0, inplace=True)
    df_insee_urban_features.crs = df_insee.crs

    # Save to GeoJSON file (GeoJSON does not conserve the projection, so use EPSG 4326)
    ox.project_gdf(df_insee_urban_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver='GeoJSON')

    log("Done: Urban features calculation. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))

    return df_insee_urban_features
Example #16
for label, row in ucs_to_get.iterrows():

    graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                      row['UC_NM_MN'], row['ID_HDC_G0'])
    print(ox.ts(), graph_name)

    try:
        G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                  network_type=network_type,
                                  retain_all=retain_all,
                                  simplify=simplify,
                                  truncate_by_edge=truncate_by_edge)
    except ox._errors.CacheOnlyModeInterrupt:
        # this happens every time because ox.settings.cache_only_mode = True
        pass

    except Exception as e:
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)

# In[ ]:

end_time = time.time() - start_time
print(
    ox.ts(),
    'Finished caching raw data for {:,.0f} graphs in {:,.1f} seconds'.format(
        len(ucs_to_get), end_time))

# In[ ]:
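The CacheOnlyModeInterrupt handling above only makes sense with OSMnx's cache-only mode switched on. A sketch of the settings this loop assumes, using the OSMnx 1.x settings module:

import osmnx as ox

# cache raw Overpass responses, then raise CacheOnlyModeInterrupt
# instead of building each graph
ox.settings.use_cache = True
ox.settings.cache_only_mode = True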
Example #17
def get_processed_osm_data(
    city_ref=None,
    region_args={
        "polygon": None,
        "place": None,
        "which_result": 1,
        "point": None,
        "address": None,
        "distance": None,
        "north": None,
        "south": None,
        "east": None,
        "west": None
    },
    kwargs={
        "retrieve_graph": True,
        "default_height": 3,
        "meters_per_level": 3,
        "associate_landuses_m2": True,
        "mixed_building_first_floor_activity": True,
        "minimum_m2_building_area": 9,
        "date": None
    }):
    """
	Retrieves buildings, building parts, and Points of Interest associated with a residential/activity land use from OpenStreetMap data for input city
	If a name for input city is given, the data will be loaded (if it was previously stored)
	If no stored files exist, it will query and process the data and store it under the city name
	Queries data for input region (polygon, place, point/address and distance around, or bounding box coordinates)
	Additional arguments will drive the overall process

	Parameters
	----------
	city_ref : str
		Name of input city / region
	region_args : dict
		contains the information to retrieve the region of interest as the following:
			polygon : shapely Polygon or MultiPolygon
				geographic shape to fetch the landuse footprints within
			place : string or dict
				query string or structured query dict to geocode/download
			which_result : int
				result number to retrieve from geocode/download when using query string 
			point : tuple
				the (lat, lon) central point around which to construct the graph
			address : string
				the address to geocode and use as the central point around which to construct the graph
			distance : int
				retain only those nodes within this many meters of the center of the graph
			north : float
				northern latitude of bounding box
			south : float
				southern latitude of bounding box
			east : float
				eastern longitude of bounding box
			west : float
				western longitude of bounding box
	kwargs : dict
		additional arguments to drive the process:
			retrieve_graph : boolean
				that determines if the street network for input city has to be retrieved and stored
			default_height : float
				height of buildings under missing data
			meters_per_level : float
				buildings number of levels assumed under missing data
			associate_landuses_m2 : boolean
				compute the total square meter for each land use
			mixed_building_first_floor_activity : Boolean
				if True: Associates building's first floor to activity uses and the rest to residential uses
				if False: Associates half of the building's area to each land use (Activity and Residential)
			minimum_m2_building_area : float
				minimum area to be considered a building (otherwise filtered)
			date : datetime.datetime
				query the database at a certain time-stamp

	Returns
	----------
	[ gpd.GeoDataFrame, gpd.GeoDataFrame, gpd.GeoDataFrame ]
		returns the output geo dataframe containing all buildings, building parts, and points associated to a residential or activity land usage
	
	"""
    log("OSM data requested for city: " + str(city_ref))

    start_time = time.time()

    if (city_ref):
        geo_poly_file, geo_poly_parts_file, geo_point_file = get_dataframes_filenames(
            city_ref)

        ##########################
        ### Stored file ?
        ##########################
        if (os.path.isfile(geo_poly_file)):  # File exists
            log("Found stored files for city " + city_ref)
            # Load local GeoDataFrames
            return load_geodataframe(geo_poly_file), load_geodataframe(
                geo_poly_parts_file), load_geodataframe(geo_point_file)

    # Get keyword arguments for input region of interest
    polygon = region_args.get("polygon")
    place = region_args.get("place")
    which_result = region_args.get("which_result")
    point = region_args.get("point")
    address = region_args.get("address")
    distance = region_args.get("distance")
    north = region_args.get("north")
    south = region_args.get("south")
    east = region_args.get("east")
    west = region_args.get("west")

    ### Valid input?
    if not any([polygon is not None, place, point, address,
                north, south, east, west]):
        log("Error: Must provide at least one type of input")
        return None, None, None

    if (kwargs.get("date")):  # Non-null date
        date_ = kwargs.get("date").strftime("%Y-%m-%dT%H:%M:%SZ")
        log("Requesting OSM database at time-stamp: " + date_)
        # e.g.: [date:"2004-05-06T00:00:00Z"]
        date_query = '[date:"' + date_ + '"]'
    else:
        date_query = ""

    ##########################
    ### Overpass query: Buildings
    ##########################
    # Query and update bounding box / polygon
    df_osm_built, polygon, north, south, east, west = create_buildings_gdf_from_input(
        date=date_query,
        polygon=polygon,
        place=place,
        which_result=which_result,
        point=point,
        address=address,
        distance=distance,
        north=north,
        south=south,
        east=east,
        west=west)
    df_osm_built["osm_id"] = df_osm_built.index
    df_osm_built.reset_index(drop=True, inplace=True)
    df_osm_built.gdf_name = (str(city_ref) + '_buildings'
                             if city_ref is not None else 'buildings')
    ##########################
    ### Overpass query: Land use polygons. Aid to perform buildings land use inference
    ##########################
    df_osm_lu = create_landuse_gdf(date=date_query,
                                   polygon=polygon,
                                   north=north,
                                   south=south,
                                   east=east,
                                   west=west)
    df_osm_lu["osm_id"] = df_osm_lu.index
    # Drop useless columns
    columns_of_interest = ["osm_id", "geometry", "landuse"]
    df_osm_lu.drop(
        [col for col in df_osm_lu.columns if col not in columns_of_interest],
        axis=1,
        inplace=True)
    df_osm_lu.reset_index(drop=True, inplace=True)
    df_osm_lu.gdf_name = (str(city_ref) + '_landuse'
                          if city_ref is not None else 'landuse')
    ##########################
    ### Overpass query: POIs
    ##########################
    df_osm_pois = create_pois_gdf(date=date_query,
                                  polygon=polygon,
                                  north=north,
                                  south=south,
                                  east=east,
                                  west=west)
    df_osm_pois["osm_id"] = df_osm_pois.index
    df_osm_pois.reset_index(drop=True, inplace=True)
    df_osm_pois.gdf_name = (str(city_ref) + '_points'
                            if city_ref is not None else 'points')
    ##########
    ### Overpass query: Building parts. Allow to calculate the real amount of M^2 for each building
    ##########
    df_osm_building_parts = create_building_parts_gdf(date=date_query,
                                                      polygon=polygon,
                                                      north=north,
                                                      south=south,
                                                      east=east,
                                                      west=west)
    # Filter: 1) rows not needed (roof, etc.) and 2) buildings that already exist in the `buildings` extract
    if ("building" in df_osm_building_parts.columns):
        df_osm_building_parts = df_osm_building_parts[
            (~df_osm_building_parts["building:part"].isin(
                building_parts_to_filter))
            & (~df_osm_building_parts["building:part"].isnull()) &
            (df_osm_building_parts["building"].isnull())]
    else:
        df_osm_building_parts = df_osm_building_parts[
            (~df_osm_building_parts["building:part"].isin(
                building_parts_to_filter))
            & (~df_osm_building_parts["building:part"].isnull())]
    df_osm_building_parts["osm_id"] = df_osm_building_parts.index
    df_osm_building_parts.reset_index(drop=True, inplace=True)
    df_osm_building_parts.gdf_name = (
        str(city_ref) + '_building_parts'
        if city_ref is not None else 'building_parts')

    log("Done: OSM data requests. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Sanity check of height tags
    ####################################################
    start_time = time.time()

    sanity_check_height_tags(df_osm_built)
    sanity_check_height_tags(df_osm_building_parts)

    def remove_nan_dict(x):  # Remove entries with NaN values
        return {k: v for k, v in x.items() if pd.notnull(v)}

    df_osm_built['height_tags'] = df_osm_built[[
        c for c in height_tags if c in df_osm_built.columns
    ]].apply(lambda x: remove_nan_dict(x.to_dict()), axis=1)
    df_osm_building_parts['height_tags'] = df_osm_building_parts[[
        c for c in height_tags if c in df_osm_building_parts.columns
    ]].apply(lambda x: remove_nan_dict(x.to_dict()), axis=1)

    ###########
    ### Remove columns which do not provide valuable information
    ###########
    columns_of_interest = columns_osm_tag + [
        "osm_id", "geometry", "height_tags"
    ]
    df_osm_built.drop(
        [col for col in df_osm_built.columns
         if col not in columns_of_interest],
        axis=1,
        inplace=True)
    df_osm_building_parts.drop(
        [col for col in df_osm_building_parts.columns
         if col not in columns_of_interest],
        axis=1,
        inplace=True)

    columns_of_interest = columns_osm_tag + ["osm_id", "geometry"]
    df_osm_pois.drop(
        [col for col in df_osm_pois.columns
         if col not in columns_of_interest],
        axis=1,
        inplace=True)

    log('Done: Height tags sanity check and unnecessary columns have been dropped. Elapsed time (H:M:S): '
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ###########
    ### Classification
    ###########
    start_time = time.time()

    df_osm_built['classification'], df_osm_built['key_value'] = list(
        zip(*df_osm_built.apply(classify_tag, axis=1)))
    df_osm_pois['classification'], df_osm_pois['key_value'] = list(
        zip(*df_osm_pois.apply(classify_tag, axis=1)))
    df_osm_building_parts['classification'], df_osm_building_parts[
        'key_value'] = list(
            zip(*df_osm_building_parts.apply(classify_tag, axis=1)))

    # Remove unnecessary buildings
    df_osm_built.drop(df_osm_built[df_osm_built.classification.isnull()].index,
                      inplace=True)
    df_osm_built.reset_index(inplace=True, drop=True)
    # Remove unnecessary POIs
    df_osm_pois.drop(
        df_osm_pois[df_osm_pois.classification.isin(["infer", "other"])
                    | df_osm_pois.classification.isnull()].index,
        inplace=True)
    df_osm_pois.reset_index(inplace=True, drop=True)
    # Building parts will acquire their containing building's land use if theirs is not available
    df_osm_building_parts.loc[
        df_osm_building_parts.classification.isin(["infer", "other"]),
        "classification"] = None

    log('Done: OSM tags classification. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ###########
    ### Remove already used tags
    ###########
    start_time = time.time()

    df_osm_built.drop(
        [c for c in columns_osm_tag if c in df_osm_built.columns],
        axis=1,
        inplace=True)
    df_osm_pois.drop([c for c in columns_osm_tag if c in df_osm_pois.columns],
                     axis=1,
                     inplace=True)
    df_osm_building_parts.drop(
        [c for c in columns_osm_tag if c in df_osm_building_parts.columns],
        axis=1,
        inplace=True)

    ###########
    ### Project, drop small buildings and reset indices
    ###########
    ### Project to UTM coordinates within the same zone
    df_osm_built = ox.project_gdf(df_osm_built)
    df_osm_lu = ox.project_gdf(df_osm_lu, to_crs=df_osm_built.crs)
    df_osm_pois = ox.project_gdf(df_osm_pois, to_crs=df_osm_built.crs)
    df_osm_building_parts = ox.project_gdf(df_osm_building_parts,
                                           to_crs=df_osm_built.crs)

    # Drop buildings with an area lower than a threshold
    df_osm_built.drop(df_osm_built[
        df_osm_built.geometry.area < kwargs["minimum_m2_building_area"]].index,
                      inplace=True)

    log('Done: Geometries re-projection and small building removal. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Infer building land uses (under uncertainty)
    ####################################################
    start_time = time.time()

    compute_landuse_inference(df_osm_built, df_osm_lu)
    # Free memory
    del df_osm_lu

    assert len(df_osm_built[df_osm_built.key_value == {"inferred": "other"}]) == 0
    assert len(df_osm_built[df_osm_built.classification.isnull()]) == 0
    assert len(df_osm_pois[df_osm_pois.classification.isnull()]) == 0

    log('Done: Land use inference. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Associate each building with the building parts and points of interest it contains
    ####################################################
    start_time = time.time()

    associate_structures(df_osm_built,
                         df_osm_building_parts,
                         operation='contains',
                         column='containing_parts')
    associate_structures(df_osm_built,
                         df_osm_pois,
                         operation='intersects',
                         column='containing_poi')

    # Classify activity types
    df_osm_built['activity_category'] = df_osm_built.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)
    df_osm_pois['activity_category'] = df_osm_pois.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)
    df_osm_building_parts['activity_category'] = df_osm_building_parts.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)

    log('Done: Building parts association and activity categorization. Elapsed time (H:M:S): '
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Associate the effective number of levels and measure the surface dedicated to each land use per building
    ####################################################
    if (kwargs["associate_landuses_m2"]):
        start_time = time.time()

        default_height = kwargs["default_height"]
        meters_per_level = kwargs["meters_per_level"]
        mixed_building_first_floor_activity = kwargs["mixed_building_first_floor_activity"]
        compute_landuses_m2(
            df_osm_built,
            df_osm_building_parts,
            df_osm_pois,
            default_height=default_height,
            meters_per_level=meters_per_level,
            mixed_building_first_floor_activity=mixed_building_first_floor_activity)

        # Buildings with both residential and activity surface get the composed "mixed" classification
        df_osm_built.loc[df_osm_built.apply(
            lambda x: x.landuses_m2["activity"] > 0 and x.landuses_m2["residential"] > 0,
            axis=1), "classification"] = "mixed"

        log('Done: Land uses surface association. Elapsed time (H:M:S): ' +
            time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    # Empty activity category lists become NaN
    df_osm_built.loc[df_osm_built.activity_category.apply(lambda x: len(x) == 0),
                     "activity_category"] = np.nan
    df_osm_pois.loc[df_osm_pois.activity_category.apply(lambda x: len(x) == 0),
                    "activity_category"] = np.nan
    df_osm_building_parts.loc[
        df_osm_building_parts.activity_category.apply(lambda x: len(x) == 0),
        "activity_category"] = np.nan

    ##########################
    ### Overpass query: Street network graph
    ##########################
    if (kwargs["retrieve_graph"]):  # Save graph for input city shape
        start_time = time.time()

        get_route_graph(city_ref,
                        date=date_query,
                        polygon=polygon,
                        north=north,
                        south=south,
                        east=east,
                        west=west,
                        force_crs=df_osm_built.crs)

        log('Done: Street network graph retrieval. Elapsed time (H:M:S): ' +
            time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ##########################
    ### Store files?
    ##########################
    if city_ref:
        # A city reference was given: persist the GeoDataFrames
        store_geodataframe(df_osm_built, geo_poly_file)
        store_geodataframe(df_osm_building_parts, geo_poly_parts_file)
        store_geodataframe(df_osm_pois, geo_point_file)
        log("Stored OSM data files for city: " + city_ref)

    return df_osm_built, df_osm_building_parts, df_osm_pois
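
A minimal sketch (toy data; the tag column names are illustrative) of the height-tag cleanup pattern used above, where each row's height-related columns are collapsed into a dict of non-null entries:

import pandas as pd

height_tags = ["height", "building:levels"]  # illustrative tag columns
df = pd.DataFrame({"height": [10.0, None], "building:levels": [None, 3.0]})

def remove_nan_dict(x):
    # Keep only the key/value pairs whose value is not NaN/None
    return {k: v for k, v in x.items() if pd.notnull(v)}

df["height_tags"] = df[height_tags].apply(
    lambda x: remove_nan_dict(x.to_dict()), axis=1)
print(df["height_tags"].tolist())
# [{'height': 10.0}, {'building:levels': 3.0}]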
Example No. 18
def prepare_testing_data(city_ref, pop_features=None):
    """Return a X array for population downscaling inference, that contain
    normalized urban features

        X contains vectors with normalized urban features
        X_columns columns referring to X values
        Numpy arrays are stored locally

        Parameters
        ----------
        city_ref : string
                city reference name
        pop_features : geopandas.GeoDataFrame
                grid-cells with population count data and calculated urban features

        Returns
        ----------
        np.array, np.array, np.array
                Y vector, X vector, X column names vector
        """
    log("Calculating urban testing data/features for city: " + city_ref)
    start = time.time()

    # Select columns to normalize
    columns_to_normalise = [
        col for col in pop_features.columns if "num_" in col or "m2_" in col
        or "dispersion" in col or "accessibility" in col
    ]
    # Normalize selected columns
    pop_features.loc[:, columns_to_normalise] = pop_features.loc[
        :, columns_to_normalise].apply(lambda x: x / x.max(), axis=0)

    # X values: Vector <x1,x2, ... , xn> with normalized urban features
    X_values = []
    geom_values = []

    for idx in pop_features.idx.unique():
        square_info = pop_features[pop_features["idx"] == idx]
        urban_features = square_info[[
            col for col in square_info.columns
            if col not in ["geometry", "pop_count", "idx"]
        ]].values
        X_values.append(urban_features)
        geom = square_info["geometry"]
        geom_values.append(geom)

    # Get the column order referenced in each X vector
    X_values_columns = pop_features[[
        col for col in pop_features.columns
        if col not in ["geometry", "pop_count", "idx"]
    ]].columns
    X_values_columns = np.array(X_values_columns)
    X_values = np.array(X_values)
    geom_values = np.array(geom_values)

    log("Done: urban training+validation data/features. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))

    return X_values, X_values_columns, geom_values
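
A toy illustration of the column-wise max-normalization performed above: each selected feature column is scaled into [0, 1] by dividing by its own maximum (column names here are hypothetical):

import pandas as pd

df = pd.DataFrame({"num_buildings": [2, 4, 8],
                   "m2_total_activity": [100.0, 50.0, 200.0]})
cols = [c for c in df.columns if "num_" in c or "m2_" in c]
df[cols] = df[cols].apply(lambda x: x / x.max(), axis=0)
print(df)
#    num_buildings  m2_total_activity
# 0           0.25               0.50
# 1           0.50               0.25
# 2           1.00               1.00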
Example No. 19
def calculate_graph_indicators(graphml_folder, country_folder, filename):

    # get filepath and country/city identifiers
    filepath = os.path.join(graphml_folder, country_folder, filename)
    country, country_iso = country_folder.split('-')
    core_city, uc_id = filename.replace('.graphml', '').split('-')
    uc_id = int(uc_id)

    start_time = time.time()
    print(ox.ts(), 'processing', filepath)
    G = ox.load_graphml(filepath=filepath)

    # clustering and pagerank: needs directed representation
    cc_avg_dir, cc_avg_undir, cc_wt_avg_dir, cc_wt_avg_undir, pagerank_max = get_clustering(
        G)

    # get an undirected representation of this network for everything else,
    # then release the directed graph to free memory
    Gu = ox.get_undirected(G)
    G.clear()
    G = None

    # street lengths
    lengths = pd.Series(nx.get_edge_attributes(Gu, 'length'))
    length_total = lengths.sum()
    length_median = lengths.median()
    length_mean = lengths.mean()

    # nodes, edges, node degree, self loops
    n = len(Gu.nodes)
    m = len(Gu.edges)
    k_avg = 2 * m / n
    self_loop_proportion = sum(u == v for u, v, k in Gu.edges) / m

    # proportion of 4-way intersections, 3-ways, and dead-ends
    streets_per_node = nx.get_node_attributes(Gu, 'street_count')
    prop_4way = list(streets_per_node.values()).count(4) / n
    prop_3way = list(streets_per_node.values()).count(3) / n
    prop_deadend = list(streets_per_node.values()).count(1) / n

    # average circuity and straightness
    circuity = calculate_circuity(Gu, length_total)
    straightness = 1 / circuity

    # elevation and grade
    grade_mean, grade_median, elev_mean, elev_median, elev_std, elev_range, elev_iqr = elevation_grades(
        Gu)

    # bearing/orientation entropy/order
    orientation_entropy = calculate_orientation_entropy(Gu)
    orientation_order = calculate_orientation_order(orientation_entropy)

    # total and clean intersection counts
    intersect_count, intersect_count_clean, intersect_count_clean_topo = intersection_counts(
        ox.project_graph(Gu), streets_per_node)

    # assemble the results
    rslt = {
        'country': country,
        'country_iso': country_iso,
        'core_city': core_city,
        'uc_id': uc_id,
        'cc_avg_dir': cc_avg_dir,
        'cc_avg_undir': cc_avg_undir,
        'cc_wt_avg_dir': cc_wt_avg_dir,
        'cc_wt_avg_undir': cc_wt_avg_undir,
        'circuity': circuity,
        'elev_iqr': elev_iqr,
        'elev_mean': elev_mean,
        'elev_median': elev_median,
        'elev_range': elev_range,
        'elev_std': elev_std,
        'grade_mean': grade_mean,
        'grade_median': grade_median,
        'intersect_count': intersect_count,
        'intersect_count_clean': intersect_count_clean,
        'intersect_count_clean_topo': intersect_count_clean_topo,
        'k_avg': k_avg,
        'length_mean': length_mean,
        'length_median': length_median,
        'length_total': length_total,
        'street_segment_count': m,
        'node_count': n,
        'orientation_entropy': orientation_entropy,
        'orientation_order': orientation_order,
        'pagerank_max': pagerank_max,
        'prop_4way': prop_4way,
        'prop_3way': prop_3way,
        'prop_deadend': prop_deadend,
        'self_loop_proportion': self_loop_proportion,
        'straightness': straightness
    }

    elapsed = time.time() - start_time
    ox.log(f'finished {filepath} in {elapsed:.0f} seconds')
    return rslt
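
A quick sanity check, on a toy networkx MultiGraph, of the average-degree and self-loop arithmetic used above:

import networkx as nx

Gu = nx.MultiGraph()
Gu.add_edges_from([(1, 2), (2, 3), (3, 3)])  # three edges, one self-loop
n, m = len(Gu.nodes), len(Gu.edges)
k_avg = 2 * m / n  # each edge contributes 2 to the degree sum: 2.0
self_loop_proportion = sum(u == v for u, v, k in Gu.edges(keys=True)) / m
print(k_avg, self_loop_proportion)  # 2.0 0.333...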
Example No. 20
def compute_grid_dispersion(
    df_indices,
    df_osm_built,
    kwargs={"radius_search": 750, "use_median": True, "K_nearest": 50},
):
    """
        Creates grid and calculates dispersion indices.

        Parameters
        ----------
        df_indices : geopandas.GeoDataFrame
                data frame containing the (x,y) reference points to calculate indices
        df_osm_built : geopandas.GeoDataFrame
                data frame containing the building's geometries
        kw_args: dict
                additional keyword arguments for the indices calculation
                        radius_search: int
                                circle radius to consider the dispersion calculation at a local point
                        use_median : bool
                                denotes whether the median or mean should be used to calculate the indices
                        K_nearest : int
                                number of neighboring buildings to consider in evaluation

        Returns
        ----------
        geopandas.GeoDataFrame
                data frame with the added column for dispersion indices
        """
    log("Calculating dispersion indices")
    start = time.time()

    # Get radius search: circle radius to consider the dispersion calculation at a local point
    radius_search = kwargs["radius_search"]

    # Assign dispersion calculation method
    if kwargs["use_median"]:
        _calculate_dispersion = closest_building_distance_median
    else:
        _calculate_dispersion = closest_building_distance_average

    # Calculate, for each building, the closest distance among its K_nearest neighboring building centroids
    _apply_polygon_closest_distance_neighbor(
        df_osm_built, K_nearest=kwargs["K_nearest"]
    )

    # To approximate the dispersion calculation, build a KDTree of building centroids
    coords_data = [
        point.coords[0]
        for point in df_osm_built.loc[
            df_osm_built.closest_d.notnull()
        ].geometry.apply(lambda x: x.centroid)
    ]
    # Create KDTree
    tree = spatial.KDTree(coords_data)

    # Compute dispersion indices
    index_column = "dispersion"
    df_indices[index_column] = df_indices.geometry.apply(
        lambda x: _calculate_dispersion(
            x, tree, df_osm_built.closest_d, radius_search
        )
    )

    # Remove added column
    df_osm_built.drop("closest_d", axis=1, inplace=True)

    log(
        "Done: Dispersion indices. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start))
    )
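
A minimal sketch of the scipy KDTree radius lookup that underpins the dispersion index (toy centroid coordinates, assumed to be in meters):

from scipy import spatial

centroids = [(0.0, 0.0), (100.0, 0.0), (0.0, 900.0)]
tree = spatial.KDTree(centroids)
# Indices of all building centroids within a 750 m search radius of a reference point
print(tree.query_ball_point((10.0, 10.0), r=750))  # [0, 1]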
Example No. 21
def compute_full_urban_features(
    city_ref,
    df_osm_built=None,
    df_osm_pois=None,
    pop_grid=None,
    data_source=None,
    landusemix_args={
        "walkable_distance": 600,
        "compute_activity_types_kde": True,
        "weighted_kde": True,
        "pois_weight": 9,
        "log_weighted": True,
    },
    dispersion_args={
        "radius_search": 750,
        "use_median": True,
        "K_nearest": 50,
    },
    kwargs={"max_dispersion": 15},
):
    """
        Computes a set of urban features for each square where population count
    data exists

        Parameters
        ----------
        city_ref : string
                city reference name
        df_osm_built : geopandas.GeoDataFrame
                input buildings
        df_osm_pois : geopandas.GeoDataFrame
                input points of interest
        pop_grid : geopandas.GeoDataFrame
                grid-cells with population count where urban features will be
        calculated
        data_source : str
                define the type of population data for its retrieval in case it
        was stored
        kwargs : dict
                keyword arguments to guide the process

        Returns
        ----------
        geopandas.GeoDataFrame
                geometry with updated urban features
        """

    # Population extract exists?
    if os.path.exists(
            get_population_urban_features_filename(city_ref, data_source)):
        log("Urban features from population gridded data exist for city: " +
            city_ref)
        # Read from GeoJSON (default projection coordinates)
        pop_features_4326 = gpd.read_file(
            get_population_urban_features_filename(city_ref, data_source))
        # Project to UTM coordinates
        return ox.project_gdf(pop_features_4326)

    # Required arguments
    assert df_osm_built is not None
    assert df_osm_pois is not None
    assert pop_grid is not None

    # Get population count data with filled empty squares (null population)
    if data_source == "insee":
        pop_features = get_population_df_filled_empty_squares(pop_grid)
    elif data_source == "gpw":
        pop_features = pop_grid
    else:
        raise ValueError("Unknown data source.")
    # Set crs
    crs_proj = pop_grid.crs
    pop_features.crs = crs_proj

    ##################
    # Urban features
    ##################
    # Compute the urban features for each square
    log("Calculating urban features")
    start = time.time()

    # Preserve the building geometries (the spatial join keeps only one geometry column)
    df_osm_built["geom_building"] = df_osm_built["geometry"]

    # Spatial join: grid-cell i - building j for all intersections
    pop_features = gpd.sjoin(pop_features,
                             df_osm_built,
                             op="intersects",
                             how="left")

    # When a grid-cell i does not intersect any building: NaN values
    null_idx = pop_features.loc[pop_features["geom_building"].isnull()].index
    # Replace NaNs with a degenerate near-zero-area polygon so the feature calculations below stay valid
    min_polygon = Polygon([
        (0, 0),
        (0, np.finfo(float).eps),
        (np.finfo(float).eps, np.finfo(float).eps),
    ])
    pop_features.loc[null_idx, "geom_building"] = pop_features.loc[
        null_idx, "geom_building"].apply(lambda x: min_polygon)
    pop_features.loc[null_idx,
                     "landuses_m2"] = len(null_idx) * [{
                         "residential": 0,
                         "activity": 0
                     }]
    pop_features.loc[null_idx, "building_levels"] = len(null_idx) * [0]

    # Pre-calculation of urban features

    # Apply percentage of building presence within square:
    # 1 if fully contained, 0.5 if half the building contained, ...
    pop_features["building_ratio"] = pop_features.apply(
        lambda x: x.geom_building.intersection(x.geometry).area / x.
        geom_building.area,
        axis=1,
    )

    pop_features["m2_total_residential"] = pop_features.apply(
        lambda x: x.building_ratio * x.landuses_m2["residential"], axis=1)
    pop_features["m2_total_activity"] = pop_features.apply(
        lambda x: x.building_ratio * x.landuses_m2["activity"], axis=1)

    pop_features["m2_footprint_residential"] = 0
    pop_features.loc[pop_features.classification.isin(["residential"]),
                     "m2_footprint_residential", ] = pop_features.loc[
                         pop_features.classification.isin([
                             "residential"
                         ])].apply(
                             lambda x: x.building_ratio * x.geom_building.area,
                             axis=1)
    pop_features["m2_footprint_activity"] = 0
    pop_features.loc[pop_features.classification.isin(["activity"]),
                     "m2_footprint_activity", ] = pop_features.loc[
                         pop_features.classification.isin(["activity"])].apply(
                             lambda x: x.building_ratio * x.geom_building.area,
                             axis=1)
    pop_features["m2_footprint_mixed"] = 0
    pop_features.loc[pop_features.classification.isin(["mixed"]),
                     "m2_footprint_mixed", ] = pop_features.loc[
                         pop_features.classification.isin(["mixed"])].apply(
                             lambda x: x.building_ratio * x.geom_building.area,
                             axis=1)

    pop_features["num_built_activity"] = 0
    pop_features.loc[pop_features.classification.isin(["activity"]),
                     "num_built_activity", ] = pop_features.loc[
                         pop_features.classification.isin(["activity"
                                                           ])].building_ratio
    pop_features["num_built_residential"] = 0
    pop_features.loc[pop_features.classification.isin(["residential"]),
                     "num_built_residential", ] = pop_features.loc[
                         pop_features.classification.isin(["residential"
                                                           ])].building_ratio
    pop_features["num_built_mixed"] = 0
    pop_features.loc[pop_features.classification.isin(["mixed"]),
                     "num_built_mixed", ] = pop_features.loc[
                         pop_features.classification.isin(["mixed"
                                                           ])].building_ratio

    pop_features["num_levels"] = pop_features.apply(
        lambda x: x.building_ratio * x.building_levels, axis=1)
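    # Fractional building count: each building contributes its within-cell presence ratio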
    pop_features["num_buildings"] = pop_features["building_ratio"]

    pop_features["built_up_m2"] = pop_features.apply(
        lambda x: x.geom_building.area * x.building_ratio, axis=1)

    # Urban features aggregation functions
    urban_features_aggregation = {}
    if data_source == "insee":
        urban_features_aggregation["idINSPIRE"] = lambda x: x.head(1)
        urban_features_aggregation["pop_count"] = lambda x: x.head(1)
    elif data_source == "gpw":
        urban_features_aggregation["idx"] = lambda x: x.head(1)
    urban_features_aggregation["geometry"] = lambda x: x.head(1)

    urban_features_aggregation["m2_total_residential"] = "sum"
    urban_features_aggregation["m2_total_activity"] = "sum"

    urban_features_aggregation["m2_footprint_residential"] = "sum"
    urban_features_aggregation["m2_footprint_activity"] = "sum"
    urban_features_aggregation["m2_footprint_mixed"] = "sum"

    urban_features_aggregation["num_built_activity"] = "sum"
    urban_features_aggregation["num_built_residential"] = "sum"
    urban_features_aggregation["num_built_mixed"] = "sum"

    urban_features_aggregation["num_levels"] = "sum"
    urban_features_aggregation["num_buildings"] = "sum"

    urban_features_aggregation["built_up_m2"] = "sum"

    # Apply aggregate functions
    pop_features = pop_features.groupby(
        pop_features.index).agg(urban_features_aggregation)

    # Calculate the built-up ratio (relative to the area of the grid-cell geometry)
    pop_features["built_up_relation"] = pop_features.apply(
        lambda x: x.built_up_m2 / x.geometry.area, axis=1)
    pop_features.drop("built_up_m2", axis=1, inplace=True)

    # To geopandas.GeoDataFrame and set crs
    pop_features = gpd.GeoDataFrame(pop_features)
    pop_features.crs = crs_proj

    # POIs
    df_osm_pois_selection = df_osm_pois[df_osm_pois.classification.isin(
        ["activity", "mixed"])]
    gpd_intersection_pois = gpd.sjoin(
        pop_features,
        df_osm_pois_selection,
        op="intersects",
        how="left",
    )
    # Number of activity/mixed POIs
    pop_features["num_activity_pois"] = gpd_intersection_pois.groupby(
        gpd_intersection_pois.index).agg({"osm_id": "count"})

    ##################
    # Sprawling indices
    ##################
    pop_features["geometry_squares"] = pop_features.geometry
    pop_features["geometry"] = pop_features.geometry.centroid

    # Compute land uses mix + densities estimation
    compute_grid_landusemix(pop_features, df_osm_built, df_osm_pois,
                            landusemix_args)
    # Dispersion indices
    compute_grid_dispersion(pop_features, df_osm_built, dispersion_args)

    # Set back original geometries
    pop_features["geometry"] = pop_features.geometry_squares
    pop_features.drop("geometry_squares", axis=1, inplace=True)

    if kwargs.get("max_dispersion"):  # Set max bounds for dispersion values
        pop_features.loc[
            pop_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion", ] = kwargs.get("max_dispersion")

    # Fill NaN sprawl indices with 0
    pop_features.fillna(0, inplace=True)

    # Save to GeoJSON file (GeoJSON does not store the projection, so reproject to EPSG:4326)
    ox.project_gdf(pop_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver="GeoJSON",
    )

    elapsed_time = int(time.time() - start)
    log("Done: Urban features calculation. Elapsed time (H:M:S): " +
        "{:02d}:{:02d}:{:02d}".format(
            elapsed_time // 3600,
            (elapsed_time % 3600 // 60),
            elapsed_time % 60,
        ))
    return pop_features
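
A toy check of the building-presence ratio that weights every aggregation above: the share of a building footprint falling inside a grid cell scales its contribution to that cell.

from shapely.geometry import Polygon

cell = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
building = Polygon([(8, 8), (12, 8), (12, 12), (8, 12)])
building_ratio = building.intersection(cell).area / building.area
print(building_ratio)  # 0.25: one quarter of the footprint lies inside the cell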
Example No. 22
def compute_grid_landusemix(
    df_indices,
    df_osm_built,
    df_osm_pois,
    kw_args={
        "walkable_distance": 600,
        "compute_activity_types_kde": True,
        "weighted_kde": True,
        "pois_weight": 9,
        "log_weighted": True
    },
):
    """
        Calculate land use mix indices on input grid

        Parameters
        ----------
        df_indices : geopandas.GeoDataFrame
                data frame containing the (x,y) reference points to calculate indices
        df_osm_built : geopandas.GeoDataFrame
                data frame containing the building's geometries
        df_osm_pois : geopandas.GeoDataFrame
                data frame containing the points of interest geometries
        kw_args: dict
                additional keyword arguments for the indices calculation
                        walkable_distance : int
                                the bandwidth assumption for Kernel Density Estimation calculations (meters)
                        compute_activity_types_kde : bool
                                determines if the densities for each activity type should be computed
                        weighted_kde : bool
                                use Weighted Kernel Density Estimation or classic version
                        pois_weight : int
                                Points of interest weight equivalence with buildings (squared meter)
                        log_weighted : bool
                                apply natural logarithmic function to surface weights

        Returns
        ----------
        None
                df_indices is modified in place with the added land use mix columns
        """
    log("Calculating land use mix indices")
    start = time.time()

    # Get the bandwidth, related to 'walkable distances'
    bandwidth = kw_args["walkable_distance"]
    # Compute a weighted KDE?
    weighted_kde = kw_args["weighted_kde"]
    X_weights = None

    # Get the full list of POIs contained in any building
    contained_pois = list({
        element
        for list_ in df_osm_built.containing_poi[
            df_osm_built.containing_poi.notnull()]
        for element in list_
    })
    # Get the POIs not contained by any building
    df_osm_pois_not_contained = df_osm_pois[
        ~df_osm_pois.index.isin(contained_pois)]

    ############
    # Calculate land use density estimations
    ############

    ####
    # Residential
    ####
    df_osm_built_indexed = df_osm_built[
        df_osm_built.classification.isin(["residential", "mixed"])
    ]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(
            lambda x: x["residential"]
        )

    df_indices["residential_pdf"] = calculate_kde(
        df_indices.geometry,
        df_osm_built_indexed,
        None,
        bandwidth,
        X_weights,
        kw_args["pois_weight"],
        kw_args["log_weighted"],
    )
    log("Residential density estimation done")

    ####
    # Activities
    ####
    df_osm_built_indexed = df_osm_built[
        df_osm_built.classification.isin(["activity", "mixed"])
    ]
    df_osm_pois_not_cont_indexed = df_osm_pois_not_contained[
        df_osm_pois_not_contained.classification.isin(["activity", "mixed"])
    ]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(
            lambda x: x["activity"]
        )

    df_indices["activity_pdf"] = calculate_kde(
        df_indices.geometry,
        df_osm_built_indexed,
        df_osm_pois_not_cont_indexed,
        bandwidth,
        X_weights,
        kw_args["pois_weight"],
        kw_args["log_weighted"],
    )
    log("Activity density estimation done")

    ####
    # Compute activity types densities
    ####
    if kw_args["compute_activity_types_kde"]:
        assert "activity_category" in df_osm_built.columns

        # Get unique category values
        unique_categories_built = [
            list(x)
            for x in set(
                tuple(x)
                for x in df_osm_built.activity_category.values
                if isinstance(x, list)
            )
        ]
        unique_categories_pois = [
            list(x)
            for x in set(
                tuple(x)
                for x in df_osm_pois_not_cont_indexed.activity_category.values
                if isinstance(x, list)
            )
        ]
        flat_list = [
            item
            for sublist in unique_categories_built + unique_categories_pois
            for item in sublist
        ]
        categories = list(set(flat_list))

        for cat in categories:  # Get data frame selection of input category
            # Buildings and POIs within that category
            df_built_category = df_osm_built_indexed[
                df_osm_built_indexed.activity_category.apply(
                    lambda x: (isinstance(x, list)) and (cat in x)
                )
            ]
            df_pois_category = df_osm_pois_not_cont_indexed[
                df_osm_pois_not_cont_indexed.activity_category.apply(
                    lambda x: (isinstance(x, list)) and (cat in x)
                )
            ]
            if weighted_kde:
                X_weights = df_built_category.landuses_m2.apply(
                    lambda x: x[cat]
                )

            df_indices[cat + "_pdf"] = calculate_kde(
                df_indices.geometry,
                df_built_category,
                df_pois_category,
                bandwidth,
                X_weights,
                kw_args["pois_weight"],
                kw_args["log_weighted"],
            )

        log("Activity grouped by types density estimation done")

    # Compute land use mix indices
    index_column = "landusemix"
    df_indices[index_column] = df_indices.apply(
        lambda x: _land_use_mix(x.activity_pdf, x.residential_pdf), axis=1
    )
    df_indices["landuse_intensity"] = df_indices.apply(
        lambda x: (x.activity_pdf + x.residential_pdf) / 2.0, axis=1
    )

    log(
        "Done: Land use mix indices. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start))
    )
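
A hedged sketch of a Gaussian kernel density estimate in the spirit of the calculate_kde helper used above (scikit-learn is an assumption here; the actual helper is not shown in this snippet):

import numpy as np
from sklearn.neighbors import KernelDensity

centroids = np.array([[0.0, 0.0], [50.0, 0.0], [0.0, 80.0]])  # toy building centroids
kde = KernelDensity(bandwidth=600.0, kernel="gaussian").fit(centroids)  # 600 m "walkable distance"
grid_points = np.array([[10.0, 10.0], [5000.0, 5000.0]])
density = np.exp(kde.score_samples(grid_points))  # score_samples returns log-density
print(density)  # the nearby point gets a far higher density than the distant one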
Example No. 23
def prepare_data(
    G, df_osm_built, df_osm_pois, df_indices, num_processes, kw_arguments
):
    """
        Pickles data to a temporary folder in order to achieve parallel accessibility calculation
        A new subprocess will be created in order to minimize memory requirements

        Parameters
        ----------
        G : networkx multidigraph
                input graph to calculate accessibility
        df_osm_built : geopandas.GeoDataFrame
                buildings data
        df_osm_pois : geopandas.GeoDataFrame
                points of interest data
        df_indices : geopandas.GeoDataFrame
                data frame where indices will be calculated
        num_processes : int
                number of data chunks to create
        kw_arguments : pandas.Series
                additional keyword arguments

        Returns
        ----------
        None
                all data is pickled to the temporary folder
        """
    # Divide long edges
    divide_long_edges_graph(G, kw_arguments.max_edge_length)
    log("Graph long edges shortened")

    # Get activities
    df_built_activ = df_osm_built[
        df_osm_built.classification.isin(["activity", "mixed"])
    ]
    df_pois_activ = df_osm_pois[
        df_osm_pois.classification.isin(["activity", "mixed"])
    ]

    # Associate each activity with its closest node in the graph
    associate_activities_closest_node(G, df_built_activ, df_pois_activ)
    log("Activities associated to graph nodes")

    # Strip node attributes that are not needed downstream
    for n, data in G.nodes.data(data=True):
        for k in list(data.keys()):
            if k not in ["x", "y", "num_activities"]:
                data.pop(k)

    # Strip edge attributes that are not needed downstream
    for u, v, data in G.edges.data(data=True, keys=False):
        for k in list(data.keys()):
            if k not in ["length", "key"]:
                data.pop(k)

    # Remove the graph-level attribute if present
    G.graph.pop("streets_per_node", None)

    # Pickle the graph
    nx.write_gpickle(G, "temp/graph.gpickle")

    # Prepare input indices points: one pickled chunk per process
    data_split = np.array_split(df_indices, num_processes)
    for i in range(num_processes):
        data_split[i].to_pickle("temp/points_" + str(i) + ".pkl")

    # Pickle the keyword arguments
    kw_arguments.to_pickle("temp/arguments.pkl")