def test_Alignment(self):
    """Check Alignment against a reference value and with truncated weights."""
    buildings = self.df_buildings
    buildings["orient"] = mm.Orientation(buildings).series

    weights = Queen.from_dataframe(self.df_tessellation, ids="uID")
    aligned = mm.Alignment(buildings, weights, "uID", buildings["orient"])
    buildings["align_sw"] = aligned.series
    assert buildings["align_sw"][0] == pytest.approx(18.299481296)

    # Weights built without the first two tessellation rows: the result is
    # expected to contain at least one NaN.
    partial_weights = Queen.from_dataframe(self.df_tessellation[2:], ids="uID")
    partial = mm.Alignment(buildings, partial_weights, "uID", buildings["orient"])
    assert partial.series.isna().any()
def test_MeanInterbuildingDistance(self):
    """Check MeanInterbuildingDistance against a reference value and with truncated weights."""
    expected = 29.305457092042744
    weights = Queen.from_dataframe(self.df_tessellation, ids="uID")
    measured = mm.MeanInterbuildingDistance(
        self.df_buildings, weights, "uID", order=3
    )
    self.df_buildings["m_dist"] = measured.series
    assert self.df_buildings["m_dist"][0] == pytest.approx(expected)

    # Weights built without the first two tessellation rows: the result is
    # expected to contain at least one NaN.
    partial_weights = Queen.from_dataframe(self.df_tessellation[2:], ids="uID")
    partial = mm.MeanInterbuildingDistance(self.df_buildings, partial_weights, "uID")
    assert partial.series.isna().any()
def test_NeighborDistance(self):
    """Check NeighborDistance with complete and truncated spatial weights.

    The reference value is a float derived from geometry, so it is compared
    with ``pytest.approx`` rather than exact equality; bit-identical results
    are not guaranteed across platforms or library versions.
    """
    expected = pytest.approx(29.18589019096464)

    sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
    self.df_buildings["dist_sw"] = mm.NeighborDistance(
        self.df_buildings, sw, "uID"
    ).series
    assert self.df_buildings["dist_sw"][0] == expected

    # Weights built without the last two tessellation rows: the first value
    # must be unchanged and the result must contain at least one NaN.
    sw_drop = Queen.from_dataframe(self.df_tessellation[:-2], ids="uID")
    self.df_buildings["dist_sw"] = mm.NeighborDistance(
        self.df_buildings, sw_drop, "uID"
    ).series
    assert self.df_buildings["dist_sw"][0] == expected
    assert self.df_buildings["dist_sw"].isna().any()
def test_Neighbors(self):
    """Check plain and weighted Neighbors means, and NaN output for truncated weights.

    The expected means are floats, so they are compared with
    ``pytest.approx`` instead of exact equality to avoid spurious failures
    from floating-point differences between environments.
    """
    sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
    sw_drop = Queen.from_dataframe(self.df_tessellation[2:], ids="uID")

    self.df_tessellation["nei_sw"] = mm.Neighbors(
        self.df_tessellation, sw, "uID"
    ).series
    self.df_tessellation["nei_wei"] = mm.Neighbors(
        self.df_tessellation, sw, "uID", weighted=True
    ).series

    check = 5.180555555555555
    check_w = 0.029066398893536072
    assert self.df_tessellation["nei_sw"].mean() == pytest.approx(check)
    assert self.df_tessellation["nei_wei"].mean() == pytest.approx(check_w)

    # Weights built without the first two rows must yield at least one NaN.
    assert mm.Neighbors(self.df_tessellation, sw_drop, "uID").series.isna().any()
def __init__(self, gdf, spatial_weights=None, mean=False, verbose=True):
    """Sum (and optionally average) geometry lengths over each feature and its neighbours.

    Parameters
    ----------
    gdf : GeoDataFrame
        Features whose ``geometry.length`` is aggregated.
    spatial_weights : libpysal.weights.W, optional
        Neighbour definition. When ``None``, Queen contiguity is generated
        from ``gdf``.
    mean : bool, default False
        When True, the mean of the lengths is also stored as ``self.mean``.
    verbose : bool, default True
        Print status messages and show the progress bar.

    Attributes
    ----------
    series, sum : pandas.Series
        The same Series of summed lengths, indexed like ``gdf``.
    mean : pandas.Series
        Mean lengths; set only when ``mean`` is True.
    sw : libpysal.weights.W
        The weights that were used.
    gdf : GeoDataFrame
        The input frame.
    """
    self.gdf = gdf

    if spatial_weights is None:
        if verbose:
            print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
        if verbose:
            print("Spatial weights ready...")
    self.sw = spatial_weights

    lengths = gdf.geometry.length
    totals = []
    averages = []
    for idx in tqdm(gdf.index, total=gdf.shape[0], disable=not verbose):
        # The feature itself comes first, followed by its neighbours.
        members = [idx, *spatial_weights.neighbors[idx]]
        # NOTE(review): ids are used as positions here (``iloc``); this
        # presumably assumes a default integer index - confirm with callers.
        member_lengths = lengths.iloc[members]
        if mean:
            averages.append(np.mean(member_lengths))
        totals.append(sum(member_lengths))

    self.series = self.sum = pd.Series(totals, index=gdf.index)
    if mean:
        self.mean = pd.Series(averages, index=gdf.index)
def test_courtyards(self):
    """Check that courtyards gives the reference mean with and without explicit weights."""
    expected = 0.6805555555555556
    without_weights = mm.courtyards(self.df_buildings, 'bID')
    weights = Queen.from_dataframe(self.df_buildings)
    with_weights = mm.courtyards(self.df_buildings, 'bID', weights)
    for result in (without_weights, with_weights):
        assert result.mean() == expected
def __init__(self, gdf, spatial_weights=None):
    """Compute, per feature, the exterior length of its joined group of geometries.

    Features are grouped by the component label of the spatial weights; each
    group is buffered, dissolved, and the exterior length of the dissolved
    shape is assigned to every member of the group.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features.
    spatial_weights : libpysal.weights.W, optional
        Weights whose ``component_labels`` define the groups. When ``None``,
        Queen contiguity is generated from ``gdf``.

    Attributes
    ----------
    series : pandas.Series
        Resulting lengths, indexed like ``gdf``.
    sw : libpysal.weights.W
        The weights that were used.
    gdf : GeoDataFrame
        The input frame.
    """
    self.gdf = gdf

    if spatial_weights is None:
        print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
        print("Spatial weights ready...")
    self.sw = spatial_weights

    count = gdf.shape[0]
    labels = pd.Series(spatial_weights.component_labels, index=range(len(gdf)))
    geometries = gdf.geometry

    # Maps positional id -> exterior length of the group it belongs to.
    wall_lengths = {}
    for position in tqdm(range(count), total=count):
        if position in wall_lengths:
            # Already filled in when another member of the group was handled.
            continue
        label = spatial_weights.component_labels[position]
        members = labels[labels == label].index
        # Buffer to avoid multipolygons where buildings touch by corners only.
        merged = geometries.iloc[members].buffer(0.01).unary_union
        perimeter = merged.exterior.length
        for member in members:
            wall_lengths[member] = perimeter

    ordered = [wall_lengths[position] for position in tqdm(range(count), total=count)]
    self.series = pd.Series(ordered, index=gdf.index)
def __init__(self, gdf, spatial_weights=None, mean=False):
    """Aggregate geometry lengths over each feature and its neighbours.

    Parameters
    ----------
    gdf : GeoDataFrame
        Features whose geometry length is aggregated.
    spatial_weights : libpysal.weights.W, optional
        Neighbour definition. When ``None``, Queen contiguity is generated
        from ``gdf``.
    mean : bool, default False
        When True the mean of the lengths is stored, otherwise their sum.

    Attributes
    ----------
    series : pandas.Series
        Aggregated lengths, indexed like ``gdf``.
    sw : libpysal.weights.W
        The weights that were used.
    mean : bool
        The ``mean`` flag as passed in.
    gdf : GeoDataFrame
        The input frame.
    """
    self.gdf = gdf
    self.mean = mean

    if spatial_weights is None:
        print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
        print("Spatial weights ready...")
    self.sw = spatial_weights

    aggregate = np.mean if mean else sum
    values = []
    for idx in tqdm(gdf.index, total=gdf.shape[0]):
        # Neighbours first, then the feature itself; a feature without
        # neighbours is aggregated on its own.
        members = list(spatial_weights.neighbors[idx])
        members.append(idx)
        values.append(aggregate(gdf.iloc[members].geometry.length))

    self.series = pd.Series(values, index=gdf.index)
def setup(self):
    """Load the bubenec sample layers and derive the columns, blocks and weights used by the tests."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    # Derived attribute columns.
    streets["nID"] = mm.unique_id(streets)
    buildings["height"] = np.linspace(10.0, 30.0, 144)
    tessellation["area"] = tessellation.geometry.area
    buildings["area"] = buildings.geometry.area
    buildings["fl_area"] = mm.FloorArea(buildings, "height").series
    buildings["nID"] = mm.get_network_id(buildings, streets, "nID")

    # Blocks and the block ids they assign to buildings and tessellation.
    generated = mm.Blocks(tessellation, streets, buildings, "bID", "uID")
    self.blocks = generated.blocks
    buildings["bID"] = generated.buildings_id
    tessellation["bID"] = generated.tessellation_id

    # Spatial weights on buildings, tessellation and streets.
    self.swb = Queen.from_dataframe(buildings)
    self.sw5 = mm.sw_high(k=5, gdf=tessellation, ids="uID")
    self.sw3 = mm.sw_high(k=3, gdf=tessellation, ids="uID")
    self.sws = mm.sw_high(k=2, gdf=streets)

    # Street network graph, its nodes/edges and the network-based weights.
    graph = mm.node_degree(mm.gdf_to_nx(streets))
    self.nodes, self.edges, network_weights = mm.nx_to_gdf(
        graph, spatial_weights=True
    )
    self.swn = mm.sw_high(k=3, weights=network_weights)
def test_Courtyards(self):
    """Check Courtyards with a column name and with a Series plus explicit weights."""
    expected = 0.6805555555555556
    by_column_name = mm.Courtyards(self.df_buildings, "bID").series
    weights = Queen.from_dataframe(self.df_buildings)
    by_series = mm.Courtyards(
        self.df_buildings, self.df_buildings.bID, weights
    ).series
    for result in (by_column_name, by_series):
        assert result.mean() == expected
def setup(self):
    """Load the bubenec sample layers and prepare the columns and weights used by the tests."""
    path = mm.datasets.get_path("bubenec")
    buildings = gpd.read_file(path, layer="buildings")
    streets = gpd.read_file(path, layer="streets")
    tessellation = gpd.read_file(path, layer="tessellation")
    self.df_buildings = buildings
    self.df_streets = streets
    self.df_tessellation = tessellation

    # Derived attribute columns.
    buildings["height"] = np.linspace(10.0, 30.0, 144)
    buildings["volume"] = mm.Volume(buildings, "height").series
    streets["nID"] = mm.unique_id(streets)
    buildings["nID"] = mm.get_network_id(buildings, streets, "nID")
    buildings["orient"] = mm.Orientation(buildings).series
    tessellation["orient"] = mm.Orientation(tessellation).series

    # Spatial weights keyed by uID.
    self.sw = Queen.from_dataframe(tessellation, ids="uID")
    self.swh = mm.sw_high(k=3, gdf=tessellation, ids="uID")
    self.swb = Queen.from_dataframe(buildings, ids="uID")
def __init__(self):
    """Read the 'boston_housing' table and build Queen weights on it.

    Sets ``self.data_carto`` (the table as read), ``self.data`` (a
    GeoDataFrame copy whose geometry column is named 'geometry') and
    ``self.w`` (Queen contiguity weights built from ``self.data``).
    """
    self.data_carto = read_carto('boston_housing')

    # Rename the geometry column from 'the_geom' to 'geometry'
    # (pysal expects the geometry column to be called 'geometry').
    frame = self.data_carto.copy()
    frame['geometry'] = frame.geometry
    frame.drop(['the_geom'], axis=1, inplace=True)

    self.data = gpd.GeoDataFrame(frame, geometry='geometry')
    self.w = Queen.from_dataframe(self.data)
def test_neighbour_distance(self):
    """Check neighbour_distance with generated and with explicitly passed weights."""
    buildings = self.df_buildings
    tessellation = self.df_tessellation

    buildings['dist'] = mm.neighbour_distance(buildings, tessellation, 'uID')
    weights = Queen.from_dataframe(tessellation)
    buildings['dist_sw'] = mm.neighbour_distance(
        buildings, tessellation, 'uID', weights)

    expected = 29.18589019096464
    for column in ('dist', 'dist_sw'):
        assert buildings[column][0] == expected
def test_MeanInterbuildingDistance(self):
    """Check MeanInterbuildingDistance with explicit higher-order weights and with ``order=3``.

    Both variants must give the same reference value. The value is a float
    derived from geometry, so it is compared with ``pytest.approx`` instead
    of exact equality.
    """
    sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
    swh = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")

    self.df_buildings["m_dist_sw"] = mm.MeanInterbuildingDistance(
        self.df_buildings, sw, "uID", swh
    ).series
    self.df_buildings["m_dist"] = mm.MeanInterbuildingDistance(
        self.df_buildings, sw, "uID", order=3
    ).series

    check = pytest.approx(29.305457092042744)
    assert self.df_buildings["m_dist_sw"][0] == check
    assert self.df_buildings["m_dist"][0] == check
def test_mean_interbuilding_distance(self):
    """Check mean_interbuilding_distance with generated and with explicit weights."""
    buildings = self.df_buildings
    tessellation = self.df_tessellation

    buildings['m_dist'] = mm.mean_interbuilding_distance(
        buildings, tessellation, 'uID')

    weights = Queen.from_dataframe(tessellation)
    weights_higher = mm.Queen_higher(k=3, geodataframe=tessellation)
    buildings['m_dist_sw'] = mm.mean_interbuilding_distance(
        buildings, tessellation, 'uID', weights, weights_higher)

    expected = 29.305457092042744
    for column in ('m_dist', 'm_dist_sw'):
        assert buildings[column][0] == expected
def test_alignment(self):
    """Check alignment with generated and with explicitly passed weights."""
    buildings = self.df_buildings
    tessellation = self.df_tessellation

    buildings['orient'] = mm.orientation(buildings)
    buildings['align'] = mm.alignment(buildings, 'orient', tessellation, 'uID')

    weights = Queen.from_dataframe(tessellation)
    buildings['align_sw'] = mm.alignment(
        buildings, 'orient', tessellation, 'uID', weights)

    expected = 18.299481296455237
    for column in ('align', 'align_sw'):
        assert buildings[column][0] == expected
def test_building_adjacency(self):
    """Check building_adjacency with generated and with explicitly passed weights."""
    buildings = self.df_buildings
    tessellation = self.df_tessellation

    buildings['adj'] = mm.building_adjacency(buildings, tessellation)

    weights = Queen.from_dataframe(buildings)
    weights_higher = mm.Queen_higher(k=3, geodataframe=tessellation)
    buildings['adj_sw'] = mm.building_adjacency(
        buildings,
        tessellation,
        spatial_weights=weights,
        spatial_weights_higher=weights_higher,
    )

    expected = 0.2613824113909074
    for column in ('adj', 'adj_sw'):
        assert buildings[column].mean() == expected
def test_neighbours(self):
    """Check neighbours with generated weights, explicit weights and the weighted variant."""
    tessellation = self.df_tessellation

    tessellation['nei'] = mm.neighbours(tessellation)
    weights = Queen.from_dataframe(tessellation)
    tessellation['nei_sw'] = mm.neighbours(tessellation, weights)
    tessellation['nei_wei'] = mm.neighbours(tessellation, weights, weighted=True)

    expected = 5.180555555555555
    expected_weighted = 0.029066398893536072
    for column in ('nei', 'nei_sw'):
        assert tessellation[column].mean() == expected
    assert tessellation['nei_wei'].mean() == expected_weighted
def test_BuildingAdjacencyy(self):
    """Check BuildingAdjacency with explicit and with auto-generated building weights.

    Both variants must give the same mean. The expected mean is a float, so
    it is compared with ``pytest.approx`` rather than exact equality.
    """
    sw = Queen.from_dataframe(self.df_buildings, ids="uID")
    swh = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")

    self.df_buildings["adj_sw"] = mm.BuildingAdjacency(
        self.df_buildings,
        spatial_weights=sw,
        unique_id="uID",
        spatial_weights_higher=swh,
    ).series
    # Without ``spatial_weights`` the class builds its own weights.
    self.df_buildings["adj_sw_none"] = mm.BuildingAdjacency(
        self.df_buildings, unique_id="uID", spatial_weights_higher=swh
    ).series

    check = pytest.approx(0.2613824113909074)
    assert self.df_buildings["adj_sw"].mean() == check
    assert self.df_buildings["adj_sw_none"].mean() == check
def __init__(self, gdf, spatial_weights_higher, unique_id, spatial_weights=None, verbose=True):
    """Compute, per feature, the ratio of distinct components to features in its neighbourhood.

    For each id, the neighbourhood is its neighbours in
    ``spatial_weights_higher`` plus the id itself. The result is the number
    of distinct component labels (taken from ``spatial_weights``) in that
    neighbourhood divided by the neighbourhood size. Ids missing from
    ``spatial_weights_higher`` or having no neighbours there get NaN.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features.
    spatial_weights_higher : libpysal.weights.W
        Weights defining the neighbourhood, keyed by ``unique_id`` values.
    unique_id : str
        Name of the id column in ``gdf``.
    spatial_weights : libpysal.weights.W, optional
        Weights whose ``component_labels`` are used. When ``None``, Queen
        contiguity is generated from ``gdf`` with ``ids=unique_id``.
    verbose : bool, default True
        Print status messages and show the progress bar.

    Attributes
    ----------
    series : pandas.Series
        Resulting ratios, indexed like ``gdf``.
    sw, sw_higher : libpysal.weights.W
        The weights that were used.
    id : pandas.Series
        The ``unique_id`` column of ``gdf``.
    gdf : GeoDataFrame
        The input frame.
    """
    self.gdf = gdf
    self.sw_higher = spatial_weights_higher
    self.id = gdf[unique_id]

    # If the weights matrix is not passed, generate it from gdf.
    if spatial_weights is None:
        if verbose:
            print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(
            gdf, silence_warnings=True, ids=unique_id
        )
        if verbose:
            print("Spatial weights ready...")
    self.sw = spatial_weights

    component_of = dict(zip(gdf[unique_id], spatial_weights.component_labels))
    higher = spatial_weights_higher.neighbors

    ratios = []
    progress = tqdm(
        self.id,
        total=gdf.shape[0],
        disable=not verbose,
        desc="Calculating adjacency",
    )
    for uid in progress:
        if uid not in higher or not higher[uid]:
            ratios.append(np.nan)
            continue
        group = higher[uid].copy()
        group.append(uid)
        distinct = {component_of[member] for member in group}
        ratios.append(len(distinct) / len(group))

    self.series = pd.Series(ratios, index=gdf.index)
def __init__(self, gdf, block_id, spatial_weights=None):
    """Count the interior rings of the joined group each feature belongs to.

    Features are grouped by the component label of the spatial weights; each
    group is buffered and dissolved, and the number of interior rings of the
    dissolved shape is assigned to every member of the group.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features.
    block_id : str or list-like
        Name of the block id column, or the values themselves (stored on a
        private copy of ``gdf`` as column ``"mm_bid"``).
    spatial_weights : libpysal.weights.W, optional
        Weights whose ``component_labels`` define the groups. When ``None``,
        Queen contiguity is generated from ``gdf``.

    Attributes
    ----------
    series : pandas.Series
        Number of interior rings per feature, indexed like ``gdf``. A group
        whose interiors could not be read gets NaN.
    block_id : pandas.Series
        The block id column.
    sw : libpysal.weights.W
        The weights that were used.
    gdf : GeoDataFrame
        The input frame (unmodified).
    """
    self.gdf = gdf

    # Work on a copy: a helper column may be added below.
    gdf = gdf.copy()
    if not isinstance(block_id, str):
        gdf["mm_bid"] = block_id
        block_id = "mm_bid"
    self.block_id = gdf[block_id]

    # If the weights matrix is not passed, generate it from the objects.
    if spatial_weights is None:
        print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
    self.sw = spatial_weights

    # Maps index label -> number of courtyards of its group.
    courtyards = {}
    components = pd.Series(spatial_weights.component_labels, index=gdf.index)
    # ``component_labels`` is positional, so look it up by position rather
    # than by index label (the two differ for a non-default index).
    for position, index in tqdm(enumerate(gdf.index), total=gdf.shape[0]):
        if index in courtyards:
            # Already filled in when another member of the group was handled.
            continue
        comp = spatial_weights.component_labels[position]
        to_join = components[components == comp].index
        # Buffer to avoid multipolygons where buildings touch by corners only.
        dissolved = gdf.loc[to_join].geometry.buffer(0.01).unary_union
        try:
            interiors = len(list(dissolved.interiors))
        except ValueError:
            print("Something unexpected happened.")
            # Record a missing value instead of leaving ``interiors`` unbound
            # or silently reusing the count of the previous group.
            interiors = float("nan")
        for b in to_join:
            courtyards[b] = interiors

    results_list = [
        courtyards[index] for index in tqdm(gdf.index, total=gdf.shape[0])
    ]
    self.series = pd.Series(results_list, index=gdf.index)
def __init__(self, gdf, block_id=None, spatial_weights=None, verbose=True):
    """Count the interior rings of the joined group each feature belongs to.

    Features are grouped by the component label of the spatial weights; each
    group is buffered and dissolved, and the number of interior rings of the
    dissolved shape is assigned to every member of the group.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features.
    block_id : optional
        Deprecated and unused; passing a value only emits a FutureWarning.
    spatial_weights : libpysal.weights.W, optional
        Weights whose ``component_labels`` define the groups. When ``None``,
        Queen contiguity is generated from ``gdf``.
    verbose : bool, default True
        Print the status message and show the progress bar.

    Attributes
    ----------
    series : pandas.Series
        Number of interior rings per feature, indexed like ``gdf``. A group
        whose interiors could not be read gets NaN.
    sw : libpysal.weights.W
        The weights that were used.
    gdf : GeoDataFrame
        The input frame.
    """
    if block_id is not None:
        warnings.warn(
            "block_id is deprecated and will be removed in v0.4.",
            FutureWarning,
        )
    self.gdf = gdf

    # If the weights matrix is not passed, generate it from the objects.
    if spatial_weights is None:
        if verbose:
            print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
    self.sw = spatial_weights

    # Maps index label -> number of courtyards of its group.
    courtyards = {}
    components = pd.Series(spatial_weights.component_labels, index=gdf.index)
    for i, index in tqdm(
        enumerate(gdf.index), total=gdf.shape[0], disable=not verbose
    ):
        if index in courtyards:
            # Already filled in when another member of the group was handled.
            continue
        comp = spatial_weights.component_labels[i]
        to_join = components[components == comp].index
        # Buffer to avoid multipolygons where buildings touch by corners only.
        dissolved = gdf.loc[to_join].geometry.buffer(0.01).unary_union
        try:
            interiors = len(list(dissolved.interiors))
        except ValueError:
            print("Something unexpected happened.")
            # Record a missing value instead of leaving ``interiors`` unbound
            # or silently reusing the count of the previous group.
            interiors = float("nan")
        for b in to_join:
            courtyards[b] = interiors

    results_list = [courtyards[index] for index in gdf.index]
    self.series = pd.Series(results_list, index=gdf.index)
def __init__(self, gdf, spatial_weights_higher, unique_id, spatial_weights=None):
    """Compute, per feature, the ratio of distinct components to features in its neighbourhood.

    For each id, the neighbourhood is its neighbours in
    ``spatial_weights_higher`` plus the id itself. The result is the number
    of distinct component labels (taken from ``spatial_weights``) in that
    neighbourhood divided by the neighbourhood size. Ids without neighbours
    in ``spatial_weights_higher`` get 0.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features.
    spatial_weights_higher : libpysal.weights.W
        Weights defining the neighbourhood, keyed by ``unique_id`` values.
    unique_id : str
        Name of the id column in ``gdf``.
    spatial_weights : libpysal.weights.W, optional
        Weights whose ``component_labels`` are used. When ``None``, Queen
        contiguity is generated from ``gdf`` with ``ids=unique_id``.

    Attributes
    ----------
    series : pandas.Series
        Resulting ratios, indexed like ``gdf``.
    sw, sw_higher : libpysal.weights.W
        The weights that were used.
    id : pandas.Series
        The ``unique_id`` column of ``gdf``.
    gdf : GeoDataFrame
        The input frame.
    """
    self.gdf = gdf
    self.sw_higher = spatial_weights_higher
    self.id = gdf[unique_id]

    # If the weights matrix is not passed, generate it from gdf.
    if spatial_weights is None:
        print("Calculating spatial weights...")
        from libpysal.weights import Queen

        spatial_weights = Queen.from_dataframe(
            gdf, silence_warnings=True, ids=unique_id
        )
        print("Spatial weights ready...")
    self.sw = spatial_weights

    component_of = dict(zip(gdf[unique_id], spatial_weights.component_labels))

    print("Calculating adjacency...")
    ratios = []
    for uid in tqdm(self.id, total=gdf.shape[0]):
        group = spatial_weights_higher.neighbors[uid].copy()
        if not group:
            ratios.append(0)
            continue
        group.append(uid)
        distinct = {component_of[member] for member in group}
        ratios.append(len(distinct) / len(group))

    self.series = pd.Series(ratios, index=gdf.index)
import sys
import geopandas as gpd
from libpysal.weights import Queen
from ..greedy import greedy
import pytest

# Shared fixtures, loaded once when the module is imported.
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
sw = Queen.from_dataframe(world, ids=world.index.to_list(), silence_warnings=True)


def test_default():
    """Default colouring: five colours, expected class sizes, index aligned with the input."""
    result = greedy(world)
    assert len(result) == len(world)
    assert set(result) == {0, 1, 2, 3, 4}
    assert result.value_counts().to_list() == [36, 36, 35, 35, 35]
    assert (result.index == world.index).all()


@pytest.mark.parametrize("pysal_geos", [None, 0])
def test_count(pysal_geos):
    """Balanced-by-count colouring, with ``min_distance`` unset and set to 0."""
    result = greedy(
        world, strategy="balanced", balance="count", min_distance=pysal_geos
    )
    assert len(result) == len(world)
    assert set(result) == {0, 1, 2, 3, 4}
    assert result.value_counts().to_list() == [36, 36, 35, 35, 35]
def _spatial_information_theory(data,
                                group_pop_var,
                                total_pop_var,
                                w=None,
                                unit_in_local_env=True,
                                original_crs={'init': 'epsg:4326'}):
    """
    Calculation of Spatial Information Theory index

    Parameters
    ----------

    data              : a geopandas DataFrame with a geometry column.
                        It is not modified by this function.

    group_pop_var     : string
                        The name of variable in data that contains the population size of the group of interest

    total_pop_var     : string
                        The name of variable in data that contains the total population of the unit

    w                 : W
                        A PySAL weights object. If not provided, Queen contiguity matrix is used.
                        This is used to construct the local environment around each spatial unit.

    unit_in_local_env : boolean
                        A condition argument that states if the local environment around the unit comprises the unit itself. Default is True.

    original_crs      : the original crs code given by a dict of data. The data is assigned this crs and
                        then projected to the Mercator projection (EPSG = 3395).
                        This argument is also to avoid passing data without crs and, therefore, raising unusual results.
                        This index relies on the population density and the area is measured in squared kilometers.

    Returns
    -------

    statistic : float
                Spatial Information Theory Index

    core_data : a geopandas DataFrame
                A geopandas DataFrame that contains the columns used to perform the estimate.

    Raises
    ------

    TypeError  : if data is not a GeoDataFrame, if the variable names are not strings
                 or if w is not a PySAL weights object.

    ValueError : if the variable names are not columns of data.

    Notes
    -----
    Based on Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.

    This measure can be extended to a society with more than two groups.

    """
    if (str(type(data)) != '<class \'geopandas.geodataframe.GeoDataFrame\'>'):
        raise TypeError(
            'data is not a GeoDataFrame and, therefore, this index cannot be calculated.'
        )

    if ((type(group_pop_var) is not str) or (type(total_pop_var) is not str)):
        raise TypeError('group_pop_var and total_pop_var must be strings')

    if ((group_pop_var not in data.columns)
            or (total_pop_var not in data.columns)):
        raise ValueError(
            'group_pop_var and total_pop_var must be variables of data')

    if 'geometry' not in data.columns:
        # Work on a copy so the caller's frame does not gain a 'geometry' column.
        data = data.copy()
        data['geometry'] = data[data._geometry_column_name]
        data = data.drop([data._geometry_column_name], axis=1)
        data = data.set_geometry('geometry')

    w_object = Queen.from_dataframe(data) if w is None else w

    if not isinstance(w_object, libpysal.weights.W):
        raise TypeError('w is not a PySAL weights object')

    # rename returns a new frame, so the column added below stays local.
    data = data.rename(columns={
        group_pop_var: 'group_pop_var',
        total_pop_var: 'total_pop_var'
    })

    data['compl_pop_var'] = data['total_pop_var'] - data['group_pop_var']

    # In this case, M = 2 according to Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.
    pi_1 = data['group_pop_var'].sum() / data['total_pop_var'].sum()
    pi_2 = data['compl_pop_var'].sum() / data['total_pop_var'].sum()
    E = -1 * (pi_1 * math.log(pi_1, 2) + pi_2 * math.log(pi_2, 2))
    T = data['total_pop_var'].sum()

    # Here you reproject the data using the Mercator projection
    data.crs = original_crs
    data = data.to_crs(crs={'init': 'epsg:3395'})  # Mercator
    sqm_to_sqkm = 10**6
    data['area_sq_km'] = data.area / sqm_to_sqkm
    tau_p = data['total_pop_var'] / data['area_sq_km']

    w_matrix = w_object.full()[0]

    if unit_in_local_env:
        np.fill_diagonal(w_matrix, 1)

    # The local context of each spatial unit is given by the aggregate context (this multiplication gives the local sum of each population)
    data['local_group_pop_var'] = np.matmul(data['group_pop_var'], w_matrix)
    data['local_compl_pop_var'] = np.matmul(data['compl_pop_var'], w_matrix)
    data['local_total_pop_var'] = np.matmul(data['total_pop_var'], w_matrix)

    pi_tilde_p_1 = np.array(data['local_group_pop_var'] /
                            data['local_total_pop_var'])
    pi_tilde_p_2 = np.array(data['local_compl_pop_var'] /
                            data['local_total_pop_var'])

    E_tilde_p = -1 * (pi_tilde_p_1 * np.log(pi_tilde_p_1) / np.log(2) +
                      pi_tilde_p_2 * np.log(pi_tilde_p_2) / np.log(2))

    # This is the H_Tilde according to Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.
    SIT = 1 - 1 / (T * E) * (tau_p * E_tilde_p).sum()

    core_data = data[['group_pop_var', 'total_pop_var', 'geometry']]

    return SIT, core_data
def _spatial_prox_profile(data, group_pop_var, total_pop_var, m=1000):
    """
    Calculation of Spatial Proximity Profile

    Parameters
    ----------

    data          : a geopandas DataFrame with a geometry column.
                    It is not modified by this function.

    group_pop_var : string
                    The name of variable in data that contains the population size of the group of interest

    total_pop_var : string
                    The name of variable in data that contains the total population of the unit

    m             : int
                    a numeric value indicating the number of thresholds to be used. Default value is 1000.
                    A large value of m creates a smoother-looking graph and a more precise spatial proximity profile value but slows down the calculation speed.

    Returns
    -------

    statistic : float
                Spatial Proximity Index

    grid      : numpy array
                The m thresholds, evenly spaced between 0 and 1.

    curve     : numpy array
                The profile value at each threshold; non-finite values are replaced by 0.

    core_data : a geopandas DataFrame
                A geopandas DataFrame that contains the columns used to perform the estimate.

    Raises
    ------

    TypeError  : if data is not a GeoDataFrame, if m is not an int or if the variable names are not strings.

    ValueError : if m is lower than 2, if the variable names are not columns of data
                 or if any unit has a group population larger than its total population.

    Notes
    -----
    Based on Hong, Seong-Yun, and Yukio Sadahiro. "Measuring geographic segregation: a graph-based approach." Journal of Geographical Systems 16.2 (2014): 211-231.

    """
    if (str(type(data)) != '<class \'geopandas.geodataframe.GeoDataFrame\'>'):
        raise TypeError(
            'data is not a GeoDataFrame and, therefore, this index cannot be calculated.'
        )

    if 'geometry' not in data.columns:
        # Work on a copy so the caller's frame does not gain a 'geometry' column.
        data = data.copy()
        data['geometry'] = data[data._geometry_column_name]
        data = data.drop([data._geometry_column_name], axis=1)
        data = data.set_geometry('geometry')

    if (type(m) is not int):
        raise TypeError('m must be an integer.')

    if (m < 2):
        raise ValueError('m must be greater than 1.')

    if ((type(group_pop_var) is not str) or (type(total_pop_var) is not str)):
        raise TypeError('group_pop_var and total_pop_var must be strings')

    if ((group_pop_var not in data.columns)
            or (total_pop_var not in data.columns)):
        raise ValueError(
            'group_pop_var and total_pop_var must be variables of data')

    data = data.rename(columns={
        group_pop_var: 'group_pop_var',
        total_pop_var: 'total_pop_var'
    })

    if any(data.total_pop_var < data.group_pop_var):
        raise ValueError(
            'Group of interest population must equal or lower than the total population of the units.'
        )

    wij = Queen.from_dataframe(data).full()[0]
    delta = manhattan_distances(wij)

    # Group share of every unit; it does not depend on the threshold, so it
    # is computed once instead of once per threshold.
    group_share = np.asarray(data.group_pop_var / data.total_pop_var)

    def calculate_etat(t):
        # Units whose group share reaches the threshold t.
        g_t_i = group_share >= t
        k = g_t_i.sum()
        sub_delta_ij = delta[g_t_i, :][:, g_t_i]
        den = sub_delta_ij.sum()
        eta_t = (k**2 - k) / den
        return eta_t

    grid = np.linspace(0, 1, m)
    aux = np.array(list(map(calculate_etat, grid)))
    # Thresholds with a zero denominator give inf or NaN; count both as 0.
    aux[np.isinf(aux)] = 0
    curve = np.nan_to_num(aux, copy=False)

    threshold = data.group_pop_var.sum() / data.total_pop_var.sum()
    SPP = ((threshold - ((curve[grid < threshold]).sum() / m -
                         (curve[grid >= threshold]).sum() / m)) /
           (1 - threshold))

    core_data = data[['group_pop_var', 'total_pop_var', 'geometry']]

    return SPP, grid, curve, core_data
def test_Alignment(self):
    """Check Alignment against a reference value.

    The reference value is a float derived from geometry, so it is compared
    with ``pytest.approx`` rather than exact equality; bit-identical results
    are not guaranteed across platforms or library versions.
    """
    self.df_buildings["orient"] = mm.Orientation(self.df_buildings).series
    sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
    self.df_buildings["align_sw"] = mm.Alignment(
        self.df_buildings, sw, "uID", self.df_buildings["orient"]
    ).series
    assert self.df_buildings["align_sw"][0] == pytest.approx(18.299481296455237)
def __init__(self):
    """Download the 'boston_housing' dataset and build Queen weights on it.

    Sets ``self.data`` (a GeoDataFrame with its crs set to EPSG:4326) and
    ``self.w`` (Queen contiguity weights built from ``self.data``).
    """
    downloaded = Dataset('boston_housing').download(decode_geom=True)
    frame = gpd.GeoDataFrame(downloaded)
    frame.crs = {'init': 'epsg:4326'}
    self.data = frame
    self.w = Queen.from_dataframe(frame)
def _spatial_dissim(data,
                    group_pop_var,
                    total_pop_var,
                    w=None,
                    standardize=False):
    """Calculate the Spatial Dissimilarity index.

    Parameters
    ----------
    data : a geopandas DataFrame with a geometry column.
    group_pop_var : string
        The name of variable in data that contains the population size of the group of interest
    total_pop_var : string
        The name of variable in data that contains the total population of the unit
    w : W
        A PySAL weights object. If not provided, Queen contiguity matrix is used.
    standardize : boolean
        A condition for row standardisation of the weights matrices. If True, the values of cij in the formulas get row standardized.
        For the sake of comparison, the seg R package of Hong, Seong-Yun, David O'Sullivan, and Yukio Sadahiro. "Implementing spatial segregation measures in R." PloS one 9.11 (2014): e113767.
        works by default with row standardization.

    Returns
    ----------
    statistic : float
        Spatial Dissimilarity Index
    core_data : a geopandas DataFrame
        A geopandas DataFrame that contains the columns used to perform the estimate.

    Raises
    ------
    TypeError
        If ``standardize`` is not a bool or ``w`` is not a PySAL weights object.

    Notes
    -----
    Based on Morrill, R. L. (1991) "On the Measure of Geographic Segregation". Geography Research Forum.

    Reference: :cite:`morrill1991measure`.

    """
    if type(standardize) is not bool:
        raise TypeError("standardize is not a boolean object")

    w_object = Queen.from_dataframe(data) if w is None else w

    if not isinstance(w_object, libpysal.weights.W):
        raise TypeError("w is not a PySAL weights object")

    D = _dissim(data, group_pop_var, total_pop_var)[0]

    x = np.array(data[group_pop_var])
    t = np.array(data[total_pop_var])

    # If a unit has zero population, the group of interest frequency is zero.
    # ``x / t`` is evaluated for every unit before ``np.where`` selects, so
    # the divide-by-zero warnings for empty units are silenced here.
    with np.errstate(divide="ignore", invalid="ignore"):
        pi = np.where(t == 0, 0, x / t)

    cij = w_object.sparse.toarray()
    if standardize:
        # Row standardisation: every row of cij is divided by its sum.
        cij = cij / cij.sum(axis=1).reshape((cij.shape[0], 1))

    # Inspired in (second solution): https://stackoverflow.com/questions/22720864/efficiently-calculating-a-euclidean-distance-matrix-using-numpy
    # Pairwise absolute differences |pi_i - pi_j|, built by broadcasting.
    abs_dist = abs(pi[..., np.newaxis] - pi)

    num = np.multiply(abs_dist, cij).sum()
    den = cij.sum()
    SD = D - num / den

    core_data = data[[group_pop_var, total_pop_var, data.geometry.name]]

    return SD, core_data
def greedy(
    gdf,
    strategy="balanced",
    balance="count",
    min_colors=4,
    sw="queen",
    min_distance=None,
    silence_warnings=True,
    interchange=False,
):
    """
    Color GeoDataFrame using various strategies of greedy (topological) colouring.

    Attempts to color a GeoDataFrame using as few colors as possible, where no
    neighbours can have same color as the feature itself. Offers various strategies
    ported from QGIS or implemented within networkX for greedy graph coloring.

    ``greedy`` will return pandas.Series representing assigned color codes.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame
    strategy : str (default 'balanced')
        Determine coloring strategy. Options are ``'balanced'`` for algorithm based
        on QGIS Topological coloring. It is aiming for a visual balance, defined by
        the balance parameter.

        Other options are those supported by networkx.greedy_color:

        * ``'largest_first'``
        * ``'random_sequential'``
        * ``'smallest_last'``
        * ``'independent_set'``
        * ``'connected_sequential_bfs'``
        * ``'connected_sequential_dfs'``
        * ``'connected_sequential'`` (alias for the previous strategy)
        * ``'saturation_largest_first'``
        * ``'DSATUR'`` (alias for the previous strategy)

        For details see
        https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    balance : str (default 'count')
        If strategy is ``'balanced'``, determine the method of color balancing.

        * ``'count'`` attempts to balance the number of features per each color.
        * ``'area'`` attempts to balance the area covered by each color.
        * ``'centroid'`` attempts to balance the distance between colors based on the
          distance between centroids.
        * ``'distance'`` attempts to balance the distance between colors based on the
          distance between geometries. Slower than ``'centroid'``, but more precise.

        ``'centroid'`` and ``'distance'`` are significantly slower than other
        especially for larger GeoDataFrames.

        Apart from ``'count'``, all require CRS to be projected (not in degrees) to
        ensure metric values are correct.

    min_colors: int (default 4)
        If strategy is ``'balanced'``, define the minimal number of colors to be used.

    sw : 'queen', 'rook' or libpysal.weights.W (default 'queen')
        If min_distance is None, one can pass ``'libpysal.weights.W'`` object denoting
        neighbors or let greedy to generate one based on ``'queen'`` or ``'rook'``
        contiguity.

    min_distance : float
        Set minimal distance between colors.

        If min_distance is not None, slower algorithm for generating spatial weights
        is used based on intersection between geometries. Min_distance is then used as
        a tolerance of intersection.

    silence_warnings : bool (default True)
        Silence libpysal warnings when creating spatial weights.

    interchange : bool (default False)
        Use the color interchange algorithm (applicable for networkx strategies)

        For details see
        https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    Returns
    -------
    color : pd.Series
        pandas.Series representing assigned color codes

    Raises
    ------
    ValueError
        If ``strategy`` is not a supported strategy, or if ``min_distance`` is
        None and ``sw`` is neither a ``libpysal.weights.W`` object nor one of
        ``'queen'`` / ``'rook'``.

    Examples
    --------
    Default:

    >>> gdf['greedy_colors'] = greedy(gdf)

    Balanced by area:

    >>> gdf['balanced_area'] = greedy(gdf, strategy='balanced',
    >>>                               balance='area')

    Using rook adjacency:

    >>> gdf['rook_adjacency'] = greedy(gdf, sw='rook')

    Adding minimal distance between colors:

    >>> gdf['min_distance'] = greedy(gdf, min_distance=100)

    Using different coloring strategy:

    >>> gdf['smallest_last'] = greedy(gdf, strategy='smallest_last')

    """
    if min_distance is not None:
        # A minimal distance overrides whatever was passed as ``sw``.
        sw = _geos_sw(gdf, tolerance=min_distance, silence_warnings=silence_warnings)
    elif not isinstance(sw, W):
        if sw == "queen":
            sw = Queen.from_dataframe(
                gdf, ids=gdf.index.to_list(), silence_warnings=silence_warnings
            )
        elif sw == "rook":
            sw = Rook.from_dataframe(
                gdf, ids=gdf.index.to_list(), silence_warnings=silence_warnings
            )
        else:
            # Fail early with a clear message instead of an unrelated error
            # further down when the value is used as a weights object.
            raise ValueError("{} is not a valid option for sw.".format(sw))

    if strategy == "balanced":
        return pd.Series(_balanced(gdf, sw, balance=balance, min_colors=min_colors))

    elif strategy in STRATEGIES:
        color = nx.greedy_color(
            sw.to_networkx(), strategy=strategy, interchange=interchange
        )
        # Order the colors by graph node before re-labelling them with the
        # index of the input frame.
        color = pd.Series(color).sort_index()
        color.index = gdf.index
        return color

    else:
        raise ValueError("{} is not a valid strategy.".format(strategy))