Example #1
0
 def test_Alignment(self):
     """Check ``mm.Alignment`` on full weights and on weights missing rows.

     The first value is compared against a known reference; weights built
     from a tessellation without its first two rows must yield at least one
     NaN in the resulting series.
     """
     buildings = self.df_buildings
     buildings["orient"] = mm.Orientation(buildings).series
     full_w = Queen.from_dataframe(self.df_tessellation, ids="uID")
     aligned = mm.Alignment(buildings, full_w, "uID", buildings["orient"])
     buildings["align_sw"] = aligned.series
     assert buildings["align_sw"][0] == pytest.approx(18.299481296)
     # Weights that do not cover every building id.
     partial_w = Queen.from_dataframe(self.df_tessellation[2:], ids="uID")
     with_gaps = mm.Alignment(
         buildings, partial_w, "uID", buildings["orient"]).series
     assert with_gaps.isna().any()
Example #2
0
 def test_MeanInterbuildingDistance(self):
     """Check ``mm.MeanInterbuildingDistance`` with ``order=3`` and with gaps.

     The first value is compared against a reference; weights built from a
     tessellation without its first two rows must produce at least one NaN.
     """
     buildings = self.df_buildings
     full_w = Queen.from_dataframe(self.df_tessellation, ids="uID")
     distances = mm.MeanInterbuildingDistance(
         buildings, full_w, "uID", order=3)
     buildings["m_dist"] = distances.series
     expected = 29.305457092042744
     assert buildings["m_dist"][0] == pytest.approx(expected)
     # Weights that do not cover every building id.
     partial_w = Queen.from_dataframe(self.df_tessellation[2:], ids="uID")
     with_gaps = mm.MeanInterbuildingDistance(
         buildings, partial_w, "uID").series
     assert with_gaps.isna().any()
Example #3
0
    def test_NeighborDistance(self):
        """Check ``mm.NeighborDistance`` on full and on truncated weights.

        The first building's value is compared against a reference with a
        floating-point tolerance. With weights built from a tessellation
        missing its last two rows, the first value must be unchanged and the
        series must contain at least one NaN.
        """
        # Local import keeps this block self-contained; the value is derived
        # from geometry, so exact ``==`` on floats would be fragile.
        import pytest

        expected = 29.18589019096464

        sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
        self.df_buildings["dist_sw"] = mm.NeighborDistance(
            self.df_buildings, sw, "uID").series
        assert self.df_buildings["dist_sw"][0] == pytest.approx(expected)

        sw_drop = Queen.from_dataframe(self.df_tessellation[:-2], ids="uID")
        self.df_buildings["dist_sw"] = mm.NeighborDistance(
            self.df_buildings, sw_drop, "uID").series
        assert self.df_buildings["dist_sw"][0] == pytest.approx(expected)
        assert self.df_buildings["dist_sw"].isna().any()
Example #4
0
 def test_Neighbors(self):
     """Check plain and weighted ``mm.Neighbors`` means, plus NaN on gaps."""
     tess = self.df_tessellation
     full_w = Queen.from_dataframe(tess, ids="uID")
     # Weights built without the first two rows of the tessellation.
     partial_w = Queen.from_dataframe(tess[2:], ids="uID")

     tess["nei_sw"] = mm.Neighbors(tess, full_w, "uID").series
     tess["nei_wei"] = mm.Neighbors(
         tess, full_w, "uID", weighted=True).series

     expected_plain = 5.180555555555555
     expected_weighted = 0.029066398893536072
     assert tess["nei_sw"].mean() == expected_plain
     assert tess["nei_wei"].mean() == expected_weighted
     assert mm.Neighbors(tess, partial_w, "uID").series.isna().any()
Example #5
0
    def __init__(self, gdf, spatial_weights=None, mean=False, verbose=True):
        """Sum (and optionally average) geometry lengths over neighbourhoods.

        For every index label of ``gdf`` the lengths of the feature itself
        and of its neighbours in ``spatial_weights`` are collected.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame whose ``geometry.length`` values are aggregated.
        spatial_weights : libpysal weights, optional
            When ``None``, Queen contiguity is built from ``gdf``.
        mean : bool
            When true, ``self.mean`` is additionally set to the per-feature
            mean length.
        verbose : bool
            Controls the status prints and the progress bar.

        Notes
        -----
        Sets ``self.series`` and ``self.sum`` to the same Series of sums.
        Neighbour ids are used as positions (``iloc``) into the lengths.
        """
        self.gdf = gdf

        if spatial_weights is None:
            if verbose:
                print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
            if verbose:
                print("Spatial weights ready...")
        self.sw = spatial_weights

        lengths = gdf.geometry.length

        totals = []
        averages = []
        for label in tqdm(gdf.index, total=gdf.shape[0], disable=not verbose):
            # The feature itself comes first, followed by its neighbours.
            members = [label, *spatial_weights.neighbors[label]]
            member_lengths = lengths.iloc[members]
            if mean:
                averages.append(np.mean(member_lengths))
            totals.append(sum(member_lengths))

        self.series = self.sum = pd.Series(totals, index=gdf.index)
        if mean:
            self.mean = pd.Series(averages, index=gdf.index)
Example #6
0
 def test_courtyards(self):
     """``mm.courtyards`` gives the same mean with and without given weights."""
     without_weights = mm.courtyards(self.df_buildings, 'bID')
     weights = Queen.from_dataframe(self.df_buildings)
     with_weights = mm.courtyards(self.df_buildings, 'bID', weights)
     expected = 0.6805555555555556
     assert without_weights.mean() == expected
     assert with_weights.mean() == expected
Example #7
0
    def __init__(self, gdf, spatial_weights=None):
        """Measure the exterior length of each connected group of geometries.

        Features sharing a component label in ``spatial_weights`` are
        buffered, dissolved into one geometry, and the length of its exterior
        is assigned to every member of the component.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame with the geometries to join.
        spatial_weights : libpysal weights, optional
            When ``None``, Queen contiguity is built from ``gdf``.

        Notes
        -----
        Sets ``self.gdf``, ``self.sw`` and ``self.series`` (indexed like
        ``gdf``). Features are addressed by position throughout.
        """
        self.gdf = gdf

        if spatial_weights is None:
            print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
            print("Spatial weights ready...")
        self.sw = spatial_weights

        count = gdf.shape[0]
        components = pd.Series(spatial_weights.component_labels,
                               index=range(len(gdf)))
        geom = gdf.geometry

        # position -> exterior length of the dissolved component
        walls = {}
        for position in tqdm(range(count), total=count):
            if position in walls:
                # already filled while processing another member
                continue
            label = spatial_weights.component_labels[position]
            members = components[components == label].index
            # buffer to avoid multipolygons where buildings touch by
            # corners only
            dissolved = geom.iloc[members].buffer(0.01).unary_union
            perimeter = dissolved.exterior.length
            for member in members:
                walls[member] = perimeter

        ordered = [walls[position]
                   for position in tqdm(range(count), total=count)]
        self.series = pd.Series(ordered, index=gdf.index)
Example #8
0
    def __init__(self, gdf, spatial_weights=None, mean=False):
        """Aggregate geometry lengths of each feature and its neighbours.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame whose geometry lengths are aggregated.
        spatial_weights : libpysal weights, optional
            When ``None``, Queen contiguity is built from ``gdf``.
        mean : bool
            Aggregate with ``np.mean`` when true, otherwise with ``sum``.
            The flag itself is stored as ``self.mean``.

        Notes
        -----
        Sets ``self.series`` (indexed like ``gdf``). Neighbour ids are used
        as positions (``iloc``) into ``gdf``.
        """
        self.gdf = gdf
        self.mean = mean

        if spatial_weights is None:
            print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)
            print("Spatial weights ready...")
        self.sw = spatial_weights

        aggregate = np.mean if mean else sum

        values = []
        for label in tqdm(gdf.index, total=gdf.shape[0]):
            # Copy so the weights object is not modified; the feature itself
            # goes last, which also covers the no-neighbour case.
            members = spatial_weights.neighbors[label].copy()
            members.append(label)

            member_lengths = gdf.iloc[members].geometry.length
            values.append(aggregate(member_lengths))

        self.series = pd.Series(values, index=gdf.index)
Example #9
0
    def setup(self):
        """Load the ``bubenec`` layers and prepare shared fixtures.

        Reads the buildings, streets and tessellation layers, adds the
        derived columns used by the tests (ids, height, areas, floor area,
        block ids) and builds the spatial weights and street network data
        stored on ``self``.
        """
        path = mm.datasets.get_path("bubenec")
        buildings = gpd.read_file(path, layer="buildings")
        streets = gpd.read_file(path, layer="streets")
        tessellation = gpd.read_file(path, layer="tessellation")
        self.df_buildings = buildings
        self.df_streets = streets
        self.df_tessellation = tessellation

        # Derived attribute columns.
        streets["nID"] = mm.unique_id(streets)
        buildings["height"] = np.linspace(10.0, 30.0, 144)
        tessellation["area"] = tessellation.geometry.area
        buildings["area"] = buildings.geometry.area
        buildings["fl_area"] = mm.FloorArea(buildings, "height").series
        buildings["nID"] = mm.get_network_id(buildings, streets, "nID")

        # Blocks and the block ids they assign to both layers.
        generated = mm.Blocks(tessellation, streets, buildings, "bID", "uID")
        self.blocks = generated.blocks
        buildings["bID"] = generated.buildings_id
        tessellation["bID"] = generated.tessellation_id

        # Spatial weights of various kinds.
        self.swb = Queen.from_dataframe(buildings)
        self.sw5 = mm.sw_high(k=5, gdf=tessellation, ids="uID")
        self.sw3 = mm.sw_high(k=3, gdf=tessellation, ids="uID")
        self.sws = mm.sw_high(k=2, gdf=streets)

        # Street network converted to a graph and back, with weights.
        graph = mm.node_degree(mm.gdf_to_nx(streets))
        self.nodes, self.edges, node_weights = mm.nx_to_gdf(
            graph, spatial_weights=True)
        self.swn = mm.sw_high(k=3, weights=node_weights)
Example #10
0
 def test_Courtyards(self):
     """``mm.Courtyards`` gives the same mean for both ways of calling it.

     One call passes the block id as a column name and lets the class build
     weights; the other passes the id column itself plus explicit weights.
     """
     by_name = mm.Courtyards(self.df_buildings, "bID").series
     weights = Queen.from_dataframe(self.df_buildings)
     by_column = mm.Courtyards(
         self.df_buildings, self.df_buildings.bID, weights).series
     expected = 0.6805555555555556
     assert by_name.mean() == expected
     assert by_column.mean() == expected
Example #11
0
    def setup(self):
        """Load the ``bubenec`` layers and prepare shared fixtures.

        Adds height, volume, network id and orientation columns and builds
        the spatial weights stored on ``self``.
        """
        path = mm.datasets.get_path("bubenec")
        buildings = gpd.read_file(path, layer="buildings")
        streets = gpd.read_file(path, layer="streets")
        tessellation = gpd.read_file(path, layer="tessellation")
        self.df_buildings = buildings
        self.df_streets = streets
        self.df_tessellation = tessellation

        # Derived attribute columns.
        buildings["height"] = np.linspace(10.0, 30.0, 144)
        buildings["volume"] = mm.Volume(buildings, "height").series
        streets["nID"] = mm.unique_id(streets)
        buildings["nID"] = mm.get_network_id(buildings, streets, "nID")
        buildings["orient"] = mm.Orientation(buildings).series
        tessellation["orient"] = mm.Orientation(tessellation).series

        # Spatial weights keyed by "uID".
        self.sw = Queen.from_dataframe(tessellation, ids="uID")
        self.swh = mm.sw_high(k=3, gdf=tessellation, ids="uID")
        self.swb = Queen.from_dataframe(buildings, ids="uID")
Example #12
0
 def __init__(self):
     """Read the ``boston_housing`` table and build Queen weights for it.

     Sets ``self.data_carto`` (the table as read), ``self.data`` (a
     GeoDataFrame whose geometry column is named ``geometry``) and
     ``self.w`` (Queen weights built from ``self.data``).
     """
     self.data_carto = read_carto('boston_housing')
     # pysal expects the geometry column to be called 'geometry' rather than
     # 'the_geom', so copy it under the new name and drop the old column.
     frame = self.data_carto.copy()
     frame['geometry'] = frame.geometry
     frame.drop(['the_geom'], axis=1, inplace=True)
     self.data = gpd.GeoDataFrame(frame, geometry='geometry')
     self.w = Queen.from_dataframe(self.data)
Example #13
0
 def test_neighbour_distance(self):
     """``mm.neighbour_distance`` matches the reference with/without weights."""
     buildings = self.df_buildings
     tess = self.df_tessellation
     buildings['dist'] = mm.neighbour_distance(buildings, tess, 'uID')
     weights = Queen.from_dataframe(tess)
     buildings['dist_sw'] = mm.neighbour_distance(
         buildings, tess, 'uID', weights)
     expected = 29.18589019096464
     for column in ('dist', 'dist_sw'):
         assert buildings[column][0] == expected
Example #14
0
 def test_MeanInterbuildingDistance(self):
     """Check ``mm.MeanInterbuildingDistance`` via both ways of giving order.

     One call passes precomputed higher-order weights, the other passes
     ``order=3``; the first value of each result is compared against the
     same reference with a floating-point tolerance.
     """
     # Local import keeps this block self-contained; the value is derived
     # from geometry, so exact ``==`` on floats would be fragile.
     import pytest

     sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
     swh = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")
     self.df_buildings["m_dist_sw"] = mm.MeanInterbuildingDistance(
         self.df_buildings, sw, "uID", swh).series
     self.df_buildings["m_dist"] = mm.MeanInterbuildingDistance(
         self.df_buildings, sw, "uID", order=3).series
     check = 29.305457092042744
     assert self.df_buildings["m_dist_sw"][0] == pytest.approx(check)
     assert self.df_buildings["m_dist"][0] == pytest.approx(check)
Example #15
0
 def test_mean_interbuilding_distance(self):
     """``mm.mean_interbuilding_distance`` matches with/without weights."""
     buildings = self.df_buildings
     tess = self.df_tessellation
     buildings['m_dist'] = mm.mean_interbuilding_distance(
         buildings, tess, 'uID')
     weights = Queen.from_dataframe(tess)
     higher = mm.Queen_higher(k=3, geodataframe=tess)
     buildings['m_dist_sw'] = mm.mean_interbuilding_distance(
         buildings, tess, 'uID', weights, higher)
     expected = 29.305457092042744
     for column in ('m_dist', 'm_dist_sw'):
         assert buildings[column][0] == expected
Example #16
0
 def test_alignment(self):
     """``mm.alignment`` matches the reference with and without weights."""
     buildings = self.df_buildings
     tess = self.df_tessellation
     buildings['orient'] = mm.orientation(buildings)
     buildings['align'] = mm.alignment(buildings, 'orient', tess, 'uID')
     weights = Queen.from_dataframe(tess)
     buildings['align_sw'] = mm.alignment(
         buildings, 'orient', tess, 'uID', weights)
     expected = 18.299481296455237
     for column in ('align', 'align_sw'):
         assert buildings[column][0] == expected
Example #17
0
 def test_building_adjacency(self):
     """``mm.building_adjacency`` mean matches with and without weights."""
     buildings = self.df_buildings
     tess = self.df_tessellation
     buildings['adj'] = mm.building_adjacency(buildings, tess)
     weights = Queen.from_dataframe(buildings)
     higher = mm.Queen_higher(k=3, geodataframe=tess)
     buildings['adj_sw'] = mm.building_adjacency(
         buildings, tess, spatial_weights=weights,
         spatial_weights_higher=higher)
     expected = 0.2613824113909074
     for column in ('adj', 'adj_sw'):
         assert buildings[column].mean() == expected
Example #18
0
 def test_neighbours(self):
     """``mm.neighbours`` means match for default, given and weighted runs."""
     tess = self.df_tessellation
     tess['nei'] = mm.neighbours(tess)
     weights = Queen.from_dataframe(tess)
     tess['nei_sw'] = mm.neighbours(tess, weights)
     tess['nei_wei'] = mm.neighbours(tess, weights, weighted=True)
     expected_plain = 5.180555555555555
     expected_weighted = 0.029066398893536072
     assert tess['nei'].mean() == expected_plain
     assert tess['nei_sw'].mean() == expected_plain
     assert tess['nei_wei'].mean() == expected_weighted
Example #19
0
 def test_BuildingAdjacencyy(self):
     """``mm.BuildingAdjacency`` mean matches with and without given weights."""
     buildings = self.df_buildings
     weights = Queen.from_dataframe(buildings, ids="uID")
     higher = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")

     with_weights = mm.BuildingAdjacency(
         buildings,
         spatial_weights=weights,
         unique_id="uID",
         spatial_weights_higher=higher,
     )
     buildings["adj_sw"] = with_weights.series

     # Same call, but the class builds the building weights itself.
     without_weights = mm.BuildingAdjacency(
         buildings, unique_id="uID", spatial_weights_higher=higher)
     buildings["adj_sw_none"] = without_weights.series

     expected = 0.2613824113909074
     assert buildings["adj_sw"].mean() == expected
     assert buildings["adj_sw_none"].mean() == expected
Example #20
0
    def __init__(self,
                 gdf,
                 spatial_weights_higher,
                 unique_id,
                 spatial_weights=None,
                 verbose=True):
        """Compute the share of distinct joined groups in each neighbourhood.

        For every id, the neighbours in ``spatial_weights_higher`` plus the
        id itself are mapped to component labels of ``spatial_weights``; the
        result is the number of distinct labels divided by the number of
        features considered.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame containing the ``unique_id`` column.
        spatial_weights_higher : libpysal weights
            Weights defining the neighbourhood of each id.
        unique_id : str
            Name of the id column in ``gdf``.
        spatial_weights : libpysal weights, optional
            Weights whose component labels define the groups. When ``None``,
            Queen contiguity keyed by ``unique_id`` is built from ``gdf``.
        verbose : bool
            Controls the status prints and the progress bar.

        Notes
        -----
        Sets ``self.series`` (indexed like ``gdf``). Ids missing from
        ``spatial_weights_higher`` and ids without neighbours get NaN.
        """
        self.gdf = gdf
        self.sw_higher = spatial_weights_higher
        self.id = gdf[unique_id]

        # if weights matrix is not passed, generate it from gdf
        if spatial_weights is None:
            if verbose:
                print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(
                gdf, silence_warnings=True, ids=unique_id)
            if verbose:
                print("Spatial weights ready...")

        self.sw = spatial_weights
        # id -> component label
        patches = dict(zip(gdf[unique_id], spatial_weights.component_labels))

        ratios = []
        progress = tqdm(
            self.id,
            total=gdf.shape[0],
            disable=not verbose,
            desc="Calculating adjacency",
        )
        for uid in progress:
            if uid not in spatial_weights_higher.neighbors:
                ratios.append(np.nan)
                continue

            # Copy so the weights object is not modified by the append.
            neighbours = spatial_weights_higher.neighbors[uid].copy()
            if not neighbours:
                ratios.append(np.nan)
                continue

            neighbours.append(uid)
            distinct = {patches[member] for member in neighbours}
            ratios.append(len(distinct) / len(neighbours))

        self.series = pd.Series(ratios, index=gdf.index)
Example #21
0
    def __init__(self, gdf, block_id, spatial_weights=None):
        """Count interior rings of each connected group of geometries.

        Features sharing a component label in ``spatial_weights`` are
        buffered and dissolved into one geometry; the number of its interior
        rings is assigned to every member of the component.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame with the geometries to join. A copy is used internally;
            the original is kept as ``self.gdf``.
        block_id : str or list-like
            Column name, or values that are stored in a temporary
            ``"mm_bid"`` column of the internal copy. Kept as
            ``self.block_id``.
        spatial_weights : libpysal weights, optional
            When ``None``, Queen contiguity is built from ``gdf``.

        Notes
        -----
        Sets ``self.series`` (indexed like ``gdf``). If counting the
        interiors of a component raises ``ValueError``, a message is printed
        and the members of that component get NaN.
        """
        self.gdf = gdf

        gdf = gdf.copy()

        if not isinstance(block_id, str):
            gdf["mm_bid"] = block_id
            block_id = "mm_bid"

        self.block_id = gdf[block_id]
        # if weights matrix is not passed, generate it from objects
        if spatial_weights is None:
            print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)

        self.sw = spatial_weights
        # index label -> number of courtyards
        courtyards = {}
        component_labels = spatial_weights.component_labels
        components = pd.Series(component_labels, index=gdf.index)
        for position, index in tqdm(enumerate(gdf.index), total=gdf.shape[0]):
            # if the label is already present, it was filled while
            # processing another member of the same component
            if index in courtyards:
                continue
            # component_labels is looked up by position, not by index
            # label, so a non-default index is handled correctly
            comp = component_labels[position]
            to_join = components[components == comp].index
            joined = gdf.loc[to_join]
            # buffer to avoid multipolygons where buildings touch by
            # corners only
            dissolved = joined.geometry.buffer(0.01).unary_union
            try:
                interiors = len(list(dissolved.interiors))
            except ValueError:
                print("Something unexpected happened.")
                # do not reuse the count of a previously processed component
                interiors = float("nan")
            for b in to_join:
                courtyards[b] = interiors

        results_list = [courtyards[index] for index in gdf.index]

        self.series = pd.Series(results_list, index=gdf.index)
Example #22
0
    def __init__(self, gdf, block_id=None, spatial_weights=None, verbose=True):
        """Count interior rings of each connected group of geometries.

        Features sharing a component label in ``spatial_weights`` are
        buffered and dissolved into one geometry; the number of its interior
        rings is assigned to every member of the component.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame with the geometries to join. A copy is used internally;
            the original is kept as ``self.gdf``.
        block_id : optional
            Deprecated and unused; passing a value emits a ``FutureWarning``.
        spatial_weights : libpysal weights, optional
            When ``None``, Queen contiguity is built from ``gdf``.
        verbose : bool
            Controls the status print and the progress bar.

        Notes
        -----
        Sets ``self.series`` (indexed like ``gdf``). If counting the
        interiors of a component raises ``ValueError``, a message is printed
        and the members of that component get NaN.
        """
        if block_id is not None:
            warnings.warn(
                "block_id is deprecated and will be removed in v0.4.", FutureWarning,
            )
        self.gdf = gdf

        gdf = gdf.copy()

        # if weights matrix is not passed, generate it from objects
        if spatial_weights is None:
            if verbose:
                print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(gdf, silence_warnings=True)

        self.sw = spatial_weights
        # index label -> number of courtyards
        courtyards = {}
        component_labels = spatial_weights.component_labels
        components = pd.Series(component_labels, index=gdf.index)
        for i, index in tqdm(
            enumerate(gdf.index), total=gdf.shape[0], disable=not verbose
        ):
            # if the label is already present, it was filled while
            # processing another member of the same component
            if index in courtyards:
                continue
            comp = component_labels[i]
            to_join = components[components == comp].index
            joined = gdf.loc[to_join]
            # buffer to avoid multipolygons where buildings touch by
            # corners only
            dissolved = joined.geometry.buffer(0.01).unary_union
            try:
                interiors = len(list(dissolved.interiors))
            except ValueError:
                print("Something unexpected happened.")
                # do not reuse the count of a previously processed component
                interiors = float("nan")
            for b in to_join:
                courtyards[b] = interiors

        results_list = [courtyards[index] for index in gdf.index]

        self.series = pd.Series(results_list, index=gdf.index)
Example #23
0
    def __init__(self,
                 gdf,
                 spatial_weights_higher,
                 unique_id,
                 spatial_weights=None):
        """Compute the share of distinct joined groups in each neighbourhood.

        For every row, the neighbours of its id in ``spatial_weights_higher``
        plus the id itself are mapped to component labels of
        ``spatial_weights``; the result is the number of distinct labels
        divided by the number of features considered.

        Parameters
        ----------
        gdf : GeoDataFrame
            Frame containing the ``unique_id`` column.
        spatial_weights_higher : libpysal weights
            Weights defining the neighbourhood of each id.
        unique_id : str
            Name of the id column in ``gdf``.
        spatial_weights : libpysal weights, optional
            Weights whose component labels define the groups. When ``None``,
            Queen contiguity keyed by ``unique_id`` is built from ``gdf``.

        Notes
        -----
        Sets ``self.series`` (indexed like ``gdf``). Rows without neighbours
        get 0.
        """
        self.gdf = gdf
        self.sw_higher = spatial_weights_higher
        self.id = gdf[unique_id]

        # if weights matrix is not passed, generate it from gdf
        if spatial_weights is None:
            print("Calculating spatial weights...")
            from libpysal.weights import Queen

            spatial_weights = Queen.from_dataframe(
                gdf, silence_warnings=True, ids=unique_id)
            print("Spatial weights ready...")

        self.sw = spatial_weights
        # id -> component label
        patches = dict(zip(gdf[unique_id], spatial_weights.component_labels))

        print("Calculating adjacency...")
        ratios = []
        for _, row in tqdm(gdf.iterrows(), total=gdf.shape[0]):
            uid = row[unique_id]
            # Copy so the weights object is not modified by the append.
            neighbours = spatial_weights_higher.neighbors[uid].copy()
            if not neighbours:
                ratios.append(0)
                continue

            neighbours.append(uid)
            distinct = {patches[member] for member in neighbours}
            ratios.append(len(distinct) / len(neighbours))

        self.series = pd.Series(ratios, index=gdf.index)
Example #24
0
import sys

import geopandas as gpd
from libpysal.weights import Queen

from ..greedy import greedy
import pytest


# Module-level fixtures shared by the tests below; both are built at import
# time.
# `world` is the "naturalearth_lowres" dataset resolved through geopandas.
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
# Queen contiguity weights keyed by the frame's index values.
# NOTE(review): `sw` is not referenced by the tests visible in this chunk -
# presumably used by tests further down; confirm before removing.
sw = Queen.from_dataframe(world, ids=world.index.to_list(), silence_warnings=True)


def test_default():
    """``greedy`` with default arguments: five balanced colors, same index."""
    result = greedy(world)
    assert len(result) == len(world)
    assert set(result) == {0, 1, 2, 3, 4}
    assert result.value_counts().to_list() == [36, 36, 35, 35, 35]
    assert (result.index == world.index).all()


@pytest.mark.parametrize("pysal_geos", [None, 0])
def test_count(pysal_geos):
    """``greedy`` balanced by count, for both parametrized ``min_distance``."""
    options = {
        "strategy": "balanced",
        "balance": "count",
        "min_distance": pysal_geos,
    }
    result = greedy(world, **options)
    assert len(result) == len(world)
    assert set(result) == {0, 1, 2, 3, 4}
    assert result.value_counts().to_list() == [36, 36, 35, 35, 35]

def _spatial_information_theory(data,
                                group_pop_var,
                                total_pop_var,
                                w=None,
                                unit_in_local_env=True,
                                original_crs={'init': 'epsg:4326'}):
    """
    Calculation of Spatial Information Theory index

    Parameters
    ----------

    data              : a geopandas GeoDataFrame with a geometry column.
                        The caller's frame is not modified.

    group_pop_var     : string
                        The name of variable in data that contains the population size of the group of interest

    total_pop_var     : string
                        The name of variable in data that contains the total population of the unit

    w                 : W
                        A PySAL weights object. If not provided, Queen contiguity matrix is used.
                        This is used to construct the local environment around each spatial unit.

    unit_in_local_env : boolean
                        A condition argument that states if the local environment around the unit comprises the unit itself. Default is True.

    original_crs      : dict
                        The crs assigned to data before it is reprojected to EPSG:3395 for the area calculation.
                        The index relies on population density, with area measured in square kilometers.
                        The default dict is only read, never modified.

    Returns
    -------

    statistic : float
                Spatial Information Theory Index

    core_data : a geopandas GeoDataFrame
                The renamed 'group_pop_var', 'total_pop_var' and 'geometry' columns used to perform the estimate.

    Raises
    ------

    TypeError  : if data is not a GeoDataFrame, if the variable names are not strings
                 or if the weights are not a PySAL weights object.

    ValueError : if the variable names are not columns of data.

    Notes
    -----
    Based on Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.

    This measure can be extended to a society with more than two groups.

    """
    if (str(type(data)) != '<class \'geopandas.geodataframe.GeoDataFrame\'>'):
        raise TypeError(
            'data is not a GeoDataFrame and, therefore, this index cannot be calculated.'
        )

    if not (isinstance(group_pop_var, str)
            and isinstance(total_pop_var, str)):
        raise TypeError('group_pop_var and total_pop_var must be strings')

    if ((group_pop_var not in data.columns)
            or (total_pop_var not in data.columns)):
        raise ValueError(
            'group_pop_var and total_pop_var must be variables of data')

    if ('geometry' not in data.columns):
        # Work on a copy so the caller's frame does not silently gain a
        # 'geometry' column.
        data = data.copy()
        data['geometry'] = data[data._geometry_column_name]
        data = data.drop([data._geometry_column_name], axis=1)
        data = data.set_geometry('geometry')

    w_object = Queen.from_dataframe(data) if w is None else w

    if not isinstance(w_object, libpysal.weights.W):
        raise TypeError('w is not a PySAL weights object')

    # rename returns a new frame, so the assignments below do not touch the
    # caller's data
    data = data.rename(columns={
        group_pop_var: 'group_pop_var',
        total_pop_var: 'total_pop_var'
    })

    data['compl_pop_var'] = data['total_pop_var'] - data['group_pop_var']

    # In this case, M = 2 according to Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.
    T = data['total_pop_var'].sum()
    pi_1 = data['group_pop_var'].sum() / T
    pi_2 = data['compl_pop_var'].sum() / T
    E = -1 * (pi_1 * math.log(pi_1, 2) + pi_2 * math.log(pi_2, 2))

    # Reproject to Mercator so areas can be converted to square kilometers
    data.crs = original_crs
    data = data.to_crs(crs={'init': 'epsg:3395'})  # Mercator
    sqm_to_sqkm = 10**6
    data['area_sq_km'] = data.area / sqm_to_sqkm
    tau_p = data['total_pop_var'] / data['area_sq_km']

    w_matrix = w_object.full()[0]

    if unit_in_local_env:
        np.fill_diagonal(w_matrix, 1)

    # The local context of each spatial unit is given by the aggregate context (this multiplication gives the local sum of each population)
    data['local_group_pop_var'] = np.matmul(data['group_pop_var'], w_matrix)
    data['local_compl_pop_var'] = np.matmul(data['compl_pop_var'], w_matrix)
    data['local_total_pop_var'] = np.matmul(data['total_pop_var'], w_matrix)

    pi_tilde_p_1 = np.array(data['local_group_pop_var'] /
                            data['local_total_pop_var'])
    pi_tilde_p_2 = np.array(data['local_compl_pop_var'] /
                            data['local_total_pop_var'])

    E_tilde_p = -1 * (pi_tilde_p_1 * np.log(pi_tilde_p_1) / np.log(2) +
                      pi_tilde_p_2 * np.log(pi_tilde_p_2) / np.log(2))

    # This is the H_Tilde according to Reardon, Sean F., and David O’Sullivan. "Measures of spatial segregation." Sociological methodology 34.1 (2004): 121-162.
    SIT = 1 - 1 / (T * E) * (tau_p * E_tilde_p).sum()

    core_data = data[['group_pop_var', 'total_pop_var', 'geometry']]

    return SIT, core_data
def _spatial_prox_profile(data, group_pop_var, total_pop_var, m=1000):
    """
    Calculation of Spatial Proximity Profile

    Parameters
    ----------

    data          : a geopandas GeoDataFrame with a geometry column.
                    The caller's frame is not modified.

    group_pop_var : string
                    The name of variable in data that contains the population size of the group of interest

    total_pop_var : string
                    The name of variable in data that contains the total population of the unit

    m             : int
                    a numeric value indicating the number of thresholds to be used. Default value is 1000.
                    A large value of m creates a smoother-looking graph and a more precise spatial proximity profile value but slows down the calculation speed.

    Returns
    -------

    statistic : float
                Spatial Proximity Index

    grid      : numpy array
                The m evenly spaced thresholds between 0 and 1.

    curve     : numpy array
                The profile value at each threshold; non-finite values are replaced by 0.

    core_data : a geopandas GeoDataFrame
                The renamed 'group_pop_var', 'total_pop_var' and 'geometry' columns used to perform the estimate.

    Raises
    ------

    TypeError  : if data is not a GeoDataFrame, if m is not an integer
                 or if the variable names are not strings.

    ValueError : if m is lower than 2, if the variable names are not columns of data
                 or if the group population exceeds the total population in any unit.

    Notes
    -----
    Based on Hong, Seong-Yun, and Yukio Sadahiro. "Measuring geographic segregation: a graph-based approach." Journal of Geographical Systems 16.2 (2014): 211-231.

    """

    if (str(type(data)) != '<class \'geopandas.geodataframe.GeoDataFrame\'>'):
        raise TypeError(
            'data is not a GeoDataFrame and, therefore, this index cannot be calculated.'
        )

    if ('geometry' not in data.columns):
        # Work on a copy so the caller's frame does not silently gain a
        # 'geometry' column.
        data = data.copy()
        data['geometry'] = data[data._geometry_column_name]
        data = data.drop([data._geometry_column_name], axis=1)
        data = data.set_geometry('geometry')

    # bool is a subclass of int but is not a meaningful threshold count
    if isinstance(m, bool) or not isinstance(m, int):
        raise TypeError('m must be an integer.')

    if (m < 2):
        raise ValueError('m must be greater than 1.')

    if not (isinstance(group_pop_var, str)
            and isinstance(total_pop_var, str)):
        raise TypeError('group_pop_var and total_pop_var must be strings')

    if ((group_pop_var not in data.columns)
            or (total_pop_var not in data.columns)):
        raise ValueError(
            'group_pop_var and total_pop_var must be variables of data')

    data = data.rename(columns={
        group_pop_var: 'group_pop_var',
        total_pop_var: 'total_pop_var'
    })

    if (data.total_pop_var < data.group_pop_var).any():
        raise ValueError(
            'Group of interest population must equal or lower than the total population of the units.'
        )

    wij = Queen.from_dataframe(data).full()[0]
    delta = manhattan_distances(wij)

    # The group share of each unit does not depend on the threshold, so it
    # is computed once instead of once per threshold.
    share = data.group_pop_var / data.total_pop_var

    def calculate_etat(t):
        # units whose group share reaches the threshold t
        g_t_i = np.asarray(share >= t)
        k = g_t_i.sum()
        sub_delta_ij = delta[g_t_i, :][:, g_t_i]
        den = sub_delta_ij.sum()
        return (k**2 - k) / den

    grid = np.linspace(0, 1, m)
    aux = np.array([calculate_etat(t) for t in grid])
    # Divisions by zero above produce inf/-inf/nan; all of them count as 0.
    curve = np.where(np.isfinite(aux), aux, 0.0)

    threshold = data.group_pop_var.sum() / data.total_pop_var.sum()
    SPP = ((threshold - ((curve[grid < threshold]).sum() / m -
                         (curve[grid >= threshold]).sum() / m)) /
           (1 - threshold))

    core_data = data[['group_pop_var', 'total_pop_var', 'geometry']]

    return SPP, grid, curve, core_data
Example #27
0
 def test_Alignment(self):
     """Check the first ``mm.Alignment`` value against a known reference.

     The comparison uses a floating-point tolerance because the value is
     derived from geometry.
     """
     # Local import keeps this block self-contained.
     import pytest

     self.df_buildings["orient"] = mm.Orientation(self.df_buildings).series
     sw = Queen.from_dataframe(self.df_tessellation, ids="uID")
     self.df_buildings["align_sw"] = mm.Alignment(
         self.df_buildings, sw, "uID", self.df_buildings["orient"]).series
     assert self.df_buildings["align_sw"][0] == pytest.approx(
         18.299481296455237)
Example #28
0
 def __init__(self):
     """Download ``boston_housing`` and build Queen weights for it.

     Sets ``self.data`` (a GeoDataFrame with its crs assigned) and
     ``self.w`` (Queen weights built from that frame).
     """
     downloaded = Dataset('boston_housing').download(decode_geom=True)
     frame = gpd.GeoDataFrame(downloaded)
     frame.crs = {'init': 'epsg:4326'}
     self.data = frame
     self.w = Queen.from_dataframe(frame)
Example #29
0
def _spatial_dissim(data,
                    group_pop_var,
                    total_pop_var,
                    w=None,
                    standardize=False):
    """Calculate the Spatial Dissimilarity index.

    Parameters
    ----------
    data : a geopandas DataFrame with a geometry column.
    group_pop_var : string
        The name of variable in data that contains the population size of the group of interest
    total_pop_var : string
        The name of variable in data that contains the total population of the unit
    w : W
        A PySAL weights object. If not provided, Queen contiguity matrix is used.
    standardize  : boolean
        A condition for row standardisation of the weights matrices. If True, the values of cij in the formulas get row standardized.
        Rows that sum to zero (units without neighbours) are left as zeros.
        For the sake of comparison, the seg R package of Hong, Seong-Yun, David O'Sullivan, and Yukio Sadahiro. "Implementing spatial segregation measures in R." PloS one 9.11 (2014): e113767.
        works by default with row standardization.

    Returns
    -------
    statistic : float
        Spatial Dissimilarity Index
    core_data : a geopandas DataFrame
        A geopandas DataFrame that contains the columns used to perform the estimate.

    Raises
    ------
    TypeError
        If ``standardize`` is not a bool, or if the weights object (given or
        generated) is not a ``libpysal.weights.W``.

    Notes
    -----
    Based on Morrill, R. L. (1991) "On the Measure of Geographic Segregation". Geography Research Forum.

    Reference: :cite:`morrill1991measure`.

    """
    if not isinstance(standardize, bool):
        raise TypeError("standardize is not a boolean object")

    w_object = Queen.from_dataframe(data) if w is None else w

    if not isinstance(w_object, libpysal.weights.W):
        raise TypeError("w is not a PySAL weights object")

    # Aspatial dissimilarity; the spatial term is subtracted from it below.
    D = _dissim(data, group_pop_var, total_pop_var)[0]

    x = np.array(data[group_pop_var])
    t = np.array(data[total_pop_var])

    # If a unit has zero population, the group of interest frequency is zero.
    # x / t is evaluated for every unit before np.where masks it, so silence
    # the divide-by-zero / invalid warnings those masked entries would emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        pi = np.where(t == 0, 0, x / t)

    cij = w_object.sparse.toarray()
    if standardize:
        row_sums = cij.sum(axis=1, keepdims=True)
        # Divide only rows that have neighbours; dividing an all-zero row by
        # its zero sum would inject NaN and turn the whole index into NaN.
        cij = np.divide(cij,
                        row_sums,
                        out=np.zeros_like(cij, dtype=float),
                        where=row_sums != 0)

    # Inspired in (second solution): https://stackoverflow.com/questions/22720864/efficiently-calculating-a-euclidean-distance-matrix-using-numpy
    # Pairwise absolute differences |pi_i - pi_j| via broadcasting.
    abs_dist = np.abs(pi[..., np.newaxis] - pi)

    # Weighted mean absolute difference between connected units.
    num = np.multiply(abs_dist, cij).sum()
    den = cij.sum()
    SD = D - num / den

    core_data = data[[group_pop_var, total_pop_var, data.geometry.name]]

    return SD, core_data
Example #30
0
def greedy(
    gdf,
    strategy="balanced",
    balance="count",
    min_colors=4,
    sw="queen",
    min_distance=None,
    silence_warnings=True,
    interchange=False,
):
    """
    Color GeoDataFrame using various strategies of greedy (topological) colouring.

    Attempts to color a GeoDataFrame using as few colors as possible, where no
    neighbours can have same color as the feature itself. Offers various strategies
    ported from QGIS or implemented within networkX for greedy graph coloring.

    ``greedy`` will return pandas.Series representing assigned color codes.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame
    strategy : str (default 'balanced')
        Determine coloring strategy. Options are ``'balanced'`` for algorithm based on
        QGIS Topological coloring. It is aiming for a visual balance, defined by the
        balance parameter.

        Other options are those supported by networkx.greedy_color:

        * ``'largest_first'``
        * ``'random_sequential'``
        * ``'smallest_last'``
        * ``'independent_set'``
        * ``'connected_sequential_bfs'``
        * ``'connected_sequential_dfs'``
        * ``'connected_sequential'`` (alias for the previous strategy)
        * ``'saturation_largest_first'``
        * ``'DSATUR'`` (alias for the previous strategy)

        For details see
        https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    balance : str (default 'count')
        If strategy is ``'balanced'``, determine the method of color balancing.

        * ``'count'`` attempts to balance the number of features per each color.
        * ``'area'`` attempts to balance the area covered by each color.
        * ``'centroid'`` attempts to balance the distance between colors based on the distance between centroids.
        * ``'distance'`` attempts to balance the distance between colors based on the distance between geometries. Slower than ``'centroid'``, but more precise.

        ``'centroid'`` and ``'distance'`` are significantly slower than other especially
        for larger GeoDataFrames.

        Apart from ``'count'``, all require CRS to be projected (not in degrees) to ensure
        metric values are correct.

    min_colors: int (default 4)
        If strategy is ``'balanced'``, define the minimal number of colors to be used.

    sw : 'queen', 'rook' or libpysal.weights.W (default 'queen')
        If min_distance is None, one can pass ``'libpysal.weights.W'`` object denoting neighbors
        or let greedy to generate one based on ``'queen'`` or ``'rook'`` contiguity.

    min_distance : float
        Set minimal distance between colors.

        If min_distance is not None, slower algorithm for generating spatial weights is used
        based on intersection between geometries. Min_distance is then used as a tolerance
        of intersection.

    silence_warnings : bool (default True)
        Silence libpysal warnings when creating spatial weights.

    interchange : bool (default False)
        Use the color interchange algorithm (applicable for networkx strategies)

        For details see
        https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html

    Returns
    -------
    color : pd.Series
        pandas.Series representing assigned color codes

    Raises
    ------
    ValueError
        If ``strategy`` is neither ``'balanced'`` nor a member of ``STRATEGIES``,
        or if ``sw`` is a string other than ``'queen'`` or ``'rook'``.

    Examples
    --------
    Default:

    >>> gdf['greedy_colors'] = greedy(gdf)

    Balanced by area:

    >>> gdf['balanced_area'] = greedy(gdf, strategy='balanced',
    ...                               balance='area')

    Using rook adjacency:

    >>> gdf['rook_adjacency'] = greedy(gdf, sw='rook')

    Adding minimal distance between colors:

    >>> gdf['min_distance'] = greedy(gdf, min_distance=100)

    Using different coloring strategy:

    >>> gdf['smallest_last'] = greedy(gdf, strategy='smallest_last')
    """
    # Reject an unknown strategy up front, before any (potentially slow)
    # spatial weights are generated.
    if strategy != "balanced" and strategy not in STRATEGIES:
        raise ValueError("{} is not a valid strategy.".format(strategy))

    # A minimal distance overrides whatever was passed as ``sw``.
    if min_distance is not None:
        sw = _geos_sw(gdf,
                      tolerance=min_distance,
                      silence_warnings=silence_warnings)

    if not isinstance(sw, W):
        if sw == "queen":
            sw = Queen.from_dataframe(gdf,
                                      ids=gdf.index.to_list(),
                                      silence_warnings=silence_warnings)
        elif sw == "rook":
            sw = Rook.from_dataframe(gdf,
                                     ids=gdf.index.to_list(),
                                     silence_warnings=silence_warnings)
        elif isinstance(sw, str):
            # An unrecognised keyword would otherwise be handed to the
            # coloring routines as a plain string and fail obscurely there.
            raise ValueError(
                "{} is not a valid sw option; use 'queen', 'rook' or a "
                "libpysal.weights.W object.".format(sw))

    if strategy == "balanced":
        return pd.Series(
            _balanced(gdf, sw, balance=balance, min_colors=min_colors))

    color = nx.greedy_color(sw.to_networkx(),
                            strategy=strategy,
                            interchange=interchange)
    # Order the colors by graph node key, then relabel them with the
    # GeoDataFrame index so the result lines up with ``gdf``.
    color = pd.Series(color).sort_index()
    color.index = gdf.index
    return color