예제 #1
0
 def test_cutoff(self):
     """Check ``sm.P`` when class and lag cutoffs are supplied explicitly.

     The same cutoff array is passed for both ``cutoffs`` and
     ``lag_cutoffs``; the resulting conditional transition probability
     matrices are compared against hard-coded reference values.
     """
     bounds = np.array([0.8, 0.9, 1, 1.2])
     model = Spatial_Markov(
         self.rpci, self.w, cutoffs=bounds, lag_cutoffs=bounds)

     # One 5x5 matrix per lag class, one row per line.
     expected = np.array([
         [
             [0.96703297, 0.03296703, 0.0, 0.0, 0.0],
             [0.10638298, 0.68085106, 0.21276596, 0.0, 0.0],
             [0.0, 0.14285714, 0.7755102, 0.08163265, 0.0],
             [0.0, 0.0, 0.5, 0.5, 0.0],
             [0.0, 0.0, 0.0, 0.0, 0.0],
         ],
         [
             [0.88636364, 0.10606061, 0.00757576, 0.0, 0.0],
             [0.04402516, 0.89308176, 0.06289308, 0.0, 0.0],
             [0.0, 0.05882353, 0.8627451, 0.07843137, 0.0],
             [0.0, 0.0, 0.13846154, 0.86153846, 0.0],
             [0.0, 0.0, 0.0, 0.0, 1.0],
         ],
         [
             [0.78082192, 0.17808219, 0.02739726, 0.01369863, 0.0],
             [0.03488372, 0.90406977, 0.05813953, 0.00290698, 0.0],
             [0.0, 0.05919003, 0.84735202, 0.09034268, 0.00311526],
             [0.0, 0.0, 0.05811623, 0.92985972, 0.01202405],
             [0.0, 0.0, 0.0, 0.14285714, 0.85714286],
         ],
         [
             [0.82692308, 0.15384615, 0.0, 0.01923077, 0.0],
             [0.0703125, 0.7890625, 0.125, 0.015625, 0.0],
             [0.00295858, 0.06213018, 0.82248521, 0.10946746, 0.00295858],
             [0.0, 0.00185529, 0.07606679, 0.88497217, 0.03710575],
             [0.0, 0.0, 0.0, 0.07803468, 0.92196532],
         ],
         [
             [0.0, 0.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 0.0, 0.0, 0.0],
             [0.0, 0.06666667, 0.9, 0.03333333, 0.0],
             [0.0, 0.0, 0.05660377, 0.90566038, 0.03773585],
             [0.0, 0.0, 0.0, 0.03932584, 0.96067416],
         ],
     ])
     np.testing.assert_array_almost_equal(expected, model.P)
예제 #2
0
    def __init__(self,
                 dataset,
                 w_type,
                 w_kwds=None,
                 permutations=0,
                 cluster_type=None):
        """
        Fit a discrete Spatial Markov model to neighborhood cluster labels.

        Parameters
        ----------
        dataset         : object
                          Must expose ``census`` and ``tracts`` attributes.
                          After ``reset_index()``, ``census`` needs the
                          columns 'geoid', 'year' and `cluster_type`;
                          ``tracts`` needs a 'geoid' column to merge on.
        w_type          : string
                          Key selecting the spatial weights class: "rook",
                          "queen", "knn" or "kernel".
        w_kwds          : dict, optional
                          Extra keyword arguments forwarded to the weights
                          class' ``from_dataframe`` constructor (for
                          instance ``k`` for KNN). Default is None, meaning
                          no extra arguments.
        permutations    : int, optional
                          Passed through to ``Spatial_Markov``
                          (the default is 0).
        cluster_type    : string
                          Name of the column holding the cluster labels.
                          NOTE(review): the default of None is not a usable
                          value here - the column selection below needs a
                          real column name.

        Attributes
        ----------
        p, transitions, P, T, summary
                          Copied from the fitted ``Spatial_Markov`` instance.
        cluster_type    : string
                          The `cluster_type` argument.
        sm              : Spatial_Markov
                          The fitted instance itself.
        """
        # Previously ``w_kwds`` was accepted but silently ignored; normalise
        # it here so it can be forwarded to the weights constructor.
        w_kwds = {} if w_kwds is None else w_kwds

        # Long -> wide: one row per geoid, one column per year. Units with a
        # missing label in any year are dropped.
        y = dataset.census.copy().reset_index()
        y = y[['geoid', 'year', cluster_type]]
        y = y.groupby(['geoid', 'year']).first().unstack()
        y = y.dropna()

        # Right-merge so that only the units kept in ``y`` are used to build
        # the weights.
        tracts = dataset.tracts.copy().merge(
            y.reset_index(), on='geoid', how='right')
        w_dict = {'rook': Rook, 'queen': Queen, 'knn': KNN, 'kernel': Kernel}
        w = w_dict[w_type].from_dataframe(tracts, **w_kwds)
        y = y.astype(int)

        sm = Spatial_Markov(
            y,
            w,
            permutations=permutations,
            discrete=True,
            variable_name=cluster_type)
        self.p = sm.p
        self.transitions = sm.transitions
        self.P = sm.P
        self.T = sm.T
        self.summary = sm.summary
        self.cluster_type = cluster_type
        # keep the spatial markov instance here in case that users want to
        # estimate steady state distribution etc
        self.sm = sm
예제 #3
0
 def test___init__(self):
     """Check ``sm.S`` for a fixed-class model with ``k=5`` and ``m=5``."""
     expected = np.array([
         [0.43509425, 0.2635327, 0.20363044, 0.06841983, 0.02932278],
         [0.13391287, 0.33993305, 0.25153036, 0.23343016, 0.04119356],
         [0.12124869, 0.21137444, 0.2635101, 0.29013417, 0.1137326],
         [0.0776413, 0.19748806, 0.25352636, 0.22480415, 0.24654013],
         [0.01776781, 0.19964349, 0.19009833, 0.25524697, 0.3372434],
     ])
     model = Spatial_Markov(self.rpci, self.w, fixed=True, k=5, m=5)
     np.testing.assert_array_almost_equal(expected, model.S)
예제 #4
0
    def test_discretized(self):
        """Check ``sm.T`` for pre-discretized input (``discrete=True``).

        Queen contiguity weights are read from the ``us48.shp`` example
        shapefile, and the NumPy random seed is fixed before the model is
        built.
        """
        shp_path = ps.examples.get_path('us48.shp')
        queen = ps.weights.Contiguity.Queen.from_shapefile(shp_path)
        np.random.seed(24788)
        model = Spatial_Markov(self.discretized, queen, discrete=True)

        # One 4x4 block per lag class.
        expected = np.array([
            [
                [92.0, 88.0, 75.0, 95.0],
                [50.0, 55.0, 52.0, 35.0],
                [45.0, 48.0, 58.0, 48.0],
                [45.0, 32.0, 39.0, 51.0],
            ],
            [
                [54.0, 43.0, 40.0, 51.0],
                [92.0, 97.0, 91.0, 89.0],
                [44.0, 49.0, 56.0, 55.0],
                [40.0, 35.0, 75.0, 50.0],
            ],
            [
                [67.0, 51.0, 43.0, 58.0],
                [41.0, 58.0, 56.0, 35.0],
                [86.0, 88.0, 140.0, 89.0],
                [42.0, 56.0, 61.0, 73.0],
            ],
            [
                [56.0, 51.0, 39.0, 38.0],
                [50.0, 49.0, 50.0, 45.0],
                [41.0, 61.0, 55.0, 46.0],
                [93.0, 77.0, 87.0, 89.0],
            ],
        ])

        np.testing.assert_array_equal(model.T, expected)
예제 #5
0
 def test_chi2(self):
     """Check ``sm.chi2`` and ``sm.shtest`` against reference values.

     The input is built from the 1929-2009 columns of ``usjoin.csv``, each
     year's column divided by its own mean, with row-standardized weights
     read from ``states48.gal``.
     """
     table = ps.open(ps.examples.get_path('usjoin.csv'))
     by_year = np.array([table.by_col[str(year)]
                         for year in range(1929, 2010)])
     # Rows were years; flip so that each column is one year.
     values = by_year.transpose()
     relative = values / (values.mean(axis=0))

     weights = ps.open(ps.examples.get_path("states48.gal")).read()
     weights.transform = 'r'
     model = Spatial_Markov(relative, weights, fixed=True, k=5, m=5)

     expected_chi2 = np.array([
         [4.05598541e+01, 6.44644317e-04, 1.60000000e+01],
         [5.54751974e+01, 2.97033748e-06, 1.60000000e+01],
         [1.77528996e+01, 3.38563882e-01, 1.60000000e+01],
         [4.00390961e+01, 7.68422046e-04, 1.60000000e+01],
         [4.67966803e+01, 7.32512065e-05, 1.60000000e+01],
     ])
     np.testing.assert_array_almost_equal(np.array(model.chi2),
                                          expected_chi2)

     expected_shtest = np.array([
         [4.61209613e+02, 0.00000000e+00, 4.00000000e+00],
         [1.48140694e+02, 0.00000000e+00, 4.00000000e+00],
         [6.33129261e+01, 5.83089133e-13, 4.00000000e+00],
         [7.22778509e+01, 7.54951657e-15, 4.00000000e+00],
         [2.32659201e+02, 0.00000000e+00, 4.00000000e+00],
     ])
     np.testing.assert_array_almost_equal(expected_shtest,
                                          np.array(model.shtest))
예제 #6
0
def transition(gdf,
               cluster_col,
               time_var="year",
               id_var="geoid",
               w_type=None,
               permutations=0):
    """
    Model neighborhood cluster transitions with a (Spatial) Markov chain.

    The long-form input is pivoted to one row per spatial unit and one
    column per time period; units with a missing label in any period are
    dropped and the labels are cast to integers before fitting.

    Parameters
    ----------
    gdf             : (geo)DataFrame
                      Long-form (geo)DataFrame of neighborhood attributes,
                      including a column of neighborhood cluster labels.
    cluster_col     : string or int
                      Name of the column with the neighborhood segmentation,
                      e.g. "ward" or "kmeans".
    time_var        : string, optional
                      Column giving the time (or sequence position) of each
                      observation. Default is "year".
    id_var          : string, optional
                      Column with the unique id of each spatial unit.
                      Default is "geoid".
    w_type          : string, optional
                      Spatial weights to build ("rook", "queen", "knn" or
                      "kernel"). Default is None, which requests a
                      non-spatial Markov model.
    permutations    : int, optional
                      Number of permutations used for randomization based
                      inference (the default is 0).

    Returns
    -------
    mar             : object
                      A giddy.markov.Markov instance when w_type=None,
                      otherwise a giddy.markov.Spatial_Markov instance.

    Examples
    --------
    >>> from geosnap.data import Community
    >>> columbus = Community.from_ltdb(msa_fips=columbusfips)
    >>> columbus1 = columbus.cluster(columns=['median_household_income',
    ... 'p_poverty_rate', 'p_edu_college_greater', 'p_unemployment_rate'],
    ... method='ward', n_clusters=6)
    >>> gdf = columbus1.gdf
    >>> a = transition(gdf, "ward", w_type="rook")
    >>> a.p
    array([[0.79189189, 0.00540541, 0.0027027 , 0.13243243, 0.06216216,
        0.00540541],
       [0.0203252 , 0.75609756, 0.10569106, 0.11382114, 0.        ,
        0.00406504],
       [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.        ,
        0.01834862],
       [0.1959799 , 0.18341709, 0.00251256, 0.61809045, 0.        ,
        0.        ],
       [0.32307692, 0.        , 0.        , 0.        , 0.66153846,
        0.01538462],
       [0.09375   , 0.0625    , 0.        , 0.        , 0.        ,
        0.84375   ]])
    >>> a.P[0]
    array([[0.82119205, 0.        , 0.        , 0.10927152, 0.06622517,
        0.00331126],
       [0.14285714, 0.57142857, 0.14285714, 0.14285714, 0.        ,
        0.        ],
       [0.5       , 0.        , 0.5       , 0.        , 0.        ,
        0.        ],
       [0.21428571, 0.14285714, 0.        , 0.64285714, 0.        ,
        0.        ],
       [0.18918919, 0.        , 0.        , 0.        , 0.78378378,
        0.02702703],
       [0.28571429, 0.        , 0.        , 0.        , 0.        ,
        0.71428571]])
    """

    flat = gdf.copy().reset_index()

    # Long -> wide: rows are spatial units, columns are time periods.
    long_form = flat[[id_var, time_var, cluster_col]]
    wide = long_form.pivot(index=id_var, columns=time_var, values=cluster_col)
    wide = wide.dropna().astype("int")
    classes = wide.values

    # Non-spatial case: a classic Markov chain on the label sequences.
    if w_type is None:
        return Markov(classes)

    # Spatial case: attach one record per unit to the wide table so the
    # weights are built for the units kept in ``classes``.
    one_per_unit = flat.drop_duplicates([id_var])
    merged = wide.merge(one_per_unit, left_index=True, right_on=id_var)
    builders = {"rook": Rook, "queen": Queen, "knn": KNN, "kernel": Kernel}
    weights = builders[w_type].from_dataframe(merged)
    weights.transform = "r"
    return Spatial_Markov(classes,
                          weights,
                          permutations=permutations,
                          discrete=True,
                          variable_name=cluster_col)
예제 #7
0
def transition(
    gdf,
    cluster_col,
    temporal_index="year",
    unit_index="geoid",
    w_type="rook",
    w_options=None,
    permutations=0,
):
    """
    (Spatial) Markov approach to transitional dynamics of neighborhoods.

    The long-form input is pivoted to one row per spatial unit and one
    column per time period; units with a missing label in any period are
    dropped and the labels are cast to integers before fitting.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame or pandas.DataFrame
        Long-form geopandas.GeoDataFrame or pandas.DataFrame containing neighborhood
        attributes with a column defining neighborhood clusters. A geometry
        column is required whenever `w_type` is not None.
    cluster_col : string or int
        Column name for the neighborhood segmentation, such as
        "ward", "kmeans", etc.
    temporal_index : string, optional
        Column defining time and or sequencing of the long-form data.
        Default is "year".
    unit_index : string, optional
        Column identifying the unique id of spatial units.
        Default is "geoid".
    w_type : string or None, optional
        Type of spatial weights type ("rook", "queen", "knn" or
        "kernel") to be used for spatial structure. Default is
        "rook". Pass None if non-spatial Markov transition rates are
        desired.
    w_options : dict, optional
        additional options passed to a libpysal weights constructor
        (e.g. `k` for a KNN weights matrix)
    permutations : int, optional
        number of permutations for use in randomization based
        inference (the default is 0).

    Returns
    -------
    mar : giddy.markov.Markov instance or giddy.markov.Spatial_Markov
        if w_type=None, a classic Markov instance is returned.
        if w_type is given, a Spatial_Markov instance is returned.

    Raises
    ------
    ValueError
        If `unit_index` is not a column of `gdf`.

    Examples
    --------
    >>> from geosnap import Community
    >>> columbus = Community.from_ltdb(msa_fips="18140")
    >>> columbus1 = columbus.cluster(columns=['median_household_income',
    ... 'p_poverty_rate', 'p_edu_college_greater', 'p_unemployment_rate'],
    ... method='ward', n_clusters=6)
    >>> gdf = columbus1.gdf
    >>> a = transition(gdf, "ward", w_type="rook")
    >>> a.p
    array([[0.79189189, 0.00540541, 0.0027027 , 0.13243243, 0.06216216,
        0.00540541],
       [0.0203252 , 0.75609756, 0.10569106, 0.11382114, 0.        ,
        0.00406504],
       [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.        ,
        0.01834862],
       [0.1959799 , 0.18341709, 0.00251256, 0.61809045, 0.        ,
        0.        ],
       [0.32307692, 0.        , 0.        , 0.        , 0.66153846,
        0.01538462],
       [0.09375   , 0.0625    , 0.        , 0.        , 0.        ,
        0.84375   ]])
    >>> a.P[0]
    array([[0.82119205, 0.        , 0.        , 0.10927152, 0.06622517,
        0.00331126],
       [0.14285714, 0.57142857, 0.14285714, 0.14285714, 0.        ,
        0.        ],
       [0.5       , 0.        , 0.5       , 0.        , 0.        ,
        0.        ],
       [0.21428571, 0.14285714, 0.        , 0.64285714, 0.        ,
        0.        ],
       [0.18918919, 0.        , 0.        , 0.        , 0.78378378,
        0.02702703],
       [0.28571429, 0.        , 0.        , 0.        , 0.        ,
        0.71428571]])
    """
    if not w_options:
        w_options = {}
    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if unit_index not in gdf.columns:
        raise ValueError(
            f"The unit_index ({unit_index}) column is not in the geodataframe"
        )
    gdf_temp = gdf.copy().reset_index()
    df = gdf_temp[[unit_index, temporal_index, cluster_col]]
    # Long -> wide: rows are spatial units, columns are time periods.
    df_wide = (df.pivot(index=unit_index,
                        columns=temporal_index,
                        values=cluster_col).dropna().astype("int"))
    y = df_wide.values
    if w_type is None:
        mar = Markov(y)  # class markov modeling
    else:
        # One geometry per unit, joined on the unit id so that the weights
        # cover the units kept in ``df_wide``.
        geoms = gdf_temp.groupby(unit_index).first()[gdf_temp.geometry.name]
        gdf_wide = df_wide.merge(geoms, left_index=True, right_index=True)
        w = Ws[w_type].from_dataframe(gpd.GeoDataFrame(gdf_wide), **w_options)
        w.transform = "r"
        mar = Spatial_Markov(y,
                             w,
                             permutations=permutations,
                             discrete=True,
                             variable_name=cluster_col)
    return mar