def test_cutoff(self):
    """Check ``Spatial_Markov.P`` when explicit class cutoffs are supplied.

    The same four break points are passed as both ``cutoffs`` and
    ``lag_cutoffs``; the expected result is a 5 x 5 x 5 array.
    """
    breaks = np.array([0.8, 0.9, 1, 1.2])
    model = Spatial_Markov(
        self.rpci, self.w, cutoffs=breaks, lag_cutoffs=breaks
    )
    expected = np.array([
        [
            [0.96703297, 0.03296703, 0.0, 0.0, 0.0],
            [0.10638298, 0.68085106, 0.21276596, 0.0, 0.0],
            [0.0, 0.14285714, 0.7755102, 0.08163265, 0.0],
            [0.0, 0.0, 0.5, 0.5, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
        ],
        [
            [0.88636364, 0.10606061, 0.00757576, 0.0, 0.0],
            [0.04402516, 0.89308176, 0.06289308, 0.0, 0.0],
            [0.0, 0.05882353, 0.8627451, 0.07843137, 0.0],
            [0.0, 0.0, 0.13846154, 0.86153846, 0.0],
            [0.0, 0.0, 0.0, 0.0, 1.0],
        ],
        [
            [0.78082192, 0.17808219, 0.02739726, 0.01369863, 0.0],
            [0.03488372, 0.90406977, 0.05813953, 0.00290698, 0.0],
            [0.0, 0.05919003, 0.84735202, 0.09034268, 0.00311526],
            [0.0, 0.0, 0.05811623, 0.92985972, 0.01202405],
            [0.0, 0.0, 0.0, 0.14285714, 0.85714286],
        ],
        [
            [0.82692308, 0.15384615, 0.0, 0.01923077, 0.0],
            [0.0703125, 0.7890625, 0.125, 0.015625, 0.0],
            [0.00295858, 0.06213018, 0.82248521, 0.10946746, 0.00295858],
            [0.0, 0.00185529, 0.07606679, 0.88497217, 0.03710575],
            [0.0, 0.0, 0.0, 0.07803468, 0.92196532],
        ],
        [
            [0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.06666667, 0.9, 0.03333333, 0.0],
            [0.0, 0.0, 0.05660377, 0.90566038, 0.03773585],
            [0.0, 0.0, 0.0, 0.03932584, 0.96067416],
        ],
    ])
    np.testing.assert_array_almost_equal(expected, model.P)
def __init__(self, dataset, w_type, w_kwds=None, permutations=0, cluster_type=None):
    """Fit a discrete spatial Markov model on a cluster-label column.

    Parameters
    ----------
    dataset : object
        Must expose ``census`` (a frame that yields ``geoid``, ``year`` and
        the ``cluster_type`` column after ``reset_index()``) and ``tracts``
        (a frame with a ``geoid`` column, used to build the weights).
    w_type : str
        Key selecting the weights constructor: ``'rook'``, ``'queen'``,
        ``'knn'`` or ``'kernel'``. Any other value raises ``KeyError``.
    w_kwds : dict, optional
        Extra keyword arguments forwarded to the weights constructor's
        ``from_dataframe`` (e.g. ``k`` for KNN). ``None`` means no extras.
    permutations : int, optional
        Passed through to ``Spatial_Markov`` (default 0).
    cluster_type : str
        Name of the column in ``dataset.census`` holding the cluster labels.
    """
    # `w_kwds` used to be accepted but never used, so caller-supplied weights
    # options were silently dropped. Forward them to the constructor.
    weights_options = w_kwds or {}

    # Long -> wide: one row per geoid, one column per year; units with a
    # missing label in any year are discarded.
    labels = dataset.census.copy().reset_index()
    labels = labels[['geoid', 'year', cluster_type]]
    labels = labels.groupby(['geoid', 'year']).first().unstack()
    labels = labels.dropna()

    # Right join keeps only the tracts that survived the dropna above.
    tracts = dataset.tracts.copy().merge(
        labels.reset_index(), on='geoid', how='right')

    w_dict = {'rook': Rook, 'queen': Queen, 'knn': KNN, 'kernel': Kernel}
    w = w_dict[w_type].from_dataframe(tracts, **weights_options)

    labels = labels.astype(int)
    sm = Spatial_Markov(
        labels,
        w,
        permutations=permutations,
        discrete=True,
        variable_name=cluster_type)

    self.p = sm.p
    self.transitions = sm.transitions
    self.P = sm.P
    self.T = sm.T
    self.summary = sm.summary
    self.cluster_type = cluster_type
    # keep the spatial markov instance here in case that users want to
    # estimate steady state distribution etc
    self.sm = sm
def test___init__(self):
    """Check the ``S`` attribute of a model built with ``fixed=True, k=5, m=5``."""
    model = Spatial_Markov(self.rpci, self.w, fixed=True, k=5, m=5)
    expected = np.array([
        [0.43509425, 0.2635327, 0.20363044, 0.06841983, 0.02932278],
        [0.13391287, 0.33993305, 0.25153036, 0.23343016, 0.04119356],
        [0.12124869, 0.21137444, 0.2635101, 0.29013417, 0.1137326],
        [0.0776413, 0.19748806, 0.25352636, 0.22480415, 0.24654013],
        [0.01776781, 0.19964349, 0.19009833, 0.25524697, 0.3372434],
    ])
    np.testing.assert_array_almost_equal(expected, model.S)
def test_discretized(self):
    """Check the ``T`` attribute for pre-discretized input (``discrete=True``).

    Queen contiguity weights are read from the bundled ``us48.shp`` example
    and the NumPy random seed is fixed before the model is built.
    """
    shapefile = ps.examples.get_path('us48.shp')
    weights = ps.weights.Contiguity.Queen.from_shapefile(shapefile)
    np.random.seed(24788)
    model = Spatial_Markov(self.discretized, weights, discrete=True)
    expected = np.array([
        [
            [92.0, 88.0, 75.0, 95.0],
            [50.0, 55.0, 52.0, 35.0],
            [45.0, 48.0, 58.0, 48.0],
            [45.0, 32.0, 39.0, 51.0],
        ],
        [
            [54.0, 43.0, 40.0, 51.0],
            [92.0, 97.0, 91.0, 89.0],
            [44.0, 49.0, 56.0, 55.0],
            [40.0, 35.0, 75.0, 50.0],
        ],
        [
            [67.0, 51.0, 43.0, 58.0],
            [41.0, 58.0, 56.0, 35.0],
            [86.0, 88.0, 140.0, 89.0],
            [42.0, 56.0, 61.0, 73.0],
        ],
        [
            [56.0, 51.0, 39.0, 38.0],
            [50.0, 49.0, 50.0, 45.0],
            [41.0, 61.0, 55.0, 46.0],
            [93.0, 77.0, 87.0, 89.0],
        ],
    ])
    np.testing.assert_array_equal(model.T, expected)
def test_chi2(self):
    """Check the ``chi2`` and ``shtest`` attributes on the ``usjoin`` example.

    The per-year columns 1929-2009 are read from ``usjoin.csv``, divided by
    each year's mean, and modelled with the ``states48.gal`` weights.
    """
    table = ps.open(ps.examples.get_path('usjoin.csv'))
    income = np.array(
        [table.by_col[str(year)] for year in range(1929, 2010)]
    ).transpose()
    relative_income = income / (income.mean(axis=0))

    weights = ps.open(ps.examples.get_path("states48.gal")).read()
    weights.transform = 'r'

    model = Spatial_Markov(relative_income, weights, fixed=True, k=5, m=5)

    expected_chi2 = np.array([
        [4.05598541e+01, 6.44644317e-04, 1.60000000e+01],
        [5.54751974e+01, 2.97033748e-06, 1.60000000e+01],
        [1.77528996e+01, 3.38563882e-01, 1.60000000e+01],
        [4.00390961e+01, 7.68422046e-04, 1.60000000e+01],
        [4.67966803e+01, 7.32512065e-05, 1.60000000e+01],
    ])
    np.testing.assert_array_almost_equal(np.array(model.chi2), expected_chi2)

    expected_shtest = np.array([
        [4.61209613e+02, 0.00000000e+00, 4.00000000e+00],
        [1.48140694e+02, 0.00000000e+00, 4.00000000e+00],
        [6.33129261e+01, 5.83089133e-13, 4.00000000e+00],
        [7.22778509e+01, 7.54951657e-15, 4.00000000e+00],
        [2.32659201e+02, 0.00000000e+00, 4.00000000e+00],
    ])
    np.testing.assert_array_almost_equal(
        expected_shtest, np.array(model.shtest)
    )
def transition(gdf, cluster_col, time_var="year", id_var="geoid", w_type=None, permutations=0):
    """
    Model neighborhood cluster transitions with a (spatial) Markov chain.

    The long-form input is pivoted to one row per spatial unit and one
    column per time period; units with a missing label in any period are
    dropped and the labels are cast to integers before modelling.

    Parameters
    ----------
    gdf : (geo)DataFrame
        Long-form (geo)DataFrame containing neighborhood attributes and a
        column of neighborhood cluster labels.
    cluster_col : string or int
        Column holding the neighborhood segmentation, such as "ward",
        "kmeans", etc.
    time_var : string, optional
        Column defining time and/or sequencing of the long-form data.
        Default is "year".
    id_var : string, optional
        Column identifying the unique id of spatial units.
        Default is "geoid".
    w_type : string, optional
        Spatial weights type ("rook", "queen", "knn" or "kernel") used for
        the spatial structure. Default is None, which yields non-spatial
        Markov transition rates.
    permutations : int, optional
        Number of permutations for randomization based inference
        (the default is 0).

    Returns
    -------
    mar : object
        A giddy.markov.Markov instance if w_type is None; otherwise a
        giddy.markov.Spatial_Markov instance.

    Examples
    --------
    >>> from geosnap.data import Community
    >>> columbus = Community.from_ltdb(msa_fips=columbusfips)
    >>> columbus1 = columbus.cluster(columns=['median_household_income',
    ... 'p_poverty_rate', 'p_edu_college_greater', 'p_unemployment_rate'],
    ... method='ward', n_clusters=6)
    >>> gdf = columbus1.gdf
    >>> a = transition(gdf, "ward", w_type="rook")
    >>> a.p
    array([[0.79189189, 0.00540541, 0.0027027 , 0.13243243, 0.06216216,
            0.00540541],
           [0.0203252 , 0.75609756, 0.10569106, 0.11382114, 0.        ,
            0.00406504],
           [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.        ,
            0.01834862],
           [0.1959799 , 0.18341709, 0.00251256, 0.61809045, 0.        ,
            0.        ],
           [0.32307692, 0.        , 0.        , 0.        , 0.66153846,
            0.01538462],
           [0.09375   , 0.0625    , 0.        , 0.        , 0.        ,
            0.84375   ]])
    >>> a.P[0]
    array([[0.82119205, 0.        , 0.        , 0.10927152, 0.06622517,
            0.00331126],
           [0.14285714, 0.57142857, 0.14285714, 0.14285714, 0.        ,
            0.        ],
           [0.5       , 0.        , 0.5       , 0.        , 0.        ,
            0.        ],
           [0.21428571, 0.14285714, 0.        , 0.64285714, 0.        ,
            0.        ],
           [0.18918919, 0.        , 0.        , 0.        , 0.78378378,
            0.02702703],
           [0.28571429, 0.        , 0.        , 0.        , 0.        ,
            0.71428571]])
    """
    long_frame = gdf.copy().reset_index()
    labels_long = long_frame[[id_var, time_var, cluster_col]]

    # One row per unit, one column per period; incomplete units are removed.
    labels_wide = labels_long.pivot(
        index=id_var, columns=time_var, values=cluster_col
    )
    labels_wide = labels_wide.dropna().astype("int")
    y = labels_wide.values

    if w_type is None:
        # Classic (non-spatial) Markov modeling.
        return Markov(y)

    # Keep a single record per unit so the weights are built on the same
    # units that remain in the wide label table.
    one_per_unit = long_frame.drop_duplicates([id_var])
    units_wide = labels_wide.merge(
        one_per_unit, left_index=True, right_on=id_var
    )

    constructors = {"rook": Rook, "queen": Queen, "knn": KNN, "kernel": Kernel}
    w = constructors[w_type].from_dataframe(units_wide)
    w.transform = "r"

    return Spatial_Markov(
        y,
        w,
        permutations=permutations,
        discrete=True,
        variable_name=cluster_col,
    )
def transition(
    gdf,
    cluster_col,
    temporal_index="year",
    unit_index="geoid",
    w_type="rook",
    w_options=None,
    permutations=0,
):
    """
    Model neighborhood cluster transitions with a (spatial) Markov chain.

    The long-form input is pivoted to one row per spatial unit and one
    column per time period; units with a missing label in any period are
    dropped and the labels are cast to integers before modelling.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame or pandas.DataFrame
        Long-form frame containing neighborhood attributes and a column of
        neighborhood cluster labels.
    cluster_col : string or int
        Column holding the neighborhood segmentation, such as "ward",
        "kmeans", etc.
    temporal_index : string, optional
        Column defining time and/or sequencing of the long-form data.
        Default is "year".
    unit_index : string, optional
        Column identifying the unique id of spatial units. It must be
        present in ``gdf.columns``. Default is "geoid".
    w_type : string or None, optional
        Spatial weights type ("rook", "queen", "knn" or "kernel") used for
        the spatial structure. Default is "rook". Pass None to get
        non-spatial Markov transition rates.
    w_options : dict, optional
        Additional options passed to the libpysal weights constructor
        (e.g. `k` for a KNN weights matrix). None or an empty dict means
        no extra options.
    permutations : int, optional
        Number of permutations for randomization based inference
        (the default is 0).

    Returns
    -------
    mar : giddy.markov.Markov or giddy.markov.Spatial_Markov
        A classic Markov instance if w_type is None; otherwise a
        Spatial_Markov instance.

    Raises
    ------
    AssertionError
        If `unit_index` is not a column of `gdf`.

    Examples
    --------
    >>> from geosnap import Community
    >>> columbus = Community.from_ltdb(msa_fips="18140")
    >>> columbus1 = columbus.cluster(columns=['median_household_income',
    ... 'p_poverty_rate', 'p_edu_college_greater', 'p_unemployment_rate'],
    ... method='ward', n_clusters=6)
    >>> gdf = columbus1.gdf
    >>> a = transition(gdf, "ward", w_type="rook")
    >>> a.p
    array([[0.79189189, 0.00540541, 0.0027027 , 0.13243243, 0.06216216,
            0.00540541],
           [0.0203252 , 0.75609756, 0.10569106, 0.11382114, 0.        ,
            0.00406504],
           [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.        ,
            0.01834862],
           [0.1959799 , 0.18341709, 0.00251256, 0.61809045, 0.        ,
            0.        ],
           [0.32307692, 0.        , 0.        , 0.        , 0.66153846,
            0.01538462],
           [0.09375   , 0.0625    , 0.        , 0.        , 0.        ,
            0.84375   ]])
    >>> a.P[0]
    array([[0.82119205, 0.        , 0.        , 0.10927152, 0.06622517,
            0.00331126],
           [0.14285714, 0.57142857, 0.14285714, 0.14285714, 0.        ,
            0.        ],
           [0.5       , 0.        , 0.5       , 0.        , 0.        ,
            0.        ],
           [0.21428571, 0.14285714, 0.        , 0.64285714, 0.        ,
            0.        ],
           [0.18918919, 0.        , 0.        , 0.        , 0.78378378,
            0.02702703],
           [0.28571429, 0.        , 0.        , 0.        , 0.        ,
            0.71428571]])
    """
    w_options = w_options or {}
    assert (
        unit_index in gdf.columns
    ), f"The unit_index ({unit_index}) column is not in the geodataframe"

    long_frame = gdf.copy().reset_index()
    labels_long = long_frame[[unit_index, temporal_index, cluster_col]]

    # One row per unit, one column per period; incomplete units are removed.
    labels_wide = labels_long.pivot(
        index=unit_index, columns=temporal_index, values=cluster_col
    )
    labels_wide = labels_wide.dropna().astype("int")
    y = labels_wide.values

    if w_type is None:
        # Classic (non-spatial) Markov modeling.
        return Markov(y)

    # Attach each unit's first recorded geometry to the wide label table so
    # the weights are built on exactly the units that remain in `y`.
    geometry_column = long_frame.geometry.name
    unit_geoms = long_frame.groupby(unit_index).first()[geometry_column]
    units_wide = labels_wide.merge(
        unit_geoms, left_index=True, right_index=True
    )

    w = Ws[w_type].from_dataframe(gpd.GeoDataFrame(units_wide), **w_options)
    w.transform = "r"

    return Spatial_Markov(
        y,
        w,
        permutations=permutations,
        discrete=True,
        variable_name=cluster_col,
    )