def setup(self):
    from esda.moran import Moran

    self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
    f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
    self.y = np.array(f.by_col['HR8893'])
    self.rho = Moran(self.y, self.w).I
    self.n = len(self.y)
    self.k = int(self.n / 2)
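This reads like an ASV-style benchmark fixture. A minimal sketch of a class that could host it, assuming a benchmark runner that calls `setup()` once and then times `time_*` methods (the class and method names are illustrative):

```python
import libpysal
import numpy as np
from esda.moran import Moran


class MoranSuite:
    """Hypothetical benchmark suite wrapping the setup above."""

    def setup(self):
        self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
        f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
        self.y = np.array(f.by_col['HR8893'])

    def time_moran(self):
        # The runner times this call, so the full statistic is recomputed here.
        Moran(self.y, self.w)
```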
def moransI(obj, weights=None):
    """
    Inputs:
        obj: xarray object for which you want the autocorrelation
        weights: same shape as obj, but with weights which will be applied
            to obj to scale the autocorrelation. Simplest could be all ones.
    """
    # `if not weights:` raises on arrays; test for None explicitly.
    if weights is None:
        weights = np.ones_like(obj.values)
    # Note: esda's Moran expects a libpysal weights object, not a plain
    # array; see the sketch below for building one.
    return Moran(obj.values, weights)
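As written, the function hands a plain NumPy array to `Moran`, which actually expects a `libpysal.weights.W` object. A minimal sketch of building valid lattice weights for a 2-D xarray on a regular grid (the variable names are illustrative):

```python
import numpy as np
import xarray as xr
from libpysal.weights import lat2W
from esda.moran import Moran

# Hypothetical 2-D DataArray on a regular grid.
da = xr.DataArray(np.random.rand(10, 12), dims=("y", "x"))

# lat2W builds contiguity weights for an nrows x ncols lattice;
# the values are flattened to match the weights ordering.
w = lat2W(da.shape[0], da.shape[1], rook=True)
mi = Moran(da.values.flatten(), w)
print(mi.I, mi.p_sim)
```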
def test_plot_moran_simulation():
    # Load data and apply statistical analysis
    link_to_data = examples.get_path('Guerry.shp')
    gdf = gpd.read_file(link_to_data)
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calc Global Moran
    moran = Moran(y, w)
    # plot
    fig, _ = plot_moran_simulation(moran)
    plt.close(fig)
    # customize
    fig, _ = plot_moran_simulation(moran, fitline_kwds=dict(color='#4393c3'))
    plt.close(fig)
def test_plot_moran_simulation():
    # Load data and apply statistical analysis
    gdf = _test_data()
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calc Global Moran
    moran = Moran(y, w)
    # plot
    fig, _ = plot_moran_simulation(moran)
    plt.close(fig)
    # customize
    fig, _ = plot_moran_simulation(moran, fitline_kwds=dict(color='#4393c3'))
    plt.close(fig)
def test_moran_global_scatterplot():
    # Load data and apply statistical analysis
    gdf = _test_data()
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calc Global Moran
    moran = Moran(y, w)
    # plot
    fig, _ = _moran_global_scatterplot(moran)
    plt.close(fig)
    # customize
    fig, _ = _moran_global_scatterplot(moran, zstandard=False,
                                       aspect_equal=False,
                                       fitline_kwds=dict(color='#4393c3'))
    plt.close(fig)
def test_moran_global_scatterplot():
    # Load data and apply statistical analysis
    guerry = examples.load_example('Guerry')
    link_to_data = guerry.get_path('Guerry.shp')
    gdf = gpd.read_file(link_to_data)
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calc Global Moran
    moran = Moran(y, w)
    # plot
    fig, _ = _moran_global_scatterplot(moran)
    plt.close(fig)
    # customize
    fig, _ = _moran_global_scatterplot(moran, zstandard=False,
                                       aspect_equal=False,
                                       fitline_kwds=dict(color='#4393c3'))
    plt.close(fig)
def test_moran_scatterplot():
    gdf = _test_data()
    x = gdf['Suicids'].values
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calculate `esda.moran` Objects
    moran = Moran(y, w)
    moran_bv = Moran_BV(y, x, w)
    moran_loc = Moran_Local(y, w)
    moran_loc_bv = Moran_Local_BV(y, x, w)
    # try with p value so points are colored or warnings apply
    fig, _ = moran_scatterplot(moran, p=0.05, aspect_equal=False)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_loc, p=0.05)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_bv, p=0.05)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_loc_bv, p=0.05)
    plt.close(fig)
def test_moran_scatterplot():
    link_to_data = examples.get_path('Guerry.shp')
    gdf = gpd.read_file(link_to_data)
    x = gdf['Suicids'].values
    y = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'
    # Calculate `esda.moran` Objects
    moran = Moran(y, w)
    moran_bv = Moran_BV(y, x, w)
    moran_loc = Moran_Local(y, w)
    moran_loc_bv = Moran_Local_BV(y, x, w)
    # try with p value so points are colored or warnings apply
    fig, _ = moran_scatterplot(moran, p=0.05)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_loc, p=0.05)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_bv, p=0.05)
    plt.close(fig)
    fig, _ = moran_scatterplot(moran_loc_bv, p=0.05)
    plt.close(fig)
def moran_facet(moran_matrix, figsize=(16, 12),
                scatter_bv_kwds=None, fitline_bv_kwds=None,
                scatter_glob_kwds=dict(color='#737373'),
                fitline_glob_kwds=None):
    """
    Moran Facet visualization.
    Bivariate Moran plots fill the off-diagonal cells,
    with global Moran plots on the diagonal.

    Parameters
    ----------
    moran_matrix : esda.moran.Moran_BV_matrix instance
        Dictionary of Moran_BV objects
    figsize : tuple, optional
        W, h of figure. Default =(16,12)
    scatter_bv_kwds : keyword arguments, optional
        Keywords used for creating and designing the scatter points of
        off-diagonal Moran_BV plots.
        Default =None.
    fitline_bv_kwds : keyword arguments, optional
        Keywords used for creating and designing the moran fitline of
        off-diagonal Moran_BV plots.
        Default =None.
    scatter_glob_kwds : keyword arguments, optional
        Keywords used for creating and designing the scatter points of
        diagonal Moran plots.
        Default =dict(color='#737373').
    fitline_glob_kwds : keyword arguments, optional
        Keywords used for creating and designing the moran fitline of
        diagonal Moran plots.
        Default =None.

    Returns
    -------
    fig : Matplotlib Figure instance
        Moran facet figure
    axarr : matplotlib Axes instance
        Axes in which the figure is plotted

    Examples
    --------
    Imports

    >>> import matplotlib.pyplot as plt
    >>> import libpysal as lp
    >>> import numpy as np
    >>> import geopandas as gpd
    >>> from esda.moran import Moran_BV_matrix
    >>> from splot.esda import moran_facet

    Load data and calculate bivariate Moran statistics

    >>> f = gpd.read_file(lp.examples.get_path("sids2.dbf"))
    >>> varnames = ['SIDR74', 'SIDR79', 'NWR74', 'NWR79']
    >>> vars = [np.array(f[var]) for var in varnames]
    >>> w = lp.io.open(lp.examples.get_path("sids2.gal")).read()
    >>> moran_matrix = Moran_BV_matrix(vars, w, varnames=varnames)

    Plot

    >>> fig, axarr = moran_facet(moran_matrix)
    >>> plt.show()

    Customize plot

    >>> fig, axarr = moran_facet(moran_matrix,
    ...                          fitline_bv_kwds=dict(color='#4393c3'))
    >>> plt.show()

    """
    nrows = int(np.sqrt(len(moran_matrix))) + 1
    ncols = nrows

    fig, axarr = plt.subplots(nrows, ncols, figsize=figsize,
                              sharey=True, sharex=True)
    fig.suptitle('Moran Facet')

    for row in range(nrows):
        for col in range(ncols):
            if row == col:
                # Any off-diagonal entry in this row carries the row
                # variable; use `% nrows` rather than a hardcoded `% 4`
                # so facets other than 4x4 work as well.
                global_m = Moran(moran_matrix[row, (row + 1) % nrows].zy,
                                 moran_matrix[row, (row + 1) % nrows].w)
                _moran_global_scatterplot(global_m, ax=axarr[row, col],
                                          scatter_kwds=scatter_glob_kwds,
                                          fitline_kwds=fitline_glob_kwds)
                axarr[row, col].set_facecolor('#d9d9d9')
            else:
                _moran_bv_scatterplot(moran_matrix[row, col],
                                      ax=axarr[row, col],
                                      scatter_kwds=scatter_bv_kwds,
                                      fitline_kwds=fitline_bv_kwds)

            axarr[row, col].spines['bottom'].set_visible(False)
            axarr[row, col].spines['left'].set_visible(False)
            if row == nrows - 1:
                axarr[row, col].set_xlabel(str(
                    moran_matrix[(col + 1) % nrows, col].varnames['x']).format(col))
                axarr[row, col].spines['bottom'].set_visible(True)
            else:
                axarr[row, col].set_xlabel('')

            if col == 0:
                axarr[row, col].set_ylabel(('Spatial Lag of ' + str(
                    moran_matrix[row, (row + 1) % nrows].varnames['y'])).format(row))
                axarr[row, col].spines['left'].set_visible(True)
            else:
                axarr[row, col].set_ylabel('')
            axarr[row, col].set_title('')

    plt.tight_layout()
    return fig, axarr
        # (inside the nested subplot loop)
        ax = axes[i, j]
        maps.geoplot(complete_table, col=str(index_year[i * 3 + j]), ax=ax,
                     classi="Quantiles")
        ax.set_title('Per Capita Income %s Quintiles' % str(index_year[i * 3 + j]))
plt.tight_layout()

# It is quite obvious that the per capita incomes are not randomly
# distributed: we can spot clusters in the mid-south, south-east and
# north-east. Let's proceed to calculate Moran's I, a widely used measure of
# global spatial autocorrelation, to aid the visual interpretation.

w = libpysal.io.open(libpysal.examples.get_path("states48.gal")).read()
w.transform = 'R'
mits = [Moran(cs, w) for cs in pci]
res = np.array([(mi.I, mi.EI, mi.seI_norm, mi.sim[974]) for mi in mits])
years = np.arange(1929, 2010)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
ax.plot(years, res[:, 0], label="Moran's I")
# plot(years, res[:, 1], label='E[I]')
ax.plot(years, res[:, 1] + 1.96 * res[:, 2], label='Upper bound',
        linestyle='dashed')
ax.plot(years, res[:, 1] - 1.96 * res[:, 2], label='Lower bound',
        linestyle='dashed')
ax.set_title("Global spatial autocorrelation for annual US per capita incomes",
             fontdict={'fontsize': 15})
output_path = r"C:\path_to_folder\Maps"
filepath = os.path.join(output_path, week + '.png')

# Spatial weights - select one
# w = weights.Queen.from_dataframe(sa_df, idVariable="region_name")  # Queen contiguity matrix
# w = weights.Rook.from_dataframe(sa_df, idVariable="region_name")   # Rook contiguity matrix
w = weights.distance.KNN.from_dataframe(sa_df, ids="REG_NAME", k=6)  # K-nearest neighbors
w.transform = "R"
sa_df["lag_infections"] = weights.lag_spatial(w, sa_df[week])

# Global spatial autocorrelation
y = sa_df[week]
moran = Moran(y, w)

# Local spatial autocorrelation
m_local = Moran_Local(y, w)
lisa = m_local.Is

# Set CRS
sa_df = sa_df.to_crs("EPSG:3857")

# Plot map
fig, ax = plt.subplots(figsize=(9, 9))
lisa_cluster(m_local, sa_df, p=0.05, figsize=(9, 9), ax=ax)
description = 'Weekly Covid-19 Spatial Autocorrelation'
info_text = ('Hot- and coldspots indicate clusters of high and low infection rates.'
             '\nDonuts are regions with low infection rates surrounded by areas with high infection rates.'
             '\nDiamonds are regions with high infection rates surrounded by regions with low infection rates.')
ax.set_title(str(week_name), fontdict={'fontsize': 22}, loc='left')
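The "donut" and "diamond" labels in `info_text` correspond to the LISA quadrant codes that `Moran_Local` exposes via its `q` attribute. A minimal sketch of recovering those labels for significant observations, continuing from `m_local` and `sa_df` above (the `cluster` column name is illustrative):

```python
import numpy as np

# Quadrants: 1 = HH (hotspot), 2 = LH (donut), 3 = LL (coldspot), 4 = HL (diamond)
labels = {1: "hotspot", 2: "donut", 3: "coldspot", 4: "diamond"}

significant = m_local.p_sim < 0.05
sa_df["cluster"] = np.where(
    significant,
    [labels[q] for q in m_local.q],
    "not significant",
)
```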
def quantiles_MoranI_Plot(data_df, zipPolygon):
    caseWeekly_unstack = data_df['CasesWeekly'].unstack(level=0)
    zip_codes = gpd.read_file(zipPolygon)
    data_df_zipGPD = zip_codes.merge(caseWeekly_unstack, left_on='zip',
                                     right_on=caseWeekly_unstack.index)
    # print(data_df_zipGPD.head())
    # print(data_df_zipGPD.describe())
    print(data_df_zipGPD.columns)

    weeks = data_df.index.get_level_values('Week Number')
    weeks = np.unique(weeks)
    nrows = 2
    ncols = math.ceil(len(weeks) / nrows)
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(30, 15))
    for i in range(nrows):
        for j in range(ncols):
            # print("_" * 50)
            # print(str(weeks[i * ncols + j]))
            try:
                plt.rcParams.update({'font.size': 5})
                ax = axes[i, j]
                data_df_zipGPD.plot(ax=ax, column=weeks[i * ncols + j],
                                    cmap='OrRd', scheme='quantiles',
                                    legend=True)
                ax.set_title('daily cases %s Quintiles' % weeks[i * ncols + j])
                ax.axis('off')
                # Annotate each polygon with its ZIP code; pass the text
                # positionally (the `s=` keyword was removed in newer matplotlib).
                data_df_zipGPD.apply(
                    lambda x: ax.annotate(x.zip,
                                          xy=x.geometry.centroid.coords[0],
                                          ha='center'),
                    axis=1)
                leg = ax.get_legend()
                leg.set_bbox_to_anchor((0., 0., 0.2, 0.2))
            except Exception:
                pass
    plt.tight_layout()

    W = ps.lib.weights.Queen(data_df_zipGPD.geometry)
    W.transform = 'R'
    valArray = data_df_zipGPD[weeks].to_numpy()
    valArray_fillNan = bfill(valArray).T
    valArray_fillNan[np.isnan(valArray_fillNan)] = 0
    # print(valArray_fillNan, valArray_fillNan.shape)

    mits = [Moran(cs, W) for cs in valArray_fillNan]
    res = np.array([(mi.I, mi.EI, mi.seI_norm, mi.sim[974]) for mi in mits])
    # print(res)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
    ax.plot(weeks, res[:, 0], label="Moran's I")
    # plot(years, res[:, 1], label='E[I]')
    ax.plot(weeks, res[:, 1] + 1.96 * res[:, 2], label='Upper bound',
            linestyle='dashed')
    ax.plot(weeks, res[:, 1] - 1.96 * res[:, 2], label='Lower bound',
            linestyle='dashed')
    ax.set_title("Global spatial autocorrelation for Covid-19 cases",
                 fontdict={'fontsize': 15})
    # ax.set_xlim(weeks)
    # plt.axhline(y=0, color='gray', linestyle='--')
    ax.legend()
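The helper `bfill` is referenced above but not defined in this snippet. A minimal sketch of a row-wise backward-fill for a 2-D NumPy array, matching how it is used here (this implementation is an assumption, not the original helper):

```python
import numpy as np


def bfill(arr):
    """Backward-fill NaNs along each row of a 2-D array (assumed behavior)."""
    mask = np.isnan(arr)
    # For each cell, take its own column index if valid, else the last column.
    idx = np.where(~mask, np.arange(mask.shape[1]), mask.shape[1] - 1)
    # Sweep right-to-left so each NaN points at the next valid value.
    idx = np.minimum.accumulate(idx[:, ::-1], axis=1)[:, ::-1]
    return arr[np.arange(idx.shape[0])[:, None], idx]
```

Trailing NaNs have no later value to pull from and remain NaN, which the caller then zeroes out with `valArray_fillNan[np.isnan(valArray_fillNan)] = 0`.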
# Loop over all tifs in indir, clip each using clip_shp, and then calculate
# Moran's I, appending results to a list.
# (Assumes `indir`, `clip_file_name`, `workspace`, `csv_header` and
# `stats_list` are defined earlier in the script; the loop header below is
# reconstructed from this comment, so adjust it to however `tif` is obtained.
# `import rasterio.mask` is needed for rio.mask.mask to resolve.)
for tif in glob.glob(os.path.join(indir, '*.tif')):
    tif_name = os.path.basename(tif)
    with fiona.open(clip_file_name, 'r') as clip_shp:
        shapes = [feature["geometry"] for feature in clip_shp]
    with rio.open(tif) as src:
        clipped_img, clipped_transform = rio.mask.mask(src, shapes, crop=True)
        clipped_meta = src.meta

    # Mask out nodata
    masked_clipped_img = np.ma.masked_array(clipped_img, clipped_img == 32767)

    # Calculate weights matrix for the raster, and calculate Moran's I.
    # The clipped raster is (bands, rows, cols); lat2W takes (nrows, ncols).
    print("Building weights matrix for {x}".format(x=tif))
    weights = lat2W(masked_clipped_img.shape[1], masked_clipped_img.shape[2],
                    rook=False, id_type='int')
    print("Calculating Moran's I for {x}".format(x=tif))
    moran = Moran(masked_clipped_img, weights)

    # Add Moran's I to the results list
    stats_list.append((tif_name, moran.I))

# Write stats_list to csv
with open(workspace + '_morani.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(csv_header)
    writer.writerows(stats_list)
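`Moran` works on a flat vector of n values matching the weights; passing the masked 3-D clip relies on that flattening, and the mask itself is not guaranteed to be honored by the conversion, so the 32767 cells can leak into the statistic. A small self-contained sketch of the same lattice recipe on a synthetic raster, with nodata replaced by the mean of valid cells so those pixels contribute approximately nothing to the cross-products (an approximation, not the original script's behavior):

```python
import numpy as np
from libpysal.weights import lat2W
from esda.moran import Moran

# Synthetic 20x30 raster with a nodata value of 32767.
rng = np.random.default_rng(0)
img = rng.random((20, 30))
img[0, :5] = 32767

# Mean-fill nodata so masked cells are (approximately) neutral.
valid = img != 32767
filled = np.where(valid, img, img[valid].mean())

w = lat2W(filled.shape[0], filled.shape[1], rook=False)
print(Moran(filled.flatten(), w).I)
```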
def generate_clusters(gdf: gpd.GeoDataFrame,
                      col: str,
                      crs: Optional[int] = None,
                      alpha: float = 0.005,
                      geom_column: str = "geometry") -> gpd.GeoDataFrame:
    """Calculates spatial clusters/outliers based on a column in a geodataframe.

    Workflow:
    1. Create a spatial weights matrix
    2. Create a spatially lagged version of the variable of interest
    3. Calculate global spatial autocorrelation metrics
    4. Calculate local spatial autocorrelation (the clusters) using LISA
       (local indicators of spatial autocorrelation)
    5. Join data to original gdf

    While the code should work for any geodataframe, the current workflow is
    based on the assumption that the data being analyzed is in a hexagonal
    grid. This means we have polygons of approximately uniform weights. The
    https://pysal.org/libpysal/generated/libpysal.weights.Rook.html weighting
    calculates weights between all polygons that share an edge. Note that this
    requires the grid to be filled with polygons, i.e. we don't have islands
    in the grid.

    Input:
        gdf          The source geodataframe, should be a hexagonal grid if
                     using this script as is
        crs          A coordinate reference system, EPSG code
        col          The column with the data being modeled
        alpha        The threshold of statistical significance to be used when
                     determining whether a cell is a cluster/outlier or not.
                     Defaults to 0.005. Such a low value is used because our
                     data typically contains large contrasts between areas of
                     zero index (forest, seas) and built-up areas.
                     - Larger values show the boundary between built-up and nature
                     - Smaller values show contrasts within built-up areas

    The output is the original dataframe with 2 new columns:
        quadrant     The quadrant to which the observation belongs:
                     LL = low clusters = low values surrounded by low values
                     HH = high clusters = high values surrounded by high values
                     LH = low outliers = low values surrounded by high values
                     HL = high outliers = high values surrounded by low values
        significant  Whether the quadrant information is statistically
                     significant. The significance will depend on the number
                     of iterations and the random seed used in the process, as
                     polygons at the edge of significance may get slightly
                     different values at different runs.
    """
    # Project
    if crs:
        gdf = gdf.to_crs(crs)

    # The cluster algorithm fails if there are islands in the data, i.e. we
    # must have a full grid. This means filling the bbox of the geodataframe
    # with zero values at each missing hex. A zero index value indicates none
    # of the datasets had any values on the hex. Note that
    # 1) the datasets may have different bboxes and
    # 2) they may be sparse.
    # We cannot make any assumptions about the form of the data, other than
    # that it is aggregated in a hex grid.
    gdf_filled = fill_hex_grid(gdf, geom_column=geom_column)

    # Compute spatial weights and row-standardize
    weights = lps.weights.Rook.from_dataframe(gdf_filled, geom_col=geom_column)
    weights.set_transform("R")

    # Compute spatial lag
    y = gdf_filled[col]
    y_lag = lps.weights.lag_spatial(weights, y)
    col_lag = f"{col}_lag"
    data_lag = pd.DataFrame(data={col: y, col_lag: y_lag})

    # Global spatial autocorrelation
    mi = Moran(data_lag[col], weights)
    p_value = mi.p_sim
    print("\nGlobal spatial autocorrelation:\n"
          "Moran's I: " + str(round(mi.I, 3)) +
          "\np-value: " + str(round(p_value, 3)))

    # Calculate LISA values
    lisa = Moran_Local(
        data_lag[col],
        weights,
        permutations=100000,
        # seed=1  # Use this if absolute reproducibility is needed
    )

    # Identify whether each observation is significant or not
    data_lag["significant"] = lisa.p_sim < alpha

    # Identify the quadrant each observation belongs to
    data_lag["quadrant"] = lisa.q
    data_lag["quadrant"] = data_lag["quadrant"].replace({
        1: "HH",
        2: "LH",
        3: "LL",
        4: "HL"
    })

    # Print info
    print("\nDistribution of clusters/outliers (quadrants):\n" +
          str(data_lag["quadrant"].sort_values().value_counts()))
    print("\nSignificant clusters (using significance threshold " +
          str(alpha) + "):\n" + str(data_lag["significant"].value_counts()))

    # Merge original gdf and LISA quadrants data together
    gdf_clusters = gdf_filled.merge(data_lag[["quadrant", "significant"]],
                                    how="left", left_index=True,
                                    right_index=True)
    return gdf_clusters
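A minimal usage sketch for `generate_clusters`, assuming a hex-grid GeoDataFrame stored in `hexgrid.gpkg` with a data column named `index_value` (the file name, column name, and EPSG code are all hypothetical):

```python
import geopandas as gpd

# Hypothetical input: a hexagonal grid with the variable of interest.
gdf = gpd.read_file("hexgrid.gpkg")

clusters = generate_clusters(gdf, col="index_value", crs=3067, alpha=0.005)

# Plot significant quadrants only; non-significant cells stay uncolored.
ax = clusters[clusters["significant"]].plot(column="quadrant", legend=True)
clusters.boundary.plot(ax=ax, color="lightgrey", linewidth=0.2)
```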
def moran_global(dataset):
    # Relies on a `weights` object (a libpysal W) defined in the enclosing scope.
    y = dataset['AVG_SUICIDE_RATE'].values
    moran = Moran(y, weights)
    return moran
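Since `weights` is a free variable here, a caller has to build it before invoking the function. A short sketch, assuming the dataset is a polygon GeoDataFrame (the file name is illustrative):

```python
import geopandas as gpd
from libpysal.weights import Queen

dataset = gpd.read_file("suicide_rates.shp")  # hypothetical file
weights = Queen.from_dataframe(dataset)
weights.transform = 'R'

result = moran_global(dataset)
print(result.I, result.p_sim)
```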
from libpysal.weights import Queen
from esda.moran import Moran
import pandas as pd
'''Make sure to replace the string by drag and dropping your data into line 5 before you run the script'''
gdf = "REPLACEWITHYOURFILE"
gdf.rename(columns={"shape": "geometry"}, inplace=True)
gdf = gdf.dropna()  # drop polys with no income data

'''Contiguity weights matrices define spatial connections through the
existence of common boundaries. According to the queen method, two polygons
need to share a point or more to be considered neighbors'''
w_queen = Queen.from_dataframe(gdf)
w_queen.transform = 'R'

'''Moran's I tells us whether there is a spatial pattern or not. A value
closer to 1 indicates a positive relationship, closer to -1 a negative
relationship, and 0 indicates randomness. Return results to a dataframe'''
mi = Moran(gdf['b19013_001e'], w_queen)
result = pd.DataFrame([["Moran's I", mi.I], ["P-value", mi.p_sim]],
                      columns=["Name", "Value"])

%insights_return(result)