def _moran_loc_from_rose_calc(rose):
    """
    Build the pair of esda.moran.Moran_Local objects for a giddy.rose object.

    Column 0 and column 1 of ``rose.Y`` are each analysed with the weights
    ``rose.w``. The NumPy global random state is captured up front and put
    back after every computation, so both computations start from the same
    state and the caller's state is unchanged on return.

    Returns
    -------
    tuple
        ``(moran_locy, moran_locx)`` -- the result for column 0 followed by
        the result for column 1.
    """
    saved_state = np.random.get_state()
    results = []
    for column in (0, 1):
        results.append(Moran_Local(rose.Y[:, column], rose.w))
        # Rewind the global RNG to where it was when we were called.
        np.random.set_state(saved_state)
    moran_locy, moran_locx = results
    return moran_locy, moran_locx
def test_plot_local_autocorrelation():
    """Smoke-test plot_local_autocorrelation on the Columbus HOVAL column."""
    shp_path = examples.load_example('Columbus').get_path('columbus.shp')
    df = gpd.read_file(shp_path)
    hoval = df['HOVAL'].values
    w = Queen.from_dataframe(df)
    w.transform = 'r'
    moran_loc = Moran_Local(hoval, w)

    # plain call
    fig, _ = plot_local_autocorrelation(moran_loc, df, 'HOVAL', p=0.05)
    plt.close(fig)

    # with a quadrant and a region mask
    fig, _ = plot_local_autocorrelation(
        moran_loc, df, 'HOVAL',
        p=0.05,
        region_column='POLYID',
        aspect_equal=False,
        mask=['1', '2', '3'],
        quadrant=1,
    )
    plt.close(fig)

    # this mask is expected to be rejected with a ValueError
    assert_raises(
        ValueError,
        plot_local_autocorrelation,
        moran_loc, df, 'HOVAL',
        p=0.05,
        region_column='POLYID',
        mask=['100', '200', '300'],
        quadrant=1,
    )
def test_moran_loc_bv_scatterplot():
    """Smoke-test _moran_loc_bv_scatterplot on the Guerry data set."""
    gdf = gpd.read_file(examples.get_path('Guerry.shp'))
    suicides = gdf['Suicids'].values
    donations = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'

    # Univariate and bivariate local Moran statistics
    moran_loc = Moran_Local(donations, w)
    moran_loc_bv = Moran_Local_BV(suicides, donations, w)

    # first with defaults, then with a p value and aspect_equal switched off
    for plot_kwds in ({}, {'p': 0.05, 'aspect_equal': False}):
        fig, _ = _moran_loc_bv_scatterplot(moran_loc_bv, **plot_kwds)
        plt.close(fig)

    # a univariate Moran_Local object must be rejected
    assert_raises(ValueError, _moran_loc_bv_scatterplot, moran_loc, p=0.5)
    # passing a scatter colour together with p is expected to warn
    assert_warns(
        UserWarning,
        _moran_loc_bv_scatterplot,
        moran_loc_bv,
        p=0.5,
        scatter_kwds=dict(c='r'),
    )
def _test_calc_moran_loc(gdf, var='HOVAL'):
    """Return Moran_Local for column ``var`` of ``gdf``.

    The weights are Queen weights built from ``gdf`` with the transform
    set to ``'r'``.
    """
    values = gdf[var].values
    queen_w = Queen.from_dataframe(gdf)
    queen_w.transform = 'r'
    return Moran_Local(values, queen_w)
def test_moran_loc_scatterplot():
    """Smoke-test _moran_loc_scatterplot on the Columbus data set."""
    df = _test_data_columbus()
    income = df['INC'].values
    hoval = df['HOVAL'].values
    w = Queen.from_dataframe(df)
    w.transform = 'r'
    moran_loc = Moran_Local(hoval, w)
    moran_bv = Moran_BV(income, hoval, w)

    line_colour = '#4393c3'
    scenarios = (
        # no p value
        {},
        # p value, aspect_equal switched off
        dict(p=0.05, aspect_equal=False,
             fitline_kwds=dict(color=line_colour)),
        # p value, zstandard switched off
        dict(p=0.05, zstandard=False,
             fitline_kwds=dict(color=line_colour)),
        # no p value, zstandard switched off
        dict(zstandard=False, fitline_kwds=dict(color=line_colour)),
    )
    for scenario in scenarios:
        fig, _ = _moran_loc_scatterplot(moran_loc, **scenario)
        plt.close(fig)

    # a Moran_BV object must be rejected
    raises(ValueError, _moran_loc_scatterplot, moran_bv, p=0.5)
    # passing a scatter colour together with p is expected to warn
    warns(
        UserWarning,
        _moran_loc_scatterplot,
        moran_loc,
        p=0.5,
        scatter_kwds=dict(c='#4393c3'),
    )
def test_lisa_cluster():
    """Smoke-test lisa_cluster on the Columbus HOVAL column."""
    df = gpd.read_file(examples.get_path('columbus.shp'))
    hoval = df['HOVAL'].values
    queen_w = Queen.from_dataframe(df)
    queen_w.transform = 'r'
    moran_loc = Moran_Local(hoval, queen_w)

    fig, _ = lisa_cluster(moran_loc, df)
    plt.close(fig)
def test_plot_local_autocorrelation():
    """Smoke-test plot_local_autocorrelation on the Columbus HOVAL column."""
    df = gpd.read_file(examples.get_path('columbus.shp'))
    hoval = df['HOVAL'].values
    queen_w = lp.Queen.from_dataframe(df)
    queen_w.transform = 'r'
    moran_loc = Moran_Local(hoval, queen_w)

    # plain call
    fig, _ = plot_local_autocorrelation(moran_loc, df, 'HOVAL', p=0.05)
    plt.close(fig)

    # with a quadrant and a region mask
    fig, _ = plot_local_autocorrelation(
        moran_loc, df, 'HOVAL',
        p=0.05,
        region_column='POLYID',
        mask=['1', '2', '3'],
        quadrant=1,
    )
    plt.close(fig)
def test_moran_loc_scatterplot():
    """Smoke-test moran_loc_scatterplot on the Columbus HOVAL column."""
    df = gpd.read_file(examples.get_path('columbus.shp'))
    hoval = df['HOVAL'].values
    queen_w = lp.Queen.from_dataframe(df)
    queen_w.transform = 'r'
    moran_loc = Moran_Local(hoval, queen_w)

    scenarios = (
        # p value only
        dict(p=0.05),
        # p value plus custom fit-line keywords
        dict(p=0.05, fitline_kwds=dict(color='#4393c3')),
    )
    for scenario in scenarios:
        fig, _ = moran_loc_scatterplot(moran_loc, **scenario)
        plt.close(fig)
def test_moran_loc_scatterplot():
    """Smoke-test _moran_loc_scatterplot on the Columbus example data."""
    shp_path = examples.load_example('Columbus').get_path('columbus.shp')
    df = gpd.read_file(shp_path)
    income = df['INC'].values
    hoval = df['HOVAL'].values
    w = Queen.from_dataframe(df)
    w.transform = 'r'
    moran_loc = Moran_Local(hoval, w)
    moran_bv = Moran_BV(income, hoval, w)

    line_colour = '#4393c3'
    scenarios = (
        # no p value
        {},
        # p value, aspect_equal switched off
        dict(p=0.05, aspect_equal=False,
             fitline_kwds=dict(color=line_colour)),
        # p value, zstandard switched off
        dict(p=0.05, zstandard=False,
             fitline_kwds=dict(color=line_colour)),
        # no p value, zstandard switched off
        dict(zstandard=False, fitline_kwds=dict(color=line_colour)),
    )
    for scenario in scenarios:
        fig, _ = _moran_loc_scatterplot(moran_loc, **scenario)
        plt.close(fig)

    # a Moran_BV object must be rejected
    assert_raises(ValueError, _moran_loc_scatterplot, moran_bv, p=0.5)
    # passing a scatter colour together with p is expected to warn
    assert_warns(
        UserWarning,
        _moran_loc_scatterplot,
        moran_loc,
        p=0.5,
        scatter_kwds=dict(c='#4393c3'),
    )
def test_moran_scatterplot():
    """Smoke-test moran_scatterplot with each kind of esda.moran object."""
    gdf = _test_data()
    suicides = gdf['Suicids'].values
    donations = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'

    # Build the `esda.moran` objects (construction order kept as-is)
    moran = Moran(donations, w)
    moran_bv = Moran_BV(donations, suicides, w)
    moran_loc = Moran_Local(donations, w)
    moran_loc_bv = Moran_Local_BV(donations, suicides, w)

    # Every object is plotted with a p value; only the global Moran call
    # additionally switches aspect_equal off.
    cases = (
        (moran, {'aspect_equal': False}),
        (moran_loc, {}),
        (moran_bv, {}),
        (moran_loc_bv, {}),
    )
    for statistic, extra_kwds in cases:
        fig, _ = moran_scatterplot(statistic, p=0.05, **extra_kwds)
        plt.close(fig)
def test_moran_scatterplot():
    """Smoke-test moran_scatterplot with each kind of esda.moran object."""
    gdf = gpd.read_file(examples.get_path('Guerry.shp'))
    suicides = gdf['Suicids'].values
    donations = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'

    # Build the `esda.moran` objects (construction order kept as-is)
    moran = Moran(donations, w)
    moran_bv = Moran_BV(donations, suicides, w)
    moran_loc = Moran_Local(donations, w)
    moran_loc_bv = Moran_Local_BV(donations, suicides, w)

    # Plot each object with a p value and close the figure straight away
    for statistic in (moran, moran_loc, moran_bv, moran_loc_bv):
        fig, _ = moran_scatterplot(statistic, p=0.05)
        plt.close(fig)
def test_moran_loc_bv_scatterplot():
    """Smoke-test _moran_loc_bv_scatterplot on the shared test data."""
    gdf = _test_data()
    suicides = gdf['Suicids'].values
    donations = gdf['Donatns'].values
    w = Queen.from_dataframe(gdf)
    w.transform = 'r'

    # Univariate and bivariate local Moran statistics
    moran_loc = Moran_Local(donations, w)
    moran_loc_bv = Moran_Local_BV(suicides, donations, w)

    # first with defaults, then with a p value and aspect_equal switched off
    for plot_kwds in ({}, {'p': 0.05, 'aspect_equal': False}):
        fig, _ = _moran_loc_bv_scatterplot(moran_loc_bv, **plot_kwds)
        plt.close(fig)

    # a univariate Moran_Local object must be rejected
    raises(ValueError, _moran_loc_bv_scatterplot, moran_loc, p=0.5)
    # passing a scatter colour together with p is expected to warn
    warns(
        UserWarning,
        _moran_loc_bv_scatterplot,
        moran_loc_bv,
        p=0.5,
        scatter_kwds=dict(c='r'),
    )
# --- Spatial weights: keep exactly one of the alternatives active ---------
# w = weights.Queen.from_dataframe(sa_df, idVariable="region_name")  # Queen contiguity
# w = weights.Rook.from_dataframe(sa_df, idVariable="region_name")   # Rook contiguity
w = weights.distance.KNN.from_dataframe(
    sa_df,
    ids="REG_NAME",
    k=6,
)  # k-nearest neighbours
w.transform = "R"
sa_df["lag_infections"] = weights.lag_spatial(w, sa_df[week])

# --- Global spatial autocorrelation ----------------------------------------
y = sa_df[week]
moran = Moran(y, w)

# --- Local spatial autocorrelation -----------------------------------------
m_local = Moran_Local(y, w)
lisa = m_local.Is

# --- Reproject before drawing ----------------------------------------------
sa_df = sa_df.to_crs("EPSG:3857")

# --- Draw the cluster map onto our own axes --------------------------------
fig, ax = plt.subplots(figsize=(9, 9))
lisa_cluster(
    m_local,
    sa_df,
    p=0.05,
    figsize=(9, 9),
    ax=ax,
)

description = 'Weekly Covid-19 Spatial Autocorrelation'
info_text = (
    'Hot- and coldspots indicates clusters of high and low infection rates. \n'
    'Donuts are regions with low infection-rates sorrounded by areas with high infection-rates. \n'
    'Diamonds are regions with high infection-rates sorrounded by regions with low infection-rates'
)

# Title on the axes; the two captions are positioned in figure fractions.
ax.set_title(str(week_name), fontdict=dict(fontsize=22), loc='left')
ax.annotate(
    description,
    xy=(0.325, 0.140),
    size=14,
    xycoords='figure fraction',
)
ax.annotate(
    info_text,
    xy=(0.325, 0.090),
    size=8,
    xycoords='figure fraction',
)
def run_stats(self):
    """Compute Local Moran's I (LISA) for the selected layer and field.

    The aggregation layer and the indicator field are read from the dialog
    widgets. Queen or rook contiguity weights (chosen by
    ``cbx_contiguity``) are built from the layer's shapefile, the indicator
    column is read from the matching ``.dbf`` file, and ``Moran_Local`` is
    run with row-standardised weights and 999 permutations.

    Every feature of the layer is then copied to ``self.output_file_path``
    (a temporary shapefile when no path was entered) with five extra
    attributes:

    * ``LISA_P`` -- ``p_sim``
    * ``LISA_Z`` -- ``z_sim``
    * ``LISA_Q`` -- ``q``
    * ``LISA_I`` -- ``Is``
    * ``LISA_C`` -- ``q`` where ``p_sim <= 0.05``, otherwise 0

    The written file is added to the project as ``self.output_layer`` and
    styled through ``self.add_symbology()``.

    A ``GeoPublicHealthException`` raised along the way is shown in the
    message bar; any other exception propagates. In both cases the OK
    button and the cursor are restored.
    """
    self.admin_layer = self.cbx_aggregation_layer.currentLayer()
    field = self.cbx_indicator_field.currentField()

    # Output.
    self.output_file_path = self.le_output_filepath.text()

    try:
        self.button_box_ok.setDisabled(True)
        # noinspection PyArgumentList
        QApplication.setOverrideCursor(Qt.WaitCursor)
        # noinspection PyArgumentList
        QApplication.processEvents()

        # Validate the layer BEFORE touching it, and inside the try block,
        # so a missing layer is reported as NoLayerProvidedException (and
        # the UI is restored by ``finally``) rather than failing with an
        # AttributeError on ``None``.
        if not self.admin_layer:
            raise NoLayerProvidedException

        input_name = self.admin_layer.name()
        self.layer = QgsProject.instance().mapLayersByName(input_name)[0]

        # Output
        if not self.output_file_path:
            temp_file = NamedTemporaryFile(
                delete=False, suffix='-geopublichealth.shp')
            self.output_file_path = temp_file.name
            temp_file.flush()
            temp_file.close()
        else:
            # Create (or truncate) the target file up front.
            with open(self.output_file_path, 'w'):
                pass

        admin_layer_provider = self.layer.dataProvider()
        fields = admin_layer_provider.fields()
        if fields.indexFromName(self.name_field) != -1:
            raise FieldExistingException(field=self.name_field)

        for lisa_name, lisa_type in (
                ('LISA_P', QVariant.Double),
                ('LISA_Z', QVariant.Double),
                ('LISA_Q', QVariant.Int),
                ('LISA_I', QVariant.Double),
                ('LISA_C', QVariant.Double)):
            fields.append(QgsField(lisa_name, lisa_type))

        # The QgsVectorFileWriter constructor is deprecated since QGIS 3.10,
        # but QgsVectorFileWriter.create() does not flush the features until
        # QGIS closes (https://github.com/qgis/QGIS/issues/35021), so the
        # constructor is kept for now.
        file_writer = QgsVectorFileWriter(
            self.output_file_path,
            'utf-8',
            fields,
            QgsWkbTypes.Polygon,
            self.admin_layer.crs(),
            'ESRI Shapefile')

        try:
            # PySAL 2.0 API, see
            # https://github.com/pysal/pysal/blob/master/MIGRATING.md
            if self.cbx_contiguity.currentIndex() == 0:  # queen
                # fix_print_with_import
                print('Info: Local Moran\'s using queen contiguity')
                w = Queen.from_shapefile(self.admin_layer.source())
            else:  # 1 for rook
                # fix_print_with_import
                print('Info: Local Moran\'s using rook contiguity')
                w = Rook.from_shapefile(self.admin_layer.source())

            # PySAL 2.0 no longer offers pysal.open, so the attribute table
            # is read with geopandas instead:
            # https://stackoverflow.com/questions/59455383/pysal-does-not-have-attribute-open
            import geopandas
            f = geopandas.read_file(
                self.admin_layer.source().replace('.shp', '.dbf'))
            y = f[str(field)]
            lm = Moran_Local(y, w, transformation="r", permutations=999)

            # Quadrant where significant, 0 elsewhere.
            # could make significance level an option
            sig_q = lm.q * (lm.p_sim <= 0.05)

            for i, feature in enumerate(self.admin_layer.getFeatures()):
                attributes = feature.attributes()
                attributes.append(float(lm.p_sim[i]))
                attributes.append(float(lm.z_sim[i]))
                attributes.append(int(lm.q[i]))
                attributes.append(float(lm.Is[i]))
                attributes.append(int(sig_q[i]))

                new_feature = QgsFeature()
                new_geom = QgsGeometry(feature.geometry())
                new_feature.setAttributes(attributes)
                new_feature.setGeometry(new_geom)

                file_writer.addFeature(new_feature)
        finally:
            # Dropping the writer is what flushes and closes the file; do
            # it on the error path too so the handle is never left open.
            del file_writer

        self.output_layer = QgsVectorLayer(
            self.output_file_path,
            "LISA Moran's I - " + field,
            'ogr')
        QgsProject.instance().addMapLayer(self.output_layer)

        self.add_symbology()

        self.signalStatus.emit(3, tr('Successful process'))

    except GeoPublicHealthException as e:
        display_message_bar(msg=e.msg, level=e.level, duration=e.duration)

    finally:
        self.button_box_ok.setDisabled(False)
        # noinspection PyArgumentList
        QApplication.restoreOverrideCursor()
        # noinspection PyArgumentList
        QApplication.processEvents()
def moran_gen(file, weights_path=r'C:\zoovision\data\Region1_count.txt'):
    """Draw a LISA cluster map for the ``ind_100t`` column of ``file``.

    Parameters
    ----------
    file
        Despite the name, this is used directly as the data frame: its
        ``ind_100t`` column is the analysed variable and the object itself
        is handed to ``lisa_cluster`` for plotting.
    weights_path : str, optional
        Path of the spatial-weights file, read with the ``'arcgis_text'``
        format. Defaults to the location that used to be hard-coded here.

    Returns
    -------
    None
        The function only produces the titled figure.
    """
    df = file
    y = df['ind_100t']

    # Open the weights file once and always close it, even if reading
    # fails (it used to be opened twice, leaking the first handle).
    weights_file = ps.open(weights_path, 'r', 'arcgis_text')
    try:
        w = weights_file.read()
    finally:
        weights_file.close()

    moran_loc = Moran_Local(y, w, transformation='r', permutations=999)

    # The axes returned by lisa_cluster are the ones that get the title,
    # so no separate plt.subplots() call is made beforehand.
    _, ax = lisa_cluster(moran_loc, df, p=0.05, figsize=(15, 10))
    ax.set_title(
        "Local Indicators of Spatial Association ", fontsize=35)
def generate_clusters(gdf: gpd.GeoDataFrame,
                      col: str,
                      crs: Optional[int] = None,
                      alpha: float = 0.005,
                      geom_column: str = "geometry") -> gpd.GeoDataFrame:
    """Find spatial clusters/outliers for one column of a geodataframe.

    Steps:
        1. Build a spatial weights matrix
        2. Build a spatially lagged version of the variable of interest
        3. Compute (and print) the global spatial autocorrelation
        4. Compute local spatial autocorrelation (the clusters) with LISA
           (local indicators of spatial autocorrelation)
        5. Join the result back onto the (filled) geodataframe

    The code should work for any geodataframe, but the workflow assumes
    the data sits in a hexagonal grid, i.e. polygons of roughly uniform
    weight. Rook weights
    (https://pysal.org/libpysal/generated/libpysal.weights.Rook.html)
    connect all polygons that share an edge, which requires a grid without
    islands; the grid is therefore filled before the weights are computed.

    Input:
        gdf     The source geodataframe; should be a hexagonal grid if this
                script is used as is
        col     The column holding the data being modelled
        crs     A coordinate reference system, EPSG code; when given, the
                data is reprojected to it first
        alpha   The significance threshold used to decide whether a cell
                is a cluster/outlier. Defaults to 0.005. Such a low value
                is used because our data typically has large contrasts
                between areas of zero index (forest, seas) and built-up
                areas.
                - Larger values show the boundary between built-up areas
                  and nature
                - Smaller values show contrasts within built-up areas
        geom_column
                Name of the geometry column, passed to the grid filling
                and to the weights construction

    Output: the filled geodataframe with 2 new columns:
        quadrant    The quadrant the observation belongs to:
                    LL = low clusters = low values surrounded by low values
                    HH = high clusters = high values surrounded by high values
                    LH = low outliers = low values surrounded by high values
                    HL = high outliers = high values surrounded by low values
        significant Whether the quadrant information is statistically
                    significant. This depends on the number of iterations
                    and the random seed used, so polygons at the edge of
                    significance may come out slightly differently from
                    run to run.
    """
    # Reproject when an EPSG code was supplied
    if crs:
        gdf = gdf.to_crs(crs)

    # The cluster algorithm fails when the data contains islands, so we need
    # a full grid: the bbox of the geodataframe is filled with zero values at
    # every missing hex. A zero index value means none of the datasets had
    # any values on that hex. Note that
    # 1) the datasets may have different bboxes and
    # 2) they may be sparse.
    # Nothing can be assumed about the form of the data other than that it
    # is aggregated in a hex grid.
    grid = fill_hex_grid(gdf, geom_column=geom_column)

    # Row-standardised rook weights
    rook_w = lps.weights.Rook.from_dataframe(grid, geom_col=geom_column)
    rook_w.set_transform("R")

    # Variable of interest and its spatial lag, side by side
    values = grid[col]
    lagged = lps.weights.lag_spatial(rook_w, values)
    lag_name = f"{col}_lag"
    frame = pd.DataFrame(data={col: values, lag_name: lagged})

    # Global spatial autocorrelation
    global_moran = Moran(frame[col], rook_w)
    global_p = global_moran.p_sim
    print(
        "\nGlobal spatial autocorrelation:\n"
        f"Moran's I: {round(global_moran.I, 3)!s}"
        f"\np-value: {round(global_p, 3)!s}"
    )

    # LISA values
    local_moran = Moran_Local(
        frame[col],
        rook_w,
        permutations=100000,
        # seed=1  # enable when absolute reproducibility is needed
    )

    # Significance flag per observation
    frame["significant"] = local_moran.p_sim < alpha

    # Quadrant per observation, relabelled from codes to names
    quadrant_labels = {1: "HH", 2: "LH", 3: "LL", 4: "HL"}
    frame["quadrant"] = local_moran.q
    frame["quadrant"] = frame["quadrant"].replace(quadrant_labels)

    # Report
    quadrant_counts = frame["quadrant"].sort_values().value_counts()
    print(
        "\nDistribution of clusters/outliers (quadrants):\n"
        f"{quadrant_counts!s}"
    )
    significant_counts = frame["significant"].value_counts()
    print(
        f"\nSignificant clusters (using significance threshold {alpha!s}):\n"
        f"{significant_counts!s}"
    )

    # Attach the LISA columns to the filled grid by index
    return grid.merge(
        frame[["quadrant", "significant"]],
        how="left",
        left_index=True,
        right_index=True,
    )