def global_autocorrel(coord_data, y):
    """Print global Moran's I for a series of KNN and distance-band weights.

    One ``;``-separated record is printed per weights specification:
    the parameter (``k`` or the distance threshold), Moran's I and the
    ``p_rand`` / ``p_norm`` attributes of the result.

    Args:
        coord_data: Coordinates passed to the pysal weights constructors.
        y: Attribute values passed to ``pysal.Moran``.
    """

    def report(label, weights):
        # Emit a single "label;I;p_rand;p_norm" record.
        result = pysal.Moran(y, weights)
        fields = (label, result.I, result.p_rand, result.p_norm)
        print(";".join(str(field) for field in fields))

    # k-nearest-neighbour weights for k = 2..10.
    for neighbours in range(2, 11):
        report(neighbours, pysal.weights.KNN(coord_data, k=neighbours))

    # Distance-band weights, from the widest to the narrowest band.
    thresholds = (5, 4, 3, 2, 1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1)
    for threshold in thresholds:
        report(threshold,
               pysal.weights.DistanceBand.from_array(coord_data, threshold))
def compute(self):
    """Compute global Moran's I for the configured column and write it out.

    Reads the input named ``'input'``, drops rows whose ``var_col`` value is
    null, builds weights with ``wt.gpd_contiguity`` and runs either
    ``Moran_Rate`` (when ``adjust_by_col`` is set) or ``Moran``.  A dict of
    selected result attributes is stored on ``self.output.data`` and written.
    """
    if not self.output:
        self.output = VectorFileIO(name='result', uri=self.get_outpath())

    for source in self.inputs:
        if source.name == 'input':
            first_df = source.read()

    value_col = self.var_col
    rate_col = self.adjust_by_col
    # Fall back to 999 permutations when none are configured.
    n_permutations = self.permutations or 999

    # filter out null fields
    valid_df = first_df[first_df[value_col].notnull()]

    # get Global Moran's I
    values = np.array(valid_df[value_col])
    weights = wt.gpd_contiguity(valid_df)
    if rate_col:
        base = np.array(valid_df[rate_col])
        stat = pysal.esda.moran.Moran_Rate(e=values, b=base, w=weights,
                                           permutations=n_permutations)
    else:
        stat = pysal.Moran(y=values, w=weights, permutations=n_permutations)

    reported = ('I', 'EI', 'p_norm', 'EI_sim', 'p_sim', 'z_sim', 'p_z_sim')
    self.output.data = dict((name, getattr(stat, name)) for name in reported)
    self.output.write()
    logger.debug(self.output)
def calc_morans_i(self, raster_as_array, lag_distance=10):
    """
    Compute Moran's I of a raster for every lag distance 1..lag_distance,
    using binary distance-band weights over the pixel grid coordinates.

    With the default, lags range from 1 pixel to 10 pixels; in this
    particular project that translates to 1 pixel (30 m) to 10 pixels (300 m).

    Reference: http://pysal.readthedocs.org/en/latest/users/tutorials/weights.html#distance-band-weights

    @param raster_as_array: the raster to use in the calculations as an array
    @param lag_distance: largest lag distance (in pixels) to evaluate
    @returns: an array of Moran's I values, one per lag distance.
    """
    self.raster_as_array = raster_as_array
    self.lag_distance = lag_distance

    values = self.raster_as_array.ravel()

    # One (row, column) coordinate pair per pixel.
    cell_count = self.rows * self.columns
    row_idx, col_idx = numpy.indices((self.rows, self.columns))
    coords = numpy.hstack([row_idx.reshape(cell_count, 1),
                           col_idx.reshape(cell_count, 1)])

    morans = numpy.zeros(self.lag_distance)
    for slot, lag in enumerate(range(1, self.lag_distance + 1)):
        # distance-based weights for this lag, then Moran's I for it
        band_weights = pysal.threshold_binaryW_from_array(coords, lag)
        morans[slot] = pysal.Moran(values, band_weights).I
    return morans
def _autocorrelation_method(self, method): if method.lower() == 'moran': method_fct = lambda *args, **kwargs: pysal.Moran( *args, **kwargs, two_tailed=True) def get_stats(_object): return _object.I elif method.lower() == 'geary': method_fct = lambda *args, **kwargs: pysal.Geary( *args, **kwargs, two_tailed=True) def get_stats(_object): return _object.C elif method.lower() == 'getisord': method_fct = lambda *args, **kwargs: pysal.esda.getisord.G( *args, **kwargs, two_tailed=True) def get_stats(_object): return _object.G else: raise ValueError( "no corresponding autocorrelation function to {}".format( method)) return method_fct, get_stats
def processAlgorithm(self, progress):
    """Compute global Moran's I for a field of the input vector layer.

    The layer is exported to a shapefile, queen or rook contiguity weights
    are built from it, and Moran's I is computed with row-standardised
    weights and 999 permutations.  The statistic is stored in the ``I``
    output and the inference values are printed.

    Args:
        progress: Progress reporter supplied by the framework (not used).
    """
    field = self.getParameterValue(self.FIELD)
    field = field[0:10]  # try to handle Shapefile field length limit
    filename = self.getParameterValue(self.INPUT)
    layer = dataobjects.getObjectFromUri(filename)
    filename = dataobjects.exportVectorLayer(layer)

    contiguity = self.getParameterValue(self.CONTIGUITY)
    if contiguity == 0:  # queen
        print('INFO: Moran\'s using queen contiguity')
        w = pysal.queen_from_shapefile(filename)
    else:  # 1 for rook
        print('INFO: Moran\'s using rook contiguity')
        w = pysal.rook_from_shapefile(filename)

    # Read the attribute column, then release the DBF handle (it used to
    # stay open for the lifetime of the process).
    dbf = pysal.open(filename.replace('.shp', '.dbf'))
    try:
        y = np.array(dbf.by_col[str(field)])
    finally:
        dbf.close()

    m = pysal.Moran(y, w, transformation="r", permutations=999)
    self.setOutputValue(self.I, m.I)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Moran's I: %f" % (m.I))
    print("INFO: Moran's I values range from -1 (indicating perfect dispersion) to +1 (perfect correlation). Values close to -1/(n-1) indicate a random spatial pattern.")
    print("p_norm: %f" % (m.p_norm))
    print("p_rand: %f" % (m.p_rand))
    print("p_sim: %f" % (m.p_sim))
    print("INFO: p values smaller than 0.05 indicate spatial autocorrelation that is significant at the 5% level.")
    print("z_norm: %f" % (m.z_norm))
    print("z_rand: %f" % (m.z_rand))
    print("z_sim: %f" % (m.z_sim))
    print("INFO: z values greater than 1.96 or smaller than -1.96 indicate spatial autocorrelation that is significant at the 5% level.")
def test_sids(self):
    """Check one-tailed Moran's I and its normality p-value on SIDR74."""
    w = pysal.open(pysal.examples.get_path("sids2.gal")).read()
    f = pysal.open(pysal.examples.get_path("sids2.dbf"))
    SIDR = np.array(f.by_col("SIDR74"))
    mi = pysal.Moran(SIDR, w, two_tailed=False)
    # assertAlmostEquals is a deprecated alias (removed in Python 3.12).
    self.assertAlmostEqual(mi.I, 0.24772519320480135)
    self.assertAlmostEqual(mi.p_norm, 5.7916539074498452e-05)
def moran(shape, residuals):
    """Compute global Moran's I of per-ward residuals with Rook weights.

    Args:
        shape: Path of the ward shapefile; it must carry a ``ward_id``
            column.
        residuals: DataFrame with a ``Ward`` key column and a ``resid``
            column.

    Returns:
        list: ``[I, p_sim]`` of the fitted ``ps.Moran`` object.
    """
    tx = gpd.read_file(shape)
    # Left merge keeps every ward of the shapefile; wards without a
    # residual end up as NaN and are replaced by 0 below.
    tx = tx.merge(residuals, right_on='Ward', left_on="ward_id", how='left')
    tx = tx.set_index("ward_id")
    # Assign instead of a chained in-place fillna, which may act on a copy.
    tx["resid"] = tx["resid"].fillna(value=0)

    # Kept from the original: displays any figures that are already open.
    plt.show()

    # The original read an undefined name ``shapefile`` here; the weights
    # must come from the same file as the geometries, i.e. ``shape``.
    df = ps.pdio.read_files(shape)
    df = df.set_index("ward_id")
    # Put the weights frame in the same ward order as the residuals.
    df = df.loc[tx.index.values]

    W = ps.weights.Rook.from_dataframe(df)
    W.transform = 'r'
    score = ps.Moran(tx['resid'], W)
    return ([score.I, score.p_sim])
def test_sids(self):
    """Check Moran's I and its default normality p-value on SIDR74."""
    w = pysal.open(pysal.examples.get_path("sids2.gal")).read()
    f = pysal.open(pysal.examples.get_path("sids2.dbf"))
    SIDR = np.array(f.by_col("SIDR74"))
    mi = pysal.Moran(SIDR, w)
    # assertAlmostEquals is a deprecated alias (removed in Python 3.12).
    self.assertAlmostEqual(mi.I, 0.24772519320480135)
    self.assertAlmostEqual(mi.p_norm, 0.00011583330781)
def get_ols_check_spatial_correlation(central_shape, location_shp_file, year):
    """Fit a log-log OLS model and test its residuals for spatial correlation.

    The log of ``'<year>_pop_density'`` is regressed on the log of
    ``'Call_density'``.  Moran's I of the residuals is computed with binary
    distance-band weights built from ``location_shp_file``.  If both the
    Moran p-value and the LM-error p-value are below 0.05, a
    maximum-likelihood spatial error model is fitted as well.

    Args:
        central_shape: Table with ``'Call_density'`` and
            ``'<year>_pop_density'`` columns.
        location_shp_file: Shapefile path used to build the weights.
        year: Year used to select the population-density column.

    Returns:
        tuple: ``(ols_summary, moran_p_norm, spatial_error_summary)``.
        ``spatial_error_summary`` is ``None`` when no spatial error model
        was fitted (the original raised UnboundLocalError in that case).
    """
    # Get the log transform of dependent and independent variables
    X_log = np.log(central_shape['Call_density'])
    Y_log = np.log(central_shape['{}_pop_density'.format(year)])
    X_log = np.array(X_log).T
    Y_log = np.array(Y_log)
    # The regression classes are given column vectors.
    Y_log.shape = (len(Y_log), 1)
    X_log.shape = (len(X_log), 1)

    # fit ols model for log transformed variable
    ls = ols.OLS(Y_log, X_log)
    central_ols = ls.summary

    # Distance based weight matrix
    thresh = pysal.min_threshold_dist_from_shapefile(location_shp_file)
    wt = pysal.weights.DistanceBand.from_shapefile(location_shp_file,
                                                   threshold=thresh,
                                                   binary=True)
    mi_distance = pysal.Moran(ls.u, wt, two_tailed=False)

    # Get Moran's I P-value using distance based weight matrix
    a = mi_distance.p_norm

    central_spat_err_distance = None
    if mi_distance.p_norm < 0.05:
        # Significant residual autocorrelation: run the Lagrange multiplier
        # tests.  Only the error model is considered here.
        lms = pysal.spreg.diagnostics_sp.LMtests(ls, wt)
        if lms.lme[1] < 0.05:
            spat_err = ml_error.ML_Error(Y_log, X_log, wt)
            central_spat_err_distance = spat_err.summary

    return central_ols, a, central_spat_err_distance
def main():
    """Report the spatial autocorrelation of every target variable.

    Parses the command line, loads county shapes, DBF records, features and
    targets, then prints one tab-separated line per outcome with Moran's I,
    its expectation and the normality p-value, using kernel weights built
    from the county centroids.
    """
    parser = argparse.ArgumentParser()
    options = (
        ('--coords',
         '/data/twcounty/stats2/counties/counties.top100.bounding.txt',
         'tsv file with counties, lat/long, and bounding boxes'),
        ('--dbf',
         '/data/twcounty/stats2/counties/census_polygons/gz_2010_us_050_00_500k.dbf',
         'county dbf files'),
        ('--input',
         '/data/twcounty/features/counties.norm=none.liwc.pkl',
         'pickled county feature Data object'),
        ('--json', '/data/twcounty/json', 'directory of jsons per county'),
        ('--shapes',
         '/data/twcounty/stats2/counties/census_polygons/gz_2010_us_050_00_500k.shp',
         'county shape files'),
        ('--targets', '/data/twcounty/targets.tsv', 'targets per county'),
        ('--tweets', '/data/twcounty/tweets.tsv', 'tweets'),
    )
    for flag, default, description in options:
        parser.add_argument(flag, default=default, help=description)
    args = parser.parse_args()

    coords = read_coords(args.coords)
    overlaps = get_overlaps(coords)
    print_tweets_in_overlaps(overlaps, args.json, args.tweets)

    shapes = pysal.open(args.shapes)
    print('read %d counties from shape file' % len(shapes))
    dbf = pysal.open(args.dbf)
    counties = data.read(args.input).features
    targets, target_alpha = read_targets(args.targets, counties)

    # Attach the DBF record, its position and the polygon to each target.
    for code in targets:
        matches = [(position, record)
                   for position, record in enumerate(dbf)
                   if record[1] + record[2] == code]
        didx, d = matches[0]
        entry = targets[code]
        entry['dbf'] = d
        entry['dbfi'] = didx
        entry['shape'] = shapes[didx]

    fips = sorted(targets)
    print('\t'.join(['mi.I ', 'mi.EI ', 'p_norm ', 'outcome']))
    centroids = np.array([targets[code]['shape'].centroid for code in fips])
    weights = pysal.Kernel(centroids)
    for outcome in target_alpha:
        y = np.array([float(targets[code][outcome]) for code in fips])
        y = y / np.sum(y)
        mi = pysal.Moran(y, weights)
        print('%.5f\t%.5f\t%.5f\t%s' % (mi.I, mi.EI, mi.p_norm, outcome))
def gincs(wed, y, permutations=999, segment=False):
    """Compute global Moran's I for values on a network.

    Args:
        wed: Network structure passed to ``networkw.w_links`` to derive the
            link weights.
        y: Attribute values passed to ``ps.Moran``.
        permutations: Number of permutations for the simulation.
        segment: Segmenting ``wed`` and ``y`` first is not implemented.

    Returns:
        The ``ps.Moran`` result object.

    Raises:
        NotImplementedError: If ``segment`` is true.
    """
    if segment:
        # segment wed and y
        # get new wed and extract new y
        raise NotImplementedError
    link_weights = networkw.w_links(wed)
    return ps.Moran(y, link_weights, permutations=permutations)
def test_variance(self):
    """Check the variance of Moran's I on a 3x3 lattice with 'B' weights."""
    values = np.arange(1, 10)
    lattice = pysal.lat2W(3, 3)
    stat = pysal.Moran(values, lattice, transformation='B')
    expected = (
        ('VI_rand', 0.059687500000000004),
        ('VI_norm', 0.053125000000000006),
    )
    for attribute, target in expected:
        np.testing.assert_allclose(getattr(stat, attribute), target,
                                   atol=ATOL, rtol=RTOL)
def calculate_morans(self, columns, overwrite=False, *args, **kwargs):
    """Compute global Moran's I for each column and cache the results.

    Args:
        columns: Names of the data columns to analyse.
        overwrite: When false, columns already present in ``self.results``
            are skipped.
        *args, **kwargs: Forwarded to ``pysal.Moran``.

    Returns:
        ``self.results``, mapping column name to its ``pysal.Moran`` object.
    """
    if not hasattr(self, 'weights'):
        # TODO: add id variable here idVariable='ID'
        self.calculate_weights(threshold=self.threshold)

    for name in columns:
        if overwrite or name not in self.results:
            # TODO: is float always what we want? (morans breaks w/ string)
            values = np.array(self.data.by_col(name)).astype(float)
            self.results[name] = pysal.Moran(values, self.weights,
                                             *args, **kwargs)

    logging.info('{}: Finished Moran Calculation'.format(self.name))
    return self.results
def test_mplot():
    """Smoke-test ``mplot`` on Moran's I of HOVAL in the columbus example."""
    shp_path = ps.examples.get_path('columbus.shp')
    home_values = read_files(shp_path)['HOVAL'].values
    queen = ps.queen_from_shapefile(shp_path)
    queen.transform = 'R'
    figure = mplot(ps.Moran(home_values, queen),
                   xlabel='Response',
                   ylabel='Spatial Lag',
                   title='Moran Scatterplot',
                   custom=(7, 7))
    plt.close(figure)
def moran_df(df, w):
    """Rank the int64/float64 columns of ``df`` by global Moran's I.

    Args:
        df: DataFrame whose ``int64`` / ``float64`` columns are analysed.
        w: Spatial weights passed to ``ps.Moran``.

    Returns:
        DataFrame with columns ``'variavel'`` (column name) and ``'valor'``
        (Moran's I), sorted by ``'valor'`` in descending order.
    """
    numeric = df.select_dtypes(include=['int64', 'float64'])
    scores = [(column, ps.Moran(numeric[column], w).I)
              for column in numeric.columns]
    ranking = pd.DataFrame(
        {'variavel': [column for column, _ in scores],
         'valor': [value for _, value in scores]},
        columns=['variavel', 'valor'])
    return ranking.sort_values(by='valor', ascending=False)
def cal_moran(self, coors, value, K):
    """Compute global Moran's I with KNN weights and show it in the browser.

    Args:
        coors: Point coordinates used to build the KD-tree.
        value: Attribute values passed to ``pysal.Moran``.
        K: Number of nearest neighbours; converted with ``int``.
    """
    tree = pysal.cg.kdtree.KDTree(np.array(coors))
    weights = pysal.weights.Distance.KNN(tree, int(K))

    moran_i = pysal.Moran(value, weights, two_tailed=False)

    # Only the index and the normality p-value are reported; the unused
    # locals (EI, VI_norm, z_norm, z_sim, p_sim) were dropped.
    self.textBrowser.setText('Global Moran\'s I Summary')
    self.textBrowser.append('Moran\'s Index: ' + str(moran_i.I))
    self.textBrowser.append('p-value: ' + str(moran_i.p_norm))
def compute(self, vlayer, tfield, idvar, matType):
    """Compute global Moran's I for the selected field and display it.

    The ``vlayer``, ``tfield`` and ``idvar`` arguments are immediately
    replaced by the active layer and the current combo-box selections.

    Args:
        vlayer: Ignored; the active QGIS layer is used.
        tfield: Ignored; taken from ``self.inField``.
        idvar: Ignored; taken from ``self.idVariable``.
        matType: ``"Rook"`` for rook contiguity, anything else for queen.
    """
    vlayer = qgis.utils.iface.activeLayer()
    idvar = self.idVariable.currentText()
    tfield = self.inField.currentText()

    provider = vlayer.dataProvider()
    allAttrs = provider.attributeIndexes()

    # Add a "<id prefix>_qrr" double field when the provider allows it.
    caps = vlayer.dataProvider().capabilities()
    if caps & QgsVectorDataProvider.AddAttributes:
        TestField = idvar[:5] + "_qrr"
        res = vlayer.dataProvider().addAttributes(
            [QgsField(TestField, QVariant.Double)])

    # Shapefile path registered for the selected layer name.
    wp = str(self.dic[str(self.inShape.currentText())])
    if matType == "Rook":
        build_weights = py.rook_from_shapefile
    else:
        build_weights = py.queen_from_shapefile
    w = build_weights(wp, idVariable=unicode(idvar))

    # The attribute table sits next to the shapefile as a .dbf file.
    db = py.open(wp[:-3] + "dbf")
    y = np.array(db.by_col[unicode(tfield)])

    mg = py.Moran(y, w).I
    self.SAresult.setText("Global Moran's I index is " + str(mg))
def plot_quartiles(shape, reviews, year):
    """Print global Moran's I of ``total`` for a subset of wards.

    Despite its name this function no longer plots anything; the plotting
    code was already commented out.

    Args:
        shape: Path of the ward shapefile; it must carry a ``ward_id``
            column.
        reviews: Path of a CSV file with a ``Ward`` column, a ``total``
            column and one column per year.
        year: Name of the year column; wards whose value equals -1 are the
            ones kept for the weights.
    """
    scores_data = pd.read_csv(reviews)
    tx = gpd.read_file(shape)
    tx = tx.merge(scores_data, right_on='Ward', left_on="ward_id",
                  how='inner')

    # The original read the shapefile twice, once through the undefined
    # name ``shapefile``; a single read of ``shape`` is enough.
    df = ps.pdio.read_files(shape)
    df = df.set_index("ward_id")
    scores_data = scores_data.set_index("Ward")
    print(df)
    print(scores_data)

    # Boolean mask aligned on the ward index.
    df = df[scores_data[year] == -1]
    print(df)

    W = ps.weights.Rook.from_dataframe(df)
    W.transform = 'r'
    # NOTE(review): tx['total'] comes from the unfiltered merge while W is
    # built from the filtered frame - confirm that the lengths agree.
    score = ps.Moran(tx['total'], W)
    print(score.I, score.p_sim)
# Dissolve the segment polygons by segm_id into a new shapefile with ogr2ogr.
# NOTE(review): the command is assembled by string concatenation and run
# through the shell - confirm that the paths cannot contain shell
# metacharacters.
os.system(
    'ogr2ogr ' + shpname2 + ' ' + shpname +
    ' -dialect sqlite -sql "SELECT ST_Union(geometry), segm_id FROM ' +
    layrname + ' GROUP BY segm_id"')
# Rook contiguity weights of the dissolved segments.
w = pysal.rook_from_shapefile(shpname2)
print 'w.n is:', w.n
wpn = w.n
# Only continue when there is more than one segment.
if wpn > 1:  #!=
    print 'segmentation threshold is:', k
    sareas = sum(sumareas)  # area of the PA (sum of the segments)
    thr.append(k)
    # Column i of the space-delimited table (header row skipped).
    y30 = np.genfromtxt(hriname, delimiter=' ', skip_header=1, usecols=(i))
    mi = pysal.Moran(y30, w)  #, two_tailed=False)
    # Absolute value of Moran's I.
    mm = abs(mi.I)
    print 'M.I. is:', mm
    # Column i + 19 of the same table; presumably the variance column
    # matching column i - TODO confirm against the table layout.
    i2 = i + 19
    y330 = np.genfromtxt(hriname,
                         delimiter=' ',
                         skip_header=1,
                         usecols=(i2))
    # Sum of that column divided by the total area.
    wv = sum(y330) / sareas
    print 'Sum of the variance is:', wv
    mors.append(mm)
    varis.append(wv)
    # Append "threshold segment-count area" to the CSV log.
    # NOTE(review): wb is not closed in the visible code - confirm it is
    # closed further down.
    wb = open(csvname2, 'a')
    outxt = str(k) + ' ' + str(w.n) + ' ' + str(sareas)
    wb.write(outxt)
    wb.write('\n')
def estMorans(frame):
    """Return global Moran's I of gridded values with queen contiguity.

    Args:
        frame: Sequence of ``(x, y, z)`` triples.  ``x`` and ``y`` are
            zero-based grid indices and ``z`` is the cell value.

    Returns:
        The ``I`` attribute of the ``ps.Moran`` result.
    """
    # zip() only reads ``frame``, so the former deepcopy was unnecessary.
    x, y, z = zip(*frame)
    # Lattice dimensions are the largest index + 1 along each axis.
    w = ps.lat2W(int(max(x) + 1.0), int(max(y) + 1.0), rook=False)
    return ps.Moran(z, w).I
def run(self):
    """Run method that performs all the real work.

    Shows the dialog and, once it is confirmed with valid input, reads the
    selected field(s) from the chosen shapefile layer, builds spatial
    weights (distance band or KNN for point layers, queen contiguity
    otherwise), computes the selected local statistic (Getis-Ord G, local
    Moran or bivariate local Moran), writes it with ``write_file`` and
    applies the matching layer style.
    """
    # show the dialog
    self.clear_ui()
    layers, layers_shp = self.loadLayerList()
    if len(layers) == 0:
        return
    self.dlg.show()
    self.load_comboBox()
    # Run the dialog event loop
    result = self.dlg.exec_()
    # See if OK was pressed and fields are not empty
    if result and (self.validator() == 1):
        selectedLayerIndex = self.dlg.comboBox.currentIndex()
        # The index is used on layers_shp, so it must be strictly below
        # that list's length (the original allowed index == len(layers)).
        if selectedLayerIndex < 0 or selectedLayerIndex >= len(layers_shp):
            return
        selectedLayer = layers_shp[selectedLayerIndex]
        layerName = selectedLayer.dataProvider().dataSourceUri()
        C = selectedLayer.fieldNameIndex(self.dlg.comboBox_C.currentText())
        C2 = selectedLayer.fieldNameIndex(
            self.dlg.comboBox_C_2.currentText())
        filename = self.dlg.lineEdit.text()
        (path, layer_id) = layerName.split('|')
        inDriver = ogr.GetDriverByName("ESRI Shapefile")
        inDataSource = inDriver.Open(path, 0)
        inLayer = inDataSource.GetLayer()
        # Named geom_type to avoid shadowing the builtin ``type``.
        geom_type = inLayer.GetLayerDefn().GetGeomType()

        u = []
        for i in range(0, inLayer.GetFeatureCount()):
            geometry = inLayer.GetFeature(i)
            u.append(geometry.GetField(C))
        y = numpy.array(u)  # attributes vector

        if self.dlg.checkBox_moranBi.isChecked() == 1:
            # Second variable for the bivariate statistic.
            v = []
            for i in range(0, inLayer.GetFeatureCount()):
                geometry = inLayer.GetFeature(i)
                v.append(geometry.GetField(C2))
            x = numpy.array(v)

        if geom_type == 1:  # point
            t = ()
            for feature in inLayer:
                geometry = feature.GetGeometryRef()
                xy = (geometry.GetX(), geometry.GetY())
                t = t + (xy, )

            if self.dlg.lineEditThreshold.text(
            ) and self.dlg.lineEditThreshold.text(
            ) != "":  # if threshold is given
                threshold1 = int(self.dlg.lineEditThreshold.text())
            elif self.dlg.checkBox_knn.isChecked(
            ) == 0:  # if user needs to optimize threshold (no knn)
                # Keep the distance whose global Moran z-score is largest.
                mx_moran = -1000.0
                mx_i = -1000.0
                minT = int(self.dlg.lineEdit_minT.text())
                maxT = int(self.dlg.lineEdit_maxT.text())
                dist = int(self.dlg.lineEdit_dist.text())
                for i in range(minT, maxT + dist, dist):
                    w = DistanceBand(t, threshold=i, p=2, binary=False)
                    moran = pysal.Moran(y, w)
                    if moran.z_norm > mx_moran:
                        mx_i = i
                        mx_moran = moran.z_norm
                threshold1 = int(mx_i)

            if self.dlg.checkBox_knn.isChecked() == 1:
                weightValue = int(self.dlg.knn_number.text())
                w = pysal.knnW_from_shapefile(layerName.split("|")[0],
                                              k=weightValue,
                                              p=1)
                threshold1 = "None/KNN used " + self.dlg.knn_number.text()
            else:
                w = DistanceBand(t, threshold1, p=2, binary=False)
        else:  # polygon
            w = pysal.queen_from_shapefile(layerName.split("|")[0])
            threshold1 = "None/Queen's Case used"

        if self.dlg.checkBox_rowStandard.isChecked() == 1:
            type_w = "R"
        else:
            type_w = "B"

        if self.dlg.checkBox_randomPerm.isChecked() == 1:
            permutationsValue = int(self.dlg.lineEdit_random.text())
        else:
            permutationsValue = 999

        # Fixed seed so that the permutation results are reproducible.
        numpy.random.seed(12345)
        if self.dlg.checkBox_gi.isChecked() == 1:
            statistics = G_Local(y,
                                 w,
                                 star=True,
                                 transform=type_w,
                                 permutations=permutationsValue)
        elif self.dlg.checkBox_moran.isChecked() == 1:
            statistics = Moran_Local(y,
                                     w,
                                     transformation=type_w,
                                     permutations=permutationsValue)
        else:
            # NOTE(review): ``x`` only exists when checkBox_moranBi is
            # checked - confirm validator() guarantees that here.
            statistics = Moran_Local_BV(y,
                                        x,
                                        w,
                                        transformation=type_w,
                                        permutations=permutationsValue)

        self.write_file(filename, statistics, layerName, inLayer,
                        inDataSource, y, threshold1)

        # assign the style to the output layer on QGIS
        if self.dlg.checkBox_gi.isChecked() == 1:
            if geom_type == 1:  # point
                stylePath = "/layer_style/hotspots_class.qml"
            else:
                stylePath = "/layer_style/hotspots_class_poly.qml"
            self.iface.activeLayer().loadNamedStyle(
                os.path.dirname(__file__) + stylePath)
        else:
            if geom_type == 1:  # point
                stylePath = "/layer_style/moran_class.qml"
            else:
                stylePath = "/layer_style/moran_class_poly.qml"
            self.iface.activeLayer().loadNamedStyle(
                os.path.dirname(__file__) + stylePath)

    elif result and (self.validator() == 0):
        self.error_msg()
    else:
        self.clear_ui()
def accept(self):
    """Compute global Moran's I for the chosen column and save a report.

    For a saved shapefile the weights file and the DBF next to the
    shapefile are read, Moran's I is computed for the selected column and
    the result is written to the output file.  When statistics are ticked
    under the normality or randomization assumption, the report lists all
    of them; previously each ticked statistic rewrote the file, so only
    the last one survived.  With a single statistic ticked the file
    content is unchanged.

    The dialog is closed afterwards, except when Moran's I is not
    requested, in which case the method returns and leaves it open.
    """
    if self.ui.savedshpradio.isChecked():  # a saved shapefile is selected
        openfile = str(self.ui.inputshpline.text())
        savefile = str(self.ui.outputline.text())  # e.g. "c:\output.csv"
        weightsfile = str(self.ui.Inputweightsline.text())

        if not self.ui.MoranIcheck.checkState():
            return  # without checking Moran's I, stop without closing

        np.random.seed(10)
        w = pysal.open(weightsfile).read()  # read a weights file
        # The attribute table is the .dbf file next to the shapefile.
        f_dbf = pysal.open(openfile[:-3] + "dbf")
        columnindex = self.ui.selectcombobox.currentText()
        try:
            y = np.array(f_dbf.by_col[columnindex])
        finally:
            f_dbf.close()

        mi = pysal.Moran(y, w)
        savestring = str(mi.I)

        # (checkbox, report label, Moran attribute) per assumption, in the
        # order the statistics are reported.
        if self.ui.normalradioButton.isChecked():
            assumption = 'Normality Assumption'
            candidates = (
                (self.ui.expectedcheckbox, 'Expected Value', 'EI'),
                (self.ui.variancecheckbox, 'Variance', 'VI_norm'),
                (self.ui.standardcheckbox, 'Standard Deviation', 'seI_norm'),
                (self.ui.Zcheckbox, 'z-value', 'z_norm'),
                (self.ui.Pcheckbox, 'p-value', 'p_norm'),
            )
        elif self.ui.randomradiobutton.isChecked():
            assumption = 'Randomization Assumption'
            candidates = (
                (self.ui.variancecheckbox, 'Variance', 'VI_rand'),
                (self.ui.standardcheckbox, 'Standard Deviation', 'seI_rand'),
                (self.ui.Zcheckbox, 'z-value', 'z_rand'),
                (self.ui.Pcheckbox, 'p-value', 'p_rand'),
            )
        else:
            assumption = None
            candidates = ()

        rows = [label + ',' + str(getattr(mi, attribute))
                for checkbox, label, attribute in candidates
                if checkbox.checkState()]

        if rows:
            report = (columnindex + '\n' + 'Moran\'s I' + ',' + savestring +
                      '\n' + '\n' + assumption + '\n' + '\n'.join(rows))
        else:
            # No statistic ticked: only the Moran's I value is saved.
            report = savestring

        output = pysal.open(savefile, 'w')
        try:
            output.write(report)
        finally:
            output.close()

    elif self.ui.activecombobox.isChecked():  # an active layer is selected
        layer = self.layers[self.ui.activecombobox.currentIndex()]
        savefile = str(self.ui.outputline.text())
        weightsfile = str(self.ui.Inputweightsline.text())
        if self.ui.MoranIcheck.checkState():
            np.random.seed(10)
            # TODO: compute Moran's I for an active layer; getting the file
            # path from an active layer is still unresolved.
        else:
            return

    # TODO: show all results on screen in a new dialog.
    self.close()  # close the dialog window
import pysal
import numpy as np

# 2009 column of the bundled "usjoin.csv" example table.
income_table = pysal.open(pysal.examples.get_path('usjoin.csv'))
y = income_table.by_col['2009']

# Rook contiguity weights from the bundled "us48" example shapefile.
shape_path = pysal.examples.get_path('us48.shx')
w = pysal.weights.rook_from_shapefile(shape_path)

# Global Moran's I with a one-tailed test.
mi = pysal.Moran(y, w, two_tailed=False)
print(mi)
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
import pysal


def get_w(coormatrix):
    """Return the dense inverse-Euclidean-distance matrix of the points.

    Args:
        coormatrix: Array-like with one row of coordinates per point.

    Returns:
        numpy.ndarray: Square matrix of ``1 / distance`` with a zero
        diagonal.
    """
    distances = squareform(pdist(coormatrix, 'euclidean'))
    # The diagonal is always zero distance; silence the division warning
    # and reset the diagonal to 0 below.
    with np.errstate(divide='ignore'):
        dist_inv = 1 / distances
    np.fill_diagonal(dist_inv, 0)
    return dist_inv


inf = r"/Users/miranda/Documents/AidData/github/Simulation_geoMatch/test.csv"
dta = pd.read_csv(inf)
inmatrix = dta[['lon', 'lat']]
z = np.array(dta['z'])

dist_inv = get_w(inmatrix)
# pysal.Moran needs a W object; the original passed the row means of the
# matrix (a 1-D ndarray), which Moran cannot use.
w = pysal.weights.util.full2W(dist_inv)
mi = pysal.Moran(z, w)
print(dist_inv)
def check_moran(self, rook=False, shpfile=None):
    """Get Moran's I statistic for georeferenced inventory

    This method is utilizing pysal package functions for Moran's I
    statistics. In case of regular grid input arrays the weight matrix is
    constructed as queen's case by default. Each cell (c) has only direct
    neighbors (n) in each direction per default. Alternatively the rook
    type of neighbors can be chosen.

        Rook:  -----------     Queen:  -----------
               |- - - - -|             |- - - - -|
               |- - n - -|             |- n n n -|
               |- n c n -|             |- n c n -|
               |- - n - -|             |- n n n -|
               |- - - - -|             |- - - - -|
               -----------             -----------

    In case of vectorized input data a shape file has to be passed, which
    will be used as base for rook or queen weight matrix creation. Per
    default the file location is taken from class argument <infile>.

    Keyword arguments:
        rook      Boolean to select spatial weights matrix as rook or
                  queen case.
        shpfile   Name of file used to setup weight matrix (not used by
                  this method's body).

    Returns the Moran's I value, which is also stored in ``self.mi``.
    Raises RuntimeError if the statistic cannot be calculated.
    """
    array = self.inv_array
    nnan = 0

    # Construct weight matrix in input grid size.
    w = self.get_weight_matrix(array, rook=rook)

    try:
        # Reshape input array to N,1 dimension.
        array = array.reshape((w.n, 1))

        # Remove weights and neighbors for nan value ids.  Rebinding
        # ``array`` keeps the "non-NA cells" count below correct; it used
        # to report the size of the unpurged array.
        if np.any(np.isnan(array)):
            w, array, nnan = self.rm_nan_weight(w, array)

        # TODO: Use wsp - sparse weight matrix for large grids.
        # Calculate Moran's statistic (with NaN purged input, if any).
        mi = pysal.Moran(array, w, two_tailed=False)

        logger.info("Moran's I successfully calculated")

        # Print out info box with statistics.
        info = "\n" + \
            "---------------------------------------------------\n" +\
            "| #### Global Moran's I statistics ####\n" +\
            "| Inventory name : " + self.name + "\n" +\
            "| -------------------------------------------------\n" +\
            "| Moran's I : " + "%.6f" % mi.I + "\n" +\
            "| Expected value : " + "%.6f" % mi.EI + "\n" +\
            "| p-value : " + "%.6f" % mi.p_norm + "\n" +\
            "| -------------------------------------------------\n" +\
            "| Number of non-NA cells : " + str(len(array)) + "\n" +\
            "| Number of NA cells : " + str(nnan) + "\n" +\
            "---------------------------------------------------\n"
        print(info)

        self.mi = mi.I
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt
        # and SystemExit are no longer turned into RuntimeError.
        msg = "Couldn't calculate Moran's I for inventory <%s>" % (self.name)
        raise RuntimeError(msg)

    self._Inventory__modmtime()

    return (self.mi)
def run(self):
    """Run method that performs all the real work.

    Fills the dialog with the project's shapefile layers, shows it and,
    once it is confirmed with valid input, reads the selected field from
    the chosen layer, builds spatial weights (distance band for point
    layers, queen contiguity otherwise), computes the local Getis-Ord
    statistic with ``G_Local``, writes it with ``write_file`` and applies
    the hotspot style to the active layer.
    """
    self.clear_ui()
    layers_list = []
    layers_shp = []

    # Show the shapefiles in the ComboBox
    layers = self.iface.legendInterface().layers()
    if len(layers) == 0:  # no layers in the project: nothing to analyse
        return

    for layer in layers:
        # to not consider Openlayers basemaps in the layer list
        if hasattr(layer, "dataProvider"):
            # directory including filename
            myfilepath = layer.dataProvider().dataSourceUri()
            # splitting into directory and filename
            (myDirectory, nameFile) = os.path.split(myfilepath)
            if (".shp" in nameFile):
                layers_list.append(layer.name())
                layers_shp.append(layer)
    self.dlg.comboBox.addItems(layers_list)  # adding layers to comboBox

    selectedLayerIndex = self.dlg.comboBox.currentIndex()
    # Strict upper bound: index == len(layers_shp) is out of range too.
    if selectedLayerIndex < 0 or selectedLayerIndex >= len(layers_shp):
        return
    selectedLayer = layers_shp[selectedLayerIndex]
    # fetching fieldnames of layer
    fieldnames = [field.name() for field in selectedLayer.pendingFields()]
    self.clear_fields()
    self.dlg.comboBox_C.addItems(fieldnames)

    try:
        self.dlg.comboBox.activated.connect(
            lambda: self.load_comboBox(layers_shp))
        self.dlg.comboBox.currentIndexChanged.connect(
            lambda: self.load_comboBox(layers_shp))
        # checkbox toggle events
        self.dlg.checkBox_optimizeDistance.toggled.connect(
            self.optimizedThreshold)
        self.dlg.checkBox_randomPerm.toggled.connect(self.randomPerm)
        self.load_comboBox(layers_shp)
    except Exception:
        # Best effort, as before: give up silently when the dialog cannot
        # be wired up (no longer traps KeyboardInterrupt/SystemExit).
        return

    # show the dialog
    self.dlg.show()
    # Run the dialog event loop
    result = self.dlg.exec_()
    # See if OK was pressed and fields are not empty
    if result and (self.validator() == 1):
        selectedLayerIndex = self.dlg.comboBox.currentIndex()
        # Bounded by layers_shp (the list indexed below), strictly.
        if selectedLayerIndex < 0 or selectedLayerIndex >= len(layers_shp):
            return
        selectedLayer = layers_shp[selectedLayerIndex]
        layerName = selectedLayer.dataProvider().dataSourceUri()
        C = selectedLayer.fieldNameIndex(self.dlg.comboBox_C.currentText())
        filename = self.dlg.lineEdit.text()
        (path, layer_id) = layerName.split('|')
        inDriver = ogr.GetDriverByName("ESRI Shapefile")
        inDataSource = inDriver.Open(path, 0)
        inLayer = inDataSource.GetLayer()
        # Named geom_type to avoid shadowing the builtin ``type``.
        geom_type = inLayer.GetLayerDefn().GetGeomType()

        u = []
        for i in range(0, inLayer.GetFeatureCount()):
            geometry = inLayer.GetFeature(i)
            u.append(geometry.GetField(C))
        y = numpy.array(u)  # attributes vector

        if geom_type == 1:  # point
            t = ()
            for feature in inLayer:
                geometry = feature.GetGeometryRef()
                xy = (geometry.GetX(), geometry.GetY())
                t = t + (xy, )

            if self.dlg.checkBox_optimizeDistance.isChecked(
            ) == 0:  # if threshold is given
                threshold1 = int(self.dlg.lineEditThreshold.text())
            else:  # if user needs to optimize threshold
                # Keep the distance whose global Moran z-score is largest.
                mx_moran = -1000.0
                mx_i = -1000.0
                minT = int(self.dlg.lineEdit_minT.text())
                maxT = int(self.dlg.lineEdit_maxT.text())
                dist = int(self.dlg.lineEdit_dist.text())
                for i in range(minT, maxT + dist, dist):
                    w = DistanceBand(t, threshold=i, p=2, binary=False)
                    moran = pysal.Moran(y, w)
                    if moran.z_norm > mx_moran:
                        mx_i = i
                        mx_moran = moran.z_norm
                threshold1 = int(mx_i)

            w = DistanceBand(t, threshold1, p=2, binary=False)
        else:  # polygon
            # Use the selected layer's file.  The original used
            # ``myfilepath``, the leftover loop variable holding the last
            # layer of the project.
            w = pysal.queen_from_shapefile(path)
            threshold1 = "None/Queen's Case used"

        if self.dlg.checkBox_rowStandard.isChecked() == 1:
            type_w = "R"
        else:
            type_w = "B"

        if self.dlg.checkBox_randomPerm.isChecked() == 1:
            permutationsValue = int(self.dlg.lineEdit_random.text())
        else:
            permutationsValue = 999

        # Fixed seed so that the permutation results are reproducible.
        numpy.random.seed(12345)
        lg_star = G_Local(y,
                          w,
                          star=True,
                          transform=type_w,
                          permutations=permutationsValue)
        self.write_file(filename, inLayer, lg_star,
                        self.dlg.comboBox_C.currentText(), C, layerName,
                        inLayer, inDataSource, y, threshold1)

        # assign the style to the output layer on QGIS
        if geom_type == 1:  # point
            stylePath = "/hotspots_class.qml"
        else:
            stylePath = "/hotspots_class_poly.qml"
        self.iface.activeLayer().loadNamedStyle(
            os.path.dirname(__file__) + stylePath)

    elif result and (self.validator() == 0):
        self.error_msg()
    else:
        self.clear_ui()
# Keep only the rows with a non-null "app" value.
D = D[D.app.notnull()]
# Ordinary least squares of app on n_subwayex, and its summary.
M = ols('app ~ n_subwayex', data=D).fit()
print(M.summary())
# Regression diagnostic plots for the n_subwayex regressor.
fig = plt.figure(figsize=(12, 8))
fig = smg.regressionplots.plot_regress_exog(M, 'n_subwayex', fig=fig)
plt.show()
# Queen contiguity weights from the shapefile.
qW = ps.queen_from_shapefile(shp_path)
# NOTE: D is rebound here to the table read from the shapefile, so the
# null filter applied above no longer applies from this point on.
D = ps.pdio.read_files(shp_path)
D.head()
# Global Moran's I of n_assaults (one-tailed), passed as a column vector.
mi = ps.Moran(D.n_assaults.values[:, None], qW, two_tailed=False)
# Bare expressions: they only display a value in an interactive session.
mi.I
mi.EI
# Dependent and explanatory variables as column vectors.
y = D.n_assaults.values[:, None]
xs = D.n_subwayex.values[:, None]
# OLS with spatial diagnostics based on the queen weights.
m1 = ps.spreg.OLS(y, xs, w=qW, spat_diag=True)
print(m1.summary)
# Spatial lag of n_subwayex, stored as the new column w_subway.
sl = ps.lag_spatial(qW, D.n_subwayex.values[:, None])
D_sl = D.assign(w_subway=sl)
import processing
from processing.tools.vector import VectorWriter
from qgis.core import *
from PyQt4.QtCore import *

# Global Moran's I for one field of the input shapefile.  ``field``,
# ``contiguity`` and ``input`` are provided by the script's parameters.
field = field[0:10]  # try to handle Shapefile field length limit

if contiguity == 'queen':
    print('INFO: Global Moran\'s using queen contiguity')
    w = pysal.queen_from_shapefile(input)
else:
    print('INFO: Global Moran\'s using rook contiguity')
    w = pysal.rook_from_shapefile(input)

# Open the DBF next to the input shapefile directly.  The original wrapped
# the path in pysal.examples.get_path(), which resolves names of pysal's
# bundled example data, not user files.
f = pysal.open(input.replace('.shp', '.dbf'))
y = np.array(f.by_col[str(field)])

# Row-standardised weights, 999 permutations.
m = pysal.Moran(y, w, transformation="r", permutations=999)
i = m.I

# Single-argument print(...) behaves identically on Python 2 and 3.
print("Moran's I: %f" % (m.I))
print("INFO: Moran's I values range from -1 (indicating perfect dispersion) to +1 (perfect correlation). Values close to -1/(n-1) indicate a random spatial pattern.")
print("p_norm: %f" % (m.p_norm))
print("p_rand: %f" % (m.p_rand))
print("p_sim: %f" % (m.p_sim))
print("INFO: p values smaller than 0.05 indicate spatial autocorrelation that is significant at the 5% level.")
print("z_norm: %f" % (m.z_norm))
print("z_rand: %f" % (m.z_rand))
print("z_sim: %f" % (m.z_sim))
print("INFO: z values greater than 1.96 or smaller than -1.96 indicate spatial autocorrelation that is significant at the 5% level.")
percent16.min(), percent16.max(), linestyle='--')
# Red line of best fit, using the global I as slope.
plt.plot(percent16, a + b * percent16, 'r')
plt.title('Morans I Queen Scatterplot')
plt.ylabel('Spatial Lag of Percentagew in 2016 per County ')
plt.xlabel('Percent of Deaths')
plt.show()

# In[6]:

# Calculating Moran's I for the dataset that is being used.
# This calculates the slope of the red line, i.e. the Moran's I value,
# along with the pseudo p-value.
I_percent16 = ps.Moran(data.percent16.values, Queen)
I_percent16.I, I_percent16.p_sim

# In[7]:

# Calculating the local autocorrelation statistic.
# Autocorrelation is a characteristic of data in which the correlation
# between the values of the same variable is based on related objects.
# The output shows the local Moran's I of the first 50 observations and
# their p-values.
LMo_percent16 = ps.Moran_Local(data.percent16.values, Queen)
LMo_percent16.Is[0:50], LMo_percent16.p_sim[0:50]

# In[8]:

# Same local statistic, recomputed with 9999 permutations.
LMo_percent16 = ps.Moran_Local(data.percent16.values,
                               Queen,
                               permutations=9999)
LMo_percent16.Is[0:50], LMo_percent16.p_sim[0:50]
########################################### # Aux Objects (Time Series, sliders, etc. # ########################################### step = 5 years_by_step = list(map(str, list(range(1929, 2010, step)))) # Calculating Moran'I for every column started_datetime = datetime.now() dateYYMMDD = started_datetime.strftime('%Y%m%d') timeHHMMSS = started_datetime.strftime('%H%M%S') print("Calculating Moran'I start at %s %s" % (started_datetime.strftime('%Y-%m-%d'), started_datetime.strftime('%H:%M:%S'))) morans = [] for i in cols_to_calculate: aux = ps.Moran(df_map_pcr[i], W).I morans.append(aux) print('morans:', len(morans)) print(morans) ended_datetime = datetime.now() elapsed = ended_datetime - started_datetime total_seconds = int(elapsed.total_seconds()) hours, remainder = divmod(total_seconds, 60 * 60) minutes, seconds = divmod(remainder, 60) print("Calculating Moran'I ended at %s %s Elapsed %02d:%02d:%02d" % (ended_datetime.strftime('%Y-%m-%d'), ended_datetime.strftime('%H:%M:%S'), hours, minutes, seconds)) # Test code form timepath-graph def calculate_lag_value(x):