def fit(self, y, n_jobs=1, permutations=999):
    """
    Fit the univariate local join count statistic to binary data.

    Arguments
    ---------
    y            : numpy.ndarray
                   array containing binary (0/1) data
    n_jobs       : int
                   Accepted for backward compatibility only. The value
                   stored on the estimator (``self.n_jobs``) is the one
                   that is forwarded to the conditional randomisation.
    permutations : int
                   number of random permutations used to compute pseudo
                   p-values. If falsy (0 or None) no p-values are computed
                   and ``p_sim`` / ``rjoins`` are not set.

    Returns
    -------
    the fitted estimator.

    Attributes set
    --------------
    y, n, w : the float-converted input, its length and the binary
              weights with a zeroed diagonal
    LJC     : the local join counts returned by ``self._statistic``
    p_sim   : pseudo p-values (NaN where ``LJC == 0``); only when
              ``permutations`` is truthy
    rjoins  : second value returned by the conditional randomisation;
              only when ``permutations`` is truthy

    Notes
    -----
    Technical details and derivations found in :cite:`AnselinLi2019`.

    Examples
    --------
    >>> import libpysal
    >>> import numpy as np
    >>> w = libpysal.weights.lat2W(4, 4)
    >>> y = np.ones(16)
    >>> y[0:8] = 0
    >>> LJC_uni = Local_Join_Count(connectivity=w).fit(y)
    >>> LJC_uni.LJC
    >>> LJC_uni.p_sim

    Guerry data replicating GeoDa tutorial

    >>> import libpysal
    >>> import geopandas as gpd
    >>> guerry = libpysal.examples.load_example('Guerry')
    >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
    >>> guerry_ds['SELECTED'] = 0
    >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10997), 'SELECTED'] = 1
    >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
    >>> LJC_uni = Local_Join_Count(connectivity=w).fit(guerry_ds['SELECTED'])
    >>> LJC_uni.LJC
    >>> LJC_uni.p_sim
    """
    # Need to ensure that the np.array() are of dtype='float' for numba
    y = np.array(y, dtype="float")

    # Fill the diagonal with 0s and switch to binary weights
    w = weights.util.fill_diagonal(self.connectivity, val=0)
    w.transform = "b"

    self.y = y
    self.n = len(y)
    self.w = w

    self.LJC = self._statistic(y, w)

    if permutations:
        # NOTE(review): self.seed is not forwarded here; confirm whether
        # _crand_plus accepts a seed before wiring it through.
        self.p_sim, self.rjoins = _crand_plus(
            z=self.y,
            w=self.w,
            observed=self.LJC,
            permutations=permutations,
            keep=self.keep_simulations,
            # The estimator-level setting wins over the fit() argument.
            n_jobs=self.n_jobs,
            stat_func=_ljc_uni,
        )
        # Set p-values for those with LJC of 0 to NaN (a real float NaN,
        # not the string 'NaN')
        self.p_sim[self.LJC == 0] = np.nan

    return self
def fit(self, variables, n_jobs=1, permutations=999):
    """
    Fit the multivariate local join count statistic to binary data.

    Arguments
    ---------
    variables    : numpy.ndarray
                   array(s) containing binary (0/1) data
    n_jobs       : int
                   Accepted for backward compatibility only. The value
                   stored on the estimator (``self.n_jobs``) is the one
                   that is forwarded to the conditional randomisation.
    permutations : int
                   number of random permutations used to compute pseudo
                   p-values. If falsy (0 or None) no p-values are computed
                   and ``p_sim`` / ``rjoins`` are not set.

    Returns
    -------
    the fitted estimator.

    Attributes set
    --------------
    n, w      : length of the first variable and the binary weights with
                a zeroed diagonal
    variables : the inputs as a float array
    ext       : element-wise product of all variables (float array)
    LJC       : the local join counts returned by ``self._statistic``
    p_sim     : pseudo p-values (NaN where ``LJC == 0``); only when
                ``permutations`` is truthy
    rjoins    : second value returned by the conditional randomisation;
                only when ``permutations`` is truthy

    Notes
    -----
    Technical details and derivations can be found in :cite:`AnselinLi2019`.

    Examples
    --------
    >>> import libpysal
    >>> import numpy as np
    >>> w = libpysal.weights.lat2W(4, 4)
    >>> x = np.ones(16)
    >>> x[0:8] = 0
    >>> z = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
    >>> y = [0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,1]
    >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([x, y, z])
    >>> LJC_MV.LJC
    >>> LJC_MV.p_sim

    Guerry data extending GeoDa tutorial

    >>> import libpysal
    >>> import geopandas as gpd
    >>> guerry = libpysal.examples.load_example('Guerry')
    >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
    >>> guerry_ds['infq5'] = 0
    >>> guerry_ds['donq5'] = 0
    >>> guerry_ds['suic5'] = 0
    >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
    >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
    >>> guerry_ds.loc[(guerry_ds['Suicids'] > 55564), 'suic5'] = 1
    >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
    >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']])
    >>> LJC_MV.LJC
    >>> LJC_MV.p_sim
    """
    # Fill the diagonal with 0s and switch to binary weights
    w = weights.util.fill_diagonal(self.connectivity, val=0)
    w.transform = "b"

    self.n = len(variables[0])
    self.w = w
    self.variables = np.array(variables, dtype="float")

    # Need to ensure that the product is an np.array() of dtype='float'
    # for numba
    self.ext = np.array(np.prod(np.vstack(variables), axis=0), dtype="float")

    self.LJC = self._statistic(variables, w)

    if permutations:
        # NOTE(review): self.seed is not forwarded here; confirm whether
        # _crand_plus accepts a seed before wiring it through.
        self.p_sim, self.rjoins = _crand_plus(
            z=self.ext,
            w=self.w,
            observed=self.LJC,
            permutations=permutations,
            # Honour the estimator's keep_simulations option instead of
            # always keeping the simulations.
            keep=self.keep_simulations,
            # The estimator-level setting wins over the fit() argument.
            n_jobs=self.n_jobs,
            stat_func=_ljc_mv,
        )
        # Set p-values for those with LJC of 0 to NaN (a real float NaN,
        # not the string 'NaN')
        self.p_sim[self.LJC == 0] = np.nan

    return self
class Local_Join_Count_BV(BaseEstimator):
    """Bivariate Local Join Count Statistic"""

    def __init__(self, connectivity=None, permutations=999, n_jobs=1,
                 keep_simulations=True, seed=None):
        """
        Initialize a Local_Join_Count_BV estimator

        Arguments
        ---------
        connectivity     : scipy.sparse matrix object
                           the connectivity structure describing
                           the relationships between observed units.
                           Need not be row-standardized.
        permutations     : int
                           number of random permutations for calculation
                           of pseudo p_values
        n_jobs           : int
                           Number of cores to be used in the conditional
                           randomisation. If -1, all available cores
                           are used.
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications
                           under the null is stored in memory and
                           accessible; otherwise, replications are
                           not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional
                           randomizations. Must be set here, and not
                           outside of the function, since numba does not
                           correctly interpret external seeds nor
                           numpy.random.RandomState instances.
                           NOTE(review): ``fit`` does not currently
                           forward this value to the randomisation.
        """
        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, x, y, case="CLC"):
        """
        Fit the bivariate local join count statistic to binary data.

        Arguments
        ---------
        x    : numpy.ndarray
               array containing binary (0/1) data
        y    : numpy.ndarray
               array containing binary (0/1) data
        case : str
               "BJC" for bivariate local join count, "CLC" for
               co-location local join count. Details in
               :cite:`AnselinLi2019`.

        Returns
        -------
        the fitted estimator.

        Raises
        ------
        NotImplementedError
            if ``case`` is neither "BJC" nor "CLC".

        Attributes set
        --------------
        x, y, n, w, case : the float-converted inputs, their length, the
                           binary weights with a zeroed diagonal and the
                           requested case
        LJC              : the local join counts returned by
                           ``self._statistic``
        p_sim            : pseudo p-values (NaN where ``LJC == 0``); only
                           when ``self.permutations`` is truthy
        rjoins           : second value returned by the conditional
                           randomisation; only when ``self.permutations``
                           is truthy

        Notes
        -----
        Technical details and derivations can be found in
        :cite:`AnselinLi2019`.

        Constructor parameters and fitted inputs are left on the
        estimator after fitting so that it can be fit again.

        Examples
        --------
        >>> import libpysal
        >>> import numpy as np
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> x = np.ones(16)
        >>> x[0:8] = 0
        >>> y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
        >>> LJC_BV_C1 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="BJC")
        >>> LJC_BV_C2 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="CLC")
        >>> LJC_BV_C1.LJC
        >>> LJC_BV_C1.p_sim
        >>> LJC_BV_C2.LJC
        >>> LJC_BV_C2.p_sim

        Commpop data replicating GeoDa tutorial (Case 1)

        >>> import libpysal
        >>> import geopandas as gpd
        >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg")
        >>> w = libpysal.weights.Queen.from_dataframe(commpop)
        >>> LJC_BV_Case1 = Local_Join_Count_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC')
        >>> LJC_BV_Case1.LJC
        >>> LJC_BV_Case1.p_sim

        Guerry data replicating GeoDa tutorial (Case 2)

        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['infq5'] = 0
        >>> guerry_ds['donq5'] = 0
        >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_BV_Case2 = Local_Join_Count_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC')
        >>> LJC_BV_Case2.LJC
        >>> LJC_BV_Case2.p_sim
        """
        # Reject unsupported cases before doing any work.
        if case == "BJC":
            stat_func = _ljc_bv_case1
        elif case == "CLC":
            stat_func = _ljc_bv_case2
        else:
            raise NotImplementedError(
                f"The requested LJC method ({case}) is not currently supported!"
            )

        # Need to ensure that the np.array() are of dtype='float' for numba
        x = np.array(x, dtype="float")
        y = np.array(y, dtype="float")

        # Fill the diagonal with 0s and switch to binary weights
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = "b"

        self.x = x
        self.y = y
        self.n = len(x)
        self.w = w
        self.case = case

        permutations = self.permutations

        self.LJC = self._statistic(x, y, w, case=case)

        if permutations:
            self.p_sim, self.rjoins = _crand_plus(
                z=np.column_stack((x, y)),
                w=self.w,
                observed=self.LJC,
                permutations=permutations,
                # Honour the estimator's keep_simulations option instead
                # of always keeping the simulations.
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=stat_func,
            )
            # Set p-values for those with LJC of 0 to NaN (a real float
            # NaN, not the string 'NaN')
            self.p_sim[self.LJC == 0] = np.nan

        # Deliberately no attribute deletion here: removing the
        # constructor parameters (connectivity, permutations, ...) would
        # make a second fit() call fail, and deleting rjoins would raise
        # when no permutations were run.
        return self
def fit(self, x):
    """
    Fit the Local Geary statistic to continuous data.

    Arguments
    ---------
    x : numpy.ndarray
        array containing continuous data

    Returns
    -------
    the fitted estimator.

    Attributes set
    --------------
    localG  : the local statistic returned by ``self._statistic``
    p_sim   : pseudo p-values; only when ``self.permutations`` is truthy
    rlocalG : second value returned by the conditional randomisation;
              only when ``self.permutations`` is truthy
    labs    : float array of labels (1 = outlier, 2 = cluster, 3 = other,
              4 = non-significant, NaN = unclassified); only when
              ``self.labels`` is truthy and p-values were computed

    Notes
    -----
    Technical details and derivations can be found in :cite:`Anselin1995`.

    The ``transform`` attribute of ``self.connectivity`` is set to "r"
    in place.

    Examples
    --------
    Guerry data replication GeoDa tutorial

    >>> import libpysal as lp
    >>> import geopandas as gpd
    >>> guerry = lp.examples.load_example('Guerry')
    >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
    >>> w = lp.weights.Queen.from_dataframe(guerry_ds)
    >>> y = guerry_ds['Donatns']
    >>> lG = Local_Geary(connectivity=w).fit(y)
    >>> lG.localG[0:5]
    >>> lG.p_sim[0:5]
    """
    x = np.asarray(x).flatten()

    w = self.connectivity
    w.transform = "r"

    permutations = self.permutations
    sig = self.sig
    keep_simulations = self.keep_simulations
    n_jobs = self.n_jobs

    self.localG = self._statistic(x, w)

    if permutations:
        # NOTE(review): self.seed is not forwarded here; confirm whether
        # _crand_plus accepts a seed before wiring it through.
        self.p_sim, self.rlocalG = _crand_plus(
            z=(x - np.mean(x)) / np.std(x),
            w=w,
            observed=self.localG,
            permutations=permutations,
            keep=keep_simulations,
            n_jobs=n_jobs,
            stat_func=_local_geary,
        )

    if self.labels:
        if not permutations:
            # Labels are defined in terms of p_sim, which only exists
            # when permutations were run. Skip instead of failing on a
            # missing (or stale) p_sim attribute.
            import warnings

            warnings.warn(
                "Labels require permutation-based p-values; skipping "
                "label computation because `permutations` is not set.",
                stacklevel=2,
            )
        else:
            Eij_mean = np.mean(self.localG)
            x_mean = np.mean(x)
            significant = self.p_sim <= sig
            below_mean = self.localG < Eij_mean

            # Start from an all-NaN vector and fill in the categories
            labs = np.full(len(x), np.nan)
            # Outliers
            labs[below_mean & (x > x_mean) & significant] = 1
            # Clusters
            labs[below_mean & (x < x_mean) & significant] = 2
            # Other
            labs[(self.localG > Eij_mean) & significant] = 3
            # Non-significant
            labs[self.p_sim > sig] = 4
            self.labs = labs

    return self
self.w = w self.case = case keep_simulations = self.keep_simulations n_jobs = self.n_jobs seed = self.seed self.LJC = self._statistic(x, z, w, case=case) if permutations: if case == "BJC": self.p_sim, self.rjoins = _crand_plus( z=np.column_stack((x, z)), w=self.w, observed=self.LJC, permutations=permutations, keep=True, n_jobs=n_jobs, stat_func=_ljc_bv_case1, ) # Set p-values for those with LJC of 0 to NaN self.p_sim[self.LJC == 0] = "NaN" elif case == "CLC": self.p_sim, self.rjoins = _crand_plus( z=np.column_stack((x, z)), w=self.w, observed=self.LJC, permutations=permutations, keep=True, n_jobs=n_jobs, stat_func=_ljc_bv_case2,