Ejemplo n.º 1
0
    def fit(self, y, n_jobs=1, permutations=999):
        """
        Compute the univariate local join count statistic for `y` and,
        optionally, pseudo p-values through conditional randomization.

        Arguments
        ---------
        y               : numpy.ndarray
                          array containing binary (0/1) data
        n_jobs          : int
                          (default=1)
                          Accepted for interface compatibility only. The
                          value stored on the estimator (``self.n_jobs``)
                          is the one forwarded to the conditional
                          randomization; this argument is not used.
        permutations    : int
                          (default=999)
                          number of random permutations for calculation of
                          pseudo p_values. When falsy (e.g. 0), no
                          randomization is run and ``p_sim``/``rjoins``
                          are not set by this call.

        Returns
        -------
        the fitted estimator.

        Notes
        -----
        Technical details and derivations found in :cite:`AnselinLi2019`.

        The following attributes are set on the estimator: ``y`` (the input
        as a float array), ``n`` (its length), ``w`` (the weights with a
        zero-filled diagonal and binary transform), ``LJC`` (the observed
        statistic) and, when `permutations` is truthy, ``p_sim`` and
        ``rjoins``. Entries of ``p_sim`` whose ``LJC`` is 0 are NaN.

        Examples
        --------
        >>> import libpysal
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> y = np.ones(16)
        >>> y[0:8] = 0
        >>> LJC_uni = Local_Join_Count(connectivity=w).fit(y)
        >>> LJC_uni.LJC
        >>> LJC_uni.p_sim

        Guerry data replicating GeoDa tutorial
        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['SELECTED'] = 0
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10997), 'SELECTED'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_uni = Local_Join_Count(connectivity=w).fit(guerry_ds['SELECTED'])
        >>> LJC_uni.LJC
        >>> LJC_uni.p_sim
        """
        # Need to ensure that the np.array() are of
        # dtype='float' for numba
        y = np.array(y, dtype='float')

        # Fill the diagonal with 0s and switch to the binary transform
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = 'b'

        self.y = y
        self.n = len(y)
        self.w = w

        self.LJC = self._statistic(y, w)

        if permutations:
            # NOTE(review): the estimator-level n_jobs is used here, not the
            # `n_jobs` argument of fit(); kept as-is for backward
            # compatibility. self.seed is not forwarded either -- confirm
            # whether _crand_plus accepts a seed before wiring it through.
            self.p_sim, self.rjoins = _crand_plus(
                z=self.y,
                w=self.w,
                observed=self.LJC,
                permutations=permutations,
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=_ljc_uni,
            )
            # Set p-values for those with LJC of 0 to NaN
            self.p_sim[self.LJC == 0] = np.nan

        return self
Ejemplo n.º 2
0
    def fit(self, variables, n_jobs=1, permutations=999):
        """
        Compute the multivariate local join count statistic for a set of
        binary variables and, optionally, pseudo p-values through
        conditional randomization.

        Arguments
        ---------
        variables     : numpy.ndarray
                        array(s) containing binary (0/1) data
        n_jobs        : int
                        (default=1)
                        Accepted for interface compatibility only. The
                        value stored on the estimator (``self.n_jobs``)
                        is the one forwarded to the conditional
                        randomization; this argument is not used.
        permutations  : int
                        (default=999)
                        number of random permutations for calculation of
                        pseudo p_values. When falsy (e.g. 0), no
                        randomization is run and ``p_sim``/``rjoins``
                        are not set by this call.

        Returns
        -------
        the fitted estimator.

        Notes
        -----
        Technical details and derivations can be found in :cite:`AnselinLi2019`.

        The following attributes are set on the estimator: ``n`` (length of
        the first variable), ``w`` (the weights with a zero-filled diagonal
        and binary transform), ``variables`` (the inputs as a float array),
        ``ext`` (the element-wise product of the variables), ``LJC`` (the
        observed statistic) and, when `permutations` is truthy, ``p_sim``
        and ``rjoins``. Entries of ``p_sim`` whose ``LJC`` is 0 are NaN.

        Examples
        --------
        >>> import libpysal
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> x = np.ones(16)
        >>> x[0:8] = 0
        >>> z = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
        >>> y = [0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,1]
        >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([x, y, z])
        >>> LJC_MV.LJC
        >>> LJC_MV.p_sim

        Guerry data extending GeoDa tutorial
        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['infq5'] = 0
        >>> guerry_ds['donq5'] = 0
        >>> guerry_ds['suic5'] = 0
        >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
        >>> guerry_ds.loc[(guerry_ds['Suicids'] > 55564), 'suic5'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']])
        >>> LJC_MV.LJC
        >>> LJC_MV.p_sim
        """

        # Fill the diagonal with 0s and switch to the binary transform
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = 'b'

        self.n = len(variables[0])
        self.w = w

        self.variables = np.array(variables, dtype='float')

        # Need to ensure that the product is an
        # np.array() of dtype='float' for numba.
        # For 0/1 inputs the product is 1 only where every variable is 1.
        self.ext = np.array(np.prod(np.vstack(variables), axis=0),
                            dtype='float')

        self.LJC = self._statistic(variables, w)

        if permutations:
            # keep_simulations is forwarded so the estimator setting is
            # honoured, matching the univariate estimator's fit().
            # NOTE(review): the estimator-level n_jobs is used here, not the
            # `n_jobs` argument of fit(); kept as-is for backward
            # compatibility.
            self.p_sim, self.rjoins = _crand_plus(
                z=self.ext,
                w=self.w,
                observed=self.LJC,
                permutations=permutations,
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=_ljc_mv
            )
            # Set p-values for those with LJC of 0 to NaN
            self.p_sim[self.LJC == 0] = np.nan

        return self
Ejemplo n.º 3
0
class Local_Join_Count_BV(BaseEstimator):
    """Bivariate Local Join Count Statistic"""
    def __init__(self,
                 connectivity=None,
                 permutations=999,
                 n_jobs=1,
                 keep_simulations=True,
                 seed=None):
        """
        Initialize a Local_Join_Count_BV estimator
        Arguments
        ---------
        connectivity     : scipy.sparse matrix object
                           the connectivity structure describing
                           the relationships between observed units.
                           Need not be row-standardized.
                           NOTE(review): fit() passes this object to
                           ``weights.util.fill_diagonal`` and sets
                           ``.transform`` on the result, so a libpysal
                           weights object appears to be expected -- confirm.
        permutations     : int
                           number of random permutations for calculation of pseudo
                           p_values
        n_jobs           : int
                           Number of cores to be used in the conditional randomisation. If -1,
                           all available cores are used.
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications under the null
                           is stored in memory and accessible; otherwise, replications
                           are not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional randomizations.
                           Must be set here, and not outside of the function, since numba
                           does not correctly interpret external seeds
                           nor numpy.random.RandomState instances.

        """

        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, x, y, case="CLC"):
        """
        Compute the bivariate local join count statistic for `x` and `y`
        and, when the estimator's ``permutations`` is truthy, pseudo
        p-values through conditional randomization.

        Arguments
        ---------
        x                : numpy.ndarray
                           array containing binary (0/1) data
        y                : numpy.ndarray
                           array containing binary (0/1) data
        case             : str
                           "BJC" for bivariate local join count,
                           "CLC" for co-location local join count.
                           Details in :cite:`AnselinLi2019`.

        Returns
        -------
        the fitted estimator.

        Raises
        ------
        NotImplementedError
            If permutations are requested and `case` is neither "BJC"
            nor "CLC".

        Notes
        -----
        Technical details and derivations can be found in :cite:`AnselinLi2019`.

        After fitting, the estimator keeps ``LJC``, ``case`` and (when
        permutations were run) ``p_sim``. The inputs, the constructor
        settings and the simulated replications are removed from the
        instance before returning, so the same instance cannot be fitted
        a second time.

        Examples
        --------
        >>> import libpysal
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> x = np.ones(16)
        >>> x[0:8] = 0
        >>> y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
        >>> LJC_BV_C1 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="BJC")
        >>> LJC_BV_C2 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="CLC")
        >>> LJC_BV_C1.LJC
        >>> LJC_BV_C1.p_sim
        >>> LJC_BV_C2.LJC
        >>> LJC_BV_C2.p_sim

        Commpop data replicating GeoDa tutorial (Case 1)
        >>> import libpysal
        >>> import geopandas as gpd
        >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg")
        >>> w = libpysal.weights.Queen.from_dataframe(commpop)
        >>> LJC_BV_Case1 = Local_Join_Count_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC')
        >>> LJC_BV_Case1.LJC
        >>> LJC_BV_Case1.p_sim

        Guerry data replicating GeoDa tutorial (Case 2)
        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['infq5'] = 0
        >>> guerry_ds['donq5'] = 0
        >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_BV_Case2 = Local_Join_Count_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC')
        >>> LJC_BV_Case2.LJC
        >>> LJC_BV_Case2.p_sim
        """
        # Need to ensure that the np.array() are of
        # dtype='float' for numba
        x = np.array(x, dtype='float')
        y = np.array(y, dtype='float')

        # Fill the diagonal with 0s and switch to the binary transform
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = 'b'

        self.x = x
        self.y = y
        self.n = len(x)
        self.w = w
        self.case = case

        permutations = self.permutations

        self.LJC = self._statistic(x, y, w, case=case)

        if permutations:
            # Both cases share the same randomization call and differ only
            # in the kernel that recomputes the statistic.
            if case == "BJC":
                stat_func = _ljc_bv_case1
            elif case == "CLC":
                stat_func = _ljc_bv_case2
            else:
                raise NotImplementedError(
                    f'The requested LJC method ({case}) '
                    'is not currently supported!')

            # NOTE(review): self.seed is not forwarded -- confirm whether
            # _crand_plus accepts a seed before wiring it through.
            self.p_sim, self.rjoins = _crand_plus(
                z=np.column_stack((x, y)),
                w=self.w,
                observed=self.LJC,
                permutations=permutations,
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=stat_func)
            # Set p-values for those with LJC of 0 to NaN
            self.p_sim[self.LJC == 0] = np.nan

        # Strip inputs, settings and intermediate state from the fitted
        # object.
        del (self.n, self.keep_simulations, self.n_jobs, self.permutations,
             self.seed, self.w, self.x, self.y, self.connectivity)
        # rjoins only exists when permutations were run; deleting it
        # unconditionally would raise AttributeError otherwise.
        if hasattr(self, 'rjoins'):
            del self.rjoins

        return self
Ejemplo n.º 4
0
    def fit(self, x):
        """
        Compute the local Geary statistic for `x` and, when the estimator's
        ``permutations`` is truthy, pseudo p-values through conditional
        randomization; optionally classify each observation.

        Arguments
        ---------
        x                : numpy.ndarray
                           array containing continuous data

        Returns
        -------
        the fitted estimator.

        Raises
        ------
        ValueError
            If ``self.labels`` is enabled while ``self.permutations`` is
            falsy: the labels are derived from ``p_sim``, which is only
            produced by the permutation step.

        Notes
        -----
        Technical details and derivations can be found in :cite:`Anselin1995`.

        The weights object stored in ``self.connectivity`` is switched to
        the ``"r"`` transform in place.

        Attributes set on the estimator: ``localG`` (observed statistic);
        ``p_sim`` and ``rlocalG`` when permutations are run; ``labs`` when
        ``self.labels`` is truthy, coded as 1 (outlier), 2 (cluster),
        3 (other), 4 (non-significant) and NaN where no rule matches.

        Examples
        --------
        Guerry data replication GeoDa tutorial
        >>> import libpysal as lp
        >>> import geopandas as gpd
        >>> guerry = lp.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> w = lp.weights.Queen.from_dataframe(guerry_ds)
        >>> y = guerry_ds['Donatns']
        >>> lG = Local_Geary(connectivity=w).fit(y)
        >>> lG.localG[0:5]
        >>> lG.p_sim[0:5]
        """
        x = np.asarray(x).flatten()

        w = self.connectivity
        w.transform = "r"

        permutations = self.permutations
        sig = self.sig

        # Labels are built from p_sim; without permutations there is no
        # p_sim for this fit (or only a stale one from an earlier fit).
        if self.labels and not permutations:
            raise ValueError(
                "labels require pseudo p-values; set permutations to a "
                "positive integer or disable labels."
            )

        self.localG = self._statistic(x, w)

        if permutations:
            # The randomization operates on the standardized variable.
            # NOTE(review): self.seed is not forwarded -- confirm whether
            # _crand_plus accepts a seed before wiring it through.
            self.p_sim, self.rlocalG = _crand_plus(
                z=(x - np.mean(x)) / np.std(x),
                w=w,
                observed=self.localG,
                permutations=permutations,
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=_local_geary,
            )

        if self.labels:
            Eij_mean = np.mean(self.localG)
            x_mean = np.mean(x)
            significant = self.p_sim <= sig
            below_mean = self.localG < Eij_mean
            # Start from all-NaN and fill in the classes
            self.labs = np.full(len(x), np.nan)
            # Outliers
            self.labs[below_mean & (x > x_mean) & significant] = 1
            # Clusters
            self.labs[below_mean & (x < x_mean) & significant] = 2
            # Other
            self.labs[(self.localG > Eij_mean) & significant] = 3
            # Non-significant
            self.labs[self.p_sim > sig] = 4

        return self
Ejemplo n.º 5
0
        self.w = w
        self.case = case

        keep_simulations = self.keep_simulations
        n_jobs = self.n_jobs
        seed = self.seed

        self.LJC = self._statistic(x, z, w, case=case)

        if permutations:
            if case == "BJC":
                self.p_sim, self.rjoins = _crand_plus(
                    z=np.column_stack((x, z)),
                    w=self.w,
                    observed=self.LJC,
                    permutations=permutations,
                    keep=True,
                    n_jobs=n_jobs,
                    stat_func=_ljc_bv_case1,
                )
                # Set p-values for those with LJC of 0 to NaN
                self.p_sim[self.LJC == 0] = "NaN"
            elif case == "CLC":
                self.p_sim, self.rjoins = _crand_plus(
                    z=np.column_stack((x, z)),
                    w=self.w,
                    observed=self.LJC,
                    permutations=permutations,
                    keep=True,
                    n_jobs=n_jobs,
                    stat_func=_ljc_bv_case2,