Beispiel #1
0
    def __init__(self, y, w, permutations=PERMUTATIONS):
        """Compute the statistic ``G`` for ``y`` together with its inference.

        Parameters
        ----------
        y : array-like
            Observed values. Converted with ``np.asarray`` and flattened to
            one dimension before use.
        w : object
            Weights object. NOTE: its ``transform`` attribute is set to
            ``"B"`` in place, so the caller's object is modified. It is
            stored as ``self.w``.
        permutations : int
            Number of random permutations of ``y`` used to build the
            reference distribution. A falsy value skips the simulation and
            none of the ``*_sim`` attributes are created.

        Attributes set here
        -------------------
        n, y, w, permutations, y2, den_sum, G, z_norm, p_norm
            Always set.
        sim, p_sim, EG_sim, seG_sim, VG_sim, z_sim, p_z_sim
            Set only when ``permutations`` is truthy.
        """
        y = np.asarray(y).flatten()
        self.n = len(y)
        self.y = y
        w.transform = "B"
        self.w = w
        self.permutations = permutations
        # Private helper defined outside this view; self.EG and self.VG are
        # read below, so it is presumably what populates them.
        self.__moments()
        self.y2 = y * y
        # Sum of y_i * y_j over all pairs with i != j.  The sum over *all*
        # pairs equals (sum_i y_i) ** 2, so the n-by-n outer product never
        # needs to be materialised: O(n) time and memory instead of O(n^2).
        self.den_sum = y.sum() ** 2 - self.y2.sum()
        # den_sum is assigned before __calc runs, as in the original
        # ordering (__calc is not visible here and may read it).
        self.G = self.__calc(self.y)
        self.z_norm = (self.G - self.EG) / np.sqrt(self.VG)
        self.p_norm = 1.0 - stats.norm.cdf(np.abs(self.z_norm))

        if permutations:
            sim = np.array(
                [
                    self.__calc(np.random.permutation(self.y))
                    for _ in range(permutations)
                ]
            )
            self.sim = sim
            # Count simulated values at least as large as the observed one,
            # then fold to the smaller tail.
            larger = (sim >= self.G).sum()
            if (self.permutations - larger) < larger:
                larger = self.permutations - larger
            self.p_sim = (larger + 1.0) / (permutations + 1.0)
            self.EG_sim = sim.sum() / permutations
            self.seG_sim = sim.std()
            self.VG_sim = self.seG_sim ** 2
            self.z_sim = (self.G - self.EG_sim) / self.seG_sim
            self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim))
Beispiel #2
0
    def calc(self):
        """Compute the local statistics and their analytical moments.

        Reads ``self.y``, ``self.star``, ``self.w.sparse`` and ``self.w.n``.
        When ``self.star`` is falsy, each observation's own value is taken
        out of the totals used for its denominator, mean and variance, and
        the effective sample size is reduced by one.

        Sets
        ----
        y_sum : sum of ``self.y``
        Gs : per-observation statistic
        EGs : per-observation expected value
        VGs : per-observation expected variance
        Zs : ``(Gs - EGs) / sqrt(VGs)``
        """
        values = self.y
        adjacency = self.w.sparse

        total = values.sum()
        self.y_sum = total

        # A bool that doubles as a 0/1 multiplier in the arithmetic below.
        exclude_self = not self.star
        n_eff = self.w.n - exclude_self

        # Total of y available to each observation (own value removed when
        # ``exclude_self`` is set); shared by the statistic and the mean.
        available = total - values * exclude_self
        local_stat = (adjacency @ values) / available

        # --- moments for analytical inference -------------------------
        mean_y = available / n_eff
        # Variance as E[y^2] - E[y]^2, with the own-value term subtracted
        # from the sum of squares in the same way as for the mean.
        squares_total = (values**2).sum()
        mean_y_sq = (squares_total - (values**2) * exclude_self) / n_eff
        var_y = mean_y_sq - mean_y**2

        # Row sums of the weights matrix, flattened to one value per row.
        row_totals = np.asarray(adjacency.sum(axis=1)).squeeze()
        expectation = row_totals / n_eff
        relative_var = var_y / (mean_y**2)
        variance = (
            row_totals * (n_eff - row_totals) / (n_eff - 1)
            * (1 / n_eff**2) * relative_var
        )
        standardized = (local_stat - expectation) / np.sqrt(variance)

        self.Gs = local_stat
        self.EGs = expectation
        self.VGs = variance
        self.Zs = standardized
Beispiel #3
0
    def calc(self):
        """Compute the local statistics ``Gs`` with ``EGs``, ``VGs``, ``Zs``.

        Reads ``self.y``, ``self.w``, ``self.n``, ``self.star``,
        ``self.w_transform`` and ``self.w_original``.

        Two variants are handled:

        * ``self.star`` falsy: the own value of each observation is removed
          from the denominator, the mean and the variance, and ``N`` is
          ``self.n - 1``.
        * ``self.star`` truthy: ``self.w.transform`` is switched to ``"B"``,
          the own value is added to the lagged value, and ``N`` is
          ``self.n``.

        Sets ``self.y_sum``, ``self.Gs``, ``self.EGs``, ``self.VGs`` and
        ``self.Zs``. ``self.w.transform`` is always reset to
        ``self.w_original`` on exit, including when an exception is raised,
        so a failure cannot leave the shared weights object in the
        temporary ``"B"`` state.
        """
        try:
            y = self.y
            y2 = y * y
            # Array reductions instead of the builtin ``sum``, which would
            # iterate the array element by element in Python.
            self.y_sum = y_sum = y.sum()
            y2_sum = y2.sum()

            if not self.star:
                # ``slag`` is defined outside this view (presumably a
                # spatial-lag helper -- confirm against the imports).
                yl = 1.0 * slag(self.w, y)
                ydi = y_sum - y
                self.Gs = yl / ydi
                N = self.n - 1
                yl_mean = ydi / N
                s2 = (y2_sum - y2) / N - (yl_mean) ** 2
            else:
                self.w.transform = "B"
                yl = 1.0 * slag(self.w, y)
                yl += y
                if self.w_transform == "r":
                    # +1.0 accounts for the own value just added to ``yl``.
                    yl = yl / (self.__getCardinalities() + 1.0)
                self.Gs = yl / y_sum
                N = self.n
                yl_mean = y.mean()
                s2 = y.var()

            EGs_num, VGs_num = 1.0, 1.0
            if self.w_transform == "b":
                W = self.__getCardinalities()
                # In-place add kept from the original: ``self.star`` acts
                # as 0/1, adding one to every count in the star variant.
                W += self.star
                EGs_num = W * 1.0
                VGs_num = (W * (1.0 * N - W)) / (1.0 * N - 1)

            self.EGs = (EGs_num * 1.0) / N
            self.VGs = (
                (VGs_num) * (1.0 / (N ** 2)) * ((s2 * 1.0) / (yl_mean ** 2))
            )
            self.Zs = (self.Gs - self.EGs) / np.sqrt(self.VGs)
        finally:
            # Undo any temporary transform change on the shared weights.
            self.w.transform = self.w_original