def __init__(self, y, w, permutations=PERMUTATIONS):
    """Compute the global G statistic, its analytical moments, and
    (optionally) permutation-based inference.

    Parameters
    ----------
    y : array-like
        Variable of interest; flattened to a 1-d vector of length n.
    w : spatial weights object
        Forced to binary ("B") transform, as the G statistic requires.
    permutations : int
        Number of random permutations for simulated inference
        (0 / falsy skips the simulation step).
    """
    y = np.asarray(y).flatten()
    self.n = len(y)
    self.y = y
    # The global G statistic is defined for binary weights only.
    w.transform = "B"
    self.w = w
    self.permutations = permutations
    self.__moments()  # presumably sets self.EG and self.VG — used below
    self.y2 = y * y
    # Work with a column vector so that column * column.T is the full
    # n-by-n outer product; with a flat vector y * y.T would collapse
    # to an elementwise product.
    column = y.reshape(len(y), 1)
    self.den_sum = (column * column.T).sum() - (column * column).sum()
    self.G = self.__calc(self.y)
    # Analytical (normal-approximation) inference.
    self.z_norm = (self.G - self.EG) / np.sqrt(self.VG)
    self.p_norm = 1.0 - stats.norm.cdf(np.abs(self.z_norm))
    if permutations:
        draws = np.array(
            [self.__calc(np.random.permutation(self.y)) for _ in range(permutations)]
        )
        self.sim = draws
        # Pseudo p-value: count of the smaller tail, +1 correction.
        extreme = (draws >= self.G).sum()
        if (self.permutations - extreme) < extreme:
            extreme = self.permutations - extreme
        self.p_sim = (extreme + 1.0) / (permutations + 1.0)
        self.EG_sim = sum(draws) / permutations
        self.seG_sim = draws.std()
        self.VG_sim = self.seG_sim ** 2
        self.z_sim = (self.G - self.EG_sim) / self.seG_sim
        self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim))
def calc(self):
    """Compute local G (or G*) statistics together with their
    analytical moments and z-scores.

    Reads self.y, self.star, and self.w (expects a sparse weights
    matrix at self.w.sparse and the observation count at self.w.n;
    for G*, the diagonal is assumed already corrected). Sets:

    - self.y_sum : scalar sum of y
    - self.Gs    : local statistic per observation
    - self.EGs   : analytical expectation
    - self.VGs   : analytical variance
    - self.Zs    : standardized statistic
    """
    y = self.y
    self.y_sum = y.sum()
    adj = self.w.sparse
    # For plain G the focal observation is excluded everywhere;
    # for G* (self.star) it is included.
    exclude_self = not self.star
    n_reference = self.w.n - exclude_self
    gs = (adj @ y) / (y.sum() - y * exclude_self)
    # ----------------------------------------------------#
    # moments needed for analytical inference              #
    # ----------------------------------------------------#
    # Per-site mean of the remaining observations.
    site_mean = (y.sum() - y * exclude_self) / n_reference
    # Variance via E[x^2] - E[x]^2, again dropping the focal value so
    # the self-neighbor can be subtracted cleanly.
    site_mean_sq = ((y ** 2).sum() - (y ** 2) * exclude_self) / n_reference
    site_var = site_mean_sq - site_mean ** 2
    # Row sums of the (diagonal-corrected) weights matrix.
    degree = np.asarray(adj.sum(axis=1)).squeeze()
    e_gs = degree / n_reference
    v_gs = (
        degree
        * (n_reference - degree)
        / (n_reference - 1)
        * (1 / n_reference ** 2)
        * (site_var / (site_mean ** 2))
    )
    self.Gs = gs
    self.EGs = e_gs
    self.VGs = v_gs
    self.Zs = (gs - e_gs) / np.sqrt(v_gs)
def calc(self):
    """Compute local G (or G*) statistics, their analytical moments,
    and standardized values.

    self.star selects G* (focal value included) versus plain G
    (focal value excluded); self.w_transform ("b" binary or "r" row-
    standardized) selects the moment formulas. The weights transform
    is restored to self.w_original before returning. Sets self.y_sum,
    self.Gs, self.EGs, self.VGs, and self.Zs.
    """
    y = self.y
    squares = y * y
    self.y_sum = total = sum(y)
    sq_total = sum(squares)
    if self.star:
        # G*: the focal value joins the spatial lag and the denominator
        # covers every observation.
        self.w.transform = "B"
        lag = 1.0 * slag(self.w, y)
        lag += y
        if self.w_transform == "r":
            # Row-standardized case: renormalize for the extra self-link.
            lag = lag / (self.__getCardinalities() + 1.0)
        self.Gs = lag / total
        N = self.n
        lag_mean = y.mean()
        s2 = y.var()
    else:
        # Plain G: the focal value is excluded everywhere.
        lag = 1.0 * slag(self.w, y)
        rest = total - y
        self.Gs = lag / rest
        N = self.n - 1
        lag_mean = rest / N
        s2 = (sq_total - squares) / N - lag_mean ** 2
    e_num, v_num = 1.0, 1.0
    if self.w_transform == "b":
        card = self.__getCardinalities()
        # NOTE(review): this mutates the cardinality array in place,
        # matching the original behavior — confirm that is intended.
        card += self.star
        e_num = card * 1.0
        v_num = (card * (1.0 * N - card)) / (1.0 * N - 1)
    self.EGs = (e_num * 1.0) / N
    self.VGs = v_num * (1.0 / (N ** 2)) * ((s2 * 1.0) / (lag_mean ** 2))
    self.Zs = (self.Gs - self.EGs) / np.sqrt(self.VGs)
    self.w.transform = self.w_original