def TwoSampleTest(self,sample1,sample2,numShuffles=1000,method='vanilla',blockSize=20):
    """
    Compute the p-value associated to the MMD between two samples.

    method determines the null approximation procedure:
    ----'vanilla': standard permutation test
    ----'block': block permutation test
    ----'wild': wild bootstrap
    ----'wild-center': wild bootstrap with empirical degeneration
    ----'wild2': wild bootstrap drawing one centred process per sample

    Parameters:
    sample1, sample2 - the two samples, stacked along axis 0 before
                       being passed to self.kernel
    numShuffles      - number of draws from the approximate null
    method           - one of the strings listed above
    blockSize        - block length for 'block'; for the wild variants it
                       sets the process parameter alpha = exp(-1/blockSize)

    Returns the fraction of null draws strictly larger than the observed
    statistic.

    Raises ValueError for an unknown method, for 'block' when the merged
    sample length is not a multiple of blockSize, and for 'wild' /
    'wild-center' when the two samples differ in length.
    """
    n1 = shape(sample1)[0]
    n2 = shape(sample2)[0]
    merged = concatenate([sample1, sample2], axis=0)
    merged_len = shape(merged)[0]
    K = self.kernel(merged)

    def mmd_statistic(gram):
        # rows/columns [:n1] belong to sample1, [n1:] to sample2
        return mean(gram[:n1,:n1]) + mean(gram[n1:,n1:]) - 2*mean(gram[n1:,:n1])

    mmd = mmd_statistic(K)
    null_samples = zeros(numShuffles)

    if method == 'vanilla':
        for i in range(numShuffles):
            pp = permutation(merged_len)
            null_samples[i] = mmd_statistic(K[pp,:][:,pp])

    elif method == 'block':
        if blockSize <= 0 or merged_len % blockSize != 0:
            raise ValueError("Block permutation requires the merged sample length "
                             "to be a positive multiple of blockSize")
        # floor division: a float block count breaks reshape/permutation
        numBlocks = merged_len // blockSize
        blocks = reshape(arange(merged_len), (numBlocks, blockSize))
        for i in range(numShuffles):
            # shuffle whole blocks, keeping the order inside each block
            pb = permutation(numBlocks)
            pp = reshape(blocks[pb], (merged_len))
            null_samples[i] = mmd_statistic(K[pp,:][:,pp])

    elif method == 'wild' or method == 'wild-center':
        if n1 != n2:
            raise ValueError("Wild bootstrap MMD available only on the same sample sizes")
        alpha = exp(-1/float(blockSize))
        coreK = K[:n1,:n1] + K[n1:,n1:] - K[n1:,:n1] - K[:n1,n1:]
        center = (method == 'wild-center')
        for i in range(numShuffles):
            # w is a draw from the Ornstein-Uhlenbeck process
            w = HelperFunctions.generateOU(n=n1, alpha=alpha)
            if center:
                # empirical degeneration (V_{n,2} in Leucht & Neumann)
                w = w - mean(w)
            null_samples[i] = mean(outer(w, w)*coreK)

    elif method == 'wild2':
        alpha = exp(-1/float(blockSize))
        for i in range(numShuffles):
            # independent centred processes for each sample
            wx = HelperFunctions.generateOU(n=n1, alpha=alpha)
            wx = wx - mean(wx)
            wy = HelperFunctions.generateOU(n=n2, alpha=alpha)
            wy = wy - mean(wy)
            null_samples[i] = mean(outer(wx, wx)*K[:n1,:n1]) \
                              + mean(outer(wy, wy)*K[n1:,n1:]) \
                              - 2*mean(outer(wx, wy)*K[:n1,n1:])

    else:
        raise ValueError("Unknown null approximation method")

    return sum(mmd < null_samples)/float(numShuffles)
def test_log_binom_coeff_many(self):
    """
    Compare exp(log_bin_coeff(n, k)) with binom(n, k), both rounded,
    for 100 randomly drawn (n, k) pairs.
    """
    trials_left = 100
    while trials_left > 0:
        trials_left -= 1
        # k is drawn after n because its upper bound depends on n
        n = randint(1, 10)
        k = randint(0, n)
        from_log = round(exp(HelperFunctions.log_bin_coeff(n, k)))
        reference = round(binom(n, k))
        self.assertEqual(from_log, reference)
def log_pdf(self, X):
    """
    Evaluate the log-density of every row of X under this distribution.

    Each row x is compared against self.mu and classified by its number
    of active (True) entries:
    ----more active than mu:  'add'  (action 0)
    ----fewer active than mu: 'del'  (action 1)
    ----same number:          'swap' (action 2)

    Parameters:
    X - 2D numpy array of boolean dtype with self.dimension columns

    Returns a 1D array with one log-density per row; rows that cannot be
    reached get -inf.

    Raises TypeError if X is not a 2D numpy array and ValueError if its
    dtype is not boolean or its column count differs from self.dimension.
    """
    if not isinstance(X, numpy.ndarray):
        raise TypeError("X must be a numpy array")

    if X.ndim != 2:
        raise TypeError("X must be a 2D numpy array")

    # this also enforces correct data ranges
    # (numpy.bool_ rather than the bool8 alias, which newer numpy dropped)
    if X.dtype != numpy.bool_:
        raise ValueError("X must be a bool8 numpy array")

    if X.shape[1] != self.dimension:
        raise ValueError("Dimension of X does not match own dimension")

    mu_active = numpy.asarray(self.mu, dtype=numpy.bool_)
    num_active_self = sum(self.mu)
    num_inactive_self = self.dimension - num_active_self

    # result vector
    log_liks = zeros(len(X))

    # compute action dependent log likelihood parts
    for i, x in enumerate(X):
        num_active_x = sum(x)

        # hamming distance using numpy broadcasting
        num_diff = sum(self.mu != x)

        if num_active_self == num_active_x:
            action = 2
            # every swap flips two entries, so the number of changes is
            # half the hamming distance (which is even in this case)
            num_diff = num_diff // 2
            feasible = True
        elif num_active_x > num_active_self:
            action = 0
            # a pure 'add' never switches off an entry active in mu
            feasible = all(x[mu_active])
        else:
            action = 1
            # a pure 'del' never switches on an entry inactive in mu
            feasible = not any(x[~mu_active])

        if num_diff > self.N or not feasible:
            log_liks[i] = -inf
            continue

        # shared terms
        log_liks[i] = HelperFunctions.log_bin_coeff(self.N - 1, num_diff - 1) \
                      + (num_diff - 1) * log(self.spread) \
                      + (self.N - num_diff) * log(1 - self.spread)

        # if there was a freedom of action, use factor 1/3
        if num_diff <= min(num_active_self, num_inactive_self):
            log_liks[i] -= log(3)

        # action-specific terms
        if action == 0:
            # add: choose which inactive entries get switched on
            log_liks[i] -= HelperFunctions.log_bin_coeff(num_inactive_self, num_diff)
        elif action == 1:
            # del: choose which active entries get switched off
            log_liks[i] -= HelperFunctions.log_bin_coeff(num_active_self, num_diff)
        else:
            # swap: both choices are made, so both coefficients are subtracted
            log_liks[i] -= HelperFunctions.log_bin_coeff(num_active_self, num_diff) \
                           + HelperFunctions.log_bin_coeff(num_inactive_self, num_diff)

    return log_liks
def test_log_binom_coeff_5(self):
    """
    Check log_bin_coeff for the fixed pair n=2, k=3 (k larger than n)
    against binom(n, k).
    """
    n, k = 2, 3
    log_coeff = HelperFunctions.log_bin_coeff(n, k)
    recovered = round(exp(log_coeff))
    self.assertEqual(recovered, binom(n, k))