Ejemplo n.º 1
0
 def TwoSampleTest(self,sample1,sample2,numShuffles=1000,method='vanilla',blockSize=20):
     """
     Compute the p-value associated to the MMD between two samples.

     Parameters
     ----------
     sample1, sample2 : arrays with one observation per row
     numShuffles : number of draws used to approximate the null distribution
     method : null approximation procedure:
         ----'vanilla': standard permutation test
         ----'block': block permutation test (blocks of length blockSize)
         ----'wild': wild bootstrap
         ----'wild-center': wild bootstrap with empirical degeneration
         ----'wild2': wild bootstrap with separately centered multipliers per sample
     blockSize : block length for 'block'; also sets the OU mixing coefficient
         alpha = exp(-1/blockSize) for the wild bootstrap variants

     Returns
     -------
     Estimated p-value: fraction of null samples exceeding the observed MMD.

     Raises
     ------
     ValueError : unknown method, or a wild bootstrap with unequal sample sizes.
     """
     n1=shape(sample1)[0]
     n2=shape(sample2)[0]
     merged = concatenate( [sample1, sample2], axis=0 )
     merged_len=shape(merged)[0]
     # BUG FIX: use integer division; '/' yields a float under Python 3,
     # which breaks the reshape in the 'block' branch below
     numBlocks = merged_len//blockSize
     K=self.kernel(merged)
     # (biased) MMD statistic on the original split of the merged sample
     mmd = mean(K[:n1,:n1])+mean(K[n1:,n1:])-2*mean(K[n1:,:n1])
     null_samples = zeros(numShuffles)

     if method=='vanilla':
         # permute all observations freely and recompute the statistic
         for i in range(numShuffles):
             pp = permutation(merged_len)
             Kpp = K[pp,:][:,pp]
             null_samples[i] = mean(Kpp[:n1,:n1])+mean(Kpp[n1:,n1:])-2*mean(Kpp[n1:,:n1])

     elif method=='block':
         # permute contiguous blocks of indices (preserves within-block dependence);
         # assumes blockSize divides merged_len — TODO confirm with callers
         blocks=reshape(arange(merged_len),(numBlocks,blockSize))
         for i in range(numShuffles):
             pb = permutation(numBlocks)
             pp = reshape(blocks[pb],(merged_len))
             Kpp = K[pp,:][:,pp]
             null_samples[i] = mean(Kpp[:n1,:n1])+mean(Kpp[n1:,n1:])-2*mean(Kpp[n1:,:n1])

     elif method=='wild' or method=='wild-center':
         if n1!=n2:
             raise ValueError("Wild bootstrap MMD available only on the same sample sizes")
         alpha = exp(-1/float(blockSize))
         coreK = K[:n1,:n1]+K[n1:,n1:]-K[n1:,:n1]-K[:n1,n1:]
         for i in range(numShuffles):
             # w is a draw from the Ornstein-Uhlenbeck process
             w = HelperFunctions.generateOU(n=n1,alpha=alpha)
             if method=='wild-center':
                 # empirical degeneration (V_{n,2} in Leucht & Neumann)
                 w = w - mean(w)
             null_samples[i]=mean(outer(w,w)*coreK)
     elif method=='wild2':
         # independent centered OU multipliers for each sample
         alpha = exp(-1/float(blockSize))
         for i in range(numShuffles):
             wx=HelperFunctions.generateOU(n=n1,alpha=alpha)
             wx = wx - mean(wx)
             wy=HelperFunctions.generateOU(n=n2,alpha=alpha)
             wy = wy - mean(wy)
             null_samples[i]=mean(outer(wx,wx)*K[:n1,:n1])+mean(outer(wy,wy)*K[n1:,n1:])-2*mean(outer(wx,wy)*K[:n1,n1:])
     else:
         raise ValueError("Unknown null approximation method")
     return sum(mmd<null_samples)/float(numShuffles)
 def test_log_binom_coeff_many(self):
     """Compare log_bin_coeff against scipy's binom on 100 random (n, k) pairs."""
     for _ in range(100):
         n = randint(1, 10)
         k = randint(0, n)
         via_log = round(exp(HelperFunctions.log_bin_coeff(n, k)))
         self.assertEqual(via_log, round(binom(n, k)))
Ejemplo n.º 3
0
    def log_pdf(self, X):
        """
        Compute the log-likelihood of every row of X under this distribution.

        Parameters
        ----------
        X : 2D bool numpy array, one state per row, with X.shape[1] == self.dimension

        Returns
        -------
        1D numpy array of per-row log-likelihoods; -inf for unreachable states.

        Raises
        ------
        TypeError  : X is not a 2D numpy array
        ValueError : X has the wrong dtype or dimension
        """
        if not type(X) is numpy.ndarray:
            raise TypeError("X must be a numpy array")

        # '!= 2' instead of the original 'is 2': identity comparison of int
        # literals is a CPython implementation detail (SyntaxWarning on 3.8+)
        if len(X.shape) != 2:
            raise TypeError("X must be a 2D numpy array")

        # this also enforces correct data ranges
        # (numpy.bool_ is the canonical alias; numpy.bool8 was removed in numpy 2.0)
        if X.dtype != numpy.bool_:
            raise ValueError("X must be a bool8 numpy array")

        if not X.shape[1] == self.dimension:
            raise ValueError("Dimension of X does not match own dimension")

        num_active_self = sum(self.mu)

        # result vector
        log_liks = zeros(len(X))

        # compute action dependent log likelihood parts
        for i in range(len(X)):
            x = X[i]

            num_active_x = sum(x)

            # hamming distances using numpy broadcasting
            # divide by two, integer division is always fine since even number of differences
            num_diff = sum(self.mu != x)
            if num_active_self == num_active_x:
                # BUG FIX: the original wrote 'num_diff / 2' as a bare
                # expression and discarded the result; the halved distance
                # must actually be stored for the checks and terms below
                num_diff //= 2

            if num_diff > self.N:
                log_liks[i]=-inf
                continue

            if num_active_self != num_active_x:
                # action is a bool: 1 -> "del", 0 -> "add" (see branches below)
                action = num_active_x < num_active_self
                # all differing positions must be consistent with that action
                if not all(x[self.mu==action]==action):
                    log_liks[i]=-inf
                    continue
            else:
                # equal activity counts: "swap" action
                action = 2

            #shared-terms
            log_liks[i] = HelperFunctions.log_bin_coeff(self.N - 1, num_diff - 1) \
                            + (num_diff - 1) * log(self.spread) \
                            + (self.N - num_diff) * log(1 - self.spread)
            # if there was a freedom of action, use factor 1/3
            if num_diff <= min(num_active_self,self.dimension-num_active_self):
                log_liks[i] -= log(3)
            # action-specific terms
            if action == 0:
                # add
                log_liks[i] -= HelperFunctions.log_bin_coeff(self.dimension - num_active_self, num_diff)
            elif action == 1:
                # del
                log_liks[i] -= HelperFunctions.log_bin_coeff(num_active_self, num_diff)
            elif action == 2:
                # swap
                # NOTE(review): the '-' before the parenthesised difference means
                # the second coefficient is ADDED overall ('-= A - B'); confirm
                # this is intended rather than dividing by both ('-= A + B')
                log_liks[i] -= HelperFunctions.log_bin_coeff(num_active_self, num_diff) \
                                 - HelperFunctions.log_bin_coeff(self.dimension - num_active_self, num_diff)

        return log_liks
 def test_log_binom_coeff_5(self):
     """k > n edge case: exp(log_bin_coeff(2, 3)) should round to binom(2, 3) = 0."""
     n, k = 2, 3
     log_coeff = HelperFunctions.log_bin_coeff(n, k)
     self.assertEqual(round(exp(log_coeff)), binom(n, k))