Example #1
def print_weight_vector(sm, sparm):
    global FN
    global FE
    w_list = [sm.w[i] for i in xrange(0, sm.size_psi)]
    #print
    #print sm.size_psi,FN,FE,NUM_CLASSES
    K = NUM_CLASSES
    node_feats_list = [15, 51]
    node_coeff = zeros((K, 20))

    for k in xrange(0, K):
        c = 0
        for f in node_feats_list:
            node_coeff[k, c * 10:(c + 1) *
                       10] = w_list[k * FN + f * 10:k * FN + (f + 1) * 10]
            c += 1
    filename = "node_weights.csv"
    savetxt(filename, node_coeff, fmt='%f', delimiter=',')
    edge_feats_list = [5, 6, 7, 10]
    edge_coeff = zeros((K, K, 40))
    for n1 in xrange(0, K):
        for n2 in xrange(0, K):
            c = 0
            for e in edge_feats_list:
                edge_coeff[n1, n2, c * 10:(c + 1) *
                           10] = w_list[FN * K + n1 * K * FE + n2 * FE +
                                        e * 10:FN * K + n1 * K * FE + n2 * FE +
                                        (e + 1) * 10]
                c += 1
            #for e in xrange(0,FE/10):
            # coeff[e,n1,n2] = max(w_list[FN*K+ n1*K*FE + n2*FE + e*10 : FN*K+ n1*K*FE + n2*FE + e*10 + 9 ])

    for k in xrange(0, K):
        filename = "edge_weights_%d.csv" % k
        savetxt(filename, edge_coeff[k, :, :], fmt='%f', delimiter=',')
Example #2
def ARLeastSquares(inputseries, degree):

    k = 0
    length = len(inputseries)
    mat = zeros((degree, degree))
    coefficients = zeros(degree)

    i = degree - 1
    while i < length - 1:
        hi = i + 1
        j = 0
        while j < degree:
            hj = i - j
            coefficients[j] += inputseries[hi] * inputseries[hj]
            k = j

            while k < degree:
                mat[j][k] += inputseries[hj] * inputseries[i - k]
                k += 1
            j += 1
        i += 1
    for i in range(0, degree):
        coefficients[i] /= length - degree
        for j in range(i, degree):
            mat[i][j] /= (length - degree)
            mat[j][i] = mat[i][j]

    M = linalg.lstsq(mat, coefficients)[0]
    return M
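
A minimal usage sketch (not part of the original snippet; the AR(2) coefficients and noise level are assumptions chosen for illustration), assuming ARLeastSquares is defined as above:

# Hypothetical usage sketch: recover known AR(2) coefficients from synthetic data.
from numpy import zeros, linalg   # needed by ARLeastSquares above (its own imports are not shown)
from numpy.random import randn

true_coeffs = [0.6, -0.3]          # assumed ground truth for the demo
series = [0.0, 0.0]
for _ in range(2000):
    series.append(true_coeffs[0] * series[-1]
                  + true_coeffs[1] * series[-2]
                  + 0.1 * randn())

print(ARLeastSquares(series, 2))   # should be close to [0.6, -0.3]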
Example #3
def print_weight_vector( sm, sparm):
    global FN
    global FE
    w_list = [sm.w[i] for i in xrange(0,sm.size_psi)]
    #print
    #print sm.size_psi,FN,FE,NUM_CLASSES
    K = NUM_CLASSES
    node_feats_list = [15,51]
    node_coeff = zeros((K,20))
    
    for k in xrange(0,K):
        c = 0;
        for f in node_feats_list:
            node_coeff[k,c*10:(c+1)*10] = w_list[k*FN+f*10: k*FN+(f+1)*10]
            c += 1
    filename="node_weights.csv" 
    savetxt(filename,node_coeff,fmt='%f',delimiter=',');
    edge_feats_list = [5,6,7,10]
    edge_coeff = zeros((K,K,40))
    for n1 in xrange(0,K):
        for n2 in xrange(0,K):
            c = 0;
            for e in edge_feats_list:
                edge_coeff[n1,n2,c*10:(c+1)*10] = w_list[FN*K+ n1*K*FE + n2*FE+ e*10: FN*K+ n1*K*FE + n2*FE + (e+1)*10  ]
                c+=1
            #for e in xrange(0,FE/10):
               # coeff[e,n1,n2] = max(w_list[FN*K+ n1*K*FE + n2*FE + e*10 : FN*K+ n1*K*FE + n2*FE + e*10 + 9 ])
            
    for k in xrange(0,K):
        filename="edge_weights_%d.csv" % k
        savetxt(filename,edge_coeff[k,:,:],fmt='%f',delimiter=',');
Example #4
def ARLeastSquares(inputseries, degree):

      k=0
      length = len(inputseries)
      mat = zeros((degree,degree))
      coefficients = zeros(degree)
      
      i=degree-1
      while i<length-1:
            hi = i+1
            j=0
            while j<degree:
                  hj = i-j
                  coefficients[j] += inputseries[hi] * inputseries[hj]
                  k=j
                  
                  while k <degree:
                        mat[j][k] +=inputseries[hj] * inputseries[i-k]
                        k+=1
                  j+=1
            i+=1      
      for i in range(0,degree):
            coefficients[i] /= length - degree
            for j in range(i, degree):
                  mat[i][j] /=(length - degree)
                  mat[j][i] = mat[i][j]
                  
      M= linalg.lstsq(mat,coefficients)[0]
      return M
Example #5
 def __init__(self, params, sampler):
     self.params=params
     self.sampler=sampler
     
     self.iteration=int64(0)
     self.samples=zeros((params.chain_length, sampler.target.dimension))
     
     # fields for the chain
     num_iterations = self.params.chain_length
     self.samples = zeros((num_iterations, self.sampler.target.dimension))
     self.log_ratios = zeros(num_iterations)
     self.log_liks = zeros(num_iterations)
     self.accepteds = zeros(num_iterations)
def bicgstabReg(X,Y,my_Y,B):
    '''
    Stabilized biconjugate gradient descent (BiCGSTAB).
    Note: CostFunctionReg and the tolerance `e` are assumed to be defined elsewhere in the module.
    '''
    my_Y_copy=[]
    for i in my_Y:
      my_Y_copy.append(i)
      
    error = CostFunctionReg(Y,my_Y_copy,B)
    
    R0star = Y - dot(X,B)
    R0 = Y - dot(X,B)
    rho0 = 1
    alp0 = 1
    w0 = 1
    V0 =mat(zeros(len(Y)).reshape(len(Y),1))
    P0 = mat(zeros(len(Y)).reshape(len(Y),1))
    #print R0
    while 1:
        rho1 = array(dot(R0star.T, R0))[0][0]
        beta = (rho1/rho0) * (alp0/w0)
        P1 = R0 + beta*(P0 - w0*V0)
        
        V1 = dot(X,P1)
        alp0 = rho1/(array(dot(R0star.T,V1))[0][0])
        h = B + alp0 * P1
        my_Y_copy = array(dot(X,array(h).reshape(len(h),1)).reshape(1,len(Y)))[0]
        new_error = CostFunctionReg(Y,my_Y_copy,h) 
        if abs(new_error -error) <=e:
            B=h
            break
        #error = new_error
        S = R0 - alp0*V1
        
        t = dot(X,S)
        w1 = array(dot(t.T, S))[0][0]/array(dot(t.T, t))[0][0]
        B = h + w1*S
        my_Y_copy = array(dot(X,array(B).reshape(len(B),1)).reshape(1,len(Y)))[0]
        new_error = CostFunctionReg(Y,my_Y_copy,B) 
       # print abs(new_error -error)
        if abs(new_error -error) <=e:
            break
        R0 = S - w1 * t
        rho0 = rho1
        P0 = P1
        V0 =V1
        w0 = w1
        error = new_error       
    return dot(X,B),B
Example #7
    def test_mode_newton_2d(self):
        X = asarray([-1, 1])
        X = reshape(X, (len(X), 1))
        y = asarray([+1 if x >= 0 else -1 for x in X])
        covariance = SquaredExponentialCovariance(sigma=1, scale=1)
        likelihood = LogitLikelihood()
        gp = GaussianProcess(y, X, covariance, likelihood)
        laplace = LaplaceApproximation(gp, newton_start=asarray([3, 3]))
        
        f_mode, _, steps = laplace.find_mode_newton(return_full=True)
        F = linspace(-10, 10, 20)
        D = zeros((len(F), len(F)))
        for i in range(len(F)):
            for j in range(len(F)):
                f = asarray([F[i], F[j]])
                D[i, j] = gp.log_posterior_unnormalised(f)
           
        idx = unravel_index(D.argmax(), D.shape)
        empirical_max = asarray([F[idx[0]], F[idx[1]]])
        
        pcolor(F, F, D)
        hold(True)
        plot(steps[:, 0], steps[:, 1])
        plot(f_mode[1], f_mode[0], 'mo', markersize=10)
        hold(False)
        colorbar()
        clf()
#        show()
           
        self.assertLessEqual(norm(empirical_max - f_mode), 1)
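
A standalone sketch (not from the source) of the argmax-on-a-grid step above: argmax() returns a flat index, and unravel_index maps it back to (row, col) so the grid coordinates can be looked up:

# Standalone illustration of locating the maximiser of a grid evaluation.
from numpy import asarray, linspace, unravel_index, zeros

F = linspace(-10, 10, 20)
D = zeros((len(F), len(F)))
for i in range(len(F)):
    for j in range(len(F)):
        D[i, j] = -(F[i] - 1.0) ** 2 - (F[j] + 2.0) ** 2   # toy surface peaked at (1, -2)

idx = unravel_index(D.argmax(), D.shape)
print(asarray([F[idx[0]], F[idx[1]]]))   # the grid point closest to (1, -2)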
Example #8
 def test_testAverage2(self):
     # More tests of average.
     w1 = [0, 1, 1, 1, 1, 0]
     w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]]
     x = arange(6, dtype=np.float_)
     assert_equal(average(x, axis=0), 2.5)
     assert_equal(average(x, axis=0, weights=w1), 2.5)
     y = array([arange(6, dtype=np.float_), 2.0 * arange(6)])
     assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.)
     assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.)
     assert_equal(average(y, axis=1),
                  [average(x, axis=0), average(x, axis=0) * 2.0])
     assert_equal(average(y, None, weights=w2), 20. / 6.)
     assert_equal(average(y, axis=0, weights=w2),
                  [0., 1., 2., 3., 4., 10.])
     assert_equal(average(y, axis=1),
                  [average(x, axis=0), average(x, axis=0) * 2.0])
     m1 = zeros(6)
     m2 = [0, 0, 1, 1, 0, 0]
     m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]]
     m4 = ones(6)
     m5 = [0, 1, 1, 1, 1, 1]
     assert_equal(average(masked_array(x, m1), axis=0), 2.5)
     assert_equal(average(masked_array(x, m2), axis=0), 2.5)
     assert_equal(average(masked_array(x, m4), axis=0).mask, [True])
     assert_equal(average(masked_array(x, m5), axis=0), 0.0)
     assert_equal(count(average(masked_array(x, m4), axis=0)), 0)
     z = masked_array(y, m3)
     assert_equal(average(z, None), 20. / 6.)
     assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5])
     assert_equal(average(z, axis=1), [2.5, 5.0])
     assert_equal(average(z, axis=0, weights=w2),
                  [0., 1., 99., 99., 4.0, 10.0])
Example #9
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return

        # burnin and dimension are the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        dim = self.experiments[
            0].mcmc_chain.mcmc_sampler.distribution.dimension

        # collect all thinned samples of all chains in here
        merged_samples = zeros((0, dim))

        for i in range(len(self.experiments)):
            lines.append("Processing chain %d" % i)

            # discard samples before burn in
            lines.append("Discarding burnin of %d" % burnin)
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

            # thin out by factor and store thinned samples
            indices = arange(0, len(burned_in), self.thinning_factor)
            lines.append("Thinning by factor of %d, giving %d samples" \
                         % (self.thinning_factor, len(indices)))
            thinned = burned_in[indices, :]
            merged_samples = vstack((merged_samples, thinned))

        # dump merged samples to disc
        fname = self.experiments[0].name + "_merged_samples.txt"
        lines.append("Storing %d samples in file %s" %
                     (len(merged_samples), fname))
        savetxt(fname, merged_samples)

        return lines
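
For reference, a self-contained sketch of the same burn-in-and-thinning step on plain NumPy arrays; the fake chains, burn-in, and thinning factor are assumptions chosen for illustration:

# Self-contained sketch of burn-in plus thinning.
from numpy import arange, vstack, zeros
from numpy.random import randn

chains = [randn(1000, 3), randn(1000, 3)]   # two fake 1000-sample, 3-d chains
burnin, thinning_factor = 200, 10
merged_samples = zeros((0, 3))
for samples in chains:
    burned_in = samples[burnin:, :]                        # discard burn-in
    indices = arange(0, len(burned_in), thinning_factor)   # keep every 10th sample
    merged_samples = vstack((merged_samples, burned_in[indices, :]))

print(merged_samples.shape)   # (160, 3): 80 thinned samples per chain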
Example #10
    def find_mode_newton(self, return_full=False):
        """
        Newton search for mode of p(y|f)p(f)
        
        from GP book, algorithm 3.1, added step size
        """
        K = self.gp.K

        if self.newton_start is None:
            f = zeros(len(K))
        else:
            f = self.newton_start

        if return_full:
            steps = [f]

        iteration = 0
        norm_difference = inf
        objective_value = -inf

        while iteration < self.newton_max_iterations and norm_difference > self.newton_epsilon:
            # from GP book, algorithm 3.1, added step size
            # scale log_lik_grad_vector and K^-1 f = a

            w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
            w_sqrt = sqrt(w)

            # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
            L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
            b = f * w + self.newton_step * self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

            # a=b-diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt).dot(diag(w_sqrt).dot(K.dot(b))))
            a = w_sqrt * (K.dot(b))
            a = solve_triangular(L, a, lower=True)
            a = solve_triangular(L.T, a, lower=False)
            a = w_sqrt * a
            a = b - a

            f_new = K.dot(self.newton_step * a)

            # convergence stuff and next iteration
            objective_value_new = -0.5 * a.T.dot(f) + sum(self.gp.likelihood.log_lik_vector(self.gp.y, f))
            norm_difference = norm(f - f_new)

            if objective_value_new > objective_value:
                f = f_new
                if return_full:
                    steps.append(f)
            else:
                self.newton_step /= 2

            iteration += 1
            objective_value = objective_value_new

        self.computed = True

        if return_full:
            return f, L, asarray(steps)
        else:
            return f
Example #11
    def test_get_gaussian_2d(self):
        X = asarray([-1, 1])
        X = reshape(X, (len(X), 1))
        y = asarray([+1 if x >= 0 else -1 for x in X])
        covariance = SquaredExponentialCovariance(sigma=1, scale=1)
        likelihood = LogitLikelihood()
        gp = GaussianProcess(y, X, covariance, likelihood)
        laplace = LaplaceApproximation(gp, newton_start=asarray([3, 3]))
        
        f_mode, L, steps = laplace.find_mode_newton(return_full=True)
        gaussian = laplace.get_gaussian(f_mode, L)
        F = linspace(-10, 10, 20)
        D = zeros((len(F), len(F)))
        Q = array(D, copy=True)
        for i in range(len(F)):
            for j in range(len(F)):
                f = asarray([F[i], F[j]])
                D[i, j] = gp.log_posterior_unnormalised(f)
                Q[i, j] = gaussian.log_pdf(f.reshape(1, len(f)))
        
        subplot(1, 2, 1)
        pcolor(F, F, D)
        hold(True)
        plot(steps[:, 0], steps[:, 1])
        plot(f_mode[1], f_mode[0], 'mo', markersize=10)
        hold(False)
        colorbar()
        subplot(1, 2, 2)
        pcolor(F, F, Q)
        hold(True)
        plot(f_mode[1], f_mode[0], 'mo', markersize=10)
        hold(False)
        colorbar()
#        show()
        clf()
Example #12
    def log_pdf(self, thetas):
        assert (len(shape(thetas)) == 2)
        assert (shape(thetas)[1] == self.dimension)

        result = zeros(len(thetas))
        for i in range(len(thetas)):
            labels = BinaryLabels(self.y)
            feats_train = RealFeatures(self.X.T)

            # ARD: set theta, which is in log-scale, as kernel weights
            kernel = GaussianARDKernel(10, 1)
            kernel.set_weights(exp(thetas[i]))

            mean = ZeroMean()
            likelihood = LogitLikelihood()
            inference = LaplacianInferenceMethod(kernel, feats_train, mean,
                                                 labels, likelihood)

            # fix kernel scaling for now
            inference.set_scale(exp(0))

            if self.ridge is not None:
                log_ml_estimate = inference.get_marginal_likelihood_estimate(
                    self.n_importance, self.ridge)
            else:
                log_ml_estimate = inference.get_marginal_likelihood_estimate(
                    self.n_importance)

            # prior is also in log-domain, so no exp of theta
            log_prior = self.prior.log_pdf(thetas[i].reshape(
                1, len(thetas[i])))
            result[i] = log_ml_estimate + log_prior

        return result
Example #13
 def sample(self, n=1):
     rez = zeros([n, self.dimension])
     for ii in range(n):
         which_component = self.mixing_proportion.sample().samples
         rez[ii, :] = self.components[which_component].sample().samples
         
     return SampleFromMixture(rez,which_component)
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return
        
        # burnin and dimension are the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        dim = self.experiments[0].mcmc_chain.mcmc_sampler.distribution.dimension
        
        # collect all thinned samples of all chains in here
        merged_samples = zeros((0, dim))
        
        for i in range(len(self.experiments)):
            lines.append("Processing chain %d" % i)
            
            # discard samples before burn in
            lines.append("Discarding burnin of %d" % burnin)
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]
            
            # thin out by factor and store thinned samples
            indices = arange(0, len(burned_in), self.thinning_factor)
            lines.append("Thinning by factor of %d, giving %d samples" \
                         % (self.thinning_factor, len(indices)))
            thinned = burned_in[indices, :]
            merged_samples = vstack((merged_samples, thinned))

        # dump merged samples to disc
        fname = self.experiments[0].name + "_merged_samples.txt"
        lines.append("Storing %d samples in file %s" % (len(merged_samples), fname))
        savetxt(fname, merged_samples)

        return lines
Example #15
def find_distn(lgbinwidth, numlgbins, transient, N, M_t, M_i):
    totspkhist = zeros((numlgbins, 1))
    skiptime = transient * ms
    skipbin = int(ceil(skiptime / lgbinwidth))
    for i in xrange(numlgbins):
        step_start = (i) * lgbinwidth  #30*ms #
        step_end = (i + 1) * lgbinwidth  #30*ms #
        for j in xrange(N):
            #spks=where(logical_and(M[j]>step_start, M[j]<step_end))
            #totspkhist[i]+=len(M[j][spks])
            totspkhist[i] += len(M_i[logical_and(M_t > step_start,
                                                 M_t < step_end)])

    #totspkhist_1D=reshape(totspkhist,len(totspkhist))
    ###smooth plot first so thresholds work better
    #b,a=butter(3,0.4,'low')
    #totspkhist_smooth=filtfilt(b,a,totspkhist_1D)
    totspkhist_smooth = reshape(
        totspkhist, len(totspkhist)
    )  #here we took out the actual smoothing and left it as raw distn.

    #create distn based on hist, but skip first skiptime to cut out transient excessive spiking
    if max(totspkhist_smooth[skipbin:]) > 0:
        totspkdist_smooth = totspkhist_smooth / max(
            totspkhist_smooth[skipbin:])
    else:
        totspkdist_smooth = totspkhist_smooth
    totspkhist_list = [val for subl in totspkhist for val in subl]
    return [totspkhist, totspkdist_smooth, totspkhist_list]
    def log_pdf(self, thetas):
        assert(len(shape(thetas)) == 2)
        assert(shape(thetas)[1] == self.dimension)
        
        result=zeros(len(thetas))
        for i in range(len(thetas)):
            labels=BinaryLabels(self.y)
            feats_train=RealFeatures(self.X.T)

            # ARD: set theta, which is in log-scale, as kernel weights
            kernel=GaussianARDKernel(10,1)
            kernel.set_weights(exp(thetas[i]))
            
            mean=ZeroMean()
            likelihood=LogitLikelihood()
            inference=LaplacianInferenceMethod(kernel, feats_train, mean, labels, likelihood)
            
            # fix kernel scaling for now
            inference.set_scale(exp(0))
            
            if self.ridge is not None:
                log_ml_estimate=inference.get_marginal_likelihood_estimate(self.n_importance, self.ridge)
            else:
                log_ml_estimate=inference.get_marginal_likelihood_estimate(self.n_importance)
            
            # prior is also in log-domain, so no exp of theta
            log_prior=self.prior.log_pdf(thetas[i].reshape(1,len(thetas[i])))
            result[i]=log_ml_estimate+log_prior
            
        return result
Example #17
 def test_testAverage2(self):
     # More tests of average.
     w1 = [0, 1, 1, 1, 1, 0]
     w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]]
     x = arange(6, dtype=np.float_)
     assert_equal(average(x, axis=0), 2.5)
     assert_equal(average(x, axis=0, weights=w1), 2.5)
     y = array([arange(6, dtype=np.float_), 2.0 * arange(6)])
     assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.)
     assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.)
     assert_equal(
         average(y, axis=1),
         [average(x, axis=0), average(x, axis=0) * 2.0])
     assert_equal(average(y, None, weights=w2), 20. / 6.)
     assert_equal(average(y, axis=0, weights=w2), [0., 1., 2., 3., 4., 10.])
     assert_equal(
         average(y, axis=1),
         [average(x, axis=0), average(x, axis=0) * 2.0])
     m1 = zeros(6)
     m2 = [0, 0, 1, 1, 0, 0]
     m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]]
     m4 = ones(6)
     m5 = [0, 1, 1, 1, 1, 1]
     assert_equal(average(masked_array(x, m1), axis=0), 2.5)
     assert_equal(average(masked_array(x, m2), axis=0), 2.5)
     assert_equal(average(masked_array(x, m4), axis=0).mask, [True])
     assert_equal(average(masked_array(x, m5), axis=0), 0.0)
     assert_equal(count(average(masked_array(x, m4), axis=0)), 0)
     z = masked_array(y, m3)
     assert_equal(average(z, None), 20. / 6.)
     assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5])
     assert_equal(average(z, axis=1), [2.5, 5.0])
     assert_equal(average(z, axis=0, weights=w2),
                  [0., 1., 99., 99., 4.0, 10.0])
Example #18
    def sample(self, n=1):
        rez = zeros([n, self.dimension])
        for ii in range(n):
            which_component = self.mixing_proportion.sample().samples
            rez[ii, :] = self.components[which_component].sample().samples

        return SampleFromMixture(rez, which_component)
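
A standalone sketch (not from the source; the component means, covariances, and mixing weights are illustrative assumptions) of the same two-stage idea: draw a component index from the mixing proportions, then sample from that component:

# Standalone sketch of two-stage mixture sampling.
from numpy import array, zeros
from numpy.random import choice, multivariate_normal

means = [array([0.0, 0.0]), array([5.0, 5.0])]   # assumed component means
covs = [array([[1.0, 0.0], [0.0, 1.0]])] * 2     # shared identity covariance
omega = [0.3, 0.7]                               # assumed mixing proportions

rez = zeros((10, 2))
for ii in range(10):
    which_component = choice(len(omega), p=omega)   # draw a component index
    rez[ii, :] = multivariate_normal(means[which_component], covs[which_component])
print(rez)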
Example #19
 def log_pdf(self, X, component_index_given=None):
     """
     If component_index_given is given, then just condition on it,
     otherwise, should compute the overall log_pdf
     """
     if component_index_given == None:
         rez = zeros([len(X)])
         for ii in range(len(X)):
             logpdfs = zeros([self.num_components])
             for jj in range(self.num_components):
                 logpdfs[jj] = self.components[jj].log_pdf([X[ii]])
             lmax = max(logpdfs)
             rez[ii] = lmax + log(sum(self.mixing_proportion.omega * exp(logpdfs - lmax)))
         return rez
     else:
         assert(component_index_given < self.num_components)
         return self.components[component_index_given].log_pdf(X)
Example #20
 def log_pdf(self, X, component_index_given=None):
     """
     If component_index_given is given, then just condition on it,
     otherwise, should compute the overall log_pdf
     """
     if component_index_given == None:
         rez = zeros([len(X)])
         for ii in range(len(X)):
             logpdfs = zeros([self.num_components])
             for jj in range(self.num_components):
                 logpdfs[jj] = self.components[jj].log_pdf([X[ii]])
             lmax = max(logpdfs)
             rez[ii] = lmax + log(
                 sum(self.mixing_proportion.omega * exp(logpdfs - lmax)))
         return rez
     else:
         assert (component_index_given < self.num_components)
         return self.components[component_index_given].log_pdf(X)
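
A standalone sketch (not from the source) of the log-sum-exp trick used in the loop above; subtracting the maximum before exponentiating keeps exp() from underflowing for very negative log-densities:

# Standalone illustration of the log-sum-exp trick.
from numpy import array, exp, log

def weighted_log_sum_exp(log_values, weights):
    lmax = log_values.max()
    return lmax + log((weights * exp(log_values - lmax)).sum())

# exp(-1000) underflows to 0.0, but the shifted computation stays finite:
print(weighted_log_sum_exp(array([-1000.0, -1001.0]), array([0.5, 0.5])))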
Example #21
 def test_record_arrays(self):
     
     img = array([ [[0,1], [0,0]], [[0,0], [1,0]], [[0,0], [0,1]] ], dtype='float32')
     self.assertEqual(img.shape, (3,2,2))
     
     img = array([[(0,0,0), (1,0,0)], [(0,1,0), (0,0,1)]], [('r','float32'),('g','float32'),('b','float32')])
     img = array([[(0,0,0), (1,0,0)], [(0,1,0), (0,0,1)]], {'names': ('r','g','b'), 'formats': ('f4', 'f4', 'f4')})
     img = zeros((2,2), [('r','float32'),('g','float32'),('b','float32')])
     img.flat = [(0,0,0),(1,0,0),(0,1,0),(0,0,1)]
     print img.view(recarray)
Example #22
    def linear_extrapolate(self, output=True):
        '''
        Return a 1D PPform which extrapolate linearly outside its basic interval
        '''
    
        max_order = 2
    
        if self.order <= max_order:
            if output:
                return self
            else: 
                return
        breaks = self.breaks.copy()
        coefs = self.coeffs.copy()
        #pieces = len(breaks) - 1
        
        # Add new breaks beyond each end
        breaks2add = breaks[[0, -1]] + np.array([-1, 1])
        newbreaks = np.hstack([breaks2add[0], breaks, breaks2add[1]])
    
        dx = newbreaks[[0, -2]] - breaks[[0, -2]]
    
        dx = dx.ravel()
       
        # Get coefficients for the new last polynomial piece (a_n)
        # by just relocate the previous last polynomial and
        # then set all terms of order > maxOrder to zero
        
        a_nn = coefs[:, -1]
        dxN = dx[-1]
         
        a_n = pl.polyreloc(a_nn, -dxN) # Relocate last polynomial
        #set to zero all terms of order > maxOrder 
        a_n[0:self.order - max_order] = 0
    
        #Get the coefficients for the new first piece (a_1)
        # by first setting all terms of order > maxOrder to zero and then
        # relocate the polynomial.

    
        #Set to zero all terms of order > maxOrder, i.e., not using them
        a_11 = coefs[self.order - max_order::, 0]
        dx1 = dx[0]
    
        a_1 = pl.polyreloc(a_11, -dx1) # Relocate first polynomial 
        a_1 = np.hstack([zeros(self.order - max_order), a_1])
      
        newcoefs = np.hstack([ a_1.reshape(-1, 1), coefs, a_n.reshape(-1, 1)])
        if output:
            return PPform(newcoefs, newbreaks, a= -inf, b=inf)
        else:
            self.coeffs = newcoefs
            self.breaks = newbreaks
            self.a = -inf
            self.b = inf
Example #23
    def linear_extrapolate(self, output=True):
        '''
        Return 1D PPform which extrapolate linearly outside its basic interval
        '''

        max_order = 2

        if self.order <= max_order:
            if output:
                return self
            else:
                return
        breaks = self.breaks.copy()
        coefs = self.coeffs.copy()
        # pieces = len(breaks) - 1

        # Add new breaks beyond each end
        breaks2add = breaks[[0, -1]] + np.array([-1, 1])
        newbreaks = np.hstack([breaks2add[0], breaks, breaks2add[1]])

        dx = newbreaks[[0, -2]] - breaks[[0, -2]]

        dx = dx.ravel()

        # Get coefficients for the new last polynomial piece (a_n)
        # by just relocate the previous last polynomial and
        # then set all terms of order > maxOrder to zero

        a_nn = coefs[:, -1]
        dxN = dx[-1]

        a_n = pl.polyreloc(a_nn, -dxN)  # Relocate last polynomial
        # set to zero all terms of order > maxOrder
        a_n[0:self.order - max_order] = 0

        # Get the coefficients for the new first piece (a_1)
        # by first setting all terms of order > maxOrder to zero and then
        # relocate the polynomial.

        # Set to zero all terms of order > maxOrder, i.e., not using them
        a_11 = coefs[self.order - max_order::, 0]
        dx1 = dx[0]

        a_1 = pl.polyreloc(a_11, -dx1)  # Relocate first polynomial
        a_1 = np.hstack([zeros(self.order - max_order), a_1])

        newcoefs = np.hstack([a_1.reshape(-1, 1), coefs, a_n.reshape(-1, 1)])
        if output:
            return PPform(newcoefs, newbreaks, a=-inf, b=inf)
        else:
            self.coeffs = newcoefs
            self.breaks = newbreaks
            self.a = -inf
            self.b = inf
Example #24
 def _calculateCurveDistanceMatrix(self):
   curves = self._lpcCurves
   num_curves = len(curves)
   distance_matrix = zeros((num_curves, num_curves))
   for i in range(num_curves):
     curve_i = curves[i]
     for j in range(i+1, num_curves):  
       curve_j = curves[j]
       distance_matrix[i,j] = self._lpcResiduals._distanceBetweenCurves(curve_i, curve_j)
       distance_matrix[j,i] = self._lpcResiduals._distanceBetweenCurves(curve_j, curve_i)
   return distance_matrix
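
A standalone sketch (not from the source) of the same fill pattern for an ordinary symmetric metric, where a single call per pair suffices; the curve-residual distance above is evaluated in both directions, so it need not be symmetric:

# Standalone sketch: symmetric pairwise distances, filling the upper triangle once.
from numpy import zeros
from numpy.linalg import norm

def pairwise_distances(points):
    n = len(points)
    distance_matrix = zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            distance_matrix[i, j] = norm(points[i] - points[j])
            distance_matrix[j, i] = distance_matrix[i, j]   # mirror for symmetry
    return distance_matrix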
Example #25
 def sample_rectangle_data(n, noise_level=0.015, offset=0.05, seed_init=None):
     if seed_init is not None:
         seed(seed_init)
     
     # rectangle data
     a = rand(n / 2) * (1 - offset)
     b = rand(n / 2) * (1 - offset)
     data = zeros((n, 2))
     labels = zeros(n)
     for i in range(len(a)):
         labels[i] = 1.0 if rand() > 0.5 else -1.0
         data[i, 0] = a[i]
         data[i, 1] += labels[i] * offset + randn() * noise_level
         
     for i in range(len(b)):
         j = i + len(b)
         labels[j] = 1.0 if rand() > 0.5 else -1.0
         data[j, 1] = b[i]
         data[j, 0] += labels[j] * offset + randn() * noise_level
         
     return data, labels
Example #26
 def getPathResidualDiags(self, curve):
   lpc_points = curve['save_xd']
   residuals = self._calculatePrunedPointResiduals(curve)
   #strip inf values from arrays with less than k NNs within radius_threshold
   point_dist = residuals[0]
   point_dist = [point_dist[j][invert(isinf(point_dist[j]))] for j in range(point_dist.shape[0])]
   k = self._params['k']
   num_NN = map(len, point_dist[:k])
   mean_NN = map(mean,point_dist[:k])
   std_NN = map(std, point_dist[:k])
   #indices will contain entries equal to self._X.shape[0], which are out of bounds
   #these are removed with the set symm difference below
   indices = residuals[1]
   num_tree_pts = set([self._X.shape[0]])
   num_lpc_pts = len(lpc_points)
   line_seg_mean_NN = zeros(num_lpc_pts - 1)
   line_seg_std_NN = zeros(num_lpc_pts - 1)
   line_seg_num_NN = zeros(num_lpc_pts - 1)
   
   for i in range(num_lpc_pts - 1):
     trial_points = self._X[list(set(indices[i:i+2].ravel()) - num_tree_pts)]
     if len(trial_points) != 0:
       line_seg_NN_dists = empty(len(trial_points))
       j = 0 
       for p in trial_points:
         line_seg_NN_dists[j] = self._distancePointToLineSegment(lpc_points[i], lpc_points[i+1], p)[0]
         j = j + 1
       line_seg_NN_dists.sort()
       
       line_seg_num_NN[i] = min(len(line_seg_NN_dists), k)
       line_seg_mean_NN[i] = mean(line_seg_NN_dists[:k])
       line_seg_std_NN[i] = std(line_seg_NN_dists[:k])
     else:
       line_seg_num_NN[i] = 0
       line_seg_mean_NN[i] = 0.0
       line_seg_std_NN[i] = 0.0
     
   return {'num_NN': num_NN, 'mean_NN': mean_NN, 'std_NN': std_NN, 
           'line_seg_num_NN': line_seg_num_NN, 'line_seg_mean_NN': line_seg_mean_NN, 'line_seg_std_NN': line_seg_std_NN}
     
Example #27
 def __init__(self, dimension=2, num_components=2, components=None, mixing_proportion=None):
     Distribution.__init__(self, dimension)
     self.num_components = num_components
     if (components == None):
         self.components = [Gaussian(mu=zeros(self.dimension),Sigma=eye(self.dimension)) for _ in range(self.num_components)]
     else:
         assert(len(components)==self.num_components)
         self.components=components
     if (mixing_proportion == None):
         self.mixing_proportion=Discrete((1.0/num_components)*ones([num_components]))
     else:
         assert(num_components==mixing_proportion.num_objects)
         self.mixing_proportion = mixing_proportion
Example #28
def main():
    distribution = Banana(dimension=8)
    
    sigma=5
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)
    
    mcmc_sampler = Kameleon(distribution, kernel, distribution.sample(100).samples)
    
    start = zeros(distribution.dimension)
    mcmc_params = MCMCParams(start=start, num_iterations=20000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)
    
    chain.append_mcmc_output(StatisticsOutput(plot_times=True))
    chain.run()
Example #29
    def sample_rectangle_data(n,
                              noise_level=0.015,
                              offset=0.05,
                              seed_init=None):
        if seed_init is not None:
            seed(seed_init)

        # rectangle data
        a = rand(n / 2) * (1 - offset)
        b = rand(n / 2) * (1 - offset)
        data = zeros((n, 2))
        labels = zeros(n)
        for i in range(len(a)):
            labels[i] = 1.0 if rand() > 0.5 else -1.0
            data[i, 0] = a[i]
            data[i, 1] += labels[i] * offset + randn() * noise_level

        for i in range(len(b)):
            j = i + len(b)
            labels[j] = 1.0 if rand() > 0.5 else -1.0
            data[j, 1] = b[i]
            data[j, 0] += labels[j] * offset + randn() * noise_level

        return data, labels
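
A hypothetical usage sketch; it assumes the function is reachable at module level (in the source it appears inside a class), that NumPy's rand/randn/seed/zeros are already in scope (the snippet does not show its imports), and that it runs under Python 2, since `rand(n / 2)` relies on integer division:

# Hypothetical usage sketch: visualise the two noisy rectangle edges.
import matplotlib.pyplot as plt

data, labels = sample_rectangle_data(200, seed_init=1)
plt.scatter(data[:, 0], data[:, 1], c=labels)
plt.show()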
Example #30
def mandelbrot(h, w, maxit=20):
    y, x = ogrid[-1.4:1.4:h * 1j, -2:0.8:w * 1j]
    c = x + y * 1j
    z = c

    divtime = maxit + zeros(z.shape, dtype=int)

    for i in xrange(maxit):
        z = z ** 2 + c
        diverge = z * numpy.conj(z) > 2 ** 2    # diverged once |z|^2 > 4
        div_now = diverge & (divtime == maxit)  # which points diverged this step
        divtime[div_now] = i
        z[diverge] = 2                          # clamp diverged points to avoid overflow

    return divtime
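
A hypothetical rendering sketch (not part of the original), assuming matplotlib is available:

# Hypothetical usage sketch: render the escape-time image.
import matplotlib.pyplot as plt

plt.imshow(mandelbrot(400, 400), extent=(-2, 0.8, -1.4, 1.4))
plt.show()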
Example #31
 def printALL(specRange,numOfComponents,y,v):
 # make some graphs of the components
       error = range(0,specRange)
       estimate = range(0,specRange)
       for i in range(0,specRange):
             estimate[i] = mixtureFNC(i, v)
             error[i] = (estimate[i] - y[i])**2
             
       models = zeros((numOfComponents,specRange))
       
       for i in range(0,numOfComponents):
             for j in range(0,specRange):
                   models[i][j] = normcurv(x[j],v[i*3],v[i*3+1],v[i*3+2])
             pylab.plot(x,models[i], linestyle='--')
             
       print v
Example #32
def main():
    distribution = Banana(dimension=8)

    sigma = 5
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    mcmc_sampler = Kameleon(distribution, kernel,
                            distribution.sample(100).samples)

    start = zeros(distribution.dimension)
    mcmc_params = MCMCParams(start=start, num_iterations=20000)
    chain = MCMCChain(mcmc_sampler, mcmc_params)

    chain.append_mcmc_output(StatisticsOutput(plot_times=True))
    chain.run()
Example #33
 def log_pdf_multiple_points(self, X):
     assert(len(shape(X)) == 2)
     assert(shape(X)[1] == self.dimension)
     
     log_determinant_part = -sum(log(diag(self.L)))
     
     quadratic_parts = zeros(len(X))
     for i in range(len(X)):
         x = X[i] - self.mu
         
         # solve y=K^(-1)x = L^(-T)L^(-1)x
         y = solve_triangular(self.L, x.T, lower=True)
         y = solve_triangular(self.L.T, y, lower=False)
         quadratic_parts[i] = -0.5 * x.dot(y)
         
     const_part = -0.5 * len(self.L) * log(2 * pi)
     
     return const_part + log_determinant_part + quadratic_parts
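
A standalone sketch (an assumption, not the source's API) of the same computation for a single point: evaluating a multivariate Gaussian log-density through a Cholesky factor of the covariance instead of forming an explicit inverse:

# Standalone sketch: Gaussian log-density via a Cholesky factor of Sigma.
from numpy import asarray, diag, log, pi
from numpy.linalg import cholesky
from scipy.linalg import solve_triangular

def gaussian_log_pdf(x, mu, Sigma):
    L = cholesky(Sigma)                        # Sigma = L L^T, L lower-triangular
    d = asarray(x) - asarray(mu)
    y = solve_triangular(L, d, lower=True)     # y = L^{-1} (x - mu)
    quadratic_part = -0.5 * y.dot(y)           # -(x - mu)^T Sigma^{-1} (x - mu) / 2
    log_determinant_part = -sum(log(diag(L)))  # -0.5 * log |Sigma|
    const_part = -0.5 * len(d) * log(2 * pi)
    return const_part + log_determinant_part + quadratic_part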
Example #34
 def __init__(self,
              dimension=2,
              num_components=2,
              components=None,
              mixing_proportion=None):
     Distribution.__init__(self, dimension)
     self.num_components = num_components
     if (components == None):
         self.components = [
             Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
             for _ in range(self.num_components)
         ]
     else:
         assert (len(components) == self.num_components)
         self.components = components
     if (mixing_proportion == None):
         self.mixing_proportion = Discrete(
             (1.0 / num_components) * ones([num_components]))
     else:
         assert (num_components == mixing_proportion.num_objects)
         self.mixing_proportion = mixing_proportion
Example #35
    def predict(self, X_test, f_mode=None):
        """
        Predictions for GP with Laplace approximation.
        
        from GP book, algorithm 3.2,
        
        """
        if f_mode is None:
            f_mode = self.find_mode_newton()

        predictions = zeros(len(X_test))

        K = self.gp.K
        K_train_test = self.gp.covariance.compute(self.gp.X, X_test)

        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f_mode)
        w_sqrt = sqrt(w)

        # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
        L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)

        # iterator for all testing points
        for i in range(len(X_test)):
            k = K_train_test[:, i]
            k_self = self.gp.covariance.compute([X_test[i]], [X_test[i]])[0]

            f_mean = k.dot(
                self.gp.likelihood.log_lik_grad_vector(self.gp.y, f_mode))
            v = solve_triangular(L, w_sqrt * k, lower=True)
            f_var = k_self - v.T.dot(v)

            predictions[i] = integrate.quad(
                lambda x: norm.pdf(x, f_mean, f_var), -inf, inf)[0]
#            # integrate over Gaussian using some crude numerical integration
#            samples=randn(1000)*sqrt(f_var) + f_mean
#
#            log_liks=self.gp.likelihood.log_lik_vector(1.0, samples)
#            predictions[i]=1.0/len(samples)*GPTools.log_sum_exp(log_liks)

        return predictions
Example #36
    def predict(self, X_test, f_mode=None):
        """
        Predictions for GP with Laplace approximation.
        
        from GP book, algorithm 3.2,
        
        """
        if f_mode is None:
            f_mode = self.find_mode_newton()

        predictions = zeros(len(X_test))

        K = self.gp.K
        K_train_test = self.gp.covariance.compute(self.gp.X, X_test)

        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f_mode)
        w_sqrt = sqrt(w)

        # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
        L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)

        # iterator for all testing points
        for i in range(len(X_test)):
            k = K_train_test[:, i]
            k_self = self.gp.covariance.compute([X_test[i]], [X_test[i]])[0]

            f_mean = k.dot(self.gp.likelihood.log_lik_grad_vector(self.gp.y, f_mode))
            v = solve_triangular(L, w_sqrt * k, lower=True)
            f_var = k_self - v.T.dot(v)

            predictions[i] = integrate.quad(lambda x: norm.pdf(x, f_mean, f_var), -inf, inf)[0]
        #            # integrate over Gaussian using some crude numerical integration
        #            samples=randn(1000)*sqrt(f_var) + f_mean
        #
        #            log_liks=self.gp.likelihood.log_lik_vector(1.0, samples)
        #            predictions[i]=1.0/len(samples)*GPTools.log_sum_exp(log_liks)

        return predictions
Example #37
 def __init__(self, P1, P2, timelim):
     self.P1 = P1  # needed to create zeros array for histogram
     self.P2 = P2  # needed to create zeros array for histogram
     self.timelim = timelim  # the timelimit of the simulation
     self.M_common_room_Histogram = zeros(
         P1 + 1
     )  # list that counts time that N mobile patients reside in common room during simulation
     self.M_private_room_Histogram = zeros(
         P1 + 1
     )  # list that counts time that N mobile patients reside in private room during simulation
     self.M_gym_Histogram = zeros(
         P1 + 1
     )  # list that counts time that N patients reside in the gym during simulation
     self.M_sum_in_common = 0  # used for calculating the mean number of mobile patients in the common room
     self.M_sum_in_common2 = 0  # used for calculating the variance of mobile patients in the common room
     self.M_sum_in_private = 0  # used for calculating the mean number of mobile patients in the private room
     self.M_sum_in_private2 = 0  # used for calculating the variance of mobile patients in the private room
     self.M_sum_in_gym = 0  # used for calculating the mean number of mobile patients in the gym
     self.M_sum_in_gym2 = 0  # used for calculating the variance of the number of patients in the gym
     self.I_common_room_Histogram = zeros(
         P2 + 1
     )  # list that counts time that N immobile patients reside in the common room
     self.I_private_room_Histogram = zeros(
         P2 + 1
     )  # list that counts time that N immobile patients reside in the private room
     self.I_sum_in_common = 0  # used for calculating the mean number of immobile patients in the common room
     self.I_sum_in_common2 = 0  # used for calculating the variance of the number of immobile patients in the common room
     self.I_sum_in_private = 0  # used for calculating the mean number of immobile patients in the private room
     self.I_sum_in_private2 = 0
     self.I_sum_Qcommon = 0  # used for calculating the mean number of immobile patients in the queue at the common room
     self.I_sum_Qcommon2 = 0  # used for calculating the variance of the number of immobile patients in the queue at the common room
     self.I_sum_Qprivate = 0  # used for calculating the mean number of immobile patients in the queue at the private room
     self.I_sum_Qprivate2 = 0  # used for calculating the variance of the number of immobile patients in the queue at the private room
     self.I_Qcommon_Histogram = zeros(P2 + 1)
     self.I_Qprivate_Histogram = zeros(P2 + 1)
     self.sum_in_common = 0
     self.sum_in_private = 0
Example #38
print 'using this many samples for the long chain: ', shape(samples_long)[0]

how_many_chains = 20
stats_granularity = 10

path_above = "/nfs/home2/dino/git/kameleon-mcmc/main/gp/scripts/glass_gaussian_ard/"
#path_above = "/nfs/data3/ucabhst/kameleon_experiments/glass_ard/"
path_below = "output/experiment_output.bin"

#sampler_names = ["KameleonWindowLearnScale", "AdaptiveMetropolisLearnScale","AdaptiveMetropolis"]
sampler_names = ["StandardMetropolis"]
path_temp = "_PseudoMarginalHyperparameterDistribution_#/"

for sampler_name in sampler_names:
    mean_dist = zeros((stats_granularity, how_many_chains))
    mmds = zeros((stats_granularity, how_many_chains))
    for num_chain in range(0, how_many_chains):
        path = path_temp.replace('#', str(num_chain))
        print sampler_name + path
        f = open(path_above + sampler_name + path + path_below)
        experiment = load(f)
        f.close()
        mcmc_chain = experiment.mcmc_chain
        burnin = mcmc_chain.mcmc_params.burnin
        print 'burnin: ', burnin
        print 'total chain length: ', mcmc_chain.iteration
        thin = 20
        print 'thinning by: ', thin
        #indices = range(0, mcmc_chain.iteration,thin)
        indices = range(0, 100000, thin)
Example #39
import sys
fr=open('D:\eclipse_workspace\Classify\data\data.txt')
macList=['c0:38:96:25:5b:c3','e0:05:c5:ba:80:40','b0:d5:9d:46:a3:9b','42:a5:89:51:c7:dd']
X=empty((4,60),numpy.int8)
for line in fr:
    parts=line.split(',')
    try:
        poi=macList.index(parts[2])
        print('poi',poi)
        if poi!='-1':
            print('try parts[2]:',parts[2])
            lie=int(parts[-1].strip())-1
            X[poi,lie] = parts[1]
    except :
        pass
        #print('haha',parts[2])
    else:
        print('no error')
print("final:",type(list),type(1),type(macList))
print(X)
w=ones((4,1))
b=1
print(transpose(w))
z=dot(transpose(w),X)+1
y1=zeros((30,1))
y2=ones((30,1))
y=row_stack((y1,y2))
print(y)
#plt.plot(list)
#plt.show()
Example #40
    def _compute_coefs(self, xx, yy, p=None, var=1):
        x, y = np.atleast_1d(xx, yy)
        x = x.ravel()
        dx = np.diff(x)
        must_sort = (dx < 0).any()
        if must_sort:
            ind = x.argsort()
            x = x[ind]
            y = y[..., ind]
            dx = np.diff(x)

        n = len(x)

        #ndy = y.ndim
        szy = y.shape

        nd = prod(szy[:-1])
        ny = szy[-1]

        if n < 2:
            raise ValueError('There must be >=2 data points.')
        elif (dx <= 0).any():
            raise ValueError('Two consecutive values in x can not be equal.')
        elif n != ny:
            raise ValueError('x and y must have the same length.')

        dydx = np.diff(y) / dx

        if (n == 2):  # % straight line
            coefs = np.vstack([dydx.ravel(), y[0, :]])
        else:

            dx1 = 1. / dx
            D = sp.spdiags(var * ones(n), 0, n, n)  # The variance

            u, p = self._compute_u(p, D, dydx, dx, dx1, n)
            dx1.shape = (n - 1, -1)
            dx.shape = (n - 1, -1)
            zrs = zeros(nd)
            if p < 1:
                # faster than yi-6*(1-p)*Q*u
                ai = (y - (6 * (1 - p) * D *
                           diff(vstack([zrs,
                                    diff(vstack([zrs, u, zrs]), axis=0) * dx1,
                                                          zrs]), axis=0)).T).T
            else:
                ai = y.reshape(n, -1)

            # The piecewise polynominals are written as
            # fi=ai+bi*(x-xi)+ci*(x-xi)^2+di*(x-xi)^3
            # where the derivatives in the knots according to Carl de Boor are:
            #    ddfi  = 6*p*[0;u] = 2*ci;
            #    dddfi = 2*diff([ci;0])./dx = 6*di;
            #    dfi   = diff(ai)./dx-(ci+di.*dx).*dx = bi;

            ci = np.vstack([zrs, 3 * p * u])
            di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3)
            bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx)
            ai = ai[:n - 1, ...]
            if nd > 1:
                di = di.T
                ci = ci.T
                ai = ai.T
            if not any(di):
                if not any(ci):
                    coefs = vstack([bi.ravel(), ai.ravel()])
                else:
                    coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()])
            else:
                coefs = vstack(
                    [di.ravel(), ci.ravel(), bi.ravel(), ai.ravel()])

        return coefs, x
Example #41
#    logging.info("Replacing engine with serial engine instance")
#    engine = SerialComputationEngine()
    
    # we have to collect aggregators somehow
    aggregators = []
    
    # submit job three times
    logging.info("starting loop over job submission")
    for i in range(3):
        logging.info("submitting job %d" % i)
        job = MyJob(ScalarResultAggregator())
        aggregators.append(engine.submit_job(job))
        
    # let the engine finish its business
    logging.info("wait for all call in engine")
    engine.wait_for_all()
    
    # lets collect the results
    results = zeros(len(aggregators))
    logging.info("Collecting results")
    for i in range(len(aggregators)):
        logging.info("collecting result %d" % i)
        # let the aggregator finalize things, not really needed here but in general
        aggregators[i].finalize()
        
        # aggregators[i].get_final_result() returns a ScalarResult instance,
        # which we need to extract the number from
        results[i] = aggregators[i].get_final_result().result
    
    print "results", results
Example #42
                                                    ridge=1e-3)

    sigma = 23.0
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)
    
    for i in range(n):
        
        mcmc_samplers = []
        
        burnin=20000
        num_iterations=100000
        
        mcmc_samplers.append(KameleonWindowLearnScale(distribution, kernel, stop_adapt=burnin))
        
        mean_est = zeros(distribution.dimension, dtype="float64")
        cov_est = 1.0 * eye(distribution.dimension)
        #cov_est[0, 0] = distribution.V
        mcmc_samplers.append(AdaptiveMetropolisLearnScale(distribution, mean_est=mean_est, cov_est=cov_est))
        mcmc_samplers.append(AdaptiveMetropolis(distribution, mean_est=mean_est, cov_est=cov_est))
        #mcmc_samplers.append(StandardMetropolis(distribution))
        
        start = zeros(distribution.dimension, dtype="float64")
        mcmc_params = MCMCParams(start=start, num_iterations=num_iterations, burnin=burnin)
        
        mcmc_chains = [MCMCChain(mcmc_sampler, mcmc_params) for mcmc_sampler in mcmc_samplers]
        for mcmc_chain in mcmc_chains:
            mcmc_chain.append_mcmc_output(StatisticsOutput())
        
        experiments = [SingleChainExperiment(mcmc_chain, experiment_dir) for mcmc_chain in mcmc_chains]
        
 def precompute_likelihood_estimates(self, tau, kappa):
     logging.debug("Entering")
     
     # submit all jobs for log-determinant Q
     aggregators_Q = []
     for _ in range(self.num_estimates):
         job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa, "Q")
         aggregators_Q.append(self.computation_engine.submit_job(job))
     
     # submit all jobs for log-determinant M
     aggregators_M = []
     for _ in range(self.num_estimates):
         job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa, "M")
         aggregators_M.append(self.computation_engine.submit_job(job))
     
     # submit job for remainder of likelihood
     job = OzoneLikelihoodWithoutLogDetJob(ScalarResultAggregator(), self, tau, kappa)
     aggregator_remainder = self.computation_engine.submit_job(job)
     
     # grab a coffee
     self.computation_engine.wait_for_all()
     
     # collect results from all aggregators
     log_dets_Q = zeros(self.num_estimates)
     log_dets_M = zeros(self.num_estimates)
     for i in range(self.num_estimates):
         aggregators_Q[i].finalize()
         aggregators_M[i].finalize()
         log_dets_Q[i] = aggregators_Q[i].get_final_result().result
         log_dets_M[i] = aggregators_M[i].get_final_result().result
         aggregators_Q[i].clean_up()
         aggregators_M[i].clean_up()
         
     aggregator_remainder.finalize()
     result_remainder = aggregator_remainder.get_final_result().result
     aggregator_remainder.clean_up()
         
     # load n since needed for likelihood
     y, _ = OzonePosterior.load_ozone_data()
     n = len(y)
     
     # construct all likelihood estimates
     log_det_parts = 0.5 * log_dets_Q + 0.5 * n * log(tau) - 0.5 * log_dets_M
     estimates = log_det_parts + result_remainder
     
     # crude check for an overflow to print error details
     limit = 1e100
     indices = where(abs(estimates) > limit)[0]
     if len(indices) > 0:
         logging.info("Log-likelihood estimates overflow occured at the following indices:")
         for idx in indices:
             logging.info("At index %d. Details are: " % idx)
             logging.info("log-det Q: " + aggregators_Q[idx].job_name + 
                          ". Result is %f" % log_dets_Q[idx])
             logging.info("log-det M: " + aggregators_M[idx].job_name + 
                          ". Result is %f" % log_dets_M[idx])
             logging.info("log-lik-without-log-det: " + 
                          aggregator_remainder.job_name + ". Result is %f" % result_remainder[idx])
             
         logging.info("Removing mentioned estimates from list")
         estimates = estimates[abs(estimates) < limit]
         logging.info("New number of estimates is %d, old was %d" % 
                      (len(estimates), self.num_estimates))
             
     
     logging.debug("Leaving")
     return estimates
from distribution.Gaussian import Gaussian
from distribution.SampleObject import SampleObject
from mcmc.chain.Chain import Chain
from mcmc.chain.ChainParams import ChainParams
from mcmc.sampler.MetropolisHastings import MetropolisHastings
from numpy.lib.twodim_base import eye
from numpy.ma.core import zeros

if __name__ == '__main__':
    start=SampleObject(zeros(2))
    target=Gaussian(mu=zeros(2), Sigma=eye(2))
    sampler=MetropolisHastings(target, start)
    params=ChainParams(1000)
    chain=Chain(params, sampler)
    chain.run()
Example #45
    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
    #    sampler=AdaptiveMetropolisLearnScale(target)
    #    sampler=StandardMetropolis(target)

    # posterior mode derived by initial tests
    start = zeros(target.dimension)
    params = MCMCParams(start=start,
                        num_iterations=num_iterations,
                        burnin=burnin)

    # create MCMC chain
    chain = MCMCChain(sampler, params)
    chain.append_mcmc_output(StatisticsOutput(print_from=0, lag=100))
    #chain.append_mcmc_output(PlottingOutput(plot_from=0, lag=500))

    # create experiment instance to store results
    experiment_dir = str(os.path.abspath(sys.argv[0])).split(
        os.sep)[-1].split(".")[0] + os.sep
    experiment = SingleChainExperiment(chain, experiment_dir)

    experiment.run()
Example #46
    def find_mode_newton(self, return_full=False):
        """
        Newton search for mode of p(y|f)p(f)
        
        from GP book, algorithm 3.1, added step size
        """
        K = self.gp.K

        if self.newton_start is None:
            f = zeros(len(K))
        else:
            f = self.newton_start

        if return_full:
            steps = [f]

        iteration = 0
        norm_difference = inf
        objective_value = -inf

        while iteration < self.newton_max_iterations and norm_difference > self.newton_epsilon:
            # from GP book, algorithm 3.1, added step size
            # scale log_lik_grad_vector and K^-1 f = a

            w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
            w_sqrt = sqrt(w)

            # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
            L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
            b = f * w + self.newton_step * \
                self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

            # a=b-diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt).dot(diag(w_sqrt).dot(K.dot(b))))
            a = (w_sqrt * (K.dot(b)))
            a = solve_triangular(L, a, lower=True)
            a = solve_triangular(L.T, a, lower=False)
            a = w_sqrt * a
            a = b - a

            f_new = K.dot(self.newton_step * a)

            # convergence stuff and next iteration
            objective_value_new = -0.5 * a.T.dot(f) + \
                                sum(self.gp.likelihood.log_lik_vector(self.gp.y, f))
            norm_difference = norm(f - f_new)

            if objective_value_new > objective_value:
                f = f_new
                if return_full:
                    steps.append(f)
            else:
                self.newton_step /= 2

            iteration += 1
            objective_value = objective_value_new

        self.computed = True

        if return_full:
            return f, L, asarray(steps)
        else:
            return f
Example #47
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return

        # burnin is the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin

        quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
        norm_of_means = zeros(len(self.experiments))
        acceptance_rates = zeros(len(self.experiments))
        #         ess_0 = zeros(len(self.experiments))
        #         ess_1 = zeros(len(self.experiments))
        #         ess_minima = zeros(len(self.experiments))
        #         ess_medians = zeros(len(self.experiments))
        #         ess_maxima = zeros(len(self.experiments))
        times = zeros(len(self.experiments))

        for i in range(len(self.experiments)):
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

            # use precomputed quantiles if they match with the provided ones
            if hasattr(self.experiments[i], "ref_quantiles") and \
               hasattr(self.experiments[i], "quantiles") and \
               allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
                quantiles[i, :] = self.experiments[i].quantiles
            else:
                try:
                    quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(\
                                      burned_in, self.ref_quantiles)
                except NotImplementedError:
                    print "skipping quantile computations, distribution does", \
                          "not support it."

            # quantiles should be about average error rather than average quantile
            quantiles[i, :] = abs(quantiles[i, :] - self.ref_quantiles)

            dim = self.experiments[
                i].mcmc_chain.mcmc_sampler.distribution.dimension
            norm_of_means[i] = norm(mean(burned_in, 0))
            acceptance_rates[i] = mean(
                self.experiments[i].mcmc_chain.accepteds[burnin:])

            # dump burned in samples to disc
            # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
            # savetxt(sample_filename, burned_in)

            # store minimum ess for every experiment
            #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
            #             ess_per_covariate = asarray([0 for _ in range(dim)])
            #             ess_0=ess_per_covariate[0]
            #             ess_1=ess_per_covariate[1]
            #             ess_minima[i] = min(ess_per_covariate)
            #             ess_medians[i] = median(ess_per_covariate)
            #             ess_maxima[i] = max(ess_per_covariate)

            # save chain time needed
            elapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
            times[i] = int(round(sum(elapsed)))

        mean_quantiles = mean(quantiles, 0)
        std_quantiles = std(quantiles, 0)

        sqrt_num_trials = sqrt(len(self.experiments))

        # print median kernel width sigma
        #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
        #lines.append("median kernel sigma: "+str(sigma))

        lines.append("quantiles:")
        for i in range(len(self.ref_quantiles)):
            lines.append(
                str(mean_quantiles[i]) + " +- " +
                str(std_quantiles[i] / sqrt_num_trials))

        lines.append("norm of means:")
        lines.append(
            str(mean(norm_of_means)) + " +- " +
            str(std(norm_of_means) / sqrt_num_trials))

        lines.append("acceptance rate:")
        lines.append(
            str(mean(acceptance_rates)) + " +- " +
            str(std(acceptance_rates) / sqrt_num_trials))

        #         lines.append("ess dimension 0:")
        #         lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
        #
        #         lines.append("ess dimension 1:")
        #         lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
        #
        #         lines.append("minimum ess:")
        #         lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
        #
        #         lines.append("median ess:")
        #         lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
        #
        #         lines.append("maximum ess:")
        #         lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))

        lines.append("times:")
        lines.append(
            str(mean(times)) + " +- " + str(std(times) / sqrt_num_trials))

        # mean as a function of iterations, normalised by time
        step = round(
            (self.experiments[0].mcmc_chain.mcmc_params.num_iterations -
             burnin) / 5)
        iterations = arange(
            self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin,
            step=step)

        running_means = zeros(len(iterations))
        running_errors = zeros(len(iterations))
        for i in arange(len(iterations)):
            # norm of the mean of each chain up to the current iteration
            norm_of_means_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(
                    burnin + iterations[i] + 1 + step), :]
                norm_of_means_yet[j] = norm(mean(samples_yet, 0))

            running_means[i] = mean(norm_of_means_yet)
            error_level = 1.96
            running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(
                len(norm_of_means_yet))

        ioff()
        figure()
        plot(iterations, running_means * mean(times))
        fill_between(iterations, (running_means - running_errors)*mean(times), \
                     (running_means + running_errors)*mean(times), hold=True, color="gray")

        # make sure path to save exists
        try:
            os.makedirs(self.experiments[0].experiment_dir)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

        savefig(self.experiments[0].experiment_dir + self.experiments[0].name +
                "_running_mean.png")
        close()

        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt", \
                running_means*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt", \
                running_errors*mean(times))

        # don't produce quantile convergence plots here for now
        """# quantile convergence of a single one
        desired_quantile=0.5
        running_quantiles=zeros(len(iterations))
        running_quantile_errors=zeros(len(iterations))
        for i in arange(len(iterations)):
            quantiles_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                
                # just compute one quantile for now
                quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                                                                                          array([desired_quantile]))
                quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)
            running_quantiles[i] = mean(quantiles_yet)
            error_level = 1.96
            running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))
        
        
        ioff()
        figure()
        plot(iterations, running_quantiles*mean(times))
        fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                     (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")
        
        plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])
        
        title(str(desired_quantile)+"-quantile convergence")
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
                running_quantiles*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
                running_quantile_errors*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
                [desired_quantile*mean(times)])
        """
        # add latex table line
        #         latex_lines = []
        #         latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
        #         for i in range(len(self.ref_quantiles)):
        #             latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
        #             if i < len(self.ref_quantiles) - 1:
        #                 latex_lines.append(" & ")
        #         latex_lines.append("\\\\")
        #         lines.append("".join(latex_lines))
        #
        #         latex_lines = []
        #         latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
        #         latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
        #         latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
        #         for i in range(len(self.ref_quantiles)):
        #             latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
        #
        #
        #         lines.append(" & ".join(latex_lines) + "\\\\")

        return lines
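
All of the "<mean> +- <error>" summary lines above follow the same convention: the sample mean across experiments plus or minus the standard deviation divided by the square root of the number of trials, with a 1.96 multiplier added for the shaded running-mean bands. A small self-contained sketch of that convention (the input numbers are hypothetical):

import numpy as np

def mean_with_error(values, z=1.0):
    # mean +- z * std / sqrt(n); z=1.0 matches the summary lines above,
    # z=1.96 matches the ~95% bands used for the running-mean plot
    values = np.asarray(values, dtype=float)
    return values.mean(), z * values.std() / np.sqrt(len(values))

# hypothetical norms of means from four independent chains
m, err = mean_with_error([0.12, 0.09, 0.15, 0.11], z=1.96)
print("%.3f +- %.3f" % (m, err))
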
Ejemplo n.º 48
0
    def get_gp_prior(self):
        """
        Returns the GP prior N(0, K); only possible if K is psd.
        """
        return Gaussian(zeros(len(self.K)), self.K, is_cholesky=False)
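
The psd requirement in the docstring is what allows the prior to be represented and sampled through a Cholesky factor. A minimal stand-alone sketch of sampling from N(0, K), independent of the Gaussian class used above (the jitter value is an assumption for numerical stability):

import numpy as np

def sample_gp_prior_sketch(K, num_samples=1, jitter=1e-8):
    # draw samples from N(0, K); a small diagonal jitter keeps the
    # Cholesky factorisation stable when K is only positive semi-definite
    n = len(K)
    L = np.linalg.cholesky(K + jitter * np.eye(n))
    return L.dot(np.random.randn(n, num_samples)).T
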
Ejemplo n.º 49
0
    def _compute_coefs(self, xx, yy, p=None, var=1):
        x, y = np.atleast_1d(xx, yy)
        x = x.ravel()
        dx = np.diff(x)
        must_sort = (dx < 0).any()
        if must_sort:
            ind = x.argsort()
            x = x[ind]
            y = y[..., ind]
            dx = np.diff(x)

        n = len(x)

        # ndy = y.ndim
        szy = y.shape

        nd = prod(szy[:-1])
        ny = szy[-1]

        if n < 2:
            raise ValueError('There must be >=2 data points.')
        elif (dx <= 0).any():
            raise ValueError('Two consecutive values in x can not be equal.')
        elif n != ny:
            raise ValueError('x and y must have the same length.')

        dydx = np.diff(y) / dx

        if n == 2:  # straight line through two points
            coefs = np.vstack([dydx.ravel(), y[0, :]])
        else:

            dx1 = 1. / dx
            D = sparse.spdiags(var * ones(n), 0, n, n)  # The variance

            u, p = self._compute_u(p, D, dydx, dx, dx1, n)
            dx1.shape = (n - 1, -1)
            dx.shape = (n - 1, -1)
            zrs = zeros(nd)
            if p < 1:
                # faster than yi-6*(1-p)*Q*u
                Qu = D * diff(vstack(
                    [zrs, diff(vstack([zrs, u, zrs]), axis=0) * dx1, zrs]),
                              axis=0)
                ai = (y - (6 * (1 - p) * Qu).T).T
            else:
                ai = y.reshape(n, -1)

            # The piecewise polynomials are written as
            # fi=ai+bi*(x-xi)+ci*(x-xi)^2+di*(x-xi)^3
            # where the derivatives in the knots according to Carl de Boor are:
            #    ddfi  = 6*p*[0;u] = 2*ci;
            #    dddfi = 2*diff([ci;0])./dx = 6*di;
            #    dfi   = diff(ai)./dx-(ci+di.*dx).*dx = bi;

            ci = np.vstack([zrs, 3 * p * u])
            di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3)
            bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx)
            ai = ai[:n - 1, ...]
            if nd > 1:
                di = di.T
                ci = ci.T
                ai = ai.T
            if not any(di):
                if not any(ci):
                    coefs = vstack([bi.ravel(), ai.ravel()])
                else:
                    coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()])
            else:
                coefs = vstack(
                    [di.ravel(),
                     ci.ravel(),
                     bi.ravel(),
                     ai.ravel()])

        return coefs, x
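
The coefficients returned above are stacked highest order first, so each interval [x_i, x_{i+1}] carries a local polynomial f_i(t) = d_i*(t-x_i)^3 + c_i*(t-x_i)^2 + b_i*(t-x_i) + a_i, as spelled out in the comment block. A minimal evaluation sketch, assuming a single data dimension (nd == 1) and the coefs, x pair returned by _compute_coefs:

import numpy as np

def eval_pp_sketch(coefs, breaks, t):
    # locate the interval of each query point and evaluate the local
    # polynomial with Horner's scheme (coefficients highest order first)
    t = np.atleast_1d(t)
    ind = np.clip(np.searchsorted(breaks, t, side='right') - 1,
                  0, len(breaks) - 2)
    dt = t - breaks[ind]
    values = np.zeros_like(dt, dtype=float)
    for row in coefs:
        values = values * dt + row[ind]
    return values
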
Ejemplo n.º 50
0
    def precompute_likelihood_estimates(self, tau, kappa):
        logging.debug("Entering")

        # submit all jobs for log-determinant Q
        aggregators_Q = []
        for _ in range(self.num_estimates):
            job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa,
                                 "Q")
            aggregators_Q.append(self.computation_engine.submit_job(job))

        # submit all jobs for log-determinant M
        aggregators_M = []
        for _ in range(self.num_estimates):
            job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa,
                                 "M")
            aggregators_M.append(self.computation_engine.submit_job(job))

        # submit job for remainder of likelihood
        job = OzoneLikelihoodWithoutLogDetJob(ScalarResultAggregator(), self,
                                              tau, kappa)
        aggregator_remainder = self.computation_engine.submit_job(job)

        # grab a coffee
        self.computation_engine.wait_for_all()

        # collect results from all aggregators
        log_dets_Q = zeros(self.num_estimates)
        log_dets_M = zeros(self.num_estimates)
        for i in range(self.num_estimates):
            aggregators_Q[i].finalize()
            aggregators_M[i].finalize()
            log_dets_Q[i] = aggregators_Q[i].get_final_result().result
            log_dets_M[i] = aggregators_M[i].get_final_result().result
            aggregators_Q[i].clean_up()
            aggregators_M[i].clean_up()

        aggregator_remainder.finalize()
        result_remainder = aggregator_remainder.get_final_result().result
        aggregator_remainder.clean_up()

        # load n since needed for likelihood
        y, _ = OzonePosterior.load_ozone_data()
        n = len(y)

        # construct all likelihood estimates
        log_det_parts = 0.5 * log_dets_Q + 0.5 * n * log(
            tau) - 0.5 * log_dets_M
        estimates = log_det_parts + result_remainder

        # crude check for an overflow to print error details
        limit = 1e100
        indices = where(abs(estimates) > limit)[0]
        if len(indices) > 0:
            logging.info(
                "Log-likelihood estimates overflow occured at the following indices:"
            )
            for idx in indices:
                logging.info("At index %d. Details are: " % idx)
                logging.info("log-det Q: " + aggregators_Q[idx].job_name +
                             ". Result is %f" % log_dets_Q[idx])
                logging.info("log-det M: " + aggregators_M[idx].job_name +
                             ". Result is %f" % log_dets_M[idx])
                logging.info("log-lik-without-log-det: " +
                             aggregator_remainder.job_name +
                             ". Result is %f" % result_remainder[idx])

            logging.info("Removing mentioned estimates from list")
            estimates = estimates[abs(estimates) < limit]
            logging.info("New number of estimates is %d, old was %d" %
                         (len(estimates), self.num_estimates))

        logging.debug("Leaving")
        return estimates
Ejemplo n.º 51
0
def find_bursts(duration, dt, transient, N, M_t, M_i, max_freq):
    base = 2  #round lgbinwidth to nearest 2 so will always divide into durations
    expnum = 2.0264 * exp(-0.2656 * max_freq + 2.9288) + 5.7907
    lgbinwidth = (int(base * round(
        (-max_freq + 33) / base))) * ms  #23-good for higher freq stuff
    #lgbinwidth=(int(base*round((expnum)/base)))/1000   #use exptl based on some fit of choice binwidths
    #lgbinwidth=10*ms

    numlgbins = int(ceil(duration / lgbinwidth))
    #totspkhist=zeros((numlgbins,1))
    totspkhist = zeros(numlgbins)
    #totspkdist_smooth=zeros((numlgbins,1))
    skiptime = transient * ms
    skipbin = int(ceil(skiptime / lgbinwidth))

    inc_past_thresh = []
    dec_past_thresh = []

    #Create histogram given the bins calculated
    for i in xrange(numlgbins):
        step_start = (i) * lgbinwidth
        step_end = (i + 1) * lgbinwidth
        totspkhist[i] = len(M_i[logical_and(M_t > step_start, M_t < step_end)])

    ###smooth plot first so thresholds work better
    #totspkhist_1D=reshape(totspkhist,len(totspkhist))  #first just reshape so single row not single colm
    #b,a=butter(3,0.4,'low')
    #totspkhist_smooth=filtfilt(b,a,totspkhist_1D)

    #totspkhist_smooth=reshape(totspkhist,len(totspkhist))  #here we took out the actual smoothing and left it as raw distn. here just reshape so single row not single colm
    totspkdist_smooth = totspkhist / max(
        totspkhist[skipbin:]
    )  #create distn based on hist, but skip first skiptime to cut out transient excessive spiking

    #    ####### FOR MOVING THRESHOLD #################
    ## find points where increases and decreases over some threshold
    dist_thresh = []
    thresh_plot = []

    mul_fac = 0.35
    switch = 0  #keeps track of whether inc or dec last
    elim_noise = 1 / (max_freq * 2.5 * Hz)
    #For line 95, somehow not required in previous version?
    #elim_noise_units = 1/(max_freq*Hz*2.5)

    thresh_time = 5 / (max_freq)  #capture 5 cycles
    thresh_ind = int(floor(
        (thresh_time / lgbinwidth) /
        2))  #the number of indices on each side of the window

    #dist_thresh moves with a window capturing approx. 5 cycles (with special cases for the borders). Find where the distribution increases and decreases past the threshold, as long as crossings are at least "elim_noise" apart (based on the average burst frequency).
    dist_thresh.append(
        totspkdist_smooth[skipbin:skipbin + thresh_ind].mean(0) +
        mul_fac * totspkdist_smooth[skipbin:skipbin + thresh_ind].std(0))

    for i in xrange(1, numlgbins):
        step_start = (i) * lgbinwidth
        step_end = (i + 1) * lgbinwidth

        #moving threshold
        if i > (skipbin +
                thresh_ind) and (i + thresh_ind) < len(totspkdist_smooth):
            #print(totspkdist_smooth[i-thresh_ind:i+thresh_ind])
            dist_thresh.append(
                totspkdist_smooth[i - thresh_ind:i + thresh_ind].mean(0) +
                mul_fac *
                totspkdist_smooth[i - thresh_ind:i + thresh_ind].std(0))
        elif (i + thresh_ind) >= len(totspkdist_smooth):
            dist_thresh.append(totspkdist_smooth[-thresh_ind:].mean(0) +
                               mul_fac *
                               totspkdist_smooth[-thresh_ind:].std(0))
        else:
            dist_thresh.append(
                totspkdist_smooth[skipbin:skipbin + thresh_ind].mean(0) +
                mul_fac *
                totspkdist_smooth[skipbin:skipbin + thresh_ind].std(0))

        if (totspkdist_smooth[i - 1] <
                dist_thresh[i]) and (totspkdist_smooth[i] >= dist_thresh[i]):
            #inc_past_thresh.append(step_start-0.5*lgbinwidth)
            if (inc_past_thresh):  #there has already been at least one inc,
                if (
                        abs(inc_past_thresh[-1] -
                            (step_start - 0.5 * lgbinwidth)) > elim_noise
                ) and switch == 0:  #must be at least x ms apart (yHz), and it was dec last..
                    inc_past_thresh.append(
                        step_start - 0.5 * lgbinwidth
                    )  #take lower point (therefore first) when increasing. Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
                    #print (['incr=%f'%inc_past_thresh[-1]])
                    thresh_plot.append(dist_thresh[i])
                    switch = 1
            else:
                inc_past_thresh.append(
                    step_start - 0.5 * lgbinwidth
                )  #take lower point (therefore first) when increasing. Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
                thresh_plot.append(dist_thresh[i])
                switch = 1  #keeps track of that it was inc. last
        elif (totspkdist_smooth[i - 1] >=
              dist_thresh[i]) and (totspkdist_smooth[i] < dist_thresh[i]):
            # dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
            if (dec_past_thresh):  #there has already been at least one dec
                if (
                        abs(dec_past_thresh[-1] -
                            (step_end - 0.5 * lgbinwidth)) > elim_noise
                ) and switch == 1:  #must be at least x ms apart (y Hz), and it was inc last
                    dec_past_thresh.append(
                        step_end - 0.5 * lgbinwidth
                    )  #take lower point (therefore second) when decreasing
                    #print (['decr=%f'%dec_past_thresh[-1]])
                    switch = 0
            else:
                dec_past_thresh.append(
                    step_end - 0.5 * lgbinwidth
                )  #take lower point (therefore second) when decreasing
                switch = 0  #keeps track of that it was dec last

    if totspkdist_smooth[0] < dist_thresh[
            0]:  #if you are starting below thresh, then pop first inc.  otherwise, don't (since will decrease first)
        if inc_past_thresh:  #if list is not empty
            inc_past_thresh.pop(0)
#

#####################################################################
#
######### TO DEFINE A STATIC THRESHOLD AND FIND CROSSING POINTS

#    dist_thresh=0.15 #static threshold
#    switch=0  #keeps track of whether inc or dec last
#    overall_freq=3.6 #0.9
#    elim_noise=1/(overall_freq*5)#2.5)
#
#
#    for i in xrange(1,numlgbins):
#        step_start=(i)*lgbinwidth
#        step_end=(i+1)*lgbinwidth
#
#        if (totspkdist_smooth[i-1]<dist_thresh) and (totspkdist_smooth[i]>=dist_thresh):   #if cross threshold (increasing)
#            if (inc_past_thresh):    #there has already been at least one inc,
#                if (abs(dec_past_thresh[-1]-(step_start-0.5*lgbinwidth))>elim_noise) and switch==0:   #must be at least x ms apart (yHz) from the previous dec, and it was dec last..
#                    inc_past_thresh.append(step_start-0.5*lgbinwidth)  #take lower point (therefore first) when increasing. Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
#                    #print (['incr=%f'%inc_past_thresh[-1]])     #-0.5*lgbinwidth
#                    switch=1
#            else:
#                inc_past_thresh.append(step_start-0.5*lgbinwidth)  #take lower point (therefore first) when increasing. Need to -0.5binwidth to adjust for shift between index of bin width and index of bin distn
#                switch=1   #keeps track of that it was inc. last
#        elif (totspkdist_smooth[i-1]>=dist_thresh) and (totspkdist_smooth[i]<dist_thresh):
#            if (dec_past_thresh):    #there has already been at least one dec
#                if (abs(inc_past_thresh[-1]-(step_end-0.5*lgbinwidth))>elim_noise) and switch==1:    #must be at least x ms apart (y Hz) from the previous incr, and it was inc last
#                    dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
#                    #print (['decr=%f'%dec_past_thresh[-1]])
#                    switch=0
#            else:
#                dec_past_thresh.append(step_end-0.5*lgbinwidth)  #take lower point (therefore second) when decreasing
#                switch=0    #keeps track of that it was dec last
#
#
#    if totspkdist_smooth[0]<dist_thresh:   #if you are starting below thresh, then pop first inc.  otherwise, don't (since will decrease first)
#        if inc_past_thresh:  #if list is not empty
#            inc_past_thresh.pop(0)

################################################################
###############################################################

######## DEFINE INTER AND INTRA BURSTS ########

#since always start with dec, intraburst=time points from 1st inc:2nd dec, from 2nd inc:3rd dec, etc.
#interburst=time points from 1st dec:1st inc, from 2nd dec:2nd inc, etc.

    intraburst_time_ms_compound_list = []
    interburst_time_ms_compound_list = []
    intraburst_bins = []  #in seconds
    interburst_bins = []

    #print(inc_past_thresh)
    if len(inc_past_thresh) < len(dec_past_thresh):  #if you end on a decrease
        for i in xrange(len(inc_past_thresh)):
            intraburst_time_ms_compound_list.append(
                arange(inc_past_thresh[i] / ms, dec_past_thresh[i + 1] / ms,
                       1))  #1 ms timestep
            interburst_time_ms_compound_list.append(
                arange((dec_past_thresh[i] + dt) / ms,
                       (inc_past_thresh[i] - dt) / ms, 1))  #1 ms timestep
            intraburst_bins.append(inc_past_thresh[i])
            intraburst_bins.append(dec_past_thresh[i + 1])
            interburst_bins.append(dec_past_thresh[i])
            interburst_bins.append(inc_past_thresh[i])
    else:  #if you end on an increase
        for i in xrange(len(inc_past_thresh) - 1):
            intraburst_time_ms_compound_list.append(
                arange(inc_past_thresh[i] / ms, dec_past_thresh[i + 1] / ms,
                       1))  #1 ms timestep
            interburst_time_ms_compound_list.append(
                arange((dec_past_thresh[i] + dt) / ms,
                       (inc_past_thresh[i] - dt) / ms, 1))  #1 ms timestep
            intraburst_bins.append(inc_past_thresh[i])
            intraburst_bins.append(dec_past_thresh[i + 1])
            interburst_bins.append(dec_past_thresh[i] + dt)
            interburst_bins.append(inc_past_thresh[i] - dt)
        if dec_past_thresh and inc_past_thresh:  #if neither dec_past_thresh nor inc_past_thresh is empty
            interburst_bins.append(dec_past_thresh[-1] +
                                   dt)  #will have one more inter than intra
            interburst_bins.append(inc_past_thresh[-1] + dt)

    interburst_bins = interburst_bins / second
    intraburst_bins = intraburst_bins / second

    intraburst_time_ms = [
        num for elem in intraburst_time_ms_compound_list for num in elem
    ]  #flatten list
    interburst_time_ms = [
        num for elem in interburst_time_ms_compound_list for num in elem
    ]  #flatten list

    num_intraburst_bins = len(
        intraburst_bins
    ) / 2  #/2 since have both start and end points for each bin
    num_interburst_bins = len(interburst_bins) / 2

    intraburst_bins_ms = [x * 1000 for x in intraburst_bins]
    interburst_bins_ms = [x * 1000 for x in interburst_bins]

    ######################################
    #bin_s=[((inc_past_thresh-dec_past_thresh)/2+dec_past_thresh) for inc_past_thresh, dec_past_thresh in zip(inc_past_thresh,dec_past_thresh)]
    bin_s = [((x - y) / 2 + y)
             for x, y in zip(inc_past_thresh, dec_past_thresh)] / second

    binpt_ind = [int(floor(x / lgbinwidth)) for x in bin_s]

    ########## FIND PEAK TO TROUGH AND SAVE VALUES  ###################
    ########## CATEGORIZE BURSTING BASED ON PEAK TO TROUGH VALUES ###################
    ########## DISCARD BINPTS IF PEAK TO TROUGH IS TOO SMALL ###################

    peaks = []
    trough = []
    peak_to_trough_diff = []
    min_burst_size = 0.2  #defines a burst as 0.2 or larger.

    for i in xrange(len(binpt_ind) - 1):
        peaks.append(max(totspkdist_smooth[binpt_ind[i]:binpt_ind[i + 1]]))
        trough.append(min(totspkdist_smooth[binpt_ind[i]:binpt_ind[i + 1]]))

    peak_to_trough_diff = [
        max_dist - min_dist for max_dist, min_dist in zip(peaks, trough)
    ]

    #to delete all bins following any bin with peak-to-trough < min_burst_size
    first_ind_not_burst = next(
        (x[0] for x in enumerate(peak_to_trough_diff) if x[1] < min_burst_size),
        None)
    #    if first_ind_not_burst:
    #        del bin_s[first_ind_not_burst+1:]   #needs +1 since bin_s has one additional value (since counts edges)

    #to keep track of any bins below min_burst_size so they can be ignored in stats later
    all_ind_not_burst = [
        x[0] for x in enumerate(peak_to_trough_diff) if x[1] < min_burst_size
    ]

    bin_ms = [x * 1000 for x in bin_s]
    binpt_ind = [int(floor(x / lgbinwidth)) for x in bin_s]

    #for moving threshold only
    thresh_plot = []
    thresh_plot = [dist_thresh[x] for x in binpt_ind]

    #for static threshold
    #thresh_plot=[dist_thresh]*len(bin_ms)
    #
    #
    #    bin_s=[((inc_past_thresh-dec_past_thresh)/2+dec_past_thresh) for inc_past_thresh, dec_past_thresh in zip(inc_past_thresh,dec_past_thresh)]
    #    bin_ms=[x*1000 for x in bin_s]
    #    thresh_plot=[]
    #    binpt_ind=[int(floor(x/lgbinwidth)) for x in bin_s]
    #    thresh_plot=[dist_thresh[x] for x in binpt_ind]
    #
    binpts = xrange(int(lgbinwidth * 1000 / 2),
                    int(numlgbins * lgbinwidth * 1000), int(lgbinwidth * 1000))
    totspkhist_list = totspkhist.tolist(
    )  #[val for subl in totspkhist for val in subl]

    #find first index after transient to see if have enough bins to do stats
    bin_ind_no_trans = bisect.bisect(bin_ms, transient)
    intrabin_ind_no_trans = bisect.bisect(intraburst_bins, transient /
                                          1000)  #transient to seconds
    if intrabin_ind_no_trans % 2 != 0:  #index must be even since bins are stored as start, end, start, end, ...
        intrabin_ind_no_trans += 1
    interbin_ind_no_trans = bisect.bisect(interburst_bins, transient / 1000)
    if interbin_ind_no_trans % 2 != 0:
        interbin_ind_no_trans += 1

    return [
        bin_s, bin_ms, binpts, totspkhist, totspkdist_smooth, dist_thresh,
        totspkhist_list, thresh_plot, binpt_ind, lgbinwidth, numlgbins,
        intraburst_bins, interburst_bins, intraburst_bins_ms,
        interburst_bins_ms, intraburst_time_ms, interburst_time_ms,
        num_intraburst_bins, num_interburst_bins, bin_ind_no_trans,
        intrabin_ind_no_trans, interbin_ind_no_trans
    ]
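
The moving threshold used above is simply a windowed mean plus 0.35 standard deviations of the smoothed spike distribution, with the window spanning roughly five burst cycles and fallback windows at the edges. A stripped-down sketch of that idea on a plain 1-D array (names are hypothetical and no Brian units are involved):

import numpy as np

def moving_threshold_sketch(dist, half_window, mul_fac=0.35, skipbin=0):
    # threshold[i] = mean + mul_fac * std over a window of +-half_window
    # bins, mirroring the three cases (interior, right edge, left edge)
    # handled in find_bursts above
    dist = np.asarray(dist, dtype=float)
    n = len(dist)
    thresh = np.empty(n)
    for i in range(n):
        if i > skipbin + half_window and i + half_window < n:
            window = dist[i - half_window:i + half_window]
        elif i + half_window >= n:
            window = dist[-half_window:]
        else:
            window = dist[skipbin:skipbin + half_window]
        thresh[i] = window.mean() + mul_fac * window.std()
    return thresh
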
Ejemplo n.º 52
0
def incomplete_cholesky(X, kernel, eta, power=1, blocksize=100):
    """
    Computes the incomplete Cholesky factorisation of the kernel matrix defined
    by samples X and a given kernel. The kernel is evaluated on-the-fly.
    The optional power parameter is used to multiply the kernel output with
    itself.
    
    Original code from "Kernel Methods for Pattern Analysis" by Shawe-Taylor and
    Cristianini.
    Modified to compute the kernel on the fly, to support kernels multiplied
    with themselves (tensor product), and to optimise speed via vector
    operations; the full kernel matrix is never pre-allocated, memory for the
    low-rank factor is instead allocated block-wise.
    Changes by Heiko Strathmann
    
    parameters:
    X         - list of input vectors to evaluate kernel on
    kernel    - a kernel object with a kernel method that takes 2d-arrays
                and returns a psd kernel matrix
    eta       - precision cutoff parameter for the low-rank approximation.
                Lies in (0,1), where smaller means more accurate.
    power     - every kernel evaluation is multiplied with itself this number
                of times. Zero is supported.
    blocksize - tuning parameter for speed, determines how many rows are
                allocated at a time for the (growing) kernel matrix. Larger
                means a faster algorithm (to some extent, if the low-rank
                dimension is larger than blocksize).
    
    output:
    K_chol, I, R, W, where
    K_chol - is the kernel matrix restricted to the pivot index features
    I      - is a list containing the pivots used to compute K_chol
    R      - is a low-rank factor such that R.T.dot(R) approximates the
             original K
    W      - is a matrix such that W.T.dot(K_chol.dot(W)) approximates the
             original K
    
    """
    assert(eta>0 and eta<1)
    assert(power>=0)
    assert(blocksize>0)
    assert(len(X)>0)
    
    m=len(X)

    # growing low rank basis
    R=zeros((blocksize,m))
    
    # diagonal (assumed to be one)
    d=ones(m)
    
    # used indices
    I=[]
    nu=[]
    
    # algorithm is executed as long as a is bigger than eta precision
    a=d.max()
    I.append(d.argmax())
    
    # growing set of evaluated kernel values
    K=zeros((blocksize,m))
    
    j=0
    while a>eta:
        nu.append(sqrt(a))
        
        if power>=1:
            K[j,:]=kernel.kernel([X[I[j]]], X)**power
        else:
            K[j,:]=ones(m)
            
        if j==0:
            R_dot_j=0
        elif j==1:
            R_dot_j=R[:j,:]*R[:j,I[j]]
        else:
            R_dot_j=R[:j,:].T.dot(R[:j,I[j]])
                        
        R[j,:]=(K[j,:] - R_dot_j)/nu[j]
        d=d-R[j,:]**2
        a=d.max()
        I.append(d.argmax())
        j=j+1
        
        # allocate more space for kernel
        if j>=len(K):
            K=vstack((K, zeros((blocksize,m))))
            R=vstack((R, zeros((blocksize,m))))
            
    # remove unused rows that were allocated unnecessarily
    K=K[:j,:]
    R=R[:j,:]

    # remove the last pivot index since it is not used
    I=I[:-1]
    
    # from low rank to full rank
    W=solve(R[:,I], R)
    
    # low rank K
    K_chol=K[:,I]
    
    return K_chol, I, R, W
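
The docstring above fully specifies the interface; what follows is a hedged usage sketch rather than code from the original project. It assumes incomplete_cholesky and its numpy imports are in scope, and substitutes a plain RBF kernel object (with unit diagonal, as the algorithm assumes) for the project's own kernel classes:

import numpy as np

class RBFKernelSketch(object):
    # minimal stand-in for the kernel object expected by incomplete_cholesky
    def __init__(self, sigma=1.0):
        self.sigma = sigma

    def kernel(self, X, Y):
        X, Y = np.asarray(X), np.asarray(Y)
        sq_dists = ((X[:, None, :] - Y[None, :, :]) ** 2).sum(-1)
        return np.exp(-sq_dists / (2 * self.sigma ** 2))

X = np.random.randn(300, 2)
k = RBFKernelSketch(sigma=1.0)
K_chol, I, R, W = incomplete_cholesky(X, k, eta=0.1)
K_full = k.kernel(X, X)
print("low-rank approximation error: %.2e" % np.abs(K_full - R.T.dot(R)).max())
print("low-rank dimension: %d" % R.shape[0])
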
Ejemplo n.º 53
0
    # prior on theta and posterior target estimate
    theta_prior=Gaussian(mu=0*ones(dim), Sigma=eye(dim)*5)
    target=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)
    
    # create sampler
    burnin=10000
    num_iterations=burnin+300000
    kernel = GaussianKernel(sigma=23.0)
    sampler=KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
#    sampler=AdaptiveMetropolisLearnScale(target)
#    sampler=StandardMetropolis(target)
    
    # posterior mode derived by initial tests
    start=zeros(target.dimension)
    params = MCMCParams(start=start, num_iterations=num_iterations, burnin=burnin)
    
    # create MCMC chain
    chain=MCMCChain(sampler, params)
    chain.append_mcmc_output(StatisticsOutput(print_from=0, lag=100))
    #chain.append_mcmc_output(PlottingOutput(plot_from=0, lag=500))
    
    # create experiment instance to store results
    experiment_dir = str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
    experiment = SingleChainExperiment(chain, experiment_dir)
    
    experiment.run()
    sigma=GaussianKernel.get_sigma_median_heuristic(experiment.mcmc_chain.samples.T)
    print "median kernel width", sigma
Ejemplo n.º 54
0
    experiment_dir = str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep

    distribution = Banana(dimension=8, bananicity=0.03, V=100)
    sigma = GaussianKernel.get_sigma_median_heuristic(
        distribution.sample(1000).samples)
    sigma = 10
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)

    burnin = 20000
    num_iterations = 40000

    mcmc_sampler = KameleonWindowLearnScale(distribution,
                                            kernel,
                                            stop_adapt=burnin)
    mean_est = zeros(distribution.dimension, dtype="float64")
    cov_est = 1.0 * eye(distribution.dimension)
    cov_est[0, 0] = distribution.V
    #mcmc_sampler = AdaptiveMetropolisLearnScale(distribution, mean_est=mean_est, cov_est=cov_est)
    #mcmc_sampler = AdaptiveMetropolis(distribution, mean_est=mean_est, cov_est=cov_est)
    #mcmc_sampler = StandardMetropolis(distribution)

    start = zeros(distribution.dimension, dtype="float64")
    mcmc_params = MCMCParams(start=start,
                             num_iterations=num_iterations,
                             burnin=burnin)

    mcmc_chain = MCMCChain(mcmc_sampler, mcmc_params)
    mcmc_chain.append_mcmc_output(StatisticsOutput())

    experiment = SingleChainExperiment(mcmc_chain, experiment_dir)
Ejemplo n.º 55
0
    # prior on theta and posterior target estimate
    theta_prior=Gaussian(mu=0*ones(dim), Sigma=eye(dim)*5)
    target=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=500, prior=theta_prior, \
                                                    ridge=1e-3)
    
    # create sampler
    burnin=5000
    num_iterations=burnin+50000
    kernel = GaussianKernel(sigma=8.0)
    sampler=KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
#    sampler=AdaptiveMetropolisLearnScale(target)
#    sampler=StandardMetropolis(target)
    
    # posterior mode derived by initial tests
    start=zeros(dim)
    params = MCMCParams(start=start, num_iterations=num_iterations, burnin=burnin)
    
    # create MCMC chain
    chain=MCMCChain(sampler, params)
    chain.append_mcmc_output(StatisticsOutput(print_from=0, lag=100))
#    chain.append_mcmc_output(PlottingOutput(plot_from=0, lag=500))
    
    # create experiment instance to store results
    experiment_dir = str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
    experiment = SingleChainExperiment(chain, experiment_dir)
    
    experiment.run()
    
    sigma=GaussianKernel.get_sigma_median_heuristic(experiment.mcmc_chain.samples.T)
    print "median kernel width", sigma