Example #1
0
def construct_posterior(N, conjugate_parameters, prior_points, partition, tables):
    posterior = Posterior(partition, prior_points)
    alphas, betas = conjugate_parameters
    alphas = numpy.array([alphas])
    betas = numpy.array([betas])
    beta_table, alpha_table = tables
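    # Combine the conjugate parameters with the precomputed tables, collapse the
    # N-1 rows against a vector of ones, then normalize the scores in log space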
    t = numpy.dot(numpy.array([1.]*(N-1)), (alphas.transpose() * alpha_table + betas.transpose() * beta_table))
    t = lognormalize(t)
    posterior.update(t)
    return posterior
Example #2
0
    def multinomial(cls,
                    inputs,
                    outputs,
                    buckets=None,
                    frequencies=None,
                    zero=0.0):
        ''' Create a Naive Bayes Classifier with a Multinomial A Priori distribution
        '''

        print "Training Naive Bayes Classifier..."
        start = time.time()

        # Determine Priori and Posterior lambda functions:
        posterior = Posterior.immutable(inputs, outputs, zero=zero)
        priori = Priori.multinomial(outputs,
                                    frequencies=frequencies,
                                    zero=zero)

        print "Finished Training Naive Bayes Classifier (%.2fs)" % (
            time.time() - start)

        # Determine the classifier's classes:
        buckets = set(outputs) if buckets is None else buckets

        # Create & Return classifier
        return cls(priori, posterior, buckets)
Example #3
0
    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
        """
        YYT_factor = self.get_YYTfactor(Y)

        K = kern.K(X)

        Ky = K.copy()
        diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
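        # pdinv returns the inverse of Ky, its Cholesky factor LW, the inverse
        # of that factor, and the log-determinant of Ky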
        Wi, LW, LWi, W_logdet = pdinv(Ky)

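        # alpha = Ky^{-1} * YYT_factor via a Cholesky solve (dpotrs)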
        alpha, _ = dpotrs(LW, YYT_factor, lower=1)

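        # Standard Gaussian log evidence:
        # -0.5 * (N*D*log(2*pi) + D*log|Ky| + tr(Y' Ky^{-1} Y)), with YYT_factor standing in for Y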
        log_marginal = 0.5 * (-Y.size * log_2_pi - Y.shape[1] * W_logdet -
                              np.sum(alpha * YYT_factor))

        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)

        dL_dthetaL = likelihood.exact_inference_gradients(
            np.diag(dL_dK), Y_metadata)

        return Posterior(woodbury_chol=LW, woodbury_vector=alpha,
                         K=K), log_marginal, {
                             'dL_dK': dL_dK,
                             'dL_dthetaL': dL_dthetaL
                         }
Example #4
0
def nllcost(gp, Xt, Yt, log_sf2, log_rsn, log_W, Ht):
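    # Hyperparameters arrive in log space: sf2 is the kernel signal variance,
    # the noise variance is parameterized relative to sf2, and W holds the
    # remaining kernel parameters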
    sf2 = numpy.exp(log_sf2)
    noise = numpy.exp(log_sf2 + log_rsn)
    W = numpy.exp(log_W)
    if not numpy.isfinite(numpy.r_[sf2, noise, W]).all():
        return numpy.inf

    kernel = gp.KernelFun(sf2, W)
    Ktt = kernel(Xt, Xt)
    if not numpy.isfinite(Ktt).all():
        return numpy.inf

    post = Posterior(Ktt, Yt, noise, Ht)
    gp.post = post

    return gp._loolik(Ktt, Yt, Ht)
Example #5
0
    def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
        num_data, output_dim = Y.shape
        assert output_dim == 1, "ep in 1D only (for now!)"

        K = kern.K(X)

        if self._ep_approximation is None:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        else:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

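        # The EP site precisions tau_tilde act as per-point noise precisions, so
        # the effective covariance is K + diag(1/tau_tilde)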
        Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

        alpha, _ = dpotrs(LW, mu_tilde, lower=1)

        log_marginal =  0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??

        dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)

        dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters

        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
Example #6
0
def optimize(gp, Xt, Yt, HPini=None, nelderMeadIters=50):
    if gp.KernelFun is None:
        gp.KernelFun = KernelSE
    KernelFun = gp.KernelFun
    if HPini is None:
        HPiniK = KernelFun.defaultHP(Xt)
        HPini = numpy.log(numpy.r_[Yt.var(), 1e-2, HPiniK])
    if gp.Basis is None:
        gp.Basis = BasisQuad()
    Basis = gp.Basis

    if Basis is not None:
        Ht = Basis(Xt)
    else:
        Ht = None

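    # Negative log-likelihood as a function of the stacked log-hyperparameters
    # [log_sf2, log_rsn, log_W...]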
    f = lambda hp: nllcost(gp, Xt, Yt, hp[0], hp[1], hp[2:], Ht)

    # Start with a few Nelder-Mead iterations
    res = minimize(f,
                   HPini,
                   method='Nelder-Mead',
                   options={
                       'maxiter': nelderMeadIters,
                       'disp': False
                   })
    hpopt = res.x
    # Refine the solution with SLSQP, starting from the Nelder-Mead optimum
    res = minimize(f, hpopt, method='SLSQP', options={
        'disp': False,
    })
    HP = res.x
    kernel = KernelFun(numpy.exp(HP[0]), numpy.exp(HP[2:]))
    noise = numpy.exp(HP[0] + HP[1])
    Ktt = kernel(Xt, Xt)
    post = Posterior(Ktt, Yt, noise, Ht)
    return post, kernel, HP, noise, res.fun
Example #7
0
    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
        """

        # Compute K
        K = kern.K(X)

        #Find mode
        if self.bad_fhat or self.first_run:
            Ki_f_init = np.zeros_like(Y)
            self.first_run = False
        else:
            Ki_f_init = self._previous_Ki_fhat

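        # Find the posterior mode f_hat (and K^{-1} f_hat) with the Laplace
        # mode-finding routine, warm-started from the previous Ki_fhat when available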
        f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
        self.f_hat = f_hat
        self.Ki_fhat =  Ki_fhat
        self.K = K.copy()
        #Compute hessian and other variables at mode
        log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

        self._previous_Ki_fhat = Ki_fhat.copy()
        return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
Example #8
0
meshwidth = 1. / 32.
variance = 1e-4
fem_ip = FEMInverseProblem(num_true_inputs, eval_pts, meshwidth, variance)
#print(fem_ip.locations)
#print(fem_ip.observations)
#print(fem_ip.true_observations)
#print(fem_ip.variance)

mean_fct = ZeroMean()
cov_fct = MaternCov(2.5)
gp = GaussianProcess(mean_fct, cov_fct)

num_design_pts = 25
design_ptset = Mesh1d.construct(num_design_pts)

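# Wrap the FEM inverse problem in a Posterior object and approximate its
# likelihood with the GP on the design point set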
posterior = Posterior(fem_ip)
approx_post = ApproximatePosterior(posterior, gp)

#print(approx_post.potential)
approx_post.approximate_likelihood(design_ptset)
#print(approx_post.potential)

#gp_data = ApproxDataPotential(design_ptset, 1, posterior)
#cond_gp = ConditionedGaussianProcess(gp, gp_data)
#print(gp_data.locations,, gp_data.observations)

import matplotlib.pyplot as plt

# gp_v = GPVisual(approx_post.cond_gp)
# gp_v.addplot_mean()
# gp_v.addplot_deviation()
Example #9
0
    def inference_likelihood(self, kern, X, Z, likelihood, Y):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,
        """

        num_data, output_dim = Y.shape
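        # Z has shape (num_inducing, input_dim); this variable therefore holds
        # the number of inducing points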
        input_dim = Z.shape[0]
        if self.mpi_comm is not None:
            num_data_all = np.array(num_data, dtype=np.int32)
            self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT],
                                    [num_data_all, MPI.INT])
            num_data = num_data_all

        if isinstance(X, VariationalPosterior):
            uncertain_inputs = True
        else:
            uncertain_inputs = False

        #see whether we've got a different noise variance for each datum
        beta = 1. / np.fmax(likelihood.variance, 1e-6)
        het_noise = beta.size > 1
        if het_noise:
            self.batchsize = 1

        psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(
            kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm, maxtries=100)

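        # LmInvPsi2LmInvT = Lm^{-1} psi2 Lm^{-T}: psi2 whitened by the Cholesky
        # factor of Kmm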
        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_full, transpose='right')
        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda, maxtries=100)
        logdet_L = 2. * np.sum(np.log(np.diag(LL)))
        b = dtrtrs(LL, dtrtrs(Lm, psi1Y_full.T)[0])[0]
        bbt = np.square(b).sum()
        v = dtrtrs(Lm, dtrtrs(LL, b, trans=1)[0], trans=1)[0]

        tmp = -backsub_both_sides(
            LL, tdot(b) + output_dim * np.eye(input_dim), transpose='left')
        dL_dpsi2R = backsub_both_sides(
            Lm, tmp + output_dim * np.eye(input_dim), transpose='left') / 2.

        # Cache intermediate results
        self.midRes['dL_dpsi2R'] = dL_dpsi2R
        self.midRes['v'] = v

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        if het_noise:
            logL_R = -np.log(beta).sum()
        else:
            logL_R = -num_data * np.log(beta)
        logL = -(output_dim * (num_data * log_2_pi + logL_R + psi0_full -
                               np.trace(LmInvPsi2LmInvT)) + YRY_full -
                 bbt) / 2. - output_dim * logdet_L / 2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = dL_dpsi2R - output_dim * backsub_both_sides(
            Lm, LmInvPsi2LmInvT, transpose='left') / 2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        if not self.Y_speedup or het_noise:
            wd_inv = backsub_both_sides(
                Lm,
                np.eye(input_dim) - backsub_both_sides(
                    LL, np.identity(input_dim), transpose='left'),
                transpose='left')
            post = Posterior(woodbury_inv=wd_inv,
                             woodbury_vector=v,
                             K=Kmm,
                             mean=None,
                             cov=None,
                             K_chol=Lm)
        else:
            post = None

        #======================================================================
        # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
        #======================================================================

        if not het_noise:
            dL_dthetaL = (YRY_full * beta + beta * output_dim * psi0_full -
                          num_data * output_dim * beta) / 2. - beta * (
                              dL_dpsi2R * psi2_full).sum() - beta * (
                                  v.T * psi1Y_full).sum()
            self.midRes['dL_dthetaL'] = dL_dthetaL

        return logL, dL_dKmm, post
Example #10
0
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):

        _, output_dim = Y.shape
        uncertain_inputs = isinstance(X, VariationalPosterior)

        #see whether we've got a different noise variance for each datum
        beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        het_noise = beta.size > 1
        if beta.ndim == 1:
            beta = beta[:, None]
        VVT_factor = beta*Y
        #VVT_factor = beta*Y
        trYYT = self.get_trYYT(Y)

        # do the inference:
        num_inducing = Z.shape[0]
        num_data = Y.shape[0]
        # kernel computations, using BGPLVM notation

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        if Lm is None:
            Lm = jitchol(Kmm)

        # The rather complex computations of A, and the psi stats
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            if het_noise:
                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
            else:
                psi2_beta = kern.psi2(Z,X) * beta
            LmInv = dtrtri(Lm)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
            if het_noise:
                tmp = psi1 * (np.sqrt(beta))
            else:
                tmp = psi1 * (np.sqrt(beta))
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp) #print A.sum()

        # factor B
        B = np.eye(num_inducing) + A
        LB = jitchol(B)
        psi1Vf = np.dot(psi1.T, VVT_factor)
        # back-substitute C into psi1Vf
        tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # data fit and derivative of L w.r.t. Kmm
        delit = tdot(_LBi_Lmi_psi1Vf)
        data_fit = np.trace(delit)
        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
        if dL_dKmm is None:
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
            # Compute dL_dKmm
            dL_dKmm = backsub_both_sides(Lm, delit)

        # derivatives of L w.r.t. psi
        dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
            VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
            psi1, het_noise, uncertain_inputs)

        # log marginal likelihood
        log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
            psi0, A, LB, trYYT, data_fit, Y)

        #noise derivatives
        dL_dR = _compute_dL_dR(likelihood,
            het_noise, uncertain_inputs, LB,
            _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
            psi0, psi1, beta,
            data_fit, num_data, output_dim, trYYT, Y, VVT_factor)

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)

        #put the gradients in the right places
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in  the loop?
        if VVT_factor.shape[1] == Y.shape[1]:
            woodbury_vector = Cpsi1Vf # == Cpsi1V
        else:
            print('foobar')
            import ipdb; ipdb.set_trace()
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB, lower=1)[0]
        diag.add(Bi, 1)

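        # Bi now holds I - B^{-1}; undo the whitening to obtain the Woodbury
        # inverse Lm^{-T} (I - B^{-1}) Lm^{-1}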
        woodbury_inv = backsub_both_sides(Lm, Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
        return post, log_marginal, grad_dict
Example #11
0
    def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
        assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape

        #make sure the noise is not hetero
        beta = 1./likelihood.gaussian_variance(Y_metadata)
        if beta.size > 1:
            raise NotImplementedError, "no hetero noise with this implementation of DTC"

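        # Kernel blocks: inducing-inducing (Kmm), data diagonal (Knn) and
        # data-inducing cross covariance (Knm)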
        Kmm = kern.K(Z)
        Knn = kern.Kdiag(X)
        Knm = kern.K(X, Z)
        U = Knm
        Uy = np.dot(U.T,Y)

        #factor Kmm
        Kmmi, L, Li, _ = pdinv(Kmm)

        # Compute A
        LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
        A_ = tdot(LiUTbeta)
        trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_))
        A = A_ + np.eye(num_inducing)

        # factor A
        LA = jitchol(A)

        # back-substitute to get b, P, v
        tmp, _ = dtrtrs(L, Uy, lower=1)
        b, _ = dtrtrs(LA, tmp*beta, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)

        #compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                       -np.sum(np.log(np.diag(LA)))*output_dim + \
                       0.5*num_data*output_dim*np.log(beta) + \
                       -0.5*beta*np.sum(np.square(Y)) + \
                       0.5*np.sum(np.square(b)) + \
                       trace_term

        # Compute dL_dKmm
        vvT_P = tdot(v.reshape(-1,1)) + P
        LAL = Li.T.dot(A).dot(Li)
        dL_dK = Kmmi - 0.5*(vvT_P + LAL)

        # Compute dL_dU
        vY = np.dot(v.reshape(-1,1),Y.T)
        #dL_dU = vY - np.dot(vvT_P, U.T)
        dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
        dL_dU *= beta

        #compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
        dL_dR -= beta*trace_term/num_data

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
        grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}

        #construct a posterior object
        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)


        return post, log_marginal, grad_dict
Example #12
0
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
        num_data, output_dim = Y.shape
        assert output_dim == 1, "ep in 1D only (for now!)"

        Kmm = kern.K(Z)
        Kmn = kern.K(Z, X)

        if self._ep_approximation is None:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(
                Kmm, Kmn, Y, likelihood, Y_metadata)
        else:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

        if isinstance(X, VariationalPosterior):
            uncertain_inputs = True
            psi0 = kern.psi0(Z, X)
            psi1 = Kmn.T  #kern.psi1(Z, X)
            psi2 = kern.psi2(Z, X)
        else:
            uncertain_inputs = False
            psi0 = kern.Kdiag(X)
            psi1 = Kmn.T  #kern.K(X, Z)
            psi2 = None

        #see whether we're using variational uncertain inputs

        _, output_dim = Y.shape

        #see whether we've got a different noise variance for each datum
        #beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
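        # The EP site precisions tau_tilde play the role of per-datum noise
        # precisions, with mu_tilde as the pseudo-observations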
        beta = tau_tilde
        VVT_factor = beta[:, None] * mu_tilde[:, None]
        trYYT = self.get_trYYT(mu_tilde[:, None])

        # do the inference:
        het_noise = beta.size > 1
        num_inducing = Z.shape[0]
        num_data = Y.shape[0]
        # kernel computations, using BGPLVM notation

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        # The rather complex computations of A
        if uncertain_inputs:
            if het_noise:
                psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1,
                                                           1)).sum(0)
            else:
                psi2_beta = psi2.sum(0) * beta
            LmInv = dtrtri(Lm)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
        else:
            if het_noise:
                tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
            else:
                tmp = psi1 * (np.sqrt(beta))
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)  #print A.sum()

        # factor B
        B = np.eye(num_inducing) + A
        LB = jitchol(B)
        psi1Vf = np.dot(psi1.T, VVT_factor)
        # back-substitute C into psi1Vf
        tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # data fit and derivative of L w.r.t. Kmm
        delit = tdot(_LBi_Lmi_psi1Vf)
        data_fit = np.trace(delit)
        DBi_plus_BiPBi = backsub_both_sides(
            LB,
            output_dim * np.eye(num_inducing) + delit)
        delit = -0.5 * DBi_plus_BiPBi
        delit += -0.5 * B * output_dim
        delit += output_dim * np.eye(num_inducing)
        # Compute dL_dKmm
        dL_dKmm = backsub_both_sides(Lm, delit)

        # derivatives of L w.r.t. psi
        dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(
            num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf,
            DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs)

        # log marginal likelihood
        log_marginal = _compute_log_marginal_likelihood(
            likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB,
            trYYT, data_fit, VVT_factor)

        #put the gradients in the right places
        dL_dR = _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB,
                               _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0,
                               psi1, beta, data_fit, num_data, output_dim,
                               trYYT, mu_tilde[:, None])

        dL_dthetaL = 0  #likelihood.exact_inference_gradients(dL_dR,Y_metadata)

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in  the loop?
        if VVT_factor.shape[1] == Y.shape[1]:
            woodbury_vector = Cpsi1Vf  # == Cpsi1V
        else:
            print('foobar')
            psi1V = np.dot(mu_tilde[:, None].T * beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB, lower=1)[0]
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv,
                         woodbury_vector=woodbury_vector,
                         K=Kmm,
                         mean=None,
                         cov=None,
                         K_chol=Lm)
        return post, log_marginal, grad_dict
Example #13
0
def cross_validation(sequences, training_method, decoder):
    """
    Performs the 10-fold cross-validation
    Requires an array of dict sequences
    Requires the training function
    Requires a decoder object (Viterbi or Posterior)
    """
    # here we store the total_ac for each cross-validation
    vit_total_ac = np.array([.0] * len(sequences))
    post_total_ac = np.array([.0] * len(sequences))
    vit = Viterbi()
    post = Posterior()

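    # Leave-one-out over the sequence list: fold i trains on every sequence
    # except i and validates on sequence i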
    for i in range(len(sequences)):
        vit_total_scores = np.zeros([4])
        post_total_scores = np.zeros([4])
        # arrays with the sequences for training and for validation
        training_data_array = sequences[:]
        validation_data_array = [training_data_array.pop(i)]

        # merging the arrays into dictionaries
        training_data = merge(training_data_array)
        validation_data = merge(validation_data_array)
        # the training function returns a model
        model = training_method(training_data)

        #do viterbi prediction on set i
        for key, sequence in validation_data.items():
            # the sequence from the file
            true_seq = sequence['Z']
            # the sequence decoded using viterbi, or posterior and the model generated
            vit_pred_seq = vit.decode(model, sequence['X'])
            post_pred_seq = post.decode(model, sequence['X'])
            """
            print key
            print "PREDICTED"
            print pred_seq
            print "TRUE"
            print true_seq
            """
            tp, fp, tn, fn = compare_tm_pred.count(true_seq, vit_pred_seq)

            vit_total_scores += np.array([tp, fp, tn, fn])

            tp, fp, tn, fn = compare_tm_pred.count(true_seq, post_pred_seq)

            post_total_scores += np.array([tp, fp, tn, fn])
            if VERBOSE:
                print ">" + key
                compare_tm_pred.print_stats(tp, fp, tn, fn)
                print

        vit_total_ac[i] = compare_tm_pred.compute_stats(*vit_total_scores)[3]
        post_total_ac[i] = compare_tm_pred.compute_stats(*post_total_scores)[3]
        #print total_ac
        if VERBOSE:
            print "Summary 10-fold cross validation over index %i :" % (i)
            #  compare_tm_pred.print_stats( *total_scores  )
            print
            print
            print
            print "-------------------------------------------------------"
            if DEBUG:
                raw_input("press any key to continue\n")

    print "Overall viterbi result mean: %s, variance: %s" % (
        np.mean(vit_total_ac), np.var(vit_total_ac))
    print "Posterior mean: %s, variance %s" % (np.mean(post_total_ac),
                                               np.var(post_total_ac))
Example #14
0
    def fit(self, X, Y):
        if self.kernel is None or self.Basis is None:
            raise RuntimeError('you should call autoFit before')
        K = self.kernel(X, X)
        H = self.Basis(X)
        self.post = Posterior(K, Y, self.noise, H)
Example #16
0

#outputs.to_project_1_sequences_file_from_posterior_decoding(sequences.get(), probs, 'posterior-decoding-sequences.txt')

outputs.to_project_1_sequences_file(sequences.get(), probs, 'viterbi-sequences.txt')
outputs.to_project_1_probs_file(sequences.get(), probs, 'viterbi-probs.txt')


"""
if __name__ == '__main__':
    model = hmm.Model(KEYS)
    model.load(HMMFILE)
    sequences = sequences.Sequences(SEQUENCEFILE)
    # load methods
    vit = Viterbi()
    post = Posterior()

    # viterbi
    probs = {}
    for key, sequence in sequences.get().items():
        probs[key] = vit.decode(model, sequence)

    outputs.to_project_2_viterbi(sequences.get(), probs,
                                 'pred-test-sequences-project2-viterbi.txt')

    probs = {}
    for key, value in sequences.get().items():
        sequence = {'Z': post.decode(model, value), 'X': value}
        log_joint = compute_hmm(model, sequence)

        probs[key] = (log_joint, sequence['Z'])
"""
Example #17
0
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):

        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape

        #make sure the noise is not hetero
        sigma_n = likelihood.gaussian_variance(Y_metadata)
        if sigma_n.size > 1:
            raise NotImplementedError, "no hetero noise with this implementation of FITC"

        Kmm = kern.K(Z)
        Knn = kern.Kdiag(X)
        Knm = kern.K(X, Z)
        U = Knm

        #factor Kmm
        diag.add(Kmm, self.const_jitter)
        Kmmi, L, Li, _ = pdinv(Kmm)

        #compute beta_star, the effective noise precision
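        # FITC: the effective per-point noise is sigma_n plus the diagonal of
        # Knn - Qnn, where Qnn = Knm Kmm^{-1} Kmn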
        LiUT = np.dot(Li, U.T)
        sigma_star = Knn + sigma_n - np.sum(np.square(LiUT), 0)
        beta_star = 1. / sigma_star

        # Compute and factor A
        A = tdot(LiUT * np.sqrt(beta_star)) + np.eye(num_inducing)
        LA = jitchol(A)

        # back-substitute to get b, P, v
        URiy = np.dot(U.T * beta_star, Y)
        tmp, _ = dtrtrs(L, URiy, lower=1)
        b, _ = dtrtrs(LA, tmp, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)

        #compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                       -np.sum(np.log(np.diag(LA)))*output_dim + \
                       0.5*output_dim*np.sum(np.log(beta_star)) + \
                       -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
                       0.5*np.sum(np.square(b))
        #compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5 * (np.sum(U * np.dot(U, P), 1) - 1. / beta_star +
                       np.sum(np.square(Y), 1) - 2. * np.sum(Uv * Y, 1) +
                       np.sum(np.square(Uv), 1)) * beta_star**2

        # Compute dL_dKmm
        vvT_P = tdot(v.reshape(-1, 1)) + P
        dL_dK = 0.5 * (Kmmi - vvT_P)
        KiU = np.dot(Kmmi, U.T)
        dL_dK += np.dot(KiU * dL_dR, KiU.T)

        # Compute dL_dU
        vY = np.dot(v.reshape(-1, 1), Y.T)
        dL_dU = vY - np.dot(vvT_P, U.T)
        dL_dU *= beta_star
        dL_dU -= 2. * KiU * dL_dR

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
        grad_dict = {
            'dL_dKmm': dL_dK,
            'dL_dKdiag': dL_dR,
            'dL_dKnm': dL_dU.T,
            'dL_dthetaL': dL_dthetaL
        }

        #construct a posterior object
        post = Posterior(woodbury_inv=Kmmi - P,
                         woodbury_vector=v,
                         K=Kmm,
                         mean=None,
                         cov=None,
                         K_chol=L)

        return post, log_marginal, grad_dict