Example #1
    def test_gpkronprod(self):
        # initialize
        covar_c = linear.LinearCF(n_dimensions=self.n_latent)
        covar_r = linear.LinearCF(n_dimensions=self.n_dimensions)
        X0_c = SP.random.randn(self.n_tasks, self.n_latent)

        lik = likelihood_base.GaussIsoLik()
        gp = gp_kronprod.KronProdGP(covar_c=covar_c,
                                    covar_r=covar_r,
                                    likelihood=lik)
        gp.setData(Y=self.Ykronprod['train'], X_r=self.X['train'])
        hyperparams = {
            'lik': SP.array([0.5]),
            'X_c': X0_c,
            'covar_r': SP.array([0.5]),
            'covar_c': SP.array([0.5]),
            'X_r': self.X['train']
        }
        # check predictions, likelihood and gradients
        gp.predict(hyperparams, Xstar_r=self.X['test'], debugging=True)

        gp._LML_covar(hyperparams, debugging=True)
        gp._LMLgrad_covar(hyperparams, debugging=True)
        gp._LMLgrad_lik(hyperparams, debugging=True)
        gp._LMLgrad_x(hyperparams, debugging=True)

        # optimize
        hyperparams = {
            'lik': SP.array([0.5]),
            'X_c': X0_c,
            'covar_r': SP.array([0.5]),
            'covar_c': SP.array([0.5])
        }
        opts = {'gradcheck': True}
        hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp,
                                                           hyperparams,
                                                           opts=opts)
        Kest = covar_c.K(hyperparams_opt['covar_c'])

        # check predictions, likelihood and gradients
        gp._invalidate_cache()  # otherwise debugging parameters are not up to date!
        gp.predict(hyperparams_opt, debugging=True, Xstar_r=self.X['test'])
        gp._LML_covar(hyperparams_opt, debugging=True)
        gp._LMLgrad_covar(hyperparams_opt, debugging=True)
        gp._LMLgrad_lik(hyperparams_opt, debugging=True)
        gp._LMLgrad_x(hyperparams_opt, debugging=True)
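
The KronProdGP above models vec(Y) with a covariance of the form kron(C, R) + sigma^2 * I, where C is a low-rank task covariance built from the latent factors X_c and R is the linear kernel over X_r. A minimal sketch in plain numpy (illustrative names, not the library API) of the dense matrix the test implicitly factorizes:

import numpy as np

# kron(C, R) + sigma^2 * I at toy sizes; the library never builds this matrix
# explicitly, which is what makes the Kronecker-product GP tractable.
np.random.seed(0)
n_tasks, n_samples, n_latent = 3, 5, 2
X_c = np.random.randn(n_tasks, n_latent)   # latent task factors
X_r = np.random.randn(n_samples, 4)        # observed features
C = X_c.dot(X_c.T)                         # task covariance (low rank)
R = X_r.dot(X_r.T)                         # row covariance (linear kernel)
# noise scale for theta = 0.5, assuming the exp(2*theta) parameterization
# that Example 6 below makes explicit
sigma2 = np.exp(2 * 0.5)
K = np.kron(C, R) + sigma2 * np.eye(n_tasks * n_samples)

# K is symmetric and positive definite, hence a valid GP covariance
assert np.allclose(K, K.T)
assert np.all(np.linalg.eigvalsh(K) > 0)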
Example #2
    def test_gplvm(self):
        covar = linear.LinearCF(n_dimensions=self.n_latent)
        lik = likelihood_base.GaussIsoLik()
        prior = priors.GaussianPrior(key='X', theta=SP.array([1.]))
        gp = gplvm.GPLVM(covar=covar, likelihood=lik, prior=prior)

        X0 = self.Xlatent
        covar.X = X0
        gp.setData(Y=self.Ylatent)

        # gradient with respect to X
        hyperparams = {
            'covar': SP.array([0.5]),
            'lik': SP.array([0.5]),
            'X': X0
        }

        LML = gp.LML(hyperparams)
        LMLgrad = gp.LMLgrad(hyperparams)

        LMLgrad_x = SP.zeros((self.n_tasks, self.n_latent))
        W = gp.get_covariances(hyperparams)['W']
        for d in xrange(self.n_latent):
            for n in xrange(self.n_tasks):
                Knd_grad = covar.Kgrad_x(hyperparams['covar'], d, n)
                LMLgrad_x[n, d] = 0.5 * (W * Knd_grad).sum()

        LMLgrad_x += prior.LMLgrad(hyperparams)['X']
        assert SP.allclose(
            LMLgrad['X'],
            LMLgrad_x), 'ouch, gradient with respect to X is wrong'

        # optimize
        opts = {'gradcheck': True}
        hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp,
                                                           hyperparams,
                                                           opts=opts)
        Ktrue = SP.dot(self.Xlatent, self.Xlatent.T)
        covar.X = hyperparams_opt['X']
        Kest = covar.K(hyperparams_opt['covar'])

        # gradient with respect to X
        LML = gp.LML(hyperparams_opt)
        LMLgrad = gp.LMLgrad(hyperparams_opt)
        LMLgrad_x = SP.zeros((self.n_tasks, self.n_latent))
        W = gp.get_covariances(hyperparams_opt)['W']
        for d in xrange(self.n_latent):
            for n in xrange(self.n_tasks):
                Knd_grad = covar.Kgrad_x(hyperparams_opt['covar'], d, n)
                LMLgrad_x[n, d] = 0.5 * (W * Knd_grad).sum()
        LMLgrad_x += prior.LMLgrad(hyperparams_opt)['X']
        assert SP.allclose(
            LMLgrad['X'],
            LMLgrad_x), 'ouch, gradient with respect to X is wrong'
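
The analytic gradient asserted above can be cross-checked numerically. Below is a self-contained finite-difference sketch for a linear-kernel GPLVM in plain numpy; the sign convention for W is an assumption (it varies between codebases), so read this as an illustration of the check rather than the library's internals:

import numpy as np

# negative log marginal likelihood of a linear-kernel GP,
# K = exp(2*theta) * X X^T + s2 * I
def lml(X, y, theta, s2):
    K = np.exp(2 * theta) * X.dot(X.T) + s2 * np.eye(X.shape[0])
    Kinv_y = np.linalg.solve(K, y)
    _, logdet = np.linalg.slogdet(K)
    return 0.5 * (len(y) * np.log(2 * np.pi) + logdet + y.dot(Kinv_y))

np.random.seed(0)
N, Q = 6, 2
X = np.random.randn(N, Q)
y = np.random.randn(N)
theta, s2, eps = 0.3, 0.1, 1e-6

# analytic gradient: dLML/dX = -exp(2*theta) * W.dot(X)
# with W = Kinv y y^T Kinv - Kinv (here for the *negative* log likelihood)
K = np.exp(2 * theta) * X.dot(X.T) + s2 * np.eye(N)
Kinv = np.linalg.inv(K)
a = Kinv.dot(y)
W = np.outer(a, a) - Kinv
grad = -np.exp(2 * theta) * W.dot(X)

# central differences, entry by entry, as in the double loop above
for n in range(N):
    for d in range(Q):
        Xp = X.copy(); Xp[n, d] += eps
        Xm = X.copy(); Xm[n, d] -= eps
        num = (lml(Xp, y, theta, s2) - lml(Xm, y, theta, s2)) / (2 * eps)
        assert abs(num - grad[n, d]) < 1e-5 * max(1.0, abs(grad[n, d]))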
Example #3
def measure_runtime(env, N, D, n_reps=10, time_out=10000):
    opts = {'messages': False}
    out_dir = os.path.join(env['out_dir'], 'simulations_runtime')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    t_fast = SP.zeros(n_reps)
    t_slow = SP.zeros(n_reps)
    lml_fast = SP.zeros(n_reps)
    lml_slow = SP.zeros(n_reps)

    for i in range(n_reps):
        # load data
        var_signal = 0.5
        data, RV = load_simulations(env, var_signal, N, D, i)

        # initialize
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams, Ifilter, bounds = initialize.init('GPkronsum_LIN', Y.T, X, RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_fast = gp_kronsum.KronSumGP(covar_r=covar_r, covar_c=covar_c,
                                        covar_s=covar_s, covar_o=covar_o)
        kgp_fast.setData(Y=Y)

        # measure time
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(time_out)
        try:
            t_start = time.clock()
            hyperparams_opt, lmltrain = opt.opt_hyper(kgp_fast, hyperparams,
                                                      Ifilter=Ifilter,
                                                      bounds=bounds, opts=opts)
            t_stop = time.clock()
            signal.alarm(0)
            t_fast[i] = t_stop - t_start
            lml_fast[i] = lmltrain
        except Exception, e:
            print e
            t_slow += time_out
            break
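
Both timing loops rely on a SIGALRM handler that the fragments never define. A plausible minimal sketch (an assumption, not the repository's actual handler): it raises, so opt_hyper is aborted once signal.alarm(time_out) fires and the except branch records the timeout.

import signal

def handler(signum, frame):
    # raising here unwinds out of opt_hyper into the except branch above
    raise Exception('optimization timed out')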
Example #4
def sim_linear_kernel(X=None, N=None, n_dim=None, theta=None):
    """
    simulate positive definite kernel
    """
    if X == None:
        X = SP.random.randn(N, n_dim)
    else:
        N = X.shape[0]
        n_dim = X.shape[1]

    if theta == None:
        theta = SP.random.randn(1)

    cf = linear.LinearCF(n_dim)
    cf.X = X
    K = cf.K(theta)

    return K, X
Example #5
def init_GPkronprod(Y, X_r, n_c):
    """
    init parameters for kron(C + sigma I,R) + sigma*I
    """
    # build linear kernel with the features
    covar0_r = SP.array([0])
    covar_r = linear.LinearCF(n_dimensions=X_r.shape[1])
    covar_r.X = X_r
    R = covar_r.K(covar0_r)
    var_R = utils.getVariance(R)
    cov = SP.cov(Y)

    # split into likelihood and noise terms
    ratio = SP.random.rand(3)
    ratio /= ratio.sum()
    lik0 = ratio[0] * SP.diag(cov).min()
    covar0_c = ratio[1] * SP.diag(cov).min()

    # remaining variance is assigned to latent factors
    if n_c > 1:
        X0_c = SP.zeros((Y.shape[0], n_c))
        ratio = SP.random.rand(n_c)
        ratio /= ratio.sum()
        for i in range(n_c):
            # split further up
            X0_c[:, i] = SP.sign(SP.random.rand()) * SP.sqrt(
                ratio[i] * (SP.diag(cov) - lik0 - covar0_c))
    else:
        X0_c = SP.sign(SP.random.rand()) * SP.sqrt(
            SP.diag(cov) - lik0 - covar0_c)
    X0_c = SP.reshape(X0_c, (X0_c.shape[0], n_c))

    # check if variance of initial values match observed variance
    assert SP.allclose(SP.diag(cov), (X0_c**2).sum(1) + lik0 +
                       covar0_c), 'ouch, something is wrong'

    # bring in correct format and transform as necessary
    covar0_c = 0.5 * SP.log(SP.array([1. / var_R, covar0_c]))
    lik0 = 0.5 * SP.log(SP.array([lik0]))
    return X0_c, covar0_c, lik0, covar0_r
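
The variance bookkeeping in init_GPkronprod is easier to see in isolation. A standalone numpy sketch of the same split (illustrative values only): the per-task variance diag(cov) is divided into a noise part lik0, an independent task part covar0_c, and latent loadings whose squares absorb the remainder, which is exactly why the assert above holds.

import numpy as np

np.random.seed(1)
var = np.random.rand(4) + 0.5       # stands in for SP.diag(cov)
ratio = np.random.rand(3)
ratio /= ratio.sum()
lik0 = ratio[0] * var.min()         # noise variance
covar0_c = ratio[1] * var.min()     # independent task variance

n_c = 2
w = np.random.rand(n_c)
w /= w.sum()                        # split the remainder across factors
X0_c = np.column_stack([np.sqrt(w[i] * (var - lik0 - covar0_c))
                        for i in range(n_c)])

# squared loadings plus the two scalar terms reconstruct the total variance
assert np.allclose(var, (X0_c**2).sum(1) + lik0 + covar0_c)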
Example #6
    def test_linear(self):
        theta = SP.array([SP.random.randn()**2])
        theta_hat = SP.exp(2 * theta)

        _K = SP.dot(self.Xtrain, self.Xtrain.T)
        _Kcross = SP.dot(self.Xtrain, self.Xtest.T)

        cov = linear.LinearCF(n_dimensions=self.n_dimensions)
        cov.X = self.Xtrain
        cov.Xcross = self.Xtest

        K = cov.K(theta)
        Kcross = cov.Kcross(theta)
        Kgrad_x = cov.Kgrad_x(theta, 0)
        Kgrad_theta = cov.Kgrad_theta(theta, 0)

        assert SP.allclose(K, theta_hat * _K), 'ouch, covariance matrix is wrong'
        assert SP.allclose(Kgrad_theta, 2 * theta_hat *
                           _K), 'ouch, gradient with respect to theta is wrong'
        assert SP.allclose(Kcross, theta_hat *
                           _Kcross), 'ouch, cross covariance is wrong'

        # gradient with respect to latent factors
        # for each entry
        for i in range(self.n_dimensions):
            for j in range(self.n_train):
                Xgrad = SP.zeros(self.Xtrain.shape)
                Xgrad[j, i] = 1
                _Kgrad_x = theta_hat * (SP.dot(Xgrad, self.Xtrain.T) +
                                        SP.dot(self.Xtrain, Xgrad.T))
                Kgrad_x = cov.Kgrad_x(theta, i, j)
                assert SP.allclose(
                    Kgrad_x, _Kgrad_x
                ), 'ouch, gradient with respect to x is wrong for entry [%d,%d]' % (
                    i, j)
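
The assertions above encode the identities K = exp(2*theta) * X X^T and dK/dtheta = 2K for the linear covariance. A quick numerical confirmation with central differences, in plain numpy and independent of the library:

import numpy as np

np.random.seed(2)
X = np.random.randn(5, 3)
theta, eps = 0.4, 1e-6
K = lambda t: np.exp(2 * t) * X.dot(X.T)

# d/dtheta exp(2*theta) = 2*exp(2*theta), so dK/dtheta = 2*K(theta)
num_grad = (K(theta + eps) - K(theta - eps)) / (2 * eps)
assert np.allclose(num_grad, 2 * K(theta), atol=1e-4)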
Example #7
            break

    # save
    fn_out = os.path.join(out_dir, 'results_runtime_signal%03d_N%d_D%d.hdf5'
                          % (var_signal * 1E3, N, D))
    f = h5py.File(fn_out, 'w')
    f['t_fast'] = t_fast
    f['t_slow'] = t_slow
    f['lml_fast'] = lml_fast
    f['lml_slow'] = lml_slow
    f.close()

    for i in range(n_reps):
        # initialize
        data, RV = load_simulations(env, var_signal, N, D, i)
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams, Ifilter, bounds = initialize.init('GPkronsum_LIN', Y.T, X, RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_slow = gp_kronsum_naive.KronSumGP(covar_r=covar_r, covar_c=covar_c,
                                              covar_s=covar_s, covar_o=covar_o)
        kgp_slow.setData(Y=Y)

        # measure time
        signal.signal(signal.SIGALRM, handler)
Example #8
def init_GPkronsum(Y, X_r, n_c, n_sigma):
    """
    init parameters for kron(C + sigma*I, R) + kron(Sigma + sigma*I, Omega)

    input:
    Y       task matrix
    X_r     feature matrix
    n_c     number of hidden factors in C
    n_sigma number of hidden factors in Sigma
    """
    n_t, n_s = Y.shape
    n_f = X_r.shape[1]

    # build linear kernel with the features
    covar_r = linear.LinearCF(n_dimensions=n_f)
    covar_r.X = X_r
    covar0_r = SP.array([0])
    R = covar_r.K(covar0_r)
    var_R = utils.getVariance(R)

    # initialize hidden factors
    X0_c = SP.zeros((n_t, n_c))
    X0_sigma = SP.zeros((n_t, n_sigma))

    # observed variance
    var = Y.var(1)
    var0 = SP.copy(var)

    # assign parts of the variance to individual effects
    ratio = SP.random.rand(3)
    ratio /= ratio.sum()
    covar0_c = ratio[0] * var.min()
    covar0_sigma = ratio[1] * var.min()

    # remaining variance is assigned to latent factors
    var -= covar0_c
    var -= covar0_sigma
    for i in range(n_t):
        signal = SP.random.rand() * var[i]
        if n_c == 1:
            X0_c[i] = SP.sign(SP.random.rand()) * SP.sqrt(signal)
        else:
            ratio = SP.random.rand(n_c)
            ratio /= ratio.sum()
            for j in range(n_c):
                X0_c[i, j] = SP.sign(SP.random.rand()) * SP.sqrt(ratio[j] * signal)
        if n_sigma == 1:
            X0_sigma[i] = SP.sign(SP.random.rand()) * SP.sqrt(var[i] - signal)
        else:
            ratio = SP.random.rand(n_sigma)
            ratio /= ratio.sum()
            for j in range(n_sigma):
                X0_sigma[i, j] = SP.sign(SP.random.rand()) * SP.sqrt(
                    ratio[j] * (var[i] - signal))

    # check if variance of initial values match observed variance
    assert SP.allclose(var0, (X0_c**2).sum(1).flatten() +
                       (X0_sigma**2).sum(1).flatten() + covar0_c +
                       covar0_sigma), 'ouch, something is wrong'

    # bring in correct format and transform as necessary
    covar0_c = 0.5 * SP.log([1. / var_R, covar0_c])
    covar0_sigma = 0.5 * SP.log([1, covar0_sigma])
    X0_c = SP.reshape(X0_c, (X0_c.shape[0], n_c))
    X0_sigma = SP.reshape(X0_sigma, (X0_sigma.shape[0], n_sigma))

    return X0_c, X0_sigma, covar0_c, covar0_sigma, covar0_r
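
For reference, the covariance that init_GPkronsum parameterizes, kron(C + sigma*I, R) + kron(Sigma + sigma*I, Omega), can be built densely at toy sizes. The library avoids ever forming this matrix; the sketch below (plain numpy, with sigma^2 = 0.1 and Omega = I purely for illustration) only makes the structure explicit:

import numpy as np

np.random.seed(3)
n_t, n_samples = 3, 4
X_c = np.random.randn(n_t, 2); C = X_c.dot(X_c.T)        # signal task covariance
X_s = np.random.randn(n_t, 1); Sigma = X_s.dot(X_s.T)    # noise task covariance
X_r = np.random.randn(n_samples, 3); R = X_r.dot(X_r.T)  # linear kernel on features
Omega = np.eye(n_samples)                                # iid rows in the noise term

K = (np.kron(C + 0.1 * np.eye(n_t), R)
     + np.kron(Sigma + 0.1 * np.eye(n_t), Omega))
assert np.all(np.linalg.eigvalsh(K) > 0)                 # valid covariance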
Example #9

    X_scaler.fit(X_train)
    X_train = X_scaler.transform(X_train)
    X_test = X_scaler.transform(X_test)

    Y_scaler = StandardScaler()
    Y_scaler.fit(Y_train)
    Y_train = Y_scaler.transform(Y_train)

    for b in range(len(basis_num)):
        pca = PCA(n_components=basis_num[b])
        Y_train_hat = pca.fit_transform(Y_train)
        B = pca.components_.T

        hyperparams, Ifilter, bounds = initialize.init('MTGPR', Y_train_hat.T, X_train, {})
        covar_c = composite.SumCF(n_dimensions=Y_train_hat.shape[0])
        covar_c.append_covar(linear.LinearCF(n_dimensions=Y_train_hat.shape[0]))
        covar_c.append_covar(se.SqExpCF(n_dimensions=Y_train_hat.shape[0]))
        covar_c.append_covar(DiagIsoCF(n_dimensions=Y_train_hat.shape[0]))
        covar_c.X = Y_train_hat.T

        covar_r = composite.SumCF(n_dimensions=X_train.shape[1])
        covar_r.append_covar(linear.LinearCF(n_dimensions=X_train.shape[1]))
        covar_r.append_covar(se.SqExpCF(n_dimensions=X_train.shape[1]))
        covar_r.append_covar(DiagIsoCF(n_dimensions=X_train.shape[1]))
        covar_r.X = X_train

        likelihood = lik.GaussIsoLik()

        gp = sMTGPR.sMTGPR(covar_r=covar_r, covar_c=covar_c, likelihood=likelihood, basis=B)
        gp.setData(Y=Y_train, Y_hat=Y_train_hat, X=X_train)
         
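The fragment ends before the prediction step. Assuming predictions are produced in the PCA-reduced space, a plausible back-projection (a hypothetical continuation built from standard scikit-learn calls, not code from the fragment) inverts the PCA first and the scaler second:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

np.random.seed(4)
Y_train = np.random.randn(20, 6)
Y_scaler = StandardScaler().fit(Y_train)
pca = PCA(n_components=3).fit(Y_scaler.transform(Y_train))

Y_hat_pred = np.random.randn(5, 3)   # stands in for the GP's reduced-space output
Y_pred = Y_scaler.inverse_transform(pca.inverse_transform(Y_hat_pred))
assert Y_pred.shape == (5, 6)        # back in the original output space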
Example #10
import core.priors.priors as prior
import core.util.initialize as initialize

import matplotlib.pylab as PLT

if __name__ == "__main__":
    # settings
    n_latent = 1
    n_tasks = 10
    n_train = 100
    n_features = 100

    # initialize covariance functions
    covar_c = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_s = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_r = linear.LinearCF(n_dimensions=n_train)
    covar_o = diag.DiagIsoCF(n_dimensions=n_train)

    # true parameters
    X_c = SP.random.randn(n_tasks, n_latent)
    X_s = SP.random.randn(n_tasks, n_latent)
    X_r = SP.random.randn(n_train, n_features)  #/SP.sqrt(n_dimensions)
    R = SP.dot(X_r, X_r.T)
    C = SP.dot(X_c, X_c.T)
    Sigma = SP.dot(X_s, X_s.T)
    K = SP.kron(C, R) + SP.kron(Sigma, SP.eye(n_train))
    assert SP.all(SP.linalg.eigvalsh(K) >= -1E-8)  # K is positive semi-definite (up to numerical error)
    y = SP.random.multivariate_normal(SP.zeros(n_tasks * n_train), K)
    Y = SP.reshape(y, (n_train, n_tasks), order='F')

    # initialization parameters
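
Note the order='F' reshape above: it fills columns first, so column t of Y holds the n_train samples of task t, matching how kron(C, R) orders vec(Y). A tiny standalone check of that layout:

import numpy as np

n_train, n_tasks = 4, 3
y = np.arange(n_train * n_tasks)                   # stands in for the GP sample
Y = np.reshape(y, (n_train, n_tasks), order='F')   # column-major fill
assert np.all(Y[:, 0] == y[:n_train])              # task 0 = first n_train entries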
Example #11
 
if (method == 'sMT_GPTR'):
    basis_num = [b, b, b]
    noise_basis_num = [nb, nb, nb]
    Y_train = partial_fold(Y_train, mode=0,
                           shape=[Y_train.shape[0], control_fmri_data.shape[1],
                                  control_fmri_data.shape[2], control_fmri_data.shape[3]])

    hyperparams, Ifilter, bounds = initialize.init('Zero', Y_train, X_train, {})
    covar_c = list()
    covar_s = list()
    covar_r = list()
    covar_o = list()
    for i in range(Y_train.ndim - 1):
        n_dim = Y_train.shape[0] * np.prod(basis_num) / basis_num[i]
        covar_c.append(composite.SumCF(n_dimensions=n_dim))
        covar_c[i].append_covar(linear.LinearCF(n_dimensions=n_dim))
        covar_c[i].append_covar(se.SqExpCF(n_dimensions=n_dim))
        covar_c[i].append_covar(DiagIsoCF(n_dimensions=n_dim))
        n_dim = Y_train.shape[0] * np.prod(noise_basis_num) / noise_basis_num[i]
        covar_s.append(composite.SumCF(n_dimensions=n_dim))
        covar_s[i].append_covar(linear.LinearCF(n_dimensions=n_dim))
        covar_s[i].append_covar(se.SqExpCF(n_dimensions=n_dim))
        covar_s[i].append_covar(DiagIsoCF(n_dimensions=n_dim))

    covar_r.append(composite.SumCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(linear.LinearCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(se.SqExpCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(DiagIsoCF(n_dimensions=X_train.shape[1]))
    covar_r[0].X = X_train
    covar_o.append(DiagIsoCF(n_dimensions=X_train.shape[1]))
    covar_o[0].X = X_train
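
The n_dim arithmetic above follows the usual mode-n tensor bookkeeping: unfolding a tensor of shape (n, b0, b1, b2) along output mode i yields a matrix with n * (b0*b1*b2) / b_i columns. A numpy sketch of that count, assuming partial_fold/unfold use the standard convention (the functions themselves are not shown in the fragment):

import numpy as np

n, basis_num = 10, [3, 4, 5]
T = np.random.randn(n, *basis_num)   # samples x three output modes
for i in range(3):
    n_dim = n * np.prod(basis_num) // basis_num[i]
    # mode-i unfolding: bring output mode i to the front, flatten the rest
    unfolded = np.moveaxis(T, i + 1, 0).reshape(basis_num[i], -1)
    assert unfolded.shape == (basis_num[i], n_dim)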
Example #12
    def test_gpbase(self):

        covar = linear.LinearCF(n_dimensions=self.n_dimensions)
        n_train = self.X['train'].shape[0]

        theta = 1E-1
        prior_cov = priors.GaussianPrior(key='covar', theta=SP.array([1.]))
        prior_lik = priors.GaussianPrior(key='lik', theta=SP.array([1.]))
        prior = priors.PriorList([prior_cov, prior_lik])

        lik = likelihood_base.GaussIsoLik()
        gp = gp_base.GP(covar_r=covar, likelihood=lik, prior=prior)
        gp.setData(Y=self.Yuni['train'], X=self.X['train'])

        # log likelihood and gradient derivation
        hyperparams = {'covar': SP.array([0.5]), 'lik': SP.array([0.5])}
        LML = gp.LML(hyperparams)
        LMLgrad = gp.LMLgrad(hyperparams)

        K = covar.K(hyperparams['covar']) + lik.K(hyperparams['lik'], n_train)
        Kgrad_covar = covar.Kgrad_theta(hyperparams['covar'], 0)
        Kgrad_lik = lik.Kgrad_theta(hyperparams['lik'], n_train, 0)

        KinvY = LA.solve(K, self.Yuni['train'])
        _LML = 0.5 * self.n_train * SP.log(2 * SP.pi) + 0.5 * SP.log(
            LA.det(K)) + 0.5 * (self.Yuni['train'] *
                                KinvY).sum() + prior.LML(hyperparams)
        LMLgrad_covar = 0.5 * SP.trace(LA.solve(
            K, Kgrad_covar)) - 0.5 * SP.dot(KinvY.T, SP.dot(
                Kgrad_covar, KinvY))
        LMLgrad_covar += prior_cov.LMLgrad(hyperparams)['covar']
        LMLgrad_lik = 0.5 * SP.trace(LA.solve(K, Kgrad_lik)) - 0.5 * SP.dot(
            KinvY.T, SP.dot(Kgrad_lik, KinvY))
        LMLgrad_lik += prior_lik.LMLgrad(hyperparams)['lik']

        assert SP.allclose(
            LML, _LML), 'ouch, marginal log likelihood does not match'
        assert SP.allclose(
            LMLgrad['covar'], LMLgrad_covar
        ), 'ouch, gradient with respect to theta does not match'
        assert SP.allclose(
            LMLgrad['lik'],
            LMLgrad_lik), 'ouch, gradient with respect to theta does not match'

        # predict
        Ystar = gp.predict(hyperparams, self.X['test'])
        Kstar = covar.Kcross(hyperparams['covar'])
        _Ystar = SP.dot(Kstar.T, LA.solve(K, self.Yuni['train'])).flatten()
        assert SP.allclose(Ystar, _Ystar), 'ouch, predictions do not match'

        # optimize
        opts = {'gradcheck': True, 'messages': False}
        hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp,
                                                           hyperparams,
                                                           opts=opts)

        # log likelihood and gradient derivation
        LML = gp.LML(hyperparams_opt)
        LMLgrad = gp.LMLgrad(hyperparams_opt)

        K = covar.K(hyperparams_opt['covar']) + lik.K(hyperparams_opt['lik'],
                                                      n_train)
        Kgrad_covar = covar.Kgrad_theta(hyperparams_opt['covar'], 0)
        Kgrad_lik = lik.Kgrad_theta(hyperparams_opt['lik'], n_train, 0)

        KinvY = LA.solve(K, self.Yuni['train'])
        _LML = 0.5 * self.n_train * SP.log(2 * SP.pi) + 0.5 * SP.log(
            LA.det(K)) + 0.5 * (self.Yuni['train'] *
                                KinvY).sum() + prior.LML(hyperparams_opt)
        LMLgrad_covar = 0.5 * SP.trace(LA.solve(
            K, Kgrad_covar)) - 0.5 * SP.dot(KinvY.T, SP.dot(
                Kgrad_covar, KinvY))
        LMLgrad_covar += prior_cov.LMLgrad(hyperparams_opt)['covar']
        LMLgrad_lik = 0.5 * SP.trace(LA.solve(K, Kgrad_lik)) - 0.5 * SP.dot(
            KinvY.T, SP.dot(Kgrad_lik, KinvY))
        LMLgrad_lik += prior_lik.LMLgrad(hyperparams_opt)['lik']

        assert SP.allclose(
            LML, _LML), 'ouch, marginal log likelihood does not match'
        assert SP.allclose(
            LMLgrad['covar'], LMLgrad_covar
        ), 'ouch, gradient with respect to theta does not match'
        assert SP.allclose(
            LMLgrad['lik'],
            LMLgrad_lik), 'ouch, gradient with respect to theta does not match'

        # predict
        Ystar = gp.predict(hyperparams_opt, self.X['test'])
        Kstar = covar.Kcross(hyperparams_opt['covar'])
        _Ystar = SP.dot(Kstar.T, LA.solve(K, self.Yuni['train'])).flatten()
        assert SP.allclose(Ystar, _Ystar), 'ouch, predictions do not match'
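
The _LML expression in this test is the negative Gaussian log density plus the prior term. For a toy covariance it can be checked directly against scipy; the sketch below also uses slogdet, which computes log|K| more stably than taking LA.det first:

import numpy as np
from scipy.stats import multivariate_normal

np.random.seed(5)
X = np.random.randn(8, 2)
K = np.exp(2 * 0.5) * X.dot(X.T) + np.exp(2 * 0.5) * np.eye(8)   # kernel + noise
y = np.random.randn(8)

Kinv_y = np.linalg.solve(K, y)
_, logdet = np.linalg.slogdet(K)
lml = 0.5 * (8 * np.log(2 * np.pi) + logdet + y.dot(Kinv_y))
assert np.allclose(lml, -multivariate_normal.logpdf(y, mean=np.zeros(8), cov=K))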
Example #13
        # Normalization
        X_scaler = StandardScaler()
        X_scaler.fit(X_train)
        X_train = X_scaler.transform(X_train)
        X_test = X_scaler.transform(X_test)
        Y_scaler = StandardScaler()
        Y_scaler.fit(Y_train)
        Y_train = Y_scaler.transform(Y_train)

        ################################# GP_base Approach ########################
        if (method == 'base' or method == 'all'):
            for i in range(n_tasks):
                hyperparams, Ifilter, bounds = initialize.init(
                    'GPbase_LIN', Y_train[:, i].T, X_train, None)
                covariance = linear.LinearCF(n_dimensions=X_train.shape[0])
                likelihood = lik.GaussIsoLik()
                gp = gp_base.GP(covar=covariance, likelihood=likelihood)
                gp.setData(Y=Y_train[:, i:i + 1], X_r=X_train)

                # Training: optimize hyperparameters
                hyperparams_opt, lml_opt = optimize_base.opt_hyper(
                    gp, hyperparams, bounds=bounds, Ifilter=Ifilter)
                # Testing
                results_base['Y_pred'][s, :, i], results_base['Y_pred_cov'][
                    s, :, i] = gp.predict(hyperparams_opt, Xstar_r=X_test)
                results_base['s_n2'][s, i] = np.exp(2 * hyperparams_opt['lik'])

            results_base['Y_pred'][s, :, :] = Y_scaler.inverse_transform(
                results_base['Y_pred'][s, :, :])
            results_base['Y_pred_cov'][
Example #14
def run(methods, data, opts, f):
    """
    run methods
    """
    # load data
    X_r = data.getX(standardized=opts['standardizedX'], maf=opts['maf'])
    Y = data.getY(standardized=opts['standardizedY']).T
    n_s, n_f = X_r.shape
    n_t = Y.shape[1]

    # indices for cross-validation
    r = SP.random.permutation(n_s)
    Icv = SP.floor(((SP.ones((n_s)) * opts['nfolds']) * r) / n_s)

    if 'CV_GPbase_LIN' in methods:
        print 'do cross-validation with GPbase'
        t_start = time.time()
        covariance = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_base.GP(covar=covariance, likelihood=likelihood)
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)

        lml_test = SP.zeros((n_t, opts['nfolds']))
        for i in range(n_t):
            for j in range(opts['nfolds']):
                LG.info('Train Pheno %d' % i)
                LG.info('Train Fold %d' % j)
                Itrain = Icv != j
                Itest = Icv == j
                y = SP.reshape(Y[:, i], (n_s, 1))
                cv_idx = (j + 1) % opts['nfolds']
                RV = run_optimizer('GPbase_LIN',
                                   gp,
                                   opts=opts,
                                   Y=y[Itrain],
                                   X_r=X_r[Itrain],
                                   Icv=Icv[Itrain],
                                   cv_idx=cv_idx)
                lml_test[i, j] = RV['lml_test']
                Ypred[Itest, i] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
                YpredCV[Icv == cv_idx, i] = RV['Ypred']

        lml_test = lml_test.sum(0)
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f is not None:
            out = f.create_group('CV_GPbase_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPpool_LIN' in methods:
        print 'do cross-validation with GPpool'
        t_start = time.time()
        covar_c = linear.LinearCF(n_dimensions=1)  # vector of 1s
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_kronprod.KronProdGP(covar_r=covar_r,
                                    covar_c=covar_c,
                                    likelihood=likelihood)
        gp.setData(X_c=SP.ones((Y.shape[1], 1)))

        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPpool_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f is not None:
            out = f.create_group('CV_GPpool_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronprod_LIN' in methods:
        print 'do cross-validation with GPkronprod (linear kernel)'
        t_start = time.time()
        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        gp = gp_kronprod.KronProdGP(covar_r=covar_r,
                                    covar_c=covar_c,
                                    likelihood=likelihood)

        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronprod_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f is not None:
            out = f.create_group('CV_GPkronprod_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronsum_LIN' in methods:
        print 'do cross-validation with GPkronsum (linear kernel)'
        t_start = time.time()
        Ypred = SP.zeros(Y.shape)

        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        covar_s = lowrank.LowRankCF(n_dimensions=opts['n_sigma'])

        X_o = SP.zeros((Y.shape[0], 1))
        covar_o = diag.DiagIsoCF(n_dimensions=1)

        gp = gp_kronsum.KronSumGP(covar_r=covar_r,
                                  covar_c=covar_c,
                                  covar_s=covar_s,
                                  covar_o=covar_o)
        lml_test = SP.zeros(opts['nfolds'])
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)

        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronsum_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx,
                               X_o=X_o[Itrain])
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f is not None:
            out = f.create_group('CV_GPkronsum_LIN')
            utils.storeHashHDF5(out, RV)

    return RV
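
A note on the fold assignment at the top of run: applying floor(nfolds * r / n_s) to a permutation r of 0..n_s-1 yields nfolds nearly equal, randomly assigned folds, one label in {0, ..., nfolds-1} per sample. A standalone sketch:

import numpy as np

np.random.seed(6)
n_s, nfolds = 103, 5
r = np.random.permutation(n_s)
Icv = np.floor(nfolds * r / float(n_s))   # one fold label per sample
assert set(Icv) <= set(range(nfolds))
print(np.bincount(Icv.astype(int)))       # roughly n_s / nfolds per fold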