Example #1
0
def measure_runtime(env,N,D,n_reps=10,time_out=10000):
    """Benchmark training runtime of the efficient Kronecker-sum GP on
    simulated data sets.

    Parameters:
        env: environment dict; env['out_dir'] is the output root directory.
        N: number of samples in each simulated data set.
        D: number of traits/tasks in each simulated data set.
        n_reps: number of simulation repetitions to benchmark.
        time_out: per-repetition time limit in seconds, enforced via SIGALRM.

    NOTE(review): this excerpt appears truncated -- the benchmarking of the
    naive (slow) implementation and the saving of the results are not
    visible here.
    """
    opts = {'messages':False}  # silence optimizer output
    out_dir = os.path.join(env['out_dir'],'simulations_runtime')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # per-repetition runtimes and training log-marginal-likelihoods for the
    # fast and the naive (slow) implementation
    t_fast = SP.zeros(n_reps)
    t_slow = SP.zeros(n_reps)
    lml_fast = SP.zeros(n_reps)
    lml_slow = SP.zeros(n_reps)

    for i in range(n_reps):
        # load data
        var_signal = 0.5  # fraction of signal variance used in the simulation
        data,RV = load_simulations(env,var_signal,N,D,i)

        # initialize the four covariance functions of the Kronecker-sum model
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams,Ifilter,bounds = initialize.init('GPkronsum_LIN',Y.T,X,RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])  # fixed noise kernel: identity
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_fast = gp_kronsum.KronSumGP(covar_r=covar_r,covar_c=covar_c,covar_s=covar_s,covar_o=covar_o)
        kgp_fast.setData(Y=Y)
        
        # measure time; SIGALRM aborts optimizations exceeding time_out
        signal.signal(signal.SIGALRM,handler)
        signal.alarm(time_out)
        try:
             t_start = time.clock()
             hyperparams_opt,lmltrain = opt.opt_hyper(kgp_fast,hyperparams,Ifilter=Ifilter,bounds=bounds,opts=opts)
             t_stop = time.clock()
             signal.alarm(0)  # cancel the pending alarm on success
             t_fast[i] = t_stop - t_start
             lml_fast[i] = lmltrain
        except Exception, e:
            print e
            # NOTE(review): the model timed here is kgp_fast, yet the
            # timeout penalty is charged to t_slow -- looks like a
            # copy/paste slip (t_fast?); confirm against the slow-model
            # section of this function.
            t_slow += time_out
            break
Example #2
0
    def test_lowrank_iso(self):
        """Check LowRankCF's covariance, cross-covariance and gradients
        against explicitly constructed reference matrices."""
        # random log-parameters; effective amplitudes are exp(2 * theta)
        theta = SP.array(SP.random.randn(2)**2)
        amps = SP.exp(2 * theta)

        # reference matrices built directly from the latent factors
        XXt = SP.dot(self.Xtrain, self.Xtrain.T)
        K_ref = amps[0] * XXt + amps[1] * SP.eye(self.n_train)
        Kcross_ref = amps[0] * SP.dot(self.Xtrain, self.Xtest.T)
        Kgrad_ref = [2 * amps[0] * XXt,
                     2 * amps[1] * SP.eye(self.n_train)]

        cov = lowrank.LowRankCF(self.n_dimensions)
        cov.X = self.Xtrain
        cov.Xcross = self.Xtest

        assert SP.allclose(cov.K(theta),
                           K_ref), 'ouch, covariance matrix is wrong'
        assert SP.allclose(cov.Kcross(theta),
                           Kcross_ref), 'ouch, cross covariance matrix is wrong'
        assert SP.allclose(Kgrad_ref[0], cov.Kgrad_theta(theta, 0))
        assert SP.allclose(Kgrad_ref[1], cov.Kgrad_theta(theta, 1))

        # gradient with respect to each latent-factor entry [row, dim]
        for dim in range(self.n_dimensions):
            for row in range(self.n_train):
                E = SP.zeros(self.Xtrain.shape)
                E[row, dim] = 1
                grad_ref = amps[0] * (SP.dot(E, self.Xtrain.T) +
                                      SP.dot(self.Xtrain, E.T))
                assert SP.allclose(
                    cov.Kgrad_x(theta, dim, row), grad_ref
                ), 'ouch, gradient with respect to x is wrong for entry [%d,%d]' % (
                    dim, row)
Example #3
0
            t_slow += time_out
            break

    # save runtimes and log-marginal-likelihoods of all repetitions to HDF5
    fn_out =  os.path.join(out_dir,'results_runtime_signal%03d_N%d_D%d.hdf5'%(var_signal*1E3,N,D))
    f = h5py.File(fn_out,'w')
    f['t_fast'] = t_fast
    f['t_slow'] = t_slow
    f['lml_fast'] = lml_fast
    f['lml_slow'] = lml_slow
    f.close()

    # benchmark the naive (slow) Kronecker-sum GP implementation
    for i in range(n_reps):
        # initialize data and covariance functions (same setup as for the
        # fast model above)
        data,RV = load_simulations(env,var_signal,N,D,i)
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams,Ifilter,bounds = initialize.init('GPkronsum_LIN',Y.T,X,RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])  # fixed noise kernel: identity
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_slow = gp_kronsum_naive.KronSumGP(covar_r=covar_r,covar_c=covar_c,covar_s=covar_s,covar_o=covar_o)
        kgp_slow.setData(Y=Y)

        # measure time
Example #4
0
import core.covariance.diag as diag
import core.optimize.optimize_base as optimize_base
import core.priors.priors as prior
import core.util.initialize as initialize

import matplotlib.pylab as PLT

if __name__ == "__main__":
    # settings
    n_latent = 1       # rank of the low-rank task covariances
    n_tasks = 10       # number of tasks/traits
    n_train = 100      # number of training samples
    n_features = 100   # number of input features

    # initialize covariance functions
    # NOTE(review): covar_r is created with n_dimensions=n_train although
    # X_r below has n_features columns -- confirm this is intended.
    covar_c = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_s = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_r = linear.LinearCF(n_dimensions=n_train)
    covar_o = diag.DiagIsoCF(n_dimensions=n_train)

    # true parameters of the generative Kronecker-sum model
    X_c = SP.random.randn(n_tasks, n_latent)
    X_s = SP.random.randn(n_tasks, n_latent)
    X_r = SP.random.randn(n_train, n_features)  #/SP.sqrt(n_dimensions)
    R = SP.dot(X_r, X_r.T)
    C = SP.dot(X_c, X_c.T)
    Sigma = SP.dot(X_s, X_s.T)
    # sample y ~ N(0, C (x) R + Sigma (x) I) and reshape column-major to
    # an (n_train, n_tasks) phenotype matrix
    K = SP.kron(C, R) + SP.kron(Sigma, SP.eye(n_train))
    # NOTE(review): the PSD check below discards its result -- presumably
    # an assert was intended; confirm.
    SP.all(SP.linalg.eigvals(K) >= 0)
    y = SP.random.multivariate_normal(SP.zeros(n_tasks * n_train), K)
    Y = SP.reshape(y, (n_train, n_tasks), order='F')
Example #5
0
    def test_gpkronsum(self):
        """Consistency test: the efficient Kronecker-sum GP
        (gp_kronsum.KronSumGP) must agree with the naive reference
        implementation (gp_kronsum_naive.KronSumGP) on predictions, log
        marginal likelihood and all gradients -- both at fixed
        hyperparameters and after hyperparameter optimization.
        """
        # low-rank covariances for tasks (c, s) and samples (r, o)
        covar_c = lowrank.LowRankCF(n_dimensions=self.n_latent)
        covar_r = lowrank.LowRankCF(n_dimensions=self.n_dimensions)
        covar_s = lowrank.LowRankCF(n_dimensions=self.n_latent)
        covar_o = lowrank.LowRankCF(n_dimensions=self.n_dimensions)

        # random initial latent factors
        X0_c = SP.random.randn(self.n_tasks, self.n_latent)
        X0_s = SP.random.randn(self.n_tasks, self.n_latent)
        X0_r = SP.random.randn(self.n_train, self.n_dimensions)
        X0_o = SP.random.randn(self.n_train, self.n_dimensions)

        # efficient implementation
        gp = gp_kronsum.KronSumGP(covar_c=covar_c,
                                  covar_r=covar_r,
                                  covar_s=covar_s,
                                  covar_o=covar_o)
        gp.setData(Y=self.Ykronsum['train'])

        # naive reference implementation, sharing covariances and data
        gp2 = gp_kronsum_naive.KronSumGP(covar_c=covar_c,
                                         covar_r=covar_r,
                                         covar_s=covar_s,
                                         covar_o=covar_o)
        gp2.setData(Y=self.Ykronsum['train'])

        hyperparams = {
            'covar_c': SP.array([0.5, 0.5]),
            'X_c': X0_c,
            'covar_r': SP.array([0.5, 0.5]),
            'X_r': X0_r,
            'covar_s': SP.array([0.5, 0.5]),
            'X_s': X0_s,
            'covar_o': SP.array([0.5, 0.5]),
            'X_o': X0_o
        }

        # evaluate both implementations at identical hyperparameters
        yhat = gp.predict(hyperparams, Xstar_r=self.X['test'], debugging=True)
        lml = gp._LML_covar(hyperparams, debugging=True)
        grad = {}
        grad.update(gp._LMLgrad_c(hyperparams, debugging=True))
        grad.update(gp._LMLgrad_r(hyperparams, debugging=True))
        grad.update(gp._LMLgrad_o(hyperparams, debugging=True))
        grad.update(gp._LMLgrad_s(hyperparams, debugging=True))

        yhat2 = gp2.predict(hyperparams, Xstar_r=self.X['test'])
        lml2 = gp2._LML_covar(hyperparams)
        grad2 = {}
        grad2.update(gp2._LMLgrad_covar(hyperparams))
        grad2.update(gp2._LMLgrad_x(hyperparams))

        assert SP.allclose(yhat, yhat2), 'predictions does not match'
        assert SP.allclose(lml, lml2), 'log likelihood does not match'
        for key in grad.keys():
            assert SP.allclose(
                grad[key],
                grad2[key]), 'gradient with respect to x does not match'

        # second round: isotropic noise kernel and fixed sample inputs,
        # followed by hyperparameter optimization of both implementations
        covar_o = diag.DiagIsoCF(n_dimensions=self.n_dimensions)
        gp = gp_kronsum.KronSumGP(covar_c=covar_c,
                                  covar_r=covar_r,
                                  covar_s=covar_s,
                                  covar_o=covar_o)
        gp.setData(Y=self.Ykronsum['train'],
                   X_r=self.X['train'],
                   X_o=self.X['train'])

        gp2 = gp_kronsum_naive.KronSumGP(covar_c=covar_c,
                                         covar_r=covar_r,
                                         covar_s=covar_s,
                                         covar_o=covar_o)
        gp2.setData(Y=self.Ykronsum['train'],
                    X_r=self.X['train'],
                    X_o=self.X['train'])

        # X_r/X_o are now fixed data, so only their amplitudes are optimized
        hyperparams = {
            'covar_c': SP.array([0.5, 0.5]),
            'X_c': X0_c,
            'covar_r': SP.array([0.5, 0.5]),
            'covar_s': SP.array([0.5, 0.5]),
            'X_s': X0_s,
            'covar_o': SP.array([0.5])
        }

        bounds = {
            'covar_c': SP.array([[-5, +5]] * 2),
            'covar_r': SP.array([[-5, +5]] * 2),
            'covar_s': SP.array([[-5, +5]] * 2),
            'covar_o': SP.array([[-5, +5]])
        }
        opts = {'gradcheck': True}  # numerically verify analytic gradients
        import time
        t_start = time.time()
        hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp,
                                                           hyperparams,
                                                           opts=opts,
                                                           bounds=bounds)
        t_stop = time.time()
        print 'time(training): %.4f' % (t_stop - t_start)

        t_start = time.time()
        hyperparams_opt2, lml_opt2 = optimize_base.opt_hyper(gp2,
                                                             hyperparams,
                                                             opts=opts,
                                                             bounds=bounds)
        t_stop = time.time()

        print 'time(training): %.4f' % (t_stop - t_start)
        # both optimizations must reach the same optimum
        assert SP.allclose(lml_opt, lml_opt2), 'ouch, optimization did fail'

        gp._invalidate_cache(
        )  # otherwise debugging parameters are not up to date!
        # re-compare predictions, likelihood and gradients at the optimum
        yhat = gp.predict(hyperparams_opt,
                          Xstar_r=self.X['test'],
                          debugging=True)
        lml = gp._LML_covar(hyperparams_opt, debugging=True)
        grad = {}
        grad.update(gp._LMLgrad_c(hyperparams_opt, debugging=True))
        grad.update(gp._LMLgrad_r(hyperparams_opt, debugging=True))
        grad.update(gp._LMLgrad_o(hyperparams_opt, debugging=True))
        grad.update(gp._LMLgrad_s(hyperparams_opt, debugging=True))

        yhat2 = gp2.predict(hyperparams_opt, Xstar_r=self.X['test'])
        lml2 = gp2._LML_covar(hyperparams_opt)
        grad2 = {}
        grad2.update(gp2._LMLgrad_covar(hyperparams_opt))
        grad2.update(gp2._LMLgrad_x(hyperparams_opt))

        assert SP.allclose(yhat, yhat2), 'predictions does not match'
        assert SP.allclose(lml, lml2), 'log likelihood does not match'
        for key in grad.keys():
            assert SP.allclose(
                grad[key],
                grad2[key]), 'gradient with respect to x does not match'
def run(methods, data, opts, f):
    """
    run methods
    """
    # load data
    X_r = data.getX(standardized=opts['standardizedX'], maf=opts['maf'])
    Y = data.getY(standardized=opts['standardizedY']).T
    n_s, n_f = X_r.shape
    n_t = Y.shape[1]

    # indices for cross-validation
    r = SP.random.permutation(n_s)
    Icv = SP.floor(((SP.ones((n_s)) * opts['nfolds']) * r) / n_s)

    if 'CV_GPbase_LIN' in methods:
        print 'do cross-validation with GPbase'
        t_start = time.time()
        covariance = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_base.GP(covar=covariance, likelihood=likelihood)
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)

        lml_test = SP.zeros((n_t, opts['nfolds']))
        for i in range(n_t):
            for j in range(opts['nfolds']):
                LG.info('Train Pheno %d' % i)
                LG.info('Train Fold %d' % j)
                Itrain = Icv != j
                Itest = Icv == j
                y = SP.reshape(Y[:, i], (n_s, 1))
                cv_idx = (j + 1) % opts['nfolds']
                RV = run_optimizer('GPbase_LIN',
                                   gp,
                                   opts=opts,
                                   Y=y[Itrain],
                                   X_r=X_r[Itrain],
                                   Icv=Icv[Itrain],
                                   cv_idx=cv_idx)
                lml_test[i, j] = RV['lml_test']
                Ypred[Itest, i] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
                YpredCV[Icv == cv_idx, i] = RV['Ypred']

        lml_test = lml_test.sum(0)
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f != None:
            out = f.create_group('CV_GPbase_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPpool_LIN' in methods:
        print 'do cross-validation with GPpool'
        t_start = time.time()
        covar_c = linear.LinearCF(n_dimensions=1)  # vector of 1s
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_kronprod.KronProdGP(covar_r=covar_r,
                                    covar_c=covar_c,
                                    likelihood=likelihood)
        gp.setData(X_c=SP.ones((Y.shape[1], 1)))

        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPpool_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f != None:
            out = f.create_group('CV_GPpool_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronprod_LIN' in methods:
        print 'do cross-validation with GPkronprod (linear kernel)'
        t_start = time.time()
        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        gp = gp_kronprod.KronProdGP(covar_r=covar_r,
                                    covar_c=covar_c,
                                    likelihood=likelihood)

        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronprod_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f != None:
            out = f.create_group('CV_GPkronprod_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronsum_LIN' in methods:
        print 'do cross-validation with GPkronsum (linear kernel)'
        t_start = time.time()
        Ypred = SP.zeros(Y.shape)

        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        covar_s = lowrank.LowRankCF(n_dimensions=opts['n_sigma'])

        X_o = SP.zeros((Y.shape[0], 1))
        covar_o = diag.DiagIsoCF(n_dimensions=1)

        gp = gp_kronsum.KronSumGP(covar_r=covar_r,
                                  covar_c=covar_c,
                                  covar_s=covar_s,
                                  covar_o=covar_o)
        lml_test = SP.zeros(opts['nfolds'])
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)

        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronsum_LIN',
                               gp,
                               opts=opts,
                               Y=Y[Itrain],
                               X_r=X_r[Itrain],
                               Icv=Icv[Itrain],
                               cv_idx=cv_idx,
                               X_o=X_o[Itrain])
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']

        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {
            'Y': Y,
            'Ypred': Ypred,
            'r2': r2,
            'time': t_stop - t_start,
            'Icv': Icv,
            'lml_test': lml_test,
            'YpredCV': YpredCV
        }

        if f != None:
            out = f.create_group('CV_GPkronsum_LIN')
            utils.storeHashHDF5(out, RV)

    return RV