def test_gpkronprod(self):
    # initialize
    covar_c = linear.LinearCF(n_dimensions=self.n_latent)
    covar_r = linear.LinearCF(n_dimensions=self.n_dimensions)
    X0_c = SP.random.randn(self.n_tasks, self.n_latent)
    lik = likelihood_base.GaussIsoLik()
    gp = gp_kronprod.KronProdGP(covar_c=covar_c, covar_r=covar_r, likelihood=lik)
    gp.setData(Y=self.Ykronprod['train'], X_r=self.X['train'])
    hyperparams = {
        'lik': SP.array([0.5]),
        'X_c': X0_c,
        'covar_r': SP.array([0.5]),
        'covar_c': SP.array([0.5]),
        'X_r': self.X['train']
    }

    # check predictions, likelihood and gradients
    gp.predict(hyperparams, Xstar_r=self.X['test'], debugging=True)
    gp._LML_covar(hyperparams, debugging=True)
    gp._LMLgrad_covar(hyperparams, debugging=True)
    gp._LMLgrad_lik(hyperparams, debugging=True)
    gp._LMLgrad_x(hyperparams, debugging=True)

    # optimize
    hyperparams = {
        'lik': SP.array([0.5]),
        'X_c': X0_c,
        'covar_r': SP.array([0.5]),
        'covar_c': SP.array([0.5])
    }
    opts = {'gradcheck': True}
    hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp, hyperparams, opts=opts)
    Kest = covar_c.K(hyperparams_opt['covar_c'])

    # check predictions, likelihood and gradients at the optimum
    gp._invalidate_cache()  # otherwise debugging parameters are not up to date!
    gp.predict(hyperparams_opt, debugging=True, Xstar_r=self.X['test'])
    gp._LML_covar(hyperparams_opt, debugging=True)
    gp._LMLgrad_covar(hyperparams_opt, debugging=True)
    gp._LMLgrad_lik(hyperparams_opt, debugging=True)
    gp._LMLgrad_x(hyperparams_opt, debugging=True)
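# A minimal sketch of what opts = {'gradcheck': True} presumably triggers inside
# opt_hyper: comparing analytic gradients against central finite differences.
# This helper is illustrative only and not part of the library API.
def example_gradcheck(f, fgrad, theta, eps=1e-6, tol=1e-4):
    """f: scalar objective, fgrad: its analytic gradient, theta: 1d parameter array."""
    grad_analytic = fgrad(theta)
    grad_numeric = SP.zeros(theta.shape)
    for i in range(theta.size):
        dtheta = SP.zeros(theta.shape)
        dtheta[i] = eps
        grad_numeric[i] = (f(theta + dtheta) - f(theta - dtheta)) / (2. * eps)
    return SP.absolute(grad_analytic - grad_numeric).max() < tol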
def test_gplvm(self):
    covar = linear.LinearCF(n_dimensions=self.n_latent)
    lik = likelihood_base.GaussIsoLik()
    prior = priors.GaussianPrior(key='X', theta=SP.array([1.]))
    gp = gplvm.GPLVM(covar=covar, likelihood=lik, prior=prior)
    X0 = self.Xlatent  # initialize at the simulated latent positions
    covar.X = X0
    gp.setData(Y=self.Ylatent)

    # gradient with respect to X
    hyperparams = {'covar': SP.array([0.5]), 'lik': SP.array([0.5]), 'X': X0}
    LML = gp.LML(hyperparams)
    LMLgrad = gp.LMLgrad(hyperparams)
    LMLgrad_x = SP.zeros((self.n_tasks, self.n_latent))
    W = gp.get_covariances(hyperparams)['W']
    for d in xrange(self.n_latent):
        for n in xrange(self.n_tasks):
            Knd_grad = covar.Kgrad_x(hyperparams['covar'], d, n)
            LMLgrad_x[n, d] = 0.5 * (W * Knd_grad).sum()
    LMLgrad_x += prior.LMLgrad(hyperparams)['X']
    assert SP.allclose(LMLgrad['X'], LMLgrad_x), \
        'ouch, gradient with respect to X is wrong'

    # optimize
    opts = {'gradcheck': True}
    hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp, hyperparams, opts=opts)
    Ktrue = SP.dot(self.Xlatent, self.Xlatent.T)
    covar.X = hyperparams_opt['X']
    Kest = covar.K(hyperparams_opt['covar'])

    # gradient with respect to X at the optimum
    LML = gp.LML(hyperparams_opt)
    LMLgrad = gp.LMLgrad(hyperparams_opt)
    LMLgrad_x = SP.zeros((self.n_tasks, self.n_latent))
    W = gp.get_covariances(hyperparams_opt)['W']
    for d in xrange(self.n_latent):
        for n in xrange(self.n_tasks):
            Knd_grad = covar.Kgrad_x(hyperparams_opt['covar'], d, n)
            LMLgrad_x[n, d] = 0.5 * (W * Knd_grad).sum()
    LMLgrad_x += prior.LMLgrad(hyperparams_opt)['X']
    assert SP.allclose(LMLgrad['X'], LMLgrad_x), \
        'ouch, gradient with respect to X is wrong'
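# The per-entry loops above use the identity dLML/dX[n, d] = 0.5 * sum(W * dK/dX[n, d]).
# A self-contained sketch (pure scipy, independent of the library) checking the
# kernel-gradient term for the linear kernel K = exp(2*theta) * X X^T against
# central finite differences; all names are local to the example.
def example_linear_kernel_xgrad():
    X = SP.random.randn(5, 2)
    theta_hat = SP.exp(2 * 0.5)
    n, d = 3, 1
    E = SP.zeros(X.shape)
    E[n, d] = 1.0  # perturbation direction for entry [n, d]
    Kgrad = theta_hat * (SP.dot(E, X.T) + SP.dot(X, E.T))  # analytic dK/dX[n, d]
    eps = 1e-6
    Xp = X.copy()
    Xp[n, d] += eps
    Xm = X.copy()
    Xm[n, d] -= eps
    Kfd = theta_hat * (SP.dot(Xp, Xp.T) - SP.dot(Xm, Xm.T)) / (2 * eps)
    assert SP.allclose(Kgrad, Kfd, atol=1e-5)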
def measure_runtime(env, N, D, n_reps=10, time_out=10000):
    opts = {'messages': False}
    out_dir = os.path.join(env['out_dir'], 'simulations_runtime')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    t_fast = SP.zeros(n_reps)
    t_slow = SP.zeros(n_reps)
    lml_fast = SP.zeros(n_reps)
    lml_slow = SP.zeros(n_reps)

    for i in range(n_reps):
        # load data
        var_signal = 0.5
        data, RV = load_simulations(env, var_signal, N, D, i)

        # initialize
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams, Ifilter, bounds = initialize.init('GPkronsum_LIN', Y.T, X, RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_fast = gp_kronsum.KronSumGP(covar_r=covar_r, covar_c=covar_c,
                                        covar_s=covar_s, covar_o=covar_o)
        kgp_fast.setData(Y=Y)

        # measure time, aborting via SIGALRM if the run exceeds time_out seconds
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(time_out)
        try:
            t_start = time.clock()
            hyperparams_opt, lmltrain = opt.opt_hyper(kgp_fast, hyperparams,
                                                      Ifilter=Ifilter, bounds=bounds,
                                                      opts=opts)
            t_stop = time.clock()
            signal.alarm(0)
            t_fast[i] = t_stop - t_start
            lml_fast[i] = lmltrain
        except Exception, e:
            print e
            t_slow += time_out
            break
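# `handler` is referenced above but not defined in this fragment; a typical
# SIGALRM handler for this timeout pattern simply raises, so that opt_hyper is
# interrupted and control falls into the except-branch (a sketch of the assumed
# behavior, not necessarily the original definition):
def handler(signum, frame):
    raise Exception('optimization timed out')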
def sim_linear_kernel(X=None, N=None, n_dim=None, theta=None):
    """simulate positive definite kernel"""
    if X is None:  # comparing arrays with '== None' is unreliable; use 'is None'
        X = SP.random.randn(N, n_dim)
    else:
        N = X.shape[0]
        n_dim = X.shape[1]
    if theta is None:
        theta = SP.random.randn(1)

    cf = linear.LinearCF(n_dim)
    cf.X = X
    K = cf.K(theta)
    return K, X
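# Hypothetical usage of sim_linear_kernel (shapes illustrative): the returned
# kernel is exp(2*theta) * X X^T and hence positive semi-definite up to
# numerical round-off.
def example_sim_linear_kernel():
    K, X = sim_linear_kernel(N=20, n_dim=3)
    assert K.shape == (20, 20)
    assert SP.all(SP.linalg.eigvalsh(K) > -1e-8)  # PSD up to round-off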
def init_GPkronprod(Y, X_r, n_c):
    """init parameters for kron(C + sigma*I, R) + sigma*I"""
    # build linear kernel with the features
    covar0_r = SP.array([0])
    covar_r = linear.LinearCF(n_dimensions=X_r.shape[1])
    covar_r.X = X_r
    R = covar_r.K(covar0_r)
    var_R = utils.getVariance(R)
    cov = SP.cov(Y)

    # split into likelihood and noise terms
    ratio = SP.random.rand(3)
    ratio /= ratio.sum()
    lik0 = ratio[0] * SP.diag(cov).min()
    covar0_c = ratio[1] * SP.diag(cov).min()

    # remaining variance is assigned to latent factors
    if n_c > 1:
        X0_c = SP.zeros((Y.shape[0], n_c))
        ratio = SP.random.rand(n_c)
        ratio /= ratio.sum()
        for i in range(n_c):
            # split further up; note rand() is non-negative, so sign() is always +1 here
            X0_c[:, i] = SP.sign(SP.random.rand()) * SP.sqrt(
                ratio[i] * (SP.diag(cov) - lik0 - covar0_c))
    else:
        X0_c = SP.sign(SP.random.rand()) * SP.sqrt(SP.diag(cov) - lik0 - covar0_c)
    X0_c = SP.reshape(X0_c, (X0_c.shape[0], n_c))

    # check if variance of initial values matches observed variance
    assert SP.allclose(SP.diag(cov), (X0_c**2).sum(1) + lik0 + covar0_c), \
        'ouch, something is wrong'

    # bring into correct format and transform as necessary
    covar0_c = 0.5 * SP.log(SP.array([1. / var_R, covar0_c]))
    lik0 = 0.5 * SP.log(SP.array([lik0]))
    return X0_c, covar0_c, lik0, covar0_r
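# Hypothetical call of init_GPkronprod, assuming Y is tasks x samples and X_r is
# samples x features (consistent with SP.cov(Y) producing a task-by-task matrix
# above); shapes are illustrative only.
def example_init_GPkronprod():
    Y = SP.random.randn(4, 50)     # 4 tasks, 50 samples
    X_r = SP.random.randn(50, 10)  # 50 samples, 10 features
    X0_c, covar0_c, lik0, covar0_r = init_GPkronprod(Y, X_r, n_c=2)
    assert X0_c.shape == (4, 2)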
def test_linear(self):
    theta = SP.array([SP.random.randn()**2])
    theta_hat = SP.exp(2 * theta)
    _K = SP.dot(self.Xtrain, self.Xtrain.T)
    _Kcross = SP.dot(self.Xtrain, self.Xtest.T)

    cov = linear.LinearCF(n_dimensions=self.n_dimensions)
    cov.X = self.Xtrain
    cov.Xcross = self.Xtest

    K = cov.K(theta)
    Kcross = cov.Kcross(theta)
    Kgrad_x = cov.Kgrad_x(theta, 0)
    Kgrad_theta = cov.Kgrad_theta(theta, 0)

    assert SP.allclose(K, theta_hat * _K), 'ouch, covariance matrix is wrong'
    assert SP.allclose(Kgrad_theta, 2 * theta_hat * _K), \
        'ouch, gradient with respect to theta is wrong'
    assert SP.allclose(Kcross, theta_hat * _Kcross), 'ouch, cross covariance is wrong'

    # gradient with respect to latent factors, checked entry by entry
    for i in range(self.n_dimensions):
        for j in range(self.n_train):
            Xgrad = SP.zeros(self.Xtrain.shape)
            Xgrad[j, i] = 1
            _Kgrad_x = theta_hat * (SP.dot(Xgrad, self.Xtrain.T) +
                                    SP.dot(self.Xtrain, Xgrad.T))
            Kgrad_x = cov.Kgrad_x(theta, i, j)
            assert SP.allclose(Kgrad_x, _Kgrad_x), \
                'ouch, gradient with respect to x is wrong for entry [%d,%d]' % (i, j)
    # save
    fn_out = os.path.join(out_dir, 'results_runtime_signal%03d_N%d_D%d.hdf5'
                          % (var_signal * 1E3, N, D))
    f = h5py.File(fn_out, 'w')
    f['t_fast'] = t_fast
    f['t_slow'] = t_slow
    f['lml_fast'] = lml_fast
    f['lml_slow'] = lml_slow
    f.close()

    for i in range(n_reps):
        # initialize
        data, RV = load_simulations(env, var_signal, N, D, i)
        covar_c = lowrank.LowRankCF(n_dimensions=RV['n_c'])
        covar_r = linear.LinearCF(n_dimensions=RV['n_r'])
        covar_s = lowrank.LowRankCF(n_dimensions=RV['n_sigma'])
        covar_o = fixed.FixedCF(n_dimensions=RV['n_r'])
        X = data.getX(standardized=False)
        Y = data.getY(standardized=False).T
        hyperparams, Ifilter, bounds = initialize.init('GPkronsum_LIN', Y.T, X, RV)
        covar_r.X = X
        covar_o.X = X
        covar_o._K = SP.eye(RV['N'])
        covar_s.X = hyperparams['X_s']
        covar_c.X = hyperparams['X_c']
        kgp_slow = gp_kronsum_naive.KronSumGP(covar_r=covar_r, covar_c=covar_c,
                                              covar_s=covar_s, covar_o=covar_o)
        kgp_slow.setData(Y=Y)

        # measure time
        signal.signal(signal.SIGALRM, handler)
def init_GPkronsum(Y, X_r, n_c, n_sigma):
    """init parameters for kron(C + sigma*I, R) + kron(Sigma + sigma*I, Omega)

    input:
    Y        task matrix
    X_r      feature matrix
    n_c      number of hidden factors in C
    n_sigma  number of hidden factors in Sigma
    """
    n_t, n_s = Y.shape
    n_f = X_r.shape[1]

    # build linear kernel with the features
    covar_r = linear.LinearCF(n_dimensions=n_f)
    covar_r.X = X_r
    covar0_r = SP.array([0])
    R = covar_r.K(covar0_r)
    var_R = utils.getVariance(R)

    # initialize hidden factors
    X0_c = SP.zeros((n_t, n_c))
    X0_sigma = SP.zeros((n_t, n_sigma))

    # observed variance
    var = Y.var(1)
    var0 = SP.copy(var)

    # assign parts of the variance to individual effects
    ratio = SP.random.rand(3)
    ratio /= ratio.sum()
    covar0_c = ratio[0] * var.min()
    covar0_sigma = ratio[1] * var.min()

    # remaining variance is assigned to latent factors
    var -= covar0_c
    var -= covar0_sigma
    for i in range(n_t):
        signal = SP.random.rand() * var[i]
        if n_c == 1:
            X0_c[i] = SP.sign(SP.random.rand()) * SP.sqrt(signal)
        else:
            ratio = SP.random.rand(n_c)
            ratio /= ratio.sum()
            for j in range(n_c):
                X0_c[i, j] = SP.sign(SP.random.rand()) * SP.sqrt(ratio[j] * signal)
        if n_sigma == 1:
            X0_sigma[i] = SP.sign(SP.random.rand()) * SP.sqrt(var[i] - signal)
        else:
            ratio = SP.random.rand(n_sigma)
            ratio /= ratio.sum()
            for j in range(n_sigma):
                X0_sigma[i, j] = SP.sign(SP.random.rand()) * SP.sqrt(
                    ratio[j] * (var[i] - signal))

    # check if variance of initial values matches observed variance
    assert SP.allclose(var0, (X0_c**2).sum(1).flatten() +
                       (X0_sigma**2).sum(1).flatten() +
                       covar0_c + covar0_sigma), 'ouch, something is wrong'

    # bring into correct format and transform as necessary
    covar0_c = 0.5 * SP.log([1. / var_R, covar0_c])
    covar0_sigma = 0.5 * SP.log([1, covar0_sigma])
    X0_c = SP.reshape(X0_c, (X0_c.shape[0], n_c))
    X0_sigma = SP.reshape(X0_sigma, (X0_sigma.shape[0], n_sigma))
    return X0_c, X0_sigma, covar0_c, covar0_sigma, covar0_r
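# Hypothetical call of init_GPkronsum, mirroring its docstring: Y is tasks x
# samples, X_r is samples x features (shapes illustrative only).
def example_init_GPkronsum():
    Y = SP.random.randn(4, 50)     # 4 tasks, 50 samples
    X_r = SP.random.randn(50, 10)  # 50 samples, 10 features
    X0_c, X0_sigma, covar0_c, covar0_sigma, covar0_r = \
        init_GPkronsum(Y, X_r, n_c=2, n_sigma=1)
    assert X0_c.shape == (4, 2) and X0_sigma.shape == (4, 1)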
X_scaler.fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)
Y_scaler = StandardScaler()
Y_scaler.fit(Y_train)
Y_train = Y_scaler.transform(Y_train)

for b in range(len(basis_num)):
    # project phenotypes onto a low-dimensional PCA basis
    pca = PCA(n_components=basis_num[b])
    Y_train_hat = pca.fit_transform(Y_train)
    B = pca.components_.T
    hyperparams, Ifilter, bounds = initialize.init('MTGPR', Y_train_hat.T, X_train, {})

    # column (task) covariance: linear + squared-exponential + diagonal noise
    covar_c = composite.SumCF(n_dimensions=Y_train_hat.shape[0])
    covar_c.append_covar(linear.LinearCF(n_dimensions=Y_train_hat.shape[0]))
    covar_c.append_covar(se.SqExpCF(n_dimensions=Y_train_hat.shape[0]))
    covar_c.append_covar(DiagIsoCF(n_dimensions=Y_train_hat.shape[0]))
    covar_c.X = Y_train_hat.T

    # row (sample) covariance over the input features
    covar_r = composite.SumCF(n_dimensions=X_train.shape[1])
    covar_r.append_covar(linear.LinearCF(n_dimensions=X_train.shape[1]))
    covar_r.append_covar(se.SqExpCF(n_dimensions=X_train.shape[1]))
    covar_r.append_covar(DiagIsoCF(n_dimensions=X_train.shape[1]))
    covar_r.X = X_train

    likelihood = lik.GaussIsoLik()
    gp = sMTGPR.sMTGPR(covar_r=covar_r, covar_c=covar_c, likelihood=likelihood, basis=B)
    gp.setData(Y=Y_train, Y_hat=Y_train_hat, X=X_train)
import core.priors.priors as prior
import core.util.initialize as initialize
import matplotlib.pylab as PLT

if __name__ == "__main__":
    # settings
    n_latent = 1
    n_tasks = 10
    n_train = 100
    n_features = 100

    # initialize covariance functions
    covar_c = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_s = lowrank.LowRankCF(n_dimensions=n_latent)
    covar_r = linear.LinearCF(n_dimensions=n_train)
    covar_o = diag.DiagIsoCF(n_dimensions=n_train)

    # true parameters
    X_c = SP.random.randn(n_tasks, n_latent)
    X_s = SP.random.randn(n_tasks, n_latent)
    X_r = SP.random.randn(n_train, n_features)  # /SP.sqrt(n_dimensions)
    R = SP.dot(X_r, X_r.T)
    C = SP.dot(X_c, X_c.T)
    Sigma = SP.dot(X_s, X_s.T)
    K = SP.kron(C, R) + SP.kron(Sigma, SP.eye(n_train))
    # sanity check: K must be positive semi-definite (up to round-off)
    assert SP.all(SP.linalg.eigvalsh(K) > -1e-6)

    # sample vec(Y) from N(0, K) and unstack tasks into columns
    y = SP.random.multivariate_normal(SP.zeros(n_tasks * n_train), K)
    Y = SP.reshape(y, (n_train, n_tasks), order='F')

    # initialization parameters
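# Quick self-contained check of the vectorization convention used above: with
# order='F', consecutive length-n_train blocks of y become the columns of Y,
# i.e. Y[:, t] holds task t, matching K = kron(C, R) with tasks as the outer
# Kronecker factor.
def example_vec_convention():
    y = SP.arange(6.)  # pretend n_train=3, n_tasks=2
    Y = SP.reshape(y, (3, 2), order='F')
    assert SP.all(Y[:, 0] == y[:3]) and SP.all(Y[:, 1] == y[3:])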
if (method == 'sMT_GPTR'):
    basis_num = [b, b, b]
    noise_basis_num = [nb, nb, nb]
    Y_train = partial_fold(Y_train, mode=0,
                           shape=[Y_train.shape[0], control_fmri_data.shape[1],
                                  control_fmri_data.shape[2], control_fmri_data.shape[3]])
    hyperparams, Ifilter, bounds = initialize.init('Zero', Y_train, X_train, {})
    covar_c = list()
    covar_s = list()
    covar_r = list()
    covar_o = list()
    for i in range(Y_train.ndim - 1):
        # signal covariance along tensor mode i
        n_dim = Y_train.shape[0] * np.prod(basis_num) / basis_num[i]
        covar_c.append(composite.SumCF(n_dimensions=n_dim))
        covar_c[i].append_covar(linear.LinearCF(n_dimensions=n_dim))
        covar_c[i].append_covar(se.SqExpCF(n_dimensions=n_dim))
        covar_c[i].append_covar(DiagIsoCF(n_dimensions=n_dim))
        # noise covariance along tensor mode i
        n_dim = Y_train.shape[0] * np.prod(noise_basis_num) / noise_basis_num[i]
        covar_s.append(composite.SumCF(n_dimensions=n_dim))
        covar_s[i].append_covar(linear.LinearCF(n_dimensions=n_dim))
        covar_s[i].append_covar(se.SqExpCF(n_dimensions=n_dim))
        covar_s[i].append_covar(DiagIsoCF(n_dimensions=n_dim))
    covar_r.append(composite.SumCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(linear.LinearCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(se.SqExpCF(n_dimensions=X_train.shape[1]))
    covar_r[0].append_covar(DiagIsoCF(n_dimensions=X_train.shape[1]))
    covar_r[0].X = X_train
    covar_o.append(DiagIsoCF(n_dimensions=X_train.shape[1]))
    covar_o[0].X = X_train
def test_gpbase(self):
    covar = linear.LinearCF(n_dimensions=self.n_dimensions)
    n_train = self.X['train'].shape[0]
    theta = 1E-1
    prior_cov = priors.GaussianPrior(key='covar', theta=SP.array([1.]))
    prior_lik = priors.GaussianPrior(key='lik', theta=SP.array([1.]))
    prior = priors.PriorList([prior_cov, prior_lik])
    lik = likelihood_base.GaussIsoLik()
    gp = gp_base.GP(covar_r=covar, likelihood=lik, prior=prior)
    gp.setData(Y=self.Yuni['train'], X=self.X['train'])

    # log likelihood and gradient derivation
    hyperparams = {'covar': SP.array([0.5]), 'lik': SP.array([0.5])}
    LML = gp.LML(hyperparams)
    LMLgrad = gp.LMLgrad(hyperparams)
    K = covar.K(hyperparams['covar']) + lik.K(hyperparams['lik'], n_train)
    Kgrad_covar = covar.Kgrad_theta(hyperparams['covar'], 0)
    Kgrad_lik = lik.Kgrad_theta(hyperparams['lik'], n_train, 0)
    KinvY = LA.solve(K, self.Yuni['train'])
    _LML = (self.n_train / 2 * SP.log(2 * SP.pi) + 0.5 * SP.log(LA.det(K)) +
            0.5 * (self.Yuni['train'] * KinvY).sum() + prior.LML(hyperparams))
    LMLgrad_covar = (0.5 * SP.trace(LA.solve(K, Kgrad_covar)) -
                     0.5 * SP.dot(KinvY.T, SP.dot(Kgrad_covar, KinvY)))
    LMLgrad_covar += prior_cov.LMLgrad(hyperparams)['covar']
    LMLgrad_lik = (0.5 * SP.trace(LA.solve(K, Kgrad_lik)) -
                   0.5 * SP.dot(KinvY.T, SP.dot(Kgrad_lik, KinvY)))
    LMLgrad_lik += prior_lik.LMLgrad(hyperparams)['lik']
    assert SP.allclose(LML, _LML), 'ouch, marginal log likelihood does not match'
    assert SP.allclose(LMLgrad['covar'], LMLgrad_covar), \
        'ouch, gradient with respect to covar does not match'
    assert SP.allclose(LMLgrad['lik'], LMLgrad_lik), \
        'ouch, gradient with respect to lik does not match'

    # predict
    Ystar = gp.predict(hyperparams, self.X['test'])
    Kstar = covar.Kcross(hyperparams['covar'])
    _Ystar = SP.dot(Kstar.T, LA.solve(K, self.Yuni['train'])).flatten()
    assert SP.allclose(Ystar, _Ystar), 'ouch, predictions do not match'

    # optimize
    opts = {'gradcheck': True, 'messages': False}
    hyperparams_opt, lml_opt = optimize_base.opt_hyper(gp, hyperparams, opts=opts)

    # log likelihood and gradient derivation at the optimum
    LML = gp.LML(hyperparams_opt)
    LMLgrad = gp.LMLgrad(hyperparams_opt)
    K = covar.K(hyperparams_opt['covar']) + lik.K(hyperparams_opt['lik'], n_train)
    Kgrad_covar = covar.Kgrad_theta(hyperparams_opt['covar'], 0)
    Kgrad_lik = lik.Kgrad_theta(hyperparams_opt['lik'], n_train, 0)
    KinvY = LA.solve(K, self.Yuni['train'])
    _LML = (self.n_train / 2 * SP.log(2 * SP.pi) + 0.5 * SP.log(LA.det(K)) +
            0.5 * (self.Yuni['train'] * KinvY).sum() + prior.LML(hyperparams_opt))
    LMLgrad_covar = (0.5 * SP.trace(LA.solve(K, Kgrad_covar)) -
                     0.5 * SP.dot(KinvY.T, SP.dot(Kgrad_covar, KinvY)))
    LMLgrad_covar += prior_cov.LMLgrad(hyperparams_opt)['covar']
    LMLgrad_lik = (0.5 * SP.trace(LA.solve(K, Kgrad_lik)) -
                   0.5 * SP.dot(KinvY.T, SP.dot(Kgrad_lik, KinvY)))
    LMLgrad_lik += prior_lik.LMLgrad(hyperparams_opt)['lik']
    assert SP.allclose(LML, _LML), 'ouch, marginal log likelihood does not match'
    assert SP.allclose(LMLgrad['covar'], LMLgrad_covar), \
        'ouch, gradient with respect to covar does not match'
    assert SP.allclose(LMLgrad['lik'], LMLgrad_lik), \
        'ouch, gradient with respect to lik does not match'

    # predict
    Ystar = gp.predict(hyperparams_opt, self.X['test'])
    Kstar = covar.Kcross(hyperparams_opt['covar'])
    _Ystar = SP.dot(Kstar.T, LA.solve(K, self.Yuni['train'])).flatten()
    assert SP.allclose(Ystar, _Ystar), 'ouch, predictions do not match'
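# The reference computation above takes LA.det(K) directly, which can over- or
# underflow for larger K. A numerically safer equivalent (a sketch, not what
# the test itself uses) obtains the log-determinant from a Cholesky factor:
def example_logdet_via_cholesky(K):
    L = LA.cholesky(K)
    return 2 * SP.sum(SP.log(SP.diag(L)))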
# Normalization
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)
Y_scaler = StandardScaler()
Y_scaler.fit(Y_train)
Y_train = Y_scaler.transform(Y_train)

################################# GP_base Approach ########################
if (method == 'base' or method == 'all'):
    for i in range(n_tasks):
        hyperparams, Ifilter, bounds = initialize.init(
            'GPbase_LIN', Y_train[:, i].T, X_train, None)
        covariance = linear.LinearCF(n_dimensions=X_train.shape[0])
        likelihood = lik.GaussIsoLik()
        gp = gp_base.GP(covar=covariance, likelihood=likelihood)
        gp.setData(Y=Y_train[:, i:i + 1], X_r=X_train)

        # Training: optimize hyperparameters
        hyperparams_opt, lml_opt = optimize_base.opt_hyper(
            gp, hyperparams, bounds=bounds, Ifilter=Ifilter)

        # Testing
        results_base['Y_pred'][s, :, i], results_base['Y_pred_cov'][s, :, i] = \
            gp.predict(hyperparams_opt, Xstar_r=X_test)
        results_base['s_n2'][s, i] = np.exp(2 * hyperparams_opt['lik'])

    results_base['Y_pred'][s, :, :] = Y_scaler.inverse_transform(
        results_base['Y_pred'][s, :, :])
    results_base['Y_pred_cov'][
def run(methods, data, opts, f):
    """run methods"""
    # load data
    X_r = data.getX(standardized=opts['standardizedX'], maf=opts['maf'])
    Y = data.getY(standardized=opts['standardizedY']).T
    n_s, n_f = X_r.shape
    n_t = Y.shape[1]

    # indices for cross-validation
    r = SP.random.permutation(n_s)
    Icv = SP.floor(((SP.ones((n_s)) * opts['nfolds']) * r) / n_s)

    if 'CV_GPbase_LIN' in methods:
        print 'do cross-validation with GPbase'
        t_start = time.time()
        covariance = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_base.GP(covar=covariance, likelihood=likelihood)
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        lml_test = SP.zeros((n_t, opts['nfolds']))
        for i in range(n_t):
            for j in range(opts['nfolds']):
                LG.info('Train Pheno %d' % i)
                LG.info('Train Fold %d' % j)
                Itrain = Icv != j
                Itest = Icv == j
                y = SP.reshape(Y[:, i], (n_s, 1))
                cv_idx = (j + 1) % opts['nfolds']
                RV = run_optimizer('GPbase_LIN', gp, opts=opts, Y=y[Itrain],
                                   X_r=X_r[Itrain], Icv=Icv[Itrain], cv_idx=cv_idx)
                lml_test[i, j] = RV['lml_test']
                Ypred[Itest, i] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
                YpredCV[Icv == cv_idx, i] = RV['Ypred']
        lml_test = lml_test.sum(0)
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {'Y': Y, 'Ypred': Ypred, 'r2': r2, 'time': t_stop - t_start,
              'Icv': Icv, 'lml_test': lml_test, 'YpredCV': YpredCV}
        if f is not None:
            out = f.create_group('CV_GPbase_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPpool_LIN' in methods:
        print 'do cross-validation with GPpool'
        t_start = time.time()
        covar_c = linear.LinearCF(n_dimensions=1)  # vector of 1s
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        gp = gp_kronprod.KronProdGP(covar_r=covar_r, covar_c=covar_c,
                                    likelihood=likelihood)
        gp.setData(X_c=SP.ones((Y.shape[1], 1)))
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPpool_LIN', gp, opts=opts, Y=Y[Itrain],
                               X_r=X_r[Itrain], Icv=Icv[Itrain], cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {'Y': Y, 'Ypred': Ypred, 'r2': r2, 'time': t_stop - t_start,
              'Icv': Icv, 'lml_test': lml_test, 'YpredCV': YpredCV}
        if f is not None:
            out = f.create_group('CV_GPpool_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronprod_LIN' in methods:
        print 'do cross-validation with GPkronprod (linear kernel)'
        t_start = time.time()
        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        likelihood = lik.GaussIsoLik()
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        gp = gp_kronprod.KronProdGP(covar_r=covar_r, covar_c=covar_c,
                                    likelihood=likelihood)
        lml_test = SP.zeros(opts['nfolds'])
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronprod_LIN', gp, opts=opts, Y=Y[Itrain],
                               X_r=X_r[Itrain], Icv=Icv[Itrain], cv_idx=cv_idx)
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {'Y': Y, 'Ypred': Ypred, 'r2': r2, 'time': t_stop - t_start,
              'Icv': Icv, 'lml_test': lml_test, 'YpredCV': YpredCV}
        if f is not None:
            out = f.create_group('CV_GPkronprod_LIN')
            utils.storeHashHDF5(out, RV)

    if 'CV_GPkronsum_LIN' in methods:
        print 'do cross-validation with GPkronsum (linear kernel)'
        t_start = time.time()
        covar_c = lowrank.LowRankCF(n_dimensions=opts['n_c'])
        covar_r = linear.LinearCF(n_dimensions=n_f)
        covar_s = lowrank.LowRankCF(n_dimensions=opts['n_sigma'])
        X_o = SP.zeros((Y.shape[0], 1))
        covar_o = diag.DiagIsoCF(n_dimensions=1)
        gp = gp_kronsum.KronSumGP(covar_r=covar_r, covar_c=covar_c,
                                  covar_s=covar_s, covar_o=covar_o)
        lml_test = SP.zeros(opts['nfolds'])
        Ypred = SP.zeros(Y.shape)
        YpredCV = SP.zeros(Y.shape)
        for j in range(opts['nfolds']):
            LG.info('Train Fold %d' % j)
            Itrain = Icv != j
            Itest = Icv == j
            cv_idx = (j + 1) % opts['nfolds']
            RV = run_optimizer('GPkronsum_LIN', gp, opts=opts, Y=Y[Itrain],
                               X_r=X_r[Itrain], Icv=Icv[Itrain], cv_idx=cv_idx,
                               X_o=X_o[Itrain])
            Ypred[Itest] = gp.predict(RV['hyperparams_opt'], X_r[Itest])
            YpredCV[Icv == cv_idx] = RV['Ypred']
            lml_test[j] = RV['lml_test']
        t_stop = time.time()
        r2 = (SP.corrcoef(Y.flatten(), Ypred.flatten())[0, 1])**2
        print '... squared correlation coefficient: %.4f' % r2
        RV = {'Y': Y, 'Ypred': Ypred, 'r2': r2, 'time': t_stop - t_start,
              'Icv': Icv, 'lml_test': lml_test, 'YpredCV': YpredCV}
        if f is not None:
            out = f.create_group('CV_GPkronsum_LIN')
            utils.storeHashHDF5(out, RV)

    return RV
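# Hypothetical driver for run(), assuming a `data` object exposing getX/getY as
# used above; the option keys follow exactly those accessed in this function,
# but the values are illustrative.
def example_run_cv(data):
    import h5py
    opts = {'standardizedX': True, 'standardizedY': True, 'maf': 0.05,
            'nfolds': 5, 'n_c': 1, 'n_sigma': 1}
    f = h5py.File('results.hdf5', 'w')
    RV = run(['CV_GPbase_LIN', 'CV_GPkronsum_LIN'], data, opts, f)
    f.close()
    return RV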