Example #1
    def test_missing_data(self):
        import numpy as np
        from GPy import kern
        from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
        from GPy.examples.dimensionality_reduction import _simulate_matern

        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
        Y = Ylist[0]

        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool)  # ~90% missing data
        Ymissing = Y.copy()
        Ymissing[inan] = np.nan

        k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
                          kernel=k, missing_data=True)
        assert m.checkgrad()
        mul, varl = m.predict(m.X)

        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
        m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
                          kernel=k, missing_data=True)
        assert m2.checkgrad()
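        # Tie m2's parameters to m's; with the lengthscale at 1e6 the RBF
        # kernel is nearly constant over the latent points.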
        m2.kern.rbf.lengthscale[:] = 1e6
        m2.X[:] = m.X.param_array
        m2.likelihood[:] = m.likelihood[:]
        m2.kern.white[:] = m.kern.white[:]
        mu, var = m2.predict(m2.X)
        np.testing.assert_allclose(mul, mu)
        np.testing.assert_allclose(varl, var)

        q50 = m.predict_quantiles(m.X, (50,))
        np.testing.assert_allclose(mul, q50[0])
Example #2
File: model_tests.py  Project: yincheng/GPy
    def test_missing_data(self):
        import numpy as np
        from GPy import kern
        from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
        from GPy.examples.dimensionality_reduction import _simulate_matern

        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
        Y = Ylist[0]

        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool)  # ~90% missing data
        Ymissing = Y.copy()
        Ymissing[inan] = np.nan

        k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2))  # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing,
                                   Q,
                                   init="random",
                                   num_inducing=num_inducing,
                                   kernel=k,
                                   missing_data=True)
        assert m.checkgrad()

        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2))  # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing,
                                   Q,
                                   init="random",
                                   num_inducing=num_inducing,
                                   kernel=k,
                                   missing_data=True)
        assert m.checkgrad()
Example #3
def mrd_simulation_missing_data(optimize=True,
                                verbose=True,
                                plot=True,
                                plot_sim=True,
                                **kw):
    import numpy as _np

    from GPy import kern
    from GPy.models import MRD

    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)

    k = kern.Linear(Q, ARD=True) + kern.White(Q, variance=1e-4)
    inanlist = []

    for Y in Ylist:
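        # Mask ~60% of each view's entries as missing (NaN)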
        inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
        inanlist.append(inan)
        Y[inan] = _np.nan

    m = MRD(Ylist,
            input_dim=Q,
            num_inducing=num_inducing,
            kernel=k,
            inference_method=None,
            initx="random",
            initz='permute',
            **kw)

    if optimize:
        print("Optimizing Model:")
        m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
    if plot:
        m.X.plot("MRD Latent Space 1D")
        m.plot_scales()
    return m
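A quick smoke test, assuming this function lives alongside _simulate_matern in GPy's dimensionality_reduction examples module (this call is illustrative, not from the original source):

m = mrd_simulation_missing_data(optimize=False, plot=False, plot_sim=False)
print(m.log_likelihood())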
Example #4
def mrd_simulation(optimize=True,
                   verbose=True,
                   plot=True,
                   plot_sim=True,
                   **kw):
    from GPy import kern
    from GPy.models import MRD

    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)

    k = kern.Linear(Q, ARD=True) + kern.White(Q, variance=1e-4)
    m = MRD(Ylist,
            input_dim=Q,
            num_inducing=num_inducing,
            kernel=k,
            initx="PCA_concat",
            initz='permute',
            **kw)

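    # Initialize each view's Gaussian noise variance to 1/40 of its data variance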
    m['.*noise'] = [Y.var() / 40. for Y in Ylist]

    if optimize:
        print("Optimizing Model:")
        m.optimize(messages=verbose, max_iters=8e3)
    if plot:
        m.X.plot("MRD Latent Space 1D")
        m.plot_scales()
    return m
Example #5
    def optimize(self,
                 views,
                 latent_dims=7,
                 messages=True,
                 max_iters=8e3,
                 save_model=False):
        if self.kernel == 'rbf':
            print("Chosen kernel: RBF")
            print("Chosen lengthscale: " + str(self.lengthscale))
            k = kern.RBF(latent_dims,
                         ARD=True,
                         lengthscale=self.lengthscale) + kern.White(
                             latent_dims,
                             variance=1e-4) + GPy.kern.Bias(latent_dims)
        elif self.kernel == 'linear':
            print("Chosen kernel: Linear")
            k = kern.Linear(latent_dims, ARD=True) + kern.White(
                latent_dims, variance=1e-4) + GPy.kern.Bias(latent_dims)
        else:
            # Covers both no kernel and an unrecognized kernel name,
            # so k is always defined.
            print("No kernel chosen - using RBF with lengthscale 10...")
            k = kern.RBF(latent_dims, ARD=True, lengthscale=10) + kern.White(
                latent_dims, variance=1e-4) + GPy.kern.Bias(latent_dims)

        print("Number of inducing inputs: " + str(self.num_inducing))
        m = MRD(views,
                input_dim=latent_dims,
                num_inducing=self.num_inducing,
                kernel=k,
                normalizer=False)
        print("Optimizing Model...")
        m.optimize(messages=messages, max_iters=max_iters)

        if save_model:
            pickle.dump(m, open(save_model, "wb"), protocol=2)

        self.model = m
Example #6
File: util.py  Project: juliangilg/CCGPMA
import numpy as np
from GPy import kern


def latent_functions_prior(Q, lengthscale=None, variance=None, input_dim=None):
    # Default to random lengthscales and variances, one per latent function
    if lengthscale is None:
        lengthscale = np.random.rand(Q)
    if variance is None:
        variance = np.random.rand(Q)

    kern_list = []
    for q in range(Q):
        kern_q = kern.RBF(input_dim=input_dim,
                          lengthscale=lengthscale[q],
                          variance=variance[q],
                          name='rbf') + kern.White(input_dim, variance=1e-8)
        kern_q.name = 'kern_q' + str(q)
        kern_list.append(kern_q)
    return kern_list
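A minimal usage sketch for the helper above; the arguments are made-up illustrative values (note that input_dim must be supplied, since the None default would fail inside kern.RBF):

kern_list = latent_functions_prior(Q=3, input_dim=2)
for kern_q in kern_list:
    print(kern_q.name)  # kern_q0, kern_q1, kern_q2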
Example #7
import pandas as pd
import NaiveDE
import GPclust
from GPy import kern
from time import time


def main():
    sample_info = pd.read_csv('MOB_sample_info.csv', index_col=0)

    df = pd.read_csv('data/Rep11_MOB_0.csv', index_col=0)
    df = df.loc[sample_info.index]
    df = df.T[df.sum(0) >= 3].T  # Filter practically unobserved genes

    # Variance-stabilize, then regress out log total counts per spot
    dfm = NaiveDE.stabilize(df.T).T
    res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T

    X = sample_info[['x', 'y']].values

    times = pd.DataFrame(columns=['N', 'time'])
    Ns = [50, 100, 200, 300, 500, 750, 1000, 2000]

    j = 0
    for N in Ns:
        for i in range(5):

            # Sample N random genes and time MOHGP clustering on them
            Y = res.sample(N, axis=1).values.T

            t0 = time()

            m = GPclust.MOHGP(X=X,
                              Y=Y,
                              kernF=kern.RBF(2) + kern.Bias(2),
                              kernY=kern.RBF(1) + kern.White(1),
                              K=5,
                              prior_Z='DP')

            m.hyperparam_opt_args['messages'] = False
            m.optimize(step_length=0.1, verbose=False, maxiter=2000)

            times.loc[j] = [N, time() - t0]
            print(times.loc[j])
            j += 1

    times.to_csv('AEH_times.csv')
Example #8
from GPy import kern as _Gk

def WN(): return _Gk.White(1)  # white-noise kernel on a 1-D input

def C(): return _Gk.Bias(1)  # constant (bias) kernel on a 1-D input
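These factories compose with GPy's kernel arithmetic. A minimal sketch of putting them to use in a GP regression; the toy data is invented for illustration:

import numpy as np
import GPy

X = np.linspace(0, 1, 20)[:, None]
Y = np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y, kernel=WN() + C())
print(m)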