Example #1
import numpy as np
from GPy import kern


def latent_functions_prior(Q,
                           lengthscale=None,
                           variance=None,
                           input_dim=None,
                           ARD=False,
                           inv_l=False):
    # Draw random hyperparameters for any that were not supplied.
    if lengthscale is None:
        lengthscale = np.random.rand(Q)
    if variance is None:
        variance = np.random.rand(Q)

    kern_list = []
    for q in range(Q):
        kern_q = kern.RBF(input_dim=input_dim,
                          lengthscale=lengthscale[q],
                          variance=variance[q],
                          name='rbf',
                          ARD=ARD,
                          inv_l=inv_l)  # + kern.White(input_dim)
        kern_q.name = 'kern_q' + str(q)
        kern_list.append(kern_q)
    return kern_list
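A minimal usage sketch (assuming GPy and NumPy are installed; it simply draws Q independent RBF kernels with random hyperparameters):

import numpy as np

np.random.seed(0)
kernels = latent_functions_prior(Q=3, input_dim=2)
for k in kernels:
    print(k)  # each entry is an RBF kernel named 'kern_q0', 'kern_q1', ...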
Example #2
    def __init__(self, X, Y, Z):
        BlackBox.__init__(self, X, Y)
        #Z = 2.0 * np.random.rand(int(np.sqrt(X.shape[0])), X.shape[1])
        K = kern.RBF(X.shape[1], 1.0, 1.0 * np.ones(X.shape[1]), ARD=True)
        self.m = models.SparseGPRegression(X, Y, Z=Z,
                                           kernel=K)  # Z is "inducing inputs"
        self.m.optimize('bfgs', max_iters=200)
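For context, a minimal standalone sketch of the same sparse-regression call (the toy data and inducing-point choice are illustrative; only GPy's SparseGPRegression and RBF kernel are assumed):

import numpy as np
import GPy

X = np.random.rand(200, 2)
Y = np.sin(3 * X[:, :1]) + 0.05 * np.random.randn(200, 1)
Z = X[np.random.choice(X.shape[0], 15, replace=False)]  # subset of the data as inducing inputs

m = GPy.models.SparseGPRegression(X, Y, Z=Z, kernel=GPy.kern.RBF(2, ARD=True))
m.optimize('bfgs', max_iters=200)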
Example #3
    def __init__(self,
                 X,
                 Y,
                 kernel=None,
                 Y_metadata=None,
                 changepoint=0,
                 changepointDim=0):

        Ny = Y.shape[0]

        if Y_metadata is None:
            # 'output_index': np.arange(Ny)[:, None],
            Y_metadata = {
                'side': np.array([0 if x < changepoint else 1
                                  for x in X[:, changepointDim]])[:, None]
            }
        else:
            assert Y_metadata['output_index'].shape[0] == Ny

        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        #Likelihood
        #likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)]
        # noise_terms = np.unique(Y_metadata['output_index'].flatten())
        # likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms]
        side1Noise = likelihoods.Gaussian(name="Gaussian_noise_side1")
        side2Noise = likelihoods.Gaussian(name="Gaussian_noise_side2")
        #likelihoods_list = [side1Noise if x < changepoint else side2Noise for x in X[:,changepointDim]]
        likelihood = MixedNoise_twoSide(side1Noise, side2Noise)

        super(GPHeteroscedasticRegression_twoSided,
              self).__init__(X, Y, kernel, likelihood, Y_metadata=Y_metadata)
Example #4
    def __init__(self,
                 X,
                 Y,
                 kernel=None,
                 Y_metadata=None,
                 normalizer=None,
                 noise_var=1.,
                 mean_function=None,
                 A=None):

        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        likelihood = likelihoods.Gaussian(variance=noise_var)

        super(GPRegression_Group, self).__init__(X,
                                                 Y,
                                                 kernel,
                                                 likelihood,
                                                 name='GP regression group',
                                                 Y_metadata=Y_metadata,
                                                 normalizer=normalizer,
                                                 mean_function=mean_function)
        self.inference_method = ExactGaussianInferenceGroup()
        self.A = A
Example #5
    def __init__(self,
                 X,
                 Y,
                 kernel=None,
                 warping_function=None,
                 warping_terms=3):

        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        if warping_function is None:
            self.warping_function = TanhWarpingFunction_d(warping_terms)
            self.warping_params = np.random.randn(
                self.warping_function.n_terms * 3 + 1)
        else:
            self.warping_function = warping_function

        self.scale_data = False
        if self.scale_data:
            Y = self._scale_data(Y)
        self.has_uncertain_inputs = False
        self.Y_untransformed = Y.copy()
        self.predict_in_warped_space = False
        likelihood = likelihoods.Gaussian()

        GP.__init__(self,
                    X,
                    self.transform_data(),
                    likelihood=likelihood,
                    kernel=kernel)
        self.link_parameter(self.warping_function)
Example #6
    def __init__(self,
                 X,
                 Y,
                 kernel=None,
                 warping_function=None,
                 warping_terms=3,
                 normalizer=False):
        if kernel is None:
            kernel = kern.RBF(X.shape[1])
        if warping_function is None:
            self.warping_function = TanhFunction(warping_terms)
            self.warping_params = np.random.randn(
                self.warping_function.n_terms * 3 + 1)
        else:
            self.warping_function = warping_function
        likelihood = likelihoods.Gaussian()
        super(WarpedGP, self).__init__(X,
                                       Y.copy(),
                                       likelihood=likelihood,
                                       kernel=kernel,
                                       normalizer=normalizer)
        self.Y_normalized = self.Y_normalized.copy()
        self.Y_untransformed = self.Y_normalized.copy()
        self.predict_in_warped_space = True
        self.link_parameter(self.warping_function)
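A short usage sketch of the warped-GP model this constructor belongs to (assuming it is exposed as GPy.models.WarpedGP; the skewed toy data are illustrative):

import numpy as np
import GPy

X = np.random.rand(100, 1)
Y = np.exp(np.sin(6 * X) + 0.1 * np.random.randn(100, 1))  # positive, skewed targets

m = GPy.models.WarpedGP(X, Y, warping_terms=3)
m.optimize(messages=False)
print(m)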
Example #7
    def test_missing_data(self):
        from GPy import kern
        from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
        from GPy.examples.dimensionality_reduction import _simulate_matern

        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
        Y = Ylist[0]

        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool)  # ~90% of entries become missing
        Ymissing = Y.copy()
        Ymissing[inan] = np.nan

        k = kern.Linear(Q, ARD=True) + kern.White(Q,
                                                  np.exp(-2))  # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing,
                                   Q,
                                   init="random",
                                   num_inducing=num_inducing,
                                   kernel=k,
                                   missing_data=True)
        assert (m.checkgrad())

        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2))  # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing,
                                   Q,
                                   init="random",
                                   num_inducing=num_inducing,
                                   kernel=k,
                                   missing_data=True)
        assert (m.checkgrad())
Example #8
    def __init__(self,
                 X_list,
                 Y_list,
                 kernel=None,
                 likelihoods_list=None,
                 mean_function=None,
                 name='GPCR',
                 W_rank=1,
                 kernel_name='coreg'):
        # Input and Output
        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
        Ny = len(Y_list)

        # Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1] - 1)

            kernel = util.multioutput.ICM(input_dim=X.shape[1] - 1,
                                          num_outputs=Ny,
                                          kernel=kernel,
                                          W_rank=W_rank,  # honour the W_rank argument
                                          name=kernel_name)

        # Likelihood
        likelihood = util.multioutput.build_likelihood(Y_list,
                                                       self.output_index,
                                                       likelihoods_list)

        super(GPCoregionalizedWithMeanRegression,
              self).__init__(X,
                             Y,
                             kernel,
                             likelihood,
                             mean_function,
                             Y_metadata={'output_index': self.output_index})
Example #9
    def test_missing_data(self):
        from GPy import kern
        from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
        from GPy.examples.dimensionality_reduction import _simulate_matern

        D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
        _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
        Y = Ylist[0]

        inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # ~90% missing data
        Ymissing = Y.copy()
        Ymissing[inan] = np.nan

        k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
        m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
                          kernel=k, missing_data=True)
        assert(m.checkgrad())
        mul, varl = m.predict(m.X)

        k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
        m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
                          kernel=k, missing_data=True)
        assert(m2.checkgrad())  # gradient check for the second model
        m2.kern.rbf.lengthscale[:] = 1e6
        m2.X[:] = m.X.param_array
        m2.likelihood[:] = m.likelihood[:]
        m2.kern.white[:] = m.kern.white[:]
        mu, var = m.predict(m.X)
        np.testing.assert_allclose(mul, mu)
        np.testing.assert_allclose(varl, var)

        q50 = m.predict_quantiles(m.X, (50,))
        np.testing.assert_allclose(mul, q50[0])
Example #10
def standard_models(X):
    """
    Return kernels for model selection
    """
    from GPy import kern
    return [
            ['Mat+Lin', kern.Matern32(X.shape[1]) + kern.Linear(X.shape[1], variances=.01) + kern.Bias(X.shape[1])], 
            ['Exp+Lin', kern.Exponential(X.shape[1]) + kern.Linear(X.shape[1], variances=.01) + kern.Bias(X.shape[1])], 
            ['RBF+Lin', kern.RBF(X.shape[1]) + kern.Linear(X.shape[1], variances=.01) + kern.Bias(X.shape[1])], 
            ['Lin', kern.Linear(X.shape[1], variances=.01) + kern.Bias(X.shape[1])],
            ]
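A hedged sketch of how such a kernel list can drive model selection (toy data; comparing optimized marginal log-likelihoods is an assumption of this sketch, not part of the original snippet):

import numpy as np
import GPy

X = np.random.rand(80, 3)
Y = X[:, :1] + 0.1 * np.random.randn(80, 1)

for name, k in standard_models(X):
    m = GPy.models.GPRegression(X, Y, kernel=k)
    m.optimize(messages=False)
    print(name, m.log_likelihood())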
Example #11
import pandas as pd
from time import time

import NaiveDE
import GPclust
from GPy import kern


def main():
    sample_info = pd.read_csv('MOB_sample_info.csv', index_col=0)

    df = pd.read_csv('data/Rep11_MOB_0.csv', index_col=0)
    df = df.loc[sample_info.index]
    df = df.T[df.sum(0) >= 3].T  # Filter practically unobserved genes

    dfm = NaiveDE.stabilize(df.T).T
    res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T

    X = sample_info[['x', 'y']].values

    times = pd.DataFrame(columns=['N', 'time'])
    Ns = [50, 100, 200, 300, 500, 750, 1000, 2000]

    j = 0
    for N in Ns:
        for i in range(5):

            Y = res.sample(N, axis=1).values.T

            t0 = time()

            m = GPclust.MOHGP(X=X,
                              Y=Y,
                              kernF=kern.RBF(2) + kern.Bias(2),
                              kernY=kern.RBF(1) + kern.White(1),
                              K=5,
                              prior_Z='DP')

            m.hyperparam_opt_args['messages'] = False
            m.optimize(step_length=0.1, verbose=False, maxiter=2000)

            times.loc[j] = [N, time() - t0]
            print(times.loc[j])
            j += 1

    times.to_csv('AEH_times.csv')
Example #12
    def fit_gaussian_process(self):
        """
        Fits a Gaussian process on the Collection while saving the GP and the kernel
        """

        # RBF = radial basis function / squared exponential by default
        self.gp_kernel = kern.RBF(input_dim=self.GP_INPUT_DIM,
                                  variance=self.GP_VARIANCE,
                                  lengthscale=self.GP_LENGTH_SCALE)

        x, y = self.collection.obs_times, self.collection.obs_seqs_norm

        self.fit_gp, self.gp_kernel = Metrics._fit_gaussian_process(
            x, y, self.gp_kernel, self.NUM_OPTIMIZE_RESTARTS)
Example #13
    def _fit_gaussian_process(x,
                              y,
                              kernel=kern.RBF(1, 1., 10.),
                              num_restarts: int = 7):
        """
        Fits a Gaussian process
        """

        x, y = Metrics._prepare_inputs_for_fitting_gp(x, y)

        gp = models.GPRegression(x, y, kernel)
        gp.optimize_restarts(num_restarts=num_restarts, messages=False)

        return gp, kernel
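A minimal standalone sketch of the same fitting pattern (Metrics._prepare_inputs_for_fitting_gp is not shown above, so the toy inputs here are reshaped by hand; only GPy's GPRegression API is assumed):

import numpy as np
from GPy import kern, models

x = np.linspace(0, 10, 50)[:, None]  # GPy expects 2-D arrays
y = np.sin(x) + 0.1 * np.random.randn(*x.shape)

gp = models.GPRegression(x, y, kern.RBF(1, 1., 10.))
gp.optimize_restarts(num_restarts=7, messages=False)
print(gp)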
Example #14
    def optimize(self,
                 views,
                 latent_dims=7,
                 messages=True,
                 max_iters=8e3,
                 save_model=False):
        if (self.kernel):
            if (self.kernel == 'rbf'):
                print("Chosen kernel: RBF")
                print("Chosen lengthscale: " + self.lengthscale)
                k = kern.RBF(latent_dims,
                             ARD=True,
                             lengthscale=self.lengthscale) + kern.White(
                                 latent_dims,
                                 variance=1e-4) + GPy.kern.Bias(latent_dims)
            elif (self.kernel == 'linear'):
                print("Chosen kernel: Linear")
                k = kern.Linear(latent_dims, ARD=True) + kern.White(
                    latent_dims, variance=1e-4) + GPy.kern.Bias(latent_dims)
        else:
            print("No kernel or chosen - using RBF with lengthscale 10...")
            k = kern.RBF(latent_dims, ARD=True, lengthscale=10) + kern.White(
                latent_dims, variance=1e-4) + GPy.kern.Bias(latent_dims)

        print("Number of inducing inputs: " + str(self.num_inducing))
        m = MRD(views,
                input_dim=latent_dims,
                num_inducing=self.num_inducing,
                kernel=k,
                normalizer=False)
        print("Optimizing Model...")
        m.optimize(messages=messages, max_iters=max_iters)  # honour the method arguments

        if (save_model):
            pickle.dump(m, open(save_model, "wb"), protocol=2)

        self.model = m
Example #15
def gen_s_curve(rng, emissions):
    """Generate synthetic data from datasets generating process.
    """
    N = 500
    J = 100
    D = 2

    # Generate latent manifold.
    # -------------------------
    X, t = make_s_curve(N, random_state=rng)
    X = np.delete(X, obj=1, axis=1)
    X = X / np.std(X, axis=0)
    inds = t.argsort()
    X = X[inds]
    t = t[inds]

    # Generate kernel `K` and latent GP-distributed maps `F`.
    # -------------------------------------------------------
    K = kern.RBF(input_dim=D, lengthscale=1).K(X)
    F = rng.multivariate_normal(np.zeros(N), K, size=J).T

    # Generate emissions using `F` and/or `K`.
    # ----------------------------------------
    if emissions == 'bernoulli':
        P = logistic(F)
        Y = rng.binomial(1, P).astype(np.double)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    if emissions == 'gaussian':
        Y = F + rng.normal(0, scale=0.5, size=F.shape)  # use the provided RNG
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'multinomial':
        C = 100
        pi = np.exp(F - logsumexp(F, axis=1)[:, None])
        Y = np.zeros(pi.shape)
        for n in range(N):
            Y[n] = rng.multinomial(C, pi[n])
        return Dataset('s-curve', False, Y, X, F, K, None, t)
    elif emissions == 'negbinom':
        P = logistic(F)
        R = np.arange(1, J + 1, dtype=float)
        Y = rng.negative_binomial(R, 1 - P)
        return Dataset('s-curve', False, Y, X, F, K, R, t)
    else:
        assert (emissions == 'poisson')
        theta = np.exp(F)
        Y = rng.poisson(theta)
        return Dataset('s-curve', False, Y, X, F, K, None, t)
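A short usage sketch (assuming the original module's imports, e.g. make_s_curve from sklearn.datasets, logsumexp from scipy.special, a logistic helper, and the Dataset container, are in scope):

import numpy as np

rng = np.random.RandomState(0)
data = gen_s_curve(rng, 'poisson')  # emissions: 'bernoulli', 'gaussian', 'multinomial', 'negbinom' or 'poisson'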
Example #16
def latent_functions_prior(Q, lengthscale=None, variance=None, input_dim=None):
    if lengthscale is None:
        lengthscale = np.random.rand(Q)
    if variance is None:
        variance = np.random.rand(Q)
    kern_list = []
    for q in range(Q):
        #######   RBF GP prior  #######
        kern_q = kern.RBF(input_dim=input_dim, lengthscale=lengthscale[q],
                          variance=variance[q], name='rbf')
        kern_q.name = 'kern_q' + str(q)
        kern_list.append(kern_q)
    return kern_list
Example #17
    def __init__(  # pylint:disable=too-many-arguments
        self,
        X_list,
        Y_list,
        kernel=None,
        normalizer=None,
        likelihoods_list=None,
        name="GPCR",
        W_rank=1,
        kernel_name="coreg",
    ):

        # Input and Output
        (
            X,  # pylint:disable=invalid-name
            Y,  # pylint:disable=invalid-name
            self.output_index,
        ) = util.multioutput.build_XY(X_list, Y_list)
        Ny = len(Y_list)  # pylint:disable=invalid-name

        # Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1] - 1)

            kernel = util.multioutput.ICM(
                input_dim=X.shape[1] - 1,
                num_outputs=Ny,
                kernel=kernel,
                W_rank=W_rank,
                name=kernel_name,
            )

        # Likelihood
        likelihood = util.multioutput.build_likelihood(Y_list,
                                                       self.output_index,
                                                       likelihoods_list)

        super(GPCoregionalizedRegression, self).__init__(  # pylint:disable=super-with-arguments
            X,
            Y,
            kernel,
            likelihood,
            Y_metadata={"output_index": self.output_index},
            normalizer=normalizer,
        )
Example #18
    def __init__(self,
                 X_list,
                 Y_list,
                 W,
                 kernel=None,
                 likelihoods_list=None,
                 name='GPCR',
                 W_rank=1,
                 kernel_name='coreg'):

        #Input and Output
        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
        Ny = len(Y_list)
        self.opt_trajectory = []
        self.PEHE_trajectory = []
        self.MSE_trajectory = []
        self.treatment_assign = W

        self.logdetK = 0

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1] - 1)

            kernel = util.multioutput.ICM(input_dim=X.shape[1] - 1,
                                          num_outputs=Ny,
                                          kernel=kernel,
                                          W_rank=W_rank,  # honour the W_rank argument
                                          name=kernel_name)

        #Likelihood
        likelihood = util.multioutput.build_likelihood(Y_list,
                                                       self.output_index,
                                                       likelihoods_list)

        super(CMGP,
              self).__init__(X,
                             Y,
                             kernel,
                             likelihood,
                             inference_method=RiskEmpiricalBayes(),
                             Y_metadata={'output_index': self.output_index})

        self.X = Param("input", X)
Example #19
    def __init__(self,
                 X,
                 Y,
                 kernel=None,
                 Y_metadata=None,
                 noise_mult=1.,
                 known_variances=1.0):

        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        # known_variances must be an array with the same shape as Y
        # (the scalar default of 1.0 will not pass this check).
        assert known_variances.shape == Y.shape
        # Likelihood
        likelihood = ScaledHeteroscedasticGaussian(
            Y_metadata=Y_metadata,
            noise_mult=noise_mult,
            known_variances=known_variances)

        super(ScaledHeteroscedasticRegression,
              self).__init__(X, Y, kernel, likelihood, Y_metadata=Y_metadata)
Example #20
    def __init__(self, Y, dim_down, dim_up, likelihood, MLP_dims=None, X=None, X_variance=None, init='rand', Z=None, num_inducing=10, kernel=None, inference_method=None, uncertain_inputs=True, mpi_comm=None, mpi_root=0, back_constraint=True, name='mrd-view'):

        self.uncertain_inputs = uncertain_inputs
        self.layer_lower = None
        self.scale = 1.

        if back_constraint:
            from .mlp import MLP
            from copy import deepcopy
            self.encoder = MLP([dim_down, int((dim_down+dim_up)*2./3.), int((dim_down+dim_up)/3.), dim_up] if MLP_dims is None else [dim_down]+deepcopy(MLP_dims)+[dim_up])
            X = self.encoder.predict(Y.mean.values if isinstance(Y, VariationalPosterior) else Y)
            X_variance = 0.0001*np.ones(X.shape)
            self.back_constraint = True
        else:
            self.back_constraint = False

        if Z is None:
            Z = np.random.rand(num_inducing, dim_up)*2-1. #np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]
        
        if likelihood is None: likelihood = likelihoods.Gaussian(variance=Y.var()*0.01)
        
        if uncertain_inputs: X = NormalPosterior(X, X_variance)
        if kernel is None: kernel = kern.RBF(dim_up, ARD = True)
        
        # The command below will also give the field self.X to the view.
        super(MRDView, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, mpi_comm=mpi_comm, mpi_root=mpi_root, name=name)
        if back_constraint: self.link_parameter(self.encoder)

        if self.uncertain_inputs and self.back_constraint:
            from GPy import Param
            from GPy.core.parameterization.transformations import Logexp
            self.X_var_common = Param('X_var', X_variance[0].copy(), Logexp())
            self.link_parameters(self.X_var_common)
        # There is some redundancy between self.Xv and self.X: self.X is used for the likelihood
        # and all other calculations, while self.Xv is only used for its gradient (self.Xv.gradient).
        # The redundancy is kept in case we want to build the product-of-experts MRD model.
        self.Xv = self.X
Example #21
    def __init__(self, layer_lower, dim_down, dim_up, likelihood, X=None, X_variance=None, init='PCA', Z=None, num_inducing=10, kernel=None, inference_method=None, uncertain_inputs=True, mpi_comm=None, mpi_root=0, back_constraint=True, encoder=None, auto_update=True, name='layer'):

        self.uncertain_inputs = uncertain_inputs
        self.layer_lower = layer_lower
        Y = self.Y if self.layer_lower is None else self.layer_lower.X
        self.back_constraint = back_constraint

        from deepgp.util.util import initialize_latent
        if X is None: X, _ = initialize_latent(init, Y.shape[0], dim_up, Y.mean.values if isinstance(Y, VariationalPosterior) else Y)
        if X_variance is None: X_variance = 0.01*np.ones(X.shape) + 0.01*np.random.rand(*X.shape)
            
        if Z is None:
            if self.back_constraint: Z = np.random.rand(num_inducing, dim_up)*2-1.
            else:
                if num_inducing<=X.shape[0]:
                    Z = X[np.random.permutation(X.shape[0])[:num_inducing]].copy()
                else:
                    Z_more = np.random.rand(num_inducing-X.shape[0],X.shape[1])*(X.max(0)-X.min(0))+X.min(0)
                    Z = np.vstack([X.copy(),Z_more])
        assert Z.shape[1] == X.shape[1]
        
        if mpi_comm is not None:
            from ..util.parallel import broadcastArrays
            broadcastArrays([Z], mpi_comm, mpi_root)
        
        if uncertain_inputs: X = NormalPosterior(X, X_variance)
        if kernel is None: kernel = kern.RBF(dim_up, ARD = True)
        assert kernel.input_dim==X.shape[1], "The dimensionality of input has to be equal to the input dimensionality of kernel!"
        self.Kuu_sigma = Param('Kuu_var', np.zeros(num_inducing)+1e-3, Logexp())
        
        super(Layer, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, mpi_comm=mpi_comm, mpi_root=mpi_root, auto_update=auto_update, name=name)
        self.link_parameter(self.Kuu_sigma)
        if back_constraint: self.encoder = encoder

        if self.uncertain_inputs and not self.back_constraint:
            self.link_parameter(self.X)
Example #22
def main(args):
    (training_file, label_file, test_file, test_label, unlabel, n_dim,
     output) = args
    X = load_feat(training_file)
    y = load_label(label_file)

    X = np.asarray(X)
    y = np.asarray(y)

    U = load_feat(unlabel)
    U = np.asarray(U[:10000])

    new_dim = int(n_dim)
    k = kern.RBF(new_dim, ARD=True)
    #m = GPy.models.GPRegression(X, y)
    print('reduction model')
    #m_red = BayesianGPLVMMiniBatch(X, new_dim, init="random", num_inducing=50,
    #                               kernel=k, missing_data=True)
    m_U = GPy.models.SparseGPLVM(U, new_dim, kernel=k, num_inducing=50)
    #m_red.Ytrue = U
    print('reduction optimize')
    m_U.optimize(optimizer='bfgs', max_iters=50)

    u_params = m_U.parameters

    m_O = GPy.models.SparseGPLVM(X, new_dim, kernel=k, num_inducing=50)
    m_O.parameters = u_params
    m_O.update_model()
    X = m_O.X.values

    #print dir(m_O.X)
    #print m_O.X.values
    print('gp model')
    m = GPy.models.SparseGPRegression(X, y, num_inducing=50)
    print('gp optimize')
    m.optimize(optimizer='bfgs', max_iters=50)

    test_X = load_feat(test_file)
    test_X = np.asarray(test_X)
    test_y = load_label(test_label)
    test_y = np.asarray(test_y)

    #test_latent = m_red.predict(test_X)[0]
    #print test_latent.shape
    #sys.exit(1)

    #m_redTest = BayesianGPLVMMiniBatch(test_X, new_dim, init="random", num_inducing=50,
    #                                    kernel=k, missing_data=True)

    m_test = GPy.models.SparseGPLVM(test_X, new_dim, num_inducing=50)
    m_test.parameters = u_params
    m_test.update_model()
    test_latent = m_test.X.values
    #print test_latent.shape
    #sys.exit(1)
    pred = m.predict(test_latent)[0]

    #TODO test_X to latent space!!!

    mae = mean_absolute_error(test_y, pred)
    mse = mean_squared_error(test_y, pred)
    print('MAE: ', mae)
    print('RMSE: ', sqrt(mse))
    print('pearson:', sp.stats.pearsonr(test_y, pred)[0])
    print('true: ', mquantiles(test_y, prob=[0.1, 0.9]))
    print('pred: ', mquantiles(pred, prob=[0.1, 0.9]))
    print('resid: ', np.mean(test_y - pred))
    print('r-sqr: ', sp.stats.linregress(test_y[:, 0], pred[:, 0])[2]**2)

    #with open(output, 'w') as output:
    #    for p in pred:
    #        print >>output, p[0]

    return
Example #23
    def __init__(self,
                 layer_upper,
                 Xs,
                 X_win=0,
                 Us=None,
                 U_win=1,
                 Z=None,
                 num_inducing=10,
                 kernel=None,
                 inference_method=None,
                 likelihood=None,
                 noise_var=1.,
                 inducing_init='kmeans',
                 back_cstr=False,
                 MLP_dims=None,
                 name='layer'):

        self.layer_upper = layer_upper
        self.nSeq = len(Xs)

        self.X_win = X_win  # if X_win==0, it is not autoregressive.
        self.X_dim = Xs[0].shape[1]
        self.Xs_flat = Xs
        self.X_observed = not isinstance(Xs[0], VariationalPosterior)

        self.withControl = Us is not None
        self.U_win = U_win
        self.U_dim = Us[0].shape[1] if self.withControl else None
        self.Us_flat = Us
        if self.withControl:
            assert len(Xs) == len(Us), \
                "The number of signals should be equal to the number of controls!"

        if not self.X_observed and back_cstr:
            self._init_encoder(MLP_dims)
            self.back_cstr = True
        else:
            self.back_cstr = False
        self._init_XY()

        if Z is None:
            if not back_cstr and inducing_init == 'kmeans':
                from sklearn.cluster import KMeans
                m = KMeans(n_clusters=num_inducing, n_init=1000, max_iter=100)
                m.fit(self.X.mean.values.copy())
                Z = m.cluster_centers_.copy()
            else:
                Z = np.random.randn(num_inducing, self.X.shape[1])
        assert Z.shape[1] == self.X.shape[1]

        if kernel is None: kernel = kern.RBF(self.X.shape[1], ARD=True)

        if inference_method is None: inference_method = VarDTC()
        if likelihood is None:
            likelihood = likelihoods.Gaussian(variance=noise_var)
        self.normalPrior, self.normalEntropy = NormalPrior(), NormalEntropy()
        super(Layer, self).__init__(self.X,
                                    self.Y,
                                    Z,
                                    kernel,
                                    likelihood,
                                    inference_method=inference_method,
                                    name=name)
        if not self.X_observed:
            if back_cstr:
                assert self.X_win > 0
                self.link_parameters(*(self.init_Xs + self.Xs_var +
                                       [self.encoder]))
            else:
                self.link_parameters(*self.Xs_flat)
Example #24
               ("GP anisotropic RBF", ["All"], gp.GaussianProcessRegressor(kernel=gp.kernels.RBF(length_scale=np.array([1]*n_feats)))),
               ("GP ARD", ["All"], gp.GaussianProcessRegressor(kernel=ard_kernel(sigma=1.2, length_scale=np.array([1]*n_feats)))),
               ("GP isotropic matern nu=0.5", None, gp.GaussianProcessRegressor(kernel=gp.kernels.Matern(nu=0.5))),
               ("GP isotropic matern nu=1.5", None, gp.GaussianProcessRegressor(kernel=gp.kernels.Matern(nu=1.5))),
               ("GP isotropic matern nu=2.5", None, gp.GaussianProcessRegressor(kernel=gp.kernels.Matern(nu=2.5))),
# bad performance
               ("GP dot product", ["CFS", "CIFE", "All"], gp.GaussianProcessRegressor(kernel=gp.kernels.DotProduct())),
#  3-th leading minor not positive definite
#    ("GP exp sine squared", gp.GaussianProcessRegressor(kernel=gp.kernels.ExpSineSquared())),
               ("GP rational quadratic", None, gp.GaussianProcessRegressor(kernel=gp.kernels.RationalQuadratic())),
               ("GP white kernel", None, gp.GaussianProcessRegressor(kernel=gp.kernels.WhiteKernel())),
               ("GP abs_exp", None, gp.GaussianProcess(corr='absolute_exponential')),
               ("GP squared_exp", ["All"], gp.GaussianProcess(corr='squared_exponential')),
               ("GP cubic", None, gp.GaussianProcess(corr='cubic')),
               ("GP linear", None, gp.GaussianProcess(corr='linear')),
               ("GP RBF ARD", ["All"], RBF_ARD_WRAPPER(kern.RBF(input_dim=n_feats, variance=1., lengthscale=np.array([1]*n_feats), ARD=True)))]


models_rmse = []
for name, featSelectionMode, model in classifiers:
    modes = featSelectionMode
    if featSelectionMode is None:
        modes = featSelectionFns.keys()
    rmses = []
    for eaMode in modes:
        bitVec = bitVecs[eaMode]
        model.fit(X_train[:,bitVec], y_train[:])
        rmse_train = sqrt(mean_squared_error(y_train, model.predict(X_train[:,bitVec])))
        rmse_predict = sqrt(mean_squared_error(y_dev, model.predict(X_dev[:,bitVec])))
        rmses.append([name + '('+eaMode+')', rmse_train, rmse_predict])
        print(name + '('+eaMode+')')
Example #25
    1.0, (1e-3, 1e3)) + sk_kern.WhiteKernel()

clf = GPR(kernel=kernel,
          alpha=1e-10,
          optimizer="fmin_l_bfgs_b",
          n_restarts_optimizer=20,
          normalize_y=True)

clf.fit(x_train.reshape(-1, 1), y_train)
pred_mean, pred_std = clf.predict(x_test, return_std=True)  # return_std=True yields the standard deviation
plot_result(x_test=x_test, mean=pred_mean[:, 0], std=pred_std)
plt.title("Scikit-learn")
plt.legend()
plt.savefig("sklern_predict.png", dpi=150)
plt.close("all")

# GPy
kern = gp_kern.RBF(input_dim=1) + gp_kern.Bias(
    input_dim=1) + gp_kern.PeriodicExponential(input_dim=1)
gpy_model = GPy.models.GPRegression(X=x_train.reshape(-1, 1),
                                    Y=y_train,
                                    kernel=kern,
                                    normalizer=None)
gpy_model.optimize()
pred_mean, pred_var = gpy_model.predict(x_test.reshape(-1, 1), )
pred_std = pred_var**0.5
plot_result(x_test, mean=pred_mean[:, 0], std=pred_std[:, 0])
plt.legend()
plt.title("GPy")
plt.savefig("GPy_predict.png", dpi=150)
plt.close("all")
Example #26
def SE(): return _Gk.RBF(1)

def PER(): return _Pk.PureStdPeriodicKernel(1)
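A short composition sketch (assuming _Gk and _Pk expose GPy-style kernel classes, so the usual '+' and '*' kernel algebra applies):

k_add = SE() + PER()  # additive mix of smooth and periodic structure
k_mul = SE() * PER()  # locally periodic kernel
print(k_add)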