# Example 1
 def _init_X(self, Ylist, input_dim, init='PCA'):
     if Ylist is None:
         Ylist = self.Ylist
     if init in "PCA_concat":
         print('# Initializing latent space with: PCA_concat')
         X, fracs = initialize_latent('PCA', input_dim, np.hstack(Ylist))
         fracs = [fracs]*len(Ylist)
     elif init in "PCA_single":
         print('# Initializing latent space with: PCA_single')
         X = np.zeros((Ylist[0].shape[0], input_dim))
         fracs = []
         for qs, Y in zip(np.array_split(np.arange(input_dim), len(Ylist)), Ylist):
             x,frcs = initialize_latent('PCA', len(qs), Y)
             X[:, qs] = x
             fracs.append(frcs)
     else: # init == 'random':
         print('# Initializing latent space with: random')
         X = np.random.randn(Ylist[0].shape[0], input_dim)
         fracs = X.var(0)
         fracs = [fracs]*len(Ylist)
     X -= X.mean()
     X /= X.std()
     return X, fracs        
# Example 2
    def store(self, observed, inputs=None, Q=None, kernel=None, num_inducing=None, init_X='PCA'):
        """
        Store events.
        ARG: observed: A N x D matrix, where N is the number of points and D the number
        of features needed to describe each point.
        ARG: inputs: A N x Q matrix, where Q is the number of features per input. 
        Leave None for unsupervised learning.
        ARG: Q: Leave None for supervised learning (Q will then be the dimensionality of
        inputs). Otherwise, specify with Q the dimensionality (number of features) for the
        compressed space that acts as "latent" inputs.
        ARG: kernel: for the GP. can be left as None for default.
        ARG: num_inducing: says how many inducing points to use. Inducing points are
        a fixed number of variables through which all memory is filtered, to achieve
        full compression. E.g. it can correspond to the number of neurons.
        Of course, this is not absolutely fixed, but it also doesn't grow necessarily
        proportionally to the data, since synapses can make more complicated combinations
        of the existing neurons. The GP is here playing the role of "synapses", by learning
        non-linear and rich combinations of the inducing points.
        """
        assert(isinstance(observed, dict))
        self.observed = observed
        self.__num_views = len(self.observed.keys())
        self.Q = Q
        # self.D = observed.shape[1]
        self.N = observed[observed.keys()[0]].shape[0]
        self.num_inducing = num_inducing
        if num_inducing is None:
            self.num_inducing = self.N
        if inputs is None:
            if self.Q is None:
                self.Q = 2  # self.D
            if self.__num_views == 1:
                assert(self.type == [] or self.type == 'bgplvm')
                self.type = 'bgplvm'
            else:
                assert(self.type == [] or self.type == 'mrd')
                self.type = 'mrd'
        else:
            assert(self.type == [] or self.type == 'gp')
            assert(self.__num_views == 1)
            self.Q = inputs.shape[1]
            self.type = 'gp'
            self.inputs = inputs

        if kernel is None:
            kernel = GPy.kern.RBF(self.Q, ARD=True) + GPy.kern.Bias(self.Q) + GPy.kern.White(self.Q)
            
        if self.type == 'bgplvm':
            Ytmp = self.observed[self.observed.keys()[0]]
            pcaFailed = False
            if init_X == 'PCA':
                try:
                    self.model = GPy.models.BayesianGPLVM(Ytmp, self.Q, kernel=kernel, num_inducing=self.num_inducing)
                except ValueError:
                    pcaFailed = True
                    print "Initialisation with PCA failed. Initialising with PPCA..."
            elif init_X == 'PPCA' or pcaFailed:
                print "Initialising with PPCA..."
                Xr = GPy.util.linalg.ppca(Ytmp, self.Q, 2000)[0]
                Xr -= Xr.mean(0)
                Xr /= Xr.std(0)
                self.model = GPy.models.BayesianGPLVM(Ytmp, self.Q, kernel=kernel, num_inducing=self.num_inducing, X=Xr)
            self.model['.*noise'] = Ytmp.var() / 100.
        elif self.type == 'mrd':
            # Create a list of observation spaces (aka views)
            self.Ylist = []
            self.namesList = []
            for k in self.observed.keys():
                self.Ylist = [self.Ylist, self.observed[k]]
                self.namesList = [self.namesList, k]
            self.Ylist[0] = self.Ylist[0][1]
            self.namesList[0] = self.namesList[0][1]
            pcaFailed = False
            if init_X == 'PCA':
                try:
                    self.model = GPy.models.MRD(self.Ylist, input_dim=self.Q, num_inducing=self.num_inducing,
                                                kernel=kernel, initx="PCA_single", initz='permute')
                except ValueError:
                    pcaFailed = True
                    print "Initialisation with PCA failed. Initialising with PPCA..."
            elif init_X == 'PPCA' or pcaFailed:
                print "Initialising with PPCA..."
                from GPy.util.initialization import initialize_latent
                Xr = np.zeros((self.Ylist[0].shape[0], self.Q))
                for qs, Y in zip(np.array_split(np.arange(self.Q), len(self.Ylist)), self.Ylist):
                    try:
                        x, frcs = initialize_latent('PCA', len(qs), Y)
                    except ValueError:
                        x = GPy.util.linalg.ppca(Y, len(qs), 2000)[0]
                    Xr[:, qs] = x
                Xr -= Xr.mean()
                Xr /= Xr.std()
                self.model = GPy.models.MRD(self.Ylist, input_dim=self.Q, num_inducing=self.num_inducing, kernel=kernel, initx="PCA_single", initz='permute', X=Xr)
            self.model['.*noise'] = [yy.var() / 100. for yy in self.model.Ylist]
        elif self.type == 'gp':
            self.model = GPy.models.SparseGPRegression(self.inputs, self.observed[self.observed.keys()[0]], kernel=kernel, num_inducing=self.num_inducing)
        
        self.model.data_labels = None
        self.model.textLabelPts = dict()
# Example 3
    def store(self,
              observed,
              inputs=None,
              Q=None,
              kernel=None,
              num_inducing=None,
              init_X='PCA'):
        """
        Store events.
        ARG: obserbved: A N x D matrix, where N is the number of points and D the number
        of features needed to describe each point.
        ARG: inputs: A N x Q matrix, where Q is the number of features per input. 
        Leave None for unsupervised learning.
        ARG: Q: Leave None for supervised learning (Q will then be the dimensionality of
        inputs). Otherwise, specify with Q the dimensionality (number of features) for the
        compressed space that acts as "latent" inputs.
        ARG: kernel: for the GP. can be left as None for default.
        ARG: num_inducing: says how many inducing points to use. Inducing points are
        a fixed number of variables through which all memory is filtered, to achieve
        full compression. E.g. it can correspond to the number of neurons.
        Of course, this is not absolutely fixed, but it also doesn't grow necessarily
        proportionally to the data, since synapses can make more complicated combinations
        of the existing neurons. The GP is here playing the role of "synapses", by learning
        non-linear and rich combinations of the inducing points.
        """
        assert (isinstance(observed, dict))
        self.observed = observed
        self.__num_views = len(self.observed.keys())
        self.Q = Q
        #self.D = observed.shape[1]
        self.N = observed[observed.keys()[0]].shape[0]
        self.num_inducing = num_inducing
        if num_inducing is None:
            self.num_inducing = self.N
        if inputs is None:
            if self.Q is None:
                self.Q = 2  #self.D
            if self.__num_views == 1:
                assert (self.type == [] or self.type == 'bgplvm')
                self.type = 'bgplvm'
            else:
                assert (self.type == [] or self.type == 'mrd')
                self.type = 'mrd'
        else:
            assert (self.type == [] or self.type == 'gp')
            assert (self.__num_views == 1)
            self.Q = inputs.shape[1]
            self.type = 'gp'
            self.inputs = inputs

        if kernel is None:
            kernel = GPy.kern.RBF(self.Q, ARD=True) + GPy.kern.Bias(
                self.Q) + GPy.kern.White(self.Q)

        if self.type == 'bgplvm':
            Ytmp = self.observed[self.observed.keys()[0]]
            pcaFailed = False
            if init_X == 'PCA':
                try:
                    self.model = GPy.models.BayesianGPLVM(
                        Ytmp,
                        self.Q,
                        kernel=kernel,
                        num_inducing=self.num_inducing)
                except ValueError:
                    pcaFailed = True
                    print "Initialisation with PCA failed. Initialising with PPCA..."
            elif init_X == 'PPCA' or pcaFailed:
                print "Initialising with PPCA..."
                Xr = GPy.util.linalg.ppca(Ytmp, self.Q, 2000)[0]
                Xr -= Xr.mean(0)
                Xr /= Xr.std(0)
                self.model = GPy.models.BayesianGPLVM(
                    Ytmp,
                    self.Q,
                    kernel=kernel,
                    num_inducing=self.num_inducing,
                    X=Xr)
            self.model['.*noise'] = Ytmp.var() / 100.
        elif self.type == 'mrd':
            # Create a list of observation spaces (aka views)
            self.Ylist = []
            self.namesList = []
            for k in self.observed.keys():
                self.Ylist = [self.Ylist, self.observed[k]]
                self.namesList = [self.namesList, k]
            self.Ylist[0] = self.Ylist[0][1]
            self.namesList[0] = self.namesList[0][1]
            pcaFailed = False
            if init_X == 'PCA':
                try:
                    self.model = GPy.models.MRD(self.Ylist,
                                                input_dim=self.Q,
                                                num_inducing=self.num_inducing,
                                                kernel=kernel,
                                                initx="PCA_single",
                                                initz='permute')
                except ValueError:
                    pcaFailed = True
                    print "Initialisation with PCA failed. Initialising with PPCA..."
            elif init_X == 'PPCA' or pcaFailed:
                print "Initialising with PPCA..."
                from GPy.util.initialization import initialize_latent
                Xr = np.zeros((self.Ylist[0].shape[0], self.Q))
                for qs, Y in zip(
                        np.array_split(np.arange(self.Q), len(self.Ylist)),
                        self.Ylist):
                    try:
                        x, frcs = initialize_latent('PCA', len(qs), Y)
                    except ValueError:
                        x = GPy.util.linalg.ppca(Y, len(qs), 2000)[0]
                    Xr[:, qs] = x
                Xr -= Xr.mean()
                Xr /= Xr.std()
                self.model = GPy.models.MRD(self.Ylist,
                                            input_dim=self.Q,
                                            num_inducing=self.num_inducing,
                                            kernel=kernel,
                                            initx="PCA_single",
                                            initz='permute',
                                            X=Xr)
            self.model['.*noise'] = [
                yy.var() / 100. for yy in self.model.Ylist
            ]
        elif self.type == 'gp':
            self.model = GPy.models.SparseGPRegression(
                self.inputs,
                self.observed[self.observed.keys()[0]],
                kernel=kernel,
                num_inducing=self.num_inducing)

        self.model.data_labels = None