Example #1
    def set_dataset(self, X_dataset, Y_dataset, X_cov=None, Y_var=None):
        # set dataset
        super(GP, self).set_dataset(X_dataset, Y_dataset)

        # extra operations when setting the dataset (specific to this class)
        if X_cov is not None:
            self.X_cov = X_cov
            self.nigp = S(np.zeros((self.E, self.N)),
                          name="%s>nigp" % (self.name))
        if Y_var is not None:
            if self.Y_var is None:
                self.Y_var = S(Y_var,
                               name='%s>Y_var' % (self.name),
                               borrow=True)
            else:
                self.Y_var.set_value(Y_var, borrow=True)

        if not self.trained:
            # init log hyperparameters and intermediate variables
            self.init_params()

        # we should be saving, since we updated the training dataset
        self.state_changed = True
        if self.N > 0:
            self.ready = True
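The lazy-create/update pattern used for Y_var above can be shown in isolation. A minimal, self-contained sketch (the names here are illustrative, not taken from the class above), assuming Theano is available:

import numpy as np
import theano

Y_var_shared = None

def set_y_var(Y_var):
    # create the shared variable the first time data arrives, then refresh it
    # in place with set_value() so compiled functions keep referencing the
    # same container; borrow=True avoids an extra copy of the numpy array
    global Y_var_shared
    if Y_var_shared is None:
        Y_var_shared = theano.shared(Y_var, name='Y_var', borrow=True)
    else:
        Y_var_shared.set_value(Y_var, borrow=True)
    return Y_var_shared

set_y_var(np.ones((100, 2)))       # creates the shared variable
set_y_var(np.zeros((100, 2)))      # updates it in place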
Example #2
    def __pcpy__(self, nnt, **kwd):
        """
        Shallowly paste parameter values onto another network with the exact
        same topology.

        nnt: the target network to paste parameter values onto. If None, a
        new network is created.

        kwd: dictionary of additional keywords.

        return:
        the target neural network with the parameters pasted onto it.

        For a recursive deep copy, use hlp.cp instead.
        """
        if not self.__homo__(nnt):
            raise ValueError('cannot cp parameters to different shapes.')

        # parameters of target, they are shared tensors
        par = nnt.__parm__()
        dct = nnt.__dict__
        for k, v in self.__parm__().items():
            # get source parameter values:
            v = v.get_value()

            # update values in the target
            if k in par:
                par[k].set_value(v)
            elif k in dct:  # k is a member but not a shared tensor
                raise ValueError('cannot cp to non-shared-tensor.')
            else:
                dct[k] = S(v)  # create a new shared member
        # done
        return nnt
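The copy step can be reduced to a small stand-alone helper. A sketch under the assumption that parameters are exposed as name-to-shared-variable dicts (as __parm__() returns above); the helper name is hypothetical:

import numpy as np
import theano

def copy_shared_params(src_params, dst_params):
    # read each source parameter with get_value() and write it into the
    # matching shared variable of the target with set_value(); the target
    # keeps its own containers, so functions compiled against it stay valid
    for name, src in src_params.items():
        value = src.get_value()
        if name in dst_params:
            dst_params[name].set_value(value)
        else:
            dst_params[name] = theano.shared(value, name=name)
    return dst_params

src = {'w': theano.shared(np.ones((3, 3)), name='w')}
dst = {'w': theano.shared(np.zeros((3, 3)), name='w')}
copy_shared_params(src, dst)
assert np.allclose(dst['w'].get_value(), 1.0)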
Example #3
    def set_params(self, params, trainable=False):
        ''' Adds a new parameter to the class instance. Every parameter is
        stored as a Theano shared variable. This function exists so that we
        do not end up with different compiled functions referencing different
        shared variables in memory, which can be a problem when loading
        pickled compiled Theano functions.
        '''
        if isinstance(params, list):
            params = dict(list(zip(self.param_names, params)))
        for pname in list(params.keys()):
            # if the parameter that was passed here is a shared variable
            if isinstance(params[pname], tt.sharedvar.SharedVariable):
                p = params[pname]
                self.__dict__[pname] = p
                if pname not in self.param_names:
                    self.param_names.append(pname)
            # if the parameter that was passed here is NOT a shared variable
            else:
                # create shared variable if it doesn't exist
                if pname not in self.__dict__ or self.__dict__[pname] is None:
                    p = S(params[pname], name='%s>%s' % (self.name, pname))

                    self.__dict__[pname] = p
                    if pname not in self.param_names:
                        self.param_names.append(pname)
                # otherwise, update the value of the shared variable
                else:
                    p = self.__dict__[pname]
                    pv = params[pname].reshape(p.get_value().shape)
                    p.set_value(pv)
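The reason for reusing shared variables can be demonstrated directly: a compiled Theano function holds a reference to the shared container, so updating the value with set_value() changes the function's output without recompiling. A self-contained sketch (the variable names are illustrative):

import numpy as np
import theano
import theano.tensor as tt

W = theano.shared(np.zeros((2, 2)), name='W')
x = tt.matrix('x')
f = theano.function([x], x.dot(W))

print(f(np.eye(2)))        # all zeros
W.set_value(np.eye(2))     # update in place, no recompilation needed
print(f(np.eye(2)))        # identity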
Example #4
    def get_loss(self, unroll_scan=False, cache_intermediate=True):
        msg = 'Building full GP loss'
        utils.print_with_stamp(msg, self.name)
        idims = self.D
        N = self.X.shape[0].astype(floatX)

        def nlml(Y, hyp, i, X, EyeN, nigp=None, y_var=None):
            # initialise the (before compilation) kernel function
            hyps = (hyp[:idims + 1], hyp[idims + 1])
            kernel_func = partial(cov.Sum, hyps, self.covs)

            # We initialise the kernel matrices (one for each output dimension)
            K = kernel_func(X)

            # add the contribution from the input noise
            if nigp is not None:
                K += tt.diag(nigp[i])
            # add the contribution from the output uncertainty (acts as weight)
            if y_var is not None:
                K += tt.diag(y_var[i])

            # compute chol(K)
            L = Cholesky()(K)

            # compute K^-1 and (K^-1)dot(y)
            rhs = tt.concatenate([EyeN, Y[:, None]], axis=1)
            sol = solve_upper_triangular(L.T, solve_lower_triangular(L, rhs))
            iK = sol[:, :-1]
            beta = sol[:, -1]

            return iK, L, beta

        nseq = [self.X, tt.eye(self.X.shape[0])]
        if self.nigp is not None:
            nseq.append(self.nigp)
        if self.Y_var is not None:
            nseq.append(self.Y_var.T)

        seq = [self.Y.T, self.hyp, tt.arange(self.X.shape[0])]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [iK, L, beta] = unroll_scan(nlml, seq, [], nseq, self.E)
            updts = {}
        else:
            (iK, L,
             beta), updts = theano.scan(fn=nlml,
                                        sequences=seq,
                                        non_sequences=nseq,
                                        allow_gc=False,
                                        strict=True,
                                        return_list=True,
                                        name="%s>logL_scan" % (self.name))

        # And finally, the negative log marginal likelihood
        loss = 0.5 * tt.sum(self.Y.T * beta, 1)
        idx = [theano.tensor.arange(L.shape[i]) for i in [1, 2]]
        loss += tt.sum(tt.log(L[:, idx[0], idx[1]]), 1)
        loss += 0.5 * N * tt.log(2 * np.pi)

        if cache_intermediate:
            # we are going to save the intermediate results in the following
            # shared variables, so we can use them during prediction without
            # having to recompute them
            N, E = self.N, self.E
            if not isinstance(self.iK, tt.sharedvar.SharedVariable):
                self.iK = S(np.tile(np.eye(N, dtype=floatX), (E, 1, 1)),
                            name="%s>iK" % (self.name))
            if not isinstance(self.L, tt.sharedvar.SharedVariable):
                self.L = S(np.tile(np.eye(N, dtype=floatX), (E, 1, 1)),
                           name="%s>L" % (self.name))
            if not isinstance(self.beta, tt.sharedvar.SharedVariable):
                self.beta = S(np.ones((E, N), dtype=floatX),
                              name="%s>beta" % (self.name))
            updts = [(self.iK, iK), (self.L, L), (self.beta, beta)]
        else:
            # save intermediate graphs (in case we require grads wrt params)
            self.iK, self.L, self.beta = iK, L, beta
            updts = None

        # we add some penalty to avoid having parameters that are too large
        if self.snr_penalty is not None:
            penalty_params = {
                'log_snr': np.log(1000, dtype=floatX),
                'log_ls': np.log(100, dtype=floatX),
                'log_std': tt.log(self.X.std(0) * (N / (N - 1.0))),
                'p': 30
            }
            loss += self.snr_penalty(tt.log(self.hyp), **penalty_params)
        inps = []
        self.state_changed = True  # for saving
        return loss.sum(), inps, updts
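Each scan iteration above evaluates the standard GP negative log marginal likelihood for one output dimension. A numpy/scipy sketch of the same computation, given a precomputed kernel matrix K and targets y (the kernel construction itself is omitted):

import numpy as np
from scipy.linalg import solve_triangular

def nlml_dense(K, y):
    # L = chol(K); solve for [K^-1, K^-1 y] with two triangular solves,
    # then assemble 0.5*y^T beta + sum(log diag L) + 0.5*N*log(2*pi)
    N = K.shape[0]
    L = np.linalg.cholesky(K)
    rhs = np.concatenate([np.eye(N), y[:, None]], axis=1)
    sol = solve_triangular(L.T, solve_triangular(L, rhs, lower=True),
                           lower=False)
    iK, beta = sol[:, :-1], sol[:, -1]
    loss = (0.5 * y.dot(beta) + np.log(np.diag(L)).sum()
            + 0.5 * N * np.log(2 * np.pi))
    return loss, iK, L, beta

A = np.random.randn(10, 10)
K = A.dot(A.T) + 10 * np.eye(10)       # toy SPD kernel matrix
loss, iK, L, beta = nlml_dense(K, np.random.randn(10))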
Example #5
    def get_loss(self, unroll_scan=False, cache_intermediate=True):
        utils.print_with_stamp('Building Sparse Spectrum loss', self.name)
        idims = self.D

        if self.sr is None:
            self.sr = self.w/(self.hyp[:, :idims])
            self.sr = self.sr.transpose(1, 0, 2)

        # init variables
        N = self.X.shape[0].astype(floatX)
        M = self.sr.shape[1].astype(floatX)
        Mi = 2*self.sr.shape[1]
        EyeM = tt.eye(Mi)
        sf2 = self.hyp[:, idims]**2
        sf2M = (sf2/M).dimshuffle(0, 'x', 'x')
        sn2 = (self.hyp[:, idims+1]**2).dimshuffle(0, 'x', 'x')
        srdotX = self.sr.dot(self.X.T)

        phi_f = tt.concatenate([tt.sin(srdotX), tt.cos(srdotX)], axis=1)
        Phi_f = tt.batched_dot(phi_f, phi_f.transpose(0, 2, 1))
        A = sf2M*Phi_f
        A += (sn2 + 1e-6)*EyeM
        phi_f_dotY = tt.batched_dot(phi_f, self.Y.T)

        def nlml(A, phidotY, EyeM):
            Lmm = Cholesky()(A)
            rhs = tt.concatenate([EyeM, phidotY[:, None]], axis=1)
            sol = solve_upper_triangular(
                Lmm.T, solve_lower_triangular(Lmm, rhs))
            iA = sol[:, :-1]
            beta_ss = sol[:, -1]

            return iA, Lmm, beta_ss

        seq = [A, phi_f_dotY]
        nseq = [EyeM]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [iA, Lmm, beta_ss] = unroll_scan(nlml, seq, [], nseq, self.E)
            updts = {}
        else:
            (iA, Lmm, beta_ss), updts = theano.scan(
                fn=nlml, sequences=seq, non_sequences=nseq,
                allow_gc=False, return_list=True,
                name='%s>logL_ss' % (self.name))

        # scale beta_ss
        beta_ss *= sf2M[:, :, 0]

        # And finally, the negative log marginal likelihood
        YdotY = tt.sum(self.Y**2, 0)
        Ydotphidotbeta = tt.sum(phi_f_dotY*beta_ss, -1)
        loss_ss = 0.5*(YdotY - Ydotphidotbeta)/sn2
        idx = [theano.tensor.arange(Lmm.shape[i]) for i in [1, 2]]
        loss_ss += tt.sum(tt.log(Lmm[:, idx[0], idx[1]]), 1)
        loss_ss += (0.5*N - M)*tt.log(sn2)
        loss_ss += 0.5*N*np.log(2*np.pi, dtype=floatX)

        if cache_intermediate:
            # we are going to save the intermediate results in the following
            # shared variables, so we can use them during prediction without
            # having to recompute them
            kk = 2*self.n_inducing
            N, E = self.N, self.E
            if not isinstance(self.iA, tt.sharedvar.SharedVariable):
                self.iA = S(np.tile(np.eye(kk, dtype=floatX), (E, 1, 1)),
                            name="%s>iA" % (self.name))
            if not isinstance(self.Lmm, tt.sharedvar.SharedVariable):
                self.Lmm = S(np.tile(np.eye(kk, dtype=floatX), (E, 1, 1)),
                             name="%s>Lmm" % (self.name))
            if not isinstance(self.beta_ss, tt.sharedvar.SharedVariable):
                self.beta_ss = S(np.ones((E, kk), dtype=floatX),
                                 name="%s>beta_ss" % (self.name))
            updts = [(self.iA, iA), (self.Lmm, Lmm), (self.beta_ss, beta_ss)]
        else:
            self.iA, self.Lmm, self.beta_ss = iA, Lmm, beta_ss
            updts = None

        # we add some penalty to avoid having parameters that are too large
        if self.snr_penalty is not None:
            penalty_params = {'log_snr': np.log(1000, dtype=floatX),
                              'log_ls': np.log(100, dtype=floatX),
                              'log_std': tt.log(self.X.std(0)*(N/(N-1.0))),
                              'p': 30}
            loss_ss += self.snr_penalty(tt.log(self.hyp), **penalty_params)

        # add a penalty for high frequencies
        freq_penalty = tt.square(self.w).sum(-1).mean(0)
        loss_ss = loss_ss + freq_penalty

        inps = []
        self.state_changed = True  # for saving
        return loss_ss.sum(), inps, updts
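The sparse spectrum construction above replaces the kernel matrix with 2M trigonometric features per output dimension. A numpy sketch of the feature matrix and the regularised Gram matrix A for a single output (the names are illustrative):

import numpy as np

def ss_gram(X, sr, sf2, sn2, ridge=1e-6):
    # sr holds M spectral points (already scaled by the lengthscales);
    # the features are [sin(sr X^T); cos(sr X^T)], and A is the 2M x 2M
    # regularised Gram matrix sf2/M * phi phi^T + (sn2 + ridge) I
    M = sr.shape[0]
    srdotX = sr.dot(X.T)                                            # (M, N)
    phi = np.concatenate([np.sin(srdotX), np.cos(srdotX)], axis=0)  # (2M, N)
    A = (sf2 / M) * phi.dot(phi.T) + (sn2 + ridge) * np.eye(2 * M)
    return phi, A

X = np.random.randn(50, 3)
sr = np.random.randn(10, 3)
phi, A = ss_gram(X, sr, sf2=1.0, sn2=0.01)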
Example #6
parser.add_argument('-b', type=float, default=0.0, dest='b')
parser.add_argument('-N', type=int, default=1000, dest='N')
args = parser.parse_args()
wt = args.w
bt = args.b
iterations = args.N

# generate a training dataset of four points equispaced in y

ytarg = np.array([[1. / 8, 3. / 8, 5. / 8, 7. / 8]])
xtarg = invf(wt, bt, ytarg)

# initial values for model sigmoid

a = 1.0
b = 0.0
W = S(a)
B = S(b)

# symbolic computations for theano

X = T.matrix()
y = T.vector()
sig = 1 / (1 + T.exp(-T.dot(X, W) - B))
xent = -y * T.log(sig) - (1 - y) * T.log(1 - sig)
cost = xent.mean()
gw, gb = T.grad(cost, [W, B])

# compile theano functions

TRAIN = F(inputs=[X, y],
          outputs=[W, B],
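The listing above is cut off after the start of the compiled training function. A self-contained sketch of the same pattern (sigmoid model, cross-entropy cost, gradient updates applied to the shared parameters); the learning rate and the plain gradient-descent update rule are assumptions, not taken from the original:

import numpy as np
import theano
import theano.tensor as T

# model parameters as shared variables so the compiled function can update them
W = theano.shared(1.0, name='W')
B = theano.shared(0.0, name='B')

x = T.vector('x')
y = T.vector('y')

sig = 1 / (1 + T.exp(-x * W - B))                   # model sigmoid
xent = -y * T.log(sig) - (1 - y) * T.log(1 - sig)   # cross-entropy
cost = xent.mean()
gw, gb = T.grad(cost, [W, B])

lr = 0.1    # assumed learning rate
train = theano.function(inputs=[x, y],
                        outputs=[cost],
                        updates=[(W, W - lr * gw), (B, B - lr * gb)])

xs = np.array([-1.0, -0.5, 0.5, 1.0])
ys = np.array([0.0, 0.0, 1.0, 1.0])
for _ in range(1000):
    train(xs, ys)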
Example #7
    def get_loss(self, cache_intermediate=True):
        if self.N < self.n_inducing:
            # initialize the training loss function of the GP class
            return super(SPGP, self).get_loss(
                cache_intermediate=cache_intermediate)
        else:
            utils.print_with_stamp('Building FITC loss', self.name)
            self.should_recompile = False
            odims = self.E
            idims = self.D
            N = self.X.shape[0].astype(theano.config.floatX)

            # initialize the training loss function of the sparse FITC
            # approximation
            def nlml(Y, hyp, X, X_sp, EyeM):
                # TODO allow for different pseudo inputs for each dimension
                # initialise the (before compilation) kernel function
                hyps = [hyp[:idims+1], hyp[idims+1]]
                kernel_func = partial(cov.Sum, hyps, self.covs)

                sf2 = hyp[idims]**2
                sn2 = hyp[idims+1]**2
                N = X.shape[0].astype(theano.config.floatX)

                ridge = 1e-6
                Kmm = kernel_func(X_sp) + ridge*EyeM
                Kmn = kernel_func(X_sp, X)
                Lmm = cholesky(Kmm)
                rhs = tt.concatenate([EyeM, Kmn], axis=1)
                sol = solve_lower_triangular(Lmm, rhs)
                iKmm = solve_upper_triangular(Lmm.T, sol[:, :EyeM.shape[0]])
                Lmn = sol[:, EyeM.shape[0]:]
                diagQnn = (Lmn**2).sum(0)

                # Gamma = diag(Knn - Qnn) + sn2*I
                Gamma = sf2 + sn2 - diagQnn
                Gamma_inv = 1.0/Gamma

                # these operations avoid explicitly inverting (Qnn + Gamma)
                sqrtGamma_inv = tt.sqrt(Gamma_inv)
                Lmn_ = Lmn*sqrtGamma_inv                      # Kmn_*Gamma^-.5
                Yi = Y*(sqrtGamma_inv)                        # Gamma^-.5* Y
                # I + Lmn * Gamma^-1 * Lnm
                Bmm = tt.eye(Kmm.shape[0]) + (Lmn_).dot(Lmn_.T)
                Amm = cholesky(Bmm)
                LAmm = Lmm.dot(Amm)
                Kmn_dotYi = Kmn.dot(Yi*(sqrtGamma_inv))
                rhs = tt.concatenate([EyeM, Kmn_dotYi[:, None]], axis=1)
                sol = solve_upper_triangular(
                    LAmm.T, solve_lower_triangular(LAmm, rhs))
                iBmm = sol[:, :-1]
                beta_sp = sol[:, -1]

                log_det_K_sp = tt.sum(tt.log(Gamma))
                log_det_K_sp += 2*tt.sum(tt.log(tt.diag(Amm)))

                loss_sp = Yi.dot(Yi) - Kmn_dotYi.dot(beta_sp)
                loss_sp += log_det_K_sp + N*np.log(2*np.pi)
                loss_sp *= 0.5

                return loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp

            r_outs, updts = theano.scan(
                fn=nlml, sequences=[self.Y.T, self.hyp],
                non_sequences=[self.X, self.X_sp, tt.eye(self.X_sp.shape[0])],
                allow_gc=False, return_list=True)
            (loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp) = r_outs

            if cache_intermediate:
                # we are going to save the intermediate results in the
                # following shared variables,
                # so we can use them during prediction without having to
                # recompute them
                # initialize shared variables
                kk = self.n_inducing
                self.iKmm = S(
                    np.tile(np.eye(kk).astype(floatX), (odims, 1, 1)),
                    name="%s>iKmm" % (self.name))
                self.Lmm = S(
                    np.tile(np.eye(kk).astype(floatX), (odims, 1, 1)),
                    name="%s>Lmm" % (self.name))
                self.Amm = S(
                    np.tile(np.eye(kk).astype(floatX), (odims, 1, 1)),
                    name="%s>Amm" % (self.name))
                self.iBmm = S(
                    np.tile(np.eye(kk).astype(floatX), (odims, 1, 1)),
                    name="%s>iBmm" % (self.name))
                self.beta_sp = S(
                    np.ones((self.E, kk)).astype(floatX),
                    name="%s>beta_sp" % (self.name))
                updts = [(self.iKmm, iKmm), (self.Lmm, Lmm), (self.Amm, Amm),
                         (self.iBmm, iBmm), (self.beta_sp, beta_sp)]
            else:
                self.iKmm, self.Lmm, self.Amm = iKmm, Lmm, Amm
                self.iBmm, self.beta_sp = iBmm, beta_sp
                updts = None

            # we add some penalty to avoid having parameters that are too large
            if self.snr_penalty is not None:
                penalty_params = {'log_snr': np.log(1000),
                                  'log_ls': np.log(100),
                                  'log_std': tt.log(
                                      self.X_sp.std(0)*(N/(N-1.0))),
                                  'p': 30}
                loss_sp += self.snr_penalty(self.hyp, **penalty_params)

            inps = []
            self.state_changed = True  # for saving
            return loss_sp.sum(), inps, updts
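The core of the FITC approximation above is the low-rank term Qnn = Knm Kmm^-1 Kmn and the diagonal correction Gamma = diag(Knn - Qnn) + sn2. A numpy sketch for one output dimension, using a toy squared-exponential kernel (the kernel and helper names are illustrative):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def fitc_gamma(Kmm, Kmn, sf2, sn2, ridge=1e-6):
    # Lmm = chol(Kmm); Lmn = Lmm^-1 Kmn, so diag(Qnn) = sum(Lmn**2, 0)
    M = Kmm.shape[0]
    Lmm = cholesky(Kmm + ridge * np.eye(M), lower=True)
    Lmn = solve_triangular(Lmm, Kmn, lower=True)
    diagQnn = (Lmn ** 2).sum(0)
    Gamma = sf2 + sn2 - diagQnn      # diag(Knn - Qnn) + sn2, with Knn_ii = sf2
    return Lmm, Lmn, Gamma

def k_se(A, B, ls=1.0):
    # unit-variance squared exponential kernel
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / ls ** 2)

X = np.random.randn(100, 2)
X_sp = X[:10]                        # 10 inducing (pseudo) inputs
Lmm, Lmn, Gamma = fitc_gamma(k_se(X_sp, X_sp), k_se(X_sp, X),
                             sf2=1.0, sn2=0.01)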