Beispiel #1
0
    def predict_symbolic(self, mx, Sx=None, unroll_scan=False):
        """Build the symbolic prediction for an input mean and covariance.

        When ``Sx`` is None only the predictive mean is built; ``mx`` may
        then be a vector [D] or a matrix [nxD].  Otherwise the predictive
        mean ``M``, the predictive covariance ``S`` and the input-output
        covariance ``V`` are built from ``mx`` and ``Sx``.

        If ``self.sat_func`` is not None it is applied to the outputs.
        With a truthy ``unroll_scan`` the loop over output pairs is
        unrolled with ``lasagne.utils.unroll_scan`` instead of being run
        through ``theano.scan``.

        Returns ``M`` when ``Sx`` is None, otherwise ``(M, S, V)``.
        """
        n_in = self.D
        n_out = self.E
        out_range = range(n_out)

        # squared hyper-parameter stored in column n_in, and the identity
        # divided by the first n_in hyper-parameter columns
        # (presumably signal variance and inverse length-scales -- confirm)
        sig_var = self.hyp[:, n_in]**2
        inv_ls = (tt.tile(tt.eye(n_in), (n_out, 1, 1)) /
                  self.hyp[:, :n_in].dimshuffle(0, 1, 'x'))

        if Sx is None:
            # accept either a vector [D] or a matrix [nxD]
            batch = mx[None, :] if mx.ndim == 1 else mx
            # centralize inputs
            delta = self.X[:, None, :] - batch[None, :, :]

            # only the predictive mean is required in this branch
            scaled = inv_ls[:, None, :, None, :] * delta[:, None, :, :]
            scaled = scaled.sum(2)
            kern = tt.exp(-0.5 * tt.sum(scaled**2, -1))
            weighted = kern * self.beta[:, :, None]  # E x N
            mean = tt.sum(weighted, 1).T * sig_var

            # saturate the output if a saturating function is available
            if self.sat_func is None:
                return mean
            return self.sat_func(mean)

        # centralize inputs
        delta = self.X - mx

        # predictive mean
        scaled = inv_ls.dot(delta.T).transpose(0, 2, 1)
        inv_ls_Sx = inv_ls.dot(Sx)
        B = (inv_ls_Sx[:, :, None, :] * inv_ls[:, None, :, :]).sum(-1)
        B = B + tt.eye(n_in)
        solved = tt.stack([solve(B[k].T, scaled[k].T).T for k in out_range])
        dets = tt.stack([det(B[k]) for k in out_range])
        scale = sig_var / tt.sqrt(dets)
        kern = tt.exp(-0.5 * tt.sum(scaled * solved, 2))
        weighted = kern * self.beta
        M = tt.sum(weighted, 1) * scale

        # input output covariance
        solved_iL = tt.stack([solved[k].dot(inv_ls[k]) for k in out_range])
        V = tt.stack(
            [solved_iL[k].T.dot(weighted[k]) for k in out_range]).T * scale

        # predictive covariance
        log_k = tt.log(sig_var)[:, None] - 0.5 * tt.sum(scaled * scaled, 2)
        log_k_row = log_k.dimshuffle(0, 'x', 1)
        log_k_col = log_k.dimshuffle(0, 1, 'x')
        Lam = tt.square(inv_ls)
        Lam_pairs = (Lam.dimshuffle(0, 'x', 1, 2) + Lam).transpose(0, 1, 3, 2)
        R = tt.dot(Lam_pairs, Sx).transpose(0, 1, 3, 2) + tt.eye(n_in)
        z_ = Lam.dot(delta.T).transpose(0, 2, 1)

        def fill_entry(i, j, acc, beta, R, logk_c, logk_r, z_, Sx, *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            R_ij = R[i, j]
            exponent = (logk_c[i] + logk_r[j] +
                        utils.maha(z_[i], -z_[j], 0.5 * solve(R_ij, Sx)))
            Q = tt.exp(exponent) / tt.sqrt(det(R_ij))

            # Eq 2.55
            entry = matrix_dot(beta[i], Q, beta[j])

            # a small constant is added to diagonal entries only
            entry = theano.ifelse.ifelse(tt.eq(i, j), entry + 1e-6, entry)
            return tt.set_subtensor(acc[i, j], entry)

        # only the upper triangle (including the diagonal) is visited
        pairs = np.triu_indices(n_out)
        seqs = [tt.as_index_variable(idx) for idx in pairs]
        M2_init = tt.zeros((n_out, n_out))
        non_seqs = [self.beta, R, log_k_col, log_k_row, z_, Sx,
                    self.iK, self.L]

        if unroll_scan:
            from lasagne.utils import unroll_scan as lasagne_unroll
            [history] = lasagne_unroll(fill_entry, seqs, [M2_init],
                                       non_seqs, len(pairs[0]))
        else:
            history, _ = theano.scan(fn=fill_entry,
                                     sequences=seqs,
                                     outputs_info=[M2_init],
                                     non_sequences=non_seqs,
                                     allow_gc=False,
                                     strict=True,
                                     name="%s>M2_scan" % (self.name))

        # last accumulator state; mirror the strict upper triangle below
        M2 = history[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        # apply saturating function to the output if available
        if self.sat_func is not None:
            M, S, U = self.sat_func(M, S)
            # compute the joint input output covariance
            V = V.dot(U)

        return M, S, V
Beispiel #2
0
    def predict_symbolic(self, mx, Sx=None, unroll_scan=False):
        """Build the symbolic prediction from sin/cos features of ``self.sr``.

        When ``Sx`` is None, ``mx`` (a vector [D] or a matrix [nxD]) is
        treated as the query input and ``(M, S)`` is returned, built from
        the features evaluated at ``mx``.

        Otherwise the mean ``M``, covariance ``S`` and input-output
        covariance ``V`` are built from the input mean ``mx`` and input
        covariance ``Sx``, and ``(M, S, V)`` is returned.  ``V`` is not
        premultiplied by the inverse of the input covariance.

        With a truthy ``unroll_scan`` the loop over output pairs is
        unrolled with ``lasagne.utils.unroll_scan`` instead of being run
        through ``theano.scan``.
        """
        idims = self.D
        odims = self.E

        Ms = self.sr.shape[1]
        sf2M = (self.hyp[:, idims]**2)/tt.cast(Ms, floatX)
        sn2 = self.hyp[:, idims+1]**2

        # TODO this should just fallback to the method from the SSGP class
        if Sx is None:
            # first check if we received a vector [D] or a matrix [nxD]
            if mx.ndim == 1:
                mx = mx[None, :]

            # evaluate the features at the query inputs mx; using self.X
            # here would ignore the requested inputs altogether
            srdotx = self.sr.dot(mx.T).transpose(0, 2, 1)
            phi_x = tt.concatenate([tt.sin(srdotx), tt.cos(srdotx)], 2)
            M = (phi_x*self.beta_ss[:, None, :]).sum(-1)
            phi_x_L = tt.stack([
                solve_lower_triangular(self.Lmm[i], phi_x[i].T)
                for i in range(odims)])
            S = sn2[:, None]*(1 + (sf2M[:, None])*(phi_x_L**2).sum(-2)) + 1e-6

            return M, S

        # precompute some variables
        srdotx = self.sr.dot(mx)
        srdotSx = self.sr.dot(Sx)
        srdotSxdotsr = tt.sum(srdotSx*self.sr, 2)
        e = tt.exp(-0.5*srdotSxdotsr)
        cos_srdotx = tt.cos(srdotx)
        sin_srdotx = tt.sin(srdotx)
        cos_srdotx_e = cos_srdotx*e
        sin_srdotx_e = sin_srdotx*e

        # compute the mean vector
        mphi = tt.horizontal_stack(sin_srdotx_e, cos_srdotx_e)  # E x 2*Ms
        M = tt.sum(mphi*self.beta_ss, 1)

        # input output covariance
        mx_c = mx.dimshuffle(0, 'x')
        sin_srdotx_e_r = sin_srdotx_e.dimshuffle(0, 'x', 1)
        cos_srdotx_e_r = cos_srdotx_e.dimshuffle(0, 'x', 1)
        srdotSx_tr = srdotSx.transpose(0, 2, 1)
        c = tt.concatenate([mx_c*sin_srdotx_e_r + srdotSx_tr*cos_srdotx_e_r,
                            mx_c*cos_srdotx_e_r - srdotSx_tr*sin_srdotx_e_r],
                           axis=2)  # E x D x 2*Ms
        beta_ss_r = self.beta_ss.dimshuffle(0, 'x', 1)

        # input output covariance (notice this is not premultiplied by the
        # input covariance inverse)
        V = tt.sum(c*beta_ss_r, 2).T - tt.outer(mx, M)

        srdotSxdotsr_c = srdotSxdotsr.dimshuffle(0, 1, 'x')
        srdotSxdotsr_r = srdotSxdotsr.dimshuffle(0, 'x', 1)
        M2 = tt.zeros((odims, odims))

        # initialize indices: only the upper triangle (with diagonal) is
        # visited, the lower triangle is mirrored after the loop
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iA, sn2, sf2M, sr, srdotSx,
                           srdotSxdotsr_c, srdotSxdotsr_r,
                           sin_srdotx, cos_srdotx, *args):
            # compute the second moments of the spectrum feature vectors
            siSxsj = srdotSx[i].dot(sr[j].T)  # Ms x Ms
            sijSxsij = -0.5*(srdotSxdotsr_c[i] + srdotSxdotsr_r[j])
            em = tt.exp(sijSxsij+siSxsj)      # MsxMs
            ep = tt.exp(sijSxsij-siSxsj)     # MsxMs
            si = sin_srdotx[i]       # Msx1
            ci = cos_srdotx[i]       # Msx1
            sj = sin_srdotx[j]       # Msx1
            cj = cos_srdotx[j]       # Msx1
            sicj = tt.outer(si, cj)  # MsxMs
            cisj = tt.outer(ci, sj)  # MsxMs
            sisj = tt.outer(si, sj)  # MsxMs
            cicj = tt.outer(ci, cj)  # MsxMs
            sm = (sicj-cisj)*em
            sp = (sicj+cisj)*ep
            cm = (sisj+cicj)*em
            cp = (cicj-sisj)*ep

            # Populate the second moment matrix of the feature vector
            Q_up = tt.concatenate([cm-cp, sm+sp], axis=1)
            Q_lo = tt.concatenate([sp-sm, cm+cp], axis=1)
            Q = tt.concatenate([Q_up, Q_lo], axis=0)

            # Compute the second moment of the output
            m2 = 0.5*matrix_dot(beta[i], Q, beta[j].T)

            # extra term and a small constant on diagonal entries only;
            # iA is the non-sequence argument, not read through self
            m2 = theano.ifelse.ifelse(
                tt.eq(i, j),
                m2 + sn2[i]*(1.0 + sf2M[i]*tt.sum(iA[i]*Q)) + 1e-6,
                m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta_ss, self.iA, sn2, sf2M, self.sr, srdotSx,
                srdotSxdotsr_c, srdotSxdotsr_r, sin_srdotx, cos_srdotx,
                self.Lmm]

        if unroll_scan:
            # aliased so the import does not rebind the boolean parameter
            from lasagne.utils import unroll_scan as lasagne_unroll_scan
            [M2_] = lasagne_unroll_scan(second_moments, indices,
                                        [M2], nseq, len(triu_indices[0]))
        else:
            M2_, _ = theano.scan(fn=second_moments,
                                 sequences=indices,
                                 outputs_info=[M2],
                                 non_sequences=nseq,
                                 allow_gc=False,
                                 name="%s>M2_scan" % (self.name))

        # last accumulator state; mirror the strict upper triangle below
        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
Beispiel #3
0
    def predict_symbolic(self, mx, Sx, unroll_scan=False):
        """Build the symbolic prediction for input mean ``mx`` and cov ``Sx``.

        Builds the predictive mean ``M``, the predictive covariance ``S``
        and the input-output covariance ``V`` and returns them as
        ``(M, S, V)``.

        With a truthy ``unroll_scan`` the loop over output pairs is
        unrolled with ``lasagne.utils.unroll_scan`` instead of being run
        through ``theano.scan``.
        """
        D = self.D
        E = self.E
        outputs = range(E)
        eye_D = tt.eye(D)

        # centralize inputs
        centered = self.X - mx

        # squared hyper-parameter stored in column D, and the identity
        # divided by the first D hyper-parameter columns
        # (presumably signal variance and inverse length-scales -- confirm)
        sf2 = self.hyp[:, D]**2
        inv_scales = self.hyp[:, :D].dimshuffle(0, 1, 'x')
        iL = tt.tile(eye_D, (E, 1, 1)) / inv_scales

        # predictive mean
        x_scaled = iL.dot(centered.T).transpose(0, 2, 1)
        iL_Sx = iL.dot(Sx)
        B = (iL_Sx[:, :, None, :] * iL[:, None, :, :]).sum(-1) + eye_D
        # TODO the per-output solve/det loops below are not vectorized
        t = tt.stack([solve(B[e].T, x_scaled[e].T).T for e in outputs])
        det_B = tt.stack([det(B[e]) for e in outputs])
        c = sf2 / tt.sqrt(det_B)
        k_exp = tt.exp(-0.5 * tt.sum(x_scaled * t, 2))
        k_beta = k_exp * self.beta  # E x N dot E x N
        M = tt.sum(k_beta, 1) * c

        # input output covariance
        t_iL = (t[:, :, None, :] * iL[:, None, :, :]).sum(-1)
        V_rows = [t_iL[e].T.dot(k_beta[e]) for e in outputs]
        V = tt.stack(V_rows).T * c

        # predictive covariance
        log_k = tt.log(sf2)[:, None] - 0.5 * tt.sum(x_scaled * x_scaled, 2)
        log_k_r = log_k.dimshuffle(0, 'x', 1)
        log_k_c = log_k.dimshuffle(0, 1, 'x')
        Lam = tt.square(iL)
        Lam_ij = (Lam.dimshuffle(0, 'x', 1, 2) + Lam).transpose(0, 1, 3, 2)
        R = tt.dot(Lam_ij, Sx).transpose(0, 1, 3, 2) + eye_D
        z_ = Lam.dot(centered.T).transpose(0, 2, 1)

        def pair_moment(i, j, acc, beta, iK, sf2, R, logk_c, logk_r, z_, Sx,
                        *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            R_ij = R[i, j]
            log_q = (logk_c[i] + logk_r[j] +
                     utils.maha(z_[i], -z_[j], 0.5 * solve(R_ij, Sx)))
            Q = tt.exp(log_q) / tt.sqrt(det(R_ij))

            # Eq 2.55
            off_diag = matrix_dot(beta[i], Q, beta[j])
            # diagonal entries get an extra correction term
            on_diag = off_diag - tt.sum(iK[i] * Q) + sf2[i]

            value = theano.ifelse.ifelse(tt.eq(i, j), on_diag, off_diag)
            return tt.set_subtensor(acc[i, j], value)

        # only the upper triangle (including the diagonal) is visited
        upper = np.triu_indices(E)
        idx_seqs = [tt.as_index_variable(idx) for idx in upper]
        start = tt.zeros((E, E))
        extras = [self.beta, self.iK, sf2, R, log_k_c, log_k_r, z_, Sx,
                  self.L]

        if unroll_scan:
            from lasagne.utils import unroll_scan as lasagne_unroll
            [steps] = lasagne_unroll(pair_moment, idx_seqs, [start], extras,
                                     len(upper[0]))
        else:
            steps, _ = theano.scan(fn=pair_moment,
                                   sequences=idx_seqs,
                                   outputs_info=[start],
                                   non_sequences=extras,
                                   allow_gc=False,
                                   strict=True,
                                   name="%s>M2_scan" % (self.name))

        # last accumulator state; mirror the strict upper triangle below
        M2 = steps[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
Beispiel #4
0
    def predict(self, mx, Sx, *args, **kwargs):
        """Build the symbolic prediction using the ``*_sp`` data.

        Falls back to ``GP_UI.predict(self, mx, Sx)`` when ``self.N`` is
        smaller than ``self.n_inducing``.  Otherwise the predictive mean
        ``M``, the predictive covariance ``S`` and the input-output
        covariance ``V`` are built from ``self.X_sp`` and ``self.beta_sp``
        for the input mean ``mx`` and input covariance ``Sx``.

        Extra positional and keyword arguments are accepted but not used.

        Returns the tuple ``(M, S, V)``.
        """
        if self.N < self.n_inducing:
            # stick with the full GP
            return GP_UI.predict(self, mx, Sx)

        idims = self.D
        odims = self.E

        # centralize inputs
        zeta = self.X_sp - mx

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE/lscales.dimshuffle(0, 1, 'x')

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        B = (iLdotSx[:, :, None, :]*iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2/tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l_ = tt.exp(-0.5*tt.sum(inp*t, 2))
        lb = l_*self.beta_sp
        M = tt.sum(lb, 1)*c

        # input output covariance
        tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T*c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5*tt.sum(inp*inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        # NOTE(review): Sx.T is used here; this equals Sx only if Sx is
        # symmetric (presumably true for a covariance -- confirm)
        R = tt.dot(LL, Sx.T).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices: only the upper triangle (with diagonal) is
        # visited, each step also writes the mirrored entry
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iK, sf2, R, logk_c, logk_r, z_, Sx):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5*solve(Rij, Sx))

            Q = tt.exp(n2)/tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            # Diagonal entries get the correction term plus a small
            # constant.  The constant is added to the entry itself; adding
            # it to the whole accumulator would shift every element of the
            # matrix at each diagonal step.
            m2 = theano.ifelse.ifelse(
                tt.eq(i, j), m2 - tt.sum(iK[i]*Q) + sf2[i] + 1e-6, m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            # mirror into the lower triangle (a no-op rewrite when i == j)
            M2 = tt.set_subtensor(M2[j, i], m2)
            return M2

        nseq = [self.beta_sp, (self.iKmm - self.iBmm), sf2,
                R, logk_c, logk_r, z_, Sx]
        M2_, _ = theano.scan(
            fn=second_moments, sequences=indices, outputs_info=[M2],
            non_sequences=nseq, allow_gc=False)
        # the last accumulator state holds the completed matrix
        M2 = M2_[-1]
        S = M2 - tt.outer(M, M)

        return M, S, V