예제 #1
0
    def grad_cd1(self, params, inputs, **kwargs):
        """Build a one-step contrastive-divergence gradient for ``params``.

        With ``V, H = self.shape`` the flat vector ``params`` is read through
        four slices:

        * ``params[:self.m_end]``            -- weights, reshaped to ``self.shape``
        * ``params[m_end:m_end + H]``        -- bias handed to ``self.H``
        * ``params[-2 * V:-V]``              -- bias handed to ``gauss``
        * ``params[-V:]``                    -- per-visible scale handed to
          ``gauss`` as ``prec`` (treated as the square root of the precision,
          going by the comment on its gradient in the original code)

        ``inputs`` is a 2-D batch; the size of its first axis is used to
        average every gradient segment.  ``kwargs`` is accepted and ignored.

        Side effect: when ``self.lmbd > 0`` the running average
        ``self.rho_hat`` is created or updated (0.9 * old + 0.1 * batch mean
        of the first hidden output).

        Returns a fresh array shaped like ``params`` that holds the gradient
        in the same slice layout.
        """
        grad = gzeros(params.shape)

        batch, _ = inputs.shape
        inv_batch = 1. / batch

        w_stop = self.m_end
        n_vis = self.shape[0]
        n_hid = self.shape[1]
        weights = params[:w_stop].reshape(self.shape)
        sqrt_prec = params[-n_vis:][:, gpu.newaxis]
        hid_bias = params[w_stop:w_stop + n_hid]
        vis_bias = params[-(2 * n_vis):-n_vis]
        scaled_w = sqrt_prec * weights

        # Up from the data, down through gauss, and up again from the first
        # (non-sampled) output of gauss.
        hid_data, hid_sampled = self.H(inputs, wm=scaled_w, bias=hid_bias, sampling=True)
        vis_out, vis_sampled = gauss(hid_sampled, wm=(weights / sqrt_prec).T,
                                     bias=vis_bias, prec=sqrt_prec.T, sampling=True)
        hid_recon, _ = self.H(vis_out, wm=scaled_w, bias=hid_bias)

        # Weights: the data-driven product is subtracted and the sample-driven
        # product added, i.e. the sign is deliberately flipped with respect to
        # the usual CD update direction.  An L2 term is added after averaging.
        grad[:w_stop] = -gdot(inputs.T * sqrt_prec, hid_data).ravel()
        grad[:w_stop] += gdot(vis_sampled.T * sqrt_prec, hid_recon).ravel()
        grad[:w_stop] *= inv_batch
        grad[:w_stop] += self.l2 * params[:w_stop]

        # Hidden bias: same sign convention, averaged over the batch.
        grad[w_stop:w_stop + n_hid] = -hid_data.sum(axis=0)
        grad[w_stop:w_stop + n_hid] += hid_recon.sum(axis=0)
        grad[w_stop:w_stop + n_hid] *= inv_batch

        # Visible bias: averaged difference, then scaled by the squared scale.
        grad[-2 * n_vis:-n_vis] = -inputs.sum(axis=0)
        grad[-2 * n_vis:-n_vis] += vis_sampled.sum(axis=0)
        grad[-2 * n_vis:-n_vis] *= inv_batch
        grad[-2 * n_vis:-n_vis] *= (sqrt_prec ** 2).T

        # Square root of the precision.
        # NOTE(review): the gdot(...) * weights term is added with a plus sign
        # in both phases while the quadratic term flips sign -- confirm that
        # this is intended.
        data_quad = gsum(2 * sqrt_prec.T * inputs * (vis_bias - inputs / 2), axis=0)
        data_cross = gsum(gdot(inputs.T, hid_data) * weights, axis=1)
        grad[-n_vis:] = -data_quad + data_cross
        sample_quad = gsum(2 * sqrt_prec.T * vis_sampled * (vis_bias - vis_sampled / 2), axis=0)
        sample_cross = gsum(gdot(vis_sampled.T, hid_recon) * weights, axis=1)
        grad[-n_vis:] += (sample_quad + sample_cross)
        grad[-n_vis:] *= inv_batch

        if self.lmbd > 0.:
            # Running estimate of the mean hidden output, kept on the instance.
            batch_mean = hid_data.mean(axis=0)
            if self.rho_hat is None:
                self.rho_hat = batch_mean
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * batch_mean
            kl_slope = (self.rho - self.rho_hat) / (self.rho_hat * (1 - self.rho_hat))
            hid_slope = hid_data * (1 - hid_data)
            penalty = self.lmbd / batch
            grad[w_stop:w_stop + n_hid] -= penalty * gsum(hid_slope, axis=0) * kl_slope
            grad[:w_stop] -= penalty * (gdot(inputs.T * sqrt_prec, hid_slope) * kl_slope).ravel()

        return grad
예제 #2
0
    def reconstruction(self, params, inputs, **kwargs):
        """Return a squared reconstruction error and a weighted hidden sum.

        ``params`` is sliced the same way on every call: the first
        ``self.m_end`` entries become the weight matrix (reshaped to
        ``self.shape``), the following ``self.shape[1]`` entries are the bias
        given to ``self.H``, ``params[-2 * V:-V]`` is the bias given to
        ``gauss`` and the last ``V = self.shape[0]`` entries are passed to
        ``gauss`` as ``prec``.

        ``inputs`` is pushed through ``self.H`` and back through ``gauss``,
        both with ``sampling=True``.  ``kwargs`` is accepted and ignored.

        Returns ``np.array([err, penalty])`` where ``err`` is the summed
        squared difference between ``inputs`` and the sampled output of
        ``gauss`` and ``penalty`` is ``self.lmbd`` times the sum of the first
        output of ``self.H``.
        """
        n_vis, n_hid = self.shape[0], self.shape[1]
        w_stop = self.m_end

        weights = params[:w_stop].reshape(self.shape)
        prec_col = params[-n_vis:][:, gpu.newaxis]
        hid_bias = params[w_stop:w_stop + n_hid]
        vis_bias = params[-(2 * n_vis):-n_vis]

        hid_out, hid_sampled = self.H(inputs, wm=prec_col * weights,
                                      bias=hid_bias, sampling=True)
        # Only the sampled output of gauss is needed here.
        _, vis_sampled = gauss(hid_sampled, wm=(weights / prec_col).T,
                               bias=vis_bias, prec=prec_col.T, sampling=True)

        hidden_total = hid_out.sum()
        sq_err = gsum((inputs - vis_sampled) ** 2)

        return np.array([sq_err, self.lmbd * hidden_total])
예제 #3
0
    def reconstruction(self, params, inputs, **kwargs):
        """Score one up/down pass: squared error plus a weighted hidden sum.

        Parameter layout read from the flat ``params`` vector
        (``V, H = self.shape``):

        * ``params[:self.m_end]``     -- weights, reshaped to ``self.shape``
        * ``params[m_end:m_end + H]`` -- bias passed to ``self.H``
        * ``params[-2 * V:-V]``       -- bias passed to ``gauss``
        * ``params[-V:]``             -- column vector passed to ``gauss`` as
          ``prec`` and used to scale the weights in both directions

        Both ``self.H`` and ``gauss`` are called with ``sampling=True``.
        ``kwargs`` is accepted and ignored.

        Returns a two-element ``np.array``: the summed squared difference
        between ``inputs`` and the sampled ``gauss`` output, followed by
        ``self.lmbd`` times the sum over the first ``self.H`` output.
        """
        split = self.m_end
        visible = self.shape[0]
        hidden = self.shape[1]

        w = params[:split].reshape(self.shape)
        scale = params[-visible:][:, gpu.newaxis]
        bias_h = params[split:split + hidden]
        bias_v = params[-(2 * visible):-visible]

        up_kwargs = dict(wm=scale * w, bias=bias_h, sampling=True)
        h_act, h_draw = self.H(inputs, **up_kwargs)

        down_kwargs = dict(wm=(w / scale).T, bias=bias_v, prec=scale.T,
                           sampling=True)
        # The first return value of gauss is not used by this method.
        _, v_draw = gauss(h_draw, **down_kwargs)

        h_total = h_act.sum()
        residual = inputs - v_draw
        err = gsum(residual ** 2)

        return np.array([err, self.lmbd * h_total])
예제 #4
0
    def grad_cd1(self, params, inputs, **kwargs):
        """Compute the CD-1 gradient with respect to the flat ``params`` vector.

        Slices of ``params`` that are read (``V, H = self.shape``):

        * ``[:self.m_end]``     -- weight matrix after ``reshape(self.shape)``
        * ``[m_end:m_end + H]`` -- bias argument of ``self.H``
        * ``[-2 * V:-V]``       -- bias argument of ``gauss``
        * ``[-V:]``             -- ``prec`` argument of ``gauss``; the
          original comment on its gradient calls it the square root of the
          precision

        The first axis of the 2-D ``inputs`` is the batch axis used for
        averaging.  ``kwargs`` is accepted and ignored.

        When ``self.lmbd > 0`` the method also maintains ``self.rho_hat`` as
        an exponential running mean (factor 0.9) of the batch-averaged first
        output of ``self.H`` and subtracts a penalty term from the weight and
        hidden-bias segments.

        Returns a newly allocated array with ``params.shape`` containing the
        gradient, laid out exactly like ``params``.
        """
        out = gzeros(params.shape)

        n_rows, _ = inputs.shape
        mean_factor = 1. / n_rows

        split = self.m_end
        visible = self.shape[0]
        hidden = self.shape[1]
        w = params[:split].reshape(self.shape)
        root_prec = params[-visible:][:, gpu.newaxis]
        bias_h = params[split:split + hidden]
        bias_v = params[-(2 * visible):-visible]
        w_up = root_prec * w

        h_pos, h_draw = self.H(inputs, wm=w_up, bias=bias_h, sampling=True)
        v_first, v_draw = gauss(h_draw,
                                wm=(w / root_prec).T,
                                bias=bias_v,
                                prec=root_prec.T,
                                sampling=True)
        # The second hidden pass starts from the first output of gauss, not
        # from the sampled one.
        h_neg, _ = self.H(v_first, wm=w_up, bias=bias_h)

        # --- weights -------------------------------------------------------
        # Data term enters negatively, sample term positively: the sign is
        # intentionally the reverse of the usual update direction.
        out[:split] = -gdot(inputs.T * root_prec, h_pos).ravel()
        out[:split] += gdot(v_draw.T * root_prec, h_neg).ravel()
        out[:split] *= mean_factor
        out[:split] += self.l2 * params[:split]

        # --- hidden bias ---------------------------------------------------
        out[split:split + hidden] = -h_pos.sum(axis=0)
        out[split:split + hidden] += h_neg.sum(axis=0)
        out[split:split + hidden] *= mean_factor

        # --- visible bias --------------------------------------------------
        out[-2 * visible:-visible] = -inputs.sum(axis=0)
        out[-2 * visible:-visible] += v_draw.sum(axis=0)
        out[-2 * visible:-visible] *= mean_factor
        out[-2 * visible:-visible] *= (root_prec ** 2).T

        # --- square root of the precision ----------------------------------
        # NOTE(review): the quadratic term changes sign between the two
        # phases but the gdot(...) * w term is added in both -- confirm that
        # this is intended.
        pos_quadratic = gsum(2 * root_prec.T * inputs * (bias_v - inputs / 2),
                             axis=0)
        pos_interaction = gsum(gdot(inputs.T, h_pos) * w, axis=1)
        out[-visible:] = -pos_quadratic + pos_interaction
        neg_quadratic = gsum(2 * root_prec.T * v_draw * (bias_v - v_draw / 2),
                             axis=0)
        neg_interaction = gsum(gdot(v_draw.T, h_neg) * w, axis=1)
        out[-visible:] += (neg_quadratic + neg_interaction)
        out[-visible:] *= mean_factor

        if self.lmbd > 0.:
            h_mean = h_pos.mean(axis=0)
            if self.rho_hat is None:
                # First call: seed the running average with this batch.
                self.rho_hat = h_mean
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * h_mean
            d_kl = (self.rho - self.rho_hat) / (self.rho_hat *
                                                (1 - self.rho_hat))
            h_deriv = h_pos * (1 - h_pos)
            weight = self.lmbd / n_rows
            out[split:split + hidden] -= weight * gsum(h_deriv, axis=0) * d_kl
            out[:split] -= weight * (gdot(inputs.T * root_prec, h_deriv) *
                                     d_kl).ravel()

        return out