Example #1
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        # Square roots of the visible precisions (cf. the precision gradient below).
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs, wm=prec*wm, bias=params[m_end:m_end+H], sampling=True)
        v2, v_sampled = gauss(h_sampled, wm=(wm/prec).T, bias=params[-(2*V):-V], prec=prec.T, sampling=True)
        h2, _ = self.H(v2, wm=prec*wm, bias=params[m_end:m_end+H])

        #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
        # Note the negative sign: the gradient is supposed to point
        # in the 'wrong' direction, because the optimizer minimizes.
        g[:m_end] = -gdot(inputs.T*prec, h1).ravel()
        g[:m_end] += gdot(v_sampled.T*prec, h2).ravel()
        g[:m_end] *= 1./n
        g[:m_end] += self.l2*params[:m_end]

        g[m_end:m_end+H] = -h1.sum(axis=0)
        g[m_end:m_end+H] += h2.sum(axis=0)
        g[m_end:m_end+H] *= 1./n

        g[-2*V:-V] = -inputs.sum(axis=0)
        g[-2*V:-V] += v_sampled.sum(axis=0)
        g[-2*V:-V] *= 1./n
        g[-2*V:-V] *= (prec**2).T

        #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
        # Gradient for square root of precision
        g[-V:] = -gsum(2*prec.T*inputs*(params[-2*V:-V] - inputs/2), axis=0) + gsum(gdot(inputs.T, h1)*wm, axis=1)
        g[-V:] += (gsum(2*prec.T*v_sampled*(params[-2*V:-V] - v_sampled/2), axis=0) + gsum(gdot(v_sampled.T, h2)*wm, axis=1))
        g[-V:] *= 1./n

        #print gsum(g[-V:]**2)
        if self.lmbd > 0.:
            if self.rho_hat is None:
                self.rho_hat = h1.mean(axis=0)
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * h1.mean(axis=0)
            dKL_drho_hat = (self.rho - self.rho_hat)/(self.rho_hat*(1-self.rho_hat))
            h1_1mh1 = h1*(1 - h1)
            g[m_end:m_end+H] -= self.lmbd/n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
            g[:m_end] -= self.lmbd/n * (gdot(inputs.T * prec, h1_1mh1) * dKL_drho_hat).ravel()

        #g[:] = -g[:]
        return g
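
The slicing above implies that all parameters of the Gaussian-visible RBM live in one flat vector. Here is a minimal NumPy sketch of the layout that grad_cd1 assumes; the helper name and the concrete sizes are illustrative and not part of gpustack:

import numpy as np

def unpack_gaussian_rbm(params, V, H):
    # Assumed flat layout:
    # [V*H weights | H hidden biases | V visible biases | V sqrt-precisions]
    m_end = V * H
    wm = params[:m_end].reshape(V, H)   # weight matrix
    bias_h = params[m_end:m_end + H]    # hidden biases
    bias_v = params[-2 * V:-V]          # visible biases
    prec = params[-V:]                  # square roots of the precisions
    return wm, bias_h, bias_v, prec

# Layout demo only: allocate and unpack a parameter vector for V=784, H=500.
params = np.zeros(784 * 500 + 500 + 2 * 784)
wm, bias_h, bias_v, prec = unpack_gaussian_rbm(params, V=784, H=500)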
Example #2
def logsumexp(array, axis=0):
    """
    Compute the log of the sum of exps along _axis_ of _array_
    in a numerically stable way.
    """
    axis_max = gmax(array, axis)[:, gnewaxis]
    return axis_max + glog(gsum(gexp(array - axis_max), axis))[:, gnewaxis]
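
The gnumpy helpers (gmax, gexp, gsum, glog, gnewaxis) mirror their NumPy counterparts; a quick plain-NumPy sketch of the same max-shift trick, reducing along axis=1 to match the [:, newaxis] reshape used above:

import numpy as np

a = np.array([[1000.0, 1000.5],
              [-1000.0, -999.5]])

# Naive evaluation overflows/underflows:
naive = np.log(np.exp(a).sum(axis=1))      # -> [inf, -inf] plus warnings

# Shifting by the per-row maximum keeps the exponentials representable:
axis_max = a.max(axis=1)[:, np.newaxis]
stable = axis_max + np.log(np.exp(a - axis_max).sum(axis=1))[:, np.newaxis]
print(stable.ravel())                      # -> [1000.974..., -999.026...]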
Example #3
def logsumexp(array, axis=0):
    """
    Compute the log of the sum of exps along _axis_ of _array_
    in a numerically stable way.
    """
    axis_max = gmax(array, axis)[:, gnewaxis]
    return axis_max + glog(gsum(gexp(array-axis_max), axis))[:, gnewaxis]
Example #4
    def reconstruction(self, params, inputs, **kwargs):
        """
        """
        x, y = inputs
        n, _ = x.shape

        weights_xf = params[:self.xf_sz].reshape(self.xfshape)
        weights_yf = params[self.xf_sz:self._cum_xy].reshape(self.yfshape)
        weights_fh = params[self._cum_xy:self._cum_xyh].reshape(self.fhshape)
        bias_h = params[self._cum_xyh:self.size]
        bias_x = params[self.size:-self.shape[0][1]]
        bias_y = params[-self.shape[0][1]:]

        factors_x = gdot(x, weights_xf) 
        factors_y = gdot(y, weights_yf)
        factors = factors_x * factors_y

        h, h_sampled = bernoulli(factors, wm=weights_fh, bias=bias_h, sampling=True)
        rho_hat = h.sum()
        factors_h = gdot(h, weights_fh.T)

        way = np.random.rand() > 0.5
        if way:
            # reconstruct y (output) first.
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)
            factors_y[:] = gdot(y1, weights_yf)
            # then reconstruct x (input).
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
        else:
            # reconstruct x (input) first.
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
            factors_x[:] = gdot(x1, weights_xf)
            # then reconstruct y (output).
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)

        xrec = gsum((x - x1)**2)
        yrec = gsum((y - y1)**2)

        return np.array([xrec, yrec, self.lmbd*rho_hat, self.avg_nxyf, self.avg_nfh])
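
For orientation, the positive pass that feeds the reconstruction above multiplies the factor projections of x and y elementwise before mapping them to the hidden units. Below is a minimal NumPy sketch under the assumption that bernoulli() applies a logistic activation; all shapes, and the visible unit type behind self.V, are placeholders:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
n, dx, dy, f, nh = 4, 6, 5, 3, 7           # batch, x-dim, y-dim, factors, hiddens
x = rng.standard_normal((n, dx))
y = rng.standard_normal((n, dy))
W_xf = 0.1 * rng.standard_normal((dx, f))
W_yf = 0.1 * rng.standard_normal((dy, f))
W_fh = 0.1 * rng.standard_normal((f, nh))
b_h = np.zeros(nh)
b_y = np.zeros(dy)

# Three-way interaction: elementwise product of the two factor projections.
factors = (x @ W_xf) * (y @ W_yf)          # (n, f)
h = sigmoid(factors @ W_fh + b_h)          # hidden activations, (n, nh)

# One reconstruction branch: y from x and the hidden factor projection.
factors_h = h @ W_fh.T                     # (n, f)
y_pre = ((x @ W_xf) * factors_h) @ W_yf.T + b_y   # pre-activation handed to self.V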
Example #5
    def reconstruction(self, params, inputs, **kwargs):
        """
        """
        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs, wm=prec*wm, bias=params[m_end:m_end+H], sampling=True)
        v2, v_sampled = gauss(h_sampled, wm=(wm/prec).T, bias=params[-(2*V):-V], prec=prec.T, sampling=True)

        rho_hat = h1.sum()
        rec = gsum((inputs - v_sampled)**2)
        
        return np.array([rec, self.lmbd*rho_hat])
Example #6
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)

        h1, h_sampled = self.H(inputs,
                               wm=wm,
                               bias=params[m_end:-V],
                               sampling=True)
        v2, _ = self.V(h_sampled, wm=wm.T, bias=params[-V:])
        h2, _ = self.H(v2, wm=wm, bias=params[m_end:-V])

        # Note the negative sign: the gradient is supposed to point
        # in the 'wrong' direction, because the optimizer minimizes.
        g[:m_end] = -gdot(inputs.T, h1).ravel()
        g[:m_end] += gdot(v2.T, h2).ravel()
        g[:m_end] *= 1. / n
        g[:m_end] += self.l2 * params[:m_end]

        g[m_end:-V] = -h1.mean(axis=0)
        g[m_end:-V] += h2.mean(axis=0)

        g[-V:] = -inputs.mean(axis=0)
        g[-V:] += v2.mean(axis=0)

        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        dKL_drho_hat = (self.rho - self.rho_hat) / (self.rho_hat *
                                                    (1 - self.rho_hat))
        h1_1mh1 = h1 * (1 - h1)
        g[m_end:-V] -= self.lmbd / n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd / n * (gdot(inputs.T, h1_1mh1) *
                                      dKL_drho_hat).ravel()

        return g
Example #7
File: rbm.py Project: osdf/gpustack
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)

        h1, h_sampled = self.H(inputs, wm=wm, bias=params[m_end:-V], sampling=True)
        v2, _ = self.V(h_sampled, wm=wm.T, bias=params[-V:])
        h2, _ = self.H(v2, wm=wm, bias=params[m_end:-V])

        # Note the negative sign: the gradient is supposed to point
        # in the 'wrong' direction, because the optimizer minimizes.
        g[:m_end] = -gdot(inputs.T, h1).ravel()
        g[:m_end] += gdot(v2.T, h2).ravel()
        g[:m_end] *= 1./n
        g[:m_end] += self.l2*params[:m_end]

        g[m_end:-V] = -h1.mean(axis=0)
        g[m_end:-V] += h2.mean(axis=0)

        g[-V:] = -inputs.mean(axis=0)
        g[-V:] += v2.mean(axis=0)

        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        dKL_drho_hat = (self.rho - self.rho_hat)/(self.rho_hat*(1-self.rho_hat))
        h1_1mh1 = h1*(1 - h1)
        g[m_end:-V] -= self.lmbd/n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd/n * (gdot(inputs.T, h1_1mh1) * dKL_drho_hat).ravel()

        return g
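
Both listings above return the negated CD-1 statistics so that a minimizing optimizer can be applied directly. The following is a compact, stand-alone NumPy sketch of the same CD-1 recipe for an RBM with logistic visible and hidden units; the shapes, data and learning rate are made up for illustration and are not gpustack code:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
n, V, H = 20, 8, 5
data = (rng.random((n, V)) > 0.5).astype(float)

W = 0.01 * rng.standard_normal((V, H))
b_h = np.zeros(H)
b_v = np.zeros(V)

# Positive phase: hidden activations and samples driven by the data.
h1 = sigmoid(data @ W + b_h)
h1_sampled = (rng.random(h1.shape) < h1).astype(float)

# Negative phase: one mean-field reconstruction and its hidden activations.
v2 = sigmoid(h1_sampled @ W.T + b_v)
h2 = sigmoid(v2 @ W + b_h)

# CD-1 statistics with the sign flipped for a minimizer, as in grad_cd1.
gW = (-(data.T @ h1) + (v2.T @ h2)) / n
gb_h = -h1.mean(axis=0) + h2.mean(axis=0)
gb_v = -data.mean(axis=0) + v2.mean(axis=0)

# A minimizer-style update then follows the usual CD learning rule.
lr = 0.1
W -= lr * gW
b_h -= lr * gb_h
b_v -= lr * gb_v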
Example #8
    def reconstruction(self, params, inputs, **kwargs):
        """
        """
        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs,
                               wm=prec * wm,
                               bias=params[m_end:m_end + H],
                               sampling=True)
        v2, v_sampled = gauss(h_sampled,
                              wm=(wm / prec).T,
                              bias=params[-(2 * V):-V],
                              prec=prec.T,
                              sampling=True)

        rho_hat = h1.sum()
        rec = gsum((inputs - v_sampled)**2)

        return np.array([rec, self.lmbd * rho_hat])
Example #9
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        # Square roots of the visible precisions (cf. the precision gradient below).
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs,
                               wm=prec * wm,
                               bias=params[m_end:m_end + H],
                               sampling=True)
        v2, v_sampled = gauss(h_sampled,
                              wm=(wm / prec).T,
                              bias=params[-(2 * V):-V],
                              prec=prec.T,
                              sampling=True)
        h2, _ = self.H(v2, wm=prec * wm, bias=params[m_end:m_end + H])

        #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
        # Note the negative sign: the gradient is supposed to point
        # in the 'wrong' direction, because the optimizer minimizes.
        g[:m_end] = -gdot(inputs.T * prec, h1).ravel()
        g[:m_end] += gdot(v_sampled.T * prec, h2).ravel()
        g[:m_end] *= 1. / n
        g[:m_end] += self.l2 * params[:m_end]

        g[m_end:m_end + H] = -h1.sum(axis=0)
        g[m_end:m_end + H] += h2.sum(axis=0)
        g[m_end:m_end + H] *= 1. / n

        g[-2 * V:-V] = -inputs.sum(axis=0)
        g[-2 * V:-V] += v_sampled.sum(axis=0)
        g[-2 * V:-V] *= 1. / n
        g[-2 * V:-V] *= (prec**2).T

        #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
        # Gradient for square root of precision
        g[-V:] = -gsum(2 * prec.T * inputs * (params[-2 * V:-V] - inputs / 2),
                       axis=0) + gsum(gdot(inputs.T, h1) * wm, axis=1)
        g[-V:] += (gsum(2 * prec.T * v_sampled *
                        (params[-2 * V:-V] - v_sampled / 2),
                        axis=0) + gsum(gdot(v_sampled.T, h2) * wm, axis=1))
        g[-V:] *= 1. / n

        #print gsum(g[-V:]**2)
        if self.lmbd > 0.:
            if self.rho_hat is None:
                self.rho_hat = h1.mean(axis=0)
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * h1.mean(axis=0)
            dKL_drho_hat = (self.rho - self.rho_hat) / (self.rho_hat *
                                                        (1 - self.rho_hat))
            h1_1mh1 = h1 * (1 - h1)
            g[m_end:m_end +
              H] -= self.lmbd / n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
            g[:m_end] -= self.lmbd / n * (gdot(inputs.T * prec, h1_1mh1) *
                                          dKL_drho_hat).ravel()

        #g[:] = -g[:]
        return g