Example #1
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))

        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
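
A minimal numpy sketch of the helpers these examples lean on (logistic, Dsigmoid, bKL). The project itself runs on gnumpy garrays, so these plain-numpy definitions are assumptions for illustration only:

    import numpy as np

    def logistic(x):
        # Sigmoid activation.
        return 1.0 / (1.0 + np.exp(-x))

    def Dsigmoid(h):
        # Derivative of the sigmoid, written in terms of its output h.
        return h * (1.0 - h)

    def bKL(rho, rho_hat):
        # Elementwise KL divergence between Bernoulli(rho) and Bernoulli(rho_hat),
        # the sparsity penalty summed in the examples above.
        return (rho * np.log(rho / rho_hat) +
                (1 - rho) * np.log((1 - rho) / (1 - rho_hat)))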
Example #2
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
        cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m *
                     gpu.sum(w**2, axis=0))
        g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(
            delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
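
The addon term here is the contractive penalty: the squared Frobenius norm of the encoder Jacobian, which for sigmoid units factorizes into the squared sigmoid derivative times the squared column norms of w. A standalone numpy sketch of the same quantity, assuming a sigmoid encoder:

    import numpy as np

    def cae_penalty(inpts, w, bias):
        # sum_j mean_i(h'_ij ** 2) * ||w_j||**2, i.e. the batch-averaged
        # squared Frobenius norm of dh/dx for a sigmoid hidden layer.
        h = 1.0 / (1.0 + np.exp(-(inpts @ w + bias)))
        dh = h * (1 - h)
        return np.sum(np.mean(dh ** 2, axis=0) * np.sum(w ** 2, axis=0))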
Example #3
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))
 
        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[self.size:-self.shape[0]] = gdot(hddn.T, delta).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] = gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:self.size] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #4
    def pt_score(self, params, inpts, **kwargs):
        hddn = self.activ(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        sc = self.score(Z, inpts)
        
        return sc
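
All of these methods slice a single flat parameter vector. For the untied autoencoder above, the slicing suggests the following layout; the concrete sizes here are assumptions for illustration:

    import numpy as np

    V, H = 4, 3                      # visible and hidden sizes (hypothetical)
    m_end = V * H                    # end of the encoder weight block
    size = m_end + H                 # ... plus the encoder bias
    total = size + H * V + V         # ... plus decoder weights and decoder bias
    params = np.zeros(total)

    W_enc = params[:m_end].reshape(V, H)     # self.shape
    b_enc = params[m_end:size]
    W_dec = params[size:-V].reshape(H, V)    # self.Tshape
    b_dec = params[-V:]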
Example #5
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)

        hddn = self.activ(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        _hddn = hddn.as_numpy_array()
        idxs = np.argsort(_hddn, axis=1)
        _hddn[range(_hddn.shape[0]), idxs[:, self.ak :].T] = 0
        hddn = gpu.garray(_hddn)
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        _, delta = self.score(Z, inpts, error=True)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        dsc_dha = gdot(delta, params[: self.m_end].reshape(self.shape)) * diff_table[self.activ](hddn)

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta
        return g
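
The argsort block implements the k-sparse step: only a fixed number of hidden activations per example survive before decoding. A numpy-only sketch of keeping the k largest activations per row, which is the usual k-sparse choice (whether self.ak counts the kept or the dropped units in this codebase is an assumption):

    import numpy as np

    def k_sparse(hddn, k):
        # Keep the k largest activations in each row, zero the rest.
        idxs = np.argsort(hddn, axis=1)          # ascending order per row
        rows = np.arange(hddn.shape[0])[:, None]
        out = hddn.copy()
        out[rows, idxs[:, :-k]] = 0              # drop the smallest H - k units
        return out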
Example #6
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #7
 def bprop(self, params, grad, delta):
     dE_da = delta * diff_table[self.activ](self.Z)
     # gradient of the bias
     grad[self.m_end:] = dE_da.sum(axis=0)
     # gradient of the weights
     grad[:self.m_end] = gdot(self.data.T, dE_da).ravel()
     # backpropagate the delta
     delta = gdot(dE_da, params[:self.m_end].reshape(self.shape).T)
     del self.Z
     return delta
Example #8
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs, wm=prec*wm, bias=params[m_end:m_end+H], sampling=True)
        v2, v_sampled = gauss(h_sampled, wm=(wm/prec).T, bias=params[-(2*V):-V], prec=prec.T, sampling=True)
        h2, _ = self.H(v2, wm=prec*wm, bias=params[m_end:m_end+H])

        #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
        # Note the negative sign: the gradient is 
        # supposed to point into 'wrong' direction.
        g[:m_end] = -gdot(inputs.T*prec, h1).ravel()
        g[:m_end] += gdot(v_sampled.T*prec, h2).ravel()
        g[:m_end] *= 1./n
        g[:m_end] += self.l2*params[:m_end]

        g[m_end:m_end+H] = -h1.sum(axis=0)
        g[m_end:m_end+H] += h2.sum(axis=0)
        g[m_end:m_end+H] *= 1./n

        g[-2*V:-V] = -inputs.sum(axis=0)
        g[-2*V:-V] += v_sampled.sum(axis=0)
        g[-2*V:-V] *= 1./n
        g[-2*V:-V] *= (prec**2).T

        #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
        # Gradient for square root of precision
        g[-V:] = -gsum(2*prec.T*inputs*(params[-2*V:-V] - inputs/2), axis=0) + gsum(gdot(inputs.T, h1)*wm, axis=1)
        g[-V:] += (gsum(2*prec.T*v_sampled*(params[-2*V:-V] - v_sampled/2), axis=0) + gsum(gdot(v_sampled.T, h2)*wm, axis=1))
        g[-V:] *= 1./n

        #print gsum(g[-V:]**2)
        if self.lmbd > 0.:
            if self.rho_hat is None:
                self.rho_hat = h1.mean(axis=0)
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * h1.mean(axis=0)
            dKL_drho_hat = (self.rho - self.rho_hat)/(self.rho_hat*(1-self.rho_hat))
            h1_1mh1 = h1*(1 - h1)
            g[m_end:m_end+H] -= self.lmbd/n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
            g[:m_end] -= self.lmbd/n * (gdot(inputs.T * prec, h1_1mh1) * dKL_drho_hat).ravel()

        #g[:] = -g[:]
        return g
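
The gauss helper used above is not shown here; a hedged numpy sketch of what it plausibly computes, assuming prec holds the square root of each visible unit's precision (so sampling noise has standard deviation 1/prec):

    import numpy as np

    def gauss(h, wm, bias, prec, sampling=False):
        # Gaussian visible units: mean activation, plus optional noise
        # with per-unit standard deviation 1/prec.
        mean = h @ wm + bias
        v_sampled = mean + np.random.randn(*mean.shape) / prec if sampling else mean
        return mean, v_sampled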
Example #9
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1*hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)
        
        return sc
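
Since every pt_grad is meant to be the gradient of the matching pt_score (Example #3 pairs with this score), a finite-difference comparison is a cheap sanity check. A minimal numpy sketch, where f and grad are assumed wrappers around pt_score and pt_grad with the inputs held fixed:

    import numpy as np

    def check_grad(f, grad, params, eps=1e-6):
        # Compare an analytic gradient against central finite differences.
        g = grad(params.copy())
        num = np.zeros_like(params)
        for i in range(params.size):
            up, dn = params.copy(), params.copy()
            up[i] += eps
            dn[i] -= eps
            num[i] = (f(up) - f(dn)) / (2 * eps)
        return np.max(np.abs(num - g))

Note that the score updates a running rho_hat on every call, which makes repeated evaluations slightly inconsistent; a strict check would freeze that running average first.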
Example #10
 def bprop(self, params, grad, delta):
     # TODO: does the next line match the formula in the
     # paper? delta must be defined correctly.
     # Is self.C necessary? The loss function has no C.
     dE_da = self.C * delta * diff_table[self.activ](self.Z)
     # gradient of the bias
     grad[self.m_end:] = dE_da.sum(axis=0)
     # gradient of the weights, takes care of weight 'decay' factor (second addend)
     grad[:self.m_end] = gdot(self.data.T, dE_da).ravel() + params[:self.m_end]
     # backpropagate the delta
     delta = gdot(dE_da, params[:self.m_end].reshape(self.shape).T)
     del self.Z
     return delta
Example #11
 def bprop(self, params, grad, delta):
     # TODO: does the next line match the formula in the
     # paper? delta must be defined correctly.
     # Is self.C necessary? The loss function has no C.
     dE_da = self.C * delta * diff_table[self.activ](self.Z)
     # gradient of the bias
     grad[self.m_end:] = dE_da.sum(axis=0)
     # gradient of the weights, takes care of weight 'decay' factor (second addend)
     grad[:self.m_end] = gdot(self.data.T,
                              dE_da).ravel() + params[:self.m_end]
     # backpropagate the delta
     delta = gdot(dE_da, params[:self.m_end].reshape(self.shape).T)
     del self.Z
     return delta
Example #12
    def pt_score(self, params, inpts, **kwargs):
        # fprop in tied AE
        hddn = self.activ(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)), self.theta)
        # get indices
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        sc = self.score(Z, inpts)
        return sc
Example #13
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gdot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(
            self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)

        return sc
Example #14
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)

        h1, h_sampled = self.H(inputs,
                               wm=wm,
                               bias=params[m_end:-V],
                               sampling=True)
        v2, _ = self.V(h_sampled, wm=wm.T, bias=params[-V:])
        h2, _ = self.H(v2, wm=wm, bias=params[m_end:-V])

        # Note the negative sign: the gradient is
        # supposed to point into 'wrong' direction,
        # because the used optimizer likes to minimize.
        g[:m_end] = -gdot(inputs.T, h1).ravel()
        g[:m_end] += gdot(v2.T, h2).ravel()
        g[:m_end] *= 1. / n
        g[:m_end] += self.l2 * params[:m_end]

        g[m_end:-V] = -h1.mean(axis=0)
        g[m_end:-V] += h2.mean(axis=0)

        g[-V:] = -inputs.mean(axis=0)
        g[-V:] += v2.mean(axis=0)

        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        dKL_drho_hat = (self.rho - self.rho_hat) / (self.rho_hat *
                                                    (1 - self.rho_hat))
        h1_1mh1 = h1 * (1 - h1)
        g[m_end:-V] -= self.lmbd / n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd / n * (gdot(inputs.T, h1_1mh1) *
                                      dKL_drho_hat).ravel()

        return g
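
The H and V helpers are the RBM's conditional distributions. A minimal numpy sketch of a sigmoid layer with optional Bernoulli sampling, as an assumption about what the project's helper looks like:

    import numpy as np

    def H(inputs, wm, bias, sampling=False):
        # P(h = 1 | v) for a binary RBM; optionally draw a Bernoulli sample.
        h = 1.0 / (1.0 + np.exp(-(inputs @ wm + bias)))
        h_sampled = (np.random.rand(*h.shape) < h) * 1.0 if sampling else h
        return h, h_sampled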
Example #15
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)

        hddn = self.activ(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)), self.theta)
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        _, delta = self.score(Z, inpts, error=True)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        dsc_dha = gdot(delta, params[:self.m_end].reshape(self.shape)) * diff_table[self.activ](hddn)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        # clean up
        del delta
        return g
Example #16
    def reconstruction(self, params, inputs, **kwargs):
        """
        """
        x, y = inputs
        n, _ = x.shape

        weights_xf = params[:self.xf_sz].reshape(self.xfshape)
        weights_yf = params[self.xf_sz:self._cum_xy].reshape(self.yfshape)
        weights_fh = params[self._cum_xy:self._cum_xyh].reshape(self.fhshape)
        bias_h = params[self._cum_xyh:self.size]
        bias_x = params[self.size:-self.shape[0][1]]
        bias_y = params[-self.shape[0][1]:]


        factors_x = gdot(x, weights_xf) 
        factors_y = gdot(y, weights_yf)
        factors = factors_x * factors_y

        h, h_sampled = bernoulli(factors, wm=weights_fh, bias=bias_h, sampling=True)
        rho_hat = h.sum()
        factors_h = gdot(h, weights_fh.T)

        way = np.random.rand() > 0.5
        if way:
            # reconstruct y (output) first.
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)
            factors_y[:] = gdot(y1, weights_yf)
            # then reconstruct x (input).
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
        else:
            # reconstruct x (input) first.
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
            factors_x[:] = gdot(x1, weights_xf)
            # then reconstruct y (output).
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)

        xrec = gsum((x - x1)**2)
        yrec = gsum((y - y1)**2)

        return np.array([xrec, yrec, self.lmbd*rho_hat, self.avg_nxyf, self.avg_nfh])
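
The multiplicative interaction is the core of this factored gated model: x and y are projected onto shared factors, and the elementwise product of the projections drives the hidden units. A toy numpy sketch with illustrative (assumed) shapes:

    import numpy as np

    n, dx, dy, nf, nh = 5, 8, 8, 6, 4     # batch, x-dim, y-dim, factors, hiddens
    rng = np.random.default_rng(0)
    x = rng.standard_normal((n, dx))
    y = rng.standard_normal((n, dy))
    weights_xf = 0.1 * rng.standard_normal((dx, nf))
    weights_yf = 0.1 * rng.standard_normal((dy, nf))
    weights_fh = 0.1 * rng.standard_normal((nf, nh))

    factors = (x @ weights_xf) * (y @ weights_yf)      # per-factor product
    h = 1.0 / (1.0 + np.exp(-(factors @ weights_fh)))  # hidden probabilities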
Example #17
    def pt_grad(self, params, noisy_inpts, targets, l2=0., **kwargs):
        g = gzeros(params.shape)

        hddn = self.activ(gpu.dot(noisy_inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        _, delta = self.score(Z, targets, error=True)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        dsc_dha = gdot(delta, params[:self.m_end].reshape(self.shape)) * diff_table[self.activ](hddn)

        g[:self.m_end] += gdot(noisy_inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta
        return g
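
Note that the encoder reads noisy_inpts while the score compares the reconstruction against clean targets: the denoising autoencoder setup. A hedged sketch of building such a pair (Gaussian corruption is one common choice, not necessarily the one used here):

    import numpy as np

    rng = np.random.default_rng(0)
    targets = rng.random((100, 20))      # clean inputs double as targets
    noisy_inpts = targets + 0.1 * rng.standard_normal(targets.shape)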
Example #18
    def pt_score(self, params, inpts, **kwargs):
        # fprop in tied AE
        hddn = self.activ(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)),
            self.theta)
        # get indices
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        sc = self.score(Z, inpts)
        return sc
Example #19
 def pt_grad(self, params, inputs, targets, l2=0, **kwargs):
     g = gzeros(params.shape)
     Z = self.activ(gpu.dot(inputs, params[:self.m_end].reshape(self.shape)) + params[self.m_end:])
     _, delta = self.score(Z, targets, error=True)
     # necessary?
     delta = self.C * delta
     g[:self.m_end] = gdot(inputs.T, delta).ravel() + params[:self.m_end]
     g[self.m_end:] = delta.sum(axis=0)
     # clean up
     del delta
     return g
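
The bare + params[:self.m_end] adds the derivative of an L2 weight penalty with unit coefficient, since the gradient of 0.5 * ||w||**2 is w itself. A quick numeric confirmation:

    import numpy as np

    w = np.random.randn(10)
    i, eps = 3, 1e-6
    e = np.zeros(10)
    e[i] = 1.0
    num = (0.5 * np.sum((w + eps * e) ** 2) -
           0.5 * np.sum((w - eps * e) ** 2)) / (2 * eps)
    assert abs(num - w[i]) < 1e-5    # d/dw_i of the penalty equals w_i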
Example #20
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = self.activ(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        _, delta = self.score(Z, inpts, error=True)

        g[self.size:-self.shape[0]] = gdot(hddn.T, delta).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = diff_table[self.activ](hddn)
        dsc_dha = diff * gdot(delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] = gdot(inpts.T, dsc_dha).ravel()
        g[self.m_end:self.size] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #21
File: rbm.py Project: osdf/gpustack
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)

        h1, h_sampled = self.H(inputs, wm=wm, bias=params[m_end:-V], sampling=True)
        v2, _ = self.V(h_sampled, wm=wm.T, bias=params[-V:])
        h2, _ = self.H(v2, wm=wm, bias=params[m_end:-V])

        # Note the negative sign: the gradient is 
        # supposed to point into 'wrong' direction,
        # because the used optimizer likes to minimize.
        g[:m_end] = -gdot(inputs.T, h1).ravel()
        g[:m_end] += gdot(v2.T, h2).ravel()
        g[:m_end] *= 1./n
        g[:m_end] += self.l2*params[:m_end]

        g[m_end:-V] = -h1.mean(axis=0)
        g[m_end:-V] += h2.mean(axis=0)

        g[-V:] = -inputs.mean(axis=0)
        g[-V:] += v2.mean(axis=0)

        if self.rho_hat is None:
            self.rho_hat = h1.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * h1.mean(axis=0)
        dKL_drho_hat = (self.rho - self.rho_hat)/(self.rho_hat*(1-self.rho_hat))
        h1_1mh1 = h1*(1 - h1)
        g[m_end:-V] -= self.lmbd/n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
        g[:m_end] -= self.lmbd/n * (gdot(inputs.T, h1_1mh1) * dKL_drho_hat).ravel()

        return g
Example #22
    def pt_score(self, params, inpts, **kwargs):
        # fprop in tied AE
        hddn = self.activ(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        # get indices
        _hddn= hddn.as_numpy_array()
        idxs = np.argsort(_hddn, axis=1)
        _hddn[range(_hddn.shape[0]), idxs[:, self.ak:].T] = 0
        hddn = gpu.garray(_hddn)
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        sc = self.score(Z, inpts)
        return sc
Example #23
    def pt_grad(self, params, inputs, targets, l2=0, **kwargs):
        g = gzeros(params.shape)
        
        Z = self.activ(gpu.dot(inputs, params[:self.m_end].reshape(self.shape)) + params[self.m_end:])
        _, delta = self.score(Z, targets, error=True)

        g[:self.m_end] = gdot(inputs.T, delta).ravel()
        
        g[self.m_end:] = delta.sum(axis=0)
        # clean up
        del delta
        return g
Example #24
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #25
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)

        hddn = self.activ(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)),
            self.theta)
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        _, delta = self.score(Z, inpts, error=True)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        dsc_dha = gdot(delta, params[:self.m_end].reshape(
            self.shape)) * diff_table[self.activ](hddn)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        # clean up
        del delta
        return g
Example #26
    def pt_score(self, params, inpts, **kwargs):
        # fprop in tied AE
        hddn = self.activ(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        # get indices
        _hddn = hddn.as_numpy_array()
        idxs = np.argsort(_hddn, axis=1)
        _hddn[range(_hddn.shape[0]), idxs[:, self.ak :].T] = 0
        hddn = gpu.garray(_hddn)
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        sc = self.score(Z, inpts)
        return sc
Example #27
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1*hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)

        return np.array([sc, sc-sparsity, sparsity, gpu.mean(self.rho_hat)])
Example #28
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)

        hddn = self.activ(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        _hddn= hddn.as_numpy_array()
        idxs = np.argsort(_hddn, axis=1)
        _hddn[range(_hddn.shape[0]), idxs[:, self.ak:].T] = 0
        hddn = gpu.garray(_hddn)
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        _, delta = self.score(Z, inpts, error=True)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        dsc_dha = gdot(delta, params[:self.m_end].reshape(self.shape)) * diff_table[self.activ](hddn)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta
        return g
Example #29
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #30
    def cd1_3way_grad(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)
        x, y = inputs
        n, _ = x.shape

        #print self.avg_nxyf, self.avg_nfh

        weights_xf = params[:self.xf_sz].reshape(self.xfshape)
        weights_yf = params[self.xf_sz:self._cum_xy].reshape(self.yfshape)
        weights_fh = params[self._cum_xy:self._cum_xyh].reshape(self.fhshape)
        bias_h = params[self._cum_xyh:self.size]
        bias_x = params[self.size:-self.shape[0][1]]
        bias_y = params[-self.shape[0][1]:]

        # normalize weights
        sq_xf = weights_xf * weights_xf
        norm_xf = gpu.sqrt(sq_xf.sum(axis=0)) + SMALL
        sq_yf = weights_yf * weights_yf
        norm_yf = gpu.sqrt(sq_yf.sum(axis=0)) + SMALL
 
        norm_xyf = (norm_xf.mean() + norm_yf.mean())/2.
        self.avg_nxyf *= 0.95
        self.avg_nxyf += (0.05 * norm_xyf)
        weights_xf *= (self.avg_nxyf / norm_xf)
        weights_yf *= (self.avg_nxyf / norm_yf)

        sq_fh = weights_fh*weights_fh
        norm_fh = gpu.sqrt(sq_fh.sum(axis=1)) + SMALL
        self.avg_nfh *= 0.95
        self.avg_nfh += (0.05 * norm_fh.mean())
        weights_fh *= (self.avg_nfh / norm_fh[:, gpu.newaxis])
        # normalization done

        factors_x = gdot(x, weights_xf) 
        factors_y = gdot(y, weights_yf)
        factors = factors_x * factors_y

        h, h_sampled = bernoulli(factors, wm=weights_fh, bias=bias_h, sampling=True)
        factors_h = gdot(h_sampled, weights_fh.T)

        g[:self.xf_sz] = -gdot(x.T, factors_y*factors_h).ravel()
        g[self.xf_sz:self._cum_xy] = -gdot(y.T, factors_x*factors_h).ravel()
        g[self._cum_xy:self._cum_xyh] = -gdot(factors.T, h_sampled).ravel()
        g[self._cum_xyh:self.size] = -h.sum(axis=0)
        g[self.size:-self.shape[0][1]] = -x.sum(axis=0) 
        g[-self.shape[0][1]:] = -y.sum(axis=0)

        # 3way cd
        way = np.random.rand() > 0.5
        if way:
            # reconstruct y (output) first.
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)
            factors_y[:] = gdot(y1, weights_yf)
            # then reconstruct x (input).
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
            factors_x[:] = gdot(x1, weights_xf)
        else:
            # reconstruct x (input) first.
            tmp = factors_y * factors_h
            x1, _ = self.V(tmp, wm=weights_xf.T, bias=bias_x)
            factors_x[:] = gdot(x1, weights_xf)
            # then reconstruct y (output).
            tmp = factors_x * factors_h
            y1, _ = self.V(tmp, wm=weights_yf.T, bias=bias_y)
            factors_y[:] = gdot(y1, weights_yf)

        factors[:] = factors_x * factors_y
        h1, _ = bernoulli(factors, wm=weights_fh, bias=bias_h)
        factors_h[:] = gdot(h1, weights_fh.T)

        g[:self.xf_sz] += gdot(x1.T, factors_y*factors_h).ravel()
        g[:self.xf_sz] *= 1./n

        g[self.xf_sz:self._cum_xy] += gdot(y1.T, factors_x*factors_h).ravel()
        g[self.xf_sz:self._cum_xy] *= 1./n

        g[self._cum_xy:self._cum_xyh] += gdot(factors.T, h1).ravel()
        g[self._cum_xy:self._cum_xyh] *= 1./n

        g[self._cum_xyh:self.size] += h1.sum(axis=0)
        g[self._cum_xyh:self.size] *= 1./n

        g[self.size:-self.shape[0][1]] += x1.sum(axis=0)
        g[self.size:-self.shape[0][1]] *= 1./n

        g[-self.shape[0][1]:] += y1.sum(axis=0)
        g[-self.shape[0][1]:] *= 1./n

        return g
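
The normalization block at the top rescales every filter column toward a slowly-tracked average norm, which keeps the factored weights balanced during CD training. A standalone numpy sketch of that running renormalization:

    import numpy as np

    SMALL = 1e-8

    def renorm_columns(w, avg_norm, rate=0.05):
        # Pull each column norm of w toward a running average norm.
        norms = np.sqrt((w * w).sum(axis=0)) + SMALL
        avg_norm = (1 - rate) * avg_norm + rate * norms.mean()
        return w * (avg_norm / norms), avg_norm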
Example #31
 def fward(self, params, data):
     return gdot(data, params[:self.m_end].reshape(
         self.shape)) + params[self.m_end:]
Example #32
 def fprop(self, params, data):
     self.data = data
     self.Z = gdot(data, params[:self.m_end].reshape(self.shape)) + params[self.m_end:]
     return self.Z
Example #33
 def fward(self, params, data):
     return gdot(data, params[:self.m_end].reshape(self.shape)) + params[self.m_end:]
Example #34
 def fprop_spike(self, params, data):
     self.data = data
     self.Z = self.activ(gdot(data, params[:self.m_end].reshape(self.shape)) + params[self.m_end:])
     spike = self.Z > gpu.rand(self.Z.shape)
     return spike
Example #35
 def fprop(self, params, data):
     self.data = data
     self.Z = gdot(data, params[:self.m_end].reshape(
         self.shape)) + params[self.m_end:]
     return self.Z
Example #36
    def grad_cd1(self, params, inputs, **kwargs):
        """
        """
        g = gzeros(params.shape)

        n, _ = inputs.shape

        m_end = self.m_end
        V = self.shape[0]
        H = self.shape[1]
        wm = params[:m_end].reshape(self.shape)
        prec = params[-V:][:, gpu.newaxis]

        h1, h_sampled = self.H(inputs,
                               wm=prec * wm,
                               bias=params[m_end:m_end + H],
                               sampling=True)
        v2, v_sampled = gauss(h_sampled,
                              wm=(wm / prec).T,
                              bias=params[-(2 * V):-V],
                              prec=prec.T,
                              sampling=True)
        h2, _ = self.H(v2, wm=prec * wm, bias=params[m_end:m_end + H])

        #print h1[0,0], h_sampled[0,0], v2[0,0], v_sampled[0,0]
        # Note the negative sign: the gradient is
        # supposed to point into 'wrong' direction.
        g[:m_end] = -gdot(inputs.T * prec, h1).ravel()
        g[:m_end] += gdot(v_sampled.T * prec, h2).ravel()
        g[:m_end] *= 1. / n
        g[:m_end] += self.l2 * params[:m_end]

        g[m_end:m_end + H] = -h1.sum(axis=0)
        g[m_end:m_end + H] += h2.sum(axis=0)
        g[m_end:m_end + H] *= 1. / n

        g[-2 * V:-V] = -inputs.sum(axis=0)
        g[-2 * V:-V] += v_sampled.sum(axis=0)
        g[-2 * V:-V] *= 1. / n
        g[-2 * V:-V] *= (prec**2).T

        #print gsum(g[:m_end]**2), gsum(g[m_end:m_end+H]**2), gsum(g[-2*V:-V]**2)
        # Gradient for square root of precision
        g[-V:] = -gsum(2 * prec.T * inputs * (params[-2 * V:-V] - inputs / 2),
                       axis=0) + gsum(gdot(inputs.T, h1) * wm, axis=1)
        g[-V:] += (gsum(2 * prec.T * v_sampled *
                        (params[-2 * V:-V] - v_sampled / 2),
                        axis=0) + gsum(gdot(v_sampled.T, h2) * wm, axis=1))
        g[-V:] *= 1. / n

        #print gsum(g[-V:]**2)
        if self.lmbd > 0.:
            if self.rho_hat is None:
                self.rho_hat = h1.mean(axis=0)
            else:
                self.rho_hat *= 0.9
                self.rho_hat += 0.1 * h1.mean(axis=0)
            dKL_drho_hat = (self.rho - self.rho_hat) / (self.rho_hat *
                                                        (1 - self.rho_hat))
            h1_1mh1 = h1 * (1 - h1)
            g[m_end:m_end +
              H] -= self.lmbd / n * gsum(h1_1mh1, axis=0) * dKL_drho_hat
            g[:m_end] -= self.lmbd / n * (gdot(inputs.T * prec, h1_1mh1) *
                                          dKL_drho_hat).ravel()

        #g[:] = -g[:]
        return g
Example #37
 def fprop_dropout(self, params, data):
     self.data = data
     self.Z = self.activ(gdot(data, params[:self.m_end].reshape(self.shape)) + params[self.m_end:])
     self.drop = gpu.rand(self.Z.shape) > self.dropout
     self.Z *= self.drop
     return self.Z
Example #38
 def fprop(self, params, data):
     self.data = data
     self.Z = self.activ(gdot(data, params[:self.m_end].reshape(self.shape))\
             + params[self.m_end:])
     return self.Z
Example #39
 def fward_spike(self, params, data):
     Z = self.activ(gdot(data, params[:self.m_end].reshape(self.shape))\
             + params[self.m_end:])
     spike = Z > gpu.rand(Z.shape)
     return spike
Example #40
 def fward_dropout(self, params, data):
     return (1 - self.dropout) * self.activ(gdot(data,\
             params[:self.m_end].reshape(self.shape)) + params[self.m_end:])
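
fprop_dropout (Example #37) masks units at training time with keep probability 1 - dropout, and fward_dropout compensates at test time by scaling activations with the same factor, so the expected activation matches. A small numpy check of that correspondence:

    import numpy as np

    rng = np.random.default_rng(0)
    z = rng.random((10000, 50))
    p = 0.5                                 # drop probability
    masked = z * (rng.random(z.shape) > p)  # training-time dropout mask
    scaled = (1 - p) * z                    # test-time rescaling
    print(masked.mean(), scaled.mean())     # nearly equal in expectation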
Example #41
 def fward(self, params, data):
     return self.activ(gdot(data, params[:self.m_end].reshape(self.shape))\
             + params[self.m_end:])
Example #42
 def pt_score(self, params, noisy_inpts, targets, l2=0., **kwargs):
     # fprop in tied AE
     hddn = self.activ(gpu.dot(noisy_inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
     Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]
     sc = self.score(Z, targets)
     return sc