Example #1
0
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #2
0
def score_grad(weights, structure, inputs, score=score, **params):
    """
    """
    hdim = structure["hdim"]
    m, idim = inputs.shape
    ih = idim * hdim
    g = np.zeros(weights.shape, dtype=weights.dtype)
    
    # forward pass through model,
    # need 'error' signal at the end.
    sc, delta = score(weights, structure=structure, inputs=inputs, predict=False, error=True, **params)
    
    # recover saved hidden values
    hddn = structure["hiddens"]
    
    # weights are tied
    g[:ih] = np.dot(delta.T, hddn).ravel()
    g[ih+hdim:] = delta.sum(axis=0)
    
    # derivative of cae cost
    w = weights[:ih].reshape(idim, hdim)
    cae_grad = np.mean(Dsigmoid(hddn)**2, axis=0) * w
    cae_grad += (np.dot(inputs.T, (Dsigmoid(hddn)**2 * (1-2*hddn)))/m * np.sum(w**2, axis=0))
    cae_weight = structure["cae"]
    g[:ih] += cae_weight * 2 * cae_grad.ravel()

    dsc_dha = Dsigmoid(hddn) * np.dot(delta, weights[:ih].reshape(idim, hdim))
    g[:ih] += np.dot(inputs.T, dsc_dha).ravel()

    g[ih:ih+hdim] = dsc_dha.sum(axis=0)
    # clean up structure
    del structure["hiddens"]
    return sc, g
Example #3
0
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat_grad == None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))

        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #4
0
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
        cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m *
                     gpu.sum(w**2, axis=0))
        g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(
            delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #5
0
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #6
0
def score(weights, structure, inputs, predict=False, 
        error=False, **params):
    """
    Computes the sparisty penalty using exponential weighting.
    """
    hdim = structure["hdim"]
    _, idim = inputs.shape
    ih = idim * hdim

    hddn = sigmoid(np.dot(inputs, weights[:ih].reshape(idim, hdim)) + weights[ih:ih+hdim])
    z = np.dot(hddn, weights[:ih].reshape(idim, hdim).T) + weights[ih+hdim:]

    w = weights[:ih].reshape(idim, hdim)
    cae = np.sum(np.mean(Dsigmoid(hddn)**2, axis=0) * np.sum(w**2, axis=0))
    cae_weight = structure["cae"]
    cae *= cae_weight

    if error:
        structure["hiddens"] = hddn
    return structure["score"](z, inputs, predict=predict, error=error, addon=cae)