Example #1
    def gradient(self, x, g, returnError=True):
        x = np.asarray(x)
        g = np.asarray(g)

        if self.flattenOut:
            g = g.ravel()

        # packed views of the hidden and visible gradient matrices
        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg = views[0]
        hgs = views[1:-1]
        vg = views[-1]

        # forward pass
        z1 = util.bias(x)
        z1s = [z1]
        zPrimes = []
        for hw, phi in zip(self.hws, self.transFunc):
            h = z1.dot(hw)

            z1 = util.bias(phi(h))
            z1s.append(z1)

            zPrime = phi(h, 1)
            zPrimes.append(zPrime)

        y = z1.dot(self.vw)

        if self.flattenOut:
            y = y.ravel()

        # error components
        e = util.colmat(y - g)
        delta = np.sign(e) / e.size

        # visible layer gradient
        vg[...] = z1.T.dot(delta)
        vg += self.penaltyGradient(-1)

        # backward pass for hidden layers
        w = self.vw
        for l in range(self.nHLayers - 1, -1, -1):
            delta = delta.dot(w[:-1, :].T) * zPrimes[l]
            hgs[l][...] = z1s[l].T.dot(delta)
            hgs[l] += self.penaltyGradient(l)
            w = self.hws[l]

        if returnError:
            error = np.mean(np.abs(e)) + self.penaltyError()
            return error, pg
        else:
            return pg
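
The initial delta here, np.sign(e) / e.size, is the subgradient of the mean absolute error with respect to the network output. A minimal standalone check of that identity against central finite differences (numpy only, made-up shapes):

    import numpy as np

    # subgradient of mean(|y - g|) with respect to y is sign(y - g) / y.size,
    # which is the initial delta propagated by the backward pass above
    rng = np.random.default_rng(0)
    y = rng.normal(size=(5, 3))
    g = rng.normal(size=(5, 3))

    analytic = np.sign(y - g) / y.size

    eps = 1e-6
    numeric = np.zeros_like(y)
    for idx in np.ndindex(*y.shape):
        yp = y.copy(); yp[idx] += eps
        ym = y.copy(); ym[idx] -= eps
        numeric[idx] = (np.mean(np.abs(yp - g)) - np.mean(np.abs(ym - g))) / (2.0 * eps)

    assert np.allclose(analytic, numeric, atol=1e-4)
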
Example #2
    def train(self, x, g):
        x = np.asarray(x)
        g = np.asarray(g)

        x1 = util.bias(x)

        penaltyMat = self.penalty * np.eye(x1.shape[1], dtype=self.dtype)
        penaltyMat[-1, -1] = 0.0

        a = x1.T @ x1 + penaltyMat
        b = x1.T @ g

        if self.pseudoInv is None:
            # fall back to the pseudo-inverse only when a is ill-conditioned
            pseudoInv = np.linalg.cond(a) >= 1.0 / np.finfo(self.dtype).eps
        else:
            pseudoInv = self.pseudoInv

        if pseudoInv:
            # since x1.T @ x1 is symmetric, pinvh is equivalent to, but faster
            # than, a general pseudo-inverse
            self.weights = sp.linalg.pinvh(a) @ b
        else:
            self.weights = np.linalg.solve(a, b)
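
A self-contained sketch of the same recipe, with illustrative names and shapes rather than the library's API: ridge regression via the normal equations, leaving the bias weight unpenalized and reserving the symmetric pseudo-inverse for ill-conditioned systems.

    import numpy as np
    import scipy.linalg as sla

    def ridge_fit(x1, g, penalty=1.0):
        # penalize every weight except the one attached to the bias column
        penaltyMat = penalty * np.eye(x1.shape[1])
        penaltyMat[-1, -1] = 0.0

        a = x1.T @ x1 + penaltyMat
        b = x1.T @ g

        if np.linalg.cond(a) < 1.0 / np.finfo(a.dtype).eps:
            return np.linalg.solve(a, b)   # well-conditioned: direct solve
        return sla.pinvh(a) @ b            # otherwise: symmetric pseudo-inverse

    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 4))
    x1 = np.hstack([x, np.ones((100, 1))])        # bias column appended last
    g = x @ rng.normal(size=(4, 2)) + 0.1 * rng.normal(size=(100, 2))
    w = ridge_fit(x1, g, penalty=0.5)             # (5, 2) weight matrix

pinvh is appropriate here because a = x1.T @ x1 + penaltyMat is symmetric by construction.
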
Example #3
    def evalRecs(self, x, context=None, returnContext=False):
        x = util.segmat(x)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]
        nIn1 = x1.shape[2]

        r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)

        if context is None:
            context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

        x1c = np.empty((nSeg, nIn1 + self.nHidden), dtype=self.dtype)

        for t in range(nObs):
            x1c[:, :nIn1] = x1[:, t]
            x1c[:, nIn1:] = context

            r[:, t] = self.phi(x1c.dot(self.hw))
            context[...] = r[:, t]

        if returnContext:
            return r, context
        else:
            return r
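
The loop implements an Elman-style recurrence: at every time step the hidden weights see the biased input for that step concatenated with the previous hidden activations. A self-contained sketch with made-up sizes and a logistic function standing in for self.phi:

    import numpy as np

    rng = np.random.default_rng(0)
    nSeg, nObs, nIn, nHidden = 2, 7, 3, 4

    # biased input segments and a single hidden weight matrix of shape
    # (nIn + 1 + nHidden, nHidden): inputs-plus-bias stacked over the context
    x1 = np.concatenate([rng.normal(size=(nSeg, nObs, nIn)),
                         np.ones((nSeg, nObs, 1))], axis=2)
    hw = 0.1 * rng.normal(size=(nIn + 1 + nHidden, nHidden))

    context = np.zeros((nSeg, nHidden))
    r = np.empty((nSeg, nObs, nHidden))
    for t in range(nObs):
        x1c = np.concatenate([x1[:, t], context], axis=1)    # input plus context
        r[:, t] = 1.0 / (1.0 + np.exp(-x1c @ hw))            # hidden activations
        context = r[:, t].copy()                             # fed back next step
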
Example #4
    def gradient(self, x, g, returnError=True):
        x = np.asarray(x)
        g = np.asarray(g)

        probs = self.probs(x)

        delta = (probs - g) / probs.size

        penMask = np.ones_like(self.weights)
        penMask[-1, :] = 0.0

        grad = (
            util.bias(x).T.dot(delta) + self.elastic * 2.0 * self.penalty *
            penMask * self.weights / self.weights.size +  # L2-norm penalty
            (1.0 - self.elastic) * self.penalty * penMask *
            np.sign(self.weights) / self.weights.size)  # L1-norm penalty

        gf = grad.ravel()

        if returnError:
            pf = self.weights[:-1, :].ravel()
            err = (
                -np.mean(g * np.log(util.capZero(probs))) + self.elastic *
                self.penalty * pf.dot(pf) / pf.size +  # L2-norm penalty
                (1.0 - self.elastic) * self.penalty * np.mean(np.abs(pf))
            )  # L1-norm penalty

            return err, gf
        else:
            return gf
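
The penalty is an elastic-net blend of L2 and L1 terms controlled by elastic. Ignoring the bias-row masking, the penalty in the error and its contribution to the gradient are consistent with each other, which a finite-difference check over a single stand-alone weight matrix makes explicit:

    import numpy as np

    rng = np.random.default_rng(0)
    w = rng.normal(size=(4, 3))
    penalty, elastic = 0.1, 0.7

    def pen(w):
        # elastic-net penalty: a convex blend of mean squared weights
        # and mean absolute weights
        return penalty * (elastic * np.mean(w**2) +
                          (1.0 - elastic) * np.mean(np.abs(w)))

    analytic = (elastic * 2.0 * penalty * w / w.size +
                (1.0 - elastic) * penalty * np.sign(w) / w.size)

    eps = 1e-6
    numeric = np.zeros_like(w)
    for idx in np.ndindex(*w.shape):
        wp = w.copy(); wp[idx] += eps
        wm = w.copy(); wm[idx] -= eps
        numeric[idx] = (pen(wp) - pen(wm)) / (2.0 * eps)

    assert np.allclose(analytic, numeric, atol=1e-4)
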
Example #5
    def evalRecs(self, x, contexts=None, returnContexts=False):
        x = util.segmat(x)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]

        r1Prev = x1
        rs = []

        if contexts is None:
            contexts = [
                np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)
                for l in range(self.nRecLayers)
            ]

        for l in range(self.nRecLayers):
            nIn1 = r1Prev.shape[2]

            r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            r1c = np.empty((nSeg, nIn1 + self.nRecHiddens[l]),
                           dtype=self.dtype)
            context = contexts[l]

            for t in range(nObs):
                r1c[:, :nIn1] = r1Prev[:, t]
                r1c[:, nIn1:] = context

                r[:, t] = self.phi(r1c.dot(self.hws[l]))
                context[...] = r[:, t]

            r1Prev = util.bias(r)
            rs.append(r)

        if returnContexts:
            return rs, contexts
        else:
            return rs
Example #6
    def gradient(self, x, g, returnError=True):
        x = np.asarray(x)
        g = np.asarray(g)

        probs = self.probs(x)

        delta = (probs - g) / probs.size

        grad = util.bias(x).T.dot(delta)

        gf = grad.ravel()

        if returnError:
            err = -np.mean(g * np.log(util.capZero(probs)))

            return err, gf
        else:
            return gf
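
The delta here is the standard softmax/cross-entropy result: with probs = softmax(x1 @ w) and err = -mean(g * log(probs)), the weight gradient is x1.T @ (probs - g) / probs.size whenever each row of g sums to one. A standalone finite-difference check, with a local softmax standing in for util.softmax:

    import numpy as np

    def softmax(v):
        v = v - v.max(axis=1, keepdims=True)
        ev = np.exp(v)
        return ev / ev.sum(axis=1, keepdims=True)

    rng = np.random.default_rng(0)
    x1 = np.hstack([rng.normal(size=(20, 3)), np.ones((20, 1))])  # bias column
    g = np.eye(4)[rng.integers(0, 4, size=20)]                    # one-hot targets
    w = 0.1 * rng.normal(size=(4, 4))

    def err(w):
        return -np.mean(g * np.log(softmax(x1 @ w)))

    analytic = x1.T @ (softmax(x1 @ w) - g) / g.size

    eps = 1e-6
    numeric = np.zeros_like(w)
    for idx in np.ndindex(*w.shape):
        wp = w.copy(); wp[idx] += eps
        wm = w.copy(); wm[idx] -= eps
        numeric[idx] = (err(wp) - err(wm)) / (2.0 * eps)

    assert np.allclose(analytic, numeric, atol=1e-4)
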
Example #7
    def gradient(self, x, g, returnError=True):
        x = np.asarray(x)
        g = np.asarray(g)

        if self.flattenOut:
            g = g.ravel()

        x1 = util.bias(x)
        y = x1 @ self.weights

        if self.flattenOut:
            y = y.ravel()

        e = util.colmat(y - g)
        delta = 2.0 * e / e.size

        penMask = np.ones_like(self.weights)
        penMask[-1, :] = 0.0
        grad = (x1.T @ delta + self.elastic * 2.0 * self.penalty * penMask *
                self.weights / self.weights.size +
                (1.0 - self.elastic) * self.penalty * penMask *
                np.sign(self.weights) / self.weights.size)

        gf = grad.ravel()

        if returnError:
            wf = self.weights[:-1, :].ravel()

            error = (
                np.mean(e**2) + self.elastic * self.penalty *
                (wf @ wf) / wf.size +  # L2-norm penalty
                (1.0 - self.elastic) * self.penalty * np.mean(np.abs(wf))
            )  # L1-norm penalty
            return error, gf
        else:
            return gf
Example #8
    def gradient(self, x, g, unrollSteps=10, returnError=True):
        x = util.segmat(x)
        g = util.segmat(g)

        # packed views of the hidden and visible gradient matrices
        pg, hg, vg = util.packedViews((self.hw.shape, self.vw.shape),
                                      dtype=self.dtype)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]
        nIn1 = x1.shape[2]

        h = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
        r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
        x1c = np.empty((nSeg, nObs, nIn1 + self.nHidden), dtype=self.dtype)
        context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

        for t in range(nObs):
            x1c[:, t, :nIn1] = x1[:, t]
            x1c[:, t, nIn1:] = context

            h[:, t] = x1c[:, t].dot(self.hw)
            r[:, t] = self.phi(h[:, t])
            context[...] = r[:, t]

        r1 = util.bias(r)
        y = r1.dot(self.vw)
        rPrime = self.phi(h, 1)

        # error components, ditch transient
        e = (y - g)[:, self.transient:]
        delta = np.zeros(g.shape, dtype=self.dtype)
        delta[:, self.transient:] = 2.0 * e / e.size

        # visible layer gradient
        r1f = r1.reshape((-1, r1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = r1f.T.dot(deltaf)

        vwDelta = delta.dot(self.vw[:-1].T)

        gamma = np.zeros((nSeg, unrollSteps, self.nHidden), dtype=self.dtype)
        delta = np.zeros((nSeg, nObs, self.nHidden), dtype=self.dtype)

        # backward pass for hidden layer, unrolled through time
        for t in range(nObs - 1, 0, -1):
            rPrimet = rPrime[:, t][:, None, :]

            # push the unrolled error terms one step back through the recurrence
            beta = gamma[:, :-1]
            beta = beta.dot(self.rw.T)

            gamma[:, 0] = vwDelta[:, t]
            gamma[:, 1:] = beta
            gamma *= rPrimet

            # total error reaching the hidden layer at time t
            delta[:, t] = gamma.sum(axis=1)

        # hidden layer gradient, accumulated over segments and time
        x1cf = x1c.reshape((-1, x1c.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        hg[...] = x1cf.T.dot(deltaf)

        if returnError:
            return np.mean(e**2), pg
        else:
            return pg
Example #9
    def gradient(self, x, g, returnError=True):
        x = util.segmat(x)
        g = util.colmat(g)

        # packed views of the hidden and visible gradient matrices
        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg = views[0]

        if self.nHidden is None:
            cgs = views[1:-1]
            hg = None
            vg = views[-1]
        else:
            cgs = views[1:-2]
            hg = views[-2]
            vg = views[-1]

        # forward pass
        c = x
        c1s = []
        cPrimes = []
        for l, cw in enumerate(self.cws):
            width = self.convWidths[l]
            phi = self.transFunc[l]

            c = util.timeEmbed(c, lags=width - 1, axis=1)

            c1 = util.bias(c)
            c1s.append(c1)

            h = util.segdot(c1, cw)
            cPrime = phi(h, 1)
            cPrimes.append(cPrime)

            c = phi(h)

        c1 = util.bias(c)

        # evaluate hidden and visible layers
        if self.nHidden is None:
            y = util.segdot(c1, self.vw)
        else:
            h = util.segdot(c1, self.hw)
            z1 = util.bias(self.transFunc[-1](h))
            zPrime = self.transFunc[-1](h, 1)
            y = util.segdot(z1, self.vw)

        # trim the targets to line up with the shorter convolutional output
        trim = (g.shape[1] - y.shape[1]) // 2
        gTrim = g[:, :(g.shape[1] - trim)]
        gTrim = gTrim[:, -y.shape[1]:]

        # error components
        e = util.colmat(y - gTrim)
        delta = 2.0 * e / e.size

        if self.nHidden is None:
            # visible layer gradient
            c1f = c1.reshape((-1, c1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            vg[...] = c1f.T.dot(deltaf)
            vg += self.penaltyGradient(-1)

            delta = util.segdot(delta, self.vw[:-1].T)

        else:
            # visible layer gradient
            z1f = z1.reshape((-1, z1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            vg[...] = z1f.T.dot(deltaf)
            vg += self.penaltyGradient(-1)

            # hidden layer gradient
            c1f = c1.reshape((-1, c1.shape[-1]))
            delta = util.segdot(delta, self.vw[:-1].T) * zPrime
            deltaf = delta.reshape((-1, delta.shape[-1]))
            hg[...] = c1f.T.dot(deltaf)
            hg += self.penaltyGradient(-2)

            delta = util.segdot(delta, self.hw[:-1].T)

        # backward pass for convolutional layers
        for l in range(self.nConvLayers - 1, -1, -1):
            c1 = c1s[l]
            cPrime = cPrimes[l]

            delta = delta[:, :cPrime.shape[1]] * cPrime

            c1f = c1.reshape((-1, c1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            cgs[l][...] = c1f.T.dot(deltaf)
            cgs[l] += self.penaltyGradient(l)

            if l > 0:  # don't propagate the error back to the inputs
                delta = util.segdot(delta, self.cws[l][:-1].T)
                delta = deltaDeEmbedSum(delta, self.convWidths[l])

        if returnError:
            error = np.mean(e**2) + self.penaltyError()
            return error, pg
        else:
            return pg
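
The trimming before the error term accounts for the sequence shortening caused by each convolutional layer: a layer of width w drops w - 1 observations, so the targets are cropped, roughly half at each end, to line up with the shorter output. A toy illustration of the bookkeeping with made-up lengths:

    import numpy as np

    nObsIn, widths = 20, (5, 3)
    nObsOut = nObsIn - sum(w - 1 for w in widths)   # 14 observations survive

    g = np.arange(nObsIn)[None, :]                  # stand-in target sequence
    trim = (nObsIn - nObsOut) // 2                  # observations cut at the end
    gTrim = g[:, :nObsIn - trim][:, -nObsOut:]      # and the remainder at the start
    print(gTrim)                                    # [[ 3  4 ... 16]]
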
Example #10
    def gradient(self, x, g, unrollSteps=10, returnError=True):
        x = util.segmat(x)
        g = util.segmat(g)

        if isinstance(unrollSteps, int):
            unrollSteps = [unrollSteps] * self.nRecLayers

        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg = views[0]
        hgs = views[1:-1]
        vg = views[-1]

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]

        r1Prev = x1
        r1cs = []
        rPrimes = []

        for l in range(self.nRecLayers):
            nIn1 = r1Prev.shape[2]

            r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            h = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            r1c = np.empty((nSeg, nObs, nIn1 + self.nRecHiddens[l]),
                           dtype=self.dtype)
            context = np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)

            for t in range(nObs):
                r1c[:, t, :nIn1] = r1Prev[:, t]
                r1c[:, t, nIn1:] = context

                h[:, t] = r1c[:, t].dot(self.hws[l])
                r[:, t] = self.phi(h[:, t])
                context[...] = r[:, t]

            r1Prev = util.bias(r)
            r1cs.append(r1c)

            rPrime = self.phi(h, 1)
            rPrimes.append(rPrime)

        # evaluate visible layer
        r1 = r1Prev
        y = r1.dot(self.vw)

        # error components, ditch transient
        e = (y - g)[:, self.transient:]
        delta = np.zeros(g.shape, dtype=self.dtype)
        delta[:, self.transient:] = 2.0 * e / e.size

        # visible layer gradient
        r1f = r1.reshape((-1, r1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = r1f.T.dot(deltaf)

        # backward pass through each layer
        w = self.vw
        for l in range(self.nRecLayers - 1, -1, -1):
            r1c = r1cs[l]
            rwsTrans = self.rws[l].T
            rPrime = rPrimes[l]

            deltaPrev = delta.dot(w[:-1].T)

            gamma = np.zeros((nSeg, unrollSteps[l], self.nRecHiddens[l]),
                             dtype=self.dtype)
            delta = np.zeros((nSeg, nObs, self.nRecHiddens[l]),
                             dtype=self.dtype)

            # backward pass unrolled through time
            for t in range(nObs - 1, 0, -1):
                rPrimet = rPrime[:, t][:, None, :]

                beta = gamma[:, :-1]
                beta = beta.dot(rwsTrans)

                gamma[:, 0] = deltaPrev[:, t]
                gamma[:, 1:] = beta
                gamma *= rPrimet

                delta[:, t] = gamma.sum(axis=1)

            r1cf = r1c.reshape((-1, r1c.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            hgs[l][...] = r1cf.T.dot(deltaf)

            w = self.iws[l]

        if returnError:
            return np.mean(e**2), pg
        else:
            return pg
Example #11
    def gradient(self, x, g, returnError=True):
        """Compute the gradient of the mean-squared error with respect to the
        network weights for each layer and given inputs and targets.  Useful
        for optimization routines that make use of first-order gradients.

        Args:
            x:

            g:

            returnError:    If True (default) then also return the
                            mean-squared error.  This can improve
                            performance in some optimization routines
                            by avoiding an additional forward pass.

        Returns:
            If returnError is True, then return a tuple containing
            the error followed by a 1d numpy array containing the
            gradient of the packed weights.  If returnError is False,
            then only return the gradient.
        """
        x = np.asarray(x)
        g = np.asarray(g)

        # packed views of the hidden and visible gradient matrices
        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg  = views[0]
        hgs = views[1:-1]
        vg  = views[-1]

        # forward pass
        z1 = util.bias(x)
        z1s = [z1]
        zPrimes = []
        for hw, phi in zip(self.hws, self.transFunc):
            h = z1.dot(hw)

            z1 = util.bias(phi(h))
            z1s.append(z1)

            zPrime = phi(h, 1)
            zPrimes.append(zPrime)

        v = z1.dot(self.vw)
        probs = util.softmax(v)

        # error components
        delta = util.colmat(probs - g) / probs.size

        # visible layer gradient
        vg[...] = z1.T.dot(delta)
        vg += self.penaltyGradient(-1)

        # backward pass for hidden layers
        w = self.vw
        for l in range(self.nHLayers-1, -1, -1):
            delta = delta.dot(w[:-1,:].T) * zPrimes[l]
            hgs[l][...] = z1s[l].T.dot(delta)
            hgs[l] += self.penaltyGradient(l)
            w = self.hws[l]

        if returnError:
            error = -np.mean(g*np.log(util.capZero(probs))) + self.penaltyError()
            return error, pg
        else:
            return pg
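
The backward-pass recipe can be checked numerically on a tiny network. A standalone sketch, assuming a single hidden layer, a logistic activation standing in for transFunc, no penalty terms, and local helpers (softmax, bias) in place of the util module:

    import numpy as np

    def softmax(v):
        v = v - v.max(axis=1, keepdims=True)
        ev = np.exp(v)
        return ev / ev.sum(axis=1, keepdims=True)

    def bias(z):
        return np.hstack([z, np.ones((z.shape[0], 1))])

    rng = np.random.default_rng(0)
    x = rng.normal(size=(10, 3))
    g = np.eye(2)[rng.integers(0, 2, size=10)]          # one-hot targets
    hw = 0.1 * rng.normal(size=(4, 5))                  # hidden weights (+bias row)
    vw = 0.1 * rng.normal(size=(6, 2))                  # visible weights (+bias row)

    def forward(hw, vw):
        z1 = bias(1.0 / (1.0 + np.exp(-bias(x) @ hw)))  # logistic hidden layer
        return z1, softmax(z1 @ vw)

    def err(hw, vw):
        z1, probs = forward(hw, vw)
        return -np.mean(g * np.log(probs))

    # analytic gradients, mirroring the backward pass above
    z1, probs = forward(hw, vw)
    delta = (probs - g) / probs.size
    vg = z1.T @ delta
    zPrime = z1[:, :-1] * (1.0 - z1[:, :-1])            # logistic derivative
    hg = bias(x).T @ (delta @ vw[:-1].T * zPrime)

    # central finite-difference check of the hidden-layer gradient
    eps, numeric = 1e-6, np.zeros_like(hw)
    for idx in np.ndindex(*hw.shape):
        hp = hw.copy(); hp[idx] += eps
        hm = hw.copy(); hm[idx] -= eps
        numeric[idx] = (err(hp, vw) - err(hm, vw)) / (2.0 * eps)

    assert np.allclose(hg, numeric, atol=1e-4)
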