Example #1
    def train(self, x, g, optimFunc, **kwargs):
        x = util.segmat(x)
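        # 'O' (own data) and 'C' (C-contiguous) make np.require copy the
        # array when needed, so training cannot alias the caller's data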
        x = np.require(x, requirements=['O', 'C'])

        g = util.segmat(g)
        g = np.require(g, requirements=['O', 'C'])

        self.trainResult = optimFunc(self, x=x, g=g, **kwargs)
Example #2
    def error(self, x, g, *args, **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)[:, self.transient:]

        # evaluate network
        y = self.eval(x, returnTransient=False)

        # figure mse
        return np.mean((y - g)**2)
Example #3
    def error(self, x, g):
        x = util.segmat(x)
        g = util.segmat(g)

        # evaluate network
        y = self.eval(x)

        trim = (g.shape[1] - y.shape[1]) // 2
        gTrim = g[:, :(g.shape[1] - trim)]
        gTrim = gTrim[:, -y.shape[1]:]

        # figure mse
        return np.mean((y - gTrim)**2) + self.penaltyError()
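A quick numpy sketch of the centered trim above (shapes made up for illustration): when the network output y is shorter along the time axis than the target g, the code keeps a window of g centered on the valid region.

    import numpy as np

    g = np.arange(10, dtype=float).reshape(1, 10, 1)  # 1 segment, 10 time steps
    y = np.zeros((1, 6, 1))                           # output is 4 steps shorter

    trim = (g.shape[1] - y.shape[1]) // 2   # 2 steps to trim at each end
    gTrim = g[:, :(g.shape[1] - trim)]      # drop the trailing steps
    gTrim = gTrim[:, -y.shape[1]:]          # keep the last y-length window

    assert gTrim.shape == y.shape
    print(gTrim[0, :, 0])                   # [2. 3. 4. 5. 6. 7.]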
Example #4
    def __init__(self, classData, autoRegClass=AutoRegression, **autoRegKwargs):
        # initialize Classifier base class
        Classifier.__init__(self, util.segmat(classData[0]).shape[2], len(classData))

        self.autoRegClass = autoRegClass

        self.train(classData, **autoRegKwargs)
Example #5
    def evalRecs(self, x, context=None, returnContext=False):
        x = util.segmat(x)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]
        nIn1 = x1.shape[2]

        r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)

        if context is None:
            context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

        x1c = np.empty((nSeg, nIn1 + self.nHidden), dtype=self.dtype)

        for t in range(nObs):
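            # concatenate the biased input at time t with the previous hidden
            # state (the context) and push both through the hidden weights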
            x1c[:, :nIn1] = x1[:, t]
            x1c[:, nIn1:] = context

            r[:, t] = self.phi(x1c.dot(self.hw))
            context[...] = r[:, t]

        if returnContext:
            return r, context
        else:
            return r
Example #6
File: seg.py Project: idfah/cebl
    def reference(self, chans):
        chans = self.getChanIndices(chans)
        ref = self.data[:, :, chans]
        if len(chans) > 1:
            ref = ref.mean(axis=2)
        self.data -= util.segmat(ref)
        return self
Example #7
    def __init__(self, x, g, reservoir, transient=0, sideTrack=False,
                 verbose=False, **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)
        self.dtype = np.result_type(x.dtype, g.dtype)

        nIn = x.shape[2]
        nOut = g.shape[2]
        Regression.__init__(self, nIn, nOut)

        self.reservoir = reservoir
        self.transient = transient
        self.sideTrack = sideTrack

        self.verbose = verbose

        self.train(x, g, **kwargs)
Example #8
File: seg.py Project: idfah/cebl
    def __init__(self, data, sampRate, chanNames=None, markers=None,
                 start=0.0, deviceName='', dtype=None, copy=False):
        """Construct a new SegmentedEEG instance for processing eeg data
        that has been split into segments of equal length.

        Args:
            data:       A 3D numpy array of floats of shape (nSeg,nObs[,nDim])
                        containing the eeg segments.  The first axis
                        corresponds to the eeg segments.  The second axis
                        corresponds to the observations (i.e., time steps).
                        The third axis is optional and corresponds to eeg
                        channels.

            sampRate:   The sampling rate (frequency) of the eeg data in
                        samples-per-second (Hertz).

            chanNames:  A list of names of the channels in the eeg data.
                        If None (default) then the channel names are set
                        to '1', '2', ... 'nChan'.

            markers:    EEG event markers.  This is a list or tuple of floats
                        that mark each eeg segment.  There should be one marker
                        for each segment.  The interpretation of these markers
                        is up to the user.  If None (default) then markers
                        are set to 1, 2, ..., nSeg.

            start:      Starting time in seconds of the segments.  Defaults
                        to 0.0.  This is useful if the data were segmented
                        using an offset from an event.  For example, if an
                        ERP is segmented starting at -0.2 seconds before
                        the stimulus onset.

            deviceName: The name of the device used to record the eeg data.

            dtype:      The data type used to store the signal.  Must be
                        a floating point type, e.g., np.float32 or np.float64.
                        If None (default) the data type is determined from
                        the data argument.

            copy:       If False (default) then data will not be copied if
                        possible.  If True, then the data will definitely be
                        copied.  Warning:  If multiple EEG instances use
                        the same un-copied data array, then modifying one
                        EEG instance may lead to undefined behavior in
                        the other instances.
        """
        # ensure we have numpy array with three axes
        # copy and cast if necessary
        self.data = util.segmat(data, dtype=dtype, copy=copy)
        self.dtype = self.data.dtype

        self.nSeg = self.data.shape[0]

        EEGBase.__init__(self, self.data.shape[1], self.data.shape[2],
            sampRate=sampRate, chanNames=chanNames, deviceName=deviceName)

        self.setMarkers(markers, copy=copy)
        self.setStart(start)
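A minimal usage sketch for this constructor (illustrative only; the import path is hypothetical, so the call is left commented and the snippet runs with numpy alone):

    import numpy as np
    #from cebl.eeg.seg import SegmentedEEG   # hypothetical import path

    # 4 segments of 512 observations over 8 channels, per the docstring
    data = np.random.randn(4, 512, 8).astype(np.float32)

    # segments cut starting 0.2s before stimulus onset, one marker each
    #seg = SegmentedEEG(data, sampRate=256.0,
    #                   chanNames=['F3', 'F4', 'C3', 'C4',
    #                              'P3', 'P4', 'O1', 'O2'],
    #                   markers=[1.0, 2.0, 1.0, 2.0],
    #                   start=-0.2, deviceName='example device')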
Example #9
    def addSideTrack(self, x, act):
        x = util.segmat(x)

        if self.sideTrack:
            if self.verbose:
                print('adding side track...')
            return np.concatenate((x, act), axis=2)
        else:
            return act
Example #10
    def train(self, x, g, readoutClass=RidgeRegression, **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)

        act = self.reservoir.eval(x)
        act = self.addSideTrack(x, act)

        actf = act.reshape((-1, act.shape[-1]))

        if g.ndim == 3:
            gf = g.reshape((-1, g.shape[-1]))
        else:
            gf = g.ravel()

        if self.verbose:
            print('Training readout layer...')

        self.readout = readoutClass(actf[self.transient:], gf[self.transient:], **kwargs)
Example #11
    def eval(self, x, returnTransient=False):
        x = util.segmat(x)

        r = self.evalRecs(x)
        y = r.dot(self.vw[:-1]) + self.vw[-1]

        if not returnTransient:
            y = y[:, self.transient:]

        return y
Example #12
    def __init__(
            self,
            x,
            g,
            nHidden=10,
            transient=0,
            phi=transfer.tanh,
            #iwInitFunc=pinit.lecun, rwInitFunc=pinit.lecun,
            hwInitFunc=pinit.esp,
            vwInitFunc=pinit.lecun,
            optimFunc=optim.scg,
            **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)  # flattenOut? XXX - idfah

        self.dtype = np.result_type(x.dtype, g.dtype)

        Regression.__init__(self, x.shape[2], g.shape[2])
        optim.Optable.__init__(self)

        self.nHidden = nHidden
        self.transient = transient
        self.phi = phi

        self.pw, self.hw, self.vw = \
            util.packedViews(((self.nIn+self.nHidden+1, self.nHidden),
                              (self.nHidden+1, self.nOut)),
                             dtype=self.dtype)

        self.iw = self.hw[:(self.nIn + 1)]
        self.rw = self.hw[(self.nIn + 1):]

        # initialize weights
        #self.iw[...] = iwInitFunc(self.iw.shape).astype(self.dtype, copy=False)
        #self.rw[...] = rwInitFunc(self.rw.shape).astype(self.dtype, copy=False)
        self.hw[...] = hwInitFunc(self.hw.shape).astype(self.dtype, copy=False)
        self.vw[...] = vwInitFunc(self.vw.shape).astype(self.dtype, copy=False)

        # train the network
        if optimFunc is not None:
            self.train(x, g, optimFunc, **kwargs)
Example #13
    def eval(self, x):
        x = util.segmat(x)

        # evaluate convolutional layers
        c = self.evalConvs(x)[-1]

        # evaluate hidden layer
        if self.nHidden is not None:
            z = self.transFunc[-1](util.segdot(c, self.hw[:-1]) + self.hw[-1])
        else:
            z = c

        # evaluate visible layer
        return util.segdot(z, self.vw[:-1]) + self.vw[-1]
Example #14
    def train(self, ss, *args, **kwargs):
        ss = util.segmat(ss)

        self.model = []
        for i in range(ss.shape[2]):
            v = ss[:, :, i]

            xs = self.getInputs(v)
            gs = self.getTargets(v)

            x = xs.reshape((xs.shape[0] * xs.shape[1], -1))
            g = gs.reshape((gs.shape[0] * gs.shape[1], -1))

            self.model.append(self.regClass(x, g, *args, **kwargs))
Example #15
    def evalConvs(self, x):
        x = util.segmat(x)

        c = x
        cs = []
        for l, cw in enumerate(self.cws):
            width = self.convWidths[l]
            phi = self.transFunc[l]

            c = util.timeEmbed(c, lags=width - 1, axis=1)
            c = phi(util.segdot(c, cw[:-1]) + cw[-1])

            cs.append(c)

        return cs
Example #16
    def eval(self, x, context=None, returncontext=False):
        x = util.segmat(x)

        cacheAct = False
        if returncontext is False and \
           context is None and \
           self.actCache.getMaxSize() > 0:
            key = util.hashArray(x)
            if key in self.actCache:
                #print('cache hit.')
                return self.actCache[key]
            else:
                #print('cache miss.')
                cacheAct = True

        nSeg = x.shape[0]
        nObs = x.shape[1]
        nIn  = x.shape[2]

        act = np.empty((nSeg, nObs, self.nRes), dtype=self.dtype)

        if context is None:
            context = np.zeros((nSeg, self.nRes), dtype=self.dtype)

        xt = np.empty((nSeg, nIn + self.nRes), dtype=self.dtype)

        hwT = self.hw[:-1].T

        for t in range(nObs):
            xt[:, :nIn] = x[:, t, :]
            xt[:, nIn:] = context

            if self.sparse:
                # need to have w first for sparse matrix
                # does not appear faster to convert xt to csr
                act[:, t, :] = self.transFunc(hwT.dot(xt.T).T + self.hw[-1])
            else:
                act[:, t, :] = self.transFunc(xt.dot(self.hw[:-1]) + self.hw[-1])

            context = act[:, t, :]

        if cacheAct:
            self.actCache[key] = act

        if returncontext:
            return act, context
        else:
            return act
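The activation cache above keys on util.hashArray(x). A plausible stand-in for that helper (the real implementation may differ) hashes the raw array bytes together with shape and dtype:

    import hashlib

    import numpy as np

    def hash_array(x):
        # include shape and dtype so differently shaped views don't collide
        h = hashlib.sha1()
        h.update(str(x.shape).encode())
        h.update(str(x.dtype).encode())
        h.update(np.ascontiguousarray(x).tobytes())
        return h.hexdigest()

    cache = {}
    x = np.random.randn(2, 16, 4)
    key = hash_array(x)
    if key not in cache:
        cache[key] = x.sum()   # stand-in for the reservoir activations
    print(cache[key])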
Example #17
    def eval(self, x, returnTransient=False, **kwargs):
        x = util.segmat(x)

        act = self.reservoir.eval(x)
        act = self.addSideTrack(x, act)
        actf = act.reshape((-1, act.shape[-1]))

        yf = self.readout.eval(actf, **kwargs)
        y = yf.reshape((act.shape[0], x.shape[1], -1))

        if x.ndim == 2:
            y = y.squeeze(axis=2)

        if not returnTransient:
            y = y[:, self.transient:]

        return y
Example #18
File: seg.py Project: idfah/cebl
    def bipolarReference(self, pairs):
        for pair in pairs:
            if len(pair) > 2:
                raise RuntimeError('Bipolar reference assumes pairs of electrodes but got %s.' % pair)

            pair = self.getChanIndices(pair)

            ref = self.data[:, :, pair].mean(axis=2)
            self.data[:, :, pair] = util.segmat(ref)

        chanNames = []
        for pair in pairs:
            pair = self.getChanNames(pair)
            chanNames.append('-'.join(pair))

        self.deleteChans([r for l, r in pairs])
        self.setChanNames(chanNames)

        return self
Example #19
    def evalRecs(self, x, contexts=None, returnContexts=False):
        x = util.segmat(x)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]

        r1Prev = x1
        rs = []

        if contexts is None:
            contexts = [
                np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)
                for l in range(self.nRecLayers)
            ]

        for l in range(self.nRecLayers):
            nIn1 = r1Prev.shape[2]

            r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            r1c = np.empty((nSeg, nIn1 + self.nRecHiddens[l]),
                           dtype=self.dtype)
            context = contexts[l]

            for t in range(nObs):
                r1c[:, :nIn1] = r1Prev[:, t]
                r1c[:, nIn1:] = context

                r[:, t] = self.phi(r1c.dot(self.hws[l]))
                context[...] = r[:, t]

            r1Prev = util.bias(r)
            rs.append(r)

        if returnContexts:
            return rs, contexts
        else:
            return rs
Example #20
    def __init__(self, x, nRes=1024, rwScale=0.95, rwConn=0.01,
                 iwScale=0.3, iwConn=0.2, transFunc=transfer.tanh,
                 sparse=None, actCacheSize=0, verbose=False):
        x = util.segmat(x)
        self.nIn = x.shape[2]
        self.dtype = x.dtype

        self.nRes = nRes
        self.transFunc = transFunc

        if sparse is None:
            if rwConn < 0.05:
                self.sparse = True
            else:
                self.sparse = False
        else:
            self.sparse = sparse

        self.actCache = util.Cache(actCacheSize)

        self.verbose = verbose

        # (ns, ni+nr) x (ni+nr, nr)
        #self.hw = np.empty((self.nIn+1+self.nRes,self.nRes), dtype=self.dtype)
        #iw = self.hw[:self.nIn+1,:]
        #rw = self.hw[self.nIn+1:,:]

        iw = self.initIW(iwScale, iwConn)
        rw = self.initRW(rwScale, rwConn)
        self.hw = np.vstack((iw, rw))

        if self.sparse:
            # is csc or csr faster? XXX - idfah
            self.hw = spsparse.csr_matrix(self.hw, dtype=self.dtype)

        self.scaleIW(x)
Example #21
    def eval(self, ss, returnResid=False, *args, **kwargs):
        ss = util.segmat(ss)

        preds = []
        gi = []
        for i in range(ss.shape[2]):
            v = ss[:, :, i]

            xs = self.getInputs(v)
            gs = self.getTargets(v)

            preds.append(self.model[i].evals(xs, *args, **kwargs).squeeze(2))

            if returnResid:
                gi.append(gs.squeeze(2))

        preds = np.rollaxis(np.array(preds), 0, 3)

        if returnResid:
            gs = np.rollaxis(np.array(gi), 0, 3)
            resids = gs - preds
            return preds, resids
        else:
            return preds
Example #22
    def getInputs(self, ss):
        ss = util.segmat(ss)
        return util.timeEmbed(ss, lags=self.order - 1,
                              axis=1)[:, :-self.horizon]
Example #23
    def getTargets(self, ss):
        ss = util.segmat(ss)
        return ss[:, (self.order + self.horizon - 1):]
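Together, getInputs and getTargets set up an autoregressive problem: predict the value horizon steps ahead from the previous order values. Assuming util.timeEmbed concatenates lagged copies along the last axis and shortens the time axis by lags (an assumption; its source is not shown here), a plain-numpy sketch of the alignment:

    import numpy as np

    def time_embed(ss, lags):
        # assumed semantics: stack lags+1 shifted copies along the last
        # axis; the time axis shrinks by lags
        nObs = ss.shape[1]
        return np.concatenate(
            [ss[:, l:nObs - lags + l] for l in range(lags + 1)], axis=-1)

    order, horizon = 3, 1
    ss = np.arange(8, dtype=float).reshape(1, 8, 1)   # one segment, one channel

    x = time_embed(ss, lags=order - 1)[:, :-horizon]  # windows of 3 past values
    g = ss[:, (order + horizon - 1):]                 # value horizon steps ahead

    print(x[0])        # [0 1 2], [1 2 3], ..., [4 5 6]
    print(g[0, :, 0])  # [3. 4. 5. 6. 7.]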
Example #24
    def gradient(self, x, g, unrollSteps=10, returnError=True):
        x = util.segmat(x)
        g = util.segmat(g)

        if isinstance(unrollSteps, int):
            unrollSteps = [unrollSteps] * self.nRecLayers

        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg = views[0]
        hgs = views[1:-1]
        vg = views[-1]

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]

        r1Prev = x1
        r1cs = []
        rPrimes = []

        for l in range(self.nRecLayers):
            nIn1 = r1Prev.shape[2]

            r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            h = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
            r1c = np.empty((nSeg, nObs, nIn1 + self.nRecHiddens[l]),
                           dtype=self.dtype)
            context = np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)

            for t in range(nObs):
                r1c[:, t, :nIn1] = r1Prev[:, t]
                r1c[:, t, nIn1:] = context

                h[:, t] = r1c[:, t].dot(self.hws[l])
                r[:, t] = self.phi(h[:, t])
                context[...] = r[:, t]

            r1Prev = util.bias(r)
            r1cs.append(r1c)

            rPrime = self.phi(h, 1)
            rPrimes.append(rPrime)

        # evaluate visible layer
        r1 = r1Prev
        y = r1.dot(self.vw)

        # error components, ditch transient
        e = (y - g)[:, self.transient:]
        delta = np.zeros(g.shape, dtype=self.dtype)
        delta[:, self.transient:] = 2.0 * e / e.size

        # visible layer gradient
        r1f = r1.reshape((-1, r1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = r1f.T.dot(deltaf)

        # backward pass through each layer
        w = self.vw
        for l in range(self.nRecLayers - 1, -1, -1):
            r1c = r1cs[l]
            rwsTrans = self.rws[l].T
            rPrime = rPrimes[l]

            deltaPrev = delta.dot(w[:-1].T)

            gamma = np.zeros((nSeg, unrollSteps[l], self.nRecHiddens[l]),
                             dtype=self.dtype)
            #delta = np.zeros((nSeg, nObs-self.transient, self.nRecHiddens[l]), dtype=self.dtype)
            delta = np.zeros((nSeg, nObs, self.nRecHiddens[l]),
                             dtype=self.dtype)

            # unrolled through time
            #for t in range(nObs-self.transient-1, 0, -1):
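            # truncated BPTT: gamma[:, k] carries the error signal k steps back
            # through the recurrent weights, so only the most recent
            # unrollSteps[l] steps contribute to this layer's gradient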
            for t in range(nObs - 1, 0, -1):
                rPrimet = rPrime[:, t][:, None, :]

                beta = gamma[:, :-1]
                beta = beta.dot(rwsTrans)

                gamma[:, 0] = deltaPrev[:, t]
                gamma[:, 1:] = beta
                gamma *= rPrimet

                delta[:, t] = gamma.sum(axis=1)

            r1cf = r1c.reshape((-1, r1c.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            hgs[l][...] = r1cf.T.dot(deltaf)

            #print('hg %d: %f' % (l, np.sqrt(np.mean(hgs[l]**2))))

            w = self.iws[l]

        if returnError:
            return np.mean(e**2), pg
        else:
            return pg
Example #25
0
    def __init__(
            self,
            x,
            g,
            recs=(8, 4, 2),
            transient=0,
            phi=transfer.tanh,
            #iwInitFunc=pinit.lecun, rwInitFunc=pinit.lecun,
            hwInitFunc=pinit.esp,
            vwInitFunc=pinit.lecun,
            optimFunc=optim.scg,
            **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)

        self.dtype = np.result_type(x.dtype, g.dtype)

        Regression.__init__(self, x.shape[2], g.shape[2])
        optim.Optable.__init__(self)

        self.transient = transient
        self.phi = phi

        self.nRecHiddens = list(recs)
        self.nRecLayers = len(self.nRecHiddens)

        self.layerDims = [(self.nIn + self.nRecHiddens[0] + 1,
                           self.nRecHiddens[0])]
        for l in range(1, self.nRecLayers):
            self.layerDims.append(
                (self.nRecHiddens[l - 1] + self.nRecHiddens[l] + 1,
                 self.nRecHiddens[l]))
        self.layerDims.append((self.nRecHiddens[-1] + 1, self.nOut))

        views = util.packedViews(self.layerDims, dtype=self.dtype)
        self.pw = views[0]
        self.hws = views[1:-1]
        self.vw = views[-1]

        self.iws = []
        self.rws = []
        nIn = self.nIn
        for l in range(self.nRecLayers):
            iw = self.hws[l][:(nIn + 1)]
            rw = self.hws[l][(nIn + 1):]
            self.iws.append(iw)
            self.rws.append(rw)

            #self.iws[l][...] = iwInitFunc(iw.shape).astype(self.dtype, copy=False)
            #self.rws[l][...] = rwInitFunc(rw.shape).astype(self.dtype, copy=False)

            nIn = self.nRecHiddens[l]

            self.hws[l][...] = hwInitFunc(self.hws[l].shape).astype(self.dtype,
                                                                    copy=False)

        self.vw[...] = vwInitFunc(self.vw.shape).astype(self.dtype, copy=False)

        # train the network
        if optimFunc is not None:
            self.train(x, g, optimFunc, **kwargs)
Example #26
    def gradient(self, x, g, unrollSteps=10, returnError=True):
        x = util.segmat(x)
        g = util.segmat(g)

        # packed views of the hidden and visible gradient matrices
        pg, hg, vg = util.packedViews((self.hw.shape, self.vw.shape),
                                      dtype=self.dtype)

        x1 = util.bias(x)

        nSeg = x1.shape[0]
        nObs = x1.shape[1]
        nIn1 = x1.shape[2]

        h = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
        r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
        x1c = np.empty((nSeg, nObs, nIn1 + self.nHidden), dtype=self.dtype)
        context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

        for t in range(nObs):
            x1c[:, t, :nIn1] = x1[:, t]
            x1c[:, t, nIn1:] = context

            h[:, t] = x1c[:, t].dot(self.hw)
            r[:, t] = self.phi(h[:, t])
            context[...] = r[:, t]

        r1 = util.bias(r)
        y = r1.dot(self.vw)
        rPrime = self.phi(h, 1)

        # error components, ditch transient
        e = (y - g)[:, self.transient:]
        delta = np.zeros(g.shape, dtype=self.dtype)
        delta[:, self.transient:] = 2.0 * e / e.size

        # visible layer gradient
        r1f = r1.reshape((-1, r1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = r1f.T.dot(deltaf)

        vwDelta = delta.dot(self.vw[:-1].T)

        gamma = np.zeros((nSeg, unrollSteps, self.nHidden), dtype=self.dtype)
        #delta = np.zeros((nSeg, nObs-self.transient, self.nHidden), dtype=self.dtype)
        delta = np.zeros((nSeg, nObs, self.nHidden), dtype=self.dtype)

        ##hg[...] = 0.0

        # backward pass for hidden layer, unrolled through time
        #for t in range(nObs-self.transient-1, 0, -1):
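        # same truncated-BPTT scheme as in Example #24: gamma[:, k] carries
        # the error signal k steps back through the recurrence, cut off at
        # unrollSteps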
        for t in range(nObs - 1, 0, -1):
            rPrimet = rPrime[:, t][:, None, :]
            #x1ct = x1c[:,t][:,None,:]
            ##x1ct = x1c[:,t]

            beta = gamma[:, :-1]
            beta = beta.dot(self.rw.T)

            gamma[:, 0] = vwDelta[:, t]
            gamma[:, 1:] = beta
            gamma *= rPrimet

            ##x1ctf = np.tile(x1ct, unrollSteps).reshape((-1, x1ct.shape[-1]))
            ##gammaf = gamma.reshape((-1, gamma.shape[-1]))
            delta[:, t] = gamma.sum(axis=1)

            #hg += x1ctf.T.dot(gammaf)
            ##hg += x1ct.T.dot(gamma.sum(axis=1))

            ##hg += x1ct.T.dot(gamma.swapaxes(0,1)).sum(axis=1)

        x1cf = x1c.reshape((-1, x1c.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        #hg[...] = x1c.reshape((-1, x1c.shape[-1])).T.dot(delta.reshape((-1, d.shape[-1])))
        hg[...] = x1cf.T.dot(deltaf)

        if returnError:
            return np.mean(e**2), pg
        else:
            return pg
Example #27
    def __init__(self,
                 x,
                 g,
                 convs=((8, 16), (16, 8)),
                 nHidden=None,
                 transFunc=transfer.lecun,
                 weightInitFunc=pinit.lecun,
                 penalty=None,
                 elastic=1.0,
                 optimFunc=optim.scg,
                 **kwargs):
        x = util.segmat(x)
        g = util.segmat(g)
        self.dtype = np.result_type(x.dtype, g.dtype)

        Regression.__init__(self, x.shape[2], g.shape[2])
        optim.Optable.__init__(self)

        self.nConvHiddens, self.convWidths = zip(*convs)
        self.nConvLayers = len(convs)
        self.nHidden = nHidden

        self.layerDims = [
            (self.nIn * self.convWidths[0] + 1, self.nConvHiddens[0]),
        ]
        for l in range(1, self.nConvLayers):
            ni = self.nConvHiddens[l - 1] * self.convWidths[l] + 1
            no = self.nConvHiddens[l]
            self.layerDims.append((ni, no))

        if self.nHidden is None:
            self.layerDims.append((self.nConvHiddens[-1] + 1, self.nOut))
        else:
            self.layerDims.append((self.nConvHiddens[-1] + 1, self.nHidden))
            self.layerDims.append((self.nHidden + 1, self.nOut))

        self.transFunc = transFunc if util.isiterable(transFunc) \
                else (transFunc,) * (len(self.layerDims)-1)
        assert len(self.transFunc) == (len(self.layerDims) - 1)

        views = util.packedViews(self.layerDims, dtype=self.dtype)
        self.pw = views[0]

        if self.nHidden is None:
            self.cws = views[1:-1]
            self.hw = None
            self.vw = views[-1]
        else:
            self.cws = views[1:-2]
            self.hw = views[-2]
            self.vw = views[-1]

        if not util.isiterable(weightInitFunc):
            weightInitFunc = (weightInitFunc, ) * (self.nConvLayers + 2)
        assert len(weightInitFunc) == (len(self.cws) + 2)

        self.penalty = penalty
        if self.penalty is not None:
            if not util.isiterable(self.penalty):
                self.penalty = (self.penalty, ) * (self.nConvLayers + 2)
        assert self.penalty is None or len(self.penalty) == len(self.cws) + 2

        self.elastic = elastic if util.isiterable(elastic) \
                else (elastic,) * (self.nConvLayers+2)
        assert (len(self.elastic) == (len(self.cws) + 2))

        # initialize weights
        for cw, wif in zip(self.cws, weightInitFunc):
            cw[...] = wif(cw.shape).astype(self.dtype, copy=False)

        if self.nHidden is not None:
            self.hw[...] = weightInitFunc[-2](self.hw.shape).astype(self.dtype,
                                                                    copy=False)

        self.vw[...] = weightInitFunc[-1](self.vw.shape).astype(self.dtype,
                                                                copy=False)

        # train the network
        if optimFunc is not None:
            self.train(x, g, optimFunc, **kwargs)
Example #28
    def gradient(self, x, g, returnError=True):
        x = util.segmat(x)
        g = util.colmat(g)

        # packed views of the hidden and visible gradient matrices
        views = util.packedViews(self.layerDims, dtype=self.dtype)
        pg = views[0]

        if self.nHidden is None:
            cgs = views[1:-1]
            hg = None
            vg = views[-1]
        else:
            cgs = views[1:-2]
            hg = views[-2]
            vg = views[-1]

        # forward pass
        c = x
        c1s = []
        cPrimes = []
        for l, cw in enumerate(self.cws):
            width = self.convWidths[l]
            phi = self.transFunc[l]

            c = util.timeEmbed(c, lags=width - 1, axis=1)

            c1 = util.bias(c)
            c1s.append(c1)

            h = util.segdot(c1, cw)
            cPrime = phi(h, 1)
            cPrimes.append(cPrime)

            c = phi(h)

        c1 = util.bias(c)

        # evaluate hidden and visible layers
        if self.nHidden is None:
            y = util.segdot(c1, self.vw)
        else:
            h = util.segdot(c1, self.hw)
            z1 = util.bias(self.transFunc[-1](h))
            zPrime = self.transFunc[-1](h, 1)
            y = util.segdot(z1, self.vw)

        # trim targets to match the shorter convolutional output (centered)
        trim = (g.shape[1] - y.shape[1]) // 2
        gTrim = g[:, :(g.shape[1] - trim)]
        gTrim = gTrim[:, -y.shape[1]:]

        # error components
        e = util.colmat(y - gTrim)
        delta = 2.0 * e / e.size

        if self.nHidden is None:
            # visible layer gradient
            c1f = c1.reshape((-1, c1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            vg[...] = c1f.T.dot(deltaf)
            vg += self.penaltyGradient(-1)

            delta = util.segdot(delta, self.vw[:-1].T)

        else:
            # visible layer gradient
            z1f = z1.reshape((-1, z1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            vg[...] = z1f.T.dot(deltaf)
            vg += self.penaltyGradient(-1)

            # hidden layer gradient
            c1f = c1.reshape((-1, c1.shape[-1]))
            delta = util.segdot(delta, self.vw[:-1].T) * zPrime
            deltaf = delta.reshape((-1, delta.shape[-1]))
            hg[...] = c1f.T.dot(deltaf)
            hg += self.penaltyGradient(-2)

            delta = util.segdot(delta, self.hw[:-1].T)

        # backward pass for convolutional layers
        for l in range(self.nConvLayers - 1, -1, -1):
            c1 = c1s[l]
            cPrime = cPrimes[l]

            delta = delta[:, :cPrime.shape[1]] * cPrime

            c1f = c1.reshape((-1, c1.shape[-1]))
            deltaf = delta.reshape((-1, delta.shape[-1]))
            cgs[l][...] = c1f.T.dot(deltaf)
            cgs[l] += self.penaltyGradient(l)

            if l > 0:  # won't propagate back to inputs
                delta = util.segdot(delta, self.cws[l][:-1].T)
                delta = deltaDeEmbedSum(delta, self.convWidths[l])

        if returnError:
            error = np.mean(e**2) + self.penaltyError()
            return error, pg
        else:
            return pg