def train(self, x, g, optimFunc, **kwargs):
    x = util.segmat(x)
    x = np.require(x, requirements=['O', 'C'])

    g = util.segmat(g)
    g = np.require(g, requirements=['O', 'C'])

    self.trainResult = optimFunc(self, x=x, g=g, **kwargs)
def error(self, x, g, *args, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)[:, self.transient:]

    # evaluate network
    y = self.eval(x, returnTransient=False)

    # figure mse
    return np.mean((y - g)**2)
def error(self, x, g):
    x = util.segmat(x)
    g = util.segmat(g)

    # evaluate network
    y = self.eval(x)

    # the convolutional layers shorten the time axis, so trim the
    # targets roughly evenly from both ends to line up with y
    trim = (g.shape[1] - y.shape[1]) // 2
    gTrim = g[:, :(g.shape[1] - trim)]
    gTrim = gTrim[:, -y.shape[1]:]

    # figure mse
    return np.mean((y - gTrim)**2) + self.penaltyError()
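# A hedged sketch (not part of the original module; the name and sizes
# are illustrative) of the centered trim used in error() above: with an
# output 3 observations shorter than the targets, one observation is
# dropped from the end and two from the front.
def _demoCenteredTrim():
    import numpy as np

    g = np.arange(10)[None, :, None]        # targets, (nSeg=1, nObs=10, nOut=1)
    y = np.zeros((1, 7, 1))                 # network output, 3 obs shorter

    trim = (g.shape[1] - y.shape[1]) // 2   # 1 obs dropped from the end
    gTrim = g[:, :(g.shape[1] - trim)]      # keeps obs 0..8
    gTrim = gTrim[:, -y.shape[1]:]          # keeps obs 2..8, matching y

    assert gTrim.shape[1] == y.shape[1]
    print(gTrim[0, :, 0])                   # [2 3 4 5 6 7 8]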
def __init__(self, classData, autoRegClass=AutoRegression, **autoRegKwargs):
    # initialize Classifier base class
    Classifier.__init__(self, util.segmat(classData[0]).shape[2], len(classData))

    self.autoRegClass = autoRegClass

    self.train(classData, **autoRegKwargs)
def evalRecs(self, x, context=None, returnContext=False):
    x = util.segmat(x)
    x1 = util.bias(x)

    nSeg = x1.shape[0]
    nObs = x1.shape[1]
    nIn1 = x1.shape[2]

    r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)

    if context is None:
        context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

    x1c = np.empty((nSeg, nIn1 + self.nHidden), dtype=self.dtype)

    for t in range(nObs):
        # concatenate the biased input at time t with the previous hidden state
        x1c[:, :nIn1] = x1[:, t]
        x1c[:, nIn1:] = context

        r[:, t] = self.phi(x1c.dot(self.hw))
        context[...] = r[:, t]

    if returnContext:
        return r, context
    else:
        return r
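# Hedged sketch (not in the original source; names and sizes are
# illustrative): one step of the recurrence that evalRecs implements
# above. The packed hidden weights hw stack the input rows (plus bias)
# on top of the recurrent rows, so a single dot of the concatenated
# [input, context] vector computes both contributions at once.
def _demoRecurrentStep():
    import numpy as np

    nSeg, nIn, nHidden = 4, 3, 5
    rng = np.random.default_rng(0)

    hw = rng.normal(size=(nIn + 1 + nHidden, nHidden))   # input+bias rows, then recurrent rows
    x1 = np.hstack((rng.normal(size=(nSeg, nIn)),
                    np.ones((nSeg, 1))))                 # biased input at one time step
    context = rng.normal(size=(nSeg, nHidden))           # previous hidden state

    x1c = np.hstack((x1, context))                       # concatenated [input, context]
    rFused = np.tanh(x1c.dot(hw))                        # fused update, as in evalRecs

    # equivalent two-term form: x1.dot(iw) + context.dot(rw)
    iw, rw = hw[:nIn + 1], hw[nIn + 1:]
    rSplit = np.tanh(x1.dot(iw) + context.dot(rw))
    assert np.allclose(rFused, rSplit)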
def reference(self, chans):
    chans = self.getChanIndices(chans)

    ref = self.data[:,:,chans]
    if len(chans) > 1:
        ref = ref.mean(axis=2)

    self.data -= util.segmat(ref)

    return self
def __init__(self, x, g, reservoir, transient=0, sideTrack=False,
             verbose=False, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    self.dtype = np.result_type(x.dtype, g.dtype)

    nIn = x.shape[2]
    nOut = g.shape[2]
    Regression.__init__(self, nIn, nOut)

    self.reservoir = reservoir
    self.transient = transient
    self.sideTrack = sideTrack
    self.verbose = verbose

    self.train(x, g, **kwargs)
def __init__(self, data, sampRate, chanNames=None, markers=None,
             start=0.0, deviceName='', dtype=None, copy=False):
    """Construct a new SegmentedEEG instance for processing eeg data
    that has been split into segments of equal length.

    Args:
        data:       A 3D numpy array of floats of shape (nSeg,nObs[,nChan])
                    containing the eeg segments.  The first axis corresponds
                    to the eeg segments.  The second axis corresponds to the
                    observations (i.e., time steps).  The third axis is
                    optional and corresponds to eeg channels.

        sampRate:   The sampling rate (frequency) in samples-per-second
                    (Hertz) of the eeg data.

        chanNames:  A list of names of the channels in the eeg data.
                    If None (default) then the channel names are set to
                    '1', '2', ... 'nChan'.

        markers:    EEG event markers.  This is a list or tuple of floats
                    that mark each eeg segment.  There should be one marker
                    for each segment.  The interpretation of these markers
                    is up to the user.  If None (default) then markers are
                    set to 1, 2, ..., nSeg.

        start:      Starting time in seconds of the segments.  Defaults
                    to 0.0.  This is useful if the data were segmented
                    using an offset from an event.  For example, an ERP
                    might be segmented starting at -0.2 seconds, i.e.,
                    0.2 seconds before the stimulus onset.

        deviceName: The name of the device used to record the eeg data.

        dtype:      The data type used to store the signal.  Must be a
                    floating point type, e.g., np.float32 or np.float64.
                    If None (default) the data type is determined from
                    the data argument.

        copy:       If False (default) then data will not be copied if
                    possible.  If True, then the data will definitely be
                    copied.  Warning:  If multiple EEG instances use the
                    same un-copied data array, then modifying one EEG
                    instance may lead to undefined behavior in the other
                    instances.
    """
    # ensure we have a numpy array with three axes,
    # copy and cast if necessary
    self.data = util.segmat(data, dtype=dtype, copy=copy)
    self.dtype = self.data.dtype

    self.nSeg = self.data.shape[0]

    EEGBase.__init__(self, self.data.shape[1], self.data.shape[2],
                     sampRate=sampRate, chanNames=chanNames,
                     deviceName=deviceName)

    self.setMarkers(markers, copy=copy)
    self.setStart(start)
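# Hedged usage sketch (not part of the original module; the channel
# names and sizes are assumptions chosen for illustration): constructing
# a SegmentedEEG from an (nSeg, nObs, nChan) array of segments cut
# around a stimulus event.
def _demoSegmentedEEG():
    import numpy as np

    data = np.random.randn(20, 512, 8)    # 20 segments, 2s at 256Hz, 8 channels
    eeg = SegmentedEEG(data, sampRate=256.0,
                       chanNames=['Fz', 'Cz', 'Pz', 'Oz', 'C3', 'C4', 'P3', 'P4'],
                       start=-0.2)        # segments begin 0.2s before the event
    return eeg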
def addSideTrack(self, x, act):
    x = util.segmat(x)

    if self.sideTrack:
        if self.verbose:
            print('adding side track...')

        # concatenate the raw inputs to the reservoir activations
        # so the readout sees both
        return np.concatenate((x, act), axis=2)
    else:
        return act
def train(self, x, g, readoutClass=RidgeRegression, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    act = self.reservoir.eval(x)
    act = self.addSideTrack(x, act)

    # drop the transient period from each segment before flattening
    # so the readout is not fit to reservoir washout activations
    act = act[:, self.transient:]
    g = g[:, self.transient:]

    actf = act.reshape((-1, act.shape[-1]))
    if g.ndim == 3:
        gf = g.reshape((-1, g.shape[-1]))
    else:
        gf = g.ravel()

    if self.verbose:
        print('Training readout layer...')

    self.readout = readoutClass(actf, gf, **kwargs)
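# Hedged sketch (illustrative name and sizes, not in the original
# source): why train() flattens the reservoir activations above. The
# readout regression expects 2D (observation, feature) inputs, so the
# (nSeg, nObs, nFeature) activation tensor is collapsed segment by
# segment, segment 0's observations first.
def _demoFlattenForReadout():
    import numpy as np

    act = np.arange(2 * 4 * 3).reshape((2, 4, 3))  # (nSeg, nObs, nF)
    actf = act.reshape((-1, act.shape[-1]))        # (nSeg*nObs, nF)

    # rows are ordered segment 0 obs 0..3, then segment 1 obs 0..3
    assert np.array_equal(actf[4], act[1, 0])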
def eval(self, x, returnTransient=False):
    x = util.segmat(x)

    r = self.evalRecs(x)
    y = r.dot(self.vw[:-1]) + self.vw[-1]

    if not returnTransient:
        y = y[:, self.transient:]

    return y
def __init__(self, x, g, nHidden=10, transient=0, phi=transfer.tanh,
             #iwInitFunc=pinit.lecun, rwInitFunc=pinit.lecun,
             hwInitFunc=pinit.esp, vwInitFunc=pinit.lecun,
             optimFunc=optim.scg, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)
    # flattenOut? XXX - idfah

    self.dtype = np.result_type(x.dtype, g.dtype)

    Regression.__init__(self, x.shape[2], g.shape[2])
    optim.Optable.__init__(self)

    self.nHidden = nHidden
    self.transient = transient
    self.phi = phi

    # one packed parameter vector with views for the
    # hidden (input+recurrent) and visible weights
    self.pw, self.hw, self.vw = \
        util.packedViews(((self.nIn+self.nHidden+1, self.nHidden),
                          (self.nHidden+1, self.nOut)),
                         dtype=self.dtype)

    # views of the input and recurrent rows of the hidden weights
    self.iw = self.hw[:(self.nIn + 1)]
    self.rw = self.hw[(self.nIn + 1):]

    # initialize weights
    #self.iw[...] = iwInitFunc(self.iw.shape).astype(self.dtype, copy=False)
    #self.rw[...] = rwInitFunc(self.rw.shape).astype(self.dtype, copy=False)
    self.hw[...] = hwInitFunc(self.hw.shape).astype(self.dtype, copy=False)
    self.vw[...] = vwInitFunc(self.vw.shape).astype(self.dtype, copy=False)

    # train the network
    if optimFunc is not None:
        self.train(x, g, optimFunc, **kwargs)
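# Hedged sketch of the packed-parameter idea used above (this is not
# util.packedViews itself, whose implementation lives elsewhere in the
# package): a single flat vector pw holds all weights, and hw/vw are
# reshaped views into it, so an optimizer can update pw in place and the
# per-layer matrices see the change for free.
def _demoPackedViews():
    import numpy as np

    nIn, nHidden, nOut = 3, 5, 2
    shapes = ((nIn + nHidden + 1, nHidden), (nHidden + 1, nOut))

    sizes = [r * c for r, c in shapes]
    pw = np.zeros(sum(sizes))

    hw = pw[:sizes[0]].reshape(shapes[0])   # view, shares memory with pw
    vw = pw[sizes[0]:].reshape(shapes[1])   # view, shares memory with pw

    pw += 1.0                               # an "optimizer step" on the flat vector
    assert hw[0, 0] == 1.0 and vw[-1, -1] == 1.0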
def eval(self, x):
    x = util.segmat(x)

    # evaluate convolutional layers
    c = self.evalConvs(x)[-1]

    # evaluate hidden layer, if present
    if self.nHidden is None:
        z = c
    else:
        z = self.transFunc[-1](util.segdot(c, self.hw[:-1]) + self.hw[-1])

    # evaluate visible layer
    return util.segdot(z, self.vw[:-1]) + self.vw[-1]
def train(self, ss, *args, **kwargs):
    ss = util.segmat(ss)

    # fit a separate autoregressive model to each channel
    self.model = []
    for i in range(ss.shape[2]):
        v = ss[:, :, i]

        xs = self.getInputs(v)
        gs = self.getTargets(v)

        # flatten segments and time windows into observations
        x = xs.reshape((xs.shape[0] * xs.shape[1], -1))
        g = gs.reshape((gs.shape[0] * gs.shape[1], -1))

        self.model.append(self.regClass(x, g, *args, **kwargs))
def evalConvs(self, x):
    x = util.segmat(x)

    c = x
    cs = []
    for l, cw in enumerate(self.cws):
        width = self.convWidths[l]
        phi = self.transFunc[l]

        # time embedding implements a valid convolution:
        # each observation gains width-1 lagged copies and
        # the time axis shrinks by width-1
        c = util.timeEmbed(c, lags=width - 1, axis=1)
        c = phi(util.segdot(c, cw[:-1]) + cw[-1])

        cs.append(c)

    return cs
def eval(self, x, context=None, returnContext=False):
    x = util.segmat(x)

    # only cache activations for whole evaluations
    # that neither take nor return a context
    cacheAct = False
    if returnContext is False and \
       context is None and \
       self.actCache.getMaxSize() > 0:
        key = util.hashArray(x)
        if key in self.actCache:
            #print('cache hit.')
            return self.actCache[key]
        else:
            #print('cache miss.')
            cacheAct = True

    nSeg = x.shape[0]
    nObs = x.shape[1]
    nIn = x.shape[2]

    act = np.empty((nSeg, nObs, self.nRes), dtype=self.dtype)

    if context is None:
        context = np.zeros((nSeg, self.nRes), dtype=self.dtype)

    xt = np.empty((nSeg, nIn + self.nRes), dtype=self.dtype)
    hwT = self.hw[:-1].T

    for t in range(nObs):
        xt[:, :nIn] = x[:, t, :]
        xt[:, nIn:] = context

        if self.sparse:
            # need to have w first for sparse matrix
            # does not appear faster to convert xt to csr
            act[:, t, :] = self.transFunc(hwT.dot(xt.T).T + self.hw[-1])
        else:
            act[:, t, :] = self.transFunc(xt.dot(self.hw[:-1]) + self.hw[-1])

        context = act[:, t, :]

    if cacheAct:
        self.actCache[key] = act

    if returnContext:
        return act, context
    else:
        return act
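# Hedged sketch (illustrative sizes, not in the original source): the
# sparse branch of eval() above computes the same update as the dense
# branch. With a scipy CSR weight matrix the product is written
# weights-first because the dense-left dot form is not available.
def _demoSparseDenseUpdate():
    import numpy as np
    import scipy.sparse as spsparse

    rng = np.random.default_rng(0)
    nSeg, nIn, nRes = 4, 3, 10

    hw = rng.normal(size=(nIn + nRes, nRes))
    hw[rng.random(hw.shape) > 0.05] = 0.0     # mostly zeros, mimicking rwConn=0.01
    xt = rng.normal(size=(nSeg, nIn + nRes))  # [input, context] at one time step

    dense = xt.dot(hw)

    hwSparse = spsparse.csr_matrix(hw)
    sparse = hwSparse.T.dot(xt.T).T           # weights-first, as in the sparse branch

    assert np.allclose(dense, sparse)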
def eval(self, x, returnTransient=False, **kwargs):
    x = util.segmat(x)

    act = self.reservoir.eval(x)
    act = self.addSideTrack(x, act)

    # flatten for the readout, then restore the segment shape
    actf = act.reshape((-1, act.shape[-1]))
    yf = self.readout.eval(actf, **kwargs)
    y = yf.reshape((act.shape[0], x.shape[1], -1))

    if x.ndim == 2:
        y = y.squeeze(axis=2)

    if not returnTransient:
        y = y[:, self.transient:]

    return y
def bipolarReference(self, pairs):
    for pair in pairs:
        if len(pair) != 2:
            raise RuntimeError('Bipolar reference requires pairs of electrodes but got %s.' % (pair,))

        pair = self.getChanIndices(pair)

        ref = self.data[:,:,pair].mean(axis=2)
        self.data[:,:,pair] = util.segmat(ref)

    chanNames = []
    for pair in pairs:
        pair = self.getChanNames(pair)
        chanNames.append('-'.join(pair))

    self.deleteChans([r for l, r in pairs])
    self.setChanNames(chanNames)

    return self
def evalRecs(self, x, contexts=None, returnContexts=False):
    x = util.segmat(x)
    x1 = util.bias(x)

    nSeg = x1.shape[0]
    nObs = x1.shape[1]

    r1Prev = x1
    rs = []

    if contexts is None:
        contexts = [np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)
                    for l in range(self.nRecLayers)]

    for l in range(self.nRecLayers):
        nIn1 = r1Prev.shape[2]

        r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
        r1c = np.empty((nSeg, nIn1 + self.nRecHiddens[l]), dtype=self.dtype)

        context = contexts[l]

        for t in range(nObs):
            r1c[:, :nIn1] = r1Prev[:, t]
            r1c[:, nIn1:] = context

            r[:, t] = self.phi(r1c.dot(self.hws[l]))
            context[...] = r[:, t]

        # the biased output of this layer feeds the next recurrent layer
        r1Prev = util.bias(r)
        rs.append(r)

    if returnContexts:
        return rs, contexts
    else:
        return rs
def __init__(self, x, nRes=1024, rwScale=0.95, rwConn=0.01,
             iwScale=0.3, iwConn=0.2, transFunc=transfer.tanh,
             sparse=None, actCacheSize=0, verbose=False):
    x = util.segmat(x)

    self.nIn = x.shape[2]
    self.dtype = x.dtype

    self.nRes = nRes
    self.transFunc = transFunc

    # default to sparse weights when the recurrent
    # connectivity is low enough to pay off
    if sparse is None:
        self.sparse = rwConn < 0.05
    else:
        self.sparse = sparse

    self.actCache = util.Cache(actCacheSize)
    self.verbose = verbose

    # (ns, ni+nr) x (ni+nr, nr)
    #self.hw = np.empty((self.nIn+1+self.nRes,self.nRes), dtype=self.dtype)
    #iw = self.hw[:self.nIn+1,:]
    #rw = self.hw[self.nIn+1:,:]

    iw = self.initIW(iwScale, iwConn)
    rw = self.initRW(rwScale, rwConn)
    self.hw = np.vstack((iw, rw))

    if self.sparse:
        # is csc or csr faster? XXX - idfah
        self.hw = spsparse.csr_matrix(self.hw, dtype=self.dtype)

    self.scaleIW(x)
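# Hedged sketch (this is NOT the initRW implementation, which is defined
# elsewhere in the package): one conventional way to build sparse
# recurrent reservoir weights whose spectral radius equals rwScale,
# which is what a value like rwScale=0.95 above typically controls for
# the echo state property.
def _demoSpectralScale(nRes=100, rwScale=0.95, rwConn=0.01, seed=0):
    import numpy as np

    rng = np.random.default_rng(seed)
    rw = rng.normal(size=(nRes, nRes))
    rw[rng.random(rw.shape) > rwConn] = 0.0          # sparse connectivity

    radius = np.max(np.abs(np.linalg.eigvals(rw)))   # current spectral radius
    rw *= rwScale / radius                           # rescale to rwScale
    return rw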
def eval(self, ss, returnResid=False, *args, **kwargs):
    ss = util.segmat(ss)

    preds = []
    gi = []
    for i in range(ss.shape[2]):
        v = ss[:, :, i]

        xs = self.getInputs(v)
        gs = self.getTargets(v)

        preds.append(self.model[i].evals(xs, *args, **kwargs).squeeze(2))
        if returnResid:
            gi.append(gs.squeeze(2))

    # move the channel axis back to the end
    preds = np.rollaxis(np.array(preds), 0, 3)

    if returnResid:
        gs = np.rollaxis(np.array(gi), 0, 3)
        resids = gs - preds
        return preds, resids
    else:
        return preds
def getInputs(self, ss):
    ss = util.segmat(ss)
    # embed order consecutive samples, then drop the last
    # horizon windows, which have no matching target
    return util.timeEmbed(ss, lags=self.order - 1, axis=1)[:, :-self.horizon]
def getTargets(self, ss):
    ss = util.segmat(ss)
    # targets start after the first input window plus the prediction horizon
    return ss[:, (self.order + self.horizon - 1):]
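# Hedged sketch of how getInputs and getTargets line up above (this
# re-implements the alignment with plain slicing rather than
# util.timeEmbed, under the assumption that the embedding concatenates
# `order` consecutive samples): each window of samples t..t+order-1 is
# paired with the target sample at t+order+horizon-1.
def _demoARAlignment(order=3, horizon=1):
    import numpy as np

    ss = np.arange(10, dtype=float)          # a single toy signal

    nWin = len(ss) - order - horizon + 1
    xs = np.stack([ss[t:t + order] for t in range(nWin)])  # input windows
    gs = ss[order + horizon - 1:]                          # aligned targets

    assert len(xs) == len(gs)
    # e.g. window [0, 1, 2] predicts sample 3 when horizon == 1
    print(xs[0], '->', gs[0])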
def gradient(self, x, g, unrollSteps=10, returnError=True):
    x = util.segmat(x)
    g = util.segmat(g)

    # allow a separate number of unroll steps for each recurrent layer
    if isinstance(unrollSteps, int):
        unrollSteps = [unrollSteps, ] * self.nRecLayers

    views = util.packedViews(self.layerDims, dtype=self.dtype)
    pg = views[0]
    hgs = views[1:-1]
    vg = views[-1]

    x1 = util.bias(x)

    nSeg = x1.shape[0]
    nObs = x1.shape[1]

    # forward pass, saving each layer's inputs and derivatives
    r1Prev = x1
    r1cs = []
    rPrimes = []
    for l in range(self.nRecLayers):
        nIn1 = r1Prev.shape[2]

        r = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
        h = np.empty((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)
        r1c = np.empty((nSeg, nObs, nIn1 + self.nRecHiddens[l]), dtype=self.dtype)
        context = np.zeros((nSeg, self.nRecHiddens[l]), dtype=self.dtype)

        for t in range(nObs):
            r1c[:, t, :nIn1] = r1Prev[:, t]
            r1c[:, t, nIn1:] = context

            h[:, t] = r1c[:, t].dot(self.hws[l])
            r[:, t] = self.phi(h[:, t])
            context[...] = r[:, t]

        r1Prev = util.bias(r)
        r1cs.append(r1c)

        rPrime = self.phi(h, 1)
        rPrimes.append(rPrime)

    # evaluate visible layer
    r1 = r1Prev
    y = r1.dot(self.vw)

    # error components, ditch transient
    e = (y - g)[:, self.transient:]
    delta = np.zeros(g.shape, dtype=self.dtype)
    delta[:, self.transient:] = 2.0 * e / e.size

    # visible layer gradient
    r1f = r1.reshape((-1, r1.shape[-1]))
    deltaf = delta.reshape((-1, delta.shape[-1]))
    vg[...] = r1f.T.dot(deltaf)

    # backward pass through each layer
    w = self.vw
    for l in range(self.nRecLayers - 1, -1, -1):
        r1c = r1cs[l]
        rwsTrans = self.rws[l].T
        rPrime = rPrimes[l]

        deltaPrev = delta.dot(w[:-1].T)

        gamma = np.zeros((nSeg, unrollSteps[l], self.nRecHiddens[l]), dtype=self.dtype)
        delta = np.zeros((nSeg, nObs, self.nRecHiddens[l]), dtype=self.dtype)

        # truncated backprop, unrolled through time
        for t in range(nObs - 1, 0, -1):
            rPrimet = rPrime[:, t][:, None, :]

            # shift the unroll buffer one step back through the recurrence
            beta = gamma[:, :-1]
            beta = beta.dot(rwsTrans)

            gamma[:, 0] = deltaPrev[:, t]
            gamma[:, 1:] = beta
            gamma *= rPrimet

            delta[:, t] = gamma.sum(axis=1)

        r1cf = r1c.reshape((-1, r1c.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        hgs[l][...] = r1cf.T.dot(deltaf)

        # the input weights of this layer carry the error to the layer below
        w = self.iws[l]

    if returnError:
        return np.mean(e**2), pg
    else:
        return pg
def __init__(self, x, g, recs=(8, 4, 2), transient=0, phi=transfer.tanh,
             #iwInitFunc=pinit.lecun, rwInitFunc=pinit.lecun,
             hwInitFunc=pinit.esp, vwInitFunc=pinit.lecun,
             optimFunc=optim.scg, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    self.dtype = np.result_type(x.dtype, g.dtype)

    Regression.__init__(self, x.shape[2], g.shape[2])
    optim.Optable.__init__(self)

    self.transient = transient
    self.phi = phi

    self.nRecHiddens = list(recs)
    self.nRecLayers = len(self.nRecHiddens)

    # dimensions of each layer's weight matrix: each recurrent layer
    # sees its inputs, its own context and a bias
    self.layerDims = [(self.nIn + self.nRecHiddens[0] + 1, self.nRecHiddens[0])]
    for l in range(1, self.nRecLayers):
        self.layerDims.append((self.nRecHiddens[l - 1] + self.nRecHiddens[l] + 1,
                               self.nRecHiddens[l]))
    self.layerDims.append((self.nRecHiddens[-1] + 1, self.nOut))

    views = util.packedViews(self.layerDims, dtype=self.dtype)
    self.pw = views[0]
    self.hws = views[1:-1]
    self.vw = views[-1]

    # views of the input and recurrent rows of each hidden weight matrix
    self.iws = []
    self.rws = []
    nIn = self.nIn
    for l in range(self.nRecLayers):
        iw = self.hws[l][:(nIn + 1)]
        rw = self.hws[l][(nIn + 1):]

        self.iws.append(iw)
        self.rws.append(rw)

        #self.iws[l][...] = iwInitFunc(iw.shape).astype(self.dtype, copy=False)
        #self.rws[l][...] = rwInitFunc(rw.shape).astype(self.dtype, copy=False)

        nIn = self.nRecHiddens[l]

        self.hws[l][...] = hwInitFunc(self.hws[l].shape).astype(self.dtype, copy=False)

    self.vw[...] = vwInitFunc(self.vw.shape).astype(self.dtype, copy=False)

    # train the network
    if optimFunc is not None:
        self.train(x, g, optimFunc, **kwargs)
def gradient(self, x, g, unrollSteps=10, returnError=True):
    x = util.segmat(x)
    g = util.segmat(g)

    # packed views of the hidden and visible gradient matrices
    pg, hg, vg = util.packedViews((self.hw.shape, self.vw.shape),
                                  dtype=self.dtype)

    x1 = util.bias(x)

    nSeg = x1.shape[0]
    nObs = x1.shape[1]
    nIn1 = x1.shape[2]

    # forward pass, saving net inputs and activations
    h = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
    r = np.empty((nSeg, nObs, self.nHidden), dtype=self.dtype)
    x1c = np.empty((nSeg, nObs, nIn1 + self.nHidden), dtype=self.dtype)
    context = np.zeros((nSeg, self.nHidden), dtype=self.dtype)

    for t in range(nObs):
        x1c[:, t, :nIn1] = x1[:, t]
        x1c[:, t, nIn1:] = context

        h[:, t] = x1c[:, t].dot(self.hw)
        r[:, t] = self.phi(h[:, t])
        context[...] = r[:, t]

    r1 = util.bias(r)
    y = r1.dot(self.vw)

    rPrime = self.phi(h, 1)

    # error components, ditch transient
    e = (y - g)[:, self.transient:]
    delta = np.zeros(g.shape, dtype=self.dtype)
    delta[:, self.transient:] = 2.0 * e / e.size

    # visible layer gradient
    r1f = r1.reshape((-1, r1.shape[-1]))
    deltaf = delta.reshape((-1, delta.shape[-1]))
    vg[...] = r1f.T.dot(deltaf)

    vwDelta = delta.dot(self.vw[:-1].T)
    gamma = np.zeros((nSeg, unrollSteps, self.nHidden), dtype=self.dtype)
    delta = np.zeros((nSeg, nObs, self.nHidden), dtype=self.dtype)

    # backward pass for hidden layer, unrolled through time
    for t in range(nObs - 1, 0, -1):
        rPrimet = rPrime[:, t][:, None, :]

        # shift the unroll buffer one step back through the recurrence
        beta = gamma[:, :-1]
        beta = beta.dot(self.rw.T)

        gamma[:, 0] = vwDelta[:, t]
        gamma[:, 1:] = beta
        gamma *= rPrimet

        delta[:, t] = gamma.sum(axis=1)

    # hidden layer gradient
    x1cf = x1c.reshape((-1, x1c.shape[-1]))
    deltaf = delta.reshape((-1, delta.shape[-1]))
    hg[...] = x1cf.T.dot(deltaf)

    if returnError:
        return np.mean(e**2), pg
    else:
        return pg
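# Hedged sketch (helper name is hypothetical, not in the original
# source): a central-difference check for gradient() above. It perturbs
# the packed parameter vector pw one entry at a time and compares the
# numeric slope of error() against the analytic packed gradient pg.
# With unrollSteps covering the whole segment the two should agree
# closely; with truncated unrolling a small mismatch is expected.
def _demoGradCheck(net, x, g, unrollSteps=100, eps=1e-6, nCheck=20):
    import numpy as np

    _, pg = net.gradient(x, g, unrollSteps=unrollSteps, returnError=True)

    rng = np.random.default_rng(0)
    for i in rng.choice(net.pw.size, size=nCheck, replace=False):
        orig = net.pw[i]

        net.pw[i] = orig + eps
        errPlus = net.error(x, g)
        net.pw[i] = orig - eps
        errMinus = net.error(x, g)
        net.pw[i] = orig                    # restore the parameter

        numeric = (errPlus - errMinus) / (2.0 * eps)
        print(i, numeric, pg[i])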
def __init__(self, x, g, convs=((8, 16), (16, 8)), nHidden=None,
             transFunc=transfer.lecun, weightInitFunc=pinit.lecun,
             penalty=None, elastic=1.0, optimFunc=optim.scg, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    self.dtype = np.result_type(x.dtype, g.dtype)

    Regression.__init__(self, x.shape[2], g.shape[2])
    optim.Optable.__init__(self)

    # each convolutional layer is given as (nHidden, width)
    self.nConvHiddens, self.convWidths = zip(*convs)
    self.nConvLayers = len(convs)
    self.nHidden = nHidden

    self.layerDims = [(self.nIn * self.convWidths[0] + 1, self.nConvHiddens[0]), ]
    for l in range(1, self.nConvLayers):
        ni = self.nConvHiddens[l - 1] * self.convWidths[l] + 1
        no = self.nConvHiddens[l]
        self.layerDims.append((ni, no))

    if self.nHidden is None:
        self.layerDims.append((self.nConvHiddens[-1] + 1, self.nOut))
    else:
        self.layerDims.append((self.nConvHiddens[-1] + 1, self.nHidden))
        self.layerDims.append((self.nHidden + 1, self.nOut))

    self.transFunc = transFunc if util.isiterable(transFunc) \
        else (transFunc,) * (len(self.layerDims) - 1)
    assert len(self.transFunc) == (len(self.layerDims) - 1)

    views = util.packedViews(self.layerDims, dtype=self.dtype)
    self.pw = views[0]

    if self.nHidden is None:
        self.cws = views[1:-1]
        self.hw = None
        self.vw = views[-1]
    else:
        self.cws = views[1:-2]
        self.hw = views[-2]
        self.vw = views[-1]

    if not util.isiterable(weightInitFunc):
        weightInitFunc = (weightInitFunc, ) * (self.nConvLayers + 2)
    assert len(weightInitFunc) == (len(self.cws) + 2)

    self.penalty = penalty
    if self.penalty is not None:
        if not util.isiterable(self.penalty):
            self.penalty = (self.penalty, ) * (self.nConvLayers + 2)
    assert (self.penalty is None) or (len(self.penalty) == (len(self.cws) + 2))

    self.elastic = elastic if util.isiterable(elastic) \
        else (elastic,) * (self.nConvLayers + 2)
    assert len(self.elastic) == (len(self.cws) + 2)

    # initialize weights
    for cw, wif in zip(self.cws, weightInitFunc):
        cw[...] = wif(cw.shape).astype(self.dtype, copy=False)
    if self.nHidden is not None:
        self.hw[...] = weightInitFunc[-2](self.hw.shape).astype(self.dtype, copy=False)
    self.vw[...] = weightInitFunc[-1](self.vw.shape).astype(self.dtype, copy=False)

    # train the network
    if optimFunc is not None:
        self.train(x, g, optimFunc, **kwargs)
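# Hedged sketch (illustrative helper, not in the original source): how
# much the time axis shrinks for the conv stack above. Each layer's
# util.timeEmbed with lags=width-1 removes width-1 observations, like a
# valid convolution, so convs=((8, 16), (16, 8)) shortens each segment
# by (16-1) + (8-1) = 22 observations in total.
def _demoConvShrinkage(nObs=128, convs=((8, 16), (16, 8))):
    nOut = nObs
    for nHidden, width in convs:
        nOut -= width - 1
    return nOut          # 106 for the defaults above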
def gradient(self, x, g, returnError=True):
    x = util.segmat(x)
    g = util.colmat(g)

    # packed views of the convolutional, hidden and visible gradient matrices
    views = util.packedViews(self.layerDims, dtype=self.dtype)
    pg = views[0]

    if self.nHidden is None:
        cgs = views[1:-1]
        hg = None
        vg = views[-1]
    else:
        cgs = views[1:-2]
        hg = views[-2]
        vg = views[-1]

    # forward pass
    c = x
    c1s = []
    cPrimes = []
    for l, cw in enumerate(self.cws):
        width = self.convWidths[l]
        phi = self.transFunc[l]

        c = util.timeEmbed(c, lags=width - 1, axis=1)

        c1 = util.bias(c)
        c1s.append(c1)

        h = util.segdot(c1, cw)
        cPrime = phi(h, 1)
        cPrimes.append(cPrime)

        c = phi(h)

    c1 = util.bias(c)

    # evaluate hidden and visible layers
    if self.nHidden is None:
        y = util.segdot(c1, self.vw)
    else:
        h = util.segdot(c1, self.hw)
        z1 = util.bias(self.transFunc[-1](h))
        zPrime = self.transFunc[-1](h, 1)
        y = util.segdot(z1, self.vw)

    # trim targets to match the shorter convolution output
    trim = (g.shape[1] - y.shape[1]) // 2
    gTrim = g[:, :(g.shape[1] - trim)]
    gTrim = gTrim[:, -y.shape[1]:]

    # error components
    e = util.colmat(y - gTrim)
    delta = 2.0 * e / e.size

    if self.nHidden is None:
        # visible layer gradient
        c1f = c1.reshape((-1, c1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = c1f.T.dot(deltaf)
        vg += self.penaltyGradient(-1)

        delta = util.segdot(delta, self.vw[:-1].T)
    else:
        # visible layer gradient
        z1f = z1.reshape((-1, z1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        vg[...] = z1f.T.dot(deltaf)
        vg += self.penaltyGradient(-1)

        # hidden layer gradient
        c1f = c1.reshape((-1, c1.shape[-1]))
        delta = util.segdot(delta, self.vw[:-1].T) * zPrime
        deltaf = delta.reshape((-1, delta.shape[-1]))
        hg[...] = c1f.T.dot(deltaf)
        hg += self.penaltyGradient(-2)

        delta = util.segdot(delta, self.hw[:-1].T)

    # backward pass for convolutional layers
    for l in range(self.nConvLayers - 1, -1, -1):
        c1 = c1s[l]
        cPrime = cPrimes[l]

        delta = delta[:, :cPrime.shape[1]] * cPrime

        c1f = c1.reshape((-1, c1.shape[-1]))
        deltaf = delta.reshape((-1, delta.shape[-1]))
        cgs[l][...] = c1f.T.dot(deltaf)
        cgs[l] += self.penaltyGradient(l)

        if l > 0:  # won't propagate back to inputs
            delta = util.segdot(delta, self.cws[l][:-1].T)
            delta = deltaDeEmbedSum(delta, self.convWidths[l])

    if returnError:
        error = np.mean(e**2) + self.penaltyError()
        return error, pg
    else:
        return pg
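# Hedged sketch (this is NOT the package's deltaDeEmbedSum, whose
# implementation lives elsewhere): the idea behind de-embedding in the
# backward pass above, under the assumption that timeEmbed concatenates
# `width` consecutive frames along the feature axis. Each original frame
# then contributed to up to `width` embedded windows, so its gradient is
# the sum of the matching slices.
def _demoDeltaDeEmbedSum(delta, width):
    import numpy as np

    nSeg, nObsEmb = delta.shape[0], delta.shape[1]
    nF = delta.shape[2] // width

    out = np.zeros((nSeg, nObsEmb + width - 1, nF), dtype=delta.dtype)
    for k in range(width):
        # scatter the k-th lag's slice back onto the original time axis
        out[:, k:k + nObsEmb] += delta[:, :, k * nF:(k + 1) * nF]
    return out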