def gradLinearSP(Y,  # Observed response
                 S,  # Stimulus
                 P   # Parameters
                 ):
    # Extract parameters
    a1, v1, a2, v2, d = P
    # Model nonlinearities
    f1, f2 = logistic, softPlus
    # Derivatives of the model nonlinearities and of the cost function
    df1, df2, dfe = dlog, dSP, dllike
    ndim = v1.ndim
    # First layer input: offset plus linear filter response
    x1 = a1 + tdot(v1, S, 2 * (list(range(ndim)), ))
    r1 = f1(x1)
    dr1 = df1(x1)
    # Second layer input: offset plus weighted sum of first-layer responses
    x2 = a2 + (r1 * v2).sum()
    r2 = f2(x2)
    dr2 = df2(x2)
    # Chain rule: propagate the cost derivative back to each parameter
    dy = d * dfe(Y, d * r2)
    dd = dy * r2 / d
    da2 = dy * dr2
    dv2 = dy * dr2 * r1
    da1 = dy * dr2 * (dr1 * v2).sum()
    dv1 = dy * dr2 * tdot(dr1 * S, v2,
                          (list(range(-ndim, 0)), list(range(ndim))))
    return Params([da1, dv1, da2, dv2, dd])
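
# The helpers used above (logistic, softPlus, dlog, dSP, dllike, and tdot)
# are not defined in this section. The sketch below gives definitions that
# are consistent with how gradLinearSP uses them; treat it as an assumption
# about the rest of the repo, not its actual source. In particular, dllike
# is written for a Poisson log-likelihood cost, which is a guess based on
# the llike/dllike names.
import numpy as np
from numpy import tensordot as tdot  # assumed: tdot is numpy.tensordot


def logistic(x):
    # Standard logistic sigmoid
    return 1. / (1. + np.exp(-x))


def dlog(x):
    # Derivative of the logistic: f * (1 - f)
    f = logistic(x)
    return f * (1. - f)


def softPlus(x):
    # Smooth rectifier log(1 + e^x)
    return np.log1p(np.exp(x))


def dSP(x):
    # Derivative of softplus is the logistic
    return logistic(x)


def dllike(Y, r):
    # Derivative with respect to the rate r of a Poisson log-likelihood
    # term Y * log(r) - r (assumed form of the llike cost)
    return Y / r - 1.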
def gradLog2(Y,  # Observed response
             S,  # Stimulus
             P   # Parameters
             ):
    # Extract parameters
    a1, v1, J1, a2, v2, d = P
    # Model nonlinearities
    f1, f2 = logistic, logistic
    # Derivatives of the model nonlinearities and of the cost function
    df1, df2, dfe = dlog, dlog, dllike
    ndim = v1.ndim
    # First layer input: offset plus linear term plus quadratic term
    x1 = (a1 + tdot(v1, S, 2 * (list(range(ndim)), ))
          + (tdot(J1, S, 2 * (list(range(ndim)), ))
             * S).sum(tuple(range(ndim))))
    r1 = f1(x1)
    dr1 = df1(x1)
    # Second layer input: offset plus weighted sum of first-layer responses
    x2 = a2 + (r1 * v2).sum()
    r2 = f2(x2)
    dr2 = df2(x2)
    # Chain rule: propagate the cost derivative back to each parameter
    dy = d * dfe(Y, d * r2)
    dd = dy * r2 / d
    da2 = dy * dr2
    dv2 = dy * dr2 * r1
    da1 = dy * dr2 * (dr1 * v2).sum()
    dv1 = dy * dr2 * tdot(dr1 * S, v2,
                          (list(range(-ndim, 0)), list(range(ndim))))
    # Gradient of the quadratic term contracts S twice against v2
    dJ1 = dy * dr2 * tdot(
        dr1 * S * S.reshape(S.shape[:ndim] + ndim * (1, ) + S.shape[ndim:]),
        v2, (list(range(-ndim, 0)), list(range(ndim))))
    return Params([da1, dv1, dJ1, da2, dv2, dd])
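
# A finite-difference check is a cheap way to validate an analytic gradient
# like gradLog2. This sketch assumes Params supports len(), integer indexing
# and unpacking (all used elsewhere in this file), that its elements are
# numpy arrays, and that llike(Y, r) is the scalar cost that the gradient
# updates ascend (inferred from the P += grad(...) * lrate update in
# gradDescent below), so the analytic gradient should match the negated
# numerical derivative of the cost.
def checkGradLog2(Y, S, P, eps=1e-6):
    G = gradLog2(Y, S, P)
    for i in range(len(P)):
        p = P[i]
        g = np.asarray(G[i]).flatten()
        for k in range(p.size):
            orig = p.flat[k]
            p.flat[k] = orig + eps
            cp = llike(Y, respLog2(S, P))
            p.flat[k] = orig - eps
            cm = llike(Y, respLog2(S, P))
            p.flat[k] = orig
            num = -(cp - cm) / (2. * eps)
            assert abs(num - g[k]) < 1e-4 * max(1., abs(num))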
def respLog2(S, P):
    # Inputs:
    #  S - A stimulus
    #  P - Model parameters
    # Output:
    #  r - Response of model for given stimulus
    # Extract parameters
    a1, v1, J, a2, v2, d = P
    # Model nonlinearities
    f1, f2 = logistic, logistic
    ndim = v1.ndim
    # Calculate first layer responses
    r1 = f1(a1 + tdot(v1, S, 2 * (list(range(ndim)), ))
            + (tdot(J, S, 2 * (list(range(ndim)), ))
               * S).sum(tuple(range(ndim))))
    # Calculate second layer responses
    r2 = f2(a2 + (r1 * v2).sum())
    return d * r2
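
# Example call for respLog2, showing the expected shapes. A plain list stands
# in for Params here (respLog2 only unpacks its second argument). With a 4x4
# stimulus patch and a 3x3 grid of patch positions, the patch axes lead and
# the grid axes trail:
def _exampleRespLog2():
    rng = np.random.RandomState(0)
    fsize, gsize = (4, 4), (3, 3)
    P = [np.zeros(1),                    # a1, first-layer offset
         0.1 * rng.randn(*fsize),        # v1, linear filter
         0.1 * rng.randn(*(2 * fsize)),  # J, quadratic filter
         np.zeros(1),                    # a2, second-layer offset
         0.1 * rng.randn(*gsize),        # v2, second-layer weights
         np.ones(1)]                     # d, output gain
    S = rng.randn(*(fsize + gsize))      # one stimulus patch per grid point
    return respLog2(S, P)                # model firing rate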
def respLinearSP(S, P):
    # Inputs:
    #  S - A stimulus
    #  P - Parameters
    # Output:
    #  r - Response of model for given stimulus
    # Extract parameters
    a1, v1, a2, v2, d = P
    # Model nonlinearities
    f1, f2 = logistic, softPlus
    ndim = v1.ndim
    # Calculate first layer responses
    r1 = f1(a1 + tdot(v1, S, 2 * (list(range(ndim)), )))
    # Calculate second layer responses
    r2 = f2(a2 + (r1 * v2).sum())
    return d * r2
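
# Resp is called below in gradDescent but not defined in this section. Based
# on that call, R = Resp(S, P, resp), and on the explicit per-trial loop
# array([resp(S[j, ...], P) for j in pr]) that gradDescent also uses, a
# plausible reconstruction maps the single-stimulus response over the
# leading trial axis of S:
def Resp(S, P, resp):
    # Evaluate the response function for every trial
    return np.array([resp(S[j, ...], P) for j in range(S.shape[0])])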
def reshape_coordinates(coordinates):
    """ Reshape coordinates to align with coefficient array """
    return tdot(coordinates, tdot(coordinates, coordinates, axes=0), axes=0)
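
# A small sanity check for reshape_coordinates (again assuming tdot is
# numpy.tensordot, for which axes=0 is an outer product): entry (i, j, k) of
# the result is coordinates[i] * coordinates[j] * coordinates[k].
def _checkReshapeCoordinates():
    c = np.array([1., 2., 3.])
    cube = reshape_coordinates(c)
    assert cube.shape == (3, 3, 3)
    assert cube[1, 2, 0] == c[1] * c[2] * c[0]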
def correlation_fct(component):
    # Double-layer tensors with the operator sigma[component] inserted
    X = map(lambda b: peps.make_double_layer(b, o=sigma[component]), a)
    mx = m[component]

    # Left boundary: contract the corner/edge environment around site 0,
    # once with the operator inserted (uX) and once without (uA)
    tmp = tdot(env.c4[lut[0, -1, -1]], env.t4[lut[0, -1, 0]], [1, 0])
    tmp = tdot(tmp, env.c1[lut[0, -1, 1]], [2, 0])
    tmp = tdot(tmp, env.t3[lut[0, 0, -1]], [0, 2])
    tmp2 = tdot(tmp, X[0], [[3, 0], [2, 3]])
    uX = tdot(tmp2, env.t1[lut[0, 0, 1]], [[0, 2], [0, 1]]).flatten()
    tmp2 = tdot(tmp, A[0], [[3, 0], [2, 3]])
    uA = tdot(tmp2, env.t1[lut[0, 0, 1]], [[0, 2], [0, 1]]).flatten()

    # Right boundaries for each of the n sublattice sites
    vX = [None] * n
    vA = [None] * n
    for j in xrange(n):
        tmp = tdot(env.c3[lut[j, 1, -1]], env.t2[lut[j, 1, 0]], [0, 2])
        tmp = tdot(tmp, env.c2[lut[j, 1, 1]], [1, 1])
        tmp = tdot(tmp, env.t3[lut[j, 0, -1]], [0, 0])
        tmp2 = tdot(tmp, X[j], [[0, 2], [1, 2]])
        vX[j] = tdot(tmp2, env.t1[lut[j, 0, 1]], [[0, 2], [2, 1]]).flatten()
        tmp2 = tdot(tmp, A[j], [[0, 2], [1, 2]])
        vA[j] = tdot(tmp2, env.t1[lut[j, 0, 1]], [[0, 2], [2, 1]]).flatten()

    # Column transfer matrices used to move the boundary one site at a time
    M = [None] * n
    for j in xrange(n):
        tmp = tdot(env.t3[lut[j, 0, -1]], A[j], [1, 2])
        tmp = tdot(tmp, env.t1[lut[j, 0, 1]], [2, 1])
        M[j] = tmp.transpose([1, 3, 4, 0, 2, 5]).reshape(
            len(vX[lut[j, -1, 0]]), len(vX[lut[j, 1, 0]]))

    # Connected correlator <s_0 s_L> - <s_0><s_L> for separations 0..Lmax,
    # normalizing each overlap by the operator-free contraction
    res = np.empty(Lmax + 1)
    j = lut[0, 1, 0]
    for L in xrange(Lmax + 1):
        print L
        res[L] = np.dot(uX, vX[j]) / np.dot(uA, vA[j]) - mx[0] * mx[j]
        if L < Lmax:
            uX = np.dot(uX, M[j])
            uA = np.dot(uA, M[j])
            j = nns[0][j]
    return res
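
# The loop at the end of correlation_fct is a transfer-matrix contraction:
# boundary vectors uX/uA are propagated one column at a time by the matrices
# M[j], and each separation L contributes one normalized overlap. The same
# pattern on a self-contained toy problem (the exact 1D Ising transfer
# matrix; not part of this code base) looks like this:
def _toyIsingCorrelations(beta=0.5, Lmax=10):
    T = np.exp(beta * np.array([[1., -1.], [-1., 1.]]))  # transfer matrix
    sz = np.diag([1., -1.])                              # spin operator
    w, V = np.linalg.eigh(T)
    lam, u = w[-1], V[:, -1]        # dominant eigenvalue and boundary vector
    mag = np.dot(u, np.dot(sz, u))  # magnetization (zero here by symmetry)
    uX = np.dot(u, sz)              # boundary with the operator inserted
    v = np.dot(sz, u)
    res = np.empty(Lmax + 1)
    for L in range(Lmax + 1):
        # <s_0 s_L> - <s_0><s_L>; lam**L plays the role of the uA overlap
        res[L] = np.dot(uX, v) / lam ** L - mag * mag
        v = np.dot(T, v)
    return res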
def gradDescent(prefix, spikes, stim, jack, fsize, extrapSteps=10,
                pixelNorm=True, filepath=None, model='softplus', maxIts=None,
                maxHours=None, perm=True, overwrite=False, Njack=4,
                start='rand_rand', nlags=1, splits=None, LRType='DecayRate',
                LRParams={}):
    assert isinstance(prefix, StringType)
    print 'Prefix ' + prefix
    Njack = IntCheck(Njack)
    jack = IntCheck(jack)
    assert jack > 0 and jack <= Njack
    print 'Jack ', jack, 'out of ', Njack
    FSIZE = stim.shape[:-1] + (nlags, )
    print 'Full frame size ', FSIZE
    assert isinstance(fsize, tuple)
    if len(fsize) < len(FSIZE):
        fsize = fsize + (len(FSIZE) - len(fsize)) * (1, )
    print 'Patch frame size ', fsize
    gsize = tuple([F - f + 1 for F, f in zip(FSIZE, fsize)])
    NGRID = prod(gsize)
    ng = len(gsize)
    print 'Grid size ', gsize
    assert model in ['softplus', 'linearSoftplus', 'logistic',
                     'linearLogistic']
    print 'Model ', model
    # Select the response, gradient and cost functions for the chosen model
    if model == 'softplus':
        resp = respSP
        grad = gradSP
        cost = llike
        AlgTag = '_QuadraticSoftPlus'
    elif model == 'linearSoftplus':
        resp = respLinearSP
        grad = gradLinearSP
        cost = llike
        AlgTag = '_LinearSoftPlus'
    elif model == 'logistic':
        resp = respLog2
        grad = gradLog2
        cost = llike
        AlgTag = '_QuadraticLogistic'
    elif model == 'linearLogistic':
        resp = respLinearLog2
        grad = gradLinearLog2
        cost = llike
        AlgTag = '_LinearLogistic'
    extrapSteps = IntCheck(extrapSteps)
    print 'Steps used to estimate error slope ', extrapSteps
    if pixelNorm:
        print 'Normalizing by pixel statistics'
    else:
        print 'Normalizing by global statistics'
    if filepath in [None, '', './']:
        filepath = ''
        print 'Saving output in current directory'
    else:
        assert isinstance(filepath, StringType)
        filepath = expanduser(filepath)
        assert isdir(filepath)
        print 'Saving output files to ', filepath
    assert isinstance(start, (list, tuple, str, Params))
    if isinstance(start, str):
        vstart, bstart = start.split('_')
        assert bstart in ['rand', 'sta', 'uniform']
        assert vstart in ['rand', 'stim', 'sta']
        if bstart == 'rand':
            print 'Initializing second layer randomly'
        elif bstart == 'uniform':
            print 'Initializing second layer uniformly'
        elif bstart == 'sta':
            print 'Initializing second layer using STA'
        if vstart == 'rand':
            print 'Initializing first layer randomly'
        elif vstart == 'stim':
            print 'Initializing first layer using random stimuli'
        elif vstart == 'sta':
            print 'Initializing first layer using STA'
    else:
        print 'Starting parameters given'
    if maxIts is None:
        maxIts = inf
        print 'No limit on iterations'
    else:
        maxIts = IntCheck(maxIts)
        print 'Max iterations ', maxIts
    genesis = time()
    if maxHours is None:
        print 'No limit on runtime'
        eschaton = inf
    else:
        assert isinstance(maxHours, NumType)
        eschaton = genesis + maxHours * 3600
        print 'Max hours ', maxHours
    if isinstance(perm, ndarray):
        print 'Permuting data by given array before division'
    elif perm:
        print 'Randomly divided data sets'
    else:
        print 'Contiguous data sets'
    # Get stimulus shape and size
    Ntrials = stim.shape[-1] - nlags + 1
    # Convert spikes from int
    Y = spikes.astype(float)
    del spikes
    # Drop spikes before first full stimulus
    Y = Y[nlags - 1:]
    # Check that stimulus and responses have the same size
    assert Y.size == Ntrials
    # Size of first layer input
    npix = prod(fsize)
    # Convert stimulus to zero mean and unit stdev
    stim = normStim(stim, pixelNorm)[0]
    Nvalid = Ntrials / Njack
    Ntrials -= Nvalid
    # Randomly permute stimulus and spikes
    if isinstance(perm, ndarray):
        assert perm.size == Ntrials + Nvalid
        p = perm[nlags - 1:]
    elif perm:
        RS = RandomState(0)
        p = RS.permutation(Ntrials + Nvalid - nlags + 1)
    else:
        # Contiguous data sets: keep the trials in their original order
        p = arange(Ntrials + Nvalid - nlags + 1)
    # Split data into training and test sets
    validslice = slice((jack - 1) * Nvalid, jack * Nvalid)
    pv = p[validslice]
    pr = delete(p, validslice)
    # Remove samples that span recordings from training and validation sets
    if splits is not None:
        invalid = array([arange(sp - nlags + 1, sp)
                         for sp in splits]).flatten()
        pv = array([pp for pp in pv if pp not in invalid])
        pr = array([pp for pp in pr if pp not in invalid])
    # Extract stimulus at grid locations
    S = gridStim(stim, fsize, nlags)
    # Divide responses into training and validation sets
    YR = Y[pr]
    YV = Y[pv]
    spikesmean = YR.mean()
    Nspikes = YR.sum()
    # Calculate error of the mean model
    errTrain0 = cost(YR, spikesmean)
    errValid0 = cost(YV, spikesmean)
    stdout.write('Training: {0} frames, {1} spikes\n'.format(Ntrials,
                                                             Nspikes))
    stdout.write('Validation: {0} frames, {1} spikes\n'.format(
        Nvalid, YV.sum()))
    stdout.flush()
    # Create filenames
    trainBestName = filepath + prefix + AlgTag + '_train_%u.dat' % (jack, )
    validBestName = filepath + prefix + AlgTag + '_valid_%u.dat' % (jack, )
    statusName = filepath + prefix + AlgTag + '_%u.temp' % (jack, )
    errTrainName = filepath + prefix + AlgTag + '_errTrain_%u.dat' % (jack, )
    errValidName = filepath + prefix + AlgTag + '_errValid_%u.dat' % (jack, )
    # Calculate shapes of parameters
    if model in ['softplus', 'logistic']:
        shapes = [(1, ), fsize, 2 * fsize, (1, ), gsize, (1, )]
    else:
        shapes = [(1, ), fsize, (1, ), gsize, (1, )]
    # Check to see if a previous run exists
    if exists(statusName):
        stdout.write('Loading previous run\n')
        stdout.flush()
        with open(statusName, 'r') as f:
            its = fromfile(f, count=1, dtype=int)
            errValidMin = fromfile(f, count=1)
        P = Params(trainBestName, shapes)
        PV = Params(validBestName, shapes)
        if its > maxIts:
            maxIts += its
        with open(errValidName, 'r') as f:
            errValidHist = list(fromfile(f))
        if len(errValidHist) > extrapSteps:
            errValidHist = errValidHist[-extrapSteps:]
        with open(errTrainName, 'r') as f:
            errTrain = fromfile(f)[-1]
    else:
        if exists(trainBestName) and not overwrite:
            print 'Output files exist'
            return
        else:
            if isinstance(start, Params):
                # If start is a Params object with the right number of
                # parameters, copy it
                if len(start) == len(shapes):
                    P = start.copy()
                # If start is a linear model, create J from random stimulus
                # combinations
                else:
                    # Linear models have one less parameter
                    assert len(start) == len(shapes) - 1
                    # Create J from randomly weighted stimulus patches
                    RS = RandomState()
                    J = zeros(2 * start[1].shape)
                    for j in pr:
                        r = RS.randn(NGRID).reshape(gsize)
                        J += tdot(S[j, ...], S[j, ...] * r,
                                  (range(-ng, 0), range(-ng, 0)))
                    # Initialize J so that it starts small relative to the
                    # linear term
                    J *= 0.00001 * norm(start[1]) / norm(J)
                    # Insert J into P
                    P = start.copy().getParams()
                    P.insert(2, J)
                    P = Params(P)
            # If start is a list/tuple, reshape values and convert to Params
            elif isinstance(start, list) or isinstance(start, tuple):
                assert len(start) == len(shapes)
                for s, p in zip(shapes, start):
                    p.shape = s
                P = Params(start, shapes)
            else:
                # Initialize first layer randomly
                if vstart == 'rand':
                    RS = RandomState()
                    v = RS.randn(npix).reshape(fsize)
                    v /= norm(v)
                    if model in ['softplus', 'logistic']:
                        J = RS.randn(npix, npix)
                        J = J + J.T
                        J /= norm(J)
                        J.shape = 2 * fsize
                # Initialize first layer with random stimuli from training
                # set
                elif vstart == 'stim':
                    RS = RandomState()
                    v = zeros(fsize)
                    for j in pr:
                        r = RS.randn(NGRID).reshape(gsize)
                        v += tdot(S[j, ...], r, (range(-ng, 0), range(ng)))
                    v /= norm(v)
                    if model in ['softplus', 'logistic']:
                        J = zeros(2 * fsize)
                        for j in pr:
                            r = RS.randn(NGRID).reshape(gsize)
                            J += tdot(S[j, ...], S[j, ...] * r,
                                      (range(-ng, 0), range(-ng, 0)))
                        J /= norm(J)
                # Initialize first layer using STA/STC
                elif vstart == 'sta':
                    ES = zeros(fsize)
                    ESY = zeros(fsize)
                    if model in ['softplus', 'logistic']:
                        ESS = zeros(fsize * 2)
                        ESSY = zeros(fsize * 2)
                    for pp in pr:
                        SS = S[pp, ...].sum(-1).sum(-1).sum(-1).sum(-1)
                        ES += SS
                        ESY += SS * Y[pp]
                        if model in ['softplus', 'logistic']:
                            SSS = SS * SS.reshape(SS.shape + 4 * (1, ))
                            ESS += SSS
                            ESSY += SSS * Y[pp]
                    ES /= pr.size
                    ESY /= YR.sum()
                    v = ESY - ES
                    v /= norm(v)
                    if model in ['softplus', 'logistic']:
                        ESS /= pr.size
                        ESSY /= YR.sum()
                        J = (ESSY - ESY * ESY.reshape(ESY.shape + 4 * (1, ))
                             ) - (ESS - ES * ES.reshape(ES.shape + 4 * (1, )))
                        J /= norm(J)
                else:
                    raise Exception('Unsupported initialization')
                # Scale v and J.
                v *= 0.1
                if model in ['softplus', 'logistic']:
                    J *= 0.1
                # Initialize second layer randomly
                if bstart == 'rand':
                    RS = RandomState()
                    v2 = RS.randn(*gsize)
                    v2 /= norm(v2)
                    v2 *= 0.1
                # Initialize second layer uniformly
                elif bstart == 'uniform':
                    v2 = ones(gsize)
                    v2 /= norm(v2)
                    v2 *= 0.1
                # Initialize second layer using STA
                elif bstart == 'sta':
                    ES = zeros(gsize)
                    ESY = zeros(gsize)
                    for pp in pr:
                        xv = tdot(S[pp, ...], v, 2 * (range(4), ))
                        # The quadratic term only exists for quadratic models
                        if model in ['softplus', 'logistic']:
                            xJ = (tdot(J, S[pp, ...], 2 * (range(4), ))
                                  * S[pp, ...]).sum(0).sum(0).sum(0).sum(0)
                        else:
                            xJ = 0.
                        r1 = logistic(xv + xJ)
                        ES += r1
                        ESY += r1 * Y[pp]
                    ES /= pr.size
                    ESY /= YR.sum()
                    v2 = ESY - ES
                    v2 /= norm(v2)
                    v2 *= 0.1
                # Combine initialized parameters into a Params object
                if model in ['softplus', 'logistic']:
                    P = Params([zeros(1), v, J, zeros(1), v2, ones(1)])
                else:
                    P = Params([zeros(1), v, zeros(1), v2, ones(1)])
                # Set d to match mean firing rate on training set
                R = array([resp(S[j, ...], P) for j in pr])
                rmean = R.mean()
                P[-1][:] = spikesmean / rmean
                P = Params(P)
            # Calculate initial error
            R = Resp(S, P, resp)
            errTrain = cost(YR, R[pr]) / errTrain0
            errValid = cost(YV, R[pv]) / errValid0
            # Save initial errors
            with open(errTrainName, 'w') as f:
                errTrain.tofile(f)
            with open(errValidName, 'w') as f:
                errValid.tofile(f)
            errValidHist = [errValid]
            # Save initial values as best so far
            errValidMin = errValid.copy()
            PV = P.copy()
            # Save initial parameters to parameter files
            P.tofile(trainBestName)
            PV.tofile(validBestName)
            # Keep track of the number of iterations
            its = 0
    stdout.write('Beginning optimization\n')
    stdout.flush()
    if model in ['softplus', 'logistic']:
        Pname = ['a1', 'v1', 'J1', 'a2', 'v2', 'd']
    else:
        Pname = ['a1', 'v1', 'a2', 'v2', 'd']
    # Start slope as negative
    slope = -1.
    # Print status
    print '%u Values:' % (its, ),
    for nam, p in zip(Pname, P):
        if p.size == 1:
            print ' %s %.3e' % (nam, p),
        else:
            print ' %s %.3e' % (nam, norm(p)),
    print ''
    errTrainLast = errTrain.copy()
    PLast = P.copy()
    # Select and initialize learning rate rule
    if LRType == 'DecayRate':
        LR = DecayRate(errTrain, its, **LRParams)
    elif LRType == 'BoldDriver':
        LR = BoldDriver(errTrain, **LRParams)
    else:
        LR = LearningRate(errTrain, **LRParams)
    # Run until the slope of the validation error becomes positive, time
    # runs out, maximum iterations is reached, or learning rate falls to eps
    while (((slope < 0) or (its < extrapSteps)) and (time() < eschaton)
           and (its < maxIts) and (LR.lrate > eps)):
        # For each training example, calculate gradient and update parameters
        for j in pr:
            y = Y[j]
            s = S[j, ...]
            P += grad(y, s, P) * LR.lrate
        # Increment to next iteration
        its += 1
        # Calculate current training error and update learning rule
        R = Resp(S, P, resp)
        errTrain = cost(YR, R[pr]) / errTrain0
        LR.update(errTrain)
        # If training error decreases
        if errTrain < errTrainLast:
            # Save new copies of last error and parameters
            errTrainLast = errTrain.copy()
            PLast = P.copy()
            # Calculate validation error
            errValid = cost(YV, R[pv]) / errValid0
            errValidHist.append(errValid)
            # Calculate slope of the validation error by a least-squares
            # line fit to the recent history
            if len(errValidHist) > extrapSteps:
                errValidHist = errValidHist[-extrapSteps:]
            x = ones((2, len(errValidHist)))
            x[1, :] = arange(len(errValidHist))
            slope = dot(inv(dot(x, x.T)), dot(x, array(errValidHist)))[1]
            # Save current parameters
            P.tofile(trainBestName)
            # Append errors to history files
            with open(errTrainName, 'a') as f:
                errTrain.tofile(f)
            with open(errValidName, 'a') as f:
                errValid.tofile(f)
            # If validation error has reached a new minimum
            if errValid < errValidMin:
                # Note the size of the improvement before updating the best
                # value
                errDown = errValidMin - errValid
                errValidMin = errValid
                # Copy parameters and save to parameter file
                PV = P.copy()
                PV.tofile(validBestName)
                # Output note of improvement
                print '%u: New validation minimum %.5g, down %.3g' % (
                    its, errValidMin, errDown)
            # Save current status
            with open(statusName, 'w') as f:
                array(its).tofile(f)
                errValidMin.tofile(f)
            # Print status
            print '%u Values:' % (its, ),
            for nam, p in zip(Pname, P):
                if p.size == 1:
                    print ' %s %.3e' % (nam, p),
                else:
                    print ' %s %.3e' % (nam, norm(p)),
            print ''
            print 'Slope %.3e' % (slope, )
        else:
            # Training error increased: revert the step and let the learning
            # rate rule shrink the rate
            print 'Training error increased: learning rate too high'
            print 'New learning rate %.3e' % LR.lrate
            its -= 1
            P = PLast.copy()
    # If converged, delete status file
    if time() < eschaton and its < maxIts:
        remove(statusName)
    # Note that program has terminated successfully
    stdout.write('Time elapsed {0:.3f} hours\n'.format(
        (time() - genesis) / 3600.))
    stdout.write('Finished\n')
    stdout.flush()
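
# DecayRate, BoldDriver and LearningRate are not defined in this section.
# From the calls in gradDescent, each is constructed from the initial
# training error (DecayRate also takes the starting iteration count),
# exposes the current step size as .lrate, and is told each new error via
# update(). A minimal sketch of that interface follows; the particular decay
# and bold-driver rules are standard choices standing in for the repo's
# actual ones, and the default rates are placeholders.
class LearningRate(object):
    # Constant learning rate
    def __init__(self, err0, lrate=1e-5):
        self.lrate = lrate

    def update(self, err):
        pass


class DecayRate(LearningRate):
    # Learning rate decaying as 1 / (1 + its / tau)
    def __init__(self, err0, its, lrate0=1e-5, tau=100.):
        self.lrate0, self.tau = lrate0, tau
        self.its = its
        self.lrate = lrate0 / (1. + its / tau)

    def update(self, err):
        self.its += 1
        self.lrate = self.lrate0 / (1. + self.its / self.tau)


class BoldDriver(LearningRate):
    # Grow the rate while the error falls; cut it back when it rises
    def __init__(self, err0, lrate=1e-5, up=1.05, down=0.5):
        self.lrate = lrate
        self.up, self.down = up, down
        self.err = err0

    def update(self, err):
        self.lrate *= self.up if err < self.err else self.down
        self.err = err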