import numpy as np
import scipy.integrate
from patsy import bs


def basis_bsplines(n_functions=5, argvals=None, degree=3, knots=None, norm=False):
    """Define a B-splines basis of functions.

    Build a basis of :math:`K` functions using a B-splines basis on the
    interval defined by ``argvals``.

    Parameters
    ----------
    n_functions: int, default=5
        Number of considered B-splines.
    argvals: numpy.ndarray, default=None
        The values at which the B-splines are evaluated. If ``None``, the
        B-splines are evaluated on the interval :math:`[0, 1]`.
    degree: int, default=3
        Degree of the B-splines. The default gives cubic splines.
    knots: numpy.ndarray, (n_knots,)
        Specify the break points defining the B-splines. If ``knots`` are
        provided, the provided value of ``n_functions`` is ignored and the
        number of basis functions is ``n_knots + degree - 1``.
    norm: boolean, default=False
        Should we normalize the functions?

    Returns
    -------
    values: np.ndarray, shape=(n_functions, len(argvals))
        An array containing the evaluation of ``n_functions`` functions of
        the B-splines basis.

    Examples
    --------
    >>> basis_bsplines(n_functions=5, argvals=np.arange(0, 1, 0.01))

    """
    if argvals is None:
        argvals = np.arange(0, 1, 0.01)
    if isinstance(argvals, list):
        raise ValueError('argvals has to be a numpy array!')

    if knots is not None:
        n_knots = len(knots)
        n_functions = n_knots + degree - 1
    else:
        n_knots = n_functions - degree + 1
        knots = np.linspace(argvals[0], argvals[-1], n_knots)

    values = bs(argvals, df=n_functions, knots=knots[1:-1],
                degree=degree, include_intercept=True)
    if norm:
        norm2 = np.sqrt(scipy.integrate.simps(values * values, argvals, axis=0))
        values = values / norm2
    return values.T
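# A quick sanity check of patsy's column-count rule used above (a minimal
# sketch, not part of the original module; only numpy and patsy assumed):
import numpy as np
from patsy import bs

x = np.linspace(0.0, 1.0, 101)
# with include_intercept=True, patsy returns exactly df columns
B = bs(x, df=7, degree=3, include_intercept=True)
assert B.shape == (101, 7)

# equivalently, 4 interior knots with cubic splines give 4 + 3 + 1 = 8
# basis functions, matching n_knots + degree - 1 for n_knots = 6 break
# points (2 boundary + 4 interior)
B2 = bs(x, knots=[0.2, 0.4, 0.6, 0.8], degree=3, include_intercept=True)
assert B2.shape[1] == 8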
import numpy as np
import patsy


def spline(w, x):
    # Evaluate a B-spline expansion with coefficient vector w at points x;
    # the number of coefficients fixes the basis dimension (df).
    w = np.asarray(w)
    x = np.asarray(x)
    splines = patsy.bs(
        x,
        df=w.shape[0],
        lower_bound=np.min(x),
        upper_bound=np.max(x),
        include_intercept=True,
    )
    return np.dot(splines, w)
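# Minimal usage sketch for spline() above (synthetic weights; assumes
# only numpy and the helper itself):
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 200)
w = rng.normal(size=6)      # 6 coefficients -> a df=6 basis is built
y = spline(w, x)            # smooth curve evaluated at x
assert y.shape == (200,)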
def suggestPSTHKnots(dt, TR, N, bindat, bnsz=50, iknts=2):
    """
    bnsz      binsize used to calculate approximate PSTH
    """
    spkts = _U.fromBinDat(bindat, SpkTs=True)

    h, bs = _N.histogram(spkts, bins=_N.linspace(0, N, (N / bnsz) + 1))
    fs = (h / (TR * bnsz * dt))
    apsth = _N.repeat(fs, bnsz)    #  piecewise boxy approximate PSTH
    apsth *= dt

    ITERS = 1000
    x = _N.linspace(0., N - 1, N, endpoint=False)   #  in units of ms.
    r2s = _N.empty(ITERS)
    allKnts = _N.empty((ITERS, iknts))
    allCoeffs = []

    tAvg = 1. / iknts
    tsMin = tAvg * 0.5
    tsMax = tAvg * 1.5

    for it in xrange(ITERS):
        bGood = False
        while not bGood:
            try:
                pieces = tsMin + _N.random.rand(iknts + 1) * (tsMax - tsMin)
                knts = _N.empty(iknts + 1)
                knts[0] = pieces[0]
                for i in xrange(1, iknts + 1):
                    knts[i] = knts[i - 1] + pieces[i]
                knts /= knts[-1]
                knts[0:-1] *= N
                #knts = _N.sort((0.1 + 0.85*_N.random.rand(iknts))*N)
                B = patsy.bs(x, knots=(knts[0:-1]), include_intercept=True)
                iBTB = _N.linalg.inv(_N.dot(B.T, B))
                bGood = True
            except (_N.linalg.linalg.LinAlgError, ValueError):
                print "Linalg Error or Value Error in suggestPSTHKnots"

        #a = _N.dot(iBTB, _N.dot(B.T, _N.log(apsth)))
        a = _N.dot(iBTB, _N.dot(B.T, apsth))
        #ft = _N.exp(_N.dot(B, a))
        ft = _N.dot(B, a)
        r2s[it] = _N.dot(ft - apsth, ft - apsth)
        allKnts[it, :] = knts[0:-1]
        allCoeffs.append(a)
def elastic_regression(f, y, time, B=None, lam=0, df=20, max_itr=20,
                       cores=-1, smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic methods

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param y: numpy array of N responses
    :param time: vector of size M describing the sample points
    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return SSE: sum of squared error
    """
    M = f.shape[0]
    N = f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    # second derivative for regularization
    Bdiff = np.zeros((M, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)

    q = uf.f_to_srsf(f, time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((time[-1] - time[0]) * gamma[:, ii]
                                  + time[0], time, f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(time, q[:, ii], gamma[:, ii])

        # OLS using basis
        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], time)

        R = np.zeros((Nb + 1, Nb + 1))
        for ii in range(1, Nb + 1):
            for jj in range(1, Nb + 1):
                R[ii, jj] = trapz(Bdiff[:, ii - 1] * Bdiff[:, jj - 1], time)

        xx = dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = dot(Phi.T, y)
        b = dot(inv_xx, xy)

        alpha = b[0]
        beta = B.dot(b[1:Nb + 1])
        beta = beta.reshape(M)

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = trapz(qn[:, ii] * beta, time)

        SSE[itr - 1] = sum((y.reshape(N) - alpha - int_X) ** 2)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(regression_warp)(
                beta, time, q[:, n], y[n], alpha) for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = regression_warp(beta, time, q[:, ii],
                                                   y[ii], alpha)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamI = uf.SqrtMeanInverse(gamma_new)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    beta = np.interp((time[-1] - time[0]) * gamI + time[0], time,
                     beta) * np.sqrt(gamI_dev)

    for ii in range(0, N):
        qn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, qn[:, ii]) * np.sqrt(gamI_dev)
        fn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, fn[:, ii])
        gamma[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                                 time, gamma_new[:, ii])

    model = collections.namedtuple('model', ['alpha', 'beta', 'fn', 'qn',
                                             'gamma', 'q', 'B', 'b',
                                             'SSE', 'type'])
    out = model(alpha, beta, fn, qn, gamma, q, B, b[1:-1], SSE[0:itr],
                'linear')
    return out
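# The core update in elastic_regression is ridge-style penalized least
# squares on a spline design.  A self-contained sketch of just that step,
# with synthetic Phi and y (illustrative stand-ins, not the library's API):
import numpy as np
from numpy.linalg import inv
from patsy import bs

rng = np.random.default_rng(1)
time = np.linspace(0, 1, 101)
B = bs(time, df=20, degree=4, include_intercept=True)    # (101, 20)
Nb = B.shape[1]
dt = time[1] - time[0]

Phi = np.ones((50, Nb + 1))                  # intercept + basis features
Phi[:, 1:] = rng.normal(size=(50, Nb))       # stand-in for the integrals
y = rng.normal(size=50)

# curvature penalty R built from second derivatives of the basis
Bdiff = np.gradient(np.gradient(B, dt, axis=0), dt, axis=0)
R = np.zeros((Nb + 1, Nb + 1))
R[1:, 1:] = Bdiff.T @ Bdiff * dt             # crude stand-in for trapz

lam = 0.1
b = inv(Phi.T @ Phi + lam * R) @ (Phi.T @ y)  # penalized OLS solve
alpha, beta_t = b[0], B @ b[1:]               # intercept and beta(t)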
def initGibbs(self):   ################################ INITGIBBS
    oo = self
    if oo.bpsth:
        oo.B = patsy.bs(_N.linspace(0, (oo.t1 - oo.t0) * oo.dt,
                                    (oo.t1 - oo.t0)),
                        df=oo.dfPSTH, knots=oo.kntsPSTH,
                        include_intercept=True)    #  spline basis
        if oo.dfPSTH is None:
            oo.dfPSTH = oo.B.shape[1]
        oo.B = oo.B.T    #  My convention for beta
        oo.aS = _N.linalg.solve(
            _N.dot(oo.B, oo.B.T),
            _N.dot(oo.B, _N.ones(oo.t1 - oo.t0) * _N.mean(oo.u)))

    #  generate initial values of parameters
    oo._d = _kfardat.KFARGauObsDat(oo.TR, oo.N, oo.k)
    oo._d.copyData(oo.y)

    sPR = "cmpref"
    if oo.use_prior == _cd.__FREQ_REF__:
        sPR = "frqref"
    elif oo.use_prior == _cd.__ONOF_REF__:
        sPR = "onfref"
    sAO = "sf" if (oo.ARord == _cd.__SF__) else "nf"

    ts = "[%(1)d-%(2)d]" % {"1": oo.t0, "2": oo.t1}
    baseFN = "rs=%(rs)d" % {"pr": sPR, "rs": oo.restarts}
    setdir = "%(sd)s/AR%(k)d_%(ts)s_%(pr)s_%(ao)s" % {
        "sd": oo.setname, "k": oo.k, "ts": ts, "pr": sPR, "ao": sAO}

    #  baseFN_inter   baseFN_comps   baseFN_comps
    ###############
    oo.Bsmpx = _N.zeros((oo.TR, oo.NMC + oo.burn, (oo.N + 1) + 2))
    oo.smp_u = _N.zeros((oo.TR, oo.burn + oo.NMC))
    oo.smp_q2 = _N.zeros((oo.TR, oo.burn + oo.NMC))
    oo.smp_x00 = _N.empty((oo.TR, oo.burn + oo.NMC - 1, oo.k))
    #  store samples of
    oo.allalfas = _N.empty((oo.burn + oo.NMC, oo.k), dtype=_N.complex)
    oo.uts = _N.empty((oo.TR, oo.burn + oo.NMC, oo.R, oo.N + 2))
    oo.wts = _N.empty((oo.TR, oo.burn + oo.NMC, oo.C, oo.N + 3))
    oo.ranks = _N.empty((oo.burn + oo.NMC, oo.C), dtype=_N.int)
    oo.pgs = _N.empty((oo.TR, oo.burn + oo.NMC, oo.N + 1))
    oo.fs = _N.empty((oo.burn + oo.NMC, oo.C))
    oo.amps = _N.empty((oo.burn + oo.NMC, oo.C))
    if oo.bpsth:
        oo.smp_aS = _N.zeros((oo.burn + oo.NMC, oo.dfPSTH))

    radians = buildLims(oo.Cn, oo.freq_lims, nzLimL=1.)
    oo.AR2lims = 2 * _N.cos(radians)

    if (oo.rs < 0):
        oo.smpx = _N.zeros((oo.TR, (oo.N + 1) + 2, oo.k))   #  start at 0 + u
        oo.ws = _N.empty((oo.TR, oo._d.N + 1), dtype=_N.float)

        oo.F_alfa_rep = initF(oo.R, oo.Cs, oo.Cn, ifs=oo.ifs).tolist()   #  init F_alfa_rep
        print "begin---"
        print ampAngRep(oo.F_alfa_rep)
        print "begin^^^"
        q20 = 1e-3
        oo.q2 = _N.ones(oo.TR) * q20

        oo.F0 = (-1 * _Npp.polyfromroots(oo.F_alfa_rep)[::-1].real)[1:]
        ########  Limit the amplitude to something reasonable
        xE, nul = createDataAR(oo.N, oo.F0, q20, 0.1)
        mlt = _N.std(xE) / 0.5    #  we want amplitude around 0.5
        oo.q2 /= mlt * mlt
        xE, nul = createDataAR(oo.N, oo.F0, oo.q2[0], 0.1)

        if oo.model == "Bernoulli":
            oo.initBernoulli()
        #smpx[0, 2:, 0] = x[0]

        ##########  DEBUG
        ####  initialize ws if starting for first time
        if oo.TR == 1:
            oo.ws = oo.ws.reshape(1, oo._d.N + 1)
        for m in xrange(oo._d.TR):
            lw.rpg_devroye(oo.rn, oo.smpx[m, 2:, 0] + oo.u[m],
                           num=(oo.N + 1), out=oo.ws[m, :])

    oo.smp_u[:, 0] = oo.u
    oo.smp_q2[:, 0] = oo.q2

    if oo.bpsth:
        oo.u_a = _N.ones(oo.dfPSTH) * _N.mean(oo.u)
cnt += 1

y = y_orig
M = f.shape[0]
N = f.shape[1]

# Code labels
m = y.max()
Y = np.zeros((N, m), dtype=int)
for ii in range(0, N):
    Y[ii, y[ii] - 1] = 1

binsize = np.diff(time)
binsize = binsize.mean()

# Create B-Spline Basis if none provided
B = bs(time, df=20, degree=4, include_intercept=True)
Nb = B.shape[1]

Phi = np.ones((N, Nb + 1))
for ii in range(0, N):
    for jj in range(1, Nb + 1):
        Phi[ii, jj] = trapz(q[:, ii] * B[:, jj - 1], time)

# Find alpha and beta using l_bfgs
b0 = np.zeros(m * (Nb + 1))
out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                    args=(Phi, Y), pgtol=1e-10, maxiter=200,
                    maxfun=250, factr=1e-30)
b = out[0]
B0 = b.reshape(Nb + 1, m)
alpha = B0[0, :]
def findAndSaturateHist(cl, refrT=30, MAXcl=None):
    """
    how high
    """
    ITERS = 1000
    dgr = 2
    ktl = _N.empty(dgr + 1)
    cktl = _N.zeros(dgr + 2)
    xs = _N.linspace(0, 1, refrT)
    scr = _N.empty(ITERS)
    aS = _N.empty((ITERS, dgr + 4))
    kts = _N.empty((ITERS, dgr))
    lcl = _N.log(cl)

    for it in xrange(ITERS):
        bOK = False
        while not bOK:
            try:
                ktl = _N.random.rand(dgr + 1)
                for d in xrange(1, dgr + 2):
                    cktl[d] = cktl[d - 1] + ktl[d - 1]
                cktl /= cktl[-1]
                B = patsy.bs(xs, knots=cktl[1:-1], include_intercept=True)
                iBvTBv = _N.linalg.inv(_N.dot(B.T, B))
                a = _N.dot(iBvTBv, _N.dot(B.T, lcl))
                ftd = _N.exp(_N.dot(B, a))
                scr[it] = _N.sum((ftd - cl)**2)
                aS[it] = a
                kts[it] = cktl[1:-1]
                bOK = True
            except _N.linalg.linalg.LinAlgError:
                print "LinAlgError in findAndSaturateHist  part 1"

    bI = _N.where(scr == _N.min(scr))[0][0]
    bestKts = kts[bI]
    bestAs = aS[bI]

    ######
    B = patsy.bs(xs, knots=bestKts, include_intercept=True)
    ftdC = _N.exp(_N.dot(B, bestAs))

    if MAXcl is not None:   #  now compress, and
        MAX = _N.max(ftdC[0:refrT])
        maxInd = _N.where(ftdC == MAX)[0][0]
        ftdC[maxInd:] = _N.linspace(MAX, 1, refrT - maxInd)
        bg1Inds = _N.where(ftdC > 1)[0]
        ftdC[bg1Inds] = (((ftdC[bg1Inds] - 1) / (MAX - 1)) * (MAXcl - 1)) + 1
        lt1Inds = _N.where(ftdC[refrT:] < 1)[0]
        ftdC[refrT + lt1Inds] = 1

        lftdC = _N.log(ftdC)

        for it in xrange(ITERS):
            bOK = False
            while not bOK:
                try:
                    ktl = _N.random.rand(dgr + 1)
                    for d in xrange(1, dgr + 2):
                        cktl[d] = cktl[d - 1] + ktl[d - 1]
                    cktl /= cktl[-1]
                    B = patsy.bs(xs, knots=cktl[1:-1],
                                 include_intercept=True)
                    iBvTBv = _N.linalg.inv(_N.dot(B.T, B))
                    a = _N.dot(iBvTBv, _N.dot(B.T, lftdC))
                    ftd = _N.exp(_N.dot(B, a))
                    scr[it] = _N.sum((ftd - ftdC)**2)
                    aS[it] = a
                    kts[it] = cktl[1:-1]
                    bOK = True
                except _N.linalg.linalg.LinAlgError:
                    print "LinAlgError in findAndSaturateHist  part 2"

        bI = _N.where(scr == _N.min(scr))[0][0]
        bestKts = kts[bI]
        bestAs = aS[bI]

    return xs, bestKts, bestAs
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from patsy import bs, dmatrix

x = np.linspace(-0.0001, 1, 1000)
knots = [0, 0.2, 0.4, 0.6, 0.8, 1]

_, axes = plt.subplots(3, 1, figsize=(9, 6), sharex=True, sharey=True)
degrees = [0, 1, 3]
for i, ax in enumerate(axes):
    deg = degrees[i]
    b_splines = bs(x, degree=deg, knots=knots,
                   lower_bound=-0.01, upper_bound=1.01)
    for b_s in b_splines.T:
        ax.plot(x, b_s, "C3", ls="--")
    ax.plot(x, b_splines[:, deg], lw=2)
    ax.plot(knots, np.zeros_like(knots), "ko", markersize=3)
    # j avoids shadowing the subplot index i from enumerate above
    for j in range(1, deg + 1):
        ax.plot([0, 1], np.array([0, 0]) - (j / 15), "k.", clip_on=False)
    ax.plot(knots[:deg + 2], np.zeros_like(knots[:deg + 2]),
            "C4o", markersize=10)
plt.ylim(0)
plt.xticks([])
plt.yticks([])
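# Hedged aside on the column count in the plot above: with explicit knots
# and no intercept term, patsy's bs() should return len(knots) + degree
# columns (the boundary knots come from lower_bound/upper_bound).  A quick
# check of that assumption:
import numpy as np
from patsy import bs

x = np.linspace(-0.0001, 1, 1000)
knots = [0, 0.2, 0.4, 0.6, 0.8, 1]
for deg in (0, 1, 3):
    B = bs(x, degree=deg, knots=knots, lower_bound=-0.01, upper_bound=1.01)
    assert B.shape[1] == len(knots) + deg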
def calc_model(self, B=None, lam=0, df=40, T=200, max_itr=20, cores=-1):
    """
    This function identifies a regression model for open curves
    using elastic methods

    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 40)
    :param T: number of desired samples along curve (default 200)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    """
    n = self.beta.shape[0]
    N = self.beta.shape[2]
    time = np.linspace(0, 1, T)

    if n > 500:
        parallel = True
    elif T > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    # second derivative for regularization
    Bdiff = np.zeros((T, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)

    q, beta = preproc_open_curve(self.beta, T)
    self.q = q
    beta0 = beta.copy()
    qn = q.copy()

    gamma = np.tile(np.linspace(0, 1, T), (N, 1))
    gamma = gamma.transpose()

    O_hat = np.tile(np.eye(n), (N, 1, 1)).T

    itr = 1
    self.SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)

        # align data
        # OLS using basis
        Phi = np.ones((N, n * Nb + 1))
        for ii in range(0, N):
            for jj in range(0, n):
                for kk in range(1, Nb + 1):
                    Phi[ii, jj * Nb + kk] = trapz(qn[jj, :, ii]
                                                  * B[:, kk - 1], time)

        R = np.zeros((n * Nb + 1, n * Nb + 1))
        for kk in range(0, n):
            for ii in range(1, Nb + 1):
                for jj in range(1, Nb + 1):
                    R[kk * Nb + ii, kk * Nb + jj] = trapz(
                        Bdiff[:, ii - 1] * Bdiff[:, jj - 1], time)

        xx = np.dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = np.dot(Phi.T, self.y)
        b = np.dot(inv_xx, xy)

        alpha = b[0]
        nu = np.zeros((n, T))
        for ii in range(0, n):
            nu[ii, :] = B.dot(b[(ii * Nb + 1):((ii + 1) * Nb + 1)])

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = cf.innerprod_q2(qn[:, :, ii], nu)

        self.SSE[itr - 1] = sum((self.y.reshape(N) - alpha - int_X)**2)

        # find gamma
        gamma_new = np.zeros((T, N))
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(regression_warp)(nu, q[:, :, n], self.y[n], alpha)
                for n in range(N))
            for ii in range(0, N):
                gamma_new[:, ii] = out[ii][0]
                beta1n = cf.group_action_by_gamma_coord(
                    out[ii][1].dot(beta0[:, :, ii]), out[ii][0])
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = out[ii][1]
                qn[:, :, ii] = cf.curve_to_q(beta1n)[0]
        else:
            for ii in range(0, N):
                q1 = q[:, :, ii]
                gammatmp, Otmp = regression_warp(nu, q1, self.y[ii], alpha)
                gamma_new[:, ii] = gammatmp
                beta1n = cf.group_action_by_gamma_coord(
                    Otmp.dot(beta0[:, :, ii]), gammatmp)
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = Otmp
                qn[:, :, ii] = cf.curve_to_q(beta1n)[0]

        if np.abs(self.SSE[itr - 1] - self.SSE[itr - 2]) < 1e-15:
            break
        else:
            gamma = gamma_new

        itr += 1

    tau = np.zeros(N)
    self.alpha = alpha
    self.nu = nu
    self.beta0 = beta0
    self.betan = beta
    self.gamma = gamma
    self.qn = qn
    self.B = B
    self.O = O_hat
    self.b = b[1:-1]
    self.SSE = self.SSE[0:itr]
    return
def calc_model(self, B=None, lam=0, df=20, max_itr=20, delta=.01,
               cores=-1, smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic pca

    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param delta: step size for the warping gradient (default 0.01)
    :param cores: number of cores for parallel processing (default all)
    """
    M = self.f.shape[0]
    N = self.f.shape[1]
    m = self.y.max()

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(self.time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(self.time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]
    self.B = B

    self.q = uf.f_to_srsf(self.f, self.time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    self.LL = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp(
                (self.time[-1] - self.time[0]) * gamma[:, ii]
                + self.time[0], self.time, self.f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(self.time, self.q[:, ii],
                                        gamma[:, ii])

        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], self.time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, self.Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]

        B0 = b.reshape(Nb + 1, m)
        alpha = B0[0, :]
        beta = np.zeros((M, m))
        for i in range(0, m):
            beta[:, i] = B.dot(B0[1:Nb + 1, i])

        # compute the logistic loss
        self.LL[itr - 1] = mlogit_loss(b, Phi, self.Y)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(mlogit_warp_grad)(alpha, beta, self.time,
                                          self.q[:, n], self.Y[n, :],
                                          delta=delta) for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = mlogit_warp_grad(alpha, beta, self.time,
                                                    self.q[:, ii],
                                                    self.Y[ii, :],
                                                    delta=delta)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    self.qn = qn
    self.fn = fn
    self.gamma = gamma
    self.alpha = alpha
    self.beta = beta
    self.b = b[1:-1]
    self.n_classes = m
    self.LL = self.LL[0:itr]
    return
plt.scatter(x, y)
plt.plot(x, funeg(x))

#

p = np.poly1d(np.polyfit(x, y, 4))
plt.scatter(x, y)
plt.plot(x, p(x), 'k--')
p = np.poly1d(np.polyfit(x, y, 12))
plt.plot(x, p(x), 'k-')

#

from patsy import bs

kts = [0, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 1]
z = sm.OLS(y, bs(x, knots=kts, include_intercept=True)).fit()
plt.scatter(x, y)
plt.plot(x, z.fittedvalues)

# ## Additive Models

#

from statsmodels.gam.api import GLMGam, BSplines

xmat = ethanol[['C', 'E']]
bs = BSplines(xmat, df=[4, 4], degree=[3, 3])  # rebinds `bs` (was patsy.bs)
gamod = GLMGam.from_formula('NOx ~ C + E', ethanol, smoother=bs).fit()

#

fig = gamod.plot_partial(0, cpr=True)
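# For completeness: patsy's formula interface builds the same kind of
# spline design matrix via dmatrix (imported earlier but unused so far);
# a minimal sketch with synthetic x:
import numpy as np
from patsy import dmatrix

x_demo = np.linspace(0, 1, 50)
# "- 1" drops the formula's implicit intercept column, so the spline
# basis itself (built with include_intercept=True) carries the constant
X = dmatrix("bs(x, df=6, degree=3, include_intercept=True) - 1",
            {"x": x_demo})
assert np.asarray(X).shape == (50, 6)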
def setParams(self):
    oo = self
    #  generate initial values of parameters
    oo._d = _kfardat.KFARGauObsDat(oo.TR, oo.N, 1)
    oo._d.copyData(oo.y)

    #  baseFN_inter   baseFN_comps   baseFN_comps
    oo.smpx = _N.zeros((oo.TR, oo.N + 1))   #  start at 0 + u
    oo.ws = _N.empty((oo.TR, oo._d.N + 1), dtype=_N.float)

    if oo.q20 is None:
        oo.q20 = 0.00077
    oo.q2 = _N.ones(oo.TR) * oo.q20

    oo.F0 = _N.array([0.9])
    ########  Limit the amplitude to something reasonable
    xE, nul = createDataAR(oo.N, oo.F0, oo.q20, 0.1)
    mlt = _N.std(xE) / 0.5    #  we want amplitude around 0.5
    oo.q2 /= mlt * mlt
    xE, nul = createDataAR(oo.N, oo.F0, oo.q2[0], 0.1)

    w = 5
    wf = gauKer(w)
    gk = _N.empty((oo.TR, oo.N + 1))
    fgk = _N.empty((oo.TR, oo.N + 1))
    for m in xrange(oo.TR):
        gk[m] = _N.convolve(oo.y[m], wf, mode="same")
        gk[m] = gk[m] - _N.mean(gk[m])
        gk[m] /= 5 * _N.std(gk[m])
        fgk[m] = bpFilt(15, 100, 1, 135, 500, gk[m])   #  we want
        fgk[m, :] /= 2 * _N.std(fgk[m, :])

        if oo.noAR:
            oo.smpx[m, 0] = 0
        else:
            oo.smpx[m] = fgk[m]

    oo.s_lrn = _N.empty((oo.TR, oo.N + 1))
    oo.sprb = _N.empty((oo.TR, oo.N + 1))
    oo.lrn_scr1 = _N.empty(oo.N + 1)
    oo.lrn_iscr1 = _N.empty(oo.N + 1)
    oo.lrn_scr2 = _N.empty(oo.N + 1)
    oo.lrn_scr3 = _N.empty(oo.N + 1)
    oo.lrn_scld = _N.empty(oo.N + 1)

    if oo.bpsth:
        oo.B = patsy.bs(_N.linspace(0, (oo.t1 - oo.t0) * oo.dt,
                                    (oo.t1 - oo.t0)),
                        df=oo.dfPSTH, knots=oo.kntsPSTH,
                        include_intercept=True)   #  spline basis
        if oo.dfPSTH is None:
            oo.dfPSTH = oo.B.shape[1]
        oo.B = oo.B.T   #  My convention for beta

        if oo.aS is None:
            oo.aS = _N.linalg.solve(
                _N.dot(oo.B, oo.B.T),
                _N.dot(oo.B, _N.ones(oo.t1 - oo.t0) * 0.01))
            #  small amplitude psth at first
            oo.u_a = _N.zeros(oo.dfPSTH)
    else:
        oo.B = patsy.bs(_N.linspace(0, (oo.t1 - oo.t0) * oo.dt,
                                    (oo.t1 - oo.t0)),
                        df=4, include_intercept=True)   #  spline basis
        oo.B = oo.B.T   #  My convention for beta
        oo.aS = _N.zeros(4)
        #oo.u_a = _N.ones(oo.dfPSTH)*_N.mean(oo.us)
        oo.u_a = _N.zeros(oo.dfPSTH)
def suggestPSTHKnots(dt, TR, N, bindat, bnsz=10, psth_knts=10,
                     psth_run=False):
    """
    bnsz      binsize used to calculate approximate PSTH
    """
    rszd = False
    if N % bnsz != 0:
        rszd = True
        pcs = _N.ceil(N / bnsz)
        bnsz = int(_N.floor(N / pcs))

    spkts = _U.fromBinDat(bindat, SpkTs=True)

    #  apsth needs to be same size as N.  ie N%bnsz needs to be 0
    h, bs = _N.histogram(spkts, bins=_N.linspace(0, N, (N // bnsz) + 1))
    fs = (h / (TR * bnsz * dt))
    _apsth = _N.repeat(fs, bnsz)    #  piecewise boxy approximate PSTH
    if rszd:
        apsth = _N.zeros(N)
        apsth[0:(N // bnsz) * bnsz] = _apsth
        apsth[(N // bnsz) * bnsz:] = apsth[(N // bnsz) * bnsz - 1]
    else:
        apsth = _apsth
    apsth *= dt

    gk = gauKer(5)
    gk /= _N.sum(gk)
    f_apsth = _N.convolve(apsth, gk, mode="same")
    dpsth_pctl = _N.cumsum(_N.abs(_N.diff(f_apsth)))
    dpsth_pctl /= dpsth_pctl[-1]
    dpsth_pctl[0] = 0

    ITERS = 40
    x = _N.linspace(0., N - 1, N, endpoint=False)   #  in units of ms.
    r2s = _N.empty(ITERS)

    best_r2s = _N.zeros(5)

    for iknts in range(5, 6):
        allKnts = _N.empty((ITERS, iknts))
        allCoeffs = []

        tAvg = 1. / iknts
        tsMin = tAvg * 0.5
        tsMax = tAvg * 1.5

        for it in range(ITERS):
            knt_inds = _N.zeros(iknts + 1)
            bGood = False
            while not bGood:
                try:
                    #pieces = tsMin + _N.random.rand(iknts+1)*(tsMax-tsMin)
                    rnd_pctls = _N.sort(_N.random.rand(iknts + 1))
                    for i in range(iknts + 1):
                        iHere = _N.where((rnd_pctls[i] >= dpsth_pctl[0:-1]) &
                                         (rnd_pctls[i] < dpsth_pctl[1:]))[0]
                        knt_inds[i] = iHere[0]
                    # knts = _N.empty(iknts+1)
                    # knts[0] = pieces[0]
                    # for i in range(1, iknts+1):
                    #     knts[i] = knts[i-1] + pieces[i]
                    # knts /= knts[-1]
                    # knts[0:-1] *= N
                    #knts = _N.sort((0.1 + 0.85*_N.random.rand(iknts))*N)
                    B = patsy.bs(x, knots=(knt_inds[0:-1]),
                                 include_intercept=True)
                    iBTB = _N.linalg.inv(_N.dot(B.T, B))
                    bGood = True
                except _N.linalg.linalg.LinAlgError:
                    print("Linalg Error or Value Error in suggestPSTHKnots")
                except ValueError:
                    print("Linalg Error or Value Error in suggestPSTHKnots")

            #a = _N.dot(iBTB, _N.dot(B.T, _N.log(apsth)))
            a = _N.dot(iBTB, _N.dot(B.T, apsth))
            #ft = _N.exp(_N.dot(B, a))
            ft = _N.dot(B, a)
            r2s[it] = _N.dot(ft - apsth, ft - apsth)
            allKnts[it, :] = knt_inds[0:-1]
            allCoeffs.append(a)

        mnIt = _N.where(r2s == r2s.min())[0][0]
        best_r2s[iknts - 10] = r2s[mnIt]

    knts = allKnts[mnIt]
    cfs = allCoeffs[mnIt]
    B = patsy.bs(x, knots=knts, include_intercept=True)

    if psth_run:
        fig = _plt.figure()
        _plt.plot(_N.dot(B, cfs))
        _plt.plot(apsth)

    return knts, apsth, cfs
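# The search above boils down to: sample candidate interior knots, build a
# patsy.bs design, least-squares fit the binned PSTH, keep the lowest
# residual.  A self-contained toy version (synthetic target; lstsq replaces
# the explicit inverse for numerical stability):
import numpy as np
import patsy

rng = np.random.default_rng(2)
N = 500
x = np.arange(N, dtype=float)
target = np.exp(-((x - 200.0) / 80.0) ** 2)         # toy "PSTH"

best = (np.inf, None, None)
for _ in range(200):
    knots = np.sort(rng.uniform(1, N - 1, size=5))  # 5 interior knots
    B = patsy.bs(x, knots=knots, include_intercept=True)
    a, *_ = np.linalg.lstsq(B, target, rcond=None)
    r2 = np.sum((B @ a - target) ** 2)
    if r2 < best[0]:
        best = (r2, knots, a)
r2_best, knots_best, coeffs_best = best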
def make_splines_patsy(x, num_knots, degree=3):
    knot_list = np.quantile(x, q=np.linspace(0, 1, num=num_knots))
    #B = bs(x, knots=knot_list, degree=degree)  # ncoef = knots + degree + 1
    B = bs(x, df=num_knots, degree=degree)  # uses quantiles
    return B
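# Quick check of the two call styles (a sketch): with df= patsy places the
# interior knots at quantiles itself, so knot_list above is only needed for
# the commented-out knots= variant.
import numpy as np
from patsy import bs

x_demo = np.sort(np.random.default_rng(3).normal(size=300))
B_df = bs(x_demo, df=8, degree=3)                        # 8 columns
inner = np.quantile(x_demo, np.linspace(0, 1, 6)[1:-1])  # 4 interior knots
B_kn = bs(x_demo, knots=inner, degree=3)                 # 4 + 3 = 7 columns
assert B_df.shape[1] == 8 and B_kn.shape[1] == 7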
def elastic_mlogistic(f, y, time, B=None, df=20, max_itr=20, cores=-1,
                      delta=.01, parallel=True, smooth=False):
    """
    This function identifies a multinomial logistic regression model with
    phase-variability using elastic methods

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param y: numpy array of labels {1,2,...,m} for m classes
    :param time: vector of size M describing the sample points
    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return Loss: logistic loss
    """
    M = f.shape[0]
    N = f.shape[1]

    # Code labels
    m = y.max()
    Y = np.zeros((N, m), dtype=int)
    for ii in range(0, N):
        Y[ii, y[ii] - 1] = 1

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q = uf.f_to_srsf(f, time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    LL = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((time[-1] - time[0]) * gamma[:, ii]
                                  + time[0], time, f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(time, q[:, ii], gamma[:, ii])

        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]

        B0 = b.reshape(Nb + 1, m)
        alpha = B0[0, :]
        beta = np.zeros((M, m))
        for i in range(0, m):
            beta[:, i] = B.dot(B0[1:Nb + 1, i])

        # compute the logistic loss
        LL[itr - 1] = mlogit_loss(b, Phi, Y)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(mlogit_warp_grad)(
                alpha, beta, time, q[:, n], Y[n, :], delta=delta)
                for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = mlogit_warp_grad(alpha, beta, time,
                                                    q[:, ii], Y[ii, :],
                                                    delta=delta)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamma = gamma_new
    # gamI = uf.SqrtMeanInverse(gamma)
    # gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    # beta = np.interp((time[-1] - time[0]) * gamI + time[0], time,
    #                  beta) * np.sqrt(gamI_dev)
    # for ii in range(0, N):
    #     qn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                           time, qn[:, ii]) * np.sqrt(gamI_dev)
    #     fn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                           time, fn[:, ii])
    #     gamma[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                              time, gamma[:, ii])

    model = collections.namedtuple('model', ['alpha', 'beta', 'fn', 'qn',
                                             'gamma', 'q', 'B', 'b',
                                             'Loss', 'n_classes', 'type'])
    out = model(alpha, beta, fn, qn, gamma, q, B, b[1:-1], LL[0:itr], m,
                'mlogistic')
    return out
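# The optimizer call pattern used above, in isolation: scipy's
# fmin_l_bfgs_b returns (x_opt, f_opt, info_dict), which is why the code
# keeps out[0].  Toy quadratic sketch:
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def toy_loss(b, A, y):
    r = A.dot(b) - y
    return 0.5 * np.dot(r, r)

def toy_grad(b, A, y):
    return A.T.dot(A.dot(b) - y)

A = np.eye(3)
y_t = np.array([1.0, -2.0, 0.5])
out = fmin_l_bfgs_b(toy_loss, np.zeros(3), fprime=toy_grad,
                    args=(A, y_t), pgtol=1e-10, maxiter=200)
b_opt = out[0]                      # ~ [1.0, -2.0, 0.5]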
def calc_model(self, B=None, lam=0, df=20, max_itr=20, cores=-1,
               smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic pca

    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    """
    M = self.f.shape[0]
    N = self.f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(self.time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(self.time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]
    self.B = B

    # second derivative for regularization
    Bdiff = np.zeros((M, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)
    self.Bdiff = Bdiff

    self.q = uf.f_to_srsf(self.f, self.time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    self.SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp(
                (self.time[-1] - self.time[0]) * gamma[:, ii]
                + self.time[0], self.time, self.f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(self.time, self.q[:, ii],
                                        gamma[:, ii])

        # OLS using basis
        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], self.time)

        R = np.zeros((Nb + 1, Nb + 1))
        for ii in range(1, Nb + 1):
            for jj in range(1, Nb + 1):
                R[ii, jj] = trapz(Bdiff[:, ii - 1] * Bdiff[:, jj - 1],
                                  self.time)

        xx = np.dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = np.dot(Phi.T, self.y)
        b = np.dot(inv_xx, xy)

        alpha = b[0]
        beta = B.dot(b[1:Nb + 1])
        beta = beta.reshape(M)

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = trapz(qn[:, ii] * beta, self.time)

        self.SSE[itr - 1] = sum((self.y.reshape(N) - alpha - int_X)**2)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(regression_warp)(
                beta, self.time, self.q[:, n], self.y[n], alpha)
                for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = regression_warp(beta, self.time,
                                                   self.q[:, ii],
                                                   self.y[ii], alpha)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamI = uf.SqrtMeanInverse(gamma_new)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    beta = np.interp((self.time[-1] - self.time[0]) * gamI + self.time[0],
                     self.time, beta) * np.sqrt(gamI_dev)

    for ii in range(0, N):
        qn[:, ii] = np.interp(
            (self.time[-1] - self.time[0]) * gamI + self.time[0],
            self.time, qn[:, ii]) * np.sqrt(gamI_dev)
        fn[:, ii] = np.interp(
            (self.time[-1] - self.time[0]) * gamI + self.time[0],
            self.time, fn[:, ii])
        gamma[:, ii] = np.interp(
            (self.time[-1] - self.time[0]) * gamI + self.time[0],
            self.time, gamma_new[:, ii])

    self.qn = qn
    self.fn = fn
    self.gamma = gamma
    self.alpha = alpha
    self.beta = beta
    self.b = b[1:-1]
    self.SSE = self.SSE[0:itr]
    return
def oc_elastic_regression(beta, y, B=None, df=40, T=200, max_itr=20,
                          cores=-1):
    """
    This function identifies a regression model for open curves
    using elastic methods

    :param beta: numpy ndarray of shape (n, M, N) describing N curves
                 in R^M
    :param y: numpy array of N responses
    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 40)
    :param T: number of desired samples along curve (default 200)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :type beta: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of M
                functions with N samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return SSE: sum of squared error
    """
    n = beta.shape[0]
    N = beta.shape[2]
    time = np.linspace(0, 1, T)

    if n > 500:
        parallel = True
    elif T > 100:
        parallel = True
    else:
        parallel = False

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q, beta = preproc_open_curve(beta, T)
    beta0 = beta.copy()
    qn = q.copy()

    gamma = np.tile(np.linspace(0, 1, T), (N, 1))
    gamma = gamma.transpose()

    O_hat = np.tile(np.eye(n), (N, 1, 1)).T

    itr = 1
    SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)

        # align data
        # OLS using basis
        Phi = np.ones((N, n * Nb + 1))
        for ii in range(0, N):
            for jj in range(0, n):
                for kk in range(1, Nb + 1):
                    Phi[ii, jj * Nb + kk] = trapz(qn[jj, :, ii]
                                                  * B[:, kk - 1], time)

        xx = dot(Phi.T, Phi)
        inv_xx = inv(xx)
        xy = dot(Phi.T, y)
        b = dot(inv_xx, xy)

        alpha = b[0]
        nu = np.zeros((n, T))
        for ii in range(0, n):
            nu[ii, :] = B.dot(b[(ii * Nb + 1):((ii + 1) * Nb + 1)])

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = cf.innerprod_q2(qn[:, :, ii], nu)

        SSE[itr - 1] = sum((y.reshape(N) - alpha - int_X) ** 2)

        # find gamma
        gamma_new = np.zeros((T, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(regression_warp)(
                nu, beta0[:, :, n], y[n], alpha) for n in range(N))
            for ii in range(0, N):
                gamma_new[:, ii] = out[ii][0]
                beta1n = cf.group_action_by_gamma_coord(
                    out[ii][1].dot(beta0[:, :, ii]), out[ii][0])
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = out[ii][1]
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])
        else:
            for ii in range(0, N):
                beta1 = beta0[:, :, ii]
                gammatmp, Otmp, tau = regression_warp(nu, beta1, y[ii],
                                                      alpha)
                gamma_new[:, ii] = gammatmp
                beta1n = cf.group_action_by_gamma_coord(
                    Otmp.dot(beta0[:, :, ii]), gammatmp)
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = Otmp
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])

        if np.abs(SSE[itr - 1] - SSE[itr - 2]) < 1e-15:
            break
        else:
            gamma = gamma_new

        itr += 1

    tau = np.zeros(N)
    model = collections.namedtuple('model', ['alpha', 'nu', 'betan', 'q',
                                             'gamma', 'O', 'tau', 'B', 'b',
                                             'SSE', 'type'])
    out = model(alpha, nu, beta, q, gamma, O_hat, tau, B, b[1:-1],
                SSE[0:itr], 'oclinear')
    return out
def calc_model(self, B=None, df=20, T=100, max_itr=30, cores=-1,
               deltaO=.003, deltag=.003):
    """
    This function identifies a multinomial logistic regression model
    with phase-variability using elastic methods for open curves

    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 20)
    :param T: number of desired samples along curve (default 100)
    :param max_itr: maximum number of iterations (default 30)
    :param cores: number of cores for parallel processing (default all)
    """
    n = self.beta.shape[0]
    N = self.beta.shape[2]
    time = np.linspace(0, 1, T)
    m = self.y.max()

    if n > 500:
        parallel = True
    elif T > 100:
        parallel = True
    else:
        parallel = True

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q, beta = preproc_open_curve(self.beta, T)
    qn = q.copy()
    beta0 = beta.copy()

    gamma = np.tile(np.linspace(0, 1, T), (N, 1))
    gamma = gamma.transpose()

    O_hat = np.tile(np.eye(n), (N, 1, 1)).T

    itr = 1
    LL = np.zeros(max_itr + 1)
    while itr <= max_itr:
        print("Iteration: %d" % itr)

        Phi = np.ones((N, n * Nb + 1))
        for ii in range(0, N):
            for jj in range(0, n):
                for kk in range(1, Nb + 1):
                    Phi[ii, jj * Nb + kk] = trapz(qn[jj, :, ii]
                                                  * B[:, kk - 1], time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (n * Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, self.Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]

        B0 = b.reshape(n * Nb + 1, m)
        alpha = B0[0, :]
        nu = np.zeros((n, T, m))
        for i in range(0, m):
            for j in range(0, n):
                nu[j, :, i] = B.dot(B0[(j * Nb + 1):((j + 1) * Nb + 1), i])

        # compute the logistic loss
        LL[itr] = mlogit_loss(b, Phi, self.Y)

        # find gamma
        gamma_new = np.zeros((T, N))
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(mlogit_warp_grad)(alpha, nu, q[:, :, n],
                                          self.Y[n, :], deltaO=deltaO,
                                          deltag=deltag) for n in range(N))
            for ii in range(0, N):
                gamma_new[:, ii] = out[ii][0]
                beta1n = cf.group_action_by_gamma_coord(
                    out[ii][1].dot(beta0[:, :, ii]), out[ii][0])
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = out[ii][1]
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])[0]
        else:
            for ii in range(0, N):
                gammatmp, Otmp = mlogit_warp_grad(alpha, nu, q[:, :, ii],
                                                  self.Y[ii, :],
                                                  deltaO=deltaO,
                                                  deltag=deltag)
                gamma_new[:, ii] = gammatmp
                beta1n = cf.group_action_by_gamma_coord(
                    Otmp.dot(beta0[:, :, ii]), gammatmp)
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = Otmp
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])[0]

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new.copy()

        itr += 1

    self.alpha = alpha
    self.nu = nu
    self.beta0 = beta0
    self.betan = beta
    self.q = q
    self.qn = qn
    self.gamma = gamma_new
    self.O = O_hat
    self.B = B
    self.b = b[1:-1]
    self.Loss = LL[1:itr]
    self.n_classes = m
    return
def setParams(self, psth_run=False, psth_knts=10):
    oo = self
    #  generate initial values of parameters
    #oo._d = _kfardat.KFARGauObsDat(oo.TR, oo.N, oo.k)
    #oo._d.copyData(oo.y)
    oo.Ns = _N.ones(oo.TR, dtype=_N.int) * oo.N
    oo.ks = _N.ones(oo.TR, dtype=_N.int) * oo.k

    oo.F = _N.zeros((oo.k, oo.k))
    _N.fill_diagonal(oo.F[1:, 0:oo.k - 1], 1)
    oo.F[0] = _N.random.randn(oo.k) / _N.arange(1, oo.k + 1)**2
    oo.F[0, 0] = 0.8
    oo.Fs = _N.zeros((oo.TR, oo.k, oo.k))
    for tr in range(oo.TR):
        oo.Fs[tr] = oo.F
    oo.Ik = _N.identity(oo.k)
    oo.IkN = _N.tile(oo.Ik, (oo.N + 1, 1, 1))

    #  need TR
    #  pr_x[:, 0]  empty, not used
    #oo.p_x = _N.empty((oo.TR, oo.N+1, oo.k, 1))
    oo.p_x = _N.empty((oo.TR, oo.N + 1, oo.k))
    oo.p_x[:, 0, 0] = 0
    oo.p_V = _N.empty((oo.TR, oo.N + 1, oo.k, oo.k))
    oo.p_Vi = _N.empty((oo.TR, oo.N + 1, oo.k, oo.k))
    #oo.f_x = _N.empty((oo.TR, oo.N+1, oo.k, 1))
    oo.f_x = _N.empty((oo.TR, oo.N + 1, oo.k))
    oo.f_V = _N.empty((oo.TR, oo.N + 1, oo.k, oo.k))
    #oo.s_x = _N.empty((oo.TR, oo.N+1, oo.k, 1))
    oo.s_x = _N.empty((oo.TR, oo.N + 1, oo.k))
    oo.s_V = _N.empty((oo.TR, oo.N + 1, oo.k, oo.k))

    _N.fill_diagonal(oo.F[1:, 0:oo.k - 1], 1)
    oo.G = _N.zeros((oo.k, 1))
    oo.G[0, 0] = 1
    oo.Q = _N.empty(oo.TR)

    #  baseFN_inter   baseFN_comps   baseFN_comps
    print("freq_lims")
    print(oo.freq_lims)
    radians = buildLims(0, oo.freq_lims, nzLimL=1., Fs=(1 / oo.dt))
    oo.AR2lims = 2 * _N.cos(radians)

    oo.smpx = _N.zeros((oo.TR, (oo.N + 1) + 2, oo.k))   #  start at 0 + u
    oo.ws = _N.empty((oo.TR, oo.N + 1), dtype=_N.float)

    #############  ADDED THIS FOR DEBUG
    #oo.F_alfa_rep = _N.array([-0.4 +0.j, 0.96999828+0.00182841j, 0.96999828-0.00182841j, 0.51000064+0.02405102j, 0.51000064-0.02405102j, 0.64524011+0.04059507j, 0.64524011-0.04059507j]).tolist()
    if oo.F_alfa_rep is None:
        oo.F_alfa_rep = initF(oo.R, oo.Cs + oo.Cn, 0).tolist()   #  init F_alfa_rep

    print("F_alfa_rep*********************")
    print(oo.F_alfa_rep)
    #print(ampAngRep(oo.F_alfa_rep))
    if oo.q20 is None:
        oo.q20 = 0.00077
    oo.q2 = _N.ones(oo.TR) * oo.q20

    oo.F0 = (-1 * _Npp.polyfromroots(oo.F_alfa_rep)[::-1].real)[1:]
    oo.Fs = _N.zeros((oo.TR, oo.k, oo.k))
    oo.F[0] = oo.F0
    _N.fill_diagonal(oo.F[1:, 0:oo.k - 1], 1)
    for tr in range(oo.TR):
        oo.Fs[tr] = oo.F

    ########  Limit the amplitude to something reasonable
    xE, nul = createDataAR(oo.N, oo.F0, oo.q20, 0.1)
    mlt = _N.std(xE) / 0.5    #  we want amplitude around 0.5
    oo.q2 /= mlt * mlt
    xE, nul = createDataAR(oo.N, oo.F0, oo.q2[0], 0.1)

    w = 5
    wf = gauKer(w)
    gk = _N.empty((oo.TR, oo.N + 1))
    fgk = _N.empty((oo.TR, oo.N + 1))
    for m in range(oo.TR):
        gk[m] = _N.convolve(oo.y[m], wf, mode="same")
        gk[m] = gk[m] - _N.mean(gk[m])
        gk[m] /= 5 * _N.std(gk[m])
        fgk[m] = bpFilt(15, 100, 1, 135, 500, gk[m])   #  we want
        fgk[m, :] /= 3 * _N.std(fgk[m, :])

        if oo.noAR:
            oo.smpx[m, 2:, 0] = 0
        else:
            oo.smpx[m, 2:, 0] = fgk[m, :]

        for n in range(2 + oo.k - 1, oo.N + 1 + 2):   #  CREATE square smpx
            oo.smpx[m, n, 1:] = oo.smpx[m, n - oo.k + 1:n, 0][::-1]
        for n in range(2 + oo.k - 2, -1, -1):         #  CREATE square smpx
            oo.smpx[m, n, 0:oo.k - 1] = oo.smpx[m, n + 1, 1:oo.k]
            oo.smpx[m, n, oo.k - 1] = _N.dot(
                oo.F0, oo.smpx[m, n:n + oo.k, oo.k - 2])   #  no noise

    if oo.bpsth:
        psthKnts, apsth, aWeights = _spknts.suggestPSTHKnots(
            oo.dt, oo.TR, oo.N + 1, oo.y.T, psth_knts=psth_knts,
            psth_run=psth_run)
        _N.savetxt("apsth.txt", apsth, fmt="%.4f")
        _N.savetxt("psthKnts.txt", psthKnts, fmt="%.4f")

        apprx_ps = _N.array(_N.abs(aWeights))
        oo.u_a = -_N.log(1 / apprx_ps - 1)

        #  For oo.u_a, use the values we get from aWeights
        oo.B = patsy.bs(_N.linspace(0, (oo.t1 - oo.t0) * oo.dt,
                                    (oo.t1 - oo.t0)),
                        knots=(psthKnts * oo.dt),
                        include_intercept=True)   #  spline basis
        oo.B = oo.B.T    #  My convention for beta
        oo.aS = _N.array(oo.u_a)

        # fig = _plt.figure(figsize=(4, 7))
        # fig.add_subplot(2, 1, 1)
        # _plt.plot(apsth)
        # fig.add_subplot(2, 1, 2)
        # _plt.plot(_N.dot(oo.B.T, aWeights))
    else:
        oo.B = patsy.bs(_N.linspace(0, (oo.t1 - oo.t0) * oo.dt,
                                    (oo.t1 - oo.t0)),
                        df=4, include_intercept=True)   #  spline basis
        oo.B = oo.B.T    #  My convention for beta
        oo.aS = _N.zeros(4)
                bGood = True
            except (_N.linalg.linalg.LinAlgError, ValueError):
                print "Linalg Error or Value Error in suggestPSTHKnots"

        #a = _N.dot(iBTB, _N.dot(B.T, _N.log(apsth)))
        a = _N.dot(iBTB, _N.dot(B.T, apsth))
        #ft = _N.exp(_N.dot(B, a))
        ft = _N.dot(B, a)
        r2s[it] = _N.dot(ft - apsth, ft - apsth)
        allKnts[it, :] = knts[0:-1]
        allCoeffs.append(a)

    mnIt = _N.where(r2s == r2s.min())[0][0]
    knts = allKnts[mnIt]
    cfs = allCoeffs[mnIt]
    B = patsy.bs(x, knots=knts, include_intercept=True)
    #fig = _plt.figure()
    #_plt.plot(_N.dot(B, cfs))
    #_plt.plot(apsth)
    return knts, apsth, cfs
def calc_model(self, link='linear', B=None, lam=0, df=20, max_itr=20,
               smooth_data=False, sparam=25, parallel=False):
    """
    This function identifies a regression model with phase-variability
    using elastic pca

    :param link: string of link function ('linear', 'quadratic', 'cubic')
    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param smooth_data: smooth data using box filter (default = F)
    :param sparam: number of times to apply box filter (default = 25)
    :param parallel: run in parallel (default = F)
    """
    if smooth_data:
        self.f = fs.smooth_data(self.f, sparam)

    print("Link: %s" % link)
    print("Lambda: %5.1f" % lam)

    self.lam = lam
    self.link = link

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(self.time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]
    self.B = B

    n = self.f.shape[1]

    print("Initializing")
    b0 = rand(Nb + 1)
    out = minimize(MyLogLikelihoodFn, b0,
                   args=(self.y, self.B, self.time, self.f, parallel),
                   method="SLSQP")
    a = out.x

    if self.link == 'linear':
        h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                          self.y, max_itr, a, 1, parallel)
        yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                     self.f, parallel)
        yhat = np.polyval(h1, yhat1)
    elif self.link == 'quadratic':
        h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                          self.y, max_itr, a, 2, parallel)
        yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                     self.f, parallel)
        yhat = np.polyval(h1, yhat1)
    elif self.link == 'cubic':
        h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                          self.y, max_itr, a, 3, parallel)
        yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                     self.f, parallel)
        yhat = np.polyval(h1, yhat1)
    else:
        raise Exception('Invalid Link')

    tmp = (self.y - yhat)**2
    self.SSE = tmp.sum()
    self.h = h1
    self.alpha = c_hat[0]
    self.b = c_hat[1:]

    return
def oc_elastic_logistic(beta, y, B=None, df=60, T=100, max_itr=40,
                        cores=-1, deltaO=.1, deltag=.05, method=1):
    """
    This function identifies a logistic regression model with
    phase-variability using elastic methods for open curves

    :param beta: numpy ndarray of shape (n, M, N) describing N curves
                 in R^M
    :param y: numpy array of N responses
    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 60)
    :param T: number of desired samples along curve (default 100)
    :param max_itr: maximum number of iterations (default 40)
    :param cores: number of cores for parallel processing (default all)
    :type beta: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return nu: nu(t) of model
    :return betan: aligned curves - numpy ndarray of shape (n,T,N)
    :return O: calculated rotation matrices
    :return gamma: calculated warping functions
    :return B: basis matrix
    :return b: basis coefficients
    :return Loss: logistic loss
    """
    n = beta.shape[0]
    N = beta.shape[2]
    time = np.linspace(0, 1, T)

    if n > 500:
        parallel = True
    elif T > 100:
        parallel = True
    else:
        parallel = True

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q, beta = preproc_open_curve(beta, T)
    beta0 = beta.copy()
    qn = q.copy()

    gamma = np.tile(np.linspace(0, 1, T), (N, 1))
    gamma = gamma.transpose()

    O_hat = np.tile(np.eye(n), (N, 1, 1)).T

    itr = 1
    LL = np.zeros(max_itr + 1)
    while itr <= max_itr:
        print("Iteration: %d" % itr)

        Phi = np.ones((N, n * Nb + 1))
        for ii in range(0, N):
            for jj in range(0, n):
                for kk in range(1, Nb + 1):
                    Phi[ii, jj * Nb + kk] = trapz(qn[jj, :, ii]
                                                  * B[:, kk - 1], time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(n * Nb + 1)
        out = fmin_l_bfgs_b(logit_loss, b0, fprime=logit_gradient,
                            args=(Phi, y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]
        b = b / norm(b)
        # alpha_norm = b1[0]
        alpha = b[0]
        nu = np.zeros((n, T))
        for ii in range(0, n):
            nu[ii, :] = B.dot(b[(ii * Nb + 1):((ii + 1) * Nb + 1)])

        # compute the logistic loss
        LL[itr] = logit_loss(b, Phi, y)

        # find gamma
        gamma_new = np.zeros((T, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(logistic_warp)(
                alpha, nu, q[:, :, ii], y[ii], deltaO=deltaO,
                deltag=deltag, method=method) for ii in range(N))
            for ii in range(0, N):
                gamma_new[:, ii] = out[ii][0]
                beta1n = cf.group_action_by_gamma_coord(
                    out[ii][1].dot(beta0[:, :, ii]), out[ii][0])
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = out[ii][1]
                if np.isinf(beta1n).any() or np.isnan(beta1n).any():
                    Tracer()()
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])
        else:
            for ii in range(0, N):
                q1 = q[:, :, ii]
                gammatmp, Otmp, tautmp = logistic_warp(alpha, nu, q1,
                                                       y[ii],
                                                       deltaO=deltaO,
                                                       deltag=deltag,
                                                       method=method)
                gamma_new[:, ii] = gammatmp
                beta1n = cf.group_action_by_gamma_coord(
                    Otmp.dot(beta0[:, :, ii]), gammatmp)
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = Otmp
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new.copy()

        itr += 1

    tau = np.zeros(N)
    model = collections.namedtuple('model', ['alpha', 'nu', 'betan', 'q',
                                             'gamma', 'O', 'tau', 'B', 'b',
                                             'Loss', 'type'])
    out = model(alpha, nu, beta, q, gamma_new, O_hat, tau, B, b[1:-1],
                LL[1:itr], 'oclogistic')
    return out
def display(N, dt, tscl, nhaz, apsth, lambda2, psth, histknts, psthknts,
            dir=None):
    """
    N        length of trial, also time in ms
    tscl
    nhaz     normalized hazard function.  calculated under assumption of
             stationarity of psth
    apsth    approximate stepwise psth
    lambda2  ground truth lambda2 term
    psth     ground truth lambda1 term
    """
    global v, c
    x = _N.linspace(0., N - 1, N, endpoint=False)   #  in units of ms.

    theknts = [histknts, psthknts]
    for f in xrange(1, 3):
        knts = theknts[f - 1]

        if f == 1:
            fig, ax = _plt.subplots(figsize=(6, 4))
            B = patsy.bs(x[0:len(nhaz)], knots=knts, include_intercept=True)
            Bc = B[:, v:]
            Bv = B[:, 0:v]
            ac = _N.zeros(c)
            iBvTBv = _N.linalg.inv(_N.dot(Bv.T, Bv))
            av = _N.dot(iBvTBv, _N.dot(Bv.T, _N.log(nhaz) - _N.dot(Bc, ac)))
            a = _N.array(av.tolist() + ac.tolist())
            _plt.plot(x[0:len(nhaz)], nhaz, color="grey", lw=2)  #  empirical
            ymax = -1
            if lambda2 is not None:
                _plt.plot(lambda2, color="red", lw=2)   #  ground truth
                ymax = max(lambda2)
            _plt.ylim(0, max(ymax, max(nhaz[0:tscl])) * 1.1)
            _plt.xlim(0, 3 * tscl)
            splFt = _N.exp(_N.dot(B, a))
            _plt.plot(splFt)
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.xaxis.set_ticks_position("bottom")
            ax.yaxis.set_ticks_position("left")
            _plt.savefig(setFN("hist.eps", dir=dir))
            _plt.xlim(0, tscl)
            #_plt.grid()
            _plt.savefig(setFN("histZ.eps", dir=dir))
            _plt.close()
        else:
            fig = _plt.figure()
            B = patsy.bs(x, knots=knts, include_intercept=True)
            iBTB = _N.linalg.inv(_N.dot(B.T, B))
            a = _N.dot(iBTB, _N.dot(B.T, _N.log(apsth)))
            _plt.plot(x, apsth, color="grey", lw=2)   #  empirical
            if psth is not None:
                fHz = ((_N.exp(psth) * dt) / (1 + dt * _N.exp(psth))) / dt
                _plt.plot(fHz, color="red", lw=2)   #  ground truth
            splFt = _N.exp(_N.dot(B, a))
            _plt.plot(splFt)
            _plt.savefig(setFN("psth.eps", dir=dir))
            _plt.close()
def oc_elastic_mlogistic(beta, y, B=None, df=20, T=100, max_itr=30,
                         cores=-1, deltaO=.003, deltag=.003):
    """
    This function identifies a multinomial logistic regression model
    with phase-variability using elastic methods for open curves

    :param beta: numpy ndarray of shape (n, M, N) describing N curves
                 in R^M
    :param y: numpy array of labels {1,2,...,m} for m classes
    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 20)
    :param T: number of desired samples along curve (default 100)
    :param max_itr: maximum number of iterations (default 30)
    :param cores: number of cores for parallel processing (default all)
    :type beta: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return nu: nu(t) of model
    :return betan: aligned curves - numpy ndarray of shape (n,T,N)
    :return O: calculated rotation matrices
    :return gamma: calculated warping functions
    :return B: basis matrix
    :return b: basis coefficients
    :return Loss: logistic loss
    """
    n = beta.shape[0]
    N = beta.shape[2]
    time = np.linspace(0, 1, T)

    if n > 500:
        parallel = True
    elif T > 100:
        parallel = True
    else:
        parallel = True

    # Code labels
    m = y.max()
    Y = np.zeros((N, m), dtype=int)
    for ii in range(0, N):
        Y[ii, y[ii] - 1] = 1

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q, beta = preproc_open_curve(beta, T)
    qn = q.copy()
    beta0 = beta.copy()

    gamma = np.tile(np.linspace(0, 1, T), (N, 1))
    gamma = gamma.transpose()

    O_hat = np.tile(np.eye(n), (N, 1, 1)).T

    itr = 1
    LL = np.zeros(max_itr + 1)
    while itr <= max_itr:
        print("Iteration: %d" % itr)

        Phi = np.ones((N, n * Nb + 1))
        for ii in range(0, N):
            for jj in range(0, n):
                for kk in range(1, Nb + 1):
                    Phi[ii, jj * Nb + kk] = trapz(qn[jj, :, ii]
                                                  * B[:, kk - 1], time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (n * Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]

        B0 = b.reshape(n * Nb + 1, m)
        alpha = B0[0, :]
        nu = np.zeros((n, T, m))
        for i in range(0, m):
            for j in range(0, n):
                nu[j, :, i] = B.dot(B0[(j * Nb + 1):((j + 1) * Nb + 1), i])

        # compute the logistic loss
        LL[itr] = mlogit_loss(b, Phi, Y)

        # find gamma
        gamma_new = np.zeros((T, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(mlogit_warp_grad)(
                alpha, nu, q[:, :, n], Y[n, :], deltaO=deltaO,
                deltag=deltag) for n in range(N))
            for ii in range(0, N):
                gamma_new[:, ii] = out[ii][0]
                beta1n = cf.group_action_by_gamma_coord(
                    out[ii][1].dot(beta0[:, :, ii]), out[ii][0])
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = out[ii][1]
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])
        else:
            for ii in range(0, N):
                gammatmp, Otmp = mlogit_warp_grad(alpha, nu, q[:, :, ii],
                                                  Y[ii, :], deltaO=deltaO,
                                                  deltag=deltag)
                gamma_new[:, ii] = gammatmp
                beta1n = cf.group_action_by_gamma_coord(
                    Otmp.dot(beta0[:, :, ii]), gammatmp)
                beta[:, :, ii] = beta1n
                O_hat[:, :, ii] = Otmp
                qn[:, :, ii] = cf.curve_to_q(beta[:, :, ii])

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new.copy()

        itr += 1

    model = collections.namedtuple('model', ['alpha', 'nu', 'betan', 'q',
                                             'gamma', 'O', 'B', 'b',
                                             'Loss', 'n_classes', 'type'])
    out = model(alpha, nu, beta, q, gamma_new, O_hat, B, b[1:-1],
                LL[1:itr], m, 'ocmlogistic')
    return out
def loadDat(self, runDir, datfilename, trials,
            multiply_shape_hyperparam=1, multiply_scale_hyperparam=1,
            hist_timescale_ms=70, n_interior_knots=8):   ################# loadDat
    oo = self
    hist_timescale = hist_timescale_ms * 0.001
    bGetFP = False

    x_st_cnts = _N.loadtxt(datfilename)

    #y_ch = 2   #  spike channel
    y_ch = 0    #  spike channel
    #p = _re.compile("^\d{6}")   # starts like "exptDate-....."
    #m = p.match(oo.setname)

    #dch = 4    #  # of data columns per trial
    dch = 1
    bRealDat, dch = False, 1

    TR = x_st_cnts.shape[1] // dch   #  number of trials will get filtered
    print("TR    %d" % TR)
    print(trials)

    #  If I only want to use a small portion of the data
    oo.N = x_st_cnts.shape[0] - 1
    if oo.t1 == None:
        oo.t1 = oo.N + 1
    #  meaning of N changes here
    N = oo.t1 - 1 - oo.t0

    #x = x_st_cnts[oo.t0:oo.t1, ::dch].T
    y = x_st_cnts[oo.t0:oo.t1, y_ch::dch].T
    # if bRealDat:
    #     fx = x_st_cnts[oo.t0:oo.t1, flt_ch::dch].T
    #     px = x_st_cnts[oo.t0:oo.t1, ph_ch::dch].T

    ####  Now keep only trials that have spikes
    kpTrl = range(TR)
    if trials is None:
        trials = range(oo.TR)
    oo.useTrials = []
    for utrl in trials:
        try:
            ki = kpTrl.index(utrl)
            if _N.sum(y[utrl, :]) > 1:   #  must see at least 2 spikes
                oo.useTrials.append(ki)
        except ValueError:
            print("a trial requested to use will be removed  %d" % utrl)

    ######  oo.y are for trials that have at least 1 spike
    #y = _N.array(y[oo.useTrials], dtype=_N.int)
    y = _N.array(y, dtype=_N.int)
    if oo.downsamp:
        evry, dsdat = downsamplespkdat(y, 0.005, max_evry=3)
    else:
        evry = 1
        dsdat = y
        print("NO downsamp")
    oo.evry = evry
    oo.dt *= oo.evry

    oo.fSigMax = 0.5 / oo.dt
    print("fSigMax %.3f" % oo.fSigMax)
    oo.freq_lims = [[0.000001, oo.fSigMax]] * oo.C
    print(oo.freq_lims)
    print("oo.dt %.3f" % oo.dt)

    print("!!!!!!!!!!!!!!!!!!!!!!   evry %d" % evry)
    print(oo.useTrials)
    print(dsdat.shape)
    oo.y = _N.array(dsdat[oo.useTrials], dtype=_N.int)

    prb_spk_in_bin = _N.sum(oo.y) / (oo.y.shape[0] * oo.y.shape[1])
    oo.u_u = -_N.log(1 / prb_spk_in_bin - 1)
    print(oo.u_u)

    num_dat_pts = oo.y.shape[0] * oo.y.shape[1]
    if (oo.a_q2 is None) or (oo.B_q2 is None):   #  we set a prior here
        #oo.a_q2 = num_dat_pts // 10
        oo.a_q2 = (num_dat_pts // 10) * multiply_shape_hyperparam
        #  inverse-gamma mode:  md = B / (a+1)  =>  B = md * (a+1)
        oo.B_q2 = (1e-4 * (oo.a_q2 + 1) * evry) * multiply_scale_hyperparam
        print("setting prior for innovation  %(a)d  %(B).3e" %
              {"a": oo.a_q2, "B": oo.B_q2})

    #oo.x = _N.array(x[oo.useTrials])
    # if bRealDat:
    #     oo.fx = _N.array(fx[oo.useTrials])
    #     oo.px = _N.array(px[oo.useTrials])

    #  remove trials where data has no information
    rmTrl = []

    oo.kp = oo.y - 0.5
    oo.rn = 1

    oo.TR = len(oo.useTrials)
    oo.N = N
    oo.t1 = oo.t0 + dsdat.shape[1]
    oo.N = oo.t1 - 1 - oo.t0

    #oo.Bsmpx = _N.zeros((iters//oo.BsmpxSkp, oo.TR, (oo.N+1) + 2))
    oo.smpx = _N.zeros((oo.TR, (oo.N + 1) + 2, oo.k))   #  start at 0 + u
    oo.ws = _N.empty((oo.TR, oo.N + 1), dtype=_N.float)
    oo.lrn = _N.empty((oo.TR, oo.N + 1))

    if oo.us is None:
        oo.us = _N.zeros(oo.TR)

    tot_isi = 0
    nisi = 0
    isis = ISIs(oo.y)   #  cnts will always be 0 in first bin
    sisis = _N.sort(isis)
    Lisi = len(sisis)

    ###  look at the isi distribution
    #  cnts will always be 0 in first bin
    maxisi = max(isis)
    minisi = min(isis)   #  >= 1
    print("*****************")
    print(maxisi)
    print(oo.N)
    print("*****************")
    cnts, bins = _N.histogram(isis, bins=_N.linspace(0.5, maxisi + 0.5,
                                                     maxisi + 1))
    #  cnts[0]  are number of ISIs of size 1
    smallisi = int(sisis[int(Lisi * 0.1)])

    #  hist_timescale in ms
    asymptote = smallisi + int(hist_timescale / oo.dt)   #  100 ms

    hist_interior_knots = _N.empty(n_interior_knots)
    lin01 = _N.linspace(0, 1, n_interior_knots, endpoint=True)
    sqr01 = lin01**2
    #  was hardcoded [0:8]; use the n_interior_knots parameter instead
    hist_interior_knots[:] = smallisi + sqr01 * (asymptote - smallisi)

    crats = _N.zeros(maxisi - 1)
    for n in range(0, maxisi - 2):
        crats[n + 1] = crats[n] + cnts[n]
    crats /= crats[-1]

    ####  generate spike before time=0.  PSTH estimation
    if oo.t0_is_t_since_1st_spk is None:
        oo.t0_is_t_since_1st_spk = _N.empty(oo.TR, dtype=_N.int)
        rands = _N.random.rand(oo.TR)
        for tr in range(oo.TR):
            spkts = _N.where(oo.y[tr] == 1)[0]
            if len(spkts) > 0:
                t0 = spkts[0]
                t0 = t0 if t0 < len(crats) else len(crats) - 1
                r0 = crats[t0]   #  say 0.3
                adjRnd = (1 - r0) * rands[tr]
                isi = _N.where(crats >= adjRnd)[0][0]  #  isi in units of bin sz
                oo.t0_is_t_since_1st_spk[tr] = isi
    else:
        print("using saved t0_is_t_since_1st_spk")

    oo.loghist = loadL2(runDir, fn=oo.histFN)
    oo.dohist = True if oo.loghist is None else False

    oo.knownSig = loadKnown(runDir, trials=oo.useTrials, fn=oo.knownSigFN)
    if oo.knownSig is None:
        oo.knownSig = _N.zeros((oo.TR, oo.N + 1))
    else:
        oo.knownSig *= oo.xknownSig

    ###  override knot locations
    upto = oo.N + 1 if int(maxisi * 1.3) > oo.N + 1 else int(maxisi * 1.3)
    print("upto %d" % upto)
    print("oo.N %d" % oo.N)
    print("maxisi %d" % maxisi)
    oo.Hbf = patsy.bs(_N.linspace(0, upto, upto + 1, endpoint=False),
                      knots=hist_interior_knots,
                      include_intercept=True)   #  spline basis

    max_locs = _N.empty(oo.Hbf.shape[1])
    for i in range(oo.Hbf.shape[1]):
        max_locs[i] = _N.where(oo.Hbf[:, i] == _N.max(oo.Hbf[:, i]))[0]
    print(max_locs)
    #  find the knot that's closest to hist_interior_knots[4] (90th %tile)
    dist_from_90th = _N.abs(max_locs - asymptote)
    #print(dist_from_90th)
    oo.iHistKnotBeginFixed = _N.where(
        dist_from_90th == _N.min(dist_from_90th))[0][0]
    oo.histknots = oo.Hbf.shape[1]
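# The quadratic knot spacing above front-loads knots near zero lag, where
# spike-history effects change fastest.  Standalone sketch of the
# placement rule (smallisi/asymptote values are illustrative):
import numpy as np

n_interior_knots = 8
smallisi = 3       # ~10th-percentile ISI, in bins (illustrative)
asymptote = 73     # smallisi + history timescale in bins (illustrative)

lin01 = np.linspace(0, 1, n_interior_knots, endpoint=True)
knots = smallisi + lin01**2 * (asymptote - smallisi)
print(np.round(knots, 1))   # dense near smallisi, sparse near asymptote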