# NOTE: these functions are collected from several fdasrsf-style modules; the
# import block below is a best guess at their shared dependencies. Helpers
# such as f_SSEv_pw, f_f1postlogl_pw, f_f2postlogl_pw, regression_warp,
# mlogit_loss, mlogit_gradient, mlogit_warp_grad and phi are assumed to be
# defined elsewhere in the package.
import collections

import numpy as np
import matplotlib.pyplot as plt
from numpy import dot
from numpy.linalg import norm, inv, svd
from numpy.random import rand, multivariate_normal
from scipy.integrate import trapz, cumtrapz
from scipy.optimize import fmin_l_bfgs_b
from joblib import Parallel, delayed
from patsy import bs

import fdasrsf as fs
import fdasrsf.utility_functions as uf
import fdasrsf.geometry as geo
import fdasrsf.plot_style as plot
import fdasrsf.regression as rg


def f_updatef2_pw(f2_curr, q2_curr, y2, q1, v_coef_curr, v_basis, SSE_curr,
                  K_f2, K_f2prop, sigma_curr, sigma2_curr):
    time = np.linspace(0, 1, y2.shape[0])
    v = uf.f_basistofunction(v_basis["x"], 0, v_coef_curr, v_basis)
    f2_prop = multivariate_normal(f2_curr, K_f2prop)
    q2_prop = uf.f_to_srsf(f2_prop, time)
    SSE_prop = f_SSEv_pw(v, q1, q2_prop)

    postlog_curr = f_f2postlogl_pw(f2_curr, y2, SSE_curr, K_f2, sigma_curr,
                                   sigma2_curr)
    postlog_prop = f_f2postlogl_pw(f2_prop, y2, SSE_prop, K_f2, sigma_curr,
                                   sigma2_curr)

    # Metropolis-Hastings acceptance probability
    ratio = np.minimum(1, np.exp(postlog_prop - postlog_curr))

    u = rand()
    if u <= ratio:
        f2_curr = f2_prop
        q2_curr = q2_prop
        f2_accept = True
    else:
        f2_accept = False

    return f2_curr, q2_curr, f2_accept
def f_updatef1_pw(f1_curr, q1_curr, y1, q2, v_coef_curr, v_basis, SSE_curr,
                  K_f1, K_f1prop, sigma_curr, sigma1_curr):
    time = np.linspace(0, 1, y1.shape[0])
    v = uf.f_basistofunction(v_basis["x"], 0, v_coef_curr, v_basis)
    f1_prop = multivariate_normal(f1_curr, K_f1prop)
    q1_prop = uf.f_to_srsf(f1_prop, time)
    SSE_prop = f_SSEv_pw(v, q1_prop, q2)

    postlog_curr = f_f1postlogl_pw(f1_curr, y1, SSE_curr, K_f1, sigma_curr,
                                   sigma1_curr)
    postlog_prop = f_f1postlogl_pw(f1_prop, y1, SSE_prop, K_f1, sigma_curr,
                                   sigma1_curr)

    ratio = np.minimum(1, np.exp(postlog_prop - postlog_curr))

    u = rand()
    if u <= ratio:
        f1_curr = f1_prop
        q1_curr = q1_prop
        f1_accept = True
    else:
        f1_accept = False

    return f1_curr, q1_curr, f1_accept
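# The two updates above share one Metropolis-Hastings pattern: propose from a
# Gaussian centered at the current state and accept with probability
# min(1, exp(logpost_prop - logpost_curr)). The sketch below is illustrative
# only; a toy log-posterior stands in for f_f1postlogl_pw/f_f2postlogl_pw and
# the names are placeholders.
def _example_mh_step():
    import numpy as np
    from numpy.random import rand, multivariate_normal

    def logpost(x):
        # toy standard-normal log-posterior (up to a constant)
        return -0.5 * np.sum(x ** 2)

    x_curr = np.zeros(3)
    cov_prop = 0.1 * np.eye(3)  # plays the role of K_f1prop / K_f2prop
    x_prop = multivariate_normal(x_curr, cov_prop)
    ratio = np.minimum(1, np.exp(logpost(x_prop) - logpost(x_curr)))
    accept = rand() <= ratio
    return (x_prop if accept else x_curr), accept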
def map_driver(q1, f, bet, t, dt):
    # elastically match f to the coefficient function bet, then integrate
    q2 = uf.f_to_srsf(f, t)
    gam = uf.optimum_reparam(q1, t, q2)
    fn = uf.warp_f_gamma(t, f, gam)
    tmp = bet * fn
    y = tmp.sum() * dt
    return y
def predict(self, newdata=None):
    """
    This function performs prediction of the regression model on new data,
    if available, or on the data currently stored in the object.

    Usage: obj.predict()
           obj.predict(newdata)

    :param newdata: dict containing new data for prediction (needs the
                    keys below; if None, predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter
    """
    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata['y']
        q = uf.f_to_srsf(f, time, newdata['smooth'])

        n = f.shape[1]
        yhat = np.zeros(n)
        for ii in range(0, n):
            diff = self.q - q[:, ii][:, np.newaxis]
            dist = np.sum(np.abs(diff) ** 2, axis=0) ** (1. / 2)
            q_tmp = uf.warp_q_gamma(time, q[:, ii],
                                    self.gamma[:, dist.argmin()])
            yhat[ii] = self.alpha + trapz(q_tmp * self.beta, time)

        if y is None:
            self.SSE = np.nan
        else:
            self.SSE = np.sum((y - yhat) ** 2)

        self.y_pred = yhat
    else:
        n = self.f.shape[1]
        yhat = np.zeros(n)
        for ii in range(0, n):
            diff = self.q - self.q[:, ii][:, np.newaxis]
            dist = np.sum(np.abs(diff) ** 2, axis=0) ** (1. / 2)
            q_tmp = uf.warp_q_gamma(self.time, self.q[:, ii],
                                    self.gamma[:, dist.argmin()])
            yhat[ii] = self.alpha + trapz(q_tmp * self.beta, self.time)

        self.SSE = np.sum((self.y - yhat) ** 2)
        self.y_pred = yhat

    return
def MapC_to_y(n, c, B, t, f, parallel):
    dt = np.diff(t)
    dt = dt.mean()
    y = np.zeros(n)
    # bet and q1 do not depend on k, so compute them once
    bet = np.dot(B, c)
    q1 = uf.f_to_srsf(bet, t)
    if parallel:
        out = Parallel(n_jobs=-1)(delayed(map_driver)(q1, f[:, k], bet, t,
                                                      dt) for k in range(n))
        y = np.array(out)
    else:
        for k in range(0, n):
            q2 = uf.f_to_srsf(f[:, k], t)
            gam = uf.optimum_reparam(q1, t, q2)
            fn = uf.warp_f_gamma(t, f[:, k], gam)
            tmp = bet * fn
            y[k] = tmp.sum() * dt

    return y
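# MapC_to_y evaluates the regression functional: bet = B.dot(c) is the
# coefficient function, each f[:, k] is elastically matched to bet, and the
# response is the discretized inner product of bet with the warped function.
# A minimal usage sketch with synthetic data (shapes only; the crude monomial
# basis below is a placeholder for whatever basis the caller builds):
def _example_MapC_to_y():
    import numpy as np
    t = np.linspace(0, 1, 101)
    B = np.column_stack([t ** p for p in range(4)])  # (M, Nb) toy basis
    c = np.array([1.0, -0.5, 0.25, 0.0])             # basis coefficients
    f = np.sin(np.outer(t, np.arange(1, 6)))         # (M, n=5) functions
    return MapC_to_y(5, c, B, t, f, parallel=False)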
def predict(self, newdata=None):
    """
    This function performs prediction of the regression model on new data,
    if available, or on the data currently stored in the object.

    Usage: obj.predict()
           obj.predict(newdata)

    :param newdata: dict containing new data for prediction (needs the
                    keys below; if None, predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter
    """
    omethod = self.warp_data.method
    lam = self.warp_data.lam
    m = self.n_classes
    M = self.time.shape[0]

    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata['y']
        sparam = newdata['sparam']
        if newdata['smooth']:
            f = fs.smooth_data(f, sparam)

        q1 = fs.f_to_srsf(f, time)
        n = q1.shape[1]
        self.y_pred = np.zeros((n, m))
        mq = self.warp_data.mqn
        fn = np.zeros((M, n))
        qn = np.zeros((M, n))
        gam = np.zeros((M, n))
        for ii in range(0, n):
            gam[:, ii] = uf.optimum_reparam(mq, time, q1[:, ii], omethod)
            fn[:, ii] = uf.warp_f_gamma(time, f[:, ii], gam[:, ii])
            qn[:, ii] = uf.f_to_srsf(fn[:, ii], time)

        m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
            np.abs(fn[self.pca.id, :]))
        qn1 = np.vstack((qn, m_new))
        U = self.pca.U
        no = U.shape[1]

        if self.pca.__class__.__name__ == 'fdajpca':
            C = self.pca.C
            TT = self.time.shape[0]
            mu_g = self.pca.mu_g
            mu_psi = self.pca.mu_psi
            vec = np.zeros((M, n))
            psi = np.zeros((TT, n))
            binsize = np.mean(np.diff(self.time))
            for i in range(0, n):
                psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                # inv_exp_map returns (vector, theta); keep the vector
                out, theta = geo.inv_exp_map(mu_psi, psi[:, i])
                vec[:, i] = out

            g = np.vstack((qn1, C * vec))
            a = np.zeros((n, no))
            for i in range(0, n):
                for j in range(0, no):
                    tmp = (g[:, i] - mu_g)
                    a[i, j] = dot(tmp.T, U[:, j])

        elif self.pca.__class__.__name__ == 'fdavpca':
            a = np.zeros((n, no))
            for i in range(0, n):
                for j in range(0, no):
                    tmp = (qn1[:, i] - self.pca.mqn)
                    a[i, j] = dot(tmp.T, U[:, j])

        elif self.pca.__class__.__name__ == 'fdahpca':
            a = np.zeros((n, no))
            mu_psi = self.pca.psi_mu
            vec = np.zeros((M, n))
            TT = self.time.shape[0]
            psi = np.zeros((TT, n))
            binsize = np.mean(np.diff(self.time))
            for i in range(0, n):
                psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                # inv_exp_map returns (vector, theta); keep the vector
                out, theta = geo.inv_exp_map(mu_psi, psi[:, i])
                vec[:, i] = out

            vm = self.pca.vec.mean(axis=1)
            for i in range(0, n):
                for j in range(0, no):
                    a[i, j] = np.sum(dot(vec[:, i] - vm, U[:, j]))
        else:
            raise Exception('Invalid fPCA Method')

        for ii in range(0, n):
            for jj in range(0, m):
                self.y_pred[ii, jj] = self.alpha[jj] + np.sum(
                    a[ii, :] * self.b[:, jj])

        if y is None:
            self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
            self.y_pred = self.y_pred.reshape((n, m))
            self.y_labels = np.argmax(self.y_pred, axis=1)
            self.PC = np.nan
        else:
            self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
            self.y_pred = self.y_pred.reshape((n, m))
            self.y_labels = np.argmax(self.y_pred, axis=1)
            self.PC = np.zeros(m)
            cls_set = np.arange(0, m)
            for ii in range(0, m):
                cls_sub = np.setdiff1d(cls_set, ii)
                TP = np.sum(y[self.y_labels == ii] == ii)
                FP = np.sum(y[np.in1d(self.y_labels, cls_sub)] == ii)
                TN = np.sum(y[np.in1d(self.y_labels, cls_sub)] ==
                            self.y_labels[np.in1d(self.y_labels, cls_sub)])
                FN = np.sum(np.in1d(y[self.y_labels == ii], cls_sub))
                self.PC[ii] = (TP + TN) / (TP + FP + FN + TN)

            self.PCo = np.sum(y == self.y_labels) / self.y_labels.shape[0]
    else:
        # one row of pca.coef per training sample
        n = self.pca.coef.shape[0]
        self.y_pred = np.zeros((n, m))
        for ii in range(0, n):
            for jj in range(0, m):
                self.y_pred[ii, jj] = self.alpha[jj] + np.sum(
                    self.pca.coef[ii, :] * self.b[:, jj])

        self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
        self.y_pred = self.y_pred.reshape((n, m))
        self.y_labels = np.argmax(self.y_pred, axis=1)
        self.PC = np.zeros(m)
        cls_set = np.arange(0, m)
        for ii in range(0, m):
            cls_sub = np.setdiff1d(cls_set, ii)
            TP = np.sum(self.y[self.y_labels == ii] == ii)
            FP = np.sum(self.y[np.in1d(self.y_labels, cls_sub)] == ii)
            TN = np.sum(self.y[np.in1d(self.y_labels, cls_sub)] ==
                        self.y_labels[np.in1d(self.y_labels, cls_sub)])
            FN = np.sum(np.in1d(self.y[self.y_labels == ii], cls_sub))
            self.PC[ii] = (TP + TN) / (TP + FP + FN + TN)

        self.PCo = np.sum(self.y == self.y_labels) / self.y_labels.shape[0]

    return
def srsf_align_pair(f, g, time, method="mean", showplot=True,
                    smoothdata=False, lam=0.0):
    """
    This function aligns a collection of functions using the elastic
    square-root slope (srsf) framework.

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param g: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param method: (string) warp calculate Karcher Mean or Median (options =
                   "mean" or "median") (default="mean")
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :param lam: controls the elasticity (default = 0)
    :type lam: double
    :type smoothdata: bool
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return gn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qfn: aligned srvfs - similar structure to fn
    :return qgn: aligned srvfs - similar structure to fn
    :return qf0: original srvf - similar structure to fn
    :return qg0: original srvf - similar structure to fn
    :return fmean: f function mean or median - vector of length M
    :return gmean: g function mean or median - vector of length M
    :return mqfn: srvf mean or median - vector of length M
    :return mqgn: srvf mean or median - vector of length M
    :return gam: warping functions - similar structure to fn
    """
    M = f.shape[0]
    N = f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    eps = np.finfo(np.double).eps
    f0 = f
    g0 = g

    methods = ["mean", "median"]
    # 0 mean, 1-median
    method = [i for i, x in enumerate(methods) if x == method]
    if len(method) == 0:
        method = 0
    else:
        method = method[0]

    if showplot:
        plot.f_plot(time, f, title="Original Data")
        plot.f_plot(time, g, title="g Original Data")

    # Compute SRSF function from data
    f, g1, g2 = uf.gradient_spline(time, f, smoothdata)
    qf = g1 / np.sqrt(abs(g1) + eps)
    g, g1, g2 = uf.gradient_spline(time, g, smoothdata)
    qg = g1 / np.sqrt(abs(g1) + eps)

    print("Initializing...")
    mnq = qf.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (qf - d1) ** 2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()
    mq = np.column_stack((qf[:, min_ind], qg[:, min_ind]))
    mf = np.column_stack((f[:, min_ind], g[:, min_ind]))

    if parallel:
        out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam_pair)(
            mq, time, qf[:, n], qg[:, n], lam) for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = uf.optimum_reparam_pair(mq, time, qf, qg, lam)

    gamI = uf.SqrtMeanInverse(gam)

    time0 = (time[-1] - time[0]) * gamI + time[0]
    for k in range(0, 2):
        mf[:, k] = np.interp(time0, time, mf[:, k])
        mq[:, k] = uf.f_to_srsf(mf[:, k], time)

    # Compute Karcher Mean
    if method == 0:
        print("Compute Karcher Mean of %d functions in SRSF space..." % N)
    if method == 1:
        print("Compute Karcher Median of %d functions in SRSF space..." % N)

    MaxItr = 20
    ds = np.repeat(0.0, MaxItr + 2)
    ds[0] = np.inf
    qfun = np.repeat(0.0, MaxItr + 1)
    qgun = np.repeat(0.0, MaxItr + 1)
    tmp = np.zeros((M, 2, MaxItr + 2))
    tmp[:, :, 0] = mq
    mq = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = f
    f = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = g
    g = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = qf
    qf = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = qg
    qg = tmp

    for r in range(0, MaxItr):
        print("updating step: r=%d" % (r + 1))
        if r == (MaxItr - 1):
            print("maximal number of iterations is reached")

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam_pair)(
                mq[:, :, r], time, qf[:, n, 0], qg[:, n, 0], lam)
                for n in range(N))
            gam = np.array(out)
            gam = gam.transpose()
        else:
            gam = uf.optimum_reparam_pair(mq[:, :, r], time, qf[:, :, 0],
                                          qg[:, :, 0], lam)

        gam_dev = np.zeros((M, N))
        for k in range(0, N):
            time0 = (time[-1] - time[0]) * gam[:, k] + time[0]
            f[:, k, r + 1] = np.interp(time0, time, f[:, k, 0])
            g[:, k, r + 1] = np.interp(time0, time, g[:, k, 0])
            qf[:, k, r + 1] = uf.f_to_srsf(f[:, k, r + 1], time)
            qg[:, k, r + 1] = uf.f_to_srsf(g[:, k, r + 1], time)
            gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))

        mqt = mq[:, 0, r]
        a = mqt.repeat(N)
        d1 = a.reshape(M, N)
        df = (qf[:, :, r + 1] - d1) ** 2
        mqt = mq[:, 1, r]
        a = mqt.repeat(N)
        d1 = a.reshape(M, N)
        dg = (qg[:, :, r + 1] - d1) ** 2

        if method == 0:
            d1 = sum(trapz(df, time, axis=0))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds_tmp = d1 + lam * d2
            d1 = sum(trapz(dg, time, axis=0))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds_tmp1 = d1 + lam * d2
            ds[r + 1] = (ds_tmp + ds_tmp1) / 2

            # Minimization Step
            # compute the mean of the matched function
            qtemp = qf[:, :, r + 1]
            mq[:, 0, r + 1] = qtemp.mean(axis=1)
            qtemp = qg[:, :, r + 1]
            mq[:, 1, r + 1] = qtemp.mean(axis=1)

            qfun[r] = norm(mq[:, 0, r + 1] - mq[:, 0, r]) / norm(mq[:, 0, r])
            qgun[r] = norm(mq[:, 1, r + 1] - mq[:, 1, r]) / norm(mq[:, 1, r])

        if method == 1:
            d1 = sum(trapz(df, time, axis=0))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds_tmp = np.sqrt(d1) + lam * d2
            ds_tmp1 = np.sqrt(sum(trapz(dg, time, axis=0))) + lam * sum(
                trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds[r + 1] = (ds_tmp + ds_tmp1) / 2

            # Minimization Step
            # compute the median of the matched function
            dist_iinv = ds[r + 1] ** (-1)
            qtemp = qf[:, :, r + 1] / ds[r + 1]
            mq[:, 0, r + 1] = qtemp.sum(axis=1) * dist_iinv
            qtemp = qg[:, :, r + 1] / ds[r + 1]
            mq[:, 1, r + 1] = qtemp.sum(axis=1) * dist_iinv

            qfun[r] = norm(mq[:, 0, r + 1] - mq[:, 0, r]) / norm(mq[:, 0, r])
            qgun[r] = norm(mq[:, 1, r + 1] - mq[:, 1, r]) / norm(mq[:, 1, r])

        if (qfun[r] < 1e-2 and qgun[r] < 1e-2) or r >= MaxItr:
            break

    # Last Step with centering of gam
    r += 1
    if parallel:
        out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam_pair)(
            mq[:, :, r], time, qf[:, n, 0], qg[:, n, 0], lam)
            for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = uf.optimum_reparam_pair(mq[:, :, r], time, qf[:, :, 0],
                                      qg[:, :, 0], lam)

    gam_dev = np.zeros((M, N))
    for k in range(0, N):
        gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))

    gamI = uf.SqrtMeanInverse(gam)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    for k in range(0, 2):
        mq[:, k, r + 1] = np.interp(time0, time,
                                    mq[:, k, r]) * np.sqrt(gamI_dev)

    for k in range(0, N):
        qf[:, k, r + 1] = np.interp(time0, time,
                                    qf[:, k, r]) * np.sqrt(gamI_dev)
        f[:, k, r + 1] = np.interp(time0, time, f[:, k, r])
        qg[:, k, r + 1] = np.interp(time0, time,
                                    qg[:, k, r]) * np.sqrt(gamI_dev)
        g[:, k, r + 1] = np.interp(time0, time, g[:, k, r])
        gam[:, k] = np.interp(time0, time, gam[:, k])

    # Aligned data & stats
    fn = f[:, :, r + 1]
    gn = g[:, :, r + 1]
    qfn = qf[:, :, r + 1]
    qf0 = qf[:, :, 0]
    qgn = qg[:, :, r + 1]
    qg0 = qg[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)
    mean_g0 = g0.mean(axis=1)
    std_g0 = g0.std(axis=1)
    mean_gn = gn.mean(axis=1)
    std_gn = gn.std(axis=1)
    mqfn = mq[:, 0, r + 1]
    mqgn = mq[:, 1, r + 1]
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqfn * np.abs(mqfn), time)
    fmean = np.mean(f0[1, :]) + tmp
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqgn * np.abs(mqgn), time)
    gmean = np.mean(g0[1, :]) + tmp

    if showplot:
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gam,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="fn Warped Data")
        plot.f_plot(time, gn, title="gn Warped Data")

        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"f Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"fn Warped Data: Mean $\pm$ STD")

        tmp = np.array([mean_g0, mean_g0 + std_g0, mean_g0 - std_g0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"g Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_gn, mean_gn + std_gn, mean_gn - std_gn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"gn Warped Data: Mean $\pm$ STD")

        plot.f_plot(time, fmean, title="$f_{mean}$")
        plot.f_plot(time, gmean, title="$g_{mean}$")
        plt.show()

    align_results = collections.namedtuple('align', ['fn', 'gn', 'qfn',
                                                     'qf0', 'qgn', 'qg0',
                                                     'fmean', 'gmean',
                                                     'mqfn', 'mqgn', 'gam'])
    out = align_results(fn, gn, qfn, qf0, qgn, qg0, fmean, gmean, mqfn,
                        mqgn, gam)
    return out
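# A minimal usage sketch for srsf_align_pair, assuming paired curves f and g
# sampled on a common grid; the synthetic shifted sinusoids below are
# placeholders for real data:
def _example_srsf_align_pair():
    import numpy as np
    time = np.linspace(0, 1, 101)
    shifts = np.linspace(-0.1, 0.1, 20)
    f = np.column_stack([np.sin(2 * np.pi * (time + s)) for s in shifts])
    g = np.column_stack([np.cos(2 * np.pi * (time + s)) for s in shifts])
    out = srsf_align_pair(f, g, time, method="mean", showplot=False)
    return out.fn, out.gn, out.gam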
def align_fPCA(f, time, num_comp=3, showplot=True, smoothdata=False):
    """
    Aligns a collection of functions while extracting principal components.
    The functions are aligned to the principal components.

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param num_comp: number of fPCA components
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :type smoothdata: bool
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return q0: original srvf - similar structure to fn
    :return mqn: srvf mean or median - vector of length M
    :return gam: warping functions - similar structure to fn
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors
    :return orig_var: Original Variance of Functions
    :return amp_var: Amplitude Variance
    :return phase_var: Phase Variance
    """
    lam = 0.0
    MaxItr = 50
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]
    M = f.shape[0]
    N = f.shape[1]
    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    eps = np.finfo(np.double).eps
    f0 = f

    if showplot:
        plot.f_plot(time, f, title="Original Data")

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(time, f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1) ** 2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()

    print("Aligning %d functions in SRVF space to %d fPCA components..."
          % (N, num_comp))
    itr = 0
    mq = np.zeros((M, MaxItr + 1))
    mq[:, itr] = q[:, min_ind]
    fi = np.zeros((M, N, MaxItr + 1))
    fi[:, :, 0] = f
    qi = np.zeros((M, N, MaxItr + 1))
    qi[:, :, 0] = q
    gam = np.zeros((M, N, MaxItr + 1))
    cost = np.zeros(MaxItr + 1)

    while itr < MaxItr:
        print("updating step: r=%d" % (itr + 1))
        if itr == MaxItr - 1:
            print("maximal number of iterations is reached")

        # PCA Step
        a = mq[:, itr].repeat(N)
        d1 = a.reshape(M, N)
        qhat_cent = qi[:, :, itr] - d1
        K = np.cov(qi[:, :, itr])
        U, s, V = svd(K)

        alpha_i = np.zeros((num_comp, N))
        for ii in range(0, num_comp):
            for jj in range(0, N):
                alpha_i[ii, jj] = trapz(qhat_cent[:, jj] * U[:, ii], time)

        U1 = U[:, 0:num_comp]
        tmp = U1.dot(alpha_i)
        qhat = d1 + tmp

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam)(
                qhat[:, n], time, qi[:, n, itr], lam) for n in range(N))
            gam_t = np.array(out)
            gam[:, :, itr] = gam_t.transpose()
        else:
            gam[:, :, itr] = uf.optimum_reparam(qhat, time, qi[:, :, itr],
                                                lam)

        for k in range(0, N):
            time0 = (time[-1] - time[0]) * gam[:, k, itr] + time[0]
            fi[:, k, itr + 1] = np.interp(time0, time, fi[:, k, itr])
            qi[:, k, itr + 1] = uf.f_to_srsf(fi[:, k, itr + 1], time)

        qtemp = qi[:, :, itr + 1]
        mq[:, itr + 1] = qtemp.mean(axis=1)

        cost_temp = np.zeros(N)
        for ii in range(0, N):
            cost_temp[ii] = norm(qtemp[:, ii] - qhat[:, ii]) ** 2

        cost[itr + 1] = cost_temp.mean()

        if abs(cost[itr + 1] - cost[itr]) < 1e-06:
            break

        itr += 1

    if itr >= MaxItr:
        itrf = MaxItr
    else:
        itrf = itr + 1
    cost = cost[1:(itrf + 1)]

    # Aligned data & stats
    fn = fi[:, :, itrf]
    qn = qi[:, :, itrf]
    q0 = qi[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mqn = mq[:, itrf]
    gamf = gam[:, :, 0]
    for k in range(1, itr):
        gam_k = gam[:, :, k]
        for l in range(0, N):
            time0 = (time[-1] - time[0]) * gam_k[:, l] + time[0]
            gamf[:, l] = np.interp(time0, time, gamf[:, l])

    # Center Mean
    gamI = uf.SqrtMeanInverse(gamf)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    mqn = np.interp(time0, time, mqn) * np.sqrt(gamI_dev)
    for k in range(0, N):
        qn[:, k] = np.interp(time0, time, qn[:, k]) * np.sqrt(gamI_dev)
        fn[:, k] = np.interp(time0, time, fn[:, k])
        gamf[:, k] = np.interp(time0, time, gamf[:, k])

    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)

    # Get Final PCA
    mididx = int(np.round(time.shape[0] / 2))  # integer index into fn
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn2 = np.append(mqn, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(M + 1, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn2 + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(M, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca_tmp = q_pca[0:M, l, k] * np.abs(q_pca[0:M, l, k])
            q_pca_tmp2 = np.sign(q_pca[M, l, k]) * (q_pca[M, l, k] ** 2)
            f_pca[:, l, k] = uf.cumtrapzmid(time, q_pca_tmp, q_pca_tmp2)

    N2 = qn.shape[1]
    c = np.zeros((N2, num_comp))
    for k in range(0, num_comp):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn2) * U[:, k])

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # Align Plots
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gamf,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="Warped Data")

        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Warped Data: Mean $\pm$ STD")

        # PCA Plots
        fig, ax = plt.subplots(2, num_comp)
        for k in range(0, num_comp):
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:M, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))
            plot.rstyle(axt)
            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
            plot.rstyle(axt)
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, M + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqn * np.abs(mqn), time)
    fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (time[-1] - time[0]) * gamf[:, k] + time[0]
        fgam[:, k] = np.interp(time0, time, fmean)

    var_fgam = fgam.var(axis=1)
    orig_var = trapz(std_f0 ** 2, time)
    amp_var = trapz(std_fn ** 2, time)
    phase_var = trapz(var_fgam, time)

    K = np.cov(fn)
    U, s, V = svd(K)

    align_fPCAresults = collections.namedtuple('align_fPCA', ['fn', 'qn',
                                               'q0', 'mqn', 'gam', 'q_pca',
                                               'f_pca', 'latent', 'coef',
                                               'U', 'orig_var', 'amp_var',
                                               'phase_var', 'cost'])
    out = align_fPCAresults(fn, qn, q0, mqn, gamf, q_pca, f_pca, s, c, U,
                            orig_var, amp_var, phase_var, cost)
    return out
def srsf_align(f, time, method="mean", showplot=True, smoothdata=False,
               lam=0.0):
    """
    This function aligns a collection of functions using the elastic
    square-root slope (srsf) framework.

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param method: (string) warp calculate Karcher Mean or Median (options =
                   "mean" or "median") (default="mean")
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :param lam: controls the elasticity (default = 0)
    :type lam: double
    :type smoothdata: bool
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return q0: original srvf - similar structure to fn
    :return fmean: function mean or median - vector of length M
    :return mqn: srvf mean or median - vector of length M
    :return gam: warping functions - similar structure to fn
    :return orig_var: Original Variance of Functions
    :return amp_var: Amplitude Variance
    :return phase_var: Phase Variance

    Examples
    >>> import tables
    >>> fun = tables.open_file("../Data/simu_data.h5")
    >>> f = fun.root.f[:]
    >>> f = f.transpose()
    >>> time = fun.root.time[:]
    >>> out = srsf_align(f, time)
    """
    M = f.shape[0]
    N = f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    eps = np.finfo(np.double).eps
    f0 = f

    methods = ["mean", "median"]
    # 0 mean, 1-median
    method = [i for i, x in enumerate(methods) if x == method]
    if len(method) == 0:
        method = 0
    else:
        method = method[0]

    if showplot:
        plot.f_plot(time, f, title="f Original Data")

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(time, f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1) ** 2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()
    mq = q[:, min_ind]
    mf = f[:, min_ind]

    if parallel:
        out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam)(
            mq, time, q[:, n], lam) for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = uf.optimum_reparam(mq, time, q, lam)

    gamI = uf.SqrtMeanInverse(gam)
    mf = np.interp((time[-1] - time[0]) * gamI + time[0], time, mf)
    mq = uf.f_to_srsf(mf, time)

    # Compute Karcher Mean
    if method == 0:
        print("Compute Karcher Mean of %d functions in SRSF space..." % N)
    if method == 1:
        print("Compute Karcher Median of %d functions in SRSF space..." % N)

    MaxItr = 20
    ds = np.repeat(0.0, MaxItr + 2)
    ds[0] = np.inf
    qun = np.repeat(0.0, MaxItr + 1)
    tmp = np.zeros((M, MaxItr + 2))
    tmp[:, 0] = mq
    mq = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = f
    f = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = q
    q = tmp

    for r in range(0, MaxItr):
        print("updating step: r=%d" % (r + 1))
        if r == (MaxItr - 1):
            print("maximal number of iterations is reached")

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam)(
                mq[:, r], time, q[:, n, 0], lam) for n in range(N))
            gam = np.array(out)
            gam = gam.transpose()
        else:
            gam = uf.optimum_reparam(mq[:, r], time, q[:, :, 0], lam)

        gam_dev = np.zeros((M, N))
        for k in range(0, N):
            f[:, k, r + 1] = np.interp((time[-1] - time[0]) * gam[:, k]
                                       + time[0], time, f[:, k, 0])
            q[:, k, r + 1] = uf.f_to_srsf(f[:, k, r + 1], time)
            gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))

        mqt = mq[:, r]
        a = mqt.repeat(N)
        d1 = a.reshape(M, N)
        d = (q[:, :, r + 1] - d1) ** 2

        if method == 0:
            d1 = sum(trapz(d, time, axis=0))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds_tmp = d1 + lam * d2
            ds[r + 1] = ds_tmp

            # Minimization Step
            # compute the mean of the matched function
            qtemp = q[:, :, r + 1]
            mq[:, r + 1] = qtemp.mean(axis=1)

            qun[r] = norm(mq[:, r + 1] - mq[:, r]) / norm(mq[:, r])

        if method == 1:
            d1 = np.sqrt(sum(trapz(d, time, axis=0)))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, time, axis=0))
            ds_tmp = d1 + lam * d2
            ds[r + 1] = ds_tmp

            # Minimization Step
            # compute the median of the matched function
            dist_iinv = ds[r + 1] ** (-1)
            qtemp = q[:, :, r + 1] / ds[r + 1]
            mq[:, r + 1] = qtemp.sum(axis=1) * dist_iinv

            qun[r] = norm(mq[:, r + 1] - mq[:, r]) / norm(mq[:, r])

        if qun[r] < 1e-2 or r >= MaxItr:
            break

    # Last Step with centering of gam
    r += 1
    if parallel:
        out = Parallel(n_jobs=-1)(delayed(uf.optimum_reparam)(
            mq[:, r], time, q[:, n, 0], lam) for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = uf.optimum_reparam(mq[:, r], time, q[:, :, 0], lam)

    gam_dev = np.zeros((M, N))
    for k in range(0, N):
        gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))

    gamI = uf.SqrtMeanInverse(gam)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    mq[:, r + 1] = np.interp(time0, time, mq[:, r]) * np.sqrt(gamI_dev)
    for k in range(0, N):
        q[:, k, r + 1] = np.interp(time0, time,
                                   q[:, k, r]) * np.sqrt(gamI_dev)
        f[:, k, r + 1] = np.interp(time0, time, f[:, k, r])
        gam[:, k] = np.interp(time0, time, gam[:, k])

    # Aligned data & stats
    fn = f[:, :, r + 1]
    qn = q[:, :, r + 1]
    q0 = q[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)
    mqn = mq[:, r + 1]
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqn * np.abs(mqn), time)
    fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (time[-1] - time[0]) * gam[:, k] + time[0]
        fgam[:, k] = np.interp(time0, time, fmean)

    var_fgam = fgam.var(axis=1)
    orig_var = trapz(std_f0 ** 2, time)
    amp_var = trapz(std_fn ** 2, time)
    phase_var = trapz(var_fgam, time)

    if showplot:
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gam,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="Warped Data")

        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Warped Data: Mean $\pm$ STD")

        plot.f_plot(time, fmean, title="$f_{mean}$")
        plt.show()

    align_results = collections.namedtuple('align', ['fn', 'qn', 'q0',
                                                     'fmean', 'mqn', 'gam',
                                                     'orig_var', 'amp_var',
                                                     'phase_var'])
    out = align_results(fn, qn, q0, fmean, mqn, gam, orig_var, amp_var,
                        phase_var)
    return out
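# A minimal usage sketch for srsf_align; the docstring's HDF5 example needs
# ../Data/simu_data.h5, so synthetic shifted sinusoids are used here instead:
def _example_srsf_align():
    import numpy as np
    time = np.linspace(0, 1, 101)
    shifts = np.linspace(-0.1, 0.1, 20)
    f = np.column_stack([np.sin(2 * np.pi * (time + s)) for s in shifts])
    out = srsf_align(f, time, method="mean", showplot=False)
    return out.fn, out.fmean, out.gam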
def elastic_regression(f, y, time, B=None, lam=0, df=20, max_itr=20,
                       cores=-1, smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic methods

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param y: numpy array of N responses
    :param time: vector of size M describing the sample points
    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return SSE: sum of squared error
    """
    M = f.shape[0]
    N = f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    # second derivative for regularization
    Bdiff = np.zeros((M, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)

    q = uf.f_to_srsf(f, time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((time[-1] - time[0]) * gamma[:, ii]
                                  + time[0], time, f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(time, q[:, ii], gamma[:, ii])

        # OLS using basis
        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], time)

        R = np.zeros((Nb + 1, Nb + 1))
        for ii in range(1, Nb + 1):
            for jj in range(1, Nb + 1):
                R[ii, jj] = trapz(Bdiff[:, ii - 1] * Bdiff[:, jj - 1], time)

        xx = dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = dot(Phi.T, y)
        b = dot(inv_xx, xy)

        alpha = b[0]
        beta = B.dot(b[1:Nb + 1])
        beta = beta.reshape(M)

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = trapz(qn[:, ii] * beta, time)

        SSE[itr - 1] = sum((y.reshape(N) - alpha - int_X) ** 2)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(regression_warp)(
                beta, time, q[:, n], y[n], alpha) for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = regression_warp(beta, time, q[:, ii],
                                                   y[ii], alpha)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamI = uf.SqrtMeanInverse(gamma_new)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    beta = np.interp((time[-1] - time[0]) * gamI + time[0], time,
                     beta) * np.sqrt(gamI_dev)

    for ii in range(0, N):
        qn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, qn[:, ii]) * np.sqrt(gamI_dev)
        fn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, fn[:, ii])
        gamma[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                                 time, gamma_new[:, ii])

    model = collections.namedtuple('model', ['alpha', 'beta', 'fn', 'qn',
                                             'gamma', 'q', 'B', 'b', 'SSE',
                                             'type'])
    out = model(alpha, beta, fn, qn, gamma, q, B, b[1:-1], SSE[0:itr],
                'linear')
    return out
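# A minimal usage sketch for elastic_regression: N synthetic bump functions
# with a scalar response tied to the peak location; B=None lets the function
# build its own B-spline basis:
def _example_elastic_regression():
    import numpy as np
    time = np.linspace(0, 1, 101)
    centers = np.linspace(0.35, 0.65, 30)
    f = np.column_stack([np.exp(-(time - c) ** 2 / 0.01) for c in centers])
    y = 2.0 * centers + 0.1 * np.random.randn(30)
    model = elastic_regression(f, y, time, max_itr=5)
    return model.alpha, model.beta, model.SSE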
def calc_model(self, B=None, lam=0, df=20, max_itr=20, delta=.01,
               cores=-1, smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic pca

    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    """
    M = self.f.shape[0]
    N = self.f.shape[1]
    m = self.y.max()

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(self.time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(self.time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]
    self.B = B

    self.q = uf.f_to_srsf(self.f, self.time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    self.LL = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((self.time[-1] - self.time[0])
                                  * gamma[:, ii] + self.time[0],
                                  self.time, self.f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(self.time, self.q[:, ii],
                                        gamma[:, ii])

        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], self.time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, self.Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]
        B0 = b.reshape(Nb + 1, m)
        alpha = B0[0, :]
        beta = np.zeros((M, m))
        for i in range(0, m):
            beta[:, i] = B.dot(B0[1:Nb + 1, i])

        # compute the logistic loss
        self.LL[itr - 1] = mlogit_loss(b, Phi, self.Y)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(mlogit_warp_grad)(
                alpha, beta, self.time, self.q[:, n], self.Y[n, :],
                delta=delta) for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = mlogit_warp_grad(alpha, beta, self.time,
                                                    self.q[:, ii],
                                                    self.Y[ii, :],
                                                    delta=delta)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    self.qn = qn
    self.fn = fn
    self.gamma = gamma
    self.alpha = alpha
    self.beta = beta
    self.b = b[1:-1]
    self.n_classes = m
    self.LL = self.LL[0:itr]

    return
def elastic_mlogistic(f, y, time, B=None, df=20, max_itr=20, cores=-1,
                      delta=.01, parallel=True, smooth=False):
    """
    This function identifies a multinomial logistic regression model with
    phase-variability using elastic methods

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param y: numpy array of labels {1,2,...,m} for m classes
    :param time: vector of size M describing the sample points
    :param B: optional matrix describing Basis elements
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return Loss: logistic loss
    """
    M = f.shape[0]
    N = f.shape[1]

    # Code labels
    m = y.max()
    Y = np.zeros((N, m), dtype=int)
    for ii in range(0, N):
        Y[ii, y[ii] - 1] = 1

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    q = uf.f_to_srsf(f, time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    LL = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((time[-1] - time[0]) * gamma[:, ii]
                                  + time[0], time, f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(time, q[:, ii], gamma[:, ii])

        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], time)

        # Find alpha and beta using l_bfgs
        b0 = np.zeros(m * (Nb + 1))
        out = fmin_l_bfgs_b(mlogit_loss, b0, fprime=mlogit_gradient,
                            args=(Phi, Y), pgtol=1e-10, maxiter=200,
                            maxfun=250, factr=1e-30)
        b = out[0]
        B0 = b.reshape(Nb + 1, m)
        alpha = B0[0, :]
        beta = np.zeros((M, m))
        for i in range(0, m):
            beta[:, i] = B.dot(B0[1:Nb + 1, i])

        # compute the logistic loss
        LL[itr - 1] = mlogit_loss(b, Phi, Y)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(mlogit_warp_grad)(
                alpha, beta, time, q[:, n], Y[n, :], delta=delta)
                for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = mlogit_warp_grad(alpha, beta, time,
                                                    q[:, ii], Y[ii, :],
                                                    delta=delta)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamma = gamma_new
    # gamI = uf.SqrtMeanInverse(gamma)
    # gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    # beta = np.interp((time[-1] - time[0]) * gamI + time[0], time,
    #                  beta) * np.sqrt(gamI_dev)
    # for ii in range(0, N):
    #     qn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                           time, qn[:, ii]) * np.sqrt(gamI_dev)
    #     fn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                           time, fn[:, ii])
    #     gamma[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
    #                              time, gamma[:, ii])

    model = collections.namedtuple('model', ['alpha', 'beta', 'fn', 'qn',
                                             'gamma', 'q', 'B', 'b', 'Loss',
                                             'n_classes', 'type'])
    out = model(alpha, beta, fn, qn, gamma, q, B, b[1:-1], LL[0:itr], m,
                'mlogistic')
    return out
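# A minimal usage sketch for elastic_mlogistic with labels in {1, 2} (labels
# must be coded 1..m, as the Y-coding loop above assumes):
def _example_elastic_mlogistic():
    import numpy as np
    time = np.linspace(0, 1, 101)
    c1 = np.linspace(0.3, 0.4, 15)
    c2 = np.linspace(0.6, 0.7, 15)
    f = np.column_stack([np.exp(-(time - c) ** 2 / 0.01)
                         for c in np.concatenate((c1, c2))])
    y = np.repeat([1, 2], 15)
    model = elastic_mlogistic(f, y, time, max_itr=5, parallel=False)
    return model.alpha, model.beta, model.Loss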
def calc_model(self, B=None, lam=0, df=20, max_itr=20, cores=-1,
               smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic pca

    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    """
    M = self.f.shape[0]
    N = self.f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(self.time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(self.time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]
    self.B = B

    # second derivative for regularization
    Bdiff = np.zeros((M, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)
    self.Bdiff = Bdiff

    self.q = uf.f_to_srsf(self.f, self.time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    self.SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((self.time[-1] - self.time[0])
                                  * gamma[:, ii] + self.time[0],
                                  self.time, self.f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(self.time, self.q[:, ii],
                                        gamma[:, ii])

        # OLS using basis
        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], self.time)

        R = np.zeros((Nb + 1, Nb + 1))
        for ii in range(1, Nb + 1):
            for jj in range(1, Nb + 1):
                R[ii, jj] = trapz(Bdiff[:, ii - 1] * Bdiff[:, jj - 1],
                                  self.time)

        xx = np.dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = np.dot(Phi.T, self.y)
        b = np.dot(inv_xx, xy)

        alpha = b[0]
        beta = B.dot(b[1:Nb + 1])
        beta = beta.reshape(M)

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = trapz(qn[:, ii] * beta, self.time)

        self.SSE[itr - 1] = sum((self.y.reshape(N) - alpha - int_X) ** 2)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(regression_warp)(
                beta, self.time, self.q[:, n], self.y[n], alpha)
                for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = regression_warp(beta, self.time,
                                                   self.q[:, ii],
                                                   self.y[ii], alpha)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamI = uf.SqrtMeanInverse(gamma_new)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    beta = np.interp((self.time[-1] - self.time[0]) * gamI + self.time[0],
                     self.time, beta) * np.sqrt(gamI_dev)

    for ii in range(0, N):
        qn[:, ii] = np.interp((self.time[-1] - self.time[0]) * gamI
                              + self.time[0], self.time,
                              qn[:, ii]) * np.sqrt(gamI_dev)
        fn[:, ii] = np.interp((self.time[-1] - self.time[0]) * gamI
                              + self.time[0], self.time, fn[:, ii])
        gamma[:, ii] = np.interp((self.time[-1] - self.time[0]) * gamI
                                 + self.time[0], self.time,
                                 gamma_new[:, ii])

    self.qn = qn
    self.fn = fn
    self.gamma = gamma
    self.alpha = alpha
    self.beta = beta
    self.b = b[1:-1]
    self.SSE = self.SSE[0:itr]

    return
def srsf_align(self, method="mean", omethod="DP", smoothdata=False,
               parallel=False, lam=0.0, cores=-1):
    """
    This function aligns a collection of functions using the elastic
    square-root slope (srsf) framework.

    :param method: (string) warp calculate Karcher Mean or Median (options =
                   "mean" or "median") (default="mean")
    :param omethod: optimization method (DP, DP2) (default = DP)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :param parallel: run in parallel (default = F)
    :param lam: controls the elasticity (default = 0)
    :param cores: number of cores for parallel (default = -1 (all))
    :type lam: double
    :type smoothdata: bool

    Examples
    >>> import tables
    >>> fun = tables.open_file("../Data/simu_data.h5")
    >>> f = fun.root.f[:]
    >>> f = f.transpose()
    >>> time = fun.root.time[:]
    >>> obj = fs.fdawarp(f, time)
    >>> obj.srsf_align()
    """
    M = self.f.shape[0]
    N = self.f.shape[1]
    self.lam = lam

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True

    eps = np.finfo(np.double).eps
    f0 = self.f
    self.method = omethod

    methods = ["mean", "median"]
    self.type = method

    # 0 mean, 1-median
    method = [i for i, x in enumerate(methods) if x == method]
    if len(method) == 0:
        method = 0
    else:
        method = method[0]

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(self.time, self.f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1) ** 2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()
    mq = q[:, min_ind]
    mf = f[:, min_ind]

    if parallel:
        out = Parallel(n_jobs=cores)(delayed(uf.optimum_reparam)(
            mq, self.time, q[:, n], omethod, lam, mf[0], f[0, n])
            for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = np.zeros((M, N))
        for k in range(0, N):
            gam[:, k] = uf.optimum_reparam(mq, self.time, q[:, k], omethod,
                                           lam, mf[0], f[0, k])

    gamI = uf.SqrtMeanInverse(gam)
    mf = np.interp((self.time[-1] - self.time[0]) * gamI + self.time[0],
                   self.time, mf)
    mq = uf.f_to_srsf(mf, self.time)

    # Compute Karcher Mean
    if method == 0:
        print("Compute Karcher Mean of %d functions in SRSF space..." % N)
    if method == 1:
        print("Compute Karcher Median of %d functions in SRSF space..." % N)

    MaxItr = 20
    ds = np.repeat(0.0, MaxItr + 2)
    ds[0] = np.inf
    qun = np.repeat(0.0, MaxItr + 1)
    tmp = np.zeros((M, MaxItr + 2))
    tmp[:, 0] = mq
    mq = tmp
    tmp = np.zeros((M, MaxItr + 2))
    tmp[:, 0] = mf
    mf = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = self.f
    f = tmp
    tmp = np.zeros((M, N, MaxItr + 2))
    tmp[:, :, 0] = q
    q = tmp

    for r in range(0, MaxItr):
        print("updating step: r=%d" % (r + 1))
        if r == (MaxItr - 1):
            print("maximal number of iterations is reached")

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=cores)(delayed(uf.optimum_reparam)(
                mq[:, r], self.time, q[:, n, 0], omethod, lam, mf[0, r],
                f[0, n, 0]) for n in range(N))
            gam = np.array(out)
            gam = gam.transpose()
        else:
            for k in range(0, N):
                gam[:, k] = uf.optimum_reparam(mq[:, r], self.time,
                                               q[:, k, 0], omethod, lam,
                                               mf[0, r], f[0, k, 0])

        gam_dev = np.zeros((M, N))
        vtil = np.zeros((M, N))
        dtil = np.zeros(N)
        for k in range(0, N):
            f[:, k, r + 1] = np.interp((self.time[-1] - self.time[0])
                                       * gam[:, k] + self.time[0],
                                       self.time, f[:, k, 0])
            q[:, k, r + 1] = uf.f_to_srsf(f[:, k, r + 1], self.time)
            gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))
            v = q[:, k, r + 1] - mq[:, r]
            d = np.sqrt(trapz(v * v, self.time))
            vtil[:, k] = v / d
            dtil[k] = 1.0 / d

        mqt = mq[:, r]
        a = mqt.repeat(N)
        d1 = a.reshape(M, N)
        d = (q[:, :, r + 1] - d1) ** 2

        if method == 0:
            d1 = sum(trapz(d, self.time, axis=0))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, self.time, axis=0))
            ds_tmp = d1 + lam * d2
            ds[r + 1] = ds_tmp

            # Minimization Step
            # compute the mean of the matched function
            qtemp = q[:, :, r + 1]
            ftemp = f[:, :, r + 1]
            mq[:, r + 1] = qtemp.mean(axis=1)
            mf[:, r + 1] = ftemp.mean(axis=1)

            qun[r] = norm(mq[:, r + 1] - mq[:, r]) / norm(mq[:, r])

        if method == 1:
            d1 = np.sqrt(sum(trapz(d, self.time, axis=0)))
            d2 = sum(trapz((1 - np.sqrt(gam_dev)) ** 2, self.time, axis=0))
            ds_tmp = d1 + lam * d2
            ds[r + 1] = ds_tmp

            # Minimization Step
            # compute the median of the matched function
            stp = .3
            vbar = vtil.sum(axis=1) * (1 / dtil.sum())
            qtemp = q[:, :, r + 1]
            ftemp = f[:, :, r + 1]
            mq[:, r + 1] = mq[:, r] + stp * vbar
            tmp = np.zeros(M)
            tmp[1:] = cumtrapz(mq[:, r + 1] * np.abs(mq[:, r + 1]),
                               self.time)
            mf[:, r + 1] = np.median(f0[1, :]) + tmp

            qun[r] = norm(mq[:, r + 1] - mq[:, r]) / norm(mq[:, r])

        if qun[r] < 1e-2 or r >= MaxItr:
            break

    # Last Step with centering of gam
    r += 1
    if parallel:
        out = Parallel(n_jobs=cores)(delayed(uf.optimum_reparam)(
            mq[:, r], self.time, q[:, n, 0], omethod, lam, mf[0, r],
            f[0, n, 0]) for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        for k in range(0, N):
            gam[:, k] = uf.optimum_reparam(mq[:, r], self.time, q[:, k, 0],
                                           omethod, lam, mf[0, r],
                                           f[0, k, 0])

    gam_dev = np.zeros((M, N))
    for k in range(0, N):
        gam_dev[:, k] = np.gradient(gam[:, k], 1 / float(M - 1))

    gamI = uf.SqrtMeanInverse(gam)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (self.time[-1] - self.time[0]) * gamI + self.time[0]
    mq[:, r + 1] = np.interp(time0, self.time,
                             mq[:, r]) * np.sqrt(gamI_dev)
    for k in range(0, N):
        q[:, k, r + 1] = np.interp(time0, self.time,
                                   q[:, k, r]) * np.sqrt(gamI_dev)
        f[:, k, r + 1] = np.interp(time0, self.time, f[:, k, r])
        gam[:, k] = np.interp(time0, self.time, gam[:, k])

    # Aligned data & stats
    self.fn = f[:, :, r + 1]
    self.qn = q[:, :, r + 1]
    self.q0 = q[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mean_fn = self.fn.mean(axis=1)
    std_fn = self.fn.std(axis=1)
    self.gam = gam
    self.mqn = mq[:, r + 1]
    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(self.mqn * np.abs(self.mqn), self.time)
    self.fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (self.time[-1] - self.time[0]) * gam[:, k] + self.time[0]
        fgam[:, k] = np.interp(time0, self.time, self.fmean)

    var_fgam = fgam.var(axis=1)
    self.orig_var = trapz(std_f0 ** 2, self.time)
    self.amp_var = trapz(std_fn ** 2, self.time)
    self.phase_var = trapz(var_fgam, self.time)

    return
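# A minimal usage sketch for the method above, following the docstring's
# fdawarp example but with synthetic shifted sinusoids in place of
# simu_data.h5:
def _example_fdawarp_srsf_align():
    import numpy as np
    import fdasrsf as fs
    time = np.linspace(0, 1, 101)
    shifts = np.linspace(-0.1, 0.1, 20)
    f = np.column_stack([np.sin(2 * np.pi * (time + s)) for s in shifts])
    obj = fs.fdawarp(f, time)
    obj.srsf_align(method="mean", omethod="DP")
    return obj.fn, obj.fmean, obj.gam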
def predict(self, newdata=None):
    """
    This function performs prediction of the regression model on new data,
    if available, or on the data currently stored in the object.

    Usage: obj.predict()
           obj.predict(newdata)

    :param newdata: dict containing new data for prediction (needs the
                    keys below; if None, predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter
    """
    omethod = self.warp_data.method
    lam = self.warp_data.lam
    M = self.time.shape[0]

    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata['y']
        if newdata['smooth']:
            sparam = newdata['sparam']
            f = fs.smooth_data(f, sparam)

        q1 = fs.f_to_srsf(f, time)
        n = q1.shape[1]
        self.y_pred = np.zeros(n)
        mq = self.warp_data.mqn
        fn = np.zeros((M, n))
        qn = np.zeros((M, n))
        gam = np.zeros((M, n))
        for ii in range(0, n):
            gam[:, ii] = uf.optimum_reparam(mq, time, q1[:, ii], omethod,
                                            lam)
            fn[:, ii] = uf.warp_f_gamma(time, f[:, ii], gam[:, ii])
            qn[:, ii] = uf.f_to_srsf(fn[:, ii], time)

        U = self.pca.U
        no = U.shape[1]

        if self.pca.__class__.__name__ == 'fdajpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            C = self.pca.C
            TT = self.time.shape[0]
            mu_g = self.pca.mu_g
            mu_psi = self.pca.mu_psi
            vec = np.zeros((M, n))
            psi = np.zeros((TT, n))
            binsize = np.mean(np.diff(self.time))
            for i in range(0, n):
                psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                out, theta = geo.inv_exp_map(mu_psi, psi[:, i])
                vec[:, i] = out

            g = np.vstack((qn1, C * vec))
            a = np.zeros((n, no))
            for i in range(0, n):
                for j in range(0, no):
                    tmp = (g[:, i] - mu_g)
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif self.pca.__class__.__name__ == 'fdavpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            a = np.zeros((n, no))
            for i in range(0, n):
                for j in range(0, no):
                    tmp = (qn1[:, i] - self.pca.mqn)
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif self.pca.__class__.__name__ == 'fdahpca':
            a = np.zeros((n, no))
            mu_psi = self.pca.psi_mu
            vec = np.zeros((M, n))
            TT = self.time.shape[0]
            psi = np.zeros((TT, n))
            binsize = np.mean(np.diff(self.time))
            for i in range(0, n):
                psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                out, theta = geo.inv_exp_map(mu_psi, psi[:, i])
                vec[:, i] = out

            vm = self.pca.vec.mean(axis=1)
            for i in range(0, n):
                for j in range(0, no):
                    a[i, j] = np.sum(np.dot(vec[:, i] - vm, U[:, j]))
        else:
            raise Exception('Invalid fPCA Method')

        for ii in range(0, n):
            self.y_pred[ii] = self.alpha + np.dot(a[ii, :], self.b)

        if y is None:
            self.SSE = np.nan
        else:
            self.SSE = np.sum((y - self.y_pred) ** 2)
    else:
        n = self.pca.coef.shape[0]
        self.y_pred = np.zeros(n)
        for ii in range(0, n):
            self.y_pred[ii] = self.alpha + np.dot(self.pca.coef[ii, :],
                                                  self.b)

        self.SSE = np.sum((self.y - self.y_pred) ** 2)

    return
def elastic_prediction(f, time, model, y=None, smooth=False):
    """
    This function performs prediction from an elastic regression model
    with phase-variability

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param model: identified model from elastic_regression
    :param y: truth, optional used to calculate SSE

    :rtype: tuple of numpy array
    :return y_pred: predicted values (linear) or probabilities
                    (logistic/mlogistic)
    :return y_labels: predicted labels (logistic/mlogistic)
    :return SSE: sum of squared error (linear, when y is given)
    :return PC: probability of classification (logistic/mlogistic, when y
                is given)
    """
    q = uf.f_to_srsf(f, time, smooth)
    n = q.shape[1]

    if model.type == 'linear' or model.type == 'logistic':
        y_pred = np.zeros(n)
    elif model.type == 'mlogistic':
        m = model.n_classes
        y_pred = np.zeros((n, m))

    for ii in range(0, n):
        diff = model.q - q[:, ii][:, np.newaxis]
        dist = np.sum(np.abs(diff) ** 2, axis=0) ** (1. / 2)
        q_tmp = uf.warp_q_gamma(time, q[:, ii],
                                model.gamma[:, dist.argmin()])
        if model.type == 'linear':
            y_pred[ii] = model.alpha + trapz(q_tmp * model.beta, time)
        elif model.type == 'logistic':
            y_pred[ii] = model.alpha + trapz(q_tmp * model.beta, time)
        elif model.type == 'mlogistic':
            for jj in range(0, m):
                y_pred[ii, jj] = model.alpha[jj] + trapz(
                    q_tmp * model.beta[:, jj], time)

    if y is None:
        if model.type == 'linear':
            SSE = None
        elif model.type == 'logistic':
            y_pred = phi(y_pred)
            y_labels = np.ones(n)
            y_labels[y_pred < 0.5] = -1
            PC = None
        elif model.type == 'mlogistic':
            y_pred = phi(y_pred.ravel())
            y_pred = y_pred.reshape(n, m)
            y_labels = y_pred.argmax(axis=1) + 1
            PC = None
    else:
        if model.type == 'linear':
            SSE = sum((y - y_pred) ** 2)
        elif model.type == 'logistic':
            y_pred = phi(y_pred)
            y_labels = np.ones(n)
            y_labels[y_pred < 0.5] = -1
            TP = sum(y[y_labels == 1] == 1)
            FP = sum(y[y_labels == -1] == 1)
            TN = sum(y[y_labels == -1] == -1)
            FN = sum(y[y_labels == 1] == -1)
            PC = (TP + TN) / float(TP + FP + FN + TN)
        elif model.type == 'mlogistic':
            y_pred = phi(y_pred.ravel())
            y_pred = y_pred.reshape(n, m)
            y_labels = y_pred.argmax(axis=1) + 1
            PC = np.zeros(m)
            cls_set = np.arange(1, m + 1)
            for ii in range(0, m):
                cls_sub = np.delete(cls_set, ii)
                TP = sum(y[y_labels == (ii + 1)] == (ii + 1))
                FP = sum(y[np.in1d(y_labels, cls_sub)] == (ii + 1))
                TN = sum(y[np.in1d(y_labels, cls_sub)] ==
                         y_labels[np.in1d(y_labels, cls_sub)])
                FN = sum(np.in1d(y[y_labels == (ii + 1)], cls_sub))
                PC[ii] = (TP + TN) / float(TP + FP + FN + TN)

            PC = sum(y == y_labels) / float(y_labels.size)

    if model.type == 'linear':
        prediction = collections.namedtuple('prediction', ['y_pred', 'SSE'])
        out = prediction(y_pred, SSE)
    elif model.type == 'logistic':
        prediction = collections.namedtuple('prediction', ['y_prob',
                                                           'y_labels', 'PC'])
        out = prediction(y_pred, y_labels, PC)
    elif model.type == 'mlogistic':
        prediction = collections.namedtuple('prediction', ['y_prob',
                                                           'y_labels', 'PC'])
        out = prediction(y_pred, y_labels, PC)

    return out
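# A minimal usage sketch for elastic_prediction, chaining it to a model fit
# by elastic_regression above (y is passed so SSE is computed):
def _example_elastic_prediction():
    import numpy as np
    time = np.linspace(0, 1, 101)
    centers = np.linspace(0.35, 0.65, 30)
    f = np.column_stack([np.exp(-(time - c) ** 2 / 0.01) for c in centers])
    y = 2.0 * centers
    model = elastic_regression(f, y, time, max_itr=5)
    out = elastic_prediction(f, time, model, y=y)
    return out.y_pred, out.SSE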
def multiple_align_functions(self, mu, omethod="DP", smoothdata=False,
                             parallel=False, lam=0.0, cores=-1):
    """
    This function aligns a collection of functions to a specified template
    using the elastic square-root slope (srsf) framework.

    Usage:  obj.multiple_align_functions(mu)
            obj.multiple_align_functions(mu, omethod="DP2")
            obj.multiple_align_functions(mu, lam=0.1, ...)

    :param mu: vector of function to align to
    :param omethod: optimization method (DP, DP2) (default = DP)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :param parallel: run in parallel (default = F)
    :param lam: controls the elasticity (default = 0)
    :param cores: number of cores for parallel (default = -1 (all))
    :type lam: double
    :type smoothdata: bool
    """
    M = self.f.shape[0]
    N = self.f.shape[1]
    self.lam = lam

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True

    eps = np.finfo(np.double).eps
    self.method = omethod
    self.type = "multiple"

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(self.time, self.f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    mq = uf.f_to_srsf(mu, self.time)

    if parallel:
        out = Parallel(n_jobs=cores)(delayed(uf.optimum_reparam)(
            mq, self.time, q[:, n], omethod, lam, mu[0], f[0, n])
            for n in range(N))
        gam = np.array(out)
        gam = gam.transpose()
    else:
        gam = np.zeros((M, N))
        for k in range(0, N):
            gam[:, k] = uf.optimum_reparam(mq, self.time, q[:, k],
                                           omethod, lam, mu[0], f[0, k])

    self.gamI = uf.SqrtMeanInverse(gam)

    fn = np.zeros((M, N))
    qn = np.zeros((M, N))
    for k in range(0, N):
        fn[:, k] = np.interp((self.time[-1] - self.time[0]) * gam[:, k]
                             + self.time[0], self.time, f[:, k])
        # SRSF of the aligned function; the original line computed the SRSF
        # of the unaligned f[:, k], leaving qn inconsistent with fn
        qn[:, k] = uf.f_to_srsf(fn[:, k], self.time)

    # Aligned data & stats
    self.fn = fn
    self.qn = qn
    self.q0 = q
    mean_f0 = f.mean(axis=1)
    std_f0 = f.std(axis=1)
    mean_fn = self.fn.mean(axis=1)
    std_fn = self.fn.std(axis=1)
    self.gam = gam
    self.mqn = mq
    self.fmean = mu

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (self.time[-1] - self.time[0]) * gam[:, k] + self.time[0]
        fgam[:, k] = np.interp(time0, self.time, self.fmean)

    var_fgam = fgam.var(axis=1)
    self.orig_var = trapz(std_f0**2, self.time)
    self.amp_var = trapz(std_fn**2, self.time)
    self.phase_var = trapz(var_fgam, self.time)

    return
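# Example (minimal sketch): aligning a small collection of functions to a
# chosen template. Assumes the surrounding class is constructed as
# fdawarp(f, time) with .f and .time attributes (an assumption based on the
# attributes used above; substitute the actual class in this module).
def _example_multiple_align():
    import numpy as np
    M, N = 101, 10
    time = np.linspace(0, 1, M)
    f = np.zeros((M, N))
    for k in range(N):
        shift = 0.05 * np.random.randn()
        f[:, k] = np.exp(-(time - 0.5 + shift)**2 / 0.01)
    obj = fdawarp(f, time)                 # hypothetical constructor
    obj.multiple_align_functions(f[:, 0])  # align everything to curve 0
    print("amplitude var:", obj.amp_var, "phase var:", obj.phase_var)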
def pairwise_align_bayes(f1i, f2i, time, mcmcopts=None):
    """
    This function aligns two functions using a Bayesian framework. It will
    align f2 to f1. It is based on mapping warping functions to a
    hypersphere, and a subsequent exponential mapping to a tangent space.
    In the tangent space, the Z-mixture pCN algorithm is used to explore
    both local and global structure in the posterior distribution.

    The Z-mixture pCN algorithm uses a mixture distribution for the proposal
    distribution, controlled by input parameter zpcn. The zpcn["betas"] must
    be between 0 and 1, and are the coefficients of the mixture components,
    with larger coefficients corresponding to larger shifts in parameter
    space. The zpcn["probs"] give the probability of each shift size.

    Usage:  out = pairwise_align_bayes(f1i, f2i, time)
            out = pairwise_align_bayes(f1i, f2i, time, mcmcopts)

    :param f1i: vector defining M samples of function 1
    :param f2i: vector defining M samples of function 2
    :param time: time vector of length M
    :param mcmcopts: dict of mcmc parameters
    :type mcmcopts: dict

    default mcmc options:
    tmp = {"betas":np.array([0.5,0.5,0.005,0.0001]),
           "probs":np.array([0.1,0.1,0.7,0.1])}
    mcmcopts = {"iter":2*(10**4),
                "burnin":np.minimum(5*(10**3),2*(10**4)//2),
                "alpha0":0.1, "beta0":0.1, "zpcn":tmp, "propvar":1,
                "initcoef":np.repeat(0,20), "npoints":200, "extrainfo":True}

    :rtype collection containing
    :return f2_warped: aligned f2
    :return gamma: warping function
    :return g_coef: final g_coef
    :return psi: final psi
    :return sigma1: final sigma

    if extrainfo
    :return accept: accept of psi samples
    :return betas_ind
    :return logl: log likelihood
    :return gamma_mat: posterior gammas
    :return gamma_stats: posterior gamma stats
    :return xdist: phase distance posterior
    :return ydist: amplitude distance posterior
    """
    if mcmcopts is None:
        tmp = {
            "betas": np.array([0.5, 0.5, 0.005, 0.0001]),
            "probs": np.array([0.1, 0.1, 0.7, 0.1])
        }
        mcmcopts = {
            "iter": 2 * (10**4),
            "burnin": np.minimum(5 * (10**3), 2 * (10**4) // 2),
            "alpha0": 0.1,
            "beta0": 0.1,
            "zpcn": tmp,
            "propvar": 1,
            "initcoef": np.repeat(0, 20),
            "npoints": 200,
            "extrainfo": True
        }

    if f1i.shape[0] != f2i.shape[0]:
        raise Exception('Length of f1 and f2 must be equal')

    if f1i.shape[0] != time.shape[0]:
        raise Exception('Length of f1 and time must be equal')

    if mcmcopts["zpcn"]["betas"].shape[0] != mcmcopts["zpcn"]["probs"].shape[0]:
        raise Exception('In zpcn, betas must equal length of probs')

    if np.mod(mcmcopts["initcoef"].shape[0], 2) != 0:
        raise Exception('Length of mcmcopts.initcoef must be even')

    # Number of sig figs to report in gamma_mat
    SIG_GAM = 13
    iter = mcmcopts["iter"]

    # parameter settings
    pw_sim_global_burnin = mcmcopts["burnin"]
    valid_index = np.arange(pw_sim_global_burnin - 1, iter)
    pw_sim_global_Mg = mcmcopts["initcoef"].shape[0] // 2
    g_coef_ini = mcmcopts["initcoef"]
    numSimPoints = mcmcopts["npoints"]
    pw_sim_global_domain_par = np.linspace(0, 1, numSimPoints)
    g_basis = uf.basis_fourier(pw_sim_global_domain_par, pw_sim_global_Mg, 1)
    sigma1_ini = 1
    zpcn = mcmcopts["zpcn"]
    pw_sim_global_sigma_g = mcmcopts["propvar"]

    def propose_g_coef(g_coef_curr):
        pCN_beta = zpcn["betas"]
        pCN_prob = zpcn["probs"]
        probm = np.insert(np.cumsum(pCN_prob), 0, 0)
        z = np.random.rand()
        result = {"prop": g_coef_curr, "ind": 1}
        for i in range(0, pCN_beta.shape[0]):
            if z <= probm[i + 1] and z > probm[i]:
                g_coef_new = normal(
                    0, pw_sim_global_sigma_g /
                    np.repeat(np.arange(1, pw_sim_global_Mg + 1), 2))
                result["prop"] = np.sqrt(1 - pCN_beta[i]**2) * g_coef_curr \
                    + pCN_beta[i] * g_coef_new
                result["ind"] = i

        return result

    # normalize time to [0,1]
    time = (time - time.min()) / (time.max() - time.min())
    timet = np.linspace(0, 1, numSimPoints)
    f1 = uf.f_predictfunction(f1i, timet, 0)
    f2 = uf.f_predictfunction(f2i, timet, 0)

    # srsf transformation
    q1 = uf.f_to_srsf(f1, timet)
    q1i = uf.f_to_srsf(f1i, time)
    q2 = uf.f_to_srsf(f2, timet)

    tmp = uf.f_exp1(uf.f_basistofunction(g_basis["x"], 0, g_coef_ini,
                                         g_basis))

    if tmp.min() < 0:
        raise Exception("Invalid initial value of g")

    # result vectors
    g_coef = np.zeros((iter, g_coef_ini.shape[0]))
    sigma1 = np.zeros(iter)
    logl = np.zeros(iter)
    SSE = np.zeros(iter)
    accept = np.zeros(iter, dtype=bool)
    accept_betas = np.zeros(iter)

    # init
    g_coef_curr = g_coef_ini
    sigma1_curr = sigma1_ini
    SSE_curr = f_SSEg_pw(
        uf.f_basistofunction(g_basis["x"], 0, g_coef_ini, g_basis), q1, q2)
    logl_curr = f_logl_pw(
        uf.f_basistofunction(g_basis["x"], 0, g_coef_ini, g_basis), q1, q2,
        sigma1_ini**2, SSE_curr)

    g_coef[0, :] = g_coef_ini
    sigma1[0] = sigma1_ini
    SSE[0] = SSE_curr
    logl[0] = logl_curr

    # update the chain for iter-1 times
    for m in tqdm(range(1, iter)):
        # update g
        g_coef_curr, tmp, SSE_curr, accepti, zpcnInd = f_updateg_pw(
            g_coef_curr, g_basis, sigma1_curr**2, q1, q2, SSE_curr,
            propose_g_coef)

        # update sigma1
        newshape = q1.shape[0] / 2 + mcmcopts["alpha0"]
        newscale = 1 / 2 * SSE_curr + mcmcopts["beta0"]
        sigma1_curr = np.sqrt(1 / np.random.gamma(newshape, 1 / newscale))
        logl_curr = f_logl_pw(
            uf.f_basistofunction(g_basis["x"], 0, g_coef_curr, g_basis),
            q1, q2, sigma1_curr**2, SSE_curr)

        # save updates to results
        g_coef[m, :] = g_coef_curr
        sigma1[m] = sigma1_curr
        SSE[m] = SSE_curr
        if mcmcopts["extrainfo"]:
            logl[m] = logl_curr
            accept[m] = accepti
            accept_betas[m] = zpcnInd

    # calculate posterior mean of psi
    pw_sim_est_psi_matrix = np.zeros((numSimPoints, valid_index.shape[0]))
    for k in range(0, valid_index.shape[0]):
        g_temp = uf.f_basistofunction(g_basis["x"], 0,
                                      g_coef[valid_index[k], :], g_basis)
        psi_temp = uf.f_exp1(g_temp)
        pw_sim_est_psi_matrix[:, k] = psi_temp

    result_posterior_psi_simDomain = uf.f_psimean(pw_sim_global_domain_par,
                                                  pw_sim_est_psi_matrix)

    # resample to same number of points as the input f1 and f2
    interp = interp1d(
        np.linspace(0, 1, result_posterior_psi_simDomain.shape[0]),
        result_posterior_psi_simDomain, fill_value="extrapolate")
    result_posterior_psi = interp(np.linspace(0, 1, f1i.shape[0]))

    # transform posterior mean of psi to gamma
    result_posterior_gamma = uf.f_phiinv(result_posterior_psi)
    result_posterior_gamma = uf.norm_gam(result_posterior_gamma)

    # warped f2
    f2_warped = uf.warp_f_gamma(time, f2i, result_posterior_gamma)

    if mcmcopts["extrainfo"]:
        M, N = pw_sim_est_psi_matrix.shape
        gamma_mat = np.zeros((time.shape[0], N))
        one_v = np.ones(M)
        Dx = np.zeros(N)
        # separate array; the original `Dy = Dx` aliased the two distance
        # vectors so xdist and ydist came out identical
        Dy = np.zeros(N)
        for ii in range(0, N):
            interp = interp1d(
                np.linspace(0, 1, result_posterior_psi_simDomain.shape[0]),
                pw_sim_est_psi_matrix[:, ii], fill_value="extrapolate")
            result_i = interp(time)
            tmp = uf.f_phiinv(result_i)
            gamma_mat[:, ii] = uf.norm_gam(tmp)
            v, theta = geo.inv_exp_map(one_v, pw_sim_est_psi_matrix[:, ii])
            Dx[ii] = np.sqrt(trapz(v**2, pw_sim_global_domain_par))
            q2warp = uf.warp_q_gamma(pw_sim_global_domain_par, q2,
                                     gamma_mat[:, ii])
            Dy[ii] = np.sqrt(trapz((q1i - q2warp)**2, time))

        gamma_stats = uf.statsFun(gamma_mat)

    # note: the extra outputs below assume mcmcopts["extrainfo"] is True
    results_o = collections.namedtuple('align_bayes', [
        'f2_warped', 'gamma', 'g_coef', 'psi', 'sigma1', 'accept',
        'betas_ind', 'logl', 'gamma_mat', 'gamma_stats', 'xdist', 'ydist'
    ])

    out = results_o(f2_warped, result_posterior_gamma, g_coef,
                    result_posterior_psi, sigma1, accept[1:],
                    accept_betas[1:], logl, gamma_mat, gamma_stats, Dx, Dy)
    return out
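# Example (illustrative sketch): Bayesian pairwise alignment of two shifted
# bumps with a short chain. The reduced "iter"/"burnin" settings are only to
# keep the demo fast; the defaults are more appropriate in practice.
def _example_pairwise_align_bayes():
    import numpy as np
    M = 101
    time = np.linspace(0, 1, M)
    f1 = np.exp(-(time - 0.45)**2 / 0.01)
    f2 = np.exp(-(time - 0.55)**2 / 0.01)
    tmp = {"betas": np.array([0.5, 0.5, 0.005, 0.0001]),
           "probs": np.array([0.1, 0.1, 0.7, 0.1])}
    opts = {"iter": 2000, "burnin": 1000, "alpha0": 0.1, "beta0": 0.1,
            "zpcn": tmp, "propvar": 1, "initcoef": np.repeat(0, 20),
            "npoints": 200, "extrainfo": True}
    out = pairwise_align_bayes(f1, f2, time, opts)
    print("mean phase distance:", out.xdist.mean())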
def predict(self, newdata=None):
    """
    This function performs prediction with the regression model on new
    data if available, or on the data currently stored in the object

    Usage:  obj.predict()
            obj.predict(newdata)

    :param newdata: dict containing new data for prediction (needs the
                    keys below; if None, predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter
    """
    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata['y']
        q = uf.f_to_srsf(f, time, newdata['smooth'])

        n = f.shape[1]
        m = self.n_classes
        yhat = np.zeros((n, m))
        for ii in range(0, n):
            diff = self.q - q[:, ii][:, np.newaxis]
            dist = np.sum(np.abs(diff)**2, axis=0)**(1. / 2)
            q_tmp = uf.warp_q_gamma(time, q[:, ii],
                                    self.gamma[:, dist.argmin()])
            for jj in range(0, m):
                yhat[ii, jj] = self.alpha[jj] + trapz(
                    q_tmp * self.beta[:, jj], time)

        if y is None:
            yhat = phi(yhat.ravel())
            yhat = yhat.reshape(n, m)
            y_labels = yhat.argmax(axis=1) + 1
            self.PC = None
        else:
            yhat = phi(yhat.ravel())
            yhat = yhat.reshape(n, m)
            y_labels = yhat.argmax(axis=1) + 1
            PC = np.zeros(m)
            cls_set = np.arange(1, m + 1)
            for ii in range(0, m):
                cls_sub = np.delete(cls_set, ii)
                TP = sum(y[y_labels == (ii + 1)] == (ii + 1))
                FP = sum(y[np.in1d(y_labels, cls_sub)] == (ii + 1))
                TN = sum(y[np.in1d(y_labels, cls_sub)] ==
                         y_labels[np.in1d(y_labels, cls_sub)])
                FN = sum(np.in1d(y[y_labels == (ii + 1)], cls_sub))
                PC[ii] = (TP + TN) / float(TP + FP + FN + TN)
            # overall probability of classification
            self.PC = sum(y == y_labels) / float(y_labels.size)

        self.y_pred = yhat
        self.y_labels = y_labels
    else:
        n = self.f.shape[1]
        m = self.n_classes
        yhat = np.zeros((n, m))
        for ii in range(0, n):
            diff = self.q - self.q[:, ii][:, np.newaxis]
            dist = np.sum(np.abs(diff)**2, axis=0)**(1. / 2)
            q_tmp = uf.warp_q_gamma(self.time, self.q[:, ii],
                                    self.gamma[:, dist.argmin()])
            for jj in range(0, m):
                yhat[ii, jj] = self.alpha[jj] + trapz(
                    q_tmp * self.beta[:, jj], self.time)

        yhat = phi(yhat.ravel())
        yhat = yhat.reshape(n, m)
        y_labels = yhat.argmax(axis=1) + 1
        PC = np.zeros(m)
        cls_set = np.arange(1, m + 1)
        for ii in range(0, m):
            cls_sub = np.delete(cls_set, ii)
            TP = sum(self.y[y_labels == (ii + 1)] == (ii + 1))
            FP = sum(self.y[np.in1d(y_labels, cls_sub)] == (ii + 1))
            TN = sum(self.y[np.in1d(y_labels, cls_sub)] ==
                     y_labels[np.in1d(y_labels, cls_sub)])
            FN = sum(np.in1d(self.y[y_labels == (ii + 1)], cls_sub))
            PC[ii] = (TP + TN) / float(TP + FP + FN + TN)
        self.PC = sum(self.y == y_labels) / float(y_labels.size)
        self.y_pred = yhat
        self.y_labels = y_labels

    return
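# Example (minimal sketch): calling predict on new data. `mdl` is assumed to
# be an already-fitted instance of the classifier class this method belongs
# to; the dict carries exactly the keys read above (f, time, y, smooth).
def _example_predict_newdata(mdl, f_test, time, y_test):
    # smooth=False skips the box-filter preprocessing
    newdata = {"f": f_test, "time": time, "y": y_test, "smooth": False}
    mdl.predict(newdata)
    print("labels:", mdl.y_labels, "overall PC:", mdl.PC)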
def align_fPCA(f, time, num_comp=3, showplot=True, smoothdata=False,
               cores=-1):
    """
    Aligns a collection of functions while extracting principal components.
    The functions are aligned to the principal components

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param num_comp: number of fPCA components
    :param showplot: Shows plots of results using matplotlib (default = T)
    :param smoothdata: Smooth the data using a box filter (default = F)
    :param cores: number of cores for parallel (default = -1 (all))
    :type smoothdata: bool
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return q0: original srvf - similar structure to fn
    :return mqn: srvf mean or median - vector of length M
    :return gam: warping functions - similar structure to fn
    :return q_pca: srsf principal directions
    :return f_pca: functional principal directions
    :return latent: latent values
    :return coef: coefficients
    :return U: eigenvectors
    :return orig_var: Original Variance of Functions
    :return amp_var: Amplitude Variance
    :return phase_var: Phase Variance
    """
    lam = 0.0
    MaxItr = 50
    coef = np.arange(-2., 3.)
    Nstd = coef.shape[0]
    M = f.shape[0]
    N = f.shape[1]
    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    eps = np.finfo(np.double).eps
    f0 = f

    if showplot:
        plot.f_plot(time, f, title="Original Data")

    # Compute SRSF function from data
    f, g, g2 = uf.gradient_spline(time, f, smoothdata)
    q = g / np.sqrt(abs(g) + eps)

    print("Initializing...")
    mnq = q.mean(axis=1)
    a = mnq.repeat(N)
    d1 = a.reshape(M, N)
    d = (q - d1)**2
    dqq = np.sqrt(d.sum(axis=0))
    min_ind = dqq.argmin()

    print("Aligning %d functions in SRVF space to %d fPCA components..."
          % (N, num_comp))
    itr = 0
    mq = np.zeros((M, MaxItr + 1))
    mq[:, itr] = q[:, min_ind]
    fi = np.zeros((M, N, MaxItr + 1))
    fi[:, :, 0] = f
    qi = np.zeros((M, N, MaxItr + 1))
    qi[:, :, 0] = q
    gam = np.zeros((M, N, MaxItr + 1))
    cost = np.zeros(MaxItr + 1)

    while itr < MaxItr:
        print("updating step: r=%d" % (itr + 1))
        if itr == MaxItr:
            print("maximal number of iterations is reached")

        # PCA Step
        a = mq[:, itr].repeat(N)
        d1 = a.reshape(M, N)
        qhat_cent = qi[:, :, itr] - d1
        K = np.cov(qi[:, :, itr])
        U, s, V = svd(K)

        alpha_i = np.zeros((num_comp, N))
        for ii in range(0, num_comp):
            for jj in range(0, N):
                alpha_i[ii, jj] = trapz(qhat_cent[:, jj] * U[:, ii], time)

        U1 = U[:, 0:num_comp]
        tmp = U1.dot(alpha_i)
        qhat = d1 + tmp

        # Matching Step
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(uf.optimum_reparam)(qhat[:, n], time, qi[:, n, itr],
                                            "DP", lam) for n in range(N))
            gam_t = np.array(out)
            gam[:, :, itr] = gam_t.transpose()
        else:
            gam[:, :, itr] = uf.optimum_reparam(qhat, time, qi[:, :, itr],
                                                "DP", lam)

        for k in range(0, N):
            time0 = (time[-1] - time[0]) * gam[:, k, itr] + time[0]
            fi[:, k, itr + 1] = np.interp(time0, time, fi[:, k, itr])
            qi[:, k, itr + 1] = uf.f_to_srsf(fi[:, k, itr + 1], time)

        qtemp = qi[:, :, itr + 1]
        mq[:, itr + 1] = qtemp.mean(axis=1)

        cost_temp = np.zeros(N)
        for ii in range(0, N):
            cost_temp[ii] = norm(qtemp[:, ii] - qhat[:, ii])**2

        cost[itr + 1] = cost_temp.mean()

        if abs(cost[itr + 1] - cost[itr]) < 1e-06:
            break

        itr += 1

    if itr >= MaxItr:
        itrf = MaxItr
    else:
        itrf = itr + 1
    cost = cost[1:(itrf + 1)]

    # Aligned data & stats
    fn = fi[:, :, itrf]
    qn = qi[:, :, itrf]
    q0 = qi[:, :, 0]
    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mqn = mq[:, itrf]
    gamf = gam[:, :, 0]
    for k in range(1, itr):
        gam_k = gam[:, :, k]
        for l in range(0, N):
            time0 = (time[-1] - time[0]) * gam_k[:, l] + time[0]
            gamf[:, l] = np.interp(time0, time, gamf[:, l])

    # Center Mean
    gamI = uf.SqrtMeanInverse(gamf)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    time0 = (time[-1] - time[0]) * gamI + time[0]
    mqn = np.interp(time0, time, mqn) * np.sqrt(gamI_dev)
    for k in range(0, N):
        qn[:, k] = np.interp(time0, time, qn[:, k]) * np.sqrt(gamI_dev)
        fn[:, k] = np.interp(time0, time, fn[:, k])
        gamf[:, k] = np.interp(time0, time, gamf[:, k])

    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)

    # Get Final PCA
    mididx = int(np.round(time.shape[0] / 2))
    m_new = np.sign(fn[mididx, :]) * np.sqrt(np.abs(fn[mididx, :]))
    mqn2 = np.append(mqn, m_new.mean())
    qn2 = np.vstack((qn, m_new))
    K = np.cov(qn2)

    U, s, V = svd(K)
    stdS = np.sqrt(s)

    # compute the PCA in the q domain
    q_pca = np.ndarray(shape=(M + 1, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca[:, l, k] = mqn2 + coef[l] * stdS[k] * U[:, k]

    # compute the correspondence in the f domain
    f_pca = np.ndarray(shape=(M, Nstd, num_comp), dtype=float)
    for k in range(0, num_comp):
        for l in range(0, Nstd):
            q_pca_tmp = q_pca[0:M, l, k] * np.abs(q_pca[0:M, l, k])
            q_pca_tmp2 = np.sign(q_pca[M, l, k]) * (q_pca[M, l, k]**2)
            # cumtrapzmid integrates outward from the midpoint index
            # (the original call carried a redundant extra argument)
            f_pca[:, l, k] = uf.cumtrapzmid(time, q_pca_tmp, q_pca_tmp2,
                                            mididx)

    N2 = qn.shape[1]
    c = np.zeros((N2, num_comp))
    for k in range(0, num_comp):
        for l in range(0, N2):
            c[l, k] = sum((np.append(qn[:, l], m_new[l]) - mqn2) * U[:, k])

    if showplot:
        CBcdict = {
            'Bl': (0, 0, 0),
            'Or': (.9, .6, 0),
            'SB': (.35, .7, .9),
            'bG': (0, .6, .5),
            'Ye': (.95, .9, .25),
            'Bu': (0, .45, .7),
            'Ve': (.8, .4, 0),
            'rP': (.8, .6, .7),
        }
        cl = sorted(CBcdict.keys())

        # Align Plots
        fig, ax = plot.f_plot(np.arange(0, M) / float(M - 1), gamf,
                              title="Warping Functions")
        ax.set_aspect('equal')

        plot.f_plot(time, fn, title="Warped Data")

        tmp = np.array([mean_f0, mean_f0 + std_f0, mean_f0 - std_f0])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Original Data: Mean $\pm$ STD")

        tmp = np.array([mean_fn, mean_fn + std_fn, mean_fn - std_fn])
        tmp = tmp.transpose()
        plot.f_plot(time, tmp, title=r"Warped Data: Mean $\pm$ STD")

        # PCA Plots (the deprecated axes.hold() calls were dropped;
        # overplotting is the matplotlib default)
        fig, ax = plt.subplots(2, num_comp)
        for k in range(0, num_comp):
            axt = ax[0, k]
            for l in range(0, Nstd):
                axt.plot(time, q_pca[0:M, l, k], color=CBcdict[cl[l]])
            axt.set_title('q domain: PD %d' % (k + 1))
            plot.rstyle(axt)
            axt = ax[1, k]
            for l in range(0, Nstd):
                axt.plot(time, f_pca[:, l, k], color=CBcdict[cl[l]])
            axt.set_title('f domain: PD %d' % (k + 1))
            plot.rstyle(axt)
        fig.set_tight_layout(True)

        cumm_coef = 100 * np.cumsum(s) / sum(s)
        idx = np.arange(0, M + 1) + 1
        plot.f_plot(idx, cumm_coef, "Coefficient Cumulative Percentage")
        # the original labels were swapped: the x axis is the component
        # index, the y axis the cumulative percentage
        plt.xlabel("Index")
        plt.ylabel("Percentage")
        plt.show()

    mean_f0 = f0.mean(axis=1)
    std_f0 = f0.std(axis=1)
    mean_fn = fn.mean(axis=1)
    std_fn = fn.std(axis=1)

    tmp = np.zeros(M)
    tmp[1:] = cumtrapz(mqn * np.abs(mqn), time)
    fmean = np.mean(f0[1, :]) + tmp

    fgam = np.zeros((M, N))
    for k in range(0, N):
        time0 = (time[-1] - time[0]) * gamf[:, k] + time[0]
        fgam[:, k] = np.interp(time0, time, fmean)

    var_fgam = fgam.var(axis=1)
    orig_var = trapz(std_f0**2, time)
    amp_var = trapz(std_fn**2, time)
    phase_var = trapz(var_fgam, time)

    K = np.cov(fn)
    U, s, V = svd(K)

    align_fPCAresults = collections.namedtuple('align_fPCA', [
        'fn', 'qn', 'q0', 'mqn', 'gam', 'q_pca', 'f_pca', 'latent', 'coef',
        'U', 'orig_var', 'amp_var', 'phase_var', 'cost'
    ])

    out = align_fPCAresults(fn, qn, q0, mqn, gamf, q_pca, f_pca, s, c, U,
                            orig_var, amp_var, phase_var, cost)
    return out
def elastic_regression(f, y, time, B=None, lam=0, df=20, max_itr=20,
                       cores=-1, smooth=False):
    """
    This function identifies a regression model with phase-variability
    using elastic methods

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param y: numpy array of N responses
    :param time: vector of size M describing the sample points
    :param B: optional matrix describing Basis elements
    :param lam: regularization parameter (default 0)
    :param df: number of degrees of freedom B-spline (default 20)
    :param max_itr: maximum number of iterations (default 20)
    :param cores: number of cores for parallel processing (default all)
    :param smooth: smooth data (default False)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of numpy array
    :return alpha: alpha parameter of model
    :return beta: beta(t) of model
    :return fn: aligned functions - numpy ndarray of shape (M,N) of N
                functions with M samples
    :return qn: aligned srvfs - similar structure to fn
    :return gamma: calculated warping functions
    :return q: original training SRSFs
    :return B: basis matrix
    :return b: basis coefficients
    :return SSE: sum of squared error
    """
    M = f.shape[0]
    N = f.shape[1]

    if M > 500:
        parallel = True
    elif N > 100:
        parallel = True
    else:
        parallel = False

    binsize = np.diff(time)
    binsize = binsize.mean()

    # Create B-Spline Basis if none provided
    if B is None:
        B = bs(time, df=df, degree=4, include_intercept=True)
    Nb = B.shape[1]

    # second derivative for regularization
    Bdiff = np.zeros((M, Nb))
    for ii in range(0, Nb):
        Bdiff[:, ii] = np.gradient(np.gradient(B[:, ii], binsize), binsize)

    q = uf.f_to_srsf(f, time, smooth)

    gamma = np.tile(np.linspace(0, 1, M), (N, 1))
    gamma = gamma.transpose()

    itr = 1
    SSE = np.zeros(max_itr)
    while itr <= max_itr:
        print("Iteration: %d" % itr)
        # align data
        fn = np.zeros((M, N))
        qn = np.zeros((M, N))
        for ii in range(0, N):
            fn[:, ii] = np.interp((time[-1] - time[0]) * gamma[:, ii]
                                  + time[0], time, f[:, ii])
            qn[:, ii] = uf.warp_q_gamma(time, q[:, ii], gamma[:, ii])

        # OLS using basis
        Phi = np.ones((N, Nb + 1))
        for ii in range(0, N):
            for jj in range(1, Nb + 1):
                Phi[ii, jj] = trapz(qn[:, ii] * B[:, jj - 1], time)

        R = np.zeros((Nb + 1, Nb + 1))
        for ii in range(1, Nb + 1):
            for jj in range(1, Nb + 1):
                R[ii, jj] = trapz(Bdiff[:, ii - 1] * Bdiff[:, jj - 1], time)

        xx = dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = dot(Phi.T, y)
        b = dot(inv_xx, xy)

        alpha = b[0]
        beta = B.dot(b[1:Nb + 1])
        beta = beta.reshape(M)

        # compute the SSE
        int_X = np.zeros(N)
        for ii in range(0, N):
            int_X[ii] = trapz(qn[:, ii] * beta, time)

        SSE[itr - 1] = sum((y.reshape(N) - alpha - int_X)**2)

        # find gamma
        gamma_new = np.zeros((M, N))
        if parallel:
            out = Parallel(n_jobs=cores)(
                delayed(regression_warp)(beta, time, q[:, n], y[n], alpha)
                for n in range(N))
            gamma_new = np.array(out)
            gamma_new = gamma_new.transpose()
        else:
            for ii in range(0, N):
                gamma_new[:, ii] = regression_warp(beta, time, q[:, ii],
                                                   y[ii], alpha)

        if norm(gamma - gamma_new) < 1e-5:
            break
        else:
            gamma = gamma_new

        itr += 1

    # Last Step with centering of gam
    gamI = uf.SqrtMeanInverse(gamma_new)
    gamI_dev = np.gradient(gamI, 1 / float(M - 1))
    beta = np.interp((time[-1] - time[0]) * gamI + time[0], time,
                     beta) * np.sqrt(gamI_dev)

    for ii in range(0, N):
        qn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, qn[:, ii]) * np.sqrt(gamI_dev)
        fn[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                              time, fn[:, ii])
        gamma[:, ii] = np.interp((time[-1] - time[0]) * gamI + time[0],
                                 time, gamma_new[:, ii])

    model = collections.namedtuple('model', [
        'alpha', 'beta', 'fn', 'qn', 'gamma', 'q', 'B', 'b', 'SSE', 'type'
    ])
    out = model(alpha, beta, fn, qn, gamma, q, B, b[1:-1], SSE[0:itr],
                'linear')
    return out
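# Example (illustrative sketch): fitting the elastic linear model on toy data
# where the response drives the bump location, then checking the training fit
# with elastic_prediction above. The data is synthetic and made up.
def _example_elastic_regression():
    import numpy as np
    M, N = 101, 25
    time = np.linspace(0, 1, M)
    y = np.random.normal(size=N)
    f = np.zeros((M, N))
    for k in range(N):
        f[:, k] = np.exp(-(time - 0.5 + 0.05 * y[k])**2 / 0.01)
    model = elastic_regression(f, y, time, df=10, max_itr=3)
    pred = elastic_prediction(f, time, model, y=y)
    print("final iteration SSE:", model.SSE[-1])
    print("training prediction SSE:", pred.SSE)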