def SqrtMean(gam):
    """
    calculates the srsf of warping functions with corresponding shooting vectors

    :param gam: numpy ndarray of shape (T,n) holding n warping functions
                (one per column), each sampled at T points

    :rtype: tuple (mu, gam_mu, psi, vec)
    :return mu: Karcher mean psi function (vector of dim T)
    :return gam_mu: vector of dim T which is the Karcher mean warping function
    :return psi: numpy ndarray of shape (T,n) of the n SRSF of the warping functions
    :return vec: numpy ndarray of shape (T,n) of the n shooting vectors
    """
    (T, n) = gam.shape
    time = linspace(0, 1, T)
    binsize = mean(diff(time))

    # SRSF of each warping function: psi = sqrt(d gam / dt)
    psi = zeros((T, n))
    for k in range(0, n):
        psi[:, k] = sqrt(gradient(gam[:, k], binsize))

    # Find Direction: start from the sample psi closest to the pointwise mean
    mnpsi = psi.mean(axis=1)
    dqq = sqrt(((psi - mnpsi[:, None]) ** 2).sum(axis=0))
    mu = psi[:, dqq.argmin()]

    maxiter = 501
    tol = 0.00000001
    lvm = zeros(maxiter)
    vec = zeros((T, n))
    stp = .3
    itr = 0

    for i in range(0, n):
        vec[:, i], _ = geo.inv_exp_map(mu, psi[:, i])
    vbar = vec.mean(axis=1)
    lvm[itr] = geo.L2norm(vbar)

    # lvm has maxiter slots (indices 0..maxiter-1); itr is incremented before
    # it is used as an index, so it must stop at maxiter-1 to stay in bounds.
    while (lvm[itr] > tol) and (itr < maxiter - 1):
        mu = geo.exp_map(mu, stp * vbar)
        itr += 1
        for i in range(0, n):
            vec[:, i], _ = geo.inv_exp_map(mu, psi[:, i])
        vbar = vec.mean(axis=1)
        lvm[itr] = geo.L2norm(vbar)

    # integrate mu^2 and rescale the result to span [0, 1]
    gam_mu = cumtrapz(mu * mu, time, initial=0)
    gam_mu = (gam_mu - gam_mu.min()) / (gam_mu.max() - gam_mu.min())

    return mu, gam_mu, psi, vec
def SqrtMedian(gam):
    """
    calculates the median srsf of warping functions with corresponding shooting vectors

    :param gam: numpy ndarray of shape (T,n) holding n warping functions
                (one per column), each sampled at T points

    :rtype: tuple (gam_median, psi_median, psi, vec)
    :return gam_median: Karcher median warping function (vector of dim T)
    :return psi_median: vector of dim T which is the Karcher median srsf function
    :return psi: numpy ndarray of shape (T,n) of the n SRSF of the warping functions
    :return vec: numpy ndarray of shape (T,n) of the n shooting vectors
    """
    (T, n) = gam.shape
    time = linspace(0, 1, T)
    binsize = mean(diff(time))

    # algorithm settings
    stp = 0.3
    maxiter = 501
    tol = 0.00000001

    # Initialization: start the search at the constant function 1
    psi_median = ones(T)
    r = 1
    vbar_norm = zeros(maxiter + 1)

    psi = zeros((T, n))
    v = zeros((T, n))
    vtil = zeros((T, n))
    d = zeros(n)
    dtil = zeros(n)

    def weighted_direction():
        # sum of distance-scaled shooting vectors over the sum of inverse distances
        return vtil.sum(axis=1) * dtil.sum()**(-1)

    # compute psi function together with the initial shooting vectors
    for col in range(n):
        psi[:, col] = sqrt(gradient(gam[:, col], binsize))
        v[:, col], d[col] = geo.inv_exp_map(psi_median, psi[:, col])
        vtil[:, col] = v[:, col] / d[col]
        dtil[col] = 1 / d[col]

    vbar = weighted_direction()
    vbar_norm[r] = geo.L2norm(vbar)

    # compute phase median by iterative algorithm
    while (r < maxiter) and (vbar_norm[r] > tol):
        psi_median = geo.exp_map(psi_median, stp * vbar)
        r += 1
        for col in range(n):
            v[:, col], _ = geo.inv_exp_map(psi_median, psi[:, col])
            d[col] = arccos(geo.inner_product(psi_median, psi[:, col]))
            vtil[:, col] = v[:, col] / d[col]
            dtil[col] = 1 / d[col]

        vbar = weighted_direction()
        vbar_norm[r] = geo.L2norm(vbar)

    gam_median = cumtrapz(psi_median**2, time, initial=0.0)

    return gam_median, psi_median, psi, v
def SqrtMeanInverse(gam):
    """
    finds the inverse of the mean of the set of the diffeomorphisms gamma

    :param gam: numpy ndarray of shape (T,n) holding n warping functions
                (one per column), each sampled at T points

    :rtype: vector
    :return gamI: result of invertGamma applied to the Karcher mean warping
                  function of gam
    """
    (T, n) = gam.shape
    time = linspace(0, 1, T)
    binsize = mean(diff(time))

    # SRSF of each warping function: psi = sqrt(d gam / dt)
    psi = zeros((T, n))
    for k in range(0, n):
        psi[:, k] = sqrt(gradient(gam[:, k], binsize))

    # Find Direction: start from the sample psi closest to the pointwise mean
    mnpsi = psi.mean(axis=1)
    dqq = sqrt(((psi - mnpsi[:, None]) ** 2).sum(axis=0))
    mu = psi[:, dqq.argmin()]

    maxiter = 501
    tol = 0.00000001
    lvm = zeros(maxiter)
    vec = zeros((T, n))
    stp = .3
    itr = 0

    for i in range(0, n):
        vec[:, i], _ = geo.inv_exp_map(mu, psi[:, i])
    vbar = vec.mean(axis=1)
    lvm[itr] = geo.L2norm(vbar)

    # lvm has maxiter slots (indices 0..maxiter-1); itr is incremented before
    # it is used as an index, so it must stop at maxiter-1 to stay in bounds.
    while (lvm[itr] > tol) and (itr < maxiter - 1):
        mu = geo.exp_map(mu, stp * vbar)
        itr += 1
        for i in range(0, n):
            vec[:, i], _ = geo.inv_exp_map(mu, psi[:, i])
        vbar = vec.mean(axis=1)
        lvm[itr] = geo.L2norm(vbar)

    # integrate mu^2 and rescale the result to span [0, 1] before inverting
    gam_mu = cumtrapz(mu * mu, time, initial=0)
    gam_mu = (gam_mu - gam_mu.min()) / (gam_mu.max() - gam_mu.min())
    gamI = invertGamma(gam_mu)

    return gamI
def pairwise_align_bayes(f1i, f2i, time, mcmcopts=None):
    """
    This function aligns two functions using Bayesian framework. It will align
    f2 to f1. It is based on mapping warping functions to a hypersphere, and a
    subsequent exponential mapping to a tangent space. In the tangent space,
    the Z-mixture pCN algorithm is used to explore both local and global
    structure in the posterior distribution.

    The Z-mixture pCN algorithm uses a mixture distribution for the proposal
    distribution, controlled by input parameter zpcn. The zpcn["betas"] must be
    between 0 and 1, and are the coefficients of the mixture components, with
    larger coefficients corresponding to larger shifts in parameter space. The
    zpcn["probs"] give the probability of each shift size.

    Usage:  out = pairwise_align_bayes(f1i, f2i, time)
            out = pairwise_align_bayes(f1i, f2i, time, mcmcopts)

    :param f1i: vector defining M samples of function 1
    :param f2i: vector defining M samples of function 2
    :param time: time vector of length M
    :param mcmcopts: dict of mcmc parameters
    :type mcmcopts: dict

    default mcmc options:
    tmp = {"betas":np.array([0.5,0.5,0.005,0.0001]),"probs":np.array([0.1,0.1,0.7,0.1])}
    mcmcopts = {"iter":2*(10**4) ,"burnin":np.minimum(5*(10**3),2*(10**4)//2),
                "alpha0":0.1, "beta0":0.1,"zpcn":tmp,"propvar":1,
                "initcoef":np.repeat(0,20), "npoints":200, "extrainfo":True}

    :raises Exception: if f1i, f2i and time differ in length, if zpcn betas
                       and probs differ in length, if initcoef has odd length,
                       or if the initial coefficients give a negative psi

    :rtype collection containing
    :return f2_warped: aligned f2
    :return gamma: warping function
    :return g_coef: final g_coef
    :return psi: final psi
    :return sigma1: final sigma

    if extrainfo (logl, accept and betas_ind are left at zero and the last
    four fields are None when extrainfo is false)
    :return accept: accept of psi samples
    :return betas_ind
    :return logl: log likelihood
    :return gamma_mat: posterior gammas
    :return gamma_stats: posterior gamma stats
    :return xdist: phase distance posterior
    :return ydist: amplitude distance posterior
    """
    if mcmcopts is None:
        tmp = {
            "betas": np.array([0.5, 0.5, 0.005, 0.0001]),
            "probs": np.array([0.1, 0.1, 0.7, 0.1])
        }
        mcmcopts = {
            "iter": 2 * (10**4),
            "burnin": np.minimum(5 * (10**3), 2 * (10**4) // 2),
            "alpha0": 0.1,
            "beta0": 0.1,
            "zpcn": tmp,
            "propvar": 1,
            "initcoef": np.repeat(0, 20),
            "npoints": 200,
            "extrainfo": True
        }

    if f1i.shape[0] != f2i.shape[0]:
        raise Exception('Length of f1 and f2 must be equal')

    if f1i.shape[0] != time.shape[0]:
        raise Exception('Length of f1 and time must be equal')

    if mcmcopts["zpcn"]["betas"].shape[0] != mcmcopts["zpcn"]["probs"].shape[0]:
        raise Exception('In zpcn, betas must equal length of probs')

    if np.mod(mcmcopts["initcoef"].shape[0], 2) != 0:
        raise Exception('Length of mcmcopts.initcoef must be even')

    n_iter = mcmcopts["iter"]

    # parameter settings
    pw_sim_global_burnin = mcmcopts["burnin"]
    valid_index = np.arange(pw_sim_global_burnin - 1, n_iter)
    pw_sim_global_Mg = mcmcopts["initcoef"].shape[0] // 2
    g_coef_ini = mcmcopts["initcoef"]
    numSimPoints = mcmcopts["npoints"]
    pw_sim_global_domain_par = np.linspace(0, 1, numSimPoints)
    g_basis = uf.basis_fourier(pw_sim_global_domain_par, pw_sim_global_Mg, 1)
    sigma1_ini = 1
    zpcn = mcmcopts["zpcn"]
    pw_sim_global_sigma_g = mcmcopts["propvar"]

    def propose_g_coef(g_coef_curr):
        # Draw one mixture component according to zpcn["probs"] and blend the
        # current coefficients with a fresh normal draw using that beta.
        pCN_beta = zpcn["betas"]
        pCN_prob = zpcn["probs"]
        probm = np.insert(np.cumsum(pCN_prob), 0, 0)
        z = np.random.rand()
        result = {"prop": g_coef_curr, "ind": 1}
        for i in range(0, pCN_beta.shape[0]):
            if z <= probm[i + 1] and z > probm[i]:
                g_coef_new = normal(
                    0, pw_sim_global_sigma_g /
                    np.repeat(np.arange(1, pw_sim_global_Mg + 1), 2))
                result["prop"] = np.sqrt(
                    1 - pCN_beta[i]**2) * g_coef_curr + pCN_beta[i] * g_coef_new
                result["ind"] = i

        return result

    # normalize time to [0,1]
    time = (time - time.min()) / (time.max() - time.min())
    timet = np.linspace(0, 1, numSimPoints)
    f1 = uf.f_predictfunction(f1i, timet, 0)
    f2 = uf.f_predictfunction(f2i, timet, 0)

    # srsf transformation
    q1 = uf.f_to_srsf(f1, timet)
    q1i = uf.f_to_srsf(f1i, time)
    q2 = uf.f_to_srsf(f2, timet)

    tmp = uf.f_exp1(uf.f_basistofunction(g_basis["x"], 0, g_coef_ini, g_basis))
    if tmp.min() < 0:
        raise Exception("Invalid initial value of g")

    # result vectors
    g_coef = np.zeros((n_iter, g_coef_ini.shape[0]))
    sigma1 = np.zeros(n_iter)
    logl = np.zeros(n_iter)
    SSE = np.zeros(n_iter)
    accept = np.zeros(n_iter, dtype=bool)
    accept_betas = np.zeros(n_iter)

    # init
    g_coef_curr = g_coef_ini
    sigma1_curr = sigma1_ini
    SSE_curr = f_SSEg_pw(
        uf.f_basistofunction(g_basis["x"], 0, g_coef_ini, g_basis), q1, q2)
    logl_curr = f_logl_pw(
        uf.f_basistofunction(g_basis["x"], 0, g_coef_ini, g_basis), q1, q2,
        sigma1_ini**2, SSE_curr)

    g_coef[0, :] = g_coef_ini
    sigma1[0] = sigma1_ini
    SSE[0] = SSE_curr
    logl[0] = logl_curr

    # update the chain for n_iter-1 times
    for m in tqdm(range(1, n_iter)):
        # update g
        g_coef_curr, _, SSE_curr, accepti, zpcnInd = f_updateg_pw(
            g_coef_curr, g_basis, sigma1_curr**2, q1, q2, SSE_curr,
            propose_g_coef)

        # update sigma1
        newshape = q1.shape[0] / 2 + mcmcopts["alpha0"]
        newscale = 1 / 2 * SSE_curr + mcmcopts["beta0"]
        sigma1_curr = np.sqrt(1 / np.random.gamma(newshape, 1 / newscale))
        logl_curr = f_logl_pw(
            uf.f_basistofunction(g_basis["x"], 0, g_coef_curr, g_basis), q1,
            q2, sigma1_curr**2, SSE_curr)

        # save updates to results
        g_coef[m, :] = g_coef_curr
        sigma1[m] = sigma1_curr
        SSE[m] = SSE_curr
        if mcmcopts["extrainfo"]:
            logl[m] = logl_curr
            accept[m] = accepti
            accept_betas[m] = zpcnInd

    # calculate posterior mean of psi
    pw_sim_est_psi_matrix = np.zeros((numSimPoints, valid_index.shape[0]))
    for k in range(0, valid_index.shape[0]):
        g_temp = uf.f_basistofunction(g_basis["x"], 0,
                                      g_coef[valid_index[k], :], g_basis)
        psi_temp = uf.f_exp1(g_temp)
        pw_sim_est_psi_matrix[:, k] = psi_temp

    result_posterior_psi_simDomain = uf.f_psimean(pw_sim_global_domain_par,
                                                  pw_sim_est_psi_matrix)

    # resample to same number of points as the input f1 and f2
    interp = interp1d(np.linspace(0, 1,
                                  result_posterior_psi_simDomain.shape[0]),
                      result_posterior_psi_simDomain,
                      fill_value="extrapolate")
    result_posterior_psi = interp(np.linspace(0, 1, f1i.shape[0]))

    # transform posterior mean of psi to gamma
    result_posterior_gamma = uf.f_phiinv(result_posterior_psi)
    result_posterior_gamma = uf.norm_gam(result_posterior_gamma)

    # warped f2
    f2_warped = uf.warp_f_gamma(time, f2i, result_posterior_gamma)

    # Defaults so the result tuple can always be built; these fields are only
    # populated when extra information is requested.
    gamma_mat = None
    gamma_stats = None
    Dx = None
    Dy = None

    if mcmcopts["extrainfo"]:
        M, N = pw_sim_est_psi_matrix.shape
        gamma_mat = np.zeros((time.shape[0], N))
        one_v = np.ones(M)
        # Dx and Dy must be separate arrays: binding both names to one array
        # would let the amplitude distances overwrite the phase distances.
        Dx = np.zeros(N)
        Dy = np.zeros(N)
        for ii in range(0, N):
            interp = interp1d(np.linspace(
                0, 1, result_posterior_psi_simDomain.shape[0]),
                              pw_sim_est_psi_matrix[:, ii],
                              fill_value="extrapolate")
            result_i = interp(time)
            tmp = uf.f_phiinv(result_i)
            gamma_mat[:, ii] = uf.norm_gam(tmp)
            v, _ = geo.inv_exp_map(one_v, pw_sim_est_psi_matrix[:, ii])
            Dx[ii] = np.sqrt(trapz(v**2, pw_sim_global_domain_par))
            q2warp = uf.warp_q_gamma(pw_sim_global_domain_par, q2,
                                     gamma_mat[:, ii])
            Dy[ii] = np.sqrt(trapz((q1i - q2warp)**2, time))

        gamma_stats = uf.statsFun(gamma_mat)

    results_o = collections.namedtuple('align_bayes', [
        'f2_warped', 'gamma', 'g_coef', 'psi', 'sigma1', 'accept', 'betas_ind',
        'logl', 'gamma_mat', 'gamma_stats', 'xdist', 'ydist'
    ])

    out = results_o(f2_warped, result_posterior_gamma, g_coef,
                    result_posterior_psi, sigma1, accept[1:], accept_betas[1:],
                    logl, gamma_mat, gamma_stats, Dx, Dy)
    return (out)
def predict(self, newdata=None):
    """
    This function performs prediction on regression model on new data if
    available or current stored data in object

    Usage:  obj.predict()
            obj.predict(newdata)

    Results are stored on the object: y_pred (n, n_classes) scores after
    rg.phi, y_labels (argmax over classes), PC (per-class accuracy, or nan
    when no truth is given) and PCo (overall accuracy, set only when truth is
    available).

    :param newdata: dict containing new data for prediction (needs the keys
                    below, if None predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available (may be omitted or None)
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter (only read when smooth is set)
    """
    omethod = self.warp_data.method
    lam = self.warp_data.lam
    m = self.n_classes
    M = self.time.shape[0]

    def _linear_scores(coef, n):
        # alpha_j + <coef_i, b_j> for every sample i and class j
        scores = np.zeros((n, m))
        for ii in range(0, n):
            for jj in range(0, m):
                scores[ii, jj] = self.alpha[jj] + np.sum(
                    coef[ii, :] * self.b[:, jj])
        return scores

    def _classify(n, y_true):
        # Push the scores through rg.phi, pick labels and, when the truth is
        # known, compute the per-class and overall accuracy.
        self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
        self.y_pred = self.y_pred.reshape((n, m))
        self.y_labels = np.argmax(self.y_pred, axis=1)
        if y_true is None:
            self.PC = np.nan
            return

        self.PC = np.zeros(m)
        cls_set = np.arange(0, m)
        for ii in range(0, m):
            cls_sub = np.setdiff1d(cls_set, ii)
            is_ii = self.y_labels == ii
            is_other = np.in1d(self.y_labels, cls_sub)
            TP = np.sum(y_true[is_ii] == ii)
            FP = np.sum(y_true[is_other] == ii)
            TN = np.sum(y_true[is_other] == self.y_labels[is_other])
            FN = np.sum(np.in1d(y_true[is_ii], cls_sub))
            self.PC[ii] = (TP + TN) / (TP + FP + FN + TN)

        self.PCo = np.sum(y_true == self.y_labels) / self.y_labels.shape[0]

    def _shooting_vectors(gam, mu_psi, n):
        # SRSF of each warping function mapped to the tangent space at mu_psi
        vec = np.zeros((M, n))
        psi = np.zeros((M, n))
        binsize = np.mean(np.diff(self.time))
        for i in range(0, n):
            psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
            # inv_exp_map returns (vector, angle); only the vector is stored
            vec[:, i], _ = geo.inv_exp_map(mu_psi, psi[:, i])
        return vec

    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata.get('y')
        if newdata.get('smooth'):
            sparam = newdata['sparam']
            f = fs.smooth_data(f, sparam)

        q1 = fs.f_to_srsf(f, time)
        n = q1.shape[1]
        mq = self.warp_data.mqn
        fn = np.zeros((M, n))
        qn = np.zeros((M, n))
        gam = np.zeros((M, n))
        for ii in range(0, n):
            # same method and penalty as the stored warping
            gam[:, ii] = uf.optimum_reparam(mq, time, q1[:, ii], omethod, lam)
            fn[:, ii] = uf.warp_f_gamma(time, f[:, ii], gam[:, ii])
            qn[:, ii] = uf.f_to_srsf(fn[:, ii], time)

        U = self.pca.U
        no = U.shape[1]
        pca_name = self.pca.__class__.__name__

        a = np.zeros((n, no))
        if pca_name == 'fdajpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            vec = _shooting_vectors(gam, self.pca.mu_psi, n)
            g = np.vstack((qn1, self.pca.C * vec))
            for i in range(0, n):
                tmp = g[:, i] - self.pca.mu_g
                for j in range(0, no):
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif pca_name == 'fdavpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            for i in range(0, n):
                tmp = qn1[:, i] - self.pca.mqn
                for j in range(0, no):
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif pca_name == 'fdahpca':
            vec = _shooting_vectors(gam, self.pca.psi_mu, n)
            vm = self.pca.vec.mean(axis=1)
            for i in range(0, n):
                for j in range(0, no):
                    a[i, j] = np.sum(np.dot(vec[:, i] - vm, U[:, j]))
        else:
            raise Exception('Invalid fPCA Method')

        self.y_pred = _linear_scores(a, n)
        _classify(n, y)

    else:
        # coef is indexed by row below, so rows are the training samples
        n = self.pca.coef.shape[0]
        self.y_pred = _linear_scores(self.pca.coef, n)
        _classify(n, self.y)

    return
def construct_boxplot(self, alpha=.05, k_a=1):
    """
    This function constructs phase boxplot for functional data using the
    elastic square-root slope (srsf) framework.

    Results are stored on the object: Q1, Q3, Q1a, Q3a, minn, maxx,
    outlier_index, median_x, psi_median (also kept under the older name
    psi_media) and the namedtuple plt with the surface-plot data.

    :param alpha: quantile value (e.g.,=.05, i.e., 95%)
    :param k_a: scalar for outlier cutoff (e.g.,=1)
    """
    if self.warp_data.rsamps:
        gam = self.warp_data.gams
    else:
        gam = self.warp_data.gam

    M, N = gam.shape
    t = np.linspace(0, 1, M)
    time = t
    lam = 0.5

    # compute phase median
    median_x, psi_median, psi, _ = uf.SqrtMedian(gam)

    # compute phase distances (norm of each shooting vector)
    dx = np.zeros(N)
    v = np.zeros((M, N))
    for k in range(0, N):
        v[:, k], _ = geo.inv_exp_map(psi_median, psi[:, k])
        dx[k] = np.sqrt(trapz(v[:, k]**2, t))
    dx_ordering = dx.argsort()

    def _most_separated_pair(region):
        # Among the curves in `region`, pick the pair maximising the energy
        # that rewards large distances from the median and opposite
        # directions. Unit vectors are built as new arrays so that `v`
        # itself is never modified.
        m = dx[region].max()
        size = region.shape[0]
        angle = np.zeros((size, size))
        energy = np.zeros((size, size))
        for i in range(0, size - 1):
            vi = v[:, region[i]]
            q1 = vi / np.sqrt(trapz(vi**2, time))
            for j in range(i + 1, size):
                vj = v[:, region[j]]
                q3 = vj / np.sqrt(trapz(vj**2, time))
                angle[i, j] = trapz(q1 * q3, time)
                energy[i, j] = (1 - lam) * (dx[region[i]] / m +
                                            dx[region[j]] / m) - lam * (
                                                angle[i, j] + 1)
        row, col = np.unravel_index(energy.argmax(), energy.shape)
        return region[row], region[col]

    # identify phase quartiles
    CR_50 = dx_ordering[0:np.ceil(N / 2).astype('int')]
    Q1_index, Q3_index = _most_separated_pair(CR_50)
    Q1 = gam[:, Q1_index]
    Q3 = gam[:, Q3_index]
    Q1_psi = np.sqrt(np.gradient(Q1, 1 / (M - 1)))
    Q3_psi = np.sqrt(np.gradient(Q3, 1 / (M - 1)))

    # identify phase quantiles
    CR_alpha = dx_ordering[0:np.round(N * (1 - alpha)).astype('int')]
    Q1a_index, Q3a_index = _most_separated_pair(CR_alpha)
    Q1a = gam[:, Q1a_index]
    Q3a = gam[:, Q3a_index]

    # check quartile and quantile going in same direction
    tst = trapz(v[:, Q1a_index] * v[:, Q1_index])
    if tst < 0:
        Q1a = gam[:, Q3a_index]
        Q3a = gam[:, Q1a_index]

    # derive the psi's after the direction check so they match Q1a / Q3a
    Q1a_psi = np.sqrt(np.gradient(Q1a, 1 / (M - 1)))
    Q3a_psi = np.sqrt(np.gradient(Q3a, 1 / (M - 1)))

    # compute phase whiskers: extend each quartile direction by k_a * IQR
    IQR = dx[Q1_index] + dx[Q3_index]
    v1 = v[:, Q1_index]
    v3 = v[:, Q3_index]
    upper_v = v3 + k_a * IQR * v3 / np.sqrt(trapz(v3**2, time))
    lower_v = v1 + k_a * IQR * v1 / np.sqrt(trapz(v1**2, time))

    # the outlier cutoff is the length of the longer whisker
    upper_dis = np.sqrt(trapz(upper_v**2, time))
    lower_dis = np.sqrt(trapz(lower_v**2, time))
    whisker_dis = max(upper_dis, lower_dis)

    # identify phase outliers (listed from the farthest curve inwards)
    descending = dx_ordering[::-1]
    outlier_index = descending[dx[descending] > whisker_dis]

    # identify phase extremes among the non-outlying curves
    distance_to_upper = np.full(N, np.inf)
    distance_to_lower = np.full(N, np.inf)
    out_50_CR = np.setdiff1d(np.arange(0, N), outlier_index)
    for j in out_50_CR:
        distance_to_upper[j] = np.sqrt(trapz((upper_v - v[:, j])**2, time))
        distance_to_lower[j] = np.sqrt(trapz((lower_v - v[:, j])**2, time))

    max_index = distance_to_upper.argmin()
    min_index = distance_to_lower.argmin()
    minn = gam[:, min_index]
    maxx = gam[:, max_index]
    min_psi = psi[:, min_index]
    max_psi = psi[:, max_index]

    # Surface data: six linear segments of 100 samples that share their end
    # points, i.e. 1 + 6 * 99 = 595 columns, matching `allparts` below.
    s = np.linspace(0, 1, 100)
    knots = [minn, Q1a, Q1, median_x, Q3, Q3a, maxx]
    Fs2 = np.zeros((time.shape[0], 595))
    Fs2[:, 0] = minn - t
    for seg in range(0, 6):
        start = knots[seg] - t
        stop = knots[seg + 1] - t
        for j in range(1, 100):
            Fs2[:, 99 * seg + j] = (1 - s[j]) * start + s[j] * stop

    d1 = np.sqrt(trapz(psi_median * Q1_psi, time))
    d1a = np.sqrt(trapz(Q1_psi * Q1a_psi, time))
    dl = np.sqrt(trapz(Q1a_psi * min_psi, time))
    d3 = np.sqrt(trapz((psi_median * Q3_psi), time))
    d3a = np.sqrt(trapz((Q3_psi * Q3a_psi), time))
    du = np.sqrt(trapz((Q3a_psi * max_psi), time))

    part1 = np.linspace(-d1 - d1a - dl, -d1 - d1a, 100)
    part2 = np.linspace(-d1 - d1a, -d1, 100)
    part3 = np.linspace(-d1, 0, 100)
    part4 = np.linspace(0, d3, 100)
    part5 = np.linspace(d3, d3 + d3a, 100)
    part6 = np.linspace(d3 + d3a, d3 + d3a + du, 100)
    allparts = np.hstack((part1, part2[1:100], part3[1:100], part4[1:100],
                          part5[1:100], part6[1:100]))
    U, V = np.meshgrid(time, allparts)
    U = np.transpose(U)
    V = np.transpose(V)

    self.Q1 = Q1
    self.Q3 = Q3
    self.Q1a = Q1a
    self.Q3a = Q3a
    self.minn = minn
    self.maxx = maxx
    self.outlier_index = outlier_index
    self.median_x = median_x
    self.psi_median = psi_median
    # misspelled attribute retained for callers that already read it
    self.psi_media = psi_median

    plt = collections.namedtuple('plt', [
        'U', 'V', 'Fs2', 'allparts', 'd1', 'd1a', 'dl', 'd3', 'd3a', 'du',
        'Q1_psi', 'Q3_psi'
    ])
    self.plt = plt(U, V, Fs2, allparts, d1, d1a, dl, d3, d3a, du, Q1_psi,
                   Q3_psi)

    return
def predict(self, newdata=None):
    """
    This function performs prediction on regression model on new data if
    available or current stored data in object

    Usage:  obj.predict()
            obj.predict(newdata)

    Results are stored on the object: y_pred (one value per sample) and SSE
    (sum of squared errors against the truth, nan when no truth is given).

    :param newdata: dict containing new data for prediction (needs the keys
                    below, if None predicts on training data)
    :type newdata: dict
    :param f: (M,N) matrix of functions
    :param time: vector of time points
    :param y: truth if available (may be omitted or None)
    :param smooth: smooth data if needed
    :param sparam: number of times to run filter (only read when smooth is set)
    """
    omethod = self.warp_data.method
    lam = self.warp_data.lam
    M = self.time.shape[0]

    def _shooting_vectors(gam, mu_psi, n):
        # SRSF of each warping function mapped to the tangent space at mu_psi
        vec = np.zeros((M, n))
        psi = np.zeros((M, n))
        binsize = np.mean(np.diff(self.time))
        for i in range(0, n):
            psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
            vec[:, i], _ = geo.inv_exp_map(mu_psi, psi[:, i])
        return vec

    if newdata is not None:
        f = newdata['f']
        time = newdata['time']
        y = newdata.get('y')
        if newdata.get('smooth'):
            sparam = newdata['sparam']
            f = fs.smooth_data(f, sparam)

        q1 = fs.f_to_srsf(f, time)
        n = q1.shape[1]
        self.y_pred = np.zeros(n)
        mq = self.warp_data.mqn
        fn = np.zeros((M, n))
        qn = np.zeros((M, n))
        gam = np.zeros((M, n))
        for ii in range(0, n):
            gam[:, ii] = uf.optimum_reparam(mq, time, q1[:, ii], omethod, lam)
            fn[:, ii] = uf.warp_f_gamma(time, f[:, ii], gam[:, ii])
            qn[:, ii] = uf.f_to_srsf(fn[:, ii], time)

        U = self.pca.U
        no = U.shape[1]
        pca_name = self.pca.__class__.__name__

        a = np.zeros((n, no))
        if pca_name == 'fdajpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            vec = _shooting_vectors(gam, self.pca.mu_psi, n)
            g = np.vstack((qn1, self.pca.C * vec))
            for i in range(0, n):
                tmp = g[:, i] - self.pca.mu_g
                for j in range(0, no):
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif pca_name == 'fdavpca':
            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            for i in range(0, n):
                tmp = qn1[:, i] - self.pca.mqn
                for j in range(0, no):
                    a[i, j] = np.dot(tmp.T, U[:, j])

        elif pca_name == 'fdahpca':
            vec = _shooting_vectors(gam, self.pca.psi_mu, n)
            vm = self.pca.vec.mean(axis=1)
            for i in range(0, n):
                for j in range(0, no):
                    a[i, j] = np.dot(vec[:, i] - vm, U[:, j])
        else:
            raise Exception('Invalid fPCA Method')

        for ii in range(0, n):
            self.y_pred[ii] = self.alpha + np.dot(a[ii, :], self.b)

        if y is None:
            self.SSE = np.nan
        else:
            self.SSE = np.sum((y - self.y_pred)**2)
    else:
        # predict on the stored training coefficients (one row per sample)
        n = self.pca.coef.shape[0]
        self.y_pred = np.zeros(n)
        for ii in range(0, n):
            self.y_pred[ii] = self.alpha + np.dot(self.pca.coef[ii, :],
                                                  self.b)

        self.SSE = np.sum((self.y - self.y_pred)**2)

    return
def inv_exp_map_sub(mu, psi):
    """
    thin wrapper around geo.inv_exp_map that keeps only the first of its two
    return values

    :param mu: first argument forwarded to geo.inv_exp_map
    :param psi: second argument forwarded to geo.inv_exp_map

    :return: first element of the pair returned by geo.inv_exp_map
    """
    shooting_vector, _ = geo.inv_exp_map(mu, psi)
    return shooting_vector