def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ========================= ===============================================
    Filename                  Content
    ========================= ===============================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    mepca_OC_components.nii   Component weight maps.
    ========================= ===============================================
    """
    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        voxel_comp_weights, varex, comp_ts = run_svd(dz)

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()
        eigenvalue_elbow = getelbow(varex_norm, return_val=True)

        diff_varex_norm = np.abs(np.diff(varex_norm))
        lower_diff_varex_norm = diff_varex_norm[(len(diff_varex_norm) // 2):]
        varex_norm_thr = np.mean([lower_diff_varex_norm.max(),
                                  diff_varex_norm.min()])
        varex_norm_min = varex_norm[
            (len(diff_varex_norm) // 2) +
            np.arange(len(lower_diff_varex_norm))[lower_diff_varex_norm >= varex_norm_thr][0] + 1]
        varex_norm_cum = np.cumsum(varex_norm)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd, comp_ts.T, eimum,
                                                       t2s, t2sG, tes,
                                                       combmode, ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'voxel_comp_weights': voxel_comp_weights,
                    'varex': varex,
                    'comp_ts': comp_ts,
                    'comptable': ct_df,
                    'eigenvalue_elbow': eigenvalue_elbow,
                    'varex_norm_min': varex_norm_min,
                    'varex_norm_cum': varex_norm_cum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']
        eigenvalue_elbow = pcastate['eigenvalue_elbow']
        varex_norm_min = pcastate['varex_norm_min']
        varex_norm_cum = pcastate['varex_norm_cum']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(model.computefeats2(OCcatd, temp_comp_ts, mask),
                                mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(
        sorted([fmin, getelbow(ct_df['kappa'], return_val=True) / 2, fmid]),
        weights=[kdaw, 1, 1])
    rho_thr = np.average(
        sorted([fmin, getelbow_cons(ct_df['rho'], return_val=True) / 2, fmid]),
        weights=[rdaw, 1, 1])

    if int(kdaw) == -1:
        lim_idx = utils.andb([ct_df['kappa'] < fmid,
                              ct_df['kappa'] > fmin]) == 2
        kappa_lim = ct_df.loc[lim_idx, 'kappa'].values
        kappa_thr = kappa_lim[getelbow(kappa_lim)]

        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]
        stabilize = True
    elif int(rdaw) == -1:
        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Reject if low Kappa, Rho, and variance explained
    is_lowk = ct_df['kappa'] <= kappa_thr
    is_lowr = ct_df['rho'] <= rho_thr
    is_lowe = ct_df['normalized variance explained'] <= eigenvalue_elbow
    is_lowkre = is_lowk & is_lowr & is_lowe
    ct_df.loc[is_lowkre, 'classification'] = 'rejected'
    ct_df.loc[is_lowkre, 'rationale'] += 'low rho, kappa, and varex;'

    # Reject if low variance explained
    is_lows = ct_df['normalized variance explained'] <= varex_norm_min
    ct_df.loc[is_lows, 'classification'] = 'rejected'
    ct_df.loc[is_lows, 'rationale'] += 'low variance explained;'

    # Reject if Kappa over limit
    is_fmax1 = ct_df['kappa'] == F_MAX
    ct_df.loc[is_fmax1, 'classification'] = 'rejected'
    ct_df.loc[is_fmax1, 'rationale'] += 'kappa equals fmax;'

    # Reject if Rho over limit
    is_fmax2 = ct_df['rho'] == F_MAX
    ct_df.loc[is_fmax2, 'classification'] = 'rejected'
    ct_df.loc[is_fmax2, 'rationale'] += 'rho equals fmax;'

    if stabilize:
        temp7 = varex_norm_cum >= 0.95
        ct_df.loc[temp7, 'classification'] = 'rejected'
        ct_df.loc[temp7, 'rationale'] += 'cumulative var. explained above 95%;'
        under_fmin1 = ct_df['kappa'] <= fmin
        ct_df.loc[under_fmin1, 'classification'] = 'rejected'
        ct_df.loc[under_fmin1, 'rationale'] += 'kappa below fmin;'
        under_fmin2 = ct_df['rho'] <= fmin
        ct_df.loc[under_fmin2, 'classification'] = 'rejected'
        ct_df.loc[under_fmin2, 'rationale'] += 'rho below fmin;'

    ct_df.to_csv('comp_table_pca.txt', sep='\t', index=True,
                 index_label='component', float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return n_components, kept_data
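# A minimal sketch of the kappa/rho summary statistics described in the
# docstring above: each component's voxel-wise F-statistic map is collapsed
# into a single score via a weighted average, with the component weight map
# (zeta) raised to a power p serving as the weights. The toy arrays and the
# choice p = 2 are illustrative assumptions, not values taken from tedana.
import numpy as np


def weighted_pseudo_f(zeta, f_map, p=2):
    """Weighted average of voxel-wise F-stats for one component."""
    w = np.abs(zeta) ** p
    return np.sum(w * f_map) / np.sum(w)


rng = np.random.default_rng(0)
zeta = rng.random(1000)                # hypothetical component weight map (V voxels)
f_r2 = rng.chisquare(2, 1000)          # hypothetical F_{R_2^*} map
f_s0 = rng.chisquare(2, 1000)          # hypothetical F_{S_0} map
kappa = weighted_pseudo_f(zeta, f_r2)  # high => TE-dependent (BOLD-like)
rho = weighted_pseudo_f(zeta, f_s0)    # high => TE-independent (artifact-like)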
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    stabilize : bool
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    tes : list
        List of echo times associated with `catd`, in milliseconds
    kdaw : float
        Dimensionality augmentation weight for Kappa calculations
    rdaw : float
        Dimensionality augmentation weight for Rho calculations
    ste : int or list-of-int, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : bool, optional
        Whether to use the method originally explained in Minka, NIPS 2000
        for guessing PCA dimensionality instead of a traditional SVD.
        Default: True

    Returns
    -------
    n_components : int
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.
    """
    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif) // 2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif) // 2) +
                   np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum,
                                                     t2s, t2sG, tes, combmode,
                                                     ref_img, mmixN=vTmixN,
                                                     full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb, 'eigelb': eigelb,
                    'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        LGR.info('Loading PCA from: {}'.format('pcastate.pkl'))
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted([fmin, getelbow_mod(kappas, val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted([fmin, getelbow_cons(rhos, val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=int))).dot(v)
    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
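# The reconstruction step above keeps only selected components by zeroing the
# singular values of rejected ones before multiplying the SVD factors back
# together. A self-contained sketch of that rank-reduction trick with toy
# data (the shapes and the selection mask are illustrative assumptions):
import numpy as np

rng = np.random.default_rng(0)
dz = rng.standard_normal((500, 100))     # (masked voxels x time), z-scored data
u, s, v = np.linalg.svd(dz, full_matrices=False)
pcsel = s > np.median(s)                 # hypothetical component selection
dd = u @ np.diag(s * pcsel) @ v          # reduced-rank reconstruction
assert np.linalg.matrix_rank(dd) == pcsel.sum()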
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img, tes,
           algorithm='mle', source_tes=-1, kdaw=10., rdaw=1., out_dir='.',
           verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ========================= ===============================================
    Filename                  Content
    ========================= ===============================================
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    mepca_OC_components.nii   Component weight maps.
    ========================= ===============================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
        data_cat, data_oc, comp_ts, t2s, tes, ref_img, reindex=False,
        mmixN=vTmixN, algorithm=None, label='mepca_', out_dir=out_dir,
        verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    np.savetxt('mepca_mix.1D', comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask),
                                mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv('comp_table_pca.txt', sep='\t', index=True,
                     index_label='component', float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
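# Every version of this function finishes with the same two-stage variance
# normalization: z-score each voxel's time series, then z-score the whole
# matrix. A sketch of that step in isolation with scipy.stats.zscore (the
# toy shape and values are illustrative assumptions):
import numpy as np
from scipy import stats

kept_data = np.random.default_rng(1).normal(5., 3., size=(200, 50))
kept_data = stats.zscore(kept_data, axis=1)     # each row: mean 0, std 1
kept_data = stats.zscore(kept_data, axis=None)  # whole array: mean 0, std 1
assert np.isclose(kept_data.mean(), 0.) and np.isclose(kept_data.std(), 1.)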
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img, tes,
           algorithm='mdl', source_tes=-1, kdaw=10., rdaw=1., out_dir='.',
           verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method
        of Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'.
        PCA decomposition with the mdl, kic and aic options is based on a
        moving average (stationary Gaussian) process and the options are
        ordered from most to least aggressive. See (Li et al., 2007).
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    if algorithm == 'mle':
        alg_str = "using MLE dimensionality estimation (Minka, 2001)"
        RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality "
                    "for PCA. In Advances in neural information processing "
                    "systems (pp. 598-604).")
    elif algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    else:
        alg_str = ("based on the PCA component estimation with a moving "
                   "average (stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")

    if source_tes == -1:
        dat_str = "the optimally combined data"
    elif source_tes == 0:
        dat_str = "the z-concatenated multi-echo data"
    else:
        dat_str = "a z-concatenated subset of echoes from the input data"

    RepLGR.info("Principal component analysis {0} was applied to "
                "{1} for dimensionality reduction.".format(alg_str, dat_str))

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(_utils.eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(
            ref_img, utils.unmask(utils.unmask(data, eim), mask))
        mask_img = io.new_nii_like(ref_img,
                                   utils.unmask(eim, mask).astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat, data_oc,
                                                    comp_ts, t2s, tes,
                                                    ref_img, reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''
    elif algorithm in ['mdl', 'aic', 'kic']:
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], algorithm))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [io.add_decomp_prefix(comp, prefix='pca',
                                       max_value=comptable.index.max())
                  for comp in comptable.index.values]
    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t',
                     index=False)

    data_type = 'optimally combined data' if source_tes == -1 else 'z-concatenated data'
    comptable['Description'] = 'PCA fit to {0}.'.format(data_type)
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
                      label='pca', metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
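# The source_tes / ste argument is parsed the same way in every version
# above: the value is stringified and split on commas, so an int or a
# comma-separated string reduces to an integer array (a Python list would
# first need to be joined into such a string). Values -1 and 0 then select
# the optimally combined or the concatenated multi-echo data, respectively.
# A sketch of just that parsing step (the helper name is hypothetical):
import numpy as np


def parse_source_tes(source_tes):
    """Parse an int or comma-separated string of echo indices."""
    return np.array([int(ee) for ee in str(source_tes).split(',')])


assert (parse_source_tes(-1) == np.array([-1])).all()        # optimal combination
assert (parse_source_tes(0) == np.array([0])).all()          # all echoes, concatenated
assert (parse_source_tes('1,3') == np.array([1, 3])).all()   # subset of echoes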
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, ref_img, tes,
           method='mle', ste=-1, kdaw=10., rdaw=1., wvpca=False,
           verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: -1
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ========================= ===============================================
    Filename                  Content
    ========================= ===============================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    mepca_OC_components.nii   Component weight maps.
    ========================= ===============================================
    """
    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[mask, :][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask, ...]
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee, :] for ee in ste - 1], axis=1)

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    fname = op.abspath('pcastate.pkl')
    if op.exists('pcastate.pkl'):
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        if pcastate['method'] != method:
            LGR.warning('Method from PCA state file ({0}) does not match '
                        'requested method ({1}).'.format(pcastate['method'],
                                                         method))
            state_found = False
        else:
            state_found = True
    else:
        state_found = False

    if not state_found:
        if method == 'mle':
            voxel_comp_weights, varex, comp_ts = run_mlepca(dz)
        else:
            ppca = PCA()
            ppca.fit(dz)
            comp_ts = ppca.components_
            varex = ppca.explained_variance_
            voxel_comp_weights = np.dot(np.dot(dz, comp_ts.T),
                                        np.diag(1. / varex))

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask, ...] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd, comp_ts.T, eimum,
                                                       t2s, t2sG, tes,
                                                       combmode, ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False,
                                                       label='mepca_',
                                                       verbose=verbose)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        pcastate = {'method': method,
                    'voxel_comp_weights': voxel_comp_weights,
                    'varex': varex,
                    'comp_ts': comp_ts,
                    'comptable': ct_df}

        # Save state
        LGR.info('Saving PCA results to: {}'.format(fname))
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(model.computefeats2(OCcatd, temp_comp_ts, mask),
                                mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Select components using decision tree
    if method == 'kundu':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=False)
    elif method == 'kundu-stabilize':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=True)
    elif method == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(ct_df.shape[0]))

    ct_df.to_csv('comp_table_pca.txt', sep='\t', index=True,
                 index_label='component', float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return n_components, kept_data
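# The pcastate.pkl handling above is a compute-or-load cache: results are
# pickled after the first run and reused on the next, with the stored
# 'method' checked against the requested one. A stripped-down sketch of
# that pattern (the file name and the 'method' key follow the code above;
# the compute callable is a placeholder assumption):
import os.path as op
import pickle


def cached_pca(compute, method, fname='pcastate.pkl'):
    """Return cached PCA state if it matches `method`; else recompute."""
    if op.exists(fname):
        with open(fname, 'rb') as handle:
            state = pickle.load(handle)
        if state.get('method') == method:
            return state
    state = {'method': method, **compute()}
    try:
        with open(fname, 'wb') as handle:
            pickle.dump(state, handle)
    except TypeError:  # unpicklable payload
        pass
    return state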
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True, wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : :obj:`bool`, optional
        Whether to use the method originally explained in Minka, NIPS 2000
        for guessing PCA dimensionality instead of a traditional SVD.
        Default: True
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ====================== =================================================
    Notation               Meaning
    ====================== =================================================
    :math:`\\kappa`         Component pseudo-F statistic for TE-dependent
                           (BOLD) model.
    :math:`\\rho`           Component pseudo-F statistic for TE-independent
                           (artifact) model.
    :math:`v`              Voxel
    :math:`V`              Total number of voxels in mask
    :math:`\\zeta`          Voxel-wise weight map for a component
    :math:`c`              Component
    :math:`p`              Exponent applied to the component weights
    ====================== =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

        .. math::
            {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

            {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Find the elbows of the sorted eigenvalue, :math:`{\\kappa}`, and
        :math:`{\\rho}` curves and use them to set selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

        - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
        - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ====================== =================================================
    Filename               Content
    ====================== =================================================
    pcastate.pkl           Values from PCA results.
    comp_table_pca.txt     PCA component table.
    mepca_mix.1D           PCA mixing matrix.
    ====================== =================================================
    """
    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, return_val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif) // 2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif) // 2) +
                   np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum,
                                                     t2s, t2sG, tes, combmode,
                                                     ref_img, mmixN=vTmixN,
                                                     full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb, 'eigelb': eigelb,
                    'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(
        sorted([fmin, getelbow_mod(kappas, return_val=True) / 2, fmid]),
        weights=[kdaw, 1, 1])
    rho_thr = np.average(
        sorted([fmin, getelbow_cons(rhos, return_val=True) / 2, fmid]),
        weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=int))).dot(v)

    if wvpca:
        dd = idwtmat(dd, cAl)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
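# The selection logic in the kappa/rho versions above combines binary
# criteria by summing the "good" indicators and multiplying by the "veto"
# indicators, so any veto zeroes a component's score regardless of its other
# merits. A toy sketch of that scoring scheme (the thresholds and arrays are
# illustrative assumptions, not tedana's defaults):
import numpy as np

kappas = np.array([80., 40., 15., 5.])
rhos = np.array([10., 35., 12., 4.])
varex = np.array([0.30, 0.25, 0.10, 0.01])
kappa_thr, rho_thr, eigelb, spmin = 20., 20., 0.05, 0.02

is_hik = (kappas > kappa_thr).astype(int)
is_hir = (rhos > rho_thr).astype(int)
is_hie = (varex > eigelb).astype(int)
is_his = (varex > spmin).astype(int)  # veto: must explain enough variance
pcscore = (is_hik + is_hir + is_hie) * is_his
pcsel = pcscore > 0                   # components passed on to ICA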