def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, wvpca=False):
    r"""
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : array_like
        T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    t2sG : array_like
        Second T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection. Forced to
        True when ``kdaw`` is -1.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int`, :obj:`str` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special: -1 uses the
        optimal combination of the echos and 0 uses all echos (spatially
        concatenated). Any other value, a comma-separated string, or a list
        selects a subset of echos by 1-based index. Default: 0
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    kept_data : :obj:`numpy.ndarray`
        Dimensionally reduced data rebuilt from the retained components and
        z-scored, first along axis 1 and then over the whole array

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\zeta`             Per-voxel, per-component weight used in the
                              weighted averages below (TODO: define precisely)
    :math:`c`                 Component
    :math:`p`                 Exponent applied to the weight (TODO: define
                              precisely)
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\kappa}` and :math:`{\rho}`:

            .. math::
                {\kappa}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,R_2^*}}{\sum {\zeta}_{c,v}^p}

                {\rho}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,S_0}}{\sum {\zeta}_{c,v}^p}

    4.  Derive Kappa and Rho thresholds from the elbows of the component
        metrics and the F-statistic bounds from ``utils.getfbounds``, weighted
        by ``kdaw`` and ``rdaw``.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\kappa}` and :math:`{\rho}`.
            - Nonsignificant variance explained.

        Components are additionally rejected for low variance explained, for
        Kappa or Rho equal to ``F_MAX`` and, when stabilizing, for cumulative
        variance explained at or above 95% or Kappa/Rho at or below the lower
        F bound.

    Outputs:

    This function writes out several files:

    ========================    ===============================================
    Filename                    Content
    ========================    ===============================================
    pcastate.pkl                Values from PCA results.
    comp_table_pca.txt          PCA component table.
    mepca_mix.1D                PCA mixing matrix.
    mepca_OC_components.nii     Component maps fit to the optimally combined
                                data.
    ========================    ===============================================
    """
    n_samp, n_echos, n_vols = catd.shape

    # Accept an int, a comma-separated string, or a sequence of ints. The
    # previous ``str(ste).split(',')`` choked on lists ('[1' is not an int).
    if isinstance(ste, str):
        ste = np.array([int(ee) for ee in ste.split(',')])
    else:
        ste = np.atleast_1d(ste).astype(int)

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        # echo numbers are 1-based on input, hence ``ste - 1``
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        voxel_comp_weights, varex, comp_ts = run_svd(dz)

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()
        eigenvalue_elbow = getelbow(varex_norm, return_val=True)

        # Minimum variance explained: first component in the lower half of the
        # spectrum whose drop in variance reaches the threshold.
        diff_varex_norm = np.abs(np.diff(varex_norm))
        lower_diff_varex_norm = diff_varex_norm[(len(diff_varex_norm) // 2):]
        varex_norm_thr = np.mean(
            [lower_diff_varex_norm.max(), diff_varex_norm.min()])
        varex_norm_min = varex_norm[
            (len(diff_varex_norm) // 2) +
            np.arange(len(lower_diff_varex_norm))
            [lower_diff_varex_norm >= varex_norm_thr][0] + 1]
        varex_norm_cum = np.cumsum(varex_norm)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(
            catd, comp_ts.T, eimum, t2s, t2sG, tes, combmode, ref_img,
            mmixN=vTmixN, full_sel=False)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df,
            'eigenvalue_elbow': eigenvalue_elbow,
            'varex_norm_min': varex_norm_min,
            'varex_norm_cum': varex_norm_cum,
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        # NOTE(security): unpickling executes arbitrary code; only reuse a
        # pcastate.pkl written by a previous, trusted run in this directory.
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']
        eigenvalue_elbow = pcastate['eigenvalue_elbow']
        varex_norm_min = pcastate['varex_norm_min']
        varex_norm_cum = pcastate['varex_norm_cum']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(
        sorted([fmin, getelbow(ct_df['kappa'], return_val=True) / 2, fmid]),
        weights=[kdaw, 1, 1])
    rho_thr = np.average(
        sorted([fmin, getelbow_cons(ct_df['rho'], return_val=True) / 2, fmid]),
        weights=[rdaw, 1, 1])

    # A weight of -1 switches to elbow-based thresholds computed only over
    # the metric values that lie strictly between fmin and fmid.
    if int(kdaw) == -1:
        lim_idx = utils.andb([ct_df['kappa'] < fmid,
                              ct_df['kappa'] > fmin]) == 2
        kappa_lim = ct_df.loc[lim_idx, 'kappa'].values
        kappa_thr = kappa_lim[getelbow(kappa_lim)]

        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]
        stabilize = True
    elif int(rdaw) == -1:
        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Reject if low Kappa, Rho, and variance explained
    is_lowk = ct_df['kappa'] <= kappa_thr
    is_lowr = ct_df['rho'] <= rho_thr
    is_lowe = ct_df['normalized variance explained'] <= eigenvalue_elbow
    is_lowkre = is_lowk & is_lowr & is_lowe
    ct_df.loc[is_lowkre, 'classification'] = 'rejected'
    ct_df.loc[is_lowkre, 'rationale'] += 'low rho, kappa, and varex;'

    # Reject if low variance explained
    is_lows = ct_df['normalized variance explained'] <= varex_norm_min
    ct_df.loc[is_lows, 'classification'] = 'rejected'
    ct_df.loc[is_lows, 'rationale'] += 'low variance explained;'

    # Reject if Kappa over limit
    is_fmax1 = ct_df['kappa'] == F_MAX
    ct_df.loc[is_fmax1, 'classification'] = 'rejected'
    ct_df.loc[is_fmax1, 'rationale'] += 'kappa equals fmax;'

    # Reject if Rho over limit
    is_fmax2 = ct_df['rho'] == F_MAX
    ct_df.loc[is_fmax2, 'classification'] = 'rejected'
    ct_df.loc[is_fmax2, 'rationale'] += 'rho equals fmax;'

    if stabilize:
        temp7 = varex_norm_cum >= 0.95
        ct_df.loc[temp7, 'classification'] = 'rejected'
        ct_df.loc[temp7, 'rationale'] += 'cumulative var. explained above 95%;'

        under_fmin1 = ct_df['kappa'] <= fmin
        ct_df.loc[under_fmin1, 'classification'] = 'rejected'
        ct_df.loc[under_fmin1, 'rationale'] += 'kappa below fmin;'

        under_fmin2 = ct_df['rho'] <= fmin
        ct_df.loc[under_fmin2, 'classification'] = 'rejected'
        ct_df.loc[under_fmin2, 'rationale'] += 'rho below fmin;'

    ct_df.to_csv('comp_table_pca.txt', sep='\t', index=True,
                 index_label='component', float_format='%.6f')

    # Rebuild the data from the accepted components only
    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize timeseries
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return n_components, kept_data
def tedana(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
           gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1,
           combmode='t2s', dne=False, initcost='tanh', finalcost='tanh',
           stabilize=False, fout=False, filecsdata=False, label=None,
           fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    debug : :obj:`bool`, optional
        Passed to ``decomposition.tedica`` as ``verbose``. Default is False.
    quiet : :obj:`bool`, optional
        Not used inside this function. Default is False.

    Raises
    ------
    IOError
        If `mixm` or `ctab` is given but is not an existing file.

    Notes
    -----
    Outputs go to ``TED`` (or ``TED.<label>``) under the current working
    directory, and the process working directory is changed to that folder.
    """
    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    # downstream code expects either a reference image or None
    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        # Resolve before os.chdir() so the path stays valid afterwards.
        mixm = op.abspath(mixm)
        for dest in (op.join(out_dir, 'meica_mix.1D'),
                     op.join(out_dir, op.basename(mixm))):
            # Allow re-runs on the same folder: shutil.copyfile raises
            # SameFileError when source and destination coincide.
            if dest != mixm:
                shutil.copyfile(mixm, dest)
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        # ctab is read again (utils.ctabsel) after the chdir below, so it
        # must be absolute or a relative path would point at the wrong file.
        ctab = op.abspath(ctab)
        for dest in (op.join(out_dir, 'comp_table.txt'),
                     op.join(out_dir, op.basename(ctab))):
            if dest != ctab:
                shutil.copyfile(ctab, dest)
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adapative mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste)
        mmix_orig = decomposition.tedica(n_components, dd, conv, fixed_seed,
                                         cost=initcost, final_cost=finalcost,
                                         verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img,
            fout=fout, reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                   ref_img, manacc, n_echos,
                                                   t2s, s0,
                                                   strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img,
            fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning(
            'No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk,
                       empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
    r"""
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : array_like
        T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    t2sG : array_like
        Second T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    stabilize : bool
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection. Forced to
        True when ``kdaw`` is -1.
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    tes : list
        List of echo times associated with `catd`, in milliseconds
    kdaw : float
        Dimensionality augmentation weight for Kappa calculations
    rdaw : float
        Dimensionality augmentation weight for Rho calculations
    ste : int, str or list-of-int, optional
        Which echos to use in PCA. Values -1 and 0 are special: -1 uses the
        optimal combination of the echos and 0 uses all echos (spatially
        concatenated). Any other value, a comma-separated string, or a list
        selects a subset of echos by 1-based index. Default: 0
    mlepca : bool, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True

    Returns
    -------
    n_components : int
        Number of components retained from PCA decomposition
    dd : :obj:`numpy.ndarray`
        Dimensionally-reduced functional data rebuilt from the retained
        components, z-scored per row and then over the whole array

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\zeta`             Per-voxel, per-component weight used in the
                              weighted averages below (TODO: define precisely)
    :math:`c`                 Component
    :math:`p`                 Exponent applied to the weight (TODO: define
                              precisely)
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\kappa}` and :math:`{\rho}`:

            .. math::
                {\kappa}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,R_2^*}}{\sum {\zeta}_{c,v}^p}

                {\rho}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,S_0}}{\sum {\zeta}_{c,v}^p}

    4.  Derive Kappa and Rho thresholds from the elbows of the component
        metrics and the F-statistic bounds from ``utils.getfbounds``, weighted
        by ``kdaw`` and ``rdaw``.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\kappa}` and :math:`{\rho}`.
            - Nonsignificant variance explained.

    The PCA state is cached in ``pcastate.pkl``, and ``comp_table_pca.txt``
    and ``mepca_mix.1D`` are written to the current working directory.
    """
    n_samp, n_echos, n_vols = catd.shape

    # Accept an int, a comma-separated string, or a sequence of ints. The
    # previous ``str(ste).split(',')`` choked on lists ('[1' is not an int).
    if isinstance(ste, str):
        ste = np.array([int(ee) for ee in ste.split(',')])
    else:
        ste = np.atleast_1d(ste).astype(int)

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        # echo numbers are 1-based on input, hence ``ste - 1``
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, val=True)

        # Minimum variance explained: first component in the lower half of the
        # spectrum whose drop in variance reaches the threshold.
        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif) // 2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif) // 2) +
                   np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(
            catd, v.T, eimum, t2s, t2sG, tes, combmode, ref_img,
            mmixN=vTmixN, full_sel=False)
        # Restore component order, keep the first three columns and append the
        # normalized variance explained as a fourth column.
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                    'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        LGR.info('Loading PCA from: {}'.format('pcastate.pkl'))
        # NOTE(security): unpickling executes arbitrary code; only reuse a
        # pcastate.pkl written by a previous, trusted run in this directory.
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    # Outputs are written sorted by descending Kappa (column 1)
    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(
        sorted([fmin, getelbow_mod(kappas, val=True) / 2, fmid]),
        weights=[kdaw, 1, 1])
    rho_thr = np.average(
        sorted([fmin, getelbow_cons(rhos, val=True) / 2, fmid]),
        weights=[rdaw, 1, 1])

    # A weight of -1 switches to elbow-based thresholds computed only over
    # the metric values that lie strictly between fmin and fmid.
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy, so the builtin is used directly.
    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=int)
    # Keep a component if it is high on at least one of Kappa, Rho or variance
    # explained, AND passes every hard exclusion criterion.
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=int)
        pcscore = pcscore * temp7 * temp8 * temp9

    # Zero out the singular values of rejected components and rebuild the data
    pcsel = pcscore > 0
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=int))).dot(v)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False,
                    stabilize=False, out_dir='.', fixed_seed=42, maxit=500,
                    maxrestart=10, debug=False, quiet=False, png=False,
                    png_cmap='coolwarm'):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    stabilize : :obj:`bool`, optional
        Not used inside this function. Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is false.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        --png must still be used to request figures. Default is 'coolwarm'
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Raises
    ------
    IOError
        If `mixm` or `ctab` is given but is not an existing file.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html

    The process working directory is changed to `out_dir`, and all handlers
    attached to the root logger are removed and closed before returning.
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # Logging: debug mode also mirrors messages into a timestamped TSV file
    if debug and not quiet:
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')
        # ':' is replaced because it is not a valid filename character on
        # every platform
        fh = logging.FileHandler(
            op.join(
                out_dir,
                'runlog-{0}.tsv'.format(datetime.now().isoformat().replace(
                    ':', '.'))))
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    # ctab needs both mixm and manacc; manacc needs mixm. Incompatible
    # arguments are dropped with a warning rather than aborting the run.
    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, ref_img,
                                                tes=tes, method=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode,
            ref_img, reindex=True, label='meica_', out_dir=out_dir,
            verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    elif ctab is not None and manacc is not None:
        LGR.info('Using supplied ICA mixing matrix, component table, and '
                 'accepted components')
        mmix = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable = pd.read_csv(ctab, sep='\t', index_col='component')
        comptable = selection.selcomps({}, comptable, mmix, manacc, n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode,
            ref_img, label='meica_', out_dir=out_dir,
            verbose=verbose)
        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')
    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    # Keep the un-orthogonalized mixing matrix; it is what the figures use
    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        # Replace each rejected time series with its residual after
        # least-squares regression on the accepted time series
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')
    # Detach AND close every root handler: removing alone leaves the run-log
    # file handle open (ResourceWarning; locked file on Windows).
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
        handler.close()
def tedana(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
           gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1,
           combmode='t2s', dne=False, initcost='tanh', finalcost='tanh',
           stabilize=False, fout=False, filecsdata=False, label=None,
           fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA
        are done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications. Only consulted when `mixm` is also provided.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    debug : :obj:`bool`, optional
        Forwarded as ``verbose`` to ``decomposition.tedica``.
        Default is False.
    quiet : :obj:`bool`, optional
        Accepted for interface compatibility; not used inside this function.
        Default is False.

    Raises
    ------
    IOError
        If `mixm` or `ctab` is given but is not an existing file.

    Notes
    -----
    Outputs are written to ``TED.[label]`` (or ``TED`` when `label` is None)
    under the current working directory, and the process working directory
    is changed to that folder.
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    # fitmodels_direct receives the reference image when SPM output is
    # requested and None otherwise
    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
        # `ctab` is read again after the chdir below; make it absolute so a
        # relative path is not re-resolved against `out_dir`.
        ctab = op.abspath(ctab)
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adapative mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        # No mixing matrix supplied: run ME-PCA followed by ME-ICA
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste)
        mmix_orig = decomposition.tedica(n_components, dd, conv, fixed_seed,
                                         cost=initcost, final_cost=finalcost,
                                         verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img,
            fout=fout, reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                   ref_img, manacc, n_echos,
                                                   t2s, s0,
                                                   strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        # read the copy placed in out_dir above
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img,
            fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk,
                       empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    strict=False, gscontrol=True, kdaw=10., rdaw=1.,
                    conv=2.5e-5, ste=-1, combmode='t2s', dne=False,
                    cost='logcosh', stabilize=False, filecsdata=False,
                    wvpca=False, label=None, fixed_seed=42, debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA
        are done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications. Only consulted when `mixm` is also provided.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    cost : {'logcosh', 'exp', 'cube'} str, optional
        Cost function for ICA
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Raises
    ------
    IOError
        If `mixm` or `ctab` is given but is not an existing file.

    Notes
    -----
    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file. The process working directory is changed to that folder.

    For a complete list of the files generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    # Derive the folder label from the reference image when it is path-like,
    # otherwise fall back to the first input file name
    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except (TypeError, AttributeError):
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
        # `ctab` is read again after the chdir below; make it absolute so a
        # relative path is not re-resolved against `out_dir`.
        ctab = op.abspath(ctab)
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    # The mask file is likewise opened only after the chdir, so pin a
    # relative mask path to the caller's working directory first.
    if isinstance(mask, str):
        mask = op.abspath(mask)

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask,
                                             minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        n_components, dd = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste, wvpca=wvpca)
        # tedica also returns the seed, which is forwarded to writeresults
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd, conv,
                                                     fixed_seed, cost=cost)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img,
            reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        # read the copy placed in out_dir above
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')
    # Expose the index as a regular column so it can be selected by label
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, fixed_seed=fixed_seed,
                    acc=acc, rej=rej, midk=midk, empty=ign,
                    ref_img=ref_img)
    io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
    if dne:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, ref_img,
           tes, method='mle', ste=-1, kdaw=10., rdaw=1., wvpca=False,
           verbose=False):
    r"""
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s, t2sG : array_like
        Forwarded unchanged to ``model.fitmodels_direct``.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int`, :obj:`str` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0 will indicate using all the echos. A list (or a comma-separated
        string) of 1-based echo numbers can be provided to indicate a subset
        of echos.
        Default: -1
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\kappa}` and :math:`{\rho}`:

            .. math::
                {\kappa}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,R_2^*}}{\sum {\zeta}_{c,v}^p}

                {\rho}_c = \frac{\sum_{v}^V {\zeta}_{c,v}^p *
                      F_{c,v,S_0}}{\sum {\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\kappa}` and :math:`{\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================

    All files are read from / written to the current working directory.
    """

    n_samp, n_echos, n_vols = catd.shape

    # Normalize `ste` to a 1D integer array. Only strings are split on
    # commas; stringifying a list would yield tokens such as '[1' that
    # int() cannot parse.
    if isinstance(ste, str):
        ste = np.array([int(ee) for ee in ste.split(',')])
    else:
        ste = np.atleast_1d(ste).astype(int)

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[mask, :][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask, ...]
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        # echo numbers are 1-based; shift to 0-based indices
        d = np.stack([catd[mask, ee, :] for ee in ste - 1], axis=1)

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    fname = op.abspath('pcastate.pkl')
    if op.exists('pcastate.pkl'):
        LGR.info('Loading PCA from: pcastate.pkl')
        # NOTE(review): unpickling executes arbitrary code; this assumes
        # pcastate.pkl in the working directory is a trusted file written by
        # a previous run of this function.
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)

        # A cached decomposition is reused only if it came from the same
        # selection method
        if pcastate['method'] != method:
            LGR.warning('Method from PCA state file ({0}) does not match '
                        'requested method ({1}).'.format(
                            pcastate['method'], method))
            state_found = False
        else:
            state_found = True
    else:
        state_found = False

    if not state_found:
        if method == 'mle':
            voxel_comp_weights, varex, comp_ts = run_mlepca(dz)
        else:
            ppca = PCA()
            ppca.fit(dz)
            comp_ts = ppca.components_
            varex = ppca.explained_variance_
            voxel_comp_weights = np.dot(np.dot(dz, comp_ts.T),
                                        np.diag(1. / varex))

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()

        # Compute K and Rho for PCA comps
        # Collapse the echo-wise mask and project it back into full
        # (unmasked) sample space as a boolean vector
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask, ...] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(
            catd, comp_ts.T, eimum, t2s, t2sG, tes, combmode, ref_img,
            mmixN=vTmixN, full_sel=False, label='mepca_', verbose=verbose)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        pcastate = {
            'method': method,
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df
        }

        # Save state
        LGR.info('Saving PCA results to: {}'.format(fname))

        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            # best-effort cache: a failed dump only costs recomputation later
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Select components using decision tree
    if method == 'kundu':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=False)
    elif method == 'kundu-stabilize':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=True)
    elif method == 'mle':
        # every component is left as 'accepted' here
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(ct_df.shape[0]))

    ct_df.to_csv('comp_table_pca.txt', sep='\t', index=True,
                 index_label='component', float_format='%.6f')

    # Rebuild the data from the accepted components only
    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return n_components, kept_data
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    ste=-1, combmode='t2s', verbose=False, stabilize=False,
                    wvpca=False, out_dir='.', fixed_seed=42, debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA
        are done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications. Only consulted when `mixm` is also provided.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    stabilize : :obj:`bool`, optional
        Accepted for interface compatibility; not used inside this function.
        Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Raises
    ------
    IOError
        If `mixm` or `ctab` is given but is not an existing file.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html

    The process working directory is changed to `out_dir`, and all handlers
    are removed from the root logger when the workflow completes.
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # Configure root logging; debug mode additionally writes a timestamped
    # TSV run log into out_dir
    if debug and not quiet:
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')
        fh = logging.FileHandler(
            op.join(
                out_dir,
                'runlog-{0}.tsv'.format(datetime.now().isoformat().replace(
                    ':', '.'))))
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
        # `ctab` is read again after the chdir below; make it absolute so a
        # relative path is not re-resolved against `out_dir`.
        ctab = op.abspath(ctab)
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask,
                                             minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        # NOTE: kdaw/rdaw are fixed at 10 and 1 here; this workflow exposes
        # no parameters for them
        n_components, dd = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, ref_img,
                                                tes=tes, method=tedpca, ste=ste,
                                                kdaw=10., rdaw=1., wvpca=wvpca,
                                                verbose=verbose)
        # tedica also returns the seed, which is forwarded to writeresults
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd,
                                                     fixed_seed)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if ste == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode,
            ref_img, reindex=True, label='meica_', out_dir=out_dir,
            verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        # read the copy placed in out_dir above
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode,
            ref_img, label='meica_', out_dir=out_dir,
            verbose=verbose)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')
    # Expose the index as a regular column so it can be selected by label
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    if tedort:
        # Replace each rejected time series with its residual after
        # least-squares regression on the non-rejected time series
        acc_idx = comptable.loc[
            ~comptable['classification'].str.contains('rejected'),
            'component']
        rej_idx = comptable.loc[
            comptable['classification'].str.contains('rejected'),
            'component']
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, fixed_seed=fixed_seed,
                    acc=acc, rej=rej, midk=midk, empty=ign,
                    ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)

    LGR.info('Workflow completed')
    # Detach all root handlers (including the run-log file handler, if any);
    # iterate over a copy because the list is mutated
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True, wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : array_like
        T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    t2sG : array_like
        Second T2* map, forwarded unchanged to ``model.fitmodels_direct``.
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
        Forced to True when ``kdaw`` is -1.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations.
        -1 is a special value that switches both thresholds to an
        elbow-based selection.
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations.
        -1 is a special value that switches the Rho threshold to an
        elbow-based selection.
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos and 0
        will indicate using all the echos (spatially concatenated). A list
        can be provided to indicate a subset of echos (1-based echo numbers).
        Default: 0
    mlepca : :obj:`bool`, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD.
        Default: True
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : :obj:`numpy.ndarray`
        Dimensionally-reduced functional data, rebuilt from the retained
        components and variance normalized.

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p *
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p *
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files (all relative to the current
    working directory):

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """
    n_samp, n_echos, n_vols = catd.shape
    # ``ste`` may be an int or a comma-separated string; normalize to an array
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        # echo numbers are 1-based, hence ``ste - 1``
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, return_val=True)

        spdif = np.abs(np.diff(sp))
        half = len(spdif) // 2
        spdifh = spdif[half:]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[half + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
                                                     tes, combmode, ref_img,
                                                     mmixN=vTmixN, full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        # keep the first three metric columns and append normalized varex
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                    'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        # NOTE(security): unpickling executes code embedded in the file; only
        # run this in a working directory whose pcastate.pkl is trusted.
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(
        sorted([fmin, getelbow_mod(kappas, return_val=True) / 2, fmid]),
        weights=[kdaw, 1, 1])
    rho_thr = np.average(
        sorted([fmin, getelbow_cons(rhos, return_val=True) / 2, fmid]),
        weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype the alias used to resolve to.
    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    # zero out the singular values of rejected components before rebuilding
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=int))).dot(v)

    if wvpca:
        dd = idwtmat(dd, cAl)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, ref_img,
           tes, method='mle', source_tes=-1, kdaw=10., rdaw=1.,
           out_dir='.', verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates, forwarded to
        ``model.fitmodels_direct``.
    t2sG : (S,) array_like
        Second map of voxel-wise T2* estimates, forwarded to
        ``model.fitmodels_direct``.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos (1-based echo numbers).
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory. The files listed under "Outputs" are written here.
        Default is the current working directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p *
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p *
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files into ``out_dir``:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    mepca_OC_components.nii   Component maps computed from the optimally
                              combined data.
    ======================    =================================================
    """
    # Local import so the output paths below can be joined onto ``out_dir``.
    import os.path as op

    n_samp, n_echos, n_vols = data_cat.shape
    # ``source_tes`` may be an int or a comma-separated string
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join(
            [str(ee) for ee in source_tes])))
        # echo numbers are 1-based, hence ``source_tes - 1``
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1],
                        axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if method == 'mle':
        voxel_comp_weights, varex, comp_ts = run_mlepca(data_z)
    else:
        ppca = PCA()
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))

    # actual variance explained (normalized)
    varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    _, comptable, _, _ = model.fitmodels_direct(data_cat, comp_ts, eimum,
                                                t2s, t2sG, tes, combmode,
                                                ref_img, reindex=False,
                                                mmixN=vTmixN, full_sel=False,
                                                label='mepca_',
                                                out_dir=out_dir,
                                                verbose=verbose)

    # varex_norm from PCA retained on top of varex from fitmodels_direct
    comptable['original normalized variance explained'] = varex_norm

    # Write into ``out_dir`` (not the CWD) so this function's outputs land
    # next to the ones fitmodels_direct writes.
    np.savetxt(op.join(out_dir, 'mepca_mix.1D'), comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(
            model.computefeats2(data_oc, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, op.join(out_dir, 'mepca_OC_components.nii'),
                 ref_img)

    # Select components using decision tree
    if method == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=False)
    elif method == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw,
                                 stabilize=True)
    elif method == 'mle':
        # MLE already picked the dimensionality, so every component is kept
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv(op.join(out_dir, 'comp_table_pca.txt'), sep='\t',
                     index=True, index_label='component',
                     float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    # rebuild the data from the accepted components only
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    strict=False, gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5,
                    ste=-1, combmode='t2s', dne=False,
                    initcost='tanh', finalcost='tanh',
                    stabilize=False, filecsdata=False, wvpca=False,
                    label=None, fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Raises
    ------
    IOError
        If ``mixm`` or ``ctab`` is given but is not an existing file.

    Notes
    -----
    PROCEDURE 2 : Computes ME-PCA and ME-ICA

    - Computes T2* map
    - Computes PCA of concatenated ME data, then computes TE-dependence of PCs
    - Computes ICA of TE-dependence PCs
    - Identifies TE-dependent ICs, outputs high-\\kappa (BOLD) component
      and denoised time series

    or computes TE-dependence of each component of a general linear model
    specified by input (includes MELODIC FastICA mixing matrix)

    PROCEDURE 2a: Model fitting and component selection routines

    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file. The process's working directory is changed to that folder.

    Files are listed below:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    t2sv.nii                  Limited estimated T2* 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    s0v.nii                   Limited S0 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    t2ss.nii                  ???
    s0vs.nii                  ???
    t2svG.nii                 Full T2* map/timeseries. The difference between
                              the limited and full maps is that, for voxels
                              affected by dropout where only one echo contains
                              good data, the full map uses the single echo's
                              value while the limited map has a NaN.
    s0vG.nii                  Full S0 map/timeseries.
    __meica_mix.1D            Mixing matrix as returned by
                              ``decomposition.tedica``.
    meica_mix.1D              Mixing matrix as returned by
                              ``model.fitmodels_direct`` (or the user-supplied
                              ``mixm``).
    ts_OC.nii                 Optimally combined timeseries.
    betas_OC.nii              Full ICA coefficient feature set.
    betas_hik_OC.nii          Denoised ICA coefficient feature set
    feats_OC2.nii             Z-normalized spatial component maps
    comp_table.txt            Component table
    sphis_hik.nii             T1-like effect
    hik_ts_OC_T1c.nii         T1 corrected time series by regression
    dn_ts_OC_T1c.nii          ME-DN version of T1 corrected time series
    betas_hik_OC_T1c.nii      T1-GS corrected components
    meica_mix_T1c.1D          T1-GS corrected mixing matrix
    ======================    =================================================

    If ``dne`` is set to True:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    hik_ts_e[echo].nii        High-Kappa timeseries for echo number ``echo``
    midk_ts_e[echo].nii       Mid-Kappa timeseries for echo number ``echo``
    lowk_ts_e[echo].nii       Low-Kappa timeseries for echo number ``echo``
    dn_ts_e[echo].nii         Denoised timeseries for echo number ``echo``
    ======================    =================================================
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format(list(data)))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except TypeError:
        # ref_img is not path-like; fall back to the first input file name
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    # Copies are skipped when source and destination are already the same
    # file (e.g. re-running on a previous output folder); a plain
    # shutil.copyfile would raise SameFileError in that case.
    if mixm is not None and op.isfile(mixm):
        _copy_unless_same(mixm, op.join(out_dir, 'meica_mix.1D'))
        _copy_unless_same(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        # ctab is read again after the chdir below, so pin it to an absolute
        # path while relative paths still resolve against the caller's CWD.
        ctab = op.abspath(ctab)
        _copy_unless_same(ctab, op.join(out_dir, 'comp_table.txt'))
        _copy_unless_same(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    # NOTE(review): presumably a string mask is a file path that
    # make_adaptive_mask loads; resolve it before the chdir for the same
    # reason as ctab -- confirm against utils.make_adaptive_mask.
    if isinstance(mask, str):
        mask = op.abspath(mask)

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask,
                                             minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste, wvpca=wvpca)
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd, conv,
                                                     fixed_seed, cost=initcost,
                                                     final_cost=finalcost,
                                                     verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)

        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask, ref_img,
                                                   manacc, n_echos, t2s, s0,
                                                   strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, fixed_seed, n_vols,
                       acc, rej, midk, empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)


def _copy_unless_same(src, dst):
    """Copy ``src`` to ``dst``, doing nothing if both name the same file."""
    if op.exists(dst) and op.samefile(src, dst):
        return
    shutil.copyfile(src, dst)