def fit(data, mask, tes, masksum, start_echo):
    """
    Fit voxel- and timepoint-wise monoexponential decay models to
    estimate T2* and S0 timeseries.
    """
    nx, ny, nz, n_echoes, n_trs = data.shape
    echodata = fmask(data, mask)
    tes = np.array(tes)

    t2sa_ts = np.zeros([nx, ny, nz, n_trs])
    s0va_ts = np.zeros([nx, ny, nz, n_trs])
    t2saf_ts = np.zeros([nx, ny, nz, n_trs])
    s0vaf_ts = np.zeros([nx, ny, nz, n_trs])

    for vol in range(echodata.shape[-1]):
        t2ss = np.zeros([nx, ny, nz, n_echoes - 1])
        s0vs = t2ss.copy()
        # Fit monoexponential decay first for first echo only,
        # then first two echoes, etc.
        for i_echo in range(start_echo, n_echoes + 1):
            B = np.abs(echodata[:, :i_echo, vol]) + 1
            B = np.log(B).transpose()
            neg_tes = -1 * tes[:i_echo]

            # First row is constant, second is TEs for decay curve
            # Independent variables for least-squares model
            x = np.array([np.ones(i_echo), neg_tes])
            X = np.sort(x)[:, ::-1].transpose()

            beta, _, _, _ = np.linalg.lstsq(X, B)
            t2s = 1. / beta[1, :].transpose()
            s0 = np.exp(beta[0, :]).transpose()

            t2s[np.isinf(t2s)] = 500.
            s0[np.isnan(s0)] = 0.

            t2ss[:, :, :, i_echo - 2] = np.squeeze(unmask(t2s, mask))
            s0vs[:, :, :, i_echo - 2] = np.squeeze(unmask(s0, mask))

        # Limited T2* and S0 maps
        fl = np.zeros([nx, ny, nz, len(tes) - 1], bool)
        for i_echo in range(n_echoes - 1):
            fl_ = np.squeeze(fl[:, :, :, i_echo])
            fl_[masksum == i_echo + 2] = True
            fl[:, :, :, i_echo] = fl_
        t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1))
        s0va = np.squeeze(unmask(s0vs[fl], masksum > 1))

        # Full T2* maps with S0 estimation errors
        t2saf = t2sa.copy()
        s0vaf = s0va.copy()
        t2saf[masksum == 1] = t2ss[masksum == 1, 0]
        s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

        t2sa_ts[:, :, :, vol] = t2sa
        s0va_ts[:, :, :, vol] = s0va
        t2saf_ts[:, :, :, vol] = t2saf
        s0vaf_ts[:, :, :, vol] = s0vaf

    return t2sa_ts, s0va_ts, t2saf_ts, s0vaf_ts
def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise / ignored time series and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : float
        Percent variance of data explained by extracted + retained components
    """
    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = model.get_coeffs(utils.unmask(dmdata.T, mask), mask, mmix)[mask]
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
               (dmdata**2.).sum()) * 100
    LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    midkts = betas[:, midk].dot(mmix.T[midk, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts - midkts

    if len(acc) != 0:
        fout = utils.filewrite(utils.unmask(hikts, mask),
                               'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))

    if len(midk) != 0:
        fout = utils.filewrite(utils.unmask(midkts, mask),
                               'midk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing mid-Kappa time series: {}'.format(op.abspath(fout)))

    if len(rej) != 0:
        fout = utils.filewrite(utils.unmask(lowkts, mask),
                               'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = utils.filewrite(utils.unmask(dnts, mask),
                           'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))
    return varexpl
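# The variance-explained figure logged above is 1 - SS_resid / SS_total, scaled to a
# percentage. A minimal numpy sketch with made-up data (names are illustrative only,
# not tedana API):
import numpy as np

rng = np.random.default_rng(0)
dmdata = rng.standard_normal((100, 30))                 # demeaned data, samples x time
recon = dmdata + 0.3 * rng.standard_normal((100, 30))   # some component reconstruction

varexpl = (1 - ((dmdata - recon) ** 2).sum() / (dmdata ** 2).sum()) * 100
print(round(varexpl, 2))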
def t2sadmap(catd, mask, tes, masksum, start_echo):
    """
    t2sadmap(catd, mask, tes, masksum)

    Input:
        catd has shape (nx, ny, nz, Ne, nt)
        mask has shape (nx, ny, nz)
        tes is a 1d numpy array
        masksum
    """
    nx, ny, nz, Ne, nt = catd.shape
    echodata = fmask(catd, mask)
    Nm = echodata.shape[0]

    t2ss = np.zeros([nx, ny, nz, Ne - 1])
    s0vs = t2ss.copy()

    for ne in range(start_echo, Ne + 1):
        # Do Log Linear fit
        B = np.reshape(np.abs(echodata[:, :ne]) + 1, (Nm, ne * nt)).transpose()
        B = np.log(B)
        neg_tes = [-1 * te for te in tes[:ne]]
        x = np.array([np.ones(ne), neg_tes])
        X = np.tile(x, (1, nt))
        X = np.sort(X)[:, ::-1].transpose()

        beta, res, rank, sing = np.linalg.lstsq(X, B)
        t2s = 1 / beta[1, :].transpose()
        s0 = np.exp(beta[0, :]).transpose()

        t2s[np.isinf(t2s)] = 500.
        s0[np.isnan(s0)] = 0.

        t2ss[:, :, :, ne - 2] = np.squeeze(unmask(t2s, mask))
        s0vs[:, :, :, ne - 2] = np.squeeze(unmask(s0, mask))

    # Limited T2* and S0 maps
    fl = np.zeros([nx, ny, nz, len(tes) - 2 + 1])
    for ne in range(Ne - 1):
        fl_ = np.squeeze(fl[:, :, :, ne])
        fl_[masksum == ne + 2] = True
        fl[:, :, :, ne] = fl_
    fl = np.array(fl, dtype=bool)
    t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1))
    s0va = np.squeeze(unmask(s0vs[fl], masksum > 1))

    # Full T2* maps with S0 estimation errors
    t2saf = t2sa.copy()
    s0vaf = s0va.copy()
    t2saf[masksum == 1] = t2ss[masksum == 1, 0]
    s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

    return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
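# Both fitting routines above rely on the same log-linear trick: taking the log of the
# signal turns the monoexponential decay S(TE) = S0 * exp(-TE / T2*) into a straight
# line that ordinary least squares can fit. A self-contained single-voxel sketch with
# synthetic values (illustrative names, not tedana API):
import numpy as np

tes = np.array([14., 28., 42.])                  # echo times (ms)
s0_true, t2s_true = 1000., 35.                   # ground-truth S0 and T2* (ms)
signal = s0_true * np.exp(-tes / t2s_true)

# log(S) = log(S0) - TE / T2*, so regress log(S) on [1, -TE]
X = np.column_stack([np.ones(len(tes)), -tes])
beta, _, _, _ = np.linalg.lstsq(X, np.log(signal), rcond=None)
s0_hat, t2s_hat = np.exp(beta[0]), 1. / beta[1]
print(s0_hat, t2s_hat)                           # ~1000.0, ~35.0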
def test_smoke_unmask():
    """
    Ensure that unmask returns reasonable objects with random inputs
    in the correct format.
    Note: unmask could take in 1D or 2D or 3D arrays.
    """
    data_1d = np.random.random(100)
    data_2d = np.random.random((100, 5))
    data_3d = np.random.random((100, 5, 20))
    mask = np.random.randint(2, size=100)

    assert utils.unmask(data_1d, mask) is not None
    assert utils.unmask(data_2d, mask) is not None
    assert utils.unmask(data_3d, mask) is not None
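# Illustrative sketch (not the tedana implementation) of the unmasking idea the smoke
# test above exercises: in-mask values are scattered back into a full-length array,
# with zeros everywhere else, for 1D, 2D, or 3D inputs alike.
import numpy as np

def unmask_sketch(data, mask):
    mask = np.asarray(mask, dtype=bool)
    out = np.zeros((mask.size,) + data.shape[1:], dtype=data.dtype)
    out[mask] = data
    return out

mask = np.array([1, 0, 1, 1, 0], dtype=bool)
print(unmask_sketch(np.array([10, 20, 30]), mask))   # [10  0 20 30  0]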
def writefeats(data, mmix, mask, ref_img, suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : str
        Filepath to saved file
    """
    # write feature versions of components
    feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
    fname = utils.filewrite(feats, 'feats_{0}'.format(suffix), ref_img)
    return fname
def split_ts(data, mmix, mask, acc):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : :obj:`list`
        List of accepted components used to subset `mmix`

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only components in `acc`
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """
    cbetas = model.get_coeffs(data - data.mean(axis=-1, keepdims=True),
                              mmix, mask)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
        resid = data - hikts
    else:
        # no accepted components: nothing to remove from the data
        hikts = None
        resid = data

    return hikts, resid
def split_ts(data, mmix, mask, acc):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        List of accepted components used to subset `mmix`

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only components in `acc`
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """
    cbetas = model.get_coeffs(data - data.mean(axis=-1, keepdims=True),
                              mask, mmix)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
        rest = data - hikts
    else:
        # no accepted components: nothing to remove from the data
        hikts = None
        rest = data
    return hikts, rest
def optcom(data, t2, tes, mask, combmode, useG=False):
    """
    Optimally combine BOLD data across TEs.

    out = optcom(data, t2s)

    Parameters
    ----------
    data : :obj:`numpy.ndarray`
        Concatenated BOLD data. Has shape (nx, ny, nz, n_echoes, n_trs)
    t2 : :obj:`numpy.ndarray`
        3D map of estimated T2* values. Has shape (nx, ny, nz)
    tes : :obj:`numpy.ndarray`
        Array of TEs, in seconds.
    mask : :obj:`numpy.ndarray`
        Brain mask in 3D array. Has shape (nx, ny, nz)
    combmode : :obj:`str`
        How to combine data. Either 'ste' or 't2s'.
    useG : :obj:`bool`, optional
        Use G. Default is False.

    Returns
    -------
    out : :obj:`numpy.ndarray`
        Optimally combined data. Has shape (nx, ny, nz, n_trs)
    """
    _, _, _, _, n_trs = data.shape
    if useG:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)
    else:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)

    tes = np.array(tes)
    tes = tes[np.newaxis, :]

    if len(t2.shape) == 3:
        print('Optimally combining with voxel-wise T2 estimates')
        ft2s = ft2s[:, np.newaxis]
    else:
        print('Optimally combining with voxel- and volume-wise T2 estimates')
        ft2s = ft2s[:, :, np.newaxis]

    if combmode == 'ste':
        alpha = fdat.mean(-1) * tes
    else:
        alpha = tes * np.exp(-tes / ft2s)

    if len(t2.shape) == 3:
        alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_trs))
    else:
        alpha = np.swapaxes(alpha, 1, 2)
        # avoid zero-weight voxels in the weighted average
        ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1))
        alpha[ax0_idx, :, ax2_idx] = 1.

    fout = np.average(fdat, axis=1, weights=alpha)
    out = unmask(fout, mask)
    return out
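# Minimal sketch of the non-'ste' ('t2s') weighting used above for a single voxel and
# volume, assuming TEs and T2* share units; purely illustrative, not tedana API.
import numpy as np

tes = np.array([14., 28., 42.])              # echo times
t2s = 30.                                    # voxel-wise T2* estimate
echo_signal = np.array([900., 600., 400.])   # one volume, three echoes

alpha = tes * np.exp(-tes / t2s)             # weights: TE * exp(-TE / T2*)
combined = np.average(echo_signal, weights=alpha)
print(combined)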
def get_coeffs(data, X, mask=None, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S [x E] x T) array_like
        Array where `S` is samples, `E` is echoes, and `T` is time
    X : (T [x C]) array_like
        Array where `T` is time and `C` is predictor variables
    mask : (S [x E]) array_like
        Boolean mask array
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S [x E] x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """
    if data.ndim not in [2, 3]:
        raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim))
    elif X.ndim not in [2]:
        raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim))
    elif data.shape[-1] != X.shape[0]:
        raise ValueError('Last dimension (dimension {0}) of data ({1}) does not '
                         'match first dimension of '
                         'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0]))

    # mask data and flip (time x samples)
    if mask is not None:
        if mask.ndim not in [1, 2]:
            raise ValueError('Parameter mask should be 1d or 2d, not {0}d'.format(mask.ndim))
        elif data.shape[0] != mask.shape[0]:
            raise ValueError('First dimensions of data ({0}) and mask ({1}) do not '
                             'match'.format(data.shape[0], mask.shape[0]))
        mdata = data[mask, :].T
    else:
        mdata = data.T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T

    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]

    if mask is not None:
        betas = utils.unmask(betas, mask)

    return betas
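# Hedged usage sketch of what the 2D, no-mask path above computes: an ordinary
# least-squares fit of a (T x C) design against (S x T) data, returning (S x C) betas.
# Shapes follow the docstring; the random arrays are stand-ins, not real data.
import numpy as np

rng = np.random.default_rng(0)
S, T, C = 4, 10, 2
data = rng.standard_normal((S, T))           # samples x time
X = rng.standard_normal((T, C))              # time x predictors

betas = np.linalg.lstsq(X, data.T, rcond=None)[0].T   # samples x predictors
print(betas.shape)                           # (4, 2)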
def denoise_ts(data, mmix, mask, comptable):
    """Apply component classifications to data for denoising.

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. Requires at least one column: "classification".

    Returns
    -------
    dnts : (S x T) array_like
        Denoised data (i.e., data with rejected components removed).
    hikts : (S x T) array_like
        High-Kappa data (i.e., data composed only of accepted components).
    lowkts : (S x T) array_like
        Low-Kappa data (i.e., data composed only of rejected components).
    """
    acc = comptable[comptable.classification == "accepted"].index.values
    rej = comptable[comptable.classification == "rejected"].index.values

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = get_coeffs(dmdata.T, mmix, mask=None)
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.0).sum() /
               (dmdata**2.0).sum()) * 100
    LGR.info("Variance explained by decomposition: {:.02f}%".format(varexpl))

    # create component-based data
    hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
    lowkts = utils.unmask(betas[:, rej].dot(mmix.T[rej, :]), mask)
    dnts = utils.unmask(data[mask] - lowkts[mask], mask)
    return dnts, hikts, lowkts
def test_unmask():
    # `rs` is assumed to be a module-level numpy RandomState used by these tests
    # generate boolean mask + get number of True values
    mask = rs.choice([0, 1], size=(100,)).astype(bool)
    n_data = mask.sum()

    inputs = [
        (rs.rand(n_data, 3), float),                 # 2D float
        (rs.rand(n_data, 3, 3), float),              # 3D float
        (rs.randint(10, size=(n_data, 3)), int),     # 2D int
        (rs.randint(10, size=(n_data, 3, 3)), int)   # 3D int
    ]

    for (input, dtype) in inputs:
        out = utils.unmask(input, mask)
        assert out.shape == (100,) + input.shape[1:]
        assert out.dtype == dtype
def get_coeffs(data, X, mask=None, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S x T) array_like
        Array where `S` is samples and `T` is time
    X : (T x C) array_like
        Array where `T` is time and `C` is predictor variables
    mask : (S,) array_like, optional
        Boolean mask array
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """
    # mask data and flip (time x samples)
    if mask is not None:
        mdata = data[mask, :].T
    else:
        mdata = data.T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T

    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]

    if mask is not None:
        betas = utils.unmask(betas, mask)

    return betas
def computefeats2(data, mmix, mask, normalize=True):
    """
    Converts `data` to component space using `mmix`

    Parameters
    ----------
    data : (S x T) array_like
        Input data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array-like
        Boolean mask array
    normalize : bool, optional
        Whether to z-score output. Default: True

    Returns
    -------
    data_Z : (S x C) :obj:`numpy.ndarray`
        Data in component space
    """
    # variance-normalize (z-score) masked data
    data_vn = stats.zscore(data[mask], axis=-1)

    # get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
    data_R = get_coeffs(utils.unmask(data_vn, mask), mask, mmix)[mask]
    data_R[data_R < -0.999] = -0.999
    data_R[data_R > 0.999] = 0.999

    # R-to-Z transform
    data_Z = np.arctanh(data_R)
    if data_Z.ndim == 1:
        data_Z = np.atleast_2d(data_Z).T

    # normalize data
    if normalize:
        data_Zm = stats.zscore(data_Z, axis=0)
        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
                            data_Z.std(axis=0, keepdims=True))

    return data_Z
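# Short sketch of the clipping and r-to-z (Fisher) transform steps above, applied to
# made-up correlation-like values; np.arctanh blows up at exactly +/-1, hence the clip.
import numpy as np

data_R = np.array([-0.9999, -0.2, 0.5, 0.9999])
data_R = np.clip(data_R, -0.999, 0.999)      # same bounds as above
data_Z = np.arctanh(data_R)                  # Fisher r-to-z
print(data_Z)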
def writefeats(data, mmix, mask, ref_img, out_dir='.', suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    out_dir : :obj:`str`, optional
        Output directory.
    suffix : :obj:`str`, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : :obj:`str`
        Filepath to saved file

    Notes
    -----
    This function writes out a file:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    feats_[suffix].nii        Z-normalized spatial component maps.
    ======================    =================================================
    """
    # write feature versions of components
    feats = utils.unmask(computefeats2(data, mmix, mask), mask)
    fname = filewrite(feats, op.join(out_dir, 'feats_{0}'.format(suffix)), ref_img)
    return fname
def threshold_map(maps, mask, ref_img, threshold, csize=None):
    """Perform cluster-extent thresholding.

    Parameters
    ----------
    maps : (M x C) array_like
        Statistical maps to be thresholded.
    mask : (S) array_like
        Binary mask.
    ref_img : img_like
        Reference image to convert to niimgs with.
    threshold : :obj:`float`
        Value threshold to apply to maps.
    csize : :obj:`int` or :obj:`None`, optional
        Minimum cluster size. If None, standard thresholding
        (non-cluster-extent) will be done. Default is None.

    Returns
    -------
    maps_thresh : (M x C) array_like
    """
    n_voxels, n_components = maps.shape
    maps_thresh = np.zeros([n_voxels, n_components], bool)
    if csize is None:
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
    else:
        csize = int(csize)

    for i_comp in range(n_components):
        # Cluster-extent threshold and binarize F-maps
        ccimg = io.new_nii_like(
            ref_img,
            np.squeeze(utils.unmask(maps[:, i_comp], mask)))
        maps_thresh[:, i_comp] = utils.threshold_map(
            ccimg, min_cluster_size=csize, threshold=threshold, mask=mask,
            binarize=True)
    return maps_thresh
def optcom(data, t2, tes, mask, combmode, useG=False):
    """
    out = optcom(data, t2s)

    Input:
        data.shape = (nx, ny, nz, Ne, Nt)
        t2s.shape  = (nx, ny, nz)
        tes.shape  = len(Ne)

    Output:
        out.shape = (nx, ny, nz, Nt)
    """
    nx, ny, nz, Ne, Nt = data.shape
    if useG:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)
    else:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)

    tes = np.array(tes)
    tes = tes[np.newaxis, :]
    ft2s = ft2s[:, np.newaxis]

    if combmode == 'ste':
        alpha = fdat.mean(-1) * tes
    else:
        alpha = tes * np.exp(-tes / ft2s)

    alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, Nt))

    fout = np.average(fdat, axis=1, weights=alpha)
    out = unmask(fout, mask)
    return out
def get_coeffs(data, mask, X, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S x T) array-like
        Array where `S` is samples and `T` is time
    mask : (S,) array-like
        Boolean mask array
    X : (T x C) array-like
        Array where `T` is time and `C` is predictor variables
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """
    # mask data and flip (time x samples)
    mdata = data[mask].T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T

    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]

    betas = utils.unmask(betas, mask)

    return betas
def split_ts(data, mmix, mask, comptable):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. Requires at least two columns: "component" and
        "classification".

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only accepted components
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """
    acc = comptable[comptable.classification == 'accepted'].index.values

    cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True),
                        mmix, mask)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
        resid = data - hikts
    else:
        # no accepted components: nothing to remove from the data
        hikts = None
        resid = data

    return hikts, resid
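# Minimal numpy sketch of the accepted-component reconstruction performed by the
# split_ts variants above: project per-sample betas through the accepted rows of the
# mixing matrix and subtract the result from the data. Betas are synthesized directly
# here rather than fit; all names and shapes are illustrative.
import numpy as np

rng = np.random.default_rng(0)
S, T, C = 20, 15, 3
betas = rng.standard_normal((S, C))          # per-sample component weights
mmix = rng.standard_normal((T, C))           # time x components
data = betas @ mmix.T + 0.1 * rng.standard_normal((S, T))

acc = [0, 2]                                 # indices of accepted components
hikts = betas[:, acc] @ mmix.T[acc, :]       # accepted-only reconstruction
resid = data - hikts
print(hikts.shape, resid.shape)              # (20, 15) (20, 15)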
def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `OCcatd`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """
    Gmu = OCcatd.mean(axis=-1)
    Gstd = OCcatd.std(axis=-1)
    Gmask = (Gmu != 0)

    # Compute temporal regression
    dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:, np.newaxis]
    solG = np.linalg.lstsq(mmix, dat.T, rcond=None)[0]
    resid = dat - np.dot(solG.T, mmix.T)

    # Build BOLD time series without amplitudes, and save T1-like effect
    bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T)
    sphis = bold_ts.min(axis=-1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, mask), 'sphis_hik', ref_img)

    # Find the global signal based on the T1-like effect
    sol = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)
    glsig = sol[0]

    # T1-correct time series by regression
    bold_noT1gs = bold_ts - np.dot(np.linalg.lstsq(glsig.T, bold_ts.T,
                                                   rcond=None)[0].T, glsig)
    utils.filewrite(utils.unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask),
                    'hik_ts_OC_T1c.nii', ref_img)

    # Make medn version of T1-corrected time series
    utils.filewrite(Gmu[..., np.newaxis] +
                    utils.unmask((bold_noT1gs + resid) * Gstd[mask][:, np.newaxis], mask),
                    'dn_ts_OC_T1c', ref_img)

    # Orthogonalize mixing matrix w.r.t. T1-GS
    mmixnogs = mmix.T - np.dot(np.linalg.lstsq(glsig.T, mmix, rcond=None)[0].T, glsig)
    mmixnogs_mu = mmixnogs.mean(-1)
    mmixnogs_std = mmixnogs.std(-1)
    mmixnogs_norm = (mmixnogs - mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis]
    mmixnogs_norm = np.vstack([np.atleast_2d(np.ones(max(glsig.shape))),
                               glsig, mmixnogs_norm])

    # Write T1-GS corrected components and mixing matrix
    sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T, rcond=None)
    utils.filewrite(utils.unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c', ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
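# Compact sketch of the core "regress a global signal out of a time-series matrix"
# step used above, on synthetic data; the T1-like global signal estimation itself is
# not reproduced here, and all names are illustrative.
import numpy as np

rng = np.random.default_rng(0)
T, S = 40, 10
glsig = rng.standard_normal((1, T))                    # 1 x T global signal
ts = rng.standard_normal((S, T)) + 0.5 * glsig         # S x T data containing it

beta = np.linalg.lstsq(glsig.T, ts.T, rcond=None)[0]   # 1 x S loadings
ts_nogs = ts - beta.T @ glsig                          # global signal regressed out
print(ts_nogs.shape)                                   # (10, 40)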
def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, fout=None, reindex=False, mmixN=None, full_sel=True): """ Fit models directly. Parameters ---------- catd : (S x E x T) array_like Input data, where `S` is samples, `E` is echos, and `T` is time mmix : (T x C) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `catd` mask : (S,) array_like Boolean mask array t2s : (S,) array_like t2sG : (S,) array_like tes : list List of echo times associated with `catd`, in milliseconds combmode : {'t2s', 'ste'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'ste' indicates using the method of Poser 2006 ref_img : str or img_like Reference image to dictate how outputs are saved to disk fout : bool Whether to output per-component TE-dependence maps. Default: None reindex : bool, optional Default: False mmixN : array_like, optional Default: None full_sel : bool, optional Whether to perform selection of components based on Rho/Kappa scores. Default: True Returns ------- seldict : dict comptab : (N x 5) :obj:`numpy.ndarray` Array with columns denoting (1) index of component, (2) Kappa score of component, (3) Rho score of component, (4) variance explained by component, and (5) normalized variance explained bycomponent betas : :obj:`numpy.ndarray` mmix_new : :obj:`numpy.ndarray` """ # compute optimal combination of raw data tsoc = model.make_optcom(catd, t2sG, tes, mask, combmode, verbose=False).astype(float)[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_coeffs(utils.unmask(tsoc_dm, mask), mask, mmix)[mask] tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 # compute skews to determine signs based on unnormalized weights, # correct mmix & WTS signs based on spatial distribution tails signs = stats.skew(WTS, axis=0) signs /= np.abs(signs) mmix = mmix.copy() mmix *= signs WTS *= signs PSC *= signs totvar = (tsoc_B**2).sum() totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1), mmix) n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() mu = catd.mean(axis=-1, dtype=float) tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = utils.getfbounds(n_echos) # mask arrays mumask = mu[t2s != 0] t2smask = t2s[t2s != 0] betamask = betas[t2s != 0] # set up Xmats X1 = mumask.T # Model 1 X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T # Model 2 # tables for component selection Kappas = np.zeros([n_components]) Rhos = np.zeros([n_components]) varex = np.zeros([n_components]) varex_norm = np.zeros([n_components]) Z_maps = np.zeros([n_voxels, n_components]) F_R2_maps = np.zeros([n_data_voxels, n_components]) F_S0_maps = np.zeros([n_data_voxels, n_components]) Z_clmaps = np.zeros([n_voxels, n_components]) F_R2_clmaps = np.zeros([n_data_voxels, n_components]) F_S0_clmaps = np.zeros([n_data_voxels, n_components]) Br_clmaps_R2 = np.zeros([n_voxels, n_components]) Br_clmaps_S0 = np.zeros([n_voxels, n_components]) LGR.info('Fitting TE- and S0-dependent models to components') for i in range(n_components): # size of B is 
(n_components, nx*ny*nz) B = np.atleast_3d(betamask)[:, :, i].T alpha = (np.abs(B)**2).sum(axis=0) varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100. varex_norm[i] = (utils.unmask(WTS, mask)[t2s != 0][:, i]** 2).sum() / totvar_norm * 100. # S0 Model coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0) SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2 SSE_S0 = SSE_S0.sum(axis=0) F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0) F_S0_maps[:, i] = F_S0 # R2 Model coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0) SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2 SSE_R2 = SSE_R2.sum(axis=0) F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2) F_R2_maps[:, i] = F_R2 # compute weights as Z-values wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std() wtsZ[np.abs(wtsZ) > Z_MAX] = ( Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX] Z_maps[:, i] = wtsZ # compute Kappa and Rho F_S0[F_S0 > F_MAX] = F_MAX F_R2[F_R2 > F_MAX] = F_MAX norm_weights = np.abs( np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.)) Kappas[i] = np.average(F_R2, weights=norm_weights) Rhos[i] = np.average(F_S0, weights=norm_weights) # tabulate component values comptab_pre = np.vstack( [np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T if reindex: # re-index all components in Kappa order comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :] Kappas = comptab[:, 1] Rhos = comptab[:, 2] varex = comptab[:, 3] varex_norm = comptab[:, 4] nnc = np.array(comptab[:, 0], dtype=np.int) mmix_new = mmix[:, nnc] F_S0_maps = F_S0_maps[:, nnc] F_R2_maps = F_R2_maps[:, nnc] Z_maps = Z_maps[:, nnc] WTS = WTS[:, nnc] PSC = PSC[:, nnc] tsoc_B = tsoc_B[:, nnc] tsoc_Babs = tsoc_Babs[:, nnc] comptab[:, 0] = np.arange(comptab.shape[0]) else: comptab = comptab_pre mmix_new = mmix # full selection including clustering criteria seldict = None if full_sel: LGR.info('Performing spatial clustering of components') csize = np.max([int(n_voxels * 0.0005) + 5, 20]) LGR.debug('Using minimum cluster size: {}'.format(csize)) for i in range(n_components): # save out files out = np.zeros((n_samp, 4)) out[:, 0] = np.squeeze(utils.unmask(PSC[:, i], mask)) out[:, 1] = np.squeeze(utils.unmask(F_R2_maps[:, i], t2s != 0)) out[:, 2] = np.squeeze(utils.unmask(F_S0_maps[:, i], t2s != 0)) out[:, 3] = np.squeeze(utils.unmask(Z_maps[:, i], mask)) if utils.get_dtype(ref_img) == 'GIFTI': continue # TODO: pass through GIFTI file data as below ccimg = utils.new_nii_like(ref_img, out) # Do simple clustering on F sel = spatclust(ccimg, min_cluster_size=csize, threshold=int(fmin), index=[1, 2], mask=(t2s != 0)) F_R2_clmaps[:, i] = sel[:, 0] F_S0_clmaps[:, i] = sel[:, 1] countsigFR2 = F_R2_clmaps[:, i].sum() countsigFS0 = F_S0_clmaps[:, i].sum() # Do simple clustering on Z at p<0.05 sel = spatclust(ccimg, min_cluster_size=csize, threshold=1.95, index=3, mask=mask) Z_clmaps[:, i] = sel # Do simple clustering on ranked signal-change map spclust_input = utils.unmask(stats.rankdata(tsoc_Babs[:, i]), mask) spclust_input = utils.new_nii_like(ref_img, spclust_input) Br_clmaps_R2[:, i] = spatclust(spclust_input, min_cluster_size=csize, threshold=max(tsoc_Babs.shape) - countsigFR2, mask=mask) Br_clmaps_S0[:, i] = spatclust(spclust_input, min_cluster_size=csize, threshold=max(tsoc_Babs.shape) - countsigFS0, mask=mask) seldict = {} selvars = [ 'Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps', 'F_S0_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'tsoc_B', 'Br_clmaps_R2', 'Br_clmaps_S0', 'PSC' ] for vv in selvars: seldict[vv] = eval(vv) return seldict, comptab, betas, mmix_new
def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, ref_img, tes, algorithm='mdl', kdaw=10., rdaw=1., out_dir='.', verbose=False, low_mem=False): """ Use principal components analysis (PCA) to identify and remove thermal noise from multi-echo data. Parameters ---------- data_cat : (S x E x T) array_like Input functional data data_oc : (S x T) array_like Optimally combined time series data combmode : {'t2s', 'paid'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'paid' indicates using the method of Poser 2006 mask : (S,) array_like Boolean mask array adaptive_mask : (S,) array_like Array where each value indicates the number of echoes with good signal for that voxel. This mask may be thresholded; for example, with values less than 3 set to 0. For more information on thresholding, see `make_adaptive_mask`. t2sG : (S,) array_like Map of voxel-wise T2* estimates. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk tes : :obj:`list` List of echo times associated with `data_cat`, in milliseconds algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional Method with which to select components in TEDPCA. PCA decomposition with the mdl, kic and aic options are based on a Moving Average (stationary Gaussian) process and are ordered from most to least aggressive (see Li et al., 2007). If a float is provided, then it is assumed to represent percentage of variance explained (0-1) to retain from PCA. Default is 'mdl'. kdaw : :obj:`float`, optional Dimensionality augmentation weight for Kappa calculations. Must be a non-negative float, or -1 (a special value). Default is 10. rdaw : :obj:`float`, optional Dimensionality augmentation weight for Rho calculations. Must be a non-negative float, or -1 (a special value). Default is 1. out_dir : :obj:`str`, optional Output directory. verbose : :obj:`bool`, optional Whether to output files from fitmodels_direct or not. Default: False low_mem : :obj:`bool`, optional Whether to use incremental PCA (for low-memory systems) or not. This is only compatible with the "kundu" or "kundu-stabilize" algorithms. Default: False Returns ------- kept_data : (S x T) :obj:`numpy.ndarray` Dimensionally reduced optimally combined functional data n_components : :obj:`int` Number of components retained from PCA decomposition Notes ----- ====================== ================================================= Notation Meaning ====================== ================================================= :math:`\\kappa` Component pseudo-F statistic for TE-dependent (BOLD) model. :math:`\\rho` Component pseudo-F statistic for TE-independent (artifact) model. :math:`v` Voxel :math:`V` Total number of voxels in mask :math:`\\zeta` Something :math:`c` Component :math:`p` Something else ====================== ================================================= Steps: 1. Variance normalize either multi-echo or optimally combined data, depending on settings. 2. Decompose normalized data using PCA or SVD. 3. Compute :math:`{\\kappa}` and :math:`{\\rho}`: .. math:: {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p} {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \ F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p} 4. Some other stuff. Something about elbows. 5. Classify components as thermal noise if they meet both of the following criteria: - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`. - Nonsignificant variance explained. 
Outputs: This function writes out several files: ====================== ================================================= Filename Content ====================== ================================================= pca_decomposition.json PCA component table. pca_mixing.tsv PCA mixing matrix. pca_components.nii.gz Component weight maps. ====================== ================================================= See Also -------- :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask`` parameter. """ if algorithm == 'kundu': alg_str = ("followed by the Kundu component selection decision " "tree (Kundu et al., 2013)") RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192.") elif algorithm == 'kundu-stabilize': alg_str = ("followed by the 'stabilized' Kundu component " "selection decision tree (Kundu et al., 2013)") RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., " "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. " "(2013). Integrated strategy for improving functional " "connectivity mapping using multiecho fMRI. Proceedings " "of the National Academy of Sciences, 110(40), " "16187-16192.") elif isinstance(algorithm, Number): alg_str = ( "in which the number of components was determined based on a " "variance explained threshold") else: alg_str = ( "based on the PCA component estimation with a Moving Average" "(stationary Gaussian) process (Li et al., 2007)") RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). " "Estimating the number of independent components for " "functional magnetic resonance imaging data. " "Human brain mapping, 28(11), pp.1251-1266.") RepLGR.info("Principal component analysis {0} was applied to " "the optimally combined data for dimensionality " "reduction.".format(alg_str)) n_samp, n_echos, n_vols = data_cat.shape LGR.info('Computing PCA of optimally combined multi-echo data') data = data_oc[mask, :] data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T # var normalize ts data_z = (data_z - data_z.mean()) / data_z.std() # var normalize everything if algorithm in ['mdl', 'aic', 'kic']: data_img = io.new_nii_like(ref_img, utils.unmask(data, mask)) mask_img = io.new_nii_like(ref_img, mask.astype(int)) voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca( data_img, mask_img, algorithm) elif isinstance(algorithm, Number): ppca = PCA(copy=False, n_components=algorithm, svd_solver="full") ppca.fit(data_z) comp_ts = ppca.components_.T varex = ppca.explained_variance_ voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. / varex)) varex_norm = varex / varex.sum() elif low_mem: voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z) varex_norm = varex / varex.sum() else: ppca = PCA(copy=False, n_components=(n_vols - 1)) ppca.fit(data_z) comp_ts = ppca.components_.T varex = ppca.explained_variance_ voxel_comp_weights = np.dot(np.dot(data_z, comp_ts), np.diag(1. 
/ varex)) varex_norm = varex / varex.sum() # Compute Kappa and Rho for PCA comps # Normalize each component's time series vTmixN = stats.zscore(comp_ts, axis=0) comptable, _, _, _ = metrics.dependence_metrics(data_cat, data_oc, comp_ts, adaptive_mask, tes, ref_img, reindex=False, mmixN=vTmixN, algorithm=None, label='mepca_', out_dir=out_dir, verbose=verbose) # varex_norm from PCA overrides varex_norm from dependence_metrics, # but we retain the original comptable['estimated normalized variance explained'] = \ comptable['normalized variance explained'] comptable['normalized variance explained'] = varex_norm # write component maps to 4D image comp_ts_z = stats.zscore(comp_ts, axis=0) comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask) io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img) # Select components using decision tree if algorithm == 'kundu': comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False) elif algorithm == 'kundu-stabilize': comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True) else: alg_str = "variance explained-based" if isinstance( algorithm, Number) else algorithm LGR.info('Selected {0} components with {1} dimensionality ' 'detection'.format(comptable.shape[0], alg_str)) comptable['classification'] = 'accepted' comptable['rationale'] = '' # Save decomposition comp_names = [ io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max()) for comp in comptable.index.values ] mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names) mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False) comptable['Description'] = 'PCA fit to optimally combined data.' mmix_dict = {} mmix_dict['Method'] = ('Principal components analysis implemented by ' 'sklearn. Components are sorted by variance ' 'explained in descending order. ' 'Component signs are flipped to best match the ' 'data.') io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'), label='pca', metadata=mmix_dict) acc = comptable[comptable.classification == 'accepted'].index.values n_components = acc.size voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc]) kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T) kept_data = stats.zscore(kept_data, axis=1) # variance normalize time series kept_data = stats.zscore(kept_data, axis=None) # variance normalize everything return kept_data, n_components
def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, t2s, s0, olevel=2, oversion=99, filecsdata=True, savecsdiag=True, strict_mode=False): """ Labels components in `mmix` Parameters ---------- seldict : :obj:`dict` As output from `fitmodels_direct` mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the number of volumes in the original data mask : (S,) array_like Boolean mask array ref_img : str or img_like Reference image to dictate how outputs are saved to disk manacc : list Comma-separated list of indices of manually accepted components n_echos : int Number of echos in original data t2s : (S,) array_like s0 : (S,) array_like olevel : int, optional Default: 2 oversion : int, optional Default: 99 filecsdata: bool, optional Default: False savecsdiag: bool, optional Default: True strict_mode: bool, optional Default: False Returns ------- acc : list Indices of accepted (BOLD) components in `mmix` rej : list Indices of rejected (non-BOLD) components in `mmix` midk : list Indices of mid-K (questionable) components in `mmix` ign : list Indices of ignored components in `mmix` """ if filecsdata: import bz2 if seldict is not None: LGR.info('Saving component selection data') with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f: pickle.dump(seldict, csstate_f) else: try: with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f: seldict = pickle.load(csstate_f) except FileNotFoundError: LGR.warning('Failed to load component selection data') return None # List of components midk = [] ign = [] nc = np.arange(len(seldict['Kappas'])) ncl = np.arange(len(seldict['Kappas'])) # If user has specified components to accept manually if manacc: acc = sorted([int(vv) for vv in manacc.split(',')]) midk = [] rej = sorted(np.setdiff1d(ncl, acc)) return acc, rej, midk, [] # Add string for ign """ Do some tallies for no. of significant voxels """ countsigFS0 = seldict['F_S0_clmaps'].sum(0) countsigFR2 = seldict['F_R2_clmaps'].sum(0) countnoise = np.zeros(len(nc)) """ Make table of dice values """ dice_tbl = np.zeros([nc.shape[0], 2]) for ii in ncl: dice_FR2 = utils.dice(utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0], seldict['F_R2_clmaps'][:, ii]) dice_FS0 = utils.dice(utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0], seldict['F_S0_clmaps'][:, ii]) dice_tbl[ii, :] = [dice_FR2, dice_FS0] # step 3a here and above dice_tbl[np.isnan(dice_tbl)] = 0 """ Make table of noise gain """ tt_table = np.zeros([len(nc), 4]) counts_FR2_Z = np.zeros([len(nc), 2]) for ii in nc: comp_noise_sel = utils.andb([np.abs(seldict['Z_maps'][:, ii]) > 1.95, seldict['Z_clmaps'][:, ii] == 0]) == 2 countnoise[ii] = np.array(comp_noise_sel, dtype=np.int).sum() noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0] noise_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii])) signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii], mask)[t2s != 0] == 1 signal_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii])) counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)] try: ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True) # avoid DivideByZero RuntimeWarning if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0: mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1]) else: mwu = -np.inf tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0]) tt_table[ii, 1] = ttest[1] except Exception: # TODO: what is the error that might be caught here? 
pass tt_table[np.isnan(tt_table)] = 0 tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0], 98) # Time series derivative kurtosis mmix_dt = (mmix[:-1] - mmix[1:]) mmix_kurt = stats.kurtosis(mmix_dt) mmix_std = np.std(mmix_dt, axis=0) """ Step 1: Reject anything that's obviously an artifact a. Estimate a null variance """ LGR.debug('Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts') rej = ncl[utils.andb([seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0] ncl = np.setdiff1d(ncl, rej) """ Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial frequency artifacts """ LGR.debug('Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts') # spatial information is important so for NIFTI we convert back to 3D space if utils.get_dtype(ref_img) == 'NIFTI': dim1 = np.prod(ref_img.shape[:2]) else: dim1 = mask.shape[0] fproj_arr = np.zeros([dim1, len(nc)]) fproj_arr_val = np.zeros([dim1, len(nc)]) spr = [] fdist = [] for ii in nc: # convert data back to 3D array if utils.get_dtype(ref_img) == 'NIFTI': tproj = utils.new_nii_like(ref_img, utils.unmask(seldict['PSC'], mask)[:, ii]).get_data() else: tproj = utils.unmask(seldict['PSC'], mask)[:, ii] fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj))) fproj_z = fproj.max(axis=2) fproj[fproj == fproj.max()] = 0 fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten()) fproj_arr_val[:, ii] = fproj_z.flatten() spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum()) fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0) fdist.append(np.max([utils.fitgaussian(fproj.max(jj))[3:].max() for jj in range(fprojr.ndim)])) fdist = np.array(fdist) spr = np.array(spr) """ Step 3: Create feature space of component properties """ LGR.debug('Creating feature space of component properties') fdist_pre = fdist.copy() fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3 fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std() spz = (spr-spr.mean())/spr.std() Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std() varex_ = np.log(seldict['varex']) Vz = (varex_-varex_.mean()) / varex_.std() Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std() Ktz = np.log(seldict['Kappas']) / 2 Ktz = (Ktz-Ktz.mean()) / Ktz.std() Rtz = np.log(seldict['Rhos']) / 2 Rtz = (Rtz-Rtz.mean())/Rtz.std() KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos'])) cnz = (countnoise-countnoise.mean()) / countnoise.std() Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001)) fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) """ Step 3: Make initial guess of where BOLD components are and use DBSCAN to exclude noise components and find a sample set of 'good' components """ LGR.debug('Making initial guess of BOLD components') # epsmap is [index,level of overlap with dicemask, # number of high Rho components] F05, F025, F01 = utils.getfbounds(n_echos) epsmap = [] Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1] # Make an initial guess as to number of good components based on # consensus of control points across Rhos and Kappas KRcutguesses = [getelbow_mod(seldict['Rhos']), getelbow_cons(seldict['Rhos']), getelbow_aggr(seldict['Rhos']), getelbow_mod(seldict['Kappas']), getelbow_cons(seldict['Kappas']), getelbow_aggr(seldict['Kappas'])] Khighelbowval = stats.scoreatpercentile([getelbow_mod(seldict['Kappas'], val=True), getelbow_cons(seldict['Kappas'], val=True), getelbow_aggr(seldict['Kappas'], val=True)] + 
list(utils.getfbounds(n_echos)), 75, interpolation_method='lower') KRcut = np.median(KRcutguesses) # only use exclusive when inclusive is extremely inclusive - double KRcut cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2 cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01 if cond1 and cond2: Kcut = getelbow_mod(seldict['Kappas'], val=True) else: Kcut = getelbow_cons(seldict['Kappas'], val=True) # only use inclusive when exclusive is extremely exclusive - half KRcut # (remember for Rho inclusive is higher, so want both Kappa and Rho # to defaut to lower) if getelbow_cons(seldict['Rhos']) > KRcut * 2: Rcut = getelbow_mod(seldict['Rhos'], val=True) # for above, consider something like: # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ]) else: Rcut = getelbow_cons(seldict['Rhos'], val=True) if Rcut > Kcut: Kcut = Rcut # Rcut should never be higher than Kcut KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut]) # Make guess of Kundu et al 2011 plus remove high frequencies, # generally high variance, and high variance given low Kappa tt_lim = stats.scoreatpercentile(tt_table[tt_table[:, 0] > 0, 0], 75, interpolation_method='lower') / 3 KRguess = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej), np.union1d(nc[tt_table[:, 0] < tt_lim], np.union1d(np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([seldict['varex'] > 0.5 * sorted(seldict['varex'])[::-1][int(KRcut)], seldict['Kappas'] < 2*Kcut]) == 2]))) guessmask = np.zeros(len(nc)) guessmask[KRguess] = 1 # Throw lower-risk bad components out rejB = ncl[utils.andb([tt_table[ncl, 0] < 0, seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut]) == 3] rej = np.union1d(rej, rejB) ncl = np.setdiff1d(ncl, rej) LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components') for ii in range(20000): eps = .005 + ii * .005 db = DBSCAN(eps=eps, min_samples=3).fit(fz.T) # it would be great to have descriptive names, here # DBSCAN found at least three non-noisy clusters cond1 = db.labels_.max() > 1 # DBSCAN didn't detect more classes than the total # of components / 6 cond2 = db.labels_.max() < len(nc) / 6 # TODO: confirm if 0 is a special label for DBSCAN # my intuition here is that we're confirming DBSCAN labelled previously # rejected components as noise (i.e., no overlap between `rej` and # labelled DBSCAN components) cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0 # DBSCAN labelled less than half of the total components as noisy cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(len(nc)) < .5 if cond1 and cond2 and cond3 and cond4: epsmap.append([ii, utils.dice(guessmask, db.labels_ == 0), np.intersect1d(nc[db.labels_ == 0], nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted, val=True)]).shape[0]]) db = None epsmap = np.array(epsmap) LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(len(epsmap))) group0 = [] dbscanfailed = False if len(epsmap) != 0: # Select index that maximizes Dice with guessmask but first # minimizes number of higher Rho components ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0]) LGR.debug('Component selection tuning: {:.05f}'.format(epsmap[:, 1].max())) db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T) ncl = nc[db.labels_ == 0] ncl = np.setdiff1d(ncl, rej) ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)]) group0 = ncl.copy() group_n1 = nc[db.labels_ == -1] to_clf = np.setdiff1d(nc, np.union1d(ncl, rej)) if len(group0) == 0 or len(group0) < len(KRguess) * .5: dbscanfailed = True LGR.debug('DBSCAN 
guess failed; using elbow guess method instead') ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej), np.union1d(nc[tt_table[:, 0] < tt_lim], np.union1d(np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([seldict['varex'] > 0.5 * sorted(seldict['varex'])[::-1][int(KRcut)], seldict['Kappas'] < 2 * Kcut]) == 2]))) group0 = ncl.copy() group_n1 = [] to_clf = np.setdiff1d(nc, np.union1d(group0, rej)) if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3): LGR.warning('Extremely limited reliable BOLD signal space! ' 'Not filtering components beyond BOLD/non-BOLD guesses.') midkfailed = True min_acc = np.array([]) if len(group0) != 0: # For extremes, building in a 20% tolerance toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2]) == 3], np.union1d(group0, rej)) min_acc = np.union1d(group0, toacc_hi) to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej)) diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge', 'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess', 'min_acc', 'toacc_hi'] diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed, midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()] with open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf)) # Find additional components to reject based on Dice - doing this here # since Dice is a little unstable, need to reference group0 rej_supp = [] dice_rej = False if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc): dice_rej = True rej_supp = np.setdiff1d(np.setdiff1d(np.union1d(rej, nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]), group0), group_n1) rej = np.union1d(rej, rej_supp) # Temporal features # larger is worse - spike mmix_kurt_z = (mmix_kurt-mmix_kurt[group0].mean()) / mmix_kurt[group0].std() # smaller is worse - drift mmix_std_z = -1 * ((mmix_std-mmix_std[group0].mean()) / mmix_std[group0].std()) mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0) """ Step 2: Classifiy midk and ignore using separte SVMs for different variance regimes # To render hyperplane: min_x = np.min(spz2);max_x=np.max(spz2) # plotting separating hyperplane ww = clf_.coef_[0] aa = -ww[0] / ww[1] # make sure the next line is long enough xx = np.linspace(min_x - 2, max_x + 2) yy = aa * xx - (clf_.intercept_[0]) / ww[1] plt.plot(xx, yy, '-') """ LGR.debug('Attempting to classify midk components') # Tried getting rid of accepting based on SVM altogether, # now using only rejecting toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2]) == 3], np.union1d(group0, rej)) toacc_lo = np.intersect1d(to_clf, nc[utils.andb([spz < 1, Rz < 0, mmix_kurt_z_max < 5, Dz > -1, Tz > -1, Vz < 0, seldict['Kappas'] >= F025, fdist < 3 * np.percentile(fdist[group0], 98)]) == 8]) midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T, [0] * len(group0) + [1] * len(rej), fproj_arr_val[:, to_clf].T, svmtype=2) midk = np.setdiff1d(to_clf[utils.andb([midk_clf == 1, seldict['varex'][to_clf] > np.median(seldict['varex'][group0])]) == 2], np.union1d(toacc_hi, toacc_lo)) # only use SVM to augment toacc_hi only if toacc_hi isn't already # conflicting with SVM choice if len(np.intersect1d(to_clf[utils.andb([midk_clf == 1, Vz[to_clf] > 0]) == 2], toacc_hi)) == 0: svm_acc_fail = True toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 
0]) else: svm_acc_fail = False """ Step 3: Compute variance associated with low T2* areas (e.g. draining veins and low T2* areas) # To write out veinmask veinout = np.zeros(t2s.shape) veinout[t2s!=0] = veinmaskf utils.filewrite(veinout, 'veinmaskf', ref_img) veinBout = utils.unmask(veinmaskB, mask) utils.filewrite(veinBout, 'veins50', ref_img) """ LGR.debug('Computing variance associated with low T2* areas (e.g., draining veins)') tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape) tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(seldict['tsoc_B'])[seldict['Z_clmaps'] != 0] sig_B = [stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25) if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0 else 0 for ii in nc] sig_B = np.abs(seldict['tsoc_B']) > np.tile(sig_B, [seldict['tsoc_B'].shape[0], 1]) veinmask = utils.andb([t2s < stats.scoreatpercentile(t2s[t2s != 0], 15, interpolation_method='lower'), t2s != 0]) == 2 veinmaskf = veinmask[mask] veinR = np.array(sig_B[veinmaskf].sum(0), dtype=float) / sig_B[~veinmaskf].sum(0) veinR[np.isnan(veinR)] = 0 veinc = np.union1d(rej, midk) rej_veinRZ = ((veinR-veinR[veinc].mean())/veinR[veinc].std())[veinc] rej_veinRZ[rej_veinRZ < 0] = 0 rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0 t2s_lim = [stats.scoreatpercentile(t2s[t2s != 0], 50, interpolation_method='lower'), stats.scoreatpercentile(t2s[t2s != 0], 80, interpolation_method='lower') / 2] phys_var_zs = [] for t2sl_i in range(len(t2s_lim)): t2sl = t2s_lim[t2sl_i] veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1]) veincand = utils.unmask(utils.andb([s0[t2s != 0] < np.median(s0[t2s != 0]), t2s[t2s != 0] < t2sl]) >= 1, t2s != 0)[mask] veinW[~veincand] = 0 invein = veinW.sum(axis=1)[(utils.unmask(veinmaskf, mask) * utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]] minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std()) veinmaskB = veinW.sum(axis=1) > minW tsoc_Bp = seldict['tsoc_B'].copy() tsoc_Bp[tsoc_Bp < 0] = 0 vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() / (tsoc_Bp[:, ii]**2.).sum() for ii in nc]) group0_res = np.intersect1d(KRguess, group0) phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std()) veinBout = utils.unmask(veinmaskB, mask) utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img) # Mask to sample veins phys_var_z = np.array(phys_var_zs).max(0) Vz2 = (varex_ - varex_[group0].mean())/varex_[group0].std() """ Step 4: Learn joint TE-dependence spatial and temporal models to move remaining artifacts to ignore class """ LGR.debug('Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts') to_ign = [] minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)]) newcest = len(group0) + len(toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign]) phys_art = np.setdiff1d(nc[utils.andb([phys_var_z > 3.5, seldict['Kappas'] < minK_ign]) == 2], group0) rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas']) phys_art = np.union1d(np.setdiff1d(nc[utils.andb([phys_var_z > 2, rank_diff > newcest / 2, Vz2 > -1]) == 3], group0), phys_art) # Want to replace field_art with an acf/SVM based approach # instead of a kurtosis/filter one field_art = np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, seldict['Kappas'] < minK_ign]) == 2], group0) field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 2, (stats.rankdata(mmix_kurt_z_max) - stats.rankdata(seldict['Kappas'])) > newcest / 2, Vz2 > 1, seldict['Kappas'] < F01]) == 4], group0), field_art) field_art = 
np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 3, Vz2 > 3, seldict['Rhos'] > np.percentile(seldict['Rhos'][group0], 75)]) == 3], group0), field_art) field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2], group0), field_art) misc_art = np.setdiff1d(nc[utils.andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest / 2, seldict['Kappas'] < Khighelbowval]) == 2], group0) ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art)) midkrej = np.union1d(midk, rej) to_ign = np.setdiff1d(list(ign_cand), midkrej) toacc = np.union1d(toacc_hi, toacc_lo) ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej)) ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej)) orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej)) # Last ditch effort to save some transient components if not strict_mode: Vz3 = (varex_ - varex_[ncl].mean())/varex_[ncl].std() ncl = np.union1d(ncl, np.intersect1d(orphan, nc[utils.andb([seldict['Kappas'] > F05, seldict['Rhos'] < F025, seldict['Kappas'] > seldict['Rhos'], Vz3 <= -1, Vz3 > -3, mmix_kurt_z_max < 2.5]) == 6])) ign = np.setdiff1d(nc, list(ncl)+list(midk)+list(rej)) orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej)) if savecsdiag: diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut', 'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess', 'Dice rejected', 'rej_supp', 'to_clf', 'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo', 'Field artifacts', 'Physiological artifacts', 'Miscellaneous artifacts', 'ncl', 'Ignored components'] diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed, KRguess.tolist(), dice_rej, rej_supp.tolist(), to_clf.tolist(), midk.tolist(), svm_acc_fail, toacc_hi.tolist(), toacc_lo.tolist(), field_art.tolist(), phys_art.tolist(), misc_art.tolist(), ncl.tolist(), ign.tolist()] with open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) np.savetxt('csdata.txt', allfz) return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
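The selection logic above leans heavily on the pattern `utils.andb([...]) == N`, which sums a list of boolean arrays so that comparing the sum against the number of conditions acts as an element-wise AND (and `> 0` acts as an OR). A minimal sketch of that idiom, using a hypothetical `andb_like` helper and toy Kappa/Rho values rather than the real `utils.andb`:

import numpy as np

def andb_like(arrays):
    # Hypothetical re-implementation for illustration: sum boolean arrays so
    # that `== len(arrays)` behaves like element-wise AND and `> 0` like OR.
    return np.sum(np.stack([np.asarray(arr, dtype=int) for arr in arrays]), axis=0)

kappas = np.array([40., 12., 85., 5.])   # toy values
rhos = np.array([10., 30., 9., 50.])     # toy values
is_bold_like = andb_like([kappas > 20, rhos < 15]) == 2
print(is_bold_like)  # [ True False  True False]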
def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, fout=None, reindex=False, mmixN=None, full_sel=True): """ Fit models directly. Parameters ---------- catd : (S x E x T) array_like Input data, where `S` is samples, `E` is echos, and `T` is time mmix : (T x C) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `catd` mask : (S,) array_like Boolean mask array t2s : (S,) array_like t2sG : (S,) array_like tes : list List of echo times associated with `catd`, in milliseconds combmode : {'t2s', 'ste'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'ste' indicates using the method of Poser 2006 ref_img : str or img_like Reference image to dictate how outputs are saved to disk fout : bool Whether to output per-component TE-dependence maps. Default: None reindex : bool, optional Default: False mmixN : array_like, optional Default: None full_sel : bool, optional Whether to perform selection of components based on Rho/Kappa scores. Default: True Returns ------- seldict : dict comptab : (N x 5) :obj:`numpy.ndarray` Array with columns denoting (1) index of component, (2) Kappa score of component, (3) Rho score of component, (4) variance explained by component, and (5) normalized variance explained bycomponent betas : :obj:`numpy.ndarray` mmix_new : :obj:`numpy.ndarray` """ # compute optimal combination of raw data tsoc = model.make_optcom(catd, t2sG, tes, mask, combmode, verbose=False).astype(float)[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_coeffs(utils.unmask(tsoc_dm, mask), mask, mmix)[mask] tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 # compute skews to determine signs based on unnormalized weights, # correct mmix & WTS signs based on spatial distribution tails signs = stats.skew(WTS, axis=0) signs /= np.abs(signs) mmix = mmix.copy() mmix *= signs WTS *= signs PSC *= signs totvar = (tsoc_B**2).sum() totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1), mmix) n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() mu = catd.mean(axis=-1, dtype=float) tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = utils.getfbounds(n_echos) # mask arrays mumask = mu[t2s != 0] t2smask = t2s[t2s != 0] betamask = betas[t2s != 0] # set up Xmats X1 = mumask.T # Model 1 X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T # Model 2 # tables for component selection Kappas = np.zeros([n_components]) Rhos = np.zeros([n_components]) varex = np.zeros([n_components]) varex_norm = np.zeros([n_components]) Z_maps = np.zeros([n_voxels, n_components]) F_R2_maps = np.zeros([n_data_voxels, n_components]) F_S0_maps = np.zeros([n_data_voxels, n_components]) Z_clmaps = np.zeros([n_voxels, n_components]) F_R2_clmaps = np.zeros([n_data_voxels, n_components]) F_S0_clmaps = np.zeros([n_data_voxels, n_components]) Br_clmaps_R2 = np.zeros([n_voxels, n_components]) Br_clmaps_S0 = np.zeros([n_voxels, n_components]) LGR.info('Fitting TE- and S0-dependent models to components') for i in range(n_components): # size of B is 
(n_components, nx*ny*nz) B = np.atleast_3d(betamask)[:, :, i].T alpha = (np.abs(B)**2).sum(axis=0) varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100. varex_norm[i] = (utils.unmask(WTS, mask)[t2s != 0][:, i]**2).sum() / totvar_norm * 100. # S0 Model coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0) SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2 SSE_S0 = SSE_S0.sum(axis=0) F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0) F_S0_maps[:, i] = F_S0 # R2 Model coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0) SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2 SSE_R2 = SSE_R2.sum(axis=0) F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2) F_R2_maps[:, i] = F_R2 # compute weights as Z-values wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std() wtsZ[np.abs(wtsZ) > Z_MAX] = (Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX] Z_maps[:, i] = wtsZ # compute Kappa and Rho F_S0[F_S0 > F_MAX] = F_MAX F_R2[F_R2 > F_MAX] = F_MAX norm_weights = np.abs(np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.)) Kappas[i] = np.average(F_R2, weights=norm_weights) Rhos[i] = np.average(F_S0, weights=norm_weights) # tabulate component values comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T if reindex: # re-index all components in Kappa order comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :] Kappas = comptab[:, 1] Rhos = comptab[:, 2] varex = comptab[:, 3] varex_norm = comptab[:, 4] nnc = np.array(comptab[:, 0], dtype=np.int) mmix_new = mmix[:, nnc] F_S0_maps = F_S0_maps[:, nnc] F_R2_maps = F_R2_maps[:, nnc] Z_maps = Z_maps[:, nnc] WTS = WTS[:, nnc] PSC = PSC[:, nnc] tsoc_B = tsoc_B[:, nnc] tsoc_Babs = tsoc_Babs[:, nnc] comptab[:, 0] = np.arange(comptab.shape[0]) else: comptab = comptab_pre mmix_new = mmix # full selection including clustering criteria seldict = None if full_sel: LGR.info('Performing spatial clustering of components') csize = np.max([int(n_voxels * 0.0005) + 5, 20]) LGR.debug('Using minimum cluster size: {}'.format(csize)) for i in range(n_components): # save out files out = np.zeros((n_samp, 4)) out[:, 0] = np.squeeze(utils.unmask(PSC[:, i], mask)) out[:, 1] = np.squeeze(utils.unmask(F_R2_maps[:, i], t2s != 0)) out[:, 2] = np.squeeze(utils.unmask(F_S0_maps[:, i], t2s != 0)) out[:, 3] = np.squeeze(utils.unmask(Z_maps[:, i], mask)) if utils.get_dtype(ref_img) == 'GIFTI': continue # TODO: pass through GIFTI file data as below ccimg = utils.new_nii_like(ref_img, out) # Do simple clustering on F sel = spatclust(ccimg, min_cluster_size=csize, threshold=int(fmin), index=[1, 2], mask=(t2s != 0)) F_R2_clmaps[:, i] = sel[:, 0] F_S0_clmaps[:, i] = sel[:, 1] countsigFR2 = F_R2_clmaps[:, i].sum() countsigFS0 = F_S0_clmaps[:, i].sum() # Do simple clustering on Z at p<0.05 sel = spatclust(ccimg, min_cluster_size=csize, threshold=1.95, index=3, mask=mask) Z_clmaps[:, i] = sel # Do simple clustering on ranked signal-change map spclust_input = utils.unmask(stats.rankdata(tsoc_Babs[:, i]), mask) spclust_input = utils.new_nii_like(ref_img, spclust_input) Br_clmaps_R2[:, i] = spatclust(spclust_input, min_cluster_size=csize, threshold=max(tsoc_Babs.shape)-countsigFR2, mask=mask) Br_clmaps_S0[:, i] = spatclust(spclust_input, min_cluster_size=csize, threshold=max(tsoc_Babs.shape)-countsigFS0, mask=mask) seldict = {} selvars = ['Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps', 'F_S0_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'tsoc_B', 'Br_clmaps_R2', 'Br_clmaps_S0', 'PSC'] for vv in selvars: seldict[vv] = eval(vv) return seldict, comptab, betas, mmix_new
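For readers following the per-component loop in `fitmodels_direct`, the sketch below re-derives the two model fits on invented numbers: the S0 model regresses component betas against the mean signal (X1), the R2 model against the TE-scaled mean signal divided by T2* (X2), and an F-like statistic is formed from the drop in squared error. The echo times, `mu`, and `t2s` values are assumptions for illustration, and the `(n_echos - 1)` scaling follows the `dependence_metrics` variant below rather than the `* 2` used in this function.

import numpy as np

rng = np.random.default_rng(3)
n_echos, n_voxels = 3, 4
tes = np.array([14.5, 38.5, 62.5])[:, None]                 # (E x 1), made-up echo times
mu = rng.uniform(500., 1500., size=(n_voxels, n_echos))     # toy mean signal per voxel/echo
t2s = rng.uniform(20., 60., size=n_voxels)                  # toy T2* estimates

X1 = mu.T                          # S0 model: betas proportional to mean signal
X2 = tes * mu.T / t2s              # R2 model: betas proportional to TE * mean / T2*
comp_betas = X2 * 0.8 + rng.standard_normal((n_echos, n_voxels))  # toy component betas

alpha = (np.abs(comp_betas) ** 2).sum(axis=0)
coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1 ** 2).sum(axis=0)
SSE_S0 = ((comp_betas - X1 * coeffs_S0) ** 2).sum(axis=0)
F_S0 = (alpha - SSE_S0) * (n_echos - 1) / SSE_S0

coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2 ** 2).sum(axis=0)
SSE_R2 = ((comp_betas - X2 * coeffs_R2) ** 2).sum(axis=0)
F_R2 = (alpha - SSE_R2) * (n_echos - 1) / SSE_R2
print(F_S0, F_R2)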
def fit_decay(data, tes, mask, adaptive_mask, fittype, report=True): """ Fit voxel-wise monoexponential decay models to `data` Parameters ---------- data : (S x E [x T]) array_like Multi-echo data array, where `S` is samples, `E` is echos, and `T` is time tes : (E,) :obj:`list` Echo times mask : (S,) array_like Boolean array indicating samples that are consistently (i.e., across time AND echoes) non-zero adaptive_mask : (S,) array_like Valued array indicating number of echos that have sufficient signal in given sample fittype : {loglin, curvefit} The type of model fit to use report : bool, optional Whether to log a description of this step or not. Default is True. Returns ------- t2s_limited : (S,) :obj:`numpy.ndarray` Limited T2* map. The limited map only keeps the T2* values for data where there are at least two echos with good signal. s0_limited : (S,) :obj:`numpy.ndarray` Limited S0 map. The limited map only keeps the S0 values for data where there are at least two echos with good signal. t2s_full : (S,) :obj:`numpy.ndarray` Full T2* map. For voxels affected by dropout, with good signal from only one echo, the full map uses the T2* estimate from the first two echoes. s0_full : (S,) :obj:`numpy.ndarray` Full S0 map. For voxels affected by dropout, with good signal from only one echo, the full map uses the S0 estimate from the first two echoes. Notes ----- This function replaces infinite values in the :math:`T_2^*` map with 500 and :math:`T_2^*` values less than or equal to zero with 1. Additionally, very small :math:`T_2^*` values above zero are replaced with a floor value to prevent zero-division errors later on in the workflow. It also replaces NaN values in the :math:`S_0` map with 0. """ if data.shape[1] != len(tes): raise ValueError( 'Second dimension of data ({0}) does not match number ' 'of echoes provided (tes; {1})'.format(data.shape[1], len(tes))) elif not (data.shape[0] == mask.shape[0] == adaptive_mask.shape[0]): raise ValueError('First dimensions (number of samples) of data ({0}), ' 'mask ({1}), and adaptive_mask ({2}) do not ' 'match'.format(data.shape[0], mask.shape[0], adaptive_mask.shape[0])) data = data.copy() if data.ndim == 2: data = data[:, :, None] # Mask the inputs data_masked = data[mask, :, :] adaptive_mask_masked = adaptive_mask[mask] if fittype == 'loglin': t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear( data_masked, tes, adaptive_mask_masked, report=report) elif fittype == 'curvefit': t2s_limited, s0_limited, t2s_full, s0_full = fit_monoexponential( data_masked, tes, adaptive_mask_masked, report=report) else: raise ValueError('Unknown fittype option: {}'.format(fittype)) t2s_limited[np.isinf(t2s_limited)] = 500. # why 500? # let's get rid of negative values, but keep zeros where limited != full t2s_limited[(adaptive_mask_masked > 1) & (t2s_limited <= 0)] = 1. t2s_limited = _apply_t2s_floor(t2s_limited, tes) s0_limited[np.isnan(s0_limited)] = 0. # why 0? t2s_full[np.isinf(t2s_full)] = 500. # why 500? t2s_full[t2s_full <= 0] = 1. # let's get rid of negative values! t2s_full = _apply_t2s_floor(t2s_full, tes) s0_full[np.isnan(s0_full)] = 0. # why 0? t2s_limited = utils.unmask(t2s_limited, mask) s0_limited = utils.unmask(s0_limited, mask) t2s_full = utils.unmask(t2s_full, mask) s0_full = utils.unmask(s0_full, mask) return t2s_limited, s0_limited, t2s_full, s0_full
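As a companion to the `loglin` branch above, here is a minimal single-voxel sketch of the log-linear monoexponential fit: taking the log of the signal turns S(TE) = S0 * exp(-TE / T2*) into a straight line whose intercept gives log(S0) and whose slope gives -1/T2*. Echo times and signal values are made up for illustration.

import numpy as np

tes = np.array([14.5, 38.5, 62.5])        # made-up echo times (ms)
true_s0, true_t2s = 1200., 35.            # made-up ground truth
signal = true_s0 * np.exp(-tes / true_t2s)

# Design matrix: intercept and negative echo times, as in the log-linear fits above
X = np.column_stack([np.ones(len(tes)), -tes])
B = np.log(np.abs(signal) + 1)            # +1 guards against log(0), mirroring the code
beta, _, _, _ = np.linalg.lstsq(X, B, rcond=None)
s0_hat, t2s_hat = np.exp(beta[0]), 1. / beta[1]
print(s0_hat, t2s_hat)                    # approximately recovers the true values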
def fit_decay(data, tes, mask, masksum, start_echo=1): """ Fit voxel-wise monoexponential decay models to `data` Parameters ---------- data : (S x E [x T]) array_like Multi-echo data array, where `S` is samples, `E` is echos, and `T` is time tes : (E, ) list Echo times mask : (S, ) array_like Boolean array indicating samples that are consistently (i.e., across time AND echoes) non-zero masksum : (S, ) array_like Valued array indicating number of echos that have sufficient signal in given sample start_echo : int, optional First echo to consider. Default is 1 (first echo). Returns ------- t2sa : (S) :obj:`numpy.ndarray` Limited T2* map s0va : (S) :obj:`numpy.ndarray` Limited S0 map t2ss : (S, E-1) :obj:`numpy.ndarray` Voxel-wise T2* estimates from the fits using the first 2, 3, ..., E echoes s0vs : (S, E-1) :obj:`numpy.ndarray` Voxel-wise S0 estimates from the fits using the first 2, 3, ..., E echoes t2saf : (S) :obj:`numpy.ndarray` Full T2* map s0vaf : (S) :obj:`numpy.ndarray` Full S0 map Notes ----- 1. Fit monoexponential decay function to all values for a given voxel across TRs, per TE, to estimate voxel-wise :math:`S_0` and :math:`T_2^*`: .. math:: S(TE) = S_0 * exp(-R_2^* * TE) T_2^* = 1 / R_2^* 2. Replace infinite values in :math:`T_2^*` map with 500 and NaN values in :math:`S_0` map with 0. 3. Generate limited :math:`T_2^*` and :math:`S_0` maps by selecting, for each voxel, the estimate from the fit that used all of that voxel's echoes with good signal (as recorded in `masksum`). """ if len(data.shape) == 3: n_samp, n_echos, n_vols = data.shape else: n_samp, n_echos = data.shape n_vols = 1 data = data[mask] t2ss = np.zeros([n_samp, n_echos - 1]) s0vs = np.zeros([n_samp, n_echos - 1]) for echo in range(start_echo, n_echos + 1): # perform log linear fit of echo times against MR signal # make DV matrix: samples x (time series * echos) B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T) # make IV matrix: intercept/TEs x (time series * echos) x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]]) X = np.repeat(x, n_vols, axis=0) beta = np.linalg.lstsq(X, B, rcond=None)[0] t2s = 1. / beta[1, :].T s0 = np.exp(beta[0, :]).T t2s[np.isinf(t2s)] = 500. # why 500? s0[np.isnan(s0)] = 0. # why 0? t2ss[..., echo - 2] = np.squeeze(utils.unmask(t2s, mask)) s0vs[..., echo - 2] = np.squeeze(utils.unmask(s0, mask)) # create limited T2* and S0 maps fl = np.zeros([n_samp, len(tes) - 1], dtype=bool) for echo in range(n_echos - 1): fl_ = np.squeeze(fl[..., echo]) fl_[masksum == echo + 2] = True fl[..., echo] = fl_ t2sa = utils.unmask(t2ss[fl], masksum > 1) s0va = utils.unmask(s0vs[fl], masksum > 1) # create full T2* maps with S0 estimation errors t2saf, s0vaf = t2sa.copy(), s0va.copy() t2saf[masksum == 1] = t2ss[masksum == 1, 0] s0vaf[masksum == 1] = s0vs[masksum == 1, 0] return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, reindex=False, mmixN=None, algorithm=None, label=None, out_dir='.', verbose=False): """ Fit TE-dependence and -independence models to components. Parameters ---------- catd : (S x E x T) array_like Input data, where `S` is samples, `E` is echos, and `T` is time tsoc : (S x T) array_like Optimally combined data mmix : (T x C) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `catd` t2s : (S [x T]) array_like Limited T2* map or timeseries. tes : list List of echo times associated with `catd`, in milliseconds ref_img : str or img_like Reference image to dictate how outputs are saved to disk reindex : bool, optional Whether to sort components in descending order by Kappa. Default: False mmixN : (T x C) array_like, optional Z-scored mixing matrix. Default: None algorithm : {'kundu_v2', 'kundu_v3', None}, optional Decision tree to be applied to metrics. Determines which maps will be generated and stored in seldict. Default: None label : :obj:`str` or None, optional Prefix to apply to generated files. Default is None. out_dir : :obj:`str`, optional Output directory for generated files. Default is current working directory. verbose : :obj:`bool`, optional Whether or not to generate additional files. Default is False. Returns ------- comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index is the component number. seldict : :obj:`dict` or None Dictionary containing component-specific metric maps to be used for component selection. If `algorithm` is None, then seldict will be None as well. betas : :obj:`numpy.ndarray` mmix_new : :obj:`numpy.ndarray` """ # Use t2s as mask mask = t2s != 0 if not (catd.shape[0] == t2s.shape[0] == mask.shape[0] == tsoc.shape[0]): raise ValueError('First dimensions (number of samples) of catd ({0}), ' 'tsoc ({1}), and t2s ({2}) do not ' 'match'.format(catd.shape[0], tsoc.shape[0], t2s.shape[0])) elif catd.shape[1] != len(tes): raise ValueError('Second dimension of catd ({0}) does not match ' 'number of echoes provided (tes; ' '{1})'.format(catd.shape[1], len(tes))) elif not (catd.shape[2] == tsoc.shape[1] == mmix.shape[0]): raise ValueError('Number of volumes in catd ({0}), ' 'tsoc ({1}), and mmix ({2}) do not ' 'match.'.format(catd.shape[2], tsoc.shape[1], mmix.shape[0])) elif t2s.ndim == 2: if catd.shape[2] != t2s.shape[1]: raise ValueError('Number of volumes in catd ' '({0}) does not match number of volumes in ' 't2s ({1})'.format(catd.shape[2], t2s.shape[1])) # mask everything we can tsoc = tsoc[mask, :] catd = catd[mask, ...] 
t2s = t2s[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None) del tsoc_dm tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 # compute skews to determine signs based on unnormalized weights, # correct mmix & WTS signs based on spatial distribution tails signs = stats.skew(WTS, axis=0) signs /= np.abs(signs) mmix = mmix.copy() mmix *= signs WTS *= signs PSC *= signs totvar = (tsoc_B**2).sum() totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis betas = get_coeffs(utils.unmask(catd, mask), mmix, np.repeat(mask[:, np.newaxis], len(tes), axis=1)) betas = betas[mask, ...] n_voxels, n_echos, n_components = betas.shape mu = catd.mean(axis=-1, dtype=float) tes = np.reshape(tes, (n_echos, 1)) fmin, _, _ = getfbounds(n_echos) # set up Xmats X1 = mu.T # Model 1 X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T # Model 2 # tables for component selection kappas = np.zeros([n_components]) rhos = np.zeros([n_components]) varex = np.zeros([n_components]) varex_norm = np.zeros([n_components]) Z_maps = np.zeros([n_voxels, n_components]) F_R2_maps = np.zeros([n_voxels, n_components]) F_S0_maps = np.zeros([n_voxels, n_components]) pred_R2_maps = np.zeros([n_voxels, n_echos, n_components]) pred_S0_maps = np.zeros([n_voxels, n_echos, n_components]) LGR.info('Fitting TE- and S0-dependent models to components') for i_comp in range(n_components): # size of comp_betas is (n_echoes, n_samples) comp_betas = np.atleast_3d(betas)[:, :, i_comp].T alpha = (np.abs(comp_betas)**2).sum(axis=0) varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100. varex_norm[i_comp] = (WTS[:, i_comp]**2).sum() / totvar_norm # S0 Model # (S,) model coefficient map coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1**2).sum(axis=0) pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1)) pred_S0_maps[:, :, i_comp] = pred_S0.T SSE_S0 = (comp_betas - pred_S0)**2 SSE_S0 = SSE_S0.sum(axis=0) # (S,) prediction error map F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0) F_S0_maps[:, i_comp] = F_S0 # R2 Model coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2**2).sum(axis=0) pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1)) pred_R2_maps[:, :, i_comp] = pred_R2.T SSE_R2 = (comp_betas - pred_R2)**2 SSE_R2 = SSE_R2.sum(axis=0) F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2) F_R2_maps[:, i_comp] = F_R2 # compute weights as Z-values wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std() wtsZ[np.abs(wtsZ) > Z_MAX] = ( Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX] Z_maps[:, i_comp] = wtsZ # compute Kappa and Rho F_S0[F_S0 > F_MAX] = F_MAX F_R2[F_R2 > F_MAX] = F_MAX norm_weights = np.abs(wtsZ**2.) 
kappas[i_comp] = np.average(F_R2, weights=norm_weights) rhos[i_comp] = np.average(F_S0, weights=norm_weights) del SSE_S0, SSE_R2, wtsZ, F_S0, F_R2, norm_weights, comp_betas if algorithm != 'kundu_v3': del WTS, PSC, tsoc_B # tabulate component values comptable = np.vstack([kappas, rhos, varex, varex_norm]).T if reindex: # re-index all components in descending Kappa order sort_idx = comptable[:, 0].argsort()[::-1] comptable = comptable[sort_idx, :] mmix_new = mmix[:, sort_idx] betas = betas[..., sort_idx] pred_R2_maps = pred_R2_maps[:, :, sort_idx] pred_S0_maps = pred_S0_maps[:, :, sort_idx] F_R2_maps = F_R2_maps[:, sort_idx] F_S0_maps = F_S0_maps[:, sort_idx] Z_maps = Z_maps[:, sort_idx] tsoc_Babs = tsoc_Babs[:, sort_idx] if algorithm == 'kundu_v3': WTS = WTS[:, sort_idx] PSC = PSC[:, sort_idx] tsoc_B = tsoc_B[:, sort_idx] else: mmix_new = mmix del mmix if verbose: # Echo-specific weight maps for each of the ICA components. io.filewrite(utils.unmask(betas, mask), op.join(out_dir, '{0}betas_catd.nii'.format(label)), ref_img) # Echo-specific maps of predicted values for R2 and S0 models for each # component. io.filewrite(utils.unmask(pred_R2_maps, mask), op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img) io.filewrite(utils.unmask(pred_S0_maps, mask), op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img) # Weight maps used to average metrics across voxels io.filewrite(utils.unmask(Z_maps**2., mask), op.join(out_dir, '{0}metric_weights.nii'.format(label)), ref_img) del pred_R2_maps, pred_S0_maps comptable = pd.DataFrame(comptable, columns=[ 'kappa', 'rho', 'variance explained', 'normalized variance explained' ]) comptable.index.name = 'component' # Generate clustering criteria for component selection if algorithm in ['kundu_v2', 'kundu_v3']: Z_clmaps = np.zeros([n_voxels, n_components], bool) F_R2_clmaps = np.zeros([n_voxels, n_components], bool) F_S0_clmaps = np.zeros([n_voxels, n_components], bool) Br_R2_clmaps = np.zeros([n_voxels, n_components], bool) Br_S0_clmaps = np.zeros([n_voxels, n_components], bool) LGR.info('Performing spatial clustering of components') csize = np.max([int(n_voxels * 0.0005) + 5, 20]) LGR.debug('Using minimum cluster size: {}'.format(csize)) for i_comp in range(n_components): # Cluster-extent threshold and binarize F-maps ccimg = io.new_nii_like( ref_img, np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask))) F_R2_clmaps[:, i_comp] = utils.threshold_map(ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, binarize=True) countsigFR2 = F_R2_clmaps[:, i_comp].sum() ccimg = io.new_nii_like( ref_img, np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask))) F_S0_clmaps[:, i_comp] = utils.threshold_map(ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, binarize=True) countsigFS0 = F_S0_clmaps[:, i_comp].sum() # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05 ccimg = io.new_nii_like( ref_img, np.squeeze(utils.unmask(Z_maps[:, i_comp], mask))) Z_clmaps[:, i_comp] = utils.threshold_map(ccimg, min_cluster_size=csize, threshold=1.95, mask=mask, binarize=True) # Cluster-extent threshold and binarize ranked signal-change map ccimg = io.new_nii_like( ref_img, utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask)) Br_R2_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, threshold=(max(tsoc_Babs.shape) - countsigFR2), mask=mask, binarize=True) Br_S0_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, threshold=(max(tsoc_Babs.shape) - countsigFS0), mask=mask, binarize=True) del ccimg, tsoc_Babs if 
algorithm == 'kundu_v2': # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5 selvars = [ 'Z_maps', 'F_R2_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'Br_R2_clmaps', 'Br_S0_clmaps' ] elif algorithm == 'kundu_v3': selvars = [ 'WTS', 'tsoc_B', 'PSC', 'Z_maps', 'F_R2_maps', 'F_S0_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'Br_R2_clmaps', 'Br_S0_clmaps' ] elif algorithm is None: selvars = [] else: raise ValueError( 'Algorithm "{0}" not recognized.'.format(algorithm)) seldict = {} for vv in selvars: seldict[vv] = eval(vv) else: seldict = None return comptable, seldict, betas, mmix_new
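The final Kappa/Rho computation in both metric functions reduces to a weighted average of the F-statistic maps, using squared Z-weights as the averaging weights. A self-contained sketch with random stand-in maps (not real data):

import numpy as np

rng = np.random.default_rng(0)
n_voxels = 1000
F_R2 = rng.chisquare(df=2, size=n_voxels) * 10   # stand-in for a component's R2-model F map
F_S0 = rng.chisquare(df=2, size=n_voxels) * 5    # stand-in for the S0-model F map
wtsZ = rng.standard_normal(n_voxels)             # stand-in for the component's Z-scored weights

F_MAX = 500
F_R2 = np.clip(F_R2, None, F_MAX)                # cap extreme F values, as above
F_S0 = np.clip(F_S0, None, F_MAX)

norm_weights = np.abs(wtsZ ** 2.)                # squared Z weights
kappa = np.average(F_R2, weights=norm_weights)
rho = np.average(F_S0, weights=norm_weights)
print(kappa, rho)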
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): """ Removes global signal from individual echo `catd` and `optcom` time series. This function uses the spatial global signal estimation approach to remove the global signal from individual echo time series datasets. The spatial global signal is estimated from the optimally combined data after detrending with a Legendre polynomial basis of `order = 0` and `degree = dtrank`. Parameters ---------- catd : (S x E x T) array_like Input functional data optcom : (S x T) array_like Optimally-combined functional data (i.e., the output of `make_optcom`) n_echos : int Number of echos in data. Should be the same as `E` dimension of `catd` ref_img : str or img_like Reference image to dictate how outputs are saved to disk dtrank : int, optional Specifies degree of Legendre polynomial basis function for estimating spatial global signal. Default: 4 Returns ------- dm_catd : (S x E x T) array_like Input `catd` with global signal removed from time series dm_optcom : (S x T) array_like Input `optcom` with global signal removed from time series """ LGR.info('Applying amplitude-based T1 equilibration correction') # Legendre polynomial basis for denoising bounds = np.linspace(-1, 1, optcom.shape[-1]) Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)]) # compute mean, std, mask local to this function # inefficient, but makes this function a bit more modular Gmu = optcom.mean(axis=-1) # temporal mean Gmask = Gmu != 0 # find spatial global signal dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis] sol = np.linalg.lstsq(Lmix, dat.T, rcond=None)[0] # Legendre basis for detrending detr = dat - np.dot(sol.T, Lmix.T)[0] sphis = (detr).min(axis=1) sphis -= sphis.mean() utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img) # find time course of the spatial global signal # make basis with the Legendre basis glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0] glsig = stats.zscore(glsig, axis=None) np.savetxt('glsig.1D', glsig) glbase = np.hstack([Lmix, glsig.T]) # Project global signal out of optimally combined data sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0] tsoc_nogs = dat - np.dot( np.atleast_2d(sol[dtrank]).T, np.atleast_2d( glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] utils.filewrite(optcom, 'tsoc_orig', ref_img) dm_optcom = utils.unmask(tsoc_nogs, Gmask) utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img) # Project glbase out of each echo dm_catd = catd.copy() # don't overwrite catd for echo in range(n_echos): dat = dm_catd[:, echo, :][Gmask] sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0] e_nogs = dat - np.dot( np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask) return dm_catd, dm_optcom
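A compact sketch of the detrending/projection machinery used by `gscontrol_raw`: build a low-order Legendre basis with `scipy.special.lpmv`, detrend each time series against it, derive a crude spatial global-signal map and its time course, and project that regressor back out. The toy data and the simplified final projection step are assumptions for illustration, not the exact steps above.

import numpy as np
from scipy.special import lpmv

rng = np.random.default_rng(42)
n_voxels, n_trs, dtrank = 50, 120, 4
data = rng.standard_normal((n_voxels, n_trs))               # toy optcom-like data

# Legendre polynomial basis of degree < dtrank, evaluated on [-1, 1]
bounds = np.linspace(-1, 1, n_trs)
Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])   # (T x dtrank)

# Detrend every time series against the Legendre basis
sol = np.linalg.lstsq(Lmix, data.T, rcond=None)[0]           # (dtrank x S)
detrended = data - (Lmix @ sol).T

# Crude spatial global-signal map and its time course
sphis = detrended.min(axis=1)
sphis -= sphis.mean()
glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, data, rcond=None)[0]    # (1 x T)

# Project the global-signal time course out of each voxel (simplified step)
coefs = np.linalg.lstsq(glsig.T, data.T, rcond=None)[0].ravel()         # one coefficient per voxel
data_nogs = data - np.outer(coefs, glsig.ravel())
print(data_nogs.shape)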
def fit_decay(data, tes, mask, masksum, start_echo): """ Fit voxel-wise monoexponential decay models to `data` Parameters ---------- data : (S x E [x T]) array_like Multi-echo data array, where `S` is samples, `E` is echos, and `T` is time tes : (E, ) list Echo times mask : (S, ) array_like Boolean array indicating samples that are consistently (i.e., across time AND echoes) non-zero masksum : (S, ) array_like Valued array indicating number of echos that have sufficient signal in given sample start_echo : int First echo to consider Returns ------- t2sa : (S x E) :obj:`numpy.ndarray` Limited T2* map s0va : (S x E) :obj:`numpy.ndarray` Limited S0 map t2ss : (S x E) :obj:`numpy.ndarray` ??? s0vs : (S x E) :obj:`numpy.ndarray` ??? t2saf : (S x E) :obj:`numpy.ndarray` Full T2* map s0vaf : (S x E) :obj:`numpy.ndarray` Full S0 map Notes ----- 1. Fit monoexponential decay function to all values for a given voxel across TRs, per TE, to estimate voxel-wise :math:`S_0` and :math:`T_2^*`: .. math:: S(TE) = S_0 * exp(-R_2^* * TE) T_2^* = 1 / R_2^* 2. Replace infinite values in :math:`T_2^*` map with 500 and NaN values in :math:`S_0` map with 0. 3. Generate limited :math:`T_2^*` and :math:`S_0` maps by doing something. """ if len(data.shape) == 3: n_samp, n_echos, n_vols = data.shape else: n_samp, n_echos = data.shape n_vols = 1 data = data[mask] t2ss = np.zeros([n_samp, n_echos - 1]) s0vs = np.zeros([n_samp, n_echos - 1]) for echo in range(start_echo, n_echos + 1): # perform log linear fit of echo times against MR signal # make DV matrix: samples x (time series * echos) B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T) # make IV matrix: intercept/TEs x (time series * echos) x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]]) X = np.repeat(x, n_vols, axis=0) beta = np.linalg.lstsq(X, B, rcond=None)[0] t2s = 1. / beta[1, :].T s0 = np.exp(beta[0, :]).T t2s[np.isinf(t2s)] = 500. # why 500? s0[np.isnan(s0)] = 0. # why 0? t2ss[..., echo - 2] = np.squeeze(utils.unmask(t2s, mask)) s0vs[..., echo - 2] = np.squeeze(utils.unmask(s0, mask)) # create limited T2* and S0 maps fl = np.zeros([n_samp, len(tes) - 1], dtype=bool) for echo in range(n_echos - 1): fl_ = np.squeeze(fl[..., echo]) fl_[masksum == echo + 2] = True fl[..., echo] = fl_ t2sa = utils.unmask(t2ss[fl], masksum > 1) s0va = utils.unmask(s0vs[fl], masksum > 1) # create full T2* maps with S0 estimation errors t2saf, s0vaf = t2sa.copy(), s0va.copy() t2saf[masksum == 1] = t2ss[masksum == 1, 0] s0vaf[masksum == 1] = s0vs[masksum == 1, 0] return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
def make_optcom(data, t2s, tes, mask, combmode, verbose=True): """ Optimally combine BOLD data across TEs. Parameters ---------- data : (S x E x T) :obj:`numpy.ndarray` Concatenated BOLD data. t2s : (S,) :obj:`numpy.ndarray` Estimated T2* values. tes : :obj:`numpy.ndarray` Array of TEs, in the same units as `t2s`. mask : (S,) :obj:`numpy.ndarray` Boolean brain mask array. combmode : :obj:`str` How to combine data. Either 'ste' or 't2s'. verbose : :obj:`bool`, optional Whether to print status updates. Returns ------- combined : (S x T) :obj:`numpy.ndarray` Optimally combined data. Notes ----- 1. Estimate voxel- and TE-specific weights based on estimated :math:`T_2^*`: .. math:: w(T_2^*)_n = \\frac{TE_n * exp(\\frac{-TE}{T_{2(est)}^*})}{\sum TE_n * exp(\\frac{-TE}{T_{2(est)}^*})} 2. Perform weighted average per voxel and TR across TEs based on weights estimated in the previous step. """ _, _, n_vols = data.shape mdata = data[mask] tes = np.array(tes)[np.newaxis] # (1 x E) array_like if t2s.ndim == 1: msg = 'Optimally combining data with voxel-wise T2 estimates' ft2s = t2s[mask, np.newaxis] else: msg = 'Optimally combining data with voxel- and volume-wise T2 estimates' ft2s = t2s[mask, :, np.newaxis] if verbose: LGR.info(msg) if combmode == 'ste': alpha = mdata.mean(axis=-1) * tes else: alpha = tes * np.exp(-tes / ft2s) if t2s.ndim == 1: alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols)) else: alpha = np.swapaxes(alpha, 1, 2) ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1)) alpha[ax0_idx, :, ax2_idx] = 1. combined = np.average(mdata, axis=1, weights=alpha) combined = utils.unmask(combined, mask) return combined
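The T2*-weighted branch of `make_optcom` boils down to the weights w_n = TE_n * exp(-TE_n / T2*), normalized across echoes, followed by a weighted average over the echo axis. A one-voxel sketch with invented numbers:

import numpy as np

tes = np.array([14.5, 38.5, 62.5])            # made-up echo times (same units as T2*)
t2s = 30.                                     # made-up T2* estimate for this voxel
data = np.array([[1100., 95.],                # echo 1, two volumes (toy values)
                 [ 600., 52.],                # echo 2
                 [ 330., 29.]])               # echo 3

# Weights: w_n = TE_n * exp(-TE_n / T2*), normalized across echoes
alpha = tes * np.exp(-tes / t2s)
weights = alpha / alpha.sum()

# Weighted average across the echo axis for every volume
combined = np.average(data, axis=0, weights=weights)
print(combined)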
def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, t2s, s0, olevel=2, oversion=99, filecsdata=True, savecsdiag=True, strict_mode=False): """ Labels components in `mmix` Parameters ---------- seldict : :obj:`dict` As output from `fitmodels_direct` mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the number of volumes in the original data mask : (S,) array_like Boolean mask array ref_img : str or img_like Reference image to dictate how outputs are saved to disk manacc : str Comma-separated string of indices of manually accepted components n_echos : int Number of echos in original data t2s : (S,) array_like s0 : (S,) array_like olevel : int, optional Default: 2 oversion : int, optional Default: 99 filecsdata: bool, optional Default: True savecsdiag: bool, optional Default: True strict_mode: bool, optional Default: False Returns ------- acc : list Indices of accepted (BOLD) components in `mmix` rej : list Indices of rejected (non-BOLD) components in `mmix` midk : list Indices of mid-K (questionable) components in `mmix` ign : list Indices of ignored components in `mmix` """ if filecsdata: import bz2 if seldict is not None: LGR.info('Saving component selection data') with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f: pickle.dump(seldict, csstate_f) else: try: with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f: seldict = pickle.load(csstate_f) except FileNotFoundError: LGR.warning('Failed to load component selection data') return None # List of components midk = [] ign = [] nc = np.arange(len(seldict['Kappas'])) ncl = np.arange(len(seldict['Kappas'])) # If user has specified components to accept manually if manacc: acc = sorted([int(vv) for vv in manacc.split(',')]) midk = [] rej = sorted(np.setdiff1d(ncl, acc)) return acc, rej, midk, [] # Add string for ign """ Do some tallies for no.
of significant voxels """ countsigFS0 = seldict['F_S0_clmaps'].sum(0) countsigFR2 = seldict['F_R2_clmaps'].sum(0) countnoise = np.zeros(len(nc)) """ Make table of dice values """ dice_tbl = np.zeros([nc.shape[0], 2]) for ii in ncl: dice_FR2 = utils.dice( utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0], seldict['F_R2_clmaps'][:, ii]) dice_FS0 = utils.dice( utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0], seldict['F_S0_clmaps'][:, ii]) dice_tbl[ii, :] = [dice_FR2, dice_FS0] # step 3a here and above dice_tbl[np.isnan(dice_tbl)] = 0 """ Make table of noise gain """ tt_table = np.zeros([len(nc), 4]) counts_FR2_Z = np.zeros([len(nc), 2]) for ii in nc: comp_noise_sel = utils.andb([ np.abs(seldict['Z_maps'][:, ii]) > 1.95, seldict['Z_clmaps'][:, ii] == 0 ]) == 2 countnoise[ii] = np.array(comp_noise_sel, dtype=np.int).sum() noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0] noise_FR2_Z = np.log10( np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii])) signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii], mask)[t2s != 0] == 1 signal_FR2_Z = np.log10( np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii])) counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)] ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True) # avoid DivideByZero RuntimeWarning if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0: mwu = stats.norm.ppf( stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1]) else: mwu = -np.inf tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0]) tt_table[ii, 1] = ttest[1] tt_table[np.isnan(tt_table)] = 0 tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0], 98) # Time series derivative kurtosis mmix_dt = (mmix[:-1] - mmix[1:]) mmix_kurt = stats.kurtosis(mmix_dt) mmix_std = np.std(mmix_dt, axis=0) """ Step 1: Reject anything that's obviously an artifact a. 
Estimate a null variance """ LGR.debug( 'Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts') rej = ncl[utils.andb( [seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0] ncl = np.setdiff1d(ncl, rej) """ Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial frequency artifacts """ LGR.debug( 'Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts' ) # spatial information is important so for NIFTI we convert back to 3D space if utils.get_dtype(ref_img) == 'NIFTI': dim1 = np.prod(check_niimg(ref_img).shape[:2]) else: dim1 = mask.shape[0] fproj_arr = np.zeros([dim1, len(nc)]) fproj_arr_val = np.zeros([dim1, len(nc)]) spr = [] fdist = [] for ii in nc: # convert data back to 3D array if utils.get_dtype(ref_img) == 'NIFTI': tproj = utils.new_nii_like( ref_img, utils.unmask(seldict['PSC'], mask)[:, ii]).get_data() else: tproj = utils.unmask(seldict['PSC'], mask)[:, ii] fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj))) fproj_z = fproj.max(axis=-1) fproj[fproj == fproj.max()] = 0 spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum()) fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten()) fproj_arr_val[:, ii] = fproj_z.flatten() if utils.get_dtype(ref_img) == 'NIFTI': fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0) fdist.append( np.max([ utils.fitgaussian(fproj.max(jj))[3:].max() for jj in range(fprojr.ndim) ])) else: fdist = np.load(os.path.join(RESOURCES, 'fdist.npy')) if type(fdist) is not np.ndarray: fdist = np.array(fdist) spr = np.array(spr) # import ipdb; ipdb.set_trace() """ Step 3: Create feature space of component properties """ LGR.debug('Creating feature space of component properties') fdist_pre = fdist.copy() fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3 fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std() spz = (spr - spr.mean()) / spr.std() Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std() varex_ = np.log(seldict['varex']) Vz = (varex_ - varex_.mean()) / varex_.std() Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std() Ktz = np.log(seldict['Kappas']) / 2 Ktz = (Ktz - Ktz.mean()) / Ktz.std() Rtz = np.log(seldict['Rhos']) / 2 Rtz = (Rtz - Rtz.mean()) / Rtz.std() KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos'])) cnz = (countnoise - countnoise.mean()) / countnoise.std() Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001)) fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) """ Step 3: Make initial guess of where BOLD components are and use DBSCAN to exclude noise components and find a sample set of 'good' components """ LGR.debug('Making initial guess of BOLD components') # epsmap is [index,level of overlap with dicemask, # number of high Rho components] F05, F025, F01 = utils.getfbounds(n_echos) epsmap = [] Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1] # Make an initial guess as to number of good components based on # consensus of control points across Rhos and Kappas KRcutguesses = [ getelbow_mod(seldict['Rhos']), getelbow_cons(seldict['Rhos']), getelbow_aggr(seldict['Rhos']), getelbow_mod(seldict['Kappas']), getelbow_cons(seldict['Kappas']), getelbow_aggr(seldict['Kappas']) ] Khighelbowval = stats.scoreatpercentile([ getelbow_mod(seldict['Kappas'], val=True), getelbow_cons(seldict['Kappas'], val=True), getelbow_aggr(seldict['Kappas'], val=True) ] + list(utils.getfbounds(n_echos)), 75, interpolation_method='lower') KRcut = np.median(KRcutguesses) # only use exclusive when inclusive is 
extremely inclusive - double KRcut cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2 cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01 if cond1 and cond2: Kcut = getelbow_mod(seldict['Kappas'], val=True) else: Kcut = getelbow_cons(seldict['Kappas'], val=True) # only use inclusive when exclusive is extremely exclusive - half KRcut # (remember for Rho inclusive is higher, so want both Kappa and Rho # to defaut to lower) if getelbow_cons(seldict['Rhos']) > KRcut * 2: Rcut = getelbow_mod(seldict['Rhos'], val=True) # for above, consider something like: # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ]) else: Rcut = getelbow_cons(seldict['Rhos'], val=True) if Rcut > Kcut: Kcut = Rcut # Rcut should never be higher than Kcut KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut]) # Make guess of Kundu et al 2011 plus remove high frequencies, # generally high variance, and high variance given low Kappa tt_lim = stats.scoreatpercentile( tt_table[tt_table[:, 0] > 0, 0], 75, interpolation_method='lower') / 3 KRguess = np.setdiff1d( np.setdiff1d(nc[KRelbow == 2], rej), np.union1d( nc[tt_table[:, 0] < tt_lim], np.union1d( np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([ seldict['varex'] > 0.5 * sorted(seldict['varex'])[::-1][int(KRcut)], seldict['Kappas'] < 2 * Kcut ]) == 2]))) guessmask = np.zeros(len(nc)) guessmask[KRguess] = 1 # Throw lower-risk bad components out rejB = ncl[utils.andb([ tt_table[ncl, 0] < 0, seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut ]) == 3] rej = np.union1d(rej, rejB) ncl = np.setdiff1d(ncl, rej) LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components') for ii in range(20000): eps = .005 + ii * .005 db = DBSCAN(eps=eps, min_samples=3).fit(fz.T) # it would be great to have descriptive names, here # DBSCAN found at least three non-noisy clusters cond1 = db.labels_.max() > 1 # DBSCAN didn't detect more classes than the total # of components / 6 cond2 = db.labels_.max() < len(nc) / 6 # TODO: confirm if 0 is a special label for DBSCAN # my intuition here is that we're confirming DBSCAN labelled previously # rejected components as noise (i.e., no overlap between `rej` and # labelled DBSCAN components) cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0 # DBSCAN labelled less than half of the total components as noisy cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float( len(nc)) < .5 if cond1 and cond2 and cond3 and cond4: epsmap.append([ ii, utils.dice(guessmask, db.labels_ == 0), np.intersect1d( nc[db.labels_ == 0], nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted, val=True)]). 
shape[0] ]) db = None epsmap = np.array(epsmap) LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format( len(epsmap))) group0 = [] dbscanfailed = False if len(epsmap) != 0: # Select index that maximizes Dice with guessmask but first # minimizes number of higher Rho components ii = int( epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0]) LGR.debug('Component selection tuning: {:.05f}'.format( epsmap[:, 1].max())) db = DBSCAN(eps=.005 + ii * .005, min_samples=3).fit(fz.T) ncl = nc[db.labels_ == 0] ncl = np.setdiff1d(ncl, rej) ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)]) group0 = ncl.copy() group_n1 = nc[db.labels_ == -1] to_clf = np.setdiff1d(nc, np.union1d(ncl, rej)) if len(group0) == 0 or len(group0) < len(KRguess) * .5: dbscanfailed = True LGR.debug('DBSCAN guess failed; using elbow guess method instead') ncl = np.setdiff1d( np.setdiff1d(nc[KRelbow == 2], rej), np.union1d( nc[tt_table[:, 0] < tt_lim], np.union1d( np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([ seldict['varex'] > 0.5 * sorted(seldict['varex'])[::-1][int(KRcut)], seldict['Kappas'] < 2 * Kcut ]) == 2]))) group0 = ncl.copy() group_n1 = [] to_clf = np.setdiff1d(nc, np.union1d(group0, rej)) if len(group0) < 2 or (len(group0) < 4 and float(len(rej)) / len(group0) > 3): LGR.warning('Extremely limited reliable BOLD signal space! ' 'Not filtering components beyond BOLD/non-BOLD guesses.') midkfailed = True min_acc = np.array([]) if len(group0) != 0: # For extremes, building in a 20% tolerance toacc_hi = np.setdiff1d( nc[utils.andb([ fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2 ]) == 3], np.union1d(group0, rej)) min_acc = np.union1d(group0, toacc_hi) to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej)) else: toacc_hi = [] min_acc = [] diagstep_keys = [ 'Rejected components', 'Kappa-Rho cut point', 'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge', 'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess', 'min_acc', 'toacc_hi' ] diagstep_vals = [ list(rej), KRcut, Kcut, Rcut, dbscanfailed, midkfailed, list(KRguess), list(min_acc), list(toacc_hi) ] with open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True, default=str) return list(sorted(min_acc)), list(sorted(rej)), [], list( sorted(to_clf)) # Find additional components to reject based on Dice - doing this here # since Dice is a little unstable, need to reference group0 rej_supp = [] dice_rej = False if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc): dice_rej = True rej_supp = np.setdiff1d( np.setdiff1d( np.union1d(rej, nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]), group0), group_n1) rej = np.union1d(rej, rej_supp) # Temporal features # larger is worse - spike mmix_kurt_z = (mmix_kurt - mmix_kurt[group0].mean()) / mmix_kurt[group0].std() # smaller is worse - drift mmix_std_z = -1 * ( (mmix_std - mmix_std[group0].mean()) / mmix_std[group0].std()) mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0) """ Step 2: Classifiy midk and ignore using separte SVMs for different variance regimes # To render hyperplane: min_x = np.min(spz2);max_x=np.max(spz2) # plotting separating hyperplane ww = clf_.coef_[0] aa = -ww[0] / ww[1] # make sure the next line is long enough xx = np.linspace(min_x - 2, max_x + 2) yy = aa * xx - (clf_.intercept_[0]) / ww[1] plt.plot(xx, yy, '-') """ LGR.debug('Attempting to classify midk components') # Tried getting rid of accepting based on SVM altogether, # now using only rejecting toacc_hi = np.setdiff1d( 
nc[utils.andb([ fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2 ]) == 3], np.union1d(group0, rej)) toacc_lo = np.intersect1d( to_clf, nc[utils.andb([ spz < 1, Rz < 0, mmix_kurt_z_max < 5, Dz > -1, Tz > -1, Vz < 0, seldict['Kappas'] >= F025, fdist < 3 * np.percentile(fdist[group0], 98) ]) == 8]) midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T, [0] * len(group0) + [1] * len(rej), fproj_arr_val[:, to_clf].T, svmtype=2) midk = np.setdiff1d( to_clf[utils.andb([ midk_clf == 1, seldict['varex'][to_clf] > np.median(seldict['varex'][group0]) ]) == 2], np.union1d(toacc_hi, toacc_lo)) # only use SVM to augment toacc_hi only if toacc_hi isn't already # conflicting with SVM choice if len( np.intersect1d( to_clf[utils.andb([midk_clf == 1, Vz[to_clf] > 0]) == 2], toacc_hi)) == 0: svm_acc_fail = True toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0]) else: svm_acc_fail = False """ Step 3: Compute variance associated with low T2* areas (e.g. draining veins and low T2* areas) # To write out veinmask veinout = np.zeros(t2s.shape) veinout[t2s!=0] = veinmaskf utils.filewrite(veinout, 'veinmaskf', ref_img) veinBout = utils.unmask(veinmaskB, mask) utils.filewrite(veinBout, 'veins50', ref_img) """ LGR.debug( 'Computing variance associated with low T2* areas (e.g., draining veins)' ) tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape) tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs( seldict['tsoc_B'])[seldict['Z_clmaps'] != 0] sig_B = [ stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25) if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0 else 0 for ii in nc ] sig_B = np.abs(seldict['tsoc_B']) > np.tile( sig_B, [seldict['tsoc_B'].shape[0], 1]) veinmask = utils.andb([ t2s < stats.scoreatpercentile( t2s[t2s != 0], 15, interpolation_method='lower'), t2s != 0 ]) == 2 veinmaskf = veinmask[mask] veinR = np.array(sig_B[veinmaskf].sum(0), dtype=float) / sig_B[~veinmaskf].sum(0) veinR[np.isnan(veinR)] = 0 veinc = np.union1d(rej, midk) rej_veinRZ = ((veinR - veinR[veinc].mean()) / veinR[veinc].std())[veinc] rej_veinRZ[rej_veinRZ < 0] = 0 rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0 t2s_lim = [ stats.scoreatpercentile(t2s[t2s != 0], 50, interpolation_method='lower'), stats.scoreatpercentile( t2s[t2s != 0], 80, interpolation_method='lower') / 2 ] phys_var_zs = [] for t2sl_i in range(len(t2s_lim)): t2sl = t2s_lim[t2sl_i] veinW = sig_B[:, veinc] * np.tile(rej_veinRZ, [sig_B.shape[0], 1]) veincand = utils.unmask( utils.andb([ s0[t2s != 0] < np.median(s0[t2s != 0]), t2s[t2s != 0] < t2sl ]) >= 1, t2s != 0)[mask] veinW[~veincand] = 0 invein = veinW.sum( axis=1)[(utils.unmask(veinmaskf, mask) * utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]] minW = 10 * (np.log10(invein).mean()) - 1 * 10**( np.log10(invein).std()) veinmaskB = veinW.sum(axis=1) > minW tsoc_Bp = seldict['tsoc_B'].copy() tsoc_Bp[tsoc_Bp < 0] = 0 vvex = np.array([ (tsoc_Bp[veinmaskB, ii]**2.).sum() / (tsoc_Bp[:, ii]**2.).sum() for ii in nc ]) group0_res = np.intersect1d(KRguess, group0) phys_var_zs.append( (vvex - vvex[group0_res].mean()) / vvex[group0_res].std()) veinBout = utils.unmask(veinmaskB, mask) utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img) # Mask to sample veins phys_var_z = np.array(phys_var_zs).max(0) Vz2 = (varex_ - varex_[group0].mean()) / varex_[group0].std() """ Step 4: Learn joint TE-dependence spatial and temporal models to move remaining artifacts to ignore class """ LGR.debug( 'Learning joint TE-dependence spatial/temporal models to ignore 
remaining artifacts' ) to_ign = [] minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)]) newcest = len(group0) + len( toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign]) phys_art = np.setdiff1d( nc[utils.andb([phys_var_z > 3.5, seldict['Kappas'] < minK_ign]) == 2], group0) rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas']) phys_art = np.union1d( np.setdiff1d( nc[utils.andb([phys_var_z > 2, rank_diff > newcest / 2, Vz2 > -1]) == 3], group0), phys_art) # Want to replace field_art with an acf/SVM based approach # instead of a kurtosis/filter one field_art = np.setdiff1d( nc[utils.andb([mmix_kurt_z_max > 5, seldict['Kappas'] < minK_ign]) == 2], group0) field_art = np.union1d( np.setdiff1d( nc[utils.andb([ mmix_kurt_z_max > 2, (stats.rankdata(mmix_kurt_z_max) - stats.rankdata(seldict['Kappas'])) > newcest / 2, Vz2 > 1, seldict['Kappas'] < F01 ]) == 4], group0), field_art) field_art = np.union1d( np.setdiff1d( nc[utils.andb([ mmix_kurt_z_max > 3, Vz2 > 3, seldict['Rhos'] > np.percentile(seldict['Rhos'][group0], 75) ]) == 3], group0), field_art) field_art = np.union1d( np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2], group0), field_art) misc_art = np.setdiff1d( nc[utils.andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest / 2, seldict['Kappas'] < Khighelbowval]) == 2], group0) ign_cand = np.unique(list(field_art) + list(phys_art) + list(misc_art)) midkrej = np.union1d(midk, rej) to_ign = np.setdiff1d(list(ign_cand), midkrej) toacc = np.union1d(toacc_hi, toacc_lo) ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej)) ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej)) orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej)) # Last ditch effort to save some transient components if not strict_mode: Vz3 = (varex_ - varex_[ncl].mean()) / varex_[ncl].std() ncl = np.union1d( ncl, np.intersect1d( orphan, nc[utils.andb([ seldict['Kappas'] > F05, seldict['Rhos'] < F025, seldict['Kappas'] > seldict['Rhos'], Vz3 <= -1, Vz3 > -3, mmix_kurt_z_max < 2.5 ]) == 6])) ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej)) orphan = np.setdiff1d( nc, list(ncl) + list(to_ign) + list(midk) + list(rej)) if savecsdiag: diagstep_keys = [ 'Rejected components', 'Kappa-Rho cut point', 'Kappa cut', 'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess', 'Dice rejected', 'rej_supp', 'to_clf', 'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo', 'Field artifacts', 'Physiological artifacts', 'Miscellaneous artifacts', 'ncl', 'Ignored components' ] diagstep_vals = [ list(rej), KRcut.item(), Kcut.item(), Rcut.item(), dbscanfailed, list(KRguess), dice_rej, list(rej_supp), list(to_clf), list(midk), svm_acc_fail, list(toacc_hi), list(toacc_lo), list(field_art), list(phys_art), list(misc_art), list(ncl), list(ign) ] with open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True, default=str) allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) np.savetxt('csdata.txt', allfz) return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list( sorted(ign))
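The DBSCAN stage in `selcomps` sweeps the `eps` radius over a z-scored component-by-feature matrix and keeps solutions whose cluster structure looks plausible. The sketch below mimics that sweep on random features with simplified acceptance conditions (fewer iterations and looser checks than the four conditions used above); it is illustrative only.

import numpy as np
from scipy import stats
from sklearn.cluster import DBSCAN

rng = np.random.default_rng(1)
n_components, n_features = 60, 8
fz = stats.zscore(rng.standard_normal((n_features, n_components)), axis=1)  # toy feature matrix

solutions = []
for ii in range(200):                        # the real sweep tries many more eps values
    eps = 0.005 + ii * 0.005
    db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)
    n_clusters = db.labels_.max() + 1        # label -1 marks noise points
    frac_noise = np.mean(db.labels_ == -1)
    if 1 <= n_clusters <= n_components // 6 and frac_noise < 0.5:
        solutions.append((eps, n_clusters, frac_noise))
print(len(solutions))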
def generate_metrics( data_cat, data_optcom, mixing, adaptive_mask, tes, io_generator, label, metrics=None, ): """Fit TE-dependence and -independence models to components. Parameters ---------- data_cat : (S x E x T) array_like Input data, where `S` is samples, `E` is echos, and `T` is time data_optcom : (S x T) array_like Optimally combined data mixing : (T x C) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `data_cat` adaptive_mask : (S) array_like Array where each value indicates the number of echoes with good signal for that voxel. This mask may be thresholded; for example, with values less than 3 set to 0. For more information on thresholding, see `make_adaptive_mask`. tes : list List of echo times associated with `data_cat`, in milliseconds io_generator : tedana.io.OutputGenerator The output generator object for this workflow label : str in ['ICA', 'PCA'] The label for this metric generation type metrics : list List of metrics to return Returns ------- comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index is the component number. """ # Load metric dependency tree from json file dependency_config = op.join(utils.get_resource_path(), "config", "metrics.json") dependency_config = io.load_json(dependency_config) if metrics is None: metrics = ["map weight"] RepLGR.info("The following metrics were calculated: {}.".format(", ".join(metrics))) if not (data_cat.shape[0] == data_optcom.shape[0] == adaptive_mask.shape[0]): raise ValueError( "First dimensions (number of samples) of data_cat ({0}), " "data_optcom ({1}), and adaptive_mask ({2}) do not " "match".format(data_cat.shape[0], data_optcom.shape[0], adaptive_mask.shape[0]) ) elif data_cat.shape[1] != len(tes): raise ValueError( "Second dimension of data_cat ({0}) does not match " "number of echoes provided (tes; " "{1})".format(data_cat.shape[1], len(tes)) ) elif not (data_cat.shape[2] == data_optcom.shape[1] == mixing.shape[0]): raise ValueError( "Number of volumes in data_cat ({0}), " "data_optcom ({1}), and mixing ({2}) do not " "match.".format(data_cat.shape[2], data_optcom.shape[1], mixing.shape[0]) ) # Derive mask from thresholded adaptive mask mask = adaptive_mask >= 3 # Apply masks before anything else data_cat = data_cat[mask, ...] 
data_optcom = data_optcom[mask, :] adaptive_mask = adaptive_mask[mask] # Ensure that echo times are in an array, rather than a list tes = np.asarray(tes) # Get reference image from io_generator ref_img = io_generator.reference_img required_metrics = dependency_resolver( dependency_config["dependencies"], metrics, dependency_config["inputs"], ) # Use copy to avoid changing the original variable outside of this function mixing = mixing.copy() # Generate the component table, which will be filled out, column by column, # throughout this function n_components = mixing.shape[1] comptable = pd.DataFrame(index=np.arange(n_components, dtype=int)) comptable["Component"] = [ io.add_decomp_prefix(comp, prefix=label, max_value=comptable.shape[0]) for comp in comptable.index.values ] # Metric maps # Maps will be stored as arrays in an easily-indexable dictionary metric_maps = {} if "map weight" in required_metrics: LGR.info("Calculating weight maps") metric_maps["map weight"] = dependence.calculate_weights(data_optcom, mixing) signs = determine_signs(metric_maps["map weight"], axis=0) comptable["optimal sign"] = signs metric_maps["map weight"], mixing = flip_components( metric_maps["map weight"], mixing, signs=signs ) if "map optcom betas" in required_metrics: LGR.info("Calculating parameter estimate maps for optimally combined data") metric_maps["map optcom betas"] = dependence.calculate_betas(data_optcom, mixing) if io_generator.verbose: metric_maps["map echo betas"] = dependence.calculate_betas(data_cat, mixing) if "map percent signal change" in required_metrics: LGR.info("Calculating percent signal change maps") # used in kundu v3.2 tree metric_maps["map percent signal change"] = dependence.calculate_psc( data_optcom, metric_maps["map optcom betas"] ) if "map Z" in required_metrics: LGR.info("Calculating z-statistic maps") metric_maps["map Z"] = dependence.calculate_z_maps(metric_maps["map weight"]) if io_generator.verbose: io_generator.save_file( utils.unmask(metric_maps["map Z"] ** 2, mask), label + " component weights img", ) if ("map FT2" in required_metrics) or ("map FS0" in required_metrics): LGR.info("Calculating F-statistic maps") m_T2, m_S0, p_m_T2, p_m_S0 = dependence.calculate_f_maps( data_cat, metric_maps["map Z"], mixing, adaptive_mask, tes ) metric_maps["map FT2"] = m_T2 metric_maps["map FS0"] = m_S0 metric_maps["map predicted T2"] = p_m_T2 metric_maps["map predicted S0"] = p_m_S0 if "map Z clusterized" in required_metrics: LGR.info("Thresholding z-statistic maps") z_thresh = 1.95 metric_maps["map Z clusterized"] = dependence.threshold_map( metric_maps["map Z"], mask, ref_img, z_thresh ) if "map FT2 clusterized" in required_metrics: LGR.info("Calculating T2* F-statistic maps") f_thresh, _, _ = getfbounds(len(tes)) metric_maps["map FT2 clusterized"] = dependence.threshold_map( metric_maps["map FT2"], mask, ref_img, f_thresh ) if "map FS0 clusterized" in required_metrics: LGR.info("Calculating S0 F-statistic maps") f_thresh, _, _ = getfbounds(len(tes)) metric_maps["map FS0 clusterized"] = dependence.threshold_map( metric_maps["map FS0"], mask, ref_img, f_thresh ) # Intermediate metrics if "countsigFT2" in required_metrics: LGR.info("Counting significant voxels in T2* F-statistic maps") comptable["countsigFT2"] = dependence.compute_countsignal( metric_maps["map FT2 clusterized"] ) if "countsigFS0" in required_metrics: LGR.info("Counting significant voxels in S0 F-statistic maps") comptable["countsigFS0"] = dependence.compute_countsignal( metric_maps["map FS0 clusterized"] ) # Back to maps 
if "map beta T2 clusterized" in required_metrics: LGR.info("Thresholding optimal combination beta maps to match T2* F-statistic maps") metric_maps["map beta T2 clusterized"] = dependence.threshold_to_match( metric_maps["map optcom betas"], comptable["countsigFT2"], mask, ref_img ) if "map beta S0 clusterized" in required_metrics: LGR.info("Thresholding optimal combination beta maps to match S0 F-statistic maps") metric_maps["map beta S0 clusterized"] = dependence.threshold_to_match( metric_maps["map optcom betas"], comptable["countsigFS0"], mask, ref_img ) # Dependence metrics if ("kappa" in required_metrics) or ("rho" in required_metrics): LGR.info("Calculating kappa and rho") comptable["kappa"], comptable["rho"] = dependence.calculate_dependence_metrics( F_T2_maps=metric_maps["map FT2"], F_S0_maps=metric_maps["map FS0"], Z_maps=metric_maps["map Z"], ) # Generic metrics if "variance explained" in required_metrics: LGR.info("Calculating variance explained") comptable["variance explained"] = dependence.calculate_varex( metric_maps["map optcom betas"] ) if "normalized variance explained" in required_metrics: LGR.info("Calculating normalized variance explained") comptable["normalized variance explained"] = dependence.calculate_varex_norm( metric_maps["map weight"] ) # Spatial metrics if "dice_FT2" in required_metrics: LGR.info( "Calculating DSI between thresholded T2* F-statistic and " "optimal combination beta maps" ) comptable["dice_FT2"] = dependence.compute_dice( metric_maps["map beta T2 clusterized"], metric_maps["map FT2 clusterized"], axis=0, ) if "dice_FS0" in required_metrics: LGR.info( "Calculating DSI between thresholded S0 F-statistic and " "optimal combination beta maps" ) comptable["dice_FS0"] = dependence.compute_dice( metric_maps["map beta S0 clusterized"], metric_maps["map FS0 clusterized"], axis=0, ) if "signal-noise_t" in required_metrics: LGR.info("Calculating signal-noise t-statistics") RepLGR.info( "A t-test was performed between the distributions of T2*-model " "F-statistics associated with clusters (i.e., signal) and " "non-cluster voxels (i.e., noise) to generate a t-statistic " "(metric signal-noise_z) and p-value (metric signal-noise_p) " "measuring relative association of the component to signal " "over noise." ) ( comptable["signal-noise_t"], comptable["signal-noise_p"], ) = dependence.compute_signal_minus_noise_t( Z_maps=metric_maps["map Z"], Z_clmaps=metric_maps["map Z clusterized"], F_T2_maps=metric_maps["map FT2"], ) if "signal-noise_z" in required_metrics: LGR.info("Calculating signal-noise z-statistics") RepLGR.info( "A t-test was performed between the distributions of T2*-model " "F-statistics associated with clusters (i.e., signal) and " "non-cluster voxels (i.e., noise) to generate a z-statistic " "(metric signal-noise_z) and p-value (metric signal-noise_p) " "measuring relative association of the component to signal " "over noise." ) ( comptable["signal-noise_z"], comptable["signal-noise_p"], ) = dependence.compute_signal_minus_noise_z( Z_maps=metric_maps["map Z"], Z_clmaps=metric_maps["map Z clusterized"], F_T2_maps=metric_maps["map FT2"], ) if "countnoise" in required_metrics: LGR.info("Counting significant noise voxels from z-statistic maps") RepLGR.info( "The number of significant voxels not from clusters was " "calculated for each component." 
) comptable["countnoise"] = dependence.compute_countnoise( metric_maps["map Z"], metric_maps["map Z clusterized"] ) # Composite metrics if "d_table_score" in required_metrics: LGR.info("Calculating decision table score") comptable["d_table_score"] = dependence.generate_decision_table_score( comptable["kappa"], comptable["dice_FT2"], comptable["signal-noise_t"], comptable["countnoise"], comptable["countsigFT2"], ) # Write verbose metrics if needed if io_generator.verbose: write_betas = "map echo betas" in metric_maps write_T2S0 = "map predicted T2" in metric_maps if write_betas: betas = metric_maps["map echo betas"] if write_T2S0: pred_T2_maps = metric_maps["map predicted T2"] pred_S0_maps = metric_maps["map predicted S0"] for i_echo in range(len(tes)): if write_betas: echo_betas = betas[:, i_echo, :] io_generator.save_file( utils.unmask(echo_betas, mask), "echo weight " + label + " map split img", echo=(i_echo + 1), ) if write_T2S0: echo_pred_T2_maps = pred_T2_maps[:, i_echo, :] io_generator.save_file( utils.unmask(echo_pred_T2_maps, mask), "echo T2 " + label + " split img", echo=(i_echo + 1), ) echo_pred_S0_maps = pred_S0_maps[:, i_echo, :] io_generator.save_file( utils.unmask(echo_pred_S0_maps, mask), "echo S0 " + label + " split img", echo=(i_echo + 1), ) # Reorder component table columns based on previous tedana versions # NOTE: Some new columns will be calculated and columns may be reordered during # component selection preferred_order = ( "Component", "kappa", "rho", "variance explained", "normalized variance explained", "estimated normalized variance explained", "countsigFT2", "countsigFS0", "dice_FT2", "dice_FS0", "countnoise", "signal-noise_t", "signal-noise_p", "d_table_score", "kappa ratio", "d_table_score_scrub", "classification", "rationale", ) first_columns = [col for col in preferred_order if col in comptable.columns] other_columns = [col for col in comptable.columns if col not in preferred_order] comptable = comptable[first_columns + other_columns] return comptable
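# --- Illustrative sketch (not part of the original module) ---
# The column reordering above keeps whichever `preferred_order` columns were
# actually computed, in that order, and appends any remaining columns. A toy
# example with a hypothetical component table:
import pandas as pd

toy_comptable = pd.DataFrame(
    {
        "variance explained": [40.0, 10.0],
        "Component": ["ICA_00", "ICA_01"],
        "kappa": [80.0, 20.0],
        "my extra metric": [1.0, 2.0],  # hypothetical column outside preferred_order
    }
)
preferred_order = ("Component", "kappa", "rho", "variance explained")
first_columns = [col for col in preferred_order if col in toy_comptable.columns]
other_columns = [col for col in toy_comptable.columns if col not in preferred_order]
toy_comptable = toy_comptable[first_columns + other_columns]
# Resulting column order: Component, kappa, variance explained, my extra metric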
def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise time series and saves them to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    comptable : :obj:`pandas.DataFrame`
        Component metric table. The 'classification' column is used to split
        components into accepted and rejected groups.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : :obj:`str`, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : :obj:`float`
        Percent variance of data explained by extracted + retained components

    Notes
    -----
    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    hik_ts_[suffix].nii       High-Kappa time series.
    lowk_ts_[suffix].nii      Low-Kappa time series.
    dn_ts_[suffix].nii        Denoised time series.
    ======================    =================================================
    """
    acc = comptable[comptable.classification == 'accepted'].index.values
    rej = comptable[comptable.classification == 'rejected'].index.values

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = get_coeffs(dmdata.T, mmix, mask=None)
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
               (dmdata**2.).sum()) * 100
    LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts

    if len(acc) != 0:
        fout = filewrite(utils.unmask(hikts, mask),
                         'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))

    if len(rej) != 0:
        fout = filewrite(utils.unmask(lowkts, mask),
                         'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = filewrite(utils.unmask(dnts, mask),
                     'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))

    return varexpl
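# --- Illustrative sketch (not part of the original module) ---
# The denoising arithmetic used above, in isolation: the rejected components'
# contribution is reconstructed from voxel-wise betas and the mixing matrix,
# then subtracted from the data. All dimensions and component lists below are
# made up for illustration.
import numpy as np

rng = np.random.default_rng(0)
n_voxels, n_trs, n_comps = 100, 50, 5
betas = rng.standard_normal((n_voxels, n_comps))   # voxel-wise component weights
mmix = rng.standard_normal((n_trs, n_comps))       # component time series (T x C)
data = betas.dot(mmix.T) + rng.standard_normal((n_voxels, n_trs))

acc = [0, 1, 2]   # hypothetical accepted component indices
rej = [3, 4]      # hypothetical rejected component indices

hikts = betas[:, acc].dot(mmix.T[acc, :])    # high-Kappa (accepted) reconstruction
lowkts = betas[:, rej].dot(mmix.T[rej, :])   # low-Kappa (rejected) reconstruction
dnts = data - lowkts                         # denoised: data minus rejected signal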
def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True): """ Optimally combine BOLD data across TEs. Parameters ---------- data : (S x E x T) :obj:`numpy.ndarray` Concatenated BOLD data. tes : (E,) :obj:`numpy.ndarray` Array of TEs, in seconds. mask : (S,) :obj:`numpy.ndarray` Brain mask in 3D array. t2s : (S [x T]) :obj:`numpy.ndarray` or None, optional Estimated T2* values. Only required if combmode = 't2s'. Default is None. combmode : {'t2s', 'paid'}, optional How to combine data. Either 'paid' or 't2s'. If 'paid', argument 't2s' is not required. Default is 't2s'. verbose : :obj:`bool`, optional Whether to print status updates. Default is True. Returns ------- combined : (S x T) :obj:`numpy.ndarray` Optimally combined data. Notes ----- 1. Estimate voxel- and TE-specific weights based on estimated :math:`T_2^*`: .. math:: w(T_2^*)_n = \\frac{TE_n * exp(\\frac{-TE}\ {T_{2(est)}^*})}{\\sum TE_n * exp(\\frac{-TE}{T_{2(est)}^*})} 2. Perform weighted average per voxel and TR across TEs based on weights estimated in the previous step. """ if data.ndim != 3: raise ValueError('Input data must be 3D (S x E x T)') if len(tes) != data.shape[1]: raise ValueError('Number of echos provided does not match second ' 'dimension of input data: {0} != ' '{1}'.format(len(tes), data.shape[1])) if mask.ndim != 1: raise ValueError('Mask is not 1D') elif mask.shape[0] != data.shape[0]: raise ValueError('Mask and data do not have same number of ' 'voxels/samples: {0} != {1}'.format( mask.shape[0], data.shape[0])) if combmode not in ['t2s', 'paid']: raise ValueError("Argument 'combmode' must be either 't2s' or 'paid'") elif combmode == 't2s' and t2s is None: raise ValueError("Argument 't2s' must be supplied if 'combmode' is " "set to 't2s'.") elif combmode == 'paid' and t2s is not None: LGR.warning("Argument 't2s' is not required if 'combmode' is 'paid'. " "'t2s' array will not be used.") data = data[mask, :, :] # mask out empty voxels/samples tes = np.array(tes)[np.newaxis, ...] # (1 x E) array_like if combmode == 'paid': LGR.info( 'Optimally combining data with parallel-acquired inhomogeneity ' 'desensitized (PAID) method') combined = _combine_paid(data, tes) else: if t2s.ndim == 1: msg = 'Optimally combining data with voxel-wise T2 estimates' else: msg = ('Optimally combining data with voxel- and volume-wise T2 ' 'estimates') t2s = t2s[mask, ..., np.newaxis] # mask out empty voxels/samples LGR.info(msg) combined = _combine_t2s(data, tes, t2s) combined = unmask(combined, mask) return combined
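# --- Illustrative sketch (not part of the original module) ---
# The 't2s' combination scheme described in the Notes, for a single voxel:
# each echo is weighted by TE_n * exp(-TE_n / T2*), the weights are normalized
# across echoes, and the echoes are averaged with those weights. Echo times,
# T2*, and signal values below are made up.
import numpy as np

tes = np.array([15., 39., 63.])           # hypothetical echo times (ms)
t2s_voxel = 30.                           # hypothetical T2* estimate (ms)
signal = np.array([[500., 210., 95.],     # two TRs of made-up data,
                   [510., 205., 90.]]).T  # transposed to echoes x time

weights = tes * np.exp(-tes / t2s_voxel)
weights /= weights.sum()
combined = (weights[:, np.newaxis] * signal).sum(axis=0)  # optimally combined series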
def fit_decay(data, tes, mask, adaptive_mask, fittype):
    """
    Fit voxel-wise monoexponential decay models to `data`

    Parameters
    ----------
    data : (S x E [x T]) array_like
        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
        time
    tes : (E,) :obj:`list`
        Echo times
    mask : (S,) array_like
        Boolean array indicating samples that are consistently (i.e., across
        time AND echoes) non-zero
    adaptive_mask : (S,) array_like
        Valued array indicating number of echos that have sufficient signal in
        given sample
    fittype : {loglin, curvefit}
        The type of model fit to use

    Returns
    -------
    t2s_limited : (S,) :obj:`numpy.ndarray`
        Limited T2* map. The limited map only keeps the T2* values for data
        where there are at least two echos with good signal.
    s0_limited : (S,) :obj:`numpy.ndarray`
        Limited S0 map. The limited map only keeps the S0 values for data
        where there are at least two echos with good signal.
    t2s_full : (S,) :obj:`numpy.ndarray`
        Full T2* map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the T2* estimate from the first two
        echoes.
    s0_full : (S,) :obj:`numpy.ndarray`
        Full S0 map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the S0 estimate from the first two
        echoes.

    Notes
    -----
    1.  Fit monoexponential decay function to all values for a given voxel
        across TRs, per TE, to estimate voxel-wise :math:`S_0` and
        :math:`T_2^*`:

        .. math::
            S(TE) = S_0 * exp(-R_2^* * TE)

            T_2^* = 1 / R_2^*

    2.  Replace infinite values in :math:`T_2^*` map with 500 and NaN values
        in :math:`S_0` map with 0.
    3.  Generate limited and full :math:`T_2^*` and :math:`S_0` maps. The
        limited maps only retain estimates for voxels with good signal in at
        least two echoes, fit using only each voxel's good echoes (per
        ``adaptive_mask``). The full maps additionally fill in voxels with
        good signal in only one echo using the estimates from the first two
        echoes.
    """
    if data.shape[1] != len(tes):
        raise ValueError('Second dimension of data ({0}) does not match number '
                         'of echoes provided (tes; {1})'.format(data.shape[1], len(tes)))
    elif not (data.shape[0] == mask.shape[0] == adaptive_mask.shape[0]):
        raise ValueError('First dimensions (number of samples) of data ({0}), '
                         'mask ({1}), and adaptive_mask ({2}) do not '
                         'match'.format(data.shape[0], mask.shape[0],
                                        adaptive_mask.shape[0]))

    data = data.copy()
    if data.ndim == 2:
        data = data[:, :, None]

    # Mask the inputs
    data_masked = data[mask, :, :]
    adaptive_mask_masked = adaptive_mask[mask]

    if fittype == 'loglin':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear(
            data_masked, tes, adaptive_mask_masked)
    elif fittype == 'curvefit':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_monoexponential(
            data_masked, tes, adaptive_mask_masked)
    else:
        raise ValueError('Unknown fittype option: {}'.format(fittype))

    t2s_limited[np.isinf(t2s_limited)] = 500.  # why 500?
    # let's get rid of negative values, but keep zeros where limited != full
    t2s_limited[(adaptive_mask_masked > 1) & (t2s_limited <= 0)] = 1.
    t2s_limited = _apply_t2s_floor(t2s_limited, tes)
    s0_limited[np.isnan(s0_limited)] = 0.  # why 0?
    t2s_full[np.isinf(t2s_full)] = 500.  # why 500?
    t2s_full[t2s_full <= 0] = 1.  # let's get rid of negative values!
    t2s_full = _apply_t2s_floor(t2s_full, tes)
    s0_full[np.isnan(s0_full)] = 0.  # why 0?

    t2s_limited = utils.unmask(t2s_limited, mask)
    s0_limited = utils.unmask(s0_limited, mask)
    t2s_full = utils.unmask(t2s_full, mask)
    s0_full = utils.unmask(s0_full, mask)

    return t2s_limited, s0_limited, t2s_full, s0_full
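# --- Illustrative usage sketch (not part of the original module) ---
# Exercising fit_decay on noise-free synthetic data, assuming the module-level
# helpers it relies on (fit_loglinear, _apply_t2s_floor, utils.unmask) are
# available as in the original package. Shapes and parameter values are made up.
import numpy as np

n_samp, n_vols = 200, 10
tes = [15., 39., 63.]                       # hypothetical echo times (ms)
true_s0, true_t2s = 1000., 35.              # same ground truth for every voxel
decay_curve = np.array([true_s0 * np.exp(-te / true_t2s) for te in tes])
data = np.tile(decay_curve[np.newaxis, :, np.newaxis], (n_samp, 1, n_vols))  # S x E x T
mask = np.ones(n_samp, dtype=bool)
adaptive_mask = np.full(n_samp, len(tes))   # all echoes usable in every voxel

t2s_limited, s0_limited, t2s_full, s0_full = fit_decay(
    data, tes, mask, adaptive_mask, fittype='loglin')
# t2s_limited should be ~35 ms and s0_limited ~1000 everywhere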
def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True): """Fit monoexponential decay model with log-linear regression. The monoexponential decay function is fitted to all values for a given voxel across TRs, per TE, to estimate voxel-wise :math:`S_0` and :math:`T_2^*`. At a given voxel, only those echoes with "good signal", as indicated by the value of the voxel in the adaptive mask, are used. Therefore, for a voxel with an adaptive mask value of five, the first five echoes would be used to estimate T2* and S0. Parameters ---------- data_cat : (S x E x T) :obj:`numpy.ndarray` Multi-echo data. S is samples, E is echoes, and T is timepoints. echo_times : (E,) array_like Echo times in milliseconds. adaptive_mask : (S,) :obj:`numpy.ndarray` Array where each value indicates the number of echoes with good signal for that voxel. report : :obj:`bool`, optional Whether to log a description of this step or not. Default is True. Returns ------- t2s_limited, s0_limited, t2s_full, s0_full: (S,) :obj:`numpy.ndarray` T2* and S0 estimate maps. Notes ----- The approach used in this function involves transforming the raw signal values (:math:`log(|data| + 1)`) and then fitting a line to the transformed data using ordinary least squares. This results in two parameter estimates: one for the slope and one for the intercept. The slope estimate is inverted (i.e., 1 / slope) to get :math:`T_2^*`, while the intercept estimate is exponentiated (i.e., e^intercept) to get :math:`S_0`. This method is faster, but less accurate, than the nonlinear approach. """ if report: RepLGR.info( "A monoexponential model was fit to the data at each voxel " "using log-linear regression in order to estimate T2* and S0 " "maps. For each voxel, the value from the adaptive mask was " "used to determine which echoes would be used to estimate T2* " "and S0.") n_samp, n_echos, n_vols = data_cat.shape echos_to_run = np.unique(adaptive_mask) if 1 in echos_to_run: echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2))) echos_to_run = echos_to_run[echos_to_run >= 2] t2s_asc_maps = np.zeros([n_samp, len(echos_to_run)]) s0_asc_maps = np.zeros([n_samp, len(echos_to_run)]) echo_masks = np.zeros([n_samp, len(echos_to_run)], dtype=bool) for i_echo, echo_num in enumerate(echos_to_run): if echo_num == 2: voxel_idx = np.where(adaptive_mask <= echo_num)[0] else: voxel_idx = np.where(adaptive_mask == echo_num)[0] # Create echo masks to assign values to limited vs full maps later echo_mask = np.squeeze(echo_masks[..., i_echo]) echo_mask[adaptive_mask == echo_num] = True echo_masks[..., i_echo] = echo_mask # perform log linear fit of echo times against MR signal # make DV matrix: samples x (time series * echos) data_2d = data_cat[voxel_idx, :echo_num, :].reshape( len(voxel_idx), -1).T log_data = np.log(np.abs(data_2d) + 1) # make IV matrix: intercept/TEs x (time series * echos) x = np.column_stack( [np.ones(echo_num), [-te for te in echo_times[:echo_num]]]) X = np.repeat(x, n_vols, axis=0) # Log-linear fit betas = np.linalg.lstsq(X, log_data, rcond=None)[0] t2s = 1. 
/ betas[1, :].T s0 = np.exp(betas[0, :]).T t2s_asc_maps[voxel_idx, i_echo] = t2s s0_asc_maps[voxel_idx, i_echo] = s0 # create limited T2* and S0 maps t2s_limited = utils.unmask(t2s_asc_maps[echo_masks], adaptive_mask > 1) s0_limited = utils.unmask(s0_asc_maps[echo_masks], adaptive_mask > 1) # create full T2* maps with S0 estimation errors t2s_full, s0_full = t2s_limited.copy(), s0_limited.copy() t2s_full[adaptive_mask == 1] = t2s_asc_maps[adaptive_mask == 1, 0] s0_full[adaptive_mask == 1] = s0_asc_maps[adaptive_mask == 1, 0] return t2s_limited, s0_limited, t2s_full, s0_full
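# --- Illustrative sketch (not part of the original module) ---
# The log-linear trick from the Notes on a single, noise-free synthetic voxel:
# log(|S| + 1) is approximately linear in -TE, with intercept log(S0) and
# slope 1/T2*, so ordinary least squares recovers both parameters.
import numpy as np

echo_times = np.array([15., 39., 63.])   # hypothetical TEs (ms)
true_s0, true_t2s = 1000., 35.           # made-up ground truth
signal = true_s0 * np.exp(-echo_times / true_t2s)

log_data = np.log(np.abs(signal) + 1)
X = np.column_stack([np.ones(len(echo_times)), -echo_times])
betas = np.linalg.lstsq(X, log_data, rcond=None)[0]

est_s0 = np.exp(betas[0])   # ~1000 (slightly biased by the +1 offset)
est_t2s = 1. / betas[1]     # ~35 ms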
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None, tedort=False, gscontrol=None, tedpca='mle', source_tes=-1, combmode='t2s', verbose=False, stabilize=False, out_dir='.', fixed_seed=42, maxit=500, maxrestart=10, debug=False, quiet=False, png=False, png_cmap='coolwarm'): """ Run the "canonical" TE-Dependent ANAlysis workflow. Parameters ---------- data : :obj:`str` or :obj:`list` of :obj:`str` Either a single z-concatenated file (single-entry list or str) or a list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. mask : :obj:`str`, optional Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially aligned with `data`. If an explicit mask is not provided, then Nilearn's compute_epi_mask function will be used to derive a mask from the first echo's data. mixm : :obj:`str`, optional File containing mixing matrix. If not provided, ME-PCA and ME-ICA are done. ctab : :obj:`str`, optional File containing component table from which to extract pre-computed classifications. manacc : :obj:`list`, :obj:`str`, or None, optional List of manually accepted components. Can be a list of the components, a comma-separated string with component numbers, or None. Default is None. tedort : :obj:`bool`, optional Orthogonalize rejected components w.r.t. accepted ones prior to denoising. Default is False. gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional Perform additional denoising to remove spatially diffuse noise. Default is None. tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional Method with which to select components in TEDPCA. Default is 'mle'. source_tes : :obj:`int`, optional Source TEs for models. 0 for all, -1 for optimal combination. Default is -1. combmode : {'t2s'}, optional Combination scheme for TEs: 't2s' (Posse 1999, default). verbose : :obj:`bool`, optional Generate intermediate and additional files. Default is False. png : obj:'bool', optional Generate simple plots and figures. Default is false. png_cmap : obj:'str', optional Name of a matplotlib colormap to be used when generating figures. --png must still be used to request figures. Default is 'coolwarm' out_dir : :obj:`str`, optional Output directory. Other Parameters ---------------- fixed_seed : :obj:`int`, optional Value passed to ``mdp.numx_rand.seed()``. Set to a positive integer value for reproducible ICA results; otherwise, set to -1 for varying results across calls. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional Maximum number of attempts for ICA. If ICA fails to converge, the fixed seed will be updated and ICA will be run again. If convergence is achieved before maxrestart attempts, ICA will finish early. Default is 10. debug : :obj:`bool`, optional Whether to run in debugging mode or not. Default is False. quiet : :obj:`bool`, optional If True, suppresses logging/printing of messages. Default is False. Notes ----- This workflow writes out several files. For a complete list of the files generated by this workflow, please visit https://tedana.readthedocs.io/en/latest/outputs.html """ out_dir = op.abspath(out_dir) if not op.isdir(out_dir): os.mkdir(out_dir) if debug and not quiet: # ensure old logs aren't over-written basename = 'tedana_run' extension = 'txt' logname = op.join(out_dir, (basename + '.' 
+ extension)) logex = op.join(out_dir, (basename + '*')) previouslogs = glob.glob(logex) previouslogs.sort(reverse=True) for f in previouslogs: previousparts = op.splitext(f) newname = previousparts[0] + '_old' + previousparts[1] os.rename(f, newname) # set logging format formatter = logging.Formatter( '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s', datefmt='%Y-%m-%dT%H:%M:%S') # set up logging file and open it for writing fh = logging.FileHandler(logname) fh.setFormatter(formatter) logging.basicConfig(level=logging.DEBUG, handlers=[fh, logging.StreamHandler()]) elif quiet: logging.basicConfig(level=logging.WARNING) else: logging.basicConfig(level=logging.INFO) LGR.info('Using output directory: {}'.format(out_dir)) # ensure tes are in appropriate format tes = [float(te) for te in tes] n_echos = len(tes) # Coerce gscontrol to list if not isinstance(gscontrol, list): gscontrol = [gscontrol] # coerce data to samples x echos x time array if isinstance(data, str): data = [data] LGR.info('Loading input data: {}'.format([f for f in data])) catd, ref_img = io.load_data(data, n_echos=n_echos) n_samp, n_echos, n_vols = catd.shape LGR.debug('Resulting data shape: {}'.format(catd.shape)) if mixm is not None and op.isfile(mixm): mixm = op.abspath(mixm) # Allow users to re-run on same folder if mixm != op.join(out_dir, 'meica_mix.1D'): shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D')) shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm))) elif mixm is not None: raise IOError('Argument "mixm" must be an existing file.') if ctab is not None and op.isfile(ctab): ctab = op.abspath(ctab) # Allow users to re-run on same folder if ctab != op.join(out_dir, 'comp_table_ica.txt'): shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt')) shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab))) elif ctab is not None: raise IOError('Argument "ctab" must be an existing file.') if isinstance(manacc, str): manacc = [int(comp) for comp in manacc.split(',')] if ctab and not mixm: LGR.warning('Argument "ctab" requires argument "mixm".') ctab = None elif ctab and (manacc is None): LGR.warning('Argument "ctab" requires argument "manacc".') ctab = None elif manacc is not None and not mixm: LGR.warning('Argument "manacc" requires argument "mixm".') manacc = None if mask is None: LGR.info('Computing EPI mask from first echo') first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :]) mask = compute_epi_mask(first_echo_img) else: # TODO: add affine check LGR.info('Using user-defined mask') mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True) LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp)) if verbose: io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img) os.chdir(out_dir) LGR.info('Computing T2* map') t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum) # set a hard cap for the T2* map # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5, interpolation_method='lower') LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10)) t2s[t2s > cap_t2s * 10] = cap_t2s io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img) io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img) if verbose: io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img) io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img) io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img) io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img) # optimally combine data data_oc = 
combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode) # regress out global signal unless explicitly not desired if 'gsr' in gscontrol: catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img) if mixm is None: # Identify and remove thermal noise from data dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask, t2s, t2sG, ref_img, tes=tes, algorithm=tedpca, source_tes=source_tes, kdaw=10., rdaw=1., out_dir=out_dir, verbose=verbose) mmix_orig = decomposition.tedica(dd, n_components, fixed_seed, maxit, maxrestart) if verbose: np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig) if source_tes == -1: io.filewrite(utils.unmask(dd, mask), op.join(out_dir, 'ts_OC_whitened.nii'), ref_img) LGR.info('Making second component selection guess from ICA results') # Estimate betas and compute selection metrics for mixing matrix # generated from dimensionally reduced data using full data (i.e., data # with thermal noise) comptable, metric_maps, betas, mmix = model.dependence_metrics( catd, data_oc, mmix_orig, mask, t2s, tes, ref_img, reindex=True, label='meica_', out_dir=out_dir, algorithm='kundu_v2', verbose=verbose) np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix) comptable = model.kundu_metrics(comptable, metric_maps) comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols) else: LGR.info('Using supplied mixing matrix from ICA') mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D')) comptable, metric_maps, betas, mmix = model.dependence_metrics( catd, data_oc, mmix_orig, mask, t2s, tes, ref_img, label='meica_', out_dir=out_dir, algorithm='kundu_v2', verbose=verbose) if ctab is None: comptable = model.kundu_metrics(comptable, metric_maps) comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols) else: comptable = pd.read_csv(ctab, sep='\t', index_col='component') comptable = selection.manual_selection(comptable, acc=manacc) comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t', index=True, index_label='component', float_format='%.6f') if comptable[comptable.classification == 'accepted'].shape[0] == 0: LGR.warning('No BOLD components detected! 
Please check data and ' 'results!') mmix_orig = mmix.copy() if tedort: acc_idx = comptable.loc[ ~comptable.classification.str.contains('rejected')].index.values rej_idx = comptable.loc[ comptable.classification.str.contains('rejected')].index.values acc_ts = mmix[:, acc_idx] rej_ts = mmix[:, rej_idx] betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] pred_rej_ts = np.dot(acc_ts, betas) resid = rej_ts - pred_rej_ts mmix[:, rej_idx] = resid np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix) io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix, n_vols=n_vols, ref_img=ref_img) if 't1c' in gscontrol: LGR.info('Performing T1c global signal regression to remove spatially ' 'diffuse noise') gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img) if verbose: io.writeresults_echoes(catd, mmix, mask, comptable, ref_img) if png: LGR.info('Making figures folder with static component maps and ' 'timecourse plots.') # make figure folder first if not op.isdir(op.join(out_dir, 'figures')): os.mkdir(op.join(out_dir, 'figures')) viz.write_comp_figs(data_oc, mask=mask, comptable=comptable, mmix=mmix_orig, ref_img=ref_img, out_dir=op.join(out_dir, 'figures'), png_cmap=png_cmap) LGR.info('Making Kappa vs Rho scatter plot') viz.write_kappa_scatter(comptable=comptable, out_dir=op.join(out_dir, 'figures')) LGR.info('Making overall summary figure') viz.write_summary_fig(comptable=comptable, out_dir=op.join(out_dir, 'figures')) LGR.info('Workflow completed') for handler in logging.root.handlers[:]: logging.root.removeHandler(handler)
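# --- Illustrative usage sketch (not part of the original module) ---
# A minimal invocation of the workflow, assuming it is importable (e.g. from
# tedana.workflows). File names and echo times are hypothetical placeholders.
#
# from tedana.workflows import tedana_workflow
#
# tedana_workflow(
#     data=['sub-01_echo-1_bold.nii.gz',
#           'sub-01_echo-2_bold.nii.gz',
#           'sub-01_echo-3_bold.nii.gz'],
#     tes=[14.5, 38.5, 62.5],   # milliseconds
#     out_dir='tedana_out',
#     tedpca='mle',
#     verbose=True,
# )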
def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True): """ Fit monoexponential decay model with nonlinear curve-fitting. Parameters ---------- data_cat : (S x E x T) :obj:`numpy.ndarray` Multi-echo data. echo_times : (E,) array_like Echo times in milliseconds. adaptive_mask : (S,) :obj:`numpy.ndarray` Array where each value indicates the number of echoes with good signal for that voxel. report : bool, optional Whether to log a description of this step or not. Default is True. Returns ------- t2s_limited, s0_limited, t2s_full, s0_full : (S,) :obj:`numpy.ndarray` T2* and S0 estimate maps. Notes ----- This method is slower, but more accurate, than the log-linear approach. """ if report: RepLGR.info( "A monoexponential model was fit to the data at each voxel " "using nonlinear model fitting in order to estimate T2* and S0 " "maps, using T2*/S0 estimates from a log-linear fit as " "initial values. For each voxel, the value from the adaptive " "mask was used to determine which echoes would be used to " "estimate T2* and S0. In cases of model fit failure, T2*/S0 " "estimates from the log-linear fit were retained instead.") n_samp, n_echos, n_vols = data_cat.shape # Currently unused # fit_data = np.mean(data_cat, axis=2) # fit_sigma = np.std(data_cat, axis=2) t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear(data_cat, echo_times, adaptive_mask, report=False) echos_to_run = np.unique(adaptive_mask) if 1 in echos_to_run: echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2))) echos_to_run = echos_to_run[echos_to_run >= 2] t2s_asc_maps = np.zeros([n_samp, len(echos_to_run)]) s0_asc_maps = np.zeros([n_samp, len(echos_to_run)]) echo_masks = np.zeros([n_samp, len(echos_to_run)], dtype=bool) for i_echo, echo_num in enumerate(echos_to_run): if echo_num == 2: voxel_idx = np.where(adaptive_mask <= echo_num)[0] else: voxel_idx = np.where(adaptive_mask == echo_num)[0] # Create echo masks to assign values to limited vs full maps later echo_mask = np.squeeze(echo_masks[..., i_echo]) echo_mask[adaptive_mask == echo_num] = True echo_masks[..., i_echo] = echo_mask data_2d = data_cat[:, :echo_num, :].reshape(len(data_cat), -1).T echo_times_1d = np.repeat(echo_times[:echo_num], n_vols) # perform a monoexponential fit of echo times against MR signal # using loglin estimates as initial starting points for fit fail_count = 0 for voxel in voxel_idx: try: popt, cov = scipy.optimize.curve_fit( monoexponential, echo_times_1d, data_2d[:, voxel], p0=(s0_full[voxel], t2s_full[voxel]), bounds=((np.min(data_2d[:, voxel]), 0), (np.inf, np.inf))) s0_full[voxel] = popt[0] t2s_full[voxel] = popt[1] except (RuntimeError, ValueError): # If curve_fit fails to converge, fall back to loglinear estimate fail_count += 1 if fail_count: fail_percent = 100 * fail_count / len(voxel_idx) LGR.debug('With {0} echoes, monoexponential fit failed on {1}/{2} ' '({3:.2f}%) voxel(s), used log linear estimate ' 'instead'.format(echo_num, fail_count, len(voxel_idx), fail_percent)) t2s_asc_maps[:, i_echo] = t2s_full s0_asc_maps[:, i_echo] = s0_full # create limited T2* and S0 maps t2s_limited = utils.unmask(t2s_asc_maps[echo_masks], adaptive_mask > 1) s0_limited = utils.unmask(s0_asc_maps[echo_masks], adaptive_mask > 1) # create full T2* maps with S0 estimation errors t2s_full, s0_full = t2s_limited.copy(), s0_limited.copy() t2s_full[adaptive_mask == 1] = t2s_asc_maps[adaptive_mask == 1, 0] s0_full[adaptive_mask == 1] = s0_asc_maps[adaptive_mask == 1, 0] return t2s_limited, s0_limited, t2s_full, s0_full
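# --- Illustrative sketch (not part of the original module) ---
# The `monoexponential` model function passed to scipy.optimize.curve_fit above
# is not shown in this excerpt. A plausible stand-in, consistent with the decay
# model in the fit_decay Notes (S(TE) = S0 * exp(-TE / T2*)), is sketched below
# together with a tiny, noise-free fit.
import numpy as np
import scipy.optimize


def monoexponential_sketch(tes, s0, t2star):
    """Hypothetical stand-in for the module's `monoexponential` function."""
    return s0 * np.exp(-tes / t2star)


tes = np.array([15., 39., 63.])                       # hypothetical TEs (ms)
signal = monoexponential_sketch(tes, 1000., 35.)      # made-up ground truth
popt, _ = scipy.optimize.curve_fit(
    monoexponential_sketch, tes, signal, p0=(900., 30.))
# popt should be approximately (1000., 35.)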
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses the spatial global signal estimation approach to
    remove the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally-combined functional data (i.e., the output of `make_optcom`)
    n_echos : int
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : int, optional
        Specifies degree of Legendre polynomial basis function for estimating
        spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """
    LGR.info('Applying amplitude-based T1 equilibration correction')

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T, rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find time course of the spatial global signal
    # make basis with the Legendre basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                             np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    utils.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
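# --- Illustrative sketch (not part of the original module) ---
# The detrending basis built above uses associated Legendre functions of order
# 0 (i.e., Legendre polynomials) evaluated on [-1, 1], one column per degree
# from 0 to dtrank - 1. A toy detrending of a single made-up time series:
import numpy as np
from scipy.special import lpmv

n_trs, dtrank = 100, 4
bounds = np.linspace(-1, 1, n_trs)
Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])  # T x dtrank

rng = np.random.default_rng(0)
ts = 2.0 + 1.5 * bounds + rng.standard_normal(n_trs)   # toy series with a linear trend
sol = np.linalg.lstsq(Lmix, ts, rcond=None)[0]          # project onto the Legendre basis
detrended = ts - Lmix.dot(sol)                          # remove the low-order trend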