Example #1
def test_smoke_filewrite():
    """
    Ensures that filewrite writes out a neuroimage with random input, since there is no name, remove the image named .nii
    """
    n_samples, n_times, n_components = 64350, 10, 6
    data_1d = np.random.random(n_samples)
    data_2d = np.random.random((n_samples, n_times))
    filename = ""
    ref_img = os.path.join(data_dir, 'mask.nii.gz')

    assert me.filewrite(data_1d, filename, ref_img) is not None
    assert me.filewrite(data_2d, filename, ref_img) is not None

    try:
        os.remove(".nii")
    except OSError:
        print(".nii not generated")
Example #2
def t2smap_workflow(data, tes, mask=None, fitmode='all', combmode='t2s',
                    label=None, debug=False, fittype='loglin', quiet=False):
    """
    Estimate T2* and S0, and optimally combine data across TEs.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    fitmode : {'all', 'ts'}, optional
        Monoexponential model fitting scheme.
        'all' means that the model is fit, per voxel, across all timepoints.
        'ts' means that the model is fit, per voxel and per timepoint.
        Default is 'all'.
    combmode : {'t2s', 'paid'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'paid' (Poser).
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fittype : {'loglin', 'curvefit'}, optional
        Monoexponential fitting method.
        'loglin' means to use the default linear fit to the log of
        the data.
        'curvefit' means to use a monoexponential fit to the raw data,
        which is slightly slower but may be more accurate.

    Other Parameters
    ----------------
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file.

    Files are listed below:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    t2sv.nii                  Limited estimated T2* 3D map or 4D timeseries.
                              Will be a 3D map if ``fitmode`` is 'all' and a
                              4D timeseries if it is 'ts'.
    s0v.nii                   Limited S0 3D map or 4D timeseries.
    t2svG.nii                 Full T2* map/timeseries. The difference between
                              the limited and full maps is that, for voxels
                              affected by dropout where only one echo contains
                              good data, the full map uses the single echo's
                              value while the limited map has a NaN.
    s0vG.nii                  Full S0 map/timeseries.
    ts_OC.nii                 Optimally combined timeseries.
    ======================    =================================================
    """
    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except (TypeError, AttributeError):
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        LGR.info('Using user-defined mask')
    # pass any user-defined mask through; otherwise one is derived from the data
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)

    LGR.info('Computing adaptive T2* map')
    if fitmode == 'all':
        (t2s_limited, s0_limited,
         t2ss, s0s,
         t2s_full, s0_full) = decay.fit_decay(catd, tes, mask, masksum,
                                              fittype)
    else:
        (t2s_limited, s0_limited,
         t2s_full, s0_full) = decay.fit_decay_ts(catd, tes, mask, masksum,
                                                 fittype)

    # set a hard cap for the T2* map/timeseries
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s

    LGR.info('Computing optimal combination')
    # optimally combine data
    OCcatd = combine.make_optcom(catd, tes, mask, t2s=t2s_full,
                                 combmode=combmode)

    # clean up numerical errors
    for arr in (OCcatd, s0_limited, t2s_limited):
        np.nan_to_num(arr, copy=False)

    s0_limited[s0_limited < 0] = 0
    t2s_limited[t2s_limited < 0] = 0

    io.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img)
    io.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img)
    io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img)
    io.filewrite(OCcatd, op.join(out_dir, 'ts_OC.nii'), ref_img)
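
A minimal invocation sketch for this workflow; the filenames and echo times below are illustrative assumptions, not values from the source:

# Hypothetical echo-specific inputs, in ascending TE order.
echo_files = ['echo1.nii.gz', 'echo2.nii.gz', 'echo3.nii.gz']
echo_times = [14.5, 38.5, 62.5]  # milliseconds

# Per the Notes above, outputs (t2sv.nii, s0v.nii, t2svG.nii, s0vG.nii,
# ts_OC.nii) land in a folder named TED.echo1.sub-01.
t2smap_workflow(echo_files, echo_times, fitmode='all', combmode='t2s',
                label='sub-01')
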
Example #3
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           stabilize,
           ref_img,
           tes,
           kdaw,
           rdaw,
           ste=0,
           wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Limited T2* map. Voxels where only one echo contains good data are
        NaN (cf. the full map `t2sG`).
    t2sG : (S,) array_like
        Full T2* map
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special: -1 indicates
        using the optimally combined data and 0 indicates using all echoes
        (spatially concatenated). A list can be provided to indicate a subset
        of echos. Default: 0
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Component weight in a given voxel
    :math:`c`                 Component
    :math:`p`                 Exponent applied to the component weights
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum_{v}^V {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum_{v}^V {\\zeta}_{c,v}^p}

    4.  Find elbows in the variance explained and component metric curves to
        set component-selection thresholds.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])]
        d = d[:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1],
                     axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        voxel_comp_weights, varex, comp_ts = run_svd(dz)

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()
        eigenvalue_elbow = getelbow(varex_norm, return_val=True)

        diff_varex_norm = np.abs(np.diff(varex_norm))
        lower_diff_varex_norm = diff_varex_norm[(len(diff_varex_norm) // 2):]
        varex_norm_thr = np.mean(
            [lower_diff_varex_norm.max(),
             diff_varex_norm.min()])
        varex_norm_min = varex_norm[
            (len(diff_varex_norm) // 2) + np.arange(len(lower_diff_varex_norm))
            [lower_diff_varex_norm >= varex_norm_thr][0] + 1]
        varex_norm_cum = np.cumsum(varex_norm)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df,
            'eigenvalue_elbow': eigenvalue_elbow,
            'varex_norm_min': varex_norm_min,
            'varex_norm_cum': varex_norm_cum
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']
        eigenvalue_elbow = pcastate['eigenvalue_elbow']
        varex_norm_min = pcastate['varex_norm_min']
        varex_norm_cum = pcastate['varex_norm_cum']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted(
        [fmin, getelbow(ct_df['kappa'], return_val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted(
        [fmin, getelbow_cons(ct_df['rho'], return_val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        lim_idx = utils.andb([ct_df['kappa'] < fmid,
                              ct_df['kappa'] > fmin]) == 2
        kappa_lim = ct_df.loc[lim_idx, 'kappa'].values
        kappa_thr = kappa_lim[getelbow(kappa_lim)]

        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]
        stabilize = True
    elif int(rdaw) == -1:
        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Reject if low Kappa, Rho, and variance explained
    is_lowk = ct_df['kappa'] <= kappa_thr
    is_lowr = ct_df['rho'] <= rho_thr
    is_lowe = ct_df['normalized variance explained'] <= eigenvalue_elbow
    is_lowkre = is_lowk & is_lowr & is_lowe
    ct_df.loc[is_lowkre, 'classification'] = 'rejected'
    ct_df.loc[is_lowkre, 'rationale'] += 'low rho, kappa, and varex;'

    # Reject if low variance explained
    is_lows = ct_df['normalized variance explained'] <= varex_norm_min
    ct_df.loc[is_lows, 'classification'] = 'rejected'
    ct_df.loc[is_lows, 'rationale'] += 'low variance explained;'

    # Reject if Kappa over limit
    is_fmax1 = ct_df['kappa'] == F_MAX
    ct_df.loc[is_fmax1, 'classification'] = 'rejected'
    ct_df.loc[is_fmax1, 'rationale'] += 'kappa equals fmax;'

    # Reject if Rho over limit
    is_fmax2 = ct_df['rho'] == F_MAX
    ct_df.loc[is_fmax2, 'classification'] = 'rejected'
    ct_df.loc[is_fmax2, 'rationale'] += 'rho equals fmax;'

    if stabilize:
        temp7 = varex_norm_cum >= 0.95
        ct_df.loc[temp7, 'classification'] = 'rejected'
        ct_df.loc[temp7, 'rationale'] += 'cumulative var. explained above 95%;'
        under_fmin1 = ct_df['kappa'] <= fmin
        ct_df.loc[under_fmin1, 'classification'] = 'rejected'
        ct_df.loc[under_fmin1, 'rationale'] += 'kappa below fmin;'
        under_fmin2 = ct_df['rho'] <= fmin
        ct_df.loc[under_fmin2, 'classification'] = 'rejected'
        ct_df.loc[under_fmin2, 'rationale'] += 'rho below fmin;'

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize timeseries
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
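
As a worked illustration of the Kappa formula in the Notes (toy numbers, assuming weight exponent p = 2), the weighted sum reduces to a plain weighted average of voxelwise F-statistics:

import numpy as np

# Toy values, not from the source: three voxels, one component.
zeta = np.array([0.5, 1.0, 2.0])     # component weight map values
F_R2 = np.array([10.0, 20.0, 40.0])  # voxelwise F-stats for the R2* model
p = 2

kappa = np.sum(zeta**p * F_R2) / np.sum(zeta**p)
assert np.isclose(kappa, np.average(F_R2, weights=zeta**p))
print(kappa)  # ~34.76: strongly weighted voxels dominate the component score
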
Example #4
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses the spatial global signal estimation approach to
    remove the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally combined functional data (i.e., the output of `make_optcom`)
    n_echos : :obj:`int`
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : :obj:`int`, optional
        Specifies degree of Legendre polynomial basis function for estimating
        spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """
    LGR.info('Applying amplitude-based T1 equilibration correction')
    if catd.shape[0] != optcom.shape[0]:
        raise ValueError(
            'First dimensions of catd ({0}) and optcom ({1}) do not '
            'match'.format(catd.shape[0], optcom.shape[0]))
    elif catd.shape[1] != n_echos:
        raise ValueError('Second dimension of catd ({0}) does not match '
                         'n_echos ({1})'.format(catd.shape[1], n_echos))
    elif catd.shape[2] != optcom.shape[1]:
        raise ValueError('Third dimension of catd ({0}) does not match '
                         'second dimension of optcom '
                         '({1})'.format(catd.shape[2], optcom.shape[1]))

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T,
                          rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    io.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find time course of the spatial global signal
    # make basis with the Legendre basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(
        np.atleast_2d(sol[dtrank]).T, np.atleast_2d(
            glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    io.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    io.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(
            np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
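
The Legendre detrending basis built with `scipy.special.lpmv` is easier to see in isolation; a small sketch with assumed toy sizes:

import numpy as np
from scipy.special import lpmv

# Columns are Legendre polynomials of degrees 0..dtrank-1 (order m = 0)
# evaluated on [-1, 1]: constant, linear, quadratic, and cubic trends.
n_vols, dtrank = 200, 4
bounds = np.linspace(-1, 1, n_vols)
Lmix = np.column_stack([lpmv(0, deg, bounds) for deg in range(dtrank)])
print(Lmix.shape)  # (200, 4)
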
Example #5
def gscontrol_mmix(optcom_ts, mmix, mask, comptable, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    optcom_ts : (S x T) array_like
        Optimally combined time series data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `optcom_ts`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index should be the component number.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk

    Notes
    -----
    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    sphis_hik.nii             T1-like effect
    hik_ts_OC_T1c.nii         T1-corrected BOLD (high-Kappa) time series
    dn_ts_OC_T1c.nii          Denoised version of T1-corrected time series
    betas_hik_OC_T1c.nii      T1 global signal-corrected components
    meica_mix_T1c.1D          T1 global signal-corrected mixing matrix
    ======================    =================================================
    """
    all_comps = comptable.index.values
    acc = comptable[comptable.classification == 'accepted'].index.values
    ign = comptable[comptable.classification == 'ignored'].index.values
    not_ign = sorted(np.setdiff1d(all_comps, ign))

    optcom_masked = optcom_ts[mask, :]
    optcom_mu = optcom_masked.mean(axis=-1)[:, np.newaxis]
    optcom_std = optcom_masked.std(axis=-1)[:, np.newaxis]
    """
    Compute temporal regression
    """
    data_norm = (optcom_masked - optcom_mu) / optcom_std
    cbetas = lstsq(mmix, data_norm.T, rcond=None)[0].T
    resid = data_norm - np.dot(cbetas[:, not_ign], mmix[:, not_ign].T)
    """
    Build BOLD time series without amplitudes, and save T1-like effect
    """
    bold_ts = np.dot(cbetas[:, acc], mmix[:, acc].T)
    t1_map = bold_ts.min(axis=-1)
    t1_map -= t1_map.mean()
    io.filewrite(utils.unmask(t1_map, mask), 'sphis_hik', ref_img)
    t1_map = t1_map[:, np.newaxis]
    """
    Find the global signal based on the T1-like effect
    """
    glob_sig = lstsq(t1_map, data_norm, rcond=None)[0]
    """
    T1-correct time series by regression
    """
    bold_noT1gs = bold_ts - np.dot(
        lstsq(glob_sig.T, bold_ts.T, rcond=None)[0].T, glob_sig)
    hik_ts = bold_noT1gs * optcom_std
    io.filewrite(utils.unmask(hik_ts, mask), 'hik_ts_OC_T1c.nii', ref_img)
    """
    Make denoised version of T1-corrected time series
    """
    medn_ts = optcom_mu + ((bold_noT1gs + resid) * optcom_std)
    io.filewrite(utils.unmask(medn_ts, mask), 'dn_ts_OC_T1c.nii', ref_img)
    """
    Orthogonalize mixing matrix w.r.t. T1-GS
    """
    mmixnogs = mmix.T - np.dot(
        lstsq(glob_sig.T, mmix, rcond=None)[0].T, glob_sig)
    mmixnogs_mu = mmixnogs.mean(-1)[:, np.newaxis]
    mmixnogs_std = mmixnogs.std(-1)[:, np.newaxis]
    mmixnogs_norm = (mmixnogs - mmixnogs_mu) / mmixnogs_std
    mmixnogs_norm = np.vstack(
        [np.atleast_2d(np.ones(max(glob_sig.shape))), glob_sig, mmixnogs_norm])
    """
    Write T1-GS corrected components and mixing matrix
    """
    cbetas_norm = lstsq(mmixnogs_norm.T, data_norm.T, rcond=None)[0].T
    io.filewrite(utils.unmask(cbetas_norm[:, 2:], mask),
                 'betas_hik_OC_T1c.nii', ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
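
The fit-then-subtract pattern repeated throughout this function is easiest to follow on toy data; a minimal sketch (all values assumed):

import numpy as np
from numpy.linalg import lstsq

rng = np.random.default_rng(0)
glob_sig = rng.standard_normal((1, 100))                # 1 x T regressor
data = 0.5 * glob_sig + rng.standard_normal((20, 100))  # 20 voxels x T

# Regress glob_sig onto each voxel's time series and remove the fitted part,
# mirroring the lstsq-then-subtract steps above.
betas = lstsq(glob_sig.T, data.T, rcond=None)[0]  # shape (1, 20)
cleaned = data - betas.T @ glob_sig
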
Example #6
def tedana_workflow(data,
                    tes,
                    mask=None,
                    mixm=None,
                    ctab=None,
                    manacc=None,
                    strict=False,
                    gscontrol=True,
                    kdaw=10.,
                    rdaw=1.,
                    conv=2.5e-5,
                    ste=-1,
                    combmode='t2s',
                    dne=False,
                    cost='logcosh',
                    stabilize=False,
                    filecsdata=False,
                    wvpca=False,
                    label=None,
                    fixed_seed=42,
                    debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma-separated list of manually accepted components, in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    cost : {'logcosh', 'exp', 'cube'} str, optional
        Cost function for ICA. Default is 'logcosh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file. For a complete list of the files generated by this workflow, please
    visit https://tedana.readthedocs.io/en/latest/outputs.html
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except (TypeError, AttributeError):
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             minimum=False,
                                             getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        n_components, dd = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                stabilize,
                                                ref_img,
                                                tes=tes,
                                                kdaw=kdaw,
                                                rdaw=rdaw,
                                                ste=ste,
                                                wvpca=wvpca)
        mmix_orig, fixed_seed = decomposition.tedica(n_components,
                                                     dd,
                                                     conv,
                                                     fixed_seed,
                                                     cost=cost)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'),
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    fixed_seed=fixed_seed,
                    acc=acc,
                    rej=rej,
                    midk=midk,
                    empty=ign,
                    ref_img=ref_img)
    io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
    if dne:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
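
An illustrative end-to-end call; the paths and echo times below are hypothetical:

tedana_workflow(['echo1.nii.gz', 'echo2.nii.gz', 'echo3.nii.gz'],
                tes=[14.5, 38.5, 62.5],
                gscontrol=True, combmode='t2s',
                fixed_seed=42, label='sub-01')
# meica_mix.1D, comp_table_ica.txt, and the denoised time series are
# written to TED.echo1.sub-01/.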
Example #7
def godec(optcom_ts,
          mmix,
          mask,
          comptable,
          ref_img,
          ranks=[2],
          thresh=10,
          norm_mode='vn',
          power=2,
          max_iter=2000,
          out_dir='.'):
    """
    optcom_ts : (S x T) array_like
        Optimally combined time series data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `optcom_ts`
    mask : (S,) array_like
        Boolean mask array
    acc : :obj:`list`
        Indices of accepted (BOLD) components in `mmix`
    ign : :obj:`list`
        Indices of all ignored components in `mmix`
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    ranks : list of int
        Ranks of low-rank components to run
    norm_mode : {'vn', 'dm', None}
    """
    all_comps = comptable.index.values
    acc = comptable[comptable.classification == 'accepted'].index.values
    ign = comptable[comptable.classification == 'ignored'].index.values
    not_ign = sorted(np.setdiff1d(all_comps, ign))

    # Construct denoised data from optcom, mmix, acc, and all_ref
    optcom_masked = optcom_ts[mask, :]
    optcom_mu = optcom_masked.mean(axis=-1)[:, np.newaxis]
    optcom_std = optcom_masked.std(axis=-1)[:, np.newaxis]
    data_norm = (optcom_masked - optcom_mu) / optcom_std
    cbetas = lstsq(mmix, data_norm.T, rcond=None)[0].T
    resid = data_norm - np.dot(cbetas[:, not_ign], mmix[:, not_ign].T)
    bold_ts = np.dot(cbetas[:, acc], mmix[:, acc].T)
    medn_ts = optcom_mu + ((bold_ts + resid) * optcom_std)

    for rank in ranks:
        L, S, G = _wrap_godec(medn_ts,
                              rank=rank,
                              power=power,
                              thresh=thresh,
                              max_iter=max_iter,
                              norm_mode=norm_mode)

        if norm_mode is None:
            name_norm_mode = ''
        else:
            name_norm_mode = 'n{0}'.format(norm_mode)

        suffix = '{0}r{1}p{2}t{3}'.format(name_norm_mode, rank, power, thresh)
        io.filewrite(utils.unmask(L, mask),
                     op.join(out_dir, 'lowrank_{0}.nii'.format(suffix)),
                     ref_img)
        io.filewrite(utils.unmask(S, mask),
                     op.join(out_dir, 'sparse_{0}.nii'.format(suffix)),
                     ref_img)
        io.filewrite(utils.unmask(G, mask),
                     op.join(out_dir, 'noise_{0}.nii'.format(suffix)), ref_img)
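
`_wrap_godec` is not shown here. As a rough sketch of the idea behind Go Decomposition, here is a toy low-rank plus sparse split; this illustrates the decomposition X = L + S + G, not the project's implementation:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 30))

# Rank-2 approximation L via truncated SVD; the largest residual entries form
# the sparse part S; whatever remains is treated as noise G.
U, s, Vt = np.linalg.svd(X, full_matrices=False)
rank = 2
L = (U[:, :rank] * s[:rank]) @ Vt[:rank]
resid = X - L
S = np.where(np.abs(resid) >= np.percentile(np.abs(resid), 90), resid, 0)
G = resid - S
assert np.allclose(X, L + S + G)
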
Example #8
def dependence_metrics(catd,
                       tsoc,
                       mmix,
                       t2s,
                       tes,
                       ref_img,
                       reindex=False,
                       mmixN=None,
                       algorithm=None,
                       label=None,
                       out_dir='.',
                       verbose=False):
    """
    Fit TE-dependence and -independence models to components.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    tsoc : (S x T) array_like
        Optimally combined data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    t2s : (S [x T]) array_like
        Limited T2* map or timeseries.
    tes : list
        List of echo times associated with `catd`, in milliseconds
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    reindex : bool, optional
        Whether to sort components in descending order by Kappa. Default: False
    mmixN : (T x C) array_like, optional
        Z-scored mixing matrix. Default: None
    algorithm : {'kundu_v2', 'kundu_v3', None}, optional
        Decision tree to be applied to metrics. Determines which maps will be
        generated and stored in seldict. Default: None
    label : :obj:`str` or None, optional
        Prefix to apply to generated files. Default is None.
    out_dir : :obj:`str`, optional
        Output directory for generated files. Default is current working
        directory.
    verbose : :obj:`bool`, optional
        Whether or not to generate additional files. Default is False.

    Returns
    -------
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index is the component number.
    seldict : :obj:`dict` or None
        Dictionary containing component-specific metric maps to be used for
        component selection. If `algorithm` is None, then seldict will be
        None as well.
    betas : :obj:`numpy.ndarray`
        Component parameter estimates (betas) for each echo
    mmix_new : :obj:`numpy.ndarray`
        Mixing matrix, re-sorted in descending Kappa order if `reindex` is
        True; otherwise identical to `mmix`
    """
    # Use t2s as mask
    mask = t2s != 0
    if not (catd.shape[0] == t2s.shape[0] == mask.shape[0] == tsoc.shape[0]):
        raise ValueError('First dimensions (number of samples) of catd ({0}), '
                         'tsoc ({1}), and t2s ({2}) do not '
                         'match'.format(catd.shape[0], tsoc.shape[0],
                                        t2s.shape[0]))
    elif catd.shape[1] != len(tes):
        raise ValueError('Second dimension of catd ({0}) does not match '
                         'number of echoes provided (tes; '
                         '{1})'.format(catd.shape[1], len(tes)))
    elif not (catd.shape[2] == tsoc.shape[1] == mmix.shape[0]):
        raise ValueError('Number of volumes in catd ({0}), '
                         'tsoc ({1}), and mmix ({2}) do not '
                         'match.'.format(catd.shape[2], tsoc.shape[1],
                                         mmix.shape[0]))
    elif t2s.ndim == 2:
        if catd.shape[2] != t2s.shape[1]:
            raise ValueError('Number of volumes in catd '
                             '({0}) does not match number of volumes in '
                             't2s ({1})'.format(catd.shape[2], t2s.shape[1]))

    # mask everything we can
    tsoc = tsoc[mask, :]
    catd = catd[mask, ...]
    t2s = t2s[mask]

    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None)
    del tsoc_dm
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(utils.unmask(catd, mask), mmix,
                       np.repeat(mask[:, np.newaxis], len(tes), axis=1))
    betas = betas[mask, ...]
    n_voxels, n_echos, n_components = betas.shape
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, _, _ = getfbounds(n_echos)

    # set up Xmats
    X1 = mu.T  # Model 1
    X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T  # Model 2

    # tables for component selection
    kappas = np.zeros([n_components])
    rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_voxels, n_components])
    F_S0_maps = np.zeros([n_voxels, n_components])
    pred_R2_maps = np.zeros([n_voxels, n_echos, n_components])
    pred_S0_maps = np.zeros([n_voxels, n_echos, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i_comp in range(n_components):
        # size of comp_betas is (n_echoes, n_samples)
        comp_betas = np.atleast_3d(betas)[:, :, i_comp].T
        alpha = (np.abs(comp_betas)**2).sum(axis=0)
        varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100.
        varex_norm[i_comp] = (WTS[:, i_comp]**2).sum() / totvar_norm

        # S0 Model
        # (S,) model coefficient map
        coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1**2).sum(axis=0)
        pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1))
        pred_S0_maps[:, :, i_comp] = pred_S0.T
        SSE_S0 = (comp_betas - pred_S0)**2
        SSE_S0 = SSE_S0.sum(axis=0)  # (S,) prediction error map
        F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0)
        F_S0_maps[:, i_comp] = F_S0

        # R2 Model
        coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2**2).sum(axis=0)
        pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1))
        pred_R2_maps[:, :, i_comp] = pred_R2.T
        SSE_R2 = (comp_betas - pred_R2)**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2)
        F_R2_maps[:, i_comp] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (
            Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i_comp] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(wtsZ**2.)
        kappas[i_comp] = np.average(F_R2, weights=norm_weights)
        rhos[i_comp] = np.average(F_S0, weights=norm_weights)
    del SSE_S0, SSE_R2, wtsZ, F_S0, F_R2, norm_weights, comp_betas
    if algorithm != 'kundu_v3':
        del WTS, PSC, tsoc_B

    # tabulate component values
    comptable = np.vstack([kappas, rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in descending Kappa order
        sort_idx = comptable[:, 0].argsort()[::-1]
        comptable = comptable[sort_idx, :]
        mmix_new = mmix[:, sort_idx]
        betas = betas[..., sort_idx]
        pred_R2_maps = pred_R2_maps[:, :, sort_idx]
        pred_S0_maps = pred_S0_maps[:, :, sort_idx]
        F_R2_maps = F_R2_maps[:, sort_idx]
        F_S0_maps = F_S0_maps[:, sort_idx]
        Z_maps = Z_maps[:, sort_idx]
        tsoc_Babs = tsoc_Babs[:, sort_idx]
        if algorithm == 'kundu_v3':
            WTS = WTS[:, sort_idx]
            PSC = PSC[:, sort_idx]
            tsoc_B = tsoc_B[:, sort_idx]
    else:
        mmix_new = mmix
    del mmix

    if verbose:
        # Echo-specific weight maps for each of the ICA components.
        io.filewrite(utils.unmask(betas, mask),
                     op.join(out_dir, '{0}betas_catd.nii'.format(label)),
                     ref_img)

        # Echo-specific maps of predicted values for R2 and S0 models for each
        # component.
        io.filewrite(utils.unmask(pred_R2_maps, mask),
                     op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img)
        io.filewrite(utils.unmask(pred_S0_maps, mask),
                     op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img)
        # Weight maps used to average metrics across voxels
        io.filewrite(utils.unmask(Z_maps**2., mask),
                     op.join(out_dir, '{0}metric_weights.nii'.format(label)),
                     ref_img)
    del pred_R2_maps, pred_S0_maps

    comptable = pd.DataFrame(comptable,
                             columns=[
                                 'kappa', 'rho', 'variance explained',
                                 'normalized variance explained'
                             ])
    comptable.index.name = 'component'

    # Generate clustering criteria for component selection
    if algorithm in ['kundu_v2', 'kundu_v3']:
        Z_clmaps = np.zeros([n_voxels, n_components], bool)
        F_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        F_S0_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_S0_clmaps = np.zeros([n_voxels, n_components], bool)

        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i_comp in range(n_components):
            # Cluster-extent threshold and binarize F-maps
            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask)))
            F_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFR2 = F_R2_clmaps[:, i_comp].sum()

            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask)))
            F_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFS0 = F_S0_clmaps[:, i_comp].sum()

            # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05
            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(Z_maps[:, i_comp], mask)))
            Z_clmaps[:, i_comp] = utils.threshold_map(ccimg,
                                                      min_cluster_size=csize,
                                                      threshold=1.95,
                                                      mask=mask,
                                                      binarize=True)

            # Cluster-extent threshold and binarize ranked signal-change map
            ccimg = io.new_nii_like(
                ref_img,
                utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask))
            Br_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFR2),
                mask=mask,
                binarize=True)
            Br_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFS0),
                mask=mask,
                binarize=True)
        del ccimg, tsoc_Babs

        if algorithm == 'kundu_v2':
            # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5
            selvars = [
                'Z_maps', 'F_R2_maps', 'Z_clmaps', 'F_R2_clmaps',
                'F_S0_clmaps', 'Br_R2_clmaps', 'Br_S0_clmaps'
            ]
        elif algorithm == 'kundu_v3':
            selvars = [
                'WTS', 'tsoc_B', 'PSC', 'Z_maps', 'F_R2_maps', 'F_S0_maps',
                'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'Br_R2_clmaps',
                'Br_S0_clmaps'
            ]
        elif algorithm is None:
            selvars = []
        else:
            raise ValueError(
                'Algorithm "{0}" not recognized.'.format(algorithm))

        # collect the selected variables by name without resorting to eval()
        local_vars = locals()
        seldict = {vv: local_vars[vv] for vv in selvars}
    else:
        seldict = None

    return comptable, seldict, betas, mmix_new
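
The per-component S0 and R2* fits above use a closed-form single-regressor least squares plus a pseudo-F statistic; a toy sketch for one voxel (numbers assumed):

import numpy as np

X = np.array([[1.0], [2.0], [3.0]])  # regressor across three echoes
y = np.array([[1.1], [2.1], [2.8]])  # component betas across echoes

coeff = (y * X).sum(axis=0) / (X**2).sum(axis=0)  # no-intercept LS slope
pred = X * coeff
sse = ((y - pred)**2).sum(axis=0)    # prediction error
alpha = (y**2).sum(axis=0)           # total (uncentered) sum of squares
n_echos = X.shape[0]
F = (alpha - sse) * (n_echos - 1) / sse
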
Example #9
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False, stabilize=False,
                    out_dir='.', fixed_seed=42, maxit=500, maxrestart=10,
                    debug=False, quiet=False, png=False, png_cmap='coolwarm',
                    low_mem=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        --png must still be used to request figures. Default is 'coolwarm'.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    low_mem : :obj:`bool`, optional
        Enables low-memory processing, including the use of IncrementalPCA.
        May increase workflow duration. Default is False.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # boilerplate
    refs = []
    basename = 'report'
    extension = 'txt'
    repname = op.join(out_dir, (basename + '.' + extension))
    repex = op.join(out_dir, (basename + '*'))
    previousreps = glob(repex)
    previousreps.sort(reverse=True)
    for f in previousreps:
        previousparts = op.splitext(f)
        newname = previousparts[0] + '_old' + previousparts[1]
        os.rename(f, newname)

    if debug and not quiet:
        # ensure old logs aren't over-written
        basename = 'tedana_run'
        extension = 'txt'
        logname = op.join(out_dir, (basename + '.' + extension))
        logex = op.join(out_dir, (basename + '*'))
        previouslogs = glob(logex)
        previouslogs.sort(reverse=True)
        for f in previouslogs:
            previousparts = op.splitext(f)
            newname = previousparts[0] + '_old' + previousparts[1]
            os.rename(f, newname)

        # set logging format
        formatter = logging.Formatter(
                    '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
                    datefmt='%Y-%m-%dT%H:%M:%S')

        # set up logging file and open it for writing
        fh = logging.FileHandler(logname)
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    # check if TR is 0
    img_t_r = ref_img.header.get_zooms()[-1]
    if img_t_r == 0 and png:
        raise IOError('Dataset has a TR of 0. This indicates incorrect'
                      ' header information. To correct this, we recommend'
                      ' using this snippet:'
                      '\n'
                      'https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6'
                      '\n'
                      'to correct your TR to the value it should be.')

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    bp_str = ("TE-dependence analysis was performed on input data.")
    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
        bp_str += (" An initial mask was generated from the first echo using "
                   "nilearn's compute_epi_mask function.")
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
        bp_str += (" A user-defined mask was applied to the data.")

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    bp_str += (" An adaptive mask was then generated, in which each voxel's "
               "value reflects the number of echoes with 'good' data.")
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)
    bp_str += (" A monoexponential model was fit to the data at each voxel "
               "using log-linear regression in order to estimate T2* and S0 "
               "maps. For each voxel, the value from the adaptive mask was "
               "used to determine which echoes would be used to estimate T2* "
               "and S0.")

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)
    if combmode == 't2s':
        cm_str = "'t2s' (Posse et al., 1999)"
        refs += ["Posse, S., Wiese, S., Gembris, D., Mathiak, K., Kessler, "
                 "C., Grosse‐Ruyken, M. L., ... & Kiselev, V. G. (1999). "
                 "Enhancement of BOLD‐contrast sensitivity by single‐shot "
                 "multi‐echo functional MR imaging. Magnetic Resonance in "
                 "Medicine: An Official Journal of the International Society "
                 "for Magnetic Resonance in Medicine, 42(1), 87-97."]

    bp_str += (" Multi-echo data were then optimally combined using the {0} "
               "combination method.").format(cm_str)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)
        bp_str += (" Global signal regression was applied to the multi-echo "
                   "and optimally combined datasets.")

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, ref_img,
                                                tes=tes, algorithm=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose,
                                                low_mem=low_mem)
        if tedpca == 'mle':
            alg_str = "using MLE dimensionality estimation (Minka, 2001)"
            refs += ["Minka, T. P. (2001). Automatic choice of dimensionality "
                     "for PCA. In Advances in neural information processing "
                     "systems (pp. 598-604)."]
        elif tedpca == 'kundu':
            alg_str = ("followed by the Kundu component selection decision "
                       "tree (Kundu et al., 2013)")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]
        elif tedpca == 'kundu-stabilize':
            alg_str = ("followed by the 'stabilized' Kundu component "
                       "selection decision tree (Kundu et al., 2013)")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]

        if source_tes == -1:
            dat_str = "the optimally combined data"
        elif source_tes == 0:
            dat_str = "the z-concatenated multi-echo data"
        else:
            dat_str = "a z-concatenated subset of echoes from the input data"

        bp_str += (" Principal component analysis {0} was applied to "
                   "{1} for dimensionality reduction.").format(alg_str,
                                                               dat_str)

        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)
        bp_str += (" Independent component analysis was then used to "
                   "decompose the dimensionally reduced dataset.")

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
                    catd, data_oc, mmix_orig, t2s, tes,
                    ref_img, reindex=True, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        bp_str += (" A series of TE-dependence metrics were calculated for "
                   "each ICA component, including Kappa, Rho, and variance "
                   "explained.")
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = metrics.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
        bp_str += (" Next, component selection was performed to identify "
                   "BOLD (TE-dependent), non-BOLD (TE-independent), and "
                   "uncertain (low-variance) components using the Kundu "
                   "decision tree (v2.5; Kundu et al., 2013).")
        refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                 "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                 "(2013). Integrated strategy for improving functional "
                 "connectivity mapping using multiecho fMRI. Proceedings "
                 "of the National Academy of Sciences, 110(40), "
                 "16187-16192."]
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
                    catd, data_oc, mmix_orig, t2s, tes,
                    ref_img, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        if ctab is None:
            comptable = metrics.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
            bp_str += (" Next, component selection was performed to identify "
                       "BOLD (TE-dependent), non-BOLD (TE-independent), and "
                       "uncertain (low-variance) components using the Kundu "
                       "decision tree (v2.5; Kundu et al., 2013).")
            refs += ["Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                     "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                     "(2013). Integrated strategy for improving functional "
                     "connectivity mapping using multiecho fMRI. Proceedings "
                     "of the National Academy of Sciences, 110(40), "
                     "16187-16192."]
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
            comptable = selection.manual_selection(comptable, acc=manacc)
            bp_str += (" Next, components were manually classified as "
                       "BOLD (TE-dependent), non-BOLD (TE-independent), or "
                       "uncertain (low-variance).")

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)
        bp_str += (" Rejected components' time series were then "
                   "orthogonalized with respect to accepted components' time "
                   "series.")

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
        bp_str += (" T1c global signal regression was then applied to the "
                   "data in order to remove spatially diffuse noise.")

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')

    bp_str += ("\n\nThis workflow used numpy (Van Der Walt, Colbert, & "
               "Varoquaux, 2011), scipy (Jones et al., 2001), pandas "
               "(McKinney, 2010), scikit-learn (Pedregosa et al., 2011), "
               "nilearn, and nibabel (Brett et al., 2019).")
    refs += ["Van Der Walt, S., Colbert, S. C., & Varoquaux, G. (2011). The "
             "NumPy array: a structure for efficient numerical computation. "
             "Computing in Science & Engineering, 13(2), 22.",
             "Jones E, Oliphant E, Peterson P, et al. SciPy: Open Source "
             "Scientific Tools for Python, 2001-, http://www.scipy.org/",
             "McKinney, W. (2010, June). Data structures for statistical "
             "computing in python. In Proceedings of the 9th Python in "
             "Science Conference (Vol. 445, pp. 51-56).",
             "Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., "
             "Thirion, B., Grisel, O., ... & Vanderplas, J. (2011). "
             "Scikit-learn: Machine learning in Python. Journal of machine "
             "learning research, 12(Oct), 2825-2830.",
             "Brett, M., Markiewicz, C. J., Hanke, M., Côté, M.-A., "
             "Cipollini, B., McCarthy, P., … freec84. (2019, May 28). "
             "nipy/nibabel. Zenodo. http://doi.org/10.5281/zenodo.3233118"]

    bp_str += ("\n\nThis workflow also used the Dice similarity index "
               "(Dice, 1945; Sørensen, 1948).")
    refs += ["Dice, L. R. (1945). Measures of the amount of ecologic "
             "association between species. Ecology, 26(3), 297-302.",
             "Sørensen, T. J. (1948). A method of establishing groups of "
             "equal amplitude in plant sociology based on similarity of "
             "species content and its application to analyses of the "
             "vegetation on Danish commons. I kommission hos E. Munksgaard."]

    bp_str += '\n\nReferences\n\n'
    refs = sorted(list(set(refs)))
    bp_str += '\n\n'.join(refs)
    with open(repname, 'w') as fo:
        fo.write(bp_str)

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
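
# --- Hedged illustration (not part of the workflow above) ---
# The workflow builds an "adaptive mask" in which each voxel's value counts
# how many echoes contain usable signal at that voxel. tedana's
# utils.make_adaptive_mask applies its own criteria; the thresholding below
# is only a toy stand-in to show the counting idea.
import numpy as np

n_voxels, n_echos, n_vols = 100, 3, 50
catd_toy = np.random.random((n_voxels, n_echos, n_vols))
catd_toy[:10, 2, :] = 0.0  # simulate signal dropout in the last echo

good_echo = catd_toy.mean(axis=-1) > 0.0  # per-voxel, per-echo "good" flag
masksum_toy = good_echo.sum(axis=1)       # adaptive mask: echo count per voxel
mask_toy = masksum_toy >= 1               # boolean inclusion mask
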
Example #10
0
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG,
           ref_img, tes, algorithm='mle', source_tes=-1, kdaw=10., rdaw=1.,
           out_dir='.', verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0 will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Component weight at a voxel
    :math:`c`                 Component
    :math:`p`                 Exponent applied to the component weights
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Estimate the number of components to retain, e.g., by locating
        elbows in the variance-explained and model-fit curves.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(
                data_cat, data_oc, comp_ts, t2s, tes, ref_img,
                reindex=False, mmixN=vTmixN, algorithm=None,
                label='mepca_', out_dir=out_dir, verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    np.savetxt(op.join(out_dir, 'mepca_mix.1D'), comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(computefeats2(data_oc, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, op.join(out_dir, 'mepca_OC_components.nii'),
                 ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv(op.join(out_dir, 'comp_table_pca.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] *
                                varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
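
# --- Hedged illustration (not part of the function above) ---
# The docstring formulas define Kappa and Rho as weighted averages of
# voxelwise F-statistics, with component weights zeta raised to a power p.
# The arrays below are random stand-ins for the real weight and F maps.
import numpy as np

n_voxels, n_components, p = 1000, 5, 2
zeta = np.random.random((n_voxels, n_components))  # component weight maps
f_r2 = np.random.random((n_voxels, n_components))  # TE-dependence F-stats
f_s0 = np.random.random((n_voxels, n_components))  # TE-independence F-stats

w = zeta ** p
kappa = (w * f_r2).sum(axis=0) / w.sum(axis=0)  # one Kappa per component
rho = (w * f_s0).sum(axis=0) / w.sum(axis=0)    # one Rho per component
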
Example #11
0
def tedana_workflow(data,
                    tes,
                    out_dir='.',
                    mask=None,
                    fittype='loglin',
                    combmode='t2s',
                    tedpca='mdl',
                    fixed_seed=42,
                    maxit=500,
                    maxrestart=10,
                    tedort=False,
                    gscontrol=None,
                    no_png=False,
                    png_cmap='coolwarm',
                    verbose=False,
                    low_mem=False,
                    debug=False,
                    quiet=False,
                    t2smap=None,
                    mixm=None,
                    ctab=None,
                    manacc=None):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    out_dir : :obj:`str`, optional
        Output directory.
    mask : :obj:`str` or None, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    fittype : {'loglin', 'curvefit'}, optional
        Monoexponential fitting method. 'loglin' uses the default linear
        fit to the log of the data. 'curvefit' uses a monoexponential fit to
        the raw data, which is slightly slower but may be more accurate.
        Default is 'loglin'.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    tedpca : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    no_png : :obj:`bool`, optional
        Do not generate .png plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        Cannot be used with ``--no-png``. Default is 'coolwarm'.
    t2smap : :obj:`str`, optional
        Precalculated T2* map in the same space as the input data.
    mixm : :obj:`str` or None, optional
        File containing mixing matrix, to be used when re-running the workflow.
        If not provided, ME-PCA and ME-ICA are done. Default is None.
    ctab : :obj:`str` or None, optional
        File containing component table from which to extract pre-computed
        classifications, to be used with 'mixm' when re-running the workflow.
        Default is None.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    low_mem : :obj:`bool`, optional
        Enables low-memory processing, including the use of IncrementalPCA.
        May increase workflow duration. Default is False.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
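
    Examples
    --------
    A minimal invocation from Python might look like the following; the
    echo file names and echo times are hypothetical placeholders::

        tedana_workflow(data=['echo-1.nii.gz', 'echo-2.nii.gz',
                              'echo-3.nii.gz'],
                        tes=[14.5, 38.5, 62.5],
                        out_dir='tedana_out')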
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    # boilerplate
    basename = 'report'
    extension = 'txt'
    repname = op.join(out_dir, (basename + '.' + extension))
    repex = op.join(out_dir, (basename + '*'))
    previousreps = glob(repex)
    previousreps.sort(reverse=True)
    for f in previousreps:
        previousparts = op.splitext(f)
        newname = previousparts[0] + '_old' + previousparts[1]
        os.rename(f, newname)
    refname = op.join(out_dir, '_references.txt')

    # create logfile name
    basename = 'tedana_'
    extension = 'tsv'
    start_time = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
    logname = op.join(out_dir, (basename + start_time + '.' + extension))

    # set logging format
    log_formatter = logging.Formatter(
        '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    text_formatter = logging.Formatter('%(message)s')

    # set up logging file and open it for writing
    log_handler = logging.FileHandler(logname)
    log_handler.setFormatter(log_formatter)
    # Removing handlers after basicConfig doesn't work, so we use filters
    # for the relevant handlers themselves.
    log_handler.addFilter(ContextFilter())
    sh = logging.StreamHandler()
    sh.addFilter(ContextFilter())

    if quiet:
        logging.basicConfig(level=logging.WARNING, handlers=[log_handler, sh])
    elif debug:
        logging.basicConfig(level=logging.DEBUG, handlers=[log_handler, sh])
    else:
        logging.basicConfig(level=logging.INFO, handlers=[log_handler, sh])

    # Loggers for report and references
    rep_handler = logging.FileHandler(repname)
    rep_handler.setFormatter(text_formatter)
    ref_handler = logging.FileHandler(refname)
    ref_handler.setFormatter(text_formatter)
    RepLGR.setLevel(logging.INFO)
    RepLGR.addHandler(rep_handler)
    RefLGR.setLevel(logging.INFO)
    RefLGR.addHandler(ref_handler)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to list of filenames
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format(data))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if no_png and (png_cmap != 'coolwarm'):
        LGR.warning('Overriding --no-png since --png-cmap provided.')
        no_png = False

    # check if TR is 0
    img_t_r = ref_img.header.get_zooms()[-1]
    if img_t_r == 0 and not no_png:
        raise IOError(
            'Dataset has a TR of 0. This indicates incorrect'
            ' header information. To correct this, we recommend'
            ' using this snippet:'
            '\n'
            'https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6'
            '\n'
            'to correct your TR to the value it should be.')

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'ica_mixing.tsv'):
            shutil.copyfile(mixm, op.join(out_dir, 'ica_mixing.tsv'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'ica_decomposition.json'):
            shutil.copyfile(ctab, op.join(out_dir, 'ica_decomposition.json'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if t2smap is not None and op.isfile(t2smap):
        t2smap = op.abspath(t2smap)
        # Allow users to re-run on same folder
        if t2smap != op.join(out_dir, 't2sv.nii.gz'):
            shutil.copyfile(t2smap, op.join(out_dir, 't2sv.nii.gz'))
            shutil.copyfile(t2smap, op.join(out_dir, op.basename(t2smap)))
    elif t2smap is not None:
        raise IOError('Argument "t2smap" must be an existing file.')

    RepLGR.info("TE-dependence analysis was performed on input data.")
    if mask and not t2smap:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
        RepLGR.info("A user-defined mask was applied to the data.")
    elif t2smap and not mask:
        LGR.info('Using user-defined T2* map to generate mask')
        t2s_limited = utils.load_image(t2smap)
        t2s_full = t2s_limited.copy()
        mask = (t2s_limited != 0).astype(int)
    elif t2smap and mask:
        LGR.info('Combining user-defined mask and T2* map to generate mask')
        t2s_limited = utils.load_image(t2smap)
        t2s_full = t2s_limited.copy()
        mask = utils.load_image(mask)
        mask[t2s_limited == 0] = 0  # reduce mask based on T2* map
    else:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
        RepLGR.info("An initial mask was generated from the first echo using "
                    "nilearn's compute_epi_mask function.")

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    if t2smap is None:
        LGR.info('Computing T2* map')
        t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay(
            catd, tes, mask, masksum, fittype)

        # set a hard cap for the T2* map
        # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
        cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(),
                                          99.5,
                                          interpolation_method='lower')
        LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
        t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s
        io.filewrite(t2s_limited, op.join(out_dir, 't2sv.nii'), ref_img)
        io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img)

        if verbose:
            io.filewrite(t2s_full, op.join(out_dir, 't2svG.nii'), ref_img)
            io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd,
                                  tes,
                                  mask,
                                  t2s=t2s_full,
                                  combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd,
                                          data_oc,
                                          n_echos,
                                          ref_img,
                                          out_dir=out_dir)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s_limited,
                                                t2s_full,
                                                ref_img,
                                                tes=tes,
                                                algorithm=tedpca,
                                                kdaw=10.,
                                                rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose,
                                                low_mem=low_mem)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed, maxit,
                                         maxrestart)

        if verbose:
            io.filewrite(utils.unmask(dd, mask),
                         op.join(out_dir, 'ts_OC_whitened.nii.gz'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
            catd,
            data_oc,
            mmix_orig,
            t2s_limited,
            tes,
            ref_img,
            reindex=True,
            label='meica_',
            out_dir=out_dir,
            algorithm='kundu_v2',
            verbose=verbose)
        comp_names = [
            io.add_decomp_prefix(comp,
                                 prefix='ica',
                                 max_value=comptable.index.max())
            for comp in comptable.index.values
        ]
        mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
        mixing_df.to_csv(op.join(out_dir, 'ica_mixing.tsv'),
                         sep='\t',
                         index=False)
        betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
        io.filewrite(betas_oc, op.join(out_dir, 'ica_components.nii.gz'),
                     ref_img)

        comptable = metrics.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = pd.read_table(op.join(out_dir, 'ica_mixing.tsv')).values

        if ctab is None:
            comptable, metric_maps, betas, mmix = metrics.dependence_metrics(
                catd,
                data_oc,
                mmix_orig,
                t2s_limited,
                tes,
                ref_img,
                label='meica_',
                out_dir=out_dir,
                algorithm='kundu_v2',
                verbose=verbose)
            comptable = metrics.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos,
                                                     n_vols)
        else:
            mmix = mmix_orig.copy()
            comptable = io.load_comptable(ctab)
            if manacc is not None:
                comptable = selection.manual_selection(comptable, acc=manacc)
        betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
        io.filewrite(betas_oc, op.join(out_dir, 'ica_components.nii.gz'),
                     ref_img)

    # Save decomposition
    comptable['Description'] = ('ICA fit to dimensionally-reduced '
                                'optimally combined data.')
    mmix_dict = {}
    mmix_dict['Method'] = ('Independent components analysis with FastICA '
                           'algorithm implemented by sklearn. Components '
                           'are sorted by Kappa in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable,
                      op.join(out_dir, 'ica_decomposition.json'),
                      label='ica',
                      metadata=mmix_dict)

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        comp_names = [
            io.add_decomp_prefix(comp,
                                 prefix='ica',
                                 max_value=comptable.index.max())
            for comp in comptable.index.values
        ]
        mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
        mixing_df.to_csv(op.join(out_dir, 'ica_orth_mixing.tsv'),
                         sep='\t',
                         index=False)
        RepLGR.info("Rejected components' time series were then "
                    "orthogonalized with respect to accepted components' time "
                    "series.")

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    ref_img=ref_img,
                    out_dir=out_dir)

    if 't1c' in gscontrol:
        gsc.gscontrol_mmix(data_oc,
                           mmix,
                           mask,
                           comptable,
                           ref_img,
                           out_dir=out_dir)

    if verbose:
        io.writeresults_echoes(catd,
                               mmix,
                               mask,
                               comptable,
                               ref_img,
                               out_dir=out_dir)

    if not no_png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc,
                            mask=mask,
                            comptable=comptable,
                            mmix=mmix_orig,
                            ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making Kappa/Rho scree plot')
        viz.write_kappa_scree(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')

    RepLGR.info("This workflow used numpy (Van Der Walt, Colbert, & "
                "Varoquaux, 2011), scipy (Jones et al., 2001), pandas "
                "(McKinney, 2010), scikit-learn (Pedregosa et al., 2011), "
                "nilearn, and nibabel (Brett et al., 2019).")
    RefLGR.info(
        "Van Der Walt, S., Colbert, S. C., & Varoquaux, G. (2011). The "
        "NumPy array: a structure for efficient numerical computation. "
        "Computing in Science & Engineering, 13(2), 22.")
    RefLGR.info("Jones E, Oliphant E, Peterson P, et al. SciPy: Open Source "
                "Scientific Tools for Python, 2001-, http://www.scipy.org/")
    RefLGR.info("McKinney, W. (2010, June). Data structures for statistical "
                "computing in python. In Proceedings of the 9th Python in "
                "Science Conference (Vol. 445, pp. 51-56).")
    RefLGR.info("Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., "
                "Thirion, B., Grisel, O., ... & Vanderplas, J. (2011). "
                "Scikit-learn: Machine learning in Python. Journal of machine "
                "learning research, 12(Oct), 2825-2830.")
    RefLGR.info("Brett, M., Markiewicz, C. J., Hanke, M., Côté, M.-A., "
                "Cipollini, B., McCarthy, P., … freec84. (2019, May 28). "
                "nipy/nibabel. Zenodo. http://doi.org/10.5281/zenodo.3233118")

    RepLGR.info("This workflow also used the Dice similarity index "
                "(Dice, 1945; Sørensen, 1948).")
    RefLGR.info("Dice, L. R. (1945). Measures of the amount of ecologic "
                "association between species. Ecology, 26(3), 297-302.")
    RefLGR.info(
        "Sørensen, T. J. (1948). A method of establishing groups of "
        "equal amplitude in plant sociology based on similarity of "
        "species content and its application to analyses of the "
        "vegetation on Danish commons. I kommission hos E. Munksgaard.")

    with open(repname, 'r') as fo:
        report = [line.rstrip() for line in fo.readlines()]
        report = ' '.join(report)
    with open(refname, 'r') as fo:
        reference_list = sorted(list(set(fo.readlines())))
        references = '\n'.join(reference_list)
    report += '\n\nReferences\n' + references
    with open(repname, 'w') as fo:
        fo.write(report)
    os.remove(refname)

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
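
# --- Hedged illustration (not part of the workflow above) ---
# The "tedort" step regresses rejected components' time series on accepted
# ones and keeps only the residuals, so the rejected regressors used for
# denoising share no variance with accepted components. Random matrices
# stand in for a real ICA mixing matrix here.
import numpy as np

n_vols = 200
acc_ts = np.random.randn(n_vols, 8)  # accepted component time series (T x A)
rej_ts = np.random.randn(n_vols, 3)  # rejected component time series (T x R)

betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
resid = rej_ts - acc_ts @ betas  # orthogonalized rejected time series

# The residuals are orthogonal to the accepted components' column space.
assert np.allclose(acc_ts.T @ resid, 0.0, atol=1e-8)
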
Example #12
0
def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG,
           ref_img, tes, algorithm='mdl', source_tes=-1, kdaw=10., rdaw=1.,
           out_dir='.', verbose=False, low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'mle', 'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic'}, optional
        Method with which to select components in TEDPCA. Default is 'mdl'.
        PCA decomposition with the 'mdl', 'kic', and 'aic' options is based
        on a Moving Average (stationary Gaussian) process and the options are
        ordered from most to least aggressive. See Li et al. (2007).
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0 will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Component weight at a voxel
    :math:`c`                 Component
    :math:`p`                 Exponent applied to the component weights
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Estimate the number of components to retain, e.g., by locating
        elbows in the variance-explained and model-fit curves.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    =================================================
    """
    if low_mem and algorithm == 'mle':
        LGR.warning('Low memory option is not compatible with MLE '
                    'dimensionality estimation. Switching to Kundu decision '
                    'tree.')
        algorithm = 'kundu'

    if algorithm == 'mle':
        alg_str = "using MLE dimensionality estimation (Minka, 2001)"
        RefLGR.info("Minka, T. P. (2001). Automatic choice of dimensionality "
                    "for PCA. In Advances in neural information processing "
                    "systems (pp. 598-604).")
    elif algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    else:
        alg_str = ("based on the PCA component estimation with a Moving Average"
                   "(stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")

    if source_tes == -1:
        dat_str = "the optimally combined data"
    elif source_tes == 0:
        dat_str = "the z-concatenated multi-echo data"
    else:
        dat_str = "a z-concatenated subset of echoes from the input data"

    RepLGR.info("Principal component analysis {0} was applied to "
                "{1} for dimensionality reduction.".format(alg_str, dat_str))

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join([str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1], axis=1)

    eim = np.squeeze(_utils.eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(
            ref_img, utils.unmask(utils.unmask(data, eim), mask))
        mask_img = io.new_nii_like(ref_img,
                                   utils.unmask(eim, mask).astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif algorithm == 'mle':
        voxel_comp_weights, varex, varex_norm, comp_ts = run_mlepca(data_z)
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
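    # The next block reorients `eim` to voxels-by-echoes, collapses across
    # echoes with a product (a voxel survives only if it is valid in every
    # echo), and unmasks the result into a full-volume boolean array.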
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat,
                                                    data_oc,
                                                    comp_ts,
                                                    t2s,
                                                    tes,
                                                    ref_img,
                                                    reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True)
    elif algorithm == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    elif algorithm in ['mdl', 'aic', 'kic']:
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], algorithm))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max())
                  for comp in comptable.index.values]

    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False)

    if len(source_tes) == 1 and source_tes[0] == -1:
        data_type = 'optimally combined data'
    else:
        data_type = 'z-concatenated data'
    comptable['Description'] = 'PCA fit to {0}.'.format(data_type)
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
                      label='pca', metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data, axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data, axis=None)  # variance normalize everything

    return kept_data, n_components
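A minimal sketch of the two-stage variance normalization used in the function above (numpy only; the array shape is illustrative):

import numpy as np

rng = np.random.default_rng(42)
data = rng.normal(loc=5., scale=3., size=(100, 50))  # voxels x timepoints

# Stage 1: z-score each voxel's time series (mean 0, std 1 per row).
data_z = ((data.T - data.T.mean(axis=0)) / data.T.std(axis=0)).T

# Stage 2: z-score the whole array so the global mean is 0 and std is 1.
data_z = (data_z - data_z.mean()) / data_z.std()

assert abs(data_z.mean()) < 1e-12 and abs(data_z.std() - 1.) < 1e-12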
Example #13
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           ref_img,
           tes,
           method='mle',
           ste=-1,
           kdaw=10.,
           rdaw=1.,
           wvpca=False,
           verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0 will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Component weight map (Z-statistics)
    :math:`c`                 Component
    :math:`p`                 Power applied to the weights (2 in practice)
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Estimate selection thresholds from the elbows of the sorted
        component metric curves (:math:`{\\kappa}`, :math:`{\\rho}`, and
        variance explained).
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[mask, :][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask, ...]
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee, :] for ee in ste - 1], axis=1)

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    fname = op.abspath('pcastate.pkl')
    if op.exists(fname):
        LGR.info('Loading PCA from: {}'.format(fname))
        with open(fname, 'rb') as handle:
            pcastate = pickle.load(handle)

        if pcastate['method'] != method:
            LGR.warning('Method from PCA state file ({0}) does not match '
                        'requested method ({1}).'.format(
                            pcastate['method'], method))
            state_found = False
        else:
            state_found = True
    else:
        state_found = False

    if not state_found:
        if method == 'mle':
            voxel_comp_weights, varex, comp_ts = run_mlepca(dz)
        else:
            ppca = PCA()
            ppca.fit(dz)
            comp_ts = ppca.components_
            varex = ppca.explained_variance_
            voxel_comp_weights = np.dot(np.dot(dz, comp_ts.T),
                                        np.diag(1. / varex))

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask, ...] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False,
                                                       label='mepca_',
                                                       verbose=verbose)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        pcastate = {
            'method': method,
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df
        }

        # Save state
        LGR.info('Saving PCA results to: {}'.format(fname))

        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Select components using decision tree
    if method == 'kundu':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=False)
    elif method == 'kundu-stabilize':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=True)
    elif method == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(ct_df.shape[0]))

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
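The pcastate.pkl handling above is a simple compute-or-load cache. A minimal sketch of the same pattern (the helper name and the plain-dict state are assumptions for illustration, not part of tedana):

import os.path as op
import pickle

def load_or_compute_state(fname, method, compute):
    """Return a cached state dict if it exists and matches `method`;
    otherwise recompute it and attempt to cache the result."""
    if op.exists(fname):
        with open(fname, 'rb') as handle:
            state = pickle.load(handle)
        if state.get('method') == method:
            return state  # cache hit: skip the expensive decomposition
    state = compute()  # e.g., run the PCA and package its outputs
    state['method'] = method
    try:
        with open(fname, 'wb') as handle:
            pickle.dump(state, handle)
    except TypeError:
        pass  # unpicklable contents: recompute on the next run instead
    return state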
Example #14
def fitmodels_direct(catd,
                     mmix,
                     mask,
                     t2s,
                     t2s_full,
                     tes,
                     combmode,
                     ref_img,
                     reindex=False,
                     mmixN=None,
                     full_sel=True,
                     label=None,
                     out_dir='.',
                     verbose=False):
    """
    Fit TE-dependence and -independence models to components.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    mask : (S [x E]) array_like
        Boolean mask array
    t2s : (S [x T]) array_like
        Limited T2* map or timeseries.
    t2s_full : (S [x T]) array_like
        Full T2* map or timeseries. For voxels with good signal in only one
        echo, which are zeros in the limited T2* map, this map uses the T2*
        estimate using the first two echoes.
    tes : list
        List of echo times associated with `catd`, in milliseconds
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    reindex : bool, optional
        Default: False
    mmixN : array_like, optional
        Default: None
    full_sel : bool, optional
        Whether to perform selection of components based on Rho/Kappa scores.
        Default: True

    Returns
    -------
    seldict : dict
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index is the component number.
    betas : :obj:`numpy.ndarray`
    mmix_new : :obj:`numpy.ndarray`
    """
    if not (catd.shape[0] == t2s.shape[0] == t2s_full.shape[0] ==
            mask.shape[0]):
        raise ValueError('First dimensions (number of samples) of catd ({0}), '
                         't2s ({1}), t2s_full ({2}), and mask ({3}) do not '
                         'match'.format(catd.shape[0], t2s.shape[0],
                                        t2s_full.shape[0], mask.shape[0]))
    elif catd.shape[1] != len(tes):
        raise ValueError('Second dimension of catd ({0}) does not match '
                         'number of echoes provided (tes; '
                         '{1})'.format(catd.shape[1], len(tes)))
    elif catd.shape[2] != mmix.shape[0]:
        raise ValueError('Third dimension (number of volumes) of catd ({0}) '
                         'does not match first dimension of '
                         'mmix ({1})'.format(catd.shape[2], mmix.shape[0]))
    elif t2s.shape != t2s_full.shape:
        raise ValueError('Shape of t2s array {0} does not match shape of '
                         't2s_full array {1}'.format(t2s.shape,
                                                     t2s_full.shape))
    elif t2s.ndim == 2:
        if catd.shape[2] != t2s.shape[1]:
            raise ValueError('Third dimension (number of volumes) of catd '
                             '({0}) does not match second dimension of '
                             't2s ({1})'.format(catd.shape[2], t2s.shape[1]))

    mask = t2s != 0  # Override input mask: keep only voxels with valid T2* fits

    # compute optimal combination of raw data
    tsoc = combine.make_optcom(catd,
                               tes,
                               mask,
                               t2s=t2s_full,
                               combmode=combmode,
                               verbose=False).astype(float)[mask]

    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None)
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(catd, mmix,
                       np.repeat(mask[:, np.newaxis], len(tes), axis=1))
    n_samp, n_echos, n_components = betas.shape
    n_voxels = mask.sum()
    n_data_voxels = (t2s != 0).sum()
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, _, _ = utils.getfbounds(n_echos)

    # mask arrays
    mumask = mu[t2s != 0]
    t2smask = t2s[t2s != 0]
    betamask = betas[t2s != 0]

    # set up Xmats
    X1 = mumask.T  # Model 1
    X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T  # Model 2

    # tables for component selection
    kappas = np.zeros([n_components])
    rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_data_voxels, n_components])
    F_S0_maps = np.zeros([n_data_voxels, n_components])
    Z_clmaps = np.zeros([n_voxels, n_components])
    F_R2_clmaps = np.zeros([n_data_voxels, n_components])
    F_S0_clmaps = np.zeros([n_data_voxels, n_components])
    Br_R2_clmaps = np.zeros([n_voxels, n_components])
    Br_S0_clmaps = np.zeros([n_voxels, n_components])
    pred_R2_maps = np.zeros([n_data_voxels, n_echos, n_components])
    pred_S0_maps = np.zeros([n_data_voxels, n_echos, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i_comp in range(n_components):
        # size of B is (n_echoes, n_samples)
        B = np.atleast_3d(betamask)[:, :, i_comp].T
        alpha = (np.abs(B)**2).sum(axis=0)
        varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100.
        varex_norm[i_comp] = (utils.unmask(WTS, mask)[t2s != 0][:, i_comp]**2).sum() /\
            totvar_norm * 100.

        # S0 Model
        # (S,) model coefficient map
        coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0)
        pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1))
        pred_S0_maps[:, :, i_comp] = pred_S0.T
        SSE_S0 = (B - pred_S0)**2
        SSE_S0 = SSE_S0.sum(axis=0)  # (S,) prediction error map
        F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0)
        F_S0_maps[:, i_comp] = F_S0

        # R2 Model
        coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0)
        pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1))
        pred_R2_maps[:, :, i_comp] = pred_R2.T
        SSE_R2 = (B - pred_R2)**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2)
        F_R2_maps[:, i_comp] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (
            Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i_comp] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(
            np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.))
        kappas[i_comp] = np.average(F_R2, weights=norm_weights)
        rhos[i_comp] = np.average(F_S0, weights=norm_weights)

    # tabulate component values
    comptable = np.vstack([kappas, rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in Kappa order
        sort_idx = comptable[:, 0].argsort()[::-1]
        comptable = comptable[sort_idx, :]
        mmix_new = mmix[:, sort_idx]
        betas = betas[..., sort_idx]
        pred_R2_maps = pred_R2_maps[:, :, sort_idx]
        pred_S0_maps = pred_S0_maps[:, :, sort_idx]
        F_S0_maps = F_S0_maps[:, sort_idx]
        F_R2_maps = F_R2_maps[:, sort_idx]
        Z_maps = Z_maps[:, sort_idx]
        WTS = WTS[:, sort_idx]
        PSC = PSC[:, sort_idx]
        tsoc_B = tsoc_B[:, sort_idx]
        tsoc_Babs = tsoc_Babs[:, sort_idx]
    else:
        mmix_new = mmix

    if verbose:
        # Echo-specific weight maps for each of the ICA components.
        io.filewrite(betas, op.join(out_dir,
                                    '{0}betas_catd.nii'.format(label)),
                     ref_img)
        # Echo-specific maps of predicted values for R2 and S0 models for each
        # component.
        io.filewrite(utils.unmask(pred_R2_maps, mask),
                     op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img)
        io.filewrite(utils.unmask(pred_S0_maps, mask),
                     op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img)
        # Weight maps used to average metrics across voxels
        io.filewrite(utils.unmask(Z_maps**2., mask),
                     op.join(out_dir, '{0}metric_weights.nii'.format(label)),
                     ref_img)

    comptable = pd.DataFrame(comptable,
                             columns=[
                                 'kappa', 'rho', 'variance explained',
                                 'normalized variance explained'
                             ])
    comptable.index.name = 'component'

    # full selection including clustering criteria
    seldict = None
    if full_sel:
        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i_comp in range(n_components):
            # Cluster-extent threshold and binarize F-maps
            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_R2_maps[:, i_comp], t2s != 0)))
            F_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFR2 = F_R2_clmaps[:, i_comp].sum()

            ccimg = io.new_nii_like(
                ref_img,
                np.squeeze(utils.unmask(F_S0_maps[:, i_comp], t2s != 0)))
            F_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg, min_cluster_size=csize, threshold=fmin, mask=mask,
                binarize=True)
            countsigFS0 = F_S0_clmaps[:, i_comp].sum()

            # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05
            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(Z_maps[:, i_comp], t2s != 0)))
            Z_clmaps[:, i_comp] = utils.threshold_map(ccimg,
                                                      min_cluster_size=csize,
                                                      threshold=1.95,
                                                      mask=mask,
                                                      binarize=True)

            # Cluster-extent threshold and binarize ranked signal-change map
            ccimg = io.new_nii_like(
                ref_img,
                utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), t2s != 0))
            Br_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFR2),
                mask=mask,
                binarize=True)
            Br_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFS0),
                mask=mask,
                binarize=True)

        # Build the selection dict explicitly rather than via eval()
        seldict = {
            'WTS': WTS, 'tsoc_B': tsoc_B, 'PSC': PSC, 'Z_maps': Z_maps,
            'F_R2_maps': F_R2_maps, 'F_S0_maps': F_S0_maps,
            'Z_clmaps': Z_clmaps, 'F_R2_clmaps': F_R2_clmaps,
            'F_S0_clmaps': F_S0_clmaps, 'Br_R2_clmaps': Br_R2_clmaps,
            'Br_S0_clmaps': Br_S0_clmaps,
        }

    return seldict, comptable, betas, mmix_new
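The Kappa and Rho computed in fitmodels_direct above are weighted averages of the per-voxel F-statistics, using the squared Z-scored component weights. A small worked example with made-up values:

import numpy as np

F_R2 = np.array([4.0, 12.0, 7.5])  # TE-dependence (BOLD) model fits
F_S0 = np.array([9.0, 2.0, 3.5])   # TE-independence (artifact) model fits
wtsZ = np.array([0.5, 2.0, 1.0])   # Z-scored component weights per voxel

norm_weights = np.abs(wtsZ) ** 2.               # zeta^p with p = 2
kappa = np.average(F_R2, weights=norm_weights)  # ~10.76
rho = np.average(F_S0, weights=norm_weights)    # ~2.62
# kappa >> rho suggests the component tracks the TE-dependent (BOLD) model.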
Example #15
def tedana_workflow(data,
                    tes,
                    mask=None,
                    mixm=None,
                    ctab=None,
                    manacc=None,
                    tedort=False,
                    gscontrol=None,
                    tedpca='mle',
                    ste=-1,
                    combmode='t2s',
                    verbose=False,
                    stabilize=False,
                    wvpca=False,
                    out_dir='.',
                    fixed_seed=42,
                    debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma-separated list of manually accepted components in string form.
        Default is None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')
        fh = logging.FileHandler(
            op.join(
                out_dir,
                'runlog-{0}.tsv'.format(datetime.now().isoformat().replace(
                    ':', '.'))))
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             minimum=False,
                                             getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        n_components, dd = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                ref_img,
                                                tes=tes,
                                                method=tedpca,
                                                ste=ste,
                                                kdaw=10.,
                                                rdaw=1.,
                                                wvpca=wvpca,
                                                verbose=verbose)
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd,
                                                     fixed_seed)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if ste == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            reindex=True,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'),
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    if tedort:
        acc_idx = comptable.loc[
            ~comptable['classification'].str.contains('rejected'), 'component']
        rej_idx = comptable.loc[
            comptable['classification'].str.contains('rejected'), 'component']
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    fixed_seed=fixed_seed,
                    acc=acc,
                    rej=rej,
                    midk=midk,
                    empty=ign,
                    ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
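The tedort branch above orthogonalizes rejected component time series with respect to accepted ones, so that removing the rejected components cannot also remove shared accepted signal. A self-contained sketch with random stand-in time series:

import numpy as np

rng = np.random.default_rng(0)
acc_ts = rng.normal(size=(120, 3))  # accepted component time series
rej_ts = rng.normal(size=(120, 2))  # rejected component time series

# Fit rejected components on the accepted ones and keep only the residual,
# i.e. the part of each rejected time series orthogonal to accepted signal.
betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
rej_orth = rej_ts - acc_ts @ betas

# Least-squares residuals are orthogonal to every accepted time series.
assert np.allclose(acc_ts.T @ rej_orth, 0., atol=1e-10)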
Example #16
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False, stabilize=False,
                    out_dir='.', fixed_seed=42, maxit=500, maxrestart=10,
                    debug=False, quiet=False, png=False, png_cmap='coolwarm'):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        --png must still be used to request figures. Default is 'coolwarm'.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        # ensure old logs aren't over-written
        basename = 'tedana_run'
        extension = 'txt'
        logname = op.join(out_dir, (basename + '.' + extension))
        logex = op.join(out_dir, (basename + '*'))
        previouslogs = glob.glob(logex)
        previouslogs.sort(reverse=True)
        for f in previouslogs:
            previousparts = op.splitext(f)
            newname = previousparts[0] + '_old' + previousparts[1]
            os.rename(f, newname)

        # set logging format
        formatter = logging.Formatter(
                    '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
                    datefmt='%Y-%m-%dT%H:%M:%S')

        # set up logging file and open it for writing
        fh = logging.FileHandler(logname)
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, ref_img,
                                                tes=tes, algorithm=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir, verbose=verbose)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
                    catd, data_oc, mmix_orig, mask, t2s, tes,
                    ref_img, reindex=True, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = model.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
                    catd, data_oc, mmix_orig, mask, t2s, tes,
                    ref_img, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        if ctab is None:
            comptable = model.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
            comptable = selection.manual_selection(comptable, acc=manacc)

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
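The T2* hard cap used in both workflows above keeps implausible fit blow-ups from distorting later maps. A minimal illustration with synthetic values:

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
t2s = rng.exponential(scale=50., size=10000)  # fake T2* estimates, in ms
t2s[:5] = 1e6  # a handful of implausible fit blow-ups

# Anything above 10x the 99.5th percentile is reset to the percentile value.
cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                  interpolation_method='lower')
t2s[t2s > cap_t2s * 10] = cap_t2s
assert t2s.max() <= cap_t2s * 10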
Example #17
def tedpca(data_cat,
           data_oc,
           combmode,
           mask,
           adaptive_mask,
           t2sG,
           ref_img,
           tes,
           algorithm='mdl',
           kdaw=10.,
           rdaw=1.,
           out_dir='.',
           verbose=False,
           low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    adaptive_mask : (S,) array_like
        Array where each value indicates the number of echoes with good signal
        for that voxel. This mask may be thresholded; for example, with values
        less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
        Method with which to select components in TEDPCA. PCA
        decomposition with the mdl, kic and aic options are based on a Moving Average
        (stationary Gaussian) process and are ordered from most to least aggressive
        (see Li et al., 2007).
        If a float is provided, then it is assumed to represent percentage of variance
        explained (0-1) to retain from PCA.
        Default is 'mdl'.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        This is only compatible with the "kundu" or "kundu-stabilize" algorithms.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Component weight map (Z-statistics)
    :math:`c`                 Component
    :math:`p`                 Power applied to the weights (2 in practice)
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Estimate selection thresholds from the elbows of the sorted
        component metric curves (:math:`{\\kappa}`, :math:`{\\rho}`, and
        variance explained).
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    =================================================

    See Also
    --------
    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
                                              parameter.
    """
    if algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif isinstance(algorithm, Number):
        alg_str = (
            "in which the number of components was determined based on a "
            "variance explained threshold")
    else:
        alg_str = (
            "based on the PCA component estimation with a Moving Average "
            "(stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")

    RepLGR.info("Principal component analysis {0} was applied to "
                "the optimally combined data for dimensionality "
                "reduction.".format(alg_str))

    n_samp, n_echos, n_vols = data_cat.shape

    LGR.info('Computing PCA of optimally combined multi-echo data')
    data = data_oc[mask, :]

    data_z = ((data.T - data.T.mean(axis=0)) /
              data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z -
              data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(ref_img, utils.unmask(data, mask))
        mask_img = io.new_nii_like(ref_img, mask.astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif isinstance(algorithm, Number):
        ppca = PCA(copy=False, n_components=algorithm, svd_solver="full")
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
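        # Project the z-scored data onto the component time series and scale
        # by each component's inverse variance; the variance is multiplied
        # back in when the reduced data are reconstructed below.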
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat,
                                                    data_oc,
                                                    comp_ts,
                                                    adaptive_mask,
                                                    tes,
                                                    ref_img,
                                                    reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=True)
    else:
        alg_str = "variance explained-based" if isinstance(
            algorithm, Number) else algorithm
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], alg_str))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [
        io.add_decomp_prefix(comp,
                             prefix='pca',
                             max_value=comptable.index.max())
        for comp in comptable.index.values
    ]

    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False)

    comptable['Description'] = 'PCA fit to optimally combined data.'
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable,
                      op.join(out_dir, 'pca_decomposition.json'),
                      label='pca',
                      metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
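    # Reconstruct a reduced dataset from the accepted components only:
    # multiply the variance back into the voxel weights, then project
    # through the accepted component time series.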
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return kept_data, n_components
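
A quick aside on the kappa/rho math in the docstring above: both metrics are
weighted averages of voxel-wise F-statistics, with the component weight maps
(raised to the power p) serving as the weights. Below is a minimal NumPy
sketch of that computation, using hypothetical arrays `F_r2` and `F_s0`
(V x C voxel-wise F-statistics for the TE-dependent and TE-independent
models) and `Z_maps` (V x C component weight maps):

import numpy as np

def weighted_kappa_rho(F_r2, F_s0, Z_maps, power=2):
    """Compute kappa/rho as weight-map-weighted averages of F-statistics."""
    weights = np.abs(Z_maps) ** power  # zeta^p from the docstring
    kappa = (weights * F_r2).sum(axis=0) / weights.sum(axis=0)
    rho = (weights * F_s0).sum(axis=0) / weights.sum(axis=0)
    return kappa, rho  # one value per component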
Example #18
0
def combine_tedana(tes,
                   data,
                   combmodes=('t2s', 'ste'),
                   mask=None,
                   overwrite=True):
    """ Function based on tedana main workflow """
    from tedana import utils, model, io, decay, combine
    from scipy import stats
    import numpy as np
    import os

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape

    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             minimum=False,
                                             getsum=True)

    # check if the t2s-map is already created first
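    # Build a printf-style template from the first filename: drop the echo
    # entity and extension, and insert a '%s' slot for each output's label.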
    base_name = data[0].replace('_echo-1',
                                '').replace('desc-preproc-hp',
                                            'desc-preproc-hp-%s').replace(
                                                '.nii', '').replace('.gz', '')

    if not os.path.exists(base_name % 't2sv' + '.nii.gz') or overwrite:
        t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(
            catd, tes, mask, masksum)
        # set a hard cap for the T2* map
        # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
        cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                          99.5,
                                          interpolation_method='lower')
        t2s[t2s > cap_t2s * 10] = cap_t2s

        # save
        io.filewrite(t2s, base_name % 't2sv' + '.nii', ref_img, gzip=True)
        io.filewrite(s0, base_name % 's0v' + '.nii', ref_img, gzip=True)
        io.filewrite(t2ss, base_name % 't2ss' + '.nii', ref_img, gzip=True)
        io.filewrite(s0s, base_name % 's0vs' + '.nii', ref_img, gzip=True)
        io.filewrite(t2sG, base_name % 't2svG' + '.nii', ref_img, gzip=True)
        io.filewrite(s0G, base_name % 's0vG' + '.nii', ref_img, gzip=True)

    else:
        t2sG = utils.load_image(base_name % 't2svG' + '.nii.gz')
        t2s = utils.load_image(base_name % 't2sv' + '.nii.gz')

    # optimally combine data
    if 't2s' in combmodes:
        print('Combining echos using optcomb...', end='')
        ext = 'optcomb'
        data_oc = combine.make_optcom(catd,
                                      tes,
                                      mask,
                                      t2s=t2sG,
                                      combmode='t2s')
        # make sure to set all nan-values/inf to 0
        data_oc[np.isinf(data_oc)] = 0
        data_oc[np.isnan(data_oc)] = 0
        print('Done, writing results...')
        io.filewrite(data_oc, base_name % ext + '.nii', ref_img, gzip=True)
    if 'ste' in combmodes:
        print('Combining echos using PAID...', end='')
        ext = 'PAID'
        data_oc = combine.make_optcom(catd,
                                      tes,
                                      mask,
                                      t2s=t2sG,
                                      combmode='ste')
        # make sure to set all nan-values/inf to 0
        data_oc[np.isinf(data_oc)] = 0
        data_oc[np.isnan(data_oc)] = 0
        print('Done, writing results...')
        io.filewrite(data_oc, base_name % ext + '.nii', ref_img, gzip=True)

    return 0
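
A hypothetical invocation of `combine_tedana`: the function derives its
output names by string substitution on the first filename, so the '_echo-1'
and 'desc-preproc-hp' substrings below are assumed by that naming scheme,
and the file names and echo times are invented for illustration.

echo_files = ['sub-01_echo-1_desc-preproc-hp_bold.nii.gz',
              'sub-01_echo-2_desc-preproc-hp_bold.nii.gz',
              'sub-01_echo-3_desc-preproc-hp_bold.nii.gz']
echo_times = [13.7, 30.0, 47.0]  # milliseconds

combine_tedana(echo_times, echo_files, combmodes=('t2s',), overwrite=False)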
Example #19
0
def t2smap_workflow(data,
                    tes,
                    out_dir='.',
                    mask=None,
                    fittype='loglin',
                    fitmode='all',
                    combmode='t2s',
                    debug=False,
                    quiet=False):
    """
    Estimate T2* and S0, and optimally combine data across TEs.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    out_dir : :obj:`str`, optional
        Output directory.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    fittype : {'loglin', 'curvefit'}, optional
        Monoexponential fitting method.
        'loglin' means to use the default linear fit to the log of
        the data.
        'curvefit' means to use a monoexponential fit to the raw data,
        which is slightly slower but may be more accurate.
    fitmode : {'all', 'ts'}, optional
        Monoexponential model fitting scheme.
        'all' means that the model is fit, per voxel, across all timepoints.
        'ts' means that the model is fit, per voxel and per timepoint.
        Default is 'all'.
    combmode : {'t2s', 'paid'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'paid' (Poser).

    Other Parameters
    ----------------
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppress logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files, which are described below:

    ==========================    =================================================
    Filename                      Content
    ==========================    =================================================
    T2starmap.nii.gz              Limited estimated T2* 3D map or 4D timeseries.
                                  Will be a 3D map if ``fitmode`` is 'all' and a
                                  4D timeseries if it is 'ts'.
    S0map.nii.gz                  Limited S0 3D map or 4D timeseries.
    desc-full_T2starmap.nii.gz    Full T2* map/timeseries. The difference between
                                  the limited and full maps is that, for voxels
                                  affected by dropout where only one echo contains
                                  good data, the full map uses the single echo's
                                  value while the limited map has a NaN.
    desc-full_S0map.nii.gz        Full S0 map/timeseries.
    desc-optcom_bold.nii.gz       Optimally combined timeseries.
    ==========================    =================================================
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        logging.basicConfig(level=logging.DEBUG)
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format(data))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             getsum=True,
                                             threshold=1)

    LGR.info('Computing adaptive T2* map')
    if fitmode == 'all':
        (t2s_limited, s0_limited, t2s_full,
         s0_full) = decay.fit_decay(catd, tes, mask, masksum, fittype)
    else:
        (t2s_limited, s0_limited, t2s_full,
         s0_full) = decay.fit_decay_ts(catd, tes, mask, masksum, fittype)

    # set a hard cap for the T2* map/timeseries
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    cap_t2s_sec = utils.millisec2sec(cap_t2s * 10.)
    LGR.debug('Setting cap on T2* map at {:.5f}s'.format(cap_t2s_sec))
    t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s

    LGR.info('Computing optimal combination')
    # optimally combine data
    OCcatd = combine.make_optcom(catd,
                                 tes,
                                 masksum,
                                 t2s=t2s_full,
                                 combmode=combmode)

    # clean up numerical errors
    for arr in (OCcatd, s0_limited, t2s_limited):
        np.nan_to_num(arr, copy=False)

    s0_limited[s0_limited < 0] = 0
    t2s_limited[t2s_limited < 0] = 0

    io.filewrite(utils.millisec2sec(t2s_limited),
                 op.join(out_dir, 'T2starmap.nii.gz'), ref_img)
    io.filewrite(s0_limited, op.join(out_dir, 'S0map.nii.gz'), ref_img)
    io.filewrite(utils.millisec2sec(t2s_full),
                 op.join(out_dir, 'desc-full_T2starmap.nii.gz'), ref_img)
    io.filewrite(s0_full, op.join(out_dir, 'desc-full_S0map.nii.gz'), ref_img)
    io.filewrite(OCcatd, op.join(out_dir, 'desc-optcom_bold.nii.gz'), ref_img)
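
For reference, a minimal call of this workflow on a hypothetical three-echo
dataset (file names and echo times invented for illustration), fitting the
monoexponential model across all timepoints and writing outputs to a new
directory:

t2smap_workflow(['sub-01_echo-1_bold.nii.gz',
                 'sub-01_echo-2_bold.nii.gz',
                 'sub-01_echo-3_bold.nii.gz'],
                tes=[13.7, 30.0, 47.0],  # echo times in ms
                out_dir='t2smap_outputs',
                fittype='loglin',
                fitmode='all',
                combmode='t2s')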
Example #20
0
def minimum_image_regression(optcom_ts,
                             mmix,
                             mask,
                             comptable,
                             ref_img,
                             out_dir="."):
    """
    Perform minimum image regression (MIR) to remove T1-like effects from
    BOLD-like components.

    While this method has not yet been described in detail in any publications,
    we recommend that users cite [1]_.

    Parameters
    ----------
    optcom_ts : (S x T) array_like
        Optimally combined time series data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `optcom_ts`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index should be the component number.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    out_dir : :obj:`str`, optional
        Output directory.

    Notes
    -----
    Minimum image regression operates by constructing an amplitude-normalized
    form of the multi-echo high-Kappa (MEHK) time series from BOLD-like ICA
    components, and then taking the voxel-wise minimum over time.
    This "minimum map" serves as a voxel-wise estimate of the T1-like effect
    in the time series.
    From this minimum map, a T1-like global signal (i.e., a 1D time series)
    is estimated.
    The component time series in the mixing matrix are then corrected for the
    T1-like effect by regressing out the global signal time series from each.
    Finally, the multi-echo denoising (MEDN) and MEHK time series are
    reconstructed from the corrected mixing matrix and are written out to new
    files.

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    sphis_hik.nii             T1-like effect
    hik_ts_OC_MIR.nii         T1-corrected BOLD (high-Kappa) time series
    dn_ts_OC_MIR.nii          Denoised version of T1-corrected time series
    betas_hik_OC_MIR.nii      T1 global signal-corrected components
    meica_mix_MIR.1D          T1 global signal-corrected mixing matrix
    ======================    =================================================

    References
    ----------
    .. [1] Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., Vértes, P. E.,
           Inati, S. J., ... & Bullmore, E. T. (2013).
           Integrated strategy for improving functional connectivity mapping
           using multiecho fMRI.
           Proceedings of the National Academy of Sciences, 110(40), 16187-16192.
    """
    LGR.info(
        "Performing minimum image regression to remove spatially-diffuse noise"
    )
    RepLGR.info(
        "Minimum image regression was then applied to the "
        "data in order to remove spatially diffuse noise (Kundu et al., 2013)."
    )
    RefLGR.info(
        "Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., Vértes, P. E., "
        "Inati, S. J., ... & Bullmore, E. T. (2013). "
        "Integrated strategy for improving functional connectivity mapping "
        "using multiecho fMRI. "
        "Proceedings of the National Academy of Sciences, 110(40), 16187-16192."
    )

    all_comps = comptable.index.values
    acc = comptable[comptable.classification == "accepted"].index.values
    ign = comptable[comptable.classification == "ignored"].index.values
    not_ign = sorted(np.setdiff1d(all_comps, ign))
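    # Model all non-ignored components in the regression below, so that the
    # variance of ignored components is left in the residual.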

    optcom_masked = optcom_ts[mask, :]
    optcom_mean = optcom_masked.mean(axis=-1)[:, np.newaxis]
    optcom_std = optcom_masked.std(axis=-1)[:, np.newaxis]

    # Compute temporal regression
    optcom_z = stats.zscore(optcom_masked, axis=-1)
    comp_pes = np.linalg.lstsq(
        mmix, optcom_z.T, rcond=None)[0].T  # component parameter estimates
    resid = optcom_z - np.dot(comp_pes[:, not_ign], mmix[:, not_ign].T)

    # Build time series of just BOLD-like components (i.e., MEHK) and save T1-like effect
    mehk_ts = np.dot(comp_pes[:, acc], mmix[:, acc].T)
    t1_map = mehk_ts.min(axis=-1)  # map of T1-like effect
    t1_map -= t1_map.mean()
    io.filewrite(utils.unmask(t1_map, mask), op.join(out_dir, "sphis_hik"),
                 ref_img)
    t1_map = t1_map[:, np.newaxis]

    # Find the global signal based on the T1-like effect
    glob_sig = np.linalg.lstsq(t1_map, optcom_z, rcond=None)[0]

    # Remove T1-like global signal from MEHK time series
    mehk_noT1gs = mehk_ts - np.dot(
        np.linalg.lstsq(glob_sig.T, mehk_ts.T, rcond=None)[0].T, glob_sig)
    hik_ts = mehk_noT1gs * optcom_std  # rescale
    io.filewrite(utils.unmask(hik_ts, mask), op.join(out_dir, "hik_ts_OC_MIR"),
                 ref_img)

    # Make denoised version of T1-corrected time series
    medn_ts = optcom_mean + ((mehk_noT1gs + resid) * optcom_std)
    io.filewrite(utils.unmask(medn_ts, mask), op.join(out_dir, "dn_ts_OC_MIR"),
                 ref_img)

    # Orthogonalize mixing matrix w.r.t. T1-GS
    mmix_noT1gs = mmix.T - np.dot(
        np.linalg.lstsq(glob_sig.T, mmix, rcond=None)[0].T, glob_sig)
    mmix_noT1gs_z = stats.zscore(mmix_noT1gs, axis=-1)
    mmix_noT1gs_z = np.vstack(
        (np.atleast_2d(np.ones(max(glob_sig.shape))), glob_sig, mmix_noT1gs_z))

    # Write T1-corrected components and mixing matrix
    comp_pes_norm = np.linalg.lstsq(mmix_noT1gs_z.T, optcom_z.T,
                                    rcond=None)[0].T
    io.filewrite(
        utils.unmask(comp_pes_norm[:, 2:], mask),
        op.join(out_dir, "betas_hik_OC_MIR"),
        ref_img,
    )
    np.savetxt(op.join(out_dir, "meica_mix_MIR.1D"), mmix_noT1gs)
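
The core of MIR is compact enough to sketch standalone: take the voxel-wise
minimum of the high-Kappa time series as a T1-like map, regress the optimally
combined data onto that map to estimate a global signal, then project the
global signal out. A toy NumPy version, assuming hypothetical inputs
`mehk_ts` (V x T high-Kappa time series) and `optcom_z` (V x T z-scored
optimally combined data):

import numpy as np

def remove_t1_global_signal(mehk_ts, optcom_z):
    """Toy sketch of T1-like global signal estimation and removal."""
    # Demeaned voxel-wise minimum over time: the "minimum map" (T1-like map)
    t1_map = mehk_ts.min(axis=-1)
    t1_map = (t1_map - t1_map.mean())[:, np.newaxis]  # (V, 1)

    # Global signal: the 1D time series best explaining optcom_z from the map
    glob_sig = np.linalg.lstsq(t1_map, optcom_z, rcond=None)[0]  # (1, T)

    # Regress the global signal out of the high-Kappa time series
    betas = np.linalg.lstsq(glob_sig.T, mehk_ts.T, rcond=None)[0].T  # (V, 1)
    return mehk_ts - betas @ glob_sig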