Ejemplo n.º 1
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           stabilize,
           ref_img,
           tes,
           kdaw,
           rdaw,
           ste=0,
           wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:,
                                              np.newaxis, :])][:,
                                                               np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1],
                     axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        voxel_comp_weights, varex, comp_ts = run_svd(dz)

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()
        eigenvalue_elbow = getelbow(varex_norm, return_val=True)

        diff_varex_norm = np.abs(np.diff(varex_norm))
        lower_diff_varex_norm = diff_varex_norm[(len(diff_varex_norm) // 2):]
        varex_norm_thr = np.mean(
            [lower_diff_varex_norm.max(),
             diff_varex_norm.min()])
        varex_norm_min = varex_norm[
            (len(diff_varex_norm) // 2) + np.arange(len(lower_diff_varex_norm))
            [lower_diff_varex_norm >= varex_norm_thr][0] + 1]
        varex_norm_cum = np.cumsum(varex_norm)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df,
            'eigenvalue_elbow': eigenvalue_elbow,
            'varex_norm_min': varex_norm_min,
            'varex_norm_cum': varex_norm_cum
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        voxel_comp_weights, varex = pcastate['voxel_comp_weights'], pcastate[
            'varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']
        eigenvalue_elbow = pcastate['eigenvalue_elbow']
        varex_norm_min = pcastate['varex_norm_min']
        varex_norm_cum = pcastate['varex_norm_cum']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted(
        [fmin, getelbow(ct_df['kappa'], return_val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted(
        [fmin, getelbow_cons(ct_df['rho'], return_val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        lim_idx = utils.andb([ct_df['kappa'] < fmid,
                              ct_df['kappa'] > fmin]) == 2
        kappa_lim = ct_df.loc[lim_idx, 'kappa'].values
        kappa_thr = kappa_lim[getelbow(kappa_lim)]

        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]
        stabilize = True
    elif int(rdaw) == -1:
        lim_idx = utils.andb([ct_df['rho'] < fmid, ct_df['rho'] > fmin]) == 2
        rho_lim = ct_df.loc[lim_idx, 'rho'].values
        rho_thr = rho_lim[getelbow(rho_lim)]

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Reject if low Kappa, Rho, and variance explained
    is_lowk = ct_df['kappa'] <= kappa_thr
    is_lowr = ct_df['rho'] <= rho_thr
    is_lowe = ct_df['normalized variance explained'] <= eigenvalue_elbow
    is_lowkre = is_lowk & is_lowr & is_lowe
    ct_df.loc[is_lowkre, 'classification'] = 'rejected'
    ct_df.loc[is_lowkre, 'rationale'] += 'low rho, kappa, and varex;'

    # Reject if low variance explained
    is_lows = ct_df['normalized variance explained'] <= varex_norm_min
    ct_df.loc[is_lows, 'classification'] = 'rejected'
    ct_df.loc[is_lows, 'rationale'] += 'low variance explained;'

    # Reject if Kappa over limit
    is_fmax1 = ct_df['kappa'] == F_MAX
    ct_df.loc[is_fmax1, 'classification'] = 'rejected'
    ct_df.loc[is_fmax1, 'rationale'] += 'kappa equals fmax;'

    # Reject if Rho over limit
    is_fmax2 = ct_df['rho'] == F_MAX
    ct_df.loc[is_fmax2, 'classification'] = 'rejected'
    ct_df.loc[is_fmax2, 'rationale'] += 'rho equals fmax;'

    if stabilize:
        temp7 = varex_norm_cum >= 0.95
        ct_df.loc[temp7, 'classification'] = 'rejected'
        ct_df.loc[temp7, 'rationale'] += 'cumulative var. explained above 95%;'
        under_fmin1 = ct_df['kappa'] <= fmin
        ct_df.loc[under_fmin1, 'classification'] = 'rejected'
        ct_df.loc[under_fmin1, 'rationale'] += 'kappa below fmin;'
        under_fmin2 = ct_df['rho'] <= fmin
        ct_df.loc[under_fmin2, 'classification'] = 'rejected'
        ct_df.loc[under_fmin2, 'rationale'] += 'rho below fmin;'

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize timeseries
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
Ejemplo n.º 2
0
def tedana(data,
           tes,
           mixm=None,
           ctab=None,
           manacc=None,
           strict=False,
           gscontrol=True,
           kdaw=10.,
           rdaw=1.,
           conv=2.5e-5,
           ste=-1,
           combmode='t2s',
           dne=False,
           initcost='tanh',
           finalcost='tanh',
           stabilize=False,
           fout=False,
           filecsdata=False,
           label=None,
           fixed_seed=42,
           debug=False,
           quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adapative mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd,
                                                    tes,
                                                    mask,
                                                    masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd,
                                                OCcatd,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                stabilize,
                                                ref_img,
                                                tes=tes,
                                                kdaw=kdaw,
                                                rdaw=rdaw,
                                                ste=ste)
        mmix_orig = decomposition.tedica(n_components,
                                         dd,
                                         conv,
                                         fixed_seed,
                                         cost=initcost,
                                         final_cost=finalcost,
                                         verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 fout=fout,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict,
                                                   mmix,
                                                   mask,
                                                   ref_img,
                                                   manacc,
                                                   n_echos,
                                                   t2s,
                                                   s0,
                                                   strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict,
                                                       mmix,
                                                       mask,
                                                       ref_img,
                                                       manacc,
                                                       n_echos,
                                                       t2s,
                                                       s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning(
            'No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk,
                       empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
Ejemplo n.º 3
0
def tedpca(catd, OCcatd, combmode, mask, t2s, t2sG, stabilize,
           ref_img, tes, kdaw, rdaw, ste=0, mlepca=True):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : bool
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    tes : list
        List of echo times associated with `catd`, in milliseconds
    kdaw : float
        Dimensionality augmentation weight for Kappa calculations
    rdaw : float
        Dimensionality augmentation weight for Rho calculations
    ste : int or list-of-int, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : bool, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True

    Returns
    -------
    n_components : int
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif)//2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
                                                     tes, combmode, ref_img,
                                                     mmixN=vTmixN, full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                    'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: {}'.format('pcastate.pkl'))
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted([fmin, getelbow_mod(kappas, val=True)/2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted([fmin, getelbow_cons(rhos, val=True)/2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=np.int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
Ejemplo n.º 4
0
def tedana_workflow(data,
                    tes,
                    mask=None,
                    mixm=None,
                    ctab=None,
                    manacc=None,
                    tedort=False,
                    gscontrol=None,
                    tedpca='mle',
                    source_tes=-1,
                    combmode='t2s',
                    verbose=False,
                    stabilize=False,
                    out_dir='.',
                    fixed_seed=42,
                    maxit=500,
                    maxrestart=10,
                    debug=False,
                    quiet=False,
                    png=False,
                    png_cmap='coolwarm'):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    png : obj:'bool', optional
        Generate simple plots and figures. Default is false.
    png_cmap : obj:'str', optional
            Name of a matplotlib colormap to be used when generating figures.
            --png must still be used to request figures. Default is 'coolwarm'
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')
        fh = logging.FileHandler(
            op.join(
                out_dir,
                'runlog-{0}.tsv'.format(datetime.now().isoformat().replace(
                    ':', '.'))))
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                ref_img,
                                                tes=tes,
                                                method=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10.,
                                                rdaw=1.,
                                                out_dir=out_dir,
                                                verbose=verbose)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed, maxit,
                                         maxrestart)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            reindex=True,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    elif ctab is not None and manacc is not None:
        LGR.info('Using supplied ICA mixing matrix, component table, and '
                 'accepted components')
        mmix = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable = pd.read_csv(ctab, sep='\t', index_col='component')
        comptable = selection.selcomps({}, comptable, mmix, manacc, n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'),
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
    if tedort:
        acc_idx = comptable.loc[~comptable.classification.str.
                                contains('rejected')].index.values
        rej_idx = comptable.loc[comptable.classification.str.contains(
            'rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc,
                            mask=mask,
                            comptable=comptable,
                            mmix=mmix_orig,
                            ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
Ejemplo n.º 5
0
def tedana(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
           gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1,
           combmode='t2s', dne=False, initcost='tanh', finalcost='tanh',
           stabilize=False, fout=False, filecsdata=False, label=None,
           fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    fout : :obj:`bool`, optional
        Save output TE-dependence Kappa/Rho SPMs. Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.
    fixed_seed : :obj:`int`, optional
        Seeded value for ICA, for reproducibility.
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    LGR.info('Loading input data: {}'.format([op.abspath(f) for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if fout:
        fout = ref_img
    else:
        fout = None

    kdaw, rdaw = float(kdaw), float(rdaw)

    if label is not None:
        out_dir = 'TED.{0}'.format(label)
    else:
        out_dir = 'TED'
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    LGR.info('Computing adapative mask')
    mask, masksum = utils.make_adaptive_mask(catd, minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum,
                                                    start_echo=1)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, t2sG, tes, mask, combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask, t2s, t2sG,
                                                stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste)
        mmix_orig = decomposition.tedica(n_components, dd, conv, fixed_seed, cost=initcost,
                                         final_cost=finalcost, verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 fout=fout,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask, ref_img, manacc,
                                                   n_echos, t2s, s0, strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 fout=fout)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
Ejemplo n.º 6
0
def tedana_workflow(data,
                    tes,
                    mask=None,
                    mixm=None,
                    ctab=None,
                    manacc=None,
                    strict=False,
                    gscontrol=True,
                    kdaw=10.,
                    rdaw=1.,
                    conv=2.5e-5,
                    ste=-1,
                    combmode='t2s',
                    dne=False,
                    cost='logcosh',
                    stabilize=False,
                    filecsdata=False,
                    wvpca=False,
                    label=None,
                    fixed_seed=42,
                    debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    cost : {'logcosh', 'exp', 'cube'} str, optional
        Cost function for ICA
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file. For a complete list of the files generated by this workflow, please
    visit https://tedana.readthedocs.io/en/latest/outputs.html
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except (TypeError, AttributeError):
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             minimum=False,
                                             getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        n_components, dd = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                stabilize,
                                                ref_img,
                                                tes=tes,
                                                kdaw=kdaw,
                                                rdaw=rdaw,
                                                ste=ste,
                                                wvpca=wvpca)
        mmix_orig, fixed_seed = decomposition.tedica(n_components,
                                                     dd,
                                                     conv,
                                                     fixed_seed,
                                                     cost=cost)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd,
                                                                 mmix_orig,
                                                                 mask,
                                                                 t2s,
                                                                 t2sG,
                                                                 tes,
                                                                 combmode,
                                                                 ref_img,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd, mmix_orig, mask, t2s, t2sG, tes, combmode, ref_img)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'),
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    fixed_seed=fixed_seed,
                    acc=acc,
                    rej=rej,
                    midk=midk,
                    empty=ign,
                    ref_img=ref_img)
    io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)
    if dne:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)
Ejemplo n.º 7
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           ref_img,
           tes,
           method='mle',
           ste=-1,
           kdaw=10.,
           rdaw=1.,
           wvpca=False,
           verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[mask, :][:, np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask, ...]
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee, :] for ee in ste - 1], axis=1)

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    fname = op.abspath('pcastate.pkl')
    if op.exists('pcastate.pkl'):
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)

        if pcastate['method'] != method:
            LGR.warning('Method from PCA state file ({0}) does not match '
                        'requested method ({1}).'.format(
                            pcastate['method'], method))
            state_found = False
        else:
            state_found = True
    else:
        state_found = False

    if not state_found:
        if method == 'mle':
            voxel_comp_weights, varex, comp_ts = run_mlepca(dz)
        else:
            ppca = PCA()
            ppca.fit(dz)
            comp_ts = ppca.components_
            varex = ppca.explained_variance_
            voxel_comp_weights = np.dot(np.dot(dz, comp_ts.T),
                                        np.diag(1. / varex))

        # actual variance explained (normalized)
        varex_norm = varex / varex.sum()

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask, ...] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = comp_ts.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ct_df, betasv, v_T = model.fitmodels_direct(catd,
                                                       comp_ts.T,
                                                       eimum,
                                                       t2s,
                                                       t2sG,
                                                       tes,
                                                       combmode,
                                                       ref_img,
                                                       mmixN=vTmixN,
                                                       full_sel=False,
                                                       label='mepca_',
                                                       verbose=verbose)
        # varex_norm overrides normalized varex computed by fitmodels_direct
        ct_df['normalized variance explained'] = varex_norm

        pcastate = {
            'method': method,
            'voxel_comp_weights': voxel_comp_weights,
            'varex': varex,
            'comp_ts': comp_ts,
            'comptable': ct_df
        }

        # Save state
        LGR.info('Saving PCA results to: {}'.format(fname))

        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')
    else:  # if loading existing state
        voxel_comp_weights = pcastate['voxel_comp_weights']
        varex = pcastate['varex']
        comp_ts = pcastate['comp_ts']
        ct_df = pcastate['comptable']

    np.savetxt('mepca_mix.1D', comp_ts.T)

    # write component maps to 4D image
    comp_maps = np.zeros((OCcatd.shape[0], comp_ts.shape[0]))
    for i_comp in range(comp_ts.shape[0]):
        temp_comp_ts = comp_ts[i_comp, :][:, None]
        comp_map = utils.unmask(
            model.computefeats2(OCcatd, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Add new columns to comptable for classification
    ct_df['classification'] = 'accepted'
    ct_df['rationale'] = ''

    # Select components using decision tree
    if method == 'kundu':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=False)
    elif method == 'kundu-stabilize':
        ct_df = kundu_tedpca(ct_df, n_echos, kdaw, rdaw, stabilize=True)
    elif method == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(ct_df.shape[0]))

    ct_df.to_csv('comp_table_pca.txt',
                 sep='\t',
                 index=True,
                 index_label='component',
                 float_format='%.6f')

    sel_idx = ct_df['classification'] == 'accepted'
    n_components = np.sum(sel_idx)
    voxel_kept_comp_weighted = (voxel_comp_weights[:, sel_idx] *
                                varex[None, sel_idx])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[sel_idx, :])

    if wvpca:
        kept_data = idwtmat(kept_data, cAl)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return n_components, kept_data
Ejemplo n.º 8
0
def tedana_workflow(data,
                    tes,
                    mask=None,
                    mixm=None,
                    ctab=None,
                    manacc=None,
                    tedort=False,
                    gscontrol=None,
                    tedpca='mle',
                    ste=-1,
                    combmode='t2s',
                    verbose=False,
                    stabilize=False,
                    wvpca=False,
                    out_dir='.',
                    fixed_seed=42,
                    debug=False,
                    quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        formatter = logging.Formatter(
            '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S')
        fh = logging.FileHandler(
            op.join(
                out_dir,
                'runlog-{0}.tsv'.format(datetime.now().isoformat().replace(
                    ':', '.'))))
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd,
                                             mask=mask,
                                             minimum=False,
                                             getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(),
                                      99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = model.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        n_components, dd = decomposition.tedpca(catd,
                                                data_oc,
                                                combmode,
                                                mask,
                                                t2s,
                                                t2sG,
                                                ref_img,
                                                tes=tes,
                                                method=tedpca,
                                                ste=ste,
                                                kdaw=10.,
                                                rdaw=1.,
                                                wvpca=wvpca,
                                                verbose=verbose)
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd,
                                                     fixed_seed)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if ste == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            reindex=True,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                       n_echos)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(
            catd,
            mmix_orig,
            mask,
            t2s,
            t2sG,
            tes,
            combmode,
            ref_img,
            label='meica_',
            out_dir=out_dir,
            verbose=verbose)
        if ctab is None:
            comptable = selection.selcomps(seldict, comptable, mmix, manacc,
                                           n_echos)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'),
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')
    if 'component' not in comptable.columns:
        comptable['component'] = comptable.index
    acc = comptable.loc[comptable['classification'] == 'accepted', 'component']
    rej = comptable.loc[comptable['classification'] == 'rejected', 'component']
    midk = comptable.loc[comptable['classification'] == 'midk', 'component']
    ign = comptable.loc[comptable['classification'] == 'ignored', 'component']
    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    if tedort:
        acc_idx = comptable.loc[
            ~comptable['classification'].str.contains('rejected'), 'component']
        rej_idx = comptable.loc[
            comptable['classification'].str.contains('rejected'), 'component']
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc,
                    mask=mask,
                    comptable=comptable,
                    mmix=mmix,
                    n_vols=n_vols,
                    fixed_seed=fixed_seed,
                    acc=acc,
                    rej=rej,
                    midk=midk,
                    empty=ign,
                    ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        io.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
Ejemplo n.º 9
0
def tedpca(catd,
           OCcatd,
           combmode,
           mask,
           t2s,
           t2sG,
           stabilize,
           ref_img,
           tes,
           kdaw,
           rdaw,
           ste=0,
           mlepca=True,
           wvpca=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    stabilize : :obj:`bool`
        Whether to attempt to stabilize convergence of ICA by returning
        dimensionally-reduced data from PCA and component selection.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `catd`, in milliseconds
    kdaw : :obj:`float`
        Dimensionality augmentation weight for Kappa calculations
    rdaw : :obj:`float`
        Dimensionality augmentation weight for Rho calculations
    ste : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using all the echos and 0 will indicate using the
        optimal combination of the echos. A list can be provided to indicate
        a subset of echos. Default: 0
    mlepca : :obj:`bool`, optional
        Whether to use the method originally explained in Minka, NIPS 2000 for
        guessing PCA dimensionality instead of a traditional SVD. Default: True
    wvpca : :obj:`bool`, optional
        Whether to apply wavelet denoising to data. Default: False

    Returns
    -------
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    dd : (S x E x T) :obj:`numpy.ndarray`
        Dimensionally-reduced functional data

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = catd.shape
    ste = np.array([int(ee) for ee in str(ste).split(',')])

    if len(ste) == 1 and ste[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        d = OCcatd[utils.make_min_mask(OCcatd[:,
                                              np.newaxis, :])][:,
                                                               np.newaxis, :]
    elif len(ste) == 1 and ste[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        d = catd[mask].astype('float64')
    else:
        LGR.info('Computing PCA of echo #%s' %
                 ','.join([str(ee) for ee in ste]))
        d = np.stack([catd[mask, ee] for ee in ste - 1],
                     axis=1).astype('float64')

    eim = np.squeeze(eimask(d))
    d = np.squeeze(d[eim])

    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
    dz = (dz - dz.mean()) / dz.std()  # var normalize everything

    if wvpca:
        dz, cAl = dwtmat(dz)

    if not op.exists('pcastate.pkl'):
        # do PC dimension selection and get eigenvalue cutoff
        if mlepca:
            from sklearn.decomposition import PCA
            ppca = PCA(n_components='mle', svd_solver='full')
            ppca.fit(dz)
            v = ppca.components_
            s = ppca.explained_variance_
            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
        else:
            u, s, v = np.linalg.svd(dz, full_matrices=0)

        # actual variance explained (normalized)
        sp = s / s.sum()
        eigelb = getelbow_mod(sp, return_val=True)

        spdif = np.abs(np.diff(sp))
        spdifh = spdif[(len(spdif) // 2):]
        spdthr = np.mean([spdifh.max(), spdif.min()])
        spmin = sp[(len(spdif) // 2) +
                   np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
        spcum = np.cumsum(sp)

        # Compute K and Rho for PCA comps
        eimum = np.atleast_2d(eim)
        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
        eimum = eimum.prod(axis=1)
        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
        o[mask] = eimum
        eimum = np.squeeze(o).astype(bool)

        vTmix = v.T
        vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
        LGR.info('Making initial component selection guess from PCA results')
        _, ctb, betasv, v_T = model.fitmodels_direct(catd,
                                                     v.T,
                                                     eimum,
                                                     t2s,
                                                     t2sG,
                                                     tes,
                                                     combmode,
                                                     ref_img,
                                                     mmixN=vTmixN,
                                                     full_sel=False)
        ctb = ctb[ctb[:, 0].argsort(), :]
        ctb = np.vstack([ctb.T[:3], sp]).T

        # Save state
        fname = op.abspath('pcastate.pkl')
        LGR.info('Saving PCA results to: {}'.format(fname))
        pcastate = {
            'u': u,
            's': s,
            'v': v,
            'ctb': ctb,
            'eigelb': eigelb,
            'spmin': spmin,
            'spcum': spcum
        }
        try:
            with open(fname, 'wb') as handle:
                pickle.dump(pcastate, handle)
        except TypeError:
            LGR.warning('Could not save PCA solution')

    else:  # if loading existing state
        LGR.info('Loading PCA from: pcastate.pkl')
        with open('pcastate.pkl', 'rb') as handle:
            pcastate = pickle.load(handle)
        u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
        ctb, eigelb = pcastate['ctb'], pcastate['eigelb']
        spmin, spcum = pcastate['spmin'], pcastate['spcum']

    np.savetxt('comp_table_pca.txt', ctb[ctb[:, 1].argsort(), :][::-1])
    np.savetxt('mepca_mix.1D', v[ctb[:, 1].argsort()[::-1], :].T)

    kappas = ctb[ctb[:, 1].argsort(), 1]
    rhos = ctb[ctb[:, 2].argsort(), 2]
    fmin, fmid, fmax = utils.getfbounds(n_echos)
    kappa_thr = np.average(sorted(
        [fmin, getelbow_mod(kappas, return_val=True) / 2, fmid]),
                           weights=[kdaw, 1, 1])
    rho_thr = np.average(sorted(
        [fmin, getelbow_cons(rhos, return_val=True) / 2, fmid]),
                         weights=[rdaw, 1, 1])
    if int(kdaw) == -1:
        kappas_lim = kappas[utils.andb([kappas < fmid, kappas > fmin]) == 2]
        kappa_thr = kappas_lim[getelbow_mod(kappas_lim)]
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]
        stabilize = True
    if int(kdaw) != -1 and int(rdaw) == -1:
        rhos_lim = rhos[utils.andb([rhos < fmid, rhos > fmin]) == 2]
        rho_thr = rhos_lim[getelbow_mod(rhos_lim)]

    is_hik = np.array(ctb[:, 1] > kappa_thr, dtype=np.int)
    is_hir = np.array(ctb[:, 2] > rho_thr, dtype=np.int)
    is_hie = np.array(ctb[:, 3] > eigelb, dtype=np.int)
    is_his = np.array(ctb[:, 3] > spmin, dtype=np.int)
    is_not_fmax1 = np.array(ctb[:, 1] != F_MAX, dtype=np.int)
    is_not_fmax2 = np.array(ctb[:, 2] != F_MAX, dtype=np.int)
    pcscore = (is_hik + is_hir + is_hie) * is_his * is_not_fmax1 * is_not_fmax2
    if stabilize:
        temp7 = np.array(spcum < 0.95, dtype=np.int)
        temp8 = np.array(ctb[:, 2] > fmin, dtype=np.int)
        temp9 = np.array(ctb[:, 1] > fmin, dtype=np.int)
        pcscore = pcscore * temp7 * temp8 * temp9

    pcsel = pcscore > 0
    dd = u.dot(np.diag(s * np.array(pcsel, dtype=np.int))).dot(v)

    if wvpca:
        dd = idwtmat(dd, cAl)

    n_components = s[pcsel].shape[0]
    LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, '
             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr,
                                              rho_thr))

    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
    dd = stats.zscore(dd, axis=None)  # variance normalize everything

    return n_components, dd
Ejemplo n.º 10
0
def tedpca(data_cat,
           data_oc,
           combmode,
           mask,
           t2s,
           t2sG,
           ref_img,
           tes,
           method='mle',
           source_tes=-1,
           kdaw=10.,
           rdaw=1.,
           out_dir='.',
           verbose=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
        Map of voxel-wise T2* estimates.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    method : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int` or :obj:`list` of :obj:`int`, optional
        Which echos to use in PCA. Values -1 and 0 are special, where a value
        of -1 will indicate using the optimal combination of the echos
        and 0  will indicate using all the echos. A list can be provided
        to indicate a subset of echos.
        Default: -1
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pcastate.pkl              Values from PCA results.
    comp_table_pca.txt        PCA component table.
    mepca_mix.1D              PCA mixing matrix.
    ======================    =================================================
    """

    n_samp, n_echos, n_vols = data_cat.shape
    source_tes = np.array([int(ee) for ee in str(source_tes).split(',')])

    if len(source_tes) == 1 and source_tes[0] == -1:
        LGR.info('Computing PCA of optimally combined multi-echo data')
        data = data_oc[mask, :][:, np.newaxis, :]
    elif len(source_tes) == 1 and source_tes[0] == 0:
        LGR.info('Computing PCA of spatially concatenated multi-echo data')
        data = data_cat[mask, ...]
    else:
        LGR.info('Computing PCA of echo #{0}'.format(','.join(
            [str(ee) for ee in source_tes])))
        data = np.stack([data_cat[mask, ee, :] for ee in source_tes - 1],
                        axis=1)

    eim = np.squeeze(eimask(data))
    data = np.squeeze(data[eim])

    data_z = ((data.T - data.T.mean(axis=0)) /
              data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z -
              data_z.mean()) / data_z.std()  # var normalize everything

    if method == 'mle':
        voxel_comp_weights, varex, comp_ts = run_mlepca(data_z)
    else:
        ppca = PCA()
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))

    # actual variance explained (normalized)
    varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    eimum = np.atleast_2d(eim)
    eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
    eimum = eimum.prod(axis=1)
    o = np.zeros((mask.shape[0], *eimum.shape[1:]))
    o[mask, ...] = eimum
    eimum = np.squeeze(o).astype(bool)

    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    _, comptable, _, _ = model.fitmodels_direct(data_cat,
                                                comp_ts,
                                                eimum,
                                                t2s,
                                                t2sG,
                                                tes,
                                                combmode,
                                                ref_img,
                                                reindex=False,
                                                mmixN=vTmixN,
                                                full_sel=False,
                                                label='mepca_',
                                                out_dir=out_dir,
                                                verbose=verbose)
    # varex_norm from PCA retained on top of varex from fitmodels_direct
    comptable['original normalized variance explained'] = varex_norm

    np.savetxt('mepca_mix.1D', comp_ts)

    # write component maps to 4D image
    comp_maps = np.zeros((data_oc.shape[0], comp_ts.shape[1]))
    for i_comp in range(comp_ts.shape[1]):
        temp_comp_ts = comp_ts[:, i_comp][:, None]
        comp_map = utils.unmask(
            model.computefeats2(data_oc, temp_comp_ts, mask), mask)
        comp_maps[:, i_comp] = np.squeeze(comp_map)
    io.filewrite(comp_maps, 'mepca_OC_components.nii', ref_img)

    # Select components using decision tree
    if method == 'kundu':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=False)
    elif method == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=True)
    elif method == 'mle':
        LGR.info('Selected {0} components with MLE dimensionality '
                 'detection'.format(comptable.shape[0]))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    comptable.to_csv('comp_table_pca.txt',
                     sep='\t',
                     index=True,
                     index_label='component',
                     float_format='%.6f')

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return kept_data, n_components
Ejemplo n.º 11
0
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    strict=False, gscontrol=True, kdaw=10., rdaw=1., conv=2.5e-5,
                    ste=-1, combmode='t2s', dne=False,
                    initcost='tanh', finalcost='tanh',
                    stabilize=False, filecsdata=False, wvpca=False,
                    label=None, fixed_seed=42, debug=False, quiet=False):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be spatially
        aligned with `data`.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`str`, optional
        Comma separated list of manually accepted components in string form.
        Default is None.
    strict : :obj:`bool`, optional
        Ignore low-variance ambiguous components. Default is False.
    gscontrol : :obj:`bool`, optional
        Control global signal using spatial approach. Default is True.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Kappa). Default is 10.
        -1 for low-dimensional ICA.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight (Rho). Default is 1.
        -1 for low-dimensional ICA.
    conv : :obj:`float`, optional
        Convergence limit. Default is 2.5e-5.
    ste : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s', 'ste'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default), 'ste' (Poser).
    dne : :obj:`bool`, optional
        Denoise each TE dataset separately. Default is False.
    initcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Initial cost function for ICA. Default is 'tanh'.
    finalcost : {'tanh', 'pow3', 'gaus', 'skew'}, optional
        Final cost function. Default is 'tanh'.
    stabilize : :obj:`bool`, optional
        Stabilize convergence by reducing dimensionality, for low quality data.
        Default is False.
    filecsdata : :obj:`bool`, optional
        Save component selection data to file. Default is False.
    wvpca : :obj:`bool`, optional
        Whether or not to perform PCA on wavelet-transformed data.
        Default is False.
    label : :obj:`str` or :obj:`None`, optional
        Label for output directory. Default is None.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    PROCEDURE 2 : Computes ME-PCA and ME-ICA

    - Computes T2* map
    - Computes PCA of concatenated ME data, then computes TE-dependence of PCs
    - Computes ICA of TE-dependence PCs
    - Identifies TE-dependent ICs, outputs high-\kappa (BOLD) component
      and denoised time series

    or computes TE-dependence of each component of a general linear model
    specified by input (includes MELODIC FastICA mixing matrix)

    PROCEDURE 2a: Model fitting and component selection routines

    This workflow writes out several files, which are written out to a folder
    named TED.[ref_label].[label] if ``label`` is provided and TED.[ref_label]
    if not. ``ref_label`` is determined based on the name of the first ``data``
    file.

    Files are listed below:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    t2sv.nii                  Limited estimated T2* 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    s0v.nii                   Limited S0 3D map.
                              The difference between the limited and full maps
                              is that, for voxels affected by dropout where
                              only one echo contains good data, the full map
                              uses the single echo's value while the limited
                              map has a NaN.
    t2ss.nii                  ???
    s0vs.nii                  ???
    t2svG.nii                 Full T2* map/timeseries. The difference between
                              the limited and full maps is that, for voxels
                              affected by dropout where only one echo contains
                              good data, the full map uses the single echo's
                              value while the limited map has a NaN.
    s0vG.nii                  Full S0 map/timeseries.
    __meica_mix.1D            A mixing matrix
    meica_mix.1D              Another mixing matrix
    ts_OC.nii                 Optimally combined timeseries.
    betas_OC.nii              Full ICA coefficient feature set.
    betas_hik_OC.nii          Denoised ICA coefficient feature set
    feats_OC2.nii             Z-normalized spatial component maps
    comp_table.txt            Component table
    sphis_hik.nii             T1-like effect
    hik_ts_OC_T1c.nii         T1 corrected time series by regression
    dn_ts_OC_T1c.nii          ME-DN version of T1 corrected time series
    betas_hik_OC_T1c.nii      T1-GS corrected components
    meica_mix_T1c.1D          T1-GS corrected mixing matrix
    ======================    =================================================

    If ``dne`` is set to True:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    hik_ts_e[echo].nii        High-Kappa timeseries for echo number ``echo``
    midk_ts_e[echo].nii       Mid-Kappa timeseries for echo number ``echo``
    lowk_ts_e[echo].nii       Low-Kappa timeseries for echo number ``echo``
    dn_ts_e[echo].nii         Denoised timeseries for echo number ``echo``
    ======================    =================================================
    """

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = utils.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    kdaw, rdaw = float(kdaw), float(rdaw)

    try:
        ref_label = op.basename(ref_img).split('.')[0]
    except TypeError:
        ref_label = op.basename(str(data[0])).split('.')[0]

    if label is not None:
        out_dir = 'TED.{0}.{1}'.format(ref_label, label)
    else:
        out_dir = 'TED.{0}'.format(ref_label)
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        LGR.info('Creating output directory: {}'.format(out_dir))
        os.mkdir(out_dir)
    else:
        LGR.info('Using output directory: {}'.format(out_dir))

    if mixm is not None and op.isfile(mixm):
        shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
        shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        shutil.copyfile(ctab, op.join(out_dir, 'comp_table.txt'))
        shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    os.chdir(out_dir)

    if mask is None:
        LGR.info('Computing adaptive mask')
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')
    mask, masksum = utils.make_adaptive_mask(catd, mask=mask,
                                             minimum=False, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = model.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    utils.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    utils.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)
    utils.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
    utils.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
    utils.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
    utils.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    OCcatd = model.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if gscontrol:
        catd, OCcatd = model.gscontrol_raw(catd, OCcatd, n_echos, ref_img)

    if mixm is None:
        n_components, dd = decomposition.tedpca(catd, OCcatd, combmode, mask,
                                                t2s, t2sG, stabilize, ref_img,
                                                tes=tes, kdaw=kdaw, rdaw=rdaw,
                                                ste=ste, wvpca=wvpca)
        mmix_orig, fixed_seed = decomposition.tedica(n_components, dd, conv, fixed_seed,
                                                     cost=initcost, final_cost=finalcost,
                                                     verbose=debug)
        np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
        LGR.info('Making second component selection guess from ICA results')
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img,
                                                                 reindex=True)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask, ref_img, manacc,
                                                   n_echos, t2s, s0, strict_mode=strict,
                                                   filecsdata=filecsdata)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        seldict, comptable, betas, mmix = model.fitmodels_direct(catd, mmix_orig,
                                                                 mask, t2s, t2sG,
                                                                 tes, combmode,
                                                                 ref_img)
        if ctab is None:
            acc, rej, midk, empty = selection.selcomps(seldict, mmix, mask,
                                                       ref_img, manacc,
                                                       n_echos, t2s, s0,
                                                       filecsdata=filecsdata,
                                                       strict_mode=strict)
        else:
            acc, rej, midk, empty = utils.ctabsel(ctab)

    if len(acc) == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    utils.writeresults(OCcatd, mask, comptable, mmix, fixed_seed, n_vols,
                       acc, rej, midk, empty, ref_img)
    utils.gscontrol_mmix(OCcatd, mmix, mask, acc, ref_img)
    if dne:
        utils.writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)