Example #1
def fit(data, mask, tes, masksum, start_echo):
    """
    Fit voxel- and timepoint-wise monoexponential decay models to estimate
    T2* and S0 timeseries.
    """
    nx, ny, nz, n_echoes, n_trs = data.shape
    echodata = fmask(data, mask)
    tes = np.array(tes)

    t2sa_ts = np.zeros([nx, ny, nz, n_trs])
    s0va_ts = np.zeros([nx, ny, nz, n_trs])
    t2saf_ts = np.zeros([nx, ny, nz, n_trs])
    s0vaf_ts = np.zeros([nx, ny, nz, n_trs])

    for vol in range(echodata.shape[-1]):
        t2ss = np.zeros([nx, ny, nz, n_echoes - 1])
        s0vs = t2ss.copy()
        # Fit monoexponential decay first for first echo only,
        # then first two echoes, etc.
        for i_echo in range(start_echo, n_echoes + 1):
            B = np.abs(echodata[:, :i_echo, vol]) + 1
            B = np.log(B).transpose()
            neg_tes = -1 * tes[:i_echo]

            # First row is constant, second is TEs for decay curve
            # Independent variables for least-squares model
            x = np.array([np.ones(i_echo), neg_tes])
            X = np.sort(x)[:, ::-1].transpose()

            beta, _, _, _ = np.linalg.lstsq(X, B)
            t2s = 1. / beta[1, :].transpose()
            s0 = np.exp(beta[0, :]).transpose()

            t2s[np.isinf(t2s)] = 500.
            s0[np.isnan(s0)] = 0.

            t2ss[:, :, :, i_echo - 2] = np.squeeze(unmask(t2s, mask))
            s0vs[:, :, :, i_echo - 2] = np.squeeze(unmask(s0, mask))

        # Limited T2* and S0 maps
        fl = np.zeros([nx, ny, nz, len(tes) - 1], bool)
        for i_echo in range(n_echoes - 1):
            fl_ = np.squeeze(fl[:, :, :, i_echo])
            fl_[masksum == i_echo + 2] = True
            fl[:, :, :, i_echo] = fl_
        t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1))
        s0va = np.squeeze(unmask(s0vs[fl], masksum > 1))

        # Full T2* maps with S0 estimation errors
        t2saf = t2sa.copy()
        s0vaf = s0va.copy()
        t2saf[masksum == 1] = t2ss[masksum == 1, 0]
        s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

        t2sa_ts[:, :, :, vol] = t2sa
        s0va_ts[:, :, :, vol] = s0va
        t2saf_ts[:, :, :, vol] = t2saf
        s0vaf_ts[:, :, :, vol] = s0vaf

    return t2sa_ts, s0va_ts, t2saf_ts, s0vaf_ts
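
For reference, a minimal, self-contained sketch of the log-linear monoexponential fit performed above, using only NumPy on made-up numbers (illustrative only, not tedana code): S(TE) = S0 * exp(-TE / T2*) becomes log S = log S0 - TE / T2*, which least squares then solves.

import numpy as np

tes = np.array([14.5, 38.5, 62.5])          # echo times in ms (made up)
true_t2s, true_s0 = 30.0, 1000.0
signal = true_s0 * np.exp(-tes / true_t2s)  # one voxel, one volume

# Design matrix: intercept column and negative echo times
X = np.column_stack([np.ones(len(tes)), -tes])
B = np.log(signal)
beta, *_ = np.linalg.lstsq(X, B, rcond=None)

s0_hat = np.exp(beta[0])   # ~1000.0
t2s_hat = 1.0 / beta[1]    # ~30.0
print(s0_hat, t2s_hat)
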
Example #2
def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise / ignored time series and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : float
        Percent variance of data explained by extracted + retained components
    """

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = model.get_coeffs(utils.unmask(dmdata.T, mask), mask, mmix)[mask]
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100
    LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    midkts = betas[:, midk].dot(mmix.T[midk, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts - midkts

    if len(acc) != 0:
        fout = utils.filewrite(utils.unmask(hikts, mask), 'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))
    if len(midk) != 0:
        fout = utils.filewrite(utils.unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing mid-Kappa time series: {}'.format(op.abspath(fout)))
    if len(rej) != 0:
        fout = utils.filewrite(utils.unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = utils.filewrite(utils.unmask(dnts, mask), 'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))

    return varexpl
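
The variance-explained figure above is 100 * (1 - SSE / total sum of squares) on the demeaned data. A tiny numeric sketch with made-up values (not tedana code):

import numpy as np

dmdata = np.array([[1.0, -1.0, 0.5, -0.5]])   # demeaned voxel time series
fit = np.array([[0.9, -1.1, 0.4, -0.4]])      # reconstruction from retained components

varexpl = (1 - ((dmdata - fit) ** 2).sum() / (dmdata ** 2).sum()) * 100
print(round(varexpl, 2))                      # 98.4
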
Example #3
def t2sadmap(catd, mask, tes, masksum, start_echo):
    """
    t2sadmap(catd, mask, tes, masksum, start_echo)

    Input:

    catd        has shape (nx, ny, nz, Ne, nt)
    mask        has shape (nx, ny, nz)
    tes         is a 1d numpy array of echo times
    masksum     has shape (nx, ny, nz); number of echoes with good signal in each voxel
    start_echo  smallest number of echoes to use in the fits
    """
    nx, ny, nz, Ne, nt = catd.shape
    echodata = fmask(catd, mask)
    Nm = echodata.shape[0]

    t2ss = np.zeros([nx, ny, nz, Ne - 1])
    s0vs = t2ss.copy()

    for ne in range(start_echo, Ne + 1):

        # Do Log Linear fit
        B = np.reshape(np.abs(echodata[:, :ne]) + 1, (Nm, ne * nt)).transpose()
        B = np.log(B)
        neg_tes = [-1 * te for te in tes[:ne]]
        x = np.array([np.ones(ne), neg_tes])
        X = np.tile(x, (1, nt))
        X = np.sort(X)[:, ::-1].transpose()

        beta, res, rank, sing = np.linalg.lstsq(X, B)
        t2s = 1 / beta[1, :].transpose()
        s0 = np.exp(beta[0, :]).transpose()

        t2s[np.isinf(t2s)] = 500.
        s0[np.isnan(s0)] = 0.

        t2ss[:, :, :, ne - 2] = np.squeeze(unmask(t2s, mask))
        s0vs[:, :, :, ne - 2] = np.squeeze(unmask(s0, mask))

    # Limited T2* and S0 maps
    fl = np.zeros([nx, ny, nz, len(tes) - 2 + 1])
    for ne in range(Ne - 1):
        fl_ = np.squeeze(fl[:, :, :, ne])
        fl_[masksum == ne + 2] = True
        fl[:, :, :, ne] = fl_
    fl = np.array(fl, dtype=bool)
    t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1))
    s0va = np.squeeze(unmask(s0vs[fl], masksum > 1))

    # Full T2* maps with S0 estimation errors
    t2saf = t2sa.copy()
    s0vaf = s0va.copy()
    t2saf[masksum == 1] = t2ss[masksum == 1, 0]
    s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

    return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
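
The "limited" maps are assembled by picking, for each voxel, the fit whose echo count matches the adaptive mask ("masksum") value. An illustrative sketch of that selection on toy arrays (not tedana code):

import numpy as np

n_vox, n_echoes = 5, 3
t2ss = np.random.rand(n_vox, n_echoes - 1)   # fits using 2, 3, ... echoes
masksum = np.array([0, 1, 2, 3, 3])          # echoes with good signal per voxel

t2sa = np.zeros(n_vox)
for i_echo in range(n_echoes - 1):
    sel = masksum == i_echo + 2              # voxels whose fit used i_echo + 2 echoes
    t2sa[sel] = t2ss[sel, i_echo]
print(t2sa)
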
Example #4
def test_smoke_unmask():
    """
    Ensure that unmask returns reasonable objects with random inputs
    in the correct format.
    Note: unmask can accept 1D, 2D, or 3D arrays.
    """
    data_1d = np.random.random((100))
    data_2d = np.random.random((100, 5))
    data_3d = np.random.random((100, 5, 20))
    mask = np.random.randint(2, size=100)

    assert utils.unmask(data_1d, mask) is not None
    assert utils.unmask(data_2d, mask) is not None
    assert utils.unmask(data_3d, mask) is not None
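
For orientation, a minimal sketch of the unmask semantics these tests exercise (assumed from the shape checks in the test_unmask examples below: output has the mask's length on the first axis, with masked-in rows filled and the rest zero); this is not the tedana implementation:

import numpy as np

def unmask_sketch(data, mask):
    mask = mask.astype(bool)
    out = np.zeros((mask.size,) + data.shape[1:], dtype=data.dtype)
    out[mask] = data                 # scatter masked rows back into full space
    return out

mask = np.array([True, False, True, True])
vals = np.array([[1., 2.], [3., 4.], [5., 6.]])   # one row per True voxel
print(unmask_sketch(vals, mask))
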
Example #5
def writefeats(data, mmix, mask, ref_img, suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : str
        Filepath to saved file
    """

    # write feature versions of components
    feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
    fname = utils.filewrite(feats, 'feats_{0}'.format(suffix), ref_img)

    return fname
Example #6
def split_ts(data, mmix, mask, acc):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : :obj:`list`
        List of accepted components used to subset `mmix`

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only components in `acc`
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """

    cbetas = model.get_coeffs(data - data.mean(axis=-1, keepdims=True),
                              mmix, mask)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
    else:
        hikts = None

    resid = data - hikts

    return hikts, resid
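
The reconstruction above keeps only the accepted components: hikts = betas[:, acc] @ mmix[:, acc].T. A small synthetic illustration of that matrix product (not tedana code):

import numpy as np

rng = np.random.default_rng(0)
S, T, C = 10, 20, 4
mmix = rng.standard_normal((T, C))        # component time series
betas = rng.standard_normal((S, C))       # per-sample component weights
acc = [0, 2]                              # accepted component indices

hikts = betas[:, acc].dot(mmix.T[acc, :]) # (S x T) accepted-only time series
print(hikts.shape)
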
Example #7
def split_ts(data, mmix, mask, acc):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        List of accepted components used to subset `mmix`

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only components in `acc`
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """

    cbetas = model.get_coeffs(data - data.mean(axis=-1, keepdims=True), mask, mmix)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
    else:
        hikts = None

    return hikts, data - hikts
Example #8
def writefeats(data, mmix, mask, ref_img, suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : str, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : str
        Filepath to saved file
    """

    # write feature versions of components
    feats = utils.unmask(model.computefeats2(data, mmix, mask), mask)
    fname = utils.filewrite(feats, 'feats_{0}'.format(suffix), ref_img)

    return fname
Example #9
def optcom(data, t2, tes, mask, combmode, useG=False):
    """
    Optimally combine BOLD data across TEs.

    out = optcom(data,t2s)

    Parameters
    ----------
    data : :obj:`numpy.ndarray`
        Concatenated BOLD data. Has shape (nx, ny, nz, n_echoes, n_trs)
    t2 : :obj:`numpy.ndarray`
        3D map of estimated T2* values. Has shape (nx, ny, nz)
    tes : :obj:`numpy.ndarray`
        Array of TEs, in seconds.
    mask : :obj:`numpy.ndarray`
        Brain mask in 3D array. Has shape (nx, ny, nz)
    combmode : :obj:`str`
        How to combine data. Either 'ste' or 't2s'.
    useG : :obj:`bool`, optional
        Use G. Default is False.

    Returns
    -------
    out : :obj:`numpy.ndarray`
        Optimally combined data. Has shape (nx, ny, nz, n_trs)
    """
    _, _, _, _, n_trs = data.shape

    if useG:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)
    else:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)

    tes = np.array(tes)
    tes = tes[np.newaxis, :]

    if len(t2.shape) == 3:
        print('Optimally combining with voxel-wise T2 estimates')
        ft2s = ft2s[:, np.newaxis]
    else:
        print('Optimally combining with voxel- and volume-wise T2 estimates')
        ft2s = ft2s[:, :, np.newaxis]

    if combmode == 'ste':
        alpha = fdat.mean(-1) * tes
    else:
        alpha = tes * np.exp(-tes / ft2s)

    if len(t2.shape) == 3:
        alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_trs))
    else:
        alpha = np.swapaxes(alpha, 1, 2)
        ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1))
        alpha[ax0_idx, :, ax2_idx] = 1.

    fout = np.average(fdat, axis=1, weights=alpha)
    out = unmask(fout, mask)
    return out
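
With combmode 't2s', each echo is weighted by alpha = TE * exp(-TE / T2*) and the echoes are averaged with those weights. A self-contained single-voxel sketch with made-up numbers (not tedana code):

import numpy as np

tes = np.array([14.5, 38.5, 62.5])     # echo times in ms (made up)
t2s = 30.0                             # voxel T2* estimate, ms
echoes = np.array([900., 600., 350.])  # signal at each echo for one timepoint

alpha = tes * np.exp(-tes / t2s)
combined = np.average(echoes, weights=alpha)
print(combined)
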
Example #10
def get_coeffs(data, X, mask=None, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S [x E] x T) array_like
        Array where `S` is samples, `E` is echoes, and `T` is time
    X : (T [x C]) array_like
        Array where `T` is time and `C` is predictor variables
    mask : (S [x E]) array_like
        Boolean mask array
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S [x E] x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """
    if data.ndim not in [2, 3]:
        raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim))
    elif X.ndim not in [2]:
        raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim))
    elif data.shape[-1] != X.shape[0]:
        raise ValueError('Last dimension (dimension {0}) of data ({1}) does not '
                         'match first dimension of '
                         'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0]))

    # mask data and flip (time x samples)
    if mask is not None:
        if mask.ndim not in [1, 2]:
            raise ValueError('Parameter mask should be 1d or 2d, not {0}d'.format(mask.ndim))
        elif data.shape[0] != mask.shape[0]:
            raise ValueError('First dimensions of data ({0}) and mask ({1}) do not '
                             'match'.format(data.shape[0], mask.shape[0]))
        mdata = data[mask, :].T
    else:
        mdata = data.T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T

    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]

    if mask is not None:
        betas = utils.unmask(betas, mask)

    return betas
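
A hedged usage sketch of the same least-squares pattern on synthetic data (not a call into tedana): regress a (T x C) design against (S x T) data and recover per-sample betas.

import numpy as np

rng = np.random.default_rng(1)
S, T, C = 6, 50, 3
X = rng.standard_normal((T, C))            # design matrix (time x predictors)
true_betas = rng.standard_normal((S, C))
data = true_betas.dot(X.T)                 # (S x T), noiseless

betas = np.linalg.lstsq(X, data.T, rcond=None)[0].T   # (S x C)
print(np.allclose(betas, true_betas))      # True
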
Example #11
def denoise_ts(data, mmix, mask, comptable):
    """Apply component classifications to data for denoising.

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. Requires at least one column: "classification".

    Returns
    -------
    dnts : (S x T) array_like
        Denoised data (i.e., data with rejected components removed).
    hikts : (S x T) array_like
        High-Kappa data (i.e., data composed only of accepted components).
    lowkts : (S x T) array_like
        Low-Kappa data (i.e., data composed only of rejected components).
    """
    acc = comptable[comptable.classification == "accepted"].index.values
    rej = comptable[comptable.classification == "rejected"].index.values

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = get_coeffs(dmdata.T, mmix, mask=None)
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.0).sum() /
               (dmdata**2.0).sum()) * 100
    LGR.info("Variance explained by decomposition: {:.02f}%".format(varexpl))

    # create component-based data
    hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
    lowkts = utils.unmask(betas[:, rej].dot(mmix.T[rej, :]), mask)
    dnts = utils.unmask(data[mask] - lowkts[mask], mask)
    return dnts, hikts, lowkts
Example #12
def test_unmask():
    # generate boolean mask + get number of True values
    mask = rs.choice([0, 1], size=(100, )).astype(bool)
    n_data = mask.sum()

    inputs = [
        (rs.rand(n_data, 3), float),  # 2D float
        (rs.rand(n_data, 3, 3), float),  # 3D float
        (rs.randint(10, size=(n_data, 3)), int),  # 2D int
        (rs.randint(10, size=(n_data, 3, 3)), int)  # 3D int
    ]

    for (input, dtype) in inputs:
        out = utils.unmask(input, mask)
        assert out.shape == (100, ) + input.shape[1:]
        assert out.dtype == dtype
Example #13
def test_unmask():
    # generate boolean mask + get number of True values
    mask = rs.choice([0, 1], size=(100,)).astype(bool)
    n_data = mask.sum()

    inputs = [
        (rs.rand(n_data, 3), float),  # 2D float
        (rs.rand(n_data, 3, 3), float),  # 3D float
        (rs.randint(10, size=(n_data, 3)), int),  # 2D int
        (rs.randint(10, size=(n_data, 3, 3)), int)  # 3D int
    ]

    for (input, dtype) in inputs:
        out = utils.unmask(input, mask)
        assert out.shape == (100,) + input.shape[1:]
        assert out.dtype == dtype
Example #14
def get_coeffs(data, X, mask=None, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S x T) array_like
        Array where `S` is samples and `T` is time
    X : (T x C) array_like
        Array where `T` is time and `C` is predictor variables
    mask : (S,) array_like, optional
        Boolean mask array
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """

    # mask data and flip (time x samples)
    if mask is not None:
        mdata = data[mask, :].T
    else:
        mdata = data.T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T

    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]

    if mask is not None:
        betas = utils.unmask(betas, mask)

    return betas
Example #15
def computefeats2(data, mmix, mask, normalize=True):
    """
    Converts `data` to component space using `mmix`

    Parameters
    ----------
    data : (S x T) array_like
        Input data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array-like
        Boolean mask array
    normalize : bool, optional
        Whether to z-score output. Default: True

    Returns
    -------
    data_Z : (S x C) :obj:`numpy.ndarray`
        Data in component space
    """

    # demean masked data
    data_vn = stats.zscore(data[mask], axis=-1)

    # get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
    data_R = get_coeffs(utils.unmask(data_vn, mask), mask, mmix)[mask]
    data_R[data_R < -0.999] = -0.999
    data_R[data_R > 0.999] = 0.999

    # R-to-Z transform
    data_Z = np.arctanh(data_R)
    if data_Z.ndim == 1:
        data_Z = np.atleast_2d(data_Z).T

    # normalize data
    if normalize:
        data_Zm = stats.zscore(data_Z, axis=0)
        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
                            data_Z.std(axis=0, keepdims=True))
    return data_Z
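
The clipping above keeps correlations inside (-0.999, 0.999) so the R-to-Z (Fisher) transform stays finite. A brief numeric sketch (not tedana code):

import numpy as np

data_R = np.array([-1.0, -0.5, 0.0, 0.7, 1.0])
data_R = np.clip(data_R, -0.999, 0.999)   # same effect as the two assignments above
data_Z = np.arctanh(data_R)               # R-to-Z (Fisher) transform
print(data_Z)
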
Example #16
def writefeats(data, mmix, mask, ref_img, out_dir='.', suffix=''):
    """
    Converts `data` to component space with `mmix` and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    out_dir : :obj:`str`, optional
        Output directory.
    suffix : :obj:`str`, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    fname : :obj:`str`
        Filepath to saved file

    Notes
    -----
    This function writes out a file:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    feats_[suffix].nii        Z-normalized spatial component maps.
    ======================    =================================================
    """

    # write feature versions of components
    feats = utils.unmask(computefeats2(data, mmix, mask), mask)
    fname = filewrite(feats, op.join(out_dir, 'feats_{0}'.format(suffix)),
                      ref_img)
    return fname
Example #17
def computefeats2(data, mmix, mask, normalize=True):
    """
    Converts `data` to component space using `mmix`

    Parameters
    ----------
    data : (S x T) array_like
        Input data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array-like
        Boolean mask array
    normalize : bool, optional
        Whether to z-score output. Default: True

    Returns
    -------
    data_Z : (S x C) :obj:`numpy.ndarray`
        Data in component space
    """

    # demean masked data
    data_vn = stats.zscore(data[mask], axis=-1)

    # get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
    data_R = get_coeffs(utils.unmask(data_vn, mask), mask, mmix)[mask]
    data_R[data_R < -0.999] = -0.999
    data_R[data_R > 0.999] = 0.999

    # R-to-Z transform
    data_Z = np.arctanh(data_R)
    if data_Z.ndim == 1:
        data_Z = np.atleast_2d(data_Z).T

    # normalize data
    if normalize:
        data_Zm = stats.zscore(data_Z, axis=0)
        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
                            data_Z.std(axis=0, keepdims=True))
    return data_Z
Example #18
def threshold_map(maps, mask, ref_img, threshold, csize=None):
    """Perform cluster-extent thresholding.

    Parameters
    ----------
    maps : (M x C) array_like
        Statistical maps to be thresholded.
    mask : (S,) array_like
        Binary mask.
    ref_img : img_like
        Reference image to convert to niimgs with.
    threshold : :obj:`float`
        Value threshold to apply to maps.
    csize : :obj:`int` or :obj:`None`, optional
        Minimum cluster size. If None, standard thresholding (non-cluster-extent) will be done.
        Default is None.

    Returns
    -------
    maps_thresh : (M x C) array_like
        Cluster-extent thresholded (and binarized) maps.
    """
    n_voxels, n_components = maps.shape
    maps_thresh = np.zeros([n_voxels, n_components], bool)
    if csize is None:
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
    else:
        csize = int(csize)

    for i_comp in range(n_components):
        # Cluster-extent threshold and binarize F-maps
        ccimg = io.new_nii_like(
            ref_img, np.squeeze(utils.unmask(maps[:, i_comp], mask)))

        maps_thresh[:, i_comp] = utils.threshold_map(ccimg,
                                                     min_cluster_size=csize,
                                                     threshold=threshold,
                                                     mask=mask,
                                                     binarize=True)
    return maps_thresh
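
The default minimum cluster size is max(0.05% of voxels + 5, 20). A quick numeric check with an assumed voxel count (not tedana code):

n_voxels = 60000
csize = max(int(n_voxels * 0.0005) + 5, 20)
print(csize)   # 35
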
Example #19
def optcom(data, t2, tes, mask, combmode, useG=False):
    """
    out = optcom(data,t2s)


    Input:

    data.shape = (nx,ny,nz,Ne,Nt)
    t2s.shape  = (nx,ny,nz)
    tes.shape  = (Ne,)

    Output:

    out.shape = (nx,ny,nz,Nt)
    """
    nx, ny, nz, Ne, Nt = data.shape

    if useG:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)

    else:
        fdat = fmask(data, mask)
        ft2s = fmask(t2, mask)

    tes = np.array(tes)
    tes = tes[np.newaxis, :]
    ft2s = ft2s[:, np.newaxis]

    if combmode == 'ste':
        alpha = fdat.mean(-1) * tes
    else:
        alpha = tes * np.exp(-tes / ft2s)

    alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, Nt))

    fout = np.average(fdat, axis=1, weights=alpha)
    out = unmask(fout, mask)
    return out
Example #20
def get_coeffs(data, mask, X, add_const=False):
    """
    Performs least-squares fit of `X` against `data`

    Parameters
    ----------
    data : (S x T) array-like
        Array where `S` is samples and `T` is time
    mask : (S,) array-like
        Boolean mask array
    X : (T x C) array-like
        Array where `T` is time and `C` is predictor variables
    add_const : bool, optional
        Add intercept column to `X` before fitting. Default: False

    Returns
    -------
    betas : (S x C) :obj:`numpy.ndarray`
        Array of `S` sample betas for `C` predictors
    """

    # mask data and flip (time x samples)
    mdata = data[mask].T

    # coerce X to >=2d
    X = np.atleast_2d(X)

    if len(X) == 1:
        X = X.T
    if add_const:  # add intercept, if specified
        X = np.column_stack([X, np.ones((len(X), 1))])

    betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
    if add_const:  # drop beta for intercept, if specified
        betas = betas[:, :-1]
    betas = utils.unmask(betas, mask)

    return betas
Example #21
def split_ts(data, mmix, mask, comptable):
    """
    Splits `data` time series into accepted component time series and remainder

    Parameters
    ----------
    data : (S x T) array_like
        Input data, where `S` is samples and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. Requires at least two columns: "component" and
        "classification".

    Returns
    -------
    hikts : (S x T) :obj:`numpy.ndarray`
        Time series reconstructed using only components in `acc`
    rest : (S x T) :obj:`numpy.ndarray`
        Original data with `hikts` removed
    """
    acc = comptable[comptable.classification == 'accepted'].index.values

    cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True),
                        mmix, mask)
    betas = cbetas[mask]
    if len(acc) != 0:
        hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
    else:
        hikts = None

    resid = data - hikts

    return hikts, resid
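
Accepted components are pulled from the comptable by their "classification" label. A small pandas sketch of that selection (illustrative only):

import pandas as pd

comptable = pd.DataFrame({"classification": ["accepted", "rejected", "accepted"]})
acc = comptable[comptable.classification == "accepted"].index.values
print(acc)   # [0 2]
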
Example #22
def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
    """
    Perform global signal regression.

    Parameters
    ----------
    OCcatd : (S x T) array_like
        Optimally-combined time series data
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `OCcatd`
    mask : (S,) array_like
        Boolean mask array
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    """

    Gmu = OCcatd.mean(axis=-1)
    Gstd = OCcatd.std(axis=-1)
    Gmask = (Gmu != 0)

    """
    Compute temporal regression
    """
    dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:, np.newaxis]
    solG = np.linalg.lstsq(mmix, dat.T, rcond=None)[0]
    resid = dat - np.dot(solG.T, mmix.T)

    """
    Build BOLD time series without amplitudes, and save T1-like effect
    """
    bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T)
    sphis = bold_ts.min(axis=-1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, mask), 'sphis_hik', ref_img)

    """
    Find the global signal based on the T1-like effect
    """
    sol = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)
    glsig = sol[0]

    """
    T1 correct time series by regression
    """
    bold_noT1gs = bold_ts - np.dot(np.linalg.lstsq(glsig.T, bold_ts.T, rcond=None)[0].T, glsig)
    utils.filewrite(utils.unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask),
                    'hik_ts_OC_T1c.nii', ref_img)

    """
    Make medn version of T1 corrected time series
    """
    utils.filewrite(Gmu[..., np.newaxis] +
                    utils.unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask),
                    'dn_ts_OC_T1c', ref_img)

    """
    Orthogonalize mixing matrix w.r.t. T1-GS
    """
    mmixnogs = mmix.T - np.dot(np.linalg.lstsq(glsig.T, mmix, rcond=None)[0].T, glsig)
    mmixnogs_mu = mmixnogs.mean(-1)
    mmixnogs_std = mmixnogs.std(-1)
    mmixnogs_norm = (mmixnogs - mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis]
    mmixnogs_norm = np.vstack([np.atleast_2d(np.ones(max(glsig.shape))), glsig, mmixnogs_norm])

    """
    Write T1-GS corrected components and mixing matrix
    """
    sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T, rcond=None)
    utils.filewrite(utils.unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c', ref_img)
    np.savetxt('meica_mix_T1c.1D', mmixnogs)
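
The T1-correction step regresses the estimated global signal out of the BOLD time series with lstsq and subtracts the fit. A minimal sketch of that "regress out a signal" pattern on synthetic data (not tedana code):

import numpy as np

rng = np.random.default_rng(2)
T = 100
glsig = rng.standard_normal((1, T))                   # estimated global signal
bold_ts = 2.0 * glsig + rng.standard_normal((5, T))   # 5 voxels

coefs = np.linalg.lstsq(glsig.T, bold_ts.T, rcond=None)[0]   # (1 x 5)
bold_nogs = bold_ts - coefs.T.dot(glsig)              # time series with global fit removed
print(bold_nogs.shape)
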
Example #23
def fitmodels_direct(catd,
                     mmix,
                     mask,
                     t2s,
                     t2sG,
                     tes,
                     combmode,
                     ref_img,
                     fout=None,
                     reindex=False,
                     mmixN=None,
                     full_sel=True):
    """
    Fit models directly.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
    t2sG : (S,) array_like
    tes : list
        List of echo times associated with `catd`, in milliseconds
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    fout : bool
        Whether to output per-component TE-dependence maps. Default: None
    reindex : bool, optional
        Default: False
    mmixN : array_like, optional
        Default: None
    full_sel : bool, optional
        Whether to perform selection of components based on Rho/Kappa scores.
        Default: True

    Returns
    -------
    seldict : dict
    comptab : (N x 5) :obj:`numpy.ndarray`
        Array with columns denoting (1) index of component, (2) Kappa score of
        component, (3) Rho score of component, (4) variance explained by
        component, and (5) normalized variance explained by component
    betas : :obj:`numpy.ndarray`
    mmix_new : :obj:`numpy.ndarray`
    """

    # compute optimal combination of raw data
    tsoc = model.make_optcom(catd, t2sG, tes, mask, combmode,
                             verbose=False).astype(float)[mask]
    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(utils.unmask(tsoc_dm, mask), mask, mmix)[mask]
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1),
                       mmix)
    n_samp, n_echos, n_components = betas.shape
    n_voxels = mask.sum()
    n_data_voxels = (t2s != 0).sum()
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, fmid, fmax = utils.getfbounds(n_echos)

    # mask arrays
    mumask = mu[t2s != 0]
    t2smask = t2s[t2s != 0]
    betamask = betas[t2s != 0]

    # set up Xmats
    X1 = mumask.T  # Model 1
    X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T  # Model 2

    # tables for component selection
    Kappas = np.zeros([n_components])
    Rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_data_voxels, n_components])
    F_S0_maps = np.zeros([n_data_voxels, n_components])
    Z_clmaps = np.zeros([n_voxels, n_components])
    F_R2_clmaps = np.zeros([n_data_voxels, n_components])
    F_S0_clmaps = np.zeros([n_data_voxels, n_components])
    Br_clmaps_R2 = np.zeros([n_voxels, n_components])
    Br_clmaps_S0 = np.zeros([n_voxels, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i in range(n_components):
        # size of B is (n_components, nx*ny*nz)
        B = np.atleast_3d(betamask)[:, :, i].T
        alpha = (np.abs(B)**2).sum(axis=0)
        varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100.
        varex_norm[i] = (utils.unmask(WTS, mask)[t2s != 0][:, i]**
                         2).sum() / totvar_norm * 100.

        # S0 Model
        coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0)
        SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2
        SSE_S0 = SSE_S0.sum(axis=0)
        F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0)
        F_S0_maps[:, i] = F_S0

        # R2 Model
        coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0)
        SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2)
        F_R2_maps[:, i] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (
            Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(
            np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.))
        Kappas[i] = np.average(F_R2, weights=norm_weights)
        Rhos[i] = np.average(F_S0, weights=norm_weights)

    # tabulate component values
    comptab_pre = np.vstack(
        [np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in Kappa order
        comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :]
        Kappas = comptab[:, 1]
        Rhos = comptab[:, 2]
        varex = comptab[:, 3]
        varex_norm = comptab[:, 4]
        nnc = np.array(comptab[:, 0], dtype=np.int)
        mmix_new = mmix[:, nnc]
        F_S0_maps = F_S0_maps[:, nnc]
        F_R2_maps = F_R2_maps[:, nnc]
        Z_maps = Z_maps[:, nnc]
        WTS = WTS[:, nnc]
        PSC = PSC[:, nnc]
        tsoc_B = tsoc_B[:, nnc]
        tsoc_Babs = tsoc_Babs[:, nnc]
        comptab[:, 0] = np.arange(comptab.shape[0])
    else:
        comptab = comptab_pre
        mmix_new = mmix

    # full selection including clustering criteria
    seldict = None
    if full_sel:
        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i in range(n_components):
            # save out files
            out = np.zeros((n_samp, 4))
            out[:, 0] = np.squeeze(utils.unmask(PSC[:, i], mask))
            out[:, 1] = np.squeeze(utils.unmask(F_R2_maps[:, i], t2s != 0))
            out[:, 2] = np.squeeze(utils.unmask(F_S0_maps[:, i], t2s != 0))
            out[:, 3] = np.squeeze(utils.unmask(Z_maps[:, i], mask))

            if utils.get_dtype(ref_img) == 'GIFTI':
                continue  # TODO: pass through GIFTI file data as below

            ccimg = utils.new_nii_like(ref_img, out)

            # Do simple clustering on F
            sel = spatclust(ccimg,
                            min_cluster_size=csize,
                            threshold=int(fmin),
                            index=[1, 2],
                            mask=(t2s != 0))
            F_R2_clmaps[:, i] = sel[:, 0]
            F_S0_clmaps[:, i] = sel[:, 1]
            countsigFR2 = F_R2_clmaps[:, i].sum()
            countsigFS0 = F_S0_clmaps[:, i].sum()

            # Do simple clustering on Z at p<0.05
            sel = spatclust(ccimg,
                            min_cluster_size=csize,
                            threshold=1.95,
                            index=3,
                            mask=mask)
            Z_clmaps[:, i] = sel

            # Do simple clustering on ranked signal-change map
            spclust_input = utils.unmask(stats.rankdata(tsoc_Babs[:, i]), mask)
            spclust_input = utils.new_nii_like(ref_img, spclust_input)
            Br_clmaps_R2[:, i] = spatclust(spclust_input,
                                           min_cluster_size=csize,
                                           threshold=max(tsoc_Babs.shape) -
                                           countsigFR2,
                                           mask=mask)
            Br_clmaps_S0[:, i] = spatclust(spclust_input,
                                           min_cluster_size=csize,
                                           threshold=max(tsoc_Babs.shape) -
                                           countsigFS0,
                                           mask=mask)

        seldict = {}
        selvars = [
            'Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps',
            'F_S0_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'tsoc_B',
            'Br_clmaps_R2', 'Br_clmaps_S0', 'PSC'
        ]
        for vv in selvars:
            seldict[vv] = eval(vv)

    return seldict, comptab, betas, mmix_new
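
Kappa and Rho are weighted averages of the per-voxel F statistics for the R2* and S0 models, weighted by the squared component weights (|Z|^2), as in the loop above. A tiny numeric sketch with made-up values (not tedana code):

import numpy as np

F_R2 = np.array([10., 40., 5.])   # per-voxel F stats, R2* (TE-dependent) model
F_S0 = np.array([3., 2., 8.])     # per-voxel F stats, S0 (TE-independent) model
Z = np.array([0.5, 2.0, 0.1])     # component weight map (Z-scored)

weights = np.abs(Z) ** 2.
kappa = np.average(F_R2, weights=weights)
rho = np.average(F_S0, weights=weights)
print(kappa, rho)
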
Example #24
def tedpca(data_cat,
           data_oc,
           combmode,
           mask,
           adaptive_mask,
           t2sG,
           ref_img,
           tes,
           algorithm='mdl',
           kdaw=10.,
           rdaw=1.,
           out_dir='.',
           verbose=False,
           low_mem=False):
    """
    Use principal components analysis (PCA) to identify and remove thermal
    noise from multi-echo data.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input functional data
    data_oc : (S x T) array_like
        Optimally combined time series data
    combmode : {'t2s', 'paid'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'paid' indicates using the method of
        Poser 2006
    mask : (S,) array_like
        Boolean mask array
    adaptive_mask : (S,) array_like
        Array where each value indicates the number of echoes with good signal
        for that voxel. This mask may be thresholded; for example, with values
        less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    t2sG : (S,) array_like
        Map of voxel-wise T2* estimates.
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    tes : :obj:`list`
        List of echo times associated with `data_cat`, in milliseconds
    algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
        Method with which to select components in TEDPCA. PCA
        decomposition with the mdl, kic, and aic options is based on a moving
        average (stationary Gaussian) process, with the options ordered from
        most to least aggressive (see Li et al., 2007).
        If a float is provided, it is assumed to represent the proportion of
        variance explained (0-1) to retain from PCA.
        Default is 'mdl'.
    kdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Kappa calculations. Must be a
        non-negative float, or -1 (a special value). Default is 10.
    rdaw : :obj:`float`, optional
        Dimensionality augmentation weight for Rho calculations. Must be a
        non-negative float, or -1 (a special value). Default is 1.
    out_dir : :obj:`str`, optional
        Output directory.
    verbose : :obj:`bool`, optional
        Whether to output files from fitmodels_direct or not. Default: False
    low_mem : :obj:`bool`, optional
        Whether to use incremental PCA (for low-memory systems) or not.
        This is only compatible with the "kundu" or "kundu-stabilize" algorithms.
        Default: False

    Returns
    -------
    kept_data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data
    n_components : :obj:`int`
        Number of components retained from PCA decomposition

    Notes
    -----
    ======================    =================================================
    Notation                  Meaning
    ======================    =================================================
    :math:`\\kappa`            Component pseudo-F statistic for TE-dependent
                              (BOLD) model.
    :math:`\\rho`              Component pseudo-F statistic for TE-independent
                              (artifact) model.
    :math:`v`                 Voxel
    :math:`V`                 Total number of voxels in mask
    :math:`\\zeta`             Something
    :math:`c`                 Component
    :math:`p`                 Something else
    ======================    =================================================

    Steps:

    1.  Variance normalize either multi-echo or optimally combined data,
        depending on settings.
    2.  Decompose normalized data using PCA or SVD.
    3.  Compute :math:`{\\kappa}` and :math:`{\\rho}`:

            .. math::
                {\\kappa}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,R_2^*}}{\\sum {\\zeta}_{c,v}^p}

                {\\rho}_c = \\frac{\\sum_{v}^V {\\zeta}_{c,v}^p * \
                      F_{c,v,S_0}}{\\sum {\\zeta}_{c,v}^p}

    4.  Some other stuff. Something about elbows.
    5.  Classify components as thermal noise if they meet both of the
        following criteria:

            - Nonsignificant :math:`{\\kappa}` and :math:`{\\rho}`.
            - Nonsignificant variance explained.

    Outputs:

    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    pca_decomposition.json    PCA component table.
    pca_mixing.tsv            PCA mixing matrix.
    pca_components.nii.gz     Component weight maps.
    ======================    =================================================

    See Also
    --------
    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
                                              parameter.
    """
    if algorithm == 'kundu':
        alg_str = ("followed by the Kundu component selection decision "
                   "tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif algorithm == 'kundu-stabilize':
        alg_str = ("followed by the 'stabilized' Kundu component "
                   "selection decision tree (Kundu et al., 2013)")
        RefLGR.info("Kundu, P., Brenowitz, N. D., Voon, V., Worbe, Y., "
                    "Vértes, P. E., Inati, S. J., ... & Bullmore, E. T. "
                    "(2013). Integrated strategy for improving functional "
                    "connectivity mapping using multiecho fMRI. Proceedings "
                    "of the National Academy of Sciences, 110(40), "
                    "16187-16192.")
    elif isinstance(algorithm, Number):
        alg_str = (
            "in which the number of components was determined based on a "
            "variance explained threshold")
    else:
        alg_str = (
            "based on the PCA component estimation with a Moving Average"
            "(stationary Gaussian) process (Li et al., 2007)")
        RefLGR.info("Li, Y.O., Adalı, T. and Calhoun, V.D., (2007). "
                    "Estimating the number of independent components for "
                    "functional magnetic resonance imaging data. "
                    "Human brain mapping, 28(11), pp.1251-1266.")

    RepLGR.info("Principal component analysis {0} was applied to "
                "the optimally combined data for dimensionality "
                "reduction.".format(alg_str))

    n_samp, n_echos, n_vols = data_cat.shape

    LGR.info('Computing PCA of optimally combined multi-echo data')
    data = data_oc[mask, :]

    data_z = ((data.T - data.T.mean(axis=0)) /
              data.T.std(axis=0)).T  # var normalize ts
    data_z = (data_z -
              data_z.mean()) / data_z.std()  # var normalize everything

    if algorithm in ['mdl', 'aic', 'kic']:
        data_img = io.new_nii_like(ref_img, utils.unmask(data, mask))
        mask_img = io.new_nii_like(ref_img, mask.astype(int))
        voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca.ma_pca(
            data_img, mask_img, algorithm)
    elif isinstance(algorithm, Number):
        ppca = PCA(copy=False, n_components=algorithm, svd_solver="full")
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()
    elif low_mem:
        voxel_comp_weights, varex, comp_ts = low_mem_pca(data_z)
        varex_norm = varex / varex.sum()
    else:
        ppca = PCA(copy=False, n_components=(n_vols - 1))
        ppca.fit(data_z)
        comp_ts = ppca.components_.T
        varex = ppca.explained_variance_
        voxel_comp_weights = np.dot(np.dot(data_z, comp_ts),
                                    np.diag(1. / varex))
        varex_norm = varex / varex.sum()

    # Compute Kappa and Rho for PCA comps
    # Normalize each component's time series
    vTmixN = stats.zscore(comp_ts, axis=0)
    comptable, _, _, _ = metrics.dependence_metrics(data_cat,
                                                    data_oc,
                                                    comp_ts,
                                                    adaptive_mask,
                                                    tes,
                                                    ref_img,
                                                    reindex=False,
                                                    mmixN=vTmixN,
                                                    algorithm=None,
                                                    label='mepca_',
                                                    out_dir=out_dir,
                                                    verbose=verbose)

    # varex_norm from PCA overrides varex_norm from dependence_metrics,
    # but we retain the original
    comptable['estimated normalized variance explained'] = \
        comptable['normalized variance explained']
    comptable['normalized variance explained'] = varex_norm

    # write component maps to 4D image
    comp_ts_z = stats.zscore(comp_ts, axis=0)
    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

    # Select components using decision tree
    if algorithm == 'kundu':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=False)
    elif algorithm == 'kundu-stabilize':
        comptable = kundu_tedpca(comptable,
                                 n_echos,
                                 kdaw,
                                 rdaw,
                                 stabilize=True)
    else:
        alg_str = "variance explained-based" if isinstance(
            algorithm, Number) else algorithm
        LGR.info('Selected {0} components with {1} dimensionality '
                 'detection'.format(comptable.shape[0], alg_str))
        comptable['classification'] = 'accepted'
        comptable['rationale'] = ''

    # Save decomposition
    comp_names = [
        io.add_decomp_prefix(comp,
                             prefix='pca',
                             max_value=comptable.index.max())
        for comp in comptable.index.values
    ]

    mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False)

    comptable['Description'] = 'PCA fit to optimally combined data.'
    mmix_dict = {}
    mmix_dict['Method'] = ('Principal components analysis implemented by '
                           'sklearn. Components are sorted by variance '
                           'explained in descending order. '
                           'Component signs are flipped to best match the '
                           'data.')
    io.save_comptable(comptable,
                      op.join(out_dir, 'pca_decomposition.json'),
                      label='pca',
                      metadata=mmix_dict)

    acc = comptable[comptable.classification == 'accepted'].index.values
    n_components = acc.size
    voxel_kept_comp_weighted = (voxel_comp_weights[:, acc] * varex[None, acc])
    kept_data = np.dot(voxel_kept_comp_weighted, comp_ts[:, acc].T)

    kept_data = stats.zscore(kept_data,
                             axis=1)  # variance normalize time series
    kept_data = stats.zscore(kept_data,
                             axis=None)  # variance normalize everything

    return kept_data, n_components
Example #25
def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, t2s, s0, olevel=2,
             oversion=99, filecsdata=True, savecsdiag=True, strict_mode=False):
    """
    Labels components in `mmix`

    Parameters
    ----------
    seldict : :obj:`dict`
        As output from `fitmodels_direct`
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the number of volumes in the original data
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    manacc : list
        Comma-separated list of indices of manually accepted components
    n_echos : int
        Number of echos in original data
    t2s : (S,) array_like
    s0 : (S,) array_like
    olevel : int, optional
        Default: 2
    oversion : int, optional
        Default: 99
    filecsdata: bool, optional
        Default: True
    savecsdiag: bool, optional
        Default: True
    strict_mode: bool, optional
        Default: False

    Returns
    -------
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ign : list
        Indices of ignored components in `mmix`
    """

    if filecsdata:
        import bz2
        if seldict is not None:
            LGR.info('Saving component selection data')
            with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f:
                pickle.dump(seldict, csstate_f)
        else:
            try:
                with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
                    seldict = pickle.load(csstate_f)
            except FileNotFoundError:
                LGR.warning('Failed to load component selection data')
                return None

    # List of components
    midk = []
    ign = []
    nc = np.arange(len(seldict['Kappas']))
    ncl = np.arange(len(seldict['Kappas']))

    # If user has specified components to accept manually
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        midk = []
        rej = sorted(np.setdiff1d(ncl, acc))
        return acc, rej, midk, []  # Add string for ign

    """
    Do some tallies for no. of significant voxels
    """
    countsigFS0 = seldict['F_S0_clmaps'].sum(0)
    countsigFR2 = seldict['F_R2_clmaps'].sum(0)
    countnoise = np.zeros(len(nc))

    """
    Make table of dice values
    """
    dice_tbl = np.zeros([nc.shape[0], 2])
    for ii in ncl:
        dice_FR2 = utils.dice(utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0],
                              seldict['F_R2_clmaps'][:, ii])
        dice_FS0 = utils.dice(utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0],
                              seldict['F_S0_clmaps'][:, ii])
        dice_tbl[ii, :] = [dice_FR2, dice_FS0]  # step 3a here and above
    dice_tbl[np.isnan(dice_tbl)] = 0

    """
    Make table of noise gain
    """
    tt_table = np.zeros([len(nc), 4])
    counts_FR2_Z = np.zeros([len(nc), 2])
    for ii in nc:
        comp_noise_sel = utils.andb([np.abs(seldict['Z_maps'][:, ii]) > 1.95,
                                     seldict['Z_clmaps'][:, ii] == 0]) == 2
        countnoise[ii] = np.array(comp_noise_sel, dtype=np.int).sum()
        noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0]
        noise_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii]))
        signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii], mask)[t2s != 0] == 1
        signal_FR2_Z = np.log10(np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii]))
        counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)]
        try:
            ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True)
            # avoid DivideByZero RuntimeWarning
            if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0:
                mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
            else:
                mwu = -np.inf
            tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
            tt_table[ii, 1] = ttest[1]
        except Exception:  # TODO: what is the error that might be caught here?
            pass
    tt_table[np.isnan(tt_table)] = 0
    tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0],
                                                          98)

    # Time series derivative kurtosis
    mmix_dt = (mmix[:-1] - mmix[1:])
    mmix_kurt = stats.kurtosis(mmix_dt)
    mmix_std = np.std(mmix_dt, axis=0)

    """
    Step 1: Reject anything that's obviously an artifact
    a. Estimate a null variance
    """
    LGR.debug('Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts')
    rej = ncl[utils.andb([seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0]
    ncl = np.setdiff1d(ncl, rej)

    """
    Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial
    frequency artifacts
    """
    LGR.debug('Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts')
    # spatial information is important so for NIFTI we convert back to 3D space
    if utils.get_dtype(ref_img) == 'NIFTI':
        dim1 = np.prod(ref_img.shape[:2])
    else:
        dim1 = mask.shape[0]
    fproj_arr = np.zeros([dim1, len(nc)])
    fproj_arr_val = np.zeros([dim1, len(nc)])
    spr = []
    fdist = []
    for ii in nc:
        # convert data back to 3D array
        if utils.get_dtype(ref_img) == 'NIFTI':
            tproj = utils.new_nii_like(ref_img, utils.unmask(seldict['PSC'],
                                                             mask)[:, ii]).get_data()
        else:
            tproj = utils.unmask(seldict['PSC'], mask)[:, ii]
        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
        fproj_z = fproj.max(axis=2)
        fproj[fproj == fproj.max()] = 0
        fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
        fproj_arr_val[:, ii] = fproj_z.flatten()
        spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=int).sum())
        fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
        fdist.append(np.max([utils.fitgaussian(fproj.max(jj))[3:].max() for
                     jj in range(fprojr.ndim)]))
    fdist = np.array(fdist)
    spr = np.array(spr)

    """
    Step 3: Create feature space of component properties
    """
    LGR.debug('Creating feature space of component properties')
    fdist_pre = fdist.copy()
    fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3
    fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std()
    spz = (spr-spr.mean())/spr.std()
    Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std()
    varex_ = np.log(seldict['varex'])
    Vz = (varex_-varex_.mean()) / varex_.std()
    Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std()
    Ktz = np.log(seldict['Kappas']) / 2
    Ktz = (Ktz-Ktz.mean()) / Ktz.std()
    Rtz = np.log(seldict['Rhos']) / 2
    Rtz = (Rtz-Rtz.mean())/Rtz.std()
    KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos']))
    cnz = (countnoise-countnoise.mean()) / countnoise.std()
    Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001))
    fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])

    """
    Step 3: Make initial guess of where BOLD components are and use DBSCAN
    to exclude noise components and find a sample set of 'good' components
    """
    LGR.debug('Making initial guess of BOLD components')
    # epsmap is [index,level of overlap with dicemask,
    # number of high Rho components]
    F05, F025, F01 = utils.getfbounds(n_echos)
    epsmap = []
    Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1]
    # Make an initial guess as to number of good components based on
    # consensus of control points across Rhos and Kappas
    KRcutguesses = [getelbow_mod(seldict['Rhos']), getelbow_cons(seldict['Rhos']),
                    getelbow_aggr(seldict['Rhos']), getelbow_mod(seldict['Kappas']),
                    getelbow_cons(seldict['Kappas']), getelbow_aggr(seldict['Kappas'])]
    Khighelbowval = stats.scoreatpercentile([getelbow_mod(seldict['Kappas'], val=True),
                                             getelbow_cons(seldict['Kappas'], val=True),
                                             getelbow_aggr(seldict['Kappas'], val=True)] +
                                            list(utils.getfbounds(n_echos)),
                                            75, interpolation_method='lower')
    KRcut = np.median(KRcutguesses)

    # only use exclusive when inclusive is extremely inclusive - double KRcut
    cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2
    cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01
    if cond1 and cond2:
        Kcut = getelbow_mod(seldict['Kappas'], val=True)
    else:
        Kcut = getelbow_cons(seldict['Kappas'], val=True)
    # only use inclusive when exclusive is extremely exclusive - half KRcut
    # (remember for Rho inclusive is higher, so want both Kappa and Rho
    # to default to lower)
    if getelbow_cons(seldict['Rhos']) > KRcut * 2:
        Rcut = getelbow_mod(seldict['Rhos'], val=True)
    # for above, consider something like:
    # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ])
    else:
        Rcut = getelbow_cons(seldict['Rhos'], val=True)
    if Rcut > Kcut:
        Kcut = Rcut  # Rcut should never be higher than Kcut
    KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut])
    # Make guess of Kundu et al 2011 plus remove high frequencies,
    # generally high variance, and high variance given low Kappa
    tt_lim = stats.scoreatpercentile(tt_table[tt_table[:, 0] > 0, 0],
                                     75, interpolation_method='lower') / 3
    KRguess = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                           np.union1d(nc[tt_table[:, 0] < tt_lim],
                           np.union1d(np.union1d(nc[spz > 1],
                                                 nc[Vz > 2]),
                                      nc[utils.andb([seldict['varex'] > 0.5 *
                                         sorted(seldict['varex'])[::-1][int(KRcut)],
                                                seldict['Kappas'] < 2*Kcut]) == 2])))
    guessmask = np.zeros(len(nc))
    guessmask[KRguess] = 1

    # Throw lower-risk bad components out
    rejB = ncl[utils.andb([tt_table[ncl, 0] < 0,
                           seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut]) == 3]
    rej = np.union1d(rej, rejB)
    ncl = np.setdiff1d(ncl, rej)

    LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components')
    for ii in range(20000):
        eps = .005 + ii * .005
        db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)

        # it would be great to have descriptive names, here
        # DBSCAN found at least three non-noisy clusters
        cond1 = db.labels_.max() > 1
        # DBSCAN didn't detect more classes than the total # of components / 6
        cond2 = db.labels_.max() < len(nc) / 6
        # TODO: confirm if 0 is a special label for DBSCAN
        # my intuition here is that we're confirming DBSCAN labelled previously
        # rejected components as noise (i.e., no overlap between `rej` and
        # labelled DBSCAN components)
        cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0
        # DBSCAN labelled less than half of the total components as noisy
        cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(len(nc)) < .5

        if cond1 and cond2 and cond3 and cond4:
            epsmap.append([ii, utils.dice(guessmask, db.labels_ == 0),
                           np.intersect1d(nc[db.labels_ == 0],
                           nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted,
                                                             val=True)]).shape[0]])
        db = None

    epsmap = np.array(epsmap)
    LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(len(epsmap)))
    group0 = []
    dbscanfailed = False
    if len(epsmap) != 0:
        # Select index that maximizes Dice with guessmask but first
        # minimizes number of higher Rho components
        ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
        LGR.debug('Component selection tuning: {:.05f}'.format(epsmap[:, 1].max()))
        db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
        ncl = nc[db.labels_ == 0]
        ncl = np.setdiff1d(ncl, rej)
        ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)])
        group0 = ncl.copy()
        group_n1 = nc[db.labels_ == -1]
        to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
    if len(group0) == 0 or len(group0) < len(KRguess) * .5:
        dbscanfailed = True
        LGR.debug('DBSCAN guess failed; using elbow guess method instead')
        ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                           np.union1d(nc[tt_table[:, 0] < tt_lim],
                           np.union1d(np.union1d(nc[spz > 1],
                                      nc[Vz > 2]),
                                      nc[utils.andb([seldict['varex'] > 0.5 *
                                                     sorted(seldict['varex'])[::-1][int(KRcut)],
                                                     seldict['Kappas'] < 2 * Kcut]) == 2])))
        group0 = ncl.copy()
        group_n1 = []
        to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
    if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3):
        LGR.warning('Extremely limited reliable BOLD signal space! '
                    'Not filtering components beyond BOLD/non-BOLD guesses.')
        midkfailed = True
        min_acc = np.array([])
        if len(group0) != 0:
            # For extremes, building in a 20% tolerance
            toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]),
                                                   seldict['Rhos'] < F025, Vz > -2]) == 3],
                                    np.union1d(group0, rej))
            min_acc = np.union1d(group0, toacc_hi)
            to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point',
                         'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge',
                         'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
                         'min_acc', 'toacc_hi']
        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
                         midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
        return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))

    # Find additional components to reject based on Dice - doing this here
    # since Dice is a little unstable, need to reference group0
    rej_supp = []
    dice_rej = False
    if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc):
        dice_rej = True
        rej_supp = np.setdiff1d(np.setdiff1d(np.union1d(rej,
                                                        nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]),
                                             group0), group_n1)
        rej = np.union1d(rej, rej_supp)

    # Temporal features
    # larger is worse - spike
    mmix_kurt_z = (mmix_kurt-mmix_kurt[group0].mean()) / mmix_kurt[group0].std()
    # smaller is worse - drift
    mmix_std_z = -1 * ((mmix_std-mmix_std[group0].mean()) / mmix_std[group0].std())
    mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0)

    """
    Step 2: Classify midk and ignore using separate SVMs for
    different variance regimes
    # To render hyperplane:
    min_x = np.min(spz2);max_x=np.max(spz2)
    # plotting separating hyperplane
        ww = clf_.coef_[0]
        aa = -ww[0] / ww[1]
        # make sure the next line is long enough
        xx = np.linspace(min_x - 2, max_x + 2)
        yy = aa * xx - (clf_.intercept_[0]) / ww[1]
        plt.plot(xx, yy, '-')
    """
    LGR.debug('Attempting to classify midk components')
    # Tried getting rid of accepting based on SVM altogether,
    # now using only rejecting
    toacc_hi = np.setdiff1d(nc[utils.andb([fdist <= np.max(fdist[group0]),
                               seldict['Rhos'] < F025, Vz > -2]) == 3],
                            np.union1d(group0, rej))
    toacc_lo = np.intersect1d(to_clf,
                              nc[utils.andb([spz < 1, Rz < 0, mmix_kurt_z_max < 5,
                                             Dz > -1, Tz > -1, Vz < 0, seldict['Kappas'] >= F025,
                                             fdist < 3 * np.percentile(fdist[group0], 98)]) == 8])
    midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T,
                            [0] * len(group0) + [1] * len(rej),
                            fproj_arr_val[:, to_clf].T,
                            svmtype=2)
    midk = np.setdiff1d(to_clf[utils.andb([midk_clf == 1, seldict['varex'][to_clf] >
                                           np.median(seldict['varex'][group0])]) == 2],
                        np.union1d(toacc_hi, toacc_lo))
    # only use the SVM to augment toacc_hi if toacc_hi isn't already
    # conflicting with the SVM choice
    if len(np.intersect1d(to_clf[utils.andb([midk_clf == 1,
                                             Vz[to_clf] > 0]) == 2], toacc_hi)) == 0:
        svm_acc_fail = True
        toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0])
    else:
        svm_acc_fail = False

    """
    Step 3: Compute variance associated with low T2* areas
    (e.g., draining veins)
    # To write out veinmask
    veinout = np.zeros(t2s.shape)
    veinout[t2s!=0] = veinmaskf
    utils.filewrite(veinout, 'veinmaskf', ref_img)
    veinBout = utils.unmask(veinmaskB, mask)
    utils.filewrite(veinBout, 'veins50', ref_img)
    """
    LGR.debug('Computing variance associated with low T2* areas (e.g., draining veins)')
    tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape)
    tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(seldict['tsoc_B'])[seldict['Z_clmaps'] != 0]
    sig_B = [stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25)
             if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0
             else 0 for ii in nc]
    sig_B = np.abs(seldict['tsoc_B']) > np.tile(sig_B, [seldict['tsoc_B'].shape[0], 1])

    veinmask = utils.andb([t2s < stats.scoreatpercentile(t2s[t2s != 0], 15,
                                                         interpolation_method='lower'),
                           t2s != 0]) == 2
    veinmaskf = veinmask[mask]
    veinR = np.array(sig_B[veinmaskf].sum(0),
                     dtype=float) / sig_B[~veinmaskf].sum(0)
    veinR[np.isnan(veinR)] = 0

    veinc = np.union1d(rej, midk)
    rej_veinRZ = ((veinR-veinR[veinc].mean())/veinR[veinc].std())[veinc]
    rej_veinRZ[rej_veinRZ < 0] = 0
    rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0
    t2s_lim = [stats.scoreatpercentile(t2s[t2s != 0], 50,
                                       interpolation_method='lower'),
               stats.scoreatpercentile(t2s[t2s != 0], 80,
                                       interpolation_method='lower') / 2]
    phys_var_zs = []
    for t2sl_i in range(len(t2s_lim)):
        t2sl = t2s_lim[t2sl_i]
        veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1])
        veincand = utils.unmask(utils.andb([s0[t2s != 0] < np.median(s0[t2s != 0]),
                                t2s[t2s != 0] < t2sl]) >= 1,
                                t2s != 0)[mask]
        veinW[~veincand] = 0
        invein = veinW.sum(axis=1)[(utils.unmask(veinmaskf, mask) *
                                    utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]]
        minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std())
        veinmaskB = veinW.sum(axis=1) > minW
        tsoc_Bp = seldict['tsoc_B'].copy()
        tsoc_Bp[tsoc_Bp < 0] = 0
        vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() /
                         (tsoc_Bp[:, ii]**2.).sum() for ii in nc])
        group0_res = np.intersect1d(KRguess, group0)
        phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
        veinBout = utils.unmask(veinmaskB, mask)
        utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)

    # Mask to sample veins
    phys_var_z = np.array(phys_var_zs).max(0)
    Vz2 = (varex_ - varex_[group0].mean())/varex_[group0].std()

    """
    Step 4: Learn joint TE-dependence spatial and temporal models to move
    remaining artifacts to ignore class
    """
    LGR.debug('Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts')

    to_ign = []

    minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)])
    newcest = len(group0) + len(toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign])
    phys_art = np.setdiff1d(nc[utils.andb([phys_var_z > 3.5,
                                           seldict['Kappas'] < minK_ign]) == 2], group0)
    rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas'])
    phys_art = np.union1d(np.setdiff1d(nc[utils.andb([phys_var_z > 2, rank_diff > newcest / 2,
                                                      Vz2 > -1]) == 3],
                                       group0), phys_art)
    # Want to replace field_art with an acf/SVM based approach
    # instead of a kurtosis/filter one
    field_art = np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5,
                                            seldict['Kappas'] < minK_ign]) == 2], group0)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 2,
                                           (stats.rankdata(mmix_kurt_z_max) -
                                            stats.rankdata(seldict['Kappas'])) > newcest / 2,
                                           Vz2 > 1, seldict['Kappas'] < F01]) == 4],
                                        group0), field_art)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 3,
                                                       Vz2 > 3, seldict['Rhos'] >
                                                       np.percentile(seldict['Rhos'][group0],
                                                                     75)]) == 3],
                                        group0), field_art)
    field_art = np.union1d(np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                                        group0), field_art)
    misc_art = np.setdiff1d(nc[utils.andb([(stats.rankdata(Vz) -
                                            stats.rankdata(Ktz)) > newcest / 2,
                            seldict['Kappas'] < Khighelbowval]) == 2], group0)
    ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art))
    midkrej = np.union1d(midk, rej)
    to_ign = np.setdiff1d(list(ign_cand), midkrej)
    toacc = np.union1d(toacc_hi, toacc_lo)
    ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej))
    ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
    orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej))

    # Last ditch effort to save some transient components
    if not strict_mode:
        Vz3 = (varex_ - varex_[ncl].mean())/varex_[ncl].std()
        ncl = np.union1d(ncl, np.intersect1d(orphan,
                                             nc[utils.andb([seldict['Kappas'] > F05,
                                                            seldict['Rhos'] < F025,
                                                            seldict['Kappas'] > seldict['Rhos'],
                                                            Vz3 <= -1,
                                                            Vz3 > -3,
                                                            mmix_kurt_z_max < 2.5]) == 6]))
        ign = np.setdiff1d(nc, list(ncl)+list(midk)+list(rej))
        orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej))

    if savecsdiag:
        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
                         'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
                         'Dice rejected', 'rej_supp', 'to_clf',
                         'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
                         'Field artifacts', 'Physiological artifacts',
                         'Miscellaneous artifacts', 'ncl', 'Ignored components']
        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
                         KRguess.tolist(), dice_rej, rej_supp.tolist(),
                         to_clf.tolist(), midk.tolist(), svm_acc_fail,
                         toacc_hi.tolist(), toacc_lo.tolist(),
                         field_art.tolist(), phys_art.tolist(),
                         misc_art.tolist(), ncl.tolist(), ign.tolist()]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
        allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
        np.savetxt('csdata.txt', allfz)

    return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
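
A minimal, self-contained sketch of the eps sweep above (synthetic data, made-up feature names): z-score a few per-component features, then sweep DBSCAN's eps and keep only solutions with a small number of clusters and less than half of the components labelled as noise. This only illustrates the clustering idea, not the full selcomps decision tree.

import numpy as np
from sklearn.cluster import DBSCAN

rng = np.random.default_rng(0)
n_comps = 60
feats = rng.normal(size=(n_comps, 4))  # hypothetical per-component features
# z-score each feature so a single euclidean eps is comparable across them
fz = (feats - feats.mean(axis=0)) / feats.std(axis=0)

usable = []
for ii in range(200):  # the real sweep uses a much finer grid (20000 steps)
    eps = .005 + ii * .05
    db = DBSCAN(eps=eps, min_samples=3).fit(fz)
    frac_noise = np.mean(db.labels_ == -1)
    # mirror the spirit of cond1/cond2/cond4: a few clusters, limited noise
    if 1 < db.labels_.max() < n_comps / 6 and frac_noise < .5:
        usable.append(eps)

print('{}/200 usable eps values'.format(len(usable)))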
Beispiel #26
0
def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
                     fout=None, reindex=False, mmixN=None, full_sel=True):
    """
    Fit models directly.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    mask : (S,) array_like
        Boolean mask array
    t2s : (S,) array_like
    t2sG : (S,) array_like
    tes : list
        List of echo times associated with `catd`, in milliseconds
    combmode : {'t2s', 'ste'} str
        How optimal combination of echos should be made, where 't2s' indicates
        using the method of Posse 1999 and 'ste' indicates using the method of
        Poser 2006
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    fout : bool, optional
        Whether to output per-component TE-dependence maps. Default: None
    reindex : bool, optional
        Default: False
    mmixN : array_like, optional
        Default: None
    full_sel : bool, optional
        Whether to perform selection of components based on Rho/Kappa scores.
        Default: True

    Returns
    -------
    seldict : dict
    comptab : (N x 5) :obj:`numpy.ndarray`
        Array with columns denoting (1) index of component, (2) Kappa score of
        component, (3) Rho score of component, (4) variance explained by
        component, and (5) normalized variance explained by component
    betas : :obj:`numpy.ndarray`
    mmix_new : :obj:`numpy.ndarray`
    """

    # compute optimal combination of raw data
    tsoc = model.make_optcom(catd, t2sG, tes, mask, combmode, verbose=False).astype(float)[mask]
    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(utils.unmask(tsoc, mask), mmixN, mask, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(utils.unmask(tsoc_dm, mask), mask, mmix)[mask]
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1), mmix)
    n_samp, n_echos, n_components = betas.shape
    n_voxels = mask.sum()
    n_data_voxels = (t2s != 0).sum()
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, fmid, fmax = utils.getfbounds(n_echos)

    # mask arrays
    mumask = mu[t2s != 0]
    t2smask = t2s[t2s != 0]
    betamask = betas[t2s != 0]

    # set up Xmats
    X1 = mumask.T  # Model 1
    X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T  # Model 2

    # tables for component selection
    Kappas = np.zeros([n_components])
    Rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_data_voxels, n_components])
    F_S0_maps = np.zeros([n_data_voxels, n_components])
    Z_clmaps = np.zeros([n_voxels, n_components])
    F_R2_clmaps = np.zeros([n_data_voxels, n_components])
    F_S0_clmaps = np.zeros([n_data_voxels, n_components])
    Br_clmaps_R2 = np.zeros([n_voxels, n_components])
    Br_clmaps_S0 = np.zeros([n_voxels, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i in range(n_components):
        # size of B is (n_echos, n_data_voxels)
        B = np.atleast_3d(betamask)[:, :, i].T
        alpha = (np.abs(B)**2).sum(axis=0)
        varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100.
        varex_norm[i] = (utils.unmask(WTS, mask)[t2s != 0][:, i]**2).sum() / totvar_norm * 100.

        # S0 Model
        coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0)
        SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2
        SSE_S0 = SSE_S0.sum(axis=0)
        F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0)
        F_S0_maps[:, i] = F_S0

        # R2 Model
        coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0)
        SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2)
        F_R2_maps[:, i] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(np.squeeze(utils.unmask(wtsZ, mask)[t2s != 0]**2.))
        Kappas[i] = np.average(F_R2, weights=norm_weights)
        Rhos[i] = np.average(F_S0, weights=norm_weights)

    # tabulate component values
    comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in Kappa order
        comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :]
        Kappas = comptab[:, 1]
        Rhos = comptab[:, 2]
        varex = comptab[:, 3]
        varex_norm = comptab[:, 4]
        nnc = np.array(comptab[:, 0], dtype=int)
        mmix_new = mmix[:, nnc]
        F_S0_maps = F_S0_maps[:, nnc]
        F_R2_maps = F_R2_maps[:, nnc]
        Z_maps = Z_maps[:, nnc]
        WTS = WTS[:, nnc]
        PSC = PSC[:, nnc]
        tsoc_B = tsoc_B[:, nnc]
        tsoc_Babs = tsoc_Babs[:, nnc]
        comptab[:, 0] = np.arange(comptab.shape[0])
    else:
        comptab = comptab_pre
        mmix_new = mmix

    # full selection including clustering criteria
    seldict = None
    if full_sel:
        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i in range(n_components):
            # save out files
            out = np.zeros((n_samp, 4))
            out[:, 0] = np.squeeze(utils.unmask(PSC[:, i], mask))
            out[:, 1] = np.squeeze(utils.unmask(F_R2_maps[:, i], t2s != 0))
            out[:, 2] = np.squeeze(utils.unmask(F_S0_maps[:, i], t2s != 0))
            out[:, 3] = np.squeeze(utils.unmask(Z_maps[:, i], mask))

            if utils.get_dtype(ref_img) == 'GIFTI':
                continue  # TODO: pass through GIFTI file data as below

            ccimg = utils.new_nii_like(ref_img, out)

            # Do simple clustering on F
            sel = spatclust(ccimg, min_cluster_size=csize,
                            threshold=int(fmin), index=[1, 2], mask=(t2s != 0))
            F_R2_clmaps[:, i] = sel[:, 0]
            F_S0_clmaps[:, i] = sel[:, 1]
            countsigFR2 = F_R2_clmaps[:, i].sum()
            countsigFS0 = F_S0_clmaps[:, i].sum()

            # Do simple clustering on Z at p<0.05
            sel = spatclust(ccimg, min_cluster_size=csize,
                            threshold=1.95, index=3, mask=mask)
            Z_clmaps[:, i] = sel

            # Do simple clustering on ranked signal-change map
            spclust_input = utils.unmask(stats.rankdata(tsoc_Babs[:, i]), mask)
            spclust_input = utils.new_nii_like(ref_img, spclust_input)
            Br_clmaps_R2[:, i] = spatclust(spclust_input,
                                           min_cluster_size=csize,
                                           threshold=max(tsoc_Babs.shape)-countsigFR2,
                                           mask=mask)
            Br_clmaps_S0[:, i] = spatclust(spclust_input,
                                           min_cluster_size=csize,
                                           threshold=max(tsoc_Babs.shape)-countsigFS0,
                                           mask=mask)

        seldict = {}
        selvars = ['Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps',
                   'F_S0_maps', 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps',
                   'tsoc_B', 'Br_clmaps_R2', 'Br_clmaps_S0', 'PSC']
        for vv in selvars:
            seldict[vv] = eval(vv)

    return seldict, comptab, betas, mmix_new
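
For intuition, the Kappa and Rho scores computed in the loop above are just F-statistics averaged with |Z|^2 weights, so voxels strongly weighted on a component dominate its score. A toy sketch with synthetic arrays (Z_MAX is assumed here to be a clipping constant, e.g. 8; the module defines its own value):

import numpy as np

rng = np.random.default_rng(1)
n_voxels = 1000
F_R2 = rng.chisquare(2, size=n_voxels)  # stand-in for R2-model F-statistics
F_S0 = rng.chisquare(2, size=n_voxels)  # stand-in for S0-model F-statistics
wts = rng.normal(size=n_voxels)         # stand-in for one component's weight map

# z-score the weights and cap extreme values, as in the loop above
wtsZ = (wts - wts.mean()) / wts.std()
Z_MAX = 8  # assumed value
wtsZ = np.clip(wtsZ, -Z_MAX, Z_MAX)

kappa = np.average(F_R2, weights=np.abs(wtsZ)**2.)
rho = np.average(F_S0, weights=np.abs(wtsZ)**2.)
print('kappa={:.2f}, rho={:.2f}'.format(kappa, rho))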
Beispiel #27
0
def fit_decay(data, tes, mask, adaptive_mask, fittype, report=True):
    """
    Fit voxel-wise monoexponential decay models to `data`

    Parameters
    ----------
    data : (S x E [x T]) array_like
        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
        time
    tes : (E,) :obj:`list`
        Echo times
    mask : (S,) array_like
        Boolean array indicating samples that are consistently (i.e., across
        time AND echoes) non-zero
    adaptive_mask : (S,) array_like
        Valued array indicating number of echos that have sufficient signal in
        given sample
    fittype : {loglin, curvefit}
        The type of model fit to use
    report : bool, optional
        Whether to log a description of this step or not. Default is True.

    Returns
    -------
    t2s_limited : (S,) :obj:`numpy.ndarray`
        Limited T2* map. The limited map only keeps the T2* values for data
        where there are at least two echos with good signal.
    s0_limited : (S,) :obj:`numpy.ndarray`
        Limited S0 map.  The limited map only keeps the S0 values for data
        where there are at least two echos with good signal.
    t2s_full : (S,) :obj:`numpy.ndarray`
        Full T2* map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the T2* estimate from the first two
        echoes.
    s0_full : (S,) :obj:`numpy.ndarray`
        Full S0 map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the S0 estimate from the first two
        echoes.

    Notes
    -----
    This function replaces infinite values in the :math:`T_2^*` map with 500 and
    :math:`T_2^*` values less than or equal to zero with 1.
    Additionally, very small :math:`T_2^*` values above zero are replaced with a floor
    value to prevent zero-division errors later on in the workflow.
    It also replaces NaN values in the :math:`S_0` map with 0.
    """
    if data.shape[1] != len(tes):
        raise ValueError(
            'Second dimension of data ({0}) does not match number '
            'of echoes provided (tes; {1})'.format(data.shape[1], len(tes)))
    elif not (data.shape[0] == mask.shape[0] == adaptive_mask.shape[0]):
        raise ValueError('First dimensions (number of samples) of data ({0}), '
                         'mask ({1}), and adaptive_mask ({2}) do not '
                         'match'.format(data.shape[0], mask.shape[0],
                                        adaptive_mask.shape[0]))

    data = data.copy()
    if data.ndim == 2:
        data = data[:, :, None]

    # Mask the inputs
    data_masked = data[mask, :, :]
    adaptive_mask_masked = adaptive_mask[mask]

    if fittype == 'loglin':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear(
            data_masked, tes, adaptive_mask_masked, report=report)
    elif fittype == 'curvefit':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_monoexponential(
            data_masked, tes, adaptive_mask_masked, report=report)
    else:
        raise ValueError('Unknown fittype option: {}'.format(fittype))

    t2s_limited[np.isinf(t2s_limited)] = 500.  # why 500?
    # let's get rid of negative values, but keep zeros where limited != full
    t2s_limited[(adaptive_mask_masked > 1) & (t2s_limited <= 0)] = 1.
    t2s_limited = _apply_t2s_floor(t2s_limited, tes)
    s0_limited[np.isnan(s0_limited)] = 0.  # why 0?
    t2s_full[np.isinf(t2s_full)] = 500.  # why 500?
    t2s_full[t2s_full <= 0] = 1.  # let's get rid of negative values!
    t2s_full = _apply_t2s_floor(t2s_full, tes)
    s0_full[np.isnan(s0_full)] = 0.  # why 0?

    t2s_limited = utils.unmask(t2s_limited, mask)
    s0_limited = utils.unmask(s0_limited, mask)
    t2s_full = utils.unmask(t2s_full, mask)
    s0_full = utils.unmask(s0_full, mask)

    return t2s_limited, s0_limited, t2s_full, s0_full
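
A minimal standalone sketch of the 'loglin' fit idea used above: taking the log turns S(TE) = S0 * exp(-TE / T2*) into a line whose intercept is log(S0) and whose slope against -TE is 1 / T2*. Echo times and signal values below are made up.

import numpy as np

tes = np.array([14.5, 38.5, 62.5])           # echo times, in ms
true_s0, true_t2s = 1000., 35.
signal = true_s0 * np.exp(-tes / true_t2s)   # noiseless monoexponential decay

# log-linear model: log(S) = log(S0) - TE / T2*
X = np.column_stack([np.ones(len(tes)), -tes])
beta = np.linalg.lstsq(X, np.log(signal), rcond=None)[0]
s0_hat = np.exp(beta[0])
t2s_hat = 1. / beta[1]
print('S0 ~ {:.1f}, T2* ~ {:.1f} ms'.format(s0_hat, t2s_hat))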
Beispiel #28
0
def fit_decay(data, tes, mask, masksum, start_echo=1):
    """
    Fit voxel-wise monoexponential decay models to `data`

    Parameters
    ----------
    data : (S x E [x T]) array_like
        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
        time
    tes : (E, ) list
        Echo times
    mask : (S, ) array_like
        Boolean array indicating samples that are consistently (i.e., across
        time AND echoes) non-zero
    masksum : (S, ) array_like
        Valued array indicating number of echos that have sufficient signal in
        given sample
    start_echo : int, optional
        First echo to consider. Default is 1 (first echo).

    Returns
    -------
    t2sa : (S) :obj:`numpy.ndarray`
        Limited T2* map
    s0va : (S) :obj:`numpy.ndarray`
        Limited S0 map
    t2ss : (S, E-1) :obj:`numpy.ndarray`
        Voxel-wise T2* estimates from fits using the first 2 through E echoes
    s0vs : (S, E-1) :obj:`numpy.ndarray`
        Voxel-wise S0 estimates from fits using the first 2 through E echoes
    t2saf : (S) :obj:`numpy.ndarray`
        Full T2* map
    s0vaf : (S) :obj:`numpy.ndarray`
        Full S0 map

    Notes
    -----
    1.  Fit monoexponential decay function to all values for a given voxel
        across TRs, per TE, to estimate voxel-wise :math:`S_0` and
        :math:`T_2^*`:

        .. math::
            S(TE) = S_0 * exp(-R_2^* * TE)

            T_2^* = 1 / R_2^*

    2.  Replace infinite values in :math:`T_2^*` map with 500 and NaN values
        in :math:`S_0` map with 0.
    3.  Generate limited :math:`T_2^*` and :math:`S_0` maps by selecting, for
        each voxel, the estimates from the fit that used the number of good
        echoes given by `masksum`.
    """
    if len(data.shape) == 3:
        n_samp, n_echos, n_vols = data.shape
    else:
        n_samp, n_echos = data.shape
        n_vols = 1

    data = data[mask]
    t2ss = np.zeros([n_samp, n_echos - 1])
    s0vs = np.zeros([n_samp, n_echos - 1])

    for echo in range(start_echo, n_echos + 1):
        # perform log linear fit of echo times against MR signal
        # make DV matrix: samples x (time series * echos)
        B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T)
        # make IV matrix: intercept/TEs x (time series * echos)
        x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]])
        X = np.repeat(x, n_vols, axis=0)

        beta = np.linalg.lstsq(X, B, rcond=None)[0]
        t2s = 1. / beta[1, :].T
        s0 = np.exp(beta[0, :]).T

        t2s[np.isinf(t2s)] = 500.  # why 500?
        s0[np.isnan(s0)] = 0.  # why 0?

        t2ss[..., echo - 2] = np.squeeze(utils.unmask(t2s, mask))
        s0vs[..., echo - 2] = np.squeeze(utils.unmask(s0, mask))

    # create limited T2* and S0 maps
    fl = np.zeros([n_samp, len(tes) - 1], dtype=bool)
    for echo in range(n_echos - 1):
        fl_ = np.squeeze(fl[..., echo])
        fl_[masksum == echo + 2] = True
        fl[..., echo] = fl_
    t2sa = utils.unmask(t2ss[fl], masksum > 1)
    s0va = utils.unmask(s0vs[fl], masksum > 1)

    # create full T2* maps with S0 estimation errors
    t2saf, s0vaf = t2sa.copy(), s0va.copy()
    t2saf[masksum == 1] = t2ss[masksum == 1, 0]
    s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

    return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
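
`masksum` is an adaptive mask giving, per sample, the number of echoes with usable signal. The real criterion used to build it lives elsewhere in the package; the sketch below only illustrates the idea with a crude threshold on the mean signal per echo.

import numpy as np

rng = np.random.default_rng(2)
n_samp, n_echos, n_vols = 500, 3, 10
data = rng.gamma(2., 100., size=(n_samp, n_echos, n_vols))

mean_sig = data.mean(axis=-1)                 # mean signal per sample and echo
thresh = np.percentile(mean_sig, 25, axis=0)  # crude per-echo threshold
good = mean_sig > thresh                      # "good" echoes per sample
masksum = good.sum(axis=1)                    # number of good echoes per sample
mask = masksum > 0                            # samples with any usable echo
print(masksum[:10], '-', mask.sum(), 'samples retained')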
Beispiel #29
0
def dependence_metrics(catd,
                       tsoc,
                       mmix,
                       t2s,
                       tes,
                       ref_img,
                       reindex=False,
                       mmixN=None,
                       algorithm=None,
                       label=None,
                       out_dir='.',
                       verbose=False):
    """
    Fit TE-dependence and -independence models to components.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    tsoc : (S x T) array_like
        Optimally combined data
    mmix : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `catd`
    t2s : (S [x T]) array_like
        Limited T2* map or timeseries.
    tes : list
        List of echo times associated with `catd`, in milliseconds
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    reindex : bool, optional
        Whether to sort components in descending order by Kappa. Default: False
    mmixN : (T x C) array_like, optional
        Z-scored mixing matrix. Default: None
    algorithm : {'kundu_v2', 'kundu_v3', None}, optional
        Decision tree to be applied to metrics. Determines which maps will be
        generated and stored in seldict. Default: None
    label : :obj:`str` or None, optional
        Prefix to apply to generated files. Default is None.
    out_dir : :obj:`str`, optional
        Output directory for generated files. Default is current working
        directory.
    verbose : :obj:`bool`, optional
        Whether or not to generate additional files. Default is False.

    Returns
    -------
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index is the component number.
    seldict : :obj:`dict` or None
        Dictionary containing component-specific metric maps to be used for
        component selection. If `algorithm` is None, then seldict will be None as
        well.
    betas : :obj:`numpy.ndarray`
    mmix_new : :obj:`numpy.ndarray`
    """
    # Use t2s as mask
    mask = t2s != 0
    if not (catd.shape[0] == t2s.shape[0] == mask.shape[0] == tsoc.shape[0]):
        raise ValueError('First dimensions (number of samples) of catd ({0}), '
                         'tsoc ({1}), and t2s ({2}) do not '
                         'match'.format(catd.shape[0], tsoc.shape[0],
                                        t2s.shape[0]))
    elif catd.shape[1] != len(tes):
        raise ValueError('Second dimension of catd ({0}) does not match '
                         'number of echoes provided (tes; '
                         '{1})'.format(catd.shape[1], len(tes)))
    elif not (catd.shape[2] == tsoc.shape[1] == mmix.shape[0]):
        raise ValueError('Number of volumes in catd ({0}), '
                         'tsoc ({1}), and mmix ({2}) do not '
                         'match.'.format(catd.shape[2], tsoc.shape[1],
                                         mmix.shape[0]))
    elif t2s.ndim == 2:
        if catd.shape[2] != t2s.shape[1]:
            raise ValueError('Number of volumes in catd '
                             '({0}) does not match number of volumes in '
                             't2s ({1})'.format(catd.shape[2], t2s.shape[1]))

    # mask everything we can
    tsoc = tsoc[mask, :]
    catd = catd[mask, ...]
    t2s = t2s[mask]

    # demean optimal combination
    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)

    # compute un-normalized weight dataset (features)
    if mmixN is None:
        mmixN = mmix
    WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False)

    # compute PSC dataset - shouldn't have to refit data
    tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None)
    del tsoc_dm
    tsoc_Babs = np.abs(tsoc_B)
    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100

    # compute skews to determine signs based on unnormalized weights,
    # correct mmix & WTS signs based on spatial distribution tails
    signs = stats.skew(WTS, axis=0)
    signs /= np.abs(signs)
    mmix = mmix.copy()
    mmix *= signs
    WTS *= signs
    PSC *= signs
    totvar = (tsoc_B**2).sum()
    totvar_norm = (WTS**2).sum()

    # compute Betas and means over TEs for TE-dependence analysis
    betas = get_coeffs(utils.unmask(catd, mask), mmix,
                       np.repeat(mask[:, np.newaxis], len(tes), axis=1))
    betas = betas[mask, ...]
    n_voxels, n_echos, n_components = betas.shape
    mu = catd.mean(axis=-1, dtype=float)
    tes = np.reshape(tes, (n_echos, 1))
    fmin, _, _ = getfbounds(n_echos)

    # set up Xmats
    X1 = mu.T  # Model 1
    X2 = np.tile(tes, (1, n_voxels)) * mu.T / t2s.T  # Model 2

    # tables for component selection
    kappas = np.zeros([n_components])
    rhos = np.zeros([n_components])
    varex = np.zeros([n_components])
    varex_norm = np.zeros([n_components])
    Z_maps = np.zeros([n_voxels, n_components])
    F_R2_maps = np.zeros([n_voxels, n_components])
    F_S0_maps = np.zeros([n_voxels, n_components])
    pred_R2_maps = np.zeros([n_voxels, n_echos, n_components])
    pred_S0_maps = np.zeros([n_voxels, n_echos, n_components])

    LGR.info('Fitting TE- and S0-dependent models to components')
    for i_comp in range(n_components):
        # size of comp_betas is (n_echoes, n_samples)
        comp_betas = np.atleast_3d(betas)[:, :, i_comp].T
        alpha = (np.abs(comp_betas)**2).sum(axis=0)
        varex[i_comp] = (tsoc_B[:, i_comp]**2).sum() / totvar * 100.
        varex_norm[i_comp] = (WTS[:, i_comp]**2).sum() / totvar_norm

        # S0 Model
        # (S,) model coefficient map
        coeffs_S0 = (comp_betas * X1).sum(axis=0) / (X1**2).sum(axis=0)
        pred_S0 = X1 * np.tile(coeffs_S0, (n_echos, 1))
        pred_S0_maps[:, :, i_comp] = pred_S0.T
        SSE_S0 = (comp_betas - pred_S0)**2
        SSE_S0 = SSE_S0.sum(axis=0)  # (S,) prediction error map
        F_S0 = (alpha - SSE_S0) * (n_echos - 1) / (SSE_S0)
        F_S0_maps[:, i_comp] = F_S0

        # R2 Model
        coeffs_R2 = (comp_betas * X2).sum(axis=0) / (X2**2).sum(axis=0)
        pred_R2 = X2 * np.tile(coeffs_R2, (n_echos, 1))
        pred_R2_maps[:, :, i_comp] = pred_R2.T
        SSE_R2 = (comp_betas - pred_R2)**2
        SSE_R2 = SSE_R2.sum(axis=0)
        F_R2 = (alpha - SSE_R2) * (n_echos - 1) / (SSE_R2)
        F_R2_maps[:, i_comp] = F_R2

        # compute weights as Z-values
        wtsZ = (WTS[:, i_comp] - WTS[:, i_comp].mean()) / WTS[:, i_comp].std()
        wtsZ[np.abs(wtsZ) > Z_MAX] = (
            Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
        Z_maps[:, i_comp] = wtsZ

        # compute Kappa and Rho
        F_S0[F_S0 > F_MAX] = F_MAX
        F_R2[F_R2 > F_MAX] = F_MAX
        norm_weights = np.abs(wtsZ**2.)
        kappas[i_comp] = np.average(F_R2, weights=norm_weights)
        rhos[i_comp] = np.average(F_S0, weights=norm_weights)
    del SSE_S0, SSE_R2, wtsZ, F_S0, F_R2, norm_weights, comp_betas
    if algorithm != 'kundu_v3':
        del WTS, PSC, tsoc_B

    # tabulate component values
    comptable = np.vstack([kappas, rhos, varex, varex_norm]).T
    if reindex:
        # re-index all components in descending Kappa order
        sort_idx = comptable[:, 0].argsort()[::-1]
        comptable = comptable[sort_idx, :]
        mmix_new = mmix[:, sort_idx]
        betas = betas[..., sort_idx]
        pred_R2_maps = pred_R2_maps[:, :, sort_idx]
        pred_S0_maps = pred_S0_maps[:, :, sort_idx]
        F_R2_maps = F_R2_maps[:, sort_idx]
        F_S0_maps = F_S0_maps[:, sort_idx]
        Z_maps = Z_maps[:, sort_idx]
        tsoc_Babs = tsoc_Babs[:, sort_idx]
        if algorithm == 'kundu_v3':
            WTS = WTS[:, sort_idx]
            PSC = PSC[:, sort_idx]
            tsoc_B = tsoc_B[:, sort_idx]
    else:
        mmix_new = mmix
    del mmix

    if verbose:
        # Echo-specific weight maps for each of the ICA components.
        io.filewrite(utils.unmask(betas, mask),
                     op.join(out_dir, '{0}betas_catd.nii'.format(label)),
                     ref_img)

        # Echo-specific maps of predicted values for R2 and S0 models for each
        # component.
        io.filewrite(utils.unmask(pred_R2_maps, mask),
                     op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img)
        io.filewrite(utils.unmask(pred_S0_maps, mask),
                     op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img)
        # Weight maps used to average metrics across voxels
        io.filewrite(utils.unmask(Z_maps**2., mask),
                     op.join(out_dir, '{0}metric_weights.nii'.format(label)),
                     ref_img)
    del pred_R2_maps, pred_S0_maps

    comptable = pd.DataFrame(comptable,
                             columns=[
                                 'kappa', 'rho', 'variance explained',
                                 'normalized variance explained'
                             ])
    comptable.index.name = 'component'

    # Generate clustering criteria for component selection
    if algorithm in ['kundu_v2', 'kundu_v3']:
        Z_clmaps = np.zeros([n_voxels, n_components], bool)
        F_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        F_S0_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_R2_clmaps = np.zeros([n_voxels, n_components], bool)
        Br_S0_clmaps = np.zeros([n_voxels, n_components], bool)

        LGR.info('Performing spatial clustering of components')
        csize = np.max([int(n_voxels * 0.0005) + 5, 20])
        LGR.debug('Using minimum cluster size: {}'.format(csize))
        for i_comp in range(n_components):
            # Cluster-extent threshold and binarize F-maps
            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask)))
            F_R2_clmaps[:,
                        i_comp] = utils.threshold_map(ccimg,
                                                      min_cluster_size=csize,
                                                      threshold=fmin,
                                                      mask=mask,
                                                      binarize=True)
            countsigFR2 = F_R2_clmaps[:, i_comp].sum()

            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask)))
            F_S0_clmaps[:,
                        i_comp] = utils.threshold_map(ccimg,
                                                      min_cluster_size=csize,
                                                      threshold=fmin,
                                                      mask=mask,
                                                      binarize=True)
            countsigFS0 = F_S0_clmaps[:, i_comp].sum()

            # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05
            ccimg = io.new_nii_like(
                ref_img, np.squeeze(utils.unmask(Z_maps[:, i_comp], mask)))
            Z_clmaps[:, i_comp] = utils.threshold_map(ccimg,
                                                      min_cluster_size=csize,
                                                      threshold=1.95,
                                                      mask=mask,
                                                      binarize=True)

            # Cluster-extent threshold and binarize ranked signal-change map
            ccimg = io.new_nii_like(
                ref_img,
                utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask))
            Br_R2_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFR2),
                mask=mask,
                binarize=True)
            Br_S0_clmaps[:, i_comp] = utils.threshold_map(
                ccimg,
                min_cluster_size=csize,
                threshold=(max(tsoc_Babs.shape) - countsigFS0),
                mask=mask,
                binarize=True)
        del ccimg, tsoc_Babs

        if algorithm == 'kundu_v2':
            # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5
            selvars = [
                'Z_maps', 'F_R2_maps', 'Z_clmaps', 'F_R2_clmaps',
                'F_S0_clmaps', 'Br_R2_clmaps', 'Br_S0_clmaps'
            ]
        elif algorithm == 'kundu_v3':
            selvars = [
                'WTS', 'tsoc_B', 'PSC', 'Z_maps', 'F_R2_maps', 'F_S0_maps',
                'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', 'Br_R2_clmaps',
                'Br_S0_clmaps'
            ]
        elif algorithm is None:
            selvars = []
        else:
            raise ValueError(
                'Algorithm "{0}" not recognized.'.format(algorithm))

        seldict = {}
        for vv in selvars:
            seldict[vv] = eval(vv)
    else:
        seldict = None

    return comptable, seldict, betas, mmix_new
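
To make the per-voxel model comparison above more concrete: the S0 model predicts component betas proportional to the mean echo-wise signal, the R2 model scales that prediction by TE / T2*, and each F-statistic contrasts explained against residual variance with n_echos - 1 degrees of freedom. The sketch below reproduces just that arithmetic on synthetic arrays; all values, shapes, and the constant-across-echoes mean are simplifications.

import numpy as np

rng = np.random.default_rng(3)
n_echos, n_vox = 3, 200
tes = np.array([14.5, 38.5, 62.5])[:, None]       # (E, 1) echo times, ms
mu = rng.gamma(2., 100., size=n_vox)               # mean signal per voxel
t2s = rng.uniform(20., 60., size=n_vox)            # T2* per voxel, ms

X1 = np.tile(mu, (n_echos, 1))                     # S0-model regressor (simplified)
X2 = tes * mu / t2s                                # R2-model regressor: scales with TE
B = .5 * X2 + rng.normal(scale=5., size=(n_echos, n_vox))  # toy component betas

alpha = (np.abs(B)**2).sum(axis=0)
for name, X in [('S0', X1), ('R2', X2)]:
    coeffs = (B * X).sum(axis=0) / (X**2).sum(axis=0)  # per-voxel least squares
    sse = ((B - X * coeffs)**2).sum(axis=0)
    F = (alpha - sse) * (n_echos - 1) / sse
    print('{} model: median F = {:.1f}'.format(name, np.median(F)))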
Beispiel #30
0
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses the spatial global signal estimation approach to
    remove the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally-combined functional data (i.e., the output of `make_optcom`)
    n_echos : int
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : int, optional
        Specifies degree of Legendre polynomial basis function for estimating
        spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """

    LGR.info('Applying amplitude-based T1 equilibration correction')

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T,
                          rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find time course of the spatial global signal
    # make basis with the Legendre basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(
        np.atleast_2d(sol[dtrank]).T, np.atleast_2d(
            glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    utils.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(
            np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
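
A small standalone sketch of the detrending piece used above: scipy's lpmv with order 0 gives the Legendre polynomials, one basis column per degree 0..dtrank-1, and a least-squares fit of that basis removes slow drift from a time series. The global-signal regression in gscontrol_raw builds on the same basis; the toy series below is made up.

import numpy as np
from scipy.special import lpmv

rng = np.random.default_rng(4)
n_trs, dtrank = 120, 4
grid = np.linspace(-1, 1, n_trs)

# Legendre basis: one column per polynomial degree 0..dtrank-1
Lmix = np.column_stack([lpmv(0, deg, grid) for deg in range(dtrank)])

ts = 3. * grid**2 + rng.normal(scale=.5, size=n_trs)  # toy drift + noise
sol = np.linalg.lstsq(Lmix, ts, rcond=None)[0]
detrended = ts - Lmix.dot(sol)
print('std before/after: {:.2f} / {:.2f}'.format(ts.std(), detrended.std()))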
Beispiel #31
0
def fit_decay(data, tes, mask, masksum, start_echo):
    """
    Fit voxel-wise monoexponential decay models to `data`

    Parameters
    ----------
    data : (S x E [x T]) array_like
        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
        time
    tes : (E, ) list
        Echo times
    mask : (S, ) array_like
        Boolean array indicating samples that are consistently (i.e., across
        time AND echoes) non-zero
    masksum : (S, ) array_like
        Valued array indicating number of echos that have sufficient signal in
        given sample
    start_echo : int
        First echo to consider

    Returns
    -------
    t2sa : (S,) :obj:`numpy.ndarray`
        Limited T2* map
    s0va : (S,) :obj:`numpy.ndarray`
        Limited S0 map
    t2ss : (S x E-1) :obj:`numpy.ndarray`
        Voxel-wise T2* estimates from fits using the first 2 through E echoes
    s0vs : (S x E-1) :obj:`numpy.ndarray`
        Voxel-wise S0 estimates from fits using the first 2 through E echoes
    t2saf : (S,) :obj:`numpy.ndarray`
        Full T2* map
    s0vaf : (S,) :obj:`numpy.ndarray`
        Full S0 map

    Notes
    -----
    1.  Fit monoexponential decay function to all values for a given voxel
        across TRs, per TE, to estimate voxel-wise :math:`S_0` and
        :math:`T_2^*`:

        .. math::
            S(TE) = S_0 * exp(-R_2^* * TE)

            T_2^* = 1 / R_2^*

    2.  Replace infinite values in :math:`T_2^*` map with 500 and NaN values
        in :math:`S_0` map with 0.
    3.  Generate limited :math:`T_2^*` and :math:`S_0` maps by selecting, for
        each voxel, the estimates from the fit that used the number of good
        echoes given by `masksum`.
    """
    if len(data.shape) == 3:
        n_samp, n_echos, n_vols = data.shape
    else:
        n_samp, n_echos = data.shape
        n_vols = 1

    data = data[mask]
    t2ss = np.zeros([n_samp, n_echos - 1])
    s0vs = np.zeros([n_samp, n_echos - 1])

    for echo in range(start_echo, n_echos + 1):
        # perform log linear fit of echo times against MR signal
        # make DV matrix: samples x (time series * echos)
        B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T)
        # make IV matrix: intercept/TEs x (time series * echos)
        x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]])
        X = np.repeat(x, n_vols, axis=0)

        beta = np.linalg.lstsq(X, B, rcond=None)[0]
        t2s = 1. / beta[1, :].T
        s0 = np.exp(beta[0, :]).T

        t2s[np.isinf(t2s)] = 500.  # why 500?
        s0[np.isnan(s0)] = 0.      # why 0?

        t2ss[..., echo - 2] = np.squeeze(utils.unmask(t2s, mask))
        s0vs[..., echo - 2] = np.squeeze(utils.unmask(s0, mask))

    # create limited T2* and S0 maps
    fl = np.zeros([n_samp, len(tes) - 1], dtype=bool)
    for echo in range(n_echos - 1):
        fl_ = np.squeeze(fl[..., echo])
        fl_[masksum == echo + 2] = True
        fl[..., echo] = fl_
    t2sa = utils.unmask(t2ss[fl], masksum > 1)
    s0va = utils.unmask(s0vs[fl], masksum > 1)

    # create full T2* maps with S0 estimation errors
    t2saf, s0vaf = t2sa.copy(), s0va.copy()
    t2saf[masksum == 1] = t2ss[masksum == 1, 0]
    s0vaf[masksum == 1] = s0vs[masksum == 1, 0]

    return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
Beispiel #32
0
def make_optcom(data, t2s, tes, mask, combmode, verbose=True):
    """
    Optimally combine BOLD data across TEs.

    Parameters
    ----------
    data : (S x E x T) :obj:`numpy.ndarray`
        Concatenated BOLD data.
    t2s : (S,) :obj:`numpy.ndarray`
        Estimated T2* values.
    tes : :obj:`numpy.ndarray`
        Array of TEs, in seconds.
    mask : (S,) :obj:`numpy.ndarray`
        Boolean brain mask array.
    combmode : :obj:`str`
        How to combine data. Either 'ste' or 't2s'.
    verbose : :obj:`bool`, optional
        Whether to print status updates


    Returns
    -------
    combined : (S x T) :obj:`numpy.ndarray`
        Optimally combined data.

    Notes
    -----
    1.  Estimate voxel- and TE-specific weights based on estimated
        :math:`T_2^*`:

            .. math::
                w(T_2^*)_n = \\frac{TE_n * exp(\\frac{-TE}{T_{2(est)}^*})}
                {\\sum TE_n * exp(\\frac{-TE}{T_{2(est)}^*})}
    2.  Perform weighted average per voxel and TR across TEs based on weights
        estimated in the previous step.
    """

    _, _, n_vols = data.shape
    mdata = data[mask]
    tes = np.array(tes)[np.newaxis]  # (1 x E) array_like

    if t2s.ndim == 1:
        msg = 'Optimally combining data with voxel-wise T2 estimates'
        ft2s = t2s[mask, np.newaxis]
    else:
        msg = 'Optimally combining data with voxel- and volume-wise T2 estimates'
        ft2s = t2s[mask, :, np.newaxis]

    if verbose:
        LGR.info(msg)

    if combmode == 'ste':
        alpha = mdata.mean(axis=-1) * tes
    else:
        alpha = tes * np.exp(-tes / ft2s)

    if t2s.ndim == 1:
        alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols))
    else:
        alpha = np.swapaxes(alpha, 1, 2)
        ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1))
        alpha[ax0_idx, :, ax2_idx] = 1.

    combined = np.average(mdata, axis=1, weights=alpha)
    combined = utils.unmask(combined, mask)

    return combined
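
As a hedged illustration of the weighting scheme in the Notes above (names and numbers are mine, not tedana's), the T2*-weighted combination for one voxel might look like:

import numpy as np

tes = np.array([0.015, 0.039, 0.063])       # echo times in seconds
t2s_voxel = 0.045                           # estimated T2* for this voxel
echo_ts = np.array([[420., 300., 210.],     # echo 1 across 3 volumes
                    [250., 180., 130.],     # echo 2
                    [150., 110.,  80.]])    # echo 3

# w_n = TE_n * exp(-TE_n / T2*), normalized across echoes
weights = tes * np.exp(-tes / t2s_voxel)
weights /= weights.sum()

# Weighted average over echoes for every volume
combined = np.average(echo_ts, axis=0, weights=weights)
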
Example #33
0
def selcomps(seldict,
             mmix,
             mask,
             ref_img,
             manacc,
             n_echos,
             t2s,
             s0,
             olevel=2,
             oversion=99,
             filecsdata=True,
             savecsdiag=True,
             strict_mode=False):
    """
    Labels components in `mmix`

    Parameters
    ----------
    seldict : :obj:`dict`
        As output from `fitmodels_direct`
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the number of volumes in the original data
    mask : (S,) array_like
        Boolean mask array
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    manacc : list
        Comma-separated list of indices of manually accepted components
    n_echos : int
        Number of echos in original data
    t2s : (S,) array_like
    s0 : (S,) array_like
    olevel : int, optional
        Default: 2
    oversion : int, optional
        Default: 99
    filecsdata: bool, optional
        Default: False
    savecsdiag: bool, optional
        Default: True
    strict_mode: bool, optional
        Default: False

    Returns
    -------
    acc : list
        Indices of accepted (BOLD) components in `mmix`
    rej : list
        Indices of rejected (non-BOLD) components in `mmix`
    midk : list
        Indices of mid-K (questionable) components in `mmix`
    ign : list
        Indices of ignored components in `mmix`
    """

    if filecsdata:
        import bz2
        if seldict is not None:
            LGR.info('Saving component selection data')
            with bz2.BZ2File('compseldata.pklbz', 'wb') as csstate_f:
                pickle.dump(seldict, csstate_f)
        else:
            try:
                with bz2.BZ2File('compseldata.pklbz', 'rb') as csstate_f:
                    seldict = pickle.load(csstate_f)
            except FileNotFoundError:
                LGR.warning('Failed to load component selection data')
                return None

    # List of components
    midk = []
    ign = []
    nc = np.arange(len(seldict['Kappas']))
    ncl = np.arange(len(seldict['Kappas']))

    # If user has specified components to accept manually
    if manacc:
        acc = sorted([int(vv) for vv in manacc.split(',')])
        midk = []
        rej = sorted(np.setdiff1d(ncl, acc))
        return acc, rej, midk, []  # Add string for ign
    """
    Do some tallies for no. of significant voxels
    """
    countsigFS0 = seldict['F_S0_clmaps'].sum(0)
    countsigFR2 = seldict['F_R2_clmaps'].sum(0)
    countnoise = np.zeros(len(nc))
    """
    Make table of dice values
    """
    dice_tbl = np.zeros([nc.shape[0], 2])
    for ii in ncl:
        dice_FR2 = utils.dice(
            utils.unmask(seldict['Br_clmaps_R2'][:, ii], mask)[t2s != 0],
            seldict['F_R2_clmaps'][:, ii])
        dice_FS0 = utils.dice(
            utils.unmask(seldict['Br_clmaps_S0'][:, ii], mask)[t2s != 0],
            seldict['F_S0_clmaps'][:, ii])
        dice_tbl[ii, :] = [dice_FR2, dice_FS0]  # step 3a here and above
    dice_tbl[np.isnan(dice_tbl)] = 0
    """
    Make table of noise gain
    """
    tt_table = np.zeros([len(nc), 4])
    counts_FR2_Z = np.zeros([len(nc), 2])
    for ii in nc:
        comp_noise_sel = utils.andb([
            np.abs(seldict['Z_maps'][:, ii]) > 1.95,
            seldict['Z_clmaps'][:, ii] == 0
        ]) == 2
        countnoise[ii] = np.array(comp_noise_sel, dtype=int).sum()
        noise_FR2_Z_mask = utils.unmask(comp_noise_sel, mask)[t2s != 0]
        noise_FR2_Z = np.log10(
            np.unique(seldict['F_R2_maps'][noise_FR2_Z_mask, ii]))
        signal_FR2_Z_mask = utils.unmask(seldict['Z_clmaps'][:, ii],
                                         mask)[t2s != 0] == 1
        signal_FR2_Z = np.log10(
            np.unique(seldict['F_R2_maps'][signal_FR2_Z_mask, ii]))
        counts_FR2_Z[ii, :] = [len(signal_FR2_Z), len(noise_FR2_Z)]
        ttest = stats.ttest_ind(signal_FR2_Z, noise_FR2_Z, equal_var=True)
        # avoid DivideByZero RuntimeWarning
        if signal_FR2_Z.size > 0 and noise_FR2_Z.size > 0:
            mwu = stats.norm.ppf(
                stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
        else:
            mwu = -np.inf
        tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
        tt_table[ii, 1] = ttest[1]
    tt_table[np.isnan(tt_table)] = 0
    tt_table[np.isinf(tt_table[:, 0]),
             0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0], 98)

    # Time series derivative kurtosis
    mmix_dt = (mmix[:-1] - mmix[1:])
    mmix_kurt = stats.kurtosis(mmix_dt)
    mmix_std = np.std(mmix_dt, axis=0)
    """
    Step 1: Reject anything that's obviously an artifact
    a. Estimate a null variance
    """
    LGR.debug(
        'Rejecting gross artifacts based on Rho/Kappa values and S0/R2 counts')
    rej = ncl[utils.andb(
        [seldict['Rhos'] > seldict['Kappas'], countsigFS0 > countsigFR2]) > 0]
    ncl = np.setdiff1d(ncl, rej)
    """
    Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial
    frequency artifacts
    """
    LGR.debug(
        'Computing 3D spatial FFT of beta maps to detect high-spatial frequency artifacts'
    )
    # spatial information is important so for NIFTI we convert back to 3D space
    if utils.get_dtype(ref_img) == 'NIFTI':
        dim1 = np.prod(check_niimg(ref_img).shape[:2])
    else:
        dim1 = mask.shape[0]
    fproj_arr = np.zeros([dim1, len(nc)])
    fproj_arr_val = np.zeros([dim1, len(nc)])
    spr = []
    fdist = []
    for ii in nc:
        # convert data back to 3D array
        if utils.get_dtype(ref_img) == 'NIFTI':
            tproj = utils.new_nii_like(
                ref_img,
                utils.unmask(seldict['PSC'], mask)[:, ii]).get_data()
        else:
            tproj = utils.unmask(seldict['PSC'], mask)[:, ii]
        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
        fproj_z = fproj.max(axis=-1)
        fproj[fproj == fproj.max()] = 0
        spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=int).sum())
        fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
        fproj_arr_val[:, ii] = fproj_z.flatten()
        if utils.get_dtype(ref_img) == 'NIFTI':
            fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
            fdist.append(
                np.max([
                    utils.fitgaussian(fproj.max(jj))[3:].max()
                    for jj in range(fprojr.ndim)
                ]))
        else:
            fdist = np.load(os.path.join(RESOURCES, 'fdist.npy'))
    if type(fdist) is not np.ndarray:
        fdist = np.array(fdist)
    spr = np.array(spr)
    """
    Step 3: Create feature space of component properties
    """
    LGR.debug('Creating feature space of component properties')
    fdist_pre = fdist.copy()
    fdist_pre[fdist > np.median(fdist) * 3] = np.median(fdist) * 3
    fdist_z = (fdist_pre - np.median(fdist_pre)) / fdist_pre.std()
    spz = (spr - spr.mean()) / spr.std()
    Tz = (tt_table[:, 0] - tt_table[:, 0].mean()) / tt_table[:, 0].std()
    varex_ = np.log(seldict['varex'])
    Vz = (varex_ - varex_.mean()) / varex_.std()
    Rz = (seldict['Rhos'] - seldict['Rhos'].mean()) / seldict['Rhos'].std()
    Ktz = np.log(seldict['Kappas']) / 2
    Ktz = (Ktz - Ktz.mean()) / Ktz.std()
    Rtz = np.log(seldict['Rhos']) / 2
    Rtz = (Rtz - Rtz.mean()) / Rtz.std()
    KRr = stats.zscore(np.log(seldict['Kappas']) / np.log(seldict['Rhos']))
    cnz = (countnoise - countnoise.mean()) / countnoise.std()
    Dz = stats.zscore(np.arctanh(dice_tbl[:, 0] + 0.001))
    fz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
    """
    Step 3: Make initial guess of where BOLD components are and use DBSCAN
    to exclude noise components and find a sample set of 'good' components
    """
    LGR.debug('Making initial guess of BOLD components')
    # epsmap is [index,level of overlap with dicemask,
    # number of high Rho components]
    F05, F025, F01 = utils.getfbounds(n_echos)
    epsmap = []
    Rhos_sorted = np.array(sorted(seldict['Rhos']))[::-1]
    # Make an initial guess as to number of good components based on
    # consensus of control points across Rhos and Kappas
    KRcutguesses = [
        getelbow_mod(seldict['Rhos']),
        getelbow_cons(seldict['Rhos']),
        getelbow_aggr(seldict['Rhos']),
        getelbow_mod(seldict['Kappas']),
        getelbow_cons(seldict['Kappas']),
        getelbow_aggr(seldict['Kappas'])
    ]
    Khighelbowval = stats.scoreatpercentile([
        getelbow_mod(seldict['Kappas'], val=True),
        getelbow_cons(seldict['Kappas'], val=True),
        getelbow_aggr(seldict['Kappas'], val=True)
    ] + list(utils.getfbounds(n_echos)),
                                            75,
                                            interpolation_method='lower')
    KRcut = np.median(KRcutguesses)

    # only use exclusive when inclusive is extremely inclusive - double KRcut
    cond1 = getelbow_cons(seldict['Kappas']) > KRcut * 2
    cond2 = getelbow_mod(seldict['Kappas'], val=True) < F01
    if cond1 and cond2:
        Kcut = getelbow_mod(seldict['Kappas'], val=True)
    else:
        Kcut = getelbow_cons(seldict['Kappas'], val=True)
    # only use inclusive when exclusive is extremely exclusive - half KRcut
    # (remember for Rho inclusive is higher, so want both Kappa and Rho
    # to default to lower)
    if getelbow_cons(seldict['Rhos']) > KRcut * 2:
        Rcut = getelbow_mod(seldict['Rhos'], val=True)
    # for above, consider something like:
    # min([getelbow_mod(Rhos,True),sorted(Rhos)[::-1][KRguess] ])
    else:
        Rcut = getelbow_cons(seldict['Rhos'], val=True)
    if Rcut > Kcut:
        Kcut = Rcut  # Rcut should never be higher than Kcut
    KRelbow = utils.andb([seldict['Kappas'] > Kcut, seldict['Rhos'] < Rcut])
    # Make guess of Kundu et al 2011 plus remove high frequencies,
    # generally high variance, and high variance given low Kappa
    tt_lim = stats.scoreatpercentile(
        tt_table[tt_table[:, 0] > 0, 0], 75, interpolation_method='lower') / 3
    KRguess = np.setdiff1d(
        np.setdiff1d(nc[KRelbow == 2], rej),
        np.union1d(
            nc[tt_table[:, 0] < tt_lim],
            np.union1d(
                np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([
                    seldict['varex'] > 0.5 *
                    sorted(seldict['varex'])[::-1][int(KRcut)],
                    seldict['Kappas'] < 2 * Kcut
                ]) == 2])))
    guessmask = np.zeros(len(nc))
    guessmask[KRguess] = 1

    # Throw lower-risk bad components out
    rejB = ncl[utils.andb([
        tt_table[ncl, 0] < 0,
        seldict['varex'][ncl] > np.median(seldict['varex']), ncl > KRcut
    ]) == 3]
    rej = np.union1d(rej, rejB)
    ncl = np.setdiff1d(ncl, rej)

    LGR.debug('Using DBSCAN to find optimal set of "good" BOLD components')
    for ii in range(20000):
        eps = .005 + ii * .005
        db = DBSCAN(eps=eps, min_samples=3).fit(fz.T)

        # it would be great to have descriptive names, here
        # DBSCAN found at least three non-noisy clusters
        cond1 = db.labels_.max() > 1
        # DBSCAN didn't detect more classes than the total # of components / 6
        cond2 = db.labels_.max() < len(nc) / 6
        # TODO: confirm if 0 is a special label for DBSCAN
        # my intuition here is that we're confirming DBSCAN labelled previously
        # rejected components as noise (i.e., no overlap between `rej` and
        # labelled DBSCAN components)
        cond3 = np.intersect1d(rej, nc[db.labels_ == 0]).shape[0] == 0
        # DBSCAN labelled less than half of the total components as noisy
        cond4 = np.array(db.labels_ == -1, dtype=int).sum() / float(
            len(nc)) < .5

        if cond1 and cond2 and cond3 and cond4:
            epsmap.append([
                ii,
                utils.dice(guessmask, db.labels_ == 0),
                np.intersect1d(
                    nc[db.labels_ == 0],
                    nc[seldict['Rhos'] > getelbow_mod(Rhos_sorted, val=True)]).
                shape[0]
            ])
        db = None

    epsmap = np.array(epsmap)
    LGR.debug('Found DBSCAN solutions for {}/20000 eps resolutions'.format(
        len(epsmap)))
    group0 = []
    dbscanfailed = False
    if len(epsmap) != 0:
        # Select index that maximizes Dice with guessmask but first
        # minimizes number of higher Rho components
        ii = int(
            epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]),
                                    1], 0), 0])
        LGR.debug('Component selection tuning: {:.05f}'.format(
            epsmap[:, 1].max()))
        db = DBSCAN(eps=.005 + ii * .005, min_samples=3).fit(fz.T)
        ncl = nc[db.labels_ == 0]
        ncl = np.setdiff1d(ncl, rej)
        ncl = np.setdiff1d(ncl, ncl[ncl > len(nc) - len(rej)])
        group0 = ncl.copy()
        group_n1 = nc[db.labels_ == -1]
        to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
    if len(group0) == 0 or len(group0) < len(KRguess) * .5:
        dbscanfailed = True
        LGR.debug('DBSCAN guess failed; using elbow guess method instead')
        ncl = np.setdiff1d(
            np.setdiff1d(nc[KRelbow == 2], rej),
            np.union1d(
                nc[tt_table[:, 0] < tt_lim],
                np.union1d(
                    np.union1d(nc[spz > 1], nc[Vz > 2]), nc[utils.andb([
                        seldict['varex'] > 0.5 *
                        sorted(seldict['varex'])[::-1][int(KRcut)],
                        seldict['Kappas'] < 2 * Kcut
                    ]) == 2])))
        group0 = ncl.copy()
        group_n1 = []
        to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
    if len(group0) < 2 or (len(group0) < 4
                           and float(len(rej)) / len(group0) > 3):
        LGR.warning('Extremely limited reliable BOLD signal space! '
                    'Not filtering components beyond BOLD/non-BOLD guesses.')
        midkfailed = True
        min_acc = np.array([])
        if len(group0) != 0:
            # For extremes, building in a 20% tolerance
            toacc_hi = np.setdiff1d(
                nc[utils.andb([
                    fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025,
                    Vz > -2
                ]) == 3], np.union1d(group0, rej))
            min_acc = np.union1d(group0, toacc_hi)
            to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
        else:
            toacc_hi = []
            min_acc = []
        diagstep_keys = [
            'Rejected components', 'Kappa-Rho cut point', 'Kappa cut point',
            'Rho cut point', 'DBSCAN failed to converge',
            'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
            'min_acc', 'toacc_hi'
        ]
        diagstep_vals = [
            list(rej), KRcut, Kcut, Rcut, dbscanfailed, midkfailed,
            list(KRguess),
            list(min_acc),
            list(toacc_hi)
        ]
        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)),
                      ofh,
                      indent=4,
                      sort_keys=True,
                      default=str)
        return list(sorted(min_acc)), list(sorted(rej)), [], list(
            sorted(to_clf))

    # Find additional components to reject based on Dice - doing this here
    # since Dice is a little unstable, need to reference group0
    rej_supp = []
    dice_rej = False
    if not dbscanfailed and len(rej) + len(group0) < 0.75 * len(nc):
        dice_rej = True
        rej_supp = np.setdiff1d(
            np.setdiff1d(
                np.union1d(rej, nc[dice_tbl[nc, 0] <= dice_tbl[nc, 1]]),
                group0), group_n1)
        rej = np.union1d(rej, rej_supp)

    # Temporal features
    # larger is worse - spike
    mmix_kurt_z = (mmix_kurt -
                   mmix_kurt[group0].mean()) / mmix_kurt[group0].std()
    # smaller is worse - drift
    mmix_std_z = -1 * (
        (mmix_std - mmix_std[group0].mean()) / mmix_std[group0].std())
    mmix_kurt_z_max = np.max([mmix_kurt_z, mmix_std_z], 0)
    """
    Step 2: Classify midk and ignore using separate SVMs for
    different variance regimes
    # To render hyperplane:
    min_x = np.min(spz2);max_x=np.max(spz2)
    # plotting separating hyperplane
        ww = clf_.coef_[0]
        aa = -ww[0] / ww[1]
        # make sure the next line is long enough
        xx = np.linspace(min_x - 2, max_x + 2)
        yy = aa * xx - (clf_.intercept_[0]) / ww[1]
        plt.plot(xx, yy, '-')
    """
    LGR.debug('Attempting to classify midk components')
    # Tried getting rid of accepting based on SVM altogether,
    # now using only rejecting
    toacc_hi = np.setdiff1d(
        nc[utils.andb([
            fdist <= np.max(fdist[group0]), seldict['Rhos'] < F025, Vz > -2
        ]) == 3], np.union1d(group0, rej))
    toacc_lo = np.intersect1d(
        to_clf, nc[utils.andb([
            spz < 1, Rz < 0, mmix_kurt_z_max < 5, Dz > -1, Tz > -1, Vz < 0,
            seldict['Kappas'] >= F025, fdist < 3 *
            np.percentile(fdist[group0], 98)
        ]) == 8])
    midk_clf, clf_ = do_svm(fproj_arr_val[:, np.union1d(group0, rej)].T,
                            [0] * len(group0) + [1] * len(rej),
                            fproj_arr_val[:, to_clf].T,
                            svmtype=2)
    midk = np.setdiff1d(
        to_clf[utils.andb([
            midk_clf == 1,
            seldict['varex'][to_clf] > np.median(seldict['varex'][group0])
        ]) == 2], np.union1d(toacc_hi, toacc_lo))
    # only use SVM to augment toacc_hi only if toacc_hi isn't already
    # conflicting with SVM choice
    if len(
            np.intersect1d(
                to_clf[utils.andb([midk_clf == 1, Vz[to_clf] > 0]) == 2],
                toacc_hi)) == 0:
        svm_acc_fail = True
        toacc_hi = np.union1d(toacc_hi, to_clf[midk_clf == 0])
    else:
        svm_acc_fail = False
    """
    Step 3: Compute variance associated with low T2* areas
    (e.g. draining veins and low T2* areas)
    # To write out veinmask
    veinout = np.zeros(t2s.shape)
    veinout[t2s!=0] = veinmaskf
    utils.filewrite(veinout, 'veinmaskf', ref_img)
    veinBout = utils.unmask(veinmaskB, mask)
    utils.filewrite(veinBout, 'veins50', ref_img)
    """
    LGR.debug(
        'Computing variance associated with low T2* areas (e.g., draining veins)'
    )
    tsoc_B_Zcl = np.zeros(seldict['tsoc_B'].shape)
    tsoc_B_Zcl[seldict['Z_clmaps'] != 0] = np.abs(
        seldict['tsoc_B'])[seldict['Z_clmaps'] != 0]
    sig_B = [
        stats.scoreatpercentile(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii], 25)
        if len(tsoc_B_Zcl[tsoc_B_Zcl[:, ii] != 0, ii]) != 0 else 0 for ii in nc
    ]
    sig_B = np.abs(seldict['tsoc_B']) > np.tile(
        sig_B, [seldict['tsoc_B'].shape[0], 1])

    veinmask = utils.andb([
        t2s < stats.scoreatpercentile(
            t2s[t2s != 0], 15, interpolation_method='lower'), t2s != 0
    ]) == 2
    veinmaskf = veinmask[mask]
    veinR = np.array(sig_B[veinmaskf].sum(0),
                     dtype=float) / sig_B[~veinmaskf].sum(0)
    veinR[np.isnan(veinR)] = 0

    veinc = np.union1d(rej, midk)
    rej_veinRZ = ((veinR - veinR[veinc].mean()) / veinR[veinc].std())[veinc]
    rej_veinRZ[rej_veinRZ < 0] = 0
    rej_veinRZ[countsigFR2[veinc] > np.array(veinmaskf, dtype=int).sum()] = 0
    t2s_lim = [
        stats.scoreatpercentile(t2s[t2s != 0],
                                50,
                                interpolation_method='lower'),
        stats.scoreatpercentile(
            t2s[t2s != 0], 80, interpolation_method='lower') / 2
    ]
    phys_var_zs = []
    for t2sl_i in range(len(t2s_lim)):
        t2sl = t2s_lim[t2sl_i]
        veinW = sig_B[:, veinc] * np.tile(rej_veinRZ, [sig_B.shape[0], 1])
        veincand = utils.unmask(
            utils.andb([
                s0[t2s != 0] < np.median(s0[t2s != 0]), t2s[t2s != 0] < t2sl
            ]) >= 1, t2s != 0)[mask]
        veinW[~veincand] = 0
        invein = veinW.sum(
            axis=1)[(utils.unmask(veinmaskf, mask) *
                     utils.unmask(veinW.sum(axis=1) > 1, mask))[mask]]
        minW = 10 * (np.log10(invein).mean()) - 1 * 10**(
            np.log10(invein).std())
        veinmaskB = veinW.sum(axis=1) > minW
        tsoc_Bp = seldict['tsoc_B'].copy()
        tsoc_Bp[tsoc_Bp < 0] = 0
        vvex = np.array([
            (tsoc_Bp[veinmaskB, ii]**2.).sum() / (tsoc_Bp[:, ii]**2.).sum()
            for ii in nc
        ])
        group0_res = np.intersect1d(KRguess, group0)
        phys_var_zs.append(
            (vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
        veinBout = utils.unmask(veinmaskB, mask)
        utils.filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)

    # Mask to sample veins
    phys_var_z = np.array(phys_var_zs).max(0)
    Vz2 = (varex_ - varex_[group0].mean()) / varex_[group0].std()
    """
    Step 4: Learn joint TE-dependence spatial and temporal models to move
    remaining artifacts to ignore class
    """
    LGR.debug(
        'Learning joint TE-dependence spatial/temporal models to ignore remaining artifacts'
    )

    to_ign = []

    minK_ign = np.max([F05, getelbow_cons(seldict['Kappas'], val=True)])
    newcest = len(group0) + len(
        toacc_hi[seldict['Kappas'][toacc_hi] > minK_ign])
    phys_art = np.setdiff1d(
        nc[utils.andb([phys_var_z > 3.5, seldict['Kappas'] < minK_ign]) == 2],
        group0)
    rank_diff = stats.rankdata(phys_var_z) - stats.rankdata(seldict['Kappas'])
    phys_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([phys_var_z > 2, rank_diff > newcest /
                           2, Vz2 > -1]) == 3], group0), phys_art)
    # Want to replace field_art with an acf/SVM based approach
    # instead of a kurtosis/filter one
    field_art = np.setdiff1d(
        nc[utils.andb([mmix_kurt_z_max > 5, seldict['Kappas'] < minK_ign]) ==
           2], group0)
    field_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([
                mmix_kurt_z_max > 2,
                (stats.rankdata(mmix_kurt_z_max) -
                 stats.rankdata(seldict['Kappas'])) > newcest /
                2, Vz2 > 1, seldict['Kappas'] < F01
            ]) == 4], group0), field_art)
    field_art = np.union1d(
        np.setdiff1d(
            nc[utils.andb([
                mmix_kurt_z_max > 3, Vz2 > 3,
                seldict['Rhos'] > np.percentile(seldict['Rhos'][group0], 75)
            ]) == 3], group0), field_art)
    field_art = np.union1d(
        np.setdiff1d(nc[utils.andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                     group0), field_art)
    misc_art = np.setdiff1d(
        nc[utils.andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest /
                       2, seldict['Kappas'] < Khighelbowval]) == 2], group0)
    ign_cand = np.unique(list(field_art) + list(phys_art) + list(misc_art))
    midkrej = np.union1d(midk, rej)
    to_ign = np.setdiff1d(list(ign_cand), midkrej)
    toacc = np.union1d(toacc_hi, toacc_lo)
    ncl = np.setdiff1d(np.union1d(ncl, toacc), np.union1d(to_ign, midkrej))
    ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
    orphan = np.setdiff1d(nc,
                          list(ncl) + list(to_ign) + list(midk) + list(rej))

    # Last ditch effort to save some transient components
    if not strict_mode:
        Vz3 = (varex_ - varex_[ncl].mean()) / varex_[ncl].std()
        ncl = np.union1d(
            ncl,
            np.intersect1d(
                orphan, nc[utils.andb([
                    seldict['Kappas'] > F05, seldict['Rhos'] < F025,
                    seldict['Kappas'] > seldict['Rhos'], Vz3 <= -1, Vz3 > -3,
                    mmix_kurt_z_max < 2.5
                ]) == 6]))
        ign = np.setdiff1d(nc, list(ncl) + list(midk) + list(rej))
        orphan = np.setdiff1d(
            nc,
            list(ncl) + list(to_ign) + list(midk) + list(rej))

    if savecsdiag:
        diagstep_keys = [
            'Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
            'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
            'Dice rejected', 'rej_supp', 'to_clf', 'Mid-kappa components',
            'svm_acc_fail', 'toacc_hi', 'toacc_lo', 'Field artifacts',
            'Physiological artifacts', 'Miscellaneous artifacts', 'ncl',
            'Ignored components'
        ]
        diagstep_vals = [
            list(rej),
            KRcut.item(),
            Kcut.item(),
            Rcut.item(), dbscanfailed,
            list(KRguess), dice_rej,
            list(rej_supp),
            list(to_clf),
            list(midk), svm_acc_fail,
            list(toacc_hi),
            list(toacc_lo),
            list(field_art),
            list(phys_art),
            list(misc_art),
            list(ncl),
            list(ign)
        ]

        with open('csstepdata.json', 'w') as ofh:
            json.dump(dict(zip(diagstep_keys, diagstep_vals)),
                      ofh,
                      indent=4,
                      sort_keys=True,
                      default=str)
        allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
        np.savetxt('csdata.txt', allfz)

    return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(
        sorted(ign))
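
Much of the selection logic above leans on two small helpers from tedana's utils module. The following is a rough stand-in under assumed semantics (simplified, not the library implementations): andb counts, per element, how many boolean conditions hold, and dice is the Dice overlap of two binary maps.

import numpy as np

def andb_sketch(arrs):
    # Elementwise count of True values across a list of boolean arrays, so
    # `andb_sketch([...]) == k` selects elements meeting exactly k criteria.
    return np.asarray(arrs, dtype=int).sum(axis=0)

def dice_sketch(a, b):
    # Dice coefficient: 2 * |A intersect B| / (|A| + |B|)
    a, b = np.asarray(a, dtype=bool), np.asarray(b, dtype=bool)
    denom = a.sum() + b.sum()
    return 0. if denom == 0 else 2. * np.logical_and(a, b).sum() / denom

# e.g. selecting components where both of two criteria hold:
# rej = ncl[andb_sketch([rhos > kappas, countsigFS0 > countsigFR2]) > 0]
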
Example #34
0
def generate_metrics(
    data_cat,
    data_optcom,
    mixing,
    adaptive_mask,
    tes,
    io_generator,
    label,
    metrics=None,
):
    """Fit TE-dependence and -independence models to components.

    Parameters
    ----------
    data_cat : (S x E x T) array_like
        Input data, where `S` is samples, `E` is echos, and `T` is time
    data_optcom : (S x T) array_like
        Optimally combined data
    mixing : (T x C) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data_cat`
    adaptive_mask : (S) array_like
        Array where each value indicates the number of echoes with good signal
        for that voxel. This mask may be thresholded; for example, with values
        less than 3 set to 0.
        For more information on thresholding, see `make_adaptive_mask`.
    tes : list
        List of echo times associated with `data_cat`, in milliseconds
    io_generator : tedana.io.OutputGenerator
        The output generator object for this workflow
    label : str in ['ICA', 'PCA']
        The label for this metric generation type
    metrics : list
        List of metrics to return

    Returns
    -------
    comptable : (C x X) :obj:`pandas.DataFrame`
        Component metric table. One row for each component, with a column for
        each metric. The index is the component number.
    """
    # Load metric dependency tree from json file
    dependency_config = op.join(utils.get_resource_path(), "config", "metrics.json")
    dependency_config = io.load_json(dependency_config)

    if metrics is None:
        metrics = ["map weight"]
    RepLGR.info("The following metrics were calculated: {}.".format(", ".join(metrics)))

    if not (data_cat.shape[0] == data_optcom.shape[0] == adaptive_mask.shape[0]):
        raise ValueError(
            "First dimensions (number of samples) of data_cat ({0}), "
            "data_optcom ({1}), and adaptive_mask ({2}) do not "
            "match".format(data_cat.shape[0], data_optcom.shape[0], adaptive_mask.shape[0])
        )
    elif data_cat.shape[1] != len(tes):
        raise ValueError(
            "Second dimension of data_cat ({0}) does not match "
            "number of echoes provided (tes; "
            "{1})".format(data_cat.shape[1], len(tes))
        )
    elif not (data_cat.shape[2] == data_optcom.shape[1] == mixing.shape[0]):
        raise ValueError(
            "Number of volumes in data_cat ({0}), "
            "data_optcom ({1}), and mixing ({2}) do not "
            "match.".format(data_cat.shape[2], data_optcom.shape[1], mixing.shape[0])
        )

    # Derive mask from thresholded adaptive mask
    mask = adaptive_mask >= 3

    # Apply masks before anything else
    data_cat = data_cat[mask, ...]
    data_optcom = data_optcom[mask, :]
    adaptive_mask = adaptive_mask[mask]

    # Ensure that echo times are in an array, rather than a list
    tes = np.asarray(tes)

    # Get reference image from io_generator
    ref_img = io_generator.reference_img

    required_metrics = dependency_resolver(
        dependency_config["dependencies"],
        metrics,
        dependency_config["inputs"],
    )

    # Use copy to avoid changing the original variable outside of this function
    mixing = mixing.copy()

    # Generate the component table, which will be filled out, column by column,
    # throughout this function
    n_components = mixing.shape[1]
    comptable = pd.DataFrame(index=np.arange(n_components, dtype=int))
    comptable["Component"] = [
        io.add_decomp_prefix(comp, prefix=label, max_value=comptable.shape[0])
        for comp in comptable.index.values
    ]

    # Metric maps
    # Maps will be stored as arrays in an easily-indexable dictionary
    metric_maps = {}
    if "map weight" in required_metrics:
        LGR.info("Calculating weight maps")
        metric_maps["map weight"] = dependence.calculate_weights(data_optcom, mixing)
        signs = determine_signs(metric_maps["map weight"], axis=0)
        comptable["optimal sign"] = signs
        metric_maps["map weight"], mixing = flip_components(
            metric_maps["map weight"], mixing, signs=signs
        )

    if "map optcom betas" in required_metrics:
        LGR.info("Calculating parameter estimate maps for optimally combined data")
        metric_maps["map optcom betas"] = dependence.calculate_betas(data_optcom, mixing)
        if io_generator.verbose:
            metric_maps["map echo betas"] = dependence.calculate_betas(data_cat, mixing)

    if "map percent signal change" in required_metrics:
        LGR.info("Calculating percent signal change maps")
        # used in kundu v3.2 tree
        metric_maps["map percent signal change"] = dependence.calculate_psc(
            data_optcom, metric_maps["map optcom betas"]
        )

    if "map Z" in required_metrics:
        LGR.info("Calculating z-statistic maps")
        metric_maps["map Z"] = dependence.calculate_z_maps(metric_maps["map weight"])

        if io_generator.verbose:
            io_generator.save_file(
                utils.unmask(metric_maps["map Z"] ** 2, mask),
                label + " component weights img",
            )

    if ("map FT2" in required_metrics) or ("map FS0" in required_metrics):
        LGR.info("Calculating F-statistic maps")
        m_T2, m_S0, p_m_T2, p_m_S0 = dependence.calculate_f_maps(
            data_cat, metric_maps["map Z"], mixing, adaptive_mask, tes
        )
        metric_maps["map FT2"] = m_T2
        metric_maps["map FS0"] = m_S0
        metric_maps["map predicted T2"] = p_m_T2
        metric_maps["map predicted S0"] = p_m_S0

    if "map Z clusterized" in required_metrics:
        LGR.info("Thresholding z-statistic maps")
        z_thresh = 1.95
        metric_maps["map Z clusterized"] = dependence.threshold_map(
            metric_maps["map Z"], mask, ref_img, z_thresh
        )

    if "map FT2 clusterized" in required_metrics:
        LGR.info("Calculating T2* F-statistic maps")
        f_thresh, _, _ = getfbounds(len(tes))
        metric_maps["map FT2 clusterized"] = dependence.threshold_map(
            metric_maps["map FT2"], mask, ref_img, f_thresh
        )

    if "map FS0 clusterized" in required_metrics:
        LGR.info("Calculating S0 F-statistic maps")
        f_thresh, _, _ = getfbounds(len(tes))
        metric_maps["map FS0 clusterized"] = dependence.threshold_map(
            metric_maps["map FS0"], mask, ref_img, f_thresh
        )

    # Intermediate metrics
    if "countsigFT2" in required_metrics:
        LGR.info("Counting significant voxels in T2* F-statistic maps")
        comptable["countsigFT2"] = dependence.compute_countsignal(
            metric_maps["map FT2 clusterized"]
        )

    if "countsigFS0" in required_metrics:
        LGR.info("Counting significant voxels in S0 F-statistic maps")
        comptable["countsigFS0"] = dependence.compute_countsignal(
            metric_maps["map FS0 clusterized"]
        )

    # Back to maps
    if "map beta T2 clusterized" in required_metrics:
        LGR.info("Thresholding optimal combination beta maps to match T2* F-statistic maps")
        metric_maps["map beta T2 clusterized"] = dependence.threshold_to_match(
            metric_maps["map optcom betas"], comptable["countsigFT2"], mask, ref_img
        )

    if "map beta S0 clusterized" in required_metrics:
        LGR.info("Thresholding optimal combination beta maps to match S0 F-statistic maps")
        metric_maps["map beta S0 clusterized"] = dependence.threshold_to_match(
            metric_maps["map optcom betas"], comptable["countsigFS0"], mask, ref_img
        )

    # Dependence metrics
    if ("kappa" in required_metrics) or ("rho" in required_metrics):
        LGR.info("Calculating kappa and rho")
        comptable["kappa"], comptable["rho"] = dependence.calculate_dependence_metrics(
            F_T2_maps=metric_maps["map FT2"],
            F_S0_maps=metric_maps["map FS0"],
            Z_maps=metric_maps["map Z"],
        )

    # Generic metrics
    if "variance explained" in required_metrics:
        LGR.info("Calculating variance explained")
        comptable["variance explained"] = dependence.calculate_varex(
            metric_maps["map optcom betas"]
        )

    if "normalized variance explained" in required_metrics:
        LGR.info("Calculating normalized variance explained")
        comptable["normalized variance explained"] = dependence.calculate_varex_norm(
            metric_maps["map weight"]
        )

    # Spatial metrics
    if "dice_FT2" in required_metrics:
        LGR.info(
            "Calculating DSI between thresholded T2* F-statistic and "
            "optimal combination beta maps"
        )
        comptable["dice_FT2"] = dependence.compute_dice(
            metric_maps["map beta T2 clusterized"],
            metric_maps["map FT2 clusterized"],
            axis=0,
        )

    if "dice_FS0" in required_metrics:
        LGR.info(
            "Calculating DSI between thresholded S0 F-statistic and "
            "optimal combination beta maps"
        )
        comptable["dice_FS0"] = dependence.compute_dice(
            metric_maps["map beta S0 clusterized"],
            metric_maps["map FS0 clusterized"],
            axis=0,
        )

    if "signal-noise_t" in required_metrics:
        LGR.info("Calculating signal-noise t-statistics")
        RepLGR.info(
            "A t-test was performed between the distributions of T2*-model "
            "F-statistics associated with clusters (i.e., signal) and "
            "non-cluster voxels (i.e., noise) to generate a t-statistic "
            "(metric signal-noise_z) and p-value (metric signal-noise_p) "
            "measuring relative association of the component to signal "
            "over noise."
        )
        (
            comptable["signal-noise_t"],
            comptable["signal-noise_p"],
        ) = dependence.compute_signal_minus_noise_t(
            Z_maps=metric_maps["map Z"],
            Z_clmaps=metric_maps["map Z clusterized"],
            F_T2_maps=metric_maps["map FT2"],
        )

    if "signal-noise_z" in required_metrics:
        LGR.info("Calculating signal-noise z-statistics")
        RepLGR.info(
            "A t-test was performed between the distributions of T2*-model "
            "F-statistics associated with clusters (i.e., signal) and "
            "non-cluster voxels (i.e., noise) to generate a z-statistic "
            "(metric signal-noise_z) and p-value (metric signal-noise_p) "
            "measuring relative association of the component to signal "
            "over noise."
        )
        (
            comptable["signal-noise_z"],
            comptable["signal-noise_p"],
        ) = dependence.compute_signal_minus_noise_z(
            Z_maps=metric_maps["map Z"],
            Z_clmaps=metric_maps["map Z clusterized"],
            F_T2_maps=metric_maps["map FT2"],
        )

    if "countnoise" in required_metrics:
        LGR.info("Counting significant noise voxels from z-statistic maps")
        RepLGR.info(
            "The number of significant voxels not from clusters was "
            "calculated for each component."
        )
        comptable["countnoise"] = dependence.compute_countnoise(
            metric_maps["map Z"], metric_maps["map Z clusterized"]
        )

    # Composite metrics
    if "d_table_score" in required_metrics:
        LGR.info("Calculating decision table score")
        comptable["d_table_score"] = dependence.generate_decision_table_score(
            comptable["kappa"],
            comptable["dice_FT2"],
            comptable["signal-noise_t"],
            comptable["countnoise"],
            comptable["countsigFT2"],
        )

    # Write verbose metrics if needed
    if io_generator.verbose:
        write_betas = "map echo betas" in metric_maps
        write_T2S0 = "map predicted T2" in metric_maps
        if write_betas:
            betas = metric_maps["map echo betas"]
        if write_T2S0:
            pred_T2_maps = metric_maps["map predicted T2"]
            pred_S0_maps = metric_maps["map predicted S0"]

        for i_echo in range(len(tes)):
            if write_betas:
                echo_betas = betas[:, i_echo, :]
                io_generator.save_file(
                    utils.unmask(echo_betas, mask),
                    "echo weight " + label + " map split img",
                    echo=(i_echo + 1),
                )

            if write_T2S0:
                echo_pred_T2_maps = pred_T2_maps[:, i_echo, :]
                io_generator.save_file(
                    utils.unmask(echo_pred_T2_maps, mask),
                    "echo T2 " + label + " split img",
                    echo=(i_echo + 1),
                )

                echo_pred_S0_maps = pred_S0_maps[:, i_echo, :]
                io_generator.save_file(
                    utils.unmask(echo_pred_S0_maps, mask),
                    "echo S0 " + label + " split img",
                    echo=(i_echo + 1),
                )

    # Reorder component table columns based on previous tedana versions
    # NOTE: Some new columns will be calculated and columns may be reordered during
    # component selection
    preferred_order = (
        "Component",
        "kappa",
        "rho",
        "variance explained",
        "normalized variance explained",
        "estimated normalized variance explained",
        "countsigFT2",
        "countsigFS0",
        "dice_FT2",
        "dice_FS0",
        "countnoise",
        "signal-noise_t",
        "signal-noise_p",
        "d_table_score",
        "kappa ratio",
        "d_table_score_scrub",
        "classification",
        "rationale",
    )
    first_columns = [col for col in preferred_order if col in comptable.columns]
    other_columns = [col for col in comptable.columns if col not in preferred_order]
    comptable = comptable[first_columns + other_columns]

    return comptable
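
The kappa and rho values computed above are conventionally described in the ME-ICA literature as averages of the T2*- and S0-model F-statistic maps weighted by each component's squared z-map; the sketch below is a schematic stand-in for dependence.calculate_dependence_metrics under that assumption, not the library code.

import numpy as np

def kappa_rho_sketch(f_t2_maps, f_s0_maps, z_maps):
    # f_*_maps and z_maps are (S x C): one column per component.
    # Weight every voxel's F-statistic by its squared z-value.
    weights = z_maps ** 2
    kappa = np.average(f_t2_maps, axis=0, weights=weights)
    rho = np.average(f_s0_maps, axis=0, weights=weights)
    return kappa, rho
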
Example #35
0
def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
    """
    Splits `data` into denoised / noise / ignored time series and saves to disk

    Parameters
    ----------
    data : (S x T) array_like
        Input time series
    mmix : (C x T) array_like
        Mixing matrix for converting input data to component space, where `C`
        is components and `T` is the same as in `data`
    mask : (S,) array_like
        Boolean mask array
    ref_img : :obj:`str` or img_like
        Reference image to dictate how outputs are saved to disk
    suffix : :obj:`str`, optional
        Appended to name of saved files (before extension). Default: ''

    Returns
    -------
    varexpl : :obj:`float`
        Percent variance of data explained by extracted + retained components

    Notes
    -----
    This function writes out several files:

    ======================    =================================================
    Filename                  Content
    ======================    =================================================
    hik_ts_[suffix].nii       High-Kappa time series.
    midk_ts_[suffix].nii      Mid-Kappa time series.
    low_ts_[suffix].nii       Low-Kappa time series.
    dn_ts_[suffix].nii        Denoised time series.
    ======================    =================================================
    """
    acc = comptable[comptable.classification == 'accepted'].index.values
    rej = comptable[comptable.classification == 'rejected'].index.values

    # mask and de-mean data
    mdata = data[mask]
    dmdata = mdata.T - mdata.T.mean(axis=0)

    # get variance explained by retained components
    betas = get_coeffs(dmdata.T, mmix, mask=None)
    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
               (dmdata**2.).sum()) * 100
    LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))

    # create component and de-noised time series and save to files
    hikts = betas[:, acc].dot(mmix.T[acc, :])
    lowkts = betas[:, rej].dot(mmix.T[rej, :])
    dnts = data[mask] - lowkts

    if len(acc) != 0:
        fout = filewrite(utils.unmask(hikts, mask),
                         'hik_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout)))

    if len(rej) != 0:
        fout = filewrite(utils.unmask(lowkts, mask),
                         'lowk_ts_{0}'.format(suffix), ref_img)
        LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout)))

    fout = filewrite(utils.unmask(dnts, mask),
                     'dn_ts_{0}'.format(suffix), ref_img)
    LGR.info('Writing denoised time series: {}'.format(op.abspath(fout)))

    return varexpl
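
The variance-explained figure logged above is a coefficient of determination scaled to a percentage; a tiny numeric illustration with made-up values:

import numpy as np

dmdata = np.array([[1.0, -1.0, 2.0, -2.0],
                   [0.5, -0.5, 1.0, -1.0]])   # demeaned data (S x T)
recon = np.array([[0.9, -0.9, 1.8, -1.8],
                  [0.4, -0.4, 0.9, -0.9]])    # analogue of betas.dot(mmix.T)
varexpl = (1 - ((dmdata - recon) ** 2).sum() / (dmdata ** 2).sum()) * 100
# ~98.9% of the demeaned variance is captured by the reconstruction
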
Example #36
0
def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True):
    """
    """
    if report:
        RepLGR.info(
            "A monoexponential model was fit to the data at each voxel "
            "using log-linear regression in order to estimate T2* and S0 "
            "maps. For each voxel, the value from the adaptive mask was "
            "used to determine which echoes would be used to estimate T2* "
            "and S0.")
    n_samp, n_echos, n_vols = data_cat.shape

    echos_to_run = np.unique(adaptive_mask)
    if 1 in echos_to_run:
        echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
    echos_to_run = echos_to_run[echos_to_run >= 2]

    t2s_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    s0_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    echo_masks = np.zeros([n_samp, len(echos_to_run)], dtype=bool)

    for i_echo, echo_num in enumerate(echos_to_run):
        if echo_num == 2:
            voxel_idx = np.where(adaptive_mask <= echo_num)[0]
        else:
            voxel_idx = np.where(adaptive_mask == echo_num)[0]

        # Create echo masks to assign values to limited vs full maps later
        echo_mask = np.squeeze(echo_masks[..., i_echo])
        echo_mask[adaptive_mask == echo_num] = True
        echo_masks[..., i_echo] = echo_mask

        # perform log linear fit of echo times against MR signal
        # make DV matrix: samples x (time series * echos)
        data_2d = data_cat[voxel_idx, :echo_num, :].reshape(
            len(voxel_idx), -1).T
        log_data = np.log(np.abs(data_2d) + 1)

        # make IV matrix: intercept/TEs x (time series * echos)
        x = np.column_stack(
            [np.ones(echo_num), [-te for te in echo_times[:echo_num]]])
        X = np.repeat(x, n_vols, axis=0)

        # Log-linear fit
        betas = np.linalg.lstsq(X, log_data, rcond=None)[0]
        t2s = 1. / betas[1, :].T
        s0 = np.exp(betas[0, :]).T

        t2s_asc_maps[voxel_idx, i_echo] = t2s
        s0_asc_maps[voxel_idx, i_echo] = s0

    # create limited T2* and S0 maps
    t2s_limited = utils.unmask(t2s_asc_maps[echo_masks], adaptive_mask > 1)
    s0_limited = utils.unmask(s0_asc_maps[echo_masks], adaptive_mask > 1)

    # create full T2* maps with S0 estimation errors
    t2s_full, s0_full = t2s_limited.copy(), s0_limited.copy()
    t2s_full[adaptive_mask == 1] = t2s_asc_maps[adaptive_mask == 1, 0]
    s0_full[adaptive_mask == 1] = s0_asc_maps[adaptive_mask == 1, 0]

    return t2s_limited, s0_limited, t2s_full, s0_full
Example #37
0
def make_optcom(data, tes, mask, t2s=None, combmode='t2s', verbose=True):
    """
    Optimally combine BOLD data across TEs.

    Parameters
    ----------
    data : (S x E x T) :obj:`numpy.ndarray`
        Concatenated BOLD data.
    tes : (E,) :obj:`numpy.ndarray`
        Array of TEs, in seconds.
    mask : (S,) :obj:`numpy.ndarray`
        Boolean brain mask.
    t2s : (S [x T]) :obj:`numpy.ndarray` or None, optional
        Estimated T2* values. Only required if combmode = 't2s'.
        Default is None.
    combmode : {'t2s', 'paid'}, optional
        How to combine data. Either 'paid' or 't2s'. If 'paid', argument 't2s'
        is not required. Default is 't2s'.
    verbose : :obj:`bool`, optional
        Whether to print status updates. Default is True.

    Returns
    -------
    combined : (S x T) :obj:`numpy.ndarray`
        Optimally combined data.

    Notes
    -----
    1.  Estimate voxel- and TE-specific weights based on estimated
        :math:`T_2^*`:

            .. math::
                w(T_2^*)_n = \\frac{TE_n * exp(\\frac{-TE}\
                {T_{2(est)}^*})}{\\sum TE_n * exp(\\frac{-TE}{T_{2(est)}^*})}
    2.  Perform weighted average per voxel and TR across TEs based on weights
        estimated in the previous step.
    """
    if data.ndim != 3:
        raise ValueError('Input data must be 3D (S x E x T)')

    if len(tes) != data.shape[1]:
        raise ValueError('Number of echos provided does not match second '
                         'dimension of input data: {0} != '
                         '{1}'.format(len(tes), data.shape[1]))

    if mask.ndim != 1:
        raise ValueError('Mask is not 1D')
    elif mask.shape[0] != data.shape[0]:
        raise ValueError('Mask and data do not have same number of '
                         'voxels/samples: {0} != {1}'.format(
                             mask.shape[0], data.shape[0]))

    if combmode not in ['t2s', 'paid']:
        raise ValueError("Argument 'combmode' must be either 't2s' or 'paid'")
    elif combmode == 't2s' and t2s is None:
        raise ValueError("Argument 't2s' must be supplied if 'combmode' is "
                         "set to 't2s'.")
    elif combmode == 'paid' and t2s is not None:
        LGR.warning("Argument 't2s' is not required if 'combmode' is 'paid'. "
                    "'t2s' array will not be used.")

    data = data[mask, :, :]  # mask out empty voxels/samples
    tes = np.array(tes)[np.newaxis, ...]  # (1 x E) array_like

    if combmode == 'paid':
        LGR.info(
            'Optimally combining data with parallel-acquired inhomogeneity '
            'desensitized (PAID) method')
        combined = _combine_paid(data, tes)
    else:
        if t2s.ndim == 1:
            msg = 'Optimally combining data with voxel-wise T2 estimates'
        else:
            msg = ('Optimally combining data with voxel- and volume-wise T2 '
                   'estimates')
        t2s = t2s[mask, ..., np.newaxis]  # mask out empty voxels/samples

        LGR.info(msg)
        combined = _combine_t2s(data, tes, t2s)

    combined = unmask(combined, mask)
    return combined
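
A hypothetical call, assuming `data` is an (S x E x T) array, `mask` a boolean (S,) array, and `t2s_limited` a T2* map from a prior decay fit (all names illustrative):

optcom = make_optcom(data, tes=[0.015, 0.039, 0.063], mask=mask,
                     t2s=t2s_limited, combmode='t2s')

# With combmode='paid', the t2s argument is not needed; the weights come
# from the mean signal per echo scaled by TE (see the 'ste' branch in
# Example #32) rather than from T2* estimates.
optcom_paid = make_optcom(data, tes=[0.015, 0.039, 0.063], mask=mask,
                          combmode='paid')
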
Example #38
0
def fit_decay(data, tes, mask, adaptive_mask, fittype):
    """
    Fit voxel-wise monoexponential decay models to `data`

    Parameters
    ----------
    data : (S x E [x T]) array_like
        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
        time
    tes : (E,) :obj:`list`
        Echo times
    mask : (S,) array_like
        Boolean array indicating samples that are consistently (i.e., across
        time AND echoes) non-zero
    adaptive_mask : (S,) array_like
        Valued array indicating number of echos that have sufficient signal in
        given sample
    fittype : {loglin, curvefit}
        The type of model fit to use

    Returns
    -------
    t2s_limited : (S,) :obj:`numpy.ndarray`
        Limited T2* map. The limited map only keeps the T2* values for data
        where there are at least two echos with good signal.
    s0_limited : (S,) :obj:`numpy.ndarray`
        Limited S0 map.  The limited map only keeps the S0 values for data
        where there are at least two echos with good signal.
    t2s_full : (S,) :obj:`numpy.ndarray`
        Full T2* map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the T2* estimate from the first two
        echoes.
    s0_full : (S,) :obj:`numpy.ndarray`
        Full S0 map. For voxels affected by dropout, with good signal from
        only one echo, the full map uses the S0 estimate from the first two
        echoes.

    Notes
    -----
    1.  Fit monoexponential decay function to all values for a given voxel
        across TRs, per TE, to estimate voxel-wise :math:`S_0` and
        :math:`T_2^*`:

        .. math::
            S(TE) = S_0 * exp(-R_2^* * TE)

            T_2^* = 1 / R_2^*

    2.  Replace infinite values in :math:`T_2^*` map with 500 and NaN values
        in :math:`S_0` map with 0.
    3.  Generate limited :math:`T_2^*` and :math:`S_0` maps containing
        estimates only for voxels with good signal in at least two echoes,
        and full maps that additionally fill one-good-echo voxels with
        estimates from the first two echoes.
    """
    if data.shape[1] != len(tes):
        raise ValueError(
            'Second dimension of data ({0}) does not match number '
            'of echoes provided (tes; {1})'.format(data.shape[1], len(tes)))
    elif not (data.shape[0] == mask.shape[0] == adaptive_mask.shape[0]):
        raise ValueError('First dimensions (number of samples) of data ({0}), '
                         'mask ({1}), and adaptive_mask ({2}) do not '
                         'match'.format(data.shape[0], mask.shape[0],
                                        adaptive_mask.shape[0]))

    data = data.copy()
    if data.ndim == 2:
        data = data[:, :, None]

    # Mask the inputs
    data_masked = data[mask, :, :]
    adaptive_mask_masked = adaptive_mask[mask]

    if fittype == 'loglin':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear(
            data_masked, tes, adaptive_mask_masked)
    elif fittype == 'curvefit':
        t2s_limited, s0_limited, t2s_full, s0_full = fit_monoexponential(
            data_masked, tes, adaptive_mask_masked)
    else:
        raise ValueError('Unknown fittype option: {}'.format(fittype))

    t2s_limited[np.isinf(t2s_limited)] = 500.  # why 500?
    # let's get rid of negative values, but keep zeros where limited != full
    t2s_limited[(adaptive_mask_masked > 1) & (t2s_limited <= 0)] = 1.
    t2s_limited = _apply_t2s_floor(t2s_limited, tes)
    s0_limited[np.isnan(s0_limited)] = 0.  # why 0?
    t2s_full[np.isinf(t2s_full)] = 500.  # why 500?
    t2s_full[t2s_full <= 0] = 1.  # let's get rid of negative values!
    t2s_full = _apply_t2s_floor(t2s_full, tes)
    s0_full[np.isnan(s0_full)] = 0.  # why 0?

    t2s_limited = utils.unmask(t2s_limited, mask)
    s0_limited = utils.unmask(s0_limited, mask)
    t2s_full = utils.unmask(t2s_full, mask)
    s0_full = utils.unmask(s0_full, mask)

    return t2s_limited, s0_limited, t2s_full, s0_full
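
Illustrative usage (array names are placeholders; shapes as in the docstring above):

t2s_lim, s0_lim, t2s_full, s0_full = fit_decay(
    data, tes=[15., 39., 63.], mask=mask, adaptive_mask=adaptive_mask,
    fittype='loglin')

# fittype='curvefit' swaps the log-linear fit for a nonlinear fit via
# fit_monoexponential (slower, generally more accurate).
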
Example #39
0
def fit_loglinear(data_cat, echo_times, adaptive_mask, report=True):
    """Fit monoexponential decay model with log-linear regression.

    The monoexponential decay function is fitted to all values for a given
    voxel across TRs, per TE, to estimate voxel-wise :math:`S_0` and :math:`T_2^*`.
    At a given voxel, only those echoes with "good signal", as indicated by the
    value of the voxel in the adaptive mask, are used.
    Therefore, for a voxel with an adaptive mask value of five, the first five
    echoes would be used to estimate T2* and S0.

    Parameters
    ----------
    data_cat : (S x E x T) :obj:`numpy.ndarray`
        Multi-echo data. S is samples, E is echoes, and T is timepoints.
    echo_times : (E,) array_like
        Echo times in milliseconds.
    adaptive_mask : (S,) :obj:`numpy.ndarray`
        Array where each value indicates the number of echoes with good signal
        for that voxel.
    report : :obj:`bool`, optional
        Whether to log a description of this step or not. Default is True.

    Returns
    -------
    t2s_limited, s0_limited, t2s_full, s0_full: (S,) :obj:`numpy.ndarray`
        T2* and S0 estimate maps.

    Notes
    -----
    The approach used in this function involves transforming the raw signal values
    (:math:`log(|data| + 1)`) and then fitting a line to the transformed data using
    ordinary least squares.
    This results in two parameter estimates: one for the slope  and one for the intercept.
    The slope estimate is inverted (i.e., 1 / slope) to get  :math:`T_2^*`,
    while the intercept estimate is exponentiated (i.e., e^intercept) to get :math:`S_0`.

    This method is faster, but less accurate, than the nonlinear approach.
    """
    if report:
        RepLGR.info(
            "A monoexponential model was fit to the data at each voxel "
            "using log-linear regression in order to estimate T2* and S0 "
            "maps. For each voxel, the value from the adaptive mask was "
            "used to determine which echoes would be used to estimate T2* "
            "and S0.")
    n_samp, n_echos, n_vols = data_cat.shape

    echos_to_run = np.unique(adaptive_mask)
    if 1 in echos_to_run:
        echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
    echos_to_run = echos_to_run[echos_to_run >= 2]

    t2s_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    s0_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    echo_masks = np.zeros([n_samp, len(echos_to_run)], dtype=bool)

    for i_echo, echo_num in enumerate(echos_to_run):
        if echo_num == 2:
            voxel_idx = np.where(adaptive_mask <= echo_num)[0]
        else:
            voxel_idx = np.where(adaptive_mask == echo_num)[0]

        # Create echo masks to assign values to limited vs full maps later
        echo_mask = np.squeeze(echo_masks[..., i_echo])
        echo_mask[adaptive_mask == echo_num] = True
        echo_masks[..., i_echo] = echo_mask

        # perform log linear fit of echo times against MR signal
        # make DV matrix: samples x (time series * echos)
        data_2d = data_cat[voxel_idx, :echo_num, :].reshape(
            len(voxel_idx), -1).T
        log_data = np.log(np.abs(data_2d) + 1)

        # make IV matrix: intercept/TEs x (time series * echos)
        x = np.column_stack(
            [np.ones(echo_num), [-te for te in echo_times[:echo_num]]])
        X = np.repeat(x, n_vols, axis=0)

        # Log-linear fit
        betas = np.linalg.lstsq(X, log_data, rcond=None)[0]
        t2s = 1. / betas[1, :].T
        s0 = np.exp(betas[0, :]).T

        t2s_asc_maps[voxel_idx, i_echo] = t2s
        s0_asc_maps[voxel_idx, i_echo] = s0

    # create limited T2* and S0 maps
    t2s_limited = utils.unmask(t2s_asc_maps[echo_masks], adaptive_mask > 1)
    s0_limited = utils.unmask(s0_asc_maps[echo_masks], adaptive_mask > 1)

    # create full T2* maps with S0 estimation errors
    t2s_full, s0_full = t2s_limited.copy(), s0_limited.copy()
    t2s_full[adaptive_mask == 1] = t2s_asc_maps[adaptive_mask == 1, 0]
    s0_full[adaptive_mask == 1] = s0_asc_maps[adaptive_mask == 1, 0]

    return t2s_limited, s0_limited, t2s_full, s0_full
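
A small synthetic check of the adaptive-mask behaviour described above; this assumes the surrounding module context (utils.unmask available) and uses made-up values:

import numpy as np

echo_times = [15., 39., 63.]                  # ms
true_t2s = np.array([30., 40., 50., 60.])     # one value per voxel
true_s0 = np.array([800., 900., 1000., 1100.])
n_vols = 5

# Noise-free decay curves, constant across volumes: (S x E x T)
tes_arr = np.array(echo_times)
data_cat = (true_s0[:, None, None]
            * np.exp(-tes_arr[None, :, None] / true_t2s[:, None, None])
            * np.ones((1, 1, n_vols)))

# First two voxels have good signal in two echoes, last two in all three,
# so they are fit with echoes 1-2 and 1-3, respectively.
adaptive_mask = np.array([2, 2, 3, 3])

t2s_lim, s0_lim, t2s_full, s0_full = fit_loglinear(
    data_cat, echo_times, adaptive_mask, report=False)
# t2s_lim should be close to [30, 40, 50, 60] (ms)
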
Example #40
0
def tedana_workflow(data, tes, mask=None, mixm=None, ctab=None, manacc=None,
                    tedort=False, gscontrol=None, tedpca='mle',
                    source_tes=-1, combmode='t2s', verbose=False, stabilize=False,
                    out_dir='.', fixed_seed=42, maxit=500, maxrestart=10,
                    debug=False, quiet=False, png=False, png_cmap='coolwarm'):
    """
    Run the "canonical" TE-Dependent ANAlysis workflow.

    Parameters
    ----------
    data : :obj:`str` or :obj:`list` of :obj:`str`
        Either a single z-concatenated file (single-entry list or str) or a
        list of echo-specific files, in ascending order.
    tes : :obj:`list`
        List of echo times associated with data in milliseconds.
    mask : :obj:`str`, optional
        Binary mask of voxels to include in TE Dependent ANAlysis. Must be
        spatially aligned with `data`. If an explicit mask is not provided,
        then Nilearn's compute_epi_mask function will be used to derive a mask
        from the first echo's data.
    mixm : :obj:`str`, optional
        File containing mixing matrix. If not provided, ME-PCA and ME-ICA are
        done.
    ctab : :obj:`str`, optional
        File containing component table from which to extract pre-computed
        classifications.
    manacc : :obj:`list`, :obj:`str`, or None, optional
        List of manually accepted components. Can be a list of the components,
        a comma-separated string with component numbers, or None. Default is
        None.
    tedort : :obj:`bool`, optional
        Orthogonalize rejected components w.r.t. accepted ones prior to
        denoising. Default is False.
    gscontrol : {None, 't1c', 'gsr'} or :obj:`list`, optional
        Perform additional denoising to remove spatially diffuse noise. Default
        is None.
    tedpca : {'mle', 'kundu', 'kundu-stabilize'}, optional
        Method with which to select components in TEDPCA. Default is 'mle'.
    source_tes : :obj:`int`, optional
        Source TEs for models. 0 for all, -1 for optimal combination.
        Default is -1.
    combmode : {'t2s'}, optional
        Combination scheme for TEs: 't2s' (Posse 1999, default).
    verbose : :obj:`bool`, optional
        Generate intermediate and additional files. Default is False.
    png : :obj:`bool`, optional
        Generate simple plots and figures. Default is False.
    png_cmap : :obj:`str`, optional
        Name of a matplotlib colormap to be used when generating figures.
        ``--png`` must still be used to request figures. Default is 'coolwarm'.
    out_dir : :obj:`str`, optional
        Output directory.

    Other Parameters
    ----------------
    fixed_seed : :obj:`int`, optional
        Value passed to ``mdp.numx_rand.seed()``.
        Set to a positive integer value for reproducible ICA results;
        otherwise, set to -1 for varying results across calls.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempts for ICA. If ICA fails to converge, the
        fixed seed will be updated and ICA will be run again. If convergence
        is achieved before maxrestart attempts, ICA will finish early.
        Default is 10.
    debug : :obj:`bool`, optional
        Whether to run in debugging mode or not. Default is False.
    quiet : :obj:`bool`, optional
        If True, suppresses logging/printing of messages. Default is False.

    Notes
    -----
    This workflow writes out several files. For a complete list of the files
    generated by this workflow, please visit
    https://tedana.readthedocs.io/en/latest/outputs.html
    """
    out_dir = op.abspath(out_dir)
    if not op.isdir(out_dir):
        os.mkdir(out_dir)

    if debug and not quiet:
        # ensure old logs aren't over-written
        basename = 'tedana_run'
        extension = 'txt'
        logname = op.join(out_dir, (basename + '.' + extension))
        logex = op.join(out_dir, (basename + '*'))
        previouslogs = glob.glob(logex)
        previouslogs.sort(reverse=True)
        for f in previouslogs:
            previousparts = op.splitext(f)
            newname = previousparts[0] + '_old' + previousparts[1]
            os.rename(f, newname)

        # set logging format
        formatter = logging.Formatter(
                    '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
                    datefmt='%Y-%m-%dT%H:%M:%S')

        # set up logging file and open it for writing
        fh = logging.FileHandler(logname)
        fh.setFormatter(formatter)
        logging.basicConfig(level=logging.DEBUG,
                            handlers=[fh, logging.StreamHandler()])
    elif quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    LGR.info('Using output directory: {}'.format(out_dir))

    # ensure tes are in appropriate format
    tes = [float(te) for te in tes]
    n_echos = len(tes)

    # Coerce gscontrol to list
    if not isinstance(gscontrol, list):
        gscontrol = [gscontrol]

    # coerce data to samples x echos x time array
    if isinstance(data, str):
        data = [data]

    LGR.info('Loading input data: {}'.format([f for f in data]))
    catd, ref_img = io.load_data(data, n_echos=n_echos)
    n_samp, n_echos, n_vols = catd.shape
    LGR.debug('Resulting data shape: {}'.format(catd.shape))

    if mixm is not None and op.isfile(mixm):
        mixm = op.abspath(mixm)
        # Allow users to re-run on same folder
        if mixm != op.join(out_dir, 'meica_mix.1D'):
            shutil.copyfile(mixm, op.join(out_dir, 'meica_mix.1D'))
            shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm)))
    elif mixm is not None:
        raise IOError('Argument "mixm" must be an existing file.')

    if ctab is not None and op.isfile(ctab):
        ctab = op.abspath(ctab)
        # Allow users to re-run on same folder
        if ctab != op.join(out_dir, 'comp_table_ica.txt'):
            shutil.copyfile(ctab, op.join(out_dir, 'comp_table_ica.txt'))
            shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab)))
    elif ctab is not None:
        raise IOError('Argument "ctab" must be an existing file.')

    if isinstance(manacc, str):
        manacc = [int(comp) for comp in manacc.split(',')]

    if ctab and not mixm:
        LGR.warning('Argument "ctab" requires argument "mixm".')
        ctab = None
    elif ctab and (manacc is None):
        LGR.warning('Argument "ctab" requires argument "manacc".')
        ctab = None
    elif manacc is not None and not mixm:
        LGR.warning('Argument "manacc" requires argument "mixm".')
        manacc = None

    if mask is None:
        LGR.info('Computing EPI mask from first echo')
        first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :])
        mask = compute_epi_mask(first_echo_img)
    else:
        # TODO: add affine check
        LGR.info('Using user-defined mask')

    mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True)
    LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp))
    if verbose:
        io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img)

    os.chdir(out_dir)

    LGR.info('Computing T2* map')
    t2s, s0, t2ss, s0s, t2sG, s0G = decay.fit_decay(catd, tes, mask, masksum)

    # set a hard cap for the T2* map
    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
    cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                      interpolation_method='lower')
    LGR.debug('Setting cap on T2* map at {:.5f}'.format(cap_t2s * 10))
    t2s[t2s > cap_t2s * 10] = cap_t2s
    io.filewrite(t2s, op.join(out_dir, 't2sv.nii'), ref_img)
    io.filewrite(s0, op.join(out_dir, 's0v.nii'), ref_img)

    if verbose:
        io.filewrite(t2ss, op.join(out_dir, 't2ss.nii'), ref_img)
        io.filewrite(s0s, op.join(out_dir, 's0vs.nii'), ref_img)
        io.filewrite(t2sG, op.join(out_dir, 't2svG.nii'), ref_img)
        io.filewrite(s0G, op.join(out_dir, 's0vG.nii'), ref_img)

    # optimally combine data
    data_oc = combine.make_optcom(catd, tes, mask, t2s=t2sG, combmode=combmode)

    # regress out global signal unless explicitly not desired
    if 'gsr' in gscontrol:
        catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img)

    if mixm is None:
        # Identify and remove thermal noise from data
        dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask,
                                                t2s, t2sG, ref_img,
                                                tes=tes, algorithm=tedpca,
                                                source_tes=source_tes,
                                                kdaw=10., rdaw=1.,
                                                out_dir=out_dir, verbose=verbose)
        mmix_orig = decomposition.tedica(dd, n_components, fixed_seed,
                                         maxit, maxrestart)

        if verbose:
            np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
            if source_tes == -1:
                io.filewrite(utils.unmask(dd, mask),
                             op.join(out_dir, 'ts_OC_whitened.nii'), ref_img)

        LGR.info('Making second component selection guess from ICA results')
        # Estimate betas and compute selection metrics for mixing matrix
        # generated from dimensionally reduced data using full data (i.e., data
        # with thermal noise)
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
                    catd, data_oc, mmix_orig, mask, t2s, tes,
                    ref_img, reindex=True, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)

        comptable = model.kundu_metrics(comptable, metric_maps)
        comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
    else:
        LGR.info('Using supplied mixing matrix from ICA')
        mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
        comptable, metric_maps, betas, mmix = model.dependence_metrics(
                    catd, data_oc, mmix_orig, mask, t2s, tes,
                    ref_img, label='meica_', out_dir=out_dir,
                    algorithm='kundu_v2', verbose=verbose)
        if ctab is None:
            comptable = model.kundu_metrics(comptable, metric_maps)
            comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols)
        else:
            comptable = pd.read_csv(ctab, sep='\t', index_col='component')
            comptable = selection.manual_selection(comptable, acc=manacc)

    comptable.to_csv(op.join(out_dir, 'comp_table_ica.txt'), sep='\t',
                     index=True, index_label='component', float_format='%.6f')

    if comptable[comptable.classification == 'accepted'].shape[0] == 0:
        LGR.warning('No BOLD components detected! Please check data and '
                    'results!')

    mmix_orig = mmix.copy()
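    # "tedort": regress the rejected component time series on the accepted ones
    # and keep only the residuals, so variance shared with accepted components
    # is not removed during denoising.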
    if tedort:
        acc_idx = comptable.loc[
            ~comptable.classification.str.contains('rejected')].index.values
        rej_idx = comptable.loc[
            comptable.classification.str.contains('rejected')].index.values
        acc_ts = mmix[:, acc_idx]
        rej_ts = mmix[:, rej_idx]
        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
        pred_rej_ts = np.dot(acc_ts, betas)
        resid = rej_ts - pred_rej_ts
        mmix[:, rej_idx] = resid
        np.savetxt(op.join(out_dir, 'meica_mix_orth.1D'), mmix)

    io.writeresults(data_oc, mask=mask, comptable=comptable, mmix=mmix,
                    n_vols=n_vols, ref_img=ref_img)

    if 't1c' in gscontrol:
        LGR.info('Performing T1c global signal regression to remove spatially '
                 'diffuse noise')
        gsc.gscontrol_mmix(data_oc, mmix, mask, comptable, ref_img)

    if verbose:
        io.writeresults_echoes(catd, mmix, mask, comptable, ref_img)

    if png:
        LGR.info('Making figures folder with static component maps and '
                 'timecourse plots.')
        # make figure folder first
        if not op.isdir(op.join(out_dir, 'figures')):
            os.mkdir(op.join(out_dir, 'figures'))

        viz.write_comp_figs(data_oc, mask=mask, comptable=comptable,
                            mmix=mmix_orig, ref_img=ref_img,
                            out_dir=op.join(out_dir, 'figures'),
                            png_cmap=png_cmap)

        LGR.info('Making Kappa vs Rho scatter plot')
        viz.write_kappa_scatter(comptable=comptable,
                                out_dir=op.join(out_dir, 'figures'))

        LGR.info('Making overall summary figure')
        viz.write_summary_fig(comptable=comptable,
                              out_dir=op.join(out_dir, 'figures'))

    LGR.info('Workflow completed')
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
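
# A hypothetical invocation sketch for the workflow above; the echo filenames,
# echo times, and output directory are placeholders, and only parameters
# documented in the docstring are used.
tedana_workflow(
    data=['echo-1.nii.gz', 'echo-2.nii.gz', 'echo-3.nii.gz'],  # placeholder paths
    tes=[14.5, 38.5, 62.5],                                    # ms, ascending
    tedpca='mle',
    combmode='t2s',
    out_dir='tedana_output',
    verbose=True)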
Beispiel #41
0
def fit_monoexponential(data_cat, echo_times, adaptive_mask, report=True):
    """
    Fit monoexponential decay model with nonlinear curve-fitting.

    Parameters
    ----------
    data_cat : (S x E x T) :obj:`numpy.ndarray`
        Multi-echo data.
    echo_times : (E,) array_like
        Echo times in milliseconds.
    adaptive_mask : (S,) :obj:`numpy.ndarray`
        Array where each value indicates the number of echoes with good signal
        for that voxel.
    report : bool, optional
        Whether to log a description of this step or not. Default is True.

    Returns
    -------
    t2s_limited, s0_limited, t2s_full, s0_full : (S,) :obj:`numpy.ndarray`
        T2* and S0 estimate maps.

    Notes
    -----
    This method is slower, but more accurate, than the log-linear approach.
    """
    if report:
        RepLGR.info(
            "A monoexponential model was fit to the data at each voxel "
            "using nonlinear model fitting in order to estimate T2* and S0 "
            "maps, using T2*/S0 estimates from a log-linear fit as "
            "initial values. For each voxel, the value from the adaptive "
            "mask was used to determine which echoes would be used to "
            "estimate T2* and S0. In cases of model fit failure, T2*/S0 "
            "estimates from the log-linear fit were retained instead.")
    n_samp, n_echos, n_vols = data_cat.shape

    # Currently unused
    # fit_data = np.mean(data_cat, axis=2)
    # fit_sigma = np.std(data_cat, axis=2)

    t2s_limited, s0_limited, t2s_full, s0_full = fit_loglinear(data_cat,
                                                               echo_times,
                                                               adaptive_mask,
                                                               report=False)

    echos_to_run = np.unique(adaptive_mask)
    if 1 in echos_to_run:
        echos_to_run = np.sort(np.unique(np.append(echos_to_run, 2)))
    echos_to_run = echos_to_run[echos_to_run >= 2]

    t2s_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    s0_asc_maps = np.zeros([n_samp, len(echos_to_run)])
    echo_masks = np.zeros([n_samp, len(echos_to_run)], dtype=bool)

    for i_echo, echo_num in enumerate(echos_to_run):
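        # As in the log-linear fit, voxels with at most two good echoes share
        # the two-echo fit; all others use exactly their number of good echoes.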
        if echo_num == 2:
            voxel_idx = np.where(adaptive_mask <= echo_num)[0]
        else:
            voxel_idx = np.where(adaptive_mask == echo_num)[0]

        # Create echo masks to assign values to limited vs full maps later
        echo_mask = np.squeeze(echo_masks[..., i_echo])
        echo_mask[adaptive_mask == echo_num] = True
        echo_masks[..., i_echo] = echo_mask

        data_2d = data_cat[:, :echo_num, :].reshape(len(data_cat), -1).T
        echo_times_1d = np.repeat(echo_times[:echo_num], n_vols)

        # perform a monoexponential fit of echo times against MR signal
        # using loglin estimates as initial starting points for fit
        fail_count = 0
        for voxel in voxel_idx:
            try:
                popt, cov = scipy.optimize.curve_fit(
                    monoexponential,
                    echo_times_1d,
                    data_2d[:, voxel],
                    p0=(s0_full[voxel], t2s_full[voxel]),
                    bounds=((np.min(data_2d[:, voxel]), 0), (np.inf, np.inf)))
                s0_full[voxel] = popt[0]
                t2s_full[voxel] = popt[1]
            except (RuntimeError, ValueError):
                # If curve_fit fails to converge, fall back to loglinear estimate
                fail_count += 1

        if fail_count:
            fail_percent = 100 * fail_count / len(voxel_idx)
            LGR.debug('With {0} echoes, monoexponential fit failed on {1}/{2} '
                      '({3:.2f}%) voxel(s), used log linear estimate '
                      'instead'.format(echo_num, fail_count, len(voxel_idx),
                                       fail_percent))

        t2s_asc_maps[:, i_echo] = t2s_full
        s0_asc_maps[:, i_echo] = s0_full

    # create limited T2* and S0 maps
    t2s_limited = utils.unmask(t2s_asc_maps[echo_masks], adaptive_mask > 1)
    s0_limited = utils.unmask(s0_asc_maps[echo_masks], adaptive_mask > 1)

    # create full T2* maps with S0 estimation errors
    t2s_full, s0_full = t2s_limited.copy(), s0_limited.copy()
    t2s_full[adaptive_mask == 1] = t2s_asc_maps[adaptive_mask == 1, 0]
    s0_full[adaptive_mask == 1] = s0_asc_maps[adaptive_mask == 1, 0]

    return t2s_limited, s0_limited, t2s_full, s0_full
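
# A minimal, self-contained sketch of the nonlinear fit described above. The
# `monoexponential` model referenced by fit_monoexponential is defined elsewhere
# in tedana; the stand-in below is an assumed version with the same meaning,
# S(TE) = S0 * exp(-TE / T2*), and all numeric values are illustrative.
import numpy as np
import scipy.optimize


def monoexponential_sketch(tes, s0, t2star):
    """Assumed monoexponential decay model, for illustration only."""
    return s0 * np.exp(-tes / t2star)


tes = np.array([14.0, 28.0, 42.0, 56.0])           # echo times in ms
signal = monoexponential_sketch(tes, 900.0, 40.0)  # synthetic single-voxel data
popt, _ = scipy.optimize.curve_fit(
    monoexponential_sketch, tes, signal,
    p0=(800.0, 30.0),                              # e.g., log-linear estimates
    bounds=((signal.min(), 0), (np.inf, np.inf)))
s0_est, t2s_est = popt                             # recovers ~900 and ~40 ms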
Beispiel #42
0
def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
    """
    Removes global signal from individual echo `catd` and `optcom` time series

    This function uses a spatial global signal estimation approach to remove
    the global signal from individual echo time series datasets. The
    spatial global signal is estimated from the optimally combined data after
    detrending with a Legendre polynomial basis of `order = 0` and
    `degree = dtrank`.

    Parameters
    ----------
    catd : (S x E x T) array_like
        Input functional data
    optcom : (S x T) array_like
        Optimally-combined functional data (i.e., the output of `make_optcom`)
    n_echos : int
        Number of echos in data. Should be the same as `E` dimension of `catd`
    ref_img : str or img_like
        Reference image to dictate how outputs are saved to disk
    dtrank : int, optional
        Specifies the degree of the Legendre polynomial basis used for estimating
        the spatial global signal. Default: 4

    Returns
    -------
    dm_catd : (S x E x T) array_like
        Input `catd` with global signal removed from time series
    dm_optcom : (S x T) array_like
        Input `optcom` with global signal removed from time series
    """

    LGR.info('Applying amplitude-based T1 equilibration correction')

    # Legendre polynomial basis for denoising
    bounds = np.linspace(-1, 1, optcom.shape[-1])
    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

    # compute mean, std, mask local to this function
    # inefficient, but makes this function a bit more modular
    Gmu = optcom.mean(axis=-1)  # temporal mean
    Gmask = Gmu != 0

    # find spatial global signal
    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
    sol = np.linalg.lstsq(Lmix, dat.T, rcond=None)[0]  # Legendre basis for detrending
    detr = dat - np.dot(sol.T, Lmix.T)[0]
    sphis = (detr).min(axis=1)
    sphis -= sphis.mean()
    utils.filewrite(utils.unmask(sphis, Gmask), 'T1gs', ref_img)

    # find the time course of the spatial global signal
    # and append it to the Legendre basis to form the regression basis
    glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
    glsig = stats.zscore(glsig, axis=None)
    np.savetxt('glsig.1D', glsig)
    glbase = np.hstack([Lmix, glsig.T])

    # Project global signal out of optimally combined data
    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
    tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                             np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]

    utils.filewrite(optcom, 'tsoc_orig', ref_img)
    dm_optcom = utils.unmask(tsoc_nogs, Gmask)
    utils.filewrite(dm_optcom, 'tsoc_nogs', ref_img)

    # Project glbase out of each echo
    dm_catd = catd.copy()  # don't overwrite catd
    for echo in range(n_echos):
        dat = dm_catd[:, echo, :][Gmask]
        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
        e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank]))
        dm_catd[:, echo, :] = utils.unmask(e_nogs, Gmask)

    return dm_catd, dm_optcom
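
# A minimal sketch of the Legendre-basis detrending used above: lpmv(0, v, x)
# evaluates the degree-v Legendre polynomial on x, and regressing a time series
# on the first `dtrank` polynomials removes slow trends. The synthetic series
# and seed below are illustrative only.
import numpy as np
from scipy.special import lpmv

n_trs, dtrank = 200, 4
bounds = np.linspace(-1, 1, n_trs)
Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])

rng = np.random.default_rng(0)
ts = 2.0 * bounds + 0.5 * bounds ** 2 + rng.standard_normal(n_trs)  # trend + noise

sol = np.linalg.lstsq(Lmix, ts[:, np.newaxis], rcond=None)[0]
detrended = ts - (Lmix @ sol).ravel()  # slow polynomial trend removed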