Example #1
    def __init__(self,
                 inference='rfx',
                 mi_method='gc',
                 kernel=None,
                 verbose=None):
        """Init."""
        WfBase.__init__(self)
        assert inference in [
            'ffx', 'rfx'
        ], ("'inference' input parameter should either be 'ffx' or 'rfx'")
        assert mi_method in [
            'gc', 'bin'
        ], ("'mi_method' input parameter should either be 'gc' or 'bin'")
        self._mi_type = 'cc'
        self._inference = inference
        self._mi_method = mi_method
        self._need_copnorm = mi_method == 'gc'
        self._gcrn = inference == 'rfx'
        self._kernel = kernel
        set_log_level(verbose)
        self.clean()
        self._wf_stats = WfStats(verbose=verbose)
        # update internal config
        self.update_cfg(mi_type=self._mi_type,
                        inference=inference,
                        mi_method=mi_method,
                        kernel=kernel)

        logger.info(f"Workflow for computing connectivity ({self._mi_type} - "
                    f"{mi_method})")
Example #2
    def __init__(self, inference='rfx', estimator=None, kernel=None,
                 verbose=None):
        """Init."""
        WfBase.__init__(self)
        assert inference in ['ffx', 'rfx'], (
            "'inference' input parameter should either be 'ffx' or 'rfx'")
        self._mi_type = 'cc'
        if estimator is None:
            estimator = GCMIEstimator(mi_type='cc', copnorm=False,
                                      verbose=verbose)
        assert estimator.settings['mi_type'] == self._mi_type
        self._copnorm = isinstance(estimator, GCMIEstimator)
        self._inference = inference
        self.estimator = estimator
        self._gcrn = inference == 'rfx'
        self._kernel = kernel
        set_log_level(verbose)
        self.clean()
        self._wf_stats = WfStats(verbose=verbose)
        # update internal config
        self.attrs.update(dict(mi_type=self._mi_type, inference=inference,
                               kernel=kernel))

        logger.info(f"Workflow for computing comodulations between distant "
                    f"brain areas ({inference})")
Example #3
    def __init__(self, mi_type='cc', verbose=None):
        """Init."""
        set_log_level(verbose)
        desc = CONFIG['MI_REPR'][mi_type]
        settings = {'description': desc}
        self.settings = Attributes(attrs=settings, section_name='Settings')
        self._kwargs = dict()
        assert hasattr(self, 'name')

        logger.info(f"{self.name} ({mi_type})")
Example #4
    def get_connectivity_pairs(self,
                               nb_min_suj=None,
                               directed=False,
                               verbose=None):
        """Get the connectivity pairs for this dataset.

        This method can be used to get the possible connectivity pairs, i.e.
        (sources, targets), for directed or undirected connectivity. In
        addition, some pairs are ignored when the number of subjects sharing
        them is too low.

        Parameters
        ----------
        nb_min_suj : int | None
            Minimum number of shared subjects between two pairs
        directed : bool | False
            Get either directed (True) or non-directed (False) pairs

        Returns
        -------
        sources : array_like
            Indices of the source
        targets : array_like
            Indices of the target
        """
        set_log_level(verbose)
        assert self._groupedby == 'roi', (
            "To get connectivity pairs, the dataset should already be grouped "
            "by roi")
        bad = []
        # get all possible pairs
        if directed:
            pairs = np.where(~np.eye(self.n_roi, dtype=bool))
        else:
            pairs = np.triu_indices(self.n_roi, k=1)
        # remove pairs where there's not enough subjects
        if isinstance(nb_min_suj, int):
            s_new, t_new = [], []
            for s, t in zip(pairs[0], pairs[1]):
                suj_s, suj_t = self.suj_roi_u[s], self.suj_roi_u[t]
                if len(np.intersect1d(suj_s, suj_t)) >= nb_min_suj:
                    s_new += [s]
                    t_new += [t]
                else:
                    bad += [f"{self.roi_names[s]}-{self.roi_names[t]}"]
            if len(bad):
                logger.warning("The following connectivity pairs are going to "
                               "be ignored because the number of subjects is "
                               f"bellow {nb_min_suj} : {bad}")
            pairs = (np.asarray(s_new), np.asarray(t_new))
        logger.info(f"    {len(pairs[0])} remaining connectivity pairs / "
                    f"{len(bad)} pairs have been ignored "
                    f"(nb_min_suj={nb_min_suj})")

        return pairs[0], pairs[1]
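A minimal usage sketch (not from the original source); `ds` stands for a hypothetical DatasetEphy instance that has already been grouped by roi:

# Hypothetical usage; `ds` is assumed to be a DatasetEphy grouped by roi
sources, targets = ds.get_connectivity_pairs(nb_min_suj=2, directed=False)
for s, t in zip(sources, targets):
    print(ds.roi_names[s], '-', ds.roi_names[t])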
Example #5
def conn_io(da, trials=None, roi=None, times=None, verbose=None):
    """I/O conversion for connectivity functions.

    Parameters
    ----------
    da : array_like
        Array of electrophysiological data of shape (n_trials, n_roi, n_times)
    trials : array_like | None
        Trial vector or string corresponding to the dimension name in a
        DataArray
    roi : array_like | None
        List of roi names or string corresponding to the dimension name in a
        DataArray
    times : array_like | None
        Time vector or string corresponding to the dimension name in a
        DataArray
    """
    set_log_level(verbose)
    assert isinstance(da, np.ndarray) or isinstance(da, xr.DataArray)
    assert da.ndim == 3
    n_trials, n_roi, n_times = da.shape
    attrs = dict(n_trials=n_trials, n_roi=n_roi, n_times=n_times)
    logger.info(f"Inputs conversion (n_trials={n_trials}, n_roi={n_roi}, "
                f"n_times={n_times})")

    # _______________________________ Xarray case _____________________________
    if isinstance(da, xr.DataArray):
        # if not provided, use the first dimension as the trials coordinate
        if trials is None:
            trials = da.dims[0]
        # get trials, roi and times
        if isinstance(trials, str):
            trials = da[trials].data
        if isinstance(roi, str):
            roi = da[roi].data
        if isinstance(times, str):
            times = da[times].data
        attrs = {**attrs, **da.attrs}
        da = da.data

    # _____________________________ Empty inputs ______________________________
    if roi is None:
        roi = [f"roi_{k}" for k in range(n_roi)]
    if times is None:
        times = np.arange(n_times)
    if trials is None:
        trials = np.arange(n_trials)

    # _______________________________ Final check _____________________________
    assert isinstance(da, np.ndarray)
    assert da.shape == (len(trials), len(roi), len(times))

    return da, trials, roi, times, attrs
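A minimal, self-contained sketch of the conversion above, assuming the version of conn_io shown here (more recent frites releases expose a different signature):

import numpy as np

x = np.random.rand(10, 5, 100)  # (n_trials, n_roi, n_times)
da, trials, roi, times, attrs = conn_io(x)
print(roi[:2], times[:3])  # defaults: ['roi_0', 'roi_1'] and [0 1 2]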
Example #6
def gccmi_1d_ccc(x, y, z, verbose=None):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third, z,
    estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)
    """
    set_log_level(verbose)
    x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")

    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    nvarz = z.shape[0]

    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(nvarx):
        if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(nvary):
        if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break
    for zi in range(nvarz):
        if (np.unique(z[zi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break

    # copula normalization
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    cz = copnorm_nd(z, axis=1)
    # parametric Gaussian CMI
    return cmi_1d_ggg(cx, cy, cz, True, True)
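A quick, self-contained check of the estimator above on synthetic data (the import path is an assumption; adjust it to wherever the function is defined):

import numpy as np
from frites.core import gccmi_1d_ccc  # import path assumed

rng = np.random.RandomState(0)
z = rng.randn(1000)
x = z + rng.randn(1000)
y = x + z + .5 * rng.randn(1000)
# CMI(x; y | z) in bits: positive, since x and y share variance beyond z
print(gccmi_1d_ccc(x, y, z))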
Example #7
    def savgol_filter(self, h_freq, edges=None, verbose=None):
        """Filter the data using Savitzky-Golay polynomial method.

        This method is an adaptation of the mne-python one. Note that this
        smoothing operation is performed in place to avoid copying the data.

        Parameters
        ----------
        h_freq : float
            Approximate high cut-off frequency in Hz. Note that this is not an
            exact cutoff, since Savitzky-Golay filtering is done using
            polynomial fits instead of FIR/IIR filtering. This parameter is
            thus used to determine the length of the window over which a
            5th-order polynomial smoothing is used.
        edges : int, float | None
            Edge compensation. Use either an integer to drop a specific number
            of time points (e.g. edges=100 removes 100 time points at the
            beginning and at the end) or a float to drop a period (e.g.
            edges=0.2 drops 200ms at the beginning and at the end)

        Returns
        -------
        inst : instance of DatasetEphy
            The object with the filtering applied.

        Notes
        -----
        For Savitzky-Golay low-pass approximation, see:
            https://gist.github.com/larsoner/bbac101d50176611136b
        """
        set_log_level(verbose)

        # perform smoothing
        for n_s in range(len(self._x)):
            self._x[n_s] = savgol_filter(self._x[n_s],
                                         h_freq,
                                         axis='times',
                                         sfreq=self.attrs['sfreq'],
                                         verbose=verbose)
        # edge effect compensation
        if isinstance(edges, CONFIG['FLOAT_DTYPE']):
            t = self._times
            self.sel(times=slice(t[0] + edges, t[-1] - edges))
        elif isinstance(edges, CONFIG['INT_DTYPE']):
            self.isel(times=slice(edges, -edges))

        return self
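A usage sketch (not from the original source), assuming `ds` is an existing DatasetEphy instance:

# low-pass around 20 Hz, then drop 200ms of edge effects on each side
ds.savgol_filter(h_freq=20., edges=0.2)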
Example #8
def set_config(key, value, verbose=None):
    """Change the global config of frites.

    Parameters
    ----------
    key : string
        Entry of the config
    value : dict / list
        The new value for the selected key. The type should be the same as the
        default one
    """
    io.set_log_level(verbose)
    assert isinstance(key, str)
    CONFIG = get_config()  # noqa
    assert key in CONFIG.keys(), f"The key {key} doesn't exist."
    CONFIG[key] = value
    logger.info(f"The key {key} has been updated")
Example #9
def gcmi_model_1d_cd(x, y, verbose=None):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method is based on ANOVA style model comparison.
    I = gcmi_model_1d_cd(x,y) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y
        must be an array of integers

    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    set_log_level(verbose)
    x, y = np.atleast_2d(x), np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")

    nvarx, ntrl = x.shape

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(nvarx):
        if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break

    # copula normalization
    cx = copnorm_nd(x, axis=1)
    # parametric Gaussian MI
    return mi_model_1d_gd(cx, y, True, True)
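A self-contained sketch on synthetic data (import path assumed):

import numpy as np
from frites.core import gcmi_model_1d_cd  # import path assumed

rng = np.random.RandomState(0)
y = rng.randint(0, 2, 1000)     # discrete (integer) variable
x = rng.randn(1000) + 2. * y    # continuous variable shifted by class
print(gcmi_model_1d_cd(x, y))   # MI in bits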
Example #10
    def savgol_filter(self, h_freq, verbose=None):
        """Filter the data using Savitzky-Golay polynomial method.

        This method is an adaptation of the mne-python one.

        Parameters
        ----------
        h_freq : float
            Approximate high cut-off frequency in Hz. Note that this is not an
            exact cutoff, since Savitzky-Golay filtering is done using
            polynomial fits instead of FIR/IIR filtering. This parameter is
            thus used to determine the length of the window over which a
            5th-order polynomial smoothing is used.

        Returns
        -------
        inst : instance of DatasetEphy
            The object with the filtering applied.

        Notes
        -----
        For Savitzky-Golay low-pass approximation, see:
            https://gist.github.com/larsoner/bbac101d50176611136b
        """
        set_log_level(verbose)
        assert self._groupedby == "subject", ("Filtering only works when the "
                                              "data is grouped by 'subject'")

        from scipy.signal import savgol_filter
        h_freq = float(h_freq)
        if h_freq >= self.sfreq / 2.:
            raise ValueError('h_freq must be less than half the sample rate')

        # savitzky-golay filtering
        window_length = (int(np.round(self.sfreq / h_freq)) // 2) * 2 + 1
        logger.info(f'    Using savgol length {window_length}')
        for k in range(len(self._x)):
            self._x[k] = savgol_filter(self._x[k],
                                       axis=2,
                                       polyorder=5,
                                       window_length=window_length)
        return self
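As a concrete instance of the window-length formula above (values are illustrative):

import numpy as np

sfreq, h_freq = 512., 20.
window_length = (int(np.round(sfreq / h_freq)) // 2) * 2 + 1
print(window_length)  # 27 -> always odd, as required by Savitzky-Golay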
Example #11
def conn_power_corr(data,
                    freqs=None,
                    roi=None,
                    times=None,
                    pairs=None,
                    sfreq=None,
                    foi=None,
                    sm_times=.5,
                    sm_freqs=1,
                    sm_kernel='hanning',
                    mode='morlet',
                    n_cycles=7.,
                    mt_bandwidth=None,
                    decim=1,
                    kw_cwt={},
                    kw_mt={},
                    block_size=None,
                    n_jobs=-1,
                    verbose=None,
                    dtype=np.float32):
    """Wavelet-based single-trial time-resolved spectral connectivity.
    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :
            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)
    freqs : array_like
        Array of central frequencies of shape (n_freqs,).
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    pairs : array_like | None
        Pairs of links of shape (n_pairs, 2) to compute. If None, all pairs are
        computed
    sfreq : float | None
        Sampling frequency
    foi : array_like | None
        Extract frequencies of interest. This parameters should be an array of
        shapes (n_foi, 2) defining where each band of interest start and
        finish.
    sm_times : float | .5
        Duration of the temporal smoothing window in seconds. By default, a
        500ms smoothing is used.
    sm_freqs : int | 1
        Number of points for frequency smoothing. By default, 1 is used which
        is equivalent to no smoothing
    sm_kernel : {'square', 'hanning'}
        Kernel type to use. Choose either 'square' or 'hanning'
    mode : {'morlet', 'multitaper'}
        Spectrum estimation mode can be either: 'multitaper' or 'morlet'.
    n_cycles : array_like | 7.
        Number of cycles to use for each frequency. If a float or an integer is
        used, the same number of cycles is going to be used for all frequencies
    mt_bandwidth : array_like | None
        The bandwidth of the multitaper windowing function in Hz. Only used in
        'multitaper' mode.
    decim : int | 1
        Decimation factor to apply after the time-frequency decomposition in
        order to reduce memory usage (default 1, i.e. no decimation). If int,
        returns tfr[..., ::decim].
    kw_cwt : dict | {}
        Additional arguments sent to the mne-function
        :py:`mne.time_frequency.tfr_array_morlet`
    kw_mt : dict | {}
        Additional arguments sent to the mne-function
        :py:`mne.time_frequency.tfr_array_multitaper`
    block_size : int | None
        Number of blocks of trials to process at once. This parameter can be
        used to decrease memory load. If None, all trials are used at once. If
        for example block_size=2, the trials are subdivided into two groups
        and each group is processed one after the other.
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.

    Returns
    -------
    conn : xarray.DataArray
        DataArray of shape (n_trials, n_pairs, n_freqs, n_times)
    """
    set_log_level(verbose)

    if isinstance(sm_times, np.ndarray):
        raise NotImplementedError("Frequency-dependent kernel is still in "
                                  "development; only the first value "
                                  f"({sm_times[0]}) would be used")

    # _________________________________ METHODS _______________________________
    conn_f, f_name = {'pec': (_pec, "Power correlation")}['pec']

    # _________________________________ INPUTS ________________________________
    # inputs conversion
    data, cfg = conn_io(
        data,
        times=times,
        roi=roi,
        agg_ch=False,
        win_sample=None,
        pairs=pairs,
        sort=True,
        block_size=block_size,
        sfreq=sfreq,
        freqs=freqs,
        foi=foi,
        sm_times=sm_times,
        sm_freqs=sm_freqs,
        verbose=verbose,
        name=f'Spectral connectivity (metric={f_name}, mode={mode})',
    )

    # extract variables
    x, trials, attrs = data.data, data['y'].data, cfg['attrs']
    times, n_trials = data['times'].data, len(trials)
    x_s, x_t, roi_p = cfg['x_s'], cfg['x_t'], cfg['roi_p']
    indices, sfreq = cfg['blocks'], cfg['sfreq']
    freqs, _, foi_idx = cfg['freqs'], cfg['need_foi'], cfg['foi_idx']
    f_vec, sm_times, sm_freqs = cfg['f_vec'], cfg['sm_times'], cfg['sm_freqs']
    n_pairs, n_freqs = len(x_s), len(freqs)

    # temporal decimation
    if isinstance(decim, int):
        times = times[::decim]
        sm_times = int(np.round(sm_times / decim))
        sm_times = max(sm_times, 1)

    # Create smoothing kernel
    kernel = _create_kernel(sm_times, sm_freqs, kernel=sm_kernel)

    # define arguments for parallel computing
    mesg = f'Estimating pairwise {f_name} for trials %s'
    kw_para = dict(n_jobs=n_jobs, verbose=verbose, total=n_pairs)

    # show info
    logger.info(f"Computing pairwise {f_name} (n_pairs={n_pairs}, "
                f"n_freqs={n_freqs}, decim={decim}, sm_times={sm_times}, "
                f"sm_freqs={sm_freqs})")

    # ______________________ CONTAINER FOR CONNECTIVITY _______________________
    # compute coherence on blocks of trials
    conn = np.zeros((n_trials, n_pairs, len(f_vec), len(times)), dtype=dtype)
    for tr in indices:
        # --------------------------- TIME-FREQUENCY --------------------------
        # time-frequency decomposition
        w = _tf_decomp(x[tr, ...],
                       sfreq,
                       freqs,
                       n_cycles=n_cycles,
                       decim=decim,
                       mode=mode,
                       mt_bandwidth=mt_bandwidth,
                       kw_cwt=kw_cwt,
                       kw_mt=kw_mt,
                       n_jobs=n_jobs)

        # ----------------------------- CONN TRIALS ---------------------------
        # give indication about computed trials
        kw_para['mesg'] = mesg % f"{tr[0]}...{tr[-1]}"

        # computes conn across trials
        conn_tr = conn_f(w, kernel, foi_idx, x_s, x_t, kw_para)

        # merge results
        conn[tr, ...] = np.stack(conn_tr, axis=1)

        # free memory before processing the next block of trials
        del conn_tr, w

    # _________________________________ OUTPUTS _______________________________
    # configuration
    cfg = dict(sfreq=sfreq,
               sm_times=sm_times,
               sm_freqs=sm_freqs,
               sm_kernel=sm_kernel,
               mode=mode,
               n_cycles=n_cycles,
               mt_bandwidth=mt_bandwidth,
               decim=decim,
               type='pec')

    # conversion
    conn = xr.DataArray(conn,
                        dims=('trials', 'roi', 'freqs', 'times'),
                        name='pec',
                        coords=(trials, roi_p, f_vec, times),
                        attrs=check_attrs({
                            **attrs,
                            **cfg
                        }))
    return conn
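A hypothetical call matching the signature above, assuming the function and its private helpers (_pec, _tf_decomp, _create_kernel) are importable:

import numpy as np

x = np.random.rand(20, 4, 512)   # (n_epochs, n_roi, n_times)
freqs = np.linspace(10, 80, 15)  # central frequencies
pec = conn_power_corr(x, freqs=freqs, sfreq=256., mode='morlet', n_jobs=1)
print(pec.dims)                  # ('trials', 'roi', 'freqs', 'times')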
Example #12
def gccmi_1d_ccd(x, y, z, verbose=None):
    """GCCMI between 2 continuous variables conditioned on a discrete variable.

    I = gccmi_1d_ccd(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third
    discrete variable z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    z : array_like
        Discrete array of shape (n_epochs,)

    Returns
    -------
    cmi : float
        Conditional Mutual Information shared by x and y conditioned by z
        (in bits)
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    if z.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(z.dtype, np.integer):
        raise ValueError("z should be an integer array")

    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    zm = np.unique(z)

    if y.shape[1] != ntrl or z.size != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(nvarx):
        if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(nvary):
        if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break

    # calculate gcmi for each z value
    icond = np.zeros(len(zm))
    pz = np.zeros(len(zm))
    cx = []
    cy = []
    for zi in zm:
        idx = z == zi
        thsx = copnorm_nd(x[:, idx], axis=1)
        thsy = copnorm_nd(y[:, idx], axis=1)
        pz[zi] = idx.sum()
        cx.append(thsx)
        cy.append(thsy)
        icond[zi] = mi_1d_gg(thsx, thsy, True, True)

    pz = pz / float(ntrl)

    # conditional mutual information
    cmi = np.sum(pz * icond)
    i = mi_1d_gg(np.hstack(cx), np.hstack(cy), True, False)
    return (cmi, i)
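A self-contained sketch (import path assumed). Note that, given the pz[zi] indexing above, z is expected to take the integer values 0..n_classes-1:

import numpy as np
from frites.core import gccmi_1d_ccd  # import path assumed

rng = np.random.RandomState(0)
z = rng.randint(0, 3, 900)   # discrete variable with values {0, 1, 2}
x = rng.randn(900) + z
y = x + rng.randn(900)
cmi, mi = gccmi_1d_ccd(x, y, z)
print(cmi, mi)               # conditional and unconditional MI (bits)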
Example #13
import logging

from frites import (
    io,
    core,
    conn,
    stats,
    utils,
    workflow,
    simulations,  # noqa
    estimator)

__version__ = "0.3.8"

# -----------------------------------------------------------------------------
# Set 'info' as the default logging level
logger = logging.getLogger('frites')
io.set_log_level('info')

# -----------------------------------------------------------------------------
# get / set config


def get_config():
    """Get the global configuration of frites."""
    from frites.config import CONFIG
    return CONFIG


def set_config(key, value, verbose=None):
    """Change the global config of frites.

    Parameters
Example #14
def conn_dfc(data,
             win_sample=None,
             times=None,
             roi=None,
             n_jobs=1,
             gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using the
    Gaussian Copula Mutual Information (GCMI). The DFC is computed across time
    points for each trial. Note that the DFC can either be computed on windows
    manually defined or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    win_sample : array_like | None
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define windows either manually or as sliding windows. If None, the
        entire time window is used instead.
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g z-score) this parameter can be set to
        False because the data can be considered as gaussian over time.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # inputs conversion and data checking
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_trials = len(trials)
    # deal with the win_sample array
    if win_sample is None:
        win_sample = np.array([[0, len(times) - 1]])
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]

    # -------------------------------------------------------------------------
    # find group of brain regions
    gp = pd.DataFrame({'roi': roi}).groupby('roi').groups
    roi_gp, roi_idx = list(gp.keys()), list(gp.values())
    n_roi = len(roi_gp)
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    roi_p = [f"{roi_gp[s]}-{roi_gp[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # prepare outputs and elements
    n_jobs = 1 if n_win == 1 else n_jobs
    parallel, p_fun = parallel_func(_conn_dfc,
                                    n_jobs=n_jobs,
                                    verbose=verbose,
                                    total=n_win,
                                    mesg='Estimating DFC')

    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')

    # -------------------------------------------------------------------------
    # compute the dynamic functional connectivity

    dfc = parallel(
        p_fun(x[:, :, w[0]:w[1]], x_s, x_t, roi_idx, gcrn) for w in win_sample)
    dfc = np.stack(dfc, 2)

    # -------------------------------------------------------------------------
    # dataarray conversion
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc,
                       dims=('trials', 'roi', 'times'),
                       name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)],
               type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc
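A minimal, runnable sketch combining this function with define_windows (import paths assumed):

import numpy as np
from frites.conn import conn_dfc, define_windows  # import paths assumed

x = np.random.rand(30, 5, 200)  # (n_epochs, n_roi, n_times)
times = np.arange(200) / 64.
win, _ = define_windows(times, slwin_len=.5, slwin_step=.25)
dfc = conn_dfc(x, win_sample=win, times=times)
print(dfc.shape)                # (30, 10, n_windows); 10 = 5 * 4 / 2 pairs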
Example #15
    def __init__(self,
                 x,
                 y=None,
                 roi=None,
                 z=None,
                 times=None,
                 nb_min_suj=None,
                 sub_roi=None,
                 verbose=None):
        """Init."""
        set_log_level(verbose)
        # ---------------------------------------------------------------------
        # conversion of the electrophysiological data
        # ---------------------------------------------------------------------
        logger.info('Definition of an electrophysiological dataset')
        x, y, z, roi, times, sub_roi = ds_ephy_io(x,
                                                  roi=roi,
                                                  y=y,
                                                  z=z,
                                                  times=times,
                                                  sub_roi=sub_roi,
                                                  verbose=verbose)
        if y is None:
            logger.debug("Fill the y input because otherwise everything fails")
            y = [np.zeros((x[k].shape[0])) for k in range(len(x))]

        # ---------------------------------------------------------------------
        # check the types of y (and z)
        # ---------------------------------------------------------------------
        self._y_dtype = self._infer_dtypes(y, 'y')
        self._z_dtype = self._infer_dtypes(z, 'z')
        self._sub_roi_dtype = self._infer_dtypes(sub_roi, 'sub_roi')
        if (self._y_dtype == 'float') and (self._z_dtype == 'none'):
            if self._sub_roi_dtype == 'int':
                self._mi_type = 'ccd'
                _mi_st = 'Regr (continuous) | sub_roi'
            else:
                self._mi_type = 'cc'
                _mi_st = 'Regr (continuous)'
        elif (self._y_dtype == 'int') and (self._z_dtype == 'none'):
            self._mi_type = 'cd'
            if self._sub_roi_dtype == 'int':
                _mi_st = '[Regr (discrete), sub_roi]'
            else:
                _mi_st = 'Regr (discrete)'
        elif (self._y_dtype == 'float') and (self._z_dtype == 'int'):
            self._mi_type = 'ccd'
            if self._sub_roi_dtype == 'int':
                _mi_st = 'Regr (continuous) | [Regr (discrete), sub_roi]'
            else:
                _mi_st = 'Regr (continuous) | Regr (discrete)'
        else:
            raise TypeError(f"The types of y ({self._y_dtype}) and z "
                            f"({self._z_dtype}) do not allow computing MI")
        logger.debug(f"y.dtype={self._y_dtype}; z.dtype={self._z_dtype}); "
                     f"sub_roi.dtype={self._sub_roi_dtype}")
        logger.info(f"    Supported MI definition ({self._mi_type}) : "
                    f"I(ephy; {_mi_st})")
        # (optional) multi-condition conversion
        if self._y_dtype == 'int':
            y = self._multicond_conversion(y, 'y', verbose)
        if self._z_dtype == 'int':
            z = self._multicond_conversion(z, 'z', verbose)

        # ---------------------------------------------------------------------
        # 4d conversion
        # ---------------------------------------------------------------------
        self._reshape = None
        if all([k.ndim == 4 for k in x]):
            logger.debug(f"    4d reshaping")
            for k in range(len(x)):
                n_e, n_r, n_f, n_t = x[k].shape
                x[k] = x[k].reshape(n_e, n_r, n_f * n_t)
            self._reshape = (n_f, n_t)

        # ---------------------------------------------------------------------
        # retain in self
        # ---------------------------------------------------------------------
        # data related
        self.nb_min_suj = nb_min_suj
        self.n_subjects = len(x)
        self.times = times
        self.roi = roi
        # unique roi list
        merged_roi = np.r_[tuple(self.roi)]
        _, u_idx = np.unique(merged_roi, return_index=True)
        self.roi_names = merged_roi[np.sort(u_idx)]
        self.n_roi = len(self.roi_names)
        # internals
        self.modality = "electrophysiological"
        self._copnormed = False
        self._groupedby = "subject"
        self.__version__ = frites.__version__
        # main data
        self._x = x  # [(n_epochs, n_channels, n_times)]
        self._y = y  # [(n_epochs,)]
        self._z = z  # [(n_epochs,)]
        self._sub_roi = sub_roi  # [(n_roi,)]
        self.n_times = self._x[0].shape[-1]
        if len(self.times) > 1:
            self.sfreq = 1. / (self.times[1] - self.times[0])
        else:
            logger.warning("Impossible to know the sampling frequency when the"
                           " time vector only contains a single time point")
            self.sfreq = 1.

        logger.info(f"Dataset composed of {self.n_subjects} subjects. At least"
                    f" {self.nb_min_suj} subjects per roi are required")
Example #16
    def resample(self,
                 sfreq,
                 npad='auto',
                 window='boxcar',
                 n_jobs=1,
                 pad='edge',
                 verbose=None):
        """Resample data.

        This method is an adaptation of the mne-python one.

        Parameters
        ----------
        sfreq : float
            New sample rate to use.
        npad : int | str
            Amount to pad the start and end of the data. Can also be “auto” to
            use a padding that will result in a power-of-two size (can be much
            faster).
        window : str | tuple
            Frequency-domain window to use in resampling. See
            scipy.signal.resample().
        pad : str | 'edge'
            The type of padding to use. Supports all numpy.pad() mode options.
            Can also be “reflect_limited”, which pads with a reflected version
            of each vector mirrored on the first and last values of the vector,
            followed by zeros. Only used for method='fir'. The default is
            'edge', which pads with the edge values of each vector.

        Returns
        -------
        inst : instance of DatasetEphy
            The object with the filtering applied.

        Notes
        -----
        For some data, it may be more accurate to use npad=0 to reduce
        artifacts. This is dataset dependent -- check your data!
        """
        set_log_level(verbose)
        assert self._groupedby == "subject", ("Resampling only works when the "
                                              "data is grouped by 'subject'")
        from mne.filter import resample
        sfreq = float(sfreq)
        o_sfreq = self.sfreq
        logger.info(f"    Resample to the frequency {sfreq}Hz")
        for k in range(len(self._x)):
            self._x[k] = resample(self._x[k],
                                  sfreq,
                                  o_sfreq,
                                  npad,
                                  window=window,
                                  n_jobs=n_jobs,
                                  pad=pad)
        self.sfreq = float(sfreq)

        self.times = (np.arange(self._x[0].shape[-1], dtype=float) / sfreq +
                      self.times[0])
        self.n_times = len(self.times)

        return self
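A usage sketch (not from the original source), assuming `ds` is an existing DatasetEphy instance:

# resample every subject's data to 128 Hz
ds.resample(128., npad='auto')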
Example #17
    def __new__(self,
                x,
                y=None,
                z=None,
                roi=None,
                times=None,
                agg_ch=True,
                multivariate=False,
                name=None,
                attrs=None,
                sfreq=None,
                verbose=None):
        """Init."""
        set_log_level(verbose)
        attrs = Attributes(attrs=attrs)
        _supp_dim = []

        # ========================== Data extraction ==========================

        # ____________________________ extraction _____________________________
        if isinstance(x, xr.DataArray):  # xr -> xr
            # get data, name and attributes
            attrs.update(x.attrs)
            name = x.name if name is None else name
            data = x.data
            # get y / z regressors
            y = x[y].data if isinstance(y, str) else y
            z = x[z].data if isinstance(z, str) else z
            # get spatial information (roi)
            roi = x[roi].data if isinstance(roi, str) else roi
            # build 4d (possibly multivariate) coordinate
            if x.ndim == 4:
                if multivariate:
                    _supp_dim = ('mv', np.full((x.shape[2]), np.nan))
                else:
                    _supp_dim = (x.dims[2], x[x.dims[2]].data)
            # get the temporal vector
            times = x[times].data if isinstance(times, str) else times

        if 'mne' in str(type(x)):  # mne -> xr
            times = x.times if times is None else times
            roi = x.info['ch_names'] if roi is None else roi
            sfreq = x.info['sfreq'] if sfreq is None else sfreq
            if isinstance(x, CONFIG["MNE_EPOCHS_TYPE"]):
                data = x.get_data()
            elif isinstance(x, CONFIG["MNE_EPOCHSTFR_TYPE"]):
                data = x.data
                if multivariate:
                    _supp_dim = ('mv', np.full((data.shape[2]), np.nan))
                else:
                    _supp_dim = ('freqs', x.freqs)

        if isinstance(x, np.ndarray):  # numpy -> xr
            data = x
            if data.ndim == 4:
                if multivariate:
                    _supp_dim = ('mv', np.full((data.shape[2]), np.nan))
                else:
                    _supp_dim = ('supp', np.arange(data.shape[2]))

        assert data.ndim <= 4, "Data up to 4-dimensions are supported"

        # ____________________________ Y/Z dtypes _____________________________
        # infer dtypes
        y_dtype = self._infer_dtypes(y, 'y')
        z_dtype = self._infer_dtypes(z, 'z')
        # infer supported mi_type
        mi_type = CONFIG['MI_TABLE'][y_dtype][z_dtype]
        mi_repr = CONFIG['MI_REPR'][mi_type]
        # uni to multi condition remapping
        y = multi_to_uni_conditions([y], var_name='y', verbose=verbose)[0]
        z = multi_to_uni_conditions([z], var_name='z', verbose=verbose)[0]

        # __________________________ Sampling rate ____________________________
        # infer the sampling frequency (if needed)
        if sfreq is None:
            if (times is not None) and (len(times) >= 2):
                sfreq = 1. / (times[1] - times[0])
            else:
                logger.warning("Impossible to infer the sampling frequency. "
                               "You should consider providing a time vector")
                sfreq = 1.
        sfreq = float(sfreq)

        # ============================= DataArray =============================

        # ___________________________ Dims & Coords ___________________________

        dims, coords = [], OrderedDict()
        n_trials, n_roi, n_times = np.array(list(data.shape))[[0, 1, -1]]

        # don't break if empty time vector or missing roi
        if times is None:
            logger.warning("No time vector. A default one is created")
            times = np.arange(n_times) / sfreq
        if roi is None:
            logger.warning("No regions of interest are provided (roi). Default"
                           " ones are created")
            roi = [f"roi_{k}" for k in range(n_roi)]

        # build trials (potentially) multi-coordinates
        coords['trials'] = ('trials', np.arange(n_trials))
        if (y is not None) and (len(y) == n_trials):
            coords['y'] = ('trials', y)
        if (z is not None) and (len(z) == n_trials):
            coords['z'] = ('trials', z)
        if name is not None:
            coords['subject'] = ('trials', [name] * n_trials)
        dims += ['trials']
        # build space (potentially) multi-coordinates
        coords['roi'] = ('roi', roi)
        if agg_ch:
            coords['agg_ch'] = ('roi', [0] * n_roi)
        else:
            coords['agg_ch'] = ('roi', np.arange(n_roi))
        dims += ['roi']
        if _supp_dim:
            coords[_supp_dim[0]] = _supp_dim[1]
            dims += [_supp_dim[0]]
        # build temporal coordinate
        if (times is not None) and (len(times) == n_times):
            coords['times'] = ('times', times)
        dims += ['times']

        # _____________________________ Attributes ____________________________
        attrs.update({
            '__version__': frites.__version__,
            'modality': "electrophysiology",
            'dtype': 'SubjectEphy',
            'y_dtype': y_dtype,
            'z_dtype': z_dtype,
            'mi_type': mi_type,
            'mi_repr': mi_repr,
            'sfreq': sfreq,
            'agg_ch': agg_ch,
            'multivariate': multivariate
        })

        # _____________________________ DataArray _____________________________
        # For several reasons, DataArray is not easy to subclass (see xarray
        # issues #706, #728, #3980). Therefore, for the moment, it's easier
        # to simply return a DataArray
        da = xr.DataArray(data,
                          dims=dims,
                          coords=coords,
                          name=name,
                          attrs=attrs)

        return da
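A minimal sketch of a single-subject conversion, assuming this class is SubjectEphy from frites.dataset:

import numpy as np
from frites.dataset import SubjectEphy  # class location assumed

x = np.random.rand(50, 3, 100)  # (n_trials, n_channels, n_times)
y = np.random.rand(50)          # continuous regressor
da = SubjectEphy(x, y=y, times=np.arange(100) / 256.)
print(da.dims)                  # ('trials', 'roi', 'times')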
Example #18
def gcmi_mixture_1d_cd(x, y, verbose=None):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method evaluates MI from a Gaussian mixture.

    The Gaussian mixture is fit using robust measures of location (median) and
    scale (median absolute deviation) for each class.
    I = gcmi_mixture_1d_cd(x,y) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y
        must be an array of integers

    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    set_log_level(verbose)
    x, y = np.atleast_2d(x), np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")

    nvarx, ntrl = x.shape
    ym = np.unique(y)

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(nvarx):
        if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break

    # copula normalise each class
    # shift and rescale to match loc and scale of raw data
    # this provides a robust way to fit the gaussian mixture
    classdat = []
    ydat = []
    for yi in ym:
        # class conditional data
        idx = y == yi
        xm = x[:, idx]
        cxm = copnorm_nd(xm, axis=1)

        xmmed = np.median(xm, axis=1)[:, np.newaxis]
        # robust measure of s.d. under Gaussian assumption from median
        # absolute deviation
        xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis]
        cxmscaled = cxm * (1.482602218505602 * xmmad)
        # robust measure of loc from median
        cxmscaled = cxmscaled + xmmed
        classdat.append(cxmscaled)
        ydat.append(yi * np.ones(xm.shape[1], dtype=int))

    cx = np.concatenate(classdat, axis=1)
    newy = np.concatenate(ydat)
    return mi_mixture_1d_gd(cx, newy)
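A self-contained sketch on synthetic data (import path assumed):

import numpy as np
from frites.core import gcmi_mixture_1d_cd  # import path assumed

rng = np.random.RandomState(0)
y = rng.randint(0, 2, 1000)
x = rng.randn(1000) + 2. * y
print(gcmi_mixture_1d_cd(x, y))  # MI in bits from the Gaussian-mixture fit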
Example #19
def conn_get_pairs(roi, directed=False, nb_min_suj=-np.inf, verbose=None):
    """Get possible connectivity pairs for multiple subjects.

    This function returns a DataFrame that contains all of the necessary
    information for managing pairs of brain regions across many subjects.

    Parameters
    ----------
    roi : list
        List where each item is an array describing the brain region names of
        a single subject.
    directed : bool | False
        Specify whether the returned pairs should be for directed (True)
        or undirected (default : False) connectivity.
    nb_min_suj : int | -np.inf
        Minimum number of subjects that must share a pair of brain regions for
        the pair to be kept.

    Returns
    -------
    df_conn : pd.DataFrame
        A Pandas DataFrame that describes the connectivity information at the
        group level. The table contains the following entries :

            * 'sources' / 'targets' : respectively, the source and target names
            * 'subjects' : list of subjects per pair of brain regions
            * '#subjects' : number of subjects per pair of brain regions
            * 'names' : name of each pair. If undirected, the names are going
              to be like 'roi_0-roi_1' or 'roi_0->roi_1' if directed
            * 'keep' : booleans indicating whether the number of subjects per
              pair of brain regions is over nb_min_suj
    df_suj : pd.DataFrame
        A Pandas DataFrame that describes the connectivity information per
        subject. The table contains the following entries :

            * 'subjects' : subject number
            * 'keep_roi' / 'drop_roi' : the brain regions respectively to keep
              and to remove to fit the input parameters nb_min_suj
            * 'keep_suj' : boolean describing if the subject should be dropped
              or conserved
            * 'conn' : the 2D boolean connectivity array per subject
    """
    set_log_level(verbose)
    assert isinstance(roi, list)
    n_subjects = len(roi)
    roi = [np.asarray(k) for k in roi]

    # =========================== Conn info per pair ==========================

    s_ss, t_ss, ss = [], [], []
    for k in range(n_subjects):
        # get the list of unique brain regions, preserving their order
        u_roi = nonsorted_unique(roi[k], assert_unique=True)
        n_u_roi = len(u_roi)
        # get all possible pairs
        if directed:
            pairs = np.where(~np.eye(n_u_roi, dtype=bool))
        else:
            pairs = np.triu_indices(n_u_roi, k=1)
        s_names, t_names = u_roi[pairs[0]], u_roi[pairs[1]]
        # if not directed, merge '0-1' and '1-0'
        if not directed:
            st_names = np.c_[s_names, t_names]
            s_names, t_names = np.unique(np.sort(st_names, axis=1), axis=0).T
        # keep single-subject source and target names
        s_ss += [s_names]
        t_ss += [t_names]
        ss += [k] * len(s_names)
    # fill info in a dataframe
    df_ss = pd.DataFrame({
        'subjects': ss,
        'sources': np.concatenate(s_ss),
        'targets': np.concatenate(t_ss)
    })

    # get the number of subjects per pair
    pattern = '->' if directed else '-'
    gp = df_ss.groupby(['sources', 'targets'])
    df_conn = gp.subjects.aggregate([list]).reset_index()
    df_conn = df_conn.rename(columns={'list': 'subjects'})
    df_conn['#subjects'] = [len(k) for k in df_conn['subjects']]
    df_conn['names'] = [
        f"{k}{pattern}{i}"
        for k, i in zip(df_conn['sources'], df_conn['targets'])
    ]
    df_conn['keep'] = df_conn['#subjects'] >= nb_min_suj

    # print the info
    n_remain = np.sum(list(df_conn['keep']))
    n_drop = np.sum(list(~df_conn['keep']))
    logger.info(f"    {n_remain} remaining pairs of brain regions "
                f"(nb_min_suj={nb_min_suj}), {n_drop} dropped")

    # ========================= Conn info per subject =========================

    # build 2d connectivity array per subject
    conn = {}
    for n_s in range(n_subjects):
        n_roi_s = len(roi[n_s])
        _conn = xr.DataArray(~np.eye(n_roi_s, dtype=bool),
                             dims=('sources', 'targets'),
                             coords=(roi[n_s], roi[n_s]))
        conn[n_s] = _conn

    # fill the information
    for k in range(len(df_conn)):
        _df = df_conn.iloc[k, :]
        for s in _df['subjects']:
            _s, _t, _k = _df['sources'], _df['targets'], bool(_df['keep'])
            conn[s].loc[dict(sources=_s, targets=_t)] = _k
            if not directed:
                conn[s].loc[dict(sources=_t, targets=_s)] = _k

    # get the brain regions to keep / drop per subject
    suj, roi_keep, roi_drop, conn_tot = [], [], [], []
    for s in range(n_subjects):
        _keep = roi[s][np.union1d(*np.where(conn[s]))]
        _drop = np.setdiff1d(roi[s], _keep)
        suj += [s]
        roi_keep += [_keep.tolist()]
        roi_drop += [_drop.tolist()]
        conn_tot += [conn[s].data]
    # create the final dataframe
    df_suj = pd.DataFrame({
        'subjects': suj,
        'keep_roi': roi_keep,
        'drop_roi': roi_drop
    })  # , 'conn': conn_tot
    df_suj['keep_suj'] = [len(k) > 1 for k in df_suj['keep_roi']]

    return df_conn, df_suj
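A minimal, runnable sketch (import path assumed); the second subject lacks 'dlPFC', so only the pair shared by both subjects survives nb_min_suj=2:

import numpy as np
from frites.conn import conn_get_pairs  # import path assumed

roi = [np.array(['insula', 'dlPFC', 'vmPFC']),
       np.array(['insula', 'vmPFC'])]
df_conn, df_suj = conn_get_pairs(roi, directed=False, nb_min_suj=2)
print(df_conn[['names', '#subjects', 'keep']])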
Example #20
    def __init__(self, verbose=None):  # noqa
        WfBase.__init__(self)
        set_log_level(verbose)
        logger.info("Definition of a non-parametric statistical workflow")
Example #21
def conn_dfc(data,
             win_sample,
             times=None,
             roi=None,
             n_jobs=1,
             gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using the
    Gaussian Copula Mutual Information (GCMI). The DFC is computed across time
    points for each trial. Note that the DFC can either be computed on windows
    manually defined or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data array of a single subject organized as
        (n_epochs, n_roi, n_times)
    win_sample : array_like
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define windows either manually or as sliding windows.
    times : array_like | None
        Time vector array of shape (n_times,)
    roi : array_like | None
        ROI names of a single subject
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g z-score) this parameter can be set to
        False because the data can be considered as gaussian over time.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # inputs conversion
    data, trials, roi, times, attrs = conn_io(data,
                                              roi=roi,
                                              times=times,
                                              verbose=verbose)

    # -------------------------------------------------------------------------
    # data checking
    n_epochs, n_roi, n_pts = data.shape
    assert (len(roi) == n_roi) and (len(times) == n_pts)
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]
    # get the non-directed pairs
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    # build roi pairs names
    roi_p = [f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # compute dfc
    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    # get the parallel function
    parallel, p_fun = parallel_func(mi_nd_gg,
                                    n_jobs=n_jobs,
                                    verbose=verbose,
                                    prefer='threads')
    pbar = ProgressBar(range(n_win), mesg='Estimating DFC')

    dfc = np.zeros((n_epochs, n_pairs, n_win), dtype=np.float32)
    with parallel as para:
        for n_w, w in enumerate(win_sample):
            # select the data in the window and copnorm across time points
            data_w = data[..., w[0]:w[1]]
            # apply gcrn over time
            if gcrn:
                data_w = copnorm_nd(data_w, axis=2)
            # compute mi between pairs
            _dfc = para(
                p_fun(data_w[:, [s], :], data_w[:, [t], :],
                      **CONFIG["KW_GCMI"])
                for s, t in zip(x_s, x_t))
            dfc[..., n_w] = np.stack(_dfc, axis=1)
            pbar.update_with_increment_value(1)

    # -------------------------------------------------------------------------
    # dataarray conversion
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc,
                       dims=('trials', 'roi', 'times'),
                       name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)],
               type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc
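The same function with manually defined windows (a minimal sketch; win_sample must contain integer sample indices):

import numpy as np
from frites.conn import conn_dfc  # import path assumed

x = np.random.rand(10, 3, 150)
win = np.array([[0, 74], [75, 149]])  # two manual windows, in samples
dfc = conn_dfc(x, win, times=np.arange(150) / 128.)
print(dfc['roi'].data)  # ['roi_0-roi_1' 'roi_0-roi_2' 'roi_1-roi_2']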
Example #22
    def __init__(self,
                 x,
                 y=None,
                 z=None,
                 roi=None,
                 agg_ch=True,
                 times=None,
                 multivariate=False,
                 nb_min_suj=False,
                 attrs=None,
                 verbose=None):
        """Init."""
        set_log_level(verbose)
        self.attrs = Attributes(attrs=attrs)
        assert isinstance(x, (list, tuple))
        self._agg_ch = agg_ch
        self._multivariate = multivariate

        logger.info('Definition of an electrophysiological dataset')
        logger.info(f'    Dataset composed of {len(x)} subjects / sessions')

        # ========================== Multi-conditions =========================

        # remapping group y and z
        if isinstance(y, (list, tuple)):
            y = multi_to_uni_conditions(y, var_name='y', verbose=verbose)
        if isinstance(z, (list, tuple)):
            z = multi_to_uni_conditions(z, var_name='z', verbose=verbose)

        # ===================== Multi-subjects conversion =====================

        # force converting the data (latest task-related variables)
        n_subjects = len(x)
        y = [y] * n_subjects if not isinstance(y, list) else y
        z = [z] * n_subjects if not isinstance(z, list) else z
        roi = [roi] * n_subjects if not isinstance(roi, list) else roi
        for k in range(n_subjects):
            x[k] = SubjectEphy(x[k],
                               y=y[k],
                               z=z[k],
                               roi=roi[k],
                               agg_ch=True,
                               times=times,
                               multivariate=multivariate,
                               verbose=verbose)
        self._x = x

        # minimum number of subject / roi
        nb_min_suj = -np.inf if not isinstance(nb_min_suj, int) else nb_min_suj
        self._nb_min_suj = nb_min_suj
        logger.info(f"    At least {self._nb_min_suj} subjects / roi required")

        # merge attributes
        self.attrs.merge([k.attrs for k in self._x])
        self._y_dtype = self.attrs['y_dtype']
        self._z_dtype = self.attrs['z_dtype']
        self._mi_type = self.attrs['mi_type']
        mi_repr = self.attrs['mi_repr']
        logger.info(f"    Supported MI definition {mi_repr} ({self._mi_type})")

        # ===================== Additional dimensions  ========================

        # Subject dimension
        for k in range(len(self._x)):
            self._x[k].name = f'subject_{k}'
            self._x[k] = self._x[k].assign_coords(
                subject=('trials', [k] * self._x[k].shape[0]))
        # channel aggregation
        if not agg_ch:
            # split into sections of unique integers
            n_trials_s = [k.shape[1] for k in self._x]
            agg_ch_num = np.arange(np.sum(n_trials_s))
            agg_split = np.split(agg_ch_num, np.cumsum(n_trials_s)[0:-1])
            # add additional dimension
            for k in range(len(self._x)):
                self._x[k] = self._x[k].assign_coords(agg_ch=('roi',
                                                              agg_split[k]))
        # final mi dimension
        dims = list(self._x[0].dims)
        self._mi_dims = [k for k in dims if k not in ['trials', 'mv']]

        # ============================= Attributes ============================

        # update internals parameters
        self._update_internals()
        # update internal attributes
        self.attrs.update({
            'nb_min_suj': nb_min_suj,
            'n_subjects': len(self._x),
            'agg_ch': agg_ch,
            'multivariate': multivariate,
            'dtype': "DatasetEphy",
            '__version__': frites.__version__
        })
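Not part of the original source: a hedged sketch of how this constructor might be fed, using random NumPy data for two subjects; the `frites.dataset.DatasetEphy` import path is assumed from the package these snippets appear to come from.

# hypothetical construction sketch (not from the original source)
import numpy as np
from frites.dataset import DatasetEphy  # assumed import path

n_times = 100
times = np.linspace(-1., 1., n_times)
# two subjects with different numbers of epochs but shared roi / time axis
x = [np.random.rand(20, 3, n_times), np.random.rand(30, 3, n_times)]
y = [np.random.rand(20), np.random.rand(30)]     # one regressor per epoch
roi = [['r0', 'r1', 'r2'], ['r0', 'r1', 'r2']]   # channel -> roi mapping

ds = DatasetEphy(x, y=y, roi=roi, times=times, nb_min_suj=2)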
Exemple #23
0
def define_windows(times,
                   windows=None,
                   slwin_len=None,
                   slwin_start=None,
                   slwin_stop=None,
                   slwin_step=None,
                   verbose=None):
    """Define temporal windows.

    This function can be used either to manually define temporal windows or to
    generate automatic sliding windows. Note that all input parameters should
    be in the time domain (e.g seconds or milliseconds).

    Parameters
    ----------
    times : array_like
        Time vector
    windows : array_like | None
        Manual windows (e.g (.1, .2) or [(.1, .2), (.4, .5)]).
    slwin_len : float | None
        Length of each sliding window (e.g .2 produces a 200ms window).
    slwin_start : float | None
        Time point for starting sliding windows (e.g 0.1). If None, sliding
        windows will start from the first time point.
    slwin_stop : float | None
        Time point for ending sliding windows (e.g 1.5). If None, sliding
        windows will finish at the last time point.
    slwin_step : float | None
        Temporal step between consecutive windows (e.g .1 means that
        consecutive windows are separated by 100ms). This parameter can be used
        to define either overlapping or non-overlapping windows. If None,
        slwin_step is set to slwin_len plus one time sample, producing
        consecutive non-overlapping windows.

    Returns
    -------
    win_sample : array_like
        Array of shape (n_windows, 2) of temporal indices defining the
        (start, stop) sample of each window
    mean_time : array_like
        Mean time inside each defined window, of shape (n_windows,)

    See also
    --------
    plot_windows
    """
    set_log_level(verbose)
    assert isinstance(times, np.ndarray)
    logger.info("Defining temporal windows")
    stamp = times[1] - times[0]

    # -------------------------------------------------------------------------
    # build windows
    if (windows is None) and (slwin_len is None):
        logger.info("    No input detected. Full time window is used")
        win_time = np.array([[times[0], times[-1]]])
    elif windows is not None:
        logger.info("    Manual definition of windows")
        win_time = np.atleast_2d(windows)
    elif slwin_len is not None:
        # manage empty inputs
        if slwin_start is None: slwin_start = times[0]  # noqa
        if slwin_stop is None: slwin_stop = times[-1]  # noqa
        if slwin_step is None: slwin_step = slwin_len + stamp  # noqa
        logger.info(f"    Definition of sliding windows (len={slwin_len}, "
                    f"start={slwin_start}, stop={slwin_stop}, "
                    f"step={slwin_step})")
        # build the sliding windows
        sl_start = np.arange(slwin_start, slwin_stop - slwin_len, slwin_step)
        sl_stop = np.arange(slwin_start + slwin_len, slwin_stop, slwin_step)
        if len(sl_start) != len(sl_stop):
            min_len = min(len(sl_start), len(sl_stop))
            sl_start, sl_stop = sl_start[0:min_len], sl_stop[0:min_len]
        win_time = np.c_[sl_start, sl_stop]
    assert (win_time.ndim == 2) and (win_time.shape[1] == 2)

    # -------------------------------------------------------------------------
    # time to sample conversion
    win_sample = np.zeros_like(win_time, dtype=int)
    times = times.reshape(-1, 1)
    for n_k, k in enumerate(win_time):
        win_sample[n_k, :] = np.argmin(np.abs(times - k), axis=0)
    logger.info(f"    {win_sample.shape[0]} windows defined")

    return win_sample, win_time.mean(1)
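For reference, a short sketch of the two ways this function can be driven (manual windows vs sliding windows); the values below are illustrative only.

# illustrative only : manual vs sliding windows on a 1kHz time vector
import numpy as np

times = np.arange(-500, 1500) / 1000.  # -0.5s -> 1.5s, 1ms steps

# manual definition of two windows
win, mean_t = define_windows(times, windows=[(-.2, 0.), (.1, .4)])

# 200ms sliding windows with 50% overlap, starting at stimulus onset
win, mean_t = define_windows(times, slwin_len=.2, slwin_start=0.,
                             slwin_step=.1)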
Exemple #24
0
def conn_covgc(data, dt, lag, t0, step=1, roi=None, times=None, method='gc',
               conditional=False, n_jobs=-1, verbose=None):
    r"""Single-trial covariance-based Granger Causality for gaussian variables.

    This function computes the (conditional) covariance-based Granger Causality
    (covgc) for each trial.

    .. note::
        **Total Granger interdependence**

            * TGI = gc.sum(axis=-1) = gc(x->y) + gc(y->x) + gc(x.y)
            * TGI = Hycy + Hxcx - Hxxcyy

        **Relations between Mutual Information and conditional entropies**

        This quantity can be defined as the Increment of Total Interdependence
        and it can be calculated from the difference of two mutual informations
        as follows

        .. math::

            Ixxyy  &=  I(X_{i+1}, X_{i}|Y_{i+1}, Y_{i}) \\
                   &=  H(X_{i+1}) + H(Y_{i+1}) - H(X_{i+1},Y_{i+1}) \\
                   &=  log(det_{xi1}) + log(det_{yi1}) - log(det_{xyi1}) \\
            Ixy    &=  I(X_{i}|Y_{i}) \\
                   &=  H(X_{i}) + H(Y_{i}) - H(X_{i}, Y_{i}) \\
                   &=  log(det_{xi}) + log(det_{yi}) - log(det_{yxi}) \\
            ITI    &= Ixxyy - Ixy

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    dt : int
        Duration of the time window for covariance correlation in samples
    lag : int
        Number of samples for the lag within each trial
    t0 : array_like
        Array of zero-time points, in samples, of length (n_windows,)
    step : int | 1
        Number of samples stepping in the past for the lag within each trial
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    method : {'gauss', 'gc'}
        Method for the estimation of the covgc. Use either 'gauss' which
        assumes that the time-points are normally distributed or 'gc' in order
        to use the gaussian-copula.
    conditional : bool | False
        If True, the conditional Granger Causality is computed i.e. the past is
        also conditioned on the past of other sources.
    n_jobs : int | -1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.

    Returns
    -------
    gc : array_like
        Granger Causality arranged as (n_epochs, n_pairs, n_windows, 3) where
        the last dimension means :

            * 0 : pairs[:, 0] -> pairs[:, 1] (x->y)
            * 1 : pairs[:, 1] -> pairs[:, 0] (y->x)
            * 2 : instantaneous  (x.y)

    References
    ----------
    Brovelli et al., 2015 :cite:`brovelli2015characterization`

    See also
    --------
    conn_dfc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # input checking
    if isinstance(t0, CONFIG['INT_DTYPE']) or isinstance(
        t0, CONFIG['FLOAT_DTYPE']):
        t0 = np.array([t0])
    t0 = np.asarray(t0).astype(int)
    dt, lag, step = int(dt), int(lag), int(step)
    # handle dataarray input
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_epochs, n_roi, n_pts = data.shape
    # force a C-contiguous array because operations assume row-major order
    if not x.flags.c_contiguous:
        x = np.ascontiguousarray(x)
    # method checking
    assert method in ['gauss', 'gc']
    fcn = dict(gauss=_covgc, gc=_gccovgc)[method]

    # -------------------------------------------------------------------------
    # build generic time indices (just need to add t0 to it)
    rows, cols = np.mgrid[0:lag + 1, 0:dt]
    # step in the past lags
    rows = rows[::step, :]
    cols = cols[::step, :]
    # create index for all lags and time points
    ind_tx = cols - rows
    # build output time vector
    times_p = np.empty((len(t0)), dtype=times.dtype, order='C')
    for n_t, t in enumerate(t0):
        times_p[n_t] = times[ind_tx[0, :] + t].mean()
    # get the non-directed pairs and build roi pairs names
    x_s, x_t = np.triu_indices(n_roi, k=1)
    pairs = np.c_[x_s, x_t]
    roi_p = np.array([f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)])
    # check the ratio between lag and dt
    ratio = 100 * (ind_tx.shape[0] / (step * ind_tx.shape[1]))
    if not 10. <= ratio <= 15.:
        _step = int(np.ceil((lag + 1) / (.15 * dt)))
        logger.warning(f"The ratio between the lag and dt is {ratio}%. It's "
                       f"recommended to conserve this ratio between 10-15%."
                       f" Try with a step={_step}")
    logger.debug(f"Index shape : {ind_tx.shape}")

    # -------------------------------------------------------------------------
    ext = 'conditional' if conditional else ''
    # compute covgc and parallel over pairs
    logger.info(f"Compute the {ext} covgc (method={method}, n_pairs={len(x_s)}"
                f"; n_windows={len(t0)}, lag={lag}, dt={dt}, step={step})")
    kw_par = dict(n_jobs=n_jobs, total=len(x_s), verbose=False)
    if not conditional:
        parallel, p_fun = parallel_func(fcn, **kw_par)
        gc = parallel(p_fun(x[:, s, :], x[:, t, :], ind_tx,
                            t0) for s, t in zip(x_s, x_t))
    else:
        parallel, p_fun = parallel_func(_cond_gccovgc, **kw_par)
        gc = parallel(p_fun(x, s, t, ind_tx, t0) for s, t in zip(x_s, x_t))
    gc = np.stack(gc, axis=1)

    # -------------------------------------------------------------------------
    # change output type
    dire = np.array(['x->y', 'y->x', 'x.y'])
    gc = xr.DataArray(gc, dims=('trials', 'roi', 'times', 'direction'),
                      coords=(trials, roi_p, times_p, dire), name='covgc')
    # set attributes
    cfg = dict(lag='lag', step='step', dt='dt', t0='t0',
               conditional='conditional', type='covgc')
    gc.attrs = {**attrs, **cfg}

    return gc
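A usage sketch for the function above; the signature comes from the snippet itself, but the `frites.conn.conn_covgc` import path is an assumption, and the data are random. The lag/dt values follow the 10-15% ratio recommended by the warning in the code.

# hypothetical usage sketch (not from the original source)
import numpy as np
from frites.conn import conn_covgc  # assumed import path

n_epochs, n_roi, n_times = 10, 3, 1000
x = np.random.rand(n_epochs, n_roi, n_times)

# windows of dt=100 samples, lag of 10 samples (11% ratio), two zero-times
gc = conn_covgc(x, dt=100, lag=10, t0=[200, 400], step=1, method='gc')
# gc : (n_epochs, n_pairs, n_windows, 3) -> x->y, y->x and instantaneous
print(gc.shape)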
Exemple #25
0
def ds_ephy_io(x,
               roi=None,
               y=None,
               z=None,
               times=None,
               sub_roi=None,
               verbose=None):
    """Manage inputs conversion for the DatasetEphy.

    This function is used to convert NumPy / MNE / Xarray inputs into a
    standardized NumPy version.

    Parameters
    ----------
    x : list
        List of length (n_subjects,). Each element of the list should either be
        an array of shape (n_epochs, n_channels, n_times), mne.Epochs,
        mne.EpochsArray, mne.EpochsTFR (i.e. non-averaged power) or DataArray
    roi : list | None
        List of length (n_subjects,) of roi names of length (n_channels)
    y, z : list | None
        List for the regressors. Each element should be an array of shape
        (n_epochs)
    sub_roi : list | None
        List of sub_roi names
    times : array_like | None
        Time vector

    Returns
    -------
    x : list
        List of data array of shape (n_epochs, n_channels, n_times)
    y, z : list
        List of arrays of shape (n_epochs,)
    roi : list
        List of arrays of shape (n_channels,)
    times : array_like
        Time vector of shape (n_times,)
    sub_roi : array_like
        List of arrays of shape (n_channels,)
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # data type detection and switch
    # -------------------------------------------------------------------------
    assert isinstance(x, list), ("x input should be a list of elements of "
                                 "length (n_subjects,)")
    assert all([type(x[k]) == type(x[0]) for k in range(len(x))]), (
        "All elements of the `x` input should have the same type")

    # -------------------------------------------------------------------------
    # conversion to array according to datatype
    # -------------------------------------------------------------------------
    if 'numpy' in str(type(x[0])):
        logger.info("    NumPy inputs detected")
    elif 'mne' in str(type(x[0])):
        logger.info("    Converting mne inputs")
        x, times, roi = mne_to_arr(x, roi=roi)
    elif 'xarray' in str(type(x[0])):
        logger.info("    Converting xarray inputs")
        x, roi, y, z, times, sub_roi = xr_to_arr(x,
                                                 roi=roi,
                                                 y=y,
                                                 z=z,
                                                 times=times,
                                                 sub_roi=sub_roi)

    # -------------------------------------------------------------------------
    # manage none inputs
    # -------------------------------------------------------------------------
    # empty roi
    if not isinstance(roi, list):
        logger.warning("No roi have been provided. A default will be used "
                       "instead. You should use the `roi` input instead")
        roi = []
        for k in range(len(x)):
            roi += [np.array([f"roi_{i}" for i in range(x[k].shape[1])])]
    # empty time vector
    if not isinstance(times, np.ndarray):
        logger.warning("No time vector found. A default will be used instead."
                       " You should use the `times` input instead")
        times = np.arange(x[0].shape[-1])

    # -------------------------------------------------------------------------
    # shape and types checking before returning
    # -------------------------------------------------------------------------
    # spatio-temporal conversion
    roi = [np.asarray(roi[k]) for k in range(len(roi))]
    times = times.astype(np.float32)
    # data checking
    assert all([
        k.ndim in [3, 4] for k in x
    ]), ("data should either contains 3d arrays (n_trials, n_channels, n_pts) "
         "or 4d arrays (n_trials, n_channels, n_freqs, n_pts)")
    x_sh = [x[k].shape for k in range(len(x))]
    is_sh_roi = [x_sh[k][1] == len(roi[k]) for k in range(len(x))]
    is_sh_times = [x_sh[k][-1] == len(times) for k in range(len(x))]
    assert all(is_sh_roi), "Inconsistent number of ROI"
    assert all(is_sh_times), "Inconsistent number of time points"
    assert all([list(x_sh[0])[1:] == list(x_sh[k])[1:] for k in range(len(x))])
    if isinstance(y, list):
        y = [np.asarray(y[k]) for k in range(len(y))]
        assert len(y) == len(x), "length of y should be (n_subjects,)"
        assert all([
            x_sh[k][0] == len(y[k]) for k in range(len(x))
        ]), ("Each element of the y input should have a length of (n_epochs,)")
    if isinstance(z, list):
        z = [np.asarray(z[k]) for k in range(len(z))]
        assert len(z) == len(x), "length of z should be (n_subjects,)"
        assert all([
            x_sh[k][0] == len(z[k]) for k in range(len(x))
        ]), ("Each element of the z input should have a length of (n_epochs,)")
    if isinstance(sub_roi, list):
        assert all([k.shape == i.shape for k, i in zip(roi, sub_roi)])

    # -------------------------------------------------------------------------
    # categorical sub roi
    # -------------------------------------------------------------------------
    # concatenate everything and get unique elements
    if isinstance(sub_roi, list):
        import pandas as pd
        logger.info("    Replacing sub roi by categorical integers")

        # get unique sub roi and build replacement dict
        sub_roi_cat = np.r_[tuple([k.squeeze() for k in sub_roi])]
        sub_roi_u = np.unique(sub_roi_cat, return_index=True)
        repl = {k: v for k, v in zip(*sub_roi_u)}

        # replace for each subject
        sub_roi_int = []
        for _sub in sub_roi:
            sub_int = np.array(list(pd.Series(_sub).replace(repl, regex=True)))
            sub_roi_int += [sub_int]
    else:
        sub_roi_int = None

    return x, y, z, roi, times, sub_roi_int
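A minimal call sketch for the helper above, using plain NumPy inputs; the shapes are invented for illustration.

# illustrative only : standardize two NumPy subjects with the helper above
import numpy as np

x = [np.random.rand(20, 3, 100), np.random.rand(30, 3, 100)]
roi = [np.array(['r0', 'r1', 'r2'])] * 2
y = [np.arange(20), np.arange(30)]

x, y, z, roi, times, sub_roi = ds_ephy_io(x, roi=roi, y=y)
# times falls back to np.arange(100) since no time vector was given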
Exemple #26
0
    def __init__(self, verbose=None):
        """Init."""
        set_log_level(verbose)
Exemple #27
0
def multi_to_uni_conditions(x, var_name=None, verbose=None):
    """Convert a discret vector that contains multiple conditions.

    This function can be used to convert a list of discret arrays, each
    reflecting possibly multivariate stimulus or conditions.

    Parameters
    ----------
    x : list
        List of multi-variate conditions. Each element of the list is an array
        of shape (n_trials, n_conditions), where the number of trials can vary
        across elements of the list but they all must have the same number of
        conditions
    var_name : string | None
        The name of the variable (useful for debugging)

    Returns
    -------
    x_new : list
        List of remapped conditions where each element of the list has a shape
        of (n_trials,)
    """
    set_log_level(verbose)
    # =============================== Checking ================================

    if not isinstance(x, (list, tuple)):
        return [x]
    assert all([type(x[0]) == type(k) for k in x])
    x_types = type(x[0])
    if x_types != np.ndarray:
        return x
    # get if all variables are integers and multicolumns else skip it
    is_int = all([k.dtype in CONFIG['INT_DTYPE'] for k in x])
    is_ndim = all([k.ndim > 1 for k in x])
    if not is_int or not is_ndim:
        return x
    # test that all dimensions are equals
    same_dim = all([k.ndim == x[0].ndim for k in x])
    if not same_dim and isinstance(var_name, str):
        raise ValueError(f"Every array in the `{var_name}` input should "
                         f"have the same number of dimensions")
    # otherwise find all possible pairs
    x_all = np.concatenate(x, axis=0)
    idx = np.unique(x_all, axis=0, return_index=True)[1]
    u_cat = x_all[sorted(idx), :]
    # show to the user the new categories
    user = []
    for n_c, cat in enumerate(u_cat):
        user += [f"{n_c}: [{', '.join([str(c) for c in cat])}]"]
    if isinstance(var_name, str):
        logger.debug(f"    The `{var_name}` input contains multiple conditions"
                     f" that have been remapped to : {'; '.join(user)}")
    # loop over subjects
    x_new = []
    for k in range(len(x)):
        x_cat = np.full((x[k].shape[0],), -1, dtype=int)
        for n_c, cat in enumerate(u_cat):
            x_cat[np.equal(x[k], cat.reshape(1, -1)).all(1)] = n_c
        assert x_cat.min() > -1, "Not all values have been replaced"
        x_new += [x_cat]

    return x_new
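To make the remapping concrete, a small worked example with two-column integer conditions (values invented); the expected output follows the first-occurrence ordering implemented above.

# illustrative only : two subjects with bivariate integer conditions
import numpy as np

y_1 = np.array([[0, 1], [0, 1], [1, 1]])   # 3 trials x 2 conditions
y_2 = np.array([[1, 1], [0, 1]])           # 2 trials x 2 conditions
y_new = multi_to_uni_conditions([y_1, y_2], var_name='y')
# [0, 1] -> 0 and [1, 1] -> 1, hence :
# y_new = [array([0, 0, 1]), array([1, 0])]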
Exemple #28
0
def savgol_filter(x, h_freq, axis=None, sfreq=None, polyorder=5, verbose=None):
    """Filter the data using Savitzky-Golay polynomial method.

    This function is an adaptation of the mne-python one for xarray.DataArray.

    Parameters
    ----------
    x : array_like
        Multidimensional array or DataArray
    h_freq : float
        Approximate high cut-off frequency in Hz. Note that this is not an
        exact cutoff, since Savitzky-Golay filtering is done using
        polynomial fits instead of FIR/IIR filtering. This parameter is
        thus used to determine the length of the window
    axis : int, string | None
        Position of the time axis. Can either be an integer when `x` is a
        NumPy array or a string (e.g 'times') when using a DataArray
    sfreq : int, float | None
        Sampling frequency (Hz). If None and `x` is a DataArray with a 'times'
        coordinate, the frequency is inferred from it
    polyorder : int | 5
        Polynomial order

    Returns
    -------
    x_filt : array_like
        Filtered data

    Notes
    -----
    For Savitzky-Golay low-pass approximation, see:
        https://gist.github.com/larsoner/bbac101d50176611136b

    See also
    --------
    kernel_smoothing
    """
    set_log_level(verbose)
    # inputs checking
    if isinstance(x, xr.DataArray):
        dims = list(x.dims)
        # get axis name
        if axis is None:
            axis = 'times'
        if isinstance(axis, str):
            axis = list(x.dims).index(axis)
        # get sfreq if possible
        if not isinstance(sfreq, (int, float)):
            assert 'times' in dims
            sfreq = 1. / (x['times'].data[1] - x['times'].data[0])
    assert isinstance(h_freq, (int, float))
    assert isinstance(axis, int)
    assert isinstance(sfreq, (int, float))
    if h_freq >= sfreq / 2.:
        raise ValueError('h_freq must be less than half the sample rate')

    # get window length
    window_length = (int(np.round(sfreq / h_freq)) // 2) * 2 + 1
    logger.info(f'    Using savgol length {window_length}')

    # apply savgol depending on input type
    kw = dict(axis=axis, polyorder=polyorder, window_length=window_length)
    if isinstance(x, xr.DataArray):
        x.data = savgol(x.data, **kw)
        return x
    else:
        return savgol(x, **kw)
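Closing with a short usage sketch on a DataArray; the cut-off frequency and shapes are arbitrary, and the axis / sampling frequency are inferred from the 'times' coordinate as the code above does.

# illustrative only : low-pass a DataArray around 20Hz
import numpy as np
import xarray as xr

n_trials, n_times, sfreq = 10, 1000, 512.
times = np.arange(n_times) / sfreq
x = xr.DataArray(np.random.rand(n_trials, n_times),
                 dims=('trials', 'times'),
                 coords=(np.arange(n_trials), times))
x_filt = savgol_filter(x, 20.)  # axis and sfreq inferred from 'times'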