Example 1
    def __init__(self, inference='rfx', estimator=None, kernel=None,
                 verbose=None):
        """Init."""
        WfBase.__init__(self)
        assert inference in ['ffx', 'rfx'], (
            "'inference' input parameter should either be 'ffx' or 'rfx'")
        self._mi_type = 'cc'
        if estimator is None:
            estimator = GCMIEstimator(mi_type='cc', copnorm=False,
                                      verbose=verbose)
        assert estimator.settings['mi_type'] == self._mi_type
        self._copnorm = isinstance(estimator, GCMIEstimator)
        self._inference = inference
        self.estimator = estimator
        self._gcrn = inference == 'rfx'
        self._kernel = kernel
        set_log_level(verbose)
        self.clean()
        self._wf_stats = WfStats(verbose=verbose)
        # update internal config
        self.attrs.update(dict(mi_type=self._mi_type, inference=inference,
                               kernel=kernel))

        logger.info(f"Workflow for computing comodulations between distant "
                    f"brain areas ({inference})")
Example 2
def xr_to_arr(x, roi=None, y=None, z=None, times=None, sub_roi=None):
    """Xarray DataArray conversion to numpy arrays.

    Strings can be used to specify the name of the dimension to extract from
    the DataArray. This also works for pandas MultiIndex coordinates.
    """
    coords = dict()
    if isinstance(times, str):
        coords['times'] = times
        times = getattr(x[0], times).data
    if isinstance(roi, str):
        coords['roi'] = roi
        roi = [x[k].coords[roi].data for k in range(len(x))]
    if isinstance(sub_roi, str):
        coords['sub_roi'] = sub_roi
        sub_roi = [x[k].coords[sub_roi].data for k in range(len(x))]
    if isinstance(y, str):
        coords['y'] = y
        y = [x[k].coords[y].data for k in range(len(x))]
    if isinstance(z, str):
        coords['z'] = z
        z = [x[k].coords[z].data for k in range(len(x))]
    if coords:
        log_str = f"\n{' ' * 8}".join([f"{k}: {v}" for k, v in coords.items()])
        logger.info(f"    The following coordinates have been used : \n"
                    f"{' ' * 8}{log_str}")
    x = [x[k].data for k in range(len(x))]

    return x, roi, y, z, times, sub_roi
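
For illustration, a hedged sketch of calling xr_to_arr with dimension names
given as strings (this assumes the helper above is imported from frites, where
its module-level logger is defined):

import numpy as np
import xarray as xr

# two hypothetical subjects stored as DataArrays with named dimensions
d = [xr.DataArray(np.random.rand(20, 3, 50),
                  dims=('trials', 'parcels', 'times'),
                  coords={'trials': np.random.rand(20),
                          'parcels': ['roi_0', 'roi_1', 'roi_2'],
                          'times': np.arange(50) / 512.})
     for _ in range(2)]
# strings refer to coordinate names ; per-subject arrays are returned
x, roi, y, z, times, sub_roi = xr_to_arr(d, roi='parcels', y='trials',
                                         times='times')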
Example 3
    def __init__(self,
                 inference='rfx',
                 mi_method='gc',
                 kernel=None,
                 verbose=None):
        """Init."""
        WfBase.__init__(self)
        assert inference in [
            'ffx', 'rfx'
        ], ("'inference' input parameter should either be 'ffx' or 'rfx'")
        assert mi_method in [
            'gc', 'bin'
        ], ("'mi_method' input parameter should either be 'gc' or 'bin'")
        self._mi_type = 'cc'
        self._inference = inference
        self._mi_method = mi_method
        self._need_copnorm = mi_method == 'gc'
        self._gcrn = inference == 'rfx'
        self._kernel = kernel
        set_log_level(verbose)
        self.clean()
        self._wf_stats = WfStats(verbose=verbose)
        # update internal config
        self.update_cfg(mi_type=self._mi_type,
                        inference=inference,
                        mi_method=mi_method,
                        kernel=kernel)

        logger.info(f"Workflow for computing connectivity ({self._mi_type} - "
                    f"{mi_method})")
Example 4
def parallel_func(fcn, n_jobs=-1, verbose=None, total=None, mesg=None,
                  cache_dir=None, **kwargs):
    """Get an instance of parallel and delayed function.

    This function is inspired by MNE's implementation.

    Parameters
    ----------
    fcn : callable
        The function to run in parallel.
    n_jobs : int
        Number of jobs to run in parallel.
    total : int | None
        If int, use a progress bar to display the progress of dispatched
        jobs. This should only be used when directly iterating, not when
        using ``split_list`` or :func:`np.array_split`.
        If None (default), do not add a progress bar.
    mesg : string | None
        Message to display on the progress bar.
    cache_dir : string | None
        If path to an existing directory, the function is going to cache the
        computations in this directory.
    kwargs : dict | {}
        Additional arguments are sent to the joblib.Parallel function.

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object.
    my_func: callable
        ``fcn`` if not run in parallel, otherwise ``delayed(fcn)``.
    """
    from frites.config import CONFIG
    # set_log_level(verbose)

    # manually merge inputs inside the default config
    for k, v in CONFIG["JOBLIB_CFG"].copy().items():
        kwargs[k] = v
    # verbosity level of joblib
    kwargs['verbose'] = 1 if verbose in ['debug', True] else 0

    # caching option
    if isinstance(cache_dir, str) and os.path.isdir(cache_dir):
        logger.info(f'Caching computations to {cache_dir}')
        memory = Memory(cache_dir, verbose=kwargs['verbose'])
        fcn = memory.cache(fcn)

    # parallel functions
    para_fcn = delayed(fcn)
    parallel = Parallel(n_jobs, **kwargs)

    if total is not None:
        def parallel_progress(op_iter):
            return parallel(ProgressBar(iterable=op_iter, max_value=total,
                                        mesg=mesg))
        parallel_out = parallel_progress
    else:
        parallel_out = parallel

    return parallel_out, para_fcn
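
A short usage sketch of the parallel / delayed pattern returned by this helper
(assuming parallel_func above is in scope, e.g. imported from frites), which
mirrors how it is used in the workflows below:

import numpy as np

def _square(a):
    """Toy function to run in parallel."""
    return a ** 2

# get the joblib.Parallel instance and the delayed version of the function
parallel, p_fun = parallel_func(_square, n_jobs=2, total=10, mesg='Squaring')
# dispatch the jobs with a progress bar
out = parallel(p_fun(a) for a in np.arange(10))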
Example 5
    def _node_compute_mi(self, dataset, n_bins, n_perm, n_jobs, random_state):
        """Compute mi and permuted mi.

        Permutations are performed by randomizing the regressor variable. For
        the fixed effect, this randomization is performed across subjects. For
        the random effect, the randomization is performed per subject.
        """
        # get the function for computing mi
        mi_fun = get_core_mi_fun(self._mi_method)[self._mi_type]
        assert f"mi_{self._mi_method}_ephy_{self._mi_type}" == mi_fun.__name__
        # get x, y, z and subject names per roi
        if dataset._mi_type != self._mi_type:
            assert TypeError(f"Your dataset doesn't allow to compute the mi "
                             f"{self._mi_type}. Allowed mi is "
                             f"{dataset._mi_type}")
        x, y, z, suj = dataset.x, dataset.y, dataset.z, dataset.suj_roi
        n_roi, inf = dataset.n_roi, self._inference
        # evaluate true mi
        logger.info(f"    Evaluate true and permuted mi (n_perm={n_perm}, "
                    f"n_jobs={n_jobs})")
        # parallel function for computing permutations
        parallel, p_fun = parallel_func(mi_fun, n_jobs=n_jobs, verbose=False)
        pbar = ProgressBar(range(n_roi), mesg='Estimating MI')
        # evaluate permuted mi
        with parallel as para:
            mi, mi_p = [], []
            for r in range(n_roi):
                # compute the true mi
                mi += [mi_fun(x[r], y[r], z[r], suj[r], inf, n_bins=n_bins)]

                # get the randomized version of y
                y_p = permute_mi_vector(y[r],
                                        suj[r],
                                        mi_type=self._mi_type,
                                        inference=self._inference,
                                        n_perm=n_perm)
                # run permutations using the randomized regressor
                _mi = para(
                    p_fun(x[r], y_p[p], z[r], suj[r], inf, n_bins=n_bins)
                    for p in range(n_perm))
                mi_p += [np.asarray(_mi)]
                pbar.update_with_increment_value(1)
        # smoothing
        if isinstance(self._kernel, np.ndarray):
            logger.info("    Apply smoothing to the true and permuted MI")
            for r in range(len(mi)):
                for s in range(mi[r].shape[0]):
                    mi[r][s, :] = np.convolve(mi[r][s, :],
                                              self._kernel,
                                              mode='same')
                    for p in range(mi_p[r].shape[0]):
                        mi_p[r][p, s, :] = np.convolve(mi_p[r][p, s, :],
                                                       self._kernel,
                                                       mode='same')

        self._mi, self._mi_p = mi, mi_p

        return mi, mi_p
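
The smoothing step above simply convolves each time course with the
user-defined kernel. A minimal standalone sketch of that operation (the
5-point Hanning kernel is only an illustrative choice):

import numpy as np

kernel = np.hanning(5)             # hypothetical smoothing kernel
mi_r = np.random.rand(4, 100)      # (n_subjects, n_times) for one roi
for s in range(mi_r.shape[0]):
    # same-length output, as in the workflow above
    mi_r[s, :] = np.convolve(mi_r[s, :], kernel, mode='same')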
Example 6
    def _node_compute_mi(self, dataset, n_bins, n_perm, n_jobs, random_state):
        """Compute mi and permuted mi.

        Permutations are performed by randomizing the target roi. For the fixed
        effect, this randomization is performed across subjects. For the random
        effect, the randomization is performed per subject.
        """
        # get the function for computing mi
        mi_fun = get_core_mi_fun(self._mi_method)[f"{self._mi_type}_conn"]
        assert (f"mi_{self._mi_method}_ephy_conn_"
                f"{self._mi_type}" == mi_fun.__name__)
        # get x, y, z and subject names per roi
        x, y, suj = dataset.x, dataset.y, dataset.suj_roi
        roi = dataset.roi_names
        n_roi, inf = dataset.n_roi, self._inference
        # get the pairs for computing mi
        self.pairs = dataset.get_connectivity_pairs(
            nb_min_suj=dataset.nb_min_suj, directed=False)
        x_s, x_t = self.pairs
        n_pairs = len(x_s)
        # evaluate true mi
        logger.info(f"    Evaluate true and permuted mi (n_perm={n_perm}, "
                    f"n_jobs={n_jobs}, n_pairs={len(x_s)})")
        mi = [
            mi_fun(x[s], x[t], suj[s], suj[t], inf, n_bins=n_bins)
            for s, t in zip(x_s, x_t)
        ]
        # get joblib configuration
        cfg_jobs = config.CONFIG["JOBLIB_CFG"]
        # evaluate permuted mi
        mi_p = []
        for s, t in zip(x_s, x_t):
            # get the randomized version of y
            y_p = permute_mi_trials(suj[t],
                                    inference=self._inference,
                                    n_perm=n_perm)
            # run permutations using the randomized regressor
            _mi = Parallel(n_jobs=n_jobs, **cfg_jobs)(
                delayed(mi_fun)(x[s], x[t][..., y_p[p]], suj[s], suj[t], inf,
                                n_bins=n_bins) for p in range(n_perm))
            mi_p += [np.asarray(_mi)]
        # smoothing
        if isinstance(self._kernel, np.ndarray):
            logger.info("    Apply smoothing to the true and permuted MI")
            for r in range(len(mi)):
                for s in range(mi[r].shape[0]):
                    mi[r][s, :] = np.convolve(mi[r][s, :],
                                              self._kernel,
                                              mode='same')
                    for p in range(mi_p[r].shape[0]):
                        mi_p[r][p, s, :] = np.convolve(mi_p[r][p, s, :],
                                                       self._kernel,
                                                       mode='same')

        self._mi, self._mi_p = mi, mi_p

        return mi, mi_p
Example 7
    def __init__(self, mi_type='cc', verbose=None):
        """Init."""
        set_log_level(verbose)
        desc = CONFIG['MI_REPR'][mi_type]
        settings = {'description': desc}
        self.settings = Attributes(attrs=settings, section_name='Settings')
        self._kwargs = dict()
        assert hasattr(self, 'name')

        logger.info(f"{self.name} ({mi_type})")
Example 8
def mne_to_arr(x, roi=None):
    """Convert list of MNE types into numpy arrays."""
    # get time vector and roi names (if empty)
    times = x[0].times
    if roi is None:
        logger.info("    Infer roi names using `ch_names`")
        roi = [np.asarray(x[k].ch_names) for k in range(len(x))]
    # get the data and replace inplace
    for k in range(len(x)):
        x[k] = x[k].get_data()

    return x, times, roi
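
A hedged sketch of feeding MNE epochs to this converter (it assumes mne_to_arr
above is imported from frites so that its logger is available, and uses
standard MNE constructors):

import numpy as np
import mne

# hypothetical single subject : 20 epochs, 3 channels, 100 time points
info = mne.create_info(['roi_0', 'roi_1', 'roi_2'], sfreq=512.)
epochs = mne.EpochsArray(np.random.rand(20, 3, 100), info, verbose=False)

# roi names are inferred from ch_names because roi is None
x, times, roi = mne_to_arr([epochs])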
Example 9
def conn_io(da, trials=None, roi=None, times=None, verbose=None):
    """I/O conversion for connectivity functions.

    Parameters
    ----------
    da : array_like
        Array of electrophysiological data of shape (n_trials, n_roi, n_times)
    trials : array_like | None
        Trial vector or string corresponding to the dimension name in a
        DataArray. If None and `da` is a DataArray, the first dimension is
        used
    roi : array_like | None
        List of roi names or string corresponding to the dimension name in a
        DataArray
    times : array_like | None
        Time vector or string corresponding to the dimension name in a
        DataArray
    """
    set_log_level(verbose)
    assert isinstance(da, np.ndarray) or isinstance(da, xr.DataArray)
    assert da.ndim == 3
    n_trials, n_roi, n_times = da.shape
    attrs = dict(n_trials=n_trials, n_roi=n_roi, n_times=n_times)
    logger.info(f"Inputs conversion (n_trials={n_trials}, n_roi={n_roi}, "
                f"n_times={n_times})")

    # _______________________________ Xarray case _____________________________
    if isinstance(da, xr.DataArray):
        # if needed, use the first dimension as the trials coordinate
        if trials is None:
            trials = da.dims[0]
        # get trials, roi and times
        if isinstance(trials, str):
            trials = da[trials].data
        if isinstance(roi, str):
            roi = da[roi].data
        if isinstance(times, str):
            times = da[times].data
        attrs = {**attrs, **da.attrs}
        da = da.data

    # _____________________________ Empty inputs ______________________________
    if roi is None:
        roi = [f"roi_{k}" for k in range(n_roi)]
    if times is None:
        times = np.arange(n_times)
    if trials is None:
        trials = np.arange(n_trials)

    # _______________________________ Final check _____________________________
    assert isinstance(da, np.ndarray)
    assert da.shape == (len(trials), len(roi), len(times))

    return da, trials, roi, times, attrs
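
A hedged sketch of the DataArray path of conn_io, with dimension names passed
as strings (it assumes the function above is importable from frites, where
set_log_level and the logger are defined):

import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(30, 4, 200),
                  dims=('trials', 'roi', 'times'),
                  coords=(np.arange(30),
                          ['r0', 'r1', 'r2', 'r3'],
                          np.linspace(-0.5, 1.5, 200)))
# strings point to the DataArray dimensions to use
data, trials, roi, times, attrs = conn_io(da, roi='roi', times='times')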
Example 10
def ds_ephy_io(x,
               roi=None,
               y=None,
               z=None,
               times=None,
               sub_roi=None,
               verbose=None):
    """Manage inputs conversion for the DatasetEphy.

    This function is used to convert NumPy / MNE / Xarray inputs into a
    standardized NumPy version.

    Parameters
    ----------
    x : list
        List of length (n_subjects,). Each element of the list should either be
        an array of shape (n_epochs, n_channels, n_times), mne.Epochs,
        mne.EpochsArray, mne.EpochsTFR (i.e. non-averaged power) or DataArray
    roi : list | None
        List of length (n_subjects,) of roi names of length (n_channels)
    y, z : list | None
        List for the regressors. Each element should be an array of shape
        (n_epochs)
    sub_roi : list | None
        List of sub_roi names
    times : array_like | None
        Time vector

    Returns
    -------
    x : list
        List of data array of shape (n_epochs, n_channels, n_times)
    y, z : list
        List of arrays of shape (n_epochs,)
    roi : list
        List of arrays of shape (n_channels,)
    times : array_like
        Time vector of shape (n_times,)
    sub_roi : array_like
        List of arrays of shape (n_channels,)
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # data type detection and switch
    # -------------------------------------------------------------------------
    assert isinstance(x, list), ("x input should be a list of elements of "
                                 "length (n_subjects,)")
    assert all([type(x[k]) == type(x[0]) for k in range(len(x))]), (
        "All elements of the `x` input should have the same type")

    # -------------------------------------------------------------------------
    # conversion to array according to datatype
    # -------------------------------------------------------------------------
    if 'numpy' in str(type(x[0])):
        logger.info("    NumPy inputs detected")
    elif 'mne' in str(type(x[0])):
        logger.info("    Converting mne inputs")
        x, times, roi = mne_to_arr(x, roi=roi)
    elif 'xarray' in str(type(x[0])):
        logger.info("    Converting xarray inputs")
        x, roi, y, z, times, sub_roi = xr_to_arr(x,
                                                 roi=roi,
                                                 y=y,
                                                 z=z,
                                                 times=times,
                                                 sub_roi=sub_roi)

    # -------------------------------------------------------------------------
    # manage none inputs
    # -------------------------------------------------------------------------
    # empty roi
    if not isinstance(roi, list):
        logger.warning("No roi have been provided. A default will be used "
                       "instead. You should use the `roi` input instead")
        roi = []
        for k in range(len(x)):
            roi += [np.array([f"roi_{i}" for i in range(x[k].shape[1])])]
    # empty time vector
    if not isinstance(times, np.ndarray):
        logger.warning("No time vector found. A default will be used instead."
                       " You should use the `times` input instead")
        times = np.arange(x[0].shape[-1])

    # -------------------------------------------------------------------------
    # shape and types checking before returning
    # -------------------------------------------------------------------------
    # spatio-temporal conversion
    roi = [np.asarray(roi[k]) for k in range(len(roi))]
    times = times.astype(np.float32)
    # data checking
    assert all([
        k.ndim in [3, 4] for k in x
    ]), ("data should either contains 3d arrays (n_trials, n_channels, n_pts) "
         "or 4d arrays (n_trials, n_channels, n_freqs, n_pts)")
    x_sh = [x[k].shape for k in range(len(x))]
    is_sh_roi = [x_sh[k][1] == len(roi[k]) for k in range(len(x))]
    is_sh_times = [x_sh[k][-1] == len(times) for k in range(len(x))]
    assert all(is_sh_roi), "Inconsistent number of ROI"
    assert all(is_sh_times), "Inconsistent number of time points"
    assert all([list(x_sh[0])[1:] == list(x_sh[k])[1:] for k in range(len(x))])
    if isinstance(y, list):
        y = [np.asarray(y[k]) for k in range(len(y))]
        assert len(y) == len(x), "length of y should be (n_subjects,)"
        assert all([
            x_sh[k][0] == len(y[k]) for k in range(len(x))
        ]), ("Each element of the y input should have a length of (n_epochs,)")
    if isinstance(z, list):
        z = [np.asarray(z[k]) for k in range(len(z))]
        assert len(z) == len(x), "length of z should be (n_subjects,)"
        assert all([
            x_sh[k][0] == len(z[k]) for k in range(len(x))
        ]), ("Each element of the z input should have a length of (n_epochs,)")
    if isinstance(sub_roi, list):
        assert all([k.shape == i.shape for k, i in zip(roi, sub_roi)])

    # -------------------------------------------------------------------------
    # categorical sub roi
    # -------------------------------------------------------------------------
    # concatenate everything and get unique elements
    if isinstance(sub_roi, list):
        import pandas as pd
        logger.info("    Replacing sub roi by categorical integers")

        # get unique sub roi and build replacement dict
        sub_roi_cat = np.r_[tuple([k.squeeze() for k in sub_roi])]
        sub_roi_u = np.unique(sub_roi_cat, return_index=True)
        repl = {k: v for k, v in zip(*sub_roi_u)}

        # replace for each subject
        sub_roi_int = []
        for _sub in sub_roi:
            sub_int = np.array(list(pd.Series(_sub).replace(repl, regex=True)))
            sub_roi_int += [sub_int]
    else:
        sub_roi_int = None

    return x, y, z, roi, times, sub_roi_int
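
For illustration, a hedged NumPy-only call (it assumes ds_ephy_io above is
imported from frites, where set_log_level and the logger are defined):

import numpy as np

# two hypothetical subjects with different numbers of epochs
x = [np.random.rand(20, 4, 100), np.random.rand(25, 4, 100)]
y = [np.random.rand(20), np.random.rand(25)]
roi = [np.array(['r0', 'r1', 'r2', 'r3'])] * 2
times = np.linspace(-0.5, 0.5, 100)

x, y, z, roi, times, sub_roi = ds_ephy_io(x, roi=roi, y=y, times=times)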
Example 11
def conn_get_pairs(roi, directed=False, nb_min_suj=-np.inf, verbose=None):
    """Get possible connectivity pairs for multiple subjects.

    This function returns a DataFrame that contains all of the information
    needed to manage pairs of brain regions across many subjects.

    Parameters
    ----------
    roi : list
        List where each item is an array describing the brain region names of
        a single subject.
    directed : bool | False
        Specify whether the returned pairs should be for directed (True)
        or undirected (default : False) connectivity.
    nb_min_suj : int | -np.inf
        Minimum number of subjects a pair of brain regions must be present in
        to be kept.

    Returns
    -------
    df_conn : pd.DataFrame
        A Pandas DataFrame that describes the connectivity information at the
        group level. The table contains the following entries :

            * 'sources' / 'targets' : respectively, the source and target names
            * 'subjects' : list of subjects per pair of brain regions
            * '#subjects' : number of subjects per pair of brain regions
            * 'names' : name of each pair. If undirected, the names are going
              to be like 'roi_0-roi_1' or 'roi_0->roi_1' if directed
            * 'keep' : booleans indicating whether the number of subjects per
              pair of brain regions is over nb_min_suj
    df_suj : pd.DataFrame
        A Pandas DataFrame that describes the connectivity information per
        subject. The table contains the following entries :

            * 'subjects' : subject number
            * 'keep_roi' / 'drop_roi' : the brain regions respectively to keep
              and to remove in order to satisfy the nb_min_suj input parameter
            * 'keep_suj' : boolean describing if the subject should be kept
              or dropped
            * 'conn' : the 2D boolean connectivity array per subject
    """
    set_log_level(verbose)
    assert isinstance(roi, list)
    n_subjects = len(roi)
    roi = [np.asarray(k) for k in roi]

    # =========================== Conn info per pair ==========================

    s_ss, t_ss, ss = [], [], []
    for k in range(n_subjects):
        # get the unsorted list of unique brain regions
        u_roi = nonsorted_unique(roi[k], assert_unique=True)
        n_u_roi = len(u_roi)
        # get all possible pairs
        if directed:
            pairs = np.where(~np.eye(n_u_roi, dtype=bool))
        else:
            pairs = np.triu_indices(n_u_roi, k=1)
        s_names, t_names = u_roi[pairs[0]], u_roi[pairs[1]]
        # if not directed, merge '0-1' and '1-0'
        if not directed:
            st_names = np.c_[s_names, t_names]
            s_names, t_names = np.unique(np.sort(st_names, axis=1), axis=0).T
        # keep single-subject source and target names
        s_ss += [s_names]
        t_ss += [t_names]
        ss += [k] * len(s_names)
    # fill info in a dataframe
    df_ss = pd.DataFrame({
        'subjects': ss,
        'sources': np.concatenate(s_ss),
        'targets': np.concatenate(t_ss)
    })

    # get the number of subjects per pair
    pattern = '->' if directed else '-'
    gp = df_ss.groupby(['sources', 'targets'])
    fcn = lambda df: len(np.unique(df))
    df_conn = gp.subjects.aggregate([list]).reset_index()
    df_conn = df_conn.rename(columns={'list': 'subjects'})
    df_conn['#subjects'] = [len(k) for k in df_conn['subjects']]
    df_conn['names'] = [
        f"{k}{pattern}{i}"
        for k, i in zip(df_conn['sources'], df_conn['targets'])
    ]
    df_conn['keep'] = df_conn['#subjects'] >= nb_min_suj

    # print the info
    n_remain = np.sum(list(df_conn['keep']))
    n_drop = np.sum(list(~df_conn['keep']))
    logger.info(f"    {n_remain} remaining pairs of brain regions "
                f"(nb_min_suj={nb_min_suj}), {n_drop} dropped")

    # ========================= Conn info per subject =========================

    # build 2d connectivity array per subject
    conn = {}
    for n_s in range(n_subjects):
        n_roi_s = len(roi[n_s])
        _conn = xr.DataArray(~np.eye(n_roi_s, dtype=bool),
                             dims=('sources', 'targets'),
                             coords=(roi[n_s], roi[n_s]))
        conn[n_s] = _conn

    # fill the information
    for k in range(len(df_conn)):
        _df = df_conn.iloc[k, :]
        for s in _df['subjects']:
            _s, _t, _k = _df['sources'], _df['targets'], bool(_df['keep'])
            conn[s].loc[dict(sources=_s, targets=_t)] = _k
            if not directed:
                conn[s].loc[dict(sources=_t, targets=_s)] = _k

    # get the brain regions to keep / drop per subject
    suj, roi_keep, roi_drop, conn_tot = [], [], [], []
    for s in range(n_subjects):
        _keep = roi[s][np.union1d(*np.where(conn[s]))]
        _drop = np.setdiff1d(roi[s], _keep)
        suj += [s]
        roi_keep += [_keep.tolist()]
        roi_drop += [_drop.tolist()]
        conn_tot += [conn[s].data]
    # create the final dataframe
    df_suj = pd.DataFrame({
        'subjects': suj,
        'keep_roi': roi_keep,
        'drop_roi': roi_drop
    })  # , 'conn': conn_tot
    df_suj['keep_suj'] = [len(k) > 1 for k in df_suj['keep_roi']]

    return df_conn, df_suj
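
A small hedged example (it assumes conn_get_pairs above is in scope, e.g.
imported from frites.conn) : with two subjects and nb_min_suj=2, only the pair
recorded in both subjects is kept.

import numpy as np

roi = [np.array(['insula', 'vmPFC', 'dlPFC']),   # subject 0
       np.array(['insula', 'vmPFC'])]            # subject 1
df_conn, df_suj = conn_get_pairs(roi, directed=False, nb_min_suj=2)
# df_conn['keep'] is True only for the 'insula-vmPFC' pair (2 subjects)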
Example 12
def define_windows(times,
                   windows=None,
                   slwin_len=None,
                   slwin_start=None,
                   slwin_stop=None,
                   slwin_step=None,
                   verbose=None):
    """Define temporal windows.

    This function can be used either to manually define temporal windows or to
    build automatic sliding windows. Note that every input parameter should be
    expressed in the time domain (e.g seconds or milliseconds).

    Parameters
    ----------
    times : array_like
        Time vector
    windows : array_like | None
        Manual windows (e.g (.1, .2) or [(.1, .2), (.4, .5)]).
    slwin_len : float | None
        Length of each sliding window (e.g .2 produces 200ms-long windows).
    slwin_start : float | None
        Time point for starting sliding windows (e.g 0.1). If None, sliding
        windows will start from the first time point.
    slwin_stop : float | None
        Time point for ending sliding windows (e.g 1.5). If None, sliding
        windows will finish at the last time point.
    slwin_step : float | None
        Temporal step between each temporal window (e.g .1 means that each
        consecutive windows are going to be separated by 100ms). This parameter
        can be used to define either overlapping or non-overlapping windows. If
        None, slwin_step is set to slwin_len (plus one time step) in order to
        produce consecutive non-overlapping windows.

    Returns
    -------
    win_sample : array_like
        Array of shape (n_windows, 2) of temporal indexes defining where each
        window starts and finishes
    mean_time : array_like
        Mean time vector inside each defined window of shape (n_windows,)

    See also
    --------
    plot_windows
    """
    set_log_level(verbose)
    assert isinstance(times, np.ndarray)
    logger.info("Defining temporal windows")
    stamp = times[1] - times[0]

    # -------------------------------------------------------------------------
    # build windows
    if (windows is None) and (slwin_len is None):
        logger.info("    No input detected. Full time window is used")
        win_time = np.array([[times[0], times[-1]]])
    elif windows is not None:
        logger.info("    Manual definition of windows")
        win_time = np.atleast_2d(windows)
    elif slwin_len is not None:
        # manage empty inputs
        if slwin_start is None: slwin_start = times[0]  # noqa
        if slwin_stop is None: slwin_stop = times[-1]  # noqa
        if slwin_step is None: slwin_step = slwin_len + stamp  # noqa
        logger.info(f"    Definition of sliding windows (len={slwin_len}, "
                    f"start={slwin_start}, stop={slwin_stop}, "
                    f"step={slwin_step})")
        # build the sliding windows
        sl_start = np.arange(slwin_start, slwin_stop - slwin_len, slwin_step)
        sl_stop = np.arange(slwin_start + slwin_len, slwin_stop, slwin_step)
        if len(sl_start) != len(sl_stop):
            min_len = min(len(sl_start), len(sl_stop))
            sl_start, sl_stop = sl_start[0:min_len], sl_stop[0:min_len]
        win_time = np.c_[sl_start, sl_stop]
    assert (win_time.ndim == 2) and (win_time.shape[1] == 2)

    # -------------------------------------------------------------------------
    # time to sample conversion
    win_sample = np.zeros_like(win_time, dtype=int)
    times = times.reshape(-1, 1)
    for n_k, k in enumerate(win_time):
        win_sample[n_k, :] = np.argmin(np.abs(times - k), axis=0)
    logger.info(f"    {win_sample.shape[0]} windows defined")

    return win_sample, win_time.mean(1)
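
A usage sketch with sliding windows (define_windows is referenced as
frites.conn.define_windows in Example 16's docstring ; importing it from there
is an assumption here):

import numpy as np
from frites.conn import define_windows

# 1 kHz time vector from -0.5s to 1s
times = np.arange(-0.5, 1., 0.001)
# 200ms windows, stepped every 100ms (i.e. 50% overlap)
win_sample, win_mean = define_windows(times, slwin_len=0.2, slwin_step=0.1)
# win_sample : (n_windows, 2) sample indices ; win_mean : window centers (s)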
Example 13
    def get_params(self, *params):
        """Get formatted parameters.

        This method can be used to get internal arrays formatted as xarray
        DataArray.

        Parameters
        ----------
        params : string
            Internal array names to get as xarray DataArray. You can use :

                * 'tvalues' : DataArray of t-values of shape (n_times, n_roi).
                  Only possible with RFX inferences
                * 'mi_ss' : DataArray of single subject mutual-information of
                  shape (n_subjects, n_times, n_roi)
                * 'perm_ss' : DataArray of computed permutations of shape
                  (n_perm, n_subjects, n_times, n_roi)
                * 'perm_' : DataArray of maximum computed permutations of
                  shape (n_perm,)
        """
        # get coordinates
        times, roi, df_rs = self._times, self._roi, self._df_rs
        if self._inference == 'ffx':
            suj = [np.array([-1])] * len(roi)
        elif self._inference == 'rfx':
            suj = [np.array(df_rs.loc[r, 'subjects']) for r in roi]
        n_perm = self._mi_p[0].shape[0]
        perm = np.arange(n_perm)
        # loop over possible outputs
        outs = []
        for param in params:
            assert isinstance(param, str)
            logger.info(f'    Formatting array {param}')
            if param == 'tvalues':
                da = self._tvalues
            elif param == 'mi_ss':
                mi = dict()
                for n_r, r in enumerate(roi):
                    mi[r] = xr.DataArray(self._mi[n_r],
                                         coords=(suj[n_r], times),
                                         dims=('subjects', 'times'))
                da = xr.Dataset(mi).to_array('roi')
                da = da.transpose('subjects', 'times', 'roi')
            elif param == 'perm_ss':
                mi = dict()
                for n_r, r in enumerate(roi):
                    mi[r] = xr.DataArray(self._mi_p[n_r],
                                         dims=('perm', 'subjects', 'times'),
                                         coords=(perm, suj[n_r], times))
                da = xr.Dataset(mi).to_array('roi')
                da = da.transpose('perm', 'subjects', 'times', 'roi')
            elif param == 'perm_':
                mi_p = np.r_[tuple([k.ravel() for k in self._mi_p])]
                mi_p.sort()
                da = xr.DataArray(mi_p[-n_perm:],
                                  dims=('perm', ),
                                  coords=(perm, ))
            else:
                raise ValueError(f"Parameter {param} not found")
            # add workflow attributes
            self.attrs.wrap_xr(da, name=param)
            outs += [da]

        return tuple(outs)
Example 14
    def _node_compute_mi(self, dataset, n_perm, n_jobs, random_state):
        """Compute mi and permuted mi.

        Permutations are performed by randomizing the regressor variable. For
        the fixed effect, this randomization is performed across subjects. For
        the random effect, the randomization is performed per subject.
        """
        # get the function for computing mi
        mi_fun = self.estimator.get_function()
        # get x, y, z and subject names per roi
        if dataset._mi_type != self._mi_type:
            assert TypeError(f"Your dataset doesn't allow to compute the mi "
                             f"{self._mi_type}. Allowed mi is "
                             f"{dataset._mi_type}")
        # get data variables
        n_roi, inf = len(self._roi), self._inference
        # evaluate true mi
        logger.info(f"    Evaluate true and permuted mi (n_perm={n_perm}, "
                    f"n_jobs={n_jobs})")
        # parallel function for computing permutations
        parallel, p_fun = parallel_func(mi_fun, n_jobs=n_jobs, verbose=False)
        pbar = ProgressBar(range(n_roi), mesg='Estimating MI')
        # evaluate permuted mi
        mi, mi_p = [], []
        for r in range(n_roi):
            # get the data of selected roi
            da = dataset.get_roi_data(self._roi[r],
                                      copnorm=self._copnorm,
                                      mi_type=self._mi_type,
                                      gcrn_per_suj=self._gcrn)
            x, y, suj = da.data, da['y'].data, da['subject'].data
            kw_mi = dict()
            # cmi and categorical MI
            if 'z' in list(da.coords):
                kw_mi['z'] = da['z'].data
            if self._inference == 'rfx':
                kw_mi['categories'] = suj

            # compute the true mi
            _mi = mi_fun(x, y, **kw_mi)
            # get the randomized version of y
            y_p = permute_mi_vector(y,
                                    suj,
                                    mi_type=self._mi_type,
                                    inference=self._inference,
                                    n_perm=n_perm)
            # run permutations using the randomized regressor
            _mi_p = parallel(p_fun(x, y_p[p], **kw_mi) for p in range(n_perm))
            _mi_p = np.asarray(_mi_p)

            # kernel smoothing
            if isinstance(self._kernel, np.ndarray):
                _mi = kernel_smoothing(_mi, self._kernel, axis=-1)
                _mi_p = kernel_smoothing(_mi_p, self._kernel, axis=-1)

            mi += [_mi]
            mi_p += [_mi_p]
            pbar.update_with_increment_value(1)

        self._mi, self._mi_p = mi, mi_p

        return mi, mi_p
Example 15
    def fit(self,
            dataset,
            mcp='cluster',
            n_perm=1000,
            cluster_th=None,
            cluster_alpha=0.05,
            n_bins=None,
            n_jobs=-1,
            random_state=None,
            **kw_stats):
        """Run the workflow on a dataset.

        In order to run the workflow, you must first provide a dataset instance
        (see :class:`frites.dataset.DatasetEphy`)

        .. warning::

            When performing statistics at the cluster-level, we only test
            the cluster size. This means that in your results, you can only
            discuss the presence of a significant cluster without being
            precise about its spatio-temporal properties
            (see :cite:`sassenhagen2019cluster`)

        Parameters
        ----------
        dataset : :class:`frites.dataset.DatasetEphy`
            A dataset instance
        mcp : {'cluster', 'maxstat', 'fdr', 'bonferroni', 'nostat', None}
            Method to use for correcting p-values for the multiple comparison
            problem. Use either :
                
                * 'cluster' : cluster-based statistics [default]
                * 'maxstat' : test-wise maximum statistics correction
                * 'fdr' : test-wise FDR correction
                * 'bonferroni' : test-wise Bonferroni correction
                * 'noperm' / None : no permutations are computed
        n_perm : int | 1000
            Number of permutations to perform in order to estimate the random
            distribution of mi that can be obtained by chance
        cluster_th : str, float | None
            The threshold to use for forming clusters. Use either :

                * a float that is going to act as a threshold
                * None and the threshold is automatically going to be inferred
                  using the distribution of permutations
                * 'tfce' : for Threshold Free Cluster Enhancement
        cluster_alpha : float | 0.05
            Control the percentile to use for forming the clusters. By default
            the 95th percentile of the permutations is used.
        n_bins : int | None
            Number of bins to use if the method for computing the mutual
            information is based on binning (mi_method='bin'). If None, the
            number of bins is going to be automatically inferred based on the
            number of trials and variables
        n_jobs : int | -1
            Number of jobs to use for parallel computing (use -1 to use all
            jobs)
        random_state : int | None
            Fix the random state of the machine (use it for reproducibility).
            If None, a random state is randomly assigned.
        kw_stats : dict | {}
            Additional arguments are sent to the statistical method selected
            with the `mcp` input parameter

        Returns
        -------
        mi, pvalues : array_like
            DataArray of mean mutual information and p-values.

        References
        ----------
        Maris and Oostenveld, 2007 :cite:`maris2007nonparametric`
        """
        # ---------------------------------------------------------------------
        # prepare variables
        # ---------------------------------------------------------------------
        # don't compute permutations if mcp is either noperm / None
        if mcp in ['noperm', None]:
            n_perm = 0
        # infer the number of bins if needed
        if (self._mi_method == 'bin') and not isinstance(n_bins, int):
            n_bins = 4
            logger.info(f"    Use an automatic number of bins of {n_bins}")
        self._n_bins = n_bins

        # ---------------------------------------------------------------------
        # compute connectivity
        # ---------------------------------------------------------------------
        # if mi and mi_p have already been computed, reuse it instead
        if len(self._mi) and len(self._mi_p):
            logger.info("    True and permuted mutual-information already "
                        "computed. Use WfComod.clean to reset "
                        "arguments")
            mi, mi_p = self._mi, self._mi_p
        else:
            self._node_prepare_data(dataset)
            mi, mi_p = self._node_compute_mi(dataset, self._n_bins, n_perm,
                                             n_jobs, random_state)

        # ---------------------------------------------------------------------
        # compute statistics
        # ---------------------------------------------------------------------
        # infer p-values and t-values
        pvalues, tvalues = self._wf_stats.fit(mi,
                                              mi_p,
                                              mcp=mcp,
                                              cluster_th=cluster_th,
                                              tail=1,
                                              cluster_alpha=cluster_alpha,
                                              inference=self._inference,
                                              **kw_stats)
        # update internal config
        self.update_cfg(n_perm=n_perm,
                        random_state=random_state,
                        n_bins=n_bins,
                        **self._wf_stats.cfg)

        # ---------------------------------------------------------------------
        # post-processing
        # ---------------------------------------------------------------------
        logger.info(f"    Formatting outputs")
        args = (self._times, dataset.roi_names, self.pairs[0], self.pairs[1],
                'dataarray')
        if isinstance(tvalues, np.ndarray):
            self._tvalues = convert_dfc_outputs(tvalues, *args)
        pvalues = convert_dfc_outputs(pvalues, is_pvalue=True, *args)
        if self._inference == 'rfx':
            mi = np.stack([k.mean(axis=0) for k in mi]).T  # mean mi
        elif self._inference == 'ffx':
            mi = np.concatenate(mi, axis=0).T  # mi
        mi = convert_dfc_outputs(mi, *args)
        # converting outputs
        mi = self._attrs_xarray(mi, da_type='mi')
        pvalues = self._attrs_xarray(pvalues, da_type='pvalues')

        return mi, pvalues
Example 16
def conn_dfc(data,
             win_sample=None,
             times=None,
             roi=None,
             n_jobs=1,
             gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using the
    Gaussian Copula Mutual Information (GCMI). The DFC is computed across time
    points for each trial. Note that the DFC can either be computed on windows
    manually defined or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    win_sample : array_like | None
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define windows either manually or as sliding windows. If None, the
        entire time window is used instead.
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g z-score) this parameter can be set to
        False because the data can already be considered Gaussian over time.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # inputs conversion and data checking
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_trials = len(trials)
    # deal with the win_sample array
    if win_sample is None:
        win_sample = np.array([[0, len(times) - 1]])
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]

    # -------------------------------------------------------------------------
    # find group of brain regions
    gp = pd.DataFrame({'roi': roi}).groupby('roi').groups
    roi_gp, roi_idx = list(gp.keys()), list(gp.values())
    n_roi = len(roi_gp)
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    roi_p = [f"{roi_gp[s]}-{roi_gp[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # prepare outputs and elements
    n_jobs = 1 if n_win == 1 else n_jobs
    parallel, p_fun = parallel_func(_conn_dfc,
                                    n_jobs=n_jobs,
                                    verbose=verbose,
                                    total=n_win,
                                    mesg='Estimating DFC')

    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    dfc = np.zeros((n_trials, n_pairs, n_win), dtype=np.float64)

    # -------------------------------------------------------------------------
    # compute the dynamic functional connectivity

    dfc = parallel(
        p_fun(x[:, :, w[0]:w[1]], x_s, x_t, roi_idx, gcrn) for w in win_sample)
    dfc = np.stack(dfc, 2)

    # -------------------------------------------------------------------------
    # dataarray conversion
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc,
                       dims=('trials', 'roi', 'times'),
                       name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)],
               type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc
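
An end-to-end hedged sketch combining define_windows and conn_dfc (the import
paths follow the docstring cross-references above and are otherwise
assumptions):

import numpy as np
from frites.conn import define_windows, conn_dfc

# random data : 50 epochs, 4 brain regions, 600 time points
x = np.random.rand(50, 4, 600)
times = np.linspace(-0.5, 1.5, 600)
roi = ['r0', 'r1', 'r2', 'r3']

win, _ = define_windows(times, slwin_len=0.3, slwin_step=0.1)
dfc = conn_dfc(x, win_sample=win, times=times, roi=roi, n_jobs=1)
print(dfc.shape)   # (n_epochs, n_pairs, n_windows) with n_pairs = 6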
Example 17
    def fit(self,
            dataset,
            mcp='cluster',
            n_perm=1000,
            cluster_th=None,
            cluster_alpha=0.05,
            n_jobs=-1,
            random_state=None,
            **kw_stats):
        """Run the workflow on a dataset.

        In order to run the workflow, you must first provide a dataset instance
        (see :class:`frites.dataset.DatasetEphy`)

        .. warning::

            When performing statistics at the cluster-level, we only test
            the cluster size. This means that in your results, you can only
            discuss the presence of a significant cluster without being
            precise about its spatio-temporal properties
            (see :cite:`sassenhagen2019cluster`)

        Parameters
        ----------
        dataset : :class:`frites.dataset.DatasetEphy`
            A dataset instance
        mcp : {'cluster', 'maxstat', 'fdr', 'bonferroni', 'nostat', None}
            Method to use for correcting p-values for the multiple comparison
            problem. Use either :

                * 'cluster' : cluster-based statistics [default]
                * 'maxstat' : test-wise maximum statistics correction
                * 'fdr' : test-wise FDR correction
                * 'bonferroni' : test-wise Bonferroni correction
                * 'nostat' : permutations are computed but no statistics are
                  performed
                * 'noperm' / None : no permutations are computed
        n_perm : int | 1000
            Number of permutations to perform in order to estimate the random
            distribution of mi that can be obtained by chance
        cluster_th : str, float | None
            The threshold to use for forming clusters. Use either :

                * a float that is going to act as a threshold
                * None and the threshold is automatically going to be inferred
                  using the distribution of permutations
                * 'tfce' : for Threshold Free Cluster Enhancement
        cluster_alpha : float | 0.05
            Control the percentile to use for forming the clusters. By default
            the 95th percentile of the permutations is used.
        n_jobs : int | -1
            Number of jobs to use for parallel computing (use -1 to use all
            jobs)
        random_state : int | None
            Fix the random state of the machine (use it for reproducibility).
            If None, a random state is randomly assigned.
        kw_stats : dict | {}
            Additional arguments are sent to
            :py:class:`frites.workflow.WfStats.fit`

        Returns
        -------
        mi, pvalues : array_like
            DataArray of mutual information and p-values both of shapes
            (n_times, n_roi). If `inference` is 'ffx' the mi represents the MI
            computed across subjects while if it is 'rfx' it's the mean across
            subjects.

        References
        ----------
        Maris and Oostenveld, 2007 :cite:`maris2007nonparametric`
        """
        # ---------------------------------------------------------------------
        # prepare variables
        # ---------------------------------------------------------------------
        # don't compute permutations if mcp is either noperm / None
        if mcp in ['noperm', None]:
            n_perm = 0
        # get the needed dataset information
        self._times, self._roi = dataset.times, dataset.roi_names
        self._mi_dims = dataset._mi_dims
        self._mi_coords = dict()
        for k in self._mi_dims:
            if k != 'roi':
                self._mi_coords[k] = dataset.x[0].coords[k].data
            else:
                self._mi_coords['roi'] = self._roi
        self._df_rs, self._n_subjects = dataset.df_rs, dataset._n_subjects

        # ---------------------------------------------------------------------
        # compute mutual information
        # ---------------------------------------------------------------------
        # if mi and mi_p have already been computed, reuse it instead
        if len(self._mi) and len(self._mi_p):
            logger.info("    True and permuted mutual-information already "
                        "computed. Use WfMi.clean to reset "
                        "arguments")
            mi, mi_p = self._mi, self._mi_p
        else:
            mi, mi_p = self._node_compute_mi(dataset, n_perm, n_jobs,
                                             random_state)
        """
        For information transfer (e.g FIT) we only need to compute the true and
        permuted mi but then, the statistics at the local representation mcp
        are discarded in favor of statistics on the information transfer
        """
        if mcp == 'nostat':
            logger.debug("Permutations computed. Stop there")
            return None

        # ---------------------------------------------------------------------
        # compute statistics
        # ---------------------------------------------------------------------
        # infer p-values and t-values
        pvalues, tvalues = self._wf_stats.fit(mi,
                                              mi_p,
                                              mcp=mcp,
                                              cluster_th=cluster_th,
                                              tail=1,
                                              cluster_alpha=cluster_alpha,
                                              inference=self._inference,
                                              **kw_stats)
        # update attributes
        self.attrs.update(self._wf_stats.attrs)
        self.attrs.update(dict(n_perm=n_perm, random_state=random_state))

        # ---------------------------------------------------------------------
        # postprocessing and conversions
        # ---------------------------------------------------------------------
        # tvalues conversion
        if isinstance(tvalues, np.ndarray):
            self._tvalues = self._xr_conversion(tvalues, 'tvalues')
        # mean mi across subjects
        if self._inference == 'rfx':
            logger.info("    Mean mi across subjects")
            mi = [k.mean(axis=0, keepdims=True) for k in mi]
        mi = np.moveaxis(np.concatenate(mi, axis=0), 0, -1)
        # dataarray conversion
        mi = self._xr_conversion(mi, 'mi')
        pv = self._xr_conversion(pvalues, 'pvalues')

        return mi, pv
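
A hedged end-to-end sketch of running this fit, assuming the method belongs to
frites' WfMi workflow (as the "Use WfMi.clean" log message suggests) ; the
import paths and the DatasetEphy call are assumptions based on the library's
public API:

import numpy as np
from frites.dataset import DatasetEphy
from frites.workflow import WfMi

# 5 hypothetical subjects : 30 epochs, 6 roi, 100 time points each
x = [np.random.rand(30, 6, 100) for _ in range(5)]
y = [np.random.rand(30) for _ in range(5)]        # continuous regressor
ds = DatasetEphy(x, y=y, times=np.linspace(-0.2, 0.8, 100))

wf = WfMi(mi_type='cc', inference='rfx')
mi, pv = wf.fit(ds, mcp='cluster', n_perm=200, n_jobs=1)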
Example 18
    def fit(self, ar_type='hga', sf=200, n_times=300, n_epochs=100, dt=50,
            n_stim=3, n_std=3, stim_onset=100, random_state=None):
        """Get the data generated by the selected model.

        Parameters
        ----------
        ar_type : {'hga', 'osc_20', 'osc_40', 'ding_2', 'ding_3', 'ding_5'}
            Autoregressive model type. Choose either :

                * 'hga' : for evoked high-gamma activity
                * 'osc_20' / 'osc_40' : for oscillations respectively around
                  20Hz and 40Hz
                * 'osc_40_3' : oscillations at 40hz for 3 nodes. This model
                  simulates X->Y, X->Z and instantaneous Y.Z
                * 'ding_2' / 'ding_3_direct' / 'ding_3_indirect' / 'ding_5' :
                  respectively the models with 2, 3 or 5 nodes described by
                  Ding et al. :cite:`ding2006granger`

        sf : float | 200
            The sampling frequency
        n_times : int | 300
            Number of time points
        n_epochs : int | 100
            Number of epochs
        dt : int | 50
            Width of the time-varying Gaussian stimulus
        n_stim : int | 3
            Number of stimuli to use
        n_std : float, int | 3
            Number of standard deviations by which the stimulus exceeds the
            random noise. Should be strictly greater than 1. Note that this
            concerns the first stimulus. For example, if n_std=3, the first
            stimulus is going to have a deviation 3 times larger than the
            noise, the second stimulus 6 times the noise, the third stimulus 9
            times.
        stim_onset : int | 100
            Index where the time-varying Gaussian stimulus should start
        random_state : int | None
            Fix the random state of the machine for reproducibility

        Returns
        -------
        data : xarray.DataArray
            DataArray of shape (n_epochs * n_stim, n_roi, n_times)
        """
        assert isinstance(n_std, (int, float))
        times = np.arange(n_times) / sf - 0.5
        trials = np.arange(n_epochs)
        cval = np.arange(n_stim) + 1
        gval = np.arange(n_stim) + 1
        n_epochs_tot = int(n_epochs * n_stim)
        stim = np.repeat(np.arange(n_stim) + 1, n_epochs)
        if not isinstance(random_state, int):
            random_state = np.random.randint(10000)
        kw_noise = dict(size=(n_epochs_tot, n_times), loc=0,
                        random_state=random_state)

        if ar_type == 'hga':
            self._lab = 'Evoked HGA'
        elif ar_type == 'osc_20':
            self._lab = '20Hz oscillations'
        elif ar_type in ['osc_40', 'osc_40_3']:
            self._lab = '40Hz oscillations'
        elif 'ding' in ar_type:
            self._lab = f"Ding's {ar_type[-1]} nodes"

        logger.info(f"{self._lab} AR model (n_times={n_times}, "
                    f"n_epochs={n_epochs}, n_stim={n_stim}, "
                    f"random_state={random_state})")

        # ---------------------------------------------------------------------
        #                             GAUSSIAN STIM
        # ---------------------------------------------------------------------
        # generate time-varying Gaussian input to X
        gauss = stats.norm.pdf(np.linspace(-5, 5, dt + 1, endpoint=True), 0, 2)
        # normalise Gaussian profile between [0, 1]
        gauss -= gauss.min()
        gauss /= gauss.max()
        # full time gaussian stim
        gauss_stim = np.zeros((n_times,), dtype=float)
        gauss_stim[stim_onset - 1:stim_onset + len(gauss) - 1] = gauss

        # ---------------------------------------------------------------------
        #                            COUPLING STRENGTH
        # ---------------------------------------------------------------------
        c = np.repeat(cval, n_epochs)
        c = c.reshape(-1, 1) * gauss_stim.reshape(1, -1)
        causal = np.array(cval).reshape(-1, 1) * gauss_stim.reshape(1, -1)

        # ---------------------------------------------------------------------
        #                        AUTOREGRESSIVE MODEL
        # ---------------------------------------------------------------------

        if ar_type in ['hga', 'osc_20', 'osc_40']:
            # _____________________________ NOISE _____________________________
            # white noise with zero mean and unit variance
            n1, n2 = self._generate_noise(var=[.05, .05], **kw_noise)

            # _____________________________ GAIN ______________________________
            if ar_type == 'hga':
                # generate the array of gain
                g = np.repeat(gval, n_epochs)
                g = g.reshape(-1, 1) * gauss_stim.reshape(1, -1)
                # modulates gain according to n_std
                g = self._n_std_gain(g, n1, n_std)
                # for hga, there's no need to have an additional modulation
                c = np.ones_like(c)
            else:
                g = np.zeros((n_epochs_tot, n_times), dtype=float)

            # ________________________ N_STD COUPLING _________________________
            c2 = self._n_std_gain(c, n2, n_std)

            # _______________________ POLY COEFFICIENTS________________________
            if ar_type == 'osc_40':    # bivariate data oscillating at 40Hz
                a1 = [.55, -.8]
                a2 = [.35, -.5]
                a12 = [.5, 0.]
            elif ar_type == 'osc_20':  # bivariate data oscillating at ~20Hz
                a1 = [0, .05, .05, 0, -.3, -.3]
                a2 = [0, 0, 0, 0, -.3, -0.3]
                a12 = [0, 0, .5, .5, 0, 0]
            elif ar_type == 'hga':     # Evoked High-Gamma Activity (order 5)
                a1 = [.3]
                a2 = [.3]
                a12 = [0, 0, 0, .5, .5]

            # ______________________________ AR _______________________________
            # generate AR model with feature-specific causal connectivity (fCC)
            order = np.max([len(a1), len(a2), len(a12)])
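            # note: x and y alias the noise arrays n1 and n2, so the recursion
            # below fills them in place, starting from the noise initialization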
            x, y = n1, n2
            for t in range(order, n_times):
                # past indexing
                _sl_a1 = np.arange(t - 1, t - len(a1) - 1, -1)
                _sl_a2 = np.arange(t - 1, t - len(a2) - 1, -1)
                _sl_a12 = np.arange(t - 1, t - len(a12) - 1, -1)
                # AR core
                # - x1 = noise + gain + a1 * past_x1
                # - x2 = noise + a2 * past_x2 + coupling * past_x1
                x[:, t] = n1[:, t] + g[:, t] + (x[:, _sl_a1] @ a1)
                y[:, t] = n2[:, t] + (y[:, _sl_a2] @ a2) + c2[:, t] * (
                    x[:, _sl_a12] @ a12)
            # concatenate everything
            dat, roi = np.stack((x, y), axis=1), ['x', 'y']
        elif ar_type == 'osc_40_3':
            n1, n2, n3 = self._generate_noise(var=[.05] * 3, **kw_noise)
            c2 = self._n_std_gain(c, n2, n_std)
            c3 = self._n_std_gain(c, n3, n_std)

            x, y, z = n1, n2, n3
            for t in range(2, n_times):
                x[:, t] = n1[:, t] + .55 * x[:, t - 1] - .8 * x[:, t - 2]
                y[:, t] = n2[:, t] + .35 * y[:, t - 1] - .5 * y[:, t - 2] + (
                    c2[:, t] * (.5 * x[:, t - 1]))
                z[:, t] = n3[:, t] + .35 * z[:, t - 1] - .5 * z[:, t - 2] + (
                    c3[:, t] * (.5 * x[:, t - 1]))
            dat, roi = np.stack((x, y, z), axis=1), ['x', 'y', 'z']
        elif ar_type == 'ding_2':
            n1, n2 = self._generate_noise(var=[1., .7], **kw_noise)
            c2 = self._n_std_gain(c, n2, n_std)

            x, y = n1, n2
            for t in range(2, n_times):
                x[:, t] = .9 * x[:, t - 1] - .5 * x[:, t - 2] + n1[:, t]
                y[:, t] = .8 * y[:, t - 1] - .5 * y[:, t - 2] + c2[:, t] * (
                    .16 * x[:, t - 1] - .2 * x[:, t - 2]) + n2[:, t]
            dat, roi = np.stack((x, y), axis=1), ['x', 'y']
        elif ar_type in ['ding_3_direct', 'ding_3_indirect']:
            n1, n2, n3 = self._generate_noise(var=[.3, 1., .2], **kw_noise)
            c1 = self._n_std_gain(c, n1, n_std)
            c2 = self._n_std_gain(c, n2, n_std)
            c3 = self._n_std_gain(c, n3, n_std)

            x, y, z = n1, n2, n3
            for t in range(2, n_times):
                if ar_type == 'ding_3_indirect':
                    x[:, t] = .8 * x[:, t - 1] - .5 * x[:, t - 2] + c1[
                        :, t] * (.4 * z[:, t - 1]) + n1[:, t]
                elif ar_type == 'ding_3_direct':
                    x[:, t] = .8 * x[:, t - 1] - .5 * x[:, t - 2] + c1[
                        :, t] * (.4 * z[:, t - 1] + .2 * y[:, t - 2]) + n1[
                        :, t]
                y[:, t] = .9 * y[:, t - 1] - .8 * y[:, t - 2] + n2[:, t]
                z[:, t] = .5 * z[:, t - 1] - .2 * z[:, t - 2] + c3[:, t] * (
                    .5 * y[:, t - 1]) + n3[:, t]
            dat, roi = np.stack((x, y, z), axis=1), ['x', 'y', 'z']
            ar_type = 'ding_3'
        elif ar_type == 'ding_5':
            n1, n2, n3, n4, n5 = self._generate_noise(var=[.6, .5, .3, .3, .6],
                                                      **kw_noise)
            c2 = self._n_std_gain(c, n2, n_std)
            c3 = self._n_std_gain(c, n3, n_std)
            c4 = self._n_std_gain(c, n4, n_std)
            c5 = self._n_std_gain(c, n5, n_std)
            sq2 = np.sqrt(2.)

            x1, x2, x3, x4, x5 = n1, n2, n3, n4, n5
            for t in range(3, n_times):
                x1[:, t] = .95 * sq2 * x1[:, t - 1] - .9025 * x1[
                    :, t - 2] + n1[:, t]
                x2[:, t] = c2[:, t] * (.5 * x1[:, t - 2]) + n2[:, t]
                x3[:, t] = c3[:, t] * (-.4 * x1[:, t - 3]) + n3[:, t]
                x4[:, t] = c4[:, t] * (-.5 * x1[:, t - 2] + .25 * sq2 * x5[
                    :, t - 1]) + .25 * sq2 * x4[:, t - 1] + n4[:, t]
                x5[:, t] = c5[:, t] * (-.25 * sq2 * x4[:, t - 1]) + (
                    .25 * sq2 * x5[:, t - 1] + n5[:, t])
            dat = np.stack((x1, x2, x3, x4, x5), axis=1)
            roi = ['x1', 'x2', 'x3', 'x4', 'x5']

        # ---------------------------------------------------------------------
        #                         XARRAY CONVERSION
        # ---------------------------------------------------------------------
        ar = xr.DataArray(dat, dims=('trials', 'roi', 'times'),
                          coords=(stim, np.array(roi), times))
        # add attributes
        attr = dict(n_stim=n_stim, n_std=n_std, ar_type=ar_type, stimulus=cval)
        for k, v in attr.items(): ar.attrs[k] = v  # noqa
        # keep in object
        self._ar_type = ar_type
        self._causal = causal
        self._ar = ar
        self._sf = sf
        self._n_stim = n_stim
        self._n_std = n_std

        return ar
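
A minimal usage sketch for the method above, assuming it is the fit method of frites' StimSpecAR simulation class (the class name, import path and parameter values are assumptions based on the docstring, not part of this listing):

from frites.simulations import StimSpecAR  # assumed import path

ar = StimSpecAR()
# simulate a bivariate AR model with an evoked high-gamma-like coupling,
# two stimulus strengths and a fixed random state for reproducibility
sim = ar.fit(ar_type='hga', n_epochs=50, n_stim=2, random_state=0)
print(sim.shape)  # (n_epochs * n_stim, n_roi, n_times)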
Example n. 19
0
def savgol_filter(x, h_freq, axis=None, sfreq=None, polyorder=5, verbose=None):
    """Filter the data using Savitzky-Golay polynomial method.

    This function is an adaptation of the mne-python one for xarray.DataArray.

    Parameters
    ----------
    x : array_like
        Multidimensional array or DataArray
    h_freq : float
        Approximate high cut-off frequency in Hz. Note that this is not an
        exact cutoff, since Savitzky-Golay filtering is done using
        polynomial fits instead of FIR/IIR filtering. This parameter is
        thus used to determine the length of the window
    axis : int, str | None
        Position of the time axis. Can either be an integer when `x` is a
        NumPy array or a string (e.g. 'times') when using a DataArray
    sfreq : int, float | None
        Sampling frequency. If None and `x` is a DataArray with a 'times'
        coordinate, the sampling frequency is inferred from the time vector
    polyorder : int | 5
        Polynomial order

    Returns
    -------
    x_filt : array_like
        Filtered data

    Notes
    -----
    For Savitzky-Golay low-pass approximation, see:
        https://gist.github.com/larsoner/bbac101d50176611136b

    See also
    --------
    kernel_smoothing
    """
    set_log_level(verbose)
    # inputs checking
    if isinstance(x, xr.DataArray):
        dims = list(x.dims)
        # get axis name
        if axis is None:
            axis = 'times'
        if isinstance(axis, str):
            axis = list(x.dims).index(axis)
        # get sfreq if possible
        if not isinstance(sfreq, (int, float)):
            assert 'times' in dims
            sfreq = 1. / (x['times'].data[1] - x['times'].data[0])
    assert isinstance(h_freq, (int, float))
    assert isinstance(axis, int)
    assert isinstance(sfreq, (int, float))
    if h_freq >= sfreq / 2.:
        raise ValueError('h_freq must be less than half the sample rate')

    # get window length
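    # use roughly one period of h_freq (sfreq / h_freq samples), forced to an
    # odd number of samples as expected by scipy's Savitzky-Golay filter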
    window_length = (int(np.round(sfreq / h_freq)) // 2) * 2 + 1
    logger.info(f'    Using savgol length {window_length}')

    # apply savgol depending on input type
    kw = dict(axis=axis, polyorder=polyorder, window_length=window_length)
    if isinstance(x, xr.DataArray):
        x.data = savgol(x.data, **kw)
        return x
    else:
        return savgol(x, **kw)
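
A short usage sketch, assuming the function above is exposed as frites.utils.savgol_filter (the import path is an assumption; adapt it to wherever the function is defined):

import numpy as np
import xarray as xr
from frites.utils import savgol_filter  # assumed import path

n_trials, n_roi, n_times, sfreq = 10, 4, 1000, 512.
times = np.arange(n_times) / sfreq
x = xr.DataArray(np.random.rand(n_trials, n_roi, n_times),
                 dims=('trials', 'roi', 'times'),
                 coords=(np.arange(n_trials),
                         [f"r{k}" for k in range(n_roi)], times))
# low-pass around 20 Hz; sfreq is inferred from the 'times' coordinate
x_filt = savgol_filter(x, 20.)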
Example n. 20
0
def conn_dfc(data,
             win_sample,
             times=None,
             roi=None,
             n_jobs=1,
             gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using the
    Gaussian Copula Mutual Information (GCMI). The DFC is computed across time
    points for each trial. Note that the DFC can either be computed on windows
    manually defined or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data array of a single subject organized as
        (n_epochs, n_roi, n_times)
    win_sample : array_like
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define the windows, either manually or as sliding windows.
    times : array_like | None
        Time vector array of shape (n_times,)
    roi : array_like | None
        ROI names of a single subject
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are already normalized (e.g. z-scored), this parameter can
        be set to False because the data can be considered Gaussian over time.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # inputs conversion
    data, trials, roi, times, attrs = conn_io(data,
                                              roi=roi,
                                              times=times,
                                              verbose=verbose)

    # -------------------------------------------------------------------------
    # data checking
    n_epochs, n_roi, n_pts = data.shape
    assert (len(roi) == n_roi) and (len(times) == n_pts)
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]
    # get the non-directed pairs
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    # build roi pairs names
    roi_p = [f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # compute dfc
    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    # get the parallel function
    parallel, p_fun = parallel_func(mi_nd_gg,
                                    n_jobs=n_jobs,
                                    verbose=verbose,
                                    prefer='threads')
    pbar = ProgressBar(range(n_win), mesg='Estimating DFC')

    dfc = np.zeros((n_epochs, n_pairs, n_win), dtype=np.float32)
    with parallel as para:
        for n_w, w in enumerate(win_sample):
            # select the data in the window and copnorm across time points
            data_w = data[..., w[0]:w[1]]
            # apply gcrn over time
            if gcrn:
                data_w = copnorm_nd(data_w, axis=2)
            # compute mi between pairs
            _dfc = para(
                p_fun(data_w[:, [s], :], data_w[:, [t], :],
                      **CONFIG["KW_GCMI"])
                for s, t in zip(x_s, x_t))
            dfc[..., n_w] = np.stack(_dfc, axis=1)
            pbar.update_with_increment_value(1)

    # -------------------------------------------------------------------------
    # dataarray conversion
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc,
                       dims=('trials', 'roi', 'times'),
                       name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)],
               type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc
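
A minimal call sketch for the function above, assuming it is frites' conn_dfc used together with frites.conn.define_windows, and assuming define_windows returns the window sample indices as its first output:

import numpy as np
from frites.conn import conn_dfc, define_windows

sfreq, n_epochs, n_roi, n_times = 256., 50, 3, 512
times = np.arange(n_times) / sfreq - 1.
x = np.random.rand(n_epochs, n_roi, n_times)
# 100ms sliding windows with a 20ms step
win, _ = define_windows(times, slwin_len=.1, slwin_step=.02)
dfc = conn_dfc(x, win, times=times, roi=[f"r{k}" for k in range(n_roi)])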
Example n. 21
0
    def __init__(self, verbose=None):  # noqa
        WfBase.__init__(self)
        set_log_level(verbose)
        logger.info("Definition of a non-parametric statistical workflow")
Example n. 22
0
    def __init__(self,
                 x,
                 y=None,
                 z=None,
                 roi=None,
                 agg_ch=True,
                 times=None,
                 multivariate=False,
                 nb_min_suj=False,
                 attrs=None,
                 verbose=None):
        """Init."""
        set_log_level(verbose)
        self.attrs = Attributes(attrs=attrs)
        assert isinstance(x, (list, tuple))
        self._agg_ch = agg_ch
        self._multivariate = multivariate

        logger.info('Definition of an electrophysiological dataset')
        logger.info(f'    Dataset composed of {len(x)} subjects / sessions')

        # ========================== Multi-conditions =========================

        # remapping group y and z
        if isinstance(y, (list, tuple)):
            y = multi_to_uni_conditions(y, var_name='y', verbose=verbose)
        if isinstance(z, (list, tuple)):
            z = multi_to_uni_conditions(z, var_name='z', verbose=verbose)

        # ===================== Multi-subjects conversion =====================

        # force converting the data (latest task-related variables)
        n_subjects = len(x)
        y = [y] * n_subjects if not isinstance(y, list) else y
        z = [z] * n_subjects if not isinstance(z, list) else z
        roi = [roi] * n_subjects if not isinstance(roi, list) else roi
        for k in range(n_subjects):
            x[k] = SubjectEphy(x[k],
                               y=y[k],
                               z=z[k],
                               roi=roi[k],
                               agg_ch=True,
                               times=times,
                               multivariate=multivariate,
                               verbose=verbose)
        self._x = x

        # minimum number of subject / roi
        nb_min_suj = -np.inf if not isinstance(nb_min_suj, int) else nb_min_suj
        self._nb_min_suj = nb_min_suj
        logger.info(f"    At least {self._nb_min_suj} subjects / roi required")

        # merge attributes
        self.attrs.merge([k.attrs for k in self._x])
        self._y_dtype = self.attrs['y_dtype']
        self._z_dtype = self.attrs['z_dtype']
        self._mi_type = self.attrs['mi_type']
        mi_repr = self.attrs['mi_repr']
        logger.info(f"    Supported MI definition {mi_repr} ({self._mi_type})")

        # ===================== Additional dimensions  ========================

        # Subject dimension
        for k in range(len(self._x)):
            self._x[k].name = f'subject_{k}'
            self._x[k] = self._x[k].assign_coords(
                subject=('trials', [k] * self._x[k].shape[0]))
        # channel aggregation
        if not agg_ch:
            # assign a unique integer to every channel, then split the indices
            # into per-subject sections
            n_roi_s = [k.shape[1] for k in self._x]
            agg_ch_num = np.arange(np.sum(n_roi_s))
            agg_split = np.split(agg_ch_num, np.cumsum(n_roi_s)[0:-1])
            # add additional dimension
            for k in range(len(self._x)):
                self._x[k] = self._x[k].assign_coords(agg_ch=('roi',
                                                              agg_split[k]))
        # final mi dimension
        dims = list(self._x[0].dims)
        self._mi_dims = [k for k in dims if k not in ['trials', 'mv']]

        # ============================= Attributes ============================

        # update internals parameters
        self._update_internals()
        # update internal attributes
        self.attrs.update({
            'nb_min_suj': nb_min_suj,
            'n_subjects': len(self._x),
            'agg_ch': agg_ch,
            'multivariate': multivariate,
            'dtype': "DatasetEphy",
            '__version__': frites.__version__
        })
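
Assuming the constructor above belongs to frites' DatasetEphy container, a minimal sketch of building a multi-subject dataset from NumPy arrays (shapes and values are illustrative):

import numpy as np
from frites.dataset import DatasetEphy

n_subjects, n_epochs, n_roi, n_times = 3, 30, 4, 100
x = [np.random.rand(n_epochs, n_roi, n_times) for _ in range(n_subjects)]
y = [np.random.rand(n_epochs) for _ in range(n_subjects)]  # one regressor per trial
roi = [[f"roi_{r}" for r in range(n_roi)] for _ in range(n_subjects)]
times = np.arange(n_times) / 64.
ds = DatasetEphy(x, y=y, roi=roi, times=times, nb_min_suj=2)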
Example n. 23
0
    def fit(self, effect, perms, inference='rfx', mcp='cluster', tail=1,
            cluster_th=None, cluster_alpha=0.05, ttested=False):
        """Fit the workflow on true data.

        Parameters
        ----------
        effect : list
            True effect list of length (n_roi,) composed of arrays each one of
            shape (n_subjects, ..., n_times). Number of subjects per ROI could
            be different
        perms : list
            Permutation list of length (n_roi,) composed of arrays each one of
            shape (n_perm, n_subjects, ..., n_times). Number of subjects per
            ROI could be different
        inference : {'ffx', 'rfx'}
            Perform either Fixed-effect ('ffx') or Random-effect ('rfx')
            inferences. By default, random-effect is used
        mcp : {'cluster', 'maxstat', 'fdr', 'bonferroni', 'nostat', None}
            Method to use for correcting p-values for the multiple comparison
            problem. Use either :
                
                * 'cluster' : cluster-based statistics [default]
                * 'maxstat' : test-wise maximum statistics correction
                * 'fdr' : test-wise FDR correction
                * 'bonferroni' : test-wise Bonferroni correction
                * 'nostat' : permutations are computed but no statistics are
                  performed
                * 'noperm' / None : no permutations are computed
        tail : {-1, 0, 1}
            Type of comparison. Use -1 for the lower part of the distribution,
            1 for the higher part and 0 for both. By default, upper tail of the
            distribution is used
        cluster_th : str, float | None
            The threshold to use for forming clusters. Use either :

                * a float that is going to act as a threshold
                * None and the threshold is automatically going to be inferred
                  using the distribution of permutations
                * 'tfce' : for Threshold Free Cluster Enhancement
        cluster_alpha : float | 0.05
            Control the percentile to use for forming the clusters. By default
            the 95th percentile of the permutations is used.
        ttested : bool | False
            Specify if the inputs have already been t-tested

        Returns
        -------
        pvalues : array_like
            Array of p-values of shape (..., n_times, n_roi)
        tvalues : array_like
            Array of t-values of shape (..., n_times, n_roi). This output is
            only computed for group-level analysis

        References
        ----------
        Smith and Nichols, 2009 :cite:`smith2009threshold`
        """
        # ---------------------------------------------------------------------
        # check inputs
        # ---------------------------------------------------------------------
        assert inference in ['ffx', 'rfx']
        assert mcp in ['cluster', 'maxstat', 'fdr', 'bonferroni', 'nostat',
                       'noperm', None]
        assert isinstance(effect, list) and isinstance(perms, list)
        assert all([isinstance(k, np.ndarray) and k.ndim >= 2 for k in effect])
        n_roi, n_times, tvalues = len(effect), effect[0].shape[-1], None
        # don't compute statistics if `mcp` is None
        if (mcp in [None, 'noperm']) or not len(perms):
            return np.ones((n_times, n_roi), dtype=float), tvalues
        assert all([isinstance(k, np.ndarray) and k.ndim >= 3 for k in perms])
        assert len(effect) == len(perms)
        # test that all values are finite
        assert all([np.isfinite(k).all() for k in effect])
        assert all([np.isfinite(k).all() for k in perms])

        # ---------------------------------------------------------------------
        # FFX / RFX
        # ---------------------------------------------------------------------
        nb_suj_roi = [k.shape[0] for k in effect]
        if inference == 'ffx':
            # check that the number of subjects is 1
            ffx_suj = np.max(nb_suj_roi) == 1
            assert ffx_suj, "For FFX, `n_subjects` should be 1"
            es, es_p = effect, perms
            logger.info("    Fixed-effect inference (FFX)")
            # es = (n_roi, n_times); es_p = (n_perm, n_roi, n_times)
            es, es_p = np.concatenate(es, axis=0), np.concatenate(es_p, axis=1)
        elif inference == 'rfx':
            if ttested:
                es = np.concatenate(effect, axis=0)
                es_p = np.concatenate(perms, axis=1)
            else:
                # check that the number of subjects is > 1
                rfx_suj = np.min(nb_suj_roi) > 1
                assert rfx_suj, "For RFX, `n_subjects` should be > 1"
                # modelise how subjects are distributed
                es, es_p, pop_mean = rfx_ttest(effect, perms)
                from frites.config import CONFIG
                sigma = CONFIG['TTEST_MNE_SIGMA']
                self.attrs.update(dict(ttest_pop_mean=pop_mean,
                                       ttest_sigma=sigma))
            tvalues = es

        # ---------------------------------------------------------------------
        # cluster forming threshold
        # ---------------------------------------------------------------------
        if mcp == 'cluster':
            if isinstance(cluster_th, (int, float)):
                th, tfce = cluster_th, None
            else:
                if (cluster_th == 'tfce'):          # TFCE auto
                    tfce = True
                elif isinstance(cluster_th, dict):  # TFCE manual
                    tfce = cluster_th
                else:
                    tfce = None                     # cluster_th is None
                th = cluster_threshold(es, es_p, alpha=cluster_alpha,
                                       tail=tail, tfce=tfce)
                self._cluster_th = cluster_th
            self.attrs.update(dict(th=th, tfce=tfce))

        # ---------------------------------------------------------------------
        # test-wise or cluster-based correction for multiple comparisons
        # ---------------------------------------------------------------------
        if mcp == 'cluster':
            logger.info('    Inference at cluster-level')
            pvalues = cluster_correction_mcp(es, es_p, th, tail=tail)
        else:
            logger.info('    Inference at spatio-temporal level (test-wise)')
            pvalues = testwise_correction_mcp(es, es_p, tail=tail, mcp=mcp)

        # ---------------------------------------------------------------------
        # postprocessing
        # ---------------------------------------------------------------------
        # by default p and t-values are (n_roi, n_times)
        if isinstance(tvalues, np.ndarray):
            tvalues = np.moveaxis(tvalues, 0, -1)
        pvalues = np.moveaxis(pvalues, 0, -1)

        # update internal config
        self.attrs.update(dict(
            inference=inference, mcp=mcp, tail=tail, cluster_th=cluster_th,
            cluster_alpha=cluster_alpha, ttested=int(ttested)))
        return pvalues, tvalues
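
A minimal sketch of calling this method, assuming it belongs to frites' WfStats non-parametric statistics workflow; the effect and permutation arrays below are random placeholders whose shapes are purely illustrative:

import numpy as np
from frites.workflow import WfStats  # assumed import path

n_roi, n_suj, n_times, n_perm = 3, 10, 50, 200
rng = np.random.RandomState(0)
effect = [rng.rand(n_suj, n_times) for _ in range(n_roi)]
perms = [rng.rand(n_perm, n_suj, n_times) for _ in range(n_roi)]
wf = WfStats()
# random-effect inference with cluster-based correction
pv, tv = wf.fit(effect, perms, inference='rfx', mcp='cluster')
print(pv.shape, tv.shape)  # (n_times, n_roi) for both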
Example n. 24
0
def conn_power_corr(data,
                    freqs=None,
                    roi=None,
                    times=None,
                    pairs=None,
                    sfreq=None,
                    foi=None,
                    sm_times=.5,
                    sm_freqs=1,
                    sm_kernel='hanning',
                    mode='morlet',
                    n_cycles=7.,
                    mt_bandwidth=None,
                    decim=1,
                    kw_cwt={},
                    kw_mt={},
                    block_size=None,
                    n_jobs=-1,
                    verbose=None,
                    dtype=np.float32):
    """Wavelet-based single-trial time-resolved spectral connectivity.
    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :
            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)
    metric : str | "coh"
        Which connectivity metric. Use either :
            * 'coh' : Coherence
            * 'plv' : Phase-Locking Value (PLV)
            * 'sxy' : Cross-spectrum
        By default, the coherenc is used.
    freqs : array_like
        Array of central frequencies of shape (n_freqs,).
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    pairs : array_like | None
        Pairs of links of shape (n_pairs, 2) to compute. If None, all pairs are
        computed
    sfreq : float | None
        Sampling frequency
    foi : array_like | None
        Extract frequencies of interest. This parameter should be an array of
        shape (n_foi, 2) defining where each band of interest starts and
        finishes.
    sm_times : float | .5
        Number of points to consider for the temporal smoothing in seconds. By
        default, a 500ms smoothing is used.
    sm_freqs : int | 1
        Number of points for frequency smoothing. By default, 1 is used which
        is equivalent to no smoothing
    sm_kernel : {'square', 'hanning'}
        Smoothing kernel type. Choose either 'square' or 'hanning'
    mode : {'morlet', 'multitaper'}
        Spectrum estimation mode can be either: 'multitaper' or 'morlet'.
    n_cycles : array_like | 7.
        Number of cycles to use for each frequency. If a float or an integer is
        used, the same number of cycles is going to be used for all frequencies
    mt_bandwidth : array_like | None
        The bandwidth of the multitaper windowing function in Hz. Only used in
        'multitaper' mode.
    decim : int | 1
        Decimation factor, applied after the time-frequency decomposition to
        reduce memory usage (default 1). If int, returns tfr[..., ::decim]. If
        slice, returns tfr[..., decim].
    kw_cwt : dict | {}
        Additional arguments sent to the mne-python function
        :py:func:`mne.time_frequency.tfr_array_morlet`
    kw_mt : dict | {}
        Additional arguments sent to the mne-python function
        :py:func:`mne.time_frequency.tfr_array_multitaper`
    block_size : int | None
        Number of blocks of trials to process at once. This parameter can be
        used to decrease memory load. If None, all trials are processed at
        once. If, for example, block_size=2, the trials are subdivided into
        two groups and each group is processed one after the other.
    n_jobs : int | -1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.

    Returns
    -------
    conn : xarray.DataArray
        DataArray of shape (n_trials, n_pairs, n_freqs, n_times)
    """
    set_log_level(verbose)

    if isinstance(sm_times, np.ndarray):
        raise NotImplementedError("Frequency dependent kernel in development"
                                  f"only first {sm_times[0]} will be used")

    # _________________________________ METHODS _______________________________
    metric = 'pec'
    conn_f, f_name = {'pec': (_pec, "Power correlation")}[metric]

    # _________________________________ INPUTS ________________________________
    # inputs conversion
    data, cfg = conn_io(
        data,
        times=times,
        roi=roi,
        agg_ch=False,
        win_sample=None,
        pairs=pairs,
        sort=True,
        block_size=block_size,
        sfreq=sfreq,
        freqs=freqs,
        foi=foi,
        sm_times=sm_times,
        sm_freqs=sm_freqs,
        verbose=verbose,
        name=f'Spectral connectivity (metric = {f_name}, mode={mode})',
    )

    # extract variables
    x, trials, attrs = data.data, data['y'].data, cfg['attrs']
    times, n_trials = data['times'].data, len(trials)
    x_s, x_t, roi_p = cfg['x_s'], cfg['x_t'], cfg['roi_p']
    indices, sfreq = cfg['blocks'], cfg['sfreq']
    freqs, _, foi_idx = cfg['freqs'], cfg['need_foi'], cfg['foi_idx']
    f_vec, sm_times, sm_freqs = cfg['f_vec'], cfg['sm_times'], cfg['sm_freqs']
    n_pairs, n_freqs = len(x_s), len(freqs)

    # temporal decimation
    if isinstance(decim, int):
        times = times[::decim]
        sm_times = int(np.round(sm_times / decim))
        sm_times = max(sm_times, 1)

    # Create smoothing kernel
    kernel = _create_kernel(sm_times, sm_freqs, kernel=sm_kernel)

    # define arguments for parallel computing
    mesg = f'Estimating pairwise {f_name} for trials %s'
    kw_para = dict(n_jobs=n_jobs, verbose=verbose, total=n_pairs)

    # show info
    logger.info(f"Computing pairwise {f_name} (n_pairs={n_pairs}, "
                f"n_freqs={n_freqs}, decim={decim}, sm_times={sm_times}, "
                f"sm_freqs={sm_freqs})")

    # ______________________ CONTAINER FOR CONNECTIVITY _______________________
    # compute the power correlations on blocks of trials
    conn = np.zeros((n_trials, n_pairs, len(f_vec), len(times)), dtype=dtype)
    for tr in indices:
        # --------------------------- TIME-FREQUENCY --------------------------
        # time-frequency decomposition
        w = _tf_decomp(x[tr, ...],
                       sfreq,
                       freqs,
                       n_cycles=n_cycles,
                       decim=decim,
                       mode=mode,
                       mt_bandwidth=mt_bandwidth,
                       kw_cwt=kw_cwt,
                       kw_mt=kw_mt,
                       n_jobs=n_jobs)

        # ----------------------------- CONN TRIALS ---------------------------
        # give indication about computed trials
        kw_para['mesg'] = mesg % f"{tr[0]}...{tr[-1]}"

        # computes conn across trials
        conn_tr = conn_f(w, kernel, foi_idx, x_s, x_t, kw_para)

        # merge results
        conn[tr, ...] = np.stack(conn_tr, axis=1)

        # free memory before processing the next block of trials
        del conn_tr, w

    # _________________________________ OUTPUTS _______________________________
    # configuration
    cfg = dict(sfreq=sfreq,
               sm_times=sm_times,
               sm_freqs=sm_freqs,
               sm_kernel=sm_kernel,
               mode=mode,
               n_cycles=n_cycles,
               mt_bandwidth=mt_bandwidth,
               decim=decim,
               type=metric)

    # conversion
    conn = xr.DataArray(conn,
                        dims=('trials', 'roi', 'freqs', 'times'),
                        name=metric,
                        coords=(trials, roi_p, f_vec, times),
                        attrs=check_attrs({
                            **attrs,
                            **cfg
                        }))
    return conn
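
A call sketch for the function above; since conn_power_corr does not appear to be part of the core frites API, the sketch assumes the definition above is available in the current namespace and that all parameter values are illustrative:

import numpy as np

n_epochs, n_roi, n_times, sfreq = 20, 4, 1000, 512.
x = np.random.rand(n_epochs, n_roi, n_times)
times = np.arange(n_times) / sfreq
freqs = np.linspace(10., 100., 10)
# single-trial power correlations between all pairs of regions
pec = conn_power_corr(x, freqs=freqs, sfreq=sfreq, times=times,
                      roi=[f"r{k}" for k in range(n_roi)], sm_times=.5,
                      mode='morlet', n_jobs=1)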
Example n. 25
0
def conn_covgc(data, dt, lag, t0, step=1, roi=None, times=None, method='gc',
               conditional=False, n_jobs=-1, verbose=None):
    r"""Single-trial covariance-based Granger Causality for gaussian variables.

    This function computes the (conditional) covariance-based Granger Causality
    (covgc) for each trial.

    .. note::
        **Total Granger interdependence**

            * TGI = gc.sum(axis=-1) = gc(x->y) + gc(y->x) + gc(x.y)
            * TGI = Hycy + Hxcx - Hxxcyy

        **Relations between Mutual Information and conditional entropies**

        This quantity can be defined as the Increment of Total Interdependence
        and it can be calculated as the difference of two mutual informations
        as follows

        .. math::

            Ixxyy  &=  I(X_{i+1}, X_{i}|Y_{i+1}, Y_{i}) \\
                   &=  H(X_{i+1}) + H(Y_{i+1}) - H(X_{i+1},Y_{i+1}) \\
                   &=  log(det_{xi1}) + log(det_{yi1}) - log(det_{xyi1}) \\
            Ixy    &=  I(X_{i}|Y_{i}) \\
                   &=  H(X_{i}) + H(Y_{i}) - H(X_{i}, Y_{i}) \\
                   &=  log(det_{xi}) + log(det_{yi}) - log(det_{yxi}) \\
            ITI    &= Ixxyy - Ixy

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    dt : int
        Duration of the time window for covariance correlation in samples
    lag : int
        Number of samples for the lag within each trial
    t0 : array_like
        Array of window starting time points (in samples), of shape
        (n_windows,)
    step : int | 1
        Number of samples stepping in the past for the lag within each trial
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    method : {'gauss', 'gc'}
        Method for the estimation of the covgc. Use either 'gauss' which
        assumes that the time-points are normally distributed or 'gc' in order
        to use the gaussian-copula.
    conditional : bool | False
        If True, the conditional Granger Causality is computed i.e the past is
        also conditioned by the past of other sources.
    n_jobs : int | -1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.

    Returns
    -------
    gc : array_like
        Granger Causality arranged as (n_epochs, n_pairs, n_windows, 3) where
        the last dimension means :

            * 0 : pairs[:, 0] -> pairs[:, 1] (x->y)
            * 1 : pairs[:, 1] -> pairs[:, 0] (y->x)
            * 2 : instantaneous  (x.y)

    References
    ----------
    Brovelli et al., 2015 :cite:`brovelli2015characterization`

    See also
    --------
    conn_dfc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # input checking
    if isinstance(t0, CONFIG['INT_DTYPE']) or isinstance(
        t0, CONFIG['FLOAT_DTYPE']):
        t0 = np.array([t0])
    t0 = np.asarray(t0).astype(int)
    dt, lag, step = int(dt), int(lag), int(step)
    # handle dataarray input
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_epochs, n_roi, n_pts = data.shape
    # force a C-contiguous array because the operations assume row-major order
    if not x.flags.c_contiguous:
        x = np.ascontiguousarray(x)
    # method checking
    assert method in ['gauss', 'gc']
    fcn = dict(gauss=_covgc, gc=_gccovgc)[method]

    # -------------------------------------------------------------------------
    # build generic time indices (just need to add t0 to it)
    rows, cols = np.mgrid[0:lag + 1, 0:dt]
    # step in the past lags
    rows = rows[::step, :]
    cols = cols[::step, :]
    # create indices for all lags and time points
    ind_tx = cols - rows
    # build output time vector
    times_p = np.empty((len(t0)), dtype=times.dtype, order='C')
    for n_t, t in enumerate(t0):
        times_p[n_t] = times[ind_tx[0, :] + t].mean()
    # get the non-directed pairs and build roi pairs names
    x_s, x_t = np.triu_indices(n_roi, k=1)
    pairs = np.c_[x_s, x_t]
    roi_p = np.array([f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)])
    # check the ratio between lag and dt
    ratio = 100 * (ind_tx.shape[0] / (step * ind_tx.shape[1]))
    if not 10. <= ratio <= 15.:
        _step = int(np.ceil((lag + 1) / (.15 * dt)))
        logger.warning(f"The ratio between the lag and dt is {ratio}%. It's "
                       f"recommended to conserve this ratio between 10-15%."
                       f" Try with a step={_step}")
    logger.debug(f"Index shape : {ind_tx.shape}")

    # -------------------------------------------------------------------------
    ext = 'conditional' if conditional else ''
    # compute covgc and parallel over pairs
    logger.info(f"Compute the {ext} covgc (method={method}, n_pairs={len(x_s)}"
                f"; n_windows={len(t0)}, lag={lag}, dt={dt}, step={step})")
    kw_par = dict(n_jobs=n_jobs, total=len(x_s), verbose=False)
    if not conditional:
        parallel, p_fun = parallel_func(fcn, **kw_par)
        gc = parallel(p_fun(x[:, s, :], x[:, t, :], ind_tx,
                            t0) for s, t in zip(x_s, x_t))
    else:
        parallel, p_fun = parallel_func(_cond_gccovgc, **kw_par)
        gc = parallel(p_fun(x, s, t, ind_tx, t0) for s, t in zip(x_s, x_t))
    gc = np.stack(gc, axis=1)

    # -------------------------------------------------------------------------
    # change output type
    dire = np.array(['x->y', 'y->x', 'x.y'])
    gc = xr.DataArray(gc, dims=('trials', 'roi', 'times', 'direction'),
                      coords=(trials, roi_p, times_p, dire), name='covgc')
    # set attributes
    cfg = dict(lag=lag, step=step, dt=dt, t0=t0,
               conditional=conditional, type='covgc')
    gc.attrs = {**attrs, **cfg}

    return gc
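
A minimal sketch of running the covariance-based Granger Causality above, assuming it is frites' conn_covgc; the window parameters (dt, lag, t0, step) below are illustrative values chosen so that the lag/dt ratio falls in the recommended 10-15% range:

import numpy as np
from frites.conn import conn_covgc

n_epochs, n_roi, n_times, sfreq = 20, 3, 600, 200.
times = np.arange(n_times) / sfreq
x = np.random.rand(n_epochs, n_roi, n_times)
t0 = np.arange(100, 500, 50)  # window starting samples
gc = conn_covgc(x, dt=50, lag=5, t0=t0, step=1, times=times,
                roi=[f"r{k}" for k in range(n_roi)], method='gc', n_jobs=1)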