def test_multivariate(self):
    """Test support for multi-variate axis."""
    mnd = self._get_data('mne', 4)
    da_mv = SubjectEphy(mnd, y=y_flo, z=z, multivariate=True, **kw)
    assert da_mv.dims[-2] == 'mv'
    da_mv = SubjectEphy(mnd, y=y_flo, z=z, multivariate=False, **kw)
    assert da_mv.dims[-2] == 'freqs'

def test_coordinates(self):
    """Test if coordinates and dims are properly set."""
    # _________________________ Test Xarray coords ________________________
    # build the 4d data
    xrd = self._get_data('xr', 4)
    da = SubjectEphy(xrd, y='y_flo', z='z', roi='roi', times='times', **kw)
    # testings
    np.testing.assert_array_equal(y_flo, da['y'].data)
    np.testing.assert_array_equal(z, da['z'].data)
    np.testing.assert_array_equal(roi, da['roi'].data)
    np.testing.assert_array_equal(freqs, da['freqs'].data)
    np.testing.assert_array_equal(times, da['times'].data)
    np.testing.assert_array_equal(
        ('trials', 'roi', 'freqs', 'times'), da.dims)

    # ___________________________ Test MNE coords _________________________
    # build the 4d data
    mnd = self._get_data('mne', 4)
    da = SubjectEphy(mnd, y=y_flo, z=z, **kw)
    assert da.attrs['sfreq'] == sfreq
    # testings
    np.testing.assert_array_equal(y_flo, da['y'].data)
    np.testing.assert_array_equal(z, da['z'].data)
    np.testing.assert_array_equal(ch_names, da['roi'].data)
    np.testing.assert_array_equal(freqs, da['freqs'].data)
    np.testing.assert_array_equal(times, da['times'].data)
    np.testing.assert_array_equal(
        ('trials', 'roi', 'freqs', 'times'), da.dims)

def test_agg_ch(self):
    """Test function agg_ch."""
    xr_3d = self._get_data('xr', 3)
    # test aggregation
    da_a = SubjectEphy(xr_3d, y='y_flo', z='z', roi='roi', times='times',
                       agg_ch=True, **kw)
    np.testing.assert_array_equal(da_a['agg_ch'].data, [0] * len(roi))
    # test no aggregation
    da_na = SubjectEphy(xr_3d, y='y_flo', z='z', roi='roi', times='times',
                        agg_ch=False, **kw)
    np.testing.assert_array_equal(
        da_na['agg_ch'].data, np.arange(len(roi)))

def test_numpy_inputs(self):
    """Test function numpy_inputs."""
    # ___________________________ test 3d inputs __________________________
    SubjectEphy(x_3d, **kw)
    SubjectEphy(x_3d, y=y_int, **kw)
    SubjectEphy(x_3d, z=z, **kw)
    SubjectEphy(x_3d, y=y_int, z=z, roi=roi, **kw)
    da_3d = SubjectEphy(x_3d, y=y_int, z=z, roi=roi, times=times, **kw)
    self._test_memory(x_3d, da_3d.data)

    # ___________________________ test 4d inputs __________________________
    SubjectEphy(x_4d, **kw)
    SubjectEphy(x_4d, y=y_int, **kw)
    SubjectEphy(x_4d, z=z, **kw)
    SubjectEphy(x_4d, y=y_int, z=z, roi=roi, **kw)
    da_4d = SubjectEphy(x_4d, y=y_int, z=z, roi=roi, times=times, **kw)
    self._test_memory(x_4d, da_4d.data)

def test_dtypes(self):
    """Test y, z dtypes and mi_type."""
    # cd
    da = SubjectEphy(x_3d, y=y_int, **kw)
    assert da.attrs['y_dtype'] == 'int'
    assert da.attrs['z_dtype'] == 'none'
    assert da.attrs['mi_type'] == 'cd'
    # cc
    da = SubjectEphy(x_3d, y=y_flo, **kw)
    assert da.attrs['y_dtype'] == 'float'
    assert da.attrs['z_dtype'] == 'none'
    assert da.attrs['mi_type'] == 'cc'
    # ccd
    da = SubjectEphy(x_3d, y=y_flo, z=z, **kw)
    assert da.attrs['y_dtype'] == 'float'
    assert da.attrs['z_dtype'] == 'int'
    assert da.attrs['mi_type'] == 'ccd'

def test_attrs(self):
    """Test setting attributes."""
    # test attrs passed as inputs
    attrs = {'test': 'passed', 'ruggero': 'bg'}
    da = SubjectEphy(x_4d, attrs=attrs, **kw)
    assert all([da.attrs[k] == v for k, v in attrs.items()])
    # test attrs attached to an input xarray
    xr_4d = self._get_data('xr', 4)
    xr_4d.attrs = attrs
    da = SubjectEphy(xr_4d, y='y_flo', z='z', times='times', agg_ch=False,
                     multivariate=True, **kw)
    assert all([da.attrs[k] == v for k, v in attrs.items()])
    # test computed attrs
    assert da.attrs['sfreq'] == sfreq
    assert da.attrs['y_dtype'] == 'float'
    assert da.attrs['z_dtype'] == 'int'
    assert da.attrs['mi_type'] == 'ccd'
    assert da.attrs['agg_ch'] is False
    assert da.attrs['multivariate'] is True

def test_mne_inputs(self):
    """Test function mne_inputs."""
    # ___________________________ test 3d inputs __________________________
    # test inputs
    mne_3d = self._get_data('mne', 3)
    SubjectEphy(mne_3d, **kw)
    SubjectEphy(mne_3d, y=y_int, **kw)
    SubjectEphy(mne_3d, z=z, **kw)
    SubjectEphy(mne_3d, y=y_int, z=z, roi=roi, **kw)
    da_3d = SubjectEphy(mne_3d, y=y_int, z=z, roi=roi, times=times, **kw)
    self._test_memory(x_3d, da_3d.data)

    # ___________________________ test 4d inputs __________________________
    # test inputs
    mne_4d = self._get_data('mne', 4)
    SubjectEphy(mne_4d, **kw)
    SubjectEphy(mne_4d, y=y_int, **kw)
    SubjectEphy(mne_4d, z=z, **kw)
    SubjectEphy(mne_4d, y=y_int, z=z, roi=roi, **kw)
    da_4d = SubjectEphy(mne_4d, y=y_int, z=z, roi=roi, times=times, **kw)
    self._test_memory(x_4d, da_4d.data)

def test_xr_inputs(self):
    """Test function xr_inputs."""
    # ___________________________ test 3d inputs __________________________
    # test inputs
    xr_3d = self._get_data('xr', 3)
    SubjectEphy(xr_3d, **kw)
    SubjectEphy(xr_3d, y='y_int', **kw)
    SubjectEphy(xr_3d, y='y_flo', **kw)
    SubjectEphy(xr_3d, z='z', **kw)
    SubjectEphy(xr_3d, y='y_int', z='z', roi='roi', **kw)
    da_3d = SubjectEphy(xr_3d, y='y_flo', z='z', roi='roi', times='times',
                        **kw)
    self._test_memory(x_3d, da_3d.data)

    # ___________________________ test 4d inputs __________________________
    # test inputs
    xr_4d = self._get_data('xr', 4)
    SubjectEphy(xr_4d, **kw)
    SubjectEphy(xr_4d, y='y_int', **kw)
    SubjectEphy(xr_4d, y='y_flo', **kw)
    SubjectEphy(xr_4d, z='z', **kw)
    SubjectEphy(xr_4d, y='y_int', z='z', roi='roi', **kw)
    da_4d = SubjectEphy(xr_4d, y='y_flo', z='z', roi='roi', times='times',
                        **kw)
    self._test_memory(x_4d, da_4d.data)

def test_name(self):
    """Test setting the DataArray name."""
    name = 'TestingName'
    da = SubjectEphy(x_4d, name=name, **kw)
    assert da.name == name

def test_multiconditions(self):
    """Test function multicond."""
    ds = SubjectEphy(x_3d, y=np.c_[y_int, y_int_2], **kw)
    np.testing.assert_array_equal(
        ds['y'].data, [0, 0, 1, 1, 2, 2, 3, 3, 3, 3])

def conn_dfc(data, win_sample=None, times=None, roi=None, n_jobs=1, gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using
    the Gaussian Copula Mutual Information (GCMI). The DFC is computed across
    time points for each trial. Note that the DFC can either be computed on
    manually defined windows or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    win_sample : array_like | None
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define the windows either manually or as sliding windows. If None,
        the entire time window is used instead.
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g. z-score) this parameter can be set to
        False because the data can be considered as gaussian over time.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)

    # -------------------------------------------------------------------------
    # inputs conversion and data checking
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_trials = len(trials)
    # deal with the win_sample array
    if win_sample is None:
        win_sample = np.array([[0, len(times) - 1]])
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]

    # -------------------------------------------------------------------------
    # find group of brain regions
    gp = pd.DataFrame({'roi': roi}).groupby('roi').groups
    roi_gp, roi_idx = list(gp.keys()), list(gp.values())
    n_roi = len(roi_gp)
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    roi_p = [f"{roi_gp[s]}-{roi_gp[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # prepare outputs and elements
    n_jobs = 1 if n_win == 1 else n_jobs
    parallel, p_fun = parallel_func(_conn_dfc, n_jobs=n_jobs, verbose=verbose,
                                    total=n_win, mesg='Estimating DFC')
    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    dfc = np.zeros((n_trials, n_pairs, n_win), dtype=np.float64)

    # -------------------------------------------------------------------------
    # compute the dfc (parallel loop over the windows)
    dfc = parallel(
        p_fun(x[:, :, w[0]:w[1]], x_s, x_t, roi_idx, gcrn) for w in win_sample)
    dfc = np.stack(dfc, 2)

    # -------------------------------------------------------------------------
    # dataarray conversion
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc, dims=('trials', 'roi', 'times'), name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)], type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc

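
# The small function below is a minimal usage sketch for `conn_dfc`, kept here
# for illustration only. The synthetic data, time vector, roi names and window
# boundaries are assumptions made for the example and are not part of the
# library.
def _example_conn_dfc():
    """Illustrative call to :func:`conn_dfc` on random (hypothetical) data."""
    import numpy as np
    # hypothetical data : 10 trials, 3 channels, 100 time points
    x = np.random.rand(10, 3, 100)
    times = np.linspace(-1., 1., 100)
    roi = ['roi_0', 'roi_1', 'roi_2']
    # two manually defined windows, expressed in samples
    win_sample = np.array([[0, 49], [50, 99]])
    dfc = conn_dfc(x, win_sample=win_sample, times=times, roi=roi, n_jobs=1)
    # dfc is a DataArray of shape (n_epochs=10, n_pairs=3, n_windows=2)
    return dfc
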
def conn_covgc(data, dt, lag, t0, step=1, roi=None, times=None, method='gc',
               conditional=False, n_jobs=-1, verbose=None):
    r"""Single-trial covariance-based Granger Causality for gaussian variables.

    This function computes the (conditional) covariance-based Granger
    Causality (covgc) for each trial.

    .. note::
        **Total Granger interdependence**

            * TGI = gc.sum(axis=-1) = gc(x->y) + gc(y->x) + gc(x.y)
            * TGI = Hycy + Hxcx - Hxxcyy

        **Relations between Mutual Information and conditional entropies**

        This quantity can be defined as the Increment of Total Interdependence
        and it can be calculated from the difference of two mutual
        informations as follows

        .. math::

            Ixxyy  &=  I(X_{i+1}, X_{i}|Y_{i+1}, Y_{i}) \\
                   &=  H(X_{i+1}) + H(Y_{i+1}) - H(X_{i+1}, Y_{i+1}) \\
                   &=  log(det_{xi1}) + log(det_{yi1}) - log(det_{xyi1}) \\
            Ixy    &=  I(X_{i}|Y_{i}) \\
                   &=  H(X_{i}) + H(Y_{i}) - H(X_{i}, Y_{i}) \\
                   &=  log(det_{xi}) + log(det_{yi}) - log(det_{yxi}) \\
            ITI    &=  Ixxyy - Ixy

    Parameters
    ----------
    data : array_like
        Electrophysiological data. Several input types are supported :

            * Standard NumPy arrays of shape (n_epochs, n_roi, n_times)
            * mne.Epochs
            * xarray.DataArray of shape (n_epochs, n_roi, n_times)

    dt : int
        Duration of the time window for covariance correlation in samples
    lag : int
        Number of samples for the lag within each trial
    t0 : array_like
        Array of zero time in samples of length (n_window,)
    step : int | 1
        Number of samples stepping in the past for the lag within each trial
    times : array_like | None
        Time vector array of shape (n_times,). If the input is an xarray, the
        name of the time dimension can be provided
    roi : array_like | None
        ROI names of a single subject. If the input is an xarray, the
        name of the ROI dimension can be provided
    method : {'gauss', 'gc'}
        Method for the estimation of the covgc. Use either 'gauss' which
        assumes that the time-points are normally distributed or 'gc' in order
        to use the gaussian-copula.
    conditional : bool | False
        If True, the conditional Granger Causality is computed i.e. the past
        is also conditioned by the past of other sources.
    n_jobs : int | -1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.

    Returns
    -------
    gc : array_like
        Granger Causality arranged as (n_epochs, n_pairs, n_windows, 3) where
        the last dimension means :

            * 0 : pairs[:, 0] -> pairs[:, 1] (x->y)
            * 1 : pairs[:, 1] -> pairs[:, 0] (y->x)
            * 2 : instantaneous (x.y)

    References
    ----------
    Brovelli et al., 2015 :cite:`brovelli2015characterization`

    See also
    --------
    conn_dfc
    """
    set_log_level(verbose)

    # -------------------------------------------------------------------------
    # input checking
    if isinstance(t0, CONFIG['INT_DTYPE']) or isinstance(
            t0, CONFIG['FLOAT_DTYPE']):
        t0 = np.array([t0])
    t0 = np.asarray(t0).astype(int)
    dt, lag, step = int(dt), int(lag), int(step)
    # handle dataarray input
    if isinstance(data, xr.DataArray):
        trials, attrs = data[data.dims[0]].data, data.attrs
    else:
        trials, attrs = np.arange(data.shape[0]), {}
    # internal conversion
    data = SubjectEphy(data, y=trials, roi=roi, times=times)
    x, roi, times = data.data, data['roi'].data, data['times'].data
    trials = data['y'].data
    n_epochs, n_roi, n_pts = data.shape
    # force a C-contiguous array because operations are faster on row-major
    # arrays
    if not x.flags.c_contiguous:
        x = np.ascontiguousarray(x)
    # method checking
    assert method in ['gauss', 'gc']
    fcn = dict(gauss=_covgc, gc=_gccovgc)[method]

    # -------------------------------------------------------------------------
    # build generic time indices (just need to add t0 to it)
    rows, cols = np.mgrid[0:lag + 1, 0:dt]
    # step in the past lags
    rows = rows[::step, :]
    cols = cols[::step, :]
    # create index for all lags and time points
    ind_tx = cols - rows
    # build output time vector
    times_p = np.empty((len(t0)), dtype=times.dtype, order='C')
    for n_t, t in enumerate(t0):
        times_p[n_t] = times[ind_tx[0, :] + t].mean()
    # get the non-directed pairs and build roi pairs names
    x_s, x_t = np.triu_indices(n_roi, k=1)
    pairs = np.c_[x_s, x_t]
    roi_p = np.array([f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)])
    # check the ratio between lag and dt
    ratio = 100 * (ind_tx.shape[0] / (step * ind_tx.shape[1]))
    if not 10. <= ratio <= 15.:
        _step = int(np.ceil((lag + 1) / (.15 * dt)))
        logger.warning(f"The ratio between the lag and dt is {ratio}%. It's "
                       f"recommended to conserve this ratio between 10-15%."
                       f" Try with a step={_step}")
    logger.debug(f"Index shape : {ind_tx.shape}")

    # -------------------------------------------------------------------------
    # compute covgc in parallel over pairs
    ext = 'conditional' if conditional else ''
    logger.info(f"Compute the {ext} covgc (method={method}, "
                f"n_pairs={len(x_s)}; n_windows={len(t0)}, lag={lag}, "
                f"dt={dt}, step={step})")
    kw_par = dict(n_jobs=n_jobs, total=len(x_s), verbose=False)
    if not conditional:
        parallel, p_fun = parallel_func(fcn, **kw_par)
        gc = parallel(p_fun(
            x[:, s, :], x[:, t, :], ind_tx, t0) for s, t in zip(x_s, x_t))
    else:
        parallel, p_fun = parallel_func(_cond_gccovgc, **kw_par)
        gc = parallel(p_fun(x, s, t, ind_tx, t0) for s, t in zip(x_s, x_t))
    gc = np.stack(gc, axis=1)

    # -------------------------------------------------------------------------
    # change output type
    dire = np.array(['x->y', 'y->x', 'x.y'])
    gc = xr.DataArray(gc, dims=('trials', 'roi', 'times', 'direction'),
                      coords=(trials, roi_p, times_p, dire), name='covgc')
    # set attributes (record the actual parameter values that were used)
    cfg = dict(lag=lag, step=step, dt=dt, t0=t0, conditional=conditional,
               type='covgc')
    gc.attrs = {**attrs, **cfg}

    return gc

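
# The small function below is a minimal usage sketch for `conn_covgc`, kept
# here for illustration only. The synthetic data, roi names, time vector and
# the (dt, lag, t0, step) settings are assumptions made for the example and
# are not part of the library.
def _example_conn_covgc():
    """Illustrative call to :func:`conn_covgc` on random (hypothetical) data."""
    import numpy as np
    # hypothetical data : 10 trials, 3 channels, 200 time points
    x = np.random.rand(10, 3, 200)
    times = np.linspace(-1., 1., 200)
    roi = ['roi_0', 'roi_1', 'roi_2']
    # covgc estimated on two windows of dt=50 samples, with a lag of 5 samples
    # (lag / dt ratio of 12%, inside the recommended 10-15% range)
    gc = conn_covgc(x, dt=50, lag=5, t0=[60, 120], step=1, roi=roi,
                    times=times, method='gc', n_jobs=1)
    # gc has dims ('trials', 'roi', 'times', 'direction') where the last
    # dimension contains x->y, y->x and the instantaneous term x.y
    # total Granger interdependence (see the note in the docstring)
    tgi = gc.sum('direction')
    return gc, tgi
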
def __init__(self, x, y=None, z=None, roi=None, agg_ch=True, times=None,
             multivariate=False, nb_min_suj=False, attrs=None, verbose=None):
    """Init."""
    set_log_level(verbose)
    self.attrs = Attributes(attrs=attrs)
    assert isinstance(x, (list, tuple))
    self._agg_ch = agg_ch
    self._multivariate = multivariate

    logger.info('Definition of an electrophysiological dataset')
    logger.info(f'    Dataset composed of {len(x)} subjects / sessions')

    # ========================== Multi-conditions =========================
    # remapping group y and z
    if isinstance(y, (list, tuple)):
        y = multi_to_uni_conditions(y, var_name='y', verbose=verbose)
    if isinstance(z, (list, tuple)):
        z = multi_to_uni_conditions(z, var_name='z', verbose=verbose)

    # ===================== Multi-subjects conversion =====================
    # force converting the data (latest task-related variables)
    n_subjects = len(x)
    y = [y] * n_subjects if not isinstance(y, list) else y
    z = [z] * n_subjects if not isinstance(z, list) else z
    roi = [roi] * n_subjects if not isinstance(roi, list) else roi
    for k in range(n_subjects):
        x[k] = SubjectEphy(x[k], y=y[k], z=z[k], roi=roi[k], agg_ch=True,
                           times=times, multivariate=multivariate,
                           verbose=verbose)
    self._x = x

    # minimum number of subject / roi
    nb_min_suj = -np.inf if not isinstance(nb_min_suj, int) else nb_min_suj
    self._nb_min_suj = nb_min_suj
    logger.info(f"    At least {self._nb_min_suj} subjects / roi required")

    # merge attributes
    self.attrs.merge([k.attrs for k in self._x])
    self._y_dtype = self.attrs['y_dtype']
    self._z_dtype = self.attrs['z_dtype']
    self._mi_type = self.attrs['mi_type']
    mi_repr = self.attrs['mi_repr']
    logger.info(f"    Supported MI definition {mi_repr} ({self._mi_type})")

    # ===================== Additional dimensions =========================
    # Subject dimension
    for n_k, k in enumerate(range(len(self._x))):
        self._x[k].name = f'subject_{n_k}'
        self._x[k] = self._x[k].assign_coords(
            subject=('trials', [n_k] * self._x[k].shape[0]))

    # channel aggregation
    if not agg_ch:
        # split the channels into sections of unique integers
        n_trials_s = [k.shape[1] for k in self._x]  # channels per subject
        agg_ch_num = np.arange(np.sum(n_trials_s))
        agg_split = np.split(agg_ch_num, np.cumsum(n_trials_s)[0:-1])
        # add additional dimension
        for k in range(len(self._x)):
            self._x[k] = self._x[k].assign_coords(
                agg_ch=('roi', agg_split[k]))

    # final mi dimension
    dims = list(self._x[0].dims)
    self._mi_dims = [k for k in dims if k not in ['trials', 'mv']]

    # ============================= Attributes ============================
    # update internal parameters
    self._update_internals()
    # update internal attributes
    self.attrs.update({
        'nb_min_suj': nb_min_suj,
        'n_subjects': len(self._x),
        'agg_ch': agg_ch,
        'multivariate': multivariate,
        'dtype': "DatasetEphy",
        '__version__': frites.__version__
    })