def __init__(self, inference='rfx', mi_method='gc', kernel=None, verbose=None): """Init.""" WfBase.__init__(self) assert inference in [ 'ffx', 'rfx' ], ("'inference' input parameter should either be 'ffx' or 'rfx'") assert mi_method in [ 'gc', 'bin' ], ("'mi_method' input parameter should either be 'gc' or 'bin'") self._mi_type = 'cc' self._inference = inference self._mi_method = mi_method self._need_copnorm = mi_method == 'gc' self._gcrn = inference == 'rfx' self._kernel = kernel set_log_level(verbose) self.clean() self._wf_stats = WfStats(verbose=verbose) # update internal config self.update_cfg(mi_type=self._mi_type, inference=inference, mi_method=mi_method, kernel=kernel) logger.info(f"Workflow for computing connectivity ({self._mi_type} - " f"{mi_method})")
def __init__(self, inference='rfx', estimator=None, kernel=None, verbose=None): """Init.""" WfBase.__init__(self) assert inference in ['ffx', 'rfx'], ( "'inference' input parameter should either be 'ffx' or 'rfx'") self._mi_type = 'cc' if estimator is None: estimator = GCMIEstimator(mi_type='cc', copnorm=False, verbose=verbose) assert estimator.settings['mi_type'] == self._mi_type self._copnorm = isinstance(estimator, GCMIEstimator) self._inference = inference self.estimator = estimator self._gcrn = inference == 'rfx' self._kernel = kernel set_log_level(verbose) self.clean() self._wf_stats = WfStats(verbose=verbose) # update internal config self.attrs.update(dict(mi_type=self._mi_type, inference=inference, kernel=kernel)) logger.info(f"Workflow for computing comodulations between distant " f"brain areas ({inference})")
def __init__(self, mi_type='cc', verbose=None): """Init.""" set_log_level(verbose) desc = CONFIG['MI_REPR'][mi_type] settings = {'description': desc} self.settings = Attributes(attrs=settings, section_name='Settings') self._kwargs = dict() assert hasattr(self, 'name') logger.info(f"{self.name} ({mi_type})")
def get_connectivity_pairs(self, nb_min_suj=None, directed=False, verbose=None): """Get the connectivity pairs for this dataset. This method can be used to get the possible connectivity pairs i.e (sources, targets) for directed connectivity (or not). In addition, some pairs are going to be ignored because the number of subjects is too low. Parameters ---------- nb_min_suj : int | None Minimum number of shared subjects between two pairs directed : bool | False Get either directed (True) or non-directed (False) pairs Returns ------- sources : array_like Indices of the source targets : array_like Indices of the target """ set_log_level(verbose) assert self._groupedby == 'roi', ( "To get connectivity pairs, the dataset should already be grouped " "by roi") bad = [] # get all possible pairs if directed: pairs = np.where(~np.eye(self.n_roi, dtype=bool)) else: pairs = np.triu_indices(self.n_roi, k=1) # remove pairs where there are not enough subjects if isinstance(nb_min_suj, int): s_new, t_new = [], [] for s, t in zip(pairs[0], pairs[1]): suj_s, suj_t = self.suj_roi_u[s], self.suj_roi_u[t] if len(np.intersect1d(suj_s, suj_t)) >= nb_min_suj: s_new += [s] t_new += [t] else: bad += [f"{self.roi_names[s]}-{self.roi_names[t]}"] if len(bad): logger.warning("The following connectivity pairs are going to " "be ignored because the number of subjects is " f"below {nb_min_suj} : {bad}") pairs = (np.asarray(s_new), np.asarray(t_new)) logger.info(f" {len(pairs[0])} remaining connectivity pairs / " f"{len(bad)} pairs have been ignored " f"(nb_min_suj={nb_min_suj})") return pairs[0], pairs[1]
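# Minimal standalone sketch of the pair-generation logic used above
# (illustrative only, not part of the frites source) : directed pairs exclude
# only the diagonal, undirected pairs keep the upper triangle.
import numpy as np

n_roi = 3
directed = np.where(~np.eye(n_roi, dtype=bool))   # 6 pairs : (0,1), (0,2), (1,0), ...
undirected = np.triu_indices(n_roi, k=1)          # 3 pairs : (0,1), (0,2), (1,2)
print(len(directed[0]), len(undirected[0]))       # -> 6 3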
def conn_io(da, trials=None, roi=None, times=None, verbose=None): """I/O conversion for connectivity functions. Parameters ---------- da : array_like Array of electrophysiological data of shape (n_trials, n_roi, n_times) trials : array_like | None Trial vector or string corresponding to the dimension name in a DataArray roi : array_like | None List of roi names or string corresponding to the dimension name in a DataArray times : array_like | None Time vector or string corresponding to the dimension name in a DataArray """ set_log_level(verbose) assert isinstance(da, np.ndarray) or isinstance(da, xr.DataArray) assert da.ndim == 3 n_trials, n_roi, n_times = da.shape attrs = dict(n_trials=n_trials, n_roi=n_roi, n_times=n_times) logger.info(f"Inputs conversion (n_trials={n_trials}, n_roi={n_roi}, " f"n_times={n_times})") # _______________________________ Xarray case _____________________________ if isinstance(da, xr.DataArray): # by default, use the first dimension as trials if trials is None: trials = da.dims[0] # get trials, roi and times if isinstance(trials, str): trials = da[trials].data if isinstance(roi, str): roi = da[roi].data if isinstance(times, str): times = da[times].data attrs = {**attrs, **da.attrs} da = da.data # _____________________________ Empty inputs ______________________________ if roi is None: roi = [f"roi_{k}" for k in range(n_roi)] if times is None: times = np.arange(n_times) if trials is None: trials = np.arange(n_trials) # _______________________________ Final check _____________________________ assert isinstance(da, np.ndarray) assert da.shape == (len(trials), len(roi), len(times)) return da, trials, roi, times, attrs
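# Hedged usage sketch of conn_io on a plain NumPy array : default trials, roi
# and times coordinates are generated (the 'roi_k' names are the defaults
# built above).
import numpy as np

x = np.random.rand(20, 4, 100)               # (n_trials, n_roi, n_times)
da, trials, roi, times, attrs = conn_io(x)
# roi -> ['roi_0', ..., 'roi_3'] ; times -> np.arange(100) ; trials -> np.arange(20)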
def gccmi_1d_ccc(x, y, z, verbose=None): """Gaussian-Copula CMI between three continuous variables. I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly multidimensional) continuous variables, x and y, conditioned on a third, z, estimated via a Gaussian copula. Parameters ---------- x, y, z : array_like Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). Returns ------- i : float Information shared by x and y conditioned by z (in bits) """ set_log_level(verbose) x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z) if x.ndim > 2 or y.ndim > 2 or z.ndim > 2: raise ValueError("x, y and z must be at most 2d") nvarx, ntrl = x.shape nvary = y.shape[0] nvarz = z.shape[0] if y.shape[1] != ntrl or z.shape[1] != ntrl: raise ValueError("number of trials do not match") # check for repeated values for xi in range(nvarx): if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9: logger.info("Input x has more than 10% repeated values") break for yi in range(nvary): if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9: logger.info("Input y has more than 10% repeated values") break for zi in range(nvarz): if (np.unique(z[zi, :]).size / float(ntrl)) < 0.9: logger.info("Input z has more than 10% repeated values") break # copula normalization cx = copnorm_nd(x, axis=1) cy = copnorm_nd(y, axis=1) cz = copnorm_nd(z, axis=1) # parametric Gaussian CMI return cmi_1d_ggg(cx, cy, cz, True, True)
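# Hedged usage sketch (toy data, not from the frites docs) : x and y are
# linked only through z, so the Gaussian-copula CMI I(x; y | z) should be
# close to zero.
import numpy as np

rng = np.random.RandomState(0)
z = rng.randn(1000)
x = z + .5 * rng.randn(1000)
y = z + .5 * rng.randn(1000)
i = gccmi_1d_ccc(x, y, z)
print(f"I(x; y | z) = {i:.3f} bits")   # small : z explains the x-y dependence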
def savgol_filter(self, h_freq, edges=None, verbose=None): """Filter the data using Savitzky-Golay polynomial method. This method is an adaptation of the mne-python one. Note that this smoothing operation is performed inplace to avoid data copy. Parameters ---------- h_freq : float Approximate high cut-off frequency in Hz. Note that this is not an exact cutoff, since Savitzky-Golay filtering is done using polynomial fits instead of FIR/IIR filtering. This parameter is thus used to determine the length of the window over which a 5th-order polynomial smoothing is used. edges : int, float | None Edge compensation. Use either an integer to drop a specific number of time points (e.g edges=100 removes 100 time points at the beginning and at the end) or a float to drop a period (e.g edges=0.2 drops 200ms at the beginning and at the end) Returns ------- inst : instance of DatasetEphy The object with the filtering applied. Notes ----- For Savitzky-Golay low-pass approximation, see: https://gist.github.com/larsoner/bbac101d50176611136b """ set_log_level(verbose) # perform smoothing for n_s in range(len(self._x)): self._x[n_s] = savgol_filter(self._x[n_s], h_freq, axis='times', sfreq=self.attrs['sfreq'], verbose=verbose) # edge effect compensation if isinstance(edges, CONFIG['FLOAT_DTYPE']): t = self._times self.sel(times=slice(t[0] + edges, t[-1] - edges)) elif isinstance(edges, CONFIG['INT_DTYPE']): self.isel(times=slice(edges, -edges)) return self
def set_config(key, value, verbose=None): """Change the global config of frites. Parameters ---------- key : string Entry of the config value : dict / list The new value for the selected key. The type should be the same as the default one """ io.set_log_level(verbose) assert isinstance(key, str) CONFIG = get_config() # noqa assert key in CONFIG.keys(), f"The key {key} doesn't exist." CONFIG[key] = value logger.info(f"The key {key} has been updated")
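# Hedged usage sketch : read and update a config entry. 'KW_GCMI' is a key
# that appears in CONFIG accesses elsewhere in this package, so it should be
# valid ; any unknown key raises the assert above.
from frites import get_config, set_config

kw = dict(get_config()['KW_GCMI'])   # copy the current value
set_config('KW_GCMI', kw)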
def gcmi_model_1d_cd(x, y, verbose=None): """Gaussian-Copula MI between a continuous and a discrete variable. This method is based on ANOVA style model comparison. I = gcmi_model_cd(x,y) returns the MI between the (possibly multidimensional) continuous variable x and the discrete variable y. Parameters ---------- x, y : array_like Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y must be an array of integers Returns ------- i : float Information shared by x and y (in bits) """ set_log_level(verbose) x, y = np.atleast_2d(x), np.squeeze(y) if x.ndim > 2: raise ValueError("x must be at most 2d") if y.ndim > 1: raise ValueError("only univariate discrete variables supported") if not np.issubdtype(y.dtype, np.integer): raise ValueError("y should be an integer array") nvarx, ntrl = x.shape if y.size != ntrl: raise ValueError("number of trials do not match") # check for repeated values for xi in range(nvarx): if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9: logger.info("Input x has more than 10% repeated values") break # copula normalization cx = copnorm_nd(x, axis=1) # parametric Gaussian MI return mi_model_1d_gd(cx, y, True, True)
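# Hedged usage sketch (toy data) : MI between a continuous variable and
# integer class labels (ANOVA-style model comparison). The labels must have
# an integer dtype.
import numpy as np

rng = np.random.RandomState(0)
y = rng.randint(0, 2, 500)            # two classes
x = y + .5 * rng.randn(500)           # continuous, class-dependent mean
i = gcmi_model_1d_cd(x, y)
print(f"I(x; y) = {i:.3f} bits")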
def savgol_filter(self, h_freq, verbose=None): """Filter the data using Savitzky-Golay polynomial method. This method is an adaptation of the mne-python one. Parameters ---------- h_freq : float Approximate high cut-off frequency in Hz. Note that this is not an exact cutoff, since Savitzky-Golay filtering is done using polynomial fits instead of FIR/IIR filtering. This parameter is thus used to determine the length of the window over which a 5th-order polynomial smoothing is used. Returns ------- inst : instance of DatasetEphy The object with the filtering applied. Notes ----- For Savitzky-Golay low-pass approximation, see: https://gist.github.com/larsoner/bbac101d50176611136b """ set_log_level(verbose) assert self._groupedby == "subject", ("Filtering only works when data " "is grouped by 'subject'") from scipy.signal import savgol_filter h_freq = float(h_freq) if h_freq >= self.sfreq / 2.: raise ValueError('h_freq must be less than half the sample rate') # savitzky-golay filtering window_length = (int(np.round(self.sfreq / h_freq)) // 2) * 2 + 1 logger.info(f' Using savgol length {window_length}') for k in range(len(self._x)): self._x[k] = savgol_filter(self._x[k], axis=2, polyorder=5, window_length=window_length) return self
def conn_power_corr(data, freqs=None, roi=None, times=None, pairs=None, sfreq=None, foi=None, sm_times=.5, sm_freqs=1, sm_kernel='hanning', mode='morlet', n_cycles=7., mt_bandwidth=None, decim=1, kw_cwt={}, kw_mt={}, block_size=None, n_jobs=-1, verbose=None, dtype=np.float32): """Wavelet-based single-trial time-resolved spectral connectivity. Parameters ---------- data : array_like Electrophysiological data. Several input types are supported : * Standard NumPy arrays of shape (n_epochs, n_roi, n_times) * mne.Epochs * xarray.DataArray of shape (n_epochs, n_roi, n_times) freqs : array_like Array of central frequencies of shape (n_freqs,). roi : array_like | None ROI names of a single subject. If the input is an xarray, the name of the ROI dimension can be provided times : array_like | None Time vector array of shape (n_times,). If the input is an xarray, the name of the time dimension can be provided pairs : array_like | None Pairs of links of shape (n_pairs, 2) to compute. If None, all pairs are computed sfreq : float | None Sampling frequency foi : array_like | None Extract frequencies of interest. This parameter should be an array of shape (n_foi, 2) defining where each band of interest starts and finishes. sm_times : float | .5 Number of points to consider for the temporal smoothing in seconds. By default, a 500ms smoothing is used. sm_freqs : int | 1 Number of points for frequency smoothing. By default, 1 is used which is equivalent to no smoothing sm_kernel : {'square', 'hanning'} Kernel type to use for the smoothing. Choose either 'square' or 'hanning' mode : {'morlet', 'multitaper'} Spectrum estimation mode can be either: 'multitaper' or 'morlet'. n_cycles : array_like | 7. Number of cycles to use for each frequency. If a float or an integer is used, the same number of cycles is going to be used for all frequencies mt_bandwidth : array_like | None The bandwidth of the multitaper windowing function in Hz. Only used in 'multitaper' mode. decim : int | 1 To reduce memory usage, decimation factor after time-frequency decomposition. If int, returns tfr[…, ::decim]. If slice, returns tfr[…, decim]. kw_cwt : dict | {} Additional arguments sent to the mne-function :py:`mne.time_frequency.tfr_array_morlet` kw_mt : dict | {} Additional arguments sent to the mne-function :py:`mne.time_frequency.tfr_array_multitaper` block_size : int | None Number of blocks of trials to process at once. This parameter can be used to decrease memory load. If None, all trials are used. If for example block_size=2, the trials are subdivided into two groups and each group is processed one after the other. n_jobs : int | -1 Number of jobs to use for parallel computing (use -1 to use all jobs). The parallel loop is set at the pair level. 
Returns ------- conn : xarray.DataArray DataArray of shape (n_trials, n_pairs, n_freqs, n_times) """ set_log_level(verbose) if isinstance(sm_times, np.ndarray): raise NotImplementedError("Frequency dependent kernel in development; " f"only the first value ({sm_times[0]}) will be used") # _________________________________ METHODS _______________________________ conn_f, f_name = {'pec': (_pec, "Power correlation")}['pec'] # _________________________________ INPUTS ________________________________ # inputs conversion data, cfg = conn_io( data, times=times, roi=roi, agg_ch=False, win_sample=None, pairs=pairs, sort=True, block_size=block_size, sfreq=sfreq, freqs=freqs, foi=foi, sm_times=sm_times, sm_freqs=sm_freqs, verbose=verbose, name=f'Spectral connectivity (metric = {f_name}, mode={mode})', ) # extract variables x, trials, attrs = data.data, data['y'].data, cfg['attrs'] times, n_trials = data['times'].data, len(trials) x_s, x_t, roi_p = cfg['x_s'], cfg['x_t'], cfg['roi_p'] indices, sfreq = cfg['blocks'], cfg['sfreq'] freqs, _, foi_idx = cfg['freqs'], cfg['need_foi'], cfg['foi_idx'] f_vec, sm_times, sm_freqs = cfg['f_vec'], cfg['sm_times'], cfg['sm_freqs'] n_pairs, n_freqs = len(x_s), len(freqs) # temporal decimation if isinstance(decim, int): times = times[::decim] sm_times = int(np.round(sm_times / decim)) sm_times = max(sm_times, 1) # Create smoothing kernel kernel = _create_kernel(sm_times, sm_freqs, kernel=sm_kernel) # define arguments for parallel computing mesg = f'Estimating pairwise {f_name} for trials %s' kw_para = dict(n_jobs=n_jobs, verbose=verbose, total=n_pairs) # show info logger.info(f"Computing pairwise {f_name} (n_pairs={n_pairs}, " f"n_freqs={n_freqs}, decim={decim}, sm_times={sm_times}, " f"sm_freqs={sm_freqs})") # ______________________ CONTAINER FOR CONNECTIVITY _______________________ # compute the power correlation on blocks of trials conn = np.zeros((n_trials, n_pairs, len(f_vec), len(times)), dtype=dtype) for tr in indices: # --------------------------- TIME-FREQUENCY -------------------------- # time-frequency decomposition w = _tf_decomp(x[tr, ...], sfreq, freqs, n_cycles=n_cycles, decim=decim, mode=mode, mt_bandwidth=mt_bandwidth, kw_cwt=kw_cwt, kw_mt=kw_mt, n_jobs=n_jobs) # ----------------------------- CONN TRIALS --------------------------- # give indication about computed trials kw_para['mesg'] = mesg % f"{tr[0]}...{tr[-1]}" # computes conn across trials conn_tr = conn_f(w, kernel, foi_idx, x_s, x_t, kw_para) # merge results conn[tr, ...] = np.stack(conn_tr, axis=1) # free memory del conn_tr, w # _________________________________ OUTPUTS _______________________________ # configuration cfg = dict(sfreq=sfreq, sm_times=sm_times, sm_freqs=sm_freqs, sm_kernel=sm_kernel, mode=mode, n_cycles=n_cycles, mt_bandwidth=mt_bandwidth, decim=decim, type='pec') # conversion conn = xr.DataArray(conn, dims=('trials', 'roi', 'freqs', 'times'), name='pec', coords=(trials, roi_p, f_vec, times), attrs=check_attrs({ **attrs, **cfg })) return conn
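# Hedged usage sketch of conn_power_corr on random data ; the parameter names
# follow the docstring above and `sfreq` is required here since no time
# vector is given. The internal conn_io pipeline is assumed to accept these
# keywords as documented.
import numpy as np

x = np.random.rand(10, 3, 500)            # (n_epochs, n_roi, n_times)
freqs = np.linspace(10, 80, 8)            # wavelet central frequencies
pec = conn_power_corr(x, freqs=freqs, sfreq=256., mode='morlet', n_jobs=1)
# pec : DataArray of shape (n_epochs, n_pairs, n_freqs, n_times)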
def gccmi_1d_ccd(x, y, z, verbose=None): """GCCMI between 2 continuous variables conditioned on a discrete variable. I = gccmi_ccd(x,y,z) returns the CMI between two (possibly multidimensional) continuous variables, x and y, conditioned on a third discrete variable z, estimated via a Gaussian copula. Parameters ---------- x, y : array_like Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). z : array_like Discrete array of shape (n_epochs,) Returns ------- cmi : float Conditional Mutual Information shared by x and y conditioned by z (in bits) i : float Mutual information between x and y, without conditioning on z (in bits) """ set_log_level(verbose) x = np.atleast_2d(x) y = np.atleast_2d(y) if x.ndim > 2 or y.ndim > 2: raise ValueError("x and y must be at most 2d") if z.ndim > 1: raise ValueError("only univariate discrete variables supported") if not np.issubdtype(z.dtype, np.integer): raise ValueError("z should be an integer array") nvarx, ntrl = x.shape nvary = y.shape[0] zm = np.unique(z) if y.shape[1] != ntrl or z.size != ntrl: raise ValueError("number of trials do not match") # check for repeated values for xi in range(nvarx): if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9: logger.info("Input x has more than 10% repeated values") break for yi in range(nvary): if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9: logger.info("Input y has more than 10% repeated values") break # calculate gcmi for each z value (indexed by position, since the unique values of z are not necessarily 0..n-1) icond = np.zeros(len(zm)) pz = np.zeros(len(zm)) cx = [] cy = [] for n_z, zi in enumerate(zm): idx = z == zi thsx = copnorm_nd(x[:, idx], axis=1) thsy = copnorm_nd(y[:, idx], axis=1) pz[n_z] = idx.sum() cx.append(thsx) cy.append(thsy) icond[n_z] = mi_1d_gg(thsx, thsy, True, True) pz = pz / float(ntrl) # conditional mutual information cmi = np.sum(pz * icond) i = mi_1d_gg(np.hstack(cx), np.hstack(cy), True, False) return (cmi, i)
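# Hedged usage sketch (toy data) : CMI between two continuous variables given
# a discrete one. Note the function returns both the conditional and the
# unconditional MI.
import numpy as np

rng = np.random.RandomState(0)
z = rng.randint(0, 3, 900)             # discrete condition (0, 1, 2)
x = z + .5 * rng.randn(900)
y = z + .5 * rng.randn(900)
cmi, i = gccmi_1d_ccd(x, y, z)
print(f"I(x; y | z) = {cmi:.3f} bits ; I(x; y) = {i:.3f} bits")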
import logging from frites import ( io, core, conn, stats, utils, workflow, simulations, # noqa estimator) __version__ = "0.3.8" # ----------------------------------------------------------------------------- # Set 'info' as the default logging level logger = logging.getLogger('frites') io.set_log_level('info') # ----------------------------------------------------------------------------- # get / set config def get_config(): """Get the global configuration of frites.""" from frites.config import CONFIG return CONFIG def set_config(key, value, verbose=None): """Change the global config of frites. Parameters
def conn_dfc(data, win_sample=None, times=None, roi=None, n_jobs=1, gcrn=True, verbose=None): """Single trial Dynamic Functional Connectivity. This function computes the Dynamic Functional Connectivity (DFC) using the Gaussian Copula Mutual Information (GCMI). The DFC is computed across time points for each trial. Note that the DFC can either be computed on windows manually defined or on sliding windows. Parameters ---------- data : array_like Electrophysiological data. Several input types are supported : * Standard NumPy arrays of shape (n_epochs, n_roi, n_times) * mne.Epochs * xarray.DataArray of shape (n_epochs, n_roi, n_times) win_sample : array_like | None Array of shape (n_windows, 2) describing where each window start and finish. You can use the function :func:`frites.conn.define_windows` to define either manual or sliding windows. If None, the entire time window is used instead. times : array_like | None Time vector array of shape (n_times,). If the input is an xarray, the name of the time dimension can be provided roi : array_like | None ROI names of a single subject. If the input is an xarray, the name of the ROI dimension can be provided n_jobs : int | 1 Number of jobs to use for parallel computing (use -1 to use all jobs). The parallel loop is set at the pair level. gcrn : bool | True Specify if the Gaussian Copula Rank Normalization should be applied. If the data are normalized (e.g z-score) this parameter can be set to False because the data can be considered as gaussian over time. Returns ------- dfc : array_like The DFC array of shape (n_epochs, n_pairs, n_windows) See also -------- define_windows, conn_covgc """ set_log_level(verbose) # ------------------------------------------------------------------------- # inputs conversion and data checking if isinstance(data, xr.DataArray): trials, attrs = data[data.dims[0]].data, data.attrs else: trials, attrs = np.arange(data.shape[0]), {} # internal conversion data = SubjectEphy(data, y=trials, roi=roi, times=times) x, roi, times = data.data, data['roi'].data, data['times'].data trials = data['y'].data n_trials = len(trials) # deal with the win_sample array if win_sample is None: win_sample = np.array([[0, len(times) - 1]]) assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2) assert win_sample.dtype in CONFIG['INT_DTYPE'] n_win = win_sample.shape[0] # ------------------------------------------------------------------------- # find group of brain regions gp = pd.DataFrame({'roi': roi}).groupby('roi').groups roi_gp, roi_idx = list(gp.keys()), list(gp.values()) n_roi = len(roi_gp) x_s, x_t = np.triu_indices(n_roi, k=1) n_pairs = len(x_s) pairs = np.c_[x_s, x_t] roi_p = [f"{roi_gp[s]}-{roi_gp[t]}" for s, t in zip(x_s, x_t)] # ------------------------------------------------------------------------- # prepare outputs and elements n_jobs = 1 if n_win == 1 else n_jobs parallel, p_fun = parallel_func(_conn_dfc, n_jobs=n_jobs, verbose=verbose, total=n_win, mesg='Estimating DFC') logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})') # ------------------------------------------------------------------------- # compute the dfc in parallel over windows dfc = parallel( p_fun(x[:, :, w[0]:w[1]], x_s, x_t, roi_idx, gcrn) for w in win_sample) dfc = np.stack(dfc, 2) # ------------------------------------------------------------------------- # dataarray conversion win_times = times[win_sample] dfc = xr.DataArray(dfc, dims=('trials', 'roi', 
'times'), name='dfc', coords=(trials, roi_p, win_times.mean(1))) # add the windows used in the attributes cfg = dict(win_sample=np.r_[tuple(win_sample)], win_times=np.r_[tuple(win_times)], type='dfc') dfc.attrs = {**cfg, **attrs} return dfc
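# Hedged usage sketch, assuming define_windows is exposed in frites.conn (it
# is defined in this module family) : 100ms sliding windows on random data
# sampled at 500 Hz.
import numpy as np
from frites.conn import define_windows

x = np.random.rand(20, 4, 500)                  # (n_epochs, n_roi, n_times)
times = np.arange(500) / 500.
win, _ = define_windows(times, slwin_len=.1)    # sliding window definition
dfc = conn_dfc(x, win_sample=win, times=times)
# dfc : DataArray of shape (n_epochs, n_pairs, n_windows)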
def __init__(self, x, y=None, roi=None, z=None, times=None, nb_min_suj=None, sub_roi=None, verbose=None): """Init.""" set_log_level(verbose) # --------------------------------------------------------------------- # conversion of the electrophysiological data # --------------------------------------------------------------------- logger.info('Definition of an electrophysiological dataset') x, y, z, roi, times, sub_roi = ds_ephy_io(x, roi=roi, y=y, z=z, times=times, sub_roi=sub_roi, verbose=verbose) if y is None: logger.debug("Fill the y input because otherwise everything fails") y = [np.zeros((x[k].shape[0])) for k in range(len(x))] # --------------------------------------------------------------------- # check the types of y (and z) # --------------------------------------------------------------------- self._y_dtype = self._infer_dtypes(y, 'y') self._z_dtype = self._infer_dtypes(z, 'z') self._sub_roi_dtype = self._infer_dtypes(sub_roi, 'sub_roi') if (self._y_dtype == 'float') and (self._z_dtype == 'none'): if self._sub_roi_dtype == 'int': self._mi_type = 'ccd' _mi_st = 'Regr (continuous) | sub_roi' else: self._mi_type = 'cc' _mi_st = 'Regr (continuous)' elif (self._y_dtype == 'int') and (self._z_dtype == 'none'): self._mi_type = 'cd' if self._sub_roi_dtype == 'int': _mi_st = '[Regr (discrete), sub_roi]' else: _mi_st = 'Regr (discrete)' elif (self._y_dtype == 'float') and (self._z_dtype == 'int'): self._mi_type = 'ccd' if self._sub_roi_dtype == 'int': _mi_st = 'Regr (continuous) | [Regr (discrete), sub_roi]' else: _mi_st = 'Regr (continuous) | Regr (discrete)' else: raise TypeError(f"Types of y ({self._y_dtype}) and z (" f"{self._z_dtype}) don't allow computing MI") logger.debug(f"y.dtype={self._y_dtype}; z.dtype={self._z_dtype}; " f"sub_roi.dtype={self._sub_roi_dtype}") logger.info(f" Supported MI definition ({self._mi_type}) : " f"I(ephy; {_mi_st})") # (optional) multi-conditions conversion if self._y_dtype == 'int': y = self._multicond_conversion(y, 'y', verbose) if self._z_dtype == 'int': z = self._multicond_conversion(z, 'z', verbose) # --------------------------------------------------------------------- # 4d conversion # --------------------------------------------------------------------- self._reshape = None if all([k.ndim == 4 for k in x]): logger.debug(" 4d reshaping") for k in range(len(x)): n_e, n_r, n_f, n_t = x[k].shape x[k] = x[k].reshape(n_e, n_r, n_f * n_t) self._reshape = (n_f, n_t) # --------------------------------------------------------------------- # retain in self # --------------------------------------------------------------------- # data related self.nb_min_suj = nb_min_suj self.n_subjects = len(x) self.times = times self.roi = roi # unique roi list merged_roi = np.r_[tuple(self.roi)] _, u_idx = np.unique(merged_roi, return_index=True) self.roi_names = merged_roi[np.sort(u_idx)] self.n_roi = len(self.roi_names) # internals self.modality = "electrophysiological" self._copnormed = False self._groupedby = "subject" self.__version__ = frites.__version__ # main data self._x = x # [(n_epochs, n_channels, n_times)] self._y = y # [(n_epochs,)] self._z = z # [(n_epochs,)] self._sub_roi = sub_roi # [(n_roi,)] self.n_times = self._x[0].shape[-1] if len(self.times) > 1: self.sfreq = 1. / (self.times[1] - self.times[0]) else: logger.warning("Impossible to know the sampling frequency when the" " time vector only contains a single time point") self.sfreq = 1. logger.info(f"Dataset composed of {self.n_subjects} subjects. 
At least" f" {self.nb_min_suj} subjects per roi are required")
def resample(self, sfreq, npad='auto', window='boxcar', n_jobs=1, pad='edge', verbose=None): """Resample data. This method is an adaptation of the mne-python one. Parameters ---------- sfreq : float New sample rate to use. npad : int | str Amount to pad the start and end of the data. Can also be “auto” to use a padding that will result in a power-of-two size (can be much faster). window : str | tuple Frequency-domain window to use in resampling. See scipy.signal.resample(). n_jobs : int | 1 Number of jobs to use for parallel computing. pad : str | 'edge' The type of padding to use. Supports all numpy.pad() mode options. Can also be “reflect_limited”, which pads with a reflected version of each vector mirrored on the first and last values of the vector, followed by zeros. Only used for method='fir'. The default is 'edge', which pads with the edge values of each vector. Returns ------- inst : instance of DatasetEphy The object with the resampling applied. Notes ----- For some data, it may be more accurate to use npad=0 to reduce artifacts. This is dataset dependent -- check your data! """ set_log_level(verbose) assert self._groupedby == "subject", ("Resampling only works when data " "is grouped by 'subject'") from mne.filter import resample sfreq = float(sfreq) o_sfreq = self.sfreq logger.info(f" Resample to the frequency {sfreq}Hz") for k in range(len(self._x)): self._x[k] = resample(self._x[k], sfreq, o_sfreq, npad, window=window, n_jobs=n_jobs, pad=pad) self.sfreq = float(sfreq) self.times = (np.arange(self._x[0].shape[-1], dtype=float) / sfreq + self.times[0]) self.n_times = len(self.times) return self
def __new__(self, x, y=None, z=None, roi=None, times=None, agg_ch=True, multivariate=False, name=None, attrs=None, sfreq=None, verbose=None): """Init.""" set_log_level(verbose) attrs = Attributes(attrs=attrs) _supp_dim = [] # ========================== Data extraction ========================== # ____________________________ extraction _____________________________ if isinstance(x, xr.DataArray): # xr -> xr # get data, name and attributes attrs.update(x.attrs) name = x.name if name is None else name data = x.data # get y / z regressors y = x[y].data if isinstance(y, str) else y z = x[z].data if isinstance(z, str) else z # get spatial information (roi) roi = x[roi].data if isinstance(roi, str) else roi # build 4d (possibly multivariate) coordinate if x.ndim == 4: if multivariate: _supp_dim = ('mv', np.full((x.shape[2]), np.nan)) else: _supp_dim = (x.dims[2], x[x.dims[2]].data) # get the temporal vector times = x[times].data if isinstance(times, str) else times if 'mne' in str(type(x)): # mne -> xr times = x.times if times is None else times roi = x.info['ch_names'] if roi is None else roi sfreq = x.info['sfreq'] if sfreq is None else sfreq if isinstance(x, CONFIG["MNE_EPOCHS_TYPE"]): data = x.get_data() elif isinstance(x, CONFIG["MNE_EPOCHSTFR_TYPE"]): data = x.data if multivariate: _supp_dim = ('mv', np.full((data.shape[2]), np.nan)) else: _supp_dim = ('freqs', x.freqs) if isinstance(x, np.ndarray): # numpy -> xr data = x if data.ndim == 4: if multivariate: _supp_dim = ('mv', np.full((data.shape[2]), np.nan)) else: _supp_dim = ('supp', np.arange(data.shape[2])) assert data.ndim <= 4, "Data up to 4 dimensions are supported" # ____________________________ Y/Z dtypes _____________________________ # infer dtypes y_dtype = self._infer_dtypes(y, 'y') z_dtype = self._infer_dtypes(z, 'z') # infer supported mi_type mi_type = CONFIG['MI_TABLE'][y_dtype][z_dtype] mi_repr = CONFIG['MI_REPR'][mi_type] # multi to uni condition remapping y = multi_to_uni_conditions([y], var_name='y', verbose=verbose)[0] z = multi_to_uni_conditions([z], var_name='z', verbose=verbose)[0] # __________________________ Sampling rate ____________________________ # infer the sampling frequency (if needed) if sfreq is None: if (times is not None) and (len(times) >= 2): sfreq = 1. / (times[1] - times[0]) else: logger.warning("Impossible to infer the sampling frequency. " "You should consider providing a time vector") sfreq = 1. sfreq = float(sfreq) # ============================= DataArray ============================= # ___________________________ Dims & Coords ___________________________ dims, coords = [], OrderedDict() n_trials, n_roi, n_times = np.array(list(data.shape))[[0, 1, -1]] # don't break if empty time vector or missing roi if times is None: logger.warning("No time vector. A default one is created") times = np.arange(n_times) / sfreq if roi is None: logger.warning("No regions of interest are provided (roi). 
Default" " ones are created") roi = [f"roi_{k}" for k in range(n_roi)] # build trials (potentially) multi-coordinates coords['trials'] = ('trials', np.arange(n_trials)) if (y is not None) and (len(y) == n_trials): coords['y'] = ('trials', y) if (z is not None) and (len(z) == n_trials): coords['z'] = ('trials', z) if name is not None: coords['subject'] = ('trials', [name] * n_trials) dims += ['trials'] # build space (potentially) multi-coordinates coords['roi'] = ('roi', roi) if agg_ch: coords['agg_ch'] = ('roi', [0] * n_roi) else: coords['agg_ch'] = ('roi', np.arange(n_roi)) dims += ['roi'] if _supp_dim: coords[_supp_dim[0]] = _supp_dim[1] dims += [_supp_dim[0]] # build temporal coordinate if (times is not None) and (len(times) == n_times): coords['times'] = ('times', times) dims += ['times'] # _____________________________ Attributes ____________________________ attrs.update({ '__version__': frites.__version__, 'modality': "electrophysiology", 'dtype': 'SubjectEphy', 'y_dtype': y_dtype, 'z_dtype': z_dtype, 'mi_type': mi_type, 'mi_repr': mi_repr, 'sfreq': sfreq, 'agg_ch': agg_ch, 'multivariate': multivariate }) # _____________________________ DataArray _____________________________ # for a given reason, DataArray are not easy to subclass (see #706, # #728, #3980). Therefore, for the moment, it's just easier to simply # return a dataarray da = xr.DataArray(data, dims=dims, coords=coords, name=name, attrs=attrs) return da
def gcmi_mixture_1d_cd(x, y, verbose=None): """Gaussian-Copula MI between a continuous and a discrete variable. This method evaluates the MI from a Gaussian mixture. The Gaussian mixture is fit using robust measures of location (median) and scale (median absolute deviation) for each class. I = gcmi_mixture_cd(x,y) returns the MI between the (possibly multidimensional) continuous variable x and the discrete variable y. Parameters ---------- x, y : array_like Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y must be an array of integers Returns ------- i : float Information shared by x and y (in bits) """ set_log_level(verbose) x, y = np.atleast_2d(x), np.squeeze(y) if x.ndim > 2: raise ValueError("x must be at most 2d") if y.ndim > 1: raise ValueError("only univariate discrete variables supported") if not np.issubdtype(y.dtype, np.integer): raise ValueError("y should be an integer array") nvarx, ntrl = x.shape ym = np.unique(y) if y.size != ntrl: raise ValueError("number of trials do not match") # check for repeated values for xi in range(nvarx): if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9: logger.info("Input x has more than 10% repeated values") break # copula normalise each class # shift and rescale to match loc and scale of raw data # this provides a robust way to fit the gaussian mixture classdat = [] ydat = [] for yi in ym: # class conditional data idx = y == yi xm = x[:, idx] cxm = copnorm_nd(xm, axis=1) xmmed = np.median(xm, axis=1)[:, np.newaxis] # robust measure of s.d. under Gaussian assumption from median absolute deviation xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis] cxmscaled = cxm * (1.482602218505602 * xmmad) # robust measure of loc from median cxmscaled = cxmscaled + xmmed classdat.append(cxmscaled) ydat.append(yi * np.ones(xm.shape[1], dtype=int)) cx = np.concatenate(classdat, axis=1) newy = np.concatenate(ydat) return mi_mixture_1d_gd(cx, newy)
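# Hedged usage sketch (toy data) : the mixture estimator is suited to classes
# with different locations and spreads ; same calling convention as
# gcmi_model_1d_cd.
import numpy as np

rng = np.random.RandomState(0)
y = rng.randint(0, 2, 600)
x = np.where(y == 0, rng.randn(600), 3. + 2. * rng.randn(600))
i = gcmi_mixture_1d_cd(x, y)
print(f"I(x; y) = {i:.3f} bits")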
def conn_get_pairs(roi, directed=False, nb_min_suj=-np.inf, verbose=None): """Get possible connectivity pairs for multiple subjects. This function returns a DataFrame that contains all of the necessary information for managing pairs of brain regions across many subjects. Parameters ---------- roi : list List where each item in this list is an array describing the brain region names of a single subject. directed : bool | False Specify whether the returned pairs should be for directed (True) or undirected (default : False) connectivity. nb_min_suj : int | -np.inf Minimum number of subjects a pair of brain regions must share to be kept. Returns ------- df_conn : pd.DataFrame A Pandas DataFrame that describes the connectivity information at the group level. The table contains the following entries : * 'sources' / 'targets' : respectively, the source and target names * 'subjects' : list of subjects per pair of brain regions * '#subjects' : number of subjects per pair of brain regions * 'names' : name of each pair. If undirected, the names are going to be like 'roi_0-roi_1' or 'roi_0->roi_1' if directed * 'keep' : booleans indicating whether the number of subjects per pair of brain regions is over nb_min_suj df_suj : pd.DataFrame A Pandas DataFrame that describes the connectivity information per subject. The table contains the following entries : * 'subjects' : subject number * 'keep_roi' / 'drop_roi' : the brain regions respectively to keep and to remove to fit the input parameter nb_min_suj * 'keep_suj' : boolean describing if the subject should be kept or dropped * 'conn' : the 2D boolean connectivity array per subject """ set_log_level(verbose) assert isinstance(roi, list) n_subjects = len(roi) roi = [np.asarray(k) for k in roi] # =========================== Conn info per pair ========================== s_ss, t_ss, ss = [], [], [] for k in range(n_subjects): # get the unique list of brain regions, preserving order u_roi = nonsorted_unique(roi[k], assert_unique=True) n_u_roi = len(u_roi) # get all possible pairs if directed: pairs = np.where(~np.eye(n_u_roi, dtype=bool)) else: pairs = np.triu_indices(n_u_roi, k=1) s_names, t_names = u_roi[pairs[0]], u_roi[pairs[1]] # if not directed, merge '0-1' and '1-0' if not directed: st_names = np.c_[s_names, t_names] s_names, t_names = np.unique(np.sort(st_names, axis=1), axis=0).T # keep single-subject source and target names s_ss += [s_names] t_ss += [t_names] ss += [k] * len(s_names) # fill info in a dataframe df_ss = pd.DataFrame({ 'subjects': ss, 'sources': np.concatenate(s_ss), 'targets': np.concatenate(t_ss) }) # get the number of subjects per pair pattern = '->' if directed else '-' gp = df_ss.groupby(['sources', 'targets']) df_conn = gp.subjects.aggregate([list]).reset_index() df_conn = df_conn.rename(columns={'list': 'subjects'}) df_conn['#subjects'] = [len(k) for k in df_conn['subjects']] df_conn['names'] = [ f"{k}{pattern}{i}" for k, i in zip(df_conn['sources'], df_conn['targets']) ] df_conn['keep'] = df_conn['#subjects'] >= nb_min_suj # print the info n_remain = np.sum(list(df_conn['keep'])) n_drop = np.sum(list(~df_conn['keep'])) logger.info(f" {n_remain} remaining pairs of brain regions " f"(nb_min_suj={nb_min_suj}), {n_drop} dropped") # ========================= Conn info per subject ========================= # build 2d connectivity array per subject conn = {} for n_s in range(n_subjects): n_roi_s = len(roi[n_s]) _conn = xr.DataArray(~np.eye(n_roi_s, dtype=bool), dims=('sources', 
'targets'), coords=(roi[n_s], roi[n_s])) conn[n_s] = _conn # fill the information for k in range(len(df_conn)): _df = df_conn.iloc[k, :] for s in _df['subjects']: _s, _t, _k = _df['sources'], _df['targets'], bool(_df['keep']) conn[s].loc[dict(sources=_s, targets=_t)] = _k if not directed: conn[s].loc[dict(sources=_t, targets=_s)] = _k # get the brain regions to keep / drop per subject suj, roi_keep, roi_drop, conn_tot = [], [], [], [] for s in range(n_subjects): _keep = roi[s][np.union1d(*np.where(conn[s]))] _drop = np.setdiff1d(roi[s], _keep) suj += [s] roi_keep += [_keep.tolist()] roi_drop += [_drop.tolist()] conn_tot += [conn[s].data] # create the final dataframe df_suj = pd.DataFrame({ 'subjects': suj, 'keep_roi': roi_keep, 'drop_roi': roi_drop }) # , 'conn': conn_tot df_suj['keep_suj'] = [len(k) > 1 for k in df_suj['keep_roi']] return df_conn, df_suj
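# Hedged usage sketch : two subjects with partially overlapping regions ;
# with nb_min_suj=2, only the pair present in both subjects is kept.
import numpy as np

roi = [np.array(['insula', 'vmpfc', 'dlpfc']), np.array(['insula', 'vmpfc'])]
df_conn, df_suj = conn_get_pairs(roi, directed=False, nb_min_suj=2)
# df_conn['keep'] is True only for the 'insula-vmpfc' pair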
def __init__(self, verbose=None): # noqa WfBase.__init__(self) set_log_level(verbose) logger.info("Definition of a non-parametric statistical workflow")
def conn_dfc(data, win_sample, times=None, roi=None, n_jobs=1, gcrn=True, verbose=None): """Single trial Dynamic Functional Connectivity. This function computes the Dynamic Functional Connectivity (DFC) using the Gaussian Copula Mutual Information (GCMI). The DFC is computed across time points for each trial. Note that the DFC can either be computed on windows manually defined or on sliding windows. Parameters ---------- data : array_like Electrophysiological data array of a single subject organized as (n_epochs, n_roi, n_times) win_sample : array_like Array of shape (n_windows, 2) describing where each window start and finish. You can use the function :func:`frites.conn.define_windows` to define either manual or sliding windows. times : array_like | None Time vector array of shape (n_times,) roi : array_like | None ROI names of a single subject n_jobs : int | 1 Number of jobs to use for parallel computing (use -1 to use all jobs). The parallel loop is set at the pair level. gcrn : bool | True Specify if the Gaussian Copula Rank Normalization should be applied. If the data are normalized (e.g z-score) this parameter can be set to False because the data can be considered as gaussian over time. Returns ------- dfc : array_like The DFC array of shape (n_epochs, n_pairs, n_windows) See also -------- define_windows, conn_covgc """ set_log_level(verbose) # ------------------------------------------------------------------------- # inputs conversion data, trials, roi, times, attrs = conn_io(data, roi=roi, times=times, verbose=verbose) # ------------------------------------------------------------------------- # data checking n_epochs, n_roi, n_pts = data.shape assert (len(roi) == n_roi) and (len(times) == n_pts) assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2) assert win_sample.dtype in CONFIG['INT_DTYPE'] n_win = win_sample.shape[0] # get the non-directed pairs x_s, x_t = np.triu_indices(n_roi, k=1) n_pairs = len(x_s) pairs = np.c_[x_s, x_t] # build roi pairs names roi_p = [f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)] # ------------------------------------------------------------------------- # compute dfc logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})') # get the parallel function parallel, p_fun = parallel_func(mi_nd_gg, n_jobs=n_jobs, verbose=verbose, prefer='threads') pbar = ProgressBar(range(n_win), mesg='Estimating DFC') dfc = np.zeros((n_epochs, n_pairs, n_win), dtype=np.float32) with parallel as para: for n_w, w in enumerate(win_sample): # select the data in the window and copnorm across time points data_w = data[..., w[0]:w[1]] # apply gcrn over time if gcrn: data_w = copnorm_nd(data_w, axis=2) # compute mi between pairs _dfc = para( p_fun(data_w[:, [s], :], data_w[:, [t], :], **CONFIG["KW_GCMI"]) for s, t in zip(x_s, x_t)) dfc[..., n_w] = np.stack(_dfc, axis=1) pbar.update_with_increment_value(1) # ------------------------------------------------------------------------- # dataarray conversion win_times = times[win_sample] dfc = xr.DataArray(dfc, dims=('trials', 'roi', 'times'), name='dfc', coords=(trials, roi_p, win_times.mean(1))) # add the windows used in the attributes cfg = dict(win_sample=np.r_[tuple(win_sample)], win_times=np.r_[tuple(win_times)], type='dfc') dfc.attrs = {**cfg, **attrs} return dfc
def __init__(self, x, y=None, z=None, roi=None, agg_ch=True, times=None, multivariate=False, nb_min_suj=False, attrs=None, verbose=None): """Init.""" set_log_level(verbose) self.attrs = Attributes(attrs=attrs) assert isinstance(x, (list, tuple)) self._agg_ch = agg_ch self._multivariate = multivariate logger.info('Definition of an electrophysiological dataset') logger.info(f' Dataset composed of {len(x)} subjects / sessions') # ========================== Multi-conditions ========================= # remapping group y and z if isinstance(y, (list, tuple)): y = multi_to_uni_conditions(y, var_name='y', verbose=verbose) if isinstance(z, (list, tuple)): z = multi_to_uni_conditions(z, var_name='z', verbose=verbose) # ===================== Multi-subjects conversion ===================== # force converting the data (latest task-related variables) n_subjects = len(x) y = [y] * n_subjects if not isinstance(y, list) else y z = [z] * n_subjects if not isinstance(z, list) else z roi = [roi] * n_subjects if not isinstance(roi, list) else roi for k in range(n_subjects): x[k] = SubjectEphy(x[k], y=y[k], z=z[k], roi=roi[k], agg_ch=True, times=times, multivariate=multivariate, verbose=verbose) self._x = x # minimum number of subject / roi nb_min_suj = -np.inf if not isinstance(nb_min_suj, int) else nb_min_suj self._nb_min_suj = nb_min_suj logger.info(f" At least {self._nb_min_suj} subjects / roi required") # merge attributes self.attrs.merge([k.attrs for k in self._x]) self._y_dtype = self.attrs['y_dtype'] self._z_dtype = self.attrs['z_dtype'] self._mi_type = self.attrs['mi_type'] mi_repr = self.attrs['mi_repr'] logger.info(f" Supported MI definition {mi_repr} ({self._mi_type})") # ===================== Additional dimensions ======================== # Subject dimension for k in range(len(self._x)): self._x[k].name = f'subject_{k}' self._x[k] = self._x[k].assign_coords( subject=('trials', [k] * self._x[k].shape[0])) # channel aggregation if not agg_ch: # split into sections of unique integers n_roi_s = [k.shape[1] for k in self._x] agg_ch_num = np.arange(np.sum(n_roi_s)) agg_split = np.split(agg_ch_num, np.cumsum(n_roi_s)[0:-1]) # add additional dimension for k in range(len(self._x)): self._x[k] = self._x[k].assign_coords(agg_ch=('roi', agg_split[k])) # final mi dimension dims = list(self._x[0].dims) self._mi_dims = [k for k in dims if k not in ['trials', 'mv']] # ============================= Attributes ============================ # update internal parameters self._update_internals() # update internal attributes self.attrs.update({ 'nb_min_suj': nb_min_suj, 'n_subjects': len(self._x), 'agg_ch': agg_ch, 'multivariate': multivariate, 'dtype': "DatasetEphy", '__version__': frites.__version__ })
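# Hedged usage sketch (toy data) : group two subjects into a DatasetEphy with
# a continuous regressor per subject.
import numpy as np

x = [np.random.rand(25, 4, 100) for _ in range(2)]     # 2 subjects
y = [np.random.rand(25) for _ in range(2)]
roi = [[f"roi_{k}" for k in range(4)] for _ in range(2)]
times = np.arange(100) / 128.
ds = DatasetEphy(x, y=y, roi=roi, times=times)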
def define_windows(times, windows=None, slwin_len=None, slwin_start=None, slwin_stop=None, slwin_step=None, verbose=None): """Define temporal windows. This function can be used to define temporal windows, either manually or as automatic sliding windows. Note that every input parameter should be in the time domain (e.g seconds or milliseconds). Parameters ---------- times : array_like Time vector windows : array_like | None Manual windows (e.g (.1, .2) or [(.1, .2), (.4, .5)]). slwin_len : float | None Length of each sliding window (e.g .2 produces a 200ms window length). slwin_start : float | None Time point for starting sliding windows (e.g 0.1). If None, sliding windows will start from the first time point. slwin_stop : float | None Time point for ending sliding windows (e.g 1.5). If None, sliding windows will finish at the last time point. slwin_step : float | None Temporal step between each temporal window (e.g .1 means that consecutive windows are separated by 100ms). This parameter can be used to define either overlapping or non-overlapping windows. If None, slwin_step is going to be set to slwin_len plus one time step in order to produce consecutive non-overlapping windows. Returns ------- win_sample : array_like Array of shape (n_windows, 2) of temporal indexes defining where each window starts and finishes mean_time : array_like Mean time vector inside each defined window of shape (n_windows,) See also -------- plot_windows """ set_log_level(verbose) assert isinstance(times, np.ndarray) logger.info("Defining temporal windows") stamp = times[1] - times[0] # ------------------------------------------------------------------------- # build windows if (windows is None) and (slwin_len is None): logger.info(" No input detected. Full time window is used") win_time = np.array([[times[0], times[-1]]]) elif windows is not None: logger.info(" Manual definition of windows") win_time = np.atleast_2d(windows) elif slwin_len is not None: # manage empty inputs if slwin_start is None: slwin_start = times[0] # noqa if slwin_stop is None: slwin_stop = times[-1] # noqa if slwin_step is None: slwin_step = slwin_len + stamp # noqa logger.info(f" Definition of sliding windows (len={slwin_len}, " f"start={slwin_start}, stop={slwin_stop}, " f"step={slwin_step})") # build the sliding windows sl_start = np.arange(slwin_start, slwin_stop - slwin_len, slwin_step) sl_stop = np.arange(slwin_start + slwin_len, slwin_stop, slwin_step) if len(sl_start) != len(sl_stop): min_len = min(len(sl_start), len(sl_stop)) sl_start, sl_stop = sl_start[0:min_len], sl_stop[0:min_len] win_time = np.c_[sl_start, sl_stop] assert (win_time.ndim == 2) and (win_time.shape[1] == 2) # ------------------------------------------------------------------------- # time to sample conversion win_sample = np.zeros_like(win_time, dtype=int) times = times.reshape(-1, 1) for n_k, k in enumerate(win_time): win_sample[n_k, :] = np.argmin(np.abs(times - k), axis=0) logger.info(f" {win_sample.shape[0]} windows defined") return win_sample, win_time.mean(1)
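# Hedged usage sketch : 200ms sliding windows with 50% overlap on a 1s epoch
# sampled at 100 Hz (8 windows are produced here).
import numpy as np

times = np.arange(100) / 100.
win, tmean = define_windows(times, slwin_len=.2, slwin_step=.1)
# win : (n_windows, 2) sample indices ; tmean : mean time per window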
def conn_covgc(data, dt, lag, t0, step=1, roi=None, times=None, method='gc', conditional=False, n_jobs=-1, verbose=None): r"""Single-trial covariance-based Granger Causality for gaussian variables. This function computes the (conditional) covariance-based Granger Causality (covgc) for each trial. .. note:: **Total Granger interdependence** * TGI = gc.sum(axis=-1) = gc(x->y) + gc(y->x) + gc(x.y) * TGI = Hycy + Hxcx - Hxxcyy **Relations between Mutual Information and conditional entropies** This quantity can be defined as the Increment of Total Interdependence and it can be calculated from the difference of two mutual informations as follows .. math:: Ixxyy &= I(X_{i+1}, X_{i}; Y_{i+1}, Y_{i}) \\ &= H(X_{i+1}) + H(Y_{i+1}) - H(X_{i+1},Y_{i+1}) \\ &= log(det_{xi1}) + log(det_{yi1}) - log(det_{xyi1}) \\ Ixy &= I(X_{i}; Y_{i}) \\ &= H(X_{i}) + H(Y_{i}) - H(X_{i}, Y_{i}) \\ &= log(det_{xi}) + log(det_{yi}) - log(det_{yxi}) \\ ITI &= Ixxyy - Ixy Parameters ---------- data : array_like Electrophysiological data. Several input types are supported : * Standard NumPy arrays of shape (n_epochs, n_roi, n_times) * mne.Epochs * xarray.DataArray of shape (n_epochs, n_roi, n_times) dt : int Duration of the time window for covariance correlation in samples lag : int Number of samples for the lag within each trial t0 : array_like Array of zero time in samples of length (n_window,) step : int | 1 Number of samples stepping in the past for the lag within each trial times : array_like | None Time vector array of shape (n_times,). If the input is an xarray, the name of the time dimension can be provided roi : array_like | None ROI names of a single subject. If the input is an xarray, the name of the ROI dimension can be provided method : {'gauss', 'gc'} Method for the estimation of the covgc. Use either 'gauss' which assumes that the time-points are normally distributed or 'gc' in order to use the gaussian-copula. conditional : bool | False If True, the conditional Granger Causality is computed i.e the past is also conditioned by the past of other sources. n_jobs : int | -1 Number of jobs to use for parallel computing (use -1 to use all jobs). The parallel loop is set at the pair level. 
Returns ------- gc : array_like Granger Causality arranged as (n_epochs, n_pairs, n_windows, 3) where the last dimension means : * 0 : pairs[:, 0] -> pairs[:, 1] (x->y) * 1 : pairs[:, 1] -> pairs[:, 0] (y->x) * 2 : instantaneous (x.y) References ---------- Brovelli et al., 2015 :cite:`brovelli2015characterization` See also -------- conn_dfc """ set_log_level(verbose) # ------------------------------------------------------------------------- # input checking if isinstance(t0, CONFIG['INT_DTYPE']) or isinstance( t0, CONFIG['FLOAT_DTYPE']): t0 = np.array([t0]) t0 = np.asarray(t0).astype(int) dt, lag, step = int(dt), int(lag), int(step) # handle dataarray input if isinstance(data, xr.DataArray): trials, attrs = data[data.dims[0]].data, data.attrs else: trials, attrs = np.arange(data.shape[0]), {} # internal conversion data = SubjectEphy(data, y=trials, roi=roi, times=times) x, roi, times = data.data, data['roi'].data, data['times'].data trials = data['y'].data n_epochs, n_roi, n_pts = data.shape # force C contiguous array because operations assume row-major ordering if not x.flags.c_contiguous: x = np.ascontiguousarray(x) # method checking assert method in ['gauss', 'gc'] fcn = dict(gauss=_covgc, gc=_gccovgc)[method] # ------------------------------------------------------------------------- # build generic time indices (just need to add t0 to it) rows, cols = np.mgrid[0:lag + 1, 0:dt] # step in the past lags rows = rows[::step, :] cols = cols[::step, :] # create index for all lags and time points ind_tx = cols - rows # build output time vector times_p = np.empty((len(t0)), dtype=times.dtype, order='C') for n_t, t in enumerate(t0): times_p[n_t] = times[ind_tx[0, :] + t].mean() # get the non-directed pairs and build roi pairs names x_s, x_t = np.triu_indices(n_roi, k=1) pairs = np.c_[x_s, x_t] roi_p = np.array([f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)]) # check the ratio between lag and dt ratio = 100 * (ind_tx.shape[0] / (step * ind_tx.shape[1])) if not 10. <= ratio <= 15.: _step = int(np.ceil((lag + 1) / (.15 * dt))) logger.warning(f"The ratio between the lag and dt is {ratio}%. It's " f"recommended to conserve this ratio between 10-15%." f" Try with a step={_step}") logger.debug(f"Index shape : {ind_tx.shape}") # ------------------------------------------------------------------------- ext = 'conditional' if conditional else '' # compute covgc and parallel over pairs logger.info(f"Computing the {ext} covgc (method={method}, n_pairs={len(x_s)}" f"; n_windows={len(t0)}, lag={lag}, dt={dt}, step={step})") kw_par = dict(n_jobs=n_jobs, total=len(x_s), verbose=False) if not conditional: parallel, p_fun = parallel_func(fcn, **kw_par) gc = parallel(p_fun(x[:, s, :], x[:, t, :], ind_tx, t0) for s, t in zip(x_s, x_t)) else: parallel, p_fun = parallel_func(_cond_gccovgc, **kw_par) gc = parallel(p_fun(x, s, t, ind_tx, t0) for s, t in zip(x_s, x_t)) gc = np.stack(gc, axis=1) # ------------------------------------------------------------------------- # change output type dire = np.array(['x->y', 'y->x', 'x.y']) gc = xr.DataArray(gc, dims=('trials', 'roi', 'times', 'direction'), coords=(trials, roi_p, times_p, dire), name='covgc') # set attributes cfg = dict(lag=lag, step=step, dt=dt, t0=t0, conditional=conditional, type='covgc') gc.attrs = {**attrs, **cfg} return gc
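# Hedged usage sketch (toy data) : covariance-based Granger causality with a
# 50-sample window, a 5-sample lag and two window origins. With these values
# the lag / dt ratio stays inside the recommended 10-15% range.
import numpy as np

x = np.random.rand(10, 3, 200)            # (n_epochs, n_roi, n_times)
t0 = np.array([60, 120])                  # window origins (in samples)
gc = conn_covgc(x, dt=50, lag=5, t0=t0, step=1, method='gc', n_jobs=1)
# gc : DataArray (n_epochs, n_pairs, n_windows, 3) -> x->y, y->x, x.y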
def ds_ephy_io(x, roi=None, y=None, z=None, times=None, sub_roi=None, verbose=None): """Manage inputs conversion for the DatasetEphy. This function is used to convert NumPy / MNE / Xarray inputs into a standardized NumPy version. Parameters ---------- x : list List of length (n_subjects,). Each element of the list should either be an array of shape (n_epochs, n_channels, n_times), mne.Epochs, mne.EpochsArray, mne.EpochsTFR (i.e. non-averaged power) or DataArray roi : list | None List of length (n_subjects,) of roi names of length (n_channels) y, z : list | None List for the regressors. Each element should be an array of shape (n_epochs) sub_roi : list | None List of sub_roi names times : array_like | None Time vector Returns ------- x : list List of data array of shape (n_epochs, n_channels, n_times) y, z : list List of arrays of shape (n_epochs,) roi : list List of arrays of shape (n_channels,) times : array_like Time vector of shape (n_times,) sub_roi : array_like List of arrays of shape (n_channels,) """ set_log_level(verbose) # ------------------------------------------------------------------------- # data type detection and switch # ------------------------------------------------------------------------- assert isinstance(x, list), ("x input should be a list of elements of " "length (n_subjects,)") assert all([type(x[k]) == type(x[0]) for k in range(len(x)) ]), ("All elements in the `x` inputs are not the same type") # ------------------------------------------------------------------------- # conversion to array according to datatype # ------------------------------------------------------------------------- if 'numpy' in str(type(x[0])): logger.info(" NumPy inputs detected") elif 'mne' in str(type(x[0])): logger.info(" Converting mne inputs") x, times, roi = mne_to_arr(x, roi=roi) elif 'xarray' in str(type(x[0])): logger.info(" Converting xarray inputs") x, roi, y, z, times, sub_roi = xr_to_arr(x, roi=roi, y=y, z=z, times=times, sub_roi=sub_roi) # ------------------------------------------------------------------------- # manage none inputs # ------------------------------------------------------------------------- # empty roi if not isinstance(roi, list): logger.warning("No roi have been provided. A default will be used. " "Consider providing the `roi` input") roi = [] for k in range(len(x)): roi += [np.array([f"roi_{i}" for i in range(x[k].shape[1])])] # empty time vector if not isinstance(times, np.ndarray): logger.warning("No time vector found. A default will be used."
" You should use the `times` input instead") times = np.arange(x[0].shape[-1]) # ------------------------------------------------------------------------- # shape and types checking before returning # ------------------------------------------------------------------------- # spatio-temporal conversion roi = [np.asarray(roi[k]) for k in range(len(roi))] times = times.astype(np.float32) # data checking assert all([ k.ndim in [3, 4] for k in x ]), ("data should either contains 3d arrays (n_trials, n_channels, n_pts) " "or 4d arrays (n_trials, n_channels, n_freqs, n_pts)") x_sh = [x[k].shape for k in range(len(x))] is_sh_roi = [x_sh[k][1] == len(roi[k]) for k in range(len(x))] is_sh_times = [x_sh[k][-1] == len(times) for k in range(len(x))] assert all(is_sh_roi), "Inconsistent number of ROI" assert all(is_sh_times), "Inconsistent number of time points" assert all([list(x_sh[0])[1:] == list(x_sh[k])[1:]] for k in range(len(x))) if isinstance(y, list): y = [np.asarray(y[k]) for k in range(len(y))] assert len(y) == len(x), "length of y shoud be (n_subjects,)" assert [ x_sh[k][0] == len(y[k]) for k in range(len(x)) ], ("Each element of the y input should have a length of (n_epochs,)") if isinstance(z, list): z = [np.asarray(z[k]) for k in range(len(z))] assert len(z) == len(x), "length of z shoud be (n_subjects,)" assert [ x_sh[k][0] == len(z[k]) for k in range(len(x)) ], ("Each element of the z input should have a length of (n_epochs,)") if isinstance(sub_roi, list): assert all([k.shape == i.shape for k, i in zip(roi, sub_roi)]) # ------------------------------------------------------------------------- # categorical sub roi # ------------------------------------------------------------------------- # concatenate everything and get unique elements if isinstance(sub_roi, list): import pandas as pd logger.info(" Replacing sub roi by categorical integers") # get unique sub roi and build replacement dict sub_roi_cat = np.r_[tuple([k.squeeze() for k in sub_roi])] sub_roi_u = np.unique(sub_roi_cat, return_index=True) repl = {k: v for k, v in zip(*sub_roi_u)} # replace for each subject sub_roi_int = [] for _sub in sub_roi: sub_int = np.array(list(pd.Series(_sub).replace(repl, regex=True))) sub_roi_int += [sub_int] else: sub_roi_int = None return x, y, z, roi, times, sub_roi_int
def __init__(self, verbose=None): """Init.""" set_log_level(verbose)
def multi_to_uni_conditions(x, var_name=None, verbose=None): """Convert a discrete vector that contains multiple conditions. This function can be used to convert a list of discrete arrays, each reflecting possibly multivariate stimulus or conditions. Parameters ---------- x : list List of multi-variate conditions. Each element of the list is an array of shape (n_trials, n_conditions), where the number of trials can vary across elements of the list but they all have to have the same number of conditions var_name : string | None The name of the variable (useful for debugging) Returns ------- x_new : list List of remapped conditions where each element of the list has a shape of (n_trials,) """ set_log_level(verbose) # =============================== Checking ================================ if not isinstance(x, (list, tuple)): return [x] assert all([type(x[0]) == type(k) for k in x]) x_types = type(x[0]) if not x_types == np.ndarray: return x # skip unless all variables are multi-column integer arrays is_int = all([k.dtype in CONFIG['INT_DTYPE'] for k in x]) is_ndim = all([k.ndim > 1 for k in x]) if not is_int or not is_ndim: return x # test that all dimensions are equal same_dim = all([k.ndim == x[0].ndim for k in x]) if not same_dim: raise ValueError(f"Every array in the `{var_name}` input should " "have the same number of dimensions") # otherwise find all possible pairs x_all = np.concatenate(x, axis=0) idx = np.unique(x_all, axis=0, return_index=True)[1] u_cat = x_all[sorted(idx), :] # show to the user the new categories user = [] for n_c, cat in enumerate(u_cat): user += [f"{n_c}: [{', '.join([str(c) for c in cat])}]"] if isinstance(var_name, str): logger.debug(f" The `{var_name}` input contains multiple conditions" f" that have been remapped to : {'; '.join(user)}") # loop over subjects x_new = [] for k in range(len(x)): x_cat = np.full((x[k].shape[0],), -1, dtype=int) for n_c, cat in enumerate(u_cat): x_cat[np.equal(x[k], cat.reshape(1, -1)).all(1)] = n_c assert x_cat.min() > -1, "Not all values have been replaced" x_new += [x_cat] return x_new
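# Hedged usage sketch : two subjects with bivariate integer conditions ; each
# unique row is remapped to a single integer code.
import numpy as np

y = [np.array([[0, 0], [0, 1], [1, 1]]),
     np.array([[0, 1], [1, 1], [1, 1]])]
y_new = multi_to_uni_conditions(y, var_name='y')
# y_new -> [array([0, 1, 2]), array([1, 2, 2])]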
def savgol_filter(x, h_freq, axis=None, sfreq=None, polyorder=5, verbose=None): """Filter the data using Savitzky-Golay polynomial method. This function is an adaptation of the mne-python one for xarray.DataArray. Parameters ---------- x : array_like Multidimensional array or DataArray h_freq : float Approximate high cut-off frequency in Hz. Note that this is not an exact cutoff, since Savitzky-Golay filtering is done using polynomial fits instead of FIR/IIR filtering. This parameter is thus used to determine the length of the window axis : int, string | None Position of the time axis. Can either be an integer when `x` is a NumPy array or a string (e.g 'times') when using a DataArray sfreq : int, float | None Sampling frequency. If None and `x` is a DataArray with a 'times' coordinate, the sampling frequency is inferred from it polyorder : int | 5 Polynomial order Returns ------- x_filt : array_like Filtered data Notes ----- For Savitzky-Golay low-pass approximation, see: https://gist.github.com/larsoner/bbac101d50176611136b See also -------- kernel_smoothing """ set_log_level(verbose) # inputs checking if isinstance(x, xr.DataArray): dims = list(x.dims) # get axis name if axis is None: axis = 'times' if isinstance(axis, str): axis = list(x.dims).index(axis) # get sfreq if possible if not isinstance(sfreq, (int, float)): assert 'times' in dims sfreq = 1. / (x['times'].data[1] - x['times'].data[0]) assert isinstance(h_freq, (int, float)) assert isinstance(axis, int) assert isinstance(sfreq, (int, float)) if h_freq >= sfreq / 2.: raise ValueError('h_freq must be less than half the sample rate') # get window length window_length = (int(np.round(sfreq / h_freq)) // 2) * 2 + 1 logger.info(f' Using savgol length {window_length}') # apply savgol depending on input type kw = dict(axis=axis, polyorder=polyorder, window_length=window_length) if isinstance(x, xr.DataArray): x.data = savgol(x.data, **kw) return x else: return savgol(x, **kw)
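# Hedged usage sketch : low-pass a DataArray along its 'times' dimension with
# an approximate 20 Hz cutoff ; sfreq is inferred from the time coordinate
# (256 Hz here), giving a savgol window length of 13 samples.
import numpy as np
import xarray as xr

x = xr.DataArray(np.random.rand(10, 5, 512),
                 dims=('trials', 'roi', 'times'),
                 coords={'times': np.arange(512) / 256.})
x_filt = savgol_filter(x, h_freq=20.)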