def gccmi_1d_ccc(x, y, z, biascorrect=True):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third,
    z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    biascorrect : bool | True
        Forwarded as the `biascorrect` argument of `cmi_1d_ggg`.

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)

    Raises
    ------
    ValueError
        If one of the inputs has more than two dimensions or if the inputs
        do not share the same number of trials (last axis).
    """
    x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    # after atleast_2d + the check above every input is exactly 2d
    ntrl = x.shape[1]
    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # copula normalization along the trial axis
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    cz = copnorm_nd(z, axis=1)

    # parametric Gaussian CMI; honor the caller's bias-correction choice
    # (it used to be hard-coded to True, silently ignoring the parameter)
    return cmi_1d_ggg(cx, cy, cz, biascorrect=biascorrect, demeaned=True)
def gcmi_1d_cc(x, y):
    """Gaussian-Copula MI between two continuous variables.

    Both inputs are promoted to 2d, copula-normalized along the trial axis
    (axis 1) and handed to the parametric Gaussian estimator `mi_1d_gg`.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs)

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If an input has more than two dimensions or if x and y do not have
        the same number of trials.
    """
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if max(x.ndim, y.ndim) > 2:
        raise ValueError("x and y must be at most 2d")

    # both arrays are exactly 2d from here on
    n_trials = x.shape[1]
    if n_trials != y.shape[1]:
        raise ValueError("number of trials do not match")

    # copula normalization of each variable
    x_cop = copnorm_nd(x, axis=1)
    y_cop = copnorm_nd(y, axis=1)

    # parametric Gaussian MI
    return mi_1d_gg(x_cop, y_cop, True, True)
def test_copnorm_nd(self):
    """Test function copnorm_nd."""
    # the same random integer vector duplicated as two columns / two rows
    base = np.random.randint(0, 10, (20,))
    as_columns = np.c_[base, base]
    as_rows = as_columns.T

    # normalize along the sample axis of each layout
    cop_columns = copnorm_nd(as_columns, axis=0)
    cop_rows = copnorm_nd(as_rows, axis=1)

    # identical inputs must give identical outputs ...
    first, second = cop_columns[:, 0], cop_columns[:, 1]
    assert (first == second).all()
    # ... and the result must not depend on the array orientation
    assert (cop_rows == cop_columns.T).all()
def gccmi_1d_ccc(x, y, z, verbose=None):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third,
    z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    verbose : optional
        Passed as is to `set_log_level`.

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)

    Raises
    ------
    ValueError
        If one of the inputs has more than two dimensions or if the inputs
        do not share the same number of trials (last axis).
    """
    set_log_level(verbose)
    x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    ntrl = x.shape[1]
    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values: one message at most per input, emitted as
    # soon as a dimension has less than 90% of unique values. The message
    # now names the right input (z used to be reported as "y").
    for name, arr in (("x", x), ("y", y), ("z", z)):
        for row in arr:
            if (np.unique(row).size / float(ntrl)) < 0.9:
                logger.info(f"Input {name} has more than 10% repeated values")
                break

    # copula normalization along the trial axis
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    cz = copnorm_nd(z, axis=1)

    # parametric Gaussian CMI
    return cmi_1d_ggg(cx, cy, cz, True, True)
def gcmi_model_1d_cd(x, y):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method is based on ANOVA style model comparison. The continuous
    variable x is copula-normalized along its trial axis and the estimation
    itself is delegated to `mi_model_1d_gd`.

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Discrete variable; must squeeze to a 1d array of integers with one
        entry per epoch.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not univariate, if y is
        not of integer dtype or if x and y disagree on the number of trials.
    """
    x = np.atleast_2d(x)
    y = np.squeeze(y)

    # input validation
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    n_trials = x.shape[1]
    if y.size != n_trials:
        raise ValueError("number of trials do not match")

    # copula normalization followed by the parametric Gaussian MI
    x_cop = copnorm_nd(x, axis=1)
    return mi_model_1d_gd(x_cop, y, True, True)
def gcmi_mixture_1d_cd(x, y):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method evaluate MI from a Gaussian mixture. The Gaussian mixture is
    fit using robust measures of location (median) and scale (median absolute
    deviation) for each class.

    I = gcmi_mixture_cd(x,y) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y.

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Discrete variable; must squeeze to a 1d array of integers with one
        entry per epoch.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not univariate, if y is
        not of integer dtype or if x and y disagree on the number of trials.
    """
    x, y = np.atleast_2d(x), np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    ntrl = x.shape[1]
    if y.size != ntrl:
        raise ValueError("number of trials do not match")
    ym = np.unique(y)

    # factor turning a median absolute deviation into a standard deviation
    # under a Gaussian assumption
    mad_to_sd = 1.482602218505602

    # copula normalise each class
    # shift and rescale to match loc and scale of raw data
    # this provides a robust way to fit the gaussian mixture
    classdat = []
    ydat = []
    for yi in ym:
        # class conditional data
        idx = y == yi
        xm = x[:, idx]
        cxm = copnorm_nd(xm, axis=1)

        # robust measure of loc from median
        xmmed = np.median(xm, axis=1)[:, np.newaxis]
        # robust measure of s.d. under Gaussian assumption from median
        # absolute deviation
        xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis]
        cxmscaled = cxm * (mad_to_sd * xmmad) + xmmed

        classdat.append(cxmscaled)
        # builtin int instead of the `np.int` alias (removed in NumPy 1.24)
        ydat.append(yi * np.ones(xm.shape[1], dtype=int))

    # trials are now grouped by class; labels are rebuilt in the same order
    cx = np.concatenate(classdat, axis=1)
    newy = np.concatenate(ydat)
    return mi_mixture_1d_gd(cx, newy)
def copnorm(self, mi_type='cc', gcrn_per_suj=True):
    """Apply the Gaussian-Copula rank normalization.

    The data stored in `self._x` are always normalized; `self._y` is only
    normalized when it is continuous, i.e. for `mi_type` 'cc' and 'ccd'.
    Calling this method a second time is a no-op (a warning is logged).

    Parameters
    ----------
    mi_type : {'cc', 'cd', 'ccd'}
        The copnorm depends on the mutual-information type that is going to
        be performed. Choose either 'cc' (continuous / continuous), 'cd'
        (continuous / discret) or 'ccd' (continuous / continuous / discret)
    gcrn_per_suj : bool | True
        Apply the Gaussian-rank normalization either per subject (True) or
        across subjects (False).

    Raises
    ------
    NotImplementedError
        If the data are grouped by subject.
    """
    assert mi_type in ['cc', 'cd', 'ccd']

    # `_copnormed` holds a string once the normalization has been applied
    if isinstance(self._copnormed, str):
        logger.warning("Data already copnormed. Copnorm ignored")
        return None
    logger.info(f" Apply copnorm (per subject={gcrn_per_suj}; "
                f"mi_type={mi_type})")

    # y is continuous (hence normalized) only for those mi types
    y_is_continuous = mi_type in ['cc', 'ccd']

    # the way of normalizing depends on how the data are organized
    if self._groupedby == "subject":
        raise NotImplementedError("FUTURE WORK")
    if self._groupedby == "roi":
        if gcrn_per_suj:
            # normalization within each subject
            logger.debug("copnorm applied per subjects")
            self._x = [copnorm_cat_nd(x_roi, suj, axis=-1)
                       for x_roi, suj in zip(self._x, self.suj_roi)]
            if y_is_continuous:
                self._y = [copnorm_cat_nd(y_roi, suj, axis=0)
                           for y_roi, suj in zip(self._y, self.suj_roi)]
        else:
            # normalization across subjects
            logger.debug("copnorm applied across subjects")
            self._x = [copnorm_nd(x_roi, axis=-1) for x_roi in self._x]
            if y_is_continuous:
                self._y = [copnorm_nd(y_roi, axis=0) for y_roi in self._y]

    # remember how the normalization has been performed
    self._copnormed = f"{int(gcrn_per_suj)}-{mi_type}"
def _conn_dfc(x_w, x_s, x_t, roi_idx, gcrn):
    """Parallel function for computing DFC.

    For each (source, target) couple taken from `x_s` / `x_t`, the entries
    of `x_w` selected along its second axis by `roi_idx` are passed to
    `mi_nd_gg`. The output has one row per entry of the first axis of `x_w`
    and one column per couple.
    """
    n_rows, n_couples = x_w.shape[0], len(x_s)
    out = np.zeros((n_rows, n_couples))

    # the normalization is performed a single time, before looping on pairs
    if gcrn:
        x_w = copnorm_nd(x_w, axis=2)

    mi_kwargs = dict(traxis=-1, mvaxis=-2, shape_checking=False)
    for column, (source, target) in enumerate(zip(x_s, x_t)):
        # time-series of the source and of the target
        ts_source = x_w[:, roi_idx[source], :]
        ts_target = x_w[:, roi_idx[target], :]
        # mutual information between both time-series
        out[:, column] = mi_nd_gg(ts_source, ts_target, **mi_kwargs)

    return out
def gcmi_model_1d_cd(x, y, verbose=None):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method is based on ANOVA style model comparison. The continuous
    variable x is copula-normalized along its trial axis and the estimation
    itself is delegated to `mi_model_1d_gd`. An informative message is
    logged when a dimension of x contains many repeated values.

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Discrete variable; must squeeze to a 1d array of integers with one
        entry per epoch.
    verbose : optional
        Passed as is to `set_log_level`.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not univariate, if y is
        not of integer dtype or if x and y disagree on the number of trials.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.squeeze(y)

    # input validation
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    n_trials = x.shape[1]
    if y.size != n_trials:
        raise ValueError("number of trials do not match")

    # report (once) if a dimension has less than 90% of unique values
    has_repeats = any(
        (np.unique(row).size / float(n_trials)) < 0.9 for row in x)
    if has_repeats:
        logger.info("Input x has more than 10% repeated values")

    # copula normalization followed by the parametric Gaussian MI
    x_cop = copnorm_nd(x, axis=1)
    return mi_model_1d_gd(x_cop, y, True, True)
def test_copnorm(self):
    """Test function copnorm."""
    roi_name = "roi_2"

    # dataset construction from 3d data
    d_3d = self._get_data(3)
    ds = DatasetEphy(d_3d, y='y', z='z', **kw)

    # reference: data of both subjects concatenated over trials
    raw = ds.get_roi_data(roi_name, copnorm=False)
    per_subject = (d_3d[0].sel(roi='roi_2'), d_3d[1].sel(roi='roi_2'))
    expected = xr.concat(per_subject, 'trials').T
    expected = expected.expand_dims('mv', axis=-2)

    # without copnorm, the data are returned untouched
    assert 9. < raw.data.ravel().mean() < 11.
    np.testing.assert_array_equal(expected.data, raw.data)

    # with copnorm, the values are expected to be centered around zero
    normed = ds.get_roi_data(roi_name, copnorm=True)
    assert -1. < normed.data.ravel().mean() < 1.

    # values when normalizing across subjects (gcrn_per_suj=False)
    across = ds.get_roi_data(roi_name, copnorm=True, gcrn_per_suj=False)
    np.testing.assert_array_equal(copnorm_nd(expected.data), across.data)

    # values when normalizing per subject (gcrn_per_suj=True)
    within = ds.get_roi_data(roi_name, copnorm=True, gcrn_per_suj=True)
    np.testing.assert_array_equal(
        copnorm_cat_nd(expected.data, within['subject'].data), within.data)
def conn_transfer_entropy(x, max_delay=30, pairs=None, gcrn=True):
    """Across-trials transfer entropy.

    The transfer entropy represents the amount of information that is sent
    from a source to a target. It is defined as :

    .. math::

        TE = I(source_{past}; target_{present} | target_{past})

    Where :math:`past` is defined using the `max_delay` input parameter. Note
    that the transfer entropy only provides about the amount of information
    that is sent, not on the content.

    Parameters
    ----------
    x : array_like
        Array of data of shape (n_roi, n_times, n_epochs). Must be a gaussian
        variable
    max_delay : int | 30
        Number of time points defining where to stop looking at in the past.
        Increasing this maximum delay input can lead to slower computations
    pairs : array_like
        Array of pairs to consider for computing the transfer entropy. It
        should be an array of shape (n_pairs, 2) where the first column refers
        to sources and the second to targets. If None, all pairs will be
        computed
    gcrn : bool | True
        Apply a Gaussian Copula rank normalization

    Returns
    -------
    te : array_like
        The transfer entropy array of shape (n_pairs, n_times - max_delay)
    pairs : array_like
        Pairs vector use for computations of shape (n_pairs, 2)

    Raises
    ------
    AssertionError
        If `x` is not 3d, if `max_delay` is not an integer or if `pairs` is
        not a 2d array with two columns.
    """
    # -------------------------------------------------------------------------
    # check input data first, so that a wrong dimensionality is reported by
    # the dedicated message and not by a failing shape unpacking
    assert (x.ndim == 3), ("input data `x` should be a 3d array of shape "
                           "(n_roi, n_times, n_epochs)")
    n_roi, n_times = x.shape[0], x.shape[1]
    # check max_delay (python and numpy integers are both accepted; the
    # `np.int` alias formerly used here has been removed from NumPy)
    assert isinstance(max_delay, (int, np.integer)), ("`max_delay` should be an "
                                                      "integer")
    # check pairs : every ordered couple of distinct roi when not provided,
    # otherwise any array-like (e.g. list of lists) is converted to an array
    if pairs is None:
        pairs = np.c_[np.where(~np.eye(n_roi, dtype=bool))]
    else:
        pairs = np.asarray(pairs)
    assert isinstance(pairs, np.ndarray) and (pairs.ndim == 2) and (
        pairs.shape[1] == 2), ("`pairs` should be a 2d array of shape "
                               "(n_pairs, 2) where the first column refers to "
                               "sources and the second to targets")
    x_all_s, x_all_t = pairs[:, 0], pairs[:, 1]
    n_pairs = len(x_all_s)
    # insert a singleton axis before the last (epochs) axis
    x = x[..., np.newaxis, :]

    # -------------------------------------------------------------------------
    # apply copnorm
    if gcrn:
        x = copnorm_nd(x, axis=-1)

    # -------------------------------------------------------------------------
    # compute the transfer entropy
    # NOTE(review): the loop below fills n_times - max_delay - 1 columns, so
    # the last column of `te` is left to zero - confirm this is intended
    te = np.zeros((n_pairs, n_times - max_delay), dtype=float)
    # each source is processed once with all of its targets (iterating over
    # every pair would recompute identical values for repeated sources)
    for x_s in np.unique(x_all_s):
        # select targets
        is_source = x_all_s == x_s
        x_t = x_all_t[is_source]
        targets = x[x_t, ...]
        # tile source
        source = np.tile(x[[x_s], ...], (targets.shape[0], 1, 1, 1))
        # loop over remaining time points
        for n_d, d in enumerate(range(max_delay + 1, n_times)):
            t_pres = np.tile(targets[:, [d], :], (1, max_delay, 1, 1))
            past = slice(d - max_delay - 1, d - 1)
            s_past = source[:, past, ...]
            t_past = targets[:, past, ...]
            # compute the transfer entropy
            _te = cmi_nd_ggg(s_past, t_pres, t_past, **CONFIG["KW_GCMI"])
            # take the mean over delays
            te[is_source, n_d] = _te.mean(1)

    return te, pairs
def conn_dfc(data, win_sample, times=None, roi=None, n_jobs=1, gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    The Dynamic Functional Connectivity (DFC) is estimated with the Gaussian
    Copula Mutual Information (GCMI), across the time points of each window
    and for each trial. Windows are either manually defined or sliding.

    Parameters
    ----------
    data : array_like
        Electrophysiological data array of a single subject organized as
        (n_epochs, n_roi, n_times)
    win_sample : array_like
        Array of shape (n_windows, 2) describing where each window start and
        finish. You can use the function :func:`frites.conn.define_windows`
        to define either manually either sliding windows.
    times : array_like | None
        Time vector array of shape (n_times,)
    roi : array_like | None
        ROI names of a single subject
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g z-score) this parameter can be set to
        False because the data can be considered as gaussian over time.
    verbose : optional
        Forwarded to `set_log_level`, `conn_io` and `parallel_func`.

    Returns
    -------
    dfc : array_like
        The DFC array of shape (n_epochs, n_pairs, n_windows)

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)

    # -------------------------------------------------------------------------
    # convert the inputs
    data, trials, roi, times, attrs = conn_io(
        data, roi=roi, times=times, verbose=verbose)

    # -------------------------------------------------------------------------
    # check the data and the windows
    n_epochs, n_roi, n_pts = data.shape
    assert (len(roi) == n_roi) and (len(times) == n_pts)
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]

    # non-directed pairs (upper triangle) and their names
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    roi_p = []
    for s, t in zip(x_s, x_t):
        roi_p.append(f"{roi[s]}-{roi[t]}")

    # -------------------------------------------------------------------------
    # dfc estimation
    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    # parallel function
    parallel, p_fun = parallel_func(
        mi_nd_gg, n_jobs=n_jobs, verbose=verbose, prefer='threads')
    pbar = ProgressBar(range(n_win), mesg='Estimating DFC')

    dfc = np.zeros((n_epochs, n_pairs, n_win), dtype=np.float32)
    with parallel as para:
        for n_w, w in enumerate(win_sample):
            # data inside the current window
            start, stop = w[0], w[1]
            data_w = data[..., start:stop]
            # rank normalization across the time points of the window
            if gcrn:
                data_w = copnorm_nd(data_w, axis=2)
            # mutual information of each pair of roi
            jobs = (
                p_fun(data_w[:, [s], :], data_w[:, [t], :],
                      **CONFIG["KW_GCMI"])
                for s, t in zip(x_s, x_t))
            mi_pairs = para(jobs)
            dfc[..., n_w] = np.stack(mi_pairs, axis=1)
            pbar.update_with_increment_value(1)

    # -------------------------------------------------------------------------
    # conversion to a DataArray; each window is located at its mean time
    win_times = times[win_sample]
    dfc = xr.DataArray(
        dfc, dims=('trials', 'roi', 'times'), name='dfc',
        coords=(trials, roi_p, win_times.mean(1)))
    # keep track of the windows in the attributes
    cfg = dict(
        win_sample=np.r_[tuple(win_sample)],
        win_times=np.r_[tuple(win_times)],
        type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc
def get_roi_data(self, roi, groupby='subjects', mi_type='cc', copnorm=True,
                 gcrn_per_suj=True):
    """Get the data of a single brain region.

    Parameters
    ----------
    roi : string
        ROI name to get
    groupby : {'subjects'}
        Specify if the data across subjects have to be concatenated
    mi_type : {'cc', 'cd', 'ccd'}
        The type of mutual-information that is then going to be used. This
        is going to have an influence on how the data are organized and
        how the copnorm is going to be applied
    copnorm : bool | True
        Apply the gaussian copula rank normalization
    gcrn_per_suj : bool | True
        Specify whether the gaussian copula rank normalization have to be
        applied per subject (True - RFX) or across subjects (False - FFX)

    Returns
    -------
    da : xr.DataArray
        The data of the single brain region

    Raises
    ------
    ValueError
        If `groupby` is not supported, or if channels are not aggregated
        and `mi_type` does not allow to encode the channel identity in a
        discrete variable.
    """
    # only the concatenation across subjects is implemented; fail early
    # with a clear message rather than on an undefined variable later on
    if groupby != 'subjects':
        raise ValueError(f"Unsupported groupby={groupby!r}; only 'subjects' "
                         "is implemented")

    # list of subjects present in the desired roi
    suj_list = self._df_rs.loc[roi, 'subjects']

    # group data across subjects
    x_r_ms = []
    for s in suj_list:
        # roi (possibly multi-sites) selection
        x_roi = self._x[s].sel(roi=self._x[s]['roi'].data == roi)
        # stack roi and trials
        x_roi = x_roi.stack(rtr=('roi', 'trials'))
        x_r_ms.append(x_roi)
    x_r_ms = xr.concat(x_r_ms, 'rtr')

    # 4d or multivariate
    if self._multivariate:
        x_r_ms = x_r_ms.transpose('times', 'mv', 'rtr')
    else:
        x_r_ms = x_r_ms.expand_dims('mv', axis=-2)
    x_coords = list(x_r_ms.coords)

    # channels aggregation
    if not self._agg_ch and ('y' in x_coords):
        # shortcuts
        ch_id = x_r_ms['agg_ch'].data
        y = x_r_ms['y'].data
        # transformation depends on mi_type
        if mi_type == 'cd':
            # I(C; D) where the D=[y, ch_id]
            ysub = np.c_[y, ch_id]
            x_r_ms['y'].data = multi_to_uni_conditions([ysub], False)[0]
        elif mi_type == 'ccd' and ('z' not in x_coords):
            # I(C; C; D) where D=ch_id. In that case z=D
            x_r_ms = x_r_ms.assign_coords(z=('rtr', ch_id))
        elif mi_type == 'ccd' and ('z' in x_coords):
            # I(C; C; D) where D=[z, ch_id]
            zsub = np.c_[x_r_ms['z'].data, ch_id]
            x_r_ms['z'].data = multi_to_uni_conditions([zsub], False)[0]
        else:
            raise ValueError("Can't avoid aggregating channels")

    # gaussian copula rank normalization
    if copnorm:
        # y is only normalized when it is continuous and present
        norm_y = (mi_type in ['cc', 'ccd']) and ('y' in x_coords)
        if gcrn_per_suj:  # gcrn per subject
            logger.debug("copnorm applied per subjects")
            suj = x_r_ms['subject'].data
            x_r_ms.data = copnorm_cat_nd(x_r_ms.data, suj, axis=-1)
            if norm_y:
                x_r_ms['y'].data = copnorm_cat_nd(
                    x_r_ms['y'].data, suj, axis=0)
        else:  # gcrn across subjects
            logger.debug("copnorm applied across subjects")
            x_r_ms.data = copnorm_nd(x_r_ms.data, axis=-1)
            if norm_y:
                x_r_ms['y'].data = copnorm_nd(x_r_ms['y'].data, axis=0)

    return x_r_ms
def gccmi_1d_ccd(x, y, z, verbose=None): """GCCMI between 2 continuous variables conditioned on a discrete variable. I = gccmi_ccd(x,y,z,Zm) returns the CMI between two (possibly multidimensional) continuous variables, x and y, conditioned on a third discrete variable z, estimated via a Gaussian copula. Parameters ---------- x, y : array_like Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs). z : array_like Discret array of shape (n_epochs,) Returns ------- cmi : float Conditional Mutual Information shared by x and y conditioned by z (in bits) """ set_log_level(verbose) x = np.atleast_2d(x) y = np.atleast_2d(y) if x.ndim > 2 or y.ndim > 2: raise ValueError("x and y must be at most 2d") if z.ndim > 1: raise ValueError("only univariate discrete variables supported") if not np.issubdtype(z.dtype, np.integer): raise ValueError("z should be an integer array") nvarx, ntrl = x.shape nvary = y.shape[0] zm = np.unique(z) if y.shape[1] != ntrl or z.size != ntrl: raise ValueError("number of trials do not match") # check for repeated values for xi in range(nvarx): if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9: logger.info("Input x has more than 10% repeated values") break for yi in range(nvary): if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9: logger.info("Input y has more than 10% repeated values") break # calculate gcmi for each z value icond = np.zeros(len(zm)) pz = np.zeros(len(zm)) cx = [] cy = [] for zi in zm: idx = z == zi thsx = copnorm_nd(x[:, idx], axis=1) thsy = copnorm_nd(y[:, idx], axis=1) pz[zi] = idx.sum() cx.append(thsx) cy.append(thsy) icond[zi] = mi_1d_gg(thsx, thsy, True, True) pz = pz / float(ntrl) # conditional mutual information cmi = np.sum(pz * icond) i = mi_1d_gg(np.hstack(cx), np.hstack(cy), True, False) return (cmi, i)