def load_behavioral(path, verbose=None):
    """Load the behavioral analysis Excel file of a single subject.

    The purpose of this function is to load the Excel file that has been
    generated using the function `behavioral_analysis`.

    Parameters
    ----------
    path : str
        Full path to the Excel file

    Returns
    -------
    summary : pandas.DataFrame
        The dataframe that summarizes probabilities and contingency.
    behavior : dict
        A dictionary where the keys refer to the team number. Items are
        dataframes with all of the info per trial.
    """
    assert os.path.isfile(path)
    set_log_level(verbose)
    logger.info('Loading %s' % path)
    xl = pd.ExcelFile(path)
    sheet_names = xl.sheet_names
    assert sheet_names[0] == 'Summary'
    # Get the summary
    logger.info(' - Reading summary')
    summary = xl.parse('Summary')
    # Read the team sheets
    behavior = dict()
    logger.info(' - Reading team')
    for s in sheet_names[1:]:
        behavior[int(s.split('Team ')[1])] = xl.parse(s)
    return summary, behavior
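
# Hedged usage sketch (not part of the library): reload an analysis saved by
# `behavioral_analysis`. The file name is hypothetical.
def _demo_load_behavioral():
    summary, behavior = load_behavioral('subject_1.xlsx')
    print(summary)   # probabilities and contingency per team
    return behavior  # {team_number: per-trial dataframe}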
def gccmi_ccc(x, y, z, verbose=None):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third,
    z, estimated via a Gaussian copula. If x and/or y are multivariate,
    columns must correspond to samples, rows to dimensions/variables.
    (Samples last axis)
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    z = np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]
    Nvarz = z.shape[0]
    if y.shape[1] != Ntrl or z.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(Nvary):
        if (np.unique(y[yi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break
    for zi in range(Nvarz):
        if (np.unique(z[zi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input z has more than 10% repeated values")
            break
    # copula normalization
    cx = copnorm(x)
    cy = copnorm(y)
    cz = copnorm(z)
    # parametric Gaussian CMI
    I = cmi_ggg(cx, cy, cz, True, True)
    return I
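
# Hedged sketch (not part of the library): x and y are noisy copies of a
# common source z, so conditioning on z should remove most of their
# dependence and bring the CMI close to zero. Assumes numpy is imported as
# np at module level, as in the functions above.
def _demo_gccmi_ccc():
    rng = np.random.RandomState(0)
    z = rng.randn(1000)
    x = z + 0.5 * rng.randn(1000)
    y = z + 0.5 * rng.randn(1000)
    return gccmi_ccc(x, y, z)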
def gcmi_model_cd(x, y, Ym, verbose=None):
    """Gaussian-Copula Mutual Information between a continuous and a discrete
    variable based on ANOVA style model comparison.

    I = gcmi_model_cd(x,y,Ym) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y. For
    1D x this is a lower bound to the mutual information. Columns of x
    correspond to samples, rows to dimensions/variables. (Samples last axis)
    y should contain integer values in the range [0 Ym-1] (inclusive).

    See also: gcmi_mixture_cd
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    if not isinstance(Ym, int):
        raise ValueError("Ym should be an integer")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    if y.size != Ntrl:
        raise ValueError("number of trials do not match")
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    # check values of discrete variable
    if y.min() != 0 or y.max() != (Ym - 1):
        raise ValueError("values of discrete variable y are out of bounds")
    # copula normalization
    cx = copnorm(x)
    # parametric Gaussian MI
    I = mi_model_gd(cx, y, Ym, True, True)
    return I
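
# Hedged sketch (not part of the library): MI between a continuous variable
# and the binary class that shifts its mean; y holds integers in [0, Ym - 1].
def _demo_gcmi_model_cd():
    rng = np.random.RandomState(0)
    y = rng.randint(0, 2, 1000)
    x = rng.randn(1000) + 2. * y  # class 1 is shifted by +2
    return gcmi_model_cd(x, y, 2)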
def mne_epochstfr_to_epochs(epoch, freqs=None, verbose=None):
    """Convert an MNE EpochsTFR to an Epochs instance.

    Parameters
    ----------
    epoch : mne.time_frequency.EpochsTFR | str
        Should either be an EpochsTFR instance or a path to a -tfr.h5 file
    freqs : tuple | list | None
        The frequencies to select. Use None to select all frequencies or a
        tuple of two floats to select a sub-band. The final Epochs instance
        is obtained by taking the mean across selected frequencies.

    Returns
    -------
    r_epoch : mne.Epochs
        Epochs instance
    """
    set_log_level(verbose)
    if isinstance(epoch, str):
        assert '-tfr.h5' in epoch, "File name should end with -tfr.h5"
        epoch = mne.time_frequency.read_tfrs(epoch)[0]
    assert isinstance(epoch, mne.time_frequency.EpochsTFR)
    # Handle frequencies
    epoch_freqs = epoch.freqs
    if freqs is None:
        logger.info(' Selecting all frequencies')
        sl = slice(None)
    elif isinstance(freqs, (list, tuple, np.ndarray)):
        assert len(freqs) == 2, "`freqs` should be a tuple of two elements"
        logger.info(" Selecting frequencies "
                    "(%.2f, %.2f)" % (freqs[0], freqs[1]))
        # Closest frequency indices; + 1 so that the upper bound is included
        _idx = np.abs(epoch_freqs.reshape(-1, 1) -
                      np.array(freqs).reshape(1, -1)).argmin(0)
        sl = slice(_idx[0], _idx[1] + 1)
    else:
        raise ValueError("`freqs` should either be None or a tuple of two "
                         "floats")
    # Build the Epochs instance (mean across the selected frequencies)
    info = epoch.info
    data = epoch.data[..., sl, :].mean(2)
    return mne.EpochsArray(data, info, tmin=epoch.times[0], verbose=verbose)
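
# Hedged usage sketch (not part of the library): average an EpochsTFR over
# the 8-12 Hz band and get back a standard Epochs instance. The file name
# is hypothetical.
def _demo_epochstfr_to_epochs():
    return mne_epochstfr_to_epochs('subject_1-tfr.h5', freqs=(8., 12.))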
def gcmi_cc(x, y, verbose=None):
    """Gaussian-Copula Mutual Information between two continuous variables.

    I = gcmi_cc(x,y) returns the MI between two (possibly multidimensional)
    continuous variables, x and y, estimated via a Gaussian copula. If x
    and/or y are multivariate, columns must correspond to samples, rows to
    dimensions/variables. (Samples last axis)
    This provides a lower bound to the true MI value.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]
    if y.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(Nvary):
        if (np.unique(y[yi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break
    # copula normalization
    cx = copnorm(x)
    cy = copnorm(y)
    # parametric Gaussian MI
    I = mi_gg(cx, cy, True, True)
    return I
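
# Hedged sketch (not part of the library): a noisy copy of x should give a
# clearly positive MI lower bound. Assumes numpy is imported as np at module
# level, as above.
def _demo_gcmi_cc():
    rng = np.random.RandomState(0)
    x = rng.randn(1000)            # 1000 samples, on the last axis
    y = x + 0.5 * rng.randn(1000)  # noisy copy of x
    return gcmi_cc(x, y)           # MI lower bound, in bits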
def behavioral_analysis(tr_team, tr_play, tr_win, save_as=None,
                        embedded_plot=True, modality='meg', verbose=None):
    """Perform behavioral analysis using team, play and win triggers.

    Parameters
    ----------
    tr_team : array_like
        Array describing the team number per trial (e.g. [6, 6, 6, ..., 15])
    tr_play : array_like
        Array describing if the subject is playing (1) or not (0)
    tr_win : array_like
        Array describing if the subject wins (1) or loses (0)
    save_as : string | None
        Full path to a .xlsx file where to save the Excel file
    embedded_plot : bool | True
        Put P(O|A), P(O|nA) and dP plots inside the Excel file.
    modality : {'meg', 'seeg'}
        Because the probabilities are different between the meg and seeg
        task, you have to specify the recording modality.

    Returns
    -------
    summary : dataframe
        A pandas dataframe that summarizes the estimated probabilities (edP)
        together with the theoretical values defined by the task (which
        differ between seeg and meg)
    behavior : dict
        Dictionary organized by team number in which conditional
        probabilities and cumulative sums are saved.
    """
    set_log_level(verbose)
    # Sanity check
    (tr_team, tr_play, tr_win) = tuple(
        [np.asarray(k, dtype=int) for k in [tr_team, tr_play, tr_win]])
    assert tr_team.shape == tr_play.shape == tr_win.shape
    _u = all([np.array_equal(np.unique(k), [0, 1])
              for k in [tr_play, tr_win]])
    assert _u, "`tr_play` and `tr_win` should only contain 0 and 1"
    is_t_min, is_t_max = 1 <= tr_team.min() <= 15, 1 <= tr_team.max() <= 15
    assert is_t_min and is_t_max, "Team number must be between [1, 15]"
    # Boolean analysis
    logger.info(' - Get conditional booleans')
    is_oa = np.logical_and(tr_win == 1, tr_play == 1).astype(int)
    is_noa = np.logical_and(tr_win == 0, tr_play == 1).astype(int)
    is_ona = np.logical_and(tr_win == 1, tr_play == 0).astype(int)
    is_nona = np.logical_and(tr_win == 0, tr_play == 0).astype(int)
    # Compute the cumulative sum per team
    behavior = dict()
    col = ['Team', 'Play', 'Win', 'O|A', 'nO|A', 'O|nA', 'nO|nA', 'f(O|A)',
           'f(nO|A)', 'f(O|nA)', 'f(nO|nA)', 'eP(O|A)', 'eP(O|nA)', 'edP',
           'uedP']
    logger.info(' - Split cumulative sum per team')
    for team in np.unique(tr_team):
        _df = dict()
        idx_team = tr_team == team
        # Retain triggers for the current team
        _df['Team'] = tr_team[idx_team]
        _df['Play'], _df['Win'] = tr_play[idx_team], tr_win[idx_team]
        # Retain the conditional outcomes
        _df['O|A'], _df['O|nA'] = is_oa[idx_team], is_ona[idx_team]
        _df['nO|A'], _df['nO|nA'] = is_noa[idx_team], is_nona[idx_team]
        # Get the cumulative sum for each condition
        _df['f(O|A)'] = bincumsum(_df['O|A'])
        _df['f(O|nA)'] = bincumsum(_df['O|nA'])
        _df['f(nO|A)'] = bincumsum(_df['nO|A'])
        _df['f(nO|nA)'] = bincumsum(_df['nO|nA'])
        # Compute P(O|A) and P(O|nA)
        _df['eP(O|A)'] = _df['f(O|A)'] / (_df['f(O|A)'] + _df['f(nO|A)'])
        _df['eP(O|nA)'] = _df['f(O|nA)'] / (_df['f(O|nA)'] + _df['f(nO|nA)'])
        _df['edP'] = _df['eP(O|A)'] - _df['eP(O|nA)']
        _df['uedP'] = np.diff(np.r_[0, _df['edP']])
        behavior[team] = pd.DataFrame(_df, columns=col)
    # Summary table
    logger.info(" - Summary table")
    summary = get_causal_probabilities(False, modality)
    _task = np.full((len(summary), 3), np.nan)
    for t in np.unique(tr_team) - 1:
        _task[t, 0] = behavior[t + 1]['eP(O|A)'].iloc[-1]
        _task[t, 1] = behavior[t + 1]['eP(O|nA)'].iloc[-1]
        _task[t, 2] = behavior[t + 1]['edP'].iloc[-1]
    summary['eP(O|A)'] = _task[:, 0]
    summary['eP(O|nA)'] = _task[:, 1]
    summary['edP'] = _task[:, 2]
    # Change the column order for plotting
    col = ['Team', 'P(O|A)', 'eP(O|A)', 'P(O|nA)', 'eP(O|nA)', 'dP', 'edP']
    summary = summary[col]
    # Save the dataframe
    if isinstance(save_as, str):
        with pd.ExcelWriter(save_as) as writer:
            summary.to_excel(writer, sheet_name='Summary')
            for team, df in behavior.items():
                df.to_excel(writer, sheet_name='Team %i' % team)
        if embedded_plot:
            # Generate plots inside the Excel file
            from openpyxl import load_workbook
            from openpyxl.chart import Reference, LineChart, BarChart
            wb = load_workbook(save_as)
            # Summary plot
            ws = wb['Summary']
            c1 = BarChart()
            title = "Comparison between dP and estimated dP (edP)"
            _xl_plot(c1, title, 'Team', 'Contingency')
            data = Reference(ws, min_col=7, min_row=1, max_row=17, max_col=8)
            team = Reference(ws, min_col=2, min_row=1, max_row=17)
            c1.set_categories(team)
            c1.add_data(data, titles_from_data=True)
            ws.add_chart(c1, "A18")
            # Team plots
            for team, df in behavior.items():
                ws = wb['Team %i' % team]
                c1 = LineChart()
                title = "Contingency evolution across trials"
                _xl_plot(c1, title, 'Trials', 'dP')
                data = Reference(ws, min_col=13, max_col=15, min_row=1,
                                 max_row=41)
                c1.add_data(data, titles_from_data=True)
                ws.add_chart(c1, "A45")
            wb.save(save_as)
        logger.info(" - Behavioral analysis saved to %s" % save_as)
    return summary, behavior
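
# Hedged usage sketch (not part of the library): run the analysis on made-up
# triggers for two teams of 40 trials each and write the Excel summary. The
# file name is hypothetical; numpy is imported as np at module level.
def _demo_behavioral_analysis():
    rng = np.random.RandomState(0)
    tr_team = np.repeat([1, 2], 40)  # team number per trial
    tr_play = rng.randint(0, 2, 80)  # played (1) or not (0)
    tr_win = rng.randint(0, 2, 80)   # won (1) or lost (0)
    return behavioral_analysis(tr_team, tr_play, tr_win,
                               save_as='behavior.xlsx', modality='meg')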
""" BraiNets ======== Python codes for causal relationships using Gaussian copula and information theory based tools. """ import logging from brainets import ( behavior, gcmi, infodyn, spectral, stats, syslog, utils, # noqa preprocessing, plot, io) # Set 'info' as the default logging level logger = logging.getLogger('brainets') syslog.set_log_level('info') __version__ = "0.0.0"
def plot_marsatlas(data, time=None, modality='meg', seeg_roi=None,
                   contrast=5, cmap='viridis', title=None, verbose=None):
    """Plot data sorted using the MarsAtlas parcellation.

    This function sorts the data by hemisphere, lobe (frontal, occipital,
    parietal, temporal and subcortical) and by ROI.

    Parameters
    ----------
    data : array_like
        GCMI result across ROIs of shape (n_pts, n_roi)
    time : list | tuple | None
        Time boundaries. Should be (time_start, time_end). If None, a
        default time vector is set between (-1.5, 1.5)
    modality : {'meg', 'seeg'}
        The recording modality. Should either be 'meg' or 'seeg'.
    seeg_roi : pd.DataFrame | None
        The ROI dataframe in case of sEEG data. Should contain n_roi rows
        and a MarsAtlas column
    contrast : int | float
        Contrast to use for the plot. A contrast of 5 means that vmin is set
        to the 5th percentile of the data and vmax to the 95th percentile.
        If None, vmin and vmax are set to the min and max of the data.
        Alternatively, you can also provide a tuple to define them manually
    title : string | None
        Title of the figure
    cmap : string | 'viridis'
        The colormap to use

    Returns
    -------
    fig_l, fig_r : plt.figure
        Figures for the left and the right hemisphere
    """
    set_log_level(verbose)
    assert modality in ['meg', 'seeg']
    assert isinstance(data, np.ndarray) and (data.ndim == 2)
    # Load MarsAtlas DataFrame
    logger.info(' Load MarsAtlas labels')
    df_ma = load_marsatlas()
    # Prepare the data before plotting according to the recording modality
    logger.info(' Prepare the data for %s modality' % modality)
    if modality == 'meg':
        assert data.shape[1] == len(df_ma), ("`data` should have a shape of "
                                             "(n_pts, %i)" % len(df_ma))
        df, df_ma = _prepare_data_meg(df_ma, data)
    elif modality == 'seeg':
        assert isinstance(seeg_roi, pd.DataFrame) and (
            data.shape[1] == len(seeg_roi)), ("`data` should have a shape of "
                                              "(n_pts, %i)" % len(seeg_roi))
        df, df_ma = _prepare_data_seeg(df_ma, data, seeg_roi)
    # Build the multi-index
    assert len(df.columns) == len(df_ma)
    mi = pd.MultiIndex.from_frame(df_ma[['Hemisphere', 'Lobe', 'Name']])
    df.columns = mi
    # Time vector
    if isinstance(time, (list, tuple, np.ndarray)) and (len(time) == 2):
        time = np.linspace(time[0], time[1], data.shape[0], endpoint=True)
        logger.info(' Generate time vector')
    else:
        time = np.linspace(-1.5, 1.5, data.shape[0], endpoint=True)
        logger.warning("Automatically generate a time vector between "
                       "(-1.5, 1.5)")
    # Get colorbar limits
    if isinstance(contrast, (int, float)):
        vmin = np.percentile(data, contrast)
        vmax = np.percentile(data, 100 - contrast)
    elif isinstance(contrast, (tuple, list)) and (len(contrast) == 2):
        vmin, vmax = contrast
    else:
        vmin, vmax = data.min(), data.max()
    kwargs = dict(cmap=cmap, vmin=vmin, vmax=vmax)
    # Generate plots
    title = '' if not isinstance(title, str) else title
    fig_l = _plot_gcmi_hemi(df, 'L', time, title, **kwargs)
    fig_r = _plot_gcmi_hemi(df, 'R', time, title, **kwargs)
    return fig_l, fig_r
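
# Hedged sketch (not part of the library) with random data: for MEG the
# number of ROIs must match the MarsAtlas dataframe loaded by
# load_marsatlas, as asserted above; numpy is imported as np at module level.
def _demo_plot_marsatlas():
    data = np.random.rand(100, len(load_marsatlas()))  # (n_pts, n_roi)
    fig_l, fig_r = plot_marsatlas(data, time=(-1.5, 1.5), modality='meg',
                                  contrast=5, title='GCMI (demo)')
    return fig_l, fig_r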
def gccmi_ccd(x, y, z, Zm, verbose=None):
    """Gaussian-Copula CMI between 2 continuous variables conditioned on a
    discrete variable.

    I = gccmi_ccd(x,y,z,Zm) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third
    discrete variable z, estimated via a Gaussian copula. If x and/or y are
    multivariate, columns must correspond to samples, rows to
    dimensions/variables. (Samples last axis)
    z should contain integer values in the range [0 Zm-1] (inclusive).
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    z = np.squeeze(z)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    if z.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(z.dtype, np.integer):
        raise ValueError("z should be an integer array")
    if not isinstance(Zm, int):
        raise ValueError("Zm should be an integer")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]
    if y.shape[1] != Ntrl or z.size != Ntrl:
        raise ValueError("number of trials do not match")
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(Nvary):
        if (np.unique(y[yi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break
    # check values of discrete variable
    if z.min() != 0 or z.max() != (Zm - 1):
        raise ValueError("values of discrete variable z are out of bounds")
    # calculate gcmi for each z value
    Icond = np.zeros(Zm)
    Pz = np.zeros(Zm)
    cx = []
    cy = []
    for zi in range(Zm):
        idx = z == zi
        thsx = copnorm(x[:, idx])
        thsy = copnorm(y[:, idx])
        Pz[zi] = idx.sum()  # number of trials in this class
        cx.append(thsx)
        cy.append(thsy)
        Icond[zi] = mi_gg(thsx, thsy, True, True)
    Pz = Pz / float(Ntrl)
    # conditional mutual information
    CMI = np.sum(Pz * Icond)
    I = mi_gg(np.hstack(cx), np.hstack(cy), True, False)
    return (CMI, I)
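
# Hedged sketch (not part of the library): an x-y dependence whose sign
# flips with a binary z is nearly invisible unconditionally but strong once
# z is conditioned on, so CMI should exceed I here.
def _demo_gccmi_ccd():
    rng = np.random.RandomState(0)
    z = rng.randint(0, 2, 1000)                    # values in [0, Zm - 1]
    x = rng.randn(1000)
    y = x * (2. * z - 1.) + 0.5 * rng.randn(1000)  # sign depends on z
    cmi, mi = gccmi_ccd(x, y, z, 2)
    return cmi, mi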
def gcmi_mixture_cd(x, y, Ym, verbose=None):
    """Gaussian-Copula Mutual Information between a continuous and a discrete
    variable calculated from a Gaussian mixture.

    The Gaussian mixture is fit using robust measures of location (median)
    and scale (median absolute deviation) for each class.

    I = gcmi_mixture_cd(x,y,Ym) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y. For
    1D x this is a lower bound to the mutual information. Columns of x
    correspond to samples, rows to dimensions/variables. (Samples last axis)
    y should contain integer values in the range [0 Ym-1] (inclusive).

    See also: gcmi_model_cd
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    if not isinstance(Ym, int):
        raise ValueError("Ym should be an integer")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    if y.size != Ntrl:
        raise ValueError("number of trials do not match")
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    # check values of discrete variable
    if y.min() != 0 or y.max() != (Ym - 1):
        raise ValueError("values of discrete variable y are out of bounds")
    # copula normalise each class
    # shift and rescale to match loc and scale of raw data
    # this provides a robust way to fit the gaussian mixture
    classdat = []
    ydat = []
    for yi in range(Ym):
        # class conditional data
        idx = y == yi
        xm = x[:, idx]
        cxm = copnorm(xm)
        xmmed = np.median(xm, axis=1)[:, np.newaxis]
        # robust measure of s.d. under a Gaussian assumption from the median
        # absolute deviation
        xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis]
        cxmscaled = cxm * (1.482602218505602 * xmmad)
        # robust measure of loc from the median
        cxmscaled = cxmscaled + xmmed
        classdat.append(cxmscaled)
        ydat.append(yi * np.ones(xm.shape[1], dtype=int))
    cx = np.concatenate(classdat, axis=1)
    newy = np.concatenate(ydat)
    I = mi_mixture_gd(cx, newy, Ym)
    return I
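
# Hedged sketch (not part of the library): class-dependent location, the
# case the robust Gaussian mixture fit is designed for. Compare with
# gcmi_model_cd, which fits a single copula across classes.
def _demo_gcmi_mixture_cd():
    rng = np.random.RandomState(0)
    y = rng.randint(0, 2, 1000)   # integer classes in [0, Ym - 1]
    x = rng.randn(1000) + 3. * y  # location shifts with the class
    return gcmi_mixture_cd(x, y, 2)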