def combine_sessions(sessions, **kwargs):
    """Merge several session data sets into a single data set.

    Parameters
    ----------
    sessions : iterable of Bunch
        Session bunches, each exposing ``data``, ``meta`` and ``tmask``
        DataFrames (as produced by the session loaders in this module).
    **kwargs
        ``clean_meta`` : bool — if truthy, post-process the merged ``meta``
        with :func:`clean_meta` (remaining kwargs are forwarded to it).

    Returns
    -------
    Bunch
        Combined dataset with concatenated ``data``/``meta``/``tmask``,
        2-D ``X``/``y`` arrays derived from them, and the individual
        ``sessions`` cached on the result.

    Raises
    ------
    ValueError
        If ``sessions`` is empty.
    """
    # materialize once: `sessions` may be a one-shot iterator, and we need
    # the elements both for merging and for caching on the result
    sessions_ = list(sessions)
    if not sessions_:
        raise ValueError("sessions must contain at least one session")

    # seed the combined dataset from the first session, then concatenate
    dataset_ = None
    for i, session_ in enumerate(sessions_):
        print("[+] session: {}, file: {}".format(
            i, session_.meta.reset_index(drop=False).session[0])
        )
        if dataset_ is None:
            dataset_ = Bunch(**dict(session_))
        else:
            # DataFrame.append was deprecated in pandas 1.4 and removed in
            # 2.0; pd.concat is the supported equivalent (same ignore_index
            # / sort semantics as the old per-frame calls)
            dataset_.data = pd.concat(
                [dataset_.data, session_.data],
                ignore_index=True, sort=False)
            dataset_.meta = pd.concat(
                [dataset_.meta, session_.meta],
                ignore_index=False, sort=False)
            dataset_.tmask = pd.concat(
                [dataset_.tmask, session_.tmask],
                ignore_index=False, sort=False)

    # optionally clean the merged metadata
    if kwargs.get('clean_meta'):
        dataset_.meta = clean_meta(dataset_.meta, **kwargs).reset_index(drop=False)

    # flatten to 2-D (n_samples, n_features) design matrix and targets
    dataset_.X = dataset_.data.values.reshape(-1, dataset_.data.shape[-1])
    dataset_.y = dataset_.meta.values.reshape(-1, dataset_.meta.shape[-1])

    # cache the individual sessions for later inspection (reuse the copy —
    # re-listing the original argument would drain an exhausted iterator)
    dataset_.sessions = sessions_
    return dataset_
def load_subject_meta(dataset, index=0, sessions=None, targets=None, **kwargs):
    """Load behavioral data for a single Haxby subject.

    Parameters
    ----------
    dataset : Bunch
        Fetched Haxby dataset; ``dataset.session_target[index]`` is the
        space-separated labels file read here.
    index : int, default 0
        Which subject's target file to load.
    sessions : list, optional
        Session (chunk) ids to keep; defaults to all sessions present.
    targets : list, optional
        Label names to keep; defaults to all labels present.
    **kwargs
        ``cmap`` : str or callable — base colormap used to derive one
        color per target class (default ``'nipy_spectral_r'``).

    Returns
    -------
    Bunch
        With ``meta``, one-hot ``target``, ``y``, ``groups``,
        ``condition_mask``, ``target_names``, ``target_colors``, ``cmap``,
        ``norm`` and ``session_code`` fields.
    """
    # load target information as string and give a numerical identifier to each
    meta = pd.read_csv(dataset.session_target[index], sep=" ")

    # default to every session / label present; sorted for determinism —
    # bare set iteration order on strings varies across processes
    sessions = sessions or sorted(set(meta.chunks))
    targets = targets or sorted(set(meta.labels))

    # condition mask: rows in the requested sessions AND with requested labels
    session_mask = meta.chunks.isin(sessions)
    target_mask = meta.labels.isin(targets)
    condition_mask = (session_mask & target_mask)

    # encode labels against the fixed TARGET_NAMES ordering so the integer
    # codes are stable across subjects (unlike pd.factorize order)
    target, session = meta.labels, meta.chunks
    target_names = np.ravel(TARGET_NAMES)
    target = np.array([TARGET_NAMES.index(_) for _ in target])
    meta = meta.assign(session=session, target=target)

    # convert y to one-hot encoding
    from sklearn.preprocessing import OneHotEncoder
    try:
        # scikit-learn >= 1.2 renamed `sparse` to `sparse_output`
        # (the old name was removed in 1.4)
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        # older scikit-learn: fall back to the original keyword
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    y = encoder.fit_transform(np.c_[target])
    target = pd.DataFrame(y, columns=target_names)

    # one color index per target class
    target_colors = np.arange(len(target.columns))

    # build a discrete colormap with one entry per class
    cmap = kwargs.get('cmap', 'nipy_spectral_r')
    cmap = cmap if callable(cmap) else plt.get_cmap(cmap)
    norm = mpl.colors.Normalize(target_colors.min(), target_colors.max() + 1)
    cmap = mpl.colors.ListedColormap([cmap(norm(_)) for _ in target_colors])
    target_colors_hex = [mpl.colors.to_hex(_) for _ in cmap.colors]

    # compact identifier used when naming derived outputs
    session_code = "sess" + '_'.join(str(_) for _ in sessions)

    # package everything as a Bunch, restricted to the condition mask
    subject = Bunch()
    subject.meta = meta.loc[condition_mask]
    subject.target = target.loc[condition_mask]
    subject.y = subject.meta.target
    subject.groups = subject.meta.session
    subject.condition_mask = condition_mask
    subject.target_names = list(target_names)
    subject.target_colors = list(target_colors_hex)
    subject.cmap = cmap
    subject.norm = norm
    subject.session_code = session_code
    return subject