def combine_sessions(sessions, **kwargs):
    """Merge per-session data sets into a single data set.

    Parameters
    ----------
    sessions : iterable of Bunch
        Each session bunch must provide ``data``, ``meta`` and ``tmask``
        DataFrames; ``meta`` must contain a ``session`` column (used for
        the progress printout).
    **kwargs
        ``clean_meta`` : truthy to post-process the merged ``meta`` via the
        module-level ``clean_meta`` helper (remaining kwargs are forwarded).

    Returns
    -------
    Bunch
        Copy of the first session extended with the concatenated ``data``,
        ``meta`` and ``tmask``, plus flat ``X``/``y`` arrays and the cached
        ``sessions`` list.

    Raises
    ------
    ValueError
        If ``sessions`` is empty.
    """
    # make a copy of the sessions, just to be safe
    sessions_ = list(sessions)

    # define dataset based on first session
    dataset_ = None

    # append data from other sessions
    for i, session_ in enumerate(sessions_):
        print("[+] session: {}, file: {}".format(
            i, session_.meta.reset_index(drop=False).session[0])
            )
        if dataset_ is None:
            dataset_ = Bunch(**dict(session_))
        else:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # documented replacement and preserves the same semantics here.
            dataset_.data = pd.concat(
                [dataset_.data, session_.data], ignore_index=True, sort=False)
            dataset_.meta = pd.concat(
                [dataset_.meta, session_.meta], ignore_index=False, sort=False)
            dataset_.tmask = pd.concat(
                [dataset_.tmask, session_.tmask], ignore_index=False, sort=False)

    # fail loudly on empty input instead of an AttributeError below
    if dataset_ is None:
        raise ValueError("combine_sessions requires at least one session")

    # clean
    if kwargs.get('clean_meta'):
        dataset_.meta = clean_meta(dataset_.meta, **kwargs).reset_index(drop=False)

    # set X, y as flat 2-D views of the concatenated frames
    dataset_.X = dataset_.data.values.reshape(-1, dataset_.data.shape[-1])
    dataset_.y = dataset_.meta.values.reshape(-1, dataset_.meta.shape[-1])

    # cache sessions
    dataset_.sessions = list(sessions)
    return dataset_
# Exemple #2
# 0
def load_subject_meta(dataset, index=0, sessions=None, targets=None, **kwargs):
    """Load behavioral (meta) data for a single Haxby subject.

    Parameters
    ----------
    dataset : Bunch
        Haxby dataset bunch; ``dataset.session_target[index]`` is the path
        to the subject's space-separated labels/chunks file.
    index : int, optional
        Subject index into ``dataset.session_target``.
    sessions : list, optional
        Sessions (``chunks`` values) to keep; defaults to every session
        present in the file.
    targets : list, optional
        Target labels to keep; defaults to every label present.
    **kwargs
        ``cmap`` : colormap name or callable (default ``'nipy_spectral_r'``).

    Returns
    -------
    Bunch
        With ``meta``, ``target`` (one-hot DataFrame), ``y``, ``groups``,
        ``condition_mask``, ``target_names``, ``target_colors``, ``cmap``,
        ``norm`` and ``session_code``.
    """
    # load target information as string and give a numerical identifier to each
    meta = pd.read_csv(dataset.session_target[index], sep=" ")

    # default to everything present; sorted() makes the order deterministic
    # (bare list(set(...)) ordering is arbitrary, which would make
    # session_code non-reproducible across runs)
    sessions = sessions or sorted(set(meta.chunks))
    targets = targets or sorted(set(meta.labels))

    # apply conditions mask
    session_mask = meta.chunks.isin(sessions)
    target_mask = meta.labels.isin(targets)
    condition_mask = (session_mask & target_mask)

    # mask, extract, factorize: each label becomes its index in the
    # module-level TARGET_NAMES so the encoding is stable across subjects
    target, session = meta.labels, meta.chunks
    target_names = np.ravel(TARGET_NAMES)
    target = np.stack(list(map(TARGET_NAMES.index, target)))
    meta = meta.assign(session=session, target=target)

    # convert y to one-hot encoding
    from sklearn.preprocessing import OneHotEncoder
    try:
        # scikit-learn >= 1.2 spelling (`sparse` was removed in 1.4)
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        # older scikit-learn fallback
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    y = encoder.fit_transform(np.c_[target])
    target = pd.DataFrame(y, columns=target_names)

    # define colors, labels: one integer color id per target column
    target_colors = np.arange(len(target.columns))

    # define colormap, norm
    cmap = kwargs.get('cmap', 'nipy_spectral_r')
    cmap = cmap if callable(cmap) else plt.get_cmap(cmap)
    norm = mpl.colors.Normalize(target_colors.min(), target_colors.max() + 1)

    # map colors, labels
    cmap = mpl.colors.ListedColormap([cmap(norm(_)) for _ in target_colors])
    target_colors_hex = [mpl.colors.to_hex(_) for _ in cmap.colors]

    # for naming, e.g. "sess0_1_2"
    session_code = "sess" + '_'.join(str(_) for _ in sessions)

    # return as bunch
    subject = Bunch()
    subject.meta = meta.loc[condition_mask]
    subject.target = target.loc[condition_mask]
    subject.y = subject.meta.target
    subject.groups = subject.meta.session
    subject.condition_mask = condition_mask
    subject.target_names = list(target_names)
    subject.target_colors = list(target_colors_hex)
    subject.cmap = cmap
    subject.norm = norm
    subject.session_code = session_code
    return subject