import numpy as np
import pandas as pd
from scipy import stats
from sklearn.utils import shuffle
from sklearn.model_selection import LeaveOneOut, cross_val_predict

import utils

name_for_scale  = ['awareness']
# participants: ['ah', 'av', 'bj', 'cm', 'db', 'ddb', 'fcm', 'kf', 'kk', 'ml', 'qa', 'sk', 'yv']
# get one of the participants' data
participant = 'db'
df_sub          = df[df['participant'] == participant]
# for 1-back to 4-back
for n_back in np.arange(1, 5):
    X, y, groups = utils.get_features_targets_groups(
                            df_sub.dropna(),
                            n_back                  = n_back,
                            names                   = name_for_scale,
                            independent_variables   = feature_names,
                            dependent_variable      = [target_name, 'correctness'])
    X, y, groups = shuffle(X, y, groups)
    # the second target column is the trial-by-trial correctness
    y, correctness = y[:, 0], y[:, 1]
    for model_name, model in utils.make_clfs().items():
        cv = LeaveOneOut()
        print('{}-back, {}'.format(n_back, model_name))
        preds = cross_val_predict(model, X, y, groups=groups, cv=cv,
                                  method='predict', verbose=2, n_jobs=4)
        df_pred_ = pd.DataFrame(np.vstack([preds, correctness]).T,
                                columns=['preds', 'correct'])
        # add-one (Laplace) smoothed marginal proportions
        p_correct   = float(np.sum(correctness == 1) + 1) / (len(correctness) + 1)
        p_incorrect = float(np.sum(correctness == 0) + 1) / (len(correctness) + 1)
        p_aware     = float(np.sum(preds == 1) + 1) / (len(preds) + 1)
        p_unaware   = float(np.sum(preds == 0) + 1) / (len(preds) + 1)
        # add-one smoothed joint proportions of correctness x decoded awareness
        p_correct_aware     = float(np.sum(np.logical_and(correctness == 1, preds == 1)) + 1) / (len(df_pred_) + 1)
        p_correct_unaware   = float(np.sum(np.logical_and(correctness == 1, preds == 0)) + 1) / (len(df_pred_) + 1)
        p_incorrect_aware   = float(np.sum(np.logical_and(correctness == 0, preds == 1)) + 1) / (len(df_pred_) + 1)
        p_incorrect_unaware = float(np.sum(np.logical_and(correctness == 0, preds == 0)) + 1) / (len(df_pred_) + 1)
        correlation, pval = stats.spearmanr(preds, correctness)
        results['sub'].append(participant)
        results['model'].append(model_name)
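        # bookkeeping sketch (an assumption: `results` is a dict of lists,
        # e.g. collections.defaultdict(list), created before this loop, and
        # n_back corresponds to the 'window' column used when plotting below)
        results['window'].append(n_back)
        results['p(correct|awareness)'].append(p_correct_aware)
        results['p(correct|unawareness)'].append(p_correct_unaware)
        results['p(incorrect|awareness)'].append(p_incorrect_aware)
        results['p(incorrect|unawareness)'].append(p_incorrect_unaware)
        results['correlation'].append(correlation)
        results['pval'].append(pval)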
from tqdm import tqdm
from sklearn.metrics import roc_auc_score  # used by the scoring sketch below
from utils import make_clfs

pr   = 0.7 # selected proportion of the data
# select subsets of the training data and the test data to estimate the
# variance of the cross-validation
# (X_pos/y_pos and X_att/y_att hold the features/targets of the POS and ATT
# experiments, and n_cv is the number of subsampling folds, defined earlier)

# select a proportion of the training data
 idxs_train = [np.random.choice(len(X_pos),
                                size     = int(pr*len(X_pos)),
                                replace  = False
                                ) for ii in range(n_cv)]
 # select a proportion of the test data
 idxs_test  = [np.random.choice(len(X_att),
                                size     = int(pr*len(X_att)),
                                replace  = False
                                ) for ii in range(n_cv)]
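# sanity check (an addition, not in the original): each subsampling fold
# should draw the requested fraction of each experiment, without repeats
assert all(len(set(ix)) == int(pr * len(X_pos)) for ix in idxs_train)
assert all(len(set(ix)) == int(pr * len(X_att)) for ix in idxs_test)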
# for the 2 models, perform the cross-experiment validation
for model_name in make_clfs().keys():
     scores              = []
     permutation_scores  = []
     n_permutations      = 2000
    for fold, (idx_train, idx_test) in tqdm(enumerate(zip(idxs_train, idxs_test)),
              desc='cv-{}'.format(model_name)):
        # initialize the classifier - LG or RF
        clf     = make_clfs()[model_name]
        X_train = X_pos[idx_train]  # get the training features
        y_train = y_pos[idx_train]  # get the training targets

        X_test  = X_att[idx_test]   # get the testing features
        y_test  = y_att[idx_test]   # get the testing targets

        clf.fit(X_train, y_train)
        preds   = clf.predict_proba(X_test)
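        # scoring sketch (an assumption; the shown code stops at predict_proba):
        # score the fold with ROC AUC on the positive-class probability,
        # assuming binary targets, and build a chance distribution by scoring
        # the same predictions against shuffled test labels
        scores.append(roc_auc_score(y_test, preds[:, -1]))
        permutation_scores.extend(
            roc_auc_score(np.random.permutation(y_test), preds[:, -1])
            for _ in range(n_permutations))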
df = df.sort_values([
    'sub',
    'model',
    'window',
    'experiment',
])
df_decode = df_decode.sort_values([
    'sub',
    'model',
    'window',
    'experiment',
])

# after sorting both frames by the same keys, copy over any columns that
# exist only in df_decode (this relies on the two frames describing the
# same rows in the same order)
for col_name in df_decode.columns:
    if col_name not in df.columns:
        df[col_name] = df_decode[col_name].values
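# defensive check (an addition, not in the original): the positional copy
# above silently misaligns if the frames differ, so verify the sort keys
# line up row-for-row
assert (df[['sub', 'model', 'window', 'experiment']].values
        == df_decode[['sub', 'model', 'window', 'experiment']].values).all()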

import matplotlib.pyplot as plt
import seaborn as sns

col_to_plot = [
    'p(correct|awareness)', 'p(correct|unawareness)', 'p(incorrect|awareness)',
    'p(incorrect|unawareness)'
]
# 4 probability measures (rows) x 2 models (columns)
fig, axes = plt.subplots(figsize=(20, 25), nrows=4, ncols=2, sharey=True)
for col_name, ax in zip(col_to_plot, axes):
    for model, a in zip(utils.make_clfs().keys(), ax):
        df_work = df[df['model'] == model]
        sns.barplot(x='window',
                    y=col_name,
                    hue='experiment',
                    data=df_work,
                    ax=a)
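# layout sketch (an addition; titling/saving was not shown in the original):
# fig.tight_layout()
# fig.savefig('cross_experiment_probabilities.png', dpi=300)  # hypothetical filename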
    # (reconstructed call head - the original lines were lost; the left-hand
    # side `X_pos, y_pos` and the POS dataframe `pos` are assumptions based
    # on how the results are used below)
    X_pos, y_pos, _ = get_features_targets_groups(
        pos,
        names=[
            'success',     # need to normalize to 0 and 1
            'awareness',   # need to normalize to 0 and 1
            'confidence',  # need to normalize to 0 and 1
        ],
        independent_variables=['correct', 'awareness', 'confidence'],
        dependent_variable='success')
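    # hypothetical helper (an assumption, not part of the original utilities):
    # one way to satisfy the "normalize to 0 and 1" notes above is to
    # threshold each rating scale at its midpoint
    def binarize(series, midpoint=1.5):
        """Map ratings <= midpoint to 0 and ratings > midpoint to 1."""
        return (series > midpoint).astype(int)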
    ##################################################################################
    ################## train on one experiment and test on the individual subjects in
    ################## the other experiment ##########################################
    # train on POS first
    experiment_train = 'POS'  # define the source data
    experiment_test = 'ATT'  # define the target data
    # loop through the subjects in ATT as test data
    for participant, df_sub in att.groupby('participant'):
        for model_name in make_clfs().keys():  # loop through the 2 models
            cv = StratifiedShuffleSplit(n_splits=n_cv,
                                        test_size=pr,
                                        random_state=12345)
            # in each fold of the cross-validation; note that with
            # test_size=pr, the *train* indices cover the remaining
            # 1 - pr of the source data
            for fold, (train, _) in enumerate(cv.split(X_pos, y_pos)):
                X_train = X_pos[train]  # pick a proportion of the training/source data
                y_train = y_pos[train]  # pick a proportion of the training/source labels

                clf = make_clfs()[model_name]  # initialize the model
                clf.fit(X_train, y_train)  # fit the model
                # prepare the test data for a given subject
                X_test, y_test, _ = get_features_targets_groups(
                    df_sub,