Beispiel #1
0
def main():
    """Run the phase-phase and phase-amplitude coupling analyses.

    For each coupling type the first-level table is computed, written to
    Excel, read back, passed to the second-level and Cronbach's-alpha
    tests, saved as a multi-sheet workbook, and plotted as a grouped
    boxplot.
    """
    project = pu.proj_data()
    rois = project.roiLabels
    meg_subj, meg_sess = project.get_meg_metadata()
    phase_amp_file = '../data/MEG_phase_amp_data.hdf5'
    # An earlier ROI set was ['IPS1_R', 'FEF_R', 'TPOJ1_R', 'AVI_R'].
    attn_rois = ['IPS1_R', 'FEF_R', 'TPOJ1_R', 'AVI_R', '7m_R']

    def _run_pipeline(first_level_fn, tag):
        # `tag` ('ppc' or 'pac') selects every output file name.
        first_level_path = (
            '../data/attention_networks/%s_first_level.xlsx' % tag)
        first_level_fn(phase_amp_file, meg_subj, meg_sess, rois,
                       attn_rois).to_excel(first_level_path)

        # Downstream steps operate on the table as reloaded from disk.
        first_level = pd.read_excel(first_level_path, index_col=0)
        second_level_res = second_level(first_level)
        cron_alpha_res = cron_alpha_test(first_level, attn_rois, meg_sess)

        workbook = {
            'first_level_tests': first_level,
            'second_level_tests': second_level_res,
            'cron_alpha_tests': cron_alpha_res
        }
        pu.save_xls(
            workbook,
            '../data/attention_networks/%s_second_level.xlsx' % tag)

        plot_grouped_boxplot(
            first_level,
            attn_rois,
            cron_alpha_df=cron_alpha_res,
            fname='../figures/attention_networks/%s_boxplot.pdf' % tag)

    # --Phase-phase coupling-- #
    _run_pipeline(first_level_ppc, 'ppc')

    # --Phase-amplitude coupling-- #
    _run_pipeline(first_level_pac, 'pac')
Beispiel #2
0
def save_output(output_dir,
                behavior,
                scores,
                confusion_matrices,
                features,
                grid_df=None,
                model=None,
                resamp_method=None,
                covariates=True):
    """Write classification results for one configuration to Excel files.

    The files go into a sub-folder of ``output_dir`` named
    ``'<behavior> <model> <resamp_method> <with|without>_covariates'``.

    Args:
        output_dir: Parent directory for the results folder.
        behavior: Name of the predicted variable (used in the folder name).
        scores: Object passed to ``pu.save_xls`` as ``performance.xlsx``.
        confusion_matrices: Mapping of fold name to a square confusion
            matrix (NumPy array or DataFrame) with true classes on rows.
        features: Object saved as ``coefficients.xlsx``; skipped when None.
        grid_df: Unused; kept for backward compatibility.
        model: Model name (used in the folder name).
        resamp_method: Resampling method name (used in the folder name).
        covariates: Selects the ``with_covariates`` /
            ``without_covariates`` folder suffix.
    """
    # Local import: the file-level import block is not visible from here.
    from os import makedirs

    cov_check = 'with_covariates' if covariates else 'without_covariates'

    folder_name = '%s %s %s %s' % (behavior, model, resamp_method, cov_check)
    res_dir = join(output_dir, folder_name)
    # Creates missing parents too and tolerates an already-existing folder.
    makedirs(res_dir, exist_ok=True)

    pu.save_xls(scores, join(res_dir, 'performance.xlsx'))
    if features is not None:
        pu.save_xls(features, join(res_dir, 'coefficients.xlsx'))
    pu.save_xls(confusion_matrices, join(res_dir, 'confusion_matrices.xlsx'))

    normalized_cms = {}
    for fold in confusion_matrices:
        cm = confusion_matrices[fold]
        # Convert the row totals to a plain column vector first, so the
        # division works for both ndarray and DataFrame matrices (indexing a
        # pandas Series with [:, np.newaxis] is not supported by pandas 2.x).
        row_totals = np.asarray(cm.sum(axis=1), dtype=float)[:, np.newaxis]
        normalized_cms[fold] = cm.astype('float') / row_totals
    pu.save_xls(normalized_cms,
                join(res_dir, 'confusion_matrices_normalized.xlsx'))
def get_variable_data():
    """Count class memberships of the tinnitus variables and save them.

    Produces one value-count table each for tinnitus side, tinnitus type,
    the TQ high/low split, the TQ grade, and gender, and writes them to
    ``tin_variables_classcount.xlsx`` in the EEG classification folder.
    """
    def _tally(labels, column_name):
        # Frequency of each distinct label, as a value-count Series.
        frame = pd.DataFrame(labels, columns=[column_name])
        return frame[column_name].value_counts()

    output_dir = './../data/eeg_classification'
    if not isdir(output_dir):
        mkdir(output_dir)
    behavior_data, _ = pu.load_data_full_subjects()

    def _tinnitus_labels(column):
        # String labels for a numerically coded tinnitus column.
        coded = behavior_data[column].values.astype(float)
        return pu.convert_tin_to_str(coded, column)

    side_count = _tally(_tinnitus_labels('tinnitus_side'), 'Side')
    type_count = _tally(_tinnitus_labels('tinnitus_type'), 'Type')

    tq_scores = behavior_data['distress_TQ'].values

    # Bin indices 0 and 1 (scores up to 46) are 'Low'; higher bins 'High'.
    high_low_bins = np.digitize(tq_scores, bins=[0, 46, 84], right=True)
    high_low_labels = [
        'High' if bin_idx >= 2 else 'Low' for bin_idx in high_low_bins
    ]
    hl_count = _tally(high_low_labels, 'TQ (High/Low)')

    grade_bins = np.digitize(tq_scores,
                             bins=[0, 30, 46, 59, 84],
                             right=True)
    grade_labels = ['Grade_%d' % bin_idx for bin_idx in grade_bins]
    grade_count = _tally(grade_labels, 'TQ (Grade)')

    sex_codes = behavior_data['sex'].values
    gender_labels = ['Male' if code > 0 else 'Female' for code in sex_codes]
    gender_count = _tally(gender_labels, 'Gender')

    pu.save_xls(
        {
            'side': side_count,
            'type': type_count,
            'tq_high_low': hl_count,
            'tq_grade': grade_count,
            'gender': gender_count
        }, join(output_dir, 'tin_variables_classcount.xlsx'))
def pls_psqi_with_power(sessions, rois, fig_dir, run_check=False):
    """Relate PSQI components to MEG infraslow power with PLSC.

    Args:
        sessions: Session labels forwarded to ``organize_brain_sals``.
        rois: ROI labels forwarded to ``organize_brain_sals`` and
            ``plot_roi_saliences``.
        fig_dir: Output directory; created when it does not exist.
        run_check: When true, run the PLS analysis; otherwise load the
            result stored in ``<fig_dir>/pls_sleep.pkl``.
    """
    logging.info('%s: Running PLSC on PSQI components with power' % pu.ctime())
    if not os.path.isdir(fig_dir):
        os.mkdir(fig_dir)

    power_df = pd.read_excel('../data/MEG_infraslow_power.xlsx',
                             sheet_name='location',
                             index_col=0)
    sleep_df, sleep_variables = load_psqi_data()

    if not run_check:
        logging.info('%s: Loading raw output' % pu.ctime())
        with open(fig_dir + '/pls_sleep.pkl', 'rb') as handle:
            stored = pkl.load(handle)
        pres = stored['permutation tests']
        bres = stored['bootstrap_tests']
    else:
        pres, bres = run_pls(x=power_df.values,
                             y=sleep_df.values,
                             output_dir=fig_dir)

    # Keep as many latent variables as there are p-values below alpha.
    alpha = .001
    significant = np.where(pres['p_values'] < alpha)[0]
    nv = len(significant)
    latent_vars = ['LV_%d' % number for number in range(1, nv + 1)]

    pls_functions.plot_scree(eigs=pres['true_eigs'],
                             pvals=pres['p_values'],
                             alpha=alpha,
                             fname=fig_dir + '/scree.png')

    behavior_scores = bres['y_zscores'][:nv, :]
    pd.DataFrame(behavior_scores,
                 index=latent_vars,
                 columns=sleep_variables).to_excel(fig_dir +
                                                   '/behavior_res.xlsx')

    brain_res = organize_brain_sals(bres['x_zscores'],
                                    rois,
                                    sessions,
                                    latent_vars,
                                    comp='sign')
    pu.save_xls(brain_res, fig_dir + '/brain_res.xlsx')

    plot_roi_saliences(rois,
                       brain_res['brain_conjunction'],
                       fig_dir,
                       maxv=120,
                       create_rois=False)

    logging.info('%s: Finished' % pu.ctime())
def _multilabel_score_tables(balanced_acc, chance_acc, f1_scores,
                             class_labels, n_splits):
    """Build per-fold accuracy and F1 tables with a trailing 'Average' row."""
    f1_array = np.asarray(f1_scores)
    f1_data = np.vstack((f1_array, np.mean(f1_array, axis=0)))

    accuracy_data = np.asarray([
        list(balanced_acc) + [np.mean(balanced_acc)],
        list(chance_acc) + [np.mean(chance_acc)],
    ]).T

    rownames = ['Fold %02d' % (n + 1) for n in range(n_splits)]
    rownames.append('Average')
    score_df = pd.DataFrame(data=accuracy_data,
                            index=rownames,
                            columns=['Balanced accuracy', 'Chance accuracy'])
    f1_df = pd.DataFrame(data=np.asarray(f1_data),
                         index=rownames,
                         columns=class_labels)
    return {'accuracy scores': score_df, 'f1 scores': f1_df}


def eeg_multilabel_classify(ml_data, target_data, target_type, model, outdir):
    """Run 10-fold multilabel (anxiety + depression) classification.

    The data are resampled with ``resample_multilabel``, split with a
    stratified k-fold on the label-powerset targets, feature-selected per
    fold, and classified. Per-fold scores, confusion matrices, feature
    importances (extra trees only) and the fitted classifiers are written
    to ``<outdir>/<target_type>``.

    Args:
        ml_data: DataFrame of features. Columns containing 'categorical'
            are treated as categorical covariates when the column
            'categorical_sex_male' is present.
        target_data: Targets forwarded to ``resample_multilabel``.
        target_type: Name of the output sub-folder; also used in progress
            messages.
        model: ``'extra_trees'`` or ``'knn'``.
        outdir: Existing parent directory for the output sub-folder.

    Raises:
        ValueError: If ``model`` is not a supported model name.
    """
    # Fail early: the original only broke inside the first fold, with an
    # unbound-local error.
    if model not in ('extra_trees', 'knn'):
        raise ValueError(
            "Unsupported model %r; expected 'extra_trees' or 'knn'" % (model,))

    target_outdir = join(outdir, target_type)
    if not isdir(target_outdir):
        mkdir(target_outdir)

    feature_names = list(ml_data)

    # Column positions do not change between folds, so compute them once.
    has_covariates = "categorical_sex_male" in feature_names
    if has_covariates:
        continuous_indices = [
            ml_data.columns.get_loc(f) for f in feature_names
            if 'categorical' not in f
        ]
        categorical_indices = [
            ml_data.columns.get_loc(f) for f in feature_names
            if 'categorical' in f
        ]

    # k-fold splitter. random_state is deliberately not passed: without
    # shuffle=True it has no effect on the splits, and scikit-learn >= 0.24
    # raises ValueError for that combination.
    n_splits = 10
    skf = model_selection.StratifiedKFold(n_splits=n_splits)

    # Oversample connectivity data.
    # Note: LP-transformation has to be applied for resampling, even though
    # we're not treating it as a OVR problem.
    x_res, y_res, y_res_lp_transformed = resample_multilabel(
        ml_data, target_data)

    # Target column 0 holds anxiety, column 1 holds depression.
    label_columns = (('anxiety', 0), ('depression', 1))
    results = {
        label: {
            'balanced': [],
            'chance': [],
            'f1': [],
            'cm': {},
            'norm_cm': {},
            'classes': None,
        }
        for label, _ in label_columns
    }
    classifier_objects, classifier_coefficients = {}, {}

    fold_splits = skf.split(x_res, y_res_lp_transformed)
    for fold_count, (train_idx, test_idx) in enumerate(fold_splits, start=1):
        print('%s: Running FOLD %d for %s' %
              (pu.ctime(), fold_count, target_type))
        foldname = 'Fold %02d' % fold_count

        # Stratified k-fold splitting
        x_train, x_test = x_res[train_idx], x_res[test_idx, :]
        y_train, y_test = y_res[train_idx], y_res[test_idx, :]

        if has_covariates:
            x_train_fs, x_test_fs, cleaned_features = (
                feature_selection_with_covariates(x_train, x_test, y_train,
                                                  continuous_indices,
                                                  categorical_indices,
                                                  feature_names))
        else:
            x_train_fs, x_test_fs, cleaned_features = (
                feature_selection_without_covariates(x_train, x_test,
                                                     y_train, feature_names))

        # String equality, not identity: `is` on literals only works by
        # accident of interning.
        if model == 'extra_trees':
            predicted, feature_importances, clf = extra_trees(
                x_train_fs, y_train, x_test_fs, cleaned_features)
            classifier_coefficients[foldname] = feature_importances
        else:  # 'knn'
            predicted, clf = knn(x_train_fs, y_train, x_test_fs)

        classifier_objects[foldname] = clf

        # Flatten the per-output class lists, then split them by label name.
        classes = []
        for subclass_list in clf.classes_:
            classes.extend(list(subclass_list))

        for label, column in label_columns:
            store = results[label]
            store['classes'] = [c for c in classes if label in c]

            yt, pred = y_test[:, column], predicted[:, column]
            balanced, chance, f1 = calc_scores(yt, pred)
            store['balanced'].append(balanced)
            store['chance'].append(chance)
            store['f1'].append(f1)

            # Fold confusion matrix, raw and normalised by row total.
            cm = metrics.confusion_matrix(yt, pred)
            normalized_cm = cm.astype('float') / cm.sum(axis=1)[:,
                                                                np.newaxis]
            store['cm'][foldname] = pd.DataFrame(cm,
                                                 index=store['classes'],
                                                 columns=store['classes'])
            store['norm_cm'][foldname] = pd.DataFrame(
                normalized_cm,
                index=store['classes'],
                columns=store['classes'])

    # Saving performance scores
    for label, _ in label_columns:
        store = results[label]
        scores_dict = _multilabel_score_tables(store['balanced'],
                                               store['chance'], store['f1'],
                                               store['classes'], n_splits)
        pu.save_xls(scores_dict,
                    join(target_outdir, '%s_performance.xlsx' % label))

    # Saving coefficients (only present for models that expose them)
    if classifier_coefficients:
        pu.save_xls(classifier_coefficients,
                    join(target_outdir, 'coefficients.xlsx'))

    # Saving confusion matrices
    for label, _ in label_columns:
        store = results[label]
        pu.save_xls(store['cm'],
                    join(target_outdir, '%s_confusion_matrices.xlsx' % label))
        pu.save_xls(
            store['norm_cm'],
            join(target_outdir,
                 '%s_confusion_matrices_normalized.xlsx' % label))

    # Saving classifier object
    with open(join(target_outdir, 'classifier_object.pkl'), 'wb') as file:
        pkl.dump(classifier_objects, file)
Beispiel #6
0
            for subj in subjects:
                f = h5py.File('../data/downsampled_MEG_truncated.hdf5', 'r')
                data = f[subj + '/MEG/' + sess + '/resampled_truncated'][...]
                f.close()

                data = _butter_filter(data, fs=500, cutoffs=[.01, .1])

                fft_power = np.absolute(np.fft.rfft(data, axis=0)) ** 2
                average_power = np.mean(fft_power, axis=0)

                session_df.loc[subj] = average_power

            df_list.append(session_df)

        grand_df = pd.concat(df_list, axis=1)
        return grand_df

    print('%s: Finished' % pu.ctime())


if __name__ == "__main__":
    # One power table per computation mode, evaluated in this order; each
    # becomes a sheet of the output workbook, keyed by mode name.
    power_dict = {
        how: calc_phase_amp_power(how=how)
        for how in ('location', 'bandpass', 'truncated')
    }

    pu.save_xls(power_dict, '../data/MEG_infraslow_power.xlsx')
def pls_psqi_with_ppc_roi_version(fig_dir, run_check=False):
    """Relate PSQI components to ROI-level phase-phase coupling with PLSC.

    Reads the first-level PPC table, drops the columns matching the
    'mirror' labels returned by ``mirror_strfind``, runs (or loads) the PLS
    analysis, saves the behaviour and brain result tables, and draws an
    ROI-by-ROI heatmap of the first latent variable's conjunction values.

    Args:
        fig_dir: Output directory; created when it does not exist.
        run_check: When true, run the PLS analysis; otherwise load the
            result stored in ``<fig_dir>/pls_sleep.pkl``.
    """
    import matplotlib.pyplot as plt
    from seaborn import heatmap
    logging.info(
        '%s: Running PLSC on PSQI components with phase-phase coupling' %
        pu.ctime())
    if not os.path.isdir(fig_dir):
        os.mkdir(fig_dir)

    ppc_first_level = pd.read_excel(
        '../data/attention_networks/ppc_first_level.xlsx', index_col=0)
    colnames = list(ppc_first_level)
    # Column names are split as '<session> <roiA>-<roiB>'; the ROI list is
    # taken from the first half of each connection label.
    connections = [c.split(' ')[1] for c in colnames]
    rois = pd.unique([c.split('-')[0].replace('\n', '') for c in connections])
    same, mirror = mirror_strfind(rois)
    columns_to_drop = [c for m in mirror for c in colnames if m in c]
    meg_df = ppc_first_level.drop(columns=columns_to_drop)

    sessions = pd.unique([t.split(' ')[0] for t in list(meg_df)])
    connections = pd.unique([t.split(' ')[1] for t in list(meg_df)])

    sleep_df, sleep_variables = load_psqi_data()
    if run_check:
        pres, bres = run_pls(x=meg_df.values,
                             y=sleep_df.values,
                             output_dir=fig_dir)
    else:
        logging.info('%s: Loading raw output' % pu.ctime())
        with open(fig_dir + '/pls_sleep.pkl', 'rb') as file:
            res = pkl.load(file)
        pres = res['permutation tests']
        bres = res['bootstrap_tests']

    print(pres['p_values'])
    alpha = .001
    # Fixed to the first latent variable; the p-value based count
    # len(np.where(pres['p_values'] < alpha)[0]) is deliberately not used.
    nv = 1
    latent_vars = ['LV_%d' % (v + 1) for v in range(nv)]
    pls_functions.plot_scree(eigs=pres['true_eigs'],
                             pvals=pres['p_values'],
                             alpha=alpha,
                             fname=fig_dir + '/scree.png')

    behavior_df = pd.DataFrame(bres['y_zscores'][:nv, :],
                               index=latent_vars,
                               columns=sleep_variables)
    behavior_df.to_excel(fig_dir + '/behavior_res.xlsx')

    brain_res = organize_brain_sals(bres['x_zscores'],
                                    connections,
                                    sessions,
                                    latent_vars,
                                    comp='sign')
    pu.save_xls(brain_res, fig_dir + '/brain_res.xlsx')

    conj_res = brain_res['brain_conjunction']
    heatmap_data = pd.DataFrame(np.full(shape=(len(rois), len(rois)),
                                        fill_value=np.nan),
                                index=rois,
                                columns=rois)
    for roi1 in rois:
        for roi2 in rois:
            idx_label = '%s-%s' % (roi1, roi2)
            if idx_label in conj_res.index:
                # Single .loc call with (row, column): chained indexing
                # (.loc[row][col] = ...) may assign into a temporary copy and
                # leave heatmap_data unchanged.
                heatmap_data.loc[roi2, roi1] = conj_res.loc[idx_label, 'LV_1']

    fig, ax = plt.subplots(figsize=(8, 6))
    heatmap(data=heatmap_data,
            cmap='coolwarm',
            center=0.0,
            annot=True,
            fmt='.2f',
            cbar=True,
            square=True,
            ax=ax)
    fig.savefig(fig_dir + '/heatmap.svg')

    logging.info('%s: Finished' % pu.ctime())
def pls_psqi_with_bold_alpha_pac(fig_dir, run_check=True):
    """Relate PSQI components to per-ROI phase-amplitude coupling with PLSC.

    For every session, subject and ROI one element of the stored ``r_vals``
    array is read from the coupling HDF5 file. The session tables are
    concatenated column-wise and passed to the PLS analysis together with
    the PSQI data.

    Args:
        fig_dir: Output directory; created when it does not exist.
        run_check: When true, run the PLS analysis; otherwise load the
            result stored in ``<fig_dir>/pls_sleep.pkl``.
    """
    logging.info(
        '%s: Running PLSC on PSQI components with phase-amplitude coupling' %
        pu.ctime())
    if not os.path.isdir(fig_dir):
        os.mkdir(fig_dir)

    # Position of the value read from each r_vals array.
    # NOTE(review): presumably row 0 is the BOLD-band phase and column 3 the
    # alpha-band amplitude -- confirm against the code that wrote the file.
    bold_pac_index = 0
    alpha_pac_index = 3

    # Open the file once, read-only, and close it even if a key is missing.
    # An explicit mode matters because the h5py default mode differs between
    # major versions.
    meg_data = []
    with h5py.File('../data/MEG_phase_amp_coupling.hdf5', 'r') as h5_file:
        # Extracting metadata from the session/subject/roi group hierarchy
        sessions = list(h5_file)
        meg_subj = list(h5_file[sessions[0]])
        rois = list(h5_file[sessions[0] + '/' + meg_subj[0]])

        for sess in sessions:
            session_df = pd.DataFrame(index=meg_subj, columns=rois)
            for roi in rois:
                for subj in meg_subj:
                    key = sess + '/' + subj + '/' + roi + '/r_vals'
                    dset = h5_file[key][...]
                    # Single .loc call: chained .loc[subj][roi] = ... may
                    # assign into a temporary copy instead of session_df.
                    session_df.loc[subj, roi] = dset[bold_pac_index,
                                                     alpha_pac_index]
            meg_data.append(session_df)
    meg_df = pd.concat(meg_data, axis=1)

    sleep_df, sleep_variables = load_psqi_data()

    if run_check:
        pres, bres = run_pls(x=meg_df.values,
                             y=sleep_df.values,
                             output_dir=fig_dir)
    else:
        logging.info('%s: Loading raw output' % pu.ctime())
        with open(fig_dir + '/pls_sleep.pkl', 'rb') as file:
            res = pkl.load(file)
        pres = res['permutation tests']
        bres = res['bootstrap_tests']

    # Keep as many latent variables as there are p-values below alpha.
    alpha = .001
    nv = len(np.where(pres['p_values'] < alpha)[0])
    latent_vars = ['LV_%d' % (v + 1) for v in range(nv)]
    pls_functions.plot_scree(eigs=pres['true_eigs'],
                             pvals=pres['p_values'],
                             alpha=alpha,
                             fname=fig_dir + '/scree.png')

    behavior_df = pd.DataFrame(bres['y_zscores'][:nv, :],
                               index=latent_vars,
                               columns=sleep_variables)
    behavior_df.to_excel(fig_dir + '/behavior_res.xlsx')

    # Unlike the other PLS routines here, absolute z-scores are passed on.
    brain_res = organize_brain_sals(np.abs(bres['x_zscores']),
                                    rois,
                                    sessions,
                                    latent_vars,
                                    comp='sign')
    pu.save_xls(brain_res, fig_dir + '/brain_res.xlsx')

    conj = brain_res['brain_conjunction']
    plot_roi_saliences(rois, conj, fig_dir, maxv=120, create_rois=False)

    logging.info('%s: Finished' % pu.ctime())
Beispiel #9
0
def eeg_classify(eeg_data,
                 target_data,
                 target_type,
                 model,
                 outdir=None,
                 resample='SMOTE'):
    """Run 50-fold stratified classification of one target variable.

    The data are optionally resampled, split with a stratified k-fold,
    feature-selected per fold, and classified. When ``outdir`` is given,
    scores, coefficients, confusion matrices and the fitted classifiers are
    written to a sub-folder named
    ``'<target_type> <model> <with|without>_covariates <resample>'``.

    Args:
        eeg_data: DataFrame of features. Columns containing 'categorical'
            are treated as categorical covariates when the column
            'categorical_sex_male' is present.
        target_data: Class labels, one per row of ``eeg_data``.
        target_type: Name of the target; used in messages and folder name.
        model: ``'svm'``, ``'extra_trees'`` or ``'knn'``.
        outdir: Existing parent directory for outputs, or None to skip
            saving.
        resample: ``'no_resample'``, ``'ROS'``, ``'SMOTE'`` or ``'RUS'``.

    Returns:
        dict with keys ``'accuracy scores'`` and ``'f1 scores'`` holding the
        two tables returned by ``save_scores``.

    Raises:
        ValueError: If ``model`` or ``resample`` is not a supported name.
    """
    # Fail early: the original only broke later, with an unbound-local error.
    if model not in ('svm', 'extra_trees', 'knn'):
        raise ValueError(
            "Unsupported model %r; expected 'svm', 'extra_trees' or 'knn'" %
            (model,))

    feature_names = list(eeg_data)
    has_covariates = "categorical_sex_male" in feature_names
    cv_check = 'with_covariates' if has_covariates else 'without_covariates'

    # String equality, not identity: `is` on literals only works by accident
    # of interning.
    if resample == 'no_resample':

        class NoResample:  # for convenience
            @staticmethod
            def fit_resample(a, b):
                return a.values, np.asarray(b)

        resampler = NoResample()

    elif resample == 'ROS':
        resampler = RandomOverSampler(sampling_strategy='not majority',
                                      random_state=seed)

    elif resample == 'SMOTE':
        resampler = SMOTE(sampling_strategy='not majority', random_state=seed)

    elif resample == 'RUS':
        resampler = RandomUnderSampler(sampling_strategy='not minority',
                                       random_state=seed)

    else:
        raise ValueError(
            "Unsupported resample %r; expected 'no_resample', 'ROS', 'SMOTE' "
            "or 'RUS'" % (resample,))

    x_res, y_res = resampler.fit_resample(eeg_data, target_data)
    # The folds below index rows positionally. Depending on the
    # imbalanced-learn version the resampler may hand back pandas objects
    # for pandas input, so normalise to arrays (a no-op for ndarrays).
    x_res, y_res = np.asarray(x_res), np.asarray(y_res)

    model_outdir = None
    if outdir is not None:
        model_outdir = join(
            outdir, '%s %s %s %s' % (target_type, model, cv_check, resample))
        if not isdir(model_outdir):
            mkdir(model_outdir)
    print('%s: Running classification - %s %s %s %s' %
          (pu.ctime(), target_type, model, cv_check, resample))

    # Column positions do not change between folds, so compute them once.
    if has_covariates:
        continuous_indices = [
            eeg_data.columns.get_loc(f) for f in feature_names
            if 'categorical' not in f
        ]
        categorical_indices = [
            eeg_data.columns.get_loc(f) for f in feature_names
            if 'categorical' in f
        ]

    # k-fold splitter. random_state is deliberately not passed: without
    # shuffle=True it has no effect on the splits, and scikit-learn >= 0.24
    # raises ValueError for that combination.
    n_splits = 50
    skf = model_selection.StratifiedKFold(n_splits=n_splits)

    classifier_objects, classifier_coefficients = {}, {}
    cm_dict, norm_cm_dict = {}, {}
    balanced_acc, chance_acc, f1_scores = [], [], []
    fold_splits = skf.split(x_res, y_res)
    for fold_count, (train_idx, test_idx) in enumerate(fold_splits, start=1):
        print('%s: Running FOLD %d for %s' %
              (pu.ctime(), fold_count, target_type))
        foldname = 'Fold %02d' % fold_count

        # Stratified k-fold splitting
        x_train, x_test = x_res[train_idx], x_res[test_idx]
        y_train, y_test = y_res[train_idx], y_res[test_idx]

        if has_covariates:
            x_train_fs, x_test_fs, cleaned_features = (
                feature_selection_with_covariates(x_train, x_test, y_train,
                                                  continuous_indices,
                                                  categorical_indices,
                                                  feature_names))
        else:
            x_train_fs, x_test_fs, cleaned_features = (
                feature_selection_without_covariates(x_train, x_test,
                                                     y_train, feature_names))

        if model == 'svm':
            predicted, coef_df, clf = svmc(x_train_fs, y_train, x_test_fs,
                                           cleaned_features)
            classifier_coefficients[foldname] = coef_df
        elif model == 'extra_trees':
            predicted, feature_importances, clf = extra_trees(
                x_train_fs, y_train, x_test_fs, cleaned_features)
            classifier_coefficients[foldname] = feature_importances
        else:  # 'knn'
            predicted, clf = knn(x_train_fs, y_train, x_test_fs)

        classifier_objects[foldname] = clf

        # Calculating fold performance scores
        balanced, chance, f1 = calc_scores(y_test, predicted)
        balanced_acc.append(balanced)
        chance_acc.append(chance)
        f1_scores.append(f1)

        # Calculating fold confusion matrix, raw and normalised by row total
        cm = metrics.confusion_matrix(y_test, predicted)
        normalized_cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

        cm_dict[foldname] = pd.DataFrame(cm,
                                         index=clf.classes_,
                                         columns=clf.classes_)
        norm_cm_dict[foldname] = pd.DataFrame(normalized_cm,
                                              index=clf.classes_,
                                              columns=clf.classes_)

    # Building performance score tables
    f1_df, score_df = save_scores(f1_scores,
                                  balanced_acc,
                                  chance_acc,
                                  class_labels=clf.classes_)
    scores_dict = {'accuracy scores': score_df, 'f1 scores': f1_df}

    if model_outdir is not None:
        # Saving stays best-effort so the scores are still returned, but a
        # failure is reported instead of being silently discarded.
        try:
            pu.save_xls(scores_dict, join(model_outdir, 'performance.xlsx'))

            # Saving coefficients (only present for models that expose them)
            if classifier_coefficients:
                pu.save_xls(classifier_coefficients,
                            join(model_outdir, 'coefficients.xlsx'))
            pu.save_xls(cm_dict, join(model_outdir,
                                      'confusion_matrices.xlsx'))
            pu.save_xls(
                norm_cm_dict,
                join(model_outdir, 'confusion_matrices_normalized.xlsx'))

            # Saving classifier object
            with open(join(model_outdir, 'classifier_object.pkl'),
                      'wb') as file:
                pkl.dump(classifier_objects, file)
        except Exception as err:
            print('%s: Could not save classification outputs to %s: %s' %
                  (pu.ctime(), model_outdir, err))

    return scores_dict