Esempio n. 1
0
def general_exp_var():
    """Run 5-split cross-validated SVM experiments on high-variance features.

    For each tissue in ``TISSUES`` the data returned by ``load_data`` is
    reduced to the (at most) 50000 columns with the largest variance.  Then,
    for every selection method in ``FEATURES_SEL`` and every feature count in
    ``FEATURES_NUM``, the positions of the class-0 and class-1 samples (read
    from ``info['braak_bin']``) are shuffled and handed to
    ``ut.get_intervals`` to obtain the test/train positions of each split.

    Each split either selects features through ``fs`` or projects the data
    with PCA, calls ``cl.SVM_classify_rbf_all`` and
    ``cl.SVM_classify_lin_all``, and pickles the selected features, the
    train/test predictions, the per-split accuracies (``[rbf, linear]``) and
    the chosen hyper-parameters under ``OPEN_FILE``.

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is not one of
            ``'t_test'``, ``'fisher'``, ``'rfe'`` or ``'PCA'``.
    """

    def _dump(obj, path):
        # Use a context manager so the handle is closed (and flushed) right away.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, guess_col, total):
        # Fraction of rows where the prediction equals the reference label.
        matches = (frame[truth_col] == frame[guess_col]).values
        return int(np.count_nonzero(matches)) / total

    cv_splits = 5

    for tissue in TISSUES:
        # Loading and variance filtering do not depend on the selection
        # method, so do them once per tissue.
        beta, info = load_data(tissue)
        vari = beta.var()
        ind = np.argsort(vari)[-50000:]
        ec = beta.iloc[:, ind]
        cat = info['braak_bin'].loc[ec.index]
        nzeros = np.where(cat == 0)[0]
        nones = np.where(cat == 1)[0]

        for feat_sel in FEATURES_SEL:
            for num in FEATURES_NUM:
                c_val_rbf = np.zeros(cv_splits)
                gamma_val_rbf = np.zeros(cv_splits)
                c_val_lin = np.zeros(cv_splits)
                best_score_rbf = np.zeros(cv_splits)
                best_score_lin = np.zeros(cv_splits)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                zeros = np.random.permutation(nzeros)
                ones = np.random.permutation(nones)

                for i in range(cv_splits):
                    print('split: %d - num_features: %d - tissue:%s- feat_sel:%s' % (i, num, tissue, feat_sel))
                    test_index, train_index = ut.get_intervals(cv_splits, i, zeros, ones)
                    print(test_index)

                    # The split indices are positions (they come from
                    # np.where), so labels must be sliced positionally too.
                    train_full = ec.iloc[train_index]
                    test_full = ec.iloc[test_index]
                    y_train = cat.iloc[train_index]
                    y_true = cat.iloc[test_index]
                    samples = test_full.shape[0]
                    samples_tr = train_full.shape[0]

                    start_time = time.time()
                    features_file = OPEN_FILE + "/features_exp_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i)
                    print(train_full.shape)
                    if feat_sel == 't_test':
                        features_all = fs.feature_sel_t_test_parallel(train_full, info, num)
                    elif feat_sel == 'fisher':
                        features_all = fs.feature_fisher_score_parallel(train_full, info, num)
                    elif feat_sel == 'rfe':
                        features_all = fs.feature_sel_rfe(train_full, info, num)
                    elif feat_sel != 'PCA':
                        # Without this guard an unknown method would silently
                        # reuse the features of a previous iteration.
                        raise ValueError("unknown feature selection method: %r" % (feat_sel,))

                    print("--- %s seconds for feature selection ---" % (time.time() - start_time))

                    if feat_sel == 'PCA':
                        # Standardise with training statistics only, then project.
                        scale = preprocessing.StandardScaler().fit(train_full)
                        train_sc = scale.transform(train_full)
                        test_sc = scale.transform(test_full)
                        pca = PCA(n_components=num)
                        pca.fit(train_sc)
                        train = pca.transform(train_sc)
                        test = pca.transform(test_sc)
                    else:
                        # Only real selection methods produce a feature list to save.
                        _dump(features_all, features_file)
                        train = train_full[features_all[0:num]]
                        print(train.shape)
                        test = test_full[features_all[0:num]]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i], best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                        train, y_train, test, y_true, C_range=np.logspace(-4, 4, 20),
                        gamma_range=np.logspace(-7, 2, 20))
                    (y_pred_lin, y_tr_lin, c_val_lin[i], best_score_lin[i]) = cl.SVM_classify_lin_all(
                        train, y_train, test, y_true, C_range=np.logspace(-4, 3, 20))
                    print("--- %s seconds for classification ---" % (time.time() - start_time))

                    pred_train = pd.DataFrame(
                        {'y_train': y_train,
                         'y_tr_rbf': y_tr_rbf,
                         'y_tr_lin': y_tr_lin,
                         })
                    _dump(pred_train, OPEN_FILE + "/pred_exp_tr_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr)]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame(
                        {'y_true': y_true,
                         'y_rbf': y_pred_rbf,
                         'y_lin': y_pred_lin,
                         })
                    _dump(predictions, OPEN_FILE + "/pred_exp_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples)]
                    print(svm_accuracy[i])

                _dump(svm_accuracy_tr, OPEN_FILE + "/accuracy_exp_tr_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
                _dump(svm_accuracy, OPEN_FILE + "/accuracy_exp_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
                parameters = pd.DataFrame(
                    {'C_rbf': c_val_rbf,
                     'gamma_rbf': gamma_val_rbf,
                     'C_lin': c_val_lin,
                     'best_rbf': best_score_rbf,
                     'best_lin': best_score_lin,
                     })
                _dump(parameters, OPEN_FILE + "/params_exp_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
Esempio n. 2
0
def blood_surr():
    """Cross-validate SVMs on blood data using features selected on each tissue.

    The blood matrix is unpickled from ``../tissues/resi_norm_WB.p``.  For
    each tissue in ``['EC', 'STG', 'CER', 'FC']``, ``load_data(tissue)``
    supplies the tissue data and the ``info`` table.  For every selection
    method and feature count, the class-0/class-1 blood sample positions
    (from ``info['braak_bin']``) are shuffled and split five ways through
    ``ut.get_intervals``.

    In each split, features are selected on the tissue rows whose subject is
    not in the blood test set, whose ``info['tissue']`` equals the current
    tissue and whose ``info['braak_stage']`` is not ``'Exclude'``.  The blood
    train/test sets restricted to those features are then passed to
    ``cl.SVM_classify_rbf_all`` and ``cl.SVM_classify_lin_all``.  Selected
    features, predictions, accuracies (``[rbf, linear]``) and
    hyper-parameters are pickled under ``../data_str``.

    Raises:
        ValueError: if a selection method is not ``'t_test'``, ``'fisher'``
            or ``'rfe'`` (cannot happen with the built-in list).
    """

    def _dump(obj, path):
        # Use a context manager so the handle is closed (and flushed) right away.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, guess_col, total):
        # Fraction of rows where the prediction equals the reference label.
        matches = (frame[truth_col] == frame[guess_col]).values
        return int(np.count_nonzero(matches)) / total

    tissues = ['EC', 'STG', 'CER', 'FC']
    features_sel = ['t_test', 'fisher', 'rfe']
    features_num = [5, 10, 15, 20, 50, 75, 100, 250, 500]
    cv_splits = 5
    open_file = os.path.realpath('../data_str/')

    # The blood matrix is the same for every tissue: load it once.
    # NOTE: unpickling runs arbitrary code; this file must be trusted.
    with open('../tissues/resi_norm_WB.p', "rb") as handle:
        blood = pickle.load(handle)

    for tissue in tissues:
        ec, info = load_data(tissue)

        cat = info['braak_bin'].loc[blood.index]
        nzeros = np.where(cat == 0)[0]
        nones = np.where(cat == 1)[0]

        # Rows of this tissue that are not flagged for exclusion; the
        # per-split subject filter below is applied on top of this.
        usable = (info['tissue'] == tissue) & (info['braak_stage'] != 'Exclude')
        candidate_subjects = info.loc[usable, 'subject']

        for feat_sel in features_sel:
            for num in features_num:
                c_val_rbf = np.zeros(cv_splits)
                gamma_val_rbf = np.zeros(cv_splits)
                c_val_lin = np.zeros(cv_splits)
                best_score_rbf = np.zeros(cv_splits)
                best_score_lin = np.zeros(cv_splits)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                zeros = np.random.permutation(nzeros)
                ones = np.random.permutation(nones)

                for i in range(cv_splits):
                    print('split: %d - num_features: %d - tissue:%s- feat_sel:%s' % (i, num, tissue, feat_sel))
                    test_index, train_index = ut.get_intervals(cv_splits, i, zeros, ones)
                    print(test_index)

                    # Split indices are positions, so slice everything positionally.
                    train_blood = blood.iloc[train_index]
                    test_blood = blood.iloc[test_index]
                    y_train = cat.iloc[train_index]
                    y_true = cat.iloc[test_index]
                    samples = test_blood.shape[0]
                    samples_tr = train_blood.shape[0]

                    # Subjects in the blood test set must not contribute
                    # tissue samples to feature selection.
                    unwanted = set(info['subject'].loc[test_blood.index])
                    valids = [ids for ids, subject in candidate_subjects.items()
                              if subject not in unwanted]
                    ec_train = ec.loc[valids]

                    start_time = time.time()
                    features_file = open_file + "/features_blood_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i)
                    print(ec_train.shape)
                    if feat_sel == 't_test':
                        features_all = fs.feature_sel_t_test_parallel(ec_train, info, num)
                    elif feat_sel == 'fisher':
                        features_all = fs.feature_fisher_score_parallel(ec_train, info, num)
                    elif feat_sel == 'rfe':
                        features_all = fs.feature_sel_rfe(ec_train, info, num)
                    else:
                        raise ValueError("unknown feature selection method: %r" % (feat_sel,))
                    print("--- %s seconds for feature selection ---" % (time.time() - start_time))
                    _dump(features_all, features_file)

                    # NOTE(review): the full selector output is used here,
                    # not a [0:num] slice - confirm the selectors return
                    # exactly `num` features.
                    train = train_blood[features_all]
                    print(train.shape)
                    test = test_blood[features_all]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i], best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                        train, y_train, test, y_true, C_range=np.logspace(-4, 2, 10),
                        gamma_range=np.logspace(-6, 2, 10))
                    (y_pred_lin, y_tr_lin, c_val_lin[i], best_score_lin[i]) = cl.SVM_classify_lin_all(
                        train, y_train, test, y_true, C_range=np.logspace(-4, 2, 10))
                    print("--- %s seconds for classification ---" % (time.time() - start_time))

                    pred_train = pd.DataFrame(
                        {'y_train': y_train,
                         'y_tr_rbf': y_tr_rbf,
                         'y_tr_lin': y_tr_lin,
                         })
                    _dump(pred_train, open_file + "/pred_blood_tr_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr)]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame(
                        {'y_true': y_true,
                         'y_rbf': y_pred_rbf,
                         'y_lin': y_pred_lin,
                         })
                    _dump(predictions, open_file + "/pred_blood_CV_%s_%s_%d_%d.p" % (tissue, feat_sel, num, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples)]
                    print(svm_accuracy[i])

                _dump(svm_accuracy_tr, open_file + "/accuracy_blood_tr_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
                _dump(svm_accuracy, open_file + "/accuracy_blood_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
                parameters = pd.DataFrame(
                    {'C_rbf': c_val_rbf,
                     'gamma_rbf': gamma_val_rbf,
                     'C_lin': c_val_lin,
                     'best_rbf': best_score_rbf,
                     'best_lin': best_score_lin,
                     })
                # NOTE(review): unlike the other outputs this name has no
                # "blood" tag; kept as-is so existing readers still find it.
                _dump(parameters, open_file + "/params_CV_%s_%s_%d.p" % (tissue, feat_sel, num))
Esempio n. 3
0
def general_all_non():
    """Cross-validate SVMs on the data returned by ``load_data()``.

    Rows whose ``info.braak_stage`` is ``'Exclude'`` are dropped from the
    metadata used for splitting.  The class-0/class-1 positions are taken
    from the non-excluded rows with ``info.tissue == 'STG'``, shuffled per
    feature count, and passed with the subject list to
    ``ut.get_intervals_all``, whose results are used as row labels
    (``ec.loc``).

    For every method in ``FEATURES_SEL`` and count in ``FEATURES_NUM`` each
    of the five splits selects features through ``fs`` (or projects with
    PCA), calls ``cl.SVM_classify_rbf_all`` and ``cl.SVM_classify_lin_all``,
    and pickles features, predictions, accuracies (``[rbf, linear]``) and
    hyper-parameters under ``OPEN_FILE`` using ``TISSUE`` in the file names.

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is not one of
            ``'t_test'``, ``'fisher'``, ``'rfe'`` or ``'PCA'``.
    """

    def _dump(obj, path):
        # Use a context manager so the handle is closed (and flushed) right away.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, guess_col, total):
        # Fraction of rows where the prediction equals the reference label.
        matches = (frame[truth_col] == frame[guess_col]).values
        return int(np.count_nonzero(matches)) / total

    cv_splits = 5

    ec, info = load_data()
    ec_temp = ec.loc[info.braak_stage != 'Exclude']
    new_inf = info.loc[ec_temp.index]
    subjects = np.unique(new_inf.subject)
    # NOTE(review): the reference tissue is hard-coded to 'STG' while output
    # names use TISSUE - confirm this is intentional.
    fromtis = ec.loc[(info.tissue == 'STG') & (info.braak_stage != 'Exclude')]
    categories = new_inf['braak_bin'].loc[fromtis.index]
    print('cargo datos')
    nzeros = np.where(categories == 0)[0]
    nones = np.where(categories == 1)[0]

    # Labels for every row of `ec`; independent of method and feature count.
    cat = info['braak_bin'].loc[ec.index]

    for feat_sel in FEATURES_SEL:
        for num in FEATURES_NUM:
            c_val_rbf = np.zeros(cv_splits)
            gamma_val_rbf = np.zeros(cv_splits)
            c_val_lin = np.zeros(cv_splits)
            best_score_rbf = np.zeros(cv_splits)
            best_score_lin = np.zeros(cv_splits)
            svm_accuracy = {}
            svm_accuracy_tr = {}
            zeros = np.random.permutation(nzeros)
            ones = np.random.permutation(nones)

            for i in range(cv_splits):
                print('gen_all -split: %d - num_features: %d - feat_sel:%s' %
                      (i, num, feat_sel))
                test_index, train_index = ut.get_intervals_all(
                    cv_splits, i, zeros, ones, new_inf, subjects)
                print('tamaño de test: %s' % len(test_index))
                print(np.array(new_inf['subject'].loc[test_index]))

                # Here the split indices are row labels, so use .loc throughout.
                train_full = ec.loc[train_index]
                test_full = ec.loc[test_index]
                y_train = cat.loc[train_index]
                y_true = cat.loc[test_index]
                samples = test_full.shape[0]
                samples_tr = train_full.shape[0]

                start_time = time.time()
                features_file = OPEN_FILE + "/features_CV_%s_%s_%d_%d.p" % (
                    TISSUE, feat_sel, num, i)
                print(train_full.shape)
                if feat_sel == 't_test':
                    features_all = fs.feature_sel_t_test_parallel(
                        train_full, info, num)
                elif feat_sel == 'fisher':
                    features_all = fs.feature_fisher_score_parallel(
                        train_full, info, num)
                elif feat_sel == 'rfe':
                    features_all = fs.feature_sel_rfe(train_full, info, num)
                elif feat_sel != 'PCA':
                    # Without this guard an unknown method would silently
                    # reuse the features of a previous iteration.
                    raise ValueError(
                        "unknown feature selection method: %r" % (feat_sel,))

                print("--- %s seconds for feature selection ---" %
                      (time.time() - start_time))

                if feat_sel == 'PCA':
                    # Standardise with training statistics only, then project.
                    scale = preprocessing.StandardScaler().fit(train_full)
                    train_sc = scale.transform(train_full)
                    test_sc = scale.transform(test_full)
                    pca = PCA(n_components=num)
                    pca.fit(train_sc)
                    train = pca.transform(train_sc)
                    test = pca.transform(test_sc)
                else:
                    _dump(features_all, features_file)
                    train = train_full[features_all[0:num]]
                    print(train.shape)
                    test = test_full[features_all[0:num]]

                start_time = time.time()
                (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                 best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                     train,
                     y_train,
                     test,
                     y_true,
                     C_range=np.logspace(-3, 5, 10),
                     gamma_range=np.logspace(-6, 3, 10))
                (y_pred_lin, y_tr_lin, c_val_lin[i],
                 best_score_lin[i]) = cl.SVM_classify_lin_all(
                     train,
                     y_train,
                     test,
                     y_true,
                     C_range=np.logspace(-2, 0, 6))
                print("--- %s seconds for classification ---" %
                      (time.time() - start_time))

                pred_train = pd.DataFrame({
                    'y_train': y_train,
                    'y_tr_rbf': y_tr_rbf,
                    'y_tr_lin': y_tr_lin,
                })
                _dump(pred_train,
                      OPEN_FILE + "/pred_tr_CV_%s_%s_%d_%d.p" %
                      (TISSUE, feat_sel, num, i))
                svm_accuracy_tr[i] = [
                    _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                    _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                ]
                print(svm_accuracy_tr[i])

                predictions = pd.DataFrame({
                    'y_true': y_true,
                    'y_rbf': y_pred_rbf,
                    'y_lin': y_pred_lin,
                })
                _dump(predictions,
                      OPEN_FILE + "/pred_CV_%s_%s_%d_%d.p" %
                      (TISSUE, feat_sel, num, i))
                svm_accuracy[i] = [
                    _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                    _hit_rate(predictions, 'y_true', 'y_lin', samples),
                ]
                print(svm_accuracy[i])

            _dump(svm_accuracy_tr,
                  OPEN_FILE + "/accuracy_tr_CV_%s_%s_%d.p" %
                  (TISSUE, feat_sel, num))
            _dump(svm_accuracy,
                  OPEN_FILE + "/accuracy_CV_%s_%s_%d.p" %
                  (TISSUE, feat_sel, num))
            parameters = pd.DataFrame({
                'C_rbf': c_val_rbf,
                'gamma_rbf': gamma_val_rbf,
                'C_lin': c_val_lin,
                'best_rbf': best_score_rbf,
                'best_lin': best_score_lin,
            })
            _dump(parameters,
                  OPEN_FILE + "/params_CV_%s_%s_%d.p" %
                  (TISSUE, feat_sel, num))
Esempio n. 4
0
def main():
    """Compare cross-validation schemes with different numbers of splits.

    For each tissue in ``TISSUES`` and each split count in ``CV``, the
    class-0/class-1 sample positions (from ``info['braak_bin']``) are
    shuffled and split through ``ut.get_intervals``.  Every split selects
    ``NUM`` features with the ``'rfe'`` method from ``fs``, calls
    ``cl.SVM_classify_rbf_all`` and ``cl.SVM_classify_lin_all``, and pickles
    features, predictions, accuracies (``[rbf, linear]``) and
    hyper-parameters under ``OPEN_FILE`` with ``diffCV`` file names.

    Raises:
        ValueError: if a selection method is not ``'t_test'``, ``'fisher'``
            or ``'rfe'`` (cannot happen with the built-in list).
    """

    def _dump(obj, path):
        # Use a context manager so the handle is closed (and flushed) right away.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, guess_col, total):
        # Fraction of rows where the prediction equals the reference label.
        matches = (frame[truth_col] == frame[guess_col]).values
        return int(np.count_nonzero(matches)) / total

    features_sel = ['rfe']

    for tissue in TISSUES:
        betaqn, info = load_data(tissue)
        ec = betaqn
        for feat_sel in features_sel:
            cat = info['braak_bin'].loc[ec.index]
            zeros = np.where(cat == 0)[0]
            ones = np.where(cat == 1)[0]

            for cv in CV:
                c_val_rbf = np.zeros(cv)
                gamma_val_rbf = np.zeros(cv)
                c_val_lin = np.zeros(cv)
                best_score_rbf = np.zeros(cv)
                best_score_lin = np.zeros(cv)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                # Each scheme reshuffles the previous scheme's order; this
                # keeps the sequence of random draws unchanged.
                zeros = np.random.permutation(zeros)
                ones = np.random.permutation(ones)

                for i in range(cv):
                    print('split: %d - cv: %d' % (i, cv))
                    test_index, train_index = ut.get_intervals(
                        cv, i, zeros, ones)

                    # Split indices are positions, so slice labels positionally too.
                    train_full = ec.iloc[train_index]
                    test_full = ec.iloc[test_index]
                    y_train = cat.iloc[train_index]
                    y_true = cat.iloc[test_index]
                    samples = test_full.shape[0]
                    samples_tr = train_full.shape[0]

                    start_time = time.time()
                    features_file = OPEN_FILE + "/features_diffCV_%s_%s_%d_%d.p" % (
                        tissue, feat_sel, cv, i)
                    if feat_sel == 't_test':
                        features_all = fs.feature_sel_t_test_parallel(
                            train_full, info, NUM)
                    elif feat_sel == 'fisher':
                        features_all = fs.feature_fisher_score_parallel(
                            train_full, info, NUM)
                    elif feat_sel == 'rfe':
                        features_all = fs.feature_sel_rfe(
                            train_full, info, NUM)
                    else:
                        raise ValueError(
                            "unknown feature selection method: %r" % (feat_sel,))
                    print("--- %s seconds for feature selection ---" %
                          (time.time() - start_time))
                    _dump(features_all, features_file)

                    train = train_full[features_all[0:NUM]]
                    test = test_full[features_all[0:NUM]]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                     best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                         train,
                         y_train,
                         test,
                         y_true,
                         C_range=np.logspace(-4, 4, 20),
                         gamma_range=np.logspace(-7, 2, 20))
                    (y_pred_lin, y_tr_lin, c_val_lin[i],
                     best_score_lin[i]) = cl.SVM_classify_lin_all(
                         train,
                         y_train,
                         test,
                         y_true,
                         C_range=np.logspace(-4, 3, 20))
                    print("--- %s seconds for classification ---" %
                          (time.time() - start_time))

                    pred_train = pd.DataFrame({
                        'y_train': y_train,
                        'y_tr_rbf': y_tr_rbf,
                        'y_tr_lin': y_tr_lin,
                    })
                    _dump(pred_train,
                          OPEN_FILE + "/pred_tr_diffCV_%s_%s_%d_%d.p" %
                          (tissue, feat_sel, cv, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                    ]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame({
                        'y_true': y_true,
                        'y_rbf': y_pred_rbf,
                        'y_lin': y_pred_lin,
                    })
                    _dump(predictions,
                          OPEN_FILE + "/pred_diffCV_%s_%s_%d_%d.p" %
                          (tissue, feat_sel, cv, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples),
                    ]
                    print(svm_accuracy[i])

                _dump(svm_accuracy_tr,
                      OPEN_FILE + "/accuracy_tr_diffCV_%s_%s_%d.p" %
                      (tissue, feat_sel, cv))
                _dump(svm_accuracy,
                      OPEN_FILE + "/accuracy_diffCV_%s_%s_%d.p" %
                      (tissue, feat_sel, cv))
                parameters = pd.DataFrame({
                    'C_rbf': c_val_rbf,
                    'gamma_rbf': gamma_val_rbf,
                    'C_lin': c_val_lin,
                    'best_rbf': best_score_rbf,
                    'best_lin': best_score_lin,
                })
                _dump(parameters,
                      OPEN_FILE + "/params_diffCV_%s_%s_%d.p" %
                      (tissue, feat_sel, cv))
Esempio n. 5
0
def general_Lun():
    """Cross-validate SVMs on 12-feature sets, computed or hard-coded.

    For every method in ``FEATURES_SEL`` and tissue in ``TISSUES`` a list of
    features is obtained either from ``fs`` (``'t_test'``, ``'fisher'``,
    ``'rfe'``) on the whole tissue data, or from the fixed probe lists
    ``'leo'`` (one list per tissue), ``'leo_all'`` and ``'jager'``.  A
    ``'leo'`` request for a tissue without a list is skipped.

    The feature list is pickled, then the class-0/class-1 sample positions
    (from ``info['braak_bin']``) are shuffled and split ``CV_SPLITS`` ways
    through ``ut.get_intervals``.  Every split calls
    ``cl.SVM_classify_rbf_all`` and ``cl.SVM_classify_lin_all`` and pickles
    its predictions; accuracies (``[rbf, linear]`` per split) and
    hyper-parameters are pickled once per tissue/method under ``OPEN_FILE``.

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is not one of the six
            names listed above.
    """

    def _dump(obj, path):
        # Use a context manager so the handle is closed (and flushed) right away.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, guess_col, total):
        # Fraction of rows where the prediction equals the reference label.
        matches = (frame[truth_col] == frame[guess_col]).values
        return int(np.count_nonzero(matches)) / total

    num = 12
    save_file = OPEN_FILE

    # Fixed probe lists for the 'leo' method, one per tissue.
    leo_by_tissue = {
        'EC': [
            'cg11823178', 'cg22997194', 'cg06653632', 'cg05066959',
            'cg24152732', 'cg14972141', 'cg04029027', 'cg05030077',
            'cg04151012', 'cg18522315', 'cg20618448', 'cg24770624'
        ],
        'STG': [
            'cg04525464', 'cg06108383', 'cg10752406', 'cg25018458',
            'cg13942103', 'cg00767503', 'cg02961798', 'cg05810363',
            'cg15849154', 'cg06745695', 'cg15520955', 'cg03601797'
        ],
        'FC': [
            'cg04147621', 'cg05726109', 'cg11724984', 'cg23968456',
            'cg24671734', 'cg06926306', 'cg07859799', 'cg02997560',
            'cg13507269', 'cg19900677', 'cg14071588', 'cg15928398'
        ],
        'CER': [
            'cg22570053', 'cg00065957', 'cg21781422', 'cg17715556',
            'cg01339004', 'cg18882687', 'cg20767910', 'cg24462001',
            'cg07869256', 'cg20698501', 'cg21618635', 'cg17468317'
        ],
    }
    leo_all = [
        'cg11823178', 'cg25018458', 'cg05810363', 'cg05066959',
        'cg18428542', 'cg16665310', 'cg05912299', 'cg03169557',
        'cg23968456', 'cg02672452', 'cg04147621', 'cg17910899'
    ]
    jager = [
        'cg11724984', 'cg23968456', 'cg15821544', 'cg16733298',
        'cg22962123', 'cg13076843', 'cg25594100', 'cg00621289',
        'cg19803550', 'cg03169557', 'cg05066959', 'cg05810363'
    ]

    for feat_sel in FEATURES_SEL:
        for tissue in TISSUES:
            betaqn, info = load_data(tissue)
            ec = betaqn
            start_time = time.time()
            features_file = OPEN_FILE + "/features_LEO_CV_%s_%s_%d.p" % (
                tissue, feat_sel, num)
            if feat_sel == 't_test':
                features_all = fs.feature_sel_t_test_parallel(ec, info, num)
            elif feat_sel == 'fisher':
                features_all = fs.feature_fisher_score_parallel(ec, info, num)
            elif feat_sel == 'rfe':
                features_all = fs.feature_sel_rfe(ec, info, num)
            elif feat_sel == 'leo':
                if tissue not in leo_by_tissue:
                    continue
                features_all = list(leo_by_tissue[tissue])
            elif feat_sel == 'leo_all':
                # Must bind `features_all`; binding another name would leave
                # it undefined or stale from a previous iteration.
                features_all = list(leo_all)
            elif feat_sel == 'jager':
                features_all = list(jager)
            else:
                raise ValueError(
                    "unknown feature selection method: %r" % (feat_sel,))
            print("--- %s seconds for feature selection ---" %
                  (time.time() - start_time))
            _dump(features_all, features_file)

            cat = info['braak_bin'].loc[ec.index]
            nzeros = np.where(cat == 0)[0]
            nones = np.where(cat == 1)[0]
            svm_accuracy = {}
            svm_accuracy_tr = {}

            c_val_rbf = np.zeros(CV_SPLITS)
            gamma_val_rbf = np.zeros(CV_SPLITS)
            c_val_lin = np.zeros(CV_SPLITS)
            best_score_rbf = np.zeros(CV_SPLITS)
            best_score_lin = np.zeros(CV_SPLITS)

            zeros = np.random.permutation(nzeros)
            ones = np.random.permutation(nones)
            for i in range(CV_SPLITS):
                print('split: %d - num_features: %d - tissue:%s- feat_sel:%s' %
                      (i, num, tissue, feat_sel))
                test_index, train_index = ut.get_intervals(
                    CV_SPLITS, i, zeros, ones)
                train_full = ec.iloc[train_index]
                test_full = ec.iloc[test_index]
                samples = test_full.shape[0]
                samples_tr = train_full.shape[0]

                train = train_full[features_all]
                test = test_full[features_all]
                y_train = info['braak_bin'].loc[train.index]
                # Split indices are positions, so slice labels positionally.
                y_true = cat.iloc[test_index]

                start_time = time.time()
                (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                 best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                     train,
                     y_train,
                     test,
                     y_true,
                     C_range=np.logspace(-4, 4, 10),
                     gamma_range=np.logspace(-5, 2, 10))
                # The linear model and all per-split bookkeeping belong
                # inside the split loop so every split is evaluated.
                (y_pred_lin, y_tr_lin, c_val_lin[i],
                 best_score_lin[i]) = cl.SVM_classify_lin_all(
                     train,
                     y_train,
                     test,
                     y_true,
                     C_range=np.logspace(-4, 3, 10))
                print("--- %s seconds for classification ---" %
                      (time.time() - start_time))

                predictions = pd.DataFrame({
                    'y_true': y_true,
                    'y_rbf': y_pred_rbf,
                    'y_lin': y_pred_lin,
                })
                _dump(predictions,
                      save_file + "/pred_LEO2_%s_%s_%d.p" %
                      (tissue, feat_sel, i))
                pred_train = pd.DataFrame({
                    'y_train': y_train,
                    'y_tr_rbf': y_tr_rbf,
                    'y_tr_lin': y_tr_lin,
                })
                _dump(pred_train,
                      save_file + "/pred_LEO2_tr_%s_%s_%d.p" %
                      (tissue, feat_sel, i))

                svm_accuracy[i] = [
                    _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                    _hit_rate(predictions, 'y_true', 'y_lin', samples),
                ]
                svm_accuracy_tr[i] = [
                    _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                    _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                ]
                print(svm_accuracy[i])
                print(svm_accuracy_tr[i])

            # The parameter table spans all splits, so write it once.  The
            # file keeps the index of the last split in its name, which is
            # the only name this table has ever been written under.
            parameters = pd.DataFrame({
                'C_rbf': c_val_rbf,
                'gamma_rbf': gamma_val_rbf,
                'C_lin': c_val_lin,
                'best_rbf': best_score_rbf,
                'best_lin': best_score_lin,
            })
            _dump(parameters,
                  save_file + "/params_LEO2_%s_%s_%d.p" %
                  (tissue, feat_sel, CV_SPLITS - 1))

            # Save accuracies per tissue; outside the tissue loop only the
            # last tissue would be written (possibly with stale results).
            _dump(svm_accuracy,
                  save_file + "/accuracy_LEO2_%s_%s.p" % (tissue, feat_sel))
            _dump(svm_accuracy_tr,
                  save_file + "/accuracy_LEO2_tr_%s_%s.p" % (tissue, feat_sel))