def general_exp_var():
    """Cross-validate SVM classifiers on the highest-variance features per tissue.

    For every tissue in ``TISSUES`` and every method in ``FEATURES_SEL`` the
    matrix returned by ``load_data(tissue)`` is reduced to the columns picked
    by the last 50000 entries of the variance argsort.  For each feature
    count in ``FEATURES_NUM`` the positions of the ``braak_bin == 0`` and
    ``braak_bin == 1`` samples are shuffled and handed to
    ``ut.get_intervals`` to obtain five test/train splits.  Per split the
    features are chosen from the training rows only (for ``'PCA'`` the data
    is standardised and projected instead), an RBF and a linear SVM are run
    through ``cl.SVM_classify_rbf_all`` / ``cl.SVM_classify_lin_all`` and the
    results are pickled under ``OPEN_FILE``:

    * ``features_exp_CV_<tissue>_<sel>_<num>_<split>.p`` (not for ``'PCA'``)
    * ``pred_exp_tr_CV_...`` / ``pred_exp_CV_...`` per split
    * ``accuracy_exp_tr_CV_...``, ``accuracy_exp_CV_...`` and
      ``params_exp_CV_...`` once per feature count

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is none of ``'t_test'``,
            ``'fisher'``, ``'rfe'`` or ``'PCA'``.
    """

    def _dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, pred_col, total):
        # Fraction of rows whose prediction equals the reference label.
        return np.count_nonzero(frame[truth_col] == frame[pred_col]) / total

    def _rank_features(method, data, info, num):
        # Dispatch to the selector; fail loudly rather than reuse stale features.
        if method == 't_test':
            return fs.feature_sel_t_test_parallel(data, info, num)
        if method == 'fisher':
            return fs.feature_fisher_score_parallel(data, info, num)
        if method == 'rfe':
            return fs.feature_sel_rfe(data, info, num)
        raise ValueError("unknown feature selection method: %r" % (method,))

    cv_splits = 5
    for tissue in TISSUES:
        for feat_sel in FEATURES_SEL:
            beta, info = load_data(tissue)
            vari = beta.var()
            ind = np.argsort(vari)[-50000:]
            ec = beta.iloc[:, ind]
            cat = info['braak_bin'].loc[ec.index]
            # Integer positions (not labels) of each class inside ``cat``.
            nzeros = np.where(cat == 0)[0]
            nones = np.where(cat == 1)[0]
            for num in FEATURES_NUM:
                c_val_rbf = np.zeros(cv_splits)
                gamma_val_rbf = np.zeros(cv_splits)
                c_val_lin = np.zeros(cv_splits)
                best_score_rbf = np.zeros(cv_splits)
                best_score_lin = np.zeros(cv_splits)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                zeros = np.random.permutation(nzeros)
                ones = np.random.permutation(nones)
                for i in range(cv_splits):
                    print('split: %d - num_features: %d - tissue:%s- feat_sel:%s'
                          % (i, num, tissue, feat_sel))
                    test_index, train_index = ut.get_intervals(
                        cv_splits, i, zeros, ones)
                    print(test_index)
                    train_full = ec.iloc[train_index]
                    # The split indices are positions, so index positionally;
                    # plain ``cat[...]`` relies on a deprecated fallback.
                    y_train = cat.iloc[train_index]
                    test_full = ec.iloc[test_index]
                    samples = test_full.shape[0]
                    samples_tr = train_full.shape[0]

                    start_time = time.time()
                    features_file = (
                        OPEN_FILE + "/features_exp_CV_%s_%s_%d_%d.p"
                        % (tissue, feat_sel, num, i))
                    print(train_full.shape)
                    features_all = None
                    if feat_sel != 'PCA':
                        features_all = _rank_features(
                            feat_sel, train_full, info, num)
                    print("--- %s seconds for feature selection ---"
                          % (time.time() - start_time))

                    if feat_sel == 'PCA':
                        # Scaling is fitted on the training rows only.
                        scale = preprocessing.StandardScaler().fit(train_full)
                        train_sc = scale.transform(train_full)
                        test_sc = scale.transform(test_full)
                        # Projection is likewise fitted on the training rows.
                        pca = PCA(n_components=num)
                        pca.fit(train_sc)
                        train = pca.transform(train_sc)
                        test = pca.transform(test_sc)
                    else:
                        # Only a real ranking is written; PCA has none.
                        _dump(features_all, features_file)
                        train = train_full[features_all[0:num]]
                        print(train.shape)
                        test = test_full[features_all[0:num]]
                    y_true = cat.iloc[test_index]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                     best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 4, 20),
                        gamma_range=np.logspace(-7, 2, 20))
                    (y_pred_lin, y_tr_lin, c_val_lin[i],
                     best_score_lin[i]) = cl.SVM_classify_lin_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 3, 20))
                    print("--- %s seconds for classification ---"
                          % (time.time() - start_time))

                    pred_train = pd.DataFrame({
                        'y_train': y_train,
                        'y_tr_rbf': y_tr_rbf,
                        'y_tr_lin': y_tr_lin,
                    })
                    _dump(pred_train,
                          OPEN_FILE + "/pred_exp_tr_CV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, num, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                    ]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame({
                        'y_true': y_true,
                        'y_rbf': y_pred_rbf,
                        'y_lin': y_pred_lin,
                    })
                    _dump(predictions,
                          OPEN_FILE + "/pred_exp_CV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, num, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples),
                    ]
                    print(svm_accuracy[i])

                # Per-feature-count summaries over all splits.
                _dump(svm_accuracy_tr,
                      OPEN_FILE + "/accuracy_exp_tr_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
                _dump(svm_accuracy,
                      OPEN_FILE + "/accuracy_exp_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
                parameters = pd.DataFrame({
                    'C_rbf': c_val_rbf,
                    'gamma_rbf': gamma_val_rbf,
                    'C_lin': c_val_lin,
                    'best_rbf': best_score_rbf,
                    'best_lin': best_score_lin,
                })
                _dump(parameters,
                      OPEN_FILE + "/params_exp_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
def blood_surr():
    """Classify blood samples with features selected on another tissue.

    For each of the tissues ``'EC'``, ``'STG'``, ``'CER'`` and ``'FC'`` the
    tissue data is loaded with ``load_data`` and the blood matrix is
    unpickled from ``../tissues/resi_norm_WB.p``.  For every selection method
    and feature count, the positions of the ``braak_bin == 0`` / ``== 1``
    blood samples are shuffled and split five ways by ``ut.get_intervals``.
    Per split, features are ranked on the tissue rows whose ``subject`` does
    not appear in the blood test split (and whose ``braak_stage`` is not
    ``'Exclude'``); an RBF and a linear SVM are then trained and evaluated
    on the blood samples restricted to those features.  All results are
    pickled into the directory resolved from ``../data_str/``.

    Raises:
        ValueError: if a selection method is not ``'t_test'``, ``'fisher'``
            or ``'rfe'`` (cannot happen with the hard-coded list).
    """

    def _dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, pred_col, total):
        # Fraction of rows whose prediction equals the reference label.
        return np.count_nonzero(frame[truth_col] == frame[pred_col]) / total

    def _rank_features(method, data, info, num):
        # Dispatch to the selector; fail loudly on an unknown method.
        if method == 't_test':
            return fs.feature_sel_t_test_parallel(data, info, num)
        if method == 'fisher':
            return fs.feature_fisher_score_parallel(data, info, num)
        if method == 'rfe':
            return fs.feature_sel_rfe(data, info, num)
        raise ValueError("unknown feature selection method: %r" % (method,))

    tissues = ['EC', 'STG', 'CER', 'FC']
    features_sel = ['t_test', 'fisher', 'rfe']
    features_num = [5, 10, 15, 20, 50, 75, 100, 250, 500]
    cv_splits = 5

    for tissue in tissues:
        open_file = os.path.realpath('../data_str/')
        ec, info = load_data(tissue)
        with open('../tissues/resi_norm_WB.p', "rb") as blood_handle:
            blood = pickle.load(blood_handle)

        for feat_sel in features_sel:
            cat = info['braak_bin'].loc[blood.index]
            # Integer positions (not labels) of each class inside ``cat``.
            nzeros = np.where(cat == 0)[0]
            nones = np.where(cat == 1)[0]
            for num in features_num:
                c_val_rbf = np.zeros(cv_splits)
                gamma_val_rbf = np.zeros(cv_splits)
                c_val_lin = np.zeros(cv_splits)
                best_score_rbf = np.zeros(cv_splits)
                best_score_lin = np.zeros(cv_splits)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                zeros = np.random.permutation(nzeros)
                ones = np.random.permutation(nones)
                for i in range(cv_splits):
                    print('split: %d - num_features: %d - tissue:%s- feat_sel:%s'
                          % (i, num, tissue, feat_sel))
                    test_index, train_index = ut.get_intervals(
                        cv_splits, i, zeros, ones)
                    print(test_index)
                    train_blood = blood.iloc[train_index]
                    # The split indices are positions, so index positionally;
                    # plain ``cat[...]`` relies on a deprecated fallback.
                    y_train = cat.iloc[train_index]
                    test_blood = blood.iloc[test_index]
                    samples = test_blood.shape[0]
                    samples_tr = train_blood.shape[0]

                    # Subjects present in the blood test split must not
                    # contribute tissue rows to feature selection.
                    rem = test_blood.index
                    unwanted = set(info['subject'].loc[rem])
                    valids = [
                        ids for ids in info.index
                        if (info['tissue'].loc[ids] == tissue
                            and info['subject'].loc[ids] not in unwanted
                            and info['braak_stage'].loc[ids] != 'Exclude')
                    ]
                    ec_train = ec.loc[valids]

                    start_time = time.time()
                    features_file = (
                        open_file + "/features_blood_CV_%s_%s_%d_%d.p"
                        % (tissue, feat_sel, num, i))
                    print(ec_train.shape)
                    features_all = _rank_features(
                        feat_sel, ec_train, info, num)
                    print("--- %s seconds for feature selection ---"
                          % (time.time() - start_time))
                    _dump(features_all, features_file)

                    train = train_blood[features_all]
                    print(train.shape)
                    test = test_blood[features_all]
                    y_true = cat.iloc[test_index]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                     best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 2, 10),
                        gamma_range=np.logspace(-6, 2, 10))
                    (y_pred_lin, y_tr_lin, c_val_lin[i],
                     best_score_lin[i]) = cl.SVM_classify_lin_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 2, 10))
                    print("--- %s seconds for classification ---"
                          % (time.time() - start_time))

                    pred_train = pd.DataFrame({
                        'y_train': y_train,
                        'y_tr_rbf': y_tr_rbf,
                        'y_tr_lin': y_tr_lin,
                    })
                    _dump(pred_train,
                          open_file + "/pred_blood_tr_CV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, num, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                    ]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame({
                        'y_true': y_true,
                        'y_rbf': y_pred_rbf,
                        'y_lin': y_pred_lin,
                    })
                    _dump(predictions,
                          open_file + "/pred_blood_CV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, num, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples),
                    ]
                    print(svm_accuracy[i])

                # Per-feature-count summaries over all splits.
                _dump(svm_accuracy_tr,
                      open_file + "/accuracy_blood_tr_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
                _dump(svm_accuracy,
                      open_file + "/accuracy_blood_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
                parameters = pd.DataFrame({
                    'C_rbf': c_val_rbf,
                    'gamma_rbf': gamma_val_rbf,
                    'C_lin': c_val_lin,
                    'best_rbf': best_score_rbf,
                    'best_lin': best_score_lin,
                })
                # NOTE(review): unlike the other outputs this name has no
                # "blood" tag; kept as-is because readers of the file may
                # depend on it -- confirm before renaming.
                _dump(parameters,
                      open_file + "/params_CV_%s_%s_%d.p"
                      % (tissue, feat_sel, num))
def general_all_non():
    """Cross-validate SVM classifiers on the data returned by ``load_data()``.

    Rows whose ``braak_stage`` is ``'Exclude'`` are dropped from the metadata
    used for splitting.  The class positions (``braak_bin`` 0 / 1) are taken
    from the ``'STG'`` rows, shuffled for every feature count in
    ``FEATURES_NUM`` and passed, together with the filtered metadata and the
    unique subjects, to ``ut.get_intervals_all`` to obtain five test/train
    splits.  The returned indices are used as row labels.  Per split the
    features are chosen from the training rows only (for ``'PCA'`` the data
    is standardised and projected instead), an RBF and a linear SVM are run
    and the results are pickled under ``OPEN_FILE`` with ``TISSUE`` in the
    file names.

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is none of ``'t_test'``,
            ``'fisher'``, ``'rfe'`` or ``'PCA'``.
    """

    def _dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, pred_col, total):
        # Fraction of rows whose prediction equals the reference label.
        return np.count_nonzero(frame[truth_col] == frame[pred_col]) / total

    def _rank_features(method, data, info, num):
        # Dispatch to the selector; fail loudly rather than reuse stale features.
        if method == 't_test':
            return fs.feature_sel_t_test_parallel(data, info, num)
        if method == 'fisher':
            return fs.feature_fisher_score_parallel(data, info, num)
        if method == 'rfe':
            return fs.feature_sel_rfe(data, info, num)
        raise ValueError("unknown feature selection method: %r" % (method,))

    ec, info = load_data()
    ec_temp = ec.loc[info.braak_stage != 'Exclude']
    new_inf = info.loc[ec_temp.index]
    subjects = np.unique(new_inf.subject)
    fromtis = ec.loc[(info.tissue == 'STG') & (info.braak_stage != 'Exclude')]
    categories = new_inf['braak_bin'].loc[fromtis.index]
    print('cargo datos')
    # Integer positions of each class inside the STG subset.
    nzeros = np.where(categories == 0)[0]
    nones = np.where(categories == 1)[0]

    # Labels for every row of ``ec``; independent of method and feature count.
    cat = info['braak_bin'].loc[ec.index]
    cv_splits = 5

    for feat_sel in FEATURES_SEL:
        for num in FEATURES_NUM:
            c_val_rbf = np.zeros(cv_splits)
            gamma_val_rbf = np.zeros(cv_splits)
            c_val_lin = np.zeros(cv_splits)
            best_score_rbf = np.zeros(cv_splits)
            best_score_lin = np.zeros(cv_splits)
            svm_accuracy = {}
            svm_accuracy_tr = {}
            zeros = np.random.permutation(nzeros)
            ones = np.random.permutation(nones)
            for i in range(cv_splits):
                print('gen_all -split: %d - num_features: %d - feat_sel:%s'
                      % (i, num, feat_sel))
                test_index, train_index = ut.get_intervals_all(
                    cv_splits, i, zeros, ones, new_inf, subjects)
                print('tamaño de test: %s' % len(test_index))
                print(np.array(new_inf['subject'].loc[test_index]))
                # Indices are row labels here, so use label-based lookups
                # for both the data and the targets.
                train_full = ec.loc[train_index]
                y_train = cat.loc[train_index]
                test_full = ec.loc[test_index]
                samples = test_full.shape[0]
                samples_tr = train_full.shape[0]

                start_time = time.time()
                features_file = (
                    OPEN_FILE + "/features_CV_%s_%s_%d_%d.p"
                    % (TISSUE, feat_sel, num, i))
                print(train_full.shape)
                features_all = None
                if feat_sel != 'PCA':
                    features_all = _rank_features(
                        feat_sel, train_full, info, num)
                print("--- %s seconds for feature selection ---"
                      % (time.time() - start_time))

                if feat_sel == 'PCA':
                    # Scaling is fitted on the training rows only.
                    scale = preprocessing.StandardScaler().fit(train_full)
                    train_sc = scale.transform(train_full)
                    test_sc = scale.transform(test_full)
                    # Projection is likewise fitted on the training rows.
                    pca = PCA(n_components=num)
                    pca.fit(train_sc)
                    train = pca.transform(train_sc)
                    test = pca.transform(test_sc)
                else:
                    _dump(features_all, features_file)
                    train = train_full[features_all[0:num]]
                    print(train.shape)
                    test = test_full[features_all[0:num]]
                y_true = cat.loc[test_index]

                start_time = time.time()
                (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                 best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                    train, y_train, test, y_true,
                    C_range=np.logspace(-3, 5, 10),
                    gamma_range=np.logspace(-6, 3, 10))
                (y_pred_lin, y_tr_lin, c_val_lin[i],
                 best_score_lin[i]) = cl.SVM_classify_lin_all(
                    train, y_train, test, y_true,
                    C_range=np.logspace(-2, 0, 6))
                print("--- %s seconds for classification ---"
                      % (time.time() - start_time))

                pred_train = pd.DataFrame({
                    'y_train': y_train,
                    'y_tr_rbf': y_tr_rbf,
                    'y_tr_lin': y_tr_lin,
                })
                _dump(pred_train,
                      OPEN_FILE + "/pred_tr_CV_%s_%s_%d_%d.p"
                      % (TISSUE, feat_sel, num, i))
                svm_accuracy_tr[i] = [
                    _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                    _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                ]
                print(svm_accuracy_tr[i])

                predictions = pd.DataFrame({
                    'y_true': y_true,
                    'y_rbf': y_pred_rbf,
                    'y_lin': y_pred_lin,
                })
                _dump(predictions,
                      OPEN_FILE + "/pred_CV_%s_%s_%d_%d.p"
                      % (TISSUE, feat_sel, num, i))
                svm_accuracy[i] = [
                    _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                    _hit_rate(predictions, 'y_true', 'y_lin', samples),
                ]
                print(svm_accuracy[i])

            # Per-feature-count summaries over all splits.
            _dump(svm_accuracy_tr,
                  OPEN_FILE + "/accuracy_tr_CV_%s_%s_%d.p"
                  % (TISSUE, feat_sel, num))
            _dump(svm_accuracy,
                  OPEN_FILE + "/accuracy_CV_%s_%s_%d.p"
                  % (TISSUE, feat_sel, num))
            parameters = pd.DataFrame({
                'C_rbf': c_val_rbf,
                'gamma_rbf': gamma_val_rbf,
                'C_lin': c_val_lin,
                'best_rbf': best_score_rbf,
                'best_lin': best_score_lin,
            })
            _dump(parameters,
                  OPEN_FILE + "/params_CV_%s_%s_%d.p"
                  % (TISSUE, feat_sel, num))
def main():
    """Compare SVM performance across different numbers of CV folds.

    For every tissue in ``TISSUES`` the data is loaded with ``load_data`` and,
    for every fold count in ``CV``, the positions of the ``braak_bin == 0``
    and ``braak_bin == 1`` samples are reshuffled and split by
    ``ut.get_intervals``.  Per split, ``NUM`` features are selected with
    ``'rfe'`` on the training rows, an RBF and a linear SVM are run through
    ``cl.SVM_classify_rbf_all`` / ``cl.SVM_classify_lin_all`` and features,
    predictions, accuracies and chosen hyper-parameters are pickled under
    ``OPEN_FILE`` with a ``diffCV`` tag in the file names.

    Raises:
        ValueError: if a selection method is not ``'t_test'``, ``'fisher'``
            or ``'rfe'`` (cannot happen with the hard-coded list).
    """

    def _dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, pred_col, total):
        # Fraction of rows whose prediction equals the reference label.
        return np.count_nonzero(frame[truth_col] == frame[pred_col]) / total

    def _rank_features(method, data, info, num):
        # Dispatch to the selector; fail loudly on an unknown method.
        if method == 't_test':
            return fs.feature_sel_t_test_parallel(data, info, num)
        if method == 'fisher':
            return fs.feature_fisher_score_parallel(data, info, num)
        if method == 'rfe':
            return fs.feature_sel_rfe(data, info, num)
        raise ValueError("unknown feature selection method: %r" % (method,))

    features_sel = ['rfe']
    for tissue in TISSUES:
        betaqn, info = load_data(tissue)
        for feat_sel in features_sel:
            ec = betaqn
            cat = info['braak_bin'].loc[ec.index]
            # Integer positions (not labels) of each class inside ``cat``.
            zeros = np.where(cat == 0)[0]
            ones = np.where(cat == 1)[0]
            for cv in CV:
                cv_splits = cv
                c_val_rbf = np.zeros(cv_splits)
                gamma_val_rbf = np.zeros(cv_splits)
                c_val_lin = np.zeros(cv_splits)
                best_score_rbf = np.zeros(cv_splits)
                best_score_lin = np.zeros(cv_splits)
                svm_accuracy = {}
                svm_accuracy_tr = {}
                # Each fold count reshuffles the previous permutation.
                zeros = np.random.permutation(zeros)
                ones = np.random.permutation(ones)
                for i in range(cv_splits):
                    print('split: %d - cv: %d' % (i, cv))
                    test_index, train_index = ut.get_intervals(
                        cv_splits, i, zeros, ones)
                    train_full = ec.iloc[train_index]
                    # The split indices are positions, so index positionally;
                    # plain ``cat[...]`` relies on a deprecated fallback.
                    y_train = cat.iloc[train_index]
                    test_full = ec.iloc[test_index]
                    samples = test_full.shape[0]
                    samples_tr = train_full.shape[0]

                    start_time = time.time()
                    features_file = (
                        OPEN_FILE + "/features_diffCV_%s_%s_%d_%d.p"
                        % (tissue, feat_sel, cv, i))
                    features_all = _rank_features(
                        feat_sel, train_full, info, NUM)
                    print("--- %s seconds for feature selection ---"
                          % (time.time() - start_time))
                    _dump(features_all, features_file)

                    train = train_full[features_all[0:NUM]]
                    test = test_full[features_all[0:NUM]]
                    y_true = cat.iloc[test_index]

                    start_time = time.time()
                    (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                     best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 4, 20),
                        gamma_range=np.logspace(-7, 2, 20))
                    (y_pred_lin, y_tr_lin, c_val_lin[i],
                     best_score_lin[i]) = cl.SVM_classify_lin_all(
                        train, y_train, test, y_true,
                        C_range=np.logspace(-4, 3, 20))
                    print("--- %s seconds for classification ---"
                          % (time.time() - start_time))

                    pred_train = pd.DataFrame({
                        'y_train': y_train,
                        'y_tr_rbf': y_tr_rbf,
                        'y_tr_lin': y_tr_lin,
                    })
                    _dump(pred_train,
                          OPEN_FILE + "/pred_tr_diffCV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, cv, i))
                    svm_accuracy_tr[i] = [
                        _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                        _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                    ]
                    print(svm_accuracy_tr[i])

                    predictions = pd.DataFrame({
                        'y_true': y_true,
                        'y_rbf': y_pred_rbf,
                        'y_lin': y_pred_lin,
                    })
                    _dump(predictions,
                          OPEN_FILE + "/pred_diffCV_%s_%s_%d_%d.p"
                          % (tissue, feat_sel, cv, i))
                    svm_accuracy[i] = [
                        _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                        _hit_rate(predictions, 'y_true', 'y_lin', samples),
                    ]
                    print(svm_accuracy[i])

                # Per-fold-count summaries over all splits.
                _dump(svm_accuracy_tr,
                      OPEN_FILE + "/accuracy_tr_diffCV_%s_%s_%d.p"
                      % (tissue, feat_sel, cv))
                _dump(svm_accuracy,
                      OPEN_FILE + "/accuracy_diffCV_%s_%s_%d.p"
                      % (tissue, feat_sel, cv))
                parameters = pd.DataFrame({
                    'C_rbf': c_val_rbf,
                    'gamma_rbf': gamma_val_rbf,
                    'C_lin': c_val_lin,
                    'best_rbf': best_score_rbf,
                    'best_lin': best_score_lin,
                })
                _dump(parameters,
                      OPEN_FILE + "/params_diffCV_%s_%s_%d.p"
                      % (tissue, feat_sel, cv))
def general_Lun():
    """Cross-validate SVMs on 12 features per tissue, selected or hard-coded.

    For every method in ``FEATURES_SEL`` and every tissue in ``TISSUES`` a
    feature set is obtained once from the whole tissue data: either from one
    of the ``fs`` selectors (``'t_test'``, ``'fisher'``, ``'rfe'``) or from a
    hard-coded probe list (``'leo'`` per tissue, ``'leo_all'``, ``'jager'``).
    ``'leo'`` skips tissues it has no list for.  The samples of both
    ``braak_bin`` classes are then shuffled and split ``CV_SPLITS`` ways by
    ``ut.get_intervals``; per split an RBF and a linear SVM are run and the
    hyper-parameters and predictions are pickled under ``OPEN_FILE``.  The
    accuracy dictionaries are pickled once per method/tissue pair.

    Raises:
        ValueError: if a method in ``FEATURES_SEL`` is not one of the six
            names above.
    """

    def _dump(obj, path):
        # Close the handle deterministically instead of leaking it.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def _hit_rate(frame, truth_col, pred_col, total):
        # Fraction of rows whose prediction equals the reference label.
        return np.count_nonzero(frame[truth_col] == frame[pred_col]) / total

    # Hard-coded probe lists for the 'leo' method, keyed by tissue.
    leo_by_tissue = {
        'EC': [
            'cg11823178', 'cg22997194', 'cg06653632', 'cg05066959',
            'cg24152732', 'cg14972141', 'cg04029027', 'cg05030077',
            'cg04151012', 'cg18522315', 'cg20618448', 'cg24770624',
        ],
        'STG': [
            'cg04525464', 'cg06108383', 'cg10752406', 'cg25018458',
            'cg13942103', 'cg00767503', 'cg02961798', 'cg05810363',
            'cg15849154', 'cg06745695', 'cg15520955', 'cg03601797',
        ],
        'FC': [
            'cg04147621', 'cg05726109', 'cg11724984', 'cg23968456',
            'cg24671734', 'cg06926306', 'cg07859799', 'cg02997560',
            'cg13507269', 'cg19900677', 'cg14071588', 'cg15928398',
        ],
        'CER': [
            'cg22570053', 'cg00065957', 'cg21781422', 'cg17715556',
            'cg01339004', 'cg18882687', 'cg20767910', 'cg24462001',
            'cg07869256', 'cg20698501', 'cg21618635', 'cg17468317',
        ],
    }
    leo_all = [
        'cg11823178', 'cg25018458', 'cg05810363', 'cg05066959',
        'cg18428542', 'cg16665310', 'cg05912299', 'cg03169557',
        'cg23968456', 'cg02672452', 'cg04147621', 'cg17910899',
    ]
    jager = [
        'cg11724984', 'cg23968456', 'cg15821544', 'cg16733298',
        'cg22962123', 'cg13076843', 'cg25594100', 'cg00621289',
        'cg19803550', 'cg03169557', 'cg05066959', 'cg05810363',
    ]

    num = 12
    for feat_sel in FEATURES_SEL:
        for tissue in TISSUES:
            save_file = OPEN_FILE
            betaqn, info = load_data(tissue)
            ec = betaqn

            start_time = time.time()
            features_file = OPEN_FILE + "/features_LEO_CV_%s_%s_%d.p" % (
                tissue, feat_sel, num)
            if feat_sel == 't_test':
                features_all = fs.feature_sel_t_test_parallel(ec, info, num)
            elif feat_sel == 'fisher':
                features_all = fs.feature_fisher_score_parallel(ec, info, num)
            elif feat_sel == 'rfe':
                features_all = fs.feature_sel_rfe(ec, info, num)
            elif feat_sel == 'leo':
                if tissue not in leo_by_tissue:
                    # No published list for this tissue: nothing to evaluate.
                    continue
                features_all = list(leo_by_tissue[tissue])
            elif feat_sel == 'leo_all':
                # The list used to be bound to a different name, which left
                # ``features_all`` stale or undefined for this method.
                features_all = list(leo_all)
            elif feat_sel == 'jager':
                features_all = list(jager)
            else:
                raise ValueError(
                    "unknown feature selection method: %r" % (feat_sel,))
            print("--- %s seconds for feature selection ---"
                  % (time.time() - start_time))
            _dump(features_all, features_file)

            cat = info['braak_bin'].loc[ec.index]
            # Integer positions (not labels) of each class inside ``cat``.
            nzeros = np.where(cat == 0)[0]
            nones = np.where(cat == 1)[0]
            svm_accuracy = {}
            svm_accuracy_tr = {}
            c_val_rbf = np.zeros(CV_SPLITS)
            gamma_val_rbf = np.zeros(CV_SPLITS)
            c_val_lin = np.zeros(CV_SPLITS)
            best_score_rbf = np.zeros(CV_SPLITS)
            best_score_lin = np.zeros(CV_SPLITS)
            zeros = np.random.permutation(nzeros)
            ones = np.random.permutation(nones)

            for i in range(CV_SPLITS):
                print('split: %d - num_features: %d - tissue:%s- feat_sel:%s'
                      % (i, num, tissue, feat_sel))
                test_index, train_index = ut.get_intervals(
                    CV_SPLITS, i, zeros, ones)
                train_full = ec.iloc[train_index]
                test_full = ec.iloc[test_index]
                samples = test_full.shape[0]
                samples_tr = train_full.shape[0]
                train = train_full[features_all]
                test = test_full[features_all]
                y_train = info['braak_bin'].loc[train.index]
                # The split indices are positions, so index positionally;
                # plain ``cat[...]`` relies on a deprecated fallback.
                y_true = cat.iloc[test_index]

                start_time = time.time()
                (y_pred_rbf, y_tr_rbf, c_val_rbf[i], gamma_val_rbf[i],
                 best_score_rbf[i]) = cl.SVM_classify_rbf_all(
                    train, y_train, test, y_true,
                    C_range=np.logspace(-4, 4, 10),
                    gamma_range=np.logspace(-5, 2, 10))
                (y_pred_lin, y_tr_lin, c_val_lin[i],
                 best_score_lin[i]) = cl.SVM_classify_lin_all(
                    train, y_train, test, y_true,
                    C_range=np.logspace(-4, 3, 10))
                print("--- %s seconds for classification ---"
                      % (time.time() - start_time))

                # Written once per split: entries for splits not yet run are
                # still zero in this snapshot.
                parameters = pd.DataFrame({
                    'C_rbf': c_val_rbf,
                    'gamma_rbf': gamma_val_rbf,
                    'C_lin': c_val_lin,
                    'best_rbf': best_score_rbf,
                    'best_lin': best_score_lin,
                })
                _dump(parameters,
                      save_file + "/params_LEO2_%s_%s_%d.p"
                      % (tissue, feat_sel, i))

                predictions = pd.DataFrame({
                    'y_true': y_true,
                    'y_rbf': y_pred_rbf,
                    'y_lin': y_pred_lin,
                })
                _dump(predictions,
                      save_file + "/pred_LEO2_%s_%s_%d.p"
                      % (tissue, feat_sel, i))

                pred_train = pd.DataFrame({
                    'y_train': y_train,
                    'y_tr_rbf': y_tr_rbf,
                    'y_tr_lin': y_tr_lin,
                })
                _dump(pred_train,
                      save_file + "/pred_LEO2_tr_%s_%s_%d.p"
                      % (tissue, feat_sel, i))

                svm_accuracy[i] = [
                    _hit_rate(predictions, 'y_true', 'y_rbf', samples),
                    _hit_rate(predictions, 'y_true', 'y_lin', samples),
                ]
                svm_accuracy_tr[i] = [
                    _hit_rate(pred_train, 'y_train', 'y_tr_rbf', samples_tr),
                    _hit_rate(pred_train, 'y_train', 'y_tr_lin', samples_tr),
                ]
                print(svm_accuracy[i])
                print(svm_accuracy_tr[i])

            # Accuracy summaries over all splits for this method/tissue pair.
            _dump(svm_accuracy,
                  save_file + "/accuracy_LEO2_%s_%s.p" % (tissue, feat_sel))
            _dump(svm_accuracy_tr,
                  save_file + "/accuracy_LEO2_tr_%s_%s.p" % (tissue, feat_sel))