def do_svn(target, source, feature_selection):
    seed = 7
    # boosted (repeated) leave-one-out: repeat the whole evaluation 10 times
    for i in range(0, 10):
        # pick the data loader and output file name for this target/source combination
        if source == 'exin':
            if feature_selection == 'fs':
                id_all, x_data_all, y_data_all = cdu.get_exin_fs_data(target)
                fName = 'svm_exin_' + target + '_fs'
            else:
                id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
                fName = 'svm_exin_' + target
        else:
            if feature_selection == 'fs':
                id_all, x_data_all, y_data_all = cdu.get_ex_fs_data(target)
                fName = 'svm_ex_' + target + '_fs'
            else:
                id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
                fName = 'svm_ex_' + target

        lst = []
        scaled_data = data_util.scale(x_data_all)
        x_data_all = pd.DataFrame(scaled_data, index=x_data_all.index, columns=x_data_all.columns)
        for train, test in LeaveOneOut().split(x_data_all):
            y_train = y_data_all.iloc[train]
            classifier = SVC(kernel='linear', probability=True, random_state=seed, verbose=True)
            classifier.fit(x_data_all.iloc[train], y_train)
            test_probas = classifier.predict_proba(x_data_all.iloc[test])
            one_result = test_probas[0]
            lst.append([id_all.iloc[test].values[0][0],
                        y_data_all.iloc[test].values[0][0],
                        one_result[0],
                        one_result[1]])
        predict_result = pd.DataFrame(lst, columns=['id', 'label', '0', '1'])
        predict_result.to_csv(cdu.get_save_path(fName + '_' + str(i) + '.csv'), sep=',', encoding='utf-8')
    print('done')
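# Usage sketch (illustrative, not part of the original script): 'RCCA' is one of the
# target names listed in the commented target list of the feature-selection code, and
# 'exin'/'ex' pick between the two carotid_data_util loaders. Any feature_selection
# value other than 'fs' falls back to the non-feature-selected loaders.
if __name__ == '__main__':
    do_svn('RCCA', 'exin', 'fs')   # repeated leave-one-out SVM on the feature-selected 'exin' data
    do_svn('RCCA', 'ex', 'all')    # same evaluation on the full 'ex' feature set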
def fs(target, source):
    seed = 7
    # targets = ['RCCA', 'REICA', 'RIICA', 'RACA', 'RMCA', 'RPCA', 'REVA', 'RIVA', 'BA', 'LCCA', 'LEICA', 'LIICA', 'LACA',
    #            'LMCA', 'LPCA', 'LEVA', 'LIVA']
    feature_names = ''
    all_importance = []
    portions = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
    for portion in portions:
        with open('fs' + os.sep + target + '_' + source + '_' + str(portion) + '_fs.csv', 'w', newline="") as csv_file:
            wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
            wr.writerow(['predict', 'label'])
            for i in range(0, 10):
                if source == 'exin':
                    id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
                    fName = 'fs_exin_' + target
                else:
                    id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
                    fName = 'fs_ex_' + target
                feature_names = x_data_all.columns

                # Build a forest and compute the feature importances
                forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
                forest.fit(x_data_all, y_data_all)
                importances = forest.feature_importances_
                if i == 0:
                    all_importance = importances
                else:
                    all_importance = np.vstack((all_importance, importances))
                indices = np.argsort(importances)[::-1]

                # classifier =====
                # keep roughly the top `portion` fraction of features, ranked by importance
                cut = int(round(len(indices) * portion, 0)) - 1
                indices_cut = indices[0:cut]
                x_data = x_data_all.iloc[:, indices_cut]
                scaled_data = data_util.scale(x_data)
                x_data = pd.DataFrame(scaled_data, index=x_data.index, columns=x_data.columns)
                for train, test in LeaveOneOut().split(x_data):
                    y_train = y_data_all.iloc[train]
                    y_test = y_data_all.iloc[test]
                    classifier = SVC(kernel='linear', random_state=seed, verbose=False)
                    classifier.fit(x_data.iloc[train], y_train)
                    predict = classifier.predict(x_data.iloc[test])
                    label = y_data_all.iloc[test].values[0][0]
                    wr.writerow([predict[0], label])
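# Usage sketch (illustrative assumption): fs() writes one CSV of leave-one-out
# predictions per feature portion into the local 'fs' directory, so that directory
# must exist before the call.
if __name__ == '__main__':
    os.makedirs('fs', exist_ok=True)   # ensure the output directory exists
    fs('RCCA', 'exin')                 # top-20% .. top-70% features, 10 repeats each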
# 3-fold stratified cross-validation
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
if soure == 'exin':
    id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
    fName = 'cnn_exin_' + target
else:
    id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
    fName = 'cnn_ex_' + target

# split the input features into one group per ultrasound measurement
x_data_ed = x_data_all[data_util.cnn_col_ed]
x_data_fv = x_data_all[data_util.cnn_col_fv]
x_data_pi = x_data_all[data_util.cnn_col_pi]
x_data_ps = x_data_all[data_util.cnn_col_ps]
x_data_ri = x_data_all[data_util.cnn_col_ri]
x_data_tav = x_data_all[data_util.cnn_col_tav]

for index, (train, test) in enumerate(kfold.split(x_data_all, y_data_all)):
    # scale each feature group and add a channel dimension for the CNN input
    x_train_ed = data_util.scale(x_data_ed.iloc[train])
    x_train_ed = np.expand_dims(x_train_ed, 2)
    x_train_fv = data_util.scale(x_data_fv.iloc[train])
    x_train_fv = np.expand_dims(x_train_fv, 2)
    x_train_pi = data_util.scale(x_data_pi.iloc[train])
    x_train_pi = np.expand_dims(x_train_pi, 2)
    x_train_ps = data_util.scale(x_data_ps.iloc[train])
    x_train_ps = np.expand_dims(x_train_ps, 2)
    x_train_ri = data_util.scale(x_data_ri.iloc[train])
    x_train_ri = np.expand_dims(x_train_ri, 2)
    x_train_tav = data_util.scale(x_data_tav.iloc[train])
    x_train_tav = np.expand_dims(x_train_tav, 2)

    x_test_ed = data_util.scale(x_data_ed.iloc[test])
    x_test_ed = np.expand_dims(x_test_ed, 2)
    x_test_fv = data_util.scale(x_data_fv.iloc[test])
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
from my_util import data_util
from sklearn import metrics
from carotid import carotid_data_util as cdu
import numpy as np
import pandas as pd

target = 'RCCA'
seed = 7

id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
fName = 'dbscan.csv'
x_data_all = data_util.scale(x_data_all)
labels_true = y_data_all.values.ravel()

# Compute DBSCAN; min_samples is roughly 1% of the samples (must be an int)
mSample = int(round(id_all.shape[0] / 100, 0))
db = DBSCAN(eps=300, min_samples=mSample).fit(x_data_all)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Cluster labels; noise points are labelled -1
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
# print("Adjusted Rand Index: %0.3f"
#       % metrics.adjusted_rand_score(labels_true, labels))
# print("Adjusted Mutual Information: %0.3f"
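# Optional diagnostic (a sketch, not part of the original analysis): DBSCAN's eps is
# commonly chosen from a k-distance plot with k equal to min_samples; the elbow of the
# sorted distances suggests a reasonable eps for the scaled data. Only standard
# scikit-learn / matplotlib calls are used; x_data_all and mSample come from the script above.
from sklearn.neighbors import NearestNeighbors

k = max(mSample, 2)
neighbors = NearestNeighbors(n_neighbors=k).fit(x_data_all)
distances, _ = neighbors.kneighbors(x_data_all)
k_distances = np.sort(distances[:, -1])   # distance to each sample's k-th nearest neighbour
plt.plot(k_distances)
plt.xlabel('samples sorted by k-distance')
plt.ylabel('%d-NN distance' % k)
plt.show()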
# kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
# for index, (train, test) in enumerate(kfold.split(x_data_all, y_data_all)):
#     x_train = data_util.scale(x_data_all.iloc[train])
#     x_test = data_util.scale(x_data_all.iloc[test])
#     y_train = y_data_all.iloc[train]
#     model, history = ann(x_train, y_train)
#     loss, acc = model.evaluate(x_test, to_categorical(y_data_all.iloc[test]))
#     y_pred = model.predict(x_test)
#     predict_result_hold = id_all.iloc[test]
#     predict_result_hold['label'] = y_data_all.iloc[test]
#     predict_result_hold['0'] = y_pred[:, 0]
#     predict_result_hold['1'] = y_pred[:, 1]
#     predict_result_hold.to_csv(cdu.get_save_path(fName + '_' + str(index) + '.csv'), sep=',', encoding='utf-8')
#     print(acc, loss)

# leave-one-out
lst = []
scaled_data = data_util.scale(x_data_all)
x_data_all = pd.DataFrame(scaled_data, index=x_data_all.index, columns=x_data_all.columns)
for train, test in LeaveOneOut().split(x_data_all):
    y_train = y_data_all.iloc[train]
    model, history = ann(x_data_all.iloc[train], y_train)
    loss, acc = model.evaluate(x_data_all.iloc[test], to_categorical(y_data_all.iloc[test], 2))
    y_pred = model.predict(x_data_all.iloc[test])
    one_result = y_pred[0]
    lst.append([id_all.iloc[test].values[0][0],
                y_data_all.iloc[test].values[0][0],
                one_result[0],
                one_result[1]])

predict_result = pd.DataFrame(lst, columns=['id', 'label', '0', '1'])
predict_result.to_csv(cdu.get_save_path(fName + '.csv'), sep=',', encoding='utf-8')
print('done')
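# Minimal sketch of the ann() helper assumed above; the real definition lives elsewhere
# in the repository and is not shown in this section. Layer sizes, optimizer, epochs,
# and batch size are illustrative assumptions only; the return signature (model, history)
# matches how ann() is used in the leave-one-out loop.
def ann_sketch(x_train, y_train):
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.utils import to_categorical

    # small dense softmax classifier over the tabular carotid features
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=x_train.shape[1]))
    model.add(Dense(2, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, to_categorical(y_train, 2),
                        epochs=100, batch_size=8, verbose=0)
    return model, history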