예제 #1
0
파일: svm.py 프로젝트: chingheng113/ml_farm
def do_svn(target, soure, feature_selection):
    """Run a linear-SVM leave-one-out evaluation 10 times and save each run.

    Parameters
    ----------
    target : str
        Target label name forwarded to the data loaders.
    soure : str
        Data-source selector: 'exin' uses the ex+in loaders, anything else
        the ex-only loaders.  (Misspelled name kept for caller compatibility.)
    feature_selection : str
        'fs' selects the feature-selected variants of the loaders.

    Writes one CSV per repetition via cdu.get_save_path(); prints 'done'.
    """
    seed = 7
    # The loader choice, output file prefix, and scaling do not depend on the
    # repetition index, so do them once instead of on every boost pass.
    # (Assumes the cdu loaders and data_util.scale are deterministic, as the
    # original re-ran them each iteration expecting identical data.)
    if soure == 'exin':
        if feature_selection == 'fs':
            id_all, x_data_all, y_data_all = cdu.get_exin_fs_data(target)
            fName = 'svm_exin_' + target + '_fs'
        else:
            id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
            fName = 'svm_exin_' + target
    else:
        if feature_selection == 'fs':
            id_all, x_data_all, y_data_all = cdu.get_ex_fs_data(target)
            fName = 'svm_ex_' + target + '_fs'
        else:
            id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
            fName = 'svm_ex_' + target
    scaled_data = data_util.scale(x_data_all)
    x_data_all = pd.DataFrame(scaled_data,
                              index=x_data_all.index,
                              columns=x_data_all.columns)
    # boost leave-one-out: repeat the full LOO evaluation 10 times
    for i in range(0, 10):
        lst = []
        for train, test in LeaveOneOut().split(x_data_all):
            y_train = y_data_all.iloc[train]
            classifier = SVC(kernel='linear',
                             probability=True,
                             random_state=seed,
                             verbose=True)
            classifier.fit(x_data_all.iloc[train], y_train)
            test_probas = classifier.predict_proba(x_data_all.iloc[test])
            one_result = test_probas[0]  # exactly one held-out sample per split
            lst.append([
                id_all.iloc[test].values[0][0],
                y_data_all.iloc[test].values[0][0], one_result[0],
                one_result[1]
            ])
        predict_result = pd.DataFrame(lst, columns=['id', 'label', '0', '1'])
        predict_result.to_csv(cdu.get_save_path(fName + '_' + str(i) + '.csv'),
                              sep=',',
                              encoding='utf-8')

    print('done')
예제 #2
0
def fs(target, source):
    """Feature-selection experiment: rank features with an ExtraTrees forest,
    keep the top `portion` fraction, and score a linear SVM with leave-one-out.

    One CSV of (predict, label) rows per portion is written under the local
    'fs' directory; the inner loop repeats the whole procedure 10 times,
    appending all rows to the same file.

    Parameters
    ----------
    target : str
        Target label name forwarded to the data loaders.
    source : str
        'exin' selects the ex+in loaders, anything else the ex-only loaders.
    """
    seed = 7
    # targets = ['RCCA', 'REICA', 'RIICA', 'RACA', 'RMCA', 'RPCA', 'REVA', 'RIVA', 'BA', 'LCCA', 'LEICA', 'LIICA', 'LACA',
    #            'LMCA', 'LPCA', 'LEVA', 'LIVA']
    feature_names=''
    all_importance = []
    portions = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
    for portion in portions:
        with open('fs'+os.sep+target+'_'+source+'_'+str(portion)+'_fs.csv', 'w', newline="") as csv_file:
            wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
            wr.writerow(['predict', 'label'])
            for i in range(0, 10):
                if(source == 'exin'):
                    id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
                    fName = 'fs_exin_'+target
                else:
                    id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
                    fName = 'fs_ex_'+target
                feature_names = x_data_all.columns
                # Build a forest and compute the feature importances
                forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
                forest.fit(x_data_all, y_data_all)
                importances = forest.feature_importances_
                if i == 0:
                    all_importance = importances
                else:
                    all_importance = np.vstack((all_importance, importances))
                indices = np.argsort(importances)[::-1]  # most important first
                # classifier =====
                cut = int(round(len(indices)*portion, 0))-1
                indices_cut = indices[0:cut]
                # .iloc replaces .ix, which was deprecated in pandas 0.20 and
                # removed in 1.0; indices_cut holds integer positions.
                x_data = x_data_all.iloc[:, indices_cut]
                scaled_data = data_util.scale(x_data)
                x_data = pd.DataFrame(scaled_data, index=x_data.index, columns=x_data.columns)
                for train, test in LeaveOneOut().split(x_data_all):
                    y_train = y_data_all.iloc[train]
                    classifier = SVC(kernel='linear', random_state=seed, verbose=False)
                    classifier.fit(x_data.iloc[train], y_train)
                    predict = classifier.predict(x_data.iloc[test])
                    label = y_data_all.iloc[test].values[0][0]
                    wr.writerow([predict[0], label])
예제 #3
0
# 10-fold
# NOTE(review): the comment above says "10-fold" but n_splits=3 below gives
# 3-fold splits — confirm which was intended.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
# Pick the data loader by source; 'soure' (sic) is defined earlier in this
# script, outside the visible chunk.
if (soure == 'exin'):
    id_all, x_data_all, y_data_all = cdu.get_exin_data(target)
    fName = 'cnn_exin_' + target
else:
    id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
    fName = 'cnn_ex_' + target
# Split the feature table into one sub-table per measurement group using the
# column lists declared in data_util (ed, fv, pi, ps, ri, tav).
x_data_ed = x_data_all[data_util.cnn_col_ed]
x_data_fv = x_data_all[data_util.cnn_col_fv]
x_data_pi = x_data_all[data_util.cnn_col_pi]
x_data_ps = x_data_all[data_util.cnn_col_ps]
x_data_ri = x_data_all[data_util.cnn_col_ri]
x_data_tav = x_data_all[data_util.cnn_col_tav]
for index, (train, test) in enumerate(kfold.split(x_data_all, y_data_all)):
    # Scale each measurement group per split, then append a trailing axis so
    # each sample becomes (features, 1) — presumably the channel dimension
    # expected by a 1-D CNN input; confirm against the model definition.
    x_train_ed = data_util.scale(x_data_ed.iloc[train])
    x_train_ed = np.expand_dims(x_train_ed, 2)
    x_train_fv = data_util.scale(x_data_fv.iloc[train])
    x_train_fv = np.expand_dims(x_train_fv, 2)
    x_train_pi = data_util.scale(x_data_pi.iloc[train])
    x_train_pi = np.expand_dims(x_train_pi, 2)
    x_train_ps = data_util.scale(x_data_ps.iloc[train])
    x_train_ps = np.expand_dims(x_train_ps, 2)
    x_train_ri = data_util.scale(x_data_ri.iloc[train])
    x_train_ri = np.expand_dims(x_train_ri, 2)
    x_train_tav = data_util.scale(x_data_tav.iloc[train])
    x_train_tav = np.expand_dims(x_train_tav, 2)

    # NOTE(review): the test split is scaled independently of the training
    # split here (scale() is fit on each subset separately) — this may leak
    # or mismatch statistics; verify data_util.scale's intended usage.
    x_test_ed = data_util.scale(x_data_ed.iloc[test])
    x_test_ed = np.expand_dims(x_test_ed, 2)
    x_test_fv = data_util.scale(x_data_fv.iloc[test])
예제 #4
0
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
from my_util import data_util
from sklearn import metrics
from carotid import carotid_data_util as cdu
import numpy as np
import pandas as pd

# Cluster the scaled carotid features with DBSCAN and compare the discovered
# clusters against the true labels using external clustering metrics.
target = 'RCCA'
seed = 7
id_all, x_data_all, y_data_all = cdu.get_ex_data(target)
fName = 'dbscan.csv'
x_data_all = data_util.scale(x_data_all)
labels_true = y_data_all.values.ravel()

# Compute DBSCAN.  min_samples is ~1% of the sample count; it must be an int —
# round(x, 0) on a float returns a float, which sklearn rejects when it is
# forwarded to NearestNeighbors(n_neighbors=...), so cast explicitly.
mSample = int(round(id_all.shape[0] / 100))
db = DBSCAN(eps=300, min_samples=mSample).fit(x_data_all)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
# Noise points are labelled -1 by DBSCAN
labels = db.labels_
# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
# print("Adjusted Rand Index: %0.3f"
#       % metrics.adjusted_rand_score(labels_true, labels))
# print("Adjusted Mutual Information: %0.3f"
예제 #5
0
파일: mlp.py 프로젝트: chingheng113/ml_farm
# Alternative 3-fold evaluation kept commented out in the original:
# kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
# for index, (train, test) in enumerate(kfold.split(x_data_all, y_data_all)):
#     x_train = data_util.scale(x_data_all.iloc[train])
#     x_test = data_util.scale(x_data_all.iloc[test])
#     y_train = y_data_all.iloc[train]
#     model, history= ann(x_train, y_train)
#     loss, acc = model.evaluate(x_test, to_categorical(y_data_all.iloc[test]))
#     y_pred = model.predict(x_test)
#     predict_result_hold = id_all.iloc[test]
#     predict_result_hold['label'] = y_data_all.iloc[test]
#     predict_result_hold['0'] = y_pred[:, 0]
#     predict_result_hold['1'] = y_pred[:, 1]
#     predict_result_hold.to_csv(cdu.get_save_path(fName+'_'+str(index)+'.csv'), sep=',', encoding='utf-8')
#     print(acc, loss)


# Leave-one-out: train the ANN on all-but-one sample and record the class
# probabilities predicted for the single held-out sample.
x_data_all = pd.DataFrame(data_util.scale(x_data_all),
                          index=x_data_all.index,
                          columns=x_data_all.columns)
rows = []
for train_idx, test_idx in LeaveOneOut().split(x_data_all):
    model, history = ann(x_data_all.iloc[train_idx], y_data_all.iloc[train_idx])
    loss, acc = model.evaluate(x_data_all.iloc[test_idx],
                               to_categorical(y_data_all.iloc[test_idx], 2))
    probs = model.predict(x_data_all.iloc[test_idx])[0]
    rows.append([id_all.iloc[test_idx].values[0][0],
                 y_data_all.iloc[test_idx].values[0][0],
                 probs[0],
                 probs[1]])
predict_result = pd.DataFrame(rows, columns=['id', 'label', '0', '1'])
predict_result.to_csv(cdu.get_save_path(fName + '.csv'), sep=',', encoding='utf-8')

print('done')