def get_degree():
    """Compute percentile cut points for four facial features.

    Loads the face data set, takes the first 73 entries of the epicanthus,
    forehead, nasal-bridge and ocular features, and evaluates each of them
    at the 20th, 40th, 60th and 80th percentiles, rounded to two decimals.
    The four resulting lists are printed and returned together.

    The original author's note says the mole-count feature is deliberately
    left out of this computation.

    :return: a Bunch with the attributes ``epicanthus_feature_degree``,
        ``forehead_feature_degree``, ``nasalBridge_feature_degree`` and
        ``ocular_feature_degree``, each a list of four rounded percentiles
    """
    face = load_facedata()

    def cut_points(samples):
        # One rounded percentile per step: 20, 40, 60, 80.
        return [round(np.percentile(samples, pct), 2)
                for pct in range(20, 100, 20)]

    epicanthus_cuts = cut_points(face.epicanthus_feature[:73])
    # Unlike the other features, this one is indexed with [0] before slicing.
    forehead_cuts = cut_points(face.forehead_feature[0][:73])
    nasal_bridge_cuts = cut_points(face.nasalBridge_feature[:73])
    ocular_cuts = cut_points(face.ocular_feature[:73])

    print('forehead_feature_degree:%s' % forehead_cuts)
    print('nasalBridge_feature_degree:%s' % nasal_bridge_cuts)
    print('ocular_feature_degree:%s' % ocular_cuts)
    print('epicanthus_feature_degree:%s' % epicanthus_cuts)

    return Bunch(epicanthus_feature_degree=epicanthus_cuts,
                 forehead_feature_degree=forehead_cuts,
                 nasalBridge_feature_degree=nasal_bridge_cuts,
                 ocular_feature_degree=ocular_cuts)
def save_model(num=100, rate=0.5):
    """Fit an AdaBoost classifier on the full face data set and persist it.

    The weak learner is a depth-2 decision tree.  The fitted model is
    written with joblib to ``abc.model`` (relative path).

    :param num: number of boosting estimators (``n_estimators``)
    :param rate: learning rate handed to AdaBoost
    """
    dataset = load_facedata()
    weak_learner = DecisionTreeClassifier(max_depth=2,
                                          min_samples_split=20,
                                          min_samples_leaf=5)
    booster = AdaBoostClassifier(weak_learner,
                                 algorithm="SAMME",
                                 n_estimators=num,
                                 learning_rate=rate)
    booster.fit(dataset.data, dataset.target)
    joblib.dump(booster, 'abc.model')
def get_pre_acc(csv_path, num=50, rate=0.6):
    """Print the fraction of samples that the saved model labels as 1.

    Loads the data behind ``csv_path``, runs the classifier stored in
    ``abc.model`` over it, prints the raw predictions and then the share of
    predictions equal to 1 (reported under the label ``sensitivity``).

    :param csv_path: forwarded to ``load_facedata``
    :param num: unused; kept for interface compatibility
    :param rate: unused; kept for interface compatibility
    """
    features = load_facedata(csv_path).data
    model = joblib.load('abc.model')
    answer = model.predict(features)
    print(answer)

    positives = sum(1 for label in answer if label == 1)
    acc = float(positives) / float(len(answer))
    print('sensitivity:{}'.format(acc))
def get_proba(sample, num=100, rate=0.5):
    '''
    Predict class probabilities for a sample.

    A fresh AdaBoost classifier is trained on the full face data set on
    every call and then queried with ``sample``.

    :param sample: feature values of one sample as a flat sequence, e.g.
        [1, 2, 3, 4, 5], or a 2-D array-like with one row per sample
    :param num: number of boosting iterations (``n_estimators``)
    :param rate: step size (learning rate) of the classifier
    :return: the ``predict_proba`` result: one row per sample holding
        [probability of class 0, probability of class 1]
    '''
    # predict_proba expects a 2-D (n_samples, n_features) input, so a single
    # flat sample is promoted to one row.  Anything already 2-D, or of a type
    # other than list/tuple/1-D array, is passed through unchanged.
    if isinstance(sample, (list, tuple)) or getattr(sample, 'ndim', None) == 1:
        sample = np.asarray(sample)
        if sample.ndim == 1:
            sample = sample.reshape(1, -1)

    fd = load_facedata()
    X = fd.data
    y = fd.target
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2,
                                                    min_samples_split=20,
                                                    min_samples_leaf=5),
                             algorithm="SAMME",
                             n_estimators=num,
                             learning_rate=rate)
    clf.fit(X, y)
    sample_proba = clf.predict_proba(sample)
    return sample_proba
def get_spec_acc(num=50, rate=0.6):
    """Train AdaBoost on a random 75/25 split and print evaluation metrics.

    Prints, in order: the held-out labels, the predictions for them, the
    hold-out accuracy, the per-fold scores and the mean score of a 10-fold
    cross-validation run on the full data set, and a classification report
    for the hold-out set.

    :param num: number of boosting estimators (``n_estimators``)
    :param rate: learning rate handed to AdaBoost
    """
    fd = load_facedata()
    X = fd.data
    y = fd.target

    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    print(y_test)

    # Train the AdaBoost classifier.
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2,
                                                    min_samples_split=20,
                                                    min_samples_leaf=5),
                             algorithm="SAMME",
                             n_estimators=num,
                             learning_rate=rate)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    answer = clf.predict(X_test)
    print(answer)
    print('预测准确度:{}'.format(clf.score(X_test, y_test)))

    scores = cross_val_score(clf, X, y, cv=10)
    print('十折交叉验证scores:{}'.format(scores))
    # Report the plain mean of the fold scores, with no offset applied.
    print('十折交叉验证平均准确度:{}'.format(np.mean(scores)))

    print('分类报告:')
    print(classification_report(y_test, answer, target_names=['0', '1']))
    # Trailing blank line in the output.
    print('')


# Disabled ROC-curve comparison, kept as an inert string literal.
'''
def ana_roc(num=50, rate=0.6):
    fd = load_facedata()
    X = fd.data
    y = fd.target
    # 拆分训练数据与测试数据
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    print len(y_test)
    # 训练adaboost分类器
    abc = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5),
                             algorithm="SAMME",
                             n_estimators=num, learning_rate=rate)
    probas_1 = abc.fit(X_train, y_train).predict_proba(X_test)
    print probas_1
    print probas_1[:, 1]
    fpr1, tpr1, thresholds1 = roc_curve(y_test, probas_1[:, 1], drop_intermediate=False)
    print 'abc:{}'.format(abc.score(X_test, y_test))
    abc_auc = auc(fpr1, tpr1)
    lr = LogisticRegression(C=1., solver='lbfgs')
    probas_2 = lr.fit(X_train, y_train).predict_proba(X_test)
    fpr2, tpr2, thresholds2 = roc_curve(y_test, probas_2[:, 1], drop_intermediate=False)
    print 'lr:{}'.format(lr.score(X_test, y_test))
    svc = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
              decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
              max_iter=-1, probability=True, random_state=None, shrinking=True,
              tol=0.001, verbose=False)
    probas_3 = svc.fit(X_train, y_train).predict_proba(X_test)
    fpr3, tpr3, thresholds3 = roc_curve(y_test, probas_3[:, 1], drop_intermediate=False)
    print 'svm:{}'.format(svc.score(X_test, y_test))
    print fpr1
    print len(fpr1)
    print tpr1
    print thresholds1
    plt.figure()
    lw = 2
    plt.plot(fpr3, tpr3, color='b', lw=lw, label='GGF + PCA + SVM', marker='*', linestyle='--')
    plt.plot(fpr2, tpr2, color='g', lw=lw, label='GTF + PCA + SVM', marker='o', linestyle=':')
    plt.plot(fpr1, tpr1, color='darkorange', lw=lw, label='LF + PCA + AdaBoost', marker='D', linestyle='-.')
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('1 - Specificity')
    plt.ylabel('Sensitivity')
    plt.title(' ROC curves of different methods')
    plt.legend(loc="lower right")
    plt.show()
'''