def RecommendByET(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Multi-label classification with a single extra-tree classifier.

    Fits an ``ExtraTreeClassifier`` on ``train_data`` / ``train_data_y``,
    predicts probabilities for ``test_data`` and hands them, together with
    the candidate label ids ``1 .. train_data_y.shape[1]`` and
    ``recommendNum``, to ``DataProcessUtils.getListFromProbable``.

    Intermediate and final results are printed for inspection.

    Returns:
        A two-element list ``[recommendList, answerList]``; ``answerList``
        is ``test_data_y`` passed through unchanged.
    """
    model = ExtraTreeClassifier()
    model.fit(train_data, train_data_y)
    raw_proba = model.predict_proba(test_data)

    # Convert the prediction result into a data array.
    proba_array = DataProcessUtils.convertMultilabelProbaToDataArray(raw_proba)
    print(proba_array)

    recommendList = DataProcessUtils.getListFromProbable(
        proba_array,
        range(1, train_data_y.shape[1] + 1),  # label ids start at 1
        recommendNum,
    )
    answerList = test_data_y

    for shown in (proba_array, test_data_y, recommendList, answerList):
        print(shown)

    return [recommendList, answerList]
# NOTE(review): this line is a whitespace-collapsed Python 2 snippet (it uses
# `print` statements and indexes the result of `map`). It starts inside a
# function body whose `def` line is not visible here -- presumably `readData`,
# which the script part calls; confirm against the full file.
# Visible function tail: every line of the file `fname` is split on whitespace
# and converted to ints; the last int is appended to `labels`, the remaining
# ints to `res`, and `res, labels` is returned.
# Script part: loads 'german.data-numeric.txt', keeps the last 200 rows as the
# test split (XT, YT) and the rest as the training split (Xt, Yt), fits
# ExtraTreeClassifier(max_depth=None, random_state=0) on the training split,
# and counts in `err` the test rows whose prediction differs from YT[i].
# `prob` is assigned for each such row but never read.
# data = '' with open(fname) as f: for s in f: tmp = map(int, s.split()) labels.append(tmp[-1]) res.append(tmp[:-1]) # data += (str(tmp)[1:-1]).replace(',', '')+'\n' # with open('out.txt', 'w') as o: # o.write(str(data)[1:-1]) return res, labels X, Y = readData('german.data-numeric.txt') Xt = X[:-200] ; Yt = Y[:-200] XT = X[-200:] ; YT = Y[-200:] print len(Xt) clf = ExtraTreeClassifier(max_depth=None, random_state=0) clf = clf.fit(Xt, Yt) #proba = clf.predict_proba(XT) #print len(proba) #print proba err = 0 for i, x in enumerate(XT): if clf.predict(x) != YT[i]: prob = clf.predict_proba(x) # print prob err += 1 print err
# NOTE(review): whitespace-collapsed script fragment; the final
# export_graphviz(...) call is cut off at the end of the visible text.
# Part 1 (default settings): fits ExtraTreeClassifier() on
# rnd_training_X / rnd_training_y, exports the tree to 'default_rnd_tree.dot'
# and renders it to 'default_rnd_tree.pdf' through the `dot` command, then
# prints accuracy, precision, recall, F (beta=1), the confusion matrix and the
# "roc_auc" scorer value for rnd_test_X / rnd_test_y. The ROC curve is drawn
# on axes_roc from column 1 of predict_proba(rnd_test_X).
# Each of the five metric prints calls clf_rnd_tree.predict(rnd_test_X) again.
# Part 2: rebinds clf_rnd_tree to ExtraTreeClassifier(max_depth = 4,
# min_samples_leaf = 5), refits it and begins exporting to 'rnd_tree.dot'.
## randomized tree with default setting clf_rnd_tree = ExtraTreeClassifier() clf_rnd_tree.fit(rnd_training_X, rnd_training_y) export_graphviz(clf_rnd_tree, out_file = 'default_rnd_tree.dot', feature_names = attribute_names, class_names = bi_class_target_attrs, filled = True, rounded = True, special_characters = True) print(check_output('dot -Tpdf default_rnd_tree.dot -o default_rnd_tree.pdf', shell = True)) print("Accuracy = %s"%accuracy_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X))) print("Precision = %s"%precision_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X))) print("Recall = %s"%recall_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X))) print("F = %s"%fbeta_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X), beta=1)) print("Confusion matrix = %s"%confusion_matrix(rnd_test_y, clf_rnd_tree.predict(rnd_test_X))) fpr, tpr, thresholds = roc_curve(rnd_test_y, clf_rnd_tree.predict_proba(rnd_test_X)[:, 1]) axes_roc.plot(fpr, tpr, label = "Randomized tree-1") axes_roc.set_title("ROC of CART and a randomized tree") axes_roc.set_xlabel("FPR") axes_roc.set_ylabel("TPR") axes_roc.set_ylim(0, 1.1) axes_roc.legend(loc = 'best', fontsize = 'medium') roc_auc_scorer = get_scorer("roc_auc") print("ROC AUC = %s"%roc_auc_scorer(clf_rnd_tree, rnd_test_X, rnd_test_y)) # randomized tree with max_depth = 4, min_samples_leaf = 5 clf_rnd_tree = ExtraTreeClassifier(max_depth = 4, min_samples_leaf = 5) clf_rnd_tree.fit(rnd_training_X, rnd_training_y) export_graphviz(clf_rnd_tree, out_file = 'rnd_tree.dot', feature_names = attribute_names, class_names = bi_class_target_attrs,