Esempio n. 1
0
    def RecommendByET(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
        """多标签分类  """

        clf = ExtraTreeClassifier()
        clf.fit(train_data, train_data_y)
        predictions = clf.predict_proba(test_data)
        """预测结果转化为data array"""
        predictions = DataProcessUtils.convertMultilabelProbaToDataArray(predictions)
        print(predictions)

        recommendList = DataProcessUtils.getListFromProbable(predictions, range(1, train_data_y.shape[1] + 1),
                                                             recommendNum)
        answerList = test_data_y
        print(predictions)
        print(test_data_y)
        print(recommendList)
        print(answerList)
        return [recommendList, answerList]
Esempio n. 2
0
#    data = ''
    with open(fname) as f:
        for s in f:
            tmp = map(int, s.split())
            labels.append(tmp[-1])
            res.append(tmp[:-1])
#            data += (str(tmp)[1:-1]).replace(',', '')+'\n'
#    with open('out.txt', 'w') as o:
#        o.write(str(data)[1:-1])
    return res, labels

X, Y = readData('german.data-numeric.txt')
Xt = X[:-200] ; Yt = Y[:-200]
XT = X[-200:] ; YT = Y[-200:]
print len(Xt)
clf = ExtraTreeClassifier(max_depth=None, random_state=0)
clf = clf.fit(Xt, Yt)

#proba = clf.predict_proba(XT)
#print len(proba)
#print proba

err = 0
for i, x in enumerate(XT):
    if clf.predict(x) != YT[i]: 
        prob = clf.predict_proba(x)
#        print prob
        err += 1

print err
Esempio n. 3
0
## randomized tree with default setting
clf_rnd_tree = ExtraTreeClassifier()
clf_rnd_tree.fit(rnd_training_X, rnd_training_y)
export_graphviz(clf_rnd_tree, out_file = 'default_rnd_tree.dot',
                feature_names = attribute_names,
                class_names = bi_class_target_attrs,
                filled = True, rounded = True,
                special_characters = True)
print(check_output('dot -Tpdf default_rnd_tree.dot -o default_rnd_tree.pdf', shell = True))
print("Accuracy = %s"%accuracy_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X)))
print("Precision = %s"%precision_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X)))
print("Recall = %s"%recall_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X)))
print("F = %s"%fbeta_score(rnd_test_y, clf_rnd_tree.predict(rnd_test_X), beta=1))
print("Confusion matrix = %s"%confusion_matrix(rnd_test_y, clf_rnd_tree.predict(rnd_test_X)))
fpr, tpr, thresholds = roc_curve(rnd_test_y, clf_rnd_tree.predict_proba(rnd_test_X)[:, 1])
axes_roc.plot(fpr, tpr, label = "Randomized tree-1")
axes_roc.set_title("ROC of CART and a randomized tree")
axes_roc.set_xlabel("FPR")
axes_roc.set_ylabel("TPR")
axes_roc.set_ylim(0, 1.1)
axes_roc.legend(loc = 'best', fontsize = 'medium')
roc_auc_scorer = get_scorer("roc_auc")
print("ROC AUC = %s"%roc_auc_scorer(clf_rnd_tree, rnd_test_X, rnd_test_y))

# randomized tree with max_depth = 4, min_samples_leaf = 5
clf_rnd_tree = ExtraTreeClassifier(max_depth = 4, min_samples_leaf = 5)
clf_rnd_tree.fit(rnd_training_X, rnd_training_y)
export_graphviz(clf_rnd_tree, out_file = 'rnd_tree.dot',
                feature_names = attribute_names,
                class_names = bi_class_target_attrs,