Example #1
def randomForestTest(feature_len, all_lines, all_features, all_labels):
    # Sweep the number of trees in the random forest over 10 runs and
    # report the setting with the best average accuracy.
    # RandomForest and now_provider are assumed to be imported from the
    # project's own modules.
    best_trees_num = 0
    temp = 0  # largest cumulative accuracy seen so far
    counts = {}
    for i in range(10):
        print("Test %d:" % (i + 1))
        # 80/20 train/test split
        train_features = all_features[0:int(0.8 * len(all_features))]
        train_labels = all_labels[0:int(0.8 * len(all_features))]
        test_features = all_features[int(0.8 * len(all_features)):]
        test_labels = all_labels[int(0.8 * len(all_features)):]
        for trees_num in range(25, 36):
            rate = 0  # number of correct predictions for this setting
            if trees_num not in counts:
                counts[trees_num] = 0
            print("trees_num:%d " % (trees_num), end=" ")
            new_forest = RandomForest(trees_num)
            new_forest.buildTrees(train_features, train_labels,
                                  len(train_features[0]), 3, 6)
            length = len(test_labels)
            for j in range(0, length):
                res = new_forest.predictForest(test_features[j])
                if res == test_labels[j]:
                    rate += 1
            print(rate / length)
            counts[trees_num] += rate / length
            # keep the trees_num with the highest cumulative accuracy
            if temp < counts[trees_num]:
                temp = counts[trees_num]
                best_trees_num = trees_num
        # regenerate features and labels from the raw lines for the next run
        all_features, all_labels = now_provider.getFeatureAndLabel(
            all_lines, feature_len)
    print("Best trees_num:%d %f" %
          (best_trees_num, counts[best_trees_num] / 10))
    for x in counts:
        print(x, counts[x])
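
A minimal driver for this test might look like the sketch below. The file
name, the feature_len value, and the way the raw lines are read are
placeholders; now_provider.getFeatureAndLabel is assumed to behave exactly
as it does inside the loop above.

if __name__ == "__main__":
    feature_len = 8  # assumed number of features per sample
    with open("data.txt") as f:  # hypothetical input file
        all_lines = f.readlines()
    all_features, all_labels = now_provider.getFeatureAndLabel(
        all_lines, feature_len)
    randomForestTest(feature_len, all_lines, all_features, all_labels)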
Example #2
def compareTest(feature_len, all_lines, all_features, all_labels):
    # Train and evaluate every classifier on the same 80/20 split for
    # 10 runs and report each model's average accuracy.
    # NaiveBayes, Knn, Logistic, NeuralNetwork, Tree, AdaBoost,
    # RandomForest, SVM and now_provider are assumed to be imported from
    # the project's own modules.
    count = {}
    for i in range(10):
        print("\nTest %d" % (i + 1))
        # 80/20 train/test split
        train_features = all_features[0:int(0.8 * len(all_features))]
        train_labels = all_labels[0:int(0.8 * len(all_features))]
        test_features = all_features[int(0.8 * len(all_features)):]
        test_labels = all_labels[int(0.8 * len(all_features)):]
        length = len(test_labels)

        rate = 0  # number of correct predictions on the test set
        print("NaiveBayes : ", end="")
        new_bayes = NaiveBayes(train_features, train_labels, feature_len)
        new_bayes.train()
        for j in range(0, length):
            res = new_bayes.predict(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "NaiveBayes" not in count:
            count["NaiveBayes"] = rate / length
        else:
            count["NaiveBayes"] += rate / length

        rate = 0
        print("KNN : ", end="")
        for j in range(0, length):
            res = Knn(train_features, train_labels, test_features[j], 3)
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "KNN" not in count:
            count["KNN"] = rate / length
        else:
            count["KNN"] += rate / length

        rate = 0
        print("Logistic : ", end="")
        new_logistic = Logistic(train_features,
                                train_labels,
                                feature_len,
                                alpha=5,
                                tol=0.000001)
        new_logistic.train()
        for j in range(0, length):
            res = new_logistic.predict(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "Logistic" not in count:
            count["Logistic"] = rate / length
        else:
            count["Logistic"] += rate / length

        rate = 0
        print("NeuralNetwork : ", end="")
        new_NN = NeuralNetwork(train_features,
                               train_labels,
                               feature_len,
                               hidden_num=32,
                               learn_rate=100)
        new_NN.train()
        for j in range(0, length):
            res = new_NN.predict(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "NeuralNetwork" not in count:
            count["NeuralNetwork"] = rate / length
        else:
            count["NeuralNetwork"] += rate / length

        rate = 0
        print("Tree : ", end="")
        new_tree = Tree(train_features, train_labels, len(train_features[0]),
                        3, 8)
        new_tree.train()
        for j in range(0, length):
            res = new_tree.predictTree(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "Tree" not in count:
            count["Tree"] = rate / length
        else:
            count["Tree"] += rate / length

        rate = 0
        print("AdaBoost : ", end="")
        new_boost = AdaBoost(train_features,
                             train_labels,
                             len(train_features[0]),
                             28,
                             mode=2)
        new_boost.train()
        for j in range(0, length):
            res = new_boost.predict(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "AdaBoost" not in count:
            count["AdaBoost"] = rate / length
        else:
            count["AdaBoost"] += rate / length

        rate = 0
        print("RandomForest : ", end="")
        new_forest = RandomForest(30)
        new_forest.buildTrees(train_features, train_labels,
                              len(train_features[0]), 3, 6)
        for j in range(0, length):
            res = new_forest.predictForest(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "RandomForest" not in count:
            count["RandomForest"] = rate / length
        else:
            count["RandomForest"] += rate / length

        rate = 0
        print("SVM : ", end="")
        new_svm = SVM(train_features,
                      train_labels,
                      C=43,
                      function='RBF',
                      d=0.53)
        new_svm.train()
        for j in range(0, length):
            res = new_svm.predict(test_features[j])
            if res == test_labels[j]:
                rate += 1
        print(rate / length)
        if "SVM" not in count:
            count["SVM"] = rate / length
        else:
            count["SVM"] += rate / length

        # regenerate features and labels from the raw lines for the next run
        all_features, all_labels = now_provider.getFeatureAndLabel(
            all_lines, feature_len)

    print("\nAverage:")
    for x in count:
        print(x, end=": ")
        print(count[x] / 10)
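
Every model block in compareTest repeats the same accuracy loop and the
same dictionary update. A small helper along the lines of the sketch below
(not part of the original code, shown only as a possible refactoring) would
remove that repetition; it assumes each model exposes a one-sample predict
method as used above.

def evaluate(name, predict_fn, test_features, test_labels, count):
    # Accuracy of predict_fn on the test set, added to the running total
    # for this model name.
    correct = 0
    for feature, label in zip(test_features, test_labels):
        if predict_fn(feature) == label:
            correct += 1
    accuracy = correct / len(test_labels)
    print("%s : %f" % (name, accuracy))
    count[name] = count.get(name, 0) + accuracy
    return accuracy

Each block in the loop would then shrink to a single call, e.g.
evaluate("NaiveBayes", new_bayes.predict, test_features, test_labels, count).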