def randomForestTest(feature_len, all_lines, all_features, all_labels):
    """Grid-search the random-forest size over 10 runs of an 80/20 split.

    For each of 10 runs the data is split 80/20 into train/test, forests of
    25..35 trees are built and scored on the test part, and each size's
    accuracy is accumulated in ``counts``.  After every run the features and
    labels are re-fetched from ``now_provider`` (presumably reshuffled --
    confirm against the provider).

    Args:
        feature_len: number of features per sample (passed to the provider).
        all_lines: raw input lines used to regenerate features each run.
        all_features: list of feature vectors.
        all_labels: list of labels aligned with ``all_features``.

    Prints per-size accuracy for every run, then the best tree count and its
    average accuracy over the 10 runs, then the raw accumulated totals.
    """
    best_trees_num = 0
    best_total = 0  # highest accumulated accuracy seen so far (was `temp`)
    counts = {}     # trees_num -> accuracy summed over the 10 runs
    for i in range(10):
        print("Test %d:" % (i + 1))
        # 80/20 train/test split; computed once per run instead of four times.
        split = int(0.8 * len(all_features))
        train_features = all_features[:split]
        train_labels = all_labels[:split]
        test_features = all_features[split:]
        test_labels = all_labels[split:]
        length = len(test_labels)  # hoisted: invariant across tree sizes
        for trees_num in range(25, 36):
            counts.setdefault(trees_num, 0)
            print("trees_num:%d " % (trees_num), end=" ")
            new_forest = RandomForest(trees_num)
            new_forest.buildTrees(train_features, train_labels,
                                  len(train_features[0]), 3, 6)
            # Count correct predictions on the held-out 20%.
            rate = sum(
                1 for feat, lab in zip(test_features, test_labels)
                if new_forest.predictForest(feat) == lab
            )
            print(rate / length)
            counts[trees_num] += rate / length
            if best_total < counts[trees_num]:
                best_total = counts[trees_num]
                best_trees_num = trees_num
        # Regenerate (presumably reshuffled) data for the next run.
        all_features, all_labels = now_provider.getFeatureAndLabel(
            all_lines, feature_len)
    print("Best trees_num:%d %f" % (best_trees_num, counts[best_trees_num] / 10))
    for x in counts:
        print(x, counts[x])
def compareTest(feature_len, all_lines, all_features, all_labels):
    """Compare eight classifiers over 10 runs of an 80/20 train/test split.

    Each run trains NaiveBayes, KNN, Logistic, NeuralNetwork, Tree, AdaBoost,
    RandomForest and SVM on the same split, prints each model's accuracy, and
    accumulates it in ``count``.  After each run the data is re-fetched from
    ``now_provider`` (presumably reshuffled -- confirm against the provider).
    Finally the 10-run average accuracy per model is printed.

    Args:
        feature_len: number of features per sample.
        all_lines: raw input lines used to regenerate features each run.
        all_features: list of feature vectors.
        all_labels: list of labels aligned with ``all_features``.
    """
    count = {}  # model name -> accuracy summed over the 10 runs
    for i in range(10):
        print("\nTest %d" % (i + 1))
        split = int(0.8 * len(all_features))
        train_features = all_features[:split]
        train_labels = all_labels[:split]
        test_features = all_features[split:]
        test_labels = all_labels[split:]
        length = len(test_labels)

        def _evaluate(name, make_predictor):
            """Score one model: print its label, build it, print accuracy."""
            # The label is printed before building/training so that any output
            # produced during training appears after it, matching the original
            # statement order.
            print("%s : " % name, end="")
            predict = make_predictor()
            rate = sum(1 for feat, lab in zip(test_features, test_labels)
                       if predict(feat) == lab)
            print(rate / length)
            count[name] = count.get(name, 0) + rate / length

        # One tiny builder per model: construct + train, return the
        # per-sample prediction callable.
        def _bayes():
            model = NaiveBayes(train_features, train_labels, feature_len)
            model.train()
            return model.predict

        def _knn():
            # KNN has no training step; each sample is classified directly.
            return lambda feat: Knn(train_features, train_labels, feat, 3)

        def _logistic():
            model = Logistic(train_features, train_labels, feature_len,
                             alpha=5, tol=0.000001)
            model.train()
            return model.predict

        def _neural_network():
            model = NeuralNetwork(train_features, train_labels, feature_len,
                                  hidden_num=32, learn_rate=100)
            model.train()
            return model.predict

        def _tree():
            model = Tree(train_features, train_labels,
                         len(train_features[0]), 3, 8)
            model.train()
            return model.predictTree

        def _ada_boost():
            model = AdaBoost(train_features, train_labels,
                             len(train_features[0]), 28, mode=2)
            model.train()
            return model.predict

        def _random_forest():
            model = RandomForest(30)
            model.buildTrees(train_features, train_labels,
                             len(train_features[0]), 3, 6)
            return model.predictForest

        def _svm():
            model = SVM(train_features, train_labels, C=43,
                        function='RBF', d=0.53)
            model.train()
            return model.predict

        _evaluate("NaiveBayes", _bayes)
        _evaluate("KNN", _knn)
        _evaluate("Logistic", _logistic)
        _evaluate("NeuralNetwork", _neural_network)
        _evaluate("Tree", _tree)
        _evaluate("AdaBoost", _ada_boost)
        _evaluate("RandomForest", _random_forest)
        _evaluate("SVM", _svm)

        # Regenerate (presumably reshuffled) data for the next run.
        all_features, all_labels = now_provider.getFeatureAndLabel(
            all_lines, feature_len)
    print("\nAverage:")
    for x in count:
        print(x, end=": ")
        print(count[x] / 10)