def classify(classify_class1=True):
    """Classify every test sample with the project's KNN routine and print accuracy.

    The dataset is loaded from ``dp.bow_log_dataset_dir`` when that path
    exists; otherwise it is obtained from ``dp.load_file_save_dataset()``.
    For each of the ``log_dataset.test_num`` test samples a verdict line is
    printed, followed by the overall accuracy percentage.

    Args:
        classify_class1: If true, use the ``*1`` label set
            (``train_class_labels1``, ``test_class_labels1``,
            ``class_label1_map``); otherwise use the ``*2`` label set.

    Returns:
        None. All results are written to stdout.
    """
    if not os.path.exists(dp.bow_log_dataset_dir):
        log_dataset = dp.load_file_save_dataset()
    else:
        log_dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)

    # Features are shared by both label sets; only the labels and map differ.
    test_features = log_dataset.test_features
    train_features = log_dataset.train_features
    if classify_class1:
        test_class_labels = log_dataset.test_class_labels1
        train_class_labels = log_dataset.train_class_labels1
        label_map = log_dataset.class_label1_map
    else:
        test_class_labels = log_dataset.test_class_labels2
        train_class_labels = log_dataset.train_class_labels2
        label_map = log_dataset.class_label2_map

    acc = 0
    for i in range(log_dataset.test_num):
        # The trailing 3 is passed straight to knn.classify (presumably k).
        class_label = knn.classify(test_features[i], train_features, train_class_labels, 3)
        actual_label = test_class_labels[i]
        if class_label == actual_label:
            acc += 1
            # Print the mapped label name, matching the error branch, rather
            # than the raw label value.
            print('正确', label_map[class_label])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (label_map[class_label], label_map[actual_label]))
    print('正确率:%.2f%%' % (100.0 * acc / log_dataset.test_num))
def classify(classify_class1=True):
    """Train the project's Softmax classifier and report accuracy on the test set.

    The dataset comes from ``dp.bow_log_dataset_dir`` when that path exists,
    otherwise from ``dp.load_file_save_dataset()``. A verdict line is printed
    per prediction, then the accuracy percentage and the elapsed time of the
    training and prediction phases.

    Args:
        classify_class1: If true, use the ``*1`` labels, label map and class
            count; otherwise use the ``*2`` ones.

    Returns:
        None. All results are written to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    # Features are the same for both label sets.
    test_x = dataset.test_features
    train_x = dataset.train_features
    if classify_class1:
        test_y, train_y = dataset.test_class_labels1, dataset.train_class_labels1
        names, n_classes = dataset.class_label1_map, dataset.class1_num
    else:
        test_y, train_y = dataset.test_class_labels2, dataset.train_class_labels2
        names, n_classes = dataset.class_label2_map, dataset.class2_num

    fit_began = time.perf_counter()
    model = softmax.Softmax(train_x, train_y, n_classes, alpha=0.01, iterations=50)
    model.train()
    fit_ended = time.perf_counter()

    # The prediction timer also covers the reporting loop below.
    predict_began = time.perf_counter()
    predictions = model.predict(test_x)
    hits = 0
    for idx, predicted in enumerate(predictions):
        expected = test_y[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (names[predicted], names[expected]))
    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    fit_seconds = fit_ended - fit_began
    predict_seconds = predict_ended - predict_began
    print('Softmax-训练阶段运行时间:%s秒' % fit_seconds)
    print('Softmax-预测阶段运行时间:%s秒' % predict_seconds)
def classify(classify_class1=True):
    """Train the project's Naive Bayes model and report accuracy on the test set.

    The dataset comes from ``dp.bow_log_dataset_dir`` when that path exists,
    otherwise from ``dp.load_file_save_dataset()``. Each of the ``test_num``
    samples is classified individually with ``nb.nb_classify``; a verdict
    line is printed per sample, then the accuracy percentage and the elapsed
    time of the training and prediction phases.

    Args:
        classify_class1: If true, use the ``*1`` labels, class count and
            label map; otherwise use the ``*2`` ones.

    Returns:
        None. All results are written to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    samples = dataset.test_features
    sample_count = dataset.test_num

    if classify_class1:
        train_y = dataset.train_class_labels1
        expected_labels = dataset.test_class_labels1
        n_classes = dataset.class1_num
        names = dataset.class_label1_map
    else:
        train_y = dataset.train_class_labels2
        expected_labels = dataset.test_class_labels2
        n_classes = dataset.class2_num
        names = dataset.class_label2_map

    fit_began = time.perf_counter()
    p_words, p_class = nb.train_nb(dataset.train_features, train_y, n_classes)
    fit_ended = time.perf_counter()

    # The prediction timer also covers the per-sample printing.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(sample_count):
        predicted = nb.nb_classify(p_words, p_class, samples[idx], n_classes)
        if predicted == expected_labels[idx]:
            hits += 1
            print('正确', names[predicted])
        else:
            wrong_pair = (names[predicted], names[expected_labels[idx]])
            print('错误, 预测类别:%s, 正确类别:%s' % wrong_pair)
    print('\n正确率:%.2f%%' % (100.0 * hits / sample_count))
    predict_ended = time.perf_counter()

    print('Naive Bayes-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('Naive Bayes-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
def classify(classify_class1=True):
    """Fit an ``MLPClassifier`` on the log dataset and report test accuracy.

    The dataset comes from ``dp.bow_log_dataset_dir`` when that path exists,
    otherwise from ``dp.load_file_save_dataset()``. The network is built with
    one hidden layer of 100 units, the ``adam`` solver, ``relu`` activation,
    ``random_state=1`` and verbose output. A verdict line is printed per
    prediction, then the accuracy percentage and the elapsed time of the
    training and prediction phases.

    Args:
        classify_class1: If true, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All results are written to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    # Features do not depend on which label set is selected.
    test_x = dataset.test_features
    train_x = dataset.train_features
    if classify_class1:
        test_y = dataset.test_class_labels1
        train_y = dataset.train_class_labels1
        names = dataset.class_label1_map
    else:
        test_y = dataset.test_class_labels2
        train_y = dataset.train_class_labels2
        names = dataset.class_label2_map

    fit_began = time.perf_counter()
    network = MLPClassifier(
        hidden_layer_sizes=(100,),
        solver='adam',
        activation='relu',
        random_state=1,
        verbose=True,
    )
    network.fit(train_x, train_y)
    fit_ended = time.perf_counter()

    # The prediction timer also covers the reporting loop below.
    predict_began = time.perf_counter()
    predictions = network.predict(test_x)
    hits = 0
    for idx, predicted in enumerate(predictions):
        expected = test_y[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (names[predicted], names[expected]))
    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('Neural Network-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('Neural Network-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
def classify(classify_class1=True):
    """Fit a ``KNeighborsClassifier`` (``n_neighbors=3``) and report test accuracy.

    The dataset comes from ``dp.bow_log_dataset_dir`` when that path exists,
    otherwise from ``dp.load_file_save_dataset()``. Each of the
    ``test_num`` samples is predicted one at a time; a verdict line is
    printed per sample, then the accuracy percentage and the elapsed time of
    the training and prediction phases.

    Args:
        classify_class1: If true, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All results are written to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    test_x = dataset.test_features
    train_x = dataset.train_features
    if classify_class1:
        test_y, train_y = dataset.test_class_labels1, dataset.train_class_labels1
        names = dataset.class_label1_map
    else:
        test_y, train_y = dataset.test_class_labels2, dataset.train_class_labels2
        names = dataset.class_label2_map

    fit_began = time.perf_counter()
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(train_x, train_y)
    fit_ended = time.perf_counter()

    # The prediction timer also covers the per-sample printing.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(dataset.test_num):
        # predict() is given a one-sample batch; [0] unwraps its single result.
        predicted = model.predict([test_x[idx]])[0]
        expected = test_y[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (names[predicted], names[expected]))
    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    fit_seconds = fit_ended - fit_began
    predict_seconds = predict_ended - predict_began
    print('KNN-sklearn-训练阶段运行时间:%s秒' % fit_seconds)
    print('KNN-sklearn-预测阶段运行时间:%s秒' % predict_seconds)
def classify(classify_class1=True):
    """Fit a ``tree.DecisionTreeClassifier``, print the tree, and report test accuracy.

    The dataset comes from ``dp.bow_log_dataset_dir`` when that path exists,
    otherwise from ``dp.load_file_save_dataset()``. After fitting, the text
    export of the tree is printed using ``log_dataset.feature_labels`` as
    feature names. Each of the ``test_num`` samples is then predicted one at
    a time; a verdict line is printed per sample, followed by the accuracy
    percentage and the elapsed time of the build and prediction phases.

    Args:
        classify_class1: If true, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All results are written to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    if classify_class1:
        train_y = dataset.train_class_labels1
        names = dataset.class_label1_map
        test_y = dataset.test_class_labels1
    else:
        train_y = dataset.train_class_labels2
        names = dataset.class_label2_map
        test_y = dataset.test_class_labels2

    # The estimator is constructed before the timer starts, so only fit() is timed.
    model = tree.DecisionTreeClassifier()
    build_began = time.perf_counter()
    model.fit(dataset.train_features, train_y)
    build_ended = time.perf_counter()

    tree_text = tree.export_text(model, feature_names=dataset.feature_labels)
    print(tree_text)

    # The prediction timer also covers the per-sample printing.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(dataset.test_num):
        # predict() is given a one-sample batch; [0] unwraps its single result.
        predicted = model.predict([dataset.test_features[idx]])[0]
        expected = test_y[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (names[predicted], names[expected]))
    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('决策树-sklearn-构建决策树运行时间:%s秒' % (build_ended - build_began))
    print('决策树-sklearn-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
def classify():
    """Build an ID3 decision tree from the combined training data and print it.

    The dataset is loaded from ``dp.bow_log_dataset_dir`` when that path
    exists; otherwise it is obtained from ``dp.load_file_save_dataset()``.
    The tree is created by ``id3.create_tree`` from
    ``log_dataset.get_combined_train()`` and ``log_dataset.feature_labels``.

    Returns:
        None. The tree is written to stdout.
    """
    # Imported locally so the block does not rely on a module-level import.
    import os

    # Fall back to dp.load_file_save_dataset() when the saved dataset path is
    # missing, instead of attempting to load a path that is not there.
    if not os.path.exists(dp.bow_log_dataset_dir):
        print('数据预处理并保存中')
        log_dataset = dp.load_file_save_dataset()
    else:
        log_dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)

    train_data = log_dataset.get_combined_train()
    d_tree = id3.create_tree(train_data, log_dataset.feature_labels)
    print(d_tree)