def classify():
    """Train an sklearn ``svm.SVC`` on the Android dataset and report accuracy.

    The dataset is loaded from ``dp.android_dataset_dir`` when that path
    exists; otherwise it is built with ``dp.load_file_to_dataset()`` and saved
    there first. One line is printed per test sample (correct / wrong), then
    the overall accuracy and the wall-clock time of the training and
    prediction phases. Nothing is returned.

    The reported prediction time also covers the per-sample printing, because
    the end timestamp is taken after the accuracy line has been printed.
    """
    if os.path.exists(dp.android_dataset_dir):
        android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
    else:
        print('数据预处理并保存中')
        android_dataset = dp.load_file_to_dataset()
        dp.save_android_dataset(android_dataset, dp.android_dataset_dir)

    train_start_time = time.perf_counter()
    svm_clf = svm.SVC()
    svm_clf.fit(android_dataset.train_features,
                android_dataset.train_class_labels)
    train_end_time = time.perf_counter()

    test_features = android_dataset.test_features
    test_class_labels = android_dataset.test_class_labels
    test_num = android_dataset.test_num

    predict_start_time = time.perf_counter()
    # Predict all test samples in one batched call instead of invoking
    # predict() once per sample; the labels are the same, the overhead is not.
    # The slice keeps the evaluation limited to the first ``test_num`` samples.
    if test_num > 0:
        predicted_labels = svm_clf.predict(test_features[:test_num])
    else:
        # Nothing to predict for an empty test set.
        predicted_labels = []

    acc = 0
    for i, class_label in enumerate(predicted_labels):
        if class_label == test_class_labels[i]:
            acc += 1
            print('正确', class_label)
        else:
            print('错误, 预测类别:%d, 正确类别:%d' % (class_label, test_class_labels[i]))

    # Guard the division so an empty test set reports 0% instead of raising
    # ZeroDivisionError.
    accuracy = 100.0 * acc / test_num if test_num > 0 else 0.0
    print('\n正确率:%.2f%%' % accuracy)
    predict_end_time = time.perf_counter()

    print('SVM-sklearn-训练阶段运行时间:%s秒' % (train_end_time - train_start_time))
    print('SVM-sklearn-预测阶段运行时间:%s秒' % (predict_end_time - predict_start_time))
def classify():
    """Evaluate ``knn.classify`` on the Android dataset and print the results.

    Loads the dataset from ``dp.android_dataset_dir`` if that path exists,
    otherwise builds it with ``dp.load_file_to_dataset()`` and saves it.
    Every test sample is classified against the full training set; a line is
    printed per sample, followed by the overall accuracy and the elapsed
    wall-clock time of the prediction loop (printing included). Returns None.
    """
    if os.path.exists(dp.android_dataset_dir):
        android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
    else:
        print('数据预处理并保存中')
        android_dataset = dp.load_file_to_dataset()
        dp.save_android_dataset(android_dataset, dp.android_dataset_dir)

    samples = android_dataset.test_features
    expected_labels = android_dataset.test_class_labels
    sample_count = android_dataset.test_num
    reference_features = android_dataset.train_features
    reference_labels = android_dataset.train_class_labels

    started = time.perf_counter()
    hits = 0
    for idx in range(sample_count):
        # The trailing 7 is presumably the neighbour count k -- confirm
        # against knn.classify's signature.
        predicted = knn.classify(samples[idx], reference_features,
                                 reference_labels, 7)
        expected = expected_labels[idx]
        matched = predicted == expected
        if not matched:
            print('错误, 预测类别:%d, 正确类别:%d' % (predicted, expected))
            continue
        hits += 1
        print('正确', predicted)

    accuracy_percent = 100.0 * hits / sample_count
    print('\n正确率:%.2f%%' % accuracy_percent)
    # The end timestamp is taken after the accuracy line, as in the report.
    finished = time.perf_counter()
    elapsed = finished - started
    print('KNN-预测阶段运行时间:%s秒' % elapsed)
# Example #3
# 0
def classify():
    """Train and evaluate the naive Bayes classifier on the Android dataset.

    Loads the dataset from ``dp.android_dataset_dir`` if that path exists,
    otherwise builds it with ``dp.load_file_to_dataset()`` and saves it.
    ``nb.train_nb`` is timed as the training phase; the per-sample
    ``nb.nb_classify`` loop (including its printing and the accuracy line) is
    timed as the prediction phase. Prints per-sample results, the accuracy
    and both durations. Returns None.
    """
    if os.path.exists(dp.android_dataset_dir):
        android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
    else:
        print('数据预处理并保存中')
        android_dataset = dp.load_file_to_dataset()
        dp.save_android_dataset(android_dataset, dp.android_dataset_dir)

    train_started = time.perf_counter()
    # train_nb returns two values that are passed unchanged to nb_classify.
    p_words, p_class = nb.train_nb(
        android_dataset.train_features,
        android_dataset.train_class_labels,
        android_dataset.class_num,
    )
    train_finished = time.perf_counter()

    samples = android_dataset.test_features
    expected_labels = android_dataset.test_class_labels
    sample_count = android_dataset.test_num
    class_count = android_dataset.class_num

    predict_started = time.perf_counter()
    hits = 0
    for idx in range(sample_count):
        predicted = nb.nb_classify(p_words, p_class, samples[idx], class_count)
        expected = expected_labels[idx]
        matched = predicted == expected
        if not matched:
            print('错误, 预测类别:%d, 正确类别:%d' % (predicted, expected))
            continue
        hits += 1
        print('正确', predicted)

    accuracy_percent = 100.0 * hits / sample_count
    print('\n正确率:%.2f%%' % accuracy_percent)
    predict_finished = time.perf_counter()

    train_elapsed = train_finished - train_started
    predict_elapsed = predict_finished - predict_started
    print('naive-bayes-训练阶段运行时间:%s秒' % train_elapsed)
    print('naive-bayes-预测阶段运行时间:%s秒' % predict_elapsed)
# Example #4
# 0
def classify():
    """Build (or reload) the ID3 decision tree and evaluate it on the test set.

    If ``tree_file_dir`` exists, the tree and its accompanying data are read
    back with ``id3.load_tree``. Otherwise the Android dataset is obtained
    (loaded from ``dp.android_dataset_dir`` or created through
    ``dp.load_file_save_dataset()``), a tree is built with
    ``id3.create_tree`` and everything is persisted via ``id3.store_tree``.

    Prints the tree, one line per test sample, the accuracy and the two
    durations. When the tree was loaded from disk the reported build time is
    0, because no tree was built in this run. Returns None.
    """
    # Stays 0 unless a tree is actually built below.
    build_elapsed = 0
    if not os.path.exists(tree_file_dir):
        print('决策树未保存,重新建树中')
        if os.path.exists(dp.android_dataset_dir):
            android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
        else:
            print('数据预处理并保存中')
            android_dataset = dp.load_file_save_dataset()
        train_data_set = android_dataset.get_combined_train()
        feature_labels = android_dataset.feature_labels
        test_features_set = android_dataset.test_features
        test_class_labels = android_dataset.test_class_labels

        build_started = time.perf_counter()
        d_tree = id3.create_tree(train_data_set, feature_labels)
        build_finished = time.perf_counter()
        build_elapsed = build_finished - build_started
        id3.store_tree(tree_file_dir, d_tree, feature_labels, train_data_set,
                       test_features_set, test_class_labels)
    else:
        print('决策树已保存')
        # The stored training data is unpacked but not needed for evaluation.
        (d_tree, feature_labels, _train_data_set, test_features_set,
         test_class_labels) = id3.load_tree(tree_file_dir)

    print(d_tree)

    predict_started = time.perf_counter()
    sample_count = len(test_features_set)
    hits = 0
    for idx in range(sample_count):
        predicted = id3.classify(d_tree, feature_labels,
                                 test_features_set[idx])
        expected = test_class_labels[idx]
        matched = predicted == expected
        if not matched:
            print('错误, 预测类别:%d, 正确类别:%d' % (predicted, expected))
            continue
        hits += 1
        print('正确', predicted)

    accuracy_percent = 100.0 * hits / sample_count
    print('\n正确率:%.2f%%' % accuracy_percent)
    predict_finished = time.perf_counter()

    print('决策树-构建决策树运行时间:%s秒' % build_elapsed)
    print('决策树-预测阶段运行时间:%s秒' % (predict_finished - predict_started))
def classify():
    """Train an ``MLPClassifier`` on the Android dataset and report accuracy.

    Loads the dataset from ``dp.android_dataset_dir`` if that path exists,
    otherwise builds it with ``dp.load_file_to_dataset()`` and saves it.
    The classifier is fitted on the training split, all test samples are
    predicted in a single ``predict`` call, and one line is printed per
    prediction, followed by the accuracy and the training / prediction
    durations. The prediction duration includes the printing. Returns None.
    """
    if os.path.exists(dp.android_dataset_dir):
        android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
    else:
        print('数据预处理并保存中')
        android_dataset = dp.load_file_to_dataset()
        dp.save_android_dataset(android_dataset, dp.android_dataset_dir)

    samples = android_dataset.test_features
    expected_labels = android_dataset.test_class_labels
    sample_count = android_dataset.test_num

    train_started = time.perf_counter()
    network = MLPClassifier(
        hidden_layer_sizes=(100, ),
        activation='relu',
        solver='adam',
        random_state=1,  # fixed seed
        verbose=True,
    )
    network.fit(android_dataset.train_features,
                android_dataset.train_class_labels)
    train_finished = time.perf_counter()

    predict_started = time.perf_counter()
    predictions = network.predict(samples)
    hits = 0
    for idx, predicted in enumerate(predictions):
        expected = expected_labels[idx]
        matched = predicted == expected
        if not matched:
            print('错误, 预测类别:%d, 正确类别:%d' % (predicted, expected))
            continue
        hits += 1
        print('正确', predicted)

    # The denominator is the dataset's test_num, not len(predictions).
    accuracy_percent = 100.0 * hits / sample_count
    print('\n正确率:%.2f%%' % accuracy_percent)
    predict_finished = time.perf_counter()

    train_elapsed = train_finished - train_started
    predict_elapsed = predict_finished - predict_started
    print('Neural Network-训练阶段运行时间:%s秒' % train_elapsed)
    print('Neural Network-预测阶段运行时间:%s秒' % predict_elapsed)
def classify():
    """Train the ``svm.PlattSMO`` model on the Android dataset and report accuracy.

    Loads the dataset from ``dp.android_dataset_dir`` if that path exists,
    otherwise builds it with ``dp.load_file_to_dataset()`` and saves it.
    Training features and (processed) training labels are converted to NumPy
    arrays, the SMO solver is run, and all test samples are predicted in one
    ``predict`` call. Prints one line per prediction, the accuracy and the
    training / prediction durations (prediction time includes the printing).
    Returns None.
    """
    if os.path.exists(dp.android_dataset_dir):
        android_dataset = dp.load_android_dataset(dp.android_dataset_dir)
    else:
        print('数据预处理并保存中')
        android_dataset = dp.load_file_to_dataset()
        dp.save_android_dataset(android_dataset, dp.android_dataset_dir)

    train_started = time.perf_counter()
    train_matrix = np.array(android_dataset.train_features)
    train_targets = np.array(
        class_labels_process(android_dataset.train_class_labels))
    # Positional arguments 1, 0.0001, 10000 are presumably C, tolerance and
    # the iteration cap -- confirm against PlattSMO's constructor.
    smo = svm.PlattSMO(train_matrix, train_targets, 1, 0.0001, 10000,
                       name='rbf', theta=20)
    smo.smoP()
    train_finished = time.perf_counter()

    samples = android_dataset.test_features
    expected_labels = android_dataset.test_class_labels
    sample_count = android_dataset.test_num

    predict_started = time.perf_counter()
    predictions = smo.predict(samples)

    # NOTE(review): training labels went through class_labels_process, but
    # the test labels are compared unprocessed -- verify both use the same
    # label encoding.
    hits = 0
    for idx in range(len(predictions)):
        predicted = predictions[idx]
        expected = expected_labels[idx]
        matched = predicted == expected
        if not matched:
            print('错误, 预测类别:%d, 正确类别:%d' % (predicted, expected))
            continue
        hits += 1
        print('正确', predicted)

    accuracy_percent = 100.0 * hits / sample_count
    print('\n正确率:%.2f%%' % accuracy_percent)
    predict_finished = time.perf_counter()

    train_elapsed = train_finished - train_started
    predict_elapsed = predict_finished - predict_started
    print('SVM-训练阶段运行时间:%s秒' % train_elapsed)
    print('SVM-预测阶段运行时间:%s秒' % predict_elapsed)