Code example #1
0
def tune_svm_using_10_fold():
    """Estimate SVM accuracy via 10-fold cross-validation on the training set.

    For each fold: fits feature selection on the fold's training split,
    projects the tuning split onto the same selected features, trains an
    Svm, and records its accuracy. Prints the mean accuracy over all folds.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()

    data_sets = dh.get_cross_validation_data_sets(10, train_features, train_prediction_labels)

    accuracies = []
    for data_set_number in data_sets:
        data_set = data_sets.get(data_set_number)
        training_set = data_set[0]
        tuning_set = data_set[1]
        train_features = training_set["data_points"]
        train_prediction_labels = training_set["labels"]

        # Feature selection fitted on this fold's training split only,
        # so the tuning split never influences which features are kept.
        train_features, selected_features = dh.get_k_best_features(len(train_features[0]), train_features, train_prediction_labels)

        test_features = tuning_set["data_points"]
        test_prediction_labels = tuning_set["labels"]

        # Project the tuning split onto the features selected above.
        test_features = dh.get_new_feature_vec(test_features, selected_features)

        svm = Svm(train_features, train_prediction_labels, 200, 1, 2)
        svm.train()
        eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
        # Named 'metrics' rather than 'eval' to avoid shadowing the builtin.
        metrics = eval_metrics.evaluate()
        accuracies.append(metrics['accuracy'])

    # Parenthesized print works identically on Python 2 and Python 3.
    average_accuracy = sum(accuracies) / len(accuracies)
    print(average_accuracy)
Code example #2
0
def evaluate_svm():
    """Train an SVM on the full engineered training set and evaluate on the test set.

    Fits feature selection on the training data, trains an Svm, projects the
    held-out test set onto the same selected features, then computes and plots
    AUC / AU-ROC from the predictions.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()

    # Feature selection fitted on the training data only.
    train_features, selected_features = dh.get_k_best_features(len(train_features[0]), train_features, train_prediction_labels)

    svm = Svm(train_features, train_prediction_labels, 20, 0)
    svm.train()

    dh_test = DataHandler('data/test-set-feature-engineered.csv', 'prediction_label')
    headers, test_features, test_prediction_labels = dh_test.get_numeric_data_set()

    # Project the test set onto the features selected from the training set.
    test_features = dh_test.get_new_feature_vec(test_features, selected_features)

    eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
    # Named 'metrics' rather than 'eval' to avoid shadowing the builtin.
    metrics = eval_metrics.evaluate()
    eval_metrics.compute_and_plot_auc(metrics['predicted'], test_prediction_labels)
    eval_metrics.compute_au_roc(metrics['predicted'], test_prediction_labels)
Code example #3
0
def main(argv):
    """Train Perceptron, SVM and PA classifiers and print their test predictions.

    Command-line contract (via sys.argv):
        argv[1] -- path to the training features file
        argv[2] -- path to the training labels file
        argv[3] -- path to the test features file

    Training features/labels are loaded, one-hot encoded and normalized
    (min-max for Perceptron/SVM, z-score for PA, using statistics computed
    from the training set only). Predictions for the three models are
    printed via print_predict.
    """
    # NOTE(review): the 'argv' parameter is ignored; the function reads
    # sys.argv directly. Kept as-is to preserve existing caller behavior.
    train_x = read_from_file(sys.argv[1])
    train_x = one_hot_encode(train_x).astype(float)

    train_y = read_from_file(sys.argv[2])
    train_y = train_y.astype(float).astype(int)
    # Number of distinct classes present in the training labels.
    num_of_labels = len(Counter(train_y).keys())

    test_x = read_from_file(sys.argv[3])
    test_x = one_hot_encode(test_x).astype(float)

    # Normalize the training data and reuse the same training-set statistics
    # on the test data to avoid leaking test information.
    train_x_Z_score, mean, std_dev = z_score_norm(train_x)
    train_x_min_max, min_train, max_train = min_max_norm(train_x)

    test_x_z_score = z_score_norm_by_mean_std(test_x, mean, std_dev)
    test_x_min_max = min_max_norm_by_min_max(test_x, min_train, max_train)

    # Training: Perceptron and SVM use min-max scaling, PA uses z-score.
    perceptron = Perceptron(train_x_min_max, train_y, num_of_labels)
    svm = Svm(train_x_min_max, train_y, num_of_labels)
    pa = Pa(train_x_Z_score, train_y, num_of_labels)
    perceptron.train()
    svm.train()
    pa.train()

    # Prediction: feed each model the test set normalized the same way
    # its training data was.
    predict_perceptron = []
    predict_svm = []
    predict_pa = []
    for test_min_max, test_z_score in zip(test_x_min_max, test_x_z_score):
        predict_perceptron.append(perceptron.predict(test_min_max))
        predict_svm.append(svm.predict(test_min_max))
        predict_pa.append(pa.predict(test_z_score))

    print_predict(predict_perceptron, predict_svm, predict_pa)