def preprocess(corpus_folder, mode, settings, out_file, verbose_level):
    """Partition a corpus and run the classifier selected by *mode*.

    Formats the raw corpus, clears and regenerates the
    "<corpus_folder>_partitioned" directory, then dispatches to either the
    SVM or the K-Means pipeline.

    Args:
        corpus_folder: path of the raw corpus directory.
        mode: SVM or KMEANS (module-level constants).
        settings: dict with 'filter_words', 'num_lines_split',
            'sliding_window_size', and per-classifier sub-dicts 'svm'/'kmeans'.
        out_file: results destination, passed through to the runner.
        verbose_level: >1 prints the effective preprocessing settings.
    """
    partitioned = corpus_folder + "_partitioned"

    preprocessor.format_corpus(corpus_folder)
    preprocessor.clean_directory(partitioned)

    # Any non-zero setting enables word filtering.
    filter_words = settings['filter_words'] != 0

    preprocessor.split_files(settings['num_lines_split'],
                             settings['sliding_window_size'], filter_words,
                             corpus_folder, partitioned)

    all_files = load_files(partitioned)

    if verbose_level > 1:
        print(
            "mode : {} filter: {} window size: {} num_lines_split: {}".format(
                mode, filter_words, settings['sliding_window_size'],
                settings['num_lines_split']))

    if mode == SVM:
        run_svm(all_files, settings['svm']['num_runs'], out_file,
                verbose_level)
    elif mode == KMEANS:
        # Forward only the K-Means parameters the runner expects.
        kmeans_settings = settings['kmeans']
        param_dict = {
            key: kmeans_settings[key]
            for key in ('n_init', 'max_iter', 'tol', 'num_runs', 'k')
        }

        run_cluster(all_files, param_dict, out_file, verbose_level)
# Example #2 (scraper artifact; the following "0" was the snippet's vote count)
# 0
def main(args):
    '''
    flowers recognition gogogo!
    '''
    # (banner message, runner) for every supported method; lambdas keep the
    # module attribute lookups lazy, exactly as the original if-chain did.
    runners = {
        'vgg': ('Using vgg network for flowers recognition',
                lambda: vgg.run_vgg(args.lr, args.epochs, args.batch_size,
                                    args.reg)),
        'fc': ('Using fully connected network for flowers recognition',
               lambda: fc.run_fc(args.lr, args.epochs, args.batch_size,
                                 args.reg)),
        'resnet34':
        ('Using deep residual network(34-layers) for flowers recognition',
         lambda: resnet.run_resnet(args.lr, args.epochs, args.batch_size,
                                   args.reg)),
        'resnet50':
        ('Using deep residual network(50-layers) pretrained for flowers recognition',
         lambda: res.run_resnet50(args.batch_size, args.epochs, args.lr)),
        'svm': ('Using Support Vector Machine for flowers recognition',
                lambda: svm.run_svm()),
        'knn': ('Using K nearest neighbors for flowers recognition',
                lambda: knn.run_knn()),
    }

    # Unknown methods fall through silently, matching the original behavior.
    if args.method in runners:
        message, run = runners[args.method]
        print(message)
        run()
# Example #3 (scraper artifact; the following "0" was the snippet's vote count)
# 0
def main():
    """Run each classifier in turn, then the combined model on both datasets."""
    logreg = LogisticRegressionMulticlass.LogisticRegression()
    logreg.start_logistic_regression(
        learning_rate=0.7, num_epoch=50, theta=1, show_graph=False)

    svm.run_svm(kernal=svm.linear, train_size=20000)
    RandomForest.run_random_forest()

    network = NeuralNetwork.NeuralNetwork()
    network.start_neural_network(
        num_epoch=100, learning_rate=0.015, show_graph=False)

    # Combined model on MNIST first, then USPS, as before.
    for dataset in (combine_model.MNIST, combine_model.USPS):
        combine_model.combine_model(dataset)
# Example #4 (scraper artifact; the following "0" was the snippet's vote count)
# 0
def cross_validate(train_inputs, train_labels, identity):
    """Run 3-fold stratified cross-validation of the SVM.

    Returns the list of per-fold errors reported by svm.run_svm.
    """
    # NOTE(review): assumes inputs/labels are indexable with fold index
    # arrays (e.g. numpy arrays) — confirm with callers.
    folds = cross_validation.StratifiedKFold(
        identity, n_folds=3, shuffle=True, random_state=5)

    return [
        svm.run_svm(train_inputs[train_idx], train_labels[train_idx],
                    train_inputs[valid_idx], train_labels[valid_idx])
        for train_idx, valid_idx in folds
    ]
def test_images(labels, images, classifier):
    """Classify candidate regions in each image and report test accuracy.

    For every image: detect candidate boxes, classify the HOG descriptor of
    each box with the chosen classifier, draw the boxes and predicted labels,
    and count the image as correct if the true label appears among the
    per-box answers.

    Args:
        labels: ground-truth label per image (parallel to *images*).
        images: list of image file paths.
        classifier: one of "svm-linear", "svm-rbf", "rf", "mlp".

    Raises:
        ValueError: if *classifier* is not a recognized name.
    """
    global label_dict
    correct_count = 0  # was named `sum`, shadowing the builtin

    for idx, image in enumerate(images):
        img = cv2.imread(image)
        correct_label = labels[idx]
        rectified_img, boxes = get_candidates(img)

        # Draw boxes and labels around ROI
        img_clone = rectified_img.copy()
        answers = []
        for box in boxes:
            (x, y, w, h) = box
            cv2.rectangle(img_clone, (x, y), (x + w, y + h), (0, 255, 0), 1)

            # Run classifier on this box's HOG descriptor.
            # (The original assigned this to `h`, clobbering the box height.)
            descriptor = hog(rectified_img, box)
            if classifier == "svm-linear" or classifier == "svm-rbf":
                output = int(run_svm(descriptor)[1][0][0])
            elif classifier == "rf":
                output = int(run_rf(descriptor)[1][0][0])
            elif classifier == "mlp":
                output = int(run_mlp(descriptor))
            else:
                # BUG FIX: the exception was constructed but never raised;
                # the original fell through to exit(1) instead.
                raise ValueError("Wrong classifier")
            output_str = label_dict[output]
            answers.append(output)
            cv2.putText(img_clone, output_str, (x, y - 3), 3, 1, (0, 255, 0),
                        2, cv2.LINE_AA)

        Image.fromarray(img_clone).show()

        if correct_label in answers:
            correct_count += 1

        # Reverse Rectification with labels
        # img_clone = reverse_rectification(img_clone)
        # Image.fromarray(img_clone).show()

    # Evaluation
    print("Test Accuracy: {:6}".format(correct_count / len(labels)))
# Example #6 (scraper artifact; the following "0" was the snippet's vote count)
# 0
def test_sliding_window():
  """Sweep the sliding-window overlap from 20 down to 0 (step 2) and plot
  the resulting test error for each setting."""
  test_error_list = []
  overlap_list = []

  # range(20, -1, -2) reproduces the original while-loop: 20, 18, ..., 0.
  for overlap in range(20, -1, -2):
    print("overlap num : {}".format(overlap))
    preprocessor.format_corpus("sermons")
    preprocessor.clean_directory("sermons_partitioned")
    preprocessor.split_files(38, overlap, True, "sermons", "sermons_partitioned")

    all_files = load_files("./sermons_partitioned/")
    test_error_list.append(100 - run_svm(all_files, 4, None, 3))
    overlap_list.append(overlap)

  print(test_error_list)
  plot_sliding_window(test_error_list, overlap_list)
# Example #7 (scraper artifact; the following "0" was the snippet's vote count)
# 0
def test_split_num():
  """Sweep the split size from 38 down to 4 (step 2) and plot the resulting
  test error for each setting."""
  test_error_list = []
  split_num_list = []

  # range(38, 3, -2) reproduces the original while-loop: 38, 36, ..., 4.
  for split_num in range(38, 3, -2):
    print("split num : {}".format(split_num))
    preprocessor.format_corpus("sermons")
    preprocessor.clean_directory("sermons_partitioned")
    preprocessor.split_files(split_num, 0, True, "sermons", "sermons_partitioned")

    all_files = load_files("./sermons_partitioned/")
    test_error_list.append(100 - run_svm(all_files, 4, None, 3))
    split_num_list.append(split_num)

  print(test_error_list)
  plot_split_num(test_error_list, split_num_list)
# Example #8 (scraper artifact; the following "0" was the snippet's vote count)
# 0
    # NOTE(review): this fragment starts mid-function — its `def` header (and
    # the opening banner lines) lie outside the visible chunk.
    print('#   Classifying Online Review Sentiment with Machine Learning   #')
    print('#                                                               #')
    print('#################################################################')
    print()

    # Load the corpus and fit a multinomial Naive Bayes baseline.
    dataset = SentimentCorpus()
    nb = MultinomialNaiveBayes()

    params = nb.train(dataset.train_X, dataset.train_y)

    # Evaluate NB on the training split (sanity check for over/under-fitting).
    predict_train = nb.test(dataset.train_X, params)
    eval_train = nb.evaluate(predict_train, dataset.train_y)

    # Evaluate NB on the held-out test split.
    predict_test = nb.test(dataset.test_X, params)
    eval_test = nb.evaluate(predict_test, dataset.test_y)
    print("\n=======================================================\n")
    print("+++ Naive Bayes +++")
    print  ("Accuracy on training data = %f \n Accuracy on testing data = %f" % (eval_train, eval_test))
    print("Confusion Matrix:")
    print(confusion_matrix(dataset.test_y,predict_test))
    print(classification_report(dataset.test_y,predict_test))
    print("=======================================================\n")
    # Comparison runs: SVM and neural-network pipelines print their own results.
    print("+++ Support Vector Machine +++")
    svm.run_svm(dataset.train_X, dataset.train_y, dataset.test_X, dataset.test_y)
    print("=======================================================\n")
    print("+++ Neural Network +++")
    nn.run_nn(dataset.train_X, dataset.train_y, dataset.test_X, dataset.test_y)
    print("=======================================================")
     

# Example #9 (scraper artifact; the following "0" was the snippet's vote count)
# 0
    #######################

    # NOTE(review): this fragment starts mid-function and its trailing `else:`
    # branch is cut off by the chunk boundary — the enclosing `def` and the
    # definitions of gc_train/gc_val/exp_* are not visible here.
    if chosen_classifier == "knn":
        # Enable to run knn classifier
        print("Running knn classifier")
        # knn.run_knn(train_images, train_labels, valid_images, valid_labels)
        # knn.run_knn(lbp_train_images, train_labels, lbp_val_images, valid_labels)
        # knn.run_knn(exp_train_images, exp_train_labels, exp_val_images, exp_val_labels)
        knn.run_knn(gc_train, exp_train_labels, gc_val, exp_val_labels)

    elif chosen_classifier == "svm":
        # svm classifier
        # print("Running svm classifier")
        # svm.run_svm(exp_train_images, exp_train_labels, exp_val_images, exp_val_labels)
        print("Running svm classifier on gamma corrected images")
        svm.run_svm(gc_train, exp_train_labels, gc_val, exp_val_labels)

    elif chosen_classifier == "mog":
        # mog classifier
        print("Running mog classifier")
        mog.run_mog(gc_train, exp_train_labels, gc_val, exp_val_labels)

    elif chosen_classifier == "dt":
        print("Running decision tree classifier")
        dt.decision_tree(gc_train, exp_train_labels, gc_val, exp_val_labels)

    elif chosen_classifier == "ensemble":
        # NOTE(review): the ensemble path uses exp_* inputs, unlike the other
        # branches which use the gamma-corrected gc_* inputs — confirm intent.
        print("Running ensemble method")
        ensemble.run_ensemble(exp_train_images, exp_train_labels, exp_val_images, exp_val_labels)

    else:
# Example #10 (scraper artifact; the following "0" was the snippet's vote count)
# 0
# NOTE(review): this script block is truncated by the chunk boundary — the
# second batch of model runs (all 22 features) is cut off after three calls.
if __name__ == '__main__':
    # First pass: models trained on only the top 5 selected features.
    x_train, x_test, y_train, y_test = get_data(True)

    print(
        "\n-------------------------------------\nAccuracies with top 5 features:\n-------------------------------------"
    )

    run_decision_tree(x_train, x_test, y_train, y_test)
    run_k_nearest_neighbour(x_train, x_test, y_train, y_test)
    run_logistic_regression(x_train, x_test, y_train, y_test)
    run_naive_bayes(x_train, x_test, y_train, y_test)
    run_neural_network(x_train, x_test, y_train, y_test)
    run_perceptron(x_train, x_test, y_train, y_test)
    run_random_forest(x_train, x_test, y_train, y_test)
    run_svm(x_train, x_test, y_train, y_test)
    run_xg_boost(x_train, x_test, y_train, y_test)

    # Voting ensemble over the same top-5-feature split.
    print(
        "\n-------------------------------------\nAccuracy with Voting in top 5 features:\n-------------------------------------"
    )
    run_voting(x_train, x_test, y_train, y_test)

    # Second pass: reload with the full feature set.
    x_train, x_test, y_train, y_test = get_data()
    print(
        "\n-------------------------------------\nAccuracies with all 22 features:\n-------------------------------------"
    )

    run_decision_tree(x_train, x_test, y_train, y_test)
    run_k_nearest_neighbour(x_train, x_test, y_train, y_test)
    run_logistic_regression(x_train, x_test, y_train, y_test)
# Logging usage examples (kept from the original, disabled):
# logging.info('This is an info log')
# logging.warning('This is a warning log')
# logging.error('This is an error log')

# ------------------------------------------------------------------------------
# DATA EXTRACTION — load train/test sets from the configured CSV files.
TRAIN_DATA = data_extractor.get_data(defines.DATA_TRAIN_CSV_FILE)
TEST_DATA = data_extractor.get_data(defines.DATA_TEST_CSV_FILE)

# ------------------------------------------------------------------------------
# STOP WORDS FILTER (currently disabled)
# logging.info('Prepare Data')
# nb_lib.nb_lib_prepare(TRAIN_DATA)

# ------------------------------------------------------------------------------
# FEATURE SELECTION (selection step itself currently disabled)
logging.info('Feature selection')
# feature_select.get_selected_features(TRAIN_DATA)

# ------------------------------------------------------------------------------
# CLASSIFICATION
logging.info('SVM')
svm.run_svm(TRAIN_DATA, TEST_DATA)

logging.info('Cross Validation')
# CrossVal.run_crossval(TRAIN_DATA)

print("done!")
sys.exit()
def execute(topic1, topic2, test, dump_files):
    """Train baseline and improved models, then ensemble them with XGBoost.

    Pipeline: optionally (re)download/clean the datasets, dump TF-IDF
    features, run a Bayes baseline, then CNN / FastText / SVM / logistic
    regression, and finally fit an XGBoost ensemble over the four models'
    predictions and print a classification report on the test set.

    Args:
        topic1, topic2: the two topics/classes; also key the dumped models.
        test: test-set specifier forwarded to create_cleaned_files.
        dump_files: the exact string "True" triggers dataset download/cleanup
            — NOTE(review): a string flag, not a bool; confirm with callers.
    """
    # Hoisted: this divider was repeated verbatim five times below.
    divider = (
        "--------------------------------------------------------------------------"
    )

    if dump_files == "True":
        print_bold("\n" + "Downloading the datasets ..." + "\n")
        create_cleaned_files(topic1, topic2, test)

    print_bold("Dumps TFIDF features ..." + "\n")

    # category is used to specify the unique Id of the dumped model
    category = topic1 + "-" + topic2
    dump_tfidf(category)

    print("=========================================================")
    print_bold("Start Running bayes model to establish a baseline")
    print("=========================================================")

    print_bold("\n" + "Run Bayes model ..." + "\n")
    # Baseline predictions are not used further; run_bayes prints its scores.
    pred_train_bayes, pred_test_bayes = run_bayes(category)

    print("=========================================================")
    print_bold("Improvement of the baseline")
    print("=========================================================")

    print_bold("Run Cnn model ..." + "\n")
    pred_train_cnn, pred_test_cnn = run_cnn()
    print(divider)

    print_bold("Run Fasttext model ..." + "\n")
    pred_train_fasttext, pred_test_fasttext = run_fasttext()
    print(divider)

    print_bold("Run SVM model ..." + "\n")
    # run_svm also returns the training labels used to fit the ensemble.
    pred_train_svm, pred_test_svm, y_train = run_svm(category)
    print(divider)

    print_bold("Run Logistic Regression model ..." + "\n")
    # run_logreg also returns the test labels used for the final report.
    pred_train_logreg, pred_test_logreg, y_test = run_logreg(category)
    print(divider)

    print_bold("Starting Ensemble Method")

    # using train+val for training the ensemble (training on more dataset == stronger results)
    ensemble_train = np.column_stack(
        (pred_train_svm, pred_train_logreg, pred_train_cnn,
         pred_train_fasttext))
    # Renamed from `test`, which shadowed the function parameter of the
    # same name (the parameter is no longer needed at this point).
    ensemble_test = np.column_stack(
        (pred_test_svm, pred_test_logreg, pred_test_cnn, pred_test_fasttext))
    model = xgb().fit(ensemble_train, y_train)

    print(divider)
    print_bold("Final results on the test set : ")
    print(classification_report(y_test, model.predict(ensemble_test)))