def run_forest(n_trees=400, n_features_per_tree=70, n_rows_power=0.5):
    """Train a random forest and score it against the benchmark set.

    Builds a ``RandomForest`` from the module-level ``trainset`` and
    ``labels``, trains it, then classifies every entry of the module-level
    ``testset``. Each prediction is compared with the paired entry of
    ``benchmarkset`` (which yields ``(row_id, expected_label)`` pairs),
    echoed to stdout with the elapsed time, and appended as a
    ``row_id,predicted,<expected>`` line to
    ``result_by_self_train[28000].csv``.

    Args:
        n_trees: Passed to ``RandomForest`` as its third positional argument.
        n_features_per_tree: Passed to ``RandomForest`` as its fourth
            positional argument.
        n_rows_power: Passed to ``RandomForest`` as its fifth positional
            argument.

    Returns:
        The fraction of classified rows whose prediction equals the expected
        label, or ``0.0`` when there was nothing to classify.
    """
    start_time = time.time()
    forest = RandomForest(trainset, labels, n_trees, n_features_per_tree,
                          n_rows_power)
    forest.train()

    correct = 0
    total = 0
    # Open the results file once instead of once per row; append mode keeps
    # the rows written by earlier runs. Note the file is now created even
    # when there are no rows to classify.
    with open('result_by_self_train[28000].csv', 'a') as results_file:
        for test, (i, bm) in zip(testset, benchmarkset):
            # classify() must return an object exposing most_common(), e.g. a
            # Counter of votes; the top-voted label is taken as the prediction.
            result = forest.classify(test).most_common(1)[0][0]
            print('{:.1f}s -- No.{} should be: <{}> cal: {}'.format(
                time.time() - start_time, i, bm, result))
            if result == bm:
                correct += 1
            total += 1
            results_file.write('{},{},<{}>\n'.format(i, result, bm))

    # An empty test set used to raise ZeroDivisionError here.
    if not total:
        return 0.0
    return correct / total
Example #3
0
def _prompt_int(prompt, retry_message, low=1, high=None):
    """Ask on stdin until the user enters a whole number in ``[low, high]``.

    Args:
        prompt: Text shown for the first request.
        retry_message: Text printed after every rejected answer, before
            asking again with a bare ``"> "`` prompt.
        low: Smallest accepted value.
        high: Largest accepted value, or ``None`` for no upper bound.

    Returns:
        The accepted answer as an ``int``. A blank line is printed before
        returning.
    """
    answer = input(prompt)
    # isdecimal() only accepts characters that int() can parse; isdigit()
    # also accepts e.g. superscript digits, which make int() raise ValueError.
    while not (answer.isdecimal() and low <= int(answer)
               and (high is None or int(answer) <= high)):
        print(retry_message)
        answer = input("> ")
    print()
    return int(answer)


def use_random_forest(data):
    """Train a random forest on a data set and report its accuracy.

    Interactively asks for the number of trees and the amount of data to
    use, builds the training and test sets, constructs a ``RandomForest``,
    classifies every test row and prints per-row predictions followed by
    the pass/fail totals and the accuracy percentage.

    Args:
        data: ``"1"`` selects the CIFAR data (loaded through
            ``aggregate_cifar`` and ``unpickle``); any other value selects
            the CS:GO data (loaded through ``load_csgo``).
    """
    # The tree count is passed to RandomForest as an int, consistent with
    # the other numeric inputs (it was previously left as the raw string).
    n_trees = _prompt_int(
        "How many decision trees would you like to use in your "
        "random forest?\nUse 1 for a decision tree\n> ",
        "Please enter an integer greater than or equal to 1...")

    if data == "1":  # use cifar
        # Get training data
        n_files = _prompt_int(
            "How many file batches would you like to use?\nThere are 5.\n> ",
            "Please enter an integer between 1 and 5...",
            high=5)
        n_images = _prompt_int(
            "How many images would you like from each file?\nThere are 10000 images in each file.\n> ",
            "Please enter an integer between 1 and 10000...",
            high=10000)
        training_data = aggregate_cifar(n_files=n_files, n_images=n_images)

        # Get test data: one row per test image, with its label appended as
        # the last element so it can be read back as row[-1] below.
        test_data, test_labels = unpickle("cifar-10-batches-py/test_batch",
                                          n_images=10)
        test_full = np.vstack([
            np.append(image, label)
            for image, label in zip(test_data, test_labels)
        ])

    else:  # use csgo
        n_data = _prompt_int(
            "How many rows of data would you like to use for training and testing? 955466 rows available.\n80% will be used for training, 20% for testing\n> ",
            "Please enter an integer between 1 and 955466...",
            high=955466)
        # load_csgo() must return an object with a .values attribute
        # (presumably a pandas DataFrame -- confirm against its definition).
        full_data = load_csgo(False).values[:n_data]

        # Split 80/20 at a single cut point so that no row is dropped
        # between the two sets; always keep at least one training row.
        split = max(1, len(full_data) * 4 // 5)
        training_data = full_data[:split]
        test_full = full_data[split:]

        print("Done unpacking CS:GO data...")

    start_time = time.time()
    # Constructing the forest is what is timed as "training" here.
    rf = RandomForest(training_data, n_trees)
    print("Training time: " + str(time.time() - start_time) + " seconds")

    pass_count = 0
    fail_count = 0
    print("Classifying test data...")
    for row in test_full:
        res = rf.classify(row, label=True)
        # The last element of each row is compared as the true label.
        print("Predicted: " + str(res) + "\tActual: " + str(row[-1]))
        if res == row[-1]:
            pass_count += 1
        else:
            fail_count += 1

    # Report results
    print("Correct classifications: " + str(pass_count))
    print("Wrong classifications: " + str(fail_count))
    total = pass_count + fail_count
    if total:
        print("Accuracy: " + str(float(pass_count) * 100 / total) + "%")
    else:
        # A tiny CS:GO selection can leave the test set empty; avoid
        # dividing by zero in that case.
        print("Accuracy: n/a (no test rows)")