def run_forest(n_trees: int = 400,
               n_features_per_tree: int = 70,
               n_rows_power: float = 0.5) -> float:
    """Train a random forest and return its accuracy on the test set.

    Builds a ``RandomForest`` from the module-level ``trainset`` and
    ``labels``, trains it, and classifies every item of the module-level
    ``testset``.  Each prediction is compared with the matching ``(i, bm)``
    entry of ``benchmarkset``, echoed to stdout together with the elapsed
    time, and appended to ``result_by_self_train[28000].csv`` as
    ``i,prediction,<bm>``.

    NOTE(review): an identical ``run_forest`` is defined again later in this
    file and overrides this definition; one of the two should be removed.

    Args:
        n_trees: Forwarded unchanged to ``RandomForest``.
        n_features_per_tree: Forwarded unchanged to ``RandomForest``.
        n_rows_power: Forwarded unchanged to ``RandomForest``.

    Returns:
        The fraction of predictions equal to their benchmark value, or
        ``0.0`` when no item was classified (previously this case raised
        ``ZeroDivisionError``).
    """
    start_time = time.time()
    forest = RandomForest(trainset, labels, n_trees, n_features_per_tree,
                          n_rows_power)
    forest.train()

    correct = 0
    total = 0
    # Open the results file once instead of once per prediction.
    with open('result_by_self_train[28000].csv', 'a') as f:
        for test, (i, bm) in zip(testset, benchmarkset):
            # classify() returns a Counter-like tally; keep the top entry.
            result = forest.classify(test).most_common(1)[0][0]
            print('{:.1f}s -- No.{} should be: <{}> cal: {}'.format(
                time.time() - start_time, i, bm, result))
            total += 1
            if result == bm:
                correct += 1
            f.write('{},{},<{}>\n'.format(i, result, bm))
            # Keep every row visible on disk while a long run is in progress.
            f.flush()

    if total == 0:
        return 0.0
    return correct / total
def run_forest(n_trees: int = 400,
               n_features_per_tree: int = 70,
               n_rows_power: float = 0.5) -> float:
    """Train a random forest and return its accuracy on the test set.

    Builds a ``RandomForest`` from the module-level ``trainset`` and
    ``labels``, trains it, and classifies every item of the module-level
    ``testset``.  Each prediction is compared with the matching ``(i, bm)``
    entry of ``benchmarkset``, echoed to stdout together with the elapsed
    time, and appended to ``result_by_self_train[28000].csv`` as
    ``i,prediction,<bm>``.

    NOTE(review): an identical ``run_forest`` is also defined earlier in
    this file; this later definition is the one the name ends up bound to,
    so the earlier copy should be removed.

    Args:
        n_trees: Forwarded unchanged to ``RandomForest``.
        n_features_per_tree: Forwarded unchanged to ``RandomForest``.
        n_rows_power: Forwarded unchanged to ``RandomForest``.

    Returns:
        The fraction of predictions equal to their benchmark value, or
        ``0.0`` when no item was classified (previously this case raised
        ``ZeroDivisionError``).
    """
    start_time = time.time()
    forest = RandomForest(trainset, labels, n_trees, n_features_per_tree,
                          n_rows_power)
    forest.train()

    hits = 0
    seen = 0
    # Open the results file once instead of once per prediction.
    with open('result_by_self_train[28000].csv', 'a') as f:
        for test, (i, bm) in zip(testset, benchmarkset):
            # classify() returns a Counter-like tally; keep the top entry.
            result = forest.classify(test).most_common(1)[0][0]
            print('{:.1f}s -- No.{} should be: <{}> cal: {}'.format(
                time.time() - start_time, i, bm, result))
            seen += 1
            if result == bm:
                hits += 1
            f.write('{},{},<{}>\n'.format(i, result, bm))
            # Keep every row visible on disk while a long run is in progress.
            f.flush()

    if seen == 0:
        return 0.0
    return hits / seen
def _prompt_int(prompt, low, high, retry_message):
    """Ask on stdin until the reply is an integer in ``[low, high]``.

    ``high`` may be ``None`` for "no upper bound".  After a valid reply a
    blank line is printed and the value is returned as an ``int``.
    """
    reply = input(prompt)
    while True:
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can parse, so input such as a superscript digit is re-prompted
        # instead of raising ValueError.
        if reply.isdecimal():
            value = int(reply)
            if value >= low and (high is None or value <= high):
                break
        print(retry_message)
        reply = input("> ")
    print()
    return value


def use_random_forest(data):
    """Interactively train a random forest and report its test accuracy.

    Prompts for the number of trees, then for the amount of data to load.
    When ``data == "1"`` the CIFAR batches are used (training data from
    ``aggregate_cifar``, ten test images from
    ``cifar-10-batches-py/test_batch``); any other value selects the CS:GO
    data from ``load_csgo``, split 80% / 20% into training and test rows.
    Every test row is classified, the prediction is printed next to the
    row's last element (treated as the actual label), and the totals and
    accuracy are printed at the end.

    Args:
        data: Dataset selector; the string ``"1"`` means CIFAR, anything
            else means CS:GO.
    """
    # The tree count is converted to int like every other numeric answer;
    # it used to reach RandomForest as the raw input string.
    n_trees = _prompt_int(
        "How many decision trees would you like to use in your "
        "random forest?\nUse 1 for a decision tree\n> ",
        1, None,
        # 1 is a valid answer (a single decision tree), so say "> 0".
        "Please enter an integer greater than 0...")

    if data == "1":  # use cifar
        # Get training data
        n_files = _prompt_int(
            "How many file batches would you like to use?\nThere are 5.\n> ",
            1, 5,
            "Please enter an integer between 1 and 5...")
        n_images = _prompt_int(
            "How many images would you like from each file?\n"
            "There are 10000 images in each file.\n> ",
            1, 10000,
            "Please enter an integer between 1 and 10000...")
        training_data = aggregate_cifar(n_files=n_files, n_images=n_images)

        # Get test data: one row per image with its label appended last.
        test_data, test_labels = unpickle("cifar-10-batches-py/test_batch",
                                          n_images=10)
        test_full = np.vstack([
            np.append(test_data[i], label)
            for i, label in enumerate(test_labels)
        ])
    else:  # use csgo
        n_data = _prompt_int(
            "How many rows of data would you like to use for training and "
            "testing? \n955466 rows available.\n"
            "80% will be used for training, 20% for testing\n> ",
            1, 955466,
            "Please enter an integer between 1 and 955466...")
        full_data = load_csgo(False)
        full_data = full_data.values
        full_data = full_data[:n_data]

        # Split at a single index so no row is dropped between the two
        # parts; keep at least one training row for very small requests.
        split = max(1, int(len(full_data) * .80))
        training_data = full_data[:split]
        test_full = full_data[split:]
        print("Done unpacking CS:GO data...")

    start_time = time.time()
    rf = RandomForest(training_data, n_trees)  # create and train random forest
    print("Training time: " + str(time.time() - start_time) + " seconds")

    pass_count = 0
    fail_count = 0
    print("Classifying test data...")
    for row in test_full:
        res = rf.classify(row, label=True)
        print("Predicted: " + str(res) + "\tActual: " + str(row[-1]))
        if res == row[-1]:
            pass_count += 1
        else:
            fail_count += 1

    # Report results
    print("Correct classifications: " + str(pass_count))
    print("Wrong classifications: " + str(fail_count))
    total = pass_count + fail_count
    if total:
        print("Accuracy: " + str(float(pass_count) * 100 / total) + "%")
    else:
        # Nothing was classified (e.g. a single CS:GO row was requested).
        print("Accuracy: n/a (no test rows)")