def print_comparison(self):
    """Print a summary comparing the NN player's results with the random player's.

    Reads ``self.results_from_nn_playing`` and
    ``self.results_from_random_playing`` and prints, in order: both raw
    result sequences, the largest value of each, the average of each, and
    the value returned by ``ai2048demo.welch`` for the two samples.

    If either sequence is empty the statistics cannot be computed (``max``
    would raise ``ValueError`` and the average would divide by zero), so
    after printing the raw sequences a notice is printed and the method
    returns without calling ``ai2048demo.welch``.
    """
    nn_results = self.results_from_nn_playing
    random_results = self.results_from_random_playing

    print("NN results:\t", nn_results)
    print("Random results:\t", random_results)

    # Guard: max() and the averages below are undefined for an empty sample.
    if len(nn_results) == 0 or len(random_results) == 0:
        print("Not enough results to compare: both players need at least one game.")
        return

    print("largest tiles", max(nn_results), max(random_results))
    print("average tiles",
          sum(nn_results) / float(len(nn_results)),
          sum(random_results) / float(len(random_results)))

    # welch() receives the random baseline first, then the NN sample.
    points = ai2048demo.welch(random_results, nn_results)
    print("points", points)
# user_control(self): interactive prompt loop; each pass stores the typed command in self.action and dispatches on it:
#   "t"            -> self.move_classifier.do_training() with default arguments
#   "tl"           -> self.move_classifier.do_testing() with default arguments
#   "ta"           -> prints ai2048demo.welch(random results, NN results)
#   starts with "p" or "r" -> sets self.results_length = 50 and returns to the caller
#   starts with "c" -> if the two result lists hold fewer than 100 entries combined, sets self.results_length = 50 and returns;
#                      otherwise calls self.print_comparison()
#   anything else  -> int(self.action) is passed as the epoch count to do_training and the result is stored in self.errors
# The trailing statements print check_result() percentages for the test set and the training set, then the minutes elapsed since self.start_time.
# NOTE(review): indentation was lost in this copy, so it cannot be told from here whether the trailing statistics statements
#               belong to the final else branch or run after every non-returning command - confirm against the original file.
# NOTE(review): self.action[0] raises IndexError when the user just presses Enter, and int(self.action) raises ValueError
#               for non-numeric input; neither case is handled.
def user_control(self): while True: self.action = input("Press r to play random, t to train, p to play with nn, c to compare results: ") if self.action[0] == "t": if len(self.action) == 1: output_activations = self.move_classifier.do_training() elif self.action[1] == "l": output_activations = self.move_classifier.do_testing() elif self.action[1] == "a": points = ai2048demo.welch(self.results_from_random_playing, self.results_from_nn_playing) print("points", points) elif self.action[0] == "p" or self.action[0] == "r": self.results_length = 50 return elif self.action[0] == "c": if len(self.results_from_nn_playing)+len(self.results_from_random_playing) < 100: self.results_length = 50 return else: self.print_comparison() else: self.errors = self.move_classifier.do_training(epochs=int(self.action), errors=self.errors) output_activations = self.move_classifier.do_testing(boards=self.move_classifier.test_boards) print("Statistics (test set):\t\t ", self.move_classifier.check_result(output_activations, labels=self.move_classifier.test_labels), "%") output_activations = self.move_classifier.do_testing(boards=self.move_classifier.boards) print("Statistics (training set):\t ", self.move_classifier.check_result(output_activations, labels=self.move_classifier.labels), "%") print("Total time elapsed: " + str(round((time() - self.start_time)/60, 1)) + " min")
def main():
    """Train the network, then repeatedly compare random play against the AI.

    After ``train`` has run, a ``Draw`` GUI is created and the user is
    prompted. For as long as the user answers with an empty line (just
    Enter), one pair of samples is taken from ``gui.samples()``, both lists
    are sorted in place in descending order and printed with ``print_list``,
    and the ``welch`` result for the two samples is printed. Any non-empty
    answer leaves the loop and starts ``gui.mainloop()``.
    """
    # Other topologies/epoch counts that were tried (kept for reference):
    #   train([192, 625, 300, 100, 4], 20)
    #   train([192, 150, 80, 25, 4], 1)
    #   train([192, 100, 4], 10)
    #   train([192, 625, 300, 100, 4], 50)  # BEST
    train([192, 100, 4], 2)

    gui = Draw(DIM)
    response = input("Press Enter To continue: ")
    while not response:
        # Locals deliberately avoid the name `random`, which would shadow the
        # standard-library module inside this function.
        random_results, ai_results = gui.samples()
        random_results.sort(reverse=True)
        ai_results.sort(reverse=True)

        print('\n', '\t\t---RANDOM---')
        print_list(random_results)
        print('\n', '\t\t---AI---')
        print_list(ai_results)

        w = welch(random_results, ai_results)
        print('\n', w)

        response = input("Press Enter To continue: ")
    gui.mainloop()
def benchmark(n, silent=False):
    """Compare the AI player against the random player.

    ``n`` and ``silent`` are forwarded unchanged to ``run_ai`` and then to
    ``run_random``. Unless ``silent`` is true, both result sets are printed
    via ``print_results``, followed by the difference of their averages
    (AI minus random) and the output of ``welch``.

    Returns the result of ``scipy.stats.ttest_ind(random_result, ai_result)``
    (random sample first).
    """
    def _average(values):
        # Division happens first and the quotient is then converted to float.
        return float(sum(values) / len(values))

    ai_result = run_ai(n, silent)
    random_result = run_random(n, silent)

    if silent:
        return scipy.stats.ttest_ind(random_result, ai_result)

    ai_avg = _average(ai_result)
    random_avg = _average(random_result)

    print("AI player results")
    print_results(ai_result)
    print("Random player results")
    print_results(random_result)
    print("Difference:", ai_avg - random_avg)
    print(welch(random_result, ai_result))

    # NOTE(review): ttest_ind is called with its default equal_var setting,
    # whereas the printed value comes from welch() - confirm this is intended.
    return scipy.stats.ttest_ind(random_result, ai_result)
def print_comparison(self):
    """Print a summary comparing the NN player's results with the random player's.

    Reads ``self.results_from_nn_playing`` and
    ``self.results_from_random_playing`` and prints, in order: both raw
    result sequences, the largest value of each, the average of each, and
    the value returned by ``ai2048demo.welch`` for the two samples.

    If either sequence is empty the statistics cannot be computed (``max``
    would raise ``ValueError`` and the average would divide by zero), so
    after printing the raw sequences a notice is printed and the method
    returns without calling ``ai2048demo.welch``.
    """
    nn_results = self.results_from_nn_playing
    random_results = self.results_from_random_playing

    print("NN results:\t", nn_results)
    print("Random results:\t", random_results)

    # Guard: max() and the averages below are undefined for an empty sample.
    if len(nn_results) == 0 or len(random_results) == 0:
        print("Not enough results to compare: both players need at least one game.")
        return

    print("largest tiles", max(nn_results), max(random_results))
    print("average tiles",
          sum(nn_results) / float(len(nn_results)),
          sum(random_results) / float(len(random_results)))

    # welch() receives the random baseline first, then the NN sample.
    points = ai2048demo.welch(random_results, nn_results)
    print("points", points)
# user_control(self): interactive prompt loop; each pass stores the typed command in self.action and dispatches on it:
#   "t"            -> self.move_classifier.do_training() with default arguments
#   "tl"           -> self.move_classifier.do_testing() with default arguments
#   "ta"           -> prints ai2048demo.welch(random results, NN results)
#   starts with "p" or "r" -> sets self.results_length = 50 and returns to the caller
#   starts with "c" -> if the two result lists hold fewer than 100 entries combined, sets self.results_length = 50 and returns;
#                      otherwise calls self.print_comparison()
#   anything else  -> int(self.action) is passed as the epoch count to do_training and the result is stored in self.errors
# The trailing statements print check_result() percentages for the test set and the training set, then the minutes elapsed since self.start_time.
# NOTE(review): indentation was lost in this copy, so it cannot be told from here whether the trailing statistics statements
#               belong to the final else branch or run after every non-returning command - confirm against the original file.
# NOTE(review): self.action[0] raises IndexError when the user just presses Enter, and int(self.action) raises ValueError
#               for non-numeric input; neither case is handled.
def user_control(self): while True: self.action = input( "Press r to play random, t to train, p to play with nn, c to compare results: " ) if self.action[0] == "t": if len(self.action) == 1: output_activations = self.move_classifier.do_training() elif self.action[1] == "l": output_activations = self.move_classifier.do_testing() elif self.action[1] == "a": points = ai2048demo.welch(self.results_from_random_playing, self.results_from_nn_playing) print("points", points) elif self.action[0] == "p" or self.action[0] == "r": self.results_length = 50 return elif self.action[0] == "c": if len(self.results_from_nn_playing) + len( self.results_from_random_playing) < 100: self.results_length = 50 return else: self.print_comparison() else: self.errors = self.move_classifier.do_training( epochs=int(self.action), errors=self.errors) output_activations = self.move_classifier.do_testing( boards=self.move_classifier.test_boards) print( "Statistics (test set):\t\t ", self.move_classifier.check_result( output_activations, labels=self.move_classifier.test_labels), "%") output_activations = self.move_classifier.do_testing( boards=self.move_classifier.boards) print( "Statistics (training set):\t ", self.move_classifier.check_result( output_activations, labels=self.move_classifier.labels), "%") print("Total time elapsed: " + str(round((time() - self.start_time) / 60, 1)) + " min")
def play2048(rounds=50):
    """Play paired games (own player, then random player) and print the comparison.

    In every round one game from ``setupGame()`` is played to completion with
    ``playMove`` and a second one with random moves; the value of
    ``get_highest_value()`` at the end of each game is recorded. After all
    rounds both result lists are printed, followed by the
    ``ai2048demo.welch`` score (random results first).
    """
    def _finish_game(make_move):
        # Start a fresh game and apply make_move until the game reports it is over.
        game = setupGame()
        while not game.is_game_over():
            make_move(game)
        return game.get_highest_value()

    def _random_move(game):
        # NOTE(review): randint(0, 4) is inclusive at both ends, so five
        # distinct values reach game.move() - confirm that is intended.
        random_move = random.randint(0, 4)
        game.move(random_move)

    own_results, random_results = [], []
    completed = 0
    while completed < rounds:
        own_results.append(_finish_game(playMove))
        random_results.append(_finish_game(_random_move))
        completed += 1

    print(own_results)
    print(random_results)
    score = ai2048demo.welch(random_results, own_results)
    print("Demo score:", score)
def demoRun(iterations, gui):
    """Play 50 random games and 50 network games and print their comparison.

    For each game a fresh 16-element board of zeros is handed to
    ``randomPlay`` / ``networkPlay`` together with ``gui``, and the maximum
    element of the returned board is recorded. The random results are
    pickled to the file ``randomResult`` in the working directory and read
    back from it before the network games are played. Finally the value of
    ``ai2048demo.welch(randomTiles, networkTiles)`` is printed.

    ``iterations`` is accepted but not used; the number of games per player
    is fixed at 50.
    """
    def _best_tiles(play, games=50):
        # Play `games` games from an empty board and collect each final maximum.
        tiles = []
        for _ in range(games):
            board = play([0] * 16, gui)
            tiles.append(max(board))
        return tiles

    randomTiles = _best_tiles(randomPlay)

    # Context managers make sure the file is closed even if pickling fails.
    with open("randomResult", "wb") as result_file:
        pickle.dump(randomTiles, result_file, protocol=pickle.HIGHEST_PROTOCOL)
    with open("randomResult", "rb") as result_file:
        # Reads back the file written just above.
        randomTiles = pickle.load(result_file)

    networkTiles = _best_tiles(networkPlay)
    print(ai2048demo.welch(randomTiles, networkTiles))
# Fragment of a larger routine: the enclosing loop/function header is not visible here and indentation was lost.
# Visible steps: print the run number (i + 1); reset TILES_RANDOM and TILES_ANN to empty lists; call newGameRandom(4)
# and then newGameANN(4) 50 times each (presumably these fill TILES_RANDOM / TILES_ANN as a side effect - verify);
# print ai2048demo.welch(TILES_RANDOM, TILES_ANN); take the third-from-last character of the fourth line of that
# result as the score (falling back to 0 on any exception), print it and append int(score) to scores; finally
# compute and print both averages and append RANDOM_AVERAGE to RANDOM_AVERAGES.
# NOTE(review): the bare `except:` hides every error raised while parsing the welch() output.
print('RUN NUMBER:', i + 1) TILES_RANDOM = [] TILES_ANN = [] for i in range(50): newGameRandom(4) #print(RUN_RANDOM) #print(TILES_RANDOM) #print(len(TILES_RANDOM)) for i in range(50): newGameANN(4) #print(RUN_ANN) #print(TILES_ANN) #print(len(TILES_ANN)) result = ai2048demo.welch(TILES_RANDOM, TILES_ANN) print(result) score = 0 try: score = result.split('\n')[3][-3] except: score = 0 print(score) scores.append(int(score)) RANDOM_AVERAGE = sum(TILES_RANDOM)/len(TILES_RANDOM) ANN_AVERAGE = sum(TILES_ANN)/len(TILES_ANN) print('Random average:') print(RANDOM_AVERAGE) print('ANN average:') print(ANN_AVERAGE) RANDOM_AVERAGES.append(RANDOM_AVERAGE)
# Fragment: the leading statements are the tail of a method whose header is not visible; they stop when no move
# was made, place a new value on the board, print the max tile value from board.get_tile_stats() and store it
# on self.max_tile_value.
# The `if __name__ == '__main__':` part creates 50 PlayRandomly() and 50 Play() instances, collects each
# instance's max_tile_value, prints a Counter of each list, and prints
# ai2048demo.welch(random_max_tile_values, ai_max_tile_values).
# NOTE(review): xrange exists only in Python 2 while print is called as a function - presumably the file
#               imports print_function from __future__; confirm before porting.
if not moved: break board.place_new_value_randomly() num_empty_tiles, max_tile_value, tile_sum = board.get_tile_stats() print(max_tile_value) self.max_tile_value = max_tile_value if __name__ == '__main__': random_max_tile_values = [] print('Random player is playing...') for i in xrange(50): random_play = PlayRandomly() random_max_tile_values.append(random_play.max_tile_value) ai_max_tile_values = [] print('AI is playing...') for i in xrange(50): ai_play = Play() ai_max_tile_values.append(ai_play.max_tile_value) random_max_tile_value_counts = Counter(random_max_tile_values) print('random player stats:', random_max_tile_value_counts) ai_max_tile_value_counts = Counter(ai_max_tile_values) print('AI player stats:', ai_max_tile_value_counts) print(ai2048demo.welch(random_max_tile_values, ai_max_tile_values))
# Fragment of a larger routine: its header is not visible here and indentation was lost.
# Visible steps: play 50 random games on a PowerBoard((4, 4)) - add one random tile, then loop: when
# is_game_over() the board is printed and max(pb.get_board()) is appended to random_best_tiles, otherwise a
# direction chosen by random.choice from get_possible_move_directions() is played.
# Afterwards both best-tile lists are printed with statistics.mean of each, the ai2048demo.welch result is
# printed, and a timestamped summary (topology, epochs, both lists and both means) is appended to play2048log.txt.
# ann_best_tiles, random_best_tiles, start_time, hidden_layer_topology and no_of_epochs come from code before this fragment.
for i in range(50): pb = PowerBoard((4, 4)) pb.add_random_tile() while True: if pb.is_game_over(): pb.print_to_console() random_best_tiles.append(max(pb.get_board())) break possible_directions = pb.get_possible_move_directions() pb.move_and_add_random_tile(random.choice(possible_directions)) print(ann_best_tiles) ann_mean = statistics.mean(ann_best_tiles) print("Mean best tile for ANN:", ann_mean) print(random_best_tiles) random_mean = statistics.mean(random_best_tiles) print("Mean best tile for random:", random_mean) print("Welch test score: ", ai2048demo.welch(random_best_tiles, ann_best_tiles)) with open("play2048log.txt", "a") as logfile: timestamp = dt.fromtimestamp(start_time) readable_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S') logfile.write("\n\nTest started at " + readable_timestamp) logfile.write("\nHidden layer topology: " + str(hidden_layer_topology)) logfile.write("\nNumber of epochs: " + str(no_of_epochs)) logfile.write("\nANN best tiles: " + str(ann_best_tiles)) logfile.write("\nANN average tile: " + str(ann_mean)) logfile.write("\nRandom best tiles: " + str(random_best_tiles)) logfile.write("\nRandom average tile: " + str(random_mean))
# main(): command-line entry point. Parses the argparse options below, prints a message and exits with -1
# unless --A, --B or --compare is given, seeds `random` with 42 and numpy.random from it, then runs one mode:
#   --ai            : with --benchmark N, play N play_ai_game() games and print mean/std; otherwise play 50 random
#                     and 50 AI games and print ai2048demo.welch(Lr, La)
#   --benchmark N   : load the pickled model (model_a if --A else model_b), play N ANN games, print mean/std
#   --generate N    : collect training examples from N generated games, shuffle them, pickle to data_a/data_b
#   --compare N     : play N ANN games with each of model_a and model_b and print scipy's ttest_ind with equal_var=False
#   --demo          : play 50 ANN games (trying moves in descending predicted probability until one is accepted)
#                     and 50 random games, then print ai2048demo.welch(Lr, La)
#   otherwise       : train a vi.theano.Network by minibatch gradient descent; epoch_status_function pickles the
#                     network whenever it is called with is_best set
# NOTE(review): --seed, --hidden_function, --max_time, --output_directory and --runs are parsed but never read in
#               this function; the seed is fixed at 42 and the activation is always theano.tensor.nnet.relu.
# NOTE(review): pickle.load(open(...)) leaves the file handles unclosed; pickle must only be used on trusted files.
# NOTE(review): testing_function is built from training_dataset, so the reported testing error is measured on
#               the training examples.
def main(): parser = argparse.ArgumentParser() parser.add_argument('--A', action='store_true') parser.add_argument('--B', action='store_true') parser.add_argument('--L1', type=float) parser.add_argument('--L2', type=float) parser.add_argument('--ai', action='store_true') parser.add_argument('--benchmark', type=int) parser.add_argument('--compare', type=int) parser.add_argument('--data_a', default='training_data_a.pkl') parser.add_argument('--data_b', default='training_data_b.pkl') parser.add_argument('--demo', action='store_true') parser.add_argument('--dropout', type=float) parser.add_argument('--epochs', type=int, default=100) parser.add_argument('--generate', type=int) parser.add_argument('--hidden_function', default='relu') parser.add_argument('--hidden_layers', nargs='*') parser.add_argument('--learning_rate', type=float, default=0.08) parser.add_argument('--max_time', type=int) parser.add_argument('--minibatch_size', type=int, default=40) parser.add_argument('--model_a', default='model_a.pkl') parser.add_argument('--model_b', default='model_b.pkl') parser.add_argument('--output_directory', default='../data') parser.add_argument('--runs', action='store_true') parser.add_argument('--seed', type=int) parser.add_argument('--training_ratio', type=float) args = parser.parse_args() if not args.A and not args.B and not args.compare: print('A or B representation must be chosen!') sys.exit(-1) print(args) random.seed(42) numpy.random.seed(random.randint(0, 2**30)) if args.ai: if args.benchmark: La = list(play_ai_game(args.B) for _ in range(args.benchmark)) print('mean: {} std: {}'.format(numpy.mean(La), numpy.std(La))) else: Lr = list(play_random_game() for _ in range(50)) La = list(play_ai_game(args.B) for _ in range(50)) print('random play: {}'.format(Lr)) print('ann play: {}'.format(La)) print(ai2048demo.welch(Lr, La)) elif args.benchmark: network = pickle.load( open(args.model_a if args.A else args.model_b, 'rb')) predict_function = theano.function( 
inputs=[network.inputs], outputs=network.layers[-1].testing_outputs, allow_input_downcast=True) La = [ play_ann_game(args.B, predict_function) for _ in range(args.benchmark) ] print('mean: {} std: {}'.format(numpy.mean(La), numpy.std(La))) elif args.generate: training_data = [] training_labels = [] for i in range(args.generate): top_tile, x, y = generate_training_data(args.B) training_data.extend(x) training_labels.extend(y) print('{}/{} ({:.2f}%)'.format(i + 1, args.generate, 100.0 * (i + 1) / args.generate)) print('{} examples generated from {} games'.format( len(training_data), args.generate)) training_examples = list(zip(training_data, training_labels)) random.shuffle(training_examples) training_data[:], training_labels[:] = zip(*training_examples) with open(args.data_a if args.A else args.data_b, 'wb') as training_data_file: pickle.dump((training_data, training_labels), training_data_file) elif args.compare: import scipy.stats network_a = pickle.load(open(args.model_a, 'rb')) network_b = pickle.load(open(args.model_b, 'rb')) predict_function_a = theano.function( inputs=[network_a.inputs], outputs=network_a.layers[-1].testing_outputs, allow_input_downcast=True) predict_function_b = theano.function( inputs=[network_b.inputs], outputs=network_b.layers[-1].testing_outputs, allow_input_downcast=True) La_a = [ play_ann_game(False, predict_function_a) for _ in range(args.compare) ] La_b = [ play_ann_game(True, predict_function_b) for _ in range(args.compare) ] statistic, pvalue = scipy.stats.ttest_ind(La_a, La_b, equal_var=False) print('A mean: {} A std: {}'.format(numpy.mean(La_a), numpy.std(La_a))) print('B mean: {} B std: {}'.format(numpy.mean(La_b), numpy.std(La_b))) print('statistic = {:f} pvalue = {:f}'.format(statistic, pvalue)) elif args.demo: network = pickle.load( open(args.model_a if args.A else args.model_b, 'rb')) predict_function = theano.function( inputs=[network.inputs], outputs=network.layers[-1].testing_outputs, allow_input_downcast=True) La = [] 
for _ in range(50): game = TwentyFortyEight() game.new_tile() while not game.is_game_over(): x = numpy.asarray(transform_state(game, args.B)) move_probabilities = predict_function(x.reshape(1, x.shape[0]))[0] move_probabilities_sorted = sorted( ((probability, move) for (move, probability) in enumerate(move_probabilities)), reverse=True) # Select the first valid move ranked by probability: for probability, move in move_probabilities_sorted: if game.move(move): break t = game.get_highest_tile() print(t) La.append(t) Lr = list(play_random_game() for _ in range(50)) print('random play: {}'.format(Lr)) print('ann play: {}'.format(La)) print(ai2048demo.welch(Lr, La)) else: def epoch_status_function(time, epoch, average_loss, testing_error, is_best): if is_best: with open(args.model_a if args.A else args.model_b, 'wb') as model_file: pickle.dump(network, model_file) print( "Time: {:7.2f} sec, Epoch: {:4d}, Average loss: {:.5f}, Testing error: {:.5f}%" .format(time, epoch, average_loss, testing_error * 100.0)) x_data, y_data = pickle.load( open(args.data_a if args.A else args.data_b, 'rb')) #x_data, y_data = shuffle(x_data, y_data, random_state=0) num_training_examples = int(math.ceil(args.training_ratio * len(x_data))) \ if args.training_ratio else len(x_data) input_size = len(x_data[0]) layer_sizes = [input_size] + list(map(int, args.hidden_layers or [])) + [4] print("Creating shared Theano dataset variables...") training_dataset = vi.theano.TheanoDataSet( theano.shared(numpy.asarray(x_data[:num_training_examples], dtype=theano.config.floatX), borrow=True), T.cast( theano.shared(numpy.asarray(y_data[:num_training_examples], dtype=theano.config.floatX), borrow=True), 'int32'), num_training_examples) minibatch_index = T.lscalar() x = T.matrix('x') y = T.ivector('y') network = vi.theano.Network(x, layer_sizes, theano.tensor.nnet.relu, args.dropout, args.L1, args.L2) training_minibatch_count = math.ceil(training_dataset.size / args.minibatch_size) loss_function = 
network.loss_function(y) parameters = network.parameters() gradients = [ T.grad(loss_function, parameter) for parameter in parameters ] updates = [(parameter, parameter - args.learning_rate * gradient) for parameter, gradient in zip(parameters, gradients)] training_function = theano.function( inputs=[minibatch_index], outputs=network.errors(y), updates=updates, givens={ x: training_dataset.data[minibatch_index * args.minibatch_size:(minibatch_index + 1) * args.minibatch_size], y: training_dataset.labels[minibatch_index * args.minibatch_size:(minibatch_index + 1) * args.minibatch_size] }) testing_function = theano.function( inputs=[minibatch_index], outputs=network.errors(y), givens={ x: training_dataset.data[minibatch_index * args.minibatch_size:(minibatch_index + 1) * args.minibatch_size], y: training_dataset.labels[minibatch_index * args.minibatch_size:(minibatch_index + 1) * args.minibatch_size] }) print( "Starting stochastic gradient descent. learning_rate={} epochs={}". format(args.learning_rate, args.epochs)) training_time, training_epochs, testing_error = \ vi.theano.stochastic_gradient_descent( training_function, training_minibatch_count, testing_function, training_minibatch_count, learning_rate=args.learning_rate, epochs=args.epochs, epoch_status_function=epoch_status_function) print(( "Training completed after {:.2f} seconds. {} epochs at {:.2f} epochs / second. " + "Testing error: {:.5f}%").format(training_time, training_epochs, training_epochs / training_time, testing_error * 100.0))
# Fragment of a larger routine: the enclosing loop/function header is not visible here and indentation was lost.
# Visible steps: print the run number (i + 1); reset TILES_RANDOM and TILES_ANN to empty lists; call newGameRandom(4)
# and then newGameANN(4) 50 times each (presumably these fill TILES_RANDOM / TILES_ANN as a side effect - verify);
# print ai2048demo.welch(TILES_RANDOM, TILES_ANN); take the third-from-last character of the fourth line of that
# result as the score (falling back to 0 on any exception), print it and append int(score) to scores; finally
# compute and print both averages and append RANDOM_AVERAGE to RANDOM_AVERAGES.
# NOTE(review): the bare `except:` hides every error raised while parsing the welch() output.
print('RUN NUMBER:', i + 1) TILES_RANDOM = [] TILES_ANN = [] for i in range(50): newGameRandom(4) #print(RUN_RANDOM) #print(TILES_RANDOM) #print(len(TILES_RANDOM)) for i in range(50): newGameANN(4) #print(RUN_ANN) #print(TILES_ANN) #print(len(TILES_ANN)) result = ai2048demo.welch(TILES_RANDOM, TILES_ANN) print(result) score = 0 try: score = result.split('\n')[3][-3] except: score = 0 print(score) scores.append(int(score)) RANDOM_AVERAGE = sum(TILES_RANDOM) / len(TILES_RANDOM) ANN_AVERAGE = sum(TILES_ANN) / len(TILES_ANN) print('Random average:') print(RANDOM_AVERAGE) print('ANN average:') print(ANN_AVERAGE) RANDOM_AVERAGES.append(RANDOM_AVERAGE)