def test_learn_simple_game(self):
    game_spec = _VerySimpleGameSpec()
    create_model_func = functools.partial(create_network, 2, (4,))
    variables, win_rate = train_policy_gradients(game_spec, create_model_func, None,
                                                 learn_rate=0.1,
                                                 number_of_games=1000,
                                                 print_results_every=100,
                                                 batch_size=20,
                                                 randomize_first_player=False)
    self.assertGreater(win_rate, 0.9)
def test_tic_tac_toe(self):
    game_spec = TicTacToeGameSpec()
    create_model_func = functools.partial(create_network,
                                          game_spec.board_squares(),
                                          (100, 100, 100))
    variables, win_rate = train_policy_gradients(game_spec, create_model_func, None,
                                                 learn_rate=1e-4,
                                                 number_of_games=60000,
                                                 print_results_every=1000,
                                                 batch_size=100,
                                                 randomize_first_player=False)
    self.assertGreater(win_rate, 0.4)
PRINT_RESULTS_EVERY_X = 100  # every how many games to print the results
NETWORK_FILE_PATH = 'current_network.p'  # path to save the network to
NUMBER_OF_GAMES_TO_RUN = 1000

# To play a different game, change this to another spec, e.g. TicTacToeXGameSpec or
# ConnectXGameSpec; getting those to run well may require tuning the hyperparameters a bit.
game_spec = TicTacToeGameSpec()

create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))

train_policy_gradients(game_spec, create_network_func, NETWORK_FILE_PATH,
                       number_of_games=NUMBER_OF_GAMES_TO_RUN,
                       batch_size=BATCH_SIZE,
                       learn_rate=LEARN_RATE,
                       print_results_every=PRINT_RESULTS_EVERY_X)


# Alternative opponent: a human entering flat move indices on the console.
def second_player_move(board_state, side):
    return game_spec.flat_move_to_tuple(int(input("Next Move:")))

'''
train_policy_gradients(game_spec, create_network_func, NETWORK_FILE_PATH,
                       number_of_games=NUMBER_OF_GAMES_TO_RUN,
                       batch_size=BATCH_SIZE,
                       learn_rate=LEARN_RATE,
                       opponent_func=second_player_move)
'''
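# To illustrate the comment above: playing a different game only means changing the
# game_spec line, since everything downstream is built from game_spec. The constructor
# arguments and hidden-layer sizes below are assumptions for illustration (check the
# actual spec classes), and BATCH_SIZE / LEARN_RATE are assumed to be defined alongside
# the other constants; larger boards will likely need retuned hyperparameters.
#
# game_spec = ConnectXGameSpec(...)      # or TicTacToeXGameSpec(...)
# create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))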
        number_of_games=config['number_of_games'],
        update_opponent_winrate=config['update_opponent_winrate'],
        print_results_every=config['print_results_every'],
        learn_rate=config['learn_rate'],
        batch_size=config['batch_size'],
        cnn_on=config['cnn_on'],
        eps=config['eps'],
        deterministic=config['deterministic'],
        mcts=config['mcts'],
        min_win_ticks=config['min_win_ticks'],
        beta=config['beta'])
else:
    res = train_policy_gradients(
        game_spec, create_network_func,
        load_network_file_path=config['load_network_file_path'],
        number_of_games=config['number_of_games'],
        batch_size=config['batch_size'],
        learn_rate=config['learn_rate'],
        print_results_every=config['print_results_every'],
        save_network_file_path=config['save_network_file_path'],
        cnn_on=config['cnn_on'],
        eps=config['eps'],
        deterministic=config['deterministic'],
        mcts=config['mcts'],
        beta=config['beta'])

config["results"] = res[2]
plt.save(config)
# pdb.set_trace()
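# For reference, the calls above read the following keys from config. Every value below
# is an illustrative placeholder, not a tuned setting from the original experiments, and
# the meanings suggested for eps, beta, mcts, etc. are assumptions.
example_config = {
    'load_network_file_path': None,          # or a path to a previously saved network
    'save_network_file_path': 'current_network.p',
    'number_of_games': 10000,
    'batch_size': 100,
    'learn_rate': 1e-4,
    'print_results_every': 500,
    'update_opponent_winrate': 0.65,
    'min_win_ticks': 3,
    'cnn_on': False,
    'eps': 0.1,
    'deterministic': False,
    'mcts': False,
    'beta': 0.01,
}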
network_file_path = network_file_path + ".p"

random_opponent = game_spec.get_random_player_func()
perfect_opponent = game_spec.get_perfect_player()


def mixed_opponent(*args, **kwds):
    # pick the random or the perfect opponent with equal probability for each move
    opponent = random.choice([random_opponent, perfect_opponent])
    return opponent(*args, **kwds)


if args.opponent == "random":
    opponent_func = random_opponent
elif args.opponent == "perfect":
    opponent_func = perfect_opponent
elif args.opponent == "mixed":
    opponent_func = mixed_opponent
else:
    raise Exception("Invalid value for --opponent")

train_policy_gradients(game_spec, create_network_func, network_file_path,
                       opponent_func=opponent_func,
                       number_of_games=args.num_games,
                       batch_size=args.batch_size,
                       learn_rate=args.learning_rate,
                       print_results_every=args.print_freq,
                       draw_reward=args.draw_reward)
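# A minimal argparse setup that would provide the attributes used above. Only --opponent
# is named in the original error message; the other flag names and all defaults are
# assumptions chosen so that the resulting attributes match the code above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--opponent", choices=["random", "perfect", "mixed"], default="random")
parser.add_argument("--num_games", type=int, default=100000)
parser.add_argument("--batch_size", type=int, default=100)
parser.add_argument("--learning_rate", type=float, default=1e-4)
parser.add_argument("--print_freq", type=int, default=1000)
parser.add_argument("--draw_reward", type=float, default=0.0)
args = parser.parse_args()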
from techniques.min_max import min_max_alpha_beta
from techniques.train_policy_gradient import train_policy_gradients
from connect_4.network import connect_4_game_spec, create_convolutional_network


def min_max_move_func(board_state, side):
    # opponent: alpha-beta minimax search to depth 3
    return min_max_alpha_beta(connect_4_game_spec, board_state, side, 3)[1]


train_policy_gradients(connect_4_game_spec, create_convolutional_network,
                       'convolutional_net_5_4_l_c_4_f_1_other_after.p',
                       opponent_func=min_max_move_func,
                       save_network_file_path='convolutional_net_5_4_l_c_4_f_1_other_after_vs_depth_3.p',
                       number_of_games=5000,
                       print_results_every=100)
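# The opponent above searches to a fixed depth of 3 (also reflected in the output file
# name), which keeps each minimax move cheap enough to play thousands of training games.
# A deeper, stronger opponent can be sketched the same way by changing only the depth
# argument; this function name is hypothetical and not part of the original script.
def min_max_depth_5_move_func(board_state, side):
    return min_max_alpha_beta(connect_4_game_spec, board_state, side, 5)[1]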