def nn_vs_nn_export_better_player():
    """Train a NN agent against the current best NN agent and export it.

    Plays games until the challenger is measured as better than the
    loaded best network, then writes the challenger's model to disk as
    the new best model.
    """
    player1 = NNAgent1(verbose=True)
    player2 = NNAgent1(load_best=True)

    stats = Statistic(player1, verbose=True)

    while True:
        bg = Backgammon()
        bg.set_player_1(player1)
        bg.set_player_2(player2)
        winner = bg.play()

        # winner is +1 if player 1 won, -1 if player 2 won.
        player1.add_reward(winner)
        player2.add_reward(-winner)

        stats.add_win(winner)

        # Check the cheap modulus first so the (possibly expensive)
        # nn_is_better() comparison only runs once every 100 games.
        if stats.games_played % 100 == 0 and stats.nn_is_better():
            break

    # Reaching this point means the current network outperformed the
    # best saved network, so export it as the new best.
    print("Congratulations, you brought the network one step closer")
    print("to taking over the world (of backgammon)!!!")
    player1.export_model(filename="nn_best_model")
def do_default():
    """Train a neural network agent by playing it against a random agent.

    Loads the best 'nn_pg_2' network and a random agent, enables
    training mode on both, and plays games indefinitely while tracking
    results through Statistic.
    """
    player1 = get_agent_by_config_name('nn_pg_2', 'best')
    player2 = get_agent_by_config_name('random', 'None')

    player1.training = True
    player2.training = True

    stats = Statistic(player1, verbose=True)

    # Play games forever; each iteration is one complete game.
    while True:
        bg = Backgammon()
        bg.set_player_1(player1)
        bg.set_player_2(player2)
        winner = bg.play()

        # winner is +1 if player 1 won, -1 if player 2 won.
        player1.add_reward(winner)
        player2.add_reward(-winner)

        stats.add_win(winner)
def train(competitors):
    """Endlessly train a pool of competitors against each other.

    Each iteration picks two distinct competitors at random, plays one
    game between their agents, rewards both agents, updates win/loss
    tallies and ratings, and periodically prints standings and saves
    the pool to disk.
    """
    print("Training...")
    games_played = 0
    while True:
        games_played += 1

        first, second = random_pair_not_self(competitors)
        agent_a = first['agent']
        agent_b = second['agent']
        agent_a.training = True
        agent_b.training = True

        match = Backgammon()
        match.set_player_1(agent_a)
        match.set_player_2(agent_b)

        # 1 if player 1 won, -1 if player 2 won
        outcome = match.play()

        agent_a.add_reward(outcome)
        agent_b.add_reward(-outcome)

        update_wins_and_losses(outcome, first, second)

        # Rate performance
        first['rating'], second['rating'] = update_rating(
            first['rating'], second['rating'], outcome)

        if games_played % 10 == 0:
            print_competitors(competitors, games_played)

        if games_played % (100 * len(competitors)) == 0:
            save_competitors(competitors)
def plan(self, board, time_limit_ms=250):
    """Run simulated self-play games from *board* within a time budget.

    Args:
        board: the current game position; deep-copied for every
            simulated game so planning never mutates the real board.
        time_limit_ms: planning budget in milliseconds (default 250,
            preserving the previously hard-coded value).
    """
    # time.monotonic() cannot jump backwards/forwards with wall-clock
    # adjustments, unlike time.time(), so the deadline is reliable.
    deadline = time.monotonic() * 1000 + time_limit_ms
    model_agent, transient_agent = self.initialize_planning_phase()
    while time.monotonic() * 1000 < deadline:
        copy_of_board = copy.deepcopy(board)
        game = Backgammon()
        game.reset()
        game.set_player_1(model_agent)
        game.set_player_2(transient_agent)
        reward = game.play(start_with_this_board=copy_of_board)
        # NOTE(review): the transient agent is player 2 yet receives
        # +reward, whereas the training loops elsewhere give player 2
        # -reward — confirm this sign is intentional for planning.
        transient_agent.add_reward(reward)
def self_play():
    """Run one game between two human-controlled agents."""
    game = Backgammon()
    game.set_player_1(HumanAgent())
    game.set_player_2(HumanAgent())
    game.play()
def test_play():
    """Run one game between a human agent and the best 'nn_pg' network."""
    human = HumanAgent()
    network = get_agent_by_config_name('nn_pg', 'best')
    game = Backgammon()
    game.set_player_1(human)
    game.set_player_2(network)
    game.play()
def random_play():
    """Run one commentated, verbose game between two random agents."""
    game = Backgammon()
    game.set_player_1(RandomAgent())
    game.set_player_2(RandomAgent())
    game.play(commentary=True, verbose=True)