コード例 #1
0
def nn_vs_nn_export_better_player():
    """
    Play NNAgent1 against an NNAgent1 built with load_best=True until the
    former is judged better, then export it as the new best model.

    After every game both agents receive their reward (the game result for
    player 1, the negated result for player 2) and the result is recorded in
    a Statistic. The loop ends on the first game where stats.nn_is_better()
    is truthy and the number of games played is a multiple of 100.
    """
    challenger = NNAgent1(verbose = True)
    reigning_best = NNAgent1(load_best=True)

    tracker = Statistic(challenger, verbose=True)

    challenger_is_better = False
    while not challenger_is_better:
        game = Backgammon()
        game.set_player_1(challenger)
        game.set_player_2(reigning_best)
        outcome = game.play()

        challenger.add_reward(outcome)
        reigning_best.add_reward(-1 * outcome)

        tracker.add_win(outcome)

        # Only stop on a 100-game boundary, and only if the statistics
        # object reports the challenger as better at that moment.
        challenger_is_better = (
            tracker.nn_is_better() and tracker.games_played % 100 == 0
        )

    # Leaving the loop presumably means the current network outperforms the
    # stored best one (the original author was not fully certain of this),
    # so the current network is exported as the new best.
    print("Congratulations, you brought the network one step closer")
    print("to taking over the world (of backgammon)!!!")
    challenger.export_model(filename="nn_best_model")
コード例 #2
0
def do_default():
    """
    Endlessly play the 'nn_pg_2' agent (loaded as 'best') against the
    'random' agent.

    Both agents are switched to training mode. After each game every agent
    receives its reward (the game result for player 1, the negated result
    for player 2) and the result is recorded in a Statistic. The loop has no
    exit condition, so this function never returns normally.
    """
    learner = get_agent_by_config_name('nn_pg_2', 'best')
    opponent = get_agent_by_config_name('random', 'None')

    for agent in (learner, opponent):
        agent.training = True

    tracker = Statistic(learner, verbose=True)

    # Games are played forever; interrupt the process to stop.
    while True:
        game = Backgammon()
        game.set_player_1(learner)
        game.set_player_2(opponent)
        outcome = game.play()

        learner.add_reward(outcome)
        opponent.add_reward(-outcome)

        tracker.add_win(outcome)
コード例 #3
0
def train(competitors):
    """
    Train the given competitors against each other in an endless loop.

    Each iteration picks a pair via random_pair_not_self, plays one game
    between their agents in training mode, hands out rewards, updates the
    win/loss records and the ratings of both competitors. Competitors are
    printed every 10 iterations and saved every 100 * len(competitors)
    iterations. The loop has no exit condition.

    competitors -- collection of dict-like entries exposing at least the
                   'agent' and 'rating' keys.
    """
    print("Training...")
    games_played = 0
    while True:
        games_played += 1
        first, second = random_pair_not_self(competitors)
        agent_a, agent_b = first['agent'], second['agent']

        agent_a.training = True
        agent_b.training = True

        game = Backgammon()
        game.set_player_1(agent_a)
        game.set_player_2(agent_b)

        # 1 if player 1 won, -1 if player 2 won
        outcome = game.play()

        agent_a.add_reward(outcome)
        agent_b.add_reward(-outcome)
        update_wins_and_losses(outcome, first, second)

        # Re-rate both competitors based on this game's outcome.
        rating_a, rating_b = update_rating(
            first['rating'], second['rating'], outcome)
        first['rating'] = rating_a
        second['rating'] = rating_b

        if games_played % 10 == 0:
            print_competitors(competitors, games_played)

        save_interval = 100 * len(competitors)
        if games_played % save_interval == 0:
            save_competitors(competitors)
コード例 #4
0
 def plan(self, board, time_budget_ms=250):
     """
     Play simulated games from copies of ``board`` until the time budget
     is used up.

     The deadline is fixed before the planning phase is initialized, so the
     time spent in ``initialize_planning_phase`` counts against the budget.
     Each simulation deep-copies ``board`` so the caller's board is never
     handed to the game, plays a full game between the model agent
     (player 1) and the transient agent (player 2), and gives the resulting
     reward to the transient agent only.

     board          -- the position to start every simulated game from.
     time_budget_ms -- planning time in milliseconds (default 250); a value
                       of zero or less runs no simulations.
     """
     # A monotonic clock is used for the deadline: wall-clock time can jump
     # (NTP or manual adjustment), which would cut the planning window short
     # or extend it.
     deadline = time.monotonic() + time_budget_ms / 1000.0
     model_agent, transient_agent = self.initialize_planning_phase()
     while time.monotonic() < deadline:
         copy_of_board = copy.deepcopy(board)
         game = Backgammon()
         game.reset()
         game.set_player_1(model_agent)
         game.set_player_2(transient_agent)
         reward = game.play(start_with_this_board=copy_of_board)
         transient_agent.add_reward(reward)
コード例 #5
0
def self_play():
    """
    Play a single game of backgammon between two HumanAgent instances.
    """
    first_human, second_human = HumanAgent(), HumanAgent()

    game = Backgammon()
    game.set_player_1(first_human)
    game.set_player_2(second_human)
    game.play()
コード例 #6
0
def test_play():
    """
    Play a single game with a HumanAgent as player 1 against the agent
    returned by get_agent_by_config_name('nn_pg', 'best') as player 2.
    """
    human = HumanAgent()
    configured_agent = get_agent_by_config_name('nn_pg', 'best')

    game = Backgammon()
    game.set_player_1(human)
    game.set_player_2(configured_agent)
    game.play()
コード例 #7
0
def random_play():
    """
    Play a single game between two RandomAgent instances, with both the
    commentary and verbose options of Backgammon.play switched on.
    """
    first_random, second_random = RandomAgent(), RandomAgent()

    game = Backgammon()
    game.set_player_1(first_random)
    game.set_player_2(second_random)
    game.play(commentary=True, verbose=True)