def plan(self, board):
    """
    Simulate games from `board` until a fixed time budget is used up.

    The two agents returned by `self.initialize_planning_phase()` play
    complete games against each other. Every game starts from a fresh deep
    copy of `board`, so the simulations work on a copy rather than on the
    caller's board. The value returned by each game is handed to the
    transient agent through `add_reward`.

    Args:
        board: the position each simulated game starts from.
    """
    # The budget is measured in milliseconds of wall-clock time.
    planning_budget_ms = 250
    deadline_ms = time.time() * 1000 + planning_budget_ms

    model_agent, transient_agent = self.initialize_planning_phase()

    while time.time() * 1000 < deadline_ms:
        simulated_board = copy.deepcopy(board)

        simulation = Backgammon()
        simulation.reset()
        simulation.set_player_1(model_agent)
        simulation.set_player_2(transient_agent)

        outcome = simulation.play(start_with_this_board=simulated_board)
        transient_agent.add_reward(outcome)
def nn_vs_nn_export_better_player():
    """
    Play a fresh NNAgent1 against one created with `load_best=True` and
    export the fresh one once the statistics say it is better.

    Games are played until `stats.nn_is_better()` holds at a point where
    the number of games played is a multiple of 100. The first agent is
    then exported under the file name "nn_best_model".
    """
    challenger = NNAgent1(verbose=True)
    champion = NNAgent1(load_best=True)
    stats = Statistic(challenger, verbose=True)

    challenger_is_better = False
    while not challenger_is_better:
        match = Backgammon()
        match.set_player_1(challenger)
        match.set_player_2(champion)
        outcome = match.play()

        # The second player gets the mirrored result.
        challenger.add_reward(outcome)
        champion.add_reward(-1 * outcome)
        stats.add_win(outcome)

        # Only stop on a multiple of 100 games; the check runs after the
        # game so at least one game is always played.
        challenger_is_better = (stats.nn_is_better()
                                and stats.games_played % 100 == 0)

    # Getting here should mean the current network beat the stored best
    # one (the original author was not fully sure of this), so it is
    # exported as the new best.
    print("Congratulations, you brought the network one step closer")
    print("to taking over the world (of backgammon)!!!")
    challenger.export_model(filename="nn_best_model")
def do_default():
    """
    Play a neural network agent against a random agent, forever.

    Both agents are put in training mode. After every game the first agent
    is rewarded with the game result, the second with its negation, and
    the result is recorded in a `Statistic` for the first agent. The loop
    never terminates on its own.
    """
    nn_agent = get_agent_by_config_name('nn_pg_2', 'best')
    random_agent = get_agent_by_config_name('random', 'None')

    for agent in (nn_agent, random_agent):
        agent.training = True

    stats = Statistic(nn_agent, verbose=True)

    # Play games forever.
    while True:
        game = Backgammon()
        game.set_player_1(nn_agent)
        game.set_player_2(random_agent)
        outcome = game.play()

        nn_agent.add_reward(outcome)
        random_agent.add_reward(-outcome)

        stats.add_win(outcome)
def train(competitors):
    """
    Train the competitors against each other in an endless loop.

    Each iteration picks a pair via `random_pair_not_self`, lets their
    agents play one game in training mode, rewards both agents, and
    updates the pair's win/loss records and ratings.

    Args:
        competitors: a sized collection of dict-like entries; this
            function reads the 'agent' and 'rating' keys of each entry
            and writes 'rating' back.
    """
    print("Training...")

    games_played = 0
    while True:
        games_played += 1

        first, second = random_pair_not_self(competitors)
        agent_1, agent_2 = first['agent'], second['agent']
        for agent in (agent_1, agent_2):
            agent.training = True

        game = Backgammon()
        game.set_player_1(agent_1)
        game.set_player_2(agent_2)

        # 1 if player 1 won, -1 if player 2 won
        outcome = game.play()

        agent_1.add_reward(outcome)
        agent_2.add_reward(-outcome)

        update_wins_and_losses(outcome, first, second)

        # Rate performance
        rating_1, rating_2 = update_rating(
            first['rating'], second['rating'], outcome)
        first['rating'] = rating_1
        second['rating'] = rating_2

        # Report every 10 games; save every 100 games per competitor.
        if games_played % 10 == 0:
            print_competitors(competitors, games_played)

        if games_played % (100 * len(competitors)) == 0:
            save_competitors(competitors)
def self_play():
    """
    Play one game of backgammon between two HumanAgent instances.
    """
    first_human = HumanAgent()
    second_human = HumanAgent()

    game = Backgammon()
    game.set_player_1(first_human)
    game.set_player_2(second_human)
    game.play()
def test_play():
    """
    Play one game between a human agent (player 1) and the agent obtained
    from `get_agent_by_config_name('nn_pg', 'best')` (player 2).
    """
    human = HumanAgent()
    opponent = get_agent_by_config_name('nn_pg', 'best')

    game = Backgammon()
    game.set_player_1(human)
    game.set_player_2(opponent)
    game.play()
def random_play():
    """
    Play one game between two random agents, with the game's commentary
    and verbose output switched on.
    """
    first_random = RandomAgent()
    second_random = RandomAgent()

    game = Backgammon()
    game.set_player_1(first_random)
    game.set_player_2(second_random)
    game.play(commentary=True, verbose=True)
def action(self, board, dice, player):
    """
    Choose a move using `self.pub_stomper_policy`.

    Args:
        board (ndarray): backgammon board
        dice (ndarray): a pair of dice
        player: the number for the player on the board whose turn it is.
            Not used by this method.

    Returns:
        The move picked by `pub_stomper_policy`, or an empty list when
        there are no legal moves for this board and dice.
    """
    legal_moves, resulting_boards = (
        Backgammon.get_all_legal_moves_for_two_dice(board, dice))

    # Nothing to choose from: pass.
    if len(legal_moves) == 0:
        return []

    return self.pub_stomper_policy(legal_moves, resulting_boards, dice,
                                   board)
def action(self, board, dice, player):
    """
    Choose uniformly at random among the legal moves.

    Args:
        board (ndarray): backgammon board
        dice (ndarray): a pair of dice
        player: the number for the player on the board whose turn it is.
            Not used by this method.

    Returns:
        One entry of the legal-move list, or an empty list when there are
        no legal moves for this board and dice.
    """
    # Only the moves are needed here; the resulting boards are ignored.
    candidates, _ = Backgammon.get_all_legal_moves_for_two_dice(board, dice)

    if len(candidates) == 0:
        return []

    chosen_index = np.random.randint(len(candidates))
    return candidates[chosen_index]
def action(self, board, dice, player):
    """
    Ask a human on the console for a move until a legal one is entered.

    The board, the player's symbol and the dice are printed before every
    attempt. The first move typed in must match the first move of at
    least one legal move sequence. If any matching sequence has a second
    move, a second move is requested and must match one of them. After an
    invalid first or second move the whole prompt starts over.

    Args:
        board (ndarray): backgammon board
        dice (ndarray): a pair of dice
        player: the number for the player on the board whose turn it is.

    Returns:
        `[]` when there are no legal moves (after the user presses enter),
        `[move_1]` when the chosen first move has no follow-up move, and
        `[move_1, move_2]` otherwise. The moves are the values returned by
        `parse_input`.
    """
    all_legal_moves = Backgammon.get_all_legal_moves_for_two_dice(
        board, dice)[0]

    # Runs this until a legal move is made.
    while True:
        print(Backgammon.to_string(board))
        print("")
        print(" You: " + str(Backgammon.get_player_symbol(player)))
        print(" Dice: " + str(dice))
        print("")
        print(" Press (enter) to pass if no moves are possible.")
        print(" Syntax: POSITION_FROM POSITION_TO")

        if len(all_legal_moves) == 0:
            # No possible moves
            print("No possible moves. Press (enter) to continue.")
            input("Input: ")
            return []

        # Some moves possible
        move_1 = parse_input(input("Input: "))

        valid_move_1 = False
        future_legal_moves = []
        for moves in all_legal_moves:
            if len(moves) == 0:
                continue
            first_move = moves[0]
            if len(first_move) != 2:
                continue
            if first_move[0] == move_1[0] and first_move[1] == move_1[1]:
                valid_move_1 = True
                # A sequence may consist of a single move. Only collect a
                # follow-up when there is one; indexing moves[1] blindly
                # raised IndexError and made the single-move return below
                # unreachable.
                if len(moves) > 1:
                    future_legal_moves.append(moves[1])

        if not valid_move_1:
            print("Invalid move")
            continue

        # Check if future moves are possible
        if len(future_legal_moves) == 0:
            return [move_1]

        move_2 = parse_input(input("Input: "))
        for second_move in future_legal_moves:
            if second_move[0] == move_2[0] and second_move[1] == move_2[1]:
                return [move_1, move_2]

        print("Invalid second move")