Example #1
def model_environment(opponent, state, action):
    # relies on module-level objects from the surrounding module:
    # `file` (a log file handle) and the game/board helpers `p`, `game`, `board`

    game_complete = False
    initial_board = state

    file.write("AGENT MAKING MOVE: " + str(action) + str(board.to_state(action)) + "\n")

    # apply the agent's move ('X') to the current state
    current_board = p.add_move('X', action, initial_board)

    print("AFTER AGENT MOVE:")
    print(game.to_display_string(current_board))

    file.write("AFTER AGENT MOVE:\n")
    file.write(game.to_display_string(current_board))

    reward = 0.0

    if p.is_winner(current_board, 'X'):
        game_complete = True
        reward = 1.0
    elif p.is_cat_game(current_board):
        game_complete = True
        reward = 0.0

    if not game_complete:
        # let the opponent make a move ...
        (opponent_id, opponent_move) = opponent.pick_next_move(current_board)

        current_board = p.add_move(opponent_id, opponent_move, current_board)

        print("AFTER OPPONENT MOVE")
        print(game.to_display_string(current_board))

        file.write("AFTER OPPONENT MOVE\n")
        file.write(game.to_display_string(current_board))

        if p.is_winner(current_board, opponent_id):
            game_complete = True
            reward = -1.0
        elif p.is_cat_game(current_board):
            game_complete = True
            reward = 0.0

    return current_board, reward, game_complete
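The function above packages one environment step for the learning agent: apply the agent's move, check for a terminal state, let the opponent reply, check again, and return the next state together with a reward and a done flag. For reference, here is a minimal self-contained sketch of that same step pattern on a flat 9-cell board; the `step` helper and the random opponent are illustrative stand-ins, not part of the original module:

import random

WIN_LINES = [(0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
             (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
             (0, 4, 8), (2, 4, 6)]              # diagonals

def is_winner(board, mark):
    return any(all(board[i] == mark for i in line) for line in WIN_LINES)

def is_cat_game(board):
    return all(cell != ' ' for cell in board)

def step(board, action, agent_mark='X', opponent_mark='O'):
    """One environment step: agent move, then a random opponent reply.

    Returns (next_board, reward, done), with reward +1 / -1 / 0 from the
    agent's point of view, mirroring the snippet above.
    """
    board = board.copy()
    board[action] = agent_mark
    if is_winner(board, agent_mark):
        return board, 1.0, True
    if is_cat_game(board):
        return board, 0.0, True

    # opponent replies with a uniformly random legal move
    opponent_move = random.choice([i for i, c in enumerate(board) if c == ' '])
    board[opponent_move] = opponent_mark
    if is_winner(board, opponent_mark):
        return board, -1.0, True
    return board, 0.0, is_cat_game(board)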
Example #2
def play_game(p1, p2, file=None):

    board = p.empty_board()

    players = [p1, p2]
    current_player_index = 0
    winner = None
    move_count = 0
    while True:

        print("Current move is for player: ", players[current_player_index].player)

        if file is not None:
            file.write("PRIOR TO MOVE " + str(move_count) + " ------------\n")
            file.write(to_display_string(board))

        # a full board with no winner ends the game in a draw (cat game)
        if p.is_cat_game(board):
            if file is not None:
                file.write("RESULT IS CAT GAME\n")
            break

        # pick_next_move returns a (marker, cell) tuple
        m = players[current_player_index].pick_next_move(board)

        board = p.add_move(m[0], m[1], board)

        p.display_board(board)

        move_count += 1

        if p.is_winner(board, players[current_player_index].player):
            winner = players[current_player_index]
            if file is not None:
                file.write("FINAL BOARD AFTER MOVE " + str(move_count) + " WINNER IS: " + winner.player + "\n")
                file.write(to_display_string(board))
            break

        # alternate players
        if current_player_index == 0:
            print("Switching to player 1...")
            current_player_index = 1
        else:
            print("Switching to player 0...")
            current_player_index = 0


    if winner is None:
        print("CAT GAME")
    else:
        print("WINNER IS PLAYER: ", winner.player)

    return winner
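Assuming player objects that expose a `player` marker and a `pick_next_move(board)` method returning a `(marker, cell)` tuple (the only interface the loop above uses), a call could look like the sketch below. `RandomPlayer` is a hypothetical class that reuses the same module-level helper `p` as the snippet:

import numpy as np

class RandomPlayer:
    """Hypothetical player: plays a uniformly random available cell."""

    def __init__(self, marker):
        self.player = marker

    def pick_next_move(self, board):
        cells = p.get_available_cells(board)
        return (self.player, cells[np.random.randint(len(cells))])

with open("game.log", "w") as log:
    winner = play_game(RandomPlayer('X'), RandomPlayer('O'), file=log)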
Example #3
    def find_actions_for_state(self, board):

        p.display_board(board)

        # given board, find available / unoccupied cells
        # this defines the next possible moves
        available_cells = p.get_available_cells(board)

        print("Found ", len(available_cells), " available cells --> ",
              available_cells)

        # for each available cell, build the board that results from playing it
        actions = []
        for cell in available_cells:
            bnext = p.add_move(self.player, cell, board)
            actions.append({'cell': cell, 'board': bnext})

        return actions
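In other words, each empty cell becomes one candidate action, stored together with the board that results from playing it. On a flat 9-cell board representation the same enumeration reduces to a few lines; the sketch below is an illustration under that assumption, not the original helpers:

def find_actions(board, mark):
    """Return [{'cell': i, 'board': next_board}, ...] for a flat 9-cell board."""
    actions = []
    for i, cell in enumerate(board):
        if cell == ' ':                        # unoccupied cell
            next_board = board.copy()
            next_board[i] = mark               # board after taking this action
            actions.append({'cell': i, 'board': next_board})
    return actions

# e.g. find_actions(['X', 'O', ' ', ' ', 'X', ' ', ' ', ' ', 'O'], 'X')
# yields one action for each of the five empty cells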
Example #4
    def update_q_value(self, board, action, reward):

        self.log.write("UPDATING Q VALUE FOR BOARD ------\n")
        self.log.write(self.to_string(board))
        self.log.write("GIVEN ACTION: " + str(action['cell']) + "\n")

        #        previous_value = self.get_value(board)
        previous_value = self.get_value_for_state_and_action(
            board, action['cell'])

        self.log.write("PREVIOUS VALUE FOR STATE/ACTION: " +
                       str(previous_value) + "\n")
        self.log.flush()
        # approximate FUTURE reward from the NEXT states ...
        # max_a Q(s',a)
        # first we advance from s --> s' given action A and reward R
        # then we take the max over all actions FROM s'
        # do we need to approximate the OTHER player's actions here??
        # otherwise, the board is OUT OF SYNC with GAME PLAY
        next_board = p.copy(action['board'])

        # examine other player moves --> for now, just do a random move??
        opponent_possible_cells = p.get_available_cells(next_board)

        if len(opponent_possible_cells) > 0:
            self.log.write("POSSIBLE OPPONENT NEXT MOVES: " +
                           str(opponent_possible_cells) + "\n")
            # pick one at random
            inx_select = np.random.randint(0, len(opponent_possible_cells))
            opponent_move = opponent_possible_cells[inx_select]

            self.log.write("ASSUME OPPONENT NEXT MOVE: " + str(opponent_move) +
                           "\n")

            next_board_seen = p.add_move(p.get_other_player(self.player),
                                         opponent_move, next_board)

            self.log.write("ASSUMED NEXT BOARD SEEN ----\n")
            self.log.write(self.to_string(next_board_seen))
            self.log.flush()

            next_actions = self.find_actions_for_state(next_board_seen)

            if len(next_actions) > 0:
                next_q_values = self.find_q_values(next_actions)

                # log the Q value of each candidate next action
                # (use a name other than `action` so the original state/action
                #  pair is still available for the update below)
                for i, next_action in enumerate(next_actions):
                    self.log.write("Q VALUE FOR ACTION " + str(i) + " --> " +
                                   str(next_action['cell']) + " : " +
                                   str(next_q_values[i]) + "\n")

                print("Got q values for actions: ", next_q_values)

                # max_a' Q(s', a')
                max_next_q = max(next_q_values)
            else:
                max_next_q = 0
        else:
            print("EMPTY POSSIBLE OPPONENT NEXT CELLS")
            max_next_q = 0

        # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        next_value = previous_value + self.learning_rate * (
            reward + self.gamma * max_next_q - previous_value)

        print("Updated q value ==> ", next_value)

        self.add_or_update_value(board, next_value)

        self.update_q_value_for_state_and_action(board, action['cell'],
                                                 next_value)
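The final assignment is the standard tabular Q-learning update, Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a)). Below is a minimal self-contained version of just that arithmetic, assuming a dict-backed Q-table keyed by (state, action) rather than the class's own storage:

from collections import defaultdict

q_table = defaultdict(float)   # (state, action) -> Q value, defaults to 0.0

def q_update(state, action, reward, next_state, next_actions,
             learning_rate=0.1, gamma=0.9):
    """One tabular Q-learning update, same formula as the method above."""
    previous_value = q_table[(state, action)]
    max_next_q = max((q_table[(next_state, a)] for a in next_actions), default=0.0)
    q_table[(state, action)] = previous_value + learning_rate * (
        reward + gamma * max_next_q - previous_value)

# e.g. a terminal transition: no next actions, so the max term defaults to 0
q_update("s0", 4, 1.0, "terminal", [])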