예제 #1
0
    def update_policy(self, transitions, Q, policy, epsilon):
        """Perform an epsilon-greedy policy-improvement step.

        For each transition, look up the greedy action ``a*`` under ``Q``
        for the transition's from-state and redistribute that state's
        action probabilities: ``a*`` receives ``1 - epsilon + epsilon/|A|``
        and every other action receives ``epsilon/|A|``.

        Args:
            transitions: iterable of dicts, each with a 'from_state' entry.
            Q: action-value table, queried via ``self.find_max_action``.
            policy: dict mapping state -> {action: probability}; mutated
                in place.
            epsilon: exploration rate, expected in [0, 1].

        Returns:
            The (mutated) ``policy`` dict, for caller convenience.
        """
        self.file.write("PREVIOUS POLICY: \n")
        for state in policy.keys():
            self.file.write(state + "\n")
            pByAction = policy[state]
            self.file.write("p(a): " + str(pByAction) + "\n")

        for transition in transitions:
            state = to_board_state(transition['from_state'])

            self.file.write(game.to_display_string(transition['from_state']))

            astar = self.find_max_action(Q, state)
            self.file.write("UPDATE POLICY:  ASTAR ==> " + str(astar) + "\n")

            possible_actions = get_actions(policy, state)
            self.file.write("POSSIBLE ACTIONS: " + str(possible_actions) + "\n")

            # |A| is loop-invariant; compute it once instead of per action.
            n_actions = len(possible_actions)
            for action in possible_actions:
                if action == astar:
                    policy[state][action] = 1.0 - epsilon + epsilon / n_actions
                else:
                    policy[state][action] = epsilon / n_actions

            self.file.write("UPDATED POLICY: " + str(policy) + "\n")

        return policy
예제 #2
0
def model_environment(opponent, state, action):
    """Apply the agent's move, then (if the game is still live) the
    opponent's move, and report the resulting state and reward.

    Args:
        opponent: player object exposing ``pick_next_move(board)``; it
            returns ``(player_id, move)``.
        state: the board before the agent's action.
        action: the cell the agent ('X') plays.

    Returns:
        Tuple ``(current_board, reward, game_complete)`` where reward is
        1.0 for an agent win, -1.0 for an opponent win, and 0.0 for a
        draw or a non-terminal state.
    """
    game_complete = False
    initial_board = state

    file.write("AGENT MAKING MOVE: " + str(action) + str(board.to_state(action)) + "\n")

    current_board = p.add_move('X', action, initial_board)

    print("AFTER AGENT MOVE:")
    print(game.to_display_string(current_board))

    file.write("AFTER AGENT MOVE:\n")
    file.write(game.to_display_string(current_board))

    reward = 0.0

    if p.is_winner(current_board, 'X'):
        game_complete = True
        reward = 1.0
    elif p.is_cat_game(current_board):
        game_complete = True
        reward = 0.0

    if not game_complete:
        # let the opponent make a move ...
        (opponent_id, opponent_move) = opponent.pick_next_move(current_board)

        current_board = p.add_move(opponent_id, opponent_move, current_board)

        print("AFTER OPPONENT MOVE")
        print(game.to_display_string(current_board))

        file.write("AFTER OPPONENT MOVE\n")
        file.write(game.to_display_string(current_board))

        if p.is_winner(current_board, opponent_id):
            game_complete = True
            reward = -1.0
        elif p.is_cat_game(current_board):
            game_complete = True
            # use a float literal for consistency with every other branch
            reward = 0.0

    return current_board, reward, game_complete
예제 #3
0
def generate_tic_tac_toe_episode(policy):
    """Play one complete tic-tac-toe game, sampling the agent's moves
    from ``policy`` against a GreedyRandomPlayer opponent.

    Args:
        policy: dict mapping state -> {action: probability}, sampled via
            ``sample_tic_tac_toe_policy``.

    Returns:
        List of transition dicts, each with keys 'from_state',
        'to_state', 'action', and 'reward'.
    """
    current_board = p.empty_board()

    opponent = GreedyRandomPlayer('O')

    game_complete = False
    transitions = []
    while not game_complete:

        print("PRIOR TO MOVE:")
        print(game.to_display_string(current_board))
        file.write("PRIOR TO MOVE:\n")
        file.write(game.to_display_string(current_board))

        selectedAction = sample_tic_tac_toe_policy(current_board, policy)

        # model the environment --> returns a next state and a reward
        next_state, reward, game_complete = model_environment(opponent, current_board, selectedAction)

        # record the transition for later policy evaluation
        transition = {
            'from_state': current_board,
            'to_state': next_state,
            'action': selectedAction,
            'reward': reward,
        }
        transitions.append(transition)

        file.write("ADDING TRANSITION: " + str(transition) + "\n")

        current_board = next_state

    # log the terminal board state
    print("BOARD AT END OF EPISODE")
    print(game.to_display_string(current_board))
    file.write("BOARD AT END OF EPISODE\n")
    file.write(game.to_display_string(current_board))

    return transitions
예제 #4
0
    def execute(self, initial_board):
        """Run one episode from ``initial_board``.

        Repeatedly asks the agent for its next move and feeds it to the
        environment until the environment reports completion, then prints
        the outcome and the final board.

        Returns:
            Tuple of (agent, final board).
        """
        board = initial_board
        while not self.environment.is_completed():
            player_id, cell = self.agent.pick_next_move(board)
            move = {'player': player_id, 'cell': cell}
            board, reward = self.environment.update(board, move)

        print("EPISODE TERMINATED --> OUTCOME: ",
              self.environment.get_outcome())
        print(game.to_display_string(board))
        return self.agent, board