def update_policy(self, transitions, Q, policy, epsilon):
    # Log the policy prior to the update.
    self.file.write("PREVIOUS POLICY: \n")
    for state in policy.keys():
        self.file.write(state + "\n")
        pByAction = policy[state]
        self.file.write("p(a): " + str(pByAction) + "\n")

    for transition in transitions:
        state = to_board_state(transition['from_state'])
        self.file.write(game.to_display_string(transition['from_state']))

        # Greedy action for this state under the current Q estimates.
        astar = self.find_max_action(Q, state)
        self.file.write("UPDATE POLICY: ASTAR ==> " + str(astar) + "\n")

        possible_actions = get_actions(policy, state)
        self.file.write("POSSIBLE ACTIONS: " + str(possible_actions) + "\n")

        # Epsilon-greedy improvement: the greedy action gets the bulk of the
        # probability mass; every action keeps an epsilon / |A(s)| share.
        for action in possible_actions:
            if action == astar:
                policy[state][action] = 1.0 - epsilon + epsilon / len(possible_actions)
            else:
                policy[state][action] = epsilon / len(possible_actions)

    self.file.write("UPDATED POLICY: " + str(policy) + "\n")
    return policy
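# Hypothetical sketches of the two helpers referenced above; the real
# find_max_action and get_actions are defined elsewhere in the project and
# may differ. These assume Q and policy are both keyed as
# {state: {action: number}}, matching how update_policy indexes
# policy[state][action].
def find_max_action(self, Q, state):
    # The greedy action: the one with the highest estimated action-value.
    return max(Q[state], key=Q[state].get)

def get_actions(policy, state):
    # All actions the policy currently assigns probability mass to in state.
    return list(policy[state].keys())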
def model_environment(opponent, state, action):
    game_complete = False
    initial_board = state
    file.write("AGENT MAKING MOVE: " + str(action) + str(board.to_state(action)) + "\n")

    # Apply the agent's move ('X') to the board.
    current_board = p.add_move('X', action, initial_board)
    print("AFTER AGENT MOVE:")
    print(game.to_display_string(current_board))
    file.write("AFTER AGENT MOVE:\n")
    file.write(game.to_display_string(current_board))

    # Reward is +1 for a win, -1 for a loss, and 0 for a draw (cat's game).
    reward = 0.0
    if p.is_winner(current_board, 'X'):
        game_complete = True
        reward = 1.0
    elif p.is_cat_game(current_board):
        game_complete = True
        reward = 0.0

    if not game_complete:
        # Let the opponent make a move ...
        (opponent_id, opponent_move) = opponent.pick_next_move(current_board)
        current_board = p.add_move(opponent_id, opponent_move, current_board)
        print("AFTER OPPONENT MOVE:")
        print(game.to_display_string(current_board))
        file.write("AFTER OPPONENT MOVE:\n")
        file.write(game.to_display_string(current_board))

        if p.is_winner(current_board, opponent_id):
            game_complete = True
            reward = -1.0
        elif p.is_cat_game(current_board):
            game_complete = True
            reward = 0.0

    return current_board, reward, game_complete
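# A minimal sketch of the policy-sampling helper used by the episode
# generator below; its real implementation is not shown in this section.
# It assumes policy maps a board-state key to {action: probability}, as
# update_policy maintains, and that to_board_state behaves as it does above.
import random

def sample_tic_tac_toe_policy(current_board, policy):
    state = to_board_state(current_board)
    actions = list(policy[state].keys())
    weights = [policy[state][a] for a in actions]
    # random.choices draws one action in proportion to its policy probability.
    return random.choices(actions, weights=weights, k=1)[0]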
def generate_tic_tac_toe_episode(policy):
    current_board = p.empty_board()
    opponent = GreedyRandomPlayer('O')
    game_complete = False
    transitions = []

    while not game_complete:
        print("PRIOR TO MOVE:")
        print(game.to_display_string(current_board))
        file.write("PRIOR TO MOVE:\n")
        file.write(game.to_display_string(current_board))

        # Sample an action from the stochastic policy for the current state.
        selectedAction = sample_tic_tac_toe_policy(current_board, policy)

        # Model the environment --> returns a next state and a reward.
        next_state, reward, game_complete = model_environment(opponent, current_board, selectedAction)

        # Record this step of the episode.
        transition = {}
        transition['from_state'] = current_board
        transition['to_state'] = next_state
        transition['action'] = selectedAction
        transition['reward'] = reward
        transitions.append(transition)
        file.write("ADDING TRANSITION: " + str(transition) + "\n")

        current_board = next_state

    print("BOARD AT END OF EPISODE")
    print(game.to_display_string(current_board))
    file.write("BOARD AT END OF EPISODE\n")
    file.write(game.to_display_string(current_board))

    return transitions
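# A minimal sketch, not project code, of how the pieces above could be wired
# into on-policy Monte Carlo control: generate an episode, update Q from the
# observed returns, then improve the policy with update_policy. The agent
# parameter, gamma, num_episodes, and the returns bookkeeping are all
# assumptions; Q is assumed to be {state: {action: value}}.
from collections import defaultdict

def monte_carlo_control(agent, Q, policy, epsilon, num_episodes=1000, gamma=1.0):
    returns = defaultdict(list)  # (state, action) -> list of observed returns
    for _ in range(num_episodes):
        transitions = generate_tic_tac_toe_episode(policy)
        G = 0.0
        # Walk the episode backwards, accumulating the discounted return.
        for t in reversed(transitions):
            G = gamma * G + t['reward']
            key = (to_board_state(t['from_state']), t['action'])
            returns[key].append(G)
            # Every-visit MC: Q(s, a) is the mean of all returns seen so far.
            Q[key[0]][t['action']] = sum(returns[key]) / len(returns[key])
        policy = agent.update_policy(transitions, Q, policy, epsilon)
    return Q, policy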
def execute(self, initial_board):
    current_board = initial_board

    # Alternate agent moves and environment updates until the game ends.
    while not self.environment.is_completed():
        player_id, player_move = self.agent.pick_next_move(current_board)
        action = {}
        action['player'] = player_id
        action['cell'] = player_move
        updated_board, reward = self.environment.update(current_board, action)
        current_board = updated_board

    print("EPISODE TERMINATED --> OUTCOME: ", self.environment.get_outcome())
    print(game.to_display_string(current_board))
    return self.agent, current_board
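# Hypothetical usage of execute; the EpisodeRunner and TicTacToeEnvironment
# names are assumptions inferred from the self.agent / self.environment
# attributes used above, not classes shown in this section.
if __name__ == '__main__':
    runner = EpisodeRunner(agent=GreedyRandomPlayer('X'),
                           environment=TicTacToeEnvironment())
    trained_agent, final_board = runner.execute(p.empty_board())
    print(game.to_display_string(final_board))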