def trial(robot: Agent) -> List[int]:
    """Train *robot* for 5000 maze episodes and record the episode lengths.

    Args:
        robot: learning agent exposing choose_action, update_state_history
            and learn.

    Returns:
        The number of steps the maze counted in each of the 5000 episodes.
    """
    environment = Maze()
    steps_per_episode = []
    for episode in range(5000):
        # Lightweight progress indicator every 1000 episodes.
        if episode % 1000 == 0:
            print(episode)
        while not environment.is_complete():
            position, _ = environment.get_state_and_reward()
            chosen = robot.choose_action(position, environment.allowed_states[position])
            environment.update_maze(chosen)
            position, reward = environment.get_state_and_reward()
            robot.update_state_history(position, reward)
            # Cap runaway episodes: past 1000 steps, teleport the robot to a
            # fixed cell (presumably at/near the goal — TODO confirm in Maze).
            if environment.steps > 1000:
                environment.robot_position = State(5, 5)
        robot.learn()
        steps_per_episode.append(environment.steps)
        environment.reset()
    return steps_per_episode
class Tester():
    # Drives a pretrained agent through the testing map while rendering the
    # run in the Game visualization.

    def __init__(self):
        # Build the testing scenario: coins and the hero placed on the
        # testing map, then a Game for visualization and a Maze environment.
        coins = generate_coins(testing_map.data)
        self.hero_position = generate_hero(testing_map.data)
        episodeSnapshot = EpisodeSnapshot('static/map/testing.json', coins, self.hero_position)
        self.game = Game(episodeSnapshot, True)
        # episode_threshold=None — presumably disables the per-episode step
        # cap during testing; confirm against Maze's constructor.
        self.env = Maze(episode_threshold=None)

    def on_coin_grabbed(self, maze_position):
        """Works as a callback when a coin is grabbed. A new one is generated
        based on this event.

        Args:
            maze_position (tuple): a position of the currently collected coin
        """
        # Replace the collected coin with a freshly generated one, show it in
        # the visualization, and refresh the environment's reward matrix.
        coin = generate_coins(testing_map.data, count=1, grabbed_coin_position=maze_position)[0]
        self.game.append_coin(coin)
        self.env.update_reward_matrix()

    def test(self):
        """Testing process of the agent differs a bit from the training one.
        There isn't an explicit way of how to end the episode unless the agent
        steps out of the road, so coins are generated automatically in an
        infinite loop.
        """
        agent.load_pretrained_model()
        obs = self.env.reset(testing_map.data, self.hero_position, self.on_coin_grabbed)
        done = False
        actions = []          # queue of actions awaiting visualization
        reward_sum = 0
        visualization_done = False
        while not visualization_done:
            # Step the agent ahead of the visualization until its episode ends.
            if not done:
                action = agent.choose_action(obs)
                next_obs, reward, done = self.env.step(action)
                reward_sum += reward
                obs = next_obs
                actions.append(action)
            # Replay queued actions one per frame; with an empty queue just
            # keep rendering.
            if actions:
                visualization_done, _ = self.game.play(Move(actions.pop(0)))
            else:
                visualization_done, _ = self.game.play()
            # Allow the user to close the window at any time.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    visualization_done = True
                    done = True
        self.game.gameOver(reward_sum)
# Configure root logging with a timestamped, leveled format.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(asctime)s: %(message)s", datefmt="%H:%M:%S")

maze = np.array([[0, 1, 0, 0, 0, 0, 0, 0],
                 [0, 1, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 1, 0, 1, 0],
                 [0, 1, 0, 1, 0, 0, 0, 0],
                 [1, 0, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 0, 1, 1, 1],
                 [0, 1, 1, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 1, 0, 0]])  # 0 = free, 1 = occupied

game = Maze(maze)

# The `if 0:` guards below toggle mutually exclusive experiments; flip one
# to 1 to run it.
if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90, exploration_rate=0.10, learning_rate=0.10, episodes=200)

if 0:  # train using tabular Q-learning and an eligibility trace (aka TD-lamba)
    model = QTableTraceModel(game)
    # NOTE(review): this call is truncated in this chunk — the remaining
    # keyword arguments are not visible here.
    h, w, _, _ = model.train(discount=0.90,
def test_single(model, maze_width=11):
    """Run one game of *model* on a freshly generated Prim's maze.

    Args:
        model: agent handed through to play_game.
        maze_width: side length of the generated maze (default 11).

    Returns:
        A tuple of (play_game result, the Maze instance that was played).
    """
    grid = generate_prims_maze_matrix(maze_width)
    board = Maze(grid)
    # Start from a uniformly chosen free cell.
    start = random.choice(board.free_cells)
    board.reset(start)
    outcome = play_game(model, board, start)
    return outcome, board