Example #1
def play_game(grid: Grid, policy):
    # Reset the game to start at a random position.
    # We need to do this because, under the current deterministic policy,
    # we would never reach certain states otherwise.
    start_states = list(grid.actions.keys())
    start_idx = np.random.choice(len(start_states))
    grid.set_state(start_states[start_idx])

    state = grid.current_state()
    states_and_rewards = [(state, 0)]  # List of tuples (state, reward)
    while not grid.is_game_over():
        action = policy[state]
        action = random_action(action)
        reward = grid.move(action)
        state = grid.current_state()
        states_and_rewards.append((state, reward))

    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_and_returns = []
    first = True
    for state, reward in reversed(states_and_rewards):
        # The terminal state has value 0, so skip it; the final G computed is never used.
        if first:
            first = False
        else:
            states_and_returns.append((state, G))
        G = reward + gamma*G
    
    states_and_returns.reverse()  # Restore chronological order; the list was built backwards
    return states_and_returns
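
A minimal sketch of how this play_game could drive first-visit Monte Carlo prediction. standard_grid() as the environment factory, a module-level gamma, and the policy dict are assumptions carried over from the surrounding gridworld code, not part of the example above.

import numpy as np

def first_visit_mc_prediction(policy, num_episodes=5000):
    grid = standard_grid()  # assumed grid factory from the surrounding code
    V = {}
    returns = {s: [] for s in grid.actions.keys()}  # non-terminal states only

    for _ in range(num_episodes):
        seen = set()
        for state, G in play_game(grid, policy):
            # First-visit MC: only the first occurrence of a state in the
            # episode contributes a sample return.
            if state not in seen:
                returns[state].append(G)
                V[state] = np.mean(returns[state])
                seen.add(state)
    return V
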
Example #2
def play_game(grid: Grid, policy):
    # Start every episode from the fixed start state; actions come from the
    # policy, perturbed by random_action.
    state = (2, 0)
    grid.set_state(state)
    action = random_action(policy[state])

    states_actions_rewards = [(state, action, 0)]
    while True:
        reward = grid.move(action)
        state = grid.current_state()
        if grid.is_game_over():
            states_actions_rewards.append((state, None, reward))
            break
        else:
            action = random_action(policy[state])
            states_actions_rewards.append((state, action, reward))
            
    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_actions_returns = []
    first = True
    for state, action, reward in reversed(states_actions_rewards):
        # The terminal state has value 0, so skip it; the final G computed is never used.
        if first:
            first = False
        else:
            states_actions_returns.append((state, action, G))
        G = reward + gamma*G
    
    states_actions_returns.reverse()  # Restore chronological order; the list was built backwards
    return states_actions_returns
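
One plausible way to consume this version for Monte Carlo control: estimate Q(s, a) from sampled episodes, then make the policy greedy with respect to Q after each episode. ALL_POSSIBLE_ACTIONS (a module-level collection of the moves, as used in Example #4 below), the episode count, and the initial random policy are assumptions, not the original training loop.

import numpy as np

def mc_control(grid, num_episodes=5000):
    policy = {s: np.random.choice(ALL_POSSIBLE_ACTIONS) for s in grid.actions.keys()}
    Q = {s: {a: 0.0 for a in ALL_POSSIBLE_ACTIONS} for s in grid.actions.keys()}
    returns = {(s, a): [] for s in grid.actions.keys() for a in ALL_POSSIBLE_ACTIONS}

    for _ in range(num_episodes):
        seen = set()
        for s, a, G in play_game(grid, policy):
            # First-visit MC for each (state, action) pair.
            if (s, a) not in seen:
                returns[(s, a)].append(G)
                Q[s][a] = np.mean(returns[(s, a)])
                seen.add((s, a))
        # Policy improvement: act greedily with respect to the current Q.
        for s in policy:
            policy[s] = max(Q[s], key=Q[s].get)
    return policy, Q
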
Example #3
def play_game(grid: Grid, policy: dict):
    # returns a list of states and corresponding rewards
    # start at the designated start state
    s = (2, 0)
    grid.set_state(s)
    states_and_rewards = [(s, 0)]  # list of tuples (state, reward)
    while not grid.game_over():
        a = policy[s]
        a = random_action(a)
        r = grid.move(a)
        s = grid.current_state()
        states_and_rewards.append((s, r))
    return states_and_rewards
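
Unlike the other examples, this version returns raw (state, reward) pairs and leaves any return computation to the caller. A sketch of one possible consumer is a TD(0)-style value update; alpha, gamma, standard_grid(), and grid.all_states() are assumptions here, not part of the snippet above.

def td0_prediction(policy, num_episodes=1000, alpha=0.1, gamma=0.9):
    grid = standard_grid()                    # assumed grid factory
    V = {s: 0.0 for s in grid.all_states()}   # assumed helper listing every state
    for _ in range(num_episodes):
        states_and_rewards = play_game(grid, policy)
        for t in range(len(states_and_rewards) - 1):
            s, _ = states_and_rewards[t]
            s2, r = states_and_rewards[t + 1]
            # TD(0): nudge V(s) toward the bootstrapped target r + gamma * V(s2).
            V[s] = V[s] + alpha * (r + gamma * V[s2] - V[s])
    return V
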
Example #4
def play_game(grid: Grid, policy):
    # Reset the game to start at a random position.
    # We need to do this because, under the current deterministic policy,
    # we would never reach certain states otherwise.
    start_states = list(grid.actions.keys())
    start_idx = np.random.choice(len(start_states))
    grid.set_state(start_states[start_idx])

    state = grid.current_state()
    action = np.random.choice(ALL_POSSIBLE_ACTIONS)  # First action is chosen uniformly at random

    states_actions_rewards = [(state, action, 0)]
    seen_states = set()
    seen_states.add(grid.current_state())
    num_steps = 0

    while True:
        reward = grid.move(action)
        num_steps += 1
        state = grid.current_state()

        if state in seen_states:
            # Hack: penalize revisits so we don't end up in an infinitely long
            # episode bumping into a wall.
            r = -10. / num_steps
            states_actions_rewards.append((state, None, r))
            break
        elif grid.is_game_over():
            states_actions_rewards.append((state, None, reward))
            break
        else:
            action = policy[state]
            states_actions_rewards.append((state, action, reward))
        seen_states.add(state)

    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_actions_returns = []
    first = True
    for state, action, reward in reversed(states_actions_rewards):
        # The terminal state has value 0, so skip it; the final G computed is never used.
        if first:
            first = False
        else:
            states_actions_returns.append((state, action, G))
        G = reward + gamma * G

    states_actions_returns.reverse()  # Restore chronological order; the list was built backwards
    return states_actions_returns
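
Because this variant already performs exploring starts (a random start state plus a uniformly random first action), a control loop on top of it can use a plain deterministic policy. The sketch below keeps running sample means instead of storing full return lists; apart from ALL_POSSIBLE_ACTIONS and the play_game above, the names and episode count are assumptions.

import numpy as np

def mc_exploring_starts(grid, num_episodes=10000):
    policy = {s: np.random.choice(ALL_POSSIBLE_ACTIONS) for s in grid.actions.keys()}
    Q = {s: {a: 0.0 for a in ALL_POSSIBLE_ACTIONS} for s in grid.actions.keys()}
    counts = {s: {a: 0 for a in ALL_POSSIBLE_ACTIONS} for s in grid.actions.keys()}

    for _ in range(num_episodes):
        seen = set()
        for s, a, G in play_game(grid, policy):
            if (s, a) in seen:  # first-visit MC for each (state, action) pair
                continue
            seen.add((s, a))
            counts[s][a] += 1
            # Incremental sample mean: Q <- Q + (G - Q) / n
            Q[s][a] += (G - Q[s][a]) / counts[s][a]
        # Greedy policy improvement after each episode.
        for s in policy:
            policy[s] = max(Q[s], key=Q[s].get)
    return policy, Q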