Code Example #1
def play_game(grid: Grid, policy):
    state = (2, 0)
    grid.set_state(state)
    action = random_action(policy[state])

    states_actions_rewards = [(state, action, 0)]
    while True:
        reward = grid.move(action)
        state = grid.current_state()
        if grid.is_game_over():
            states_actions_rewards.append((state, None, reward))
            break
        else:
            action = random_action(policy[state])
            states_actions_rewards.append((state, action, reward))
            
    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_actions_returns = []
    first = True
    for state, action, reward in reversed(states_actions_rewards):
        # The terminal state's value is 0, so skip it (the final G is also unused)
        if first:
            first = False
        else:
            states_actions_returns.append((state, action, G))
        G = reward + gamma*G
    
    states_actions_returns.reverse()  # Restore the chronological order of visited states
    return states_actions_returns
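
The (state, action, return) triples above are what a first-visit Monte Carlo
loop typically averages into Q estimates. A minimal usage sketch, assuming Q
and returns are dicts keyed by (state, action); these names are illustrative,
not taken from the project above:

seen_pairs = set()
for state, action, G in play_game(grid, policy):
    sa = (state, action)
    if sa not in seen_pairs:                  # first-visit: count each (s, a)
        returns.setdefault(sa, []).append(G)  # at most once per episode
        Q[sa] = sum(returns[sa]) / len(returns[sa])  # mean return estimates Q
        seen_pairs.add(sa)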
Code Example #2
File: td0_prediction.py Project: samik-saha/udemy-rl
def play_game(grid: Grid, policy: dict):
    # returns a list of states and corresponding rewards
    # start at the designated start state
    s = (2, 0)
    grid.set_state(s)
    states_and_rewards = [(s, 0)]  # list of tuples (state, reward)
    while not grid.game_over():
        a = policy[s]
        a = random_action(a)
        r = grid.move(a)
        s = grid.current_state()
        states_and_rewards.append((s, r))
    return states_and_rewards
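
A TD(0) learner then consumes these (state, reward) pairs one transition at a
time. A minimal update sketch, assuming V is a dict of state-value estimates
(with terminal states initialized to 0) and ALPHA/GAMMA are the step size and
discount; these are hypothetical names, not from the project above:

states_and_rewards = play_game(grid, policy)
for t in range(len(states_and_rewards) - 1):
    s, _ = states_and_rewards[t]
    s2, r = states_and_rewards[t + 1]
    # One-step bootstrapped target: r + GAMMA * V[s']
    V[s] = V[s] + ALPHA * (r + GAMMA * V[s2] - V[s])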
Code Example #3
File: run.py Project: srikirank/Machine-Learning
def main(argv):
    setpath()
    from q_learning import QLearning
    from grid_world import Grid

    # 5 rows, 4 cols, unreachables: [(1, 1), (1, 3)], pits: [(3, 1)], goal: (4, 3)
    g = Grid(5, 4, [(1, 1), (1, 3)], [(3, 1)], (4, 3))
    q = QLearning(g)
    q.learn()
Code Example #4
def run_simulation_441():
    for i in tqdm(range(N)):
        grid = Grid(X_GRID, Y_GRID, LIVES, True)
        while not grid.done:
            state = grid.current_state()  # The current position in the env
        action = random.choice(grid.actions[state])  # Choose a random valid action
            grid.move(*action)

        round_reward = cost_per_game - rewards_table_441[grid.score]
        global current_balance
        current_balance += round_reward

        # Append round profit and accumulated profit
        games_log.append([round_reward, current_balance])

        if show_live and not i % SHOW_EVERY:
            plot_graph(True, games_log)
Code Example #5
def play_game(grid: Grid, policy):
    # Reset the game to start at a random position. We need to do this
    # because, under the current deterministic policy, we would never
    # reach certain states otherwise.
    start_states = list(grid.actions.keys())
    start_idx = np.random.choice(len(start_states))
    grid.set_state(start_states[start_idx])

    state = grid.current_state()
    states_and_rewards = [(state, 0)]  # List of tuples (state, reward)
    while not grid.is_game_over():
        action = policy[state]
        action = random_action(action)
        reward = grid.move(action)
        state = grid.current_state()
        states_and_rewards.append((state, reward))

    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_and_returns = []
    first = True
    for state, reward in reversed(states_and_rewards):
        # The terminal state's value is 0, so skip it (the final G is also unused)
        if first:
            first = False
        else:
            states_and_returns.append((state, G))
        G = reward + gamma*G
    
    states_and_returns.reverse()  # Restore the chronological order of visited states
    return states_and_returns
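
As in the Q-version of this function above, the (state, return) pairs are
averaged into a state-value estimate. A minimal first-visit sketch, with V and
returns as hypothetical dicts (not names from the project above):

seen_states = set()
for state, G in play_game(grid, policy):
    if state not in seen_states:                 # first-visit: one sample
        returns.setdefault(state, []).append(G)  # per state per episode
        V[state] = sum(returns[state]) / len(returns[state])
        seen_states.add(state)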
Code Example #6
def play_game(grid: Grid, policy):
    # Reset the game to start at a random position. We need to do this
    # because, under the current deterministic policy, we would never
    # reach certain states otherwise.
    start_states = list(grid.actions.keys())
    start_idx = np.random.choice(len(start_states))
    grid.set_state(start_states[start_idx])

    state = grid.current_state()
    action = np.random.choice(ALL_POSSIBLE_ACTIONS)  # First action is uniformly random

    states_actions_rewards = [(state, action, 0)]
    seen_states = set()
    seen_states.add(grid.current_state())
    num_steps = 0

    while True:
        reward = grid.move(action)
        num_steps += 1
        state = grid.current_state()

        if state in seen_states:
            # Penalize revisits so we don't end up in an infinitely long
            # episode bumping into a wall
            r = -10. / num_steps
            states_actions_rewards.append((state, None, r))
            break
        elif grid.is_game_over():
            states_actions_rewards.append((state, None, reward))
            break
        else:
            action = policy[state]
            states_actions_rewards.append((state, action, reward))
        seen_states.add(state)

    # Calculate returns, G, by working backwards from the terminal state
    G = 0
    states_actions_returns = []
    first = True
    for state, action, reward in reversed(states_actions_rewards):
        # The terminal state's value is 0, so skip it (the final G is also unused)
        if first:
            first = False
        else:
            states_actions_returns.append((state, action, G))
        G = reward + gamma * G

    states_actions_returns.reverse()  # Restore the chronological order of visited states
    return states_actions_returns
Code Example #7
def initialize_round():
    global grid, screen, heart, pixels_per_tile, margin, old_lives

    pixels_per_tile = int(min(screen_h / Y_GRID, screen_w / X_GRID) * 3 / 4)
    margin = int(pixels_per_tile / 20)

    size = [(pixels_per_tile + margin) * X_GRID + margin,
            (pixels_per_tile + margin) * Y_GRID + margin]
    # print(f"Grid Size: {X_GRID}x{Y_GRID}: ({size[0]}, {size[1]})px")

    setattr(Tile, 'PIXELS_PER_TILE', pixels_per_tile)
    setattr(Tile, 'MARGIN', margin)

    # screen = pygame.display.set_mode(SIZE, pygame.HWSURFACE | pygame.DOUBLEBUF | pygame.RESIZABLE| pygame.FULLSCREEN)
    screen = pygame.display.set_mode(size)

    grid = Grid(X_GRID, Y_GRID, LIVES)
    old_lives = LIVES
    print("Right Path: ", grid.path)

    heart = pygame.image.load(Tile.ASSETS['heart'])
    heart = pygame.transform.scale(
        heart, (int(pixels_per_tile / 2), int(pixels_per_tile / 2)))
Code Example #8
import numpy as np
import torch
from torch.optim import AdamW
from grid_world import Grid
from actor import Actor, Actor_Loss, choose_action
from critic import Critic, Critic_Loss

np.random.seed(1)

# training config
MAX_EPISODE = 450
Actor_lr = 1e-3
Critic_lr = 1e-3

# problem setting
grid = Grid()
grid.draw_board()
state_dim = 2
action_dim = 4

# init models
actor = Actor(input_dim=state_dim, output_dim=action_dim)
critic = Critic(input_dim=state_dim)
actor_opt = AdamW(actor.parameters(), lr=Actor_lr)
critic_opt = AdamW(critic.parameters(), lr=Critic_lr)

# init loss
a_loss = Actor_Loss()
c_loss = Critic_Loss()

for i_episode in range(MAX_EPISODE):
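    # The loop body is not included in this excerpt. What follows is a
    # hypothetical sketch of one advantage actor-critic episode under the
    # definitions above; grid.reset(), grid.step(), the choose_action return
    # signature, and the loss-call signatures are assumptions, not the
    # project's actual API.
    state = grid.reset()
    done = False
    while not done:
        s = torch.tensor(state, dtype=torch.float32)
        action, log_prob = choose_action(actor, s)      # sample from the policy
        next_state, reward, done = grid.step(action)
        s2 = torch.tensor(next_state, dtype=torch.float32)

        value = critic(s)
        next_value = critic(s2).detach() if not done else torch.zeros(1)
        td_target = reward + 0.99 * next_value          # assumed discount 0.99
        advantage = (td_target - value).detach()

        critic_opt.zero_grad()
        c_loss(value, td_target).backward()             # fit V to the TD target
        critic_opt.step()

        actor_opt.zero_grad()
        a_loss(log_prob, advantage).backward()          # policy-gradient step
        actor_opt.step()

        state = next_state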