Code Example #1
File: run.py Project: srikirank/Machine-Learning
def main(argv):
    setpath()  # make the q_learning and grid_world modules importable before the deferred imports below
    from q_learning import QLearning
    from grid_world import Grid

    # 5 rows, 4 cols, unreachables : [(1,1), (1,3)], pits : [(3,1)], goal : (4,3)
    g = Grid(5, 4, [(1, 1), (1, 3)], [(3, 1)], (4, 3))
    q = QLearning(g)
    q.learn()
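
The call to setpath() runs before the deferred imports, presumably to put q_learning and grid_world on the import path. A minimal sketch of that helper plus an entry point, assuming the modules live in a src/ directory next to run.py (the directory name is a guess):

import os
import sys

def setpath():
    # Hypothetical: prepend the directory that holds q_learning.py and
    # grid_world.py so the imports inside main() resolve.
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, os.path.join(here, "src"))  # assumed layout

if __name__ == "__main__":
    main(sys.argv)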
Code Example #2
def run_simulation_441():
    global current_balance
    for i in tqdm(range(N)):
        grid = Grid(X_GRID, Y_GRID, LIVES, True)
        while not grid.done:
            state = grid.current_state()  # the current position in the env
            action = random.choice(grid.actions[state])  # choose a random valid action
            grid.move(*action)

        round_reward = cost_per_game - rewards_table_441[grid.score]
        current_balance += round_reward

        # Append round profit and accumulated profit
        games_log.append([round_reward, current_balance])

        if show_live and i % SHOW_EVERY == 0:
            plot_graph(True, games_log)
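
run_simulation_441 reads several module-level names that the excerpt does not define. A sketch of the setup it implies; the names come from the snippet, but every value here is an illustrative guess:

import random
from tqdm import tqdm

N = 10_000             # number of games to simulate
X_GRID, Y_GRID = 4, 4  # board dimensions; "441" presumably encodes 4x4 with 1 life
LIVES = 1
SHOW_EVERY = 500       # refresh the live plot every 500 games
show_live = False

cost_per_game = 1           # stake paid per round
rewards_table_441 = {0: 0}  # payout per final score; real contents not shown in the source
current_balance = 0
games_log = []              # [round profit, accumulated profit] per game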
Code Example #3
def initialize_round():
    global grid, screen, heart, pixels_per_tile, margin, old_lives

    pixels_per_tile = int(min(screen_h / Y_GRID, screen_w / X_GRID) * 3 / 4)
    margin = int(pixels_per_tile / 20)

    size = [(pixels_per_tile + margin) * X_GRID + margin,
            (pixels_per_tile + margin) * Y_GRID + margin]
    # print(f"Grid Size: {X_GRID}x{Y_GRID}: ({size[0]}, {size[1]})px")

    Tile.PIXELS_PER_TILE = pixels_per_tile
    Tile.MARGIN = margin

    # screen = pygame.display.set_mode(size, pygame.HWSURFACE | pygame.DOUBLEBUF | pygame.RESIZABLE | pygame.FULLSCREEN)
    screen = pygame.display.set_mode(size)

    grid = Grid(X_GRID, Y_GRID, LIVES)
    old_lives = LIVES
    print("Right Path: ", grid.path)

    heart = pygame.image.load(Tile.ASSETS['heart'])
    heart = pygame.transform.scale(heart, (pixels_per_tile // 2, pixels_per_tile // 2))
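
initialize_round writes rendering constants onto Tile and reads its asset table, and it also expects the desktop resolution in screen_w/screen_h. A minimal stub of the interface it assumes (the asset path is a placeholder):

class Tile:
    # Class-level rendering constants, overwritten each round by initialize_round().
    PIXELS_PER_TILE = 0
    MARGIN = 0
    # Maps asset names to image files; only 'heart' is used above.
    ASSETS = {'heart': 'assets/heart.png'}  # hypothetical path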
Code Example #4
import numpy as np
import torch
from torch.optim import AdamW
from grid_world import Grid
from actor import Actor, Actor_Loss, choose_action
from critic import Critic, Critic_Loss

np.random.seed(1)

# training config
MAX_EPISODE = 450
Actor_lr = 1e-3
Critic_lr = 1e-3

# problem setting
grid = Grid()
grid.draw_board()
state_dim = 2   # a state is an (x, y) position on the board
action_dim = 4  # one action per movement direction

# init models
actor = Actor(input_dim=state_dim, output_dim=action_dim)
critic = Critic(input_dim=state_dim)
actor_opt = AdamW(actor.parameters(), lr=Actor_lr)
critic_opt = AdamW(critic.parameters(), lr=Critic_lr)

# init loss
a_loss = Actor_Loss()
c_loss = Critic_Loss()

for i_episode in range(MAX_EPISODE):
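    # --- Sketch: the excerpt ends at the loop header above; the body below is
    # an assumed one-step actor-critic update, not the project's own code.
    # Assumptions: Grid exposes reset() and step(action) -> (next_state,
    # reward, done); choose_action samples an action index from the actor's
    # output; Actor_Loss takes (policy output, action, advantage) and
    # Critic_Loss takes (value estimate, TD target). GAMMA is a guessed
    # discount factor, defined here only to keep the sketch self-contained.
    GAMMA = 0.9
    state = grid.reset()
    done = False
    while not done:
        s = torch.as_tensor(state, dtype=torch.float32)
        probs = actor(s)
        action = choose_action(probs)
        next_state, reward, done = grid.step(action)

        # One-step TD target and advantage from the critic's value estimates.
        v = critic(s)
        v_next = critic(torch.as_tensor(next_state, dtype=torch.float32)).detach()
        td_target = reward + GAMMA * v_next * (0.0 if done else 1.0)
        advantage = (td_target - v).detach()

        # Fit the critic toward the TD target.
        critic_opt.zero_grad()
        c_loss(v, td_target).backward()
        critic_opt.step()

        # Push the policy toward actions with positive advantage.
        actor_opt.zero_grad()
        a_loss(probs, action, advantage).backward()
        actor_opt.step()

        state = next_state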