def main(argv):
    setpath()
    from q_learning import QLearning
    from grid_world import Grid

    # 5 rows, 4 cols, unreachables: [(1, 1), (1, 3)], pits: [(3, 1)], goal: (4, 3)
    g = Grid(5, 4, [(1, 1), (1, 3)], [(3, 1)], (4, 3))
    q = QLearning(g)
    q.learn()
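# QLearning's update rule is not shown above; a minimal sketch of the standard
# tabular backup it presumably performs (q_update, the dict-based table, and the
# alpha/gamma defaults are illustrative assumptions, not the repository's API):
from collections import defaultdict

def q_update(Q, state, action, reward, next_state, next_actions,
             alpha=0.1, gamma=0.9):
    """One Bellman backup: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
    best_next = max((Q[(next_state, a)] for a in next_actions), default=0.0)
    Q[(state, action)] += alpha * (reward + gamma * best_next - Q[(state, action)])

# Usage: Q = defaultdict(float), then call q_update once per observed transition.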
def run_simulation_441():
    global current_balance
    for i in tqdm(range(N)):
        grid = Grid(X_GRID, Y_GRID, LIVES, True)
        while not grid.done:
            state = grid.current_state()                 # The current position in the env
            action = random.choice(grid.actions[state])  # Choose a random valid action
            grid.move(*action)
        # Profit for this round: the entry cost minus the payout for the score reached
        round_reward = cost_per_game - rewards_table_441[grid.score]
        current_balance += round_reward
        # Append round profit and accumulated profit
        games_log.append([round_reward, current_balance])
        if show_live and not i % SHOW_EVERY:
            plot_graph(True, games_log)
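# plot_graph is referenced above but not shown; a minimal sketch, assuming
# games_log holds the [round_reward, current_balance] pairs appended in
# run_simulation_441 and that the first argument requests a non-blocking live
# redraw (both assumptions):
import matplotlib.pyplot as plt

def plot_graph(live, log):
    balances = [entry[1] for entry in log]   # accumulated balance per game
    plt.clf()
    plt.plot(balances)
    plt.xlabel("Game")
    plt.ylabel("Accumulated balance")
    if live:
        plt.pause(0.001)   # redraw without blocking the running simulation
    else:
        plt.show()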
def initialize_round():
    global grid, screen, heart, pixels_per_tile, margin, old_lives

    pixels_per_tile = int(min(screen_h / Y_GRID, screen_w / X_GRID) * 3 / 4)
    margin = int(pixels_per_tile / 20)
    size = [(pixels_per_tile + margin) * X_GRID + margin,
            (pixels_per_tile + margin) * Y_GRID + margin]
    # print(f"Grid Size: {X_GRID}x{Y_GRID}: ({size[0]}, {size[1]})px")

    setattr(Tile, 'PIXELS_PER_TILE', pixels_per_tile)
    setattr(Tile, 'MARGIN', margin)

    # screen = pygame.display.set_mode(SIZE, pygame.HWSURFACE | pygame.DOUBLEBUF | pygame.RESIZABLE | pygame.FULLSCREEN)
    screen = pygame.display.set_mode(size)

    grid = Grid(X_GRID, Y_GRID, LIVES)
    old_lives = LIVES
    print("Right Path: ", grid.path)

    heart = pygame.image.load(Tile.ASSETS['heart'])
    heart = pygame.transform.scale(heart, (pixels_per_tile // 2, pixels_per_tile // 2))
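# initialize_round prepares `heart` for a lives HUD; a hedged sketch of how the
# remaining lives might be blitted each frame (draw_lives is hypothetical and
# not part of the source):
def draw_lives(screen, heart, lives, pixels_per_tile, margin):
    half = pixels_per_tile // 2   # matches the scaled heart size above
    for i in range(lives):
        # One heart per remaining life, laid out along the top-left corner
        screen.blit(heart, (margin + i * (half + margin), margin))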
import numpy as np
import torch
from torch.optim import AdamW

from grid_world import Grid
from actor import Actor, Actor_Loss, choose_action
from critic import Critic, Critic_Loss

np.random.seed(1)

# training config
MAX_EPISODE = 450
Actor_lr = 1e-3
Critic_lr = 1e-3

# problem setting
grid = Grid()
grid.draw_board()
state_dim = 2   # a state is a 2-D grid coordinate
action_dim = 4  # four grid moves

# init models
actor = Actor(input_dim=state_dim, output_dim=action_dim)
critic = Critic(input_dim=state_dim)
actor_opt = AdamW(actor.parameters(), lr=Actor_lr)
critic_opt = AdamW(critic.parameters(), lr=Critic_lr)

# init loss
a_loss = Actor_Loss()
c_loss = Critic_Loss()

for i_episode in range(MAX_EPISODE):
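    # NOTE: the source truncates here; the body below is a hedged sketch of a
    # typical one-step (TD(0)) actor-critic update, not the repository's actual
    # loop. Assumed, hypothetical APIs: grid.reset() -> state,
    # grid.step(action) -> (next_state, reward, done),
    # choose_action(actor, state) -> action index, actor(s) -> action
    # probabilities, a_loss(log_prob, td_error), c_loss(value, td_target),
    # and a discount factor of 0.9.
    state = grid.reset()
    done = False
    while not done:
        action = choose_action(actor, state)
        next_state, reward, done = grid.step(action)

        s = torch.as_tensor(state, dtype=torch.float32)
        s_next = torch.as_tensor(next_state, dtype=torch.float32)

        # Critic: regress V(s) toward the one-step TD target
        value = critic(s)
        with torch.no_grad():
            td_target = reward + 0.9 * critic(s_next) * (1.0 - float(done))
        critic_loss = c_loss(value, td_target)
        critic_opt.zero_grad()
        critic_loss.backward()
        critic_opt.step()

        # Actor: policy-gradient step weighted by the TD error (advantage estimate)
        td_error = (td_target - value).detach()
        log_prob = torch.log(actor(s)[action])
        actor_loss = a_loss(log_prob, td_error)
        actor_opt.zero_grad()
        actor_loss.backward()
        actor_opt.step()

        state = next_state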