# NOTE(review): this line is a whitespace-collapsed fragment of a driver
# script; line breaks and indentation were lost, so it is not runnable as
# written. The statements it contains, in order:
#   * key_press(k, mod): a handler that writes the module globals `restart`
#     and `a`. key.R sets restart = True; key.UP / key.DOWN / key.LEFT /
#     key.RIGHT set a to 0 / 1 / 2 / 3. `mod` is not read.
#   * env.render() is called, then key_press is assigned to
#     env.viewer.window.on_key_press.
#   * An `else:` follows whose matching `if` is above this chunk
#     (presumably the manual-control vs. loaded-model switch -- TODO confirm).
#     It builds DQN(size, size, batch_norm=True) with
#     size = (args.dim + 2) * args.zoom, loads weights from args.filename via
#     torch.load, and wraps the model in PurePolicy as `policy`.
#   * A `try:` is opened (its except/finally is past the end of this chunk)
#     around a `while True:` that resets env, zeroes total_reward and steps,
#     and clears restart; a nested `while True:` then ticks the pyglet clock,
#     obtains `a` from policy.get(tensorize(state)) when policy is not None,
#     calls env.step(a), and accumulates total_reward and steps.
# NOTE(review): which statements sit under `if (policy is not None):` cannot
# be determined from the collapsed text; presumably only the two policy lines
# are guarded and env.step(a) runs in both modes -- confirm against the
# original file.
# NOTE(review): torch.load(args.filename) passes no map_location; a
# checkpoint saved from CUDA tensors would not load on a CPU-only host.
def key_press(k, mod): global restart global a if k == key.R: restart = True if k == key.UP: a = 0 if k == key.DOWN: a = 1 if k == key.LEFT: a = 2 if k == key.RIGHT: a = 3 env.render() env.viewer.window.on_key_press = key_press else: size = (args.dim + 2) * args.zoom model = DQN(size, size, batch_norm=True) model.load_state_dict(torch.load(args.filename)) policy = PurePolicy(model) try: while True: state = env.reset() total_reward = 0.0 steps = 0 restart = False while True: pyglet.clock.tick() if (policy is not None): state_ten = tensorize(state) a = policy.get(state_ten) state, r, done, info = env.step(a) total_reward += r steps += 1
# Evaluation set-up: create one environment to size the network, restore the
# saved DQN weights, initialise the score counters, then iterate the levels.
# NOTE(review): `game` and `lvl` are read on the next line before anything in
# this chunk assigns them, so they must be defined earlier in the file --
# confirm.
env = gym.make(f'{game}-lvl{lvl}-v0')
env.reset()
device = find_device()

# The last two entries of the 4-entry shape returned by get_screen are used
# as the network's input height and width.
init_screen = get_screen(env, device)
_, _, screen_height, screen_width = init_screen.shape
n_actions = env.action_space.n

LINEAR_INPUT_SCALAR = 8
KERNEL = 5
# Positional constructor arguments for DQN, in the order it is called below.
init_model = [
    screen_height,
    screen_width,
    LINEAR_INPUT_SCALAR,
    KERNEL,
    n_actions,
]
win_factor = 100

model = DQN(*init_model)
# map_location='cpu' lets a checkpoint that was saved from CUDA tensors be
# deserialised on a CPU-only host. load_state_dict copies the values into the
# model's existing parameters, so this does not change where the model lives.
model.load_state_dict(
    torch.load('saved_models/torch_model_0-1-1-1-1-1', map_location='cpu')
)

# get_screen is called a second time (after the model is built) to obtain the
# starting state; the call order is kept exactly as in the original.
current_screen = get_screen(env, device)
state = current_screen

stop_after = 1000
sum_score = 0
won = 0
key_found = 0

# range(7, 8) yields only level 7.
# NOTE(review): the loop body may continue past the end of this chunk, and the
# environment created above is rebound here without an explicit close --
# confirm whether that is intended.
for lvl in range(7, 8):
    level_name = f'{game}-lvl{lvl}-v0'
    print(level_name)
    env = gym.make(level_name)