def main():
    """Replay a saved deepq policy in the 3D mountain-car environment.

    Builds a ``ThreeDMountainCarEnv``, loads the act callable from
    ``mountaincar_model_working.pkl`` and then runs episodes back to back.
    Every step renders the orthographic view, applies the action chosen by
    the policy, and prints a policy query plus the new observation. The
    accumulated reward is printed when an episode finishes.

    The outer loop has no exit condition, so this function never returns
    on its own; it has to be interrupted externally.
    """
    # The stock gym "MountainCar-v0" environment was the earlier alternative
    # here; the project-specific 3D variant is used instead.
    env = ThreeDMountainCarEnv()
    act = deepq.load("mountaincar_model_working.pkl")

    while True:
        observation = env.reset()
        finished = False
        total_reward = 0

        # `finished` starts out False, so the body always runs at least once
        # and the termination flag is checked after each step.
        while True:
            # Orthographic view is used rather than the plain env.render().
            env.render_orthographic()

            # obs[None] indexes with None before the policy call -- presumably
            # to add a leading batch axis (TODO confirm against deepq's act).
            chosen_action = act(observation[None])[0]
            observation, step_reward, finished, _info = env.step(chosen_action)

            # NOTE(review): this is a fresh policy query on the post-step
            # observation, not the action that was just applied.
            print(act(observation[None])[0])
            print(observation)

            total_reward += step_reward
            if finished:
                break

        print("Episode reward", total_reward)
import gym
import numpy as np
from matplotlib import pyplot as plt
import itertools
from lib.env.threedmountain_car import ThreeDMountainCarEnv

# Smoke-run of the 3D mountain-car environment: one episode is stepped with
# a fixed action while the orthographic view is rendered on every step.
env = ThreeDMountainCarEnv()
state = env.reset()

# itertools.count() gives an unbounded step counter; the loop only ends once
# the environment reports the episode as done.
for t in itertools.count():
    # The action is hard-coded to 0; sampling from env.action_space was the
    # alternative noted in the original script.
    next_state, reward, done, info = env.step(0)

    # Other views noted in the original script: env.render() ("yellow") and
    # env.render_y() ("cyan"). Only the orthographic view is drawn here.
    env.render_orthographic()

    if not done:
        state = next_state
    else:
        # `state` is deliberately left at the last non-terminal observation.
        break

    # A disabled early exit (close the GUI and stop at t == 100) used to sit
    # at this point; `t` is otherwise unused.