def main():
    """Train a DQN agent on CartPole-v0 and save the learned model to disk."""
    env = gym.make("CartPole-v0")
    # NOTE(review): `callback` is assumed to be defined elsewhere in this file
    # (typically an early-stopping hook) — confirm before running standalone.
    trained_policy = enhanceddeepq.learn(
        env,
        network='mlp',
        lr=1e-3,
        total_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback,
    )
    print("Saving model to cartpole_model.pkl")
    trained_policy.save("cartpole_model.pkl")
def main():
    """Train a DQN agent with parameter-space noise on MountainCar-v0 and save it."""
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    trained_policy = enhanceddeepq.learn(
        env,
        network=models.mlp(num_hidden=64, num_layers=1),
        lr=1e-3,
        total_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        print_freq=10,
        param_noise=True,
    )
    print("Saving model to mountaincar_model.pkl")
    trained_policy.save("mountaincar_model.pkl")
def main():
    """Load a saved MountainCar-v0 model and roll it out forever with rendering."""
    env = gym.make("MountainCar-v0")
    # total_timesteps=0 skips training; the model is restored from load_path.
    policy = enhanceddeepq.learn(
        env,
        network=models.mlp(num_layers=1, num_hidden=64),
        total_timesteps=0,
        load_path='mountaincar_model.pkl',
    )
    while True:
        observation = env.reset()
        done = False
        total_reward = 0
        while not done:
            env.render()
            # policy expects a batch dimension, hence observation[None].
            observation, reward, done, _ = env.step(policy(observation[None])[0])
            total_reward += reward
        print("Episode reward", total_reward)
def main():
    """Load a saved CartPole-v0 model and roll it out forever with rendering."""
    env = gym.make("CartPole-v0")
    # total_timesteps=0 skips training; the model is restored from load_path.
    policy = enhanceddeepq.learn(
        env,
        network='mlp',
        total_timesteps=0,
        load_path="cartpole_model.pkl",
    )
    while True:
        observation = env.reset()
        done = False
        total_reward = 0
        while not done:
            env.render()
            # policy expects a batch dimension, hence observation[None].
            observation, reward, done, _ = env.step(policy(observation[None])[0])
            total_reward += reward
        print("Episode reward", total_reward)
def main():
    """Build a dueling conv-net DQN for Pong and roll it out forever with rendering.

    Note: total_timesteps=0 means no training happens here; the model runs
    with whatever parameters `learn` leaves it with.
    """
    env = enhanceddeepq.wrap_atari_dqn(gym.make("PongNoFrameskip-v4"))
    model = enhanceddeepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        total_timesteps=0,
    )
    while True:
        observation = env.reset()
        done = False
        total_reward = 0
        while not done:
            env.render()
            # model expects a batch dimension, hence observation[None].
            observation, reward, done, _ = env.step(model(observation[None])[0])
            total_reward += reward
        print("Episode reward", total_reward)