def testEvaluationOption(self):
    ray.init()
    agent = DQNAgent(env="CartPole-v0", config={"evaluation_interval": 2})
    r0 = agent.train()
    r1 = agent.train()
    r2 = agent.train()
    r3 = agent.train()
    r4 = agent.train()
    self.assertTrue("evaluation" in r0)
    self.assertTrue("episode_reward_mean" in r0["evaluation"])
    self.assertEqual(r0["evaluation"], r1["evaluation"])
    self.assertNotEqual(r1["evaluation"], r2["evaluation"])
    self.assertEqual(r2["evaluation"], r3["evaluation"])
    self.assertNotEqual(r3["evaluation"], r4["evaluation"])
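# Why the pairs above compare equal: with "evaluation_interval": 2, fresh
# evaluation metrics are only computed on every second train() call, and the
# previous dict is carried over unchanged in between. A minimal sketch of that
# gating (an illustration only, not RLlib's actual implementation;
# maybe_evaluate and run_evaluation are hypothetical names):
def maybe_evaluate(iteration, evaluation_interval, last_eval):
    if iteration % evaluation_interval == 0:
        return run_evaluation()  # hypothetical: produces a fresh metrics dict
    return last_eval  # unchanged copy, hence the assertEqual pairs above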
def testTrainCartpoleOffPolicy(self):
    register_env(
        "test3",
        lambda _: PartOffPolicyServing(
            gym.make("CartPole-v0"), off_pol_frac=0.2))
    dqn = DQNAgent(env="test3", config={"exploration_fraction": 0.001})
    for i in range(100):
        result = dqn.train()
        print("Iteration {}, reward {}, timesteps {}".format(
            i, result["episode_reward_mean"], result["timesteps_total"]))
        if result["episode_reward_mean"] >= 100:
            return
    raise Exception("failed to improve reward")
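# PartOffPolicyServing is not defined in this excerpt. A minimal sketch of
# such a wrapper, assuming it simply replaces a fraction off_pol_frac of the
# policy's actions with randomly sampled ones (the class name matches the
# test above; the body here is an assumption, not the original
# implementation):
import random

import gym


class PartOffPolicyServing(gym.Wrapper):
    def __init__(self, env, off_pol_frac):
        super(PartOffPolicyServing, self).__init__(env)
        self.off_pol_frac = off_pol_frac

    def step(self, action):
        # With probability off_pol_frac, ignore the policy's action and
        # sample a random one, so part of the experience is off-policy.
        if random.random() < self.off_pol_frac:
            action = self.env.action_space.sample()
        return self.env.step(action)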
"num_gpus": 1, "hiddens": [], "schedule_max_timesteps": 7500000, "timesteps_per_iteration": 4000, "exploration_fraction": 0.8, "exploration_final_eps": 0.02, "lr": 1e-3, "model": { "custom_model": "parametric", "custom_options": {}, # extra options to pass to your model } }) # Attempt to restore from checkpoint if possible. if os.path.exists(CHECKPOINT_FILE): checkpoint_path = open(CHECKPOINT_FILE).read() print("Restoring from checkpoint path", checkpoint_path) dqn.restore(checkpoint_path) # run the new command using the given tracer # make a report, placing output in the current directory # Serving and training loop while True: print(pretty_print(dqn.train())) checkpoint_path = dqn.save() print("Last checkpoint", checkpoint_path) with open(CHECKPOINT_FILE, "w") as f: f.write(checkpoint_path)
"timesteps_per_iteration": 200, "env_config": { "observation_size": args.observation_size, "action_size": args.action_size, }, }) elif args.run == "PG": agent = PGAgent( env="srv", config={ "num_workers": 0, "env_config": { "observation_size": args.observation_size, "action_size": args.action_size, }, }) # Attempt to restore from checkpoint if possible. if os.path.exists(args.checkpoint_file): checkpoint_file = open(args.checkpoint_file).read() print("Restoring from checkpoint path", checkpoint_file) agent.restore(checkpoint_file) # Serving and training loop while True: print(pretty_print(agent.train())) checkpoint_file = agent.save() print("Last checkpoint", checkpoint_file) with open(args.checkpoint_file, "w") as f: f.write(checkpoint_file)