def invoke_agent_env(params, alg):
    """Build the wrapped env for ``params.mode`` and the agent's string name.

    The caller activates the agent from the main script via ``eval(agent)``.

    Args:
        params: hyper-parameter namespace; reads ``mode``, ``env_name``,
            ``skip_frame_k`` and ``debug_flg``.
        alg: algorithm name used to compose the agent identifier.

    Returns:
        tuple: ``(agent, env)`` where ``agent`` is a string identifier and
        ``env`` is the wrapped gym environment.

    Raises:
        ValueError: if ``params.mode`` is not "Atari", "CartPole" or
            "CartPole-p".
    """
    if params.mode == "Atari":
        # BUG FIX: `skip_frame_k` was previously passed as a keyword argument
        # to str.format(), where it was silently ignored (misplaced closing
        # paren); only wrap_deepmind consumes it.
        env = wrap_deepmind(
            make_atari("{}NoFrameskip-v4".format(params.env_name)),
            skip_frame_k=params.skip_frame_k)
        # Debug suffix is only applied in Atari mode (as in the original).
        agent = "{}_debug".format(alg) if params.debug_flg else "{}".format(alg)
    else:
        agent = "{}".format(alg)
        if params.mode == "CartPole":
            env = MyWrapper(gym.make("CartPole-v0"))
        elif params.mode == "CartPole-p":
            env = CartPole_Pixel(gym.make("CartPole-v0"))
        else:
            # Previously an unknown mode fell through and crashed with
            # UnboundLocalError at the return; fail loudly instead.
            raise ValueError("Unknown mode: {}".format(params.mode))
    return agent, env
try: os.system("rm -rf {}".format(logdirs.log_DQfD)) except: pass parser = argparse.ArgumentParser() parser.add_argument("--mode", default="CartPole", help="game env type") parser.add_argument("--num_episodes", default=100, type=int, help="game env type") args = parser.parse_args() if args.mode == "CartPole": env = MyWrapper(gym.make("CartPole-v0")) elif args.mode == "Atari": env = wrap_deepmind(make_atari("PongNoFrameskip-v4")) params = Parameters(algo="DQfD", mode=args.mode) params.num_episodes = args.num_episodes replay_buffer = PrioritizedReplayBuffer( params.memory_size, alpha=params.prioritized_replay_alpha) Beta = AnnealingSchedule(start=params.prioritized_replay_beta_start, end=params.prioritized_replay_beta_end, decay_steps=params.decay_steps) agent = DQfD(args.mode, Model, Model, env.action_space.n, params, logdirs.model_DQN) if params.policy_fn == "Eps": Epsilon = AnnealingSchedule(start=params.epsilon_start, end=params.epsilon_end,
decay_type="linear") optimizer = tf.train.RMSPropOptimizer(anneal_lr.get_value(), 0.99, 0.0, 1e-6) if params.loss_fn == "huber": loss_fn = tf.losses.huber_loss elif params.loss_fn == "mse": loss_fn = tf.losses.mean_squared_error else: assert False, "Choose the loss_fn from either huber or mse" grad_clip_fn = gradient_clip_fn(flag=params.grad_clip_flg) summary_writer = tf.contrib.summary.create_file_writer(params.log_dir) now = datetime.now() if params.mode == "CartPole": env = MyWrapper(gym.make("CartPole-v0")) params.log_dir = "../../logs/logs/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDDP/" params.model_dir = "../../logs/models/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDDP/" agent = Double_DQN_cartpole(Model, optimizer, loss_fn, grad_clip_fn, env.action_space.n, params) elif params.mode == "CartPole-p": env = CartPole_Pixel(gym.make("CartPole-v0")) params.log_dir = "../../logs/logs/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDDP-p/" params.model_dir = "../../logs/models/" + now.strftime( "%Y%m%d-%H%M%S") + "-DDDP-p/" agent = Double_DQN(Model_p, optimizer, loss_fn, grad_clip_fn, env.action_space.n, params)
default="../../logs/models/DQN/", help="directory for trained model") parser.add_argument("--debug_flg", default=False, type=bool, help="debug mode or not") parser.add_argument("--google_colab", default=False, type=bool, help="if you are executing this on GoogleColab") params = parser.parse_args() params.goal = 195 params.test_episodes = 10 if params.mode == "CartPole": env = MyWrapper(gym.make("CartPole-v0")) elif params.mode == "CartPole-p": env = CartPole_Pixel(gym.make("CartPole-v0")) if params.google_colab: # mount your drive on google colab from google.colab import drive drive.mount("/content/gdrive") params.log_dir = "/content/gdrive/My Drive/logs/logs/DQN/{}".format( params.env_name) params.model_dir = "/content/gdrive/My Drive/logs/models/DQN/{}".format( params.env_name) os.makedirs(params.log_dir) os.makedirs(params.model_dir) assert os.path.isdir(