def init_cem(env, nb_actions):
    """Build and compile a CEM agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: compiled CEM agent
    """
    # Option 2: deep network — three ReLU hidden layers, softmax policy head.
    net = Sequential()
    net.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for units in (512, 256, 64):
        net.add(Dense(units))
        net.add(Activation('relu'))
    net.add(Dense(nb_actions))
    net.add(Activation('softmax'))

    # Episode-parameter memory feeds the cross-entropy-method updates.
    episode_memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=net, nb_actions=nb_actions, memory=episode_memory,
                     batch_size=50, nb_steps_warmup=2000, train_interval=50,
                     elite_frac=0.05)
    agent.model_name = "CEM"
    agent.compile()
    return agent
def main(args):
    """Train or evaluate a CEM agent on CartPole-v0.

    When ``args.is_train`` is set, trains the actor and saves its weights;
    otherwise loads previously saved weights and runs evaluation episodes.
    Monitor recordings go under a timestamped directory below './saver'.

    :param args: parsed CLI namespace (batch_size, episode_len, elite_frac,
        noise_decay_const, noise_ampl, is_train, actor_weights)
    """
    use_cuda = torch.cuda.is_available()
    results_root = './saver'
    env_name = 'CartPole-v0'
    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    monitor_dir = os.path.join(results_root, "CEM", env_name, run_stamp)

    env = wrappers.Monitor(gym.make(env_name), monitor_dir, force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    actor = ActorNetwork(obs_size, n_actions)
    if use_cuda:
        actor = actor.cuda()

    agent = CEMAgent(actor, n_actions,
                     batch_size=args.batch_size,
                     ep_len=args.episode_len,
                     elite_frac=args.elite_frac,
                     noise_decay_const=args.noise_decay_const,
                     noise_ampl=args.noise_ampl)

    if args.is_train:
        agent.train(env)
        agent.save_actor_weights(save_dir=results_root, filename=args.actor_weights)
    else:
        agent.load_actor_weights(save_dir=results_root, filename=args.actor_weights)
        agent.test(env)
def _build_model(self):
    """Construct and compile a 3-action CEM agent over a flattened window input.

    Input width is ``self.window * 2``; the policy is a single softmax layer.
    :return: compiled CEMAgent
    """
    policy = Sequential()
    policy.add(Flatten(input_shape=(1,) + (self.window * 2,)))
    policy.add(Dense(3))
    policy.add(Activation('softmax'))

    episode_memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=policy, nb_actions=3, memory=episode_memory,
                     batch_size=50, nb_steps_warmup=1000, train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    return agent
def test_cem():
    """CEM should reach the optimal mean reward (3.0) on the deterministic env."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    n_actions = env.action_space.n

    # Next, we build a very simple model: one hidden layer, linear output.
    net = Sequential()
    net.add(Dense(16, input_shape=(1, )))
    net.add(Activation('relu'))
    net.add(Dense(n_actions))
    net.add(Activation('linear'))

    agent = CEMAgent(model=net, nb_actions=n_actions,
                     memory=EpisodeParameterMemory(limit=1000, window_length=1))
    agent.compile()
    agent.fit(env, nb_steps=2000, visualize=False, verbose=1)

    history = agent.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
def test_cem():
    """Verify the CEM agent solves TwoRoundDeterministicRewardEnv optimally."""
    # Seed every RNG involved so the run is reproducible.
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    action_count = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    for layer in (Dense(16, input_shape=(1,)),
                  Activation('relu'),
                  Dense(action_count),
                  Activation('linear')):
        model.add(layer)

    replay = EpisodeParameterMemory(limit=1000, window_length=1)
    cem_agent = CEMAgent(model=model, nb_actions=action_count, memory=replay)
    cem_agent.compile()
    cem_agent.fit(env, nb_steps=2000, visualize=False, verbose=1)

    result = cem_agent.test(env, nb_episodes=20, visualize=False)
    # Optimal policy yields a mean episode reward of exactly 3.
    assert_allclose(np.mean(result.history['episode_reward']), 3.)
# NOTE(review): removed a run of commented-out extra layers (relu/Dense(1)/softmax
# variants) — dead code; the model defined above already ends in its output layer.
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=1, nb_steps_warmup=10, train_interval=10,
               elite_frac=0.5)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
# (Comment previously said 5 but the call has always run 10 — fixed the stale comment.)
cem.test(env, nb_episodes=10)