# Keras / keras-rl imports required by this snippet
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory


def init_cem(env, nb_actions):
    """ Initialize the CEM agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: CEM agent
    """

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # compile agent
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=50,
                   nb_steps_warmup=2000,
                   train_interval=50,
                   elite_frac=0.05)
    cem.model_name = "CEM"
    cem.compile()
    return cem
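
For reference, the agent returned by init_cem can be trained and evaluated with keras-rl's standard fit/test calls, as in the later examples. A minimal usage sketch, assuming a discrete-action Gym environment such as CartPole-v0 (the environment name and step counts here are illustrative, not part of the original code):

import gym

env = gym.make('CartPole-v0')                      # any discrete-action Gym env works
cem = init_cem(env, nb_actions=env.action_space.n)
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)  # must exceed nb_steps_warmup
cem.test(env, nb_episodes=10, visualize=False)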
Code Example #2
# Imports needed by this snippet (assumed); ActorNetwork and this PyTorch CEMAgent
# come from the project's own modules, which are not shown in the excerpt.
import os
from datetime import datetime

import gym
import torch
from gym import wrappers


def main(args):
	CUDA = torch.cuda.is_available()
	OUTPUT_RESULTS_DIR = './saver'
	ENVIRONMENT = 'CartPole-v0'
	TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M%S")
	SUMMARY_DIR = os.path.join(OUTPUT_RESULTS_DIR, "CEM", ENVIRONMENT, TIMESTAMP)
	
	env = gym.make(ENVIRONMENT)
	env = wrappers.Monitor(env, SUMMARY_DIR, force=True)
	state_dim = env.observation_space.shape[0]
	action_dim = env.action_space.n

	actor = ActorNetwork(state_dim, action_dim)

	if CUDA: 
		actor = actor.cuda()


	agent = CEMAgent(actor, action_dim, batch_size=args.batch_size, ep_len=args.episode_len,
				 elite_frac=args.elite_frac, noise_decay_const=args.noise_decay_const, noise_ampl=args.noise_ampl)

	if args.is_train:
		agent.train(env)
		agent.save_actor_weights(save_dir=OUTPUT_RESULTS_DIR, filename=args.actor_weights)	
	else:
		agent.load_actor_weights(save_dir=OUTPUT_RESULTS_DIR, filename=args.actor_weights)
		agent.test(env)
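
The script above reads several options from args that are not defined in this excerpt. A minimal argparse sketch consistent with the attribute names used above might look like the following; all flags, types, and default values are assumptions rather than the original project's settings:

import argparse

if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Cross-entropy method on CartPole-v0')
	# Attribute names mirror the usages above (args.batch_size, args.episode_len, ...).
	parser.add_argument('--batch_size', type=int, default=50)               # assumed default
	parser.add_argument('--episode_len', type=int, default=200)             # assumed default
	parser.add_argument('--elite_frac', type=float, default=0.1)            # assumed default
	parser.add_argument('--noise_decay_const', type=float, default=0.01)    # assumed default
	parser.add_argument('--noise_ampl', type=float, default=0.1)            # assumed default
	parser.add_argument('--actor_weights', type=str, default='cem_actor.pth')  # assumed filename
	parser.add_argument('--is_train', action='store_true')                  # train if set, else evaluate
	main(parser.parse_args())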
Code Example #3
File: djq_agent.py, Project: superdjq/djq_quant
    def _build_model(self):
        # Simple softmax policy: a flattened observation window mapped to 3 discrete actions.
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + (self.window * 2,)))
        model.add(Dense(3))
        model.add(Activation('softmax'))

        # CEM stores whole-episode parameters and rewards rather than step transitions.
        memory = EpisodeParameterMemory(limit=1000, window_length=1)

        cem = CEMAgent(model=model, nb_actions=3, memory=memory,
                       batch_size=50, nb_steps_warmup=1000, train_interval=50, elite_frac=0.05)
        cem.compile()
        return cem
Code Example #4
def test_cem():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory)
    cem.compile()

    cem.fit(env, nb_steps=2000, visualize=False, verbose=1)
    h = cem.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Code Example #5
    #model.add(Activation('relu'))
    #model.add(Dense(1))
    #model.add(Activation('relu'))
    #model.add(Dense(nb_actions))
    #model.add(Activation('softmax'))

    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=1,
                   nb_steps_warmup=10,
                   train_interval=10,
                   elite_frac=0.5)
    cem.compile()

    # Okay, now it's time to learn something! Visualizing training slows it down quite a lot,
    # so we keep visualize=False here. You can always safely abort the training prematurely
    # using Ctrl + C.
    cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

    # After training is done, we save the best weights.
    cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    cem.test(env, nb_episodes=10)
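
The saved weights can later be restored into an identically built and compiled agent and evaluated again; a short sketch using keras-rl's load_weights, assuming the same model, env, and ENV_NAME as above:

cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
cem.test(env, nb_episodes=10, visualize=False)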