Example 1
    def configure(self, observation_space_shape, nb_actions):
        if self.opts.model_type == 1:
            # Option 1 : Simple model
            model = Sequential()
            model.add(Flatten(input_shape=(1, ) + observation_space_shape))
            model.add(Dense(nb_actions))
            model.add(Activation('softmax'))
            print(model.summary())
        elif self.opts.model_type == 2:
            # Option 2: deep network
            model = Sequential()
            model.add(Flatten(input_shape=(1, ) + observation_space_shape))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(nb_actions))
            model.add(Activation('softmax'))
            print(model.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = EpisodeParameterMemory(limit=1000, window_length=1)

        self.agent = CEMAgent(model=model,
                              nb_actions=nb_actions,
                              memory=memory,
                              batch_size=50,
                              nb_steps_warmup=2000,
                              train_interval=50,
                              elite_frac=0.05)
        self.agent.compile()
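
This snippet is a fragment of a larger agent class and assumes the usual Keras and keras-rl imports. A minimal sketch of the imports such a file would need, mirroring the complete script in Example 29 below:

# Assumed imports for the fragment above (hypothetical; taken from Example 29).
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory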
Example 2
class KerasCEMAgent(object):
	'''
	The cross-entropy method Learning Agent as described in http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf
	'''

	def __init__(self, opts):
		self.metadata = {
			'discrete_actions': True,
		}

		self.opts = opts

	def configure(self, observation_space_shape, nb_actions):
		if self.opts.model_type == 1:
			# Option 1 : Simple model
			model = Sequential()
			model.add(Flatten(input_shape=(1,) + observation_space_shape))
			model.add(Dense(nb_actions))
			model.add(Activation('softmax'))
			print(model.summary())
		elif self.opts.model_type == 2:
			# Option 2: deep network
			model = Sequential()
			model.add(Flatten(input_shape=(1,) + observation_space_shape))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(nb_actions))
			model.add(Activation('softmax'))
			print(model.summary())

		# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
		# even the metrics!
		memory = EpisodeParameterMemory(limit=1000, window_length=1)

		self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
							  batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
		self.agent.compile()

	def train(self, env, nb_steps, visualize, verbosity):
		# Okay, now it's time to learn something! We visualize the training here for show, but this
		# slows down training quite a lot. You can always safely abort the training prematurely using
		# Ctrl + C.
		self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

	def test(self, env, nb_episodes, visualize):
		# Finally, evaluate our algorithm for 5 episodes.
		self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

	def load_weights(self, load_file):
		self.agent.load_weights(load_file)

	def save_weights(self, save_file, overwrite):
		# After training is done, we save the best weights.
		self.agent.save_weights(save_file, overwrite=overwrite)
Example 3
    def __init__(self, env, timesteps_per_episode=10001):
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model, nb_actions=self.action_size, memory=memory, batch_size=50,
                              nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
Example 4
def main(env_name, nb_steps):
    # Get the environment and extract the number of actions.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    input_shape = (1, ) + env.observation_space.shape
    model = create_nn_model(input_shape, nb_actions)

    # Finally, we configure and compile our agent.
    memory = EpisodeParameterMemory(limit=450, window_length=1)

    agent = CEMAgent(model=model,
                     nb_actions=nb_actions,
                     memory=memory,
                     batch_size=50,
                     nb_steps_warmup=2000,
                     train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # After training is done, we save the best weights.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Finally, evaluate the agent
    history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(history.history['episode_reward'])
    print(("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
           "min={:>5.2f}, max={:>5.2f}").format(len(rewards), rewards.mean(),
                                                rewards.std(), rewards.min(),
                                                rewards.max()))
Example 5
def main(env_name, nb_steps):
    # Get the environment and extract the number of actions.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    input_shape = (1,) + env.observation_space.shape
    model = create_nn_model(input_shape, nb_actions)

    # Finally, we configure and compile our agent.
    memory = EpisodeParameterMemory(limit=450, window_length=1)

    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                     batch_size=50, nb_steps_warmup=2000, train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # After training is done, we save the best weights.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Finally, evaluate the agent
    history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(history.history['episode_reward'])
    print(("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
           "min={:>5.2f}, max={:>5.2f}")
          .format(len(rewards),
                  rewards.mean(),
                  rewards.std(),
                  rewards.min(),
                  rewards.max()))
Example 6
    def reinforce_train_cem(self,
                            steps=60000,
                            visualize=False,
                            verbose=1,
                            nb_steps_warmup=10000,
                            save_path=r"D:\Data\markets\weights",
                            save_weights_name="cem_CADJPY_weights.h5f",
                            log_interval=1000):
        memory = EpisodeParameterMemory(limit=200, window_length=1)
        nb_actions = self.env.action_space.n

        agent = CEMAgent(
            model=self.model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=nb_steps_warmup,
            processor=MultiInputProcessor(nb_inputs=len(self.model.inputs)))
        agent.compile()
        agent.fit(self.env,
                  nb_steps=steps,
                  visualize=visualize,
                  verbose=verbose,
                  log_interval=log_interval)

        pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
        file_path = os.path.join(save_path, save_weights_name)
        agent.save_weights(filepath=file_path, overwrite=True)
Example 7
def create_cem_agent(env):
    ''' create cem agent '''
    env = create_environment()
    model = create_deep_model(env)
    nb_actions = env.action_space.n
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    cem = CEMAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05)
    cem.compile()
    return cem
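
A possible usage sketch for create_cem_agent (hypothetical; it assumes create_environment and create_deep_model from the same module, and the step and episode counts are illustrative placeholders):

# Hypothetical driver; hyperparameter values are placeholders.
env = create_environment()
cem = create_cem_agent(env)
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_params.h5f', overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)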
Example 8
def main():
    """Build model and train on environment."""
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3      # Keras-RL CEM is a discrete agent

    # Option 1 : Simple model
    model = Sequential([
        Flatten(input_shape=(1,) + env.observation_space.shape),
        Dense(nb_actions),
        Activation('softmax')
    ])

    # Option 2: deep network
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(nb_actions),
    #     Activation('softmax')
    # ])

    print(model.summary())

    param_logger = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol))
    callbacks = [
        param_logger,
        FileLogger('cem_{}_log.json'.format(env.instrument.symbol), interval=STEPS_PER_EPISODE)
    ]

    theta_init = param_logger.read_params()     # Start with last saved params if present
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)        # Remember the parameters and rewards for the last `limit` episodes.
    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=EPISODES,
                   nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
                   train_interval=TRAIN_INTERVAL_EPISODES, elite_frac=0.2, theta_init=theta_init,
                   processor=DiscreteProcessor(), noise_decay_const=0, noise_ampl=0)
    """
    :param memory: Remembers the parameters and rewards for the last `limit` episodes.
    :param int batch_size: Randomly sample this many episode parameters from memory before taking the top `elite_frac` to construct the next gen parameters from.
    :param int nb_steps_warmup: Run for this many steps (total) to fill memory before training
    :param int train_interval: Train (update parameters) every this many episodes
    :param float elite_frac: Take this top fraction of the `batch_size` randomly sampled parameters from the episode memory to construct new parameters.
    """
    cem.compile()
    cem.fit(env, nb_steps=STEPS_PER_EPISODE * EPISODES, visualize=True, verbose=2, callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
Example 9
    def train_implementation(self, train_context: core.CemTrainContext):
        assert train_context
        cc: core.CemTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        policy_buffer_size = 5 * cc.num_episodes_per_iteration
        self.log_api(f'EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)
        num_actions = train_env.action_space.n
        self.log_api(f'CEMAgent', f'(model=..., nb_actions={num_actions}, memory=..., ' + \
                     f'nb_steps_warmup={cc.num_steps_buffer_preload}, ' + \
                     f'train_interval={cc.num_episodes_per_iteration}, ' + \
                     f'batch_size={cc.num_episodes_per_iteration}, ' + \
                     f'elite_frac={cc.elite_set_fraction})')
        self._agent = CEMAgent(model=keras_model,
                               nb_actions=num_actions,
                               memory=memory,
                               nb_steps_warmup=cc.num_steps_buffer_preload,
                               batch_size=cc.num_episodes_per_iteration,
                               train_interval=cc.num_episodes_per_iteration,
                               elite_frac=cc.elite_set_fraction)
        self.log_api(f'agent.compile', '()')
        self._agent.compile()
        nb_steps = cc.num_iterations * cc.num_episodes_per_iteration * cc.max_steps_per_episode
        callback = KerasRlCemAgent.CemCallback(self, cc, nb_steps)
        self.on_train_iteration_begin()
        self.log_api(f'agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[callback])
        if not cc.training_done:
            self.on_train_iteration_end(math.nan)
Example 10
def create(env):
    np.random.seed(config.current.domain_seed)
    env.seed(config.current.domain_seed)
    nb_actions = env.action_space.n

    obs_dim = env.observation_space.shape[0]

    # Option 1 : Simple model
    #model = Sequential()
    #model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    #model.add(Dense(nb_actions))
    #model.add(Activation('softmax'))

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=50,
                   nb_steps_warmup=2000,
                   train_interval=50,
                   elite_frac=0.05)
    cem.compile()
    return cem
Example 11
def test_single_cem_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = EpisodeParameterMemory(limit=10, window_length=2)
    agent = CEMAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4, train_interval=50)
    agent.compile()
    agent.fit(MultiInputTestEnv((3,)), nb_steps=100)
Example 12
def test_multi_cem_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = EpisodeParameterMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = CEMAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                     processor=processor, train_interval=50)
    agent.compile()
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=100)
Example 13
print(env.observation_space.shape)
# # Option 1 : Simple model
# model = Sequential()
# model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))
#
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(124))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()

# cem.fit(env, nb_steps=200000, visualize=False, verbose=2)
# cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)


cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
cem.test(env, nb_episodes=10, visualize=True)
Example 14
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

cem.load_weights('cem_{}_params.h5f'.format("citizen-0"))

for move in range(100):
    action = cem.forward(env.citizens[0].vision)
    print(action)
    env.step(action)
    print(env.citizens[0].score)
    time.sleep(0.05)
    print(env)
Example 15
def main(options):
    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # CEM
    # init environment
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=batch_size_cem,
                   nb_steps_warmup=1000,
                   train_interval=train_interval_cem,
                   elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights('cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
        ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
                     overwrite=True)
    #cem.test(env, nb_episodes=1, visualize=False)

    # PROPS
    # init environment
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres
    }

    props = PROPSAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       Lmax=Lmax,
                       delta=delta,
                       bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env,
                               nb_steps=steps_props,
                               visualize=False,
                               verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.
        format(ENV_NAME, model_type, batch_size_props, steps_props,
               trunc_thres, Lmax, delta),
        overwrite=True)
    #props.test(env, nb_episodes=1, visualize=False)

    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    #plt.plot(callback_cem.history['episode_reward'])
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())

    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    #plt.plot(callback_props.history['episode_reward'])
    plt.plot(df_props.rolling(window=batch_size_props).mean())

    plt.legend(['cem', 'props'], loc='upper left')
    #plt.show()
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))
Example 16
    n_action = len(env_player.action_space)

    memory = EpisodeParameterMemory(limit=10000, window_length=1)
    model = Sequential()
    model.add(Flatten(input_shape=(1, 10)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(n_action))
    model.add(Activation('softmax'))

    agent = CEMAgent(model=model, nb_actions=n_action, memory=memory,
                     batch_size=50, nb_steps_warmup=1000, train_interval=50,
                     elite_frac=0.05, noise_ampl=4)

    agent.compile()

    # Training
    env_player.play_against(
        env_algorithm=agent_training,
        opponent=random_opponent,
        env_algorithm_kwargs={"agent": agent, "nb_steps": NB_TRAINING_STEPS, "filename": TRAINING_OPPONENT},
    )
    model.save("model_%d" % NB_TRAINING_STEPS)

    # Evaluation
    print("Results against random player:")
    env_player.play_against(
Example 17
def run_agent(agent):
    print("started new process")

    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    WINDOW_LENGTH = 1

    num_actions = 3
    view_shape = (21, 21)
    input_shape = (WINDOW_LENGTH, ) + view_shape

    env = RestrictedViewTronEnv(agent, 10)

    model = Sequential()

    model.add(Permute((2, 3, 1), input_shape=input_shape))

    model.add(Conv2D(16, (3, 3), padding="same"))
    model.add(Activation("relu"))

    model.add(Conv2D(32, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(Flatten())

    model.add(Dense(256))
    model.add(Activation("relu"))

    model.add(Dense(num_actions))
    model.add(Activation('softmax'))
    np.random.seed(2363)

    #policy = LinearAnnealedPolicy(BoltzmannQPolicy(), attr='tau', value_max=2.,
    #                              value_min=.1, value_test=.1, nb_steps=1000000 // 10)

    processor = TronProcessor()

    memory = EpisodeParameterMemory(limit=1000000, window_length=WINDOW_LENGTH)

    cem = CEMAgent(model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=50000 // 5,
                   train_interval=4)

    #dqn.compile(Adam(lr=.00025), metrics=["mae"])
    cem.compile()

    weights_filename = 'tmp/dqn_test_weights.h5f'
    checkpoint_weights_filename = 'tmp/dqn_test_weights_{step}.h5f'
    log_filename = 'tmp/dqn_test_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=250000 // 10)
    ]
    callbacks += [FileLogger(log_filename, interval=10000)]

    def train(transfer=False):
        print(cem.get_config())  # todo save to file

        if transfer:
            cem.load_weights(weights_filename)

        cem.fit(env,
                callbacks=callbacks,
                nb_steps=1750000 // 10,
                log_interval=10000)
        cem.save_weights(weights_filename, overwrite=True)
        cem.test(env, nb_episodes=20, visualize=True)

    def opponent():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=200000, visualize=False)

    def test():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=20, visualize=True)

    # opponent()
    train()  # True
Example 18
    out = keras.layers.Concatenate()(dense_out2)

    return keras.models.Model(inp, out)


model = model_fn()

# print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=se.action_space,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = cem.fit(se, nb_steps=50000, visualize=False, verbose=2)

rewards = [x for x in history.history['episode_reward'] if x > 0]

import matplotlib.pyplot as plt

plt.plot(np.convolve(np.ones(100), rewards, 'valid'))
Example 19
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=10000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=1000,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05,
               noise_decay_const=0.0,
               noise_ampl=1.0,
               processor=MujocoProcessor())
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the best weights.
cem.save_weights('cem_CAV_params.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
Example 20
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)
Example 21
def main(params=None):
    """
    performs training and evaluation of params
    :return: model
    """
    if params is None:
        params = {
            'model_type': 'dqn_agent',
            'l1_out': 128,
            'l2_out': 64,
            'gamma': 0.5,
            'target_model_update': 1,
            'delta_clip': 0.01,
            'nb_steps_warmup': 1000
        }

    model_type = 'dqn_agent'
    env_player = SimpleRLPlayer(battle_format="gen8randombattle")
    # print('env_player',env_player)
    # print('help', help(env_player))
    env_player2 = SimpleRLPlayer(battle_format="gen8randombattle")

    opponent = RandomPlayer(battle_format="gen8randombattle")
    second_opponent = MaxDamagePlayer(battle_format="gen8randombattle")

    # Output dimension
    n_action = len(env_player.action_space)

    # model_params = {
    #     'n_actions': n_action,
    #     'l1_out': 128,
    #     'l2_out': 64,
    #     'model_type': params['model_type']
    # }
    model_params = params
    model_params['n_actions'] = n_action

    model = get_model(model_params)

    # print('first model summary')
    # print(model.summary())
    # model = Sequential()
    # model.add(Dense(128, activation="elu", input_shape=(1, 10)))
    #
    # # Our embedding have shape (1, 10), which affects our hidden layer
    # # dimension and output dimension
    # # Flattening resolve potential issues that would arise otherwise
    # model.add(Flatten())
    # model.add(Dense(64, activation="elu"))
    # model.add(Dense(n_action, activation="linear"))

    # elu activation is similar to relu
    # https://ml-cheatsheet.readthedocs.io/en/latest/activation_functions.html#elu

    # determine memory type
    if params['model_type'] in {'dqn_agent', 'sarsa_agent'}:
        # memory = SequentialMemory(limit=10000, window_length=1)
        memory = SequentialMemory(limit=NB_TRAINING_STEPS, window_length=1)
    else:
        memory = EpisodeParameterMemory(limit=10000, window_length=1)

    # Simple epsilon greedy
    # What is linear annealed policy?
    # - this policy gives gradually decreasing thresholds for the epsilon greedy policy
    # - it acts as a wrapper around epsilon greedy to feed in a custom threshold
    pol_steps = NB_TRAINING_STEPS
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1.0,
        value_min=0.05,
        value_test=0,
        nb_steps=pol_steps,
    )
    # pol_steps = NB_TRAINING_STEPS
    policy_boltz = BoltzmannQPolicy(tau=1)
    # policy = LinearAnnealedPolicy(
    #     BoltzmannQPolicy(),
    #     attr="tau",
    #     value_max=1.0,
    #     value_min=0.05,
    #     value_test=0,
    #     nb_steps=pol_steps,
    # )
    policy = policy_boltz

    # Defining our DQN
    # model = tf.keras.models.load_model('dqn_v_dqn')

    if params['model_type'] == 'dqn_agent':
        dqn = DQNAgent(
            model=model,
            nb_actions=len(env_player.action_space),
            policy=policy,
            memory=memory,
            nb_steps_warmup=params['nb_steps_warmup'],
            gamma=params['gamma'],
            target_model_update=params['target_model_update'],
            # delta_clip=0.01,
            delta_clip=params['delta_clip'],
            enable_double_dqn=params['enable_double_dqn__'],
            enable_dueling_network=params['enable_double_dqn__'],
            dueling_type=params['dueling_type__'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])

    elif params['model_type'] == 'sarsa_agent':
        dqn = SARSAAgent(model=model,
                         nb_actions=len(env_player.action_space),
                         policy=policy,
                         nb_steps_warmup=params['nb_steps_warmup'],
                         gamma=params['gamma'],
                         delta_clip=params['delta_clip'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    else:
        # CEMAgent
        # https://towardsdatascience.com/cross-entropy-method-for-reinforcement-learning-2b6de2a4f3a0
        dqn = CEMAgent(model=model,
                       nb_actions=len(env_player.action_space),
                       memory=memory,
                       nb_steps_warmup=params['nb_steps_warmup'])
        # different compile function
        dqn.compile()

    # dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    # opponent dqn
    dqn_opponent = DQNAgent(
        model=model,
        nb_actions=len(env_player.action_space),
        policy=policy,
        memory=memory,
        nb_steps_warmup=params['nb_steps_warmup'],
        gamma=params['gamma'],
        target_model_update=params['target_model_update'],
        # delta_clip=0.01,
        delta_clip=params['delta_clip'],
        enable_double_dqn=params['enable_double_dqn__'],
        enable_dueling_network=params['enable_double_dqn__'],
        dueling_type=params['dueling_type__'])
    dqn_opponent.compile(Adam(lr=0.00025), metrics=["mae"])
    # NB_TRAINING_STEPS = NB_TRAINING_STEPS

    # rl_opponent = TrainedRLPlayer(model)
    # Training
    rounds = 4
    n_steps = NB_TRAINING_STEPS // rounds

    for k in range(rounds):
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=second_opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )

    name = params["name"] + "_model"
    model.save(name)

    # loaded_model = tf.keras.models.load_model(name)

    # Evaluation
    print("Results against random player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    print("\nResults against max player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=second_opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    return model
Example 22
class KerasCEMAgent(AbstractAgent):
    def __init__(self, env, timesteps_per_episode=10001):
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model, nb_actions=self.action_size, memory=memory, batch_size=50,
                              nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)

    def run(self) -> {str: float}:
        """
        The agent's training method.
        Returns: a dictionary - {"episode_reward_mean": __, "episode_reward_min": __, "episode_reward_max": __,
        "episode_len_mean": __}
        """
        self.agent.compile()
        history = self.agent.fit(self.env, nb_steps=ITER_NUM, visualize=False, verbose=1)
        if len(history.history) > 0:
            episode_reward = history.history["episode_reward"]
            nb_episode_steps = history.history["nb_episode_steps"]
        else:
            episode_reward, nb_episode_steps = [0], [0]  # TODO - placeholder
        result = {EPISODE_REWARD_MEAN: np.array(episode_reward),
                  EPISODE_STEP_NUM_MEAN: np.array(nb_episode_steps),
                  EPISODE_REWARD_MIN: np.empty([]),
                  EPISODE_REWARD_MAX: np.empty([]), EPISODE_VARIANCE: np.empty([])}
        return result

    def _build_compile_model(self):
        ## simple net
        # model = Sequential()
        # model.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        # model.add(Dense(self.action_size))
        # model.add(Activation('softmax'))
        # return model
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(self.action_size))
        model.add(Activation('softmax'))
        return model

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: a int that represents the best action.
        """
        # state = np.array([[state]])
        return int(np.argmax(self.model.predict(state)))

    def stop_episode(self):
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        pass

    def evaluate(self, visualize=False):
        self.agent.test(self.env, nb_episodes=5, visualize=visualize, nb_max_episode_steps=60)

    def replay_experiences(self):
        pass
Example 23
class KerasRlCemAgent(KerasRlAgent):
    """Keras-rl implementation of the cross-entropy method algorithm.

        see "https://learning.mpi-sws.org/mlss2016/slides/2016-MLSS-RL.pdf" and
            "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf"
    """
    class CemCallback(rl.callbacks.Callback):
        """Callback registered with keras rl agents to propagate iteration and episode updates."""
        def __init__(self, cem_agent: KerasRlAgent,
                     cem_context: core.CemTrainContext, nb_steps: int):
            """
            Args:
                cem_agent: the agent to propagate iteration begin/end events to.
                cem_context: the train_context containing the iteration definitions
                nb_steps: value set in the keras cem agent.
            """
            assert cem_agent
            assert cem_context
            assert nb_steps
            self._cem_agent: KerasRlAgent = cem_agent
            self._cem_context: core.CemTrainContext = cem_context
            self._nb_steps = nb_steps
            super().__init__()

        def on_episode_end(self, episode, logs=None):
            """Signals the base class the end / begin of a training iteration."""
            cc: core.CemTrainContext = self._cem_context
            episode = episode + 1
            if episode % cc.num_episodes_per_iteration == 0:
                self._cem_agent.on_train_iteration_end(math.nan)
                if self._cem_context.training_done:
                    self._cem_agent._agent.step = self._nb_steps
                else:
                    self._cem_agent.on_train_iteration_begin()

    def train_implementation(self, train_context: core.CemTrainContext):
        assert train_context
        cc: core.CemTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        policy_buffer_size = 5 * cc.num_episodes_per_iteration
        self.log_api(f'EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)
        num_actions = train_env.action_space.n
        self.log_api(f'CEMAgent', f'(model=..., nb_actions={num_actions}, memory=..., ' + \
                     f'nb_steps_warmup={cc.num_steps_buffer_preload}, ' + \
                     f'train_interval={cc.num_episodes_per_iteration}, ' + \
                     f'batch_size={cc.num_episodes_per_iteration}, ' + \
                     f'elite_frac={cc.elite_set_fraction})')
        self._agent = CEMAgent(model=keras_model,
                               nb_actions=num_actions,
                               memory=memory,
                               nb_steps_warmup=cc.num_steps_buffer_preload,
                               batch_size=cc.num_episodes_per_iteration,
                               train_interval=cc.num_episodes_per_iteration,
                               elite_frac=cc.elite_set_fraction)
        self.log_api(f'agent.compile', '()')
        self._agent.compile()
        nb_steps = cc.num_iterations * cc.num_episodes_per_iteration * cc.max_steps_per_episode
        callback = KerasRlCemAgent.CemCallback(self, cc, nb_steps)
        self.on_train_iteration_begin()
        self.log_api(f'agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[callback])
        if not cc.training_done:
            self.on_train_iteration_end(math.nan)
Example 24
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Option 1 : Simple model
    # model = Sequential()
    # model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    # model.add(Dense(nb_actions))
    # model.add(Activation('softmax'))

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    if REWARD == "normal":
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
        cem.compile()
        history_normal = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        cem.save_weights(os.path.join(LOG_DIR, 'cem_normal_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
        cem.test(env, nb_episodes=5, visualize=False)

        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        if not SMOOTH:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=False)
        else:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=False)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=False)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_noisy)
        cem.compile()
        history_noisy = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))

        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    elif REWARD == "surrogate":
        if not SMOOTH:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=True)
        else:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=True)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=True)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_surrogate)
        cem.compile()
        history_surrogate = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))
        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    else:
        raise NotImplementedError
Example 25
model.add(Reshape(env.observation_space.shape))
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=env.observation_space.shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
print(model.summary())

memory = EpisodeParameterMemory(limit=10000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               batch_size=50,
               train_interval=50,
               elite_frac=0.1)
cem.compile()

cem.fit(env, nb_steps=100000000, visualize=False)

#dqn.load_weights('dqn_test_run_weights.h5f')
cem.save_weights('cem_{}_weights.h5f'.format('test_run'), overwrite=True)

#dqn.test(env, nb_episodes=5, visualize=True)
Example 26
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

modelL = Sequential()
modelL.add(LSTM(nb_actions, input_shape=(1, ) + env.observation_space.shape))
modelL.add(Activation('softmax'))
memoryL = EpisodeParameterMemory(limit=1000, window_length=1)
cemL = CEMAgent(model=modelL,
                nb_actions=nb_actions,
                memory=memoryL,
                batch_size=50,
                nb_steps_warmup=2000,
                train_interval=50,
                elite_frac=0.05)
cemL.compile()
histL = cemL.fit(env, nb_steps=50000, visualize=False, verbose=1)
cemL.save_weights('cemL_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cemL.test(env, nb_episodes=5, visualize=True)

modelD = Sequential()
modelD.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
modelD.add(Dense(nb_actions))
modelD.add(Activation('softmax'))
memoryD = EpisodeParameterMemory(limit=1000, window_length=1)
cemD = CEMAgent(model=modelD,
                nb_actions=nb_actions,
Example 27
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Reshape([1, nb_actions]))
#model.add(Activation('softmax'))


print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

#TODO write your own agent!
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=10000, visualize=False, verbose=2)

# After training is done, we save the best weights.
#cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)
Example 28
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape, name = 'Duda-flatten'))
model.add(Dense(nb_actions, name = 'Duda-dense'))
model.add(Activation('softmax', name = 'Duda-relu'))

print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()
print('\033[93m' + "Model is compiled"+'\033[0m')
cem.fit(env, nb_steps=1000, visualize=True, verbose=2)
print('\033[93m' + "Training"+'\033[0m')

cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)

#observations = np.array((4,10))
observations = []
for _ in range(10):
    observations.append(deepcopy(env.reset()))
observations = np.asarray(observations)
Example 29
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
print(model.summary())
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)
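
The weights saved above can be reloaded later for evaluation without retraining, as Example 13 does; a minimal sketch, assuming the same env, model, and agent construction:

# Reload previously saved parameters and evaluate (assumes cem and env built as above).
cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
cem.test(env, nb_episodes=5, visualize=True)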
Example 30
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=MEMORY_LIMIT,
                                window_length=WINDOW_LENGHT)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=BATCH_SIZE,
               nb_steps_warmup=NB_STEPS_WARMUP,
               train_interval=TRAIN_INTERVAL,
               elite_frac=ELITE_FRAC)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=NB_STEPS, visualize=VISUALIZE_TRAIN, verbose=VERBOSE)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=NB_EPISODES, visualize=VISUALIZE_TEST)