Example 1
    def configure(self, observation_space_shape, nb_actions):
        if self.opts.model_type == 1:
            # Option 1 : Simple model
            model = Sequential()
            model.add(Flatten(input_shape=(1, ) + observation_space_shape))
            model.add(Dense(nb_actions))
            model.add(Activation('softmax'))
            print(model.summary())
        elif self.opts.model_type == 2:
            # Option 2: deep network
            model = Sequential()
            model.add(Flatten(input_shape=(1, ) + observation_space_shape))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(16))
            model.add(Activation('relu'))
            model.add(Dense(nb_actions))
            model.add(Activation('softmax'))
            print(model.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = EpisodeParameterMemory(limit=1000, window_length=1)

        self.agent = CEMAgent(model=model,
                              nb_actions=nb_actions,
                              memory=memory,
                              batch_size=50,
                              nb_steps_warmup=2000,
                              train_interval=50,
                              elite_frac=0.05)
        self.agent.compile()
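
This snippet is a fragment of a larger agent class and assumes the usual Keras and keras-rl imports. A minimal sketch of the imports such a file would need, mirroring the complete script in Example 29 below:

# Assumed imports for the fragment above (hypothetical; taken from Example 29).
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory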
Example 2
class KerasCEMAgent(object):
	'''
	The cross-entropy method Learning Agent as described in http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf
	'''

	def __init__(self, opts):
		self.metadata = {
			'discrete_actions': True,
		}

		self.opts = opts

	def configure(self, observation_space_shape, nb_actions):
		if self.opts.model_type == 1:
			# Option 1 : Simple model
			model = Sequential()
			model.add(Flatten(input_shape=(1,) + observation_space_shape))
			model.add(Dense(nb_actions))
			model.add(Activation('softmax'))
			print(model.summary())
		elif self.opts.model_type == 2:
			# Option 2: deep network
			model = Sequential()
			model.add(Flatten(input_shape=(1,) + observation_space_shape))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(16))
			model.add(Activation('relu'))
			model.add(Dense(nb_actions))
			model.add(Activation('softmax'))
			print(model.summary())

		# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
		# even the metrics!
		memory = EpisodeParameterMemory(limit=1000, window_length=1)

		self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
							  batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
		self.agent.compile()

	def train(self, env, nb_steps, visualize, verbosity):
		# Okay, now it's time to learn something! We visualize the training here for show, but this
		# slows down training quite a lot. You can always safely abort the training prematurely using
		# Ctrl + C.
		self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

	def test(self, env, nb_episodes, visualize):
		# Finally, evaluate our algorithm for 5 episodes.
		self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

	def load_weights(self, load_file):
		self.agent.load_weights(load_file)

	def save_weights(self, save_file, overwrite):
		# After training is done, we save the best weights.
		self.agent.save_weights(save_file, overwrite=overwrite)
Example 3
    def __init__(self, env, timesteps_per_episode=10001):
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model, nb_actions=self.action_size, memory=memory, batch_size=50,
                              nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
Example 4
def main(env_name, nb_steps):
    # Get the environment and extract the number of actions.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    input_shape = (1, ) + env.observation_space.shape
    model = create_nn_model(input_shape, nb_actions)

    # Finally, we configure and compile our agent.
    memory = EpisodeParameterMemory(limit=450, window_length=1)

    agent = CEMAgent(model=model,
                     nb_actions=nb_actions,
                     memory=memory,
                     batch_size=50,
                     nb_steps_warmup=2000,
                     train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # After training is done, we save the best weights.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Finally, evaluate the agent
    history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(history.history['episode_reward'])
    print(("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
           "min={:>5.2f}, max={:>5.2f}").format(len(rewards), rewards.mean(),
                                                rewards.std(), rewards.min(),
                                                rewards.max()))
Example 5
def main(env_name, nb_steps):
    # Get the environment and extract the number of actions.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    input_shape = (1,) + env.observation_space.shape
    model = create_nn_model(input_shape, nb_actions)

    # Finally, we configure and compile our agent.
    memory = EpisodeParameterMemory(limit=450, window_length=1)

    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                     batch_size=50, nb_steps_warmup=2000, train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # After training is done, we save the best weights.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Finally, evaluate the agent
    history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(history.history['episode_reward'])
    print(("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
           "min={:>5.2f}, max={:>5.2f}")
          .format(len(rewards),
                  rewards.mean(),
                  rewards.std(),
                  rewards.min(),
                  rewards.max()))
Example 6
    def reinforce_train_cem(self,
                            steps=60000,
                            visualize=False,
                            verbose=1,
                            nb_steps_warmup=10000,
                            save_path=r"D:\Data\markets\weights",
                            save_weights_name="cem_CADJPY_weights.h5f",
                            log_interval=1000):
        memory = EpisodeParameterMemory(limit=200, window_length=1)
        nb_actions = self.env.action_space.n

        agent = CEMAgent(
            model=self.model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=nb_steps_warmup,
            processor=MultiInputProcessor(nb_inputs=len(self.model.inputs)))
        agent.compile()
        agent.fit(self.env,
                  nb_steps=steps,
                  visualize=visualize,
                  verbose=verbose,
                  log_interval=log_interval)

        pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
        file_path = os.path.join(save_path, save_weights_name)
        agent.save_weights(filepath=file_path, overwrite=True)
Example 7
def create_cem_agent(env):
    ''' create cem agent '''
    env = create_environment()
    model = create_deep_model(env)
    nb_actions = env.action_space.n
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    cem = CEMAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05)
    cem.compile()
    return cem
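
A possible usage sketch for create_cem_agent (hypothetical; it assumes create_environment and create_deep_model from the same module, and the step and episode counts are illustrative placeholders):

# Hypothetical driver; hyperparameter values are placeholders.
env = create_environment()
cem = create_cem_agent(env)
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_params.h5f', overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)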
Example 8
def main():
    """Build model and train on environment."""
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3      # Keras-RL CEM is a discrete agent

    # Option 1 : Simple model
    model = Sequential([
        Flatten(input_shape=(1,) + env.observation_space.shape),
        Dense(nb_actions),
        Activation('softmax')
    ])

    # Option 2: deep network
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(nb_actions),
    #     Activation('softmax')
    # ])

    print(model.summary())

    param_logger = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol))
    callbacks = [
        param_logger,
        FileLogger('cem_{}_log.json'.format(env.instrument.symbol), interval=STEPS_PER_EPISODE)
    ]

    theta_init = param_logger.read_params()     # Start with last saved params if present
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)        # Remember the parameters and rewards for the last `limit` episodes.
    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=EPISODES,
                   nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
                   train_interval=TRAIN_INTERVAL_EPISODES, elite_frac=0.2, theta_init=theta_init,
                   processor=DiscreteProcessor(), noise_decay_const=0, noise_ampl=0)
    """
    :param memory: Remembers the parameters and rewards for the last `limit` episodes.
    :param int batch_size: Randomly sample this many episode parameters from memory before taking the top `elite_frac` to construct the next gen parameters from.
    :param int nb_steps_warmup: Run for this many steps (total) to fill memory before training
    :param int train_interval: Train (update parameters) every this many episodes
    :param float elite_frac: Take this top fraction of the `batch_size` randomly sampled parameters from the episode memory to construct new parameters.
    """
    cem.compile()
    cem.fit(env, nb_steps=STEPS_PER_EPISODE * EPISODES, visualize=True, verbose=2, callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
Example 9
    def train_implementation(self, train_context: core.CemTrainContext):
        assert train_context
        cc: core.CemTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        policy_buffer_size = 5 * cc.num_episodes_per_iteration
        self.log_api(f'EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)
        num_actions = train_env.action_space.n
        self.log_api(f'CEMAgent', f'(model=..., nb_actions={num_actions}, memory=..., ' + \
                     f'nb_steps_warmup={cc.num_steps_buffer_preload}, ' + \
                     f'train_interval={cc.num_episodes_per_iteration}, ' + \
                     f'batch_size={cc.num_episodes_per_iteration}, ' + \
                     f'elite_frac={cc.elite_set_fraction})')
        self._agent = CEMAgent(model=keras_model,
                               nb_actions=num_actions,
                               memory=memory,
                               nb_steps_warmup=cc.num_steps_buffer_preload,
                               batch_size=cc.num_episodes_per_iteration,
                               train_interval=cc.num_episodes_per_iteration,
                               elite_frac=cc.elite_set_fraction)
        self.log_api(f'agent.compile', '()')
        self._agent.compile()
        nb_steps = cc.num_iterations * cc.num_episodes_per_iteration * cc.max_steps_per_episode
        callback = KerasRlCemAgent.CemCallback(self, cc, nb_steps)
        self.on_train_iteration_begin()
        self.log_api(f'agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[callback])
        if not cc.training_done:
            self.on_train_iteration_end(math.nan)
Example 10
def create(env):
    np.random.seed(config.current.domain_seed)
    env.seed(config.current.domain_seed)
    nb_actions = env.action_space.n

    obs_dim = env.observation_space.shape[0]

    # Option 1 : Simple model
    #model = Sequential()
    #model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    #model.add(Dense(nb_actions))
    #model.add(Activation('softmax'))

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=50,
                   nb_steps_warmup=2000,
                   train_interval=50,
                   elite_frac=0.05)
    cem.compile()
    return cem
Example 11
def test_single_cem_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = EpisodeParameterMemory(limit=10, window_length=2)
    agent = CEMAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4, train_interval=50)
    agent.compile()
    agent.fit(MultiInputTestEnv((3,)), nb_steps=100)
Example 12
def test_multi_cem_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = EpisodeParameterMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = CEMAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4,
                     processor=processor, train_interval=50)
    agent.compile()
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=100)
Example 13
print(env.observation_space.shape)
# # Option 1 : Simple model
# model = Sequential()
# model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))
#
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(124))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()

# cem.fit(env, nb_steps=200000, visualize=False, verbose=2)
# cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)


cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
cem.test(env, nb_episodes=10, visualize=True)
Example 14
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

cem.load_weights('cem_{}_params.h5f'.format("citizen-0"))

for move in range(100):
    action = cem.forward(env.citizens[0].vision)
    print(action)
    env.step(action)
    print(env.citizens[0].score)
    time.sleep(0.05)
    print(env)
Example 15
def main(options):
    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # CEM
    # init environment
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=batch_size_cem,
                   nb_steps_warmup=1000,
                   train_interval=train_interval_cem,
                   elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights('cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
        ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
                     overwrite=True)
    #cem.test(env, nb_episodes=1, visualize=False)

    # PROPS
    # init environment
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres
    }

    props = PROPSAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       Lmax=Lmax,
                       delta=delta,
                       bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env,
                               nb_steps=steps_props,
                               visualize=False,
                               verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.
        format(ENV_NAME, model_type, batch_size_props, steps_props,
               trunc_thres, Lmax, delta),
        overwrite=True)
    #props.test(env, nb_episodes=1, visualize=False)

    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    #plt.plot(callback_cem.history['episode_reward'])
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())

    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    #plt.plot(callback_props.history['episode_reward'])
    plt.plot(df_props.rolling(window=batch_size_props).mean())

    plt.legend(['cem', 'props'], loc='upper left')
    #plt.show()
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))
Example 16
    n_action = len(env_player.action_space)

    memory = EpisodeParameterMemory(limit=10000, window_length=1)
    model = Sequential()
    model.add(Flatten(input_shape=(1, 10)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(n_action))
    model.add(Activation('softmax'))

    agent = CEMAgent(model=model, nb_actions=n_action, memory=memory,
                     batch_size=50, nb_steps_warmup=1000, train_interval=50,
                     elite_frac=0.05, noise_ampl=4)

    agent.compile()

    # Training
    env_player.play_against(
        env_algorithm=agent_training,
        opponent=random_opponent,
        env_algorithm_kwargs={"agent": agent, "nb_steps": NB_TRAINING_STEPS, "filename": TRAINING_OPPONENT},
    )
    model.save("model_%d" % NB_TRAINING_STEPS)

    # Evaluation
    print("Results against random player:")
    env_player.play_against(
Example 17
def run_agent(agent):
    print("started new process")

    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    WINDOW_LENGTH = 1

    num_actions = 3
    view_shape = (21, 21)
    input_shape = (WINDOW_LENGTH, ) + view_shape

    env = RestrictedViewTronEnv(agent, 10)

    model = Sequential()

    model.add(Permute((2, 3, 1), input_shape=input_shape))

    model.add(Conv2D(16, (3, 3), padding="same"))
    model.add(Activation("relu"))

    model.add(Conv2D(32, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(Flatten())

    model.add(Dense(256))
    model.add(Activation("relu"))

    model.add(Dense(num_actions))
    model.add(Activation('softmax'))
    np.random.seed(2363)

    #policy = LinearAnnealedPolicy(BoltzmannQPolicy(), attr='tau', value_max=2.,
    #                              value_min=.1, value_test=.1, nb_steps=1000000 // 10)

    processor = TronProcessor()

    memory = EpisodeParameterMemory(limit=1000000, window_length=WINDOW_LENGTH)

    cem = CEMAgent(model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=50000 // 5,
                   train_interval=4)

    #dqn.compile(Adam(lr=.00025), metrics=["mae"])
    cem.compile()

    weights_filename = 'tmp/dqn_test_weights.h5f'
    checkpoint_weights_filename = 'tmp/dqn_test_weights_{step}.h5f'
    log_filename = 'tmp/dqn_test_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=250000 // 10)
    ]
    callbacks += [FileLogger(log_filename, interval=10000)]

    def train(transfer=False):
        print(cem.get_config())  # todo save to file

        if transfer:
            cem.load_weights(weights_filename)

        cem.fit(env,
                callbacks=callbacks,
                nb_steps=1750000 // 10,
                log_interval=10000)
        cem.save_weights(weights_filename, overwrite=True)
        cem.test(env, nb_episodes=20, visualize=True)

    def opponent():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=200000, visualize=False)

    def test():
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=20, visualize=True)

    # opponent()
    train()  # True
Example 18
    out = keras.layers.Concatenate()(dense_out2)

    return keras.models.Model(inp, out)


model = model_fn()

# print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=se.action_space,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = cem.fit(se, nb_steps=50000, visualize=False, verbose=2)

rewards = [x for x in history.history['episode_reward'] if x > 0]

import matplotlib.pyplot as plt

plt.plot(np.convolve(np.ones(100), rewards, 'valid'))
Example 19
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=10000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=1000,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05,
               noise_decay_const=0.0,
               noise_ampl=1.0,
               processor=MujocoProcessor())
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the best weights.
cem.save_weights('cem_CAV_params.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
Example 20
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)
Example 21
def main(params=None):
    """
    performs training and evaluation of params
    :return: model
    """
    if params is None:
        params = {
            'model_type': 'dqn_agent',
            'l1_out': 128,
            'l2_out': 64,
            'gamma': 0.5,
            'target_model_update': 1,
            'delta_clip': 0.01,
            'nb_steps_warmup': 1000
        }

    model_type = 'dqn_agent'
    env_player = SimpleRLPlayer(battle_format="gen8randombattle")
    # print('env_player',env_player)
    # print('help', help(env_player))
    env_player2 = SimpleRLPlayer(battle_format="gen8randombattle")

    opponent = RandomPlayer(battle_format="gen8randombattle")
    second_opponent = MaxDamagePlayer(battle_format="gen8randombattle")

    # Output dimension
    n_action = len(env_player.action_space)

    # model_params = {
    #     'n_actions': n_action,
    #     'l1_out': 128,
    #     'l2_out': 64,
    #     'model_type': params['model_type']
    # }
    model_params = params
    model_params['n_actions'] = n_action

    model = get_model(model_params)

    # print('first model summary')
    # print(model.summary())
    # model = Sequential()
    # model.add(Dense(128, activation="elu", input_shape=(1, 10)))
    #
    # # Our embedding have shape (1, 10), which affects our hidden layer
    # # dimension and output dimension
    # # Flattening resolve potential issues that would arise otherwise
    # model.add(Flatten())
    # model.add(Dense(64, activation="elu"))
    # model.add(Dense(n_action, activation="linear"))

    # elu activation is similar to relu
    # https://ml-cheatsheet.readthedocs.io/en/latest/activation_functions.html#elu

    # determine memory type
    if params['model_type'] in {'dqn_agent', 'sarsa_agent'}:
        # memory = SequentialMemory(limit=10000, window_length=1)
        memory = SequentialMemory(limit=NB_TRAINING_STEPS, window_length=1)
    else:
        memory = EpisodeParameterMemory(limit=10000, window_length=1)

    # Simple epsilon greedy
    # What is linear annealed policy?
    # - this policy gives gradually decreasing thresholds for the epsilon greedy policy
    # - it acts as a wrapper around epsilon greedy to feed in a custom threshold
    pol_steps = NB_TRAINING_STEPS
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1.0,
        value_min=0.05,
        value_test=0,
        nb_steps=pol_steps,
    )
    # pol_steps = NB_TRAINING_STEPS
    policy_boltz = BoltzmannQPolicy(tau=1)
    # policy = LinearAnnealedPolicy(
    #     BoltzmannQPolicy(),
    #     attr="tau",
    #     value_max=1.0,
    #     value_min=0.05,
    #     value_test=0,
    #     nb_steps=pol_steps,
    # )
    policy = policy_boltz

    # Defining our DQN
    # model = tf.keras.models.load_model('dqn_v_dqn')

    if params['model_type'] == 'dqn_agent':
        dqn = DQNAgent(
            model=model,
            nb_actions=len(env_player.action_space),
            policy=policy,
            memory=memory,
            nb_steps_warmup=params['nb_steps_warmup'],
            gamma=params['gamma'],
            target_model_update=params['target_model_update'],
            # delta_clip=0.01,
            delta_clip=params['delta_clip'],
            enable_double_dqn=params['enable_double_dqn__'],
            enable_dueling_network=params['enable_double_dqn__'],
            dueling_type=params['dueling_type__'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])

    elif params['model_type'] == 'sarsa_agent':
        dqn = SARSAAgent(model=model,
                         nb_actions=len(env_player.action_space),
                         policy=policy,
                         nb_steps_warmup=params['nb_steps_warmup'],
                         gamma=params['gamma'],
                         delta_clip=params['delta_clip'])
        dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    else:
        # CEMAgent
        # https://towardsdatascience.com/cross-entropy-method-for-reinforcement-learning-2b6de2a4f3a0
        dqn = CEMAgent(model=model,
                       nb_actions=len(env_player.action_space),
                       memory=memory,
                       nb_steps_warmup=params['nb_steps_warmup'])
        # different compile function
        dqn.compile()

    # dqn.compile(Adam(lr=0.00025), metrics=["mae"])
    # opponent dqn
    dqn_opponent = DQNAgent(
        model=model,
        nb_actions=len(env_player.action_space),
        policy=policy,
        memory=memory,
        nb_steps_warmup=params['nb_steps_warmup'],
        gamma=params['gamma'],
        target_model_update=params['target_model_update'],
        # delta_clip=0.01,
        delta_clip=params['delta_clip'],
        enable_double_dqn=params['enable_double_dqn__'],
        enable_dueling_network=params['enable_double_dqn__'],
        dueling_type=params['dueling_type__'])
    dqn_opponent.compile(Adam(lr=0.00025), metrics=["mae"])
    # NB_TRAINING_STEPS = NB_TRAINING_STEPS

    # rl_opponent = TrainedRLPlayer(model)
    # Training
    rounds = 4
    n_steps = NB_TRAINING_STEPS // rounds

    for k in range(rounds):
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )
        env_player.play_against(
            env_algorithm=dqn_training,
            opponent=second_opponent,
            env_algorithm_kwargs={
                "dqn": dqn,
                "nb_steps": n_steps
            },
        )

    name = params["name"] + "_model"
    model.save(name)

    # loaded_model = tf.keras.models.load_model(name)

    # Evaluation
    print("Results against random player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    print("\nResults against max player:")
    env_player.play_against(
        env_algorithm=dqn_evaluation,
        opponent=second_opponent,
        env_algorithm_kwargs={
            "dqn": dqn,
            "nb_episodes": NB_EVALUATION_EPISODES
        },
    )

    return model
Example 22
class KerasCEMAgent(AbstractAgent):
    def __init__(self, env, timesteps_per_episode=10001):
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model, nb_actions=self.action_size, memory=memory, batch_size=50,
                              nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)

    def run(self) -> {str: float}:
        """
        The agent's training method.
        Returns: a dictionary - {"episode_reward_mean": __, "episode_reward_min": __, "episode_reward_max": __,
        "episode_len_mean": __}
        """
        self.agent.compile()
        history = self.agent.fit(self.env, nb_steps=ITER_NUM, visualize=False, verbose=1)
        if len(history.history) > 0:
            episode_reward = history.history["episode_reward"]
            nb_episode_steps = history.history["nb_episode_steps"]
        else:
            episode_reward, nb_episode_steps = [0], [0]  # TODO - placeholder
        result = {EPISODE_REWARD_MEAN: np.array(episode_reward),
                  EPISODE_STEP_NUM_MEAN: np.array(nb_episode_steps),
                  EPISODE_REWARD_MIN: np.empty([]),
                  EPISODE_REWARD_MAX: np.empty([]), EPISODE_VARIANCE: np.empty([])}
        return result

    def _build_compile_model(self):
        ## simple net
        # model = Sequential()
        # model.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        # model.add(Dense(self.action_size))
        # model.add(Activation('softmax'))
        # return model
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(self.action_size))
        model.add(Activation('softmax'))
        return model

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: a int that represents the best action.
        """
        # state = np.array([[state]])
        return int(np.argmax(self.model.predict(state)))

    def stop_episode(self):
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        pass

    def evaluate(self, visualize=False):
        self.agent.test(self.env, nb_episodes=5, visualize=visualize, nb_max_episode_steps=60)

    def replay_experiences(self):
        pass
Example 23
class KerasRlCemAgent(KerasRlAgent):
    """Keras-rl implementation of the cross-entropy method algorithm.

        see "https://learning.mpi-sws.org/mlss2016/slides/2016-MLSS-RL.pdf" and
            "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf"
    """
    class CemCallback(rl.callbacks.Callback):
        """Callback registered with keras rl agents to propagate iteration and episode updates."""
        def __init__(self, cem_agent: KerasRlAgent,
                     cem_context: core.CemTrainContext, nb_steps: int):
            """
            Args:
                cem_agent: the agent to propagate iteration begin/end events to.
                cem_context: the train_context containing the iteration definitions
                nb_steps: value set in the keras cem agent.
            """
            assert cem_agent
            assert cem_context
            assert nb_steps
            self._cem_agent: KerasRlAgent = cem_agent
            self._cem_context: core.CemTrainContext = cem_context
            self._nb_steps = nb_steps
            super().__init__()

        def on_episode_end(self, episode, logs=None):
            """Signals the base class the end / begin of a training iteration."""
            cc: core.CemTrainContext = self._cem_context
            episode = episode + 1
            if episode % cc.num_episodes_per_iteration == 0:
                self._cem_agent.on_train_iteration_end(math.nan)
                if self._cem_context.training_done:
                    self._cem_agent._agent.step = self._nb_steps
                else:
                    self._cem_agent.on_train_iteration_begin()

    def train_implementation(self, train_context: core.CemTrainContext):
        assert train_context
        cc: core.CemTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        policy_buffer_size = 5 * cc.num_episodes_per_iteration
        self.log_api(f'EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)
        num_actions = train_env.action_space.n
        self.log_api(f'CEMAgent', f'(model=..., nb_actions={num_actions}, memory=..., ' + \
                     f'nb_steps_warmup={cc.num_steps_buffer_preload}, ' + \
                     f'train_interval={cc.num_episodes_per_iteration}, ' + \
                     f'batch_size={cc.num_episodes_per_iteration}, ' + \
                     f'elite_frac={cc.elite_set_fraction})')
        self._agent = CEMAgent(model=keras_model,
                               nb_actions=num_actions,
                               memory=memory,
                               nb_steps_warmup=cc.num_steps_buffer_preload,
                               batch_size=cc.num_episodes_per_iteration,
                               train_interval=cc.num_episodes_per_iteration,
                               elite_frac=cc.elite_set_fraction)
        self.log_api(f'agent.compile', '()')
        self._agent.compile()
        nb_steps = cc.num_iterations * cc.num_episodes_per_iteration * cc.max_steps_per_episode
        callback = KerasRlCemAgent.CemCallback(self, cc, nb_steps)
        self.on_train_iteration_begin()
        self.log_api(f'agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[callback])
        if not cc.training_done:
            self.on_train_iteration_end(math.nan)
Example 24
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Option 1 : Simple model
    # model = Sequential()
    # model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    # model.add(Dense(nb_actions))
    # model.add(Activation('softmax'))

    # Option 2: deep network
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    if REWARD == "normal":
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
        cem.compile()
        history_normal = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        cem.save_weights(os.path.join(LOG_DIR, 'cem_normal_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
        cem.test(env, nb_episodes=5, visualize=False)

        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        if not SMOOTH:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=False)
        else:
            processor_noisy = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=False)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=False)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_noisy)
        cem.compile()
        history_noisy = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))

        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_noisy_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    elif REWARD == "surrogate":
        if not SMOOTH:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=False, surrogate=True)
        else:
            processor_surrogate = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=True, surrogate=True)

        # processor_surrogate = CartpoleSurrogateProcessor(e_=ERR_N, e=ERR_P, surrogate=True)
        cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                       batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
                       processor=processor_surrogate)
        cem.compile()
        history_surrogate = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        if not SMOOTH:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))
        else:
            cem.save_weights(os.path.join(LOG_DIR, 'cem_surrogate_smooth_{}_params.h5f'.format(ENV_NAME)), overwrite=True)
            pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate_smooth.csv"))

        cem.test(env, nb_episodes=5, visualize=False)

    else:
        raise NotImplementedError
Example 25
model.add(Reshape(env.observation_space.shape))
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=env.observation_space.shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
print(model.summary())

memory = EpisodeParameterMemory(limit=10000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               batch_size=50,
               train_interval=50,
               elite_frac=0.1)
cem.compile()

cem.fit(env, nb_steps=100000000, visualize=False)

#dqn.load_weights('dqn_test_run_weights.h5f')
cem.save_weights('cem_{}_weights.h5f'.format('test_run'), overwrite=True)

#dqn.test(env, nb_episodes=5, visualize=True)
Example 26
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

modelL = Sequential()
modelL.add(LSTM(nb_actions, input_shape=(1, ) + env.observation_space.shape))
modelL.add(Activation('softmax'))
memoryL = EpisodeParameterMemory(limit=1000, window_length=1)
cemL = CEMAgent(model=modelL,
                nb_actions=nb_actions,
                memory=memoryL,
                batch_size=50,
                nb_steps_warmup=2000,
                train_interval=50,
                elite_frac=0.05)
cemL.compile()
histL = cemL.fit(env, nb_steps=50000, visualize=False, verbose=1)
cemL.save_weights('cemL_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cemL.test(env, nb_episodes=5, visualize=True)

modelD = Sequential()
modelD.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
modelD.add(Dense(nb_actions))
modelD.add(Activation('softmax'))
memoryD = EpisodeParameterMemory(limit=1000, window_length=1)
cemD = CEMAgent(model=modelD,
                nb_actions=nb_actions,
Example 27
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Reshape([1, nb_actions]))
#model.add(Activation('softmax'))


print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=1000, window_length=1)

#TODO write your own agent!
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=10000, visualize=False, verbose=2)

# After training is done, we save the best weights.
#cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)
Example 28
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape, name = 'Duda-flatten'))
model.add(Dense(nb_actions, name = 'Duda-dense'))
model.add(Activation('softmax', name = 'Duda-relu'))

print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()
print('\033[93m' + "Model is compiled"+'\033[0m')
cem.fit(env, nb_steps=1000, visualize=True, verbose=2)
print('\033[93m' + "Training"+'\033[0m')

cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)

#observations = np.array((4,10))
observations = []
for _ in range(10):
    observations.append(deepcopy(env.reset()))
observations = np.asarray(observations)
Example 29
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
print(model.summary())
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)
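
The weights saved above can be reloaded later for evaluation without retraining, as Example 13 does; a minimal sketch, assuming the same env, model, and agent construction:

# Reload previously saved parameters and evaluate (assumes cem and env built as above).
cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
cem.test(env, nb_episodes=5, visualize=True)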
Example 30
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))

print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = EpisodeParameterMemory(limit=MEMORY_LIMIT,
                                window_length=WINDOW_LENGHT)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=BATCH_SIZE,
               nb_steps_warmup=NB_STEPS_WARMUP,
               train_interval=TRAIN_INTERVAL,
               elite_frac=ELITE_FRAC)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
cem.fit(env, nb_steps=NB_STEPS, visualize=VISUALIZE_TRAIN, verbose=VERBOSE)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=NB_EPISODES, visualize=VISUALIZE_TEST)