Esempi in Python per CEMAgent.fit, esempi in Python per rl.agents.cem.CEMAgent.fit

Esempio n. 1

0

Mostra file

def main(env_name, nb_steps):
    """Train a CEM agent on a gym environment, save its weights and evaluate it.

    Args:
        env_name: Identifier passed to ``gym.make``; also embedded in the
            name of the saved weights file.
        nb_steps: Number of training steps forwarded to ``CEMAgent.fit``.
    """
    # Build the environment and pin both random seeds.
    environment = gym.make(env_name)
    np.random.seed(123)
    environment.seed(123)

    # The network input carries a leading window axis of length 1.
    action_count = environment.action_space.n
    network = create_nn_model((1,) + environment.observation_space.shape,
                              action_count)

    # Configure and compile the agent.
    episode_memory = EpisodeParameterMemory(limit=450, window_length=1)
    agent_options = dict(nb_actions=action_count,
                         memory=episode_memory,
                         batch_size=50,
                         nb_steps_warmup=2000,
                         train_interval=50,
                         elite_frac=0.05)
    agent = CEMAgent(model=network, **agent_options)
    agent.compile()
    agent.fit(environment, nb_steps=nb_steps, visualize=False, verbose=1)

    # Persist the weights obtained by training.
    weights_file = 'cem_{}_params.h5f'.format(env_name)
    agent.save_weights(weights_file, overwrite=True)

    # Evaluate over 100 episodes and report summary statistics.
    test_history = agent.test(environment, nb_episodes=100, visualize=False)
    rewards = np.array(test_history.history['episode_reward'])
    report = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
              "min={:>5.2f}, max={:>5.2f}")
    stats = (len(rewards), rewards.mean(), rewards.std(), rewards.min(),
             rewards.max())
    print(report.format(*stats))

Esempio n. 2

0

Mostra file

File: cem_agent.py Progetto: MartinThoma/algorithms

def main(env_name, nb_steps):
    """Run one seeded CEM experiment: train, save the weights, then test.

    Args:
        env_name: Name of the gym environment to create; it is also used to
            build the weights file name ``cem_<env_name>_params.h5f``.
        nb_steps: Training budget handed to ``CEMAgent.fit``.
    """
    # Environment with fixed seeds for NumPy and the environment itself.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    n_actions = env.action_space.n
    obs_window_shape = (1,) + env.observation_space.shape
    policy_net = create_nn_model(obs_window_shape, n_actions)

    # Agent configuration and compilation.
    param_memory = EpisodeParameterMemory(limit=450, window_length=1)
    cem_agent = CEMAgent(
        model=policy_net,
        nb_actions=n_actions,
        memory=param_memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05,
    )
    cem_agent.compile()
    cem_agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Save the trained weights.
    cem_agent.save_weights('cem_{}_params.h5f'.format(env_name),
                           overwrite=True)

    # Evaluate the agent on 100 episodes and print reward statistics.
    evaluation = cem_agent.test(env, nb_episodes=100, visualize=False)
    episode_rewards = np.array(evaluation.history['episode_reward'])
    summary = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
               "min={:>5.2f}, max={:>5.2f}").format(
                   len(episode_rewards),
                   episode_rewards.mean(),
                   episode_rewards.std(),
                   episode_rewards.min(),
                   episode_rewards.max())
    print(summary)

Esempio n. 3

0

Mostra file

    def reinforce_train_cem(self,
                            steps=60000,
                            visualize=False,
                            verbose=1,
                            nb_steps_warmup=10000,
                            save_path=r"D:\Data\markets\weights",
                            save_weights_name="cem_CADJPY_weights.h5f",
                            log_interval=1000):
        """Train a CEM agent on ``self.env`` and write its weights to disk.

        Args:
            steps: Number of training steps passed to ``CEMAgent.fit``.
            visualize: Forwarded to ``CEMAgent.fit``.
            verbose: Forwarded to ``CEMAgent.fit``.
            nb_steps_warmup: Warm-up step count passed to ``CEMAgent``.
            save_path: Directory for the weights file; created (including
                parents) when it does not exist.
            save_weights_name: File name of the weights file inside
                ``save_path``.
            log_interval: Forwarded to ``CEMAgent.fit``.
        """
        episode_memory = EpisodeParameterMemory(limit=200, window_length=1)
        action_count = self.env.action_space.n
        # One processor input per model input.
        input_processor = MultiInputProcessor(
            nb_inputs=len(self.model.inputs))

        cem_agent = CEMAgent(model=self.model,
                             nb_actions=action_count,
                             memory=episode_memory,
                             nb_steps_warmup=nb_steps_warmup,
                             processor=input_processor)
        cem_agent.compile()

        fit_options = dict(nb_steps=steps,
                           visualize=visualize,
                           verbose=verbose,
                           log_interval=log_interval)
        cem_agent.fit(self.env, **fit_options)

        # Ensure the target directory exists before saving.
        weights_dir = pathlib.Path(save_path)
        weights_dir.mkdir(parents=True, exist_ok=True)
        cem_agent.save_weights(
            filepath=os.path.join(save_path, save_weights_name),
            overwrite=True)

Esempio n. 4

0

Mostra file

File: agents_kerasrl.py Progetto: carolinahiguera/humanoid_transfer

class KerasCEMAgent(object):
    '''
    The cross-entropy method Learning Agent as described in http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf

    Thin wrapper around keras-rl's ``CEMAgent``: ``configure`` builds the
    network and the agent, the remaining methods delegate to that agent.
    '''

    def __init__(self, opts):
        """Store the options object; ``opts.model_type`` is read by ``configure``."""
        self.metadata = {
            'discrete_actions': True,
        }

        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        """Build the policy network and the compiled CEM agent.

        Args:
            observation_space_shape: Shape tuple of a single observation.
            nb_actions: Number of discrete actions (size of the softmax).

        Raises:
            ValueError: If ``opts.model_type`` is neither 1 nor 2.
        """
        model_type = self.opts.model_type
        if model_type == 1:
            # Option 1 : Simple model (no hidden layers)
            hidden_units = ()
        elif model_type == 2:
            # Option 2: deep network (three hidden layers of 16 units)
            hidden_units = (16, 16, 16)
        else:
            # Previously `model` stayed unbound here and the failure only
            # surfaced later as an unrelated error.
            raise ValueError(
                'Unsupported model_type {!r}; expected 1 or 2'.format(model_type))

        model = Sequential()
        model.add(Flatten(input_shape=(1,) + observation_space_shape))
        for units in hidden_units:
            model.add(Dense(units))
            model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('softmax'))
        # summary() prints the table itself; wrapping it in print() only
        # adds a stray "None" line.
        model.summary()

        # Finally, we configure and compile our agent.
        memory = EpisodeParameterMemory(limit=1000, window_length=1)

        self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                              batch_size=50, nb_steps_warmup=2000,
                              train_interval=50, elite_frac=0.05)
        self.agent.compile()

    def train(self, env, nb_steps, visualize, verbosity):
        """Train the agent on ``env`` for ``nb_steps`` steps.

        Visualizing the training slows it down quite a lot. Training can be
        aborted prematurely using Ctrl + C.
        """
        self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

    def test(self, env, nb_episodes, visualize):
        """Evaluate the agent on ``env`` for ``nb_episodes`` episodes."""
        self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

    def load_weights(self, load_file):
        """Load agent weights from ``load_file``."""
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        """Save the agent weights to ``save_file``."""
        self.agent.save_weights(save_file, overwrite=overwrite)

Esempio n. 5

0

Mostra file

def test_single_cem_input():
    """Smoke-test ``CEMAgent.fit`` with a single-input model for 100 steps."""
    # Input shape (2, 3): window of 2 observations of shape (3,).
    network = Sequential([
        Flatten(input_shape=(2, 3)),
        Dense(2),
    ])

    episode_memory = EpisodeParameterMemory(limit=10, window_length=2)
    cem_agent = CEMAgent(network,
                         memory=episode_memory,
                         nb_actions=2,
                         nb_steps_warmup=5,
                         batch_size=4,
                         train_interval=50)
    cem_agent.compile()

    test_env = MultiInputTestEnv((3,))
    cem_agent.fit(test_env, nb_steps=100)

Esempio n. 6

0

Mostra file

def main():
    """Build model and train on environment.

    Creates a ``MarketEnv``, trains a keras-rl ``CEMAgent`` with a small
    softmax policy on it and saves the resulting weights. Parameters saved
    by a previous run are used as the starting point when present.
    """
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3),
                    episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3      # Keras-RL CEM is a discrete agent

    # Option 1 : Simple model
    model = Sequential([
        Flatten(input_shape=(1,) + env.observation_space.shape),
        Dense(nb_actions),
        Activation('softmax')
    ])

    # Option 2: deep network
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(hidden_nodes),
    #     Activation('relu'),
    #     Dense(nb_actions),
    #     Activation('softmax')
    # ])

    # summary() prints the table itself; print(model.summary()) would add a
    # stray "None" line.
    model.summary()

    symbol = env.instrument.symbol
    param_logger = CEMParamLogger('cem_{}_params.json'.format(symbol))
    callbacks = [
        param_logger,
        FileLogger('cem_{}_log.json'.format(symbol), interval=STEPS_PER_EPISODE)
    ]

    theta_init = param_logger.read_params()     # Start with last saved params if present
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    # CEMAgent arguments:
    #   memory: Remembers the parameters and rewards for the last `limit` episodes.
    #   batch_size: Randomly sample this many episode parameters from memory before
    #       taking the top `elite_frac` to construct the next gen parameters from.
    #   nb_steps_warmup: Run for this many steps (total) to fill memory before training.
    #   train_interval: Train (update parameters) every this many episodes.
    #   elite_frac: Take this top fraction of the `batch_size` randomly sampled
    #       parameters from the episode memory to construct new parameters.
    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)
    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=EPISODES,
                   nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
                   train_interval=TRAIN_INTERVAL_EPISODES,
                   elite_frac=0.2,
                   theta_init=theta_init,
                   processor=DiscreteProcessor(),
                   noise_decay_const=0,
                   noise_ampl=0)
    cem.compile()
    cem.fit(env, nb_steps=STEPS_PER_EPISODE * EPISODES, visualize=True, verbose=2, callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(symbol), overwrite=True)

Esempio n. 7

0

Mostra file

def test_multi_cem_input():
    """Smoke-test ``CEMAgent.fit`` with a two-input functional model for 100 steps."""
    # Two input branches whose last dimensions (3 and 4) are concatenated.
    first_input = Input(shape=(2, 3))
    second_input = Input(shape=(2, 4))
    merged = Concatenate()([first_input, second_input])
    flattened = Flatten()(merged)
    outputs = Dense(2)(flattened)
    network = Model(inputs=[first_input, second_input], outputs=outputs)

    episode_memory = EpisodeParameterMemory(limit=10, window_length=2)
    input_processor = MultiInputProcessor(nb_inputs=2)
    cem_agent = CEMAgent(network,
                         memory=episode_memory,
                         nb_actions=2,
                         nb_steps_warmup=5,
                         batch_size=4,
                         processor=input_processor,
                         train_interval=50)
    cem_agent.compile()

    test_env = MultiInputTestEnv([(3,), (4,)])
    cem_agent.fit(test_env, nb_steps=100)

Esempio n. 8

0

Mostra file

File: cem_agents.py Progetto: MiraFinkel/marl_transforms

class KerasCEMAgent(AbstractAgent):
    """Adapter exposing a keras-rl ``CEMAgent`` through the ``AbstractAgent`` API."""

    def __init__(self, env, timesteps_per_episode=10001):
        """Build the policy network and the (not yet compiled) CEM agent."""
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        episode_memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model,
                              nb_actions=self.action_size,
                              memory=episode_memory,
                              batch_size=50,
                              nb_steps_warmup=2000,
                              train_interval=50,
                              elite_frac=0.05)

    def run(self) -> {str: float}:
        """
        The agent's training method.

        Compiles the agent and fits it on ``self.env`` for ``ITER_NUM`` steps.

        Returns: a dictionary keyed by EPISODE_REWARD_MEAN, EPISODE_STEP_NUM_MEAN,
        EPISODE_REWARD_MIN, EPISODE_REWARD_MAX and EPISODE_VARIANCE. The first two
        hold the per-episode reward and step-count arrays from the training
        history; the remaining three hold ``np.empty([])``.
        """
        self.agent.compile()
        training = self.agent.fit(self.env, nb_steps=ITER_NUM, visualize=False, verbose=1)
        log = training.history
        if len(log) == 0:
            # TODO - placeholder values used when no history was recorded
            episode_reward, nb_episode_steps = [0], [0]
        else:
            episode_reward = log["episode_reward"]
            nb_episode_steps = log["nb_episode_steps"]
        return {
            EPISODE_REWARD_MEAN: np.array(episode_reward),
            EPISODE_STEP_NUM_MEAN: np.array(nb_episode_steps),
            EPISODE_REWARD_MIN: np.empty([]),
            EPISODE_REWARD_MAX: np.empty([]),
            EPISODE_VARIANCE: np.empty([]),
        }

    def _build_compile_model(self):
        """Return the policy network: three Dense(16)+ReLU layers and a softmax head.

        A simpler alternative is Flatten -> Dense(action_size) -> softmax.
        """
        net = Sequential()
        net.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        for _ in range(3):
            net.add(Dense(16))
            net.add(Activation('relu'))
        net.add(Dense(self.action_size))
        net.add(Activation('softmax'))
        return net

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: a int that represents the best action.
        """
        # NOTE(review): `state` is passed to predict() unchanged; a previous
        # version wrapped it as np.array([[state]]) - confirm expected shape.
        action_scores = self.model.predict(state)
        best_action = np.argmax(action_scores)
        return int(best_action)

    def stop_episode(self):
        """No-op hook."""
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        """No-op hook."""
        pass

    def evaluate(self, visualize=False):
        """Test the agent for 5 episodes of at most 60 steps each."""
        self.agent.test(self.env,
                        nb_episodes=5,
                        visualize=visualize,
                        nb_max_episode_steps=60)

    def replay_experiences(self):
        """No-op hook."""
        pass

Esempio n. 9

0

Mostra file

def _make_seeded_env():
    """Create a fresh ``ENV_NAME`` environment with NumPy and env seeds set to 123."""
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    return env


def main(options):
    """Train a CEM agent and a PROPS agent on ``ENV_NAME`` and plot both reward curves.

    Both agents are trained on freshly created, identically seeded
    environments. Their weights are written to ``cem_dumps/`` and
    ``props_dumps/`` and the rolling-mean episode rewards of both runs are
    saved as one figure under ``plots/``.

    Args:
        options: Object providing the attributes ``model_type``,
            ``train_interval_cem``, ``batch_size_cem``, ``steps_cem``,
            ``batch_size_props``, ``steps_props``, ``trunc_thres``, ``Lmax``
            and ``delta``.
    """
    import os  # local import: the file-level import block is not visible here

    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # Create the output directories up front so that a missing folder cannot
    # make save_weights()/savefig() fail after the training has already run.
    for out_dir in ('cem_dumps', 'props_dumps', 'plots'):
        os.makedirs(out_dir, exist_ok=True)

    # CEM
    env = _make_seeded_env()
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=batch_size_cem,
                   nb_steps_warmup=1000,
                   train_interval=train_interval_cem,
                   elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights('cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
        ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
                     overwrite=True)
    #cem.test(env, nb_episodes=1, visualize=False)

    # PROPS (fresh environment, model and memory; same seeds as the CEM run)
    env = _make_seeded_env()
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres
    }

    props = PROPSAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       Lmax=Lmax,
                       delta=delta,
                       bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env,
                               nb_steps=steps_props,
                               visualize=False,
                               verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.
        format(ENV_NAME, model_type, batch_size_props, steps_props,
               trunc_thres, Lmax, delta),
        overwrite=True)
    #props.test(env, nb_episodes=1, visualize=False)

    # Plot the rolling mean of the episode rewards of both runs.
    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())

    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    plt.plot(df_props.rolling(window=batch_size_props).mean())

    plt.legend(['cem', 'props'], loc='upper left')
    #plt.show()
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))

Esempio n. 10

0

Mostra file

obs_dim = env.observation_space.shape[0]

# Simple policy: flatten the length-1 observation window and map it straight
# to a softmax over the actions.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape, name = 'Duda-flatten'))
model.add(Dense(nb_actions, name = 'Duda-dense'))
model.add(Activation('softmax', name = 'Duda-relu'))

# summary() prints the table itself; print(model.summary()) would add a stray
# "None" line.
model.summary()

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()
print('\033[93m' + "Model is compiled"+'\033[0m')
# NOTE(review): nb_steps (1000) is below nb_steps_warmup (2000), so presumably
# no parameter update happens during this run - confirm this is intended.
cem.fit(env, nb_steps=1000, visualize=True, verbose=2)
print('\033[93m' + "Training"+'\033[0m')

cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)

# Collect the initial observation of 10 fresh episodes.
#observations = np.array((4,10))
observations = np.asarray([deepcopy(env.reset()) for _ in range(10)])

for layer in cem.model.layers:
    # Uninitialized (10, 1) float buffer in Fortran order.
    fake_x_test = np.ndarray(shape=(10,1), dtype=float, order='F')

Esempio n. 11

0

Mostra file

# print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

# NOTE(review): nb_actions receives `se.action_space` itself; other examples
# pass `action_space.n` - confirm that `se.action_space` is an action count.
cem = CEMAgent(model=model,
               nb_actions=se.action_space,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = cem.fit(se, nb_steps=50000, visualize=False, verbose=2)

# Keep only the strictly positive episode rewards for plotting.
rewards = [x for x in history.history['episode_reward'] if x > 0]

import matplotlib.pyplot as plt

if rewards:
    # Convolution with a window of 100 ones, i.e. a moving sum of rewards.
    plt.plot(np.convolve(np.ones(100), rewards, 'valid'))
    plt.show()
else:
    # np.convolve raises ValueError on an empty sequence, which would abort
    # the script before the weights below are saved.
    print('No positive episode rewards recorded; skipping reward plot.')

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format('Student2'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(se, nb_episodes=5, visualize=False)

Esempio n. 12

0

Mostra file

# Three hidden Dense(16)+ReLU layers followed by a linear output per action.
y = Dense(16)(y)
y = Activation('relu')(y)
y = Dense(16)(y)
y = Activation('relu')(y)
y = Dense(16)(y)
y = Activation('relu')(y)
y = Dense(nb_actions)(y)
y = Activation('linear')(y)
model = Model(x, y)

memory = EpisodeParameterMemory(limit=50000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=2000, batch_size=50, train_interval=50, elite_frac=0.05)
cem.compile()

rewards = []
hist = cem.fit(env, nb_steps=50000, visualize=False, verbose=2)
# Default to an empty list: extend(None) would raise TypeError when the
# history holds no 'episode_reward' entry.
rewards.extend(hist.history.get('episode_reward', []))
plt.plot(rewards)

cem.test(env, nb_episodes=5, visualize=True)

# Roll the greedy policy out for 500 rendered steps.
state = env.reset()
action = env.action_space.sample()
print(action)
for _ in range(500):
    # assumes observations hold 6 values (hard-coded reshape) - TODO confirm
    action = np.argmax(cem.model.predict(state.reshape(1, 1, 6))[0])
    state, reward, done, _info = env.step(action)
    env.render()
    if done:
        # Stepping a finished episode is not valid; start a new one instead.
        state = env.reset()
env.render(close=True)

Esempio n. 13

0

Mostra file

model.add(Dense(nb_actions))
model.add(Activation('softmax'))

# summary() prints the table itself; print(model.summary()) would add a stray
# "None" line.
model.summary()

# Configuration and compilation of the agent
memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)

cem.compile()

# Agent training; the WandbLogger callback receives the training events.
cem.fit(env,
        nb_steps=50000,
        visualize=True,
        verbose=2,
        callbacks=[WandbLogger()])

# Saving of the final weights
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Testing the algorithm for 5 episodes of at most 200 steps each
cem.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)

Esempio n. 14

0

Mostra file

File: _cem_cartpole_compare.py Progetto: yosider/keras-rl

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# --- Agent "L": a single LSTM layer feeding a softmax ------------------------
modelL = Sequential([
    LSTM(nb_actions, input_shape=(1,) + env.observation_space.shape),
    Activation('softmax'),
])
memoryL = EpisodeParameterMemory(limit=1000, window_length=1)
cemL = CEMAgent(
    model=modelL, nb_actions=nb_actions, memory=memoryL,
    batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
)
cemL.compile()
# Train, store the weights, then run 5 visualized test episodes.
histL = cemL.fit(env, nb_steps=50000, visualize=False, verbose=1)
cemL.save_weights('cemL_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cemL.test(env, nb_episodes=5, visualize=True)

# --- Agent "D": flatten + Dense layer feeding a softmax ----------------------
modelD = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(nb_actions),
    Activation('softmax'),
])
memoryD = EpisodeParameterMemory(limit=1000, window_length=1)
cemD = CEMAgent(
    model=modelD, nb_actions=nb_actions, memory=memoryD,
    batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
)

Esempio n. 15

0

Mostra file

File: DQN_with_CNN.py Progetto: karan-maheshwari/Reinforcement-Learning-Contra

# Convolutional policy: reshape to the observation shape, two conv layers with
# one max-pooling step in between, then a small dense head with a softmax.
# NOTE(review): Conv2D is given input_shape although it follows Reshape and so
# is presumably not the first layer - confirm which layer defines the input.
model.add(Reshape(env.observation_space.shape))
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=env.observation_space.shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
# summary() prints the table itself; print(model.summary()) would add a stray
# "None" line.
model.summary()

memory = EpisodeParameterMemory(limit=10000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=1000,
               batch_size=50,
               train_interval=50,
               elite_frac=0.1)
cem.compile()

cem.fit(env, nb_steps=100000000, visualize=False)

#dqn.load_weights('dqn_test_run_weights.h5f')
cem.save_weights('cem_{}_weights.h5f'.format('test_run'), overwrite=True)

#dqn.test(env, nb_episodes=5, visualize=True)

Esempio n. 16

0

Mostra file

File: cem_cartpole.py Progetto: wxEdward/rl-perturbed-reward

def _reward_run_tag(reward, smooth):
    """Return the file-name tag of a run, e.g. ``"normal"`` or ``"noisy_smooth"``."""
    if reward != "normal" and smooth:
        return reward + "_smooth"
    return reward


def train():
    """Train and test a CEM agent on ``ENV_NAME`` under the configured reward mode.

    The module-level ``REWARD`` selects the mode:

    * ``"normal"``: no processor is attached to the agent.
    * ``"noisy"``: a ``CartpoleProcessor`` with ``surrogate=False``.
    * ``"surrogate"``: a ``CartpoleProcessor`` with ``surrogate=True``.

    For the two processor modes ``SMOOTH`` is forwarded as the processor's
    ``smooth`` flag and adds a ``_smooth`` suffix to the output file names.
    The weights are saved to ``LOG_DIR/cem_<tag>_<ENV_NAME>_params.h5f`` and
    the training history to ``LOG_DIR/<tag>.csv`` before the agent is tested
    for 5 episodes.

    Raises:
        NotImplementedError: If ``REWARD`` is not one of the modes above.
    """
    # Reject an unknown mode before building the environment, session and model.
    if REWARD not in ("normal", "noisy", "surrogate"):
        raise NotImplementedError

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    # Let TensorFlow grow its GPU memory usage on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Deep network: three Dense(16)+ReLU layers and a softmax head. (A simpler
    # alternative is Flatten -> Dense(nb_actions) -> softmax.)
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    # Finally, we configure and compile our agent.
    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    agent_kwargs = dict(model=model, nb_actions=nb_actions, memory=memory,
                        batch_size=50, nb_steps_warmup=2000, train_interval=50,
                        elite_frac=0.05)
    if REWARD != "normal":
        # Only the perturbed-reward modes attach a processor.
        agent_kwargs["processor"] = CartpoleProcessor(
            e_=ERR_N, e=ERR_P, smooth=bool(SMOOTH),
            surrogate=(REWARD == "surrogate"))

    cem = CEMAgent(**agent_kwargs)
    cem.compile()
    history = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

    # Persist weights and history before testing so a failing test run cannot
    # lose the training results.
    tag = _reward_run_tag(REWARD, SMOOTH)
    cem.save_weights(os.path.join(LOG_DIR, 'cem_{}_{}_params.h5f'.format(tag, ENV_NAME)), overwrite=True)
    pandas.DataFrame(history.history).to_csv(os.path.join(LOG_DIR, tag + ".csv"))

    cem.test(env, nb_episodes=5, visualize=False)

Esempio n. 17

0

Mostra file

File: kerasrl.py Progetto: sdforce/easyagents

class KerasRlCemAgent(KerasRlAgent):
    """Cross-entropy method agent implemented on top of keras-rl's ``CEMAgent``.

        see "https://learning.mpi-sws.org/mlss2016/slides/2016-MLSS-RL.pdf" and
            "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf"
    """
    class CemCallback(rl.callbacks.Callback):
        """keras-rl callback that turns episode ends into train-iteration events."""
        def __init__(self, cem_agent: KerasRlAgent,
                     cem_context: core.CemTrainContext, nb_steps: int):
            """
            Args:
                cem_agent: the agent to propagate iteration begin/end events to.
                cem_context: the train_context containing the iteration definitions
                nb_steps: value set in the keras cem agent.
            """
            assert cem_agent
            assert cem_context
            assert nb_steps
            self._cem_agent: KerasRlAgent = cem_agent
            self._cem_context: core.CemTrainContext = cem_context
            self._nb_steps = nb_steps
            super().__init__()

        def on_episode_end(self, episode, logs=None):
            """Close a training iteration every ``num_episodes_per_iteration`` episodes."""
            context = self._cem_context
            finished_episodes = episode + 1
            if finished_episodes % context.num_episodes_per_iteration != 0:
                return
            self._cem_agent.on_train_iteration_end(math.nan)
            if context.training_done:
                # Move the keras-rl step counter to nb_steps; presumably this
                # makes fit() stop - confirm against keras-rl.
                self._cem_agent._agent.step = self._nb_steps
            else:
                self._cem_agent.on_train_iteration_begin()

    def train_implementation(self, train_context: core.CemTrainContext):
        """Create the keras-rl CEM agent and fit it as described by ``train_context``.

        Args:
            train_context: supplies the number of iterations, episodes per
                iteration, maximum steps per episode, buffer preload steps
                and the elite set fraction.
        """
        assert train_context
        ctx = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        # The parameter memory keeps five iterations' worth of episodes.
        policy_buffer_size = 5 * ctx.num_episodes_per_iteration
        self.log_api('EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)

        num_actions = train_env.action_space.n
        agent_description = (
            f'(model=..., nb_actions={num_actions}, memory=..., '
            f'nb_steps_warmup={ctx.num_steps_buffer_preload}, '
            f'train_interval={ctx.num_episodes_per_iteration}, '
            f'batch_size={ctx.num_episodes_per_iteration}, '
            f'elite_frac={ctx.elite_set_fraction})')
        self.log_api('CEMAgent', agent_description)
        self._agent = CEMAgent(
            model=keras_model,
            nb_actions=num_actions,
            memory=memory,
            nb_steps_warmup=ctx.num_steps_buffer_preload,
            batch_size=ctx.num_episodes_per_iteration,
            train_interval=ctx.num_episodes_per_iteration,
            elite_frac=ctx.elite_set_fraction,
        )
        self.log_api('agent.compile', '()')
        self._agent.compile()

        # Upper bound on steps: every episode of every iteration at full length.
        nb_steps = ctx.num_iterations * ctx.num_episodes_per_iteration * ctx.max_steps_per_episode
        callback = KerasRlCemAgent.CemCallback(self, ctx, nb_steps)
        self.on_train_iteration_begin()
        self.log_api('agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[callback])
        # Close the last iteration if fit() ended while training was not done.
        if not ctx.training_done:
            self.on_train_iteration_end(math.nan)

Esempio n. 18

0

Mostra file

import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'

# Create the environment and fix the random seeds.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# Simple policy: flatten the length-1 observation window and map it straight
# to a softmax over the actions.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
# summary() prints the table itself; print(model.summary()) would add a stray
# "None" line.
model.summary()

# Configure and compile the agent.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Train, save the weights, then run 5 visualized test episodes.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)

Esempio n. 19

0

Mostra file

# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))

# summary() prints the table itself; print(model.summary()) would add a stray
# "None" line.
model.summary()

# Finally, we configure and compile our agent.
memory = EpisodeParameterMemory(limit=MEMORY_LIMIT,
                                window_length=WINDOW_LENGHT)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=BATCH_SIZE,
               nb_steps_warmup=NB_STEPS_WARMUP,
               train_interval=TRAIN_INTERVAL,
               elite_frac=ELITE_FRAC)
cem.compile()

# Okay, now it's time to learn something! Visualizing the training
# (VISUALIZE_TRAIN) slows it down quite a lot. You can always safely abort the
# training prematurely using Ctrl + C.
cem.fit(env, nb_steps=NB_STEPS, visualize=VISUALIZE_TRAIN, verbose=VERBOSE)

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for NB_EPISODES episodes.
cem.test(env, nb_episodes=NB_EPISODES, visualize=VISUALIZE_TEST)