Esempi in Python per CEMAgent.save_weights, esempi in Python per rl.agents.cem.CEMAgent.save_weights

Esempio n. 1

0

Mostra file

    def reinforce_train_cem(self,
                            steps=60000,
                            visualize=False,
                            verbose=1,
                            nb_steps_warmup=10000,
                            save_path=r"D:\Data\markets\weights",
                            save_weights_name="cem_CADJPY_weights.h5f",
                            log_interval=1000):
        """Train a CEMAgent on ``self.env`` and write its weights to disk.

        The agent is built around ``self.model``, compiled, fitted, and then
        its weights are saved to ``save_path/save_weights_name`` (the
        directory is created when missing; an existing file is overwritten).

        Args:
            steps: passed to ``agent.fit`` as ``nb_steps``.
            visualize: passed through to ``agent.fit``.
            verbose: passed through to ``agent.fit``.
            nb_steps_warmup: passed through to the ``CEMAgent`` constructor.
            save_path: directory that receives the weights file.
            save_weights_name: file name of the weights file.
            log_interval: passed through to ``agent.fit``.
        """
        episode_memory = EpisodeParameterMemory(limit=200, window_length=1)
        action_count = self.env.action_space.n
        # One processor input per model input.
        input_processor = MultiInputProcessor(nb_inputs=len(self.model.inputs))

        cem_agent = CEMAgent(model=self.model,
                             nb_actions=action_count,
                             memory=episode_memory,
                             nb_steps_warmup=nb_steps_warmup,
                             processor=input_processor)
        cem_agent.compile()

        fit_options = {
            "nb_steps": steps,
            "visualize": visualize,
            "verbose": verbose,
            "log_interval": log_interval,
        }
        cem_agent.fit(self.env, **fit_options)

        # Ensure the target directory exists before the weights are written.
        target_dir = pathlib.Path(save_path)
        target_dir.mkdir(parents=True, exist_ok=True)
        weights_file = os.path.join(save_path, save_weights_name)
        cem_agent.save_weights(filepath=weights_file, overwrite=True)

Esempio n. 2

0

Mostra file

def main(env_name, nb_steps):
    """Train a CEM agent on a gym environment, save it and report test rewards.

    Args:
        env_name: id passed to ``gym.make``; also embedded in the weights
            file name ``cem_<env_name>_params.h5f``.
        nb_steps: number of steps passed to ``agent.fit``.
    """
    # Create the environment and seed NumPy and the environment with 123.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    # The network input carries a leading axis of length 1 in front of the
    # observation shape.
    action_count = env.action_space.n
    observation_shape = (1, ) + env.observation_space.shape
    policy_net = create_nn_model(observation_shape, action_count)

    # Configure, compile and train the agent.
    episode_memory = EpisodeParameterMemory(limit=450, window_length=1)
    agent = CEMAgent(
        model=policy_net,
        nb_actions=action_count,
        memory=episode_memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05,
    )
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Persist the trained weights, replacing any previous file.
    weights_file = 'cem_{}_params.h5f'.format(env_name)
    agent.save_weights(weights_file, overwrite=True)

    # Evaluate for 100 episodes and print summary statistics of the rewards.
    test_history = agent.test(env, nb_episodes=100, visualize=False)
    episode_rewards = np.array(test_history.history['episode_reward'])
    summary_template = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
                        "min={:>5.2f}, max={:>5.2f}")
    print(summary_template.format(len(episode_rewards),
                                  episode_rewards.mean(),
                                  episode_rewards.std(),
                                  episode_rewards.min(),
                                  episode_rewards.max()))

Esempio n. 3

0

Mostra file

File: cem_agent.py Progetto: MartinThoma/algorithms

def main(env_name, nb_steps):
    """Fit a CEMAgent on ``env_name``, store its weights, then evaluate it.

    Args:
        env_name: gym environment id; also used in the name of the saved
            weights file (``cem_<env_name>_params.h5f``).
        nb_steps: training length handed to ``agent.fit``.
    """
    # Environment creation, with NumPy and the environment both seeded to 123.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    n_actions = env.action_space.n
    # Model input shape: a leading axis of 1 followed by the observation shape.
    net_input_shape = (1,) + env.observation_space.shape
    network = create_nn_model(net_input_shape, n_actions)

    # Agent configuration, compilation and training.
    param_memory = EpisodeParameterMemory(limit=450, window_length=1)
    agent_settings = dict(
        model=network,
        nb_actions=n_actions,
        memory=param_memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05,
    )
    agent = CEMAgent(**agent_settings)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Write the weights to disk, overwriting an existing file.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Run 100 test episodes and summarise the per-episode rewards.
    evaluation = agent.test(env, nb_episodes=100, visualize=False)
    scores = np.array(evaluation.history['episode_reward'])
    report = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
              "min={:>5.2f}, max={:>5.2f}")
    stats = (len(scores), scores.mean(), scores.std(), scores.min(), scores.max())
    print(report.format(*stats))

Esempio n. 4

0

Mostra file

File: agents_kerasrl.py Progetto: carolinahiguera/humanoid_transfer

class KerasCEMAgent(object):
	'''
	Thin wrapper that builds, trains, tests and (de)serialises a keras-rl CEMAgent.

	The cross-entropy method Learning Agent as described in http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf

	`configure` must be called before `train`, `test`, `load_weights` or
	`save_weights`, because it is the method that creates `self.agent`.
	'''

	def __init__(self, opts):
		'''
		Store the options object.

		:param opts: options object; `configure` reads `opts.model_type` from it.
		'''
		self.metadata = {
			'discrete_actions': True,
		}

		self.opts = opts

	def _build_model(self, observation_space_shape, nb_actions):
		'''
		Build the softmax policy network selected by `self.opts.model_type`.

		:param observation_space_shape: tuple, shape of a single observation.
		:param nb_actions: size of the softmax output layer.
		:return: the Keras `Sequential` model.
		:raises ValueError: if `model_type` is neither 1 nor 2.
		'''
		model_type = self.opts.model_type
		if model_type == 1:
			# Option 1: simple model, no hidden layers.
			nb_hidden_layers = 0
		elif model_type == 2:
			# Option 2: deep network with three hidden ReLU layers of 16 units.
			nb_hidden_layers = 3
		else:
			# Previously this case fell through and failed later with an
			# UnboundLocalError on `model`; report the real cause instead.
			raise ValueError(
				'Unsupported model_type: {!r} (expected 1 or 2)'.format(model_type))

		model = Sequential()
		model.add(Flatten(input_shape=(1,) + observation_space_shape))
		for _ in range(nb_hidden_layers):
			model.add(Dense(16))
			model.add(Activation('relu'))
		model.add(Dense(nb_actions))
		model.add(Activation('softmax'))
		return model

	def configure(self, observation_space_shape, nb_actions):
		'''
		Build the model and create and compile the CEM agent as `self.agent`.

		:param observation_space_shape: tuple, shape of a single observation.
		:param nb_actions: number of discrete actions.
		:raises ValueError: if `self.opts.model_type` is neither 1 nor 2.
		'''
		model = self._build_model(observation_space_shape, nb_actions)
		# summary() prints the table itself and returns None, so wrapping it in
		# print() only added a stray "None" line to the output.
		model.summary()

		memory = EpisodeParameterMemory(limit=1000, window_length=1)

		self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
							  batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
		self.agent.compile()

	def train(self, env, nb_steps, visualize, verbosity):
		'''
		Fit the agent on `env`.

		:param env: environment handed to `self.agent.fit`.
		:param nb_steps: number of training steps.
		:param visualize: forwarded to `fit` as `visualize`.
		:param verbosity: forwarded to `fit` as `verbose`.
		:return: whatever `self.agent.fit` returns (previously discarded).
		'''
		return self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

	def test(self, env, nb_episodes, visualize):
		'''
		Evaluate the agent for `nb_episodes` episodes.

		:param env: environment handed to `self.agent.test`.
		:param nb_episodes: number of evaluation episodes.
		:param visualize: forwarded to `test` as `visualize`.
		:return: whatever `self.agent.test` returns (previously discarded).
		'''
		return self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

	def load_weights(self, load_file):
		'''
		Load agent weights from `load_file`.
		'''
		self.agent.load_weights(load_file)

	def save_weights(self, save_file, overwrite):
		'''
		Save the agent weights to `save_file`.

		:param save_file: destination path.
		:param overwrite: forwarded to `self.agent.save_weights`.
		'''
		self.agent.save_weights(save_file, overwrite=overwrite)

Esempio n. 5

0

Mostra file

def main():
    """Train a CEM agent on a MarketEnv and save its weights.

    Builds a single-layer softmax policy, resumes from previously logged
    parameters when the parameter logger finds any, fits the agent with
    visualisation enabled and finally writes the weights to
    ``cem_<symbol>_weights.h5f``.
    """
    env = MarketEnv(
        ("ES", "FUT", "GLOBEX", "USD"),
        obs_xform=xform.BinaryDelta(3),
        episode_steps=STEPS_PER_EPISODE,
        client_id=3,
    )
    # Disabled alternative instrument:
    # env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    action_count = 3      # Keras-RL CEM is a discrete agent

    # Option 1: simple model -- flatten the observation, one dense softmax layer.
    policy_net = Sequential()
    policy_net.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    policy_net.add(Dense(action_count))
    policy_net.add(Activation('softmax'))

    # Option 2 (disabled): deep network with three ReLU Dense layers of
    # `hidden_nodes` units each before the softmax output, where
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1).

    print(policy_net.summary())

    params_log = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol))
    run_log = FileLogger('cem_{}_log.json'.format(env.instrument.symbol), interval=STEPS_PER_EPISODE)
    fit_callbacks = [params_log, run_log]

    # Start with the last saved params if present.
    initial_theta = params_log.read_params()
    if initial_theta is not None:
        print('Starting with parameters from {}:\n{}'.format(params_log.params_filename, initial_theta))

    # memory:          remembers the parameters and rewards for the last `limit` episodes.
    # batch_size:      randomly sample this many episode parameters from memory before
    #                  taking the top `elite_frac` to construct the next gen parameters from.
    # nb_steps_warmup: run for this many steps (total) to fill memory before training.
    # train_interval:  train (update parameters) every this many episodes.
    # elite_frac:      take this top fraction of the `batch_size` randomly sampled
    #                  parameters from the episode memory to construct new parameters.
    episode_memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)
    agent = CEMAgent(
        model=policy_net,
        nb_actions=action_count,
        memory=episode_memory,
        batch_size=EPISODES,
        nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
        train_interval=TRAIN_INTERVAL_EPISODES,
        elite_frac=0.2,
        theta_init=initial_theta,
        processor=DiscreteProcessor(),
        noise_decay_const=0,
        noise_ampl=0,
    )
    agent.compile()

    total_steps = STEPS_PER_EPISODE * EPISODES
    agent.fit(env, nb_steps=total_steps, visualize=True, verbose=2, callbacks=fit_callbacks)
    agent.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)

Esempio n. 6

0

Mostra file

import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

# Environment: CartPole, with NumPy and the environment both seeded to 123.
ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# Policy network: flatten the observation (with a leading axis of length 1),
# then a single dense layer followed by a softmax over the actions.
model = Sequential([
    Flatten(input_shape=(1, ) + env.observation_space.shape),
    Dense(nb_actions),
    Activation('softmax'),
])
print(model.summary())

# Build and compile the CEM agent.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model, nb_actions=nb_actions, memory=memory,
    batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
)
cem.compile()

# Train without rendering, save the weights, then run 5 rendered test episodes.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)

Esempio n. 7

0

Mostra file

File: DQN_with_CNN.py Progetto: karan-maheshwari/Reinforcement-Learning-Contra

# Append the layers to the existing `model`, in this order: reshape to the
# observation shape, two 3x3 ReLU convolutions with one 2x2 max-pool between
# them, then a 16-unit ReLU dense layer and a softmax over the actions.
for layer in (
        Reshape(env.observation_space.shape),
        Conv2D(32, (3, 3),
               activation='relu',
               input_shape=env.observation_space.shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        Flatten(),
        Dense(16),
        Activation('relu'),
        Dense(nb_actions),
        Activation('softmax'),
):
    model.add(layer)
print(model.summary())

# Build and compile the CEM agent.
memory = EpisodeParameterMemory(limit=10000, window_length=1)
cem = CEMAgent(
    model=model, nb_actions=nb_actions, memory=memory,
    nb_steps_warmup=1000, batch_size=50, train_interval=50, elite_frac=0.1,
)
cem.compile()

# Train without rendering.
cem.fit(env, nb_steps=100000000, visualize=False)

# Disabled leftover from a DQN variant of this script:
#dqn.load_weights('dqn_test_run_weights.h5f')
cem.save_weights('cem_{}_weights.h5f'.format('test_run'), overwrite=True)

# Disabled leftover from a DQN variant of this script:
#dqn.test(env, nb_episodes=5, visualize=True)

Esempio n. 8

0

Mostra file

File: deep_rl.py Progetto: SpinazieSin/scared_citizen_simulation

# Policy network: dense ReLU layers of 128, 64, 32 and 16 units (batch
# normalisation after the first one), then a softmax over the actions.
# NOTE(review): the first layer declares input_shape=(8, ) with no Flatten in
# front of it -- confirm this matches the observations the agent feeds in.
model = Sequential([
    Dense(128, input_shape=(8, )),
    Activation('relu'),
    BatchNormalization(),
    Dense(64),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('softmax'),
])

print(model.summary())

# Build and compile the CEM agent.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model, nb_actions=nb_actions, memory=memory,
    batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
)
cem.compile()

# Train without rendering, then write the weights (overwriting an old file).
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

cem.save_weights('cem_{}_params.h5f'.format("citizen-0"), overwrite=True)

Esempio n. 9

0

Mostra file

def main(options):
    """Train a CEM agent and a PROPS agent on ENV_NAME and plot both reward curves.

    Each agent gets a freshly created, identically seeded environment, model
    and memory. Weights are written to ``cem_dumps/`` and ``props_dumps/``,
    and a figure comparing the rolling mean of the episode rewards is saved
    to ``plots/``.

    Args:
        options: object exposing ``model_type``, ``train_interval_cem``,
            ``batch_size_cem``, ``steps_cem``, ``batch_size_props``,
            ``steps_props``, ``trunc_thres``, ``Lmax`` and ``delta``.
    """
    import os  # function-scope import: only used to create the output folders

    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # Create the output folders up front, so a missing folder cannot make the
    # run fail only after training has finished.
    for out_dir in ('cem_dumps', 'props_dumps', 'plots'):
        os.makedirs(out_dir, exist_ok=True)

    def _fresh_setup():
        """Return a newly seeded (env, nb_actions, model, memory) tuple."""
        env = gym.make(ENV_NAME)
        np.random.seed(123)
        env.seed(123)
        nb_actions = env.action_space.n
        model = initModel(model_type, nb_actions, env.observation_space.shape)
        memory = initMemory()
        return env, nb_actions, model, memory

    # CEM
    env, nb_actions, model, memory = _fresh_setup()

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=batch_size_cem,
                   nb_steps_warmup=1000,
                   train_interval=train_interval_cem,
                   elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights('cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
        ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
                     overwrite=True)
    #cem.test(env, nb_episodes=1, visualize=False)

    # PROPS
    env, nb_actions, model, memory = _fresh_setup()

    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres
    }

    props = PROPSAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       Lmax=Lmax,
                       delta=delta,
                       bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env,
                               nb_steps=steps_props,
                               visualize=False,
                               verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.
        format(ENV_NAME, model_type, batch_size_props, steps_props,
               trunc_thres, Lmax, delta),
        overwrite=True)
    #props.test(env, nb_episodes=1, visualize=False)

    # Plot the rolling mean of the episode rewards for both agents; the
    # window is the CEM train interval and the PROPS batch size respectively.
    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())

    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    plt.plot(df_props.rolling(window=batch_size_props).mean())

    plt.legend(['cem', 'props'], loc='upper left')
    #plt.show()
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))

Esempio n. 10

0

Mostra file

File: cem_cartpole.py Progetto: Bosmansc/tetris_openai

# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))

print(model.summary())

# Build and compile the CEM agent.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model, nb_actions=nb_actions, memory=memory,
    batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
)
cem.compile()

# Train with rendering switched off.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# Store the trained weights, replacing any earlier file of the same name.
weights_file = f'cem_{ENV_NAME}_params.h5f'
cem.save_weights(weights_file, overwrite=True)

# Run 5 rendered evaluation episodes.
cem.test(env, nb_episodes=5, visualize=True)

Esempio n. 11

0

Mostra file

# print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

# NOTE(review): nb_actions receives `se.action_space` itself, not a `.n`
# count as in typical gym usage -- confirm that `se.action_space` is an int.
cem = CEMAgent(model=model,
               nb_actions=se.action_space,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()

# Train without rendering.
history = cem.fit(se, nb_steps=50000, visualize=False, verbose=2)

# Save the weights right after training, so that a failure in the plotting
# code below can no longer throw the finished training run away.
cem.save_weights('cem_{}_params.h5f'.format('Student2'), overwrite=True)

# Keep only the strictly positive episode rewards for the plot.
rewards = [x for x in history.history['episode_reward'] if x > 0]

import matplotlib.pyplot as plt

if rewards:
    # Convolving with 100 ones yields a windowed sum of the rewards.
    plt.plot(np.convolve(np.ones(100), rewards, 'valid'))
    plt.show()
else:
    # np.convolve raises ValueError on an empty array, so skip the plot.
    print('No positive episode rewards recorded; skipping the reward plot.')

# Finally, evaluate our algorithm for 5 episodes.
cem.test(se, nb_episodes=5, visualize=False)

Esempio n. 12

0

Mostra file

File: cem_cartpole.py Progetto: wxEdward/rl-perturbed-reward

def train():
    """Train, save and test a CEM agent under the configured reward mode.

    Reads the module-level settings ``ENV_NAME``, ``REWARD``, ``SMOOTH``,
    ``ERR_N``, ``ERR_P`` and ``LOG_DIR``:

    * ``REWARD == "normal"``: the agent is built without a processor.
    * ``REWARD == "noisy"`` / ``"surrogate"``: the agent uses a
      ``CartpoleProcessor`` with ``surrogate`` set accordingly and
      ``smooth`` following ``SMOOTH``.

    Weights go to ``LOG_DIR/cem_<run>_<ENV_NAME>_params.h5f`` and the training
    history to ``LOG_DIR/<run>.csv``, where ``<run>`` is the reward mode with
    a ``_smooth`` suffix when smoothing is active (never for ``"normal"``).

    Raises:
        NotImplementedError: if ``REWARD`` is not one of the three modes.
    """
    # Fail fast: reject an unsupported mode before any environment, session
    # or model has been created.
    if REWARD not in ("normal", "noisy", "surrogate"):
        raise NotImplementedError

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    # Let TensorFlow grow its GPU memory allocation as needed and make Keras
    # use that session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Deep network: three hidden ReLU layers of 16 units, softmax output.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    # Settings shared by every reward mode.
    agent_kwargs = dict(model=model, nb_actions=nb_actions, memory=memory,
                        batch_size=50, nb_steps_warmup=2000, train_interval=50,
                        elite_frac=0.05)

    if REWARD == "normal":
        run_name = "normal"
    else:
        # "noisy" and "surrogate" differ only in the `surrogate` flag.
        agent_kwargs["processor"] = CartpoleProcessor(
            e_=ERR_N, e=ERR_P, smooth=bool(SMOOTH),
            surrogate=(REWARD == "surrogate"))
        run_name = REWARD + ("_smooth" if SMOOTH else "")

    cem = CEMAgent(**agent_kwargs)
    cem.compile()
    history = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

    # Persist the weights and the training history before evaluating.
    weights_name = 'cem_{}_{}_params.h5f'.format(run_name, ENV_NAME)
    cem.save_weights(os.path.join(LOG_DIR, weights_name), overwrite=True)
    pandas.DataFrame(history.history).to_csv(os.path.join(LOG_DIR, run_name + ".csv"))

    cem.test(env, nb_episodes=5, visualize=False)

Esempio n. 13

0

Mostra file

# Output head of the existing `model`: one unit per action, then a softmax.
for output_layer in (Dense(nb_actions), Activation('softmax')):
    model.add(output_layer)

print(model.summary())

# Build and compile the CEM agent.
memory = EpisodeParameterMemory(limit=10000, window_length=1)
cem = CEMAgent(
    model=model, nb_actions=nb_actions, memory=memory,
    batch_size=1000, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05,
    noise_decay_const=0.0, noise_ampl=1.0,
    processor=MujocoProcessor(),
)
cem.compile()

# Train with rendering switched off.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# Store the trained weights, replacing any earlier file of the same name.
cem.save_weights('cem_CAV_params.h5f', overwrite=True)

# Run 5 rendered evaluation episodes.
cem.test(env, nb_episodes=5, visualize=True)