def reinforce_train_cem(self, steps=60000, visualize=False, verbose=1, nb_steps_warmup=10000,
                        save_path=r"D:\Data\markets\weights",
                        save_weights_name="cem_CADJPY_weights.h5f", log_interval=1000):
    """Fit a CEM agent on ``self.env`` using ``self.model``, then persist its weights.

    Args:
        steps: Forwarded to ``agent.fit`` as ``nb_steps``.
        visualize: Forwarded to ``agent.fit``.
        verbose: Forwarded to ``agent.fit``.
        nb_steps_warmup: Forwarded to the ``CEMAgent`` constructor.
        save_path: Directory for the weights file; created (with parents) if missing.
        save_weights_name: Name of the weights file inside ``save_path``.
        log_interval: Forwarded to ``agent.fit``.
    """
    episode_memory = EpisodeParameterMemory(limit=200, window_length=1)
    action_count = self.env.action_space.n
    # One processor input per model input.
    input_processor = MultiInputProcessor(nb_inputs=len(self.model.inputs))

    agent = CEMAgent(
        model=self.model,
        nb_actions=action_count,
        memory=episode_memory,
        nb_steps_warmup=nb_steps_warmup,
        processor=input_processor,
    )
    agent.compile()
    agent.fit(
        self.env,
        nb_steps=steps,
        visualize=visualize,
        verbose=verbose,
        log_interval=log_interval,
    )

    # The output directory is only created once training has finished.
    pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
    agent.save_weights(filepath=os.path.join(save_path, save_weights_name), overwrite=True)
def main(env_name, nb_steps):
    """Train a CEM agent on the Gym environment ``env_name`` and report test rewards.

    Args:
        env_name: Identifier passed to ``gym.make``; also embedded in the weights file name.
        nb_steps: Number of training steps passed to ``agent.fit``.
    """
    # Create the environment and seed both NumPy and the environment with 123.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    model = create_nn_model((1, ) + env.observation_space.shape, nb_actions)

    # Assemble and compile the agent.
    agent = CEMAgent(
        model=model,
        nb_actions=nb_actions,
        memory=EpisodeParameterMemory(limit=450, window_length=1),
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05,
    )
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Persist the weights once training is over.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Evaluate over 100 episodes and print summary statistics of the episode rewards.
    test_history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(test_history.history['episode_reward'])
    report = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
              "min={:>5.2f}, max={:>5.2f}")
    print(report.format(len(rewards), rewards.mean(), rewards.std(),
                        rewards.min(), rewards.max()))
def main(env_name, nb_steps):
    """Run the full CEM pipeline for ``env_name``: train, save weights, evaluate.

    Args:
        env_name: Gym environment id; also used in the name of the saved weights file.
        nb_steps: Training budget handed to ``agent.fit`` as ``nb_steps``.
    """
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    # The model input carries a leading window dimension of 1.
    nb_actions = env.action_space.n
    input_shape = (1,) + env.observation_space.shape
    model = create_nn_model(input_shape, nb_actions)

    cem_settings = dict(batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
    memory = EpisodeParameterMemory(limit=450, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, **cem_settings)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    weights_file = 'cem_{}_params.h5f'.format(env_name)
    agent.save_weights(weights_file, overwrite=True)

    # Evaluation: 100 episodes without rendering, then a one-line reward summary.
    history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(history.history['episode_reward'])
    stats = (len(rewards), rewards.mean(), rewards.std(), rewards.min(), rewards.max())
    print(("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
           "min={:>5.2f}, max={:>5.2f}").format(*stats))
class KerasCEMAgent(object):
    '''
    The cross-entropy method Learning Agent as described in
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf

    Thin wrapper around a keras-rl ``CEMAgent``. Call ``configure`` before
    ``train``, ``test``, ``load_weights`` or ``save_weights``: those methods
    use ``self.agent``, which only ``configure`` creates.
    '''

    # Hidden Dense layer widths for each supported ``opts.model_type``.
    _HIDDEN_UNITS = {
        1: (),            # Option 1: simple model (no hidden layers)
        2: (16, 16, 16),  # Option 2: deep network
    }

    def __init__(self, opts):
        """Store the options object; ``opts.model_type`` is read by ``configure``."""
        self.metadata = {
            'discrete_actions': True,
        }
        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        """Build the policy network and the compiled CEM agent.

        Args:
            observation_space_shape: Shape of a single observation (any sequence of ints).
            nb_actions: Size of the softmax output layer / number of actions.

        Raises:
            ValueError: If ``opts.model_type`` is not one of the supported values.
        """
        model_type = self.opts.model_type
        # Validate first: previously an unknown model type fell through both
        # branches and failed later with an UnboundLocalError on ``model``.
        if model_type not in self._HIDDEN_UNITS:
            raise ValueError(
                'Unsupported model_type {!r}; expected one of {}'.format(
                    model_type, sorted(self._HIDDEN_UNITS)))

        model = Sequential()
        # Leading 1 matches the memory's window_length=1.
        model.add(Flatten(input_shape=(1,) + tuple(observation_space_shape)))
        for units in self._HIDDEN_UNITS[model_type]:
            model.add(Dense(units))
            model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('softmax'))
        # summary() prints by itself; wrapping it in print() emitted a stray "None".
        model.summary()

        memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                              batch_size=50, nb_steps_warmup=2000, train_interval=50,
                              elite_frac=0.05)
        self.agent.compile()

    def train(self, env, nb_steps, visualize, verbosity):
        """Fit the agent on ``env`` and return whatever ``agent.fit`` returns."""
        return self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

    def test(self, env, nb_episodes, visualize):
        """Evaluate the agent for ``nb_episodes`` episodes and return the result of ``agent.test``."""
        return self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

    def load_weights(self, load_file):
        """Load agent weights from ``load_file``."""
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        """Save the agent weights to ``save_file``."""
        self.agent.save_weights(save_file, overwrite=overwrite)
def main():
    """Build model and train on environment."""
    env = MarketEnv(
        ("ES", "FUT", "GLOBEX", "USD"),
        obs_xform=xform.BinaryDelta(3),
        episode_steps=STEPS_PER_EPISODE,
        client_id=3,
    )
    # Alternative instrument:
    # env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)

    nb_actions = 3  # Keras-RL CEM is a discrete agent

    # Option 1: simple model -- flatten the observation, one softmax layer over the actions.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # Option 2: deep network (disabled)
    # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # model = Sequential([
    #     Flatten(input_shape=(1,) + env.observation_space.shape),
    #     Dense(hidden_nodes), Activation('relu'),
    #     Dense(hidden_nodes), Activation('relu'),
    #     Dense(hidden_nodes), Activation('relu'),
    #     Dense(nb_actions), Activation('softmax')
    # ])
    print(model.summary())

    param_logger = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol))
    file_logger = FileLogger('cem_{}_log.json'.format(env.instrument.symbol),
                             interval=STEPS_PER_EPISODE)
    callbacks = [param_logger, file_logger]

    # Start with last saved params if present.
    theta_init = param_logger.read_params()
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    # Agent arguments, as described by the original author:
    #   memory          -- remembers the parameters and rewards for the last `limit` episodes.
    #   batch_size      -- randomly sample this many episode parameters from memory before
    #                      taking the top `elite_frac` to construct the next gen parameters from.
    #   nb_steps_warmup -- run for this many steps (total) to fill memory before training.
    #   train_interval  -- train (update parameters) every this many episodes.
    #   elite_frac      -- take this top fraction of the `batch_size` randomly sampled
    #                      parameters from the episode memory to construct new parameters.
    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)
    cem = CEMAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        batch_size=EPISODES,
        nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
        train_interval=TRAIN_INTERVAL_EPISODES,
        elite_frac=0.2,
        theta_init=theta_init,
        processor=DiscreteProcessor(),
        noise_decay_const=0,
        noise_ampl=0,
    )
    cem.compile()

    total_steps = STEPS_PER_EPISODE * EPISODES
    cem.fit(env, nb_steps=total_steps, visualize=True, verbose=2, callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'

# Create the environment and seed NumPy and the environment with the same value.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# Policy network: flatten the (window=1, observation) input, then one softmax layer.
model = Sequential([
    Flatten(input_shape=(1, ) + env.observation_space.shape),
    Dense(nb_actions),
    Activation('softmax'),
])
print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=50,
    nb_steps_warmup=2000,
    train_interval=50,
    elite_frac=0.05,
)
cem.compile()

# Train, store the resulting weights, then run 5 rendered evaluation episodes.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)
# Convolutional policy head: every layer is appended to the existing `model`
# in the order listed.
for layer in (
    Reshape(env.observation_space.shape),
    Conv2D(32, (3, 3), activation='relu', input_shape=env.observation_space.shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('softmax'),
):
    model.add(layer)
print(model.summary())

memory = EpisodeParameterMemory(limit=10000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=1000,
    batch_size=50,
    train_interval=50,
    elite_frac=0.1,
)
cem.compile()
cem.fit(env, nb_steps=100000000, visualize=False)

# Disabled: dqn.load_weights('dqn_test_run_weights.h5f')
cem.save_weights('cem_{}_weights.h5f'.format('test_run'), overwrite=True)
# Disabled: dqn.test(env, nb_episodes=5, visualize=True)
# Fully connected policy network over an 8-feature input, narrowing
# 128 -> 64 -> 32 -> 16 before the softmax over the actions.
model = Sequential([
    Dense(128, input_shape=(8, )),
    Activation('relu'),
    BatchNormalization(),
    Dense(64),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('softmax'),
])
print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=50,
    nb_steps_warmup=2000,
    train_interval=50,
    elite_frac=0.05,
)
cem.compile()

# Train without rendering, then write the weights to disk.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format("citizen-0"), overwrite=True)
def main(options):
    """Train a CEM agent and a PROPS agent on ``ENV_NAME`` and plot both reward curves.

    Weights are written under ``cem_dumps/`` and ``props_dumps/`` and the
    comparison plot under ``plots/``; these directories are created if missing.

    Args:
        options: Object providing the attributes ``model_type``,
            ``train_interval_cem``, ``batch_size_cem``, ``steps_cem``,
            ``batch_size_props``, ``steps_props``, ``trunc_thres``, ``Lmax``
            and ``delta``.
    """
    import os  # function-scope import: only needed for the output directories

    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # Create the output directories before training, so a missing directory
    # cannot make save_weights()/savefig() fail after the training runs.
    for out_dir in ('cem_dumps', 'props_dumps', 'plots'):
        os.makedirs(out_dir, exist_ok=True)

    # CEM
    # init environment
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()
    cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                   batch_size=batch_size_cem, nb_steps_warmup=1000,
                   train_interval=train_interval_cem, elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights(
        'cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
            ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
        overwrite=True)
    # cem.test(env, nb_episodes=1, visualize=False)

    # PROPS
    # init environment: fresh environment, model and memory with the same
    # seeds as the CEM run.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()
    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres,
    }
    props = PROPSAgent(model=model, nb_actions=nb_actions, memory=memory,
                       Lmax=Lmax, delta=delta, bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env, nb_steps=steps_props, visualize=False, verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.format(
            ENV_NAME, model_type, batch_size_props, steps_props, trunc_thres, Lmax, delta),
        overwrite=True)
    # props.test(env, nb_episodes=1, visualize=False)

    # Plot rolling means of the per-episode rewards of both agents; the window
    # is the CEM train interval and the PROPS batch size respectively.
    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())
    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    plt.plot(df_props.rolling(window=batch_size_props).mean())
    plt.legend(['cem', 'props'], loc='upper left')
    # plt.show()
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('softmax'))
print(model.summary())

# Agent setup: an episode-parameter memory plus the CEM settings, then compile.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=50,
    nb_steps_warmup=2000,
    train_interval=50,
    elite_frac=0.05,
)
cem.compile()

# Training runs without rendering (visualize=False); per the original author,
# rendering slows training down a lot and training can be aborted with Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# Store the weights once training has finished.
cem.save_weights(f'cem_{ENV_NAME}_params.h5f', overwrite=True)

# Evaluate for 5 rendered episodes.
cem.test(env, nb_episodes=5, visualize=True)
# print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)
# NOTE(review): nb_actions receives `se.action_space` itself rather than an
# `.n` attribute -- presumably `se` exposes the action count directly; confirm.
cem = CEMAgent(model=model, nb_actions=se.action_space, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
cem.compile()

# Training runs without rendering; per the original author it can be aborted
# prematurely with Ctrl + C.
history = cem.fit(se, nb_steps=50000, visualize=False, verbose=2)

# Keep only the strictly positive episode rewards for plotting.
rewards = [x for x in history.history.get('episode_reward', []) if x > 0]

import matplotlib.pyplot as plt
if rewards:
    # Convolving with a length-100 window of ones plots a windowed sum of the rewards.
    plt.plot(np.convolve(np.ones(100), rewards, 'valid'))
    plt.show()
else:
    # np.convolve raises ValueError on an empty sequence; skipping the plot
    # keeps the script alive so the trained weights below are still saved.
    print('No positive episode rewards recorded; skipping reward plot.')

# After training is done, we save the best weights.
cem.save_weights('cem_{}_params.h5f'.format('Student2'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
cem.test(se, nb_episodes=5, visualize=False)
def train():
    """Train, save and evaluate a CEM agent on ``ENV_NAME`` under the ``REWARD`` setting.

    ``REWARD`` selects the variant: ``"normal"`` uses no processor, while
    ``"noisy"`` and ``"surrogate"`` wrap the agent with a ``CartpoleProcessor``
    (``surrogate`` False/True respectively, ``smooth`` following ``SMOOTH``).
    Weights and a CSV of the training history are written into ``LOG_DIR``.

    Raises:
        NotImplementedError: If ``REWARD`` is none of the three values above
            (raised only after the model has been built).
    """
    # Environment with fixed seeds.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    # Register a TensorFlow session with gpu_options.allow_growth enabled as
    # the Keras backend session.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    from keras import backend as K
    K.set_session(session)

    # Option 1 (disabled): simple model -- Flatten, Dense(nb_actions), softmax.
    # Option 2: deep network -- three Dense(16)+ReLU stages before the softmax.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))
    model.summary()

    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    # Settings shared by every variant of the agent.
    agent_kwargs = dict(model=model, nb_actions=nb_actions, memory=memory,
                        batch_size=50, nb_steps_warmup=2000, train_interval=50,
                        elite_frac=0.05)

    if REWARD == "normal":
        cem = CEMAgent(**agent_kwargs)
        cem.compile()
        history = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
        cem.save_weights(os.path.join(LOG_DIR, 'cem_normal_{}_params.h5f'.format(ENV_NAME)),
                         overwrite=True)
        # This variant evaluates before writing the CSV.
        cem.test(env, nb_episodes=5, visualize=False)
        pandas.DataFrame(history.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))
        return

    if REWARD == "noisy":
        use_surrogate = False
    elif REWARD == "surrogate":
        use_surrogate = True
    else:
        raise NotImplementedError

    # (reward variant, smoothing) -> (weights file template, history CSV name)
    outputs = {
        ("noisy", False): ('cem_noisy_{}_params.h5f', "noisy.csv"),
        ("noisy", True): ('cem_noisy_smooth_{}_params.h5f', "noisy_smooth.csv"),
        ("surrogate", False): ('cem_surrogate_{}_params.h5f', "surrogate.csv"),
        ("surrogate", True): ('cem_surrogate_smooth_{}_params.h5f', "surrogate_smooth.csv"),
    }
    smooth = bool(SMOOTH)
    weights_template, csv_name = outputs[(REWARD, smooth)]

    processor = CartpoleProcessor(e_=ERR_N, e=ERR_P, smooth=smooth, surrogate=use_surrogate)
    cem = CEMAgent(processor=processor, **agent_kwargs)
    cem.compile()
    history = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

    # These variants write weights and CSV first, then evaluate.
    cem.save_weights(os.path.join(LOG_DIR, weights_template.format(ENV_NAME)), overwrite=True)
    pandas.DataFrame(history.history).to_csv(os.path.join(LOG_DIR, csv_name))
    cem.test(env, nb_episodes=5, visualize=False)
# Output head: one unit per action, normalised with a softmax.
for layer in (Dense(nb_actions), Activation('softmax')):
    model.add(layer)
print(model.summary())

# Agent setup: episode-parameter memory, CEM settings and a MujocoProcessor.
memory = EpisodeParameterMemory(limit=10000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=1000,
    nb_steps_warmup=2000,
    train_interval=50,
    elite_frac=0.05,
    noise_decay_const=0.0,
    noise_ampl=1.0,
    processor=MujocoProcessor(),
)
cem.compile()

# Training runs without rendering (visualize=False); per the original author,
# rendering slows training down a lot and training can be aborted with Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# Store the weights once training has finished.
cem.save_weights('cem_CAV_params.h5f', overwrite=True)

# Evaluate for 5 rendered episodes.
cem.test(env, nb_episodes=5, visualize=True)