def main():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    action_shape = env.action_space.shape[0]
    observation_shape = env.observation_space.shape

    actor = create_actor(observation_shape, action_shape)

    action_input = Input(shape=(action_shape,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    critic = create_critic(observation_input, action_input)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=action_shape, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=action_shape, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3,
                      processor=BipedalProcessor())
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
    agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))
    agent.fit(env, nb_steps=3000000, visualize=False, verbose=2)
    agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
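# These snippets come from several keras-rl DDPG projects and assume broadly the
# same imports. A minimal sketch of the common ones (project-specific helpers such
# as create_actor, create_critic, BipedalProcessor, and the various ENV_NAME
# constants are not shown):
import gym
import numpy as np
from gym import wrappers
from keras.layers import Activation, Concatenate, Dense, Flatten, Input, concatenate
from keras.models import Model, Sequential
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess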
class DDPG():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]

        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        actor.add(Dense(8))
        actor.add(Activation('relu'))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        # actor.add(Dense(16))
        # actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('softmax'))
        # print(actor.summary())

        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + Env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation], name='concatenate')
        x = Dense(5)(x)
        x = Activation('relu')(x)
        x = Dense(8)(x)
        x = Activation('relu')(x)
        x = Dense(5)(x)
        x = Activation('relu')(x)
        # x = Dense(32)(x)
        # x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        # print(critic.summary())

        memory = SequentialMemory(limit=100000, window_length=1)
        # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        random_process = None
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=32, nb_steps_warmup_actor=32,
                               random_process=random_process, gamma=0, target_model_update=0.001)
        self.agent.processor = ShowActionProcessor(self.agent, self.env)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def fit(self):
        history = self.agent.fit(self.env, action_repetition=1, nb_steps=20000, visualize=False,
                                 verbose=1, nb_max_episode_steps=10)
        return history

    def save_weights(self):
        # "portfolio" (typo fixed; must match load_weights below)
        self.agent.save_weights('./store/ddpg_{}_weights2.h5f'.format("portfolio"), overwrite=True)

    def test(self):
        history = self.agent.test(self.env, nb_episodes=1, visualize=False, nb_max_episode_steps=10)
        return history

    def load_weights(self):
        self.agent.load_weights('./store/ddpg_{}_weights2.h5f'.format("portfolio"))
def _train(self):
    env = CrazyflieEnvironment(self._cf)
    atexit.register(teardown_env, env, self._cf)
    np.random.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = self.actor_model(env, nb_actions)
    action_input, critic = self.critic_model(env, nb_actions)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    model_name = 'ddpg_{}_weights.h5f'.format('crazyflie')
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    # Compile before loading: keras-rl builds the target networks in compile(),
    # and load_weights() refreshes them.
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    if os.path.exists(model_name):
        agent.load_weights(model_name)
    try:
        agent.fit(env, nb_steps=50000, verbose=2)
        agent.test(env, nb_episodes=1)
    finally:
        agent.save_weights(model_name, overwrite=True)
def main(): """Create environment, build models, train.""" env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=3) #env = MarketEnv(("EUR", "CASH", "IDEALPRO", "USD"), max_quantity=20000, quantity_increment=20000, obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=5, afterhours=False) obs_size = np.product(env.observation_space.shape) # Actor model dropout = 0.1 actor = Sequential([ Flatten(input_shape=(1, ) + env.observation_space.shape), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(1, activation='tanh'), ]) print('Actor model') actor.summary() action_input = Input(shape=(1, ), name='action_input') observation_input = Input(shape=(1, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation]) x = BatchNormalization()(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(1, activation='linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print('\nCritic Model') critic.summary() memory = SequentialMemory(limit=EPISODES * STEPS_PER_EPISODE, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.5, mu=0., sigma=.5) agent = DDPGAgent( nb_actions=1, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=STEPS_PER_EPISODE * WARMUP_EPISODES, nb_steps_warmup_actor=STEPS_PER_EPISODE * WARMUP_EPISODES, random_process=random_process, gamma=0.95, target_model_update=0.01) agent.compile('rmsprop', metrics=['mae']) weights_filename = 'ddpg_{}_weights.h5f'.format(env.instrument.symbol) try: agent.load_weights(weights_filename) print( 'Using weights from {}'.format(weights_filename) ) # DDPGAgent actually uses two separate files for actor and critic derived from this filename except IOError: pass agent.fit(env, nb_steps=EPISODES * STEPS_PER_EPISODE, visualize=True, verbose=2, nb_max_episode_steps=STEPS_PER_EPISODE) agent.save_weights(weights_filename, overwrite=True)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=True, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('TrainedModels/ddpg_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
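# Note on the compile call above: keras-rl's DDPGAgent.compile accepts either a single
# optimizer or a two-element list, in which case the first optimizer is used for the
# actor and the second for the critic. [Adam(lr=1e-4), Adam(lr=1e-3)] therefore gives
# the actor a slower learning rate than the critic, matching the original DDPG paper.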
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
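# For reference, the Ornstein-Uhlenbeck process used in these snippets produces
# temporally correlated exploration noise, roughly
#   x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
# so theta pulls the noise back towards mu while sigma scales its magnitude; this
# suits inertial control tasks better than independent Gaussian noise.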
agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# agent.load_weights('models/ddpg/{}_weights.h5f'.format(ENV_NAME))
print('Fitting agent.')
agent.fit(env, nb_steps=5000, visualize=True, verbose=1,
          nb_max_episode_steps=max_training_session_length)
# agent.fit(env, nb_steps=500000, visualize=False, verbose=1, nb_max_episode_steps=max_training_session_length)
# agent.fit(env, nb_steps=10000, visualize=True, verbose=1, nb_max_episode_steps=max_training_session_length)

# After training is done, we save the final weights.
agent.save_weights('models/ddpg/{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# out_df = pd.DataFrame({'agent': [agent]})
# save = open('ddpg_{}_agent.pickle'.format(ENV_NAME), 'wb')
# pickle.dump(out_df, save)
# save.close()
# pickle.dump(agent, 'ddpg_{}_agent'.format(ENV_NAME))

# Finally, evaluate our algorithm for 5 episodes.
# agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=100.)  # delta_range is deprecated in keras-rl; delta_clip replaces it
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
            env.step(agent.forward(env.get_observation()))
class KerasDDPGAgent(object):
    """Thin wrapper around keras-rl's DDPGAgent."""

    def __init__(self, opts):
        self.metadata = {'discrete_actions': False}
        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        # actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + observation_space_shape))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('linear'))
        print(actor.summary())

        # critic network
        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + observation_space_shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        # Keras 2 keyword names (the original used the deprecated input=/output=).
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.99, target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity,
                       nb_max_episode_steps=200)

    def test(self, env, nb_episodes, visualize):
        self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize, nb_max_episode_steps=200)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        # Honor the caller's overwrite flag (the original hard-coded True).
        self.agent.save_weights(save_file, overwrite=overwrite)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    # print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

    if REWARD == "normal":
        ddpg_normal = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                                critic_action_input=action_input, memory=memory,
                                nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                                random_process=random_process, gamma=.99, target_model_update=1e-3)
        ddpg_normal.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        history_normal = ddpg_normal.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                         nb_max_episode_steps=200)

        # After training is done, we save the final weights.
        ddpg_normal.save_weights(os.path.join(LOG_DIR, 'ddpg_normal_{}_weights.h5f'.format(ENV_NAME)),
                                 overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        ddpg_normal.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))
    elif REWARD == "noisy":
        processor_noisy = PendulumSurrogateProcessor(weight=WEIGHT, surrogate=False, noise_type=NOISE_TYPE)
        ddpg_noisy = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.99, target_model_update=1e-3,
                               processor=processor_noisy)
        ddpg_noisy.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_noisy = ddpg_noisy.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                       nb_max_episode_steps=200)
        ddpg_noisy.save_weights(os.path.join(LOG_DIR, 'ddpg_noisy_{}_weights.h5f'.format(ENV_NAME)),
                                overwrite=True)
        ddpg_noisy.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))
    elif REWARD == "surrogate":
        processor_surrogate = PendulumSurrogateProcessor(weight=WEIGHT, surrogate=True, noise_type=NOISE_TYPE)
        ddpg_surrogate = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                                   critic_action_input=action_input, memory=memory,
                                   nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                                   random_process=random_process, gamma=.99, target_model_update=1e-3,
                                   processor=processor_surrogate)
        ddpg_surrogate.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_surrogate = ddpg_surrogate.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                               nb_max_episode_steps=200)
        ddpg_surrogate.save_weights(os.path.join(LOG_DIR, 'ddpg_surrogate_{}_weights.h5f'.format(ENV_NAME)),
                                    overwrite=True)
        ddpg_surrogate.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))
    else:
        raise NotImplementedError
x = Dense(20)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(20)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('tanh')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=Processor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
# agent.load_weights('ddpg_20181006160521_Ship_Env_weights.h5f')

for i in range(10):
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    agent.fit(env, nb_steps=50000, visualize=False, verbose=1, log_interval=5000, callbacks=[logger])
    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_{}_weights.h5f'.format(timestamp, 'Ship_Env'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=20000)
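# The bare Processor() passed above is a no-op pass-through. A minimal sketch of a
# custom subclass (the class name and clipping range here are hypothetical, not part
# of the original project):
from rl.core import Processor

class ClippedRewardProcessor(Processor):
    def process_reward(self, reward):
        # Clip rewards to [-1, 1] before the agent stores them.
        return np.clip(reward, -1., 1.)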
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0.0, sigma=0.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=nb_steps_warmup,
                  nb_steps_warmup_actor=nb_steps_warmup, random_process=random_process,
                  gamma=0.9, target_model_update=1e-3)
agent.compile(SGD(lr=1e-5, clipvalue=0.001), metrics=['mae'])

callbacks = [
    ModelIntervalCheckpoint(weights_name + '_{step}.h5f', interval=10_000),
    TrainEpisodeLogger(),
    TensorBoard()
]
agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1, callbacks=callbacks)
agent.save_weights(weights_name + '_final.h5f', overwrite=True)
# agent.test(env, nb_episodes=1, visualize=False)
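# ModelIntervalCheckpoint above fills the '{step}' placeholder with the current
# training step each time it saves, producing e.g. <weights_name>_10000.h5f,
# <weights_name>_20000.h5f, and so on.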
                  memory=memory, random_process=random_process,
                  nb_steps_warmup_actor=2048, nb_steps_warmup_critic=1024,
                  target_model_update=1000, gamma=0.9, batch_size=128,
                  memory_interval=1)
agent.compile([Adam(lr=3e-5), Adam(lr=3e-3)])

# Train for 75,000 simulation steps
agent.fit(env,
          nb_steps=75000,
          nb_max_start_steps=0,
          nb_max_episode_steps=10000,
          visualize=True,
          action_repetition=1,
          verbose=2,
          log_interval=10000,
          callbacks=[])

# Test the agent
hist = agent.test(env, nb_episodes=5, action_repetition=1, visualize=True,
                  nb_max_episode_steps=10000)
agent.save_weights('SavedWeightsConstWrap.h5', overwrite=True)
# Optionally, we can reload a previous model's weights and continue training from there.
# Remove the _actor or _critic from the filename; the load method automatically
# appends these.
WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights.h5f'
# agent.load_weights(WEIGHTS_FILENAME)

callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
# callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1)  # , nb_max_episode_steps=500

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a simply named version to make running tests immediately
# following training easier.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm.
agent.test(env, visualize=True)  # nb_max_episode_steps=500
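# As the comment at the top of this snippet notes, keras-rl derives two files from
# the base name. For illustration, saving with WEIGHTS_FILENAME above actually writes
#   weights/ddpg_planar_crane_continuous-v0_weights_actor.h5f
#   weights/ddpg_planar_crane_continuous-v0_weights_critic.h5f
# and load_weights() expects the same base name, appending the suffixes itself.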
class DDPG: """Deep Deterministic Policy Gradient Class This is an implementation of DDPG for continuous control tasks made using the high level keras-rl library. Args: env_name (str): Name of the gym environment weights_dir (str): Dir for storing model weights (for both actors and critic as separate files) actor_layers (list(int)): A list of int representing neurons in each subsequent the hidden layer in actor critic_layers (list(int)): A list of int representing neurons in each subsequent the hidden layer in actor n_episodes (int): Maximum training eprisodes visualize (bool): Whether a popup window with the environment view is required """ def __init__(self, env_name='MountainCarContinuous-v0', weights_dir="model_weights", actor_layers=[64, 64, 32], critic_layers=[128, 128, 64], n_episodes=200, visualize=True): self.env_name = env_name self.env = gym.make(env_name) np.random.seed(123) self.env.seed(123) self.actor_layers = actor_layers self.critic_layers = critic_layers self.n_episodes = n_episodes self.visualize = visualize self.n_actions = self.env.action_space.shape[0] self.n_states = self.env.observation_space.shape self.weights_file = os.path.join( weights_dir, 'ddpg_{}_weights.h5f'.format(self.env_name)) self.actor = None self.critic = None self.agent = None self.action_input = None def _make_actor(self): """Internal helper function to create an actor custom model """ self.actor = Sequential() self.actor.add(Flatten(input_shape=(1, ) + self.n_states)) for size in self.actor_layers: self.actor.add(Dense(size, activation='relu')) self.actor.add(Dense(self.n_actions, activation='linear')) self.actor.summary() def _make_critic(self): """Internal helper function to create an actor custom model """ action_input = Input(shape=(self.n_actions, ), name='action_input') observation_input = Input(shape=(1, ) + self.n_states, name='observation_input') flattened_observation = Flatten()(observation_input) input_layer = Concatenate()([action_input, flattened_observation]) hidden_layers = Dense(self.critic_layers[0], activation='relu')(input_layer) for size in self.critic_layers[1:]: hidden_layers = Dense(size, activation='relu')(hidden_layers) output_layer = Dense(1, activation='linear')(hidden_layers) self.critic = Model(inputs=[action_input, observation_input], outputs=output_layer) self.critic.summary() self.action_input = action_input def _make_agent(self): """Internal helper function to create an actor-critic custom agent model """ if self.actor is None: self._make_actor() if self.critic is None: self._make_critic() memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=self.n_actions, theta=.15, mu=0., sigma=.3) self.agent = DDPGAgent(nb_actions=self.n_actions, actor=self.actor, critic=self.critic, critic_action_input=self.action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) def _load_or_make_agent(self): """Internal helper function to load an agent model, creates a new if no model weights exists """ if self.agent is None: self._make_agent() if os.path.exists(self.weights_file): logger.info( "Found existing weights for the model for this environment. Loading..." 
            self.agent.load_weights(self.weights_file)

    def train(self):
        """Train the DDPG agent."""
        self._load_or_make_agent()
        self.agent.fit(self.env, nb_steps=50000, visualize=self.visualize, verbose=1,
                       nb_max_episode_steps=self.n_episodes)
        self.agent.save_weights(self.weights_file, overwrite=True)

    def test(self, nb_episodes=5):
        """Test the DDPG agent."""
        logger.info("Testing the agent with {} episodes...".format(nb_episodes))
        self.agent.test(self.env, nb_episodes=nb_episodes, visualize=self.visualize,
                        nb_max_episode_steps=200)
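# A minimal usage sketch for the DDPG class above (constructor defaults taken from
# its signature; the episode count is illustrative):
if __name__ == '__main__':
    ddpg = DDPG(env_name='MountainCarContinuous-v0', visualize=False)
    ddpg.train()
    ddpg.test(nb_episodes=5)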
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
plot_model(critic, to_file='critic.png', show_shapes=True)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_myenv_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 1 episode.
agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=200)
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.8, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
nb_steps = 800 * 1440  # alternatives tried: 1 * (env.periods - 2), 100 * (env.periods - 2), 100000 + 1870, env.periods - 2
agent.fit(env, nb_steps, visualize=True, verbose=2, nb_max_episode_steps=1440, log_interval=10)

plt.figure(0)
plt.plot(env.portfolio_value)
plt.figure(1)
noise_over_action_array = np.array(agent.noise_over_action)
noise_over_action_array = np.transpose(noise_over_action_array)
for i in range(nb_actions):
    plt.plot(noise_over_action_array[i, :])
plt.show()

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights5.h5f'.format('Crypto'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_start_steps=0)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format("test"), overwrite=True)

observation = env.reset()  # Obtain an initial observation of the environment
while True:
    print(observation)  # Python 3 print (the original used Python 2 statements)
    # select_action and env.graphing are project-specific helpers, not keras-rl API.
    action = agent.select_action([observation])
    print(action)
    action = action.argmax()
    observation, reward, done, info = env.step(action)
    graph_stk, graph_holding, graph_liquidasset, graph_staticasset = env.graphing()
    if done:
        fig, axarr = plt.subplots(3, 1)
        fig.suptitle("DDPG Agent", fontsize=10)
        axarr[0].plot(graph_holding)
        axarr[0].set_title('Stocks held')
elif not args.HER and args.PER:
    print("\nTraining with Prioritised Experience Replay\n")
    save_data_path_local = 'PER/' + args.ENV_NAME + '.json'
elif args.HER and args.PER:
    print("\nTraining with Prioritised Hindsight Experience Replay\n")
    save_data_path_local = 'PHER/' + args.ENV_NAME + '.json'

if args.train:
    # Start training. You can always safely abort the training prematurely
    # using Ctrl + C (once).
    agent.fit(env, nb_steps=args.nb_train_steps, visualize=False, verbose=1,
              save_data_path=save_data_path_local, file_interval=args.file_interval,
              nb_max_episode_steps=args.max_step_episode)

# After training is done, we save the final weights and plot the training graph.
try:
    if args.HER and not args.PER:
        if args.train:
            agent.save_weights('HER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
        plot_af(file_path='HER/' + args.ENV_NAME + '.json', save_file_name='HER/' + args.ENV_NAME, plot_what='success')
        plot_af(file_path='HER/' + args.ENV_NAME + '.json', save_file_name='HER/' + args.ENV_NAME, plot_what='loss')
    elif not args.HER and args.PER:
        if args.train:
            agent.save_weights('PER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
        plot_af(file_path='PER/' + args.ENV_NAME + '.json', save_file_name='PER/' + args.ENV_NAME, plot_what='success')
        plot_af(file_path='PER/' + args.ENV_NAME + '.json', save_file_name='PER/' + args.ENV_NAME, plot_what='loss')
    elif args.HER and args.PER:
        if args.train:
            agent.save_weights('PHER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
        plot_af(file_path='PHER/' + args.ENV_NAME + '.json', save_file_name='PHER/' + args.ENV_NAME, plot_what='success')
        plot_af(file_path='PHER/' + args.ENV_NAME + '.json', save_file_name='PHER/' + args.ENV_NAME, plot_what='loss')
except KeyboardInterrupt:
    pass
                                              sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()
else:
    agent.load_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()

if file_num % 100 == 0:
    print("finished training on", file_num, "files")
def run_ddpg():
    global N_NODE_NETWORK
    env = SnakeGymContinuous()
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=500, nb_steps_warmup_actor=500,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile('adam', metrics=['mae'])

    agent.fit(env, nb_steps=50000, visualize=True, verbose=2, nb_max_episode_steps=200)
    agent.save_weights('ddpg_SnakeGymContinuous_weights.h5f', overwrite=True)
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
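# Passing the string 'adam' works because keras-rl resolves optimizer strings through
# keras.optimizers.get; note that both the actor and the critic then end up with
# default Adam settings, rather than the per-network learning rates used elsewhere.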
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(critic.summary())

    memory = SequentialMemory(limit=1000000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')  # added to continue training
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()
else:
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()

if file_num % 10 == 0:
    print("finished training on", file_num, "files")
file_num += 1

# loop through testing envs
for file in os.scandir("/home/bdb3m/swmm_rl/syn_inp_test"):
    if file.name.endswith('.inp'):
def train_with_params(sigma_v=0., sigma_o=0., test=False):
    ENV_NAME = 'PongSolo'
    conf_name = '{}_sv_{}_so_{}'.format(ENV_NAME, sigma_v, sigma_o)  # sv, so = sigma_v and sigma_orientation

    # Get the environment and extract the number of actions.
    env = EnvPongSolo(sigma_v=sigma_v, sigma_o=sigma_o)  # fixed: sigma_o was mistakenly passed sigma_v
    np.random.seed(123)
    # assert len(env.action_space.shape) == 1
    nb_actions = 1
    leaky_alpha = 0.2

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(100))
    actor.add(LeakyReLU(leaky_alpha))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])  # Keras 2 replacement for merge(..., mode='concat')
    x = Dense(200)(x)
    x = LeakyReLU(leaky_alpha)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    n_steps = 5000000
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=1., mu=0., sigma=.3,
                                              sigma_min=0.01, n_steps_annealing=n_steps)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    directory_weights = "weights/ddpg/{}".format(conf_name)
    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)

    if not test:
        perfCheckPoint = ModelPerformanceCheckpoint(
            '{}/checkpoint_avg{}_steps{}'.format(directory_weights, '{}', '{}'), 800)
        agent.fit(env, nb_steps=n_steps, visualize=False, verbose=2, nb_max_episode_steps=200,
                  callbacks=[perfCheckPoint])
        # After training is done, we save the final weights.
        agent.save_weights('{}/final.h5f'.format(directory_weights), overwrite=True)
        # Finally, evaluate our algorithm for 100 episodes.
        agent.test(env, nb_episodes=100, visualize=False, nb_max_episode_steps=200)
    else:
        agent.load_weights('{}/final.h5f'.format(directory_weights))
        agent.test(env, nb_episodes=1000, visualize=False, nb_max_episode_steps=200)
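# With sigma_min and n_steps_annealing set, keras-rl linearly anneals the OU noise's
# sigma from .3 down to .01 over the n_steps budget, so exploration shrinks as the
# policy improves.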
class Agent:
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env
        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()
        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=0.75,
                                                       mu=0.5, sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.actor, critic=self.critic,
                               critic_action_input=action_input, memory=self.memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=self.random_process, gamma=.99,
                               target_model_update=1e-3, processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')

    def build_loss(self):
        return ['mse']

    def build_actor(self, env):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(self.nb_actions, activation='hard_sigmoid'))
        actor.summary()
        inD = Input(shape=(1,) + env.observation_space.shape)
        out = actor(inD)
        return Model(inD, out)

    def build_critic(self, env):
        action_input = Input(shape=(self.nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Dense(64, activation='relu')(flattened_observation)
        x = Concatenate()([x, action_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(1)(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        critic.summary()
        return critic, action_input

    def build_sym_actor(self):
        # Build swap matrices that mirror left/right state and action components.
        stateSwap = []
        actionSwap = []
        state_desc = self.env.get_state_desc()
        for x in state_desc.keys():
            keys = list(state_desc[x].keys())
            for (k, key) in enumerate(keys):
                if '_r' in key:
                    i = keys.index(key.replace('_r', '_l'))
                    if i != -1:
                        stateSwap += [(k, i), (i, k)]
        muscle_list = []
        for i in range(self.env.osim_model.muscleSet.getSize()):
            muscle_list.append(self.env.osim_model.muscleSet.get(i).getName())
        for (k, key) in enumerate(muscle_list):
            if '_r' in key:
                i = muscle_list.index(key.replace('_r', '_l'))
                if i != -1:
                    actionSwap += [(k, i), (i, k)]
        stateSwapMat = np.zeros((self.nb_states, self.nb_states))
        actionSwapMat = np.zeros((self.nb_actions, self.nb_actions))
        for (i, j) in stateSwap:
            stateSwapMat[i, j] = 1
        for (i, j) in actionSwap:
            actionSwapMat[i, j] = 1

        def ssT(shape, dtype=None):
            if shape != stateSwapMat.shape:
                raise Exception("State Swap Tensor Shape Error")
            return K.variable(stateSwapMat, dtype=dtype)

        def asT(shape, dtype=None):
            if shape != actionSwapMat.shape:
                raise Exception("Action Swap Tensor Shape Error")
            return K.variable(actionSwapMat, dtype=dtype)

        model1 = Sequential()
        model1.add(Dense(self.nb_states, input_shape=(1,) + self.env.observation_space.shape,
                         trainable=False, kernel_initializer=ssT, bias_initializer='zeros'))
        inD = Input(shape=(1,) + self.env.observation_space.shape)
        symState = model1(inD)
        symPol = self.actor(symState)
        model2 = Sequential()
        model2.add(Dense(self.nb_actions, input_shape=(1, self.nb_actions),
                         trainable=False, kernel_initializer=asT, bias_initializer='zeros'))
        out = model2(symPol)
        return Model(inD, out)
    def fit(self, **kwargs):
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        out = self.agent.fit(self.env, **kwargs)
        print("\n\ndo symmetric loss backpropagation\n\n")
        states = np.random.normal(0, 10, (kwargs['nb_steps'] // 200, 1, self.nb_states))
        actions = self.actor.predict_on_batch(states)
        self.sym_actor.train_on_batch(states, actions)
        return out

    def test(self, **kwargs):
        print("testing")
        print("VA:", self.env.get_VA())
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        return self.agent.test(self.env, **kwargs)

    def test_get_steps(self, **kwargs):
        return self.test(**kwargs).history['nb_steps'][-1]

    def save_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.save_weights(filename.format("opensim"), overwrite=True)
        self.save_processor()

    def load_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.load_weights(filename.format("opensim"))
        self.load_processor()

    def search_VA(self):
        # 1-D line search
        state = self.env.get_VA()
        goal = 0.0
        if abs(state - goal) < 0.01:
            self.env.upd_VA(goal)
            return
        steps = self.test_get_steps(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
        dv = 0.0
        dsteps = steps
        while state - dv > goal and dsteps > 0.8 * steps:
            dv += 0.02
            self.env.upd_VA(state - dv)
            dsteps = self.test_get_steps(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
        if abs((state - dv) - goal) < 0.01:
            self.env.upd_VA(goal)
        else:
            dv -= 0.02
            self.env.upd_VA(state - dv)

    def save_processor(self):
        np.savez('osim-rl/processor.npz',
                 _sum=self.processor.normalizer._sum,
                 _count=np.array([self.processor.normalizer._count]),
                 _sumsq=self.processor.normalizer._sumsq,
                 mean=self.processor.normalizer.mean,
                 std=self.processor.normalizer.std)

    def load_processor(self):
        f = np.load('osim-rl/processor.npz')
        dtype = f['_sum'].dtype
        if self.processor.normalizer is None:
            self.processor.normalizer = WhiteningNormalizer(
                shape=(1,) + self.env.observation_space.shape, dtype=dtype)
        self.processor.normalizer._sum = f['_sum']
        self.processor.normalizer._count = int(f['_count'][0])
        self.processor.normalizer._sumsq = f['_sumsq']
        self.processor.normalizer.mean = f['mean']
        self.processor.normalizer.std = f['std']
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
# print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

train_steps = 10000
# agent.load_weights('ddpg_swmm_weights_{}_depth_{}.h5f'.format(train_steps, Depth))
# agent.load_weights('ddpg_swmm_weights_{}.h5f'.format(train_steps))
agent.fit(env, nb_steps=train_steps, verbose=1)
agent.save_weights('ddpg_swmm_weights_{}_depth_{}.h5f'.format(train_steps, Depth), overwrite=True)

history = agent.test(env, nb_episodes=1, visualize=False, nb_max_start_steps=0)
all_actions = np.array(history.history['action'])
all_states = np.array(history.history['states'])
all_depths = all_states[:, :, :3]
all_flooding = all_states[:, :, 3:]
st_max = [Depth] * len(all_depths[0])
j3_max = [2] * len(all_depths[0])

# plot average rewards per episode
avg_reward = []
num_episodes = int(memory.nb_entries / env.T)
for i in range(num_episodes):
    temp_rwd = memory.rewards.data[env.T * i: env.T * i + env.T]
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
plot_model(critic, to_file='critic.png', show_shapes=True)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=10000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_stokes_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
## Initialize Replay Buffer ##
# window_length is mainly useful for Atari-style environments: how many consecutive
# frames to stack so the agent can infer velocities (of the ball, etc.).
memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)

## Random process (exploration) ##
random_process = OrnsteinUhlenbeckProcess(theta=THETA, mu=MEAN, sigma=SIGMA, size=action_size)

## DDPG agent parameters ##
agent = DDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  random_process=random_process, gamma=DISC_FACT,
                  target_model_update=TARGET_MODEL_UPDATE, batch_size=BATCH_SIZE)
# keras-rl expects [actor_optimizer, critic_optimizer], in that order
# (the original passed the critic optimizer first).
agent.compile(optimizer=[opti_actor, opti_critic], metrics=['mae'])

##### TRAIN #####
if args.train:
    check_overwrite(args.model)
    history = agent.fit(env, nb_steps=N_STEPS_TRAIN, visualize=args.visualize,
                        verbose=VERBOSE, log_interval=LOG_INTERVAL)
    agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)
    save_plot_reward(history, args.model, params)

##### TEST #####
if not args.train:
    agent.load_weights(FILES_WEIGHTS_NETWORKS)
    history = agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
    save_result(history, args.model, params)
callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
# callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1,
          action_repetition=5)  # , nb_max_episode_steps=500

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a simply named version to make running tests immediately
# following training easier.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
# agent.test(env, visualize=True)  # nb_max_episode_steps=500
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
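# target_model_update=1e-3 (< 1) selects soft target updates in keras-rl:
# target_weights = 1e-3 * weights + (1 - 1e-3) * target_weights after each training
# step. A value >= 1 would instead hard-copy the weights every that many steps.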
'''
# agent.load_weights('fit-weights.h5f')

''' fit '''
# note: 'imitation_leaning_time' follows the custom learning() method's own kwarg spelling
history = agent.learning(env, policy, policy_list, nb_steps=1e7, visualize=False,
                         log_interval=1000, verbose=1, nb_max_episode_steps=4000,
                         imitation_leaning_time=1e4, reinforcement_learning_time=9e4)
# plt.plot(history.history['metrics'])
# plt.plot(history.history['reward'])
# plt.show()
sio.savemat(ENV_NAME + '-' + nowtime + '/fit.mat', history.history)

# After training is done, we save the final weights.
agent.save_weights(ENV_NAME + '-' + nowtime + '/fit-weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
history = agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=5000)
sio.savemat(ENV_NAME + '-' + nowtime + '/test-final.mat', history.history)
                                          sigma=.3)
action_input = Input(shape=(n_actions,), name='action_input')

# create an agent and compile
agent = DDPGAgent(nb_actions=n_actions,
                  actor=ac.create_actor_model(env.observation_space.shape, n_actions),
                  critic=ac.create_critic_model(env.observation_space.shape, action_input),
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.01, clipnorm=1.), metrics=['mae'])

# train the agent
agent.fit(env, nb_steps=30000, visualize=False, verbose=1, nb_max_episode_steps=200)

# save the agent's weights
agent.save_weights('ddpg_weights.h5f', overwrite=True)

# evaluate for 10 episodes
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
def main_function(args, data):
    #### INITIALISE THE CONSTANTS ####

    ## Model ##
    SIZE_HIDDEN_LAYER_ACTOR = data['SIZE_HIDDEN_LAYER_ACTOR'][0]
    LR_ACTOR = data['LR_ACTOR'][0]
    SIZE_HIDDEN_LAYER_CRITIC = data['SIZE_HIDDEN_LAYER_CRITIC'][0]
    LR_CRITIC = data['LR_CRITIC'][0]
    DISC_FACT = data['DISC_FACT'][0]
    TARGET_MODEL_UPDATE = data['TARGET_MODEL_UPDATE'][0]
    BATCH_SIZE = data['BATCH_SIZE'][0]
    REPLAY_BUFFER_SIZE = data['REPLAY_BUFFER_SIZE'][0]

    ## Exploration ##
    THETA = data['THETA'][0]
    SIGMA = data['SIGMA'][0]
    SIGMA_MIN = data['SIGMA_MIN'][0]
    N_STEPS_ANNEALING = data['N_STEPS_ANNEALING'][0]

    ## Acceleration ##
    ACTION_REPETITION = data['ACTION_REPETITION'][0]
    INTEGRATOR_ACCURACY = data['INTEGRATOR_ACCURACY'][0]

    ## Simulation ##
    N_STEPS_TRAIN = int(args.step)
    N_EPISODE_TEST = 100
    if args.visualize:
        N_EPISODE_TEST = 3
    VERBOSE = 1  # 0: no report; 1: report every LOG_INTERVAL steps; 2: report every episode
    LOG_INTERVAL = 500

    ## Save weights ##
    if not os.path.exists('weights'):
        os.mkdir('weights')
        print("Directory 'weights' created")
    FILES_WEIGHTS_NETWORKS = './weights/' + args.model + '.h5f'

    #### LOAD THE ENVIRONMENT ####
    if args.prosthetic:
        env = ProsContinueRewardWrapper(
            ProstheticsEnv(visualize=args.visualize, integrator_accuracy=INTEGRATOR_ACCURACY))
    if not args.prosthetic:
        env = CustomDoneOsimWrapper(
            CustomRewardWrapper(
                RelativeMassCenterObservationWrapper(
                    NoObstacleObservationWrapper(
                        L2RunEnv(visualize=args.visualize, integrator_accuracy=0.005)))))
    env.reset()

    ## Examine the action space ##
    action_size = env.action_space.shape[0]
    # action_size = int(env.action_space.shape[0] / 2)  # for symmetry
    print('Size of each action:', action_size)

    ## Examine the state space ##
    state_size = env.observation_space.shape[0]
    print('Size of state:', state_size)

    #### ACTOR / CRITIC ####
    ## Actor (mu) ##
    input_shape = (1, env.observation_space.shape[0])  # same for both environments
    observation_input = Input(shape=input_shape, name='observation_input')
    x = Flatten()(observation_input)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(action_size)(x)
    x = Activation('sigmoid')(x)
    actor = Model(inputs=observation_input, outputs=x)
    opti_actor = Adam(lr=LR_ACTOR)

    ## Critic (Q) ##
    action_input = Input(shape=(action_size,), name='action_input')
    x = Flatten()(observation_input)
    x = concatenate([action_input, x])
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    opti_critic = Adam(lr=LR_CRITIC)

    #### SET UP THE AGENT ####
    ## Initialize the replay buffer ##
    memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)

    ## Random process (exploration) ##
    random_process = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA,
                                              sigma_min=SIGMA_MIN, size=action_size,
                                              n_steps_annealing=N_STEPS_ANNEALING)
    # random_process_l = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA, sigma_min=SIGMA_MIN,
    #                                             size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    # random_process_r = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA, sigma_min=SIGMA_MIN,
    #                                             size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    ## DDPG agent parameters ##
    # agent = SymmetricDDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
    #                            critic_action_input=action_input,
    #                            memory=memory, random_process_l=random_process_l,
    #                            random_process_r=random_process_r,
    #                            gamma=DISC_FACT, target_model_update=TARGET_MODEL_UPDATE,
    #                            batch_size=BATCH_SIZE)
    agent = DDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      random_process=random_process, gamma=DISC_FACT,
                      target_model_update=TARGET_MODEL_UPDATE,
                      batch_size=BATCH_SIZE)
    # keras-rl expects the actor optimizer first, then the critic optimizer.
    agent.compile(optimizer=[opti_actor, opti_critic])

    #### TRAIN #####
    logdir = "keras_logs/" + datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
    robustensorboard = RobustTensorBoard(log_dir=logdir, hyperparams=data)
    saveBest = SaveBestEpisode()

    if args.train:
        if args.resume:
            agent.load_weights(FILES_WEIGHTS_NETWORKS)
        else:
            check_overwrite(args.model)

        agent.fit(env, nb_steps=N_STEPS_TRAIN, visualize=args.visualize,
                  verbose=VERBOSE, log_interval=LOG_INTERVAL,
                  callbacks=[robustensorboard, saveBest],
                  action_repetition=ACTION_REPETITION)
        agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)

    #### TEST #####
    if not args.train:
        agent.load_weights(FILES_WEIGHTS_NETWORKS)
        agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
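# Neither RobustTensorBoard nor SaveBestEpisode is defined in this file. As a
# rough idea of what the latter might do, here is a minimal sketch built on
# keras-rl's Callback interface; the class body and the weights path are
# assumptions, not the project's actual implementation:
from rl.callbacks import Callback

class SaveBestEpisodeSketch(Callback):
    """Save the agent's weights whenever an episode beats the best reward so far."""

    def __init__(self, filepath='./weights/best_episode.h5f'):  # hypothetical path
        self.filepath = filepath
        self.best_reward = -float('inf')

    def on_episode_end(self, episode, logs={}):
        # keras-rl reports the cumulated episode reward in logs['episode_reward'];
        # fit() binds the agent to self.model via set_model().
        reward = logs.get('episode_reward', -float('inf'))
        if reward > self.best_reward:
            self.best_reward = reward
            self.model.save_weights(self.filepath, overwrite=True)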
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in
# tensorflow.keras optimizer and even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(learning_rate=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show,
# but this slows down training quite a lot. You can always safely abort the
# training prematurely using Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights(f'ddpg_{ENV_NAME}_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
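# A note on target_model_update (hedged): in keras-rl a value below 1, like the
# 1e-3 above, is treated as a soft-update coefficient tau, so the target networks
# track the online networks a little after every training step. An illustrative
# sketch of the rule keras-rl applies internally, weight tensor by weight tensor:
def soft_update(target_weights, online_weights, tau=1e-3):
    # new_target = tau * online + (1 - tau) * old_target
    return [tau * w + (1.0 - tau) * tw
            for w, tw in zip(online_weights, target_weights)]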
                           delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

prefix = args.output if args.output else "%s_s%f_t%f" % (args.env, float(args.sigma), float(args.theta))

# Okay, now it's time to learn something! We visualize the training here for show,
# but this slows down training quite a lot. You can always safely abort the
# training prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000,
              prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
            obs = env.get_observation()
            # Python 3 equivalent of the old trailing-comma print: overwrite the line in place.
            print("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]), end='')
            env.step(agent.forward(obs))
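# The construction of `agent`, `memory` and `random_process` is truncated above
# (only the trailing delta_range argument survives). A plausible sketch of the
# exploration noise implied by the --sigma and --theta flags, assuming keras-rl's
# OrnsteinUhlenbeckProcess; this is an assumption, not the missing original code:
from rl.random import OrnsteinUhlenbeckProcess

random_process = OrnsteinUhlenbeckProcess(theta=float(args.theta), mu=0.,
                                          sigma=float(args.sigma),
                                          size=env.noutput)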