def train(index, policy_nb_steps, fit_nb_steps):
    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=v_model, L_model=l_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=100, random_process=random_process,
                     gamma=.99, target_model_update=1e-3, processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    agent.fit(environment,
              callbacks=callbacks,
              # nb_steps=1750000,
              nb_steps=fit_nb_steps,
              log_interval=10000,
              visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
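# The train() function above relies on a CarRacingProcessor that is not defined in this snippet.
# Purely as an illustration of what such a keras-rl Processor can look like, the sketch below
# grayscales/resizes the image observation and clips rewards, in the style of the Atari example
# that ships with keras-rl. The class name, the assumption that INPUT_SHAPE is a (width, height)
# tuple, and the exact preprocessing are guesses, not the original implementation.
import numpy as np
from PIL import Image
from rl.core import Processor

class CarRacingProcessorSketch(Processor):
    def process_observation(self, observation):
        # Store a small grayscale uint8 frame to keep the replay memory compact.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img).astype('uint8')

    def process_state_batch(self, batch):
        # Convert to float and rescale only when a batch is sampled for training.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)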
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=50, random_process=random_process,
                     gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
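    # A quick sanity check on the test run above: `h` is a Keras History object, and keras-rl
    # usually records per-episode results under 'episode_reward' (hedged sketch, not asserted
    # by the original test).
    print(h.history['episode_reward'])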
def __init__(self, env: gym.Env, logger=Logger()):
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))
    V_model.add(Activation('linear'))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    mu_model.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    x = Activation('linear')(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, random_process=random_process)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    self.agent = agent
    self.env = env
    super().__init__(env, logger)
def create(env):
    np.random.seed(config.current.domain_seed)
    env.seed(config.current.domain_seed)
    # NAF acts in a continuous (Box) action space, so the action dimension comes from the
    # space's shape rather than a discrete count.
    nb_actions = env.action_space.shape[0]

    # Build all necessary models: V, mu, and L networks.
    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))
    V_model.add(Activation('linear'))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    mu_model.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    x = Activation('linear')(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    processor = PendulumProcessor()
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=100, random_process=random_process,
                     gamma=.99, target_model_update=1e-3, processor=processor)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
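# PendulumProcessor is used by create() above but not defined in this snippet. In the NAF
# Pendulum example that ships with keras-rl it is a tiny Processor that only rescales the
# reward; the sketch below follows that pattern, but the class name and exact scaling factor
# here are assumptions, not necessarily the original code.
from rl.core import Processor

class PendulumProcessorSketch(Processor):
    def process_reward(self, reward):
        # Pendulum-v0 rewards lie roughly in [-16, 0]; shrinking them keeps the Q targets small.
        return reward / 100.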
memory = SequentialMemory(limit=10000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=.99, target_model_update=1e-3, processor=processor)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=500, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
def init_naf(env, nb_actions):
    """
    Initialize the NAF agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: NAF agent
    """
    # Build all necessary models: V, mu, and L networks.
    v_model = Sequential()
    v_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(1))
    v_model.add(Activation('linear'))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    mu_model.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    x = Activation('linear')(x)
    l_model = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    processor = PendulumProcessor()
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=v_model, L_model=l_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=100, random_process=random_process,
                     gamma=.99, target_model_update=1e-3, processor=processor)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.model_name = "NAF"
    return agent
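# Why the L network above ends in (nb_actions**2 + nb_actions) // 2 units: NAF parameterises
# the advantage as A(s, a) = -0.5 * (a - mu(s))^T P(s) (a - mu(s)) with P = L L^T, where L is a
# lower-triangular matrix filled from exactly that many outputs and given a positive diagonal
# so that P stays positive definite. keras-rl assembles this internally; the NumPy sketch below
# is only illustrative and is not part of the library API.
import numpy as np

def naf_advantage(l_flat, mu, action):
    """Illustrative only: build L from the flat network output and evaluate the NAF advantage."""
    n = mu.shape[0]
    L = np.zeros((n, n))
    rows, cols = np.tril_indices(n)
    L[rows, cols] = l_flat                          # fill the lower triangle row by row
    L[np.diag_indices(n)] = np.exp(np.diag(L))      # positive diagonal -> P is positive definite
    P = L @ L.T
    diff = action - mu
    return -0.5 * diff @ P @ diff

# Example with a 2-dimensional action space: the L network emits (4 + 2) // 2 = 3 values.
print(naf_advantage(np.array([0.1, -0.2, 0.3]), mu=np.zeros(2), action=np.array([0.5, -0.5])))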
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
x = Activation('linear')(x)
L_model = Model(inputs=[action_input, observation_input], outputs=x)
print(L_model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
processor = PendulumProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=.99, target_model_update=1e-3, processor=processor)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=.99, target_model_update=1e-3, processor=processor)
agent.compile(Adam(learning_rate=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights(f'cdqn_{ENV_NAME}_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
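# The weight file written above can be reloaded later to evaluate without retraining.
# Minimal sketch, assuming the same model/agent construction code has been re-run so that
# `agent`, `env`, and ENV_NAME exist and match the training run:
agent.load_weights(f'cdqn_{ENV_NAME}_weights.h5f')
agent.test(env, nb_episodes=10, visualize=False, nb_max_episode_steps=200)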