Example #1
def create_agent_cdqn(env, args):
    # Build the V, mu, and L networks, then configure and compile the NAF agent.
    # You can use any built-in Keras optimizer and metrics.
    # processor = PendulumProcessor()
    nb_actions = env.action_space.shape[0]
    V_model = create_V(env, args)
    mu_model = create_mu(env, args)
    L_model = create_L(env, args)
    memory = SequentialMemory(limit=args.memory, window_length=args.window)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)
    cdqn = ContinuousDQNAgent(nb_actions=nb_actions,
                              V_model=V_model,
                              L_model=L_model,
                              mu_model=mu_model,
                              memory=memory,
                              nb_steps_warmup=args.memory,
                              random_process=random_process,
                              gamma=.99,
                              target_model_update=1e-3)

    # , processor=processor)
    cdqn.compile(RMSprop(lr=1e-4, clipnorm=1.), metrics=['mae'])
    return cdqn
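The helper above leans on companion builders (create_V, create_mu, create_L) and an args object carrying memory and window fields, none of which are shown here. A minimal, hypothetical driver under those assumptions might look like this (the flag names are illustrative, not from the original repository):

import argparse
import gym

# Hypothetical driver for create_agent_cdqn; --memory and --window mirror the
# attributes the function reads (args.memory, args.window) but are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument('--memory', type=int, default=100000)
parser.add_argument('--window', type=int, default=1)
args = parser.parse_args()

env = gym.make('Pendulum-v0')
agent = create_agent_cdqn(env, args)
agent.fit(env, nb_steps=50000, visualize=False, verbose=1)
agent.save_weights('cdqn_pendulum_weights.h5f', overwrite=True)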
Example #2
    def configure(self, observation_space_shape, nb_actions):

        # Build all necessary models: V, mu, and L networks.
        V_model = Sequential()
        V_model.add(Flatten(input_shape=(1, ) + observation_space_shape))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(1))
        V_model.add(Activation('linear'))
        print(V_model.summary())

        mu_model = Sequential()
        mu_model.add(Flatten(input_shape=(1, ) + observation_space_shape))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(nb_actions))
        mu_model.add(Activation('linear'))
        print(mu_model.summary())

        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space_shape,
                                  name='observation_input')
        x = merge([action_input, Flatten()(observation_input)], mode='concat')
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
        x = Activation('linear')(x)
        L_model = Model(input=[action_input, observation_input], output=x)
        print(L_model.summary())

        # Finally, we configure and compile our agent. You can use any
        # built-in Keras optimizer and metrics.
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                  mu=0.,
                                                  sigma=.3,
                                                  size=nb_actions)
        self.agent = ContinuousDQNAgent(nb_actions=nb_actions,
                                        V_model=V_model,
                                        L_model=L_model,
                                        mu_model=mu_model,
                                        memory=memory,
                                        nb_steps_warmup=100,
                                        random_process=random_process,
                                        gamma=.99,
                                        target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
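A note on the three heads: NAF (normalized advantage functions) decomposes Q(s, a) = V(s) + A(s, a), where the advantage is the quadratic form A(s, a) = -1/2 (a - mu(s))^T L(s) L(s)^T (a - mu(s)) and L(s) is lower triangular. That is why L_model emits (nb_actions^2 + nb_actions) // 2 values: exactly the free entries of a lower-triangular matrix. The sketch below illustrates the idea with NumPy; it is a simplified illustration, not keras-rl's internal implementation (which, for example, also exponentiates the diagonal of L).

import numpy as np

def naf_q_value(v, mu, l_flat, action):
    # Unpack the (n*n + n) // 2 network outputs into a lower-triangular L(s).
    n = mu.shape[0]
    L = np.zeros((n, n))
    L[np.tril_indices(n)] = l_flat
    # P(s) = L L^T is positive semi-definite, so the advantage is <= 0 and is
    # maximised exactly at a = mu(s), making mu the greedy action.
    P = L @ L.T
    diff = action - mu
    advantage = -0.5 * diff @ P @ diff
    return v + advantage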
Example #3
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))
    V_model.add(Activation('linear'))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    mu_model.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    x = merge([action_input, Flatten()(observation_input)], mode='concat')
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    x = Activation('linear')(x)
    L_model = Model(input=[action_input, observation_input], output=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)
    agent = ContinuousDQNAgent(nb_actions=nb_actions,
                               V_model=V_model,
                               L_model=L_model,
                               mu_model=mu_model,
                               memory=memory,
                               nb_steps_warmup=50,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env,
              nb_steps=400,
              visualize=False,
              verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env,
                   nb_episodes=2,
                   visualize=False,
                   nb_max_episode_steps=100)
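This smoke test only checks that fit and test complete without raising. If you also want to assert on performance, keras-rl's test() returns a Keras History object; in recent versions its history dict carries a per-episode 'episode_reward' list (worth verifying against your installed version):

# Assumes h.history exposes 'episode_reward'; check your keras-rl version.
rewards = h.history['episode_reward']
assert len(rewards) == 2              # one entry per test episode
print('mean test reward:', np.mean(rewards))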
Example #4
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
x = Activation('linear')(x)
L_model = Model(input=[action_input, observation_input], output=x)
print(L_model.summary())

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and metrics.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = ContinuousDQNAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                           memory=memory, nb_steps_warmup=100, random_process=random_process,
                           gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
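To evaluate a saved run later without retraining, rebuild the same V, mu, and L models and the agent exactly as above, compile it, and load the stored weights; a brief sketch under those assumptions:

# Re-create and compile the agent as above, then restore the weights saved by fit().
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.load_weights('cdqn_{}_weights.h5f'.format(ENV_NAME))
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)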
Example #5
x = Activation('linear')(x)
L_model = Model(input=[action_input, observation_input], output=x)
print(L_model.summary())

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and metrics.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.3,
                                          size=nb_actions)
agent = ContinuousDQNAgent(nb_actions=nb_actions,
                           V_model=V_model,
                           L_model=L_model,
                           mu_model=mu_model,
                           memory=memory,
                           nb_steps_warmup=100,
                           random_process=random_process,
                           gamma=.99,
                           target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=50000,
          visualize=True,
          verbose=1,
          nb_max_episode_steps=200)
class KerasNAFAgent(object):
    '''
    Thin keras-rl NAF (continuous DQN) agent wrapper exposing configure,
    train, test, and weight-persistence helpers.
    '''
    def __init__(self, opts):
        self.metadata = {
            'discrete_actions': False,
        }

        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):

        # Build all necessary models: V, mu, and L networks.
        V_model = Sequential()
        V_model.add(Flatten(input_shape=(1, ) + observation_space_shape))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(1))
        V_model.add(Activation('linear'))
        print(V_model.summary())

        mu_model = Sequential()
        mu_model.add(Flatten(input_shape=(1, ) + observation_space_shape))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(nb_actions))
        mu_model.add(Activation('linear'))
        print(mu_model.summary())

        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space_shape,
                                  name='observation_input')
        x = merge([action_input, Flatten()(observation_input)], mode='concat')
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
        x = Activation('linear')(x)
        L_model = Model(input=[action_input, observation_input], output=x)
        print(L_model.summary())

        # Finally, we configure and compile our agent. You can use any
        # built-in Keras optimizer and metrics.
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                  mu=0.,
                                                  sigma=.3,
                                                  size=nb_actions)
        self.agent = ContinuousDQNAgent(nb_actions=nb_actions,
                                        V_model=V_model,
                                        L_model=L_model,
                                        mu_model=mu_model,
                                        memory=memory,
                                        nb_steps_warmup=100,
                                        random_process=random_process,
                                        gamma=.99,
                                        target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.agent.fit(env,
                       nb_steps=nb_steps,
                       visualize=visualize,
                       verbose=verbosity,
                       nb_max_episode_steps=200)

    def test(self, env, nb_episodes, visualize):
        self.agent.test(env,
                        nb_episodes=nb_episodes,
                        visualize=visualize,
                        nb_max_episode_steps=200)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        # After training is done, we save the final weights.
        self.agent.save_weights(save_file, overwrite=overwrite)
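A hypothetical end-to-end use of KerasNAFAgent on Pendulum-v0 (the opts dict is stored but not otherwise used by the methods shown, so an empty dict suffices):

import gym

env = gym.make('Pendulum-v0')
agent = KerasNAFAgent(opts={})
agent.configure(env.observation_space.shape, env.action_space.shape[0])
agent.train(env, nb_steps=50000, visualize=False, verbosity=1)
agent.save_weights('naf_pendulum_weights.h5f', overwrite=True)
agent.test(env, nb_episodes=5, visualize=False)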