Code Example #1
def train(index, policy_nb_steps, fit_nb_steps):

    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models((WINDOW_LENGTH, ) + INPUT_SHAPE,
                                              nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)

    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=v_model,
                     L_model=l_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=100,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3,
                     processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    agent.fit(
        environment,
        callbacks=callbacks,
        #nb_steps=1750000,
        nb_steps=fit_nb_steps,
        log_interval=10000,
        visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
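The excerpt above calls a project-specific build_models() helper that is not shown. A minimal sketch of what such a helper could return, assuming plain dense layers (the actual CarRacing version presumably uses convolutional layers on the image input); layer names and sizes are illustrative, not the project's code:

from keras.models import Model, Sequential
from keras.layers import Input, Flatten, Dense, Concatenate

def build_models(input_shape, nb_actions):
    # V network: scalar state value V(s).
    v_model = Sequential([Flatten(input_shape=input_shape),
                          Dense(64, activation='relu'),
                          Dense(1)])

    # mu network: greedy action mu(s).
    mu_model = Sequential([Flatten(input_shape=input_shape),
                           Dense(64, activation='relu'),
                           Dense(nb_actions)])

    # L network: takes state and action, outputs the lower-triangular entries used to
    # build the quadratic advantage term (see the note after Code Example #2).
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=input_shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    l_model = Model(inputs=[action_input, observation_input], outputs=x)

    return v_model, mu_model, l_model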
Code Example #2
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=V_model,
                     L_model=L_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=50,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env,
              nb_steps=400,
              visualize=False,
              verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env,
                   nb_episodes=2,
                   visualize=False,
                   nb_max_episode_steps=100)
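In all of these examples the L network's final Dense layer has (nb_actions * nb_actions + nb_actions) // 2 units. NAF models the advantage as A(s, a) = -1/2 (a - mu(s))^T P(s) (a - mu(s)) with P(s) = L(s) L(s)^T, where L(s) is lower triangular, so the network only has to predict the n(n + 1)/2 free entries of that triangle. A quick sanity check of the formula:

for n in (1, 2, 3):
    print(n, (n * n + n) // 2)  # prints 1 1, 2 3, 3 6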
Code Example #3
    def __init__(self, env: gym.Env, logger=Logger()):
        nb_actions = env.action_space.shape[0]

        V_model = Sequential()
        V_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(16))
        V_model.add(Activation('relu'))
        V_model.add(Dense(1))
        V_model.add(Activation('linear'))

        mu_model = Sequential()
        mu_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(16))
        mu_model.add(Activation('relu'))
        mu_model.add(Dense(nb_actions))
        mu_model.add(Activation('linear'))

        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name='observation_input')
        x = Concatenate()([action_input, Flatten()(observation_input)])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
        x = Activation('linear')(x)
        L_model = Model(inputs=[action_input, observation_input], outputs=x)

        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                  mu=0.,
                                                  sigma=.3,
                                                  size=nb_actions)
        agent = NAFAgent(nb_actions=nb_actions,
                         V_model=V_model,
                         L_model=L_model,
                         mu_model=mu_model,
                         memory=memory,
                         random_process=random_process)
        agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
        self.agent = agent
        self.env = env
        super().__init__(env, logger)
Code Example #4
File: naf.py  Project: vkf63516/escape_room
def create(env):
  np.random.seed(config.current.domain_seed)
  env.seed(config.current.domain_seed)
  nb_actions = env.action_space.n

  # Build all necessary models: V, mu, and L networks.
  V_model = Sequential()
  V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
  V_model.add(Dense(16))
  V_model.add(Activation('relu'))
  V_model.add(Dense(16))
  V_model.add(Activation('relu'))
  V_model.add(Dense(16))
  V_model.add(Activation('relu'))
  V_model.add(Dense(1))
  V_model.add(Activation('linear'))

  mu_model = Sequential()
  mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
  mu_model.add(Dense(16))
  mu_model.add(Activation('relu'))
  mu_model.add(Dense(16))
  mu_model.add(Activation('relu'))
  mu_model.add(Dense(16))
  mu_model.add(Activation('relu'))
  mu_model.add(Dense(nb_actions))
  mu_model.add(Activation('linear'))

  action_input = Input(shape=(nb_actions,), name='action_input')
  observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
  x = Concatenate()([action_input, Flatten()(observation_input)])
  x = Dense(32)(x)
  x = Activation('relu')(x)
  x = Dense(32)(x)
  x = Activation('relu')(x)
  x = Dense(32)(x)
  x = Activation('relu')(x)
  x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
  x = Activation('linear')(x)
  L_model = Model(inputs=[action_input, observation_input], outputs=x)

  # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
  # even the metrics!
  processor = PendulumProcessor()
  memory = SequentialMemory(limit=100000, window_length=1)
  random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
  agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                   memory=memory, nb_steps_warmup=100, random_process=random_process,
                   gamma=.99, target_model_update=1e-3, processor=processor)
  agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
  return agent
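Several of these excerpts pass a PendulumProcessor that is never defined here. In keras-rl's own NAF Pendulum example the processor simply rescales the reward, so a minimal stand-in could look like the following (an assumption, not necessarily the exact class these projects use):

from rl.core import Processor

class PendulumProcessor(Processor):
    def process_reward(self, reward):
        # Pendulum-v0 returns fairly large negative rewards; scaling them down keeps the
        # Q-learning targets small.
        return reward / 10.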
Code Example #5
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=50, random_process=random_process,
                     gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
Code Example #6
memory = SequentialMemory(limit=10000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.3,
                                          size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions,
                 V_model=V_model,
                 L_model=L_model,
                 mu_model=mu_model,
                 memory=memory,
                 nb_steps_warmup=100,
                 random_process=random_process,
                 gamma=.99,
                 target_model_update=1e-3,
                 processor=processor)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=500,
          visualize=True,
          verbose=1,
          nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
Code Example #7
def init_naf(env, nb_actions):
    """ Initialize the NAF agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: NAF agent
    """
    # Build all necessary models: V, mu, and L networks.
    v_model = Sequential()
    v_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(16))
    v_model.add(Activation('relu'))
    v_model.add(Dense(1))
    v_model.add(Activation('linear'))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))
    mu_model.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    x = Activation('linear')(x)
    l_model = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    processor = PendulumProcessor()
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=v_model,
                     L_model=l_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=100,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3,
                     processor=processor)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.model_name = "NAF"
    return agent
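A hypothetical usage of init_naf(), following the same training/evaluation pattern as the other excerpts:

env = gym.make('Pendulum-v0')
agent = init_naf(env, nb_actions=env.action_space.shape[0])
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=200)
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)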
Code Example #8
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
x = Activation('linear')(x)
L_model = Model(inputs=[action_input, observation_input], outputs=x)
print(L_model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
processor = PendulumProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=.99, target_model_update=1e-3, processor=processor)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
Code Example #9
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.3,
                                          size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions,
                 V_model=V_model,
                 L_model=L_model,
                 mu_model=mu_model,
                 memory=memory,
                 nb_steps_warmup=100,
                 random_process=random_process,
                 gamma=.99,
                 target_model_update=1e-3,
                 processor=processor)
agent.compile(Adam(learning_rate=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=50000,
          visualize=True,
          verbose=1,
          nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights(f'cdqn_{ENV_NAME}_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
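To re-evaluate a trained agent later, the saved weights can be reloaded before calling test(), assuming the same models have been rebuilt so the architectures match:

agent.load_weights(f'cdqn_{ENV_NAME}_weights.h5f')
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)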