Example No. 1
def build_ddpg_model(env, actor_hidden_layers, critic_hidden_layers, gamma,
                     learning_rate):

    actor = build_actor(env, actor_hidden_layers)
    critic, action_input = build_critic(env, critic_hidden_layers)

    nb_actions = env.action_space.shape[0]
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.2,
                                              mu=0.1,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=gamma,
                      target_model_update=1e-3)

    agent.compile(Adam(lr=learning_rate, clipnorm=1.), metrics=['mae'])

    return agent
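
The build_actor and build_critic helpers are defined elsewhere in that project. A minimal sketch of what they might look like, assuming the hidden-layer arguments are lists of layer widths and that build_critic returns the (critic, action_input) pair the call above expects; names and architecture here are illustrative, not the original code:

from keras.models import Model, Sequential
from keras.layers import Activation, Concatenate, Dense, Flatten, Input


def build_actor(env, hidden_layers):
    # Hypothetical actor: one Dense/ReLU block per entry in hidden_layers,
    # with a tanh output so the actions stay bounded.
    nb_actions = env.action_space.shape[0]
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for units in hidden_layers:
        actor.add(Dense(units))
        actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('tanh'))
    return actor


def build_critic(env, hidden_layers):
    # Hypothetical critic: a Q(s, a) head over the concatenated action and
    # flattened observation, mirroring the other examples in this section.
    nb_actions = env.action_space.shape[0]
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    for units in hidden_layers:
        x = Dense(units)(x)
        x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    return critic, action_input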
Example No. 2
def get_agent(drlm):
    print('testing', '.' * 60)
    actor = drlm.actor
    critic = drlm.critic
    nb_actions = drlm.nb_actions
    action_input = drlm.action_input
    processor = TorcsProcessor()

    # load weights

    print('loading weights ', load_weights, '.' * 60)

    if load_weights:
        actor.load_weights(alw)
        critic.load_weights(clw)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.5, mu=0., sigma=.3)

    # random_process = ExplorationNoise(nb_steps=epochs,
    #                                   epsilon=1.0,
    #                                   steer=OrnsteinUhlenbeckProcess(theta=0.6, mu=0, sigma=0.3),
    #                                   accel_brake=OrnsteinUhlenbeckProcess(theta=1.0, mu=0.5, sigma=0.3),
    #                                   noise=1)

    agent = DDPGAgent(nb_actions=nb_actions, batch_size=batch_size,
                      actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, processor=processor, nb_steps_warmup_critic=nb_steps_warmup_critic,
                      nb_steps_warmup_actor=nb_steps_warmup_actor,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.0001, clipnorm=1.), metrics=[loss])
    return agent
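
get_agent reads several module-level names (load_weights, alw, clw, batch_size, nb_steps_warmup_critic, nb_steps_warmup_actor, loss) that are defined elsewhere in the original file. An illustrative set of definitions, with placeholder values rather than the author's settings:

# Illustrative module-level configuration assumed by get_agent();
# all values below are placeholders.
load_weights = False
alw = 'actor_weights.h5f'        # path to saved actor weights
clw = 'critic_weights.h5f'       # path to saved critic weights
batch_size = 32
nb_steps_warmup_critic = 100
nb_steps_warmup_actor = 100
loss = 'mae'                     # metric name passed to agent.compile()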
Example No. 3
def main():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    
    action_shape = env.action_space.shape[0]
    observation_shape = env.observation_space.shape

    actor = create_actor(observation_shape, action_shape)
    
    action_input = Input(shape=(action_shape,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    
    critic = create_critic(observation_input, action_input)

    memory = SequentialMemory(limit=100000, window_length=1)
    
    random_process = OrnsteinUhlenbeckProcess(size=action_shape, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=action_shape, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3,
                      processor=BipedalProcessor())
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
    agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))
    #agent.fit()
    agent.fit(env, nb_steps=3000000, visualize=False, verbose=2)
    agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
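
create_actor, create_critic and BipedalProcessor come from the same project and are not shown. As a rough idea of the processor's role, here is a minimal keras-rl Processor subclass that only clips actions to the BipedalWalker action range; the real BipedalProcessor may do more:

import numpy as np
from rl.core import Processor


class BipedalProcessor(Processor):
    # Minimal sketch: BipedalWalker expects actions in [-1, 1], so clip
    # whatever the actor (plus exploration noise) produces.
    def process_action(self, action):
        return np.clip(action, -1.0, 1.0)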
Example No. 4
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env

        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()

        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions,
                                                       theta=0.75,
                                                       mu=0.5,
                                                       sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions,
                               actor=self.actor,
                               critic=self.critic,
                               critic_action_input=action_input,
                               memory=self.memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=self.random_process,
                               gamma=.99,
                               target_model_update=1e-3,
                               processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')
Example No. 5
class RLAgent:
    def __init__(self, env):
        np.random.seed(123)
        env.seed(123)
        assert len(env.action_space.shape) == 1
        nb_actions = env.action_space.shape[0]

        # Next, we build a very simple model.
        self.actor = Sequential()
        self.actor.add(Flatten(input_shape=(1, ) +
                               env.observation_space.shape))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(
            Dense(nb_actions,
                  activation='tanh',
                  kernel_initializer=RandomUniform()))
        self.actor.add(Lambda(lambda x: x * 60.0))
        print(self.actor.summary())

        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=self.actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example No. 6
    def __init__(self,
                 observation_space,
                 action_space,
                 filename='KerasDDPGAgent.h5f'):
        nb_actions = action_space.shape[0]

        # Actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + observation_space.shape))
        actor.add(Dense(32))
        actor.add(Activation('relu'))
        actor.add(Dense(32))
        actor.add(Activation('relu'))
        actor.add(Dense(32))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('sigmoid'))
        print(actor.summary())

        # Critic network
        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(64)(x)
        x = Activation('relu')(x)
        x = Dense(64)(x)
        x = Activation('relu')(x)
        x = Dense(64)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Setup Keras RL's DDPGAgent
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                  mu=0.,
                                                  sigma=.2,
                                                  size=nb_actions)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3,
                               delta_clip=1.)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

        self.filename = filename
Example No. 7
def buildAgent(env):
    nb_actions = env.action_space.shape[0]
    # Create networks for DDPG
    # Next, we build a very simple model.
    actor = Sequential()
    print((-1, env.observation_space.shape[0]))
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))

    actor.add(Dense(32))
    actor.add(Activation('tanh'))
    actor.add(Dense(32))
    actor.add(Activation('tanh'))
    actor.add(Dense(32))
    actor.add(Activation('tanh'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('sigmoid'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = Dense(64)(x)
    x = Activation('tanh')(x)
    x = Dense(64)(x)
    x = Activation('tanh')(x)
    x = Dense(64)(x)
    x = Activation('tanh')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())
    # Set up the agent for training
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.2,
                                              size=env.noutput)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3,
                      delta_clip=1.)
    # agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
    #                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
    #                            gamma=.99, target_model_update=0.1)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
Example No. 8
def main(args):
    CUDA = torch.cuda.is_available()
    OUTPUT_RESULTS_DIR = './saver'
    ENVIRONMENT = 'SemisuperPendulumRandom-v0'
    TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M%S")
    SUMMARY_DIR = os.path.join(OUTPUT_RESULTS_DIR, "DDPG", ENVIRONMENT,
                               TIMESTAMP)

    env = gym.make(ENVIRONMENT)
    env = wrappers.Monitor(env, SUMMARY_DIR, force=True)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    action_bound = env.action_space.high

    actor = ActorNetwork(state_dim, action_dim, action_bound, args.actor_lr,
                         args.tau, args.seed)
    target_actor = ActorNetwork(state_dim, action_dim, action_bound,
                                args.actor_lr, args.tau, args.seed)
    critic = CriticNetwork(state_dim, action_dim, action_bound, args.critic_lr,
                           args.tau, args.l2_decay, args.seed)
    target_critic = CriticNetwork(state_dim, action_dim, action_bound,
                                  args.critic_lr, args.tau, args.l2_decay,
                                  args.seed)

    if CUDA:
        actor = actor.cuda()
        target_actor = target_actor.cuda()
        critic = critic.cuda()
        target_critic = target_critic.cuda()

    replay_buffer = ReplayBuffer(args.bufferlength, args.seed)

    agent = DDPGAgent(actor,
                      target_actor,
                      critic,
                      target_critic,
                      replay_buffer,
                      batch_size=args.batch_size,
                      gamma=args.gamma,
                      seed=args.seed,
                      episode_len=args.episode_len,
                      episode_steps=args.episode_steps,
                      noise_mean=args.noise_mean,
                      noise_th=args.noise_th,
                      noise_std=args.noise_std,
                      noise_decay=args.noise_decay)

    if args.is_train:
        agent.train(env)
        agent.save_actor_weights(save_dir=OUTPUT_RESULTS_DIR,
                                 filename=args.actor_weights)
    else:
        agent.load_actor_weights(save_dir=OUTPUT_RESULTS_DIR,
                                 filename=args.actor_weights)
        agent.test(env)
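
This variant uses a custom PyTorch DDPGAgent rather than keras-rl, and main(args) expects a namespace carrying the hyperparameters referenced above. A possible argparse setup; the attribute names are taken from the code, the defaults are only illustrative:

import argparse


def parse_args():
    # Attribute names match the args.* accesses in main(); defaults are guesses.
    p = argparse.ArgumentParser(description='DDPG on SemisuperPendulumRandom-v0')
    p.add_argument('--actor_lr', type=float, default=1e-4)
    p.add_argument('--critic_lr', type=float, default=1e-3)
    p.add_argument('--tau', type=float, default=1e-3)
    p.add_argument('--l2_decay', type=float, default=1e-2)
    p.add_argument('--seed', type=int, default=123)
    p.add_argument('--bufferlength', type=int, default=1000000)
    p.add_argument('--batch_size', type=int, default=64)
    p.add_argument('--gamma', type=float, default=0.99)
    p.add_argument('--episode_len', type=int, default=1000)
    p.add_argument('--episode_steps', type=int, default=200)
    p.add_argument('--noise_mean', type=float, default=0.0)
    p.add_argument('--noise_th', type=float, default=0.15)
    p.add_argument('--noise_std', type=float, default=0.2)
    p.add_argument('--noise_decay', type=float, default=0.0)
    p.add_argument('--actor_weights', default='ddpg_actor.pth')
    p.add_argument('--is_train', action='store_true')
    return p.parse_args()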
Example No. 9
def create(env):
    np.random.seed(config.current.domain_seed)
    env.seed(config.current.domain_seed)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(config.current.agent_vfn_complexity))
    actor.add(Activation('relu'))
    actor.add(Dense(config.current.agent_vfn_complexity))
    actor.add(Activation('relu'))
    actor.add(Dense(config.current.agent_vfn_complexity))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    #print(actor.summary())

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(config.current.agent_vfn_complexity)(x)
    x = Activation('relu')(x)
    x = Dense(config.current.agent_vfn_complexity)(x)
    x = Activation('relu')(x)
    x = Dense(config.current.agent_vfn_complexity)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    #print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=1000,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3,
                      processor=ArgmaxProcessor())
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
Example No. 10
def build_agent(env):
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]
    print(env.observation_space.shape)

    # Actor model
    actor = Sequential()
    actor.add(
        Flatten(input_shape=(window_length, ) + env.observation_space.shape))
    actor.add(Dense(128, activation="relu"))
    actor.add(Dense(128, activation="relu"))
    actor.add(Dense(64, activation="relu"))
    actor.add(Dense(nb_actions, activation="tanh"))
    actor.summary()

    # Critic model
    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(window_length, ) +
                              env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(256, activation="relu")(x)
    x = Dense(256, activation="relu")(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(1, activation="linear")(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    critic.summary()

    memory = SequentialMemory(limit=1000000, window_length=window_length)
    # Exploration policy - has a great effect on learning. Should encourage forward motion.
    # theta - how fast the process returns to the mean
    # mu - mean value - this should be greater than 0 to encourage forward motion
    # sigma - volatility of the process
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.5,
                                              mu=0.4,
                                              sigma=0.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=50,
                      nb_steps_warmup_actor=50,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
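
The comments above describe the exploration noise qualitatively. To make the roles of theta, mu and sigma concrete, here is a standalone NumPy simulation of the Ornstein-Uhlenbeck update (not keras-rl's internal implementation): theta controls how fast the noise is pulled back toward the mean mu, and sigma scales the random kicks.

import numpy as np


def simulate_ou(theta=0.5, mu=0.4, sigma=0.3, dt=1.0, n_steps=1000):
    # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
    x, samples = 0.0, []
    for _ in range(n_steps):
        x = x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.randn()
        samples.append(x)
    return np.array(samples)


# With mu = 0.4 the noise hovers around 0.4, so the perturbed actions are
# biased toward positive values (here: forward motion).
print(simulate_ou().mean())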
Example No. 11
    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        # actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + observation_space_shape))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('linear'))
        print(actor.summary())

        # critic network
        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space_shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example No. 12
def visualize(session_name):
    kwargs = {'viewer': True}

    ENV_NAME = 'singlePendulum-v0'
    env = gym.make(ENV_NAME, **kwargs)
    np.random.seed(7)
    env.seed(7)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    actor, critic, action_input = create_networks(env)

    memory = SequentialMemory(limit=400, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory)
    agent.compile(Adam(lr=.0005,
                       clipnorm=1.,
                       epsilon=1.e-7,
                       beta_1=0.9,
                       beta_2=0.999),
                  metrics=['mae'])

    checkpoint_filepath = 'checkpoint/ddpg_{}_{}_weights.h5f'.format(
        ENV_NAME, session_name)
    filepath = 'ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    agent.load_weights(filepath=filepath)

    env.viewer = True
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=400)
    env.close()
Example No. 13
    def _build_ddpg(nb_actions, nb_states):
        # build an actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, nb_states)))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('sigmoid'))

        # build a critic network
        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, nb_states),
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)

        # tricks:
        memory = SequentialMemory(limit=10240, window_length=1)
        oup = OrnsteinUhlenbeckProcess(size=nb_actions,
                                       theta=.15,
                                       mu=0.,
                                       sigma=.3)

        # build ddpg agent
        ddpg = DDPGAgent(nb_actions=nb_actions,
                         actor=actor,
                         critic=critic,
                         critic_action_input=action_input,
                         memory=memory,
                         nb_steps_warmup_actor=100,
                         nb_steps_warmup_critic=100,
                         random_process=oup,
                         gamma=.99,
                         target_model_update=1e-3)
        ddpg.compile(Adam(), metrics=['mae'])
        return ddpg
Example No. 14
def build_agent(num_action, observation_shape):
    actor = build_actor_model(num_action, observation_shape)
    critic, critic_action_input = build_critic_model(num_action,
                                                     observation_shape)
    memory = SequentialMemory(limit=10**5, window_length=1)
    agent = DDPGAgent(num_action, actor, critic, critic_action_input, memory)
    return agent
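
build_actor_model and build_critic_model are project helpers, and the agent is returned with positional arguments only: no random_process and no optimizer. Before fit() or test() it still needs to be compiled, roughly like this (env, num_action and observation_shape are assumed to be defined as in the surrounding project, and the hyperparameters are placeholders):

# Illustrative usage of build_agent(); values are placeholders.
agent = build_agent(num_action, observation_shape)
agent.compile(Adam(lr=1e-3, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=50000, visualize=False, verbose=2)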
Example No. 15
def create_agent(observation_space, action_space, hyperparams):
    assert len(action_space.shape) == 1
    nb_actions = action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + observation_space.shape))
    for layer in hyperparams['actor']['layers']:
        actor.add(Dense(layer['neurons'].value))
        actor.add(Activation(layer['activation'].value))

    actor.add(Dense(nb_actions))
    actor.add(Activation(hyperparams['actor']['output_activation'].value))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + observation_space.shape,
                              name='observation_input')

    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    for layer in hyperparams['critic']['layers']:
        x = Dense(layer['neurons'].value,
                  activation=layer['activation'].value)(x)
    x = Dense(1,
              activation=hyperparams['critic']['output_activation'].value)(x)

    critic = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)
    agent.compile(optimizer=hyperparams['optimizer'].value, metrics=['mae'])

    return actor, agent
Example No. 16
def get_agent(env) -> DDPGAgent:
    """
    Generate a `DDPGAgent` instance that represents an agent trained with
    Deep Deterministic Policy Gradient. The agent has two neural networks: an
    actor network and a critic network.

    Args:
    * `env`: An OpenAI `gym.Env` instance.

    Returns:
    * a `DDPGAgent` instance.
    """
    nb_actions = env.action_space.shape[0]
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('tanh'))

    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=100000, window_length=1)

    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                    memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                    gamma=.99, target_model_update=1e-3)#random_process=random_process, 
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
Example No. 17
class DDPG():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        actor.add(Dense(8))
        actor.add(Activation('relu'))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        # actor.add(Dense(16))
        # actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('softmax'))
        # print(actor.summary())

        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + Env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation], name = 'concatenate')
        x = Dense(5)(x)
        x = Activation('relu')(x)
        x = Dense(8)(x)
        x = Activation('relu')(x)
        x = Dense(5)(x)
        x = Activation('relu')(x)
        # x = Dense(32)(x)
        # x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        # print(critic.summary())

        memory = SequentialMemory(limit=100000, window_length=1)
        # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        random_process = None
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                          memory=memory, nb_steps_warmup_critic=32, nb_steps_warmup_actor=32,
                          random_process=random_process, gamma=0, target_model_update=0.001)
        self.agent.processor = ShowActionProcessor(self.agent, self.env)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def fit(self):
        history = self.agent.fit(self.env, action_repetition=1, nb_steps=20000, visualize=False, verbose=1, nb_max_episode_steps=10)
        return history

    def save_weights(self):
        self.agent.save_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"), overwrite=True)

    def test(self):
        history = self.agent.test(self.env, nb_episodes=1, visualize=False, nb_max_episode_steps=10)
        return history

    def load_weights(self):
        self.agent.load_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"))
Example No. 18
	def build_agent(self):
		### Building Agent
		log_info("building DDPGAgent ...") 
		self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input,
                  memory=self.networks.memory, nb_steps_warmup_critic=self.nb_steps_warmup_critic, nb_steps_warmup_actor=self.nb_steps_warmup_actor,
                  random_process=self.networks.random_process, gamma=self.gamma, target_model_update=self.target_model_update,
                  delta_clip=self.delta_clip)

		# self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input,
  #                 memory=self.networks.memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
  #                 random_process=self.networks.random_process, gamma=.99, target_model_update=1e-3,
  #                 delta_clip=1.)


		log_info("Adding optimizer ...")
		self.agent.compile(self.optimizer, self.metrics)
		log_info("Optimizer added.")
Example No. 19
def build_agent(nS, nA, action_min, action_max):
    actor = build_actor(nS, nA, action_min, action_max)
    action_input, critic = build_critic(nS, nA)
    agent = DDPGAgent(nb_actions=nA, 
                      actor=actor, 
                      critic=critic, 
                      critic_action_input=action_input,
                      memory=SequentialMemory(limit=100000, 
                                              window_length=1), 
                      nb_steps_warmup_critic=100, 
                      nb_steps_warmup_actor=100,
                      random_process=OrnsteinUhlenbeckProcess(size=nA, 
                                                              theta=0.15,
                                                              sigma=0.3), 
                      gamma=0.99, 
                      target_model_update=0.001)
    agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])
    return agent
Example No. 20
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        actor.add(Dense(8))
        actor.add(Activation('relu'))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        # actor.add(Dense(16))
        # actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('softmax'))
        # print(actor.summary())

        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + Env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation], name = 'concatenate')
        x = Dense(5)(x)
        x = Activation('relu')(x)
        x = Dense(8)(x)
        x = Activation('relu')(x)
        x = Dense(5)(x)
        x = Activation('relu')(x)
        # x = Dense(32)(x)
        # x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        # print(critic.summary())

        memory = SequentialMemory(limit=100000, window_length=1)
        # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        random_process = None
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                          memory=memory, nb_steps_warmup_critic=32, nb_steps_warmup_actor=32,
                          random_process=random_process, gamma=0, target_model_update=0.001)
        self.agent.processor = ShowActionProcessor(self.agent, self.env)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example No. 21
 def compile_agent(self):
     # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
     # even the metrics!
     processor = DistopiaProcessor(self.num_blocks, self.num_actions)
     memory = SequentialMemory(limit=50000, window_length=1)
     random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions,
                                               theta=.15,
                                               mu=0.,
                                               sigma=.3)
     self.agent = DDPGAgent(nb_actions=self.nb_actions,
                            actor=self.actor,
                            critic=self.critic,
                            critic_action_input=self.action_input,
                            memory=memory,
                            nb_steps_warmup_critic=100,
                            nb_steps_warmup_actor=100,
                            random_process=random_process,
                            gamma=.99,
                            target_model_update=1e-3)
     self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example No. 22
def create_actor_critic_agent():
    # create ddpg agent
    memory = SequentialMemory(limit=1000000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.5,
                                              mu=0.,
                                              sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=create_actor(),
                      critic=create_critic(),
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.8,
                      target_model_update=1e-3,
                      processor=UnitsProcessor(n_sonars=env.robot.n_sonars))
    agent.compile(Adam(lr=.001), metrics=['mse'])
    return agent
Example No. 23
    def __init__(self, env):
        # rospack = rospkg.RosPack()
        # self.working_dir = rospack.get_path('neuroracer_gym_rl')
        # self.weight_backup      = os.path.join(self.working_dir, "neuroracer.h5")
        self.env = env

        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.nb_actions  = self.env.action_space.shape[0]
        self.batch_size = 16
        self.max_buffer = 100000
        self.window_length = 16
        self.memory = SequentialMemory(limit=self.max_buffer, window_length=self.window_length)
        self.learning_rate_actor = 0.0001
        self.learning_rate_critic = 0.001
        self.gamma              = 0.9
        self.exploration_rate   = 0.95
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995

        random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=.15, mu=0., sigma=.2)

        actor = self._create_actor()        
        critic, critic_action_input = self._create_critic(self.nb_actions)

        self.model = DDPGAgent(nb_actions=self.nb_actions, 
                                actor=actor, 
                                critic=critic,
                                critic_action_input=critic_action_input,
                                memory=self.memory,
                                nb_steps_warmup_critic=500,
                                nb_steps_warmup_actor=500,
                                random_process=random_process,
                                gamma=self.gamma,
                                target_model_update=.001,
                                # processor=self.processor,
                                batch_size=self.batch_size)
        self.model.compile(
            (Adam(lr=self.learning_rate_actor, clipnorm=1.), Adam(lr=self.learning_rate_critic, clipnorm=1.)),
            metrics=['mse'])
Example No. 24
def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    
    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
Example No. 25
def keras_rl(env,
             model_name,
             saved_model_name="model",
             steps=50000,
             test_steps=5,
             visualize=False,
             hidden_layers=3,
             critic_hidden_layers=3):
    nb_actions = 0
    if (model_name == "DQN" or model_name == "SARSA"):
        nb_actions = env.action_space.n
    elif (model_name == "DDPG"):
        nb_actions = env.action_space.shape[0]

    model_structure = define_layers(env,
                                    nb_actions,
                                    num_of_hidden_layers=hidden_layers)
    memory = define_memory()
    policy = define_policy(model_name)

    if (model_name == "DQN"):
        model = DQNAgent(model=model_structure,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=100,
                         enable_double_dqn=True,
                         dueling_type='avg',
                         target_model_update=1e-2)
    elif (model_name == "SARSA"):
        model = SARSAAgent(model=model_structure,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           policy=policy)
    elif (model_name == "DDPG"):
        action_input, critic_layers = define_critic_layers(
            env, num_of_hidden_layers=critic_hidden_layers)
        random_process = define_random_process(nb_actions)
        model = DDPGAgent(nb_actions=nb_actions,
                          actor=model_structure,
                          critic=critic_layers,
                          critic_action_input=action_input,
                          memory=memory,
                          nb_steps_warmup_critic=100,
                          nb_steps_warmup_actor=100,
                          random_process=random_process,
                          gamma=.99,
                          target_model_update=1e-3)

    model.compile(Adam(lr=1e-3), metrics=['mae'])
    model.fit(env, nb_steps=steps, visualize=False, verbose=2)
    model.save_weights('{}.h5f'.format(model_name), overwrite=True)
    model.test(env, nb_episodes=test_steps, visualize=visualize)
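
define_layers, define_memory, define_policy, define_critic_layers and define_random_process are helpers from the same module. Minimal sketches of the two used by the DDPG branch, matching the defaults seen in the other examples here (assumptions, not the original code):

def define_memory():
    # Replay buffer shared by all three agent types in keras_rl().
    return SequentialMemory(limit=100000, window_length=1)


def define_random_process(nb_actions):
    # Exploration noise for the DDPG branch.
    return OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)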
Example No. 26
class KerasDDPGAgent(KerasAgent):
    """
    A DDPG agent using the Keras library with Keras RL.

    For more details about the Deep Deterministic Policy Gradient algorithm, see
    "Continuous control with deep reinforcement learning" by Lillicrap et al.
    https://arxiv.org/abs/1509.02971
    """
    def __init__(self, observation_space, action_space, filename='KerasDDPGAgent.h5f'):
        #from keras.layers.normalization import BatchNormalization       
        nb_actions = action_space.shape[0]

        # Actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + observation_space.shape))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(nb_actions, activation='sigmoid'))
        print(actor.summary())

        # Critic network
        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Setup Keras RL's DDPGAgent
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2, 
                dt=1e-2,size=nb_actions,sigma_min=.05,n_steps_annealing=1e6)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                          actor=actor,
                          critic=critic,
                          critic_action_input=action_input,
                          memory=memory,
                          nb_steps_warmup_critic=100,
                          nb_steps_warmup_actor=100,
                          random_process=random_process,
                          gamma=.96,
                          target_model_update=1e-4,
                          delta_clip=1.)
Example No. 27
def create_ddpg_agent(env):
    
    nb_actions = env.action_space.n
    policy = BoltzmannQPolicy()
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + (env.observation_space.shape[1],)))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + (env.observation_space.shape[1],), name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
Example No. 28
def build_agent(num_action, observation_shape):
    actor = build_actor_model(num_action, observation_shape)
    critic, critic_action_input = build_critic_model(num_action, observation_shape)
    memory = SequentialMemory(limit=10**6, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=num_action, theta=0.15, mu=0, sigma=0.3)
    agent = DDPGAgent(
        num_action,
        actor,
        critic,
        critic_action_input,
        memory,
        random_process=random_process
    )
    return agent
Example No. 29
 def _make_agent(self):
     """Internal helper function to create an actor-critic custom agent model
     """
     if self.actor is None:
         self._make_actor()
     if self.critic is None:
         self._make_critic()
     memory = SequentialMemory(limit=100000, window_length=1)
     random_process = OrnsteinUhlenbeckProcess(size=self.n_actions,
                                               theta=.15,
                                               mu=0.,
                                               sigma=.3)
     self.agent = DDPGAgent(nb_actions=self.n_actions,
                            actor=self.actor,
                            critic=self.critic,
                            critic_action_input=self.action_input,
                            memory=memory,
                            nb_steps_warmup_critic=100,
                            nb_steps_warmup_actor=100,
                            random_process=random_process,
                            gamma=.99,
                            target_model_update=1e-3)
     self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example No. 30
class actor_critic(keras_model):


	def __init__(self,shared_object):

		log_info('building actor-critic specific parameters ')
		
		self.gamma = shared_object.get('gamma',.99)
		self.nb_steps_warmup_critic = shared_object.get('nb_steps_warmup_critic',100)
		self.nb_steps_warmup_actor = shared_object.get('nb_steps_warmup_actor',100)
		self.target_model_update =  shared_object.get('target_model_update',1.0e-3)
		self.delta_clip = shared_object.get('delta_clip',1.)

		log_info('loading actor critic specific parameters is done')
		
		super(actor_critic, self).__init__(shared_object)



	def build_agent(self):
		### Building Agent
		log_info("building DDPGAgent ...") 
		self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input,
                  memory=self.networks.memory, nb_steps_warmup_critic=self.nb_steps_warmup_critic, nb_steps_warmup_actor=self.nb_steps_warmup_actor,
                  random_process=self.networks.random_process, gamma=self.gamma, target_model_update=self.target_model_update,
                  delta_clip=self.delta_clip)

		# self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input,
  #                 memory=self.networks.memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
  #                 random_process=self.networks.random_process, gamma=.99, target_model_update=1e-3,
  #                 delta_clip=1.)


		log_info("Adding optimizer ...")
		self.agent.compile(self.optimizer, self.metrics)
		log_info("Optimizer added.")
Example No. 31
def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=50,
                      nb_steps_warmup_actor=50,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env,
              nb_steps=400,
              visualize=False,
              verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env,
                   nb_episodes=2,
                   visualize=False,
                   nb_max_episode_steps=100)
Example No. 32
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
Example No. 33
    x = Activation('relu')(x)

# Output Layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=1.0, mu=0.0, sigma=0.5, sigma_min=0.3, n_steps_annealing=NUM_STEPS)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.999, target_model_update=1e-3,
                  delta_clip=1.0)

agent.compile(Adam(lr=3e-4, clipnorm=1.0), metrics=['mae'])




# Optionally, we can reload a previous model's weights and continue training from there
# Remove the _actor or _critic from the filename. The load method automatically
# appends these.        
WEIGHTS_FILENAME = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
# agent.load_weights(WEIGHTS_FILENAME)


callbacks = []
Example No. 34
    x = Dense(LAYER_SIZE)(x)
    x = Activation('relu')(x)

# Output Layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.999, target_model_update=1e-3,
                  delta_clip=1.0)

agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae'])



# Load the model weights - this method will automatically load the weights for
# both the actor and critic
agent.load_weights(FILENAME)


# Finally, evaluate our algorithm for 5 episodes.
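# action_repetition=5 makes the agent repeat each selected action for five consecutive
# environment steps during this evaluation run.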
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # nb_max_episode_steps=500
    x = Activation('relu')(x)

# Output Layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=1.0, mu=0.0, sigma=0.5, sigma_min=0.3, n_steps_annealing=NUM_STEPS)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.999, target_model_update=1e-3,
                  delta_clip=1.0)

agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae'])




# Optionally, we can reload a previous model's weights and continue training from there.
# Remove the _actor or _critic suffix from the filename; the load method appends these
# automatically.
WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights.h5f'
# agent.load_weights(WEIGHTS_FILENAME)


callbacks = []
Example No. 36
0
    x = concatenate([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    plot_model(critic, to_file='critic.png', show_shapes=True)
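    # plot_model (from keras.utils) needs pydot and Graphviz installed; it writes a diagram of
    # the critic network to 'critic.png'.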

# # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# # even the metrics!
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                   random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# # Okay, now it's time to learn something! Visualizing the training slows it down considerably,
# # so it is disabled here (visualize=False). You can always safely abort the training
# # prematurely using Ctrl + C.
    agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200)

# # After training is done, we save the final weights.
    agent.save_weights('ddpg_stokes_weights.h5f', overwrite=True)

# # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)

Example No. 37
0
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=100.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])
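# When a list of two optimizers is passed to compile, keras-rl applies the first to the actor
# and the second to the critic.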

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)
Example No. 38
0
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
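# The 'actor' passed to the agent below is defined earlier in the original script and is not
# part of this snippet. A minimal sketch of a typical actor paired with this 400/300 critic
# (an assumption, not the original code) would be:
#
#   actor = Sequential([
#       Flatten(input_shape=(1,) + env.observation_space.shape),
#       Dense(400), Activation('relu'),
#       Dense(300), Activation('relu'),
#       Dense(nb_actions), Activation('tanh'),
#   ])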

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=MujocoProcessor())
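# MujocoProcessor is defined elsewhere in the original script. As a rough illustration of what a
# keras-rl Processor can do (an assumption, not the original class), it might simply clip actions
# to the valid range:
#
#   from rl.core import Processor
#   class MujocoProcessor(Processor):
#       def process_action(self, action):
#           return np.clip(action, -1., 1.)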
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training slows it down considerably,
# so it is disabled here (visualize=False). You can always safely abort the training prematurely
# using Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Example No. 39
0
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process,
                  gamma=.99, target_model_update=1e-3, delta_clip=10.)
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)