Example #1
def main():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    
    action_shape = env.action_space.shape[0]
    observation_shape = env.observation_space.shape

    actor = create_actor(observation_shape, action_shape)
    
    action_input = Input(shape=(action_shape,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    
    critic = create_critic(observation_input, action_input)

    memory = SequentialMemory(limit=100000, window_length=1)
    
    random_process = OrnsteinUhlenbeckProcess(size=action_shape, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=action_shape, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3,
                      processor=BipedalProcessor())
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
    agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))
    #agent.fit()
    agent.fit(env, nb_steps=3000000, visualize=False, verbose=2)
    agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
Example #2
def visualize(session_name):
    kwargs = {'viewer': True}

    ENV_NAME = 'singlePendulum-v0'
    env = gym.make(ENV_NAME, **kwargs)
    np.random.seed(7)
    env.seed(7)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    actor, critic, action_input = create_networks(env)

    memory = SequentialMemory(limit=400, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory)
    agent.compile(Adam(lr=.0005,
                       clipnorm=1.,
                       epsilon=1.e-7,
                       beta_1=0.9,
                       beta_2=0.999),
                  metrics=['mae'])

    checkpoint_filepath = 'checkpoint/ddpg_{}_{}_weights.h5f'.format(
        ENV_NAME, session_name)
    filepath = 'ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    agent.load_weights(filepath=filepath)

    env.viewer = True
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=400)
    env.close()
Example #3
class DDPG():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        actor.add(Dense(8))
        actor.add(Activation('relu'))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        # actor.add(Dense(16))
        # actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('softmax'))
        # print(actor.summary())

        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + Env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation], name = 'concatenate')
        x = Dense(5)(x)
        x = Activation('relu')(x)
        x = Dense(8)(x)
        x = Activation('relu')(x)
        x = Dense(5)(x)
        x = Activation('relu')(x)
        # x = Dense(32)(x)
        # x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        # print(critic.summary())

        memory = SequentialMemory(limit=100000, window_length=1)
        # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        random_process = None
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                          memory=memory, nb_steps_warmup_critic=32, nb_steps_warmup_actor=32,
                          random_process=random_process, gamma=0, target_model_update=0.001)
        self.agent.processor = ShowActionProcessor(self.agent, self.env)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def fit(self):
        history = self.agent.fit(self.env, action_repetition=1, nb_steps=20000, visualize=False, verbose=1, nb_max_episode_steps=10)
        return history

    def save_weights(self):
        self.agent.save_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"), overwrite=True)

    def test(self):
        history = self.agent.test(self.env, nb_episodes=1, visualize=False, nb_max_episode_steps=10)
        return history

    def load_weights(self):
        self.agent.load_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"))
Example #4
def load_network(env):
    ENV_NAME = 'Carom-v0'
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    np.random.seed(323)
    env.seed(323)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=50000, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      gamma=.99,
                      target_model_update=1e3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))
    return agent
Example #5
    def _train(self):
        env = CrazyflieEnvironment(self._cf)
        atexit.register(teardown_env, env, self._cf)

        np.random.seed(123)
        assert len(env.action_space.shape) == 1
        nb_actions = env.action_space.shape[0]

        # Next, we build a very simple model.
        actor = self.actor_model(env, nb_actions)
        action_input, critic = self.critic_model(env, nb_actions)

        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        model_name = 'ddpg_{}_weights.h5f'.format('crazyflie')
        agent = DDPGAgent(nb_actions=nb_actions,
                          actor=actor,
                          critic=critic,
                          critic_action_input=action_input,
                          memory=memory,
                          nb_steps_warmup_critic=100,
                          nb_steps_warmup_actor=100,
                          random_process=random_process,
                          gamma=.99,
                          target_model_update=1e-3)
        if os.path.exists(model_name):
            agent.load_weights(model_name)
        agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

        try:
            agent.fit(env, nb_steps=50000, verbose=2)
            agent.test(env, nb_episodes=1)
        finally:
            agent.save_weights(model_name, overwrite=True)
Example #6
def evaluate_model(model_path=None, interactive=False, seed=12345):
    np.random.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = GaussianWhiteNoiseProcess(mu=0,
                                               sigma=0,
                                               sigma_min=0,
                                               n_steps_annealing=1)

    agent = DDPGAgent(nb_actions=3,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.95,
                      target_model_update=0.0001,
                      batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    if model_path is not None:
        agent.load_weights(model_path)

    # Train Evaluation
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=False,
                               interactive=interactive)
    env.seed(seed)
    res = agent.test(env,
                     nb_episodes=500,
                     nb_max_episode_steps=100,
                     verbose=0,
                     visualize=False)
    train_mean_reward = np.mean(res.history['episode_reward'])
    before_train_position_error = np.mean(
        np.abs(env.init_position_error_pixels))
    before_train_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_train_position_error = np.mean(
        np.abs(env.final_position_error_pixels))
    after_train_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Training evaluation: ")
    print("Mean reward: ", train_mean_reward)
    print("Position: ", before_train_position_error, " -> ",
          after_train_position_error)
    print("Zoom: ", before_train_zoom_error, " -> ", after_train_zoom_error)

    # Test Evaluation
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=True,
                               interactive=interactive)
    env.seed(seed)
    res = agent.test(env,
                     nb_episodes=500,
                     nb_max_episode_steps=100,
                     verbose=0,
                     visualize=False)
    train_mean_reward = np.mean(res.history['episode_reward'])
    before_train_position_error = np.mean(
        np.abs(env.init_position_error_pixels))
    before_train_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_train_position_error = np.mean(
        np.abs(env.final_position_error_pixels))
    after_train_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Testing evaluation: ")
    print("Mean reward: ", train_mean_reward)
    print("Position: ", before_train_position_error, " -> ",
          after_train_position_error)
    print("Zoom: ", before_train_zoom_error, " -> ", after_train_zoom_error)
Example #7
## Initialize Replay Buffer ##
memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)
# window_length: useful for Atari-style games (how many consecutive frames to analyze, e.g. to infer the ball's velocity)

## Random process (exploration) ##
random_process = OrnsteinUhlenbeckProcess(theta=THETA, mu=MEAN, sigma=SIGMA, size=action_size)

## DDPG agent parameters ##
agent = DDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
    critic_action_input=action_input,
    memory=memory, random_process=random_process,
    gamma=DISC_FACT, target_model_update=TARGET_MODEL_UPDATE,
    batch_size=BATCH_SIZE)

agent.compile(optimizer=[opti_critic, opti_actor], metrics=['mae'])


##### TRAIN #####
if args.train:
    check_overwrite(args.model)
    history = agent.fit(env, nb_steps=N_STEPS_TRAIN, visualize=args.visualize, verbose=VERBOSE, log_interval = LOG_INTERVAL)
    agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)
    save_plot_reward(history, args.model, params) 


##### TEST #####
if not args.train:
    agent.load_weights(FILES_WEIGHTS_NETWORKS)
    history = agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
    save_result(history, args.model, params) 
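A short aside on the window_length argument used throughout these examples: keras-rl's SequentialMemory stacks that many consecutive observations, so the network input shape must be (window_length,) + observation_space.shape. The sketch below is added here for illustration only (the environment name is a placeholder, not part of the example above).

import gym
from keras.models import Sequential
from keras.layers import Dense, Flatten
from rl.memory import SequentialMemory

WINDOW_LENGTH = 4  # e.g. stack 4 consecutive frames so the network can infer velocities
env = gym.make('Pendulum-v0')  # placeholder environment

memory = SequentialMemory(limit=100000, window_length=WINDOW_LENGTH)

# The first axis of the actor input must match window_length.
actor = Sequential()
actor.add(Flatten(input_shape=(WINDOW_LENGTH,) + env.observation_space.shape))
actor.add(Dense(32, activation='relu'))
actor.add(Dense(env.action_space.shape[0], activation='tanh'))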
Example #8
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something!
mode = 'test'
if mode == 'train':
    hist = agent.fit(env,
                     nb_steps=1000000,
                     visualize=False,
                     verbose=2,
                     nb_max_episode_steps=1000)
    filename = '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'
    # we save the learning history; it can later be used to plot the reward evolution
    with open('_experiments/history_ddpg__redetorcs' + filename + '.pickle',
              'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
    #After training is done, we save the final weights.
    agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2_action_lim_1'),
                       overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)
elif mode == 'test':
    env.set_test_performace()  # Define the initialization as performance test
    env.set_save_experice()  # Save the test to plot the results after
    agent.load_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'))
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)
Example #9
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

prefix = args.output if args.output else "%s_s%f_t%f" % (args.env, float(args.sigma), float(args.theta))

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000, prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 

            obs = env.get_observation()
            print "Actual shoulder = %f, elbow = %f\r" % (obs[2],obs[3]),
            env.step(agent.forward(obs))


Example #10
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.

    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 
            
            env.step(agent.forward(env.get_observation()))
Example #11
            x = Dense(32)(x)
            x = Activation('relu')(x)
            x = Dense(32)(x)
            x = Activation('relu')(x)
            x = Dense(1)(x)
            x = Activation('linear')(x)
            critic = Model(inputs=[action_input, observation_input], outputs=x)
            # print(critic.summary())

            memory = SequentialMemory(limit=1000000, window_length=1)
            random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
            agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                              memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                              random_process=random_process, gamma=.99, target_model_update=1e-3)
            agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
            agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')  # added to continue training
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
            env.close()

        else:
            agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
            env.close()

        if file_num % 10 == 0:
            print("finished training on ", file_num, " files")
        file_num += 1

# loop through testing envs
Example #12
                                          mu=0.,
                                          sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

try:
    agent.load_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME))
except (OSError, IOError):
    logger.warning("File not found")

n = 0
while True:
    n += 1
    logger.info('Iteration #{}'.format(n))

    #train
    train_history = agent.fit(env,
                              nb_steps=nb_stepis,
                              visualize=False,
                              verbose=1,
                              nb_max_episode_steps=nb_stepis)
Example #13
def main_function(args, data):
    #### CONSTANT INITIALIZATION #####
    ## Model ##
    SIZE_HIDDEN_LAYER_ACTOR = data['SIZE_HIDDEN_LAYER_ACTOR'][0]
    LR_ACTOR = data['LR_ACTOR'][0]
    SIZE_HIDDEN_LAYER_CRITIC = data['SIZE_HIDDEN_LAYER_CRITIC'][0]
    LR_CRITIC = data['LR_CRITIC'][0]
    DISC_FACT = data['DISC_FACT'][0]
    TARGET_MODEL_UPDATE = data['TARGET_MODEL_UPDATE'][0]
    BATCH_SIZE = data['BATCH_SIZE'][0]
    REPLAY_BUFFER_SIZE = data['REPLAY_BUFFER_SIZE'][0]
    ## Exploration ##
    THETA = data['THETA'][0]
    SIGMA = data['SIGMA'][0]
    SIGMA_MIN = data['SIGMA_MIN'][0]
    N_STEPS_ANNEALING = data['N_STEPS_ANNEALING'][0]

    ## Acceleration ##
    ACTION_REPETITION = data['ACTION_REPETITION'][0]
    INTEGRATOR_ACCURACY = data['INTEGRATOR_ACCURACY'][0]

    # # Simulation ##
    N_STEPS_TRAIN = int(args.step)
    N_EPISODE_TEST = 100
    if args.visualize:
        N_EPISODE_TEST = 3
    VERBOSE = 1
    # 0: no progress output
    # 1: progress report every LOG_INTERVAL steps
    # 2: progress report at every episode
    LOG_INTERVAL = 500

    # Save weights ##
    if not os.path.exists('weights'):
        os.mkdir('weights')
        print("Directory ", 'weights', " Created ")
    FILES_WEIGHTS_NETWORKS = './weights/' + args.model + '.h5f'

    # #### LOAD THE ENVIRONMENT #####
    if args.prosthetic:
        env = ProsContinueRewardWrapper(
            ProstheticsEnv(visualize=args.visualize,
                           integrator_accuracy=INTEGRATOR_ACCURACY))
    if not args.prosthetic:
        env = CustomDoneOsimWrapper(
            CustomRewardWrapper(
                RelativeMassCenterObservationWrapper(
                    NoObstacleObservationWrapper(
                        L2RunEnv(visualize=args.visualize,
                                 integrator_accuracy=0.005)))))

    env.reset()
    # Examine the action space ##
    action_size = env.action_space.shape[0]
    # action_size = int(env.action_space.shape[0]/2)  # for symmetry
    print('Size of each action:', action_size)

    # Examine the state space ##
    state_size = env.observation_space.shape[0]
    print('Size of state:', state_size)

    # #### ACTOR / CRITIC #####

    # Actor (mu) ##
    if args.prosthetic:
        input_shape = (1, env.observation_space.shape[0])
    if not args.prosthetic:
        input_shape = (1, env.observation_space.shape[0])

    observation_input = Input(shape=input_shape, name='observation_input')

    x = Flatten()(observation_input)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(action_size)(x)
    x = Activation('sigmoid')(x)

    actor = Model(inputs=observation_input, outputs=x)
    opti_actor = Adam(lr=LR_ACTOR)

    # Critic (Q) ##
    action_input = Input(shape=(action_size, ), name='action_input')

    x = Flatten()(observation_input)
    x = concatenate([action_input, x])
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)

    critic = Model(inputs=[action_input, observation_input], outputs=x)

    opti_critic = Adam(lr=LR_CRITIC)

    # #### SET UP THE AGENT #####
    # Initialize Replay Buffer ##
    memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)

    # Random process (exploration) ##
    random_process = OrnsteinUhlenbeckProcess(
        theta=THETA,
        mu=0,
        sigma=SIGMA,
        sigma_min=SIGMA_MIN,
        size=action_size,
        n_steps_annealing=N_STEPS_ANNEALING)

    # random_process_l = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA,sigma_min= SIGMA_MIN,
    #                                           size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    # random_process_r = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA,sigma_min= SIGMA_MIN,
    #                                           size=action_size, n_steps_annealing=N_STEPS_ANNEALING)

    # DDPG agent parameters ##
    # agent = SymmetricDDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
    #                            critic_action_input=action_input,
    #                            memory=memory, random_process_l=random_process_l, random_process_r=random_process_r,
    #                            gamma=DISC_FACT, target_model_update=TARGET_MODEL_UPDATE,
    #                            batch_size=BATCH_SIZE)

    agent = DDPGAgent(nb_actions=action_size,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      random_process=random_process,
                      gamma=DISC_FACT,
                      target_model_update=TARGET_MODEL_UPDATE,
                      batch_size=BATCH_SIZE)

    agent.compile(optimizer=[opti_critic, opti_actor])

    # #### TRAIN #####
    logdir = "keras_logs/" + datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
    robustensorboard = RobustTensorBoard(log_dir=logdir, hyperparams=data)
    saveBest = SaveBestEpisode()
    if args.train:
        if args.resume:
            agent.load_weights(FILES_WEIGHTS_NETWORKS)
        else:
            check_overwrite(args.model)

        agent.fit(env,
                  nb_steps=N_STEPS_TRAIN,
                  visualize=args.visualize,
                  verbose=VERBOSE,
                  log_interval=LOG_INTERVAL,
                  callbacks=[robustensorboard, saveBest],
                  action_repetition=ACTION_REPETITION)

        agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)

    #### TEST #####
    if not args.train:
        agent.load_weights(FILES_WEIGHTS_NETWORKS)
        agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
Example #14
env.seed(123)
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

n = DroneNetwork(nb_actions=nb_actions,
                 observation_shape=env.observation_space.shape)

# Next, we build a very simple model.
actor = n.create_actor()
critic = n.create_critic()
action_input = n.get_action_input()

actor.summary()
critic.summary()
print(action_input)

memory = SequentialMemory(limit=100000, window_length=1)

agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory)

agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

agent.load_weights('ddpg_{}_weights.h5f'.format('drone'))
agent.test(env, nb_episodes=100000, visualize=True)
#agent.test(env, nb_episodes=20, visualize=True, nb_max_episode_steps=50)
env.close()
Example #15
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.

    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 
            
            env.step(agent.forward(env.get_observation()))
Example #16
    x = Dense(LAYER_SIZE)(x)
    x = Activation('relu')(x)

# Output Layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.999, target_model_update=1e-3,
                  delta_clip=1.0)

agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae'])



# Load the model weights - this method will automatically load the weights for
# both the actor and critic
agent.load_weights(FILENAME)


# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # nb_max_episode_steps=500,
Example #17
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
#print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

try:
    agent.load_weights(os.path.dirname(__file__) + '/weights/trained_weight/ddpg_{}_weights.h5f'.format(ENV_NAME))
    print("find weights-file")
except:
    print("not found weights-file")

rospy.init_node('pub_drive', anonymous=True)
rospy.Subscriber("/observe", Float32MultiArray, callback_observe)

pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
r = rospy.Rate(10)  # 10 Hz

fig, ax = plt.subplots(1, 1)
ax.set_ylim(math.radians(-50), math.radians(50))
x = []
y = []
step_count = 0
Example #18
                                          mu=0.,
                                          sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=50,
                  nb_steps_warmup_actor=50,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
train_steps = 197000
agent.load_weights(
    'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights_100000_depth_4.61.h5f'
    .format(depth)
)  # these wgts are from training XXX, but are used in the forecast models
# agent.load_weights('ddpg_swmm_weights_{}.h5f'.format(train_steps))

agent.fit(env, nb_steps=train_steps, verbose=1)
agent.save_weights(
    'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights2_{}_depth_{}.h5f'
    .format(train_steps, depth),
    overwrite=True)

print("training finished at: ", datetime.now())

# # get agent weights and names
# actor_weights = agent.actor.get_weights()
# critic_weights = agent.critic.get_weights()
# actor_names = [weight.name for layer in agent.actor.layers for weight in layer.weights]
Example #19
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
Example #20
def train_with_params(sigma_v=0., sigma_o=0., test=False):

    ENV_NAME = 'PongSolo'
    conf_name = '{}_sv_{}_so_{}'.format(ENV_NAME, sigma_v, sigma_o)  # sv, so = sigma_v and sigma_orientation

    # Get the environment and extract the number of actions.
    env = EnvPongSolo(sigma_v=sigma_v, sigma_o=sigma_o)
    np.random.seed(123)

    #assert len(env.action_space.shape) == 1
    nb_actions = 1
    leaky_alpha = 0.2

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(100))
    actor.add(LeakyReLU(leaky_alpha))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])  # Keras 2 replacement for merge(..., mode='concat')
    x = Dense(200)(x)
    x = LeakyReLU(leaky_alpha)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    n_steps = 5000000
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=1., mu=0., sigma=.3, sigma_min=0.01, n_steps_annealing=n_steps)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.

    directory_weights = "weights/ddpg/{}".format(conf_name)

    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)

    if not test:
        perfCheckPoint = ModelPerformanceCheckpoint('{}/checkpoint_avg{}_steps{}'.format(directory_weights,'{}','{}'), 800)
        agent.fit(env, nb_steps=n_steps, visualize=False, verbose=2, nb_max_episode_steps=200,callbacks=[perfCheckPoint])

        # After training is done, we save the final weights.
        agent.save_weights('{}/final.h5f'.format(directory_weights), overwrite=True)

        # Finally, evaluate our algorithm for 100 episodes.
        agent.test(env, nb_episodes=100, visualize=False, nb_max_episode_steps=200)
    else:
        agent.load_weights('{}/final.h5f'.format(directory_weights))
        agent.test(env, nb_episodes=1000, visualize=False, nb_max_episode_steps=200)
Example #21
                  random_process=random_process,
                  gamma=GAMMA,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = build_callbacks(ENV_NAME)
test_callbacks = build_test_callbacks(ENV_NAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
#agent.fit(env, nb_steps=500000, visualize=False, callbacks=callbacks, verbose=1, gamma=GAMMA, nb_max_episode_steps=30)

# After training is done, we save the final weights.
#agent.save_weights('results/InvertedPendulum/exp_6/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.load_weights(
    'results/InvertedPendulum/exp_6/ddpg_{}_weights.h5f'.format(ENV_NAME))

# Finally, evaluate our algorithm for 5 episodes.
history, state_history_nominal, episode_reward_nominal = agent.test(env, nb_episodes=1, visualize=True, action_repetition=1, callbacks=test_callbacks, nb_max_episode_steps=30, \
                                                         initial_state=[0, np.pi, 0, 0], std_dev_noise=0, gamma=GAMMA)
u_max = 12
print(episode_reward_nominal, state_history_nominal)
'''
f = open("results/InvertedPendulum/exp_3/data.txt", "a")

for i in frange(0.0, 1.0, 0.05):
    episode_reward_n = 0
    Var_n = 0
    terminal_mse = 0
    Var_terminal_mse = 0
    for j in range(n_samples):
Example #22
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=10,
                  nb_steps_warmup_actor=10,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(optimizer=Adam(
    lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False))

if __name__ == '__main__':
    # Load
    agent.load_weights('OsmoEnv.h5f')

    # Train
    # agent.fit(env, nb_steps=50000, verbose=1, nb_max_episode_steps=200)
    #
    # # Weights
    # agent.save_weights('OsmoEnv.h5f',overwrite=True)
    #
    # # Load
    # agent.load_weights('OsmoEnv.h5f')

    # Test
    # agent.test(env, visualize=False, nb_episodes=50, nb_max_episode_steps=200)

    #Play
    for _ in range(10):
Example #23
class KerasDDPGAgent(object):
    '''
    classdocs
    '''
    def __init__(self, opts):
        self.metadata = {'discrete_actions': False}

        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        # actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + observation_space_shape))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('linear'))
        print(actor.summary())

        # critic network
        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space_shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.agent.fit(env,
                       nb_steps=nb_steps,
                       visualize=visualize,
                       verbose=verbosity,
                       nb_max_episode_steps=200)

    def test(self, env, nb_episodes, visualize):
        # Evaluate the agent for the requested number of episodes.
        self.agent.test(env,
                        nb_episodes=nb_episodes,
                        visualize=visualize,
                        nb_max_episode_steps=200)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        self.agent.save_weights(save_file, overwrite=overwrite)
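A minimal usage sketch for the wrapper above, added here for illustration (the environment name is a placeholder and the opts dict is only stored by the constructor):

import gym

env = gym.make('Pendulum-v0')  # placeholder continuous-control environment
agent = KerasDDPGAgent(opts={})
agent.configure(env.observation_space.shape, env.action_space.shape[0])
agent.train(env, nb_steps=50000, visualize=False, verbosity=1)
agent.save_weights('keras_ddpg_pendulum.h5f', overwrite=True)
agent.test(env, nb_episodes=5, visualize=False)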
Example #24
agent.test(env, nb_episodes=2, visualize=True, nb_max_episode_steps=1000)

mode = 'train'
if mode == 'train':
    filename = 'test'
    # we save the learning history; it can later be used to plot the reward evolution
    # with open('_experiments/history_ddpg__redetorcs'+filename+'.pickle', 'wb') as handle:
    #  pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
    #After training is done, we save the final weights.
    # agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format('test'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
elif mode == 'test':
    # env.set_test_performace() # Define the initialization as performance test
    # env.set_save_experice()   # Save the test to plot the results after
    agent.load_weights('h5f_files/ddpg_{}_weights.h5f'.format('test'))
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)

# env.reset()
# # env.render()
# done = False
# print(done)
# count = 0
# while done == False:
#     if count < 15:
#         observation, reward, done, info = env.step([0, 0, 0, 0])
#     elif count < 30:
#         observation, reward, done, info = env.step([0, 0.5, 0, 0])
#     elif count < 60:
#         observation, reward, done, info = env.step([0, 0, 0, 0])
#     else:
Example #25
                              nb_steps_warmup_critic=50,
                              nb_steps_warmup_actor=50,
                              random_process=random_process,
                              gamma=.99,
                              target_model_update=1e-3)
            agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()

        else:
            agent.load_weights(
                'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f'
            )
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()

        if file_num % 100 == 0:
            print("finished training on ", file_num, " files")
        file_num += 1

# loop through testing envs
for file in os.scandir(os.path.join(data_dir, "syn_inp_test")):
    if file.name.endswith('.inp'):
        print('testing ', file.name)
Example #26
class DDPG:
    """Deep Deterministic Policy Gradient Class

        This is an implementation of DDPG for continuous control tasks built with the high-level keras-rl library.

        Args:
            env_name (str): Name of the gym environment
            weights_dir (str): Directory for storing model weights (actor and critic are saved as separate files)
            actor_layers (list(int)): Number of neurons in each successive hidden layer of the actor
            critic_layers (list(int)): Number of neurons in each successive hidden layer of the critic
            n_episodes (int): Maximum number of training episodes
            visualize (bool): Whether to open a popup window with the environment view
    """
    def __init__(self,
                 env_name='MountainCarContinuous-v0',
                 weights_dir="model_weights",
                 actor_layers=[64, 64, 32],
                 critic_layers=[128, 128, 64],
                 n_episodes=200,
                 visualize=True):
        self.env_name = env_name
        self.env = gym.make(env_name)
        np.random.seed(123)
        self.env.seed(123)
        self.actor_layers = actor_layers
        self.critic_layers = critic_layers
        self.n_episodes = n_episodes
        self.visualize = visualize
        self.n_actions = self.env.action_space.shape[0]
        self.n_states = self.env.observation_space.shape
        self.weights_file = os.path.join(
            weights_dir, 'ddpg_{}_weights.h5f'.format(self.env_name))
        self.actor = None
        self.critic = None
        self.agent = None
        self.action_input = None

    def _make_actor(self):
        """Internal helper function to create an actor custom model
        """
        self.actor = Sequential()
        self.actor.add(Flatten(input_shape=(1, ) + self.n_states))
        for size in self.actor_layers:
            self.actor.add(Dense(size, activation='relu'))
        self.actor.add(Dense(self.n_actions, activation='linear'))
        self.actor.summary()

    def _make_critic(self):
        """Internal helper function to create an actor custom model
        """
        action_input = Input(shape=(self.n_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + self.n_states,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        input_layer = Concatenate()([action_input, flattened_observation])
        hidden_layers = Dense(self.critic_layers[0],
                              activation='relu')(input_layer)
        for size in self.critic_layers[1:]:
            hidden_layers = Dense(size, activation='relu')(hidden_layers)
        output_layer = Dense(1, activation='linear')(hidden_layers)
        self.critic = Model(inputs=[action_input, observation_input],
                            outputs=output_layer)
        self.critic.summary()
        self.action_input = action_input

    def _make_agent(self):
        """Internal helper function to create an actor-critic custom agent model
        """
        if self.actor is None:
            self._make_actor()
        if self.critic is None:
            self._make_critic()
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=self.n_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=self.n_actions,
                               actor=self.actor,
                               critic=self.critic,
                               critic_action_input=self.action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def _load_or_make_agent(self):
        """Internal helper function to load an agent model, creates a new if no model weights exists
        """
        if self.agent is None:
            self._make_agent()
        if os.path.exists(self.weights_file):
            logger.info(
                "Found existing weights for the model for this environment. Loading..."
            )
            self.agent.load_weights(self.weights_file)

    def train(self):
        """Train the DDPG agent
        """
        self._load_or_make_agent()
        self.agent.fit(self.env,
                       nb_steps=50000,
                       visualize=self.visualize,
                       verbose=1,
                       nb_max_episode_steps=self.n_episodes)
        self.agent.save_weights(self.weights_file, overwrite=True)

    def test(self, nb_episodes=5):
        """Test the DDPG agent
        """
        logger.info(
            "Testing the agents with {} episodes...".format(nb_episodes))
        self.agent.test(self.env,
                        nb_episodes=nb_episodes,
                        visualize=self.visualize,
                        nb_max_episode_steps=200)
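A minimal usage sketch for this class, added here for illustration and relying only on the constructor defaults documented in the docstring above:

import os

os.makedirs('model_weights', exist_ok=True)  # the class does not create the weights directory itself

ddpg = DDPG(env_name='MountainCarContinuous-v0', weights_dir='model_weights', visualize=False)
ddpg.train()               # loads existing weights if present, fits for 50000 steps, then saves
ddpg.test(nb_episodes=5)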
Example #27
class Agent:
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env

        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()

        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions,
                                                       theta=0.75,
                                                       mu=0.5,
                                                       sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions,
                               actor=self.actor,
                               critic=self.critic,
                               critic_action_input=action_input,
                               memory=self.memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=self.random_process,
                               gamma=.99,
                               target_model_update=1e-3,
                               processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')

    def build_loss(self):
        return ['mse']

    def build_actor(self, env):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(self.nb_actions, activation='hard_sigmoid'))
        actor.summary()

        inD = Input(shape=(1, ) + env.observation_space.shape)
        out = actor(inD)

        return Model(inD, out)

    def build_critic(self, env):
        action_input = Input(shape=(self.nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Dense(64, activation='relu')(flattened_observation)
        x = Concatenate()([x, action_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(1)(x)

        critic = Model(inputs=[action_input, observation_input], outputs=x)
        critic.summary()

        return critic, action_input

    def build_sym_actor(self):
        stateSwap = []
        actionSwap = []
        state_desc = self.env.get_state_desc()
        for x in state_desc.keys():
            keys = list(state_desc[x].keys())
            for (k, key) in enumerate(keys):
                if '_r' in key:
                    i = keys.index(key.replace('_r', '_l'))
                    if i != -1:
                        stateSwap += [(k, i), (i, k)]
        muscle_list = []
        for i in range(self.env.osim_model.muscleSet.getSize()):
            muscle_list.append(self.env.osim_model.muscleSet.get(i).getName())
        for (k, key) in enumerate(muscle_list):
            if '_r' in key:
                i = muscle_list.index(key.replace('_r', '_l'))
                if i != -1:
                    actionSwap += [(k, i), (i, k)]

        stateSwapMat = np.zeros((self.nb_states, self.nb_states))
        actionSwapMat = np.zeros((self.nb_actions, self.nb_actions))
        stateSwapMat[0, 0]
        for (i, j) in stateSwap:
            stateSwapMat[i, j] = 1
        for (i, j) in actionSwap:
            actionSwapMat[i, j] = 1

        def ssT(shape, dtype=None):
            if shape != stateSwapMat.shape:
                raise Exception("State Swap Tensor Shape Error")
            return K.variable(stateSwapMat, dtype=dtype)

        def asT(shape, dtype=None):
            if shape != actionSwapMat.shape:
                raise Exception("Action Swap Tensor Shape Error")
            return K.variable(actionSwapMat, dtype=dtype)

        model1 = Sequential()
        model1.add(
            Dense(self.nb_states,
                  input_shape=(1, ) + self.env.observation_space.shape,
                  trainable=False,
                  kernel_initializer=ssT,
                  bias_initializer='zeros'))
        inD = Input(shape=(1, ) + self.env.observation_space.shape)
        symState = model1(inD)
        symPol = self.actor(symState)
        model2 = Sequential()
        model2.add(
            Dense(self.nb_actions,
                  input_shape=(1, self.nb_actions),
                  trainable=False,
                  kernel_initializer=asT,
                  bias_initializer='zeros'))
        out = model2(symPol)

        return Model(inD, out)

    def fit(self, **kwargs):
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        out = self.agent.fit(self.env, **kwargs)
        print("\n\ndo symetric loss back propigation\n\n")
        states = np.random.normal(
            0, 10, (kwargs['nb_steps'] // 200, 1, self.nb_states))
        actions = self.actor.predict_on_batch(states)
        self.sym_actor.train_on_batch(states, actions)
        return out

    def test(self, **kwargs):
        print("testing")
        print("VA:", self.env.get_VA())
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        return self.agent.test(self.env, **kwargs)

    def test_get_steps(self, **kwargs):
        return self.test(**kwargs).history['nb_steps'][-1]

    def save_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.save_weights(filename.format("opensim"), overwrite=True)
        self.save_processor()

    def load_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.load_weights(filename.format("opensim"))
        self.load_processor()

    def search_VA(self):
        # 1-D line search
        state = self.env.get_VA()
        goal = 0.0
        if abs(state - goal) < 0.01:
            self.env.upd_VA(goal)
            return
        steps = self.test_get_steps(nb_episodes=1,
                                    visualize=False,
                                    nb_max_episode_steps=1000)
        dv = 0.0
        dsteps = steps
        while (state - dv > goal and dsteps > 0.8 * steps):
            dv += 0.02
            self.env.upd_VA(state - dv)
            dsteps = self.test_get_steps(nb_episodes=1,
                                         visualize=False,
                                         nb_max_episode_steps=1000)
        if abs((state - dv) - goal) < 0.01:
            self.env.upd_VA(goal)
        else:
            dv -= 0.02
            self.env.upd_VA(state - dv)

    def save_processor(self):
        np.savez('osim-rl/processor.npz',
                 _sum=self.processor.normalizer._sum,
                 _count=np.array([self.processor.normalizer._count]),
                 _sumsq=self.processor.normalizer._sumsq,
                 mean=self.processor.normalizer.mean,
                 std=self.processor.normalizer.std)

    def load_processor(self):
        f = np.load('osim-rl/processor.npz')
        dtype = f['_sum'].dtype
        if self.processor.normalizer is None:
            self.processor.normalizer = WhiteningNormalizer(
                shape=(1, ) + self.env.observation_space.shape, dtype=dtype)
        self.processor.normalizer._sum = f['_sum']
        self.processor.normalizer._count = int(f['_count'][0])
        self.processor.normalizer._sumsq = f['_sumsq']
        self.processor.normalizer.mean = f['mean']
        self.processor.normalizer.std = f['std']
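
The build_sym_actor method above relies on frozen Dense layers whose kernels are fixed left/right swap matrices, so observations are mirrored, passed through the shared actor, and the resulting actions mirrored back. A minimal sketch of that swap-matrix trick on a hypothetical 4-element vector with two left/right pairs (the indices are illustrative, not taken from the osim state layout):

import numpy as np

# Hypothetical layout: indices (0, 1) form a right/left pair, as do (2, 3).
swap_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
swap_mat = np.zeros((4, 4))
for i, j in swap_pairs:
    swap_mat[i, j] = 1

v = np.array([10., 20., 30., 40.])
# A Dense layer computes output = input @ kernel (+ bias), so a frozen kernel
# of this form simply exchanges the paired entries:
print(v @ swap_mat)  # [20. 10. 40. 30.]
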
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.load_weights('aviral_jump_new.h5f')
    print('weights loaded')
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=1000, log_interval=1000)
    print('TRAINED THE MODELS')
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    print(args.model)
    agent.load_weights(args.model)
    # sys.exit(0)
    # Finally, evaluate our algorithm for 1 episode.
    h = Histories()
    agent.test(env, nb_episodes=10, visualize=False, nb_max_episode_steps=1000, action_repetition=2, callbacks=[h])
    # print h.action_list
    f = open('values_jump_new.txt', 'w')
Example #29
0
# random process for exploration noise
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=theta, dt=0.01, mu=0., sigma=.25)
# define the DDPG agent
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=GAMMA, target_model_update=1e-3)
# compile the model
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = common_func.build_callbacks(ENV_NAME, log_filename_pre, filename_exp)

# ----------------------------------------------------------------------------------------------------------------------------------------
# Training phase

# fitting the agent
#agent.fit(env, nb_steps=800000, visualize=False, callbacks=callbacks, verbose=1, gamma=GAMMA, nb_max_episode_steps=900)

# After training is done, we save the final weights.
#agent.save_weights('../results/Swimmer6/exp_1/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# -----------------------------------------------------------------------------------------------------------------------------------------
# Testing phase
agent.load_weights(log_filename_pre+filename_exp +'/ddpg_{}_weights.h5f'.format(ENV_NAME))
history, state_history_nominal, episode_reward_nominal, action_history = agent.test(env, nb_episodes=1, visualize=True, action_repetition=1, nb_max_episode_steps=STEPS_PER_EPISODE, \
                                                         initial_state=np.zeros((16,)), std_dev_noise=20, gamma=GAMMA, process_noise_std=process_noise_std)

# np.savetxt(log_filename_pre+filename_exp+'/s3_nominal_action.txt', action_history)
# np.savetxt(log_filename_pre+filename_exp+'/s3_nominal_state.txt', state_history_nominal)

print(state_history_nominal,action_history)
# -----------------------------------------------------------------------------------------------------------------------------------------
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

memory = SequentialMemory(limit=50000, window_length=1)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  gamma=.99,
                  target_model_update=1e3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))

# def B(b):
#     env.render = False
#     state = env.reset()
#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand = optimal_action, a = a, b = b, theta = theta)

# state = env.reset()
# pos = env.arraystate2pos(state)
# optimal_action = np.zeros(2)
Example #31
0
def main():
    """Create environment, build models, train."""
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"),
                    obs_xform=xform.Basic(30, 4),
                    episode_steps=STEPS_PER_EPISODE,
                    client_id=3)
    #env = MarketEnv(("EUR", "CASH", "IDEALPRO", "USD"), max_quantity=20000, quantity_increment=20000, obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=5, afterhours=False)
    obs_size = np.prod(env.observation_space.shape)

    # Actor model
    dropout = 0.1
    actor = Sequential([
        Flatten(input_shape=(1, ) + env.observation_space.shape),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(1, activation='tanh'),
    ])
    print('Actor model')
    actor.summary()

    action_input = Input(shape=(1, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = BatchNormalization()(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print('\nCritic Model')
    critic.summary()

    memory = SequentialMemory(limit=EPISODES * STEPS_PER_EPISODE,
                              window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.5, mu=0., sigma=.5)
    agent = DDPGAgent(
        nb_actions=1,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        nb_steps_warmup_critic=STEPS_PER_EPISODE * WARMUP_EPISODES,
        nb_steps_warmup_actor=STEPS_PER_EPISODE * WARMUP_EPISODES,
        random_process=random_process,
        gamma=0.95,
        target_model_update=0.01)
    agent.compile('rmsprop', metrics=['mae'])
    weights_filename = 'ddpg_{}_weights.h5f'.format(env.instrument.symbol)
    try:
        agent.load_weights(weights_filename)
        # DDPGAgent actually uses two separate files for actor and critic derived from this filename.
        print('Using weights from {}'.format(weights_filename))
    except IOError:
        pass
    agent.fit(env,
              nb_steps=EPISODES * STEPS_PER_EPISODE,
              visualize=True,
              verbose=2,
              nb_max_episode_steps=STEPS_PER_EPISODE)
    agent.save_weights(weights_filename, overwrite=True)
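
As the comment in the try block notes, keras-rl's DDPGAgent stores the actor and critic weights in two separate files derived from the path passed to save_weights/load_weights. A small sketch of that naming convention, based on the keras-rl implementation (the concrete path is just an example; verify against your installed version):

import os

filepath = 'ddpg_ES_weights.h5f'      # hypothetical path
root, ext = os.path.splitext(filepath)
actor_file = root + '_actor' + ext    # 'ddpg_ES_weights_actor.h5f'
critic_file = root + '_critic' + ext  # 'ddpg_ES_weights_critic.h5f'
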
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=0.2,
                                          mu=0.0,
                                          sigma=0.25,
                                          sigma_min=0.01,
                                          n_steps_annealing=500000)
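# Note: with sigma_min and n_steps_annealing set, keras-rl anneals the noise scale
# linearly, roughly current_sigma = max(sigma_min, sigma - (sigma - sigma_min) *
# step / n_steps_annealing), i.e. from 0.25 down to 0.01 over 500000 steps here.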

# agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
#                   random_process=random_process, gamma=.99, target_model_update=1E-3,
#                   memory=memory, nb_steps_warmup_critic=10000, nb_steps_warmup_actor=100000)

agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=25000,
                  nb_steps_warmup_actor=25000,
                  random_process=None,
                  gamma=.99,
                  target_model_update=1E-3)

agent.compile(Adam(lr=0.001, clipnorm=1.))  # was 1
#
# agent.fit(env, nb_steps=500000, visualize=False, verbose=1, nb_max_episode_steps = 10000,  log_interval = 10000,
#           action_repetition = 10)
# agent.save_weights('ddpg_{}_SimpleSimFuelReward.h5f'.format(ENV_NAME), overwrite=True)

agent.load_weights('ddpg_{}_SimpleSimFuelReward.h5f'.format(ENV_NAME))
agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=10000)
Example #33
0
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training slows it down
# quite a lot, so it is disabled here. You can always safely abort the training
# prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
                              nb_steps_warmup_critic=50,
                              nb_steps_warmup_actor=50,
                              random_process=random_process,
                              gamma=.99,
                              target_model_update=1e-3)
            agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
            #agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')  # added to continue training
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()

        else:
            agent.load_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f'
            )
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()

        if file_num % 1000 == 0:
            print("finished training on ", file_num, " files")
        file_num += 1

# loop through testing envs
for file in os.scandir("/home/bdb3m/swmm_rl/syn_inp_test_gated"):
    if file.name.endswith('.inp'):
        print('testing ', file.name)