Code Example #1
def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    
    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
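For context, every snippet on this page is shown without its imports. Below is a minimal sketch of what the example above relies on, assuming the keras-rl package layout (rl.agents, rl.memory, rl.random) and standalone Keras; tf.keras-based forks would adjust the module paths accordingly.

import random

import gym
import numpy as np

from keras.layers import Activation, Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess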
Code Example #2
def fit_ddpg(env, force: bool = False, root_dir: str = ""):
    nb_actions = env.action_space.n

    loaded = False
    actor_weights_path = pathlib.Path(f"{root_dir}/ddpg-actor.h5")
    critic_weights_path = pathlib.Path(f"{root_dir}/ddpg-critic.h5")
    train_history_path = pathlib.Path(f"{root_dir}/ddpg.log")

    if not force and actor_weights_path.exists():
        actor = load_model(str(actor_weights_path))
        critic = load_model(str(critic_weights_path), compile=False)
        with open(train_history_path, "rb") as f:
            history = pickle.load(f)
        loaded = True
    else:
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(nb_actions))
        actor.add(Activation("linear"))

        action_input = Input(shape=(nb_actions, ), name="action_input")
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name="observation_input")
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation("relu")(x)
        x = Dense(32)(x)
        x = Activation("relu")(x)
        x = Dense(32)(x)
        x = Dropout(0.5)(x)
        x = Activation("relu")(x)
        x = Dense(1)(x)
        x = Activation("linear")(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=0.15,
                                              mu=0.0,
                                              sigma=5)

    ddpg = DDPGAgent(
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=critic.inputs[0],
        memory=memory,
        nb_steps_warmup_critic=1000,
        nb_steps_warmup_actor=1000,
        random_process=random_process,
        gamma=0.99,
        target_model_update=1e-3,
    )

    ddpg.compile(Adam(lr=1e-3), metrics=["mae"])

    if loaded:
        return ddpg, history

    metrics = Metrics(ddpg)

    history = ddpg.fit(
        env,
        nb_steps=10000,
        start_step_policy=env.start_step_policy,
        nb_max_start_steps=10,
        nb_max_episode_steps=100,
        callbacks=[metrics],
    )

    actor.save(str(actor_weights_path))
    critic.save(str(critic_weights_path))
    with open(train_history_path, "wb") as f:
        history = history.history
        history.update(metrics.metrics)
        pickle.dump(history, f)

    return ddpg, history
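A hypothetical call to the fit_ddpg function above could look like the sketch below; make_env is a placeholder for however the surrounding project builds its environment (it must expose action_space.n, observation_space.shape and the start_step_policy used in fit), and ./checkpoints is an arbitrary, pre-existing output directory.

env = make_env()  # placeholder: project-specific environment factory
ddpg, history = fit_ddpg(env, force=False, root_dir="./checkpoints")

# A second call with force=False reloads ddpg-actor.h5 / ddpg-critic.h5 and the
# pickled training history instead of retraining; force=True retrains from scratch.
print(sorted(history.keys()))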
Code Example #3
File: ddpg_pong_solo.py Project: hippover/keras-rl
def train_with_params(sigma_v=0., sigma_o=0., test=False):

    ENV_NAME = 'PongSolo'
    conf_name = '{}_sv_{}_so_{}'.format(ENV_NAME, sigma_v, sigma_o)  # sv, so = sigma_v and sigma_orientation

    # Get the environment and extract the number of actions.
    env = EnvPongSolo(sigma_v=sigma_v, sigma_o=sigma_o)
    np.random.seed(123)

    #assert len(env.action_space.shape) == 1
    nb_actions = 1
    leaky_alpha = 0.2

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(100))
    actor.add(LeakyReLU(leaky_alpha))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(200)(x)
    x = LeakyReLU(leaky_alpha)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    n_steps = 5000000
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=1., mu=0., sigma=.3, sigma_min=0.01, n_steps_annealing=n_steps)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.

    directory_weights = "weights/ddpg/{}".format(conf_name)

    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)

    if test == False:
        perfCheckPoint = ModelPerformanceCheckpoint('{}/checkpoint_avg{}_steps{}'.format(directory_weights,'{}','{}'), 800)
        agent.fit(env, nb_steps=n_steps, visualize=False, verbose=2, nb_max_episode_steps=200,callbacks=[perfCheckPoint])

        # After training is done, we save the final weights.
        agent.save_weights('{}/final.h5f'.format(directory_weights), overwrite=True)

        # Finally, evaluate our algorithm for 100 episodes.
        agent.test(env, nb_episodes=100, visualize=False, nb_max_episode_steps=200)
    else:
        agent.load_weights('{}/final.h5f'.format(directory_weights))
        agent.test(env, nb_episodes=1000, visualize=False, nb_max_episode_steps=200)
Code Example #4
def run_ddpg():

    global N_NODE_NETWORK

    env = SnakeGymContinuous()
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=500,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)

    agent.compile('adam', metrics=['mae'])

    agent.fit(env,
              nb_steps=50000,
              visualize=True,
              verbose=2,
              nb_max_episode_steps=200)
    agent.save_weights('ddpg_SnakeGymContinuous_weights.h5f', overwrite=True)

    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #5
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

agent.fit(env,
          nb_steps=50000,
          visualize=True,
          verbose=1,
          nb_max_episode_steps=200)

agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #6
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env,
              nb_steps=nallsteps,
              visualize=False,
              verbose=1,
              nb_max_episode_steps=200,
              log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
Code Example #7
                                              mu=0.0,
                                              sigma=0.3)
    agent = DDPGAgent(
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        nb_steps_warmup_critic=1000,
        nb_steps_warmup_actor=1000,
        random_process=random_process,
        gamma=0.99,
        target_model_update=1e-3,
    )
    agent.compile(Adam(lr=0.001, clipnorm=1.0), metrics=["mae"])

    # # Okay, now it's time to learn something! We visualize the training here for show, but this
    # # slows down training quite a lot. You can always safely abort the training prematurely using
    # # Ctrl + C.
    agent.fit(env,
              nb_steps=100000,
              visualize=False,
              verbose=1,
              nb_max_episode_steps=288)

    # # After training is done, we save the final weights.
    agent.save_weights("ddpg_{}_weights.h5f".format(ENV_NAME), overwrite=True)

    # # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=288)
Code Example #8
File: rlearn_stokes.py Project: aasensio/DNHazel
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    plot_model(critic, to_file='critic.png', show_shapes=True)

# # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# # even the metrics!
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                   random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# # Okay, now it's time to learn something! We visualize the training here for show, but this
# # slows down training quite a lot. You can always safely abort the training prematurely using
# # Ctrl + C.
    agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200)

# # After training is done, we save the final weights.
    agent.save_weights('ddpg_stokes_weights.h5f', overwrite=True)

# # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)


Code Example #9
# '''
#
# history = agent.warm_fit(env, policy, policy_list, nb_steps=5e6, visualize=False, log_interval=1000, verbose=2, nb_max_episode_steps=2000)
# sio.savemat('warm-up-' + ENV_NAME + '-' + nowtime + '.mat', history.history)
# agent.save_weights('ddpg_{}_weights_after_warm_start.h5f'.format(ENV_NAME), overwrite=True)
# '''
# the test after warm_up
# '''
# history = agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=2000)
# sio.savemat('test-'+ENV_NAME+'-'+nowtime+'.mat',history.history)
# after = history.history['episode_reward']
# print('before training ', before)
# print('after training ', after)


history = agent.fit(env, nb_steps=5e6, visualize=False, log_interval=1000, verbose=2, nb_max_episode_steps=2000)

'''
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = agent.fit(env, nb_steps=5e6, visualize=False, log_interval=1000, verbose=2, nb_max_episode_steps=2000)
# print(history.history['metrics'])
action = np.zeros([3])
observation = np.zeros([env.observation_space.shape[0]])
reward = np.zeros([1])
metrics = np.zeros([3])
for time in range(len(history.history['action'])):
    action = np.vstack((action,history.history['action'][time]))
    observation = np.vstack((observation, history.history['observation'][time]))
    reward = np.vstack((reward, history.history['reward'][time]))
Code Example #10
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.995,
                  target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                             memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                             gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=1e-3, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    history_cb = agent.fit(env,
                           nb_steps=nallsteps,
                           visualize=args.visualize,
                           verbose=1,
                           nb_max_episode_steps=None,
                           log_interval=1000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)
    reward_history = history_cb.history["episode_reward"]
    np.savetxt("episode_reward.txt", reward_history, delimiter=",")

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
Code Example #11
    with open(f"{tb_folder_path}/actor_config.json", "w") as outfile:
        json_string = actor.to_json()
        json.dump(json_string, outfile)

    with open(f"{tb_folder_path}/critic_config.json", "w") as outfile:
        json_string = critic.to_json()
        json.dump(json_string, outfile)

    # This function saves all the important hyperparameters to the run summary file.
    save_hyperparameters(["DDPG.py", "gym_bizhawk.py"], f"{tb_folder_path}/run_summary.txt")

    start_time_ascii = time.asctime(time.localtime(time.time()))
    start_time = time.time()
    print("Training has started!")
    # BREADCRUMBS_START
    callback = [callbacks.TensorBoard(log_dir=tb_folder_path, write_graph=False)]
    agent.fit(env, nb_steps=8192 * 2, visualize=True, verbose=1, nb_max_episode_steps=512, callbacks=callback)
    # BREADCRUMBS_END

    # After training is done, we save the final weights.
    agent.save_weights('{}\\{}_run{}_weights.h5f'.format(tb_folder_path, ENV_NAME, folder_count), overwrite=True)

    total_run_time = round(time.time() - start_time, 2)
    print("Training is done.")
    send_email(f"The training of {run_name} has finished!\nIt started at {start_time_ascii} and took {total_run_time/60} minutes.")

    env.shut_down_bizhawk_game()
    # Finally, evaluate our algorithm for 5 episodes.
    # movie.save("C:/Users/user/Desktop/VideoGame Ret/RL Retrieval/movie")
    # dqn.test(env, nb_episodes=1, visualize=False)
Code Example #12
def main():
    set_gpu_option()
    # OPTIONS
    ENV_NAME = 'DDPGEnv-v0'
    TIME_STEP = 30

    # Get the environment and extract the number of actions.

    PATH_TRAIN = '/home/data/training_x_150.h5'
    PATH_TEST = '/home/data/test_x_150.h5'
    """
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)
    """
    store = pd.HDFStore(PATH_TRAIN, mode='r')
    varieties_list = store.keys()
    print('varieties_list: ', varieties_list)
    print('num varieties: ', len(varieties_list))
    
    variety = 'RB'
    print('variety: ', variety)
    
    # get selected features
    SELECTED_FACTOR_PATH = '~/feature_selection/根据互信息选出的特征,根据重要性排序.csv'
    selected_factor_df = pd.read_csv(SELECTED_FACTOR_PATH, index_col=0)
    selected_factor_list = selected_factor_df[variety].to_list()
    
    env = DDPGEnv(TIME_STEP, variety=variety, path=PATH_TRAIN, selected_factor_list=selected_factor_list)
    #env_test = DDPGEnv(TIME_STEP, variety=variety, path=PATH_TEST,  selected_factor_list=selected_factor_list)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.shape[0]
    print('nb_actions: ', nb_actions)

    print('env.observation_space.shape: ', env.observation_space.shape)
    print('env.observation_space: ', env.observation_space)
    
    # create actor
    actor = create_actor(input_shape=env.shape, nb_actions=nb_actions)
    
    # create critic
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=env.shape, name='observation_input')
    critic = create_critic(action_input, observation_input)
    


    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)

    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    ddpg = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3, processor=DDPGProcessor())
    ddpg.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_grads=True)
    for _ in range(3):
        ddpg.fit(env, nb_steps=140000, nb_max_episode_steps=140000, visualize=False, verbose=2)

    """
Code Example #13
# Set up the agent, using the Keras models defined above along with the policy and actions.

#Discrete actions:
policy = EpsGreedyQPolicy()
testPolicy = GreedyQPolicy()
#agent = DQNAgent(model=actorModel, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, policy=policy, test_policy=testPolicy)

#continuous actions:
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.3)
agent = DDPGAgent(actor=actorModel,
                  critic=criticModel,
                  nb_actions=nb_actions,
                  memory=memory,
                  nb_steps_warmup_actor=100,
                  nb_steps_warmup_critic=100,
                  critic_action_input=action_input,
                  random_process=random_process)

#compile model
agent.compile(Nadam(lr=1e-3, clipnorm=0.1), metrics=['mae'])

# Okay, now it's time to learn something!
# We visualize the training here for show, but this slows down training quite a lot.
agent.fit(env, nb_steps=50000, visualize=True, verbose=2)

#TEST!
#blockingVar = input('Press a key!: ')
agent.test(env, nb_episodes=5, visualize=True)
Code Example #14
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=2000,
                  nb_steps_warmup_actor=10000,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something!
mode = 'test'
if mode == 'train':
    hist = agent.fit(env,
                     nb_steps=1000000,
                     visualize=False,
                     verbose=2,
                     nb_max_episode_steps=1000)
    filename = '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'
    # we save the history of learning, it can further be used to plot reward evolution
    with open('_experiments/history_ddpg__redetorcs' + filename + '.pickle',
              'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
    #After training is done, we save the final weights.
    agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2_action_lim_1'),
                       overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)
elif mode == 'test':
Code Example #15
                                                      theta=.15,
                                                      mu=0.,
                                                      sigma=.1)
            agent = DDPGAgent(nb_actions=nb_actions,
                              actor=actor,
                              critic=critic,
                              critic_action_input=action_input,
                              memory=memory,
                              nb_steps_warmup_critic=50,
                              nb_steps_warmup_actor=50,
                              random_process=random_process,
                              gamma=.99,
                              target_model_update=1e-3)
            agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
            #agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')  # added to continue training
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()

        else:
            agent.load_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f'
            )
            agent.fit(env, nb_steps=train_steps, verbose=0)
            agent.save_weights(
                '/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                overwrite=True)
            env.close()
Code Example #16
File: ddpg_pendulum.py Project: Jaystings/keras-rl
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process,
                  gamma=.99, target_model_update=1e-3, delta_clip=10.)
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #17
File: ddpg_reacher.py Project: ion0113/NAF_sample
                                          theta=.15,
                                          mu=0.,
                                          sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=50000,
          visualize=False,
          verbose=1,
          log_interval=50,
          nb_max_episode_steps=None)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
#agent.load_weights('ddpg_Reacher-v2_weights_128.h5f')

# Finally, evaluate our algorithm for 30 episodes.
agent.test(env, nb_episodes=30, visualize=True, nb_max_episode_steps=None)
Code Example #18
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=0.15,
                                          mu=0.0,
                                          sigma=0.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=nb_steps_warmup,
                  nb_steps_warmup_actor=nb_steps_warmup,
                  random_process=random_process,
                  gamma=0.9,
                  target_model_update=1e-3)
agent.compile(SGD(lr=1e-5, clipvalue=0.001), metrics=['mae'])

callbacks = [
    ModelIntervalCheckpoint(weights_name + '_{step}.h5f', interval=10_000),
    TrainEpisodeLogger(),
    TensorBoard()
]

agent.fit(env,
          nb_steps=nb_steps,
          visualize=False,
          verbose=1,
          callbacks=callbacks)
agent.save_weights(weights_name + '_final.h5f', overwrite=True)

# agent.test(env, nb_episodes=1, visualize=False)
Code Example #19
x = Dense(20)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(20)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('tanh')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3, processor=Processor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
# agent.load_weights('ddpg_20181006160521_Ship_Env_weights.h5f')

for i in range(10):
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
    agent.fit(env, nb_steps=50000, visualize=False, verbose=1, log_interval=5000, callbacks=[logger])

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_{}_weights.h5f'.format(timestamp, 'Ship_Env'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=20000)
Code Example #20
# agent.load_weights(WEIGHTS_FILENAME)

callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
#callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env,
          nb_steps=NUM_STEPS,
          callbacks=callbacks,
          visualize=False,
          verbose=1)  #, nb_max_episode_steps=500)

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a simply-named copy so that tests can be run immediately
# after training.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm (nb_episodes defaults to 1 here).
agent.test(env, visualize=True)  #nb_max_episode_steps=500,
Code Example #21
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    # print(actor.summary())

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)

    if REWARD == "normal":
        ddpg_normal = DDPGAgent(nb_actions=nb_actions,
                                actor=actor,
                                critic=critic,
                                critic_action_input=action_input,
                                memory=memory,
                                nb_steps_warmup_critic=100,
                                nb_steps_warmup_actor=100,
                                random_process=random_process,
                                gamma=.99,
                                target_model_update=1e-3)
        ddpg_normal.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        history_normal = ddpg_normal.fit(env,
                                         nb_steps=150000,
                                         visualize=False,
                                         verbose=2,
                                         nb_max_episode_steps=200)

        # After training is done, we save the final weights.
        ddpg_normal.save_weights(os.path.join(
            LOG_DIR, 'ddpg_normal_{}_weights.h5f'.format(ENV_NAME)),
                                 overwrite=True)
        # Finally, evaluate our algorithm for 5 episodes.
        ddpg_normal.test(env,
                         nb_episodes=5,
                         visualize=False,
                         verbose=2,
                         nb_max_episode_steps=200)

        pandas.DataFrame(history_normal.history).to_csv(
            os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        processor_noisy = PendulumSurrogateProcessor(weight=WEIGHT,
                                                     surrogate=False,
                                                     noise_type=NOISE_TYPE)
        ddpg_noisy = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3,
                               processor=processor_noisy)
        ddpg_noisy.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_noisy = ddpg_noisy.fit(env,
                                       nb_steps=150000,
                                       visualize=False,
                                       verbose=2,
                                       nb_max_episode_steps=200)
        ddpg_noisy.save_weights(os.path.join(
            LOG_DIR, 'ddpg_noisy_{}_weights.h5f'.format(ENV_NAME)),
                                overwrite=True)
        ddpg_noisy.test(env,
                        nb_episodes=5,
                        visualize=False,
                        verbose=2,
                        nb_max_episode_steps=200)

        pandas.DataFrame(history_noisy.history).to_csv(
            os.path.join(LOG_DIR, "noisy.csv"))

    elif REWARD == "surrogate":
        processor_surrogate = PendulumSurrogateProcessor(weight=WEIGHT,
                                                         surrogate=True,
                                                         noise_type=NOISE_TYPE)
        ddpg_surrogate = DDPGAgent(nb_actions=nb_actions,
                                   actor=actor,
                                   critic=critic,
                                   critic_action_input=action_input,
                                   memory=memory,
                                   nb_steps_warmup_critic=100,
                                   nb_steps_warmup_actor=100,
                                   random_process=random_process,
                                   gamma=.99,
                                   target_model_update=1e-3,
                                   processor=processor_surrogate)
        ddpg_surrogate.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_surrogate = ddpg_surrogate.fit(env,
                                               nb_steps=150000,
                                               visualize=False,
                                               verbose=2,
                                               nb_max_episode_steps=200)

        ddpg_surrogate.save_weights(os.path.join(
            LOG_DIR, 'ddpg_surrogate_{}_weights.h5f'.format(ENV_NAME)),
                                    overwrite=True)
        ddpg_surrogate.test(env,
                            nb_episodes=5,
                            visualize=False,
                            verbose=2,
                            nb_max_episode_steps=200)

        pandas.DataFrame(history_surrogate.history).to_csv(
            os.path.join(LOG_DIR, "surrogate.csv"))

    else:
        raise NotImplementedError
Code Example #22
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  batch_size=64,
                  random_process=random_process,
                  gamma=.98,
                  target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=5e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
save_data_path_local = ENV_NAME + '.json'
agent.fit(env,
          nb_steps=1000000,
          visualize=False,
          verbose=1,
          save_data_path=save_data_path_local,
          file_interval=10000)

# After training is done, we save the final weights.
# agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
plot_af(file_path=ENV_NAME + '.json', save_file_name=ENV_NAME + '.png')
# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #23
File: ddpg_dart_walk.py Project: hpgit/HumanFoot
                  memory=memory,
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.

checkpoint_weights_filename = 'checkpoint/dqn_' + ENV_NAME + '_weights_{step}.h5f'
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)
]
log_filename = 'ddpg_{}_log.json'.format(ENV_NAME)
callbacks += [FileLogger(log_filename, interval=200)]
agent.fit(env,
          nb_steps=1000000,
          visualize=True,
          verbose=2,
          callbacks=callbacks)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #24



# Create Actor and Critic networks
k.clear_session()
actor = get_actor(obs_n, actions_n)
critic, action_input = get_critic(obs_n, actions_n)
print(actor.summary())
print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=actions_n, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=actions_n[0], actor=actor, critic=critic, batch_size=64, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99)

agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mse'])

#agent.load_weights('ddpg_' + ENV_NAME + 'weights.h5f')
agent.fit(env, env_name=ENV_NAME, nb_steps=500000, action_repetition=5, visualize=False, verbose=1)



env = wrappers.Monitor(env,'/home/wolfie/PycharmProjects/pythonProject/ddpg_halfcheetah',
                       video_callable=lambda episode_id: True, force=True)


agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000, verbose=1)

p.disconnect()
Code Example #25
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      random_process=random_process,
                      nb_steps_warmup_actor=2048,
                      nb_steps_warmup_critic=1024,
                      target_model_update=1000,
                      gamma=0.9,
                      batch_size=128,
                      memory_interval=2)
    agent.compile([Adam(lr=3e-5), Adam(lr=3e-3)])

    # Start training for 75000 simulation steps
    agent.fit(
        env,
        nb_steps=75000,
        nb_max_start_steps=0,
        nb_max_episode_steps=10000,
        visualize=True,
        action_repetition=1,
        verbose=2,
        log_interval=10000,
        callbacks=[],
    )
    # Test the agent
    hist = agent.test(env,
                      nb_episodes=5,
                      action_repetition=1,
                      visualize=True,
                      nb_max_episode_steps=10000)
Code Example #26
# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000, visualize=False, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=600)
Code Example #27
random_process = GaussianWhiteNoiseProcess(mu=0.0,
                                           sigma=0.8,
                                           sigma_min=0.05,
                                           n_steps_annealing=650000)

# Create the agent
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  random_process=random_process,
                  nb_steps_warmup_actor=32,
                  nb_steps_warmup_critic=32,
                  target_model_update=1e-4,
                  gamma=0.9,
                  batch_size=32)
agent.compile(Adam(lr=1e-4), metrics=['mae'])

# Start training for 7.5M simulation steps (1.5M training steps with actions repeated 5 times)
agent.fit(env,
          nb_steps=1500000,
          visualize=False,
          action_repetition=5,
          verbose=2,
          nb_max_start_steps=0,
          log_interval=10000,
          callbacks=[])

# Test the agent
hist = agent.test(env, nb_episodes=10, action_repetition=1, visualize=True)
Code Example #28
File: agent.py Project: quinnabrvau/osim-rl
class Agent:
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env

        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()

        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions,
                                                       theta=0.75,
                                                       mu=0.5,
                                                       sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions,
                               actor=self.actor,
                               critic=self.critic,
                               critic_action_input=action_input,
                               memory=self.memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=self.random_process,
                               gamma=.99,
                               target_model_update=1e-3,
                               processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')

    def build_loss(self):
        return ['mse']

    def build_actor(self, env):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(self.nb_actions, activation='hard_sigmoid'))
        actor.summary()

        inD = Input(shape=(1, ) + env.observation_space.shape)
        out = actor(inD)

        return Model(inD, out)

    def build_critic(self, env):
        action_input = Input(shape=(self.nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Dense(64, activation='relu')(flattened_observation)
        x = Concatenate()([x, action_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(1)(x)

        critic = Model(inputs=[action_input, observation_input], outputs=x)
        critic.summary()

        return critic, action_input

    def build_sym_actor(self):
        stateSwap = []
        actionSwap = []
        state_desc = self.env.get_state_desc()
        for x in state_desc.keys():
            keys = list(state_desc[x].keys())
            for (k, key) in enumerate(keys):
                if '_r' in key:
                    i = keys.index(key.replace('_r', '_l'))
                    if i != -1:
                        stateSwap += [(k, i), (i, k)]
        muscle_list = []
        for i in range(self.env.osim_model.muscleSet.getSize()):
            muscle_list.append(self.env.osim_model.muscleSet.get(i).getName())
        for (k, key) in enumerate(muscle_list):
            if '_r' in key:
                i = muscle_list.index(key.replace('_r', '_l'))
                if i != -1:
                    actionSwap += [(k, i), (i, k)]

        stateSwapMat = np.zeros((self.nb_states, self.nb_states))
        actionSwapMat = np.zeros((self.nb_actions, self.nb_actions))
        stateSwapMat[0, 0]
        for (i, j) in stateSwap:
            stateSwapMat[i, j] = 1
        for (i, j) in actionSwap:
            actionSwapMat[i, j] = 1

        def ssT(shape, dtype=None):
            if shape != stateSwapMat.shape:
                raise Exception("State Swap Tensor Shape Error")
            return K.variable(stateSwapMat, dtype=dtype)

        def asT(shape, dtype=None):
            if shape != actionSwapMat.shape:
                raise Exception("Action Swap Tensor Shape Error")
            return K.variable(actionSwapMat, dtype=dtype)

        model1 = Sequential()
        model1.add(
            Dense(self.nb_states,
                  input_shape=(1, ) + self.env.observation_space.shape,
                  trainable=False,
                  kernel_initializer=ssT,
                  bias_initializer='zeros'))
        inD = Input(shape=(1, ) + self.env.observation_space.shape)
        symState = model1(inD)
        symPol = self.actor(symState)
        model2 = Sequential()
        model2.add(
            Dense(self.nb_actions,
                  input_shape=(1, self.nb_actions),
                  trainable=False,
                  kernel_initializer=asT,
                  bias_initializer='zeros'))
        out = model2(symPol)

        return Model(inD, out)

    def fit(self, **kwargs):
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        out = self.agent.fit(self.env, **kwargs)
        print("\n\ndo symmetric loss back propagation\n\n")
        states = np.random.normal(
            0, 10, (kwargs['nb_steps'] // 200, 1, self.nb_states))
        actions = self.actor.predict_on_batch(states)
        self.sym_actor.train_on_batch(states, actions)
        return out

    def test(self, **kwargs):
        print("testing")
        print("VA:", self.env.get_VA())
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        return self.agent.test(self.env, **kwargs)

    def test_get_steps(self, **kwargs):
        return self.test(**kwargs).history['nb_steps'][-1]

    def save_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.save_weights(filename.format("opensim"), overwrite=True)
        self.save_processor()

    def load_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.load_weights(filename.format("opensim"))
        self.load_processor()

    def search_VA(self):
        # 1-D line search
        state = self.env.get_VA()
        goal = 0.0
        if abs(state - goal) < 0.01:
            self.env.upd_VA(goal)
            return
        steps = self.test_get_steps(nb_episodes=1,
                                    visualize=False,
                                    nb_max_episode_steps=1000)
        dv = 0.0
        dsteps = steps
        while (state - dv > goal and dsteps > 0.8 * steps):
            dv += 0.02
            self.env.upd_VA(state - dv)
            dsteps = self.test_get_steps(nb_episodes=1,
                                         visualize=False,
                                         nb_max_episode_steps=1000)
        if abs((state - dv) - goal) < 0.01:
            self.env.upd_VA(goal)
        else:
            dv -= 0.02
            self.env.upd_VA(state - dv)

    def save_processor(self):
        np.savez('osim-rl/processor.npz',
                 _sum=self.processor.normalizer._sum,
                 _count=np.array([self.processor.normalizer._count]),
                 _sumsq=self.processor.normalizer._sumsq,
                 mean=self.processor.normalizer.mean,
                 std=self.processor.normalizer.std)

    def load_processor(self):
        f = np.load('osim-rl/processor.npz')
        dtype = f['_sum'].dtype
        if self.processor.normalizer is None:
            self.processor.normalizer = WhiteningNormalizer(
                shape=(1, ) + self.env.observation_space.shape, dtype=dtype)
        self.processor.normalizer._sum = f['_sum']
        self.processor.normalizer._count = int(f['_count'][0])
        self.processor.normalizer._sumsq = f['_sumsq']
        self.processor.normalizer.mean = f['mean']
        self.processor.normalizer.std = f['std']
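A hypothetical driver for the Agent wrapper above is sketched below; make_osim_env is a placeholder for however the project instantiates its osim-rl environment (it must provide get_state_desc, osim_model, get_VA/upd_VA, spec and time_limit, all of which the class uses).

env = make_osim_env()  # placeholder: project-specific osim-rl environment
agent = Agent(env)
agent.fit(nb_steps=100000, nb_max_episode_steps=300, visualize=False, verbose=1)
agent.save_weights()   # writes osim-rl/ddpg_opensim_weights.h5f and osim-rl/processor.npz
agent.test(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)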
Code Example #29
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

try:
    agent.load_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME))
except (OSError, IOError):
    logger.warning("File not found")

n = 0
while True:
    n += 1
    logger.info('Iteration #{}'.format(n))

    #train
    train_history = agent.fit(env,
                              nb_steps=nb_stepis,
                              visualize=False,
                              verbose=1,
                              nb_max_episode_steps=nb_stepis)

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME),
                       overwrite=True)

    # Save memory
    pickle.dump(memory, open("memory2.pkl", "wb"))

    # Finally, evaluate our algorithm for nb_episodes episodes.
    test_history = agent.test(env,
                              nb_episodes=nb_episodes,
                              visualize=False,
                              nb_max_episode_steps=nb_stepis)
コード例 #30
File: example.py Project: csy888000/osim-rl-csy
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is disabled here because it
# slows down training quite a lot. You can always safely abort the training
# prematurely using Ctrl + C.
if args.train:
    agent.fit(env,
              nb_steps=nallsteps,
              visualize=False,
              verbose=1,
              nb_max_episode_steps=env.timestep_limit,
              log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)
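
The snippet stops right after the remote environment is created. A sketch of how such osim-rl submission scripts typically continue, assuming the Client object exposes env_step, env_reset and submit as in the osim-rl HTTP client:

    # Assumed continuation: run the trained policy on the remote environment until
    # the grader stops providing new episodes, then submit the recorded runs.
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0],))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()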
Code Example #31
File: DDPG.py Project: pheredia10/CryptoTrading
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.8,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
nb_steps = 800 * 1440  # alternative values tried: 1*(env.periods-2), 100*(env.periods-2), 100000+1870, env.periods-2
agent.fit(env,
          nb_steps,
          visualize=True,
          verbose=2,
          nb_max_episode_steps=1440,
          log_interval=10)
plt.figure(0)
plt.plot(env.portfolio_value)
plt.figure(1)
noise_over_action_array = np.array(agent.noise_over_action)
noise_over_action_array = np.transpose(noise_over_action_array)
for i in range(nb_actions):
    plt.plot(noise_over_action_array[i, :])
plt.show()
# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights5.h5f'.format('Crypto'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
Code Example #32
File: train.arm.py Project: wiplug/osim-rl
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is disabled here because it
# slows down training quite a lot. You can always safely abort the training
# prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
Code Example #33
                  do_PER=args.PER, epsilon=1e-4, processor=MujocoProcessor(), pretanh_weight=args.pretanh_weight)
agent.compile([Adam(lr=args.actor_lr, clipnorm=args.actor_gradient_clip), Adam(lr=args.critic_lr, clipnorm=args.critic_gradient_clip)], metrics=['mae'])

if(args.HER==True and args.PER==False):
	print("\nTraining with Hindsight Experience Replay\n")
	save_data_path_local = 'HER/'+args.ENV_NAME+'.json'
elif(args.HER==False and args.PER==True):
	print("\nTraining with Prioritised Experience Replay\n")
	save_data_path_local = 'PER/'+args.ENV_NAME+'.json'
elif(args.HER==True and args.PER==True):
	print("\nTraining with Prioritised Hindsight Experience Replay\n")
	save_data_path_local = 'PHER/'+args.ENV_NAME+'.json'

if(args.train):
	""" Start Training (You can always safely abort the training prematurely using Ctrl + C, *once* ) """
	agent.fit(env, nb_steps=args.nb_train_steps, visualize=False, verbose=1, save_data_path=save_data_path_local, file_interval=args.file_interval, nb_max_episode_steps=args.max_step_episode)

# After training is done, we save the final weights and plot the training graph.
try:
	if(args.HER==True and args.PER==False):
		if(args.train):
			agent.save_weights('HER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
		plot_af(file_path='HER/'+args.ENV_NAME+'.json',save_file_name='HER/'+args.ENV_NAME,plot_what='success')
		plot_af(file_path='HER/'+args.ENV_NAME+'.json',save_file_name='HER/'+args.ENV_NAME,plot_what='loss')
	elif(args.HER==False and args.PER==True):
		if(args.train):
			agent.save_weights('PER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
		plot_af(file_path='PER/'+args.ENV_NAME+'.json',save_file_name='PER/'+args.ENV_NAME,plot_what='success')
		plot_af(file_path='PER/'+args.ENV_NAME+'.json',save_file_name='PER/'+args.ENV_NAME,plot_what='loss')
	elif(args.HER==True and args.PER==True):
		if(args.train):
Code Example #34
File: train.ddpg.py Project: wiplug/osim-rl
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.

    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 
            
Code Example #35
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.0001, clipnorm=1.), metrics=['mae'])

if (args.action == 'test'):
    agent.test(env,
               nb_episodes=1000,
               verbose=2,
               visualize=False,
               nb_max_episode_steps=300)

tbCallback = TensorBoard(log_dir='./Graph/',
                         write_grads=True,
                         write_graph=True,
                         histogram_freq=0)
ckptCallback = ModelIntervalCheckpoint(filepath='./CheckPoints/',
                                       interval=1000)
agent.fit(env,
          nb_steps=250000,
          visualize=False,
          verbose=2,
          nb_max_episode_steps=300,
          callbacks=[tbCallback, ckptCallback])
# Optionally, we can reload a previous model's weights and continue training from there
# Remove the _actor or _critic from the filename. The load method automatically
# appends these.        
WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights.h5f'
# agent.load_weights(WEIGHTS_FILENAME)
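# Sketch of the naming convention (keras-rl's DDPGAgent behaviour, stated here as an
# assumption): save_weights('ddpg_x_weights.h5f') actually writes
# 'ddpg_x_weights_actor.h5f' and 'ddpg_x_weights_critic.h5f', and the commented-out
# load_weights() call above re-appends those suffixes itself.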


callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
#callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! Visualization is disabled here because it
# slows down training quite a lot. You can always safely abort the training
# prematurely using Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1)#, nb_max_episode_steps=500)

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a copy under a simpler file name so that a test run can be
# started immediately after training without having to know the trial parameters.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm (nb_episodes defaults to 1 here).
agent.test(env, visualize=True)  # nb_max_episode_steps=500 would cap episode length
Code Example #37
File: ddpg_mujoco.py Project: navigator8972/keras-rl
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! Visualization is disabled here because it
# slows down training quite a lot. You can always safely abort the training
# prematurely using Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)