Beispiel #1
0
def learn(policy,
          env,
          expert,
          seed,
          total_timesteps=int(40e6),
          gamma=0.99,
          lam=0.95,
          log_interval=1,
          nprocs=32,
          nsteps=20,
          nstack=1,
          ent_coef=0.01,
          vf_coef=0.5,
          vf_fisher_coef=1.0,
          lr=0.05,
          max_grad_norm=0.5,
          kfac_clip=0.001,
          save_interval=100,
          lrschedule='linear',
          batch_size=1024,
          clone_iters=10000):
    """Fit a freshly built ``Model`` to expert data via ``model.clone``.

    The default TensorFlow graph is reset, global seeds are set, a ``Model``
    is constructed, and then ``model.clone`` is called ``clone_iters`` times
    on batches drawn from ``expert``. The value returned by each ``clone``
    call is printed. Nothing is returned.

    Args:
        policy: Forwarded unchanged to ``Model``.
        env: Must expose ``num_envs``, ``observation_space`` and
            ``action_space``; the environment is never stepped here.
        expert: Must expose ``get_next_batch(n)`` returning a 4-tuple whose
            first two items are the observations and the actions.
        seed: Passed to ``set_global_seeds``.
        total_timesteps: Forwarded to ``Model``.
        gamma, lam, log_interval, nsteps: Accepted for signature
            compatibility; not used by this function.
        nprocs, nstack, ent_coef, vf_coef, vf_fisher_coef, lr,
            max_grad_norm, kfac_clip, lrschedule: Forwarded to ``Model``.
        save_interval: Only used as a flag: when truthy (and the logger has
            a directory) the model factory is pickled to ``make_model.pkl``.
        batch_size: Number of expert samples requested per iteration; also
            passed to ``Model`` as ``nsteps`` so the two stay in sync
            (both were the literal 1024 before).
        clone_iters: Number of ``model.clone`` calls to perform.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)
    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    # Kept as a zero-argument factory so it can be pickled below.
    make_model = lambda: Model(policy,
                               ob_space,
                               ac_space,
                               nenvs,
                               total_timesteps,
                               nprocs=nprocs,
                               nsteps=batch_size,
                               nstack=nstack,
                               ent_coef=ent_coef,
                               vf_coef=vf_coef,
                               vf_fisher_coef=vf_fisher_coef,
                               lr=lr,
                               max_grad_norm=max_grad_norm,
                               kfac_clip=kfac_clip,
                               lrschedule=lrschedule)
    if save_interval and logger.get_dir():
        import cloudpickle
        with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
            fh.write(cloudpickle.dumps(make_model))
    model = make_model()

    for _step in range(clone_iters):
        e_obs, e_actions, _, _ = expert.get_next_batch(batch_size)
        # One argmax per entry of e_actions along axis 1 -- presumably
        # converting per-agent one-hot actions to indices; confirm with the
        # expert dataset format.
        e_a = [np.argmax(agent_actions, axis=1) for agent_actions in e_actions]
        lld_loss = model.clone(e_obs, e_a)
        print(lld_loss)
Beispiel #2
0
 def create_env():
     """Create the environment named by ``env_id`` and seed it with 10.

     The same seed value is also handed to ``set_global_seeds``.
     """
     seed_value = 10
     environment = make_env.make_env(env_id)
     environment.seed(seed_value)
     # Monitor wrapping is left disabled in this variant:
     # environment = bench.Monitor(environment, '/tmp/', allow_early_resets=True)
     set_global_seeds(seed_value)
     return environment
Beispiel #3
0
 def create_env():
     """Create a monitored, seeded ``simple_spread`` environment.

     The environment is seeded with 3, wrapped in ``bench.Monitor`` writing
     to ``/tmp/``, and the same seed is handed to ``set_global_seeds``.
     """
     seed_value = 3
     raw_env = make_env.make_env('simple_spread')
     raw_env.seed(seed_value)
     monitored_env = bench.Monitor(raw_env, '/tmp/', allow_early_resets=True)
     set_global_seeds(seed_value)
     return monitored_env
Beispiel #4
0
# Size constants for the models and the environment. The meaning of the
# individual TBD_* values is not documented in this file; TBD_total is used as
# the per-timestep input dimension of the upper LSTM in build_models().
lower_nb_actions = 2
TIME_STEPS = 10
TBD_total = 56
TBD_left = 38
TBD_straight = 56
TBD_right = 38
LSTM_HIDDEN = 128
ENCODE_LSTM_HIDDEN = 64
# Get the environment and extract the number of actions.
# Resolve the script location to an absolute directory first:
# os.path.dirname(__file__) is '' when the script is started by a bare
# relative file name, which would turn the concatenation below into the
# filesystem-root path '/LasVSim/...'.
curr_path = os.path.dirname(os.path.abspath(__file__))
env = EndtoendEnv(setting_path=curr_path +
                  '/LasVSim/Scenario/Highway_endtoend/',
                  plan_horizon=30,
                  history_len=TIME_STEPS)
env = ObservationWrapper(env)
# Seed both the global RNGs and the environment with the same value.
# Other seeds noted by the original author: [42, 0, 1, 2].
set_global_seeds(11)
env.seed(11)
# nb_actions = env.action_space.n


def build_models():
    """Assemble the Keras models used by this script.

    Only the beginning of this function is visible here: it builds the
    ``upper_model`` (an LSTM followed by a Dense layer) and starts the shared
    actor LSTM model.

    NOTE(review): the visible part has no ``return`` statement and never uses
    ``actor_lstm_model`` after creating it -- the listing appears truncated.
    """
    # Build the upper model.
    upper_model = Sequential(name='upper_model')
    upper_model.add(layers.LSTM(
        128,
        input_shape=(TIME_STEPS,
                     TBD_total)))  # A 3D tensor [batch, timesteps, inputdim]
    # NOTE(review): upper_nb_actions is not defined in the visible code (only
    # lower_nb_actions is) -- confirm it is defined elsewhere.
    upper_model.add(layers.Dense(upper_nb_actions, activation='relu'))

    # Build the shared part of the lower actor.
    actor_lstm_model = Sequential(name='shared_actor_lstm_model')
Beispiel #5
0
def learn(policy,
          env,
          expert,
          seed,
          total_timesteps=int(40e6),
          gamma=0.99,
          lam=0.95,
          log_interval=1,
          nprocs=4,
          nsteps=20,
          nstack=1,
          ent_coef=0.01,
          vf_coef=0.5,
          vf_fisher_coef=1.0,
          lr=0.05,
          max_grad_norm=0.5,
          kfac_clip=0.001,
          save_interval=1000,
          lrschedule='linear',
          batch_size=1024):
    """Fit a freshly built ``Model`` to expert data, with logging/checkpoints.

    The default TensorFlow graph is reset, global seeds are set, a ``Model``
    is constructed, and ``model.clone`` is called once per update on a batch
    drawn from ``expert``. Per-agent losses are written through ``logger``
    and the model is saved periodically. Nothing is returned.

    Args:
        policy: Forwarded unchanged to ``Model``.
        env: Must expose ``num_envs``, ``observation_space`` and
            ``action_space``; the environment is never stepped here.
        expert: Must expose ``get_next_batch(n)`` returning a 4-tuple whose
            first two items are the observations and the actions.
        seed: Passed to ``set_global_seeds``.
        total_timesteps: Forwarded to ``Model``; also determines the number
            of updates, ``total_timesteps // batch_size + 1``.
        gamma, lam, nsteps: Accepted for signature compatibility; not used
            by this function.
        log_interval: Log every this many updates (and at update 1). A falsy
            value disables logging instead of raising on ``update % 0``.
        nprocs, nstack, ent_coef, vf_coef, vf_fisher_coef, lr,
            max_grad_norm, kfac_clip, lrschedule: Forwarded to ``Model``.
        save_interval: Checkpoint every this many updates (and at update 1)
            when the logger has a directory; a falsy value disables saving
            and the pickling of the model factory.
        batch_size: Expert samples per update; also passed to ``Model`` as
            ``nsteps``.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)
    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    # Kept as a zero-argument factory so it can be pickled below.
    make_model = lambda: Model(policy,
                               ob_space,
                               ac_space,
                               nenvs,
                               total_timesteps,
                               nprocs=nprocs,
                               nsteps=batch_size,
                               nstack=nstack,
                               ent_coef=ent_coef,
                               vf_coef=vf_coef,
                               vf_fisher_coef=vf_fisher_coef,
                               lr=lr,
                               max_grad_norm=max_grad_norm,
                               kfac_clip=kfac_clip,
                               lrschedule=lrschedule)
    if save_interval and logger.get_dir():
        import cloudpickle
        with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
            fh.write(cloudpickle.dumps(make_model))
    model = make_model()

    tstart = time.time()
    # No queue-runner threads are started here; the coordinator is only
    # created and asked to stop once the loop is done.
    coord = tf.train.Coordinator()
    num_updates = total_timesteps // batch_size + 1
    print("-------------------------------")
    print(num_updates)
    print("-------------------------------")

    try:
        for update in range(num_updates):
            e_obs, e_actions, _, _ = expert.get_next_batch(batch_size)
            # One argmax per entry of e_actions along axis 1 -- presumably
            # converting per-agent one-hot actions to indices; confirm with
            # the expert dataset format.
            e_a = [np.argmax(agent_actions, axis=1)
                   for agent_actions in e_actions]
            # Clamp the elapsed time: on a coarse clock the first update can
            # see 0.0 seconds, and 0 / 0.0 raises ZeroDivisionError.
            nseconds = max(time.time() - tstart, 1e-8)
            fps = int((update * batch_size) / nseconds)

            lld_loss = model.clone(e_obs, e_a)[0]
            if log_interval and (update % log_interval == 0 or update == 1):
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * batch_size)
                logger.record_tabular("fps", fps)
                for k in range(model.num_agents):
                    logger.record_tabular("lld_loss %d" % k,
                                          float(lld_loss[k]))
                logger.dump_tabular()

            if save_interval and (update % save_interval == 0
                                  or update == 1) and logger.get_dir():
                savepath = osp.join(logger.get_dir(),
                                    'checkpoint%.5i' % update)
                print('Saving to', savepath)
                model.save(savepath)
    finally:
        # Always signal the coordinator, even if an update raised.
        coord.request_stop()