Example 1
def test_gail(expert_env):
    env_id, expert_path = expert_env
    env = gym.make(env_id)
    dataset = ExpertDataset(expert_path=expert_path,
                            traj_limitation=10,
                            sequential_preprocessing=True)

    # Note: train for 1M steps to have a working policy
    model = GAIL('MlpPolicy',
                 env,
                 adversary_entcoeff=0.0,
                 lam=0.92,
                 max_kl=0.001,
                 expert_dataset=dataset,
                 hidden_size_adversary=64,
                 verbose=0)

    model.learn(1000)
    model.save("GAIL-{}".format(env_id))
    model = model.load("GAIL-{}".format(env_id), env=env)
    model.learn(1000)

    obs = env.reset()

    for _ in range(1000):
        action, _ = model.predict(obs)
        obs, _, done, _ = env.step(action)
        if done:
            obs = env.reset()
    del dataset, model
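Example 1 assumes a pytest fixture named expert_env that yields an environment id and the path to a recorded expert .npz file. A minimal sketch of such a fixture, assuming CartPole-v1 and a quickly trained PPO2 expert (both are illustrative choices, not necessarily what the original test suite uses):

import pytest
from stable_baselines import PPO2
from stable_baselines.gail import generate_expert_traj


@pytest.fixture
def expert_env(tmp_path):
    # Record a short expert trajectory file with a quickly trained PPO2 agent.
    env_id = "CartPole-v1"
    save_path = str(tmp_path / "expert_cartpole")
    model = PPO2("MlpPolicy", env_id, verbose=0)
    generate_expert_traj(model, save_path, n_timesteps=1000, n_episodes=10)
    return env_id, save_path + ".npz"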
Example 2
def test_gail(tmp_path, expert_env):
    env_id, expert_path, load_from_memory = expert_env
    env = gym.make(env_id)

    traj_data = None
    if load_from_memory:
        traj_data = np.load(expert_path)
        expert_path = None
    dataset = ExpertDataset(traj_data=traj_data,
                            expert_path=expert_path,
                            traj_limitation=10,
                            sequential_preprocessing=True)

    # Note: train for 1M steps to have a working policy
    model = GAIL('MlpPolicy',
                 env,
                 adversary_entcoeff=0.0,
                 lam=0.92,
                 max_kl=0.001,
                 expert_dataset=dataset,
                 hidden_size_adversary=64,
                 verbose=0)

    model.learn(300)
    model.save(str(tmp_path / "GAIL-{}".format(env_id)))
    model = model.load(str(tmp_path / "GAIL-{}".format(env_id)), env=env)
    model.learn(300)

    evaluate_policy(model, env, n_eval_episodes=5)
    del dataset, model
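Example 2 differs only in that the expert data may be handed to ExpertDataset as an in-memory dict (traj_data) instead of a file path. The keys written by generate_expert_traj are 'actions', 'obs', 'rewards', 'episode_returns' and 'episode_starts'; a quick inspection sketch, where the file name is an assumption:

import numpy as np

data = np.load("expert_cartpole.npz", allow_pickle=True)
# Expected keys: actions, episode_returns, episode_starts, obs, rewards
print(sorted(data.keys()))
print(data["obs"].shape, data["actions"].shape)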
Example 3
def load_bc_model_from_path(model_name):
    # NOTE: The lowest-loss and highest-accuracy models are also saved
    # and can be found in the same directory with special suffixes.
    bc_metadata = load_pickle(BC_SAVE_DIR + model_name + "/bc_metadata")
    bc_params = bc_metadata["bc_params"]
    model = GAIL.load(BC_SAVE_DIR + model_name + "/model")
    return model, bc_params
Example 4
def load_model(path: str, algorithm: str):
    from stable_baselines import PPO2, DQN, A2C, ACER, GAIL, TRPO
    if algorithm == 'PPO2':
        return PPO2.load(path)
    if algorithm == 'DQN':
        return DQN.load(path)
    if algorithm == 'A2C':
        return A2C.load(path)
    if algorithm == 'ACER':
        return ACER.load(path)
    if algorithm == 'GAIL':
        return GAIL.load(path)
    if algorithm == 'TRPO':
        return TRPO.load(path)
    return None
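The same dispatch can also be written as a lookup table, which keeps the supported algorithms in one place; a sketch with identical behaviour (unknown names still return None):

def load_model(path: str, algorithm: str):
    from stable_baselines import PPO2, DQN, A2C, ACER, GAIL, TRPO
    # Map algorithm names to their classes, then load via the common API.
    algorithms = {'PPO2': PPO2, 'DQN': DQN, 'A2C': A2C,
                  'ACER': ACER, 'GAIL': GAIL, 'TRPO': TRPO}
    cls = algorithms.get(algorithm)
    return cls.load(path) if cls is not None else None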
Example 5
def eval_with_standard_baselines(n_games, model_name, display=False):
    """Method to evaluate agent performance with stable-baselines infrastructure,
    just to make sure everything is compatible and integrating correctly."""
    bc_metadata = load_pickle(BC_SAVE_DIR + model_name + "/bc_metadata")
    bc_params = bc_metadata["bc_params"]
    model = GAIL.load(BC_SAVE_DIR + model_name + "/model")

    gym_env = init_gym_env(bc_params)

    tot_rew = 0
    for i in tqdm.trange(n_games):
        obs, _ = gym_env.reset()
        done = False
        while not done:
            ob0, ob1 = obs
            a0 = stable_baselines_predict_fn(model, ob0)
            a1 = stable_baselines_predict_fn(model, ob1)
            joint_action = (a0, a1)
            (obs, _), rewards, done, info = gym_env.step(joint_action)
            tot_rew += rewards

    print("avg reward", tot_rew / n_games)
    return tot_rew / n_games
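stable_baselines_predict_fn is a project-specific helper that is not shown here; a hypothetical stand-in, consistent with how it is used above, would simply query the policy for a single observation and return the chosen action:

def stable_baselines_predict_fn(model, observation):
    # Hypothetical helper: ask the stable-baselines model for one action.
    action, _states = model.predict(observation, deterministic=True)
    return action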
Example 6
def train_agent_with_gail(load):
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines import GAIL

    env = gym.make("F16GCAS-v0")

    class CustomPolicy(MlpPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs,
                                               layers=[128, 128])
    if not load:
        ExpData = ExpertDataset("./lqr_export.npz")
        model = GAIL(CustomPolicy, env, ExpData, verbose=1)
        model.learn(total_timesteps=1000000)
        model.save(ROOT+"/trained_models/TDRL/f16/gail/128_128")
    else:
        # with model.graph.as_default():
        #     for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model/pi/'):
        #         print(i)
        model = GAIL.load(ROOT+"/trained_models/TDRL/f16/gail/128_128", env=env)
        with model.graph.as_default():
            print(tf.all_variables())

    return model
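A brief usage sketch for the function above, assuming the custom F16GCAS-v0 environment from the same project is registered; it loads the saved GAIL policy and rolls it out for a fixed number of steps:

import gym

model = train_agent_with_gail(load=True)   # load the previously saved policy
env = gym.make("F16GCAS-v0")
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()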
Example 7
                    type=int,
                    help='Number of games to test.')
parser.add_argument('-s', '--save', default=True, type=bool)

args = parser.parse_args()

sys.path.append(
    '/Users/cusgadmin/Documents/UCB/Academics/SSastry/Multi_agent_competition')
os.chdir(
    '/Users/cusgadmin/Documents/UCB/Academics/SSastry/Multi_agent_competition/'
)

print(colored('Testing learnt policy from model file {} for {} games!'.format(
    args.model, args.num_test), 'red'))
start_time = time.time()
model = GAIL.load(args.model)
env = gym.make('gym_pursuitevasion_small:pursuitevasion_small-v0')
g = 1
obs = env.reset(ep=g)
e_win_games = int(0)
env.render(mode='human', highlight=True, ep=g)
if args.save:
    metadata = dict(title='Game')
    writer = FFMpegWriter(fps=5, metadata=metadata)
    writer.setup(env.window.fig, "test_game.mp4", 300)
    writer.grab_frame()
while True:
    action, _states = model.predict(obs)
    obs, rewards, done, e_win = env.step(action)
    env.render(mode='human', highlight=True, ep=g)
    if args.save:
Example 8
import gym

from stable_baselines import GAIL, SAC
from stable_baselines.gail import ExpertDataset, generate_expert_traj

# Generate expert trajectories (train expert)
model = SAC('MlpPolicy', 'Pendulum-v0', verbose=1)
generate_expert_traj(model, 'expert_pendulum', n_timesteps=100, n_episodes=10)

# Load the expert dataset
dataset = ExpertDataset(expert_path='expert_pendulum.npz', traj_limitation=10, verbose=1)

model = GAIL('MlpPolicy', 'Pendulum-v0', dataset, verbose=1)
# Note: in practice, you need to train for 1M steps to have a working policy
model.learn(total_timesteps=100000)
model.save("gail_pendulum")

del model # remove to demonstrate saving and loading

model = GAIL.load("gail_pendulum")

env = gym.make('Pendulum-v0')
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
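The demo loop above runs forever and never resets the environment between episodes. A small self-contained variant that resets on done and stops after a fixed step budget (the 2000-step budget is an arbitrary choice for illustration):

import gym
from stable_baselines import GAIL

model = GAIL.load("gail_pendulum")
env = gym.make('Pendulum-v0')
obs = env.reset()
for _ in range(2000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        obs = env.reset()
env.close()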