Example #1
def test_generate_vec_env_non_image_observation():
    env = DummyVecEnv([lambda: gym.make('CartPole-v1')] * 2)

    model = PPO2('MlpPolicy', env)
    model.learn(total_timesteps=300)

    generate_expert_traj(model, save_path='.', n_timesteps=0, n_episodes=5)
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    env = gym.make(args.env)
    train_log_dir = os.path.join(
        args.train_log_dir,
        args.env + '_' + args.expert + '_' + args.policy_type)
    if args.expert == 'PPO':
        expert_model = PPO1(args.policy_type,
                            env,
                            verbose=1,
                            tensorboard_log=train_log_dir)
    else:
        raise NotImplementedError
    expert_model.learn(total_timesteps=args.expert_training_step)
    generate_expert_traj(expert_model,
                         os.path.join(train_log_dir, 'expert_traj'),
                         n_timesteps=1000,
                         n_episodes=args.expert_episodes)

    dataset = ExpertDataset(expert_path=os.path.join(train_log_dir,
                                                     'expert_traj.npz'),
                            traj_limitation=-1)
    gail_model = GAIL(args.policy_type,
                      env,
                      dataset,
                      verbose=1,
                      tensorboard_log=train_log_dir)
    gail_model.learn(args.student_training_step)
    evaluate(gail_model, env, num_steps=10000)
    gail_model.save(train_log_dir)
    env.close()
def train():

    # Create the environment and model

    env = gym.make('roundabout-v0')

    model = DQN(MlpPolicy, env, verbose=1)
    generate_expert_traj(model, 'expert_roundabout', n_timesteps=1000, n_episodes=10)

    # Data augmentation
    expert_data = dict(np.load('expert_roundabout.npz'))
    print("my keys are: " + str(expert_data.keys()))
    obs = expert_data['obs']
    print(obs.shape)
    expert_data['obs'] = obs.ravel()  # flatten the observations into a 1D array
    print("my keys are: " + str(expert_data.keys()))
    np.savez('expert_roundabout.npz', **expert_data)  # unpack so the original keys are preserved

    dataset = ExpertDataset(expert_path='expert_roundabout.npz', traj_limitation=10, verbose=1)
    model = GAIL('MlpPolicy', env, dataset, verbose=1)
    model.learn(total_timesteps=1000)
    model.save("gail_roundabout")

    env.close()
    del env
Example #4
def train(env, implemented_combos, model_logdir, arg_dict, pretrained_model=None):
    model_name = arg_dict["algo"] + '_' + str(arg_dict["steps"])
    conf_pth   = os.path.join(model_logdir, "train.json")
    model_path = os.path.join(model_logdir, "best_model.zip")
    arg_dict["model_path"] = model_path
    with open(conf_pth, "w") as f:
        json.dump(arg_dict, f, indent=4)

    model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1]
    model_kwargs = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][2]
    if pretrained_model:
        if not os.path.isabs(pretrained_model):
            pretrained_model = pkg_resources.resource_filename("myGym", pretrained_model)
        env = model_args[1]
        vec_env = DummyVecEnv([lambda: env])
        model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(pretrained_model, vec_env)
    else:
        model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0](*model_args, **model_kwargs)

    if arg_dict["algo"] == "gail":
        # Multi processing: (using MPI)
        if arg_dict["train_framework"] == 'tensorflow':
            # Generate expert trajectories (train expert)
            generate_expert_traj(model, model_name, n_timesteps=3000, n_episodes=100)
            # Load the expert dataset
            dataset = ExpertDataset(expert_path=model_name+'.npz', traj_limitation=10, verbose=1)
            model = GAIL_T('MlpPolicy', model_name, dataset, verbose=1)
            # Note: in practice, you need to train for 1M steps to have a working policy

    start_time = time.time()
    callbacks_list = []
    if pretrained_model:
        # resume logging next to the pretrained model
        model_logdir = os.path.dirname(pretrained_model)
    auto_save_callback = SaveOnBestTrainingRewardCallback(check_freq=1024, logdir=model_logdir, env=env, engine=arg_dict["engine"], multiprocessing=arg_dict["multiprocessing"])
    callbacks_list.append(auto_save_callback)
    if arg_dict["eval_freq"]:
        eval_env = configure_env(arg_dict, model_logdir, for_train=False)
        eval_callback = CustomEvalCallback(eval_env, log_path=model_logdir,
                                           eval_freq=arg_dict["eval_freq"],
                                           n_eval_episodes=arg_dict["eval_episodes"],
                                           record=arg_dict["record"],
                                           camera_id=arg_dict["camera"])
        callbacks_list.append(eval_callback)
    #callbacks_list.append(PlottingCallback(model_logdir))
    with ProgressBarManager(total_timesteps=arg_dict["steps"]) as progress_callback:
        callbacks_list.append(progress_callback)
        model.learn(total_timesteps=arg_dict["steps"], callback=callbacks_list)
    model.save(os.path.join(model_logdir, model_name))
    print("Training time: {:.2f} s".format(time.time() - start_time))

    # info_keywords in the Monitor class above is necessary for pybullet to save_results;
    # using info_keywords with mujoco raises an error
    if arg_dict["engine"] == "pybullet":
        save_results(arg_dict, model_name, env, model_logdir)
    return model
Example #5
    def lecture(self):
        teacher = DummyExpert()
        # teacher = NormalizeActionWrapper(teacher)
        print("Let me show you how it's done.")
        generate_expert_traj(teacher.teach,
                             'dummy_expert_rocket',
                             self.env,
                             n_episodes=10)
def test_generate_callable(tmp_path):
    """
    Test generating expert trajectories with a callable.
    """
    env = gym.make("CartPole-v1")
    # Here the expert is a random agent
    def dummy_expert(_obs):
        return env.action_space.sample()
    generate_expert_traj(dummy_expert, tmp_path / 'dummy_expert_cartpole', env, n_timesteps=0, n_episodes=10)
def train(params):

    # create model
    env = FlattenObservation(gym.make(params.get("environment")))
    exp_name = params.get("model_name") + "_train_" + params.get("environment")
    log_dir = './logs/' + exp_name
    expert_name = 'expert_{0}'.format(exp_name)

    if params.get("model_name") == 'TRPO':
        print("Loading TRPO Model")
        model = TRPO(MlpPolicy, env, verbose=1, tensorboard_log=log_dir)
        model.learn(total_timesteps=params.get("train_steps"))
        model.save(exp_name)

    if params.get("model_name") == 'PPO':
        print("Loading PPO Model")
        model = PPO1(MlpPolicy,
                     env,
                     verbose=1,
                     tensorboard_log=log_dir,
                     entcoeff=params.get("ent_coef"),
                     gamma=params.get("gamma"),
                     optim_batchsize=params.get("batch_size"),
                     clip_param=params.get("clip_range"),
                     lam=params.get("gae_lambda"))
        model.learn(total_timesteps=params.get("train_steps"))
        model.save(exp_name)

    if params.get("expert_exists") is False:
        print("Training expert trajectories")
        # Train expert controller (if needed) and record expert trajectories.
        generate_expert_traj(model,
                             expert_name,
                             n_timesteps=params.get("expert_timesteps"),
                             n_episodes=params.get("n_episodes"))

    dataset = ExpertDataset(
        expert_path='{0}.npz'.format(expert_name),
        traj_limitation=-1,
        randomize=True,  # if the dataset should be shuffled
        verbose=1)

    model = GAIL('MlpPolicy', env, dataset, verbose=1,
                 tensorboard_log=log_dir)  # uses GAIL's default hyperparameters

    if params.get("pre_train") is True:
        print("Pretraining Dataset with Behavioural Cloning")
        model.pretrain(dataset, n_epochs=10000)

    print("Executing GAIL Learning")
    model.learn(total_timesteps=params.get("train_steps"))
    model.save("BC" + exp_name)

    env.close()
    del env
Example #8
def generate_obs(environment, record_path, n_episodes):
    global env, model
    env = environment

    print('Starting record...')
    # model = get_existing_model(os.path.join('models', 'Self6hr_human50_self114hr'))
    # generate_expert_traj(acer_expert, record_path, env, n_episodes=n_episodes)
    generate_expert_traj(human_expert, record_path, env, n_episodes=n_episodes)
    print(
        f'Recording of {n_episodes} episodes complete. Saved file to {record_path}.npz'
    )
def test_generate(generate_env):
    model, policy, env_name, n_env, n_episodes = generate_env

    if n_env > 1:
        env = make_atari_env(env_name, num_env=n_env, seed=0)
        model = model(policy, env, verbose=0)
    else:
        model = model(policy, env_name, verbose=0)

    generate_expert_traj(model,
                         'expert',
                         n_timesteps=1000,
                         n_episodes=n_episodes,
                         image_folder='test_recorded_images')
def test_pretrain_images(tmp_path):
    env = make_atari_env("PongNoFrameskip-v4", num_env=1, seed=0)
    env = VecFrameStack(env, n_stack=4)
    model = PPO2('CnnPolicy', env)
    generate_expert_traj(model, str(tmp_path / 'expert_pong'), n_timesteps=0, n_episodes=1,
                         image_folder=str(tmp_path / 'pretrain_recorded_images'))

    expert_path = str(tmp_path / 'expert_pong.npz')
    dataset = ExpertDataset(expert_path=expert_path, traj_limitation=1, batch_size=32,
                            sequential_preprocessing=True)
    model.pretrain(dataset, n_epochs=2)

    shutil.rmtree(str(tmp_path / 'pretrain_recorded_images'))
    env.close()
    del dataset, model, env
Example #11
def get_expert_dataset(
    expert,
    venv,
    total_timesteps,
):
    filename = f"/tmp/{uuid.uuid4()}"
    n_episodes = total_timesteps // get_horizon(venv)

    generate_expert_traj(expert,
                         save_path=filename,
                         env=venv,
                         n_episodes=n_episodes)
    dataset = ExpertDataset(expert_path=f"{filename}.npz", verbose=0)

    return dataset
Example #12
def train_gail_withppo2():
    env = gimbal(5, 500)
    env = DummyVecEnv([lambda: env])
    model = PPO2.load("./models/baseline_ppo2_t1")
    generate_expert_traj(model,
                         './models/baseline_expert_t1',
                         env,
                         n_timesteps=0,
                         n_episodes=100)
    dataset = ExpertDataset(expert_path='./models/baseline_expert_t1.npz',
                            traj_limitation=-1,
                            verbose=1)
    model = GAIL("MlpPolicy", env, dataset, verbose=1)
    model.learn(total_timesteps=500000)
    model.save("./models/baseline_gail_ppo2_t1")
Example #13
def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    # train expert model for multiple times and save the best model
    best_reward = -np.inf
    train_env = make_vec_env(args.env, n_envs=args.n_env)
    eval_env = gym.make(args.env)

    for i in range(args.times_expert):
        train_env.reset()
        train_log_dir = os.path.join(args.train_log_dir,
                                     args.env + '_' + args.expert)
        if args.expert == 'PPO':
            expert_model = PPO2(args.policy_type, env=train_env, n_steps=args.n_steps,
                                nminibatches=args.nminibatches, noptepochs=args.noptepochs,
                                ent_coef=args.ent_coef, lam=args.lam, gamma=args.gamma,
                                cliprange=args.cliprange, learning_rate=args.learning_rate,
                                verbose=1, tensorboard_log=train_log_dir)
        else:
            raise NotImplementedError
        expert_model.learn(total_timesteps=args.expert_training_step)
        mean_reward = evaluate(expert_model, eval_env, num_steps=10000)
        if mean_reward > best_reward:
            best_reward = mean_reward
            expert_model.save(
                os.path.join(args.train_log_dir, args.env + '_expert'))
        del expert_model
    train_env.reset()
    expert_model = PPO2.load(os.path.join(args.train_log_dir,
                                          args.env + '_expert'),
                             env=train_env)
    generate_expert_traj(expert_model,
                         os.path.join(train_log_dir, 'expert_traj'),
                         n_timesteps=-1,
                         n_episodes=args.expert_episodes)
    train_env.close()

    dataset = ExpertDataset(expert_path=os.path.join(train_log_dir,
                                                     'expert_traj.npz'),
                            traj_limitation=-1)
    gail_model = GAIL(args.policy_type,
                      args.env,
                      dataset,
                      verbose=1,
                      tensorboard_log=train_log_dir)
    gail_model.learn(args.student_training_step)

    evaluate(gail_model, eval_env, num_steps=10000)
    gail_model.save(os.path.join(args.train_log_dir, args.env + '_GAIL'))
    eval_env.close()
def test_generate(generate_env):
    model, policy, env_name, n_env, n_episodes = generate_env

    if n_env > 1:
        env = make_atari_env(env_name, num_env=n_env, seed=0)
        model = model(policy, env, verbose=0)
    else:
        model = model(policy, env_name, verbose=0)

    dataset = generate_expert_traj(model,
                                   'expert',
                                   n_timesteps=1000,
                                   n_episodes=n_episodes,
                                   image_folder='test_recorded_images')

    assert set(dataset.keys()).issuperset(
        ['actions', 'obs', 'rewards', 'episode_returns', 'episode_starts'])
    assert sum(dataset['episode_starts']) == n_episodes
    assert len(dataset['episode_returns']) == n_episodes
    n_timesteps = len(dataset['episode_starts'])
    for key, val in dataset.items():
        if key != 'episode_returns':
            assert val.shape[
                0] == n_timesteps, "inconsistent number of timesteps at '{}'".format(
                    key)

    dataset_loaded = np.load('expert.npz')
    assert dataset.keys() == dataset_loaded.keys()
    for key in dataset.keys():
        assert (dataset[key] == dataset_loaded[key]
                ).all(), "different data at '{}'".format(key)
Example #15
def test_generate(tmp_path, generate_env):
    model, policy, env_name, n_env, n_episodes = generate_env

    if n_env > 1:
        env = make_atari_env(env_name, num_env=n_env, seed=0)
        model = model(policy, env, verbose=0)
    else:
        model = model(policy, env_name, verbose=0)

    dataset = generate_expert_traj(model,
                                   str(tmp_path / 'expert'),
                                   n_timesteps=300,
                                   n_episodes=n_episodes,
                                   image_folder=str(tmp_path /
                                                    'test_recorded_images'))

    assert set(dataset.keys()).issuperset(
        ['actions', 'obs', 'rewards', 'episode_returns', 'episode_starts'])
    assert sum(dataset['episode_starts']) == n_episodes
    assert len(dataset['episode_returns']) == n_episodes
    n_timesteps = len(dataset['episode_starts'])
    for key, val in dataset.items():
        if key != 'episode_returns':
            assert val.shape[
                0] == n_timesteps, "inconsistent number of timesteps at '{}'".format(
                    key)

    dataset_loaded = np.load(str(tmp_path / 'expert.npz'), allow_pickle=True)
    assert dataset.keys() == dataset_loaded.keys()
    for key in dataset.keys():
        assert (dataset[key] == dataset_loaded[key]
                ).all(), "different data at '{}'".format(key)
    # Cleanup folder
    if os.path.isdir(str(tmp_path / 'test_recorded_images')):
        shutil.rmtree(str(tmp_path / 'test_recorded_images'))
Example #16
    def gen_pre_train(self, num_e=1, save='default2', episodes=1000):
        # self.create_envs(game_name=game, state_name=state, num_env=num_e)
        # self.env = SubprocVecEnv(self.env_fns)
        env_id = 'default'
        num_e = 1
        self.env = SubprocVecEnv([self.make_env(env_id, i) for i in range(num_e)])
        # env = Template_Gym()
        # self.env = DummyVecEnv([lambda: env])
        self.env = VecNormalize(self.env, norm_obs=True, norm_reward=True)
        # env = make_env()
        # model = GAIL("MlpPolicy", env=env, expert_dataset=dataset, verbose=1)
        self.env.load_running_average("saves")
        self.model = PPO2.load("saves/m19", self.env, policy=CustomPolicy, tensorboard_log="./default/")
        self.env.load_running_average("saves")
        # env = make_env()
        # self.expert_agent =
        generate_expert_traj(self.model, save, self.env, n_episodes=episodes)
def generate_pretrain_data(args):
    env = gym.make(args.env, n_particles=args.n_particles)
    env_copy = gym.make(args.env, n_particles=args.n_particles)
    env.seed(args.seed)
    env.reset()
    env_copy.seed(args.seed)
    env_copy = GymMazeWrapper(env_copy, render=args.render)
    pre_alg = MoveToRandomCornerAlgorithm(env_copy)
    alg = ALGORITHMS[args.algorithm](env_copy)
    target_alg = TargetPointMoverAlgorithm(env_copy,
                                           tuple(env_copy.get_goal()))

    env = ReplayWrapper(env,
                        env_copy, [pre_alg],
                        alg,
                        target_alg,
                        downscale=True,
                        frame_stack=True)
    generate_expert_traj(env.next,
                         args.generate_pretrain_data,
                         env=env,
                         n_episodes=2)
Example #18
def test_generate_cartpole():
    model = DQN('MlpPolicy', 'CartPole-v1', verbose=0)
    generate_expert_traj(model, 'expert_cartpole', n_timesteps=1000, n_episodes=10)
global_buy_counter = 0
global_sell_counter = 0
global_last_action = 0

# The algorithms require a vectorized environment to run

# Data will be saved in a numpy archive named `expert_trader_ORG_<symbol>.npz`;
# when using something other than an RL expert,
# you must pass the environment object explicitly.
# Optionally, automatically normalize the input features and reward:
#VecEnv = DummyVecEnv([lambda: create_trade_env(data,symbol)])
#VecEnv = VecNormalize(VecEnv, norm_obs=True, norm_reward=True,
#                   clip_obs=10.)
generate_expert_traj(expert_trader,
                     'expert_trader_ORG_' + symbol,
                     env,
                     n_episodes=10)

# %% [markdown]
# # Read Recording Set

# %%
# Pre-Train a Model using Behavior Cloning
from stable_baselines.gail import ExpertDataset
# Using only one expert trajectory
# you can specify `traj_limitation=-1` for using the whole dataset
dataset = ExpertDataset(expert_path='expert_trader_ORG_' + symbol + '.npz',
                        traj_limitation=10,
                        batch_size=64,
                        randomize=False)
dataset.plot()
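The excerpt ends after building and plotting the dataset, before the behavior-cloning step its comment announces. A minimal sketch of that missing step, assuming a PPO2 model on the same trading env used for recording (the model choice, policy, and epoch count are illustrative, not taken from the original script):

from stable_baselines import PPO2

model = PPO2('MlpPolicy', env, verbose=1)
# Behavior cloning: supervised pretraining on the recorded expert transitions
model.pretrain(dataset, n_epochs=1000)
model.save('BC_expert_trader_' + symbol)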
Example #20
from stable_baselines.gail import generate_expert_traj, ExpertDataset
from stable_baselines import PPO2
import time
import numpy as np

# THIS SECTION IS FOR GEN EXP TRAJ
kwargs_dict = {'resume': False, 'render': False}
log_dir = 'runs/wide'

env_name = "zelda-wide-v0"
policy = FullyConvPolicyBigMap
env = make_vec_envs(env_name, "wide", log_dir, n_cpu=1, **kwargs_dict)

model = PPO2(policy, env, verbose=1, tensorboard_log=f"./runs/wide")
a_dict = generate_expert_traj(model,
                              'expert_wide',
                              n_timesteps=int(0),
                              n_episodes=1)
print(a_dict)

numpy_dict = np.load('expert_wide.npz')
print(type(numpy_dict))
print(list(numpy_dict.keys()))

# ['actions', 'obs', 'rewards', 'episode_returns', 'episode_starts']
print(f"ACTIONS")
print(f"=============================")
print(numpy_dict['actions'])
print(numpy_dict['actions'].shape)
print(f"=============================")
print(f"=============================")
print(f"=============================")
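Note that ExpertDataset is imported at the top of this snippet but never used; presumably the recorded archive would feed a behavior-cloning step next. A minimal sketch under that assumption (the batch size and epoch count are placeholders):

dataset = ExpertDataset(expert_path='expert_wide.npz', traj_limitation=-1,
                        batch_size=32, verbose=1)
model.pretrain(dataset, n_epochs=100)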
Example #21
from stable_baselines import DQN
from stable_baselines.gail import generate_expert_traj
import gym
import highway_env

model = DQN('MlpPolicy', 'overtaking-v0', verbose=1)
# Train a DQN agent for 1e5 timesteps and generate 10 trajectories
# data will be saved in a numpy archive named `expert_overtaking.npz`
generate_expert_traj(model,
                     'expert_overtaking',
                     n_timesteps=int(1e5),
                     n_episodes=10)
Example #22
def test_generate_pendulum():
    model = SAC('MlpPolicy', 'Pendulum-v0', verbose=0)
    generate_expert_traj(model, 'expert_pendulum', n_timesteps=1000, n_episodes=10)
timestep_per_epoch = int(1e5)
expert_n_episodes = 100
############################################

if __name__ == "__main__":

    if not os.path.exists(save_name):
        os.makedirs(save_name)

    # Generate expert trajectories (train expert)
    print("\n...Generate expert trajectories\n")
    env = PrticleEnv(alpha=1, beta=10, win_thre=1, max_timestep=256)
    model = PPO1.load("model/part_circle_exp2_epoch05_sib.zip")
    model.set_env(env)
    generate_expert_traj(model,
                         'expert_part_circle_exp2_epoch05_sib',
                         n_episodes=expert_n_episodes)
    print("...finish\n")

    # Load the expert dataset
    print("\n...Load the expert dataset\n")

    dataset = ExpertDataset(
        expert_path='expert_part_circle_exp2_epoch05_sib.npz',
        traj_limitation=-1,
        verbose=1)
    print("...finish\n")

    model = GAIL('MlpPolicy',
                 DummyVecEnv([lambda: PrticleEnv(alpha=1, beta=10, win_thre=1, max_timestep=256)]),
                 dataset, tensorboard_log=save_name, verbose=0, n_cpu_tf_sess=None)
Example #24
def generate():
    model = LQRModel()
    generate_expert_traj(model, save_path="./lqr_export.npz", env=None, n_timesteps=0, n_episodes=10)
Example #25
from mycart import MyCartPoleEnv
from mycartCont import MyCartContEnv

from stable_baselines.gail import generate_expert_traj
import numpy as np

env = MyCartContEnv()


# Here the expert is a random agent
# but it can be any python function, e.g. a PID controller
def dummy_expert(_obs):
    x, x_dot, theta, theta_dot = _obs
    #print(obs)
    K1 = -50
    K2 = -5
    K3 = -4
    K4 = -2

    action = [-K1 * theta - K2 * theta_dot - K3 * (x - env.xref) - K4 * x_dot]
    return action


# Data will be saved in a numpy archive named `expert_cartpole.npz`
# when using something different than an RL expert,
# you must pass the environment object explicitly
generate_expert_traj(dummy_expert,
                     'dummy_expert_cartpole',
                     env,
                     n_episodes=100)
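The script stops once the PID controller's trajectories are recorded. As a possible follow-up, sketched only for illustration (GAIL, the policy choice, and the timestep budget are assumptions mirroring the other examples on this page, not part of the original script):

from stable_baselines import GAIL
from stable_baselines.gail import ExpertDataset

dataset = ExpertDataset(expert_path='dummy_expert_cartpole.npz', traj_limitation=-1, verbose=1)
model = GAIL('MlpPolicy', env, dataset, verbose=1)
model.learn(total_timesteps=100000)
model.save('gail_cartpole_from_pid')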
Example #26
    degToRad(-90),
    degToRad(-90)
]

actions = [
    mov0, mov1, mov2, mov3, mov4, mov5, mov6, mov7, mov8, mov9, mov10, mov11
]

actions = [np.array(mov) for mov in actions]


def dummy_expert(_obs):
    global state
    global actions
    state += 1
    if state == 11:
        state = 0
    # TODO add noise to each angle independently
    return actions[state] + round(0.2 * np.random.random_sample() - 0.1,
                                  2)  # +-0.1 rad as noise for all angles


env = gym.make('gym_quadruped:quadruped-v0', visualize=False)

# Data will be saved in a numpy archive named `dummy_quadruped.npz`
# when using something different than an RL expert,
# you must pass the environment object explicitly
generate_expert_traj(dummy_expert,
                     './pretrain/dummy_quadruped',
                     env,
                     n_episodes=200)
Example #27
                except:
                    pass

        try:
            order_type, goal = actions_list[0]
            actions_list = actions_list[1:]

        except:
            order_type, goal = 0, 0

        if order_type == 0 and goal == 0:
            action = 0

        else:
            action = decode_action(order_type, goal)

        return action

    # make the output folder if it does not exist
    try:
        os.mkdir(specified_path)
    except:
        pass

    ## Generate Data based on heuristic for pre-training
    # Data will be saved in a numpy archive named `heuristic_expert.npz`
    env.reset()
    generate_expert_traj(dummy_expert,
                         join(specified_path, 'heuristic_expert'),
                         env,
                         n_episodes=args.numepisodes)
import numpy as np

from stable_baselines.gail import generate_expert_traj

from tetris import TetrisEnv
from tetris.bot import Bot

if __name__ == '__main__':
    import os
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('episod', type=int)
    parser.add_argument('output_dir')
    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    filename = os.path.join(args.output_dir,
                            'dataset-{}.npz'.format(args.episod))
    if os.path.exists(filename):
        raise OSError(filename, 'already exists!')

    env = TetrisEnv()
    model = Bot(env)
    trajs = generate_expert_traj(model.predict,
                                 env=env,
                                 n_episodes=args.episod,
                                 save_path=None,
                                 image_folder=None)
    np.savez(filename, **trajs)
Example #29
from stable_baselines import DQN
from stable_baselines.gail import generate_expert_traj

model = DQN('MlpPolicy', 'CartPole-v1', verbose=1)
# Train a DQN agent for 1e5 timesteps and generate `num_episodes` trajectories;
# data will be saved in a numpy archive named `cartpole10000.npz` under ../data/expert/
num_episodes = 10000
generate_expert_traj(model,
                     '../data/expert/cartpole' + str(num_episodes),
                     n_timesteps=int(1e5),
                     n_episodes=num_episodes)
Example #30
    :param _obs: (np.ndarray) Current observation
    :return: (np.ndarray) action taken by the expert
    """

    while True:
        env.render()
        print_play_keys(env.action_str)
        time.sleep(0.2)
        key_pressed = keyboard.read_key()
        # return index of action if valid key is pressed
        if key_pressed:
            if key_pressed in KEY_ACTION_DICT:
                return KEY_ACTION_DICT[key_pressed]
            elif key_pressed == "esc":
                print("You pressed esc, exiting!!")
                break
            else:
                print("You pressed wrong key. Press Esc key to exit, OR:")


# Data will be saved in a numpy archive named `expert_+env_id.npz`
# when using something different than an RL expert,
# you must pass the environment object explicitly
env.render()
episodes = 50
generate_expert_traj(human_expert,
                     'expert_' + env_id + '_' + str(episodes) + 'demos',
                     env,
                     n_episodes=episodes)