def train(params):
    """Train a base policy, optionally record expert data, then train GAIL.

    The steps run in this order:

    1. Build the gym environment named by ``params["environment"]`` and wrap
       it in ``FlattenObservation``.
    2. If ``params["model_name"]`` is ``'TRPO'`` or ``'PPO'``, train that
       model for ``train_steps`` timesteps and save it under the experiment
       name ``<model_name>_train_<environment>``.
    3. If ``params["expert_exists"]`` is exactly ``False``, call
       ``generate_expert_traj`` with the model from step 2 and the name
       ``expert_<experiment name>``.
    4. Load ``expert_<experiment name>.npz`` as an ``ExpertDataset``, build a
       GAIL model on it, optionally pretrain it with behavioural cloning
       (when ``params["pre_train"]`` is exactly ``True``), train it for
       ``train_steps`` timesteps and save it as ``"BC" + <experiment name>``.

    Args:
        params: Object exposing ``.get(key)`` (e.g. a dict). Keys read here:
            ``environment``, ``model_name``, ``train_steps``,
            ``expert_exists``, ``expert_timesteps``, ``n_episodes``,
            ``pre_train`` and, for PPO only, ``ent_coef``, ``gamma``,
            ``batch_size``, ``clip_range``, ``gae_lambda``.

    Raises:
        ValueError: If expert trajectories must be generated
            (``expert_exists`` is ``False``) but ``model_name`` is neither
            ``'TRPO'`` nor ``'PPO'``, so there is no model to record from.
    """
    # create model
    env = FlattenObservation(gym.make(params.get("environment")))

    # Everything after the environment exists runs under try/finally so the
    # environment is released even when a training step raises.
    try:
        model_name = params.get("model_name")
        exp_name = model_name + "_train_" + params.get("environment")
        log_dir = './logs/' + exp_name
        expert_name = 'expert_{0}'.format(exp_name)

        # Stays None for an unsupported model name; the GAIL stage below can
        # still run in that case as long as the expert file already exists.
        model = None
        if model_name == 'TRPO':
            print("Loading TRPO Model")
            model = TRPO(MlpPolicy, env, verbose=1, tensorboard_log=log_dir)
        elif model_name == 'PPO':
            print("Loading PPO Model")
            model = PPO1(
                MlpPolicy,
                env,
                verbose=1,
                tensorboard_log=log_dir,
                entcoeff=params.get("ent_coef"),
                gamma=params.get("gamma"),
                optim_batchsize=params.get("batch_size"),
                clip_param=params.get("clip_range"),
                lam=params.get("gae_lambda"),
            )

        if model is not None:
            model.learn(total_timesteps=params.get("train_steps"))
            model.save(exp_name)

        # `is False` is deliberate: a missing key (None) must not trigger
        # trajectory generation.
        if params.get("expert_exists") is False:
            if model is None:
                # Fail with a clear message instead of an unbound-variable
                # error inside generate_expert_traj.
                raise ValueError(
                    "Cannot generate expert trajectories: unsupported "
                    "model_name {0!r} (expected 'TRPO' or 'PPO')".format(
                        model_name))
            print("Training expert trajectories")
            # Train expert controller (if needed) and record expert
            # trajectories.
            generate_expert_traj(
                model,
                expert_name,
                n_timesteps=params.get("expert_timesteps"),
                n_episodes=params.get("n_episodes"),
            )

        dataset = ExpertDataset(
            expert_path='{0}.npz'.format(expert_name),
            traj_limitation=-1,
            randomize=True,  # if the dataset should be shuffled
            verbose=1,
        )

        # Check out for defaults
        model = GAIL('MlpPolicy', env, dataset, verbose=1,
                     tensorboard_log=log_dir)

        if params.get("pre_train") is True:
            print("Pretraining Dataset with Behavioural Cloning")
            model.pretrain(dataset, n_epochs=10000)

        print("Executing GAIL Learning")
        model.learn(total_timesteps=params.get("train_steps"))
        model.save("BC" + exp_name)
    finally:
        env.close()
# make_vec_env() is used for multiprocess enviroment env = make_vec_env('gym_quadruped:quadruped-v0', n_envs=4) check_dir('./pretrain/PPO/') model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log='./pretrain/PPO/') else: print('Model choosen not available, check spelling or if it is supported') # Using only one expert trajectory # you can specify `traj_limitation=-1` for using the whole dataset dataset = ExpertDataset(expert_path='./pretrain/dummy_quadruped.npz', traj_limitation=-1, batch_size=128) model.pretrain(dataset, n_epochs=args['pt']) if args['pretrainVisualization']: # Test the pre-trained model env = model.get_env() obs = env.reset() reward_sum = 0.0 for _ in range(1000): action, _ = model.predict(obs) obs, reward, done, _ = env.step(action) reward_sum += reward env.render() if done: print(reward_sum) reward_sum = 0.0