def main(env):
    """Pretrain a DDPG agent on recorded expert data and save it to disk.

    Args:
        env: Environment handed to ``DDPG``; ``env.action_space.shape[0]``
            is used as the number of action dimensions.

    The pretrained model is written to ``./gail_robot_env``. Nothing is
    returned.
    """
    action_dim = env.action_space.shape[0]

    # Zero-mean Ornstein-Uhlenbeck noise on the actions, sigma 0.5 per
    # action dimension. Parameter-space noise is explicitly disabled.
    exploration_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(action_dim),
        sigma=0.5 * np.ones(action_dim),
    )

    # Using only one expert trajectory
    # you can specify `traj_limitation=-1` for using the whole dataset
    data_root = "/home/vignesh/Thesis_Suture_data/trial2/ambf_data/"
    expert_data = ExpertDataset(
        expert_path=data_root + 'expert_psm_data.npz',
        traj_limitation=1,
        batch_size=32,
    )

    # All DDPG hyperparameters in one place.
    ddpg_settings = dict(
        gamma=0.95,
        verbose=1,
        nb_train_steps=300,
        nb_rollout_steps=150,
        param_noise=None,
        batch_size=128,
        action_noise=exploration_noise,
        random_exploration=0.05,
        normalize_observations=True,
        tensorboard_log="./ddpg_dvrk_tensorboard/",
        observation_range=(-1.5, 1.5),
    )
    agent = DDPG(MlpPolicy, env, **ddpg_settings)

    agent.pretrain(expert_data, n_epochs=1000)
    agent.save("./gail_robot_env")
def train_agent_with_ddpg(load):
    """Return a DDPG agent for the ``F16GCAS-v0`` environment.

    Args:
        load: If truthy, load the previously saved model from
            ``ROOT + "/trained_models/TDRL/f16/ddpg/128_128"``. Otherwise
            build a fresh model, pretrain it for 100 epochs on the expert
            data in ``./lqr_export.npz`` and save it to that same path.

    Returns:
        The loaded or freshly pretrained ``DDPG`` model, bound to the
        wrapped environment.
    """
    from stable_baselines.ddpg.policies import FeedForwardPolicy
    from stable_baselines.common.vec_env import DummyVecEnv
    from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
    from stable_baselines import DDPG

    # Create and wrap the environment. A distinct name is used for the raw
    # environment so the lambda does not close over a variable that is
    # rebound to the wrapper by the same statement.
    raw_env = gym.make('F16GCAS-v0')
    env = DummyVecEnv([lambda: raw_env])

    # Custom MLP policy with two hidden layers of size 128 each
    class CustomPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs,
                                               layers=[128, 128],
                                               layer_norm=False,
                                               feature_extraction="mlp")

    model_path = ROOT + "/trained_models/TDRL/f16/ddpg/128_128"

    if load:
        # Load directly; building a throwaway DDPG model first (as the
        # previous version did) only wastes time and memory.
        return DDPG.load(model_path, policy=CustomPolicy, env=env)

    # the noise objects for DDPG (only needed when training a new model)
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(n_actions),
        sigma=float(0.01) * np.ones(n_actions),
    )

    model = DDPG(CustomPolicy, env, verbose=1, action_noise=action_noise)
    expert_data = ExpertDataset("./lqr_export.npz")
    model.pretrain(expert_data, n_epochs=100)
    model.save(model_path)
    return model