def __init__(
        self,
        render=True,
        init_noise=True,
        act_noise=False,
        obs_noise=False,
        control_skip=10,
        max_tar_vel=2.5,
        energy_weight=0.1,
        jl_weight=0.5,
        ab=5.0,
        q_pen_weight=0.4,
        dq_pen_weight=0.001,
        vel_r_weight=4.0,
        train_dyn=True,          # if false, fix dyn and train motor policy
        pretrain_dyn=False,      # pre-train with deviation to sim
        enlarge_act_range=0.25,  # make behavior pi more diverse to match collection
        behavior_dir="trained_models_laika_bullet_48/ppo",
        behavior_env_name="LaikagoBulletEnv-v3",
        behavior_iter=None,
        dyn_dir="",
        dyn_env_name="LaikagoConFEnv-v3",  # itself
        dyn_iter=None,
        cuda_env=True,
):
    """Build the Laikago contact-force (v3) environment.

    Depending on ``train_dyn``, exactly one pretrained checkpoint is loaded
    via ``utils.load``:

    * ``train_dyn=True``  -- load a fixed *behavior* policy
      (``behavior_dir`` / ``behavior_env_name`` / ``behavior_iter``); the
      dynamics model is the thing being trained, so ``dyn_actor_critic``
      stays ``None``.
    * ``train_dyn=False`` -- load a fixed *dynamics* model
      (``dyn_dir`` / ``dyn_env_name`` / ``dyn_iter``); the motor policy is
      trained, so ``go_actor_critic`` stays ``None``.

    A PyBullet client is connected (GUI when ``render`` is true), the
    robot is constructed, and one ``reset()`` is run so ``self.obs`` and
    ``self.obs_dim`` are populated before the gym spaces are created.
    """
    # --- configuration flags and reward-shaping weights ---
    self.render = render
    self.init_noise = init_noise
    self.obs_noise = obs_noise
    self.act_noise = act_noise
    self.control_skip = int(control_skip)
    self._ts = 1. / 500.  # simulation time step passed to the robot
    self.max_tar_vel = max_tar_vel
    self.energy_weight = energy_weight
    self.jl_weight = jl_weight
    self.ab = ab
    self.q_pen_weight = q_pen_weight
    self.dq_pen_weight = dq_pen_weight
    self.vel_r_weight = vel_r_weight
    self.train_dyn = train_dyn
    self.enlarge_act_range = enlarge_act_range
    self.pretrain_dyn = pretrain_dyn
    self.cuda_env = cuda_env

    # connect a PyBullet client: GUI only when rendering was requested
    if self.render:
        self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
    else:
        self._p = bullet_client.BulletClient()

    # NOTE(review): the robot is constructed with np_random=None and only
    # seeded afterwards -- presumably seed() re-wires the robot's RNG;
    # confirm against LaikagoBulletV2/seed implementation.
    self.np_random = None
    self.robot = LaikagoBulletV2(init_noise=self.init_noise,
                                 time_step=self._ts,
                                 np_random=self.np_random)
    self.seed(0)  # used once temporarily, will be overwritten outside through superclass api

    self.viewer = None
    self.timer = 0

    # feature-selection function taken from the v3 behavior env class
    self.feat_select_func = LaikagoBulletEnvV3.feature_selection_G2BD_laika_v3

    if self.train_dyn:
        if behavior_iter:
            behavior_iter = int(behavior_iter)
        self.dyn_actor_critic = None
        # load fixed behavior policy
        self.go_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                behavior_dir, behavior_env_name, self.cuda_env, behavior_iter
            )
    else:
        if dyn_iter:
            dyn_iter = int(dyn_iter)
        # train motor policy
        self.go_actor_critic = None
        # load fixed dynamics model
        self.dyn_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                dyn_dir, dyn_env_name, self.cuda_env, dyn_iter
            )

    # # self.discri = utils.load_gail_discriminator(dyn_dir,
    # #                                             dyn_env_name,
    # #                                             self.cuda_env,
    # #                                             dyn_iter)
    # #
    # # self.feat_select_func = self.robot.feature_selection_all_laika

    self.reset_const = 100
    self.reset_counter = self.reset_const  # do a hard reset first
    self.obs = []
    self.behavior_act_len = None
    self.init_state = None

    # self.obs here is for G which is longer and contains dqs
    self.obs_dim = 0  # dummy; the reset() call below fills in the real value
    self.reset()  # and update init obs & self.obs_dim

    # # self.d_scores = []
    # # set up imaginary session for pre-train
    # self.set_up_imaginary_session()

    if self.train_dyn:
        # behavior policy's action length must match the robot's DoFs
        assert self.behavior_act_len == len(self.robot.ctrl_dofs)  # (12)
        self.action_dim = 12  # 12D action scales, see beginning of step() for comment
    else:
        self.action_dim = len(self.robot.ctrl_dofs)

    self.act = [0.0] * self.action_dim
    self.action_space = gym.spaces.Box(
        low=np.array([-1.] * self.action_dim),
        high=np.array([+1.] * self.action_dim))

    # unbounded observation space sized from the obs produced by reset()
    obs_dummy = np.array([1.12234567] * self.obs_dim)
    # print(self.obs_dim)
    self.observation_space = gym.spaces.Box(low=-np.inf * obs_dummy,
                                            high=np.inf * obs_dummy)
def __init__(
        self,
        render=True,
        init_noise=True,
        act_noise=True,
        obs_noise=True,
        control_skip=10,
        using_torque_ctrl=True,
        max_tar_vel=2.5,
        energy_weight=0.1,
        jl_weight=0.5,
        ab=5.0,
        q_pen_weight=0.5,
        dq_pen_weight=0.02,
        vel_r_weight=4.0,
        train_dyn=True,          # if false, fix dyn and train motor policy
        pretrain_dyn=False,      # pre-train with deviation to sim
        enlarge_act_range=True,  # make behavior pi more diverse to match collection
        behavior_dir="trained_models_laika_bullet_12/ppo",
        behavior_env_name="LaikagoBulletEnv-v1",
        behavior_iter=None,
        dyn_dir="",
        dyn_env_name="LaikagoConFEnv-v1",
        dyn_iter=None,
        cuda_env=True,
        # # soft_floor_env=False,  # for collecting data
):
    """Build the Laikago contact-force (v1) environment.

    Mirrors the v3 constructor but targets the v1 checkpoints and the
    ``LaikagoBullet`` robot.  Exactly one pretrained model is loaded via
    ``utils.load``:

    * ``train_dyn=True``  -- fixed *behavior* policy is loaded and the
      dynamics model is trained (``dyn_actor_critic`` stays ``None``).
    * ``train_dyn=False`` -- fixed *dynamics* model is loaded and the
      motor policy is trained (``go_actor_critic`` stays ``None``).

    After one ``reset()`` the observation/action dimensions are known and
    the gym spaces are created.  ``set_up_imaginary_session()`` is invoked
    here (unlike the v3 constructor, where it is commented out).

    NOTE(review): ``using_torque_ctrl`` is accepted but never referenced
    in this constructor body -- presumably consumed elsewhere or vestigial;
    confirm before removing.
    """
    # --- configuration flags and reward-shaping weights ---
    self.render = render
    self.init_noise = init_noise
    self.obs_noise = obs_noise
    self.act_noise = act_noise
    self.control_skip = int(control_skip)
    self._ts = 1. / 500.  # simulation time step passed to the robot
    self.max_tar_vel = max_tar_vel
    self.energy_weight = energy_weight
    self.jl_weight = jl_weight
    self.ab = ab
    self.q_pen_weight = q_pen_weight
    self.dq_pen_weight = dq_pen_weight
    self.vel_r_weight = vel_r_weight
    self.train_dyn = train_dyn
    self.enlarge_act_range = enlarge_act_range
    self.pretrain_dyn = pretrain_dyn
    self.cuda_env = cuda_env
    # self.soft_floor_env = soft_floor_env

    # connect a PyBullet client: GUI only when rendering was requested
    if self.render:
        self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
    else:
        self._p = bullet_client.BulletClient()

    # NOTE(review): robot is constructed with np_random=None and only
    # seeded afterwards -- presumably seed() re-wires the robot's RNG;
    # confirm against LaikagoBullet/seed implementation.
    self.np_random = None
    self.robot = LaikagoBullet(init_noise=self.init_noise,
                               time_step=self._ts,
                               np_random=self.np_random)
    self.seed(0)  # used once temporarily, will be overwritten outside through superclass api

    self.viewer = None
    self.timer = 0

    if self.train_dyn:
        if behavior_iter:
            behavior_iter = int(behavior_iter)
        self.dyn_actor_critic = None
        # load fixed behavior policy
        self.go_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                behavior_dir, behavior_env_name, self.cuda_env, behavior_iter
            )
    else:
        if dyn_iter:
            dyn_iter = int(dyn_iter)
        # train motor policy
        self.go_actor_critic = None
        # load fixed dynamics model
        self.dyn_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                dyn_dir, dyn_env_name, self.cuda_env, dyn_iter
            )

    self.reset_counter = 50  # do a hard reset first
    self.obs = []
    self.behavior_obs_len = None
    self.behavior_act_len = None
    self.init_state = None
    self.reset()  # and update init obs

    # set up imaginary session for pre-train
    self.set_up_imaginary_session()

    if self.train_dyn:
        # behavior policy's action length must match the robot's DoFs
        assert self.behavior_act_len == len(self.robot.ctrl_dofs)
        self.action_dim = 12  # see beginning of step() for comment
        # dynamics-training obs = behavior obs concatenated with behavior action
        self.obs_dim = self.behavior_act_len + self.behavior_obs_len  # see beginning of update_obs() for comment
    else:
        self.action_dim = len(self.robot.ctrl_dofs)
        self.obs_dim = len(self.obs)

    self.act = [0.0] * self.action_dim
    self.action_space = gym.spaces.Box(
        low=np.array([-1.] * self.action_dim),
        high=np.array([+1.] * self.action_dim))

    # unbounded observation space sized from the dimensions computed above
    obs_dummy = np.array([1.12234567] * self.obs_dim)
    self.observation_space = gym.spaces.Box(low=-np.inf * obs_dummy,
                                            high=np.inf * obs_dummy)