# Example 1
    def __init__(
        self,
        render=True,
        init_noise=True,
        act_noise=False,
        obs_noise=False,
        control_skip=10,
        max_tar_vel=2.5,
        energy_weight=0.1,
        jl_weight=0.5,
        ab=5.0,
        q_pen_weight=0.4,
        dq_pen_weight=0.001,
        vel_r_weight=4.0,
        train_dyn=True,  # if false, fix dyn and train motor policy
        pretrain_dyn=False,  # pre-train with deviation to sim
        enlarge_act_range=0.25,  # make behavior pi more diverse to match collection
        behavior_dir="trained_models_laika_bullet_48/ppo",
        behavior_env_name="LaikagoBulletEnv-v3",
        behavior_iter=None,
        dyn_dir="",
        dyn_env_name="LaikagoConFEnv-v3",  # itself
        dyn_iter=None,
        cuda_env=True,
    ):
        """Set up the Laikago PyBullet environment (v3 "ConF" variant).

        Two mutually exclusive training modes share this class:

        * ``train_dyn=True``  -- a fixed, pre-trained behavior policy is
          loaded from ``behavior_dir``/``behavior_env_name`` and the
          dynamics model is the trainable part (12D action scales).
        * ``train_dyn=False`` -- a fixed, pre-trained dynamics model is
          loaded from ``dyn_dir``/``dyn_env_name`` and the motor policy is
          trained (action dim = ``len(self.robot.ctrl_dofs)``).

        The reward-shaping weights (``max_tar_vel`` through
        ``vel_r_weight``) are only stored here; presumably they are
        consumed by the reward computation in step() -- confirm there.

        ``reset()`` is called at the end to populate ``self.obs`` and
        ``self.obs_dim``, which the Gym action/observation spaces built
        below depend on.
        """

        self.render = render
        self.init_noise = init_noise
        self.obs_noise = obs_noise
        self.act_noise = act_noise
        self.control_skip = int(control_skip)
        # Fixed simulation timestep: 500 Hz (2 ms). Presumably one env step
        # advances the sim control_skip times -- confirm in step().
        self._ts = 1. / 500.

        # Reward-shaping weights; stored only, not used in this method.
        self.max_tar_vel = max_tar_vel
        self.energy_weight = energy_weight
        self.jl_weight = jl_weight
        self.ab = ab
        self.q_pen_weight = q_pen_weight
        self.dq_pen_weight = dq_pen_weight
        self.vel_r_weight = vel_r_weight

        self.train_dyn = train_dyn
        self.enlarge_act_range = enlarge_act_range
        self.pretrain_dyn = pretrain_dyn
        self.cuda_env = cuda_env

        # GUI client when rendering; otherwise the default (headless) client.
        if self.render:
            self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
        else:
            self._p = bullet_client.BulletClient()

        # NOTE(review): the robot is constructed with np_random=None before
        # seed() runs below -- seed() must re-wire the robot's RNG or the
        # robot never receives a seeded generator; confirm in seed().
        self.np_random = None
        self.robot = LaikagoBulletV2(init_noise=self.init_noise,
                                     time_step=self._ts,
                                     np_random=self.np_random)
        self.seed(
            0
        )  # used once temporarily, will be overwritten outside through superclass api
        self.viewer = None
        self.timer = 0  # step counter, presumably reset in reset() -- confirm

        # Feature-selection hook used when mapping full state to policy input.
        self.feat_select_func = LaikagoBulletEnvV3.feature_selection_G2BD_laika_v3

        if self.train_dyn:
            if behavior_iter:
                behavior_iter = int(behavior_iter)
            self.dyn_actor_critic = None
            # load fixed behavior policy
            self.go_actor_critic, _, \
                self.recurrent_hidden_states, \
                self.masks = utils.load(
                    behavior_dir, behavior_env_name, self.cuda_env, behavior_iter
                )
        else:
            if dyn_iter:
                dyn_iter = int(dyn_iter)
            # train motor policy
            self.go_actor_critic = None
            # load fixed dynamics model
            self.dyn_actor_critic, _, \
                self.recurrent_hidden_states, \
                self.masks = utils.load(
                    dyn_dir, dyn_env_name, self.cuda_env, dyn_iter
                )
            #
            # self.discri = utils.load_gail_discriminator(dyn_dir,
            #                                             dyn_env_name,
            #                                             self.cuda_env,
            #                                             dyn_iter)
            #
            # self.feat_select_func = self.robot.feature_selection_all_laika

        # Hard reset every reset_const soft resets; start at the threshold so
        # the very first reset() below is a hard one.
        self.reset_const = 100
        self.reset_counter = self.reset_const  # do a hard reset first
        self.obs = []

        self.behavior_act_len = None  # set during reset(), validated below
        self.init_state = None
        # self.obs here is for G which is longer and contains dqs
        self.obs_dim = 0  # dummy
        self.reset()  # and update init obs & self.obs_dim
        #
        # self.d_scores = []

        # # set up imaginary session for pre-train
        # self.set_up_imaginary_session()

        if self.train_dyn:
            assert self.behavior_act_len == len(self.robot.ctrl_dofs)  # (12)
            self.action_dim = 12  # 12D action scales, see beginning of step() for comment
        else:
            self.action_dim = len(self.robot.ctrl_dofs)

        # Symmetric [-1, 1] action box; unbounded observation box sized from
        # the obs_dim discovered by reset() above.
        self.act = [0.0] * self.action_dim
        self.action_space = gym.spaces.Box(
            low=np.array([-1.] * self.action_dim),
            high=np.array([+1.] * self.action_dim))
        obs_dummy = np.array([1.12234567] * self.obs_dim)
        # print(self.obs_dim)
        self.observation_space = gym.spaces.Box(low=-np.inf * obs_dummy,
                                                high=np.inf * obs_dummy)
# Example 2
    def __init__(
        self,
        render=True,
        init_noise=True,
        act_noise=True,
        obs_noise=True,
        control_skip=10,
        using_torque_ctrl=True,
        max_tar_vel=2.5,
        energy_weight=0.1,
        jl_weight=0.5,
        ab=5.0,
        q_pen_weight=0.5,
        dq_pen_weight=0.02,
        vel_r_weight=4.0,
        train_dyn=True,  # if false, fix dyn and train motor policy
        pretrain_dyn=False,  # pre-train with deviation to sim
        enlarge_act_range=True,  # make behavior pi more diverse to match collection
        behavior_dir="trained_models_laika_bullet_12/ppo",
        behavior_env_name="LaikagoBulletEnv-v1",
        behavior_iter=None,
        dyn_dir="",
        dyn_env_name="LaikagoConFEnv-v1",
        dyn_iter=None,
        cuda_env=True,
        #
        # soft_floor_env=False,      # for collecting data
    ):
        """Set up the Laikago PyBullet environment (v1 variant).

        Mirrors the v3 constructor pattern: ``train_dyn=True`` loads a
        fixed behavior policy and trains the dynamics model;
        ``train_dyn=False`` loads a fixed dynamics model and trains the
        motor policy. Checkpoints are fetched via ``utils.load``.

        NOTE(review): ``using_torque_ctrl`` is accepted but never stored or
        read in this constructor -- presumably consumed elsewhere or dead;
        confirm against the robot class.

        Reward-shaping weights (``max_tar_vel`` through ``vel_r_weight``)
        are only stored here; presumably used by the reward computation in
        step() -- confirm there.

        ``reset()`` populates ``self.obs`` and the ``behavior_*_len``
        fields the Gym spaces below depend on;
        ``set_up_imaginary_session()`` then prepares the pre-training
        session (used when ``pretrain_dyn`` is set -- confirm).
        """

        self.render = render
        self.init_noise = init_noise
        self.obs_noise = obs_noise
        self.act_noise = act_noise
        self.control_skip = int(control_skip)
        # Fixed simulation timestep: 500 Hz (2 ms). Presumably one env step
        # advances the sim control_skip times -- confirm in step().
        self._ts = 1. / 500.

        # Reward-shaping weights; stored only, not used in this method.
        self.max_tar_vel = max_tar_vel
        self.energy_weight = energy_weight
        self.jl_weight = jl_weight
        self.ab = ab
        self.q_pen_weight = q_pen_weight
        self.dq_pen_weight = dq_pen_weight
        self.vel_r_weight = vel_r_weight

        self.train_dyn = train_dyn
        self.enlarge_act_range = enlarge_act_range
        self.pretrain_dyn = pretrain_dyn
        self.cuda_env = cuda_env
        # self.soft_floor_env = soft_floor_env

        # GUI client when rendering; otherwise the default (headless) client.
        if self.render:
            self._p = bullet_client.BulletClient(connection_mode=pybullet.GUI)
        else:
            self._p = bullet_client.BulletClient()

        # NOTE(review): the robot is constructed with np_random=None before
        # seed() runs below -- seed() must re-wire the robot's RNG or the
        # robot never receives a seeded generator; confirm in seed().
        self.np_random = None
        self.robot = LaikagoBullet(init_noise=self.init_noise,
                                   time_step=self._ts,
                                   np_random=self.np_random)
        self.seed(
            0
        )  # used once temporarily, will be overwritten outside through superclass api
        self.viewer = None
        self.timer = 0  # step counter, presumably reset in reset() -- confirm

        if self.train_dyn:
            if behavior_iter:
                behavior_iter = int(behavior_iter)
            self.dyn_actor_critic = None
            # load fixed behavior policy
            self.go_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                behavior_dir, behavior_env_name, self.cuda_env, behavior_iter
            )
        else:
            if dyn_iter:
                dyn_iter = int(dyn_iter)
            # train motor policy
            self.go_actor_critic = None
            # load fixed dynamics model
            self.dyn_actor_critic, _, \
            self.recurrent_hidden_states, \
            self.masks = utils.load(
                dyn_dir, dyn_env_name, self.cuda_env, dyn_iter
            )

        self.reset_counter = 50  # do a hard reset first
        self.obs = []
        # Filled in during reset(); validated / consumed below.
        self.behavior_obs_len = None
        self.behavior_act_len = None
        self.init_state = None
        self.reset()  # and update init obs

        # set up imaginary session for pre-train
        self.set_up_imaginary_session()

        if self.train_dyn:
            assert self.behavior_act_len == len(self.robot.ctrl_dofs)
            self.action_dim = 12  # see beginning of step() for comment
            self.obs_dim = self.behavior_act_len + self.behavior_obs_len  # see beginning of update_obs() for comment
        else:
            self.action_dim = len(self.robot.ctrl_dofs)
            self.obs_dim = len(self.obs)

        # Symmetric [-1, 1] action box; unbounded observation box sized from
        # the obs_dim determined above.
        self.act = [0.0] * self.action_dim
        self.action_space = gym.spaces.Box(
            low=np.array([-1.] * self.action_dim),
            high=np.array([+1.] * self.action_dim))
        obs_dummy = np.array([1.12234567] * self.obs_dim)
        self.observation_space = gym.spaces.Box(low=-np.inf * obs_dummy,
                                                high=np.inf * obs_dummy)