Code example #1
    def update_extended_observation(self):
        self.obs = self.robot.get_robot_observation(with_vel=True)

        # author note: this arguably should be /2 (downsampled)
        self.obs.extend(
            list(np.mean(self.normal_forces[-self.control_skip:], axis=0)))

        if self.obs_noise:
            self.obs = utils.perturb(self.obs, 0.1, self.np_random)

        if self.train_dyn:
            obs_behavior = self.feat_select_func(self.obs)
            obs_nn = utils.wrap(obs_behavior, is_cuda=self.cuda_env)
            with torch.no_grad():
                _, action_nn, _, self.recurrent_hidden_states = self.go_actor_critic.act(
                    obs_nn,
                    self.recurrent_hidden_states,
                    self.masks,
                    deterministic=False)
            action = utils.unwrap(action_nn, is_cuda=self.cuda_env)

            # e.g. 25% noise when enlarge_act_range = 0.25, since the action is clipped to [-1, 1] below
            action = utils.perturb(action, self.enlarge_act_range,
                                   self.np_random)
            action = np.clip(action, -1.0, 1.0)
            self.behavior_act_len = len(action)

            self.obs = list(self.obs) + list(action)
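Every snippet on this page routes observations and actions through utils.perturb, whose implementation is not shown here. A minimal sketch consistent with the call sites (values, noise magnitude, optional NumPy RandomState), assuming additive uniform noise, might look like this:

    import numpy as np

    def perturb(arr, r, np_rand=None):
        # sketch only: the real gail-dyn helper may differ; call sites treat
        # the second argument as a noise level (e.g. 0.1 is "10% noise")
        rng = np_rand if np_rand is not None else np.random
        arr = np.asarray(arr, dtype=np.float64)
        return arr + rng.uniform(low=-r, high=r, size=arr.shape)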
Code example #2
    def update_extended_observation(self):
        self.obs = self.robot.get_robot_observation()

        # self.obs = np.concatenate((self.obs, [np.minimum(self.timer / 500, self.max_tar_vel)]))  # TODO

        if self.obs_noise:
            self.obs = utils.perturb(self.obs, 0.1, self.np_random)

        if self.train_dyn:
            self.behavior_obs_len = len(self.obs)

            obs_nn = utils.wrap(self.obs, is_cuda=self.cuda_env)
            with torch.no_grad():
                _, action_nn, _, self.recurrent_hidden_states = self.go_actor_critic.act(
                    obs_nn,
                    self.recurrent_hidden_states,
                    self.masks,
                    deterministic=False  # TODO: consider a deterministic policy (det pi)
                )
            action = utils.unwrap(action_nn, is_cuda=self.cuda_env)

            if self.enlarge_act_range:
                # 15% noise, given actions are on the clipped [-1, 1] scale
                action = utils.perturb(action, 0.15, self.np_random)

            self.behavior_act_len = len(action)

            self.obs = np.concatenate((self.obs, action))
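utils.wrap and utils.unwrap (implementations not shown) shuttle data between NumPy and torch around the actor-critic call. Plausible sketches, assuming wrap lifts a flat observation to a (1, D) float tensor and unwrap flattens the action tensor back:

    import numpy as np
    import torch

    def wrap(x, is_cuda=False):
        # sketch: batch a 1-D observation for the policy network
        t = torch.tensor(np.asarray(x), dtype=torch.float32).unsqueeze(0)
        return t.cuda() if is_cuda else t

    def unwrap(t, is_cuda=False):
        # sketch: move back to CPU NumPy and drop the batch dimension
        if is_cuda:
            t = t.cpu()
        return t.detach().numpy().flatten()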
Code example #3
    def reset(self, bullet_client):
        self._p = bullet_client

        base_init_pos = utils.perturb(self.base_init_pos, 0.03)
        base_init_euler = utils.perturb(self.base_init_euler, 0.1)

        self.go_id = self._p.loadURDF(
            os.path.join(currentdir,
                         "assets/laikago/laikago_toes_limits.urdf"),
            list(base_init_pos - [0.043794, 0.0, 0.03]),
            list(self._p.getQuaternionFromEuler(list(base_init_euler))),
            flags=self._p.URDF_USE_SELF_COLLISION,
            useFixedBase=0)
        # self.go_id = self._p.loadURDF(os.path.join(currentdir,
        #                                                "assets/laikago/laikago_limits.urdf"),
        #                               list(self.base_init_pos),
        #                               list(self._p.getQuaternionFromEuler([1.5708, 0, 1.5708])),
        #                               flags=self._p.URDF_USE_SELF_COLLISION,
        #                               useFixedBase=0)
        # self.go_id = self._p.loadURDF(os.path.join(currentdir,
        #                                                "assets/laikago/laikago_toes_limits.urdf"),
        #                               list(self.base_init_pos),
        #                               list(self.base_init_quat),
        #                               useFixedBase=0)

        # self.print_all_joints_info()

        for j in range(self._p.getNumJoints(self.go_id)):
            # self._p.changeDynamics(self.go_id, j, linearDamping=0, angularDamping=0)     # TODO
            self._p.changeDynamics(self.go_id, j, jointDamping=0.5)  # TODO

        if len(self.ctrl_dofs) == 0:
            for j in range(self._p.getNumJoints(self.go_id)):
                info = self._p.getJointInfo(self.go_id, j)
                joint_type = info[2]
                if joint_type == self._p.JOINT_PRISMATIC or joint_type == self._p.JOINT_REVOLUTE:
                    self.ctrl_dofs.append(j)

        # print("ctrl dofs:", self.ctrl_dofs)

        self.reset_joints(self.init_q, np.array([0.0] * len(self.ctrl_dofs)))

        # release the default velocity motors so torque control can be used:
        self._p.setJointMotorControlArray(bodyIndex=self.go_id,
                                          jointIndices=self.ctrl_dofs,
                                          controlMode=self._p.VELOCITY_CONTROL,
                                          forces=[0.0] * len(self.ctrl_dofs))
        self.torque = [0.0] * len(self.ctrl_dofs)

        self.ll = np.array(
            [self._p.getJointInfo(self.go_id, i)[8] for i in self.ctrl_dofs])
        self.ul = np.array(
            [self._p.getJointInfo(self.go_id, i)[9] for i in self.ctrl_dofs])

        assert len(self.ctrl_dofs) == len(self.init_q)
        assert len(self.max_forces) == len(self.ctrl_dofs)
        assert len(self.max_forces) == len(self.ll)
Code example #4
    def reset_joints(self, q, dq):
        if self.init_noise:
            init_q = utils.perturb(q, 0.01, self.np_random)
            init_dq = utils.perturb(dq, 0.1, self.np_random)
        else:
            init_q = utils.perturb(q, 0.0, self.np_random)
            init_dq = utils.perturb(dq, 0.0, self.np_random)

        for pt, ind in enumerate(self.ctrl_dofs):
            self._p.resetJointState(self.go_id, ind, init_q[pt], init_dq[pt])
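Note that when init_noise is off, the joint states are still routed through perturb with magnitude 0.0, which (under the additive-uniform sketch above) degrades to a plain copy; this keeps both branches returning fresh arrays of the same type.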
Code example #5
    def soft_reset(self, bullet_client):
        self._p = bullet_client

        base_init_pos = utils.perturb(self.base_init_pos, 0.03)
        base_init_euler = utils.perturb(self.base_init_euler, 0.1)

        self._p.resetBaseVelocity(self.go_id, [0., 0, 0], [0., 0, 0])
        self._p.resetBasePositionAndOrientation(
            self.go_id, list(base_init_pos),
            list(self._p.getQuaternionFromEuler(list(base_init_euler))))

        self.reset_joints(self.init_q, np.array([0.0] * len(self.ctrl_dofs)))
Code example #6
    def get_extended_observation(self):

        # with vel false
        cur_state = self.robot.get_robot_observation(with_vel=False)

        if self.obs_noise:
            cur_state = utils.perturb(cur_state, 0.1, self.np_random)

        # then update past obs
        utils.push_recent_value(self.past_obs_array, cur_state)

        # then construct behavior obs
        b_obs_all = utils.select_and_merge_from_s_a(
            s_mt=list(self.past_obs_array),
            a_mt=list(self.past_bact_array),
            s_idx=self.behavior_past_obs_t_idx,
            a_idx=np.array([]))
        # if train motor, return behavior obs and we are done
        if not self.train_dyn:
            return b_obs_all

        # else, train dyn
        # rollout b_pi
        obs_nn = utils.wrap(b_obs_all, is_cuda=self.cuda_env)
        with torch.no_grad():
            _, action_nn, _, self.recurrent_hidden_states = self.go_actor_critic.act(
                obs_nn,
                self.recurrent_hidden_states,
                self.masks,
                deterministic=False)
        b_cur_act = list(utils.unwrap(action_nn, is_cuda=self.cuda_env))
        b_cur_act = utils.perturb(b_cur_act, self.enlarge_act_range,
                                  self.np_random)
        b_cur_act = np.tanh(b_cur_act)

        # Store action after tanh (-1,1)
        utils.push_recent_value(self.past_bact_array, b_cur_act)

        # construct the generator (G) obs from the updated past obs & behavior actions
        # g_obs_all = utils.select_and_merge_from_s_a(
        #     s_mt=list(self.past_obs_array),
        #     a_mt=list(self.past_bact_array),
        #     s_idx=self.generator_past_obs_t_idx,
        #     a_idx=self.generator_past_act_t_idx
        # )

        # calculate 4x split obs here
        g_obs_all = self.get_all_feet_local_obs(b_cur_act)

        return g_obs_all
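Examples #6 and #12 assemble their observations from a short history using two helpers that are not shown on this page. A sketch of plausible implementations, assuming past_obs_array and past_bact_array are collections.deque objects with a fixed maxlen whose index 0 holds the newest entry (consistent with past_obs_array[0] being read as s_t+1 in example #16):

    import numpy as np

    def push_recent_value(history, value):
        # sketch: newest entry goes to index 0; deque(maxlen=...) drops the oldest
        history.appendleft(np.array(value))

    def select_and_merge_from_s_a(s_mt, a_mt, s_idx, a_idx):
        # sketch: concatenate the states/actions at the requested past time
        # offsets (0 = most recent) into one flat vector
        merged = []
        for i in s_idx:
            merged.extend(np.asarray(s_mt[int(i)]).ravel())
        for i in a_idx:
            merged.extend(np.asarray(a_mt[int(i)]).ravel())
        return np.array(merged)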
Code example #7
    def update_extended_observation(self):
        self.obs = self.robot.get_robot_observation()

        if self.correct_obs_dx:
            dx = self.get_ave_dx() * self.robot.obs_scaling[5]
            self.obs[5] = dx

        if self.obs_noise:
            self.obs = utils.perturb(self.obs, 0.1, self.np_random)
Code example #8
    def get_extended_observation(self):
        obs = self.robot.get_robot_observation(with_vel=False)

        if self.obs_noise:
            obs = utils.perturb(obs, 0.1, self.np_random)

        # print(obs[-4:])

        return obs
Code example #9
File: hopper.py, Project: googleinterns/gail-dyn
    def reset(
            self,
            bullet_client
    ):
        self._p = bullet_client
        self.hopper_id = self._p.loadURDF(os.path.join(currentdir,
                                                       "assets/hopper_my_box.urdf"),
                                          list(self.base_init_pos),
                                          self._p.getQuaternionFromEuler(list(self.base_init_euler)),
                                          flags=self._p.URDF_USE_SELF_COLLISION,
                                          useFixedBase=1)

        # self.print_all_joints_info()

        if self.init_noise:
            init_q = utils.perturb([0.0] * self.n_total_dofs, 0.01, self.np_random)
            init_dq = utils.perturb([0.0] * self.n_total_dofs, 0.1, self.np_random)
        else:
            init_q = utils.perturb([0.0] * self.n_total_dofs, 0.0, self.np_random)
            init_dq = utils.perturb([0.0] * self.n_total_dofs, 0.0, self.np_random)

        for ind in range(self.n_total_dofs):
            self._p.resetJointState(self.hopper_id, ind, init_q[ind], init_dq[ind])

        # turn off root default control:
        self._p.setJointMotorControlArray(
            bodyIndex=self.hopper_id,
            jointIndices=self.root_dofs,
            controlMode=self._p.VELOCITY_CONTROL,
            forces=[0.0] * len(self.root_dofs))
        # release the default motors (zero-force velocity control) to enable torque control
        self._p.setJointMotorControlArray(
            bodyIndex=self.hopper_id,
            jointIndices=self.ctrl_dofs,
            controlMode=self._p.VELOCITY_CONTROL,
            forces=[0.0] * len(self.ctrl_dofs))
        self.torque = [0.0] * len(self.ctrl_dofs)

        self.ll = np.array([self._p.getJointInfo(self.hopper_id, i)[8] for i in self.ctrl_dofs])
        self.ul = np.array([self._p.getJointInfo(self.hopper_id, i)[9] for i in self.ctrl_dofs])
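Issuing VELOCITY_CONTROL with zero max forces, as in both blocks above, is the standard PyBullet idiom for releasing the built-in joint motors: without it, later torque commands would be fighting the default velocity servos.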
Code example #10
    def get_extended_observation(self):
        obs = self.robot.get_robot_observation(with_vel=False)

        # author note: this arguably should be /2 (downsampled)
        obs.extend(
            list(np.mean(self.normal_forces[-self.control_skip:], axis=0)))

        if self.obs_noise:
            obs = utils.perturb(obs, 0.1, self.np_random)

        # print(obs[-4:])

        return obs
Code example #11
File: laikago_env.py, Project: googleinterns/gail-dyn
    def get_extended_observation(self):
        obs = self.robot.get_robot_observation()

        # for foot in self.robot.feet:
        #     cps = self._p.getContactPoints(self.robot.go_id, self.floor_id, foot, -1)
        #     if len(cps) > 0:
        #         obs.extend([1.0])
        #     else:
        #         obs.extend([-1.0])

        # obs.extend([np.minimum(self.timer / 500, self.max_tar_vel)])  # TODO

        if self.obs_noise:
            obs = utils.perturb(obs, 0.1, self.np_random)

        # print(obs)
        return obs
Code example #12
    def get_extended_observation(self):
        obs = self.robot.get_robot_observation(with_vel=False)

        if self.obs_noise:
            obs = utils.perturb(obs, 0.1, self.np_random)

        utils.push_recent_value(self.past_obs_array, obs)

        # print(obs[-4:])

        # obs_all = []
        # list_past_obs = list(self.past_obs_array)
        # for t_idx in self.behavior_past_obs_t_idx:
        #     obs_all.extend(list_past_obs[t_idx])

        obs_all = utils.select_and_merge_from_s_a(
            s_mt=list(self.past_obs_array),
            a_mt=list(self.past_act_array),
            s_idx=self.behavior_past_obs_t_idx,
            a_idx=np.array([]))

        return list(obs_all)
Code example #13
    def step(self, a):

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_0 = root_pos[0]

        a = np.clip(a, -1.0, 1.0)
        if self.act_noise:
            a = utils.perturb(a, 0.05, self.np_random)

        if self.low_power_env:
            # # for pi44
            # _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
            # max_force_ratio = np.clip(2 - dq/2.5, 0, 1)
            # a *= max_force_ratio
            # for pi43
            _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
            max_force_ratio = np.clip(2 - dq/2., 0, 1)
            a *= max_force_ratio

        for _ in range(self.control_skip):
            # apply the (already clipped) action at every inner sim step
            if a is not None:
                self.act = a
                self.robot.apply_action(a)

            if self.randomforce_train:
                for foot_ind, link in enumerate(self.robot.feet):
                    # random perturbation force on each foot, up to +/-80 N per
                    # axis (drawn in fz, fx, fy order, applied as [fx, fy, fz])
                    fz = np.random.uniform(-80, 80)
                    fx = np.random.uniform(-80, 80)
                    fy = np.random.uniform(-80, 80)

                    utils.apply_external_world_force_on_local_point(self.robot.go_id, link,
                                                                    [fx, fy, fz],
                                                                    [0, 0, 0],
                                                                    self._p)
            self._p.stepSimulation()
            if self.render:
                time.sleep(self._ts * 0.5)
            self.timer += 1

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_1 = root_pos[0]
        self.velx = (x_1 - x_0) / (self.control_skip * self._ts)

        q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)

        reward = self.ab  # alive bonus
        tar = np.minimum(self.timer / 500, self.max_tar_vel)
        reward += np.minimum(self.velx, tar) * self.vel_r_weight
        # print("v", self.velx, "tar", tar)
        reward += -self.energy_weight * np.square(a).sum()
        # print("act norm", -self.energy_weight * np.square(a).sum())

        pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
        q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
        joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
        reward += -self.jl_weight * joints_at_limit
        # print("jl", -self.jl_weight * joints_at_limit)

        reward += -np.minimum(np.sum(np.square(dq)) * self.dq_pen_weight, 5.0)
        weight = np.array([2.0, 1.0, 1.0] * 4)
        reward += -np.minimum(np.sum(np.square(q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0)

        # reward = self.ab
        # tar = np.minimum(self.timer / 500, self.max_tar_vel)
        # reward += np.minimum(self.velx, tar) * self.vel_r_weight
        # # print("v", self.velx, "tar", tar)
        #
        # # reward += np.maximum((self.max_tar_vel - tar) * self.vel_r_weight - 3.0, 0.0)     # alive bonus
        #
        # reward += -self.energy_weight * np.linalg.norm(a)
        # # print("act norm", -self.energy_weight * np.square(a).sum())
        #
        # q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
        # # print(np.max(np.abs(dq)))
        # pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
        # q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
        # joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
        # reward += -self.jl_weight * joints_at_limit
        # # print("jl", -self.jl_weight * joints_at_limit)
        #
        # reward += -np.minimum(np.linalg.norm(dq) * self.dq_pen_weight, 5.0)
        # weight = np.array([2.0, 0.2, 1.0] * 4)
        # reward += -np.minimum(np.linalg.norm((q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0)
        # # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0))
        # # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0))

        y_1 = root_pos[1]
        reward += -y_1 * 0.5
        # print("dev pen", -y_1*0.5)
        height = root_pos[2]

        in_support = self.robot.is_root_com_in_support()
        # print("______")
        # print(in_support)

        # print("h", height)
        # print("dq.", np.abs(dq))
        # print((np.abs(dq) < 50).all())

        # cps = self._p.getContactPoints(bodyA=self.robot.go_id, linkIndexA=0)
        # body_in_contact = (len(cps) > 0)

        # print("------")
        obs = self.get_extended_observation()
        # rpy = self._p.getEulerFromQuaternion(obs[8:12])

        # for data collection
        not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0)
        # TODO
        # not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0) and in_support
        # not_done = True

        return obs, reward, not not_done, {}
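Note how the velocity term is shaped: the target speed ramps up linearly over the first 500 control steps (tar = np.minimum(self.timer / 500, self.max_tar_vel)), and only forward velocity up to that target is rewarded, so the policy gains nothing by overshooting the ramp.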
Code example #14
    def step(self, a):
        # TODO: currently Laikago-specific; env_action is 12D (4 feet x 3D contact force, no wrench)
        if self.train_dyn:
            env_action = a
            robo_action = self.obs[-self.behavior_act_len:]
        else:
            robo_action = a
            robo_action = np.clip(robo_action, -1.0, 1.0)
            env_pi_obs = np.concatenate((self.obs, robo_action))
            env_pi_obs_nn = utils.wrap(env_pi_obs, is_cuda=self.cuda_env)
            with torch.no_grad():
                _, env_action_nn, _, self.recurrent_hidden_states = self.dyn_actor_critic.act(
                    env_pi_obs_nn,
                    self.recurrent_hidden_states,
                    self.masks,
                    deterministic=False)
            env_action = utils.unwrap(env_action_nn, is_cuda=self.cuda_env)

            # self.ratios = np.append(self.ratios, [env_action / robo_action], axis=0)
            #
            # env_pi_obs_feat = self.feat_select_func(self.obs)
            # dis_state = np.concatenate((env_pi_obs_feat, robo_action))
            # dis_state = utils.wrap(dis_state, is_cuda=self.cuda_env)

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_0 = root_pos[0]

        # post-action noise (unseen by the policy), unlike the "seen"
        # diversification applied via self.enlarge_act_range
        if self.act_noise:
            robo_action = utils.perturb(robo_action, 0.05, self.np_random)

        # # TODO
        # if self.pretrain_dyn:
        #     # self.state_id = self._p.saveState()
        #     self.img_obs, pre_s_i = self.rollout_one_step_imaginary()     # takes the old self.obs
        #     # img_obs = self.rollout_one_step_imaginary_same_session()
        #     # self._p.restoreState(self.state_id)
        #     pre_s = self.robot.get_robot_raw_state_vec()
        #     # print(pre_s_i)
        #     # print(pre_s)
        #     assert np.allclose(pre_s, pre_s_i, atol=1e-5)

        for _ in range(self.control_skip):
            self.robot.apply_action(robo_action)
            self.apply_scale_clip_conf_from_pi_new(env_action)
            self._p.stepSimulation()
            if self.render:
                time.sleep(self._ts * 0.5)
            self.timer += 1

        self.update_extended_observation()

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_1 = root_pos[0]
        self.velx = (x_1 - x_0) / (self.control_skip * self._ts)

        y_1 = root_pos[1]
        height = root_pos[2]
        q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
        # print(np.max(np.abs(dq)))
        in_support = self.robot.is_root_com_in_support()

        if not self.pretrain_dyn:
            reward = self.ab  # alive bonus
            tar = np.minimum(self.timer / 500, self.max_tar_vel)
            reward += np.minimum(self.velx, tar) * self.vel_r_weight
            # print("v", self.velx, "tar", tar)
            reward += -self.energy_weight * np.square(robo_action).sum()
            # print("act norm", -self.energy_weight * np.square(a).sum())

            pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
            q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
            joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
            reward += -self.jl_weight * joints_at_limit
            # print("jl", -self.jl_weight * joints_at_limit)

            reward += -np.minimum(
                np.sum(np.square(dq)) * self.dq_pen_weight, 5.0)
            weight = np.array([2.0, 1.0, 1.0] * 4)
            reward += -np.minimum(
                np.sum(np.square(q - self.robot.init_q) * weight) *
                self.q_pen_weight, 5.0)
            # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0))
            # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0))

            y_1 = root_pos[1]
            reward += -y_1 * 0.5
            # print("dev pen", -y_1*0.5)
        else:
            # reward = self.calc_obs_dist_pretrain(self.img_obs[:-4], self.obs[:len(self.img_obs[:-4])])
            reward = 0  # TODO

        # print("______")
        # print(in_support)

        # print("h", height)
        # print("dq.", np.abs(dq))
        # print((np.abs(dq) < 50).all())

        # print("------")
        # conf policy will not have body-in-contact flag
        not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0)
        # not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0) and in_support
        # not_done = (abs(y_1) < 5.0) and (height > 0.1) and (height < 1.0) and (rpy[2] > 0.1)
        # not_done = True
        #
        # if not not_done:
        #     print(self.ratios.shape)
        #     print(np.mean(self.ratios, axis=0))

        # if not self.train_dyn:
        #     dis_action = self.feat_select_func(self.obs)
        #     dis_action = utils.wrap(dis_action, is_cuda=self.cuda_env)
        #     d_score = self.discri.predict_prob_single_step(dis_state, dis_action)
        #     self.d_scores.append(utils.unwrap(d_score, is_cuda=self.cuda_env))
        #     # if len(self.d_scores) > 20 and np.mean(self.d_scores[-20:]) < 0.4:
        #     #     not_done = False
        #     # if not not_done or self.timer==1000:
        #     #     print(np.mean(self.d_scores))

        if not self.train_dyn:
            obs_behavior = self.feat_select_func(self.obs)
            return obs_behavior, reward, not not_done, {}
        return self.obs, reward, not not_done, {}
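This step function serves both training modes. With train_dyn set, the incoming action a is the dynamics ("confounder") policy output and the behavior action is recovered from the tail of the extended observation; otherwise a is the behavior action and the dynamics action is sampled on the fly from dyn_actor_critic.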
Code example #15
    def step(self, a):

        assert self.train_dyn
        # TODO: currently Laikago-specific; env_action is 12D (4 feet x 3D contact force, no wrench)
        env_action = a
        robo_action = self.obs[self.behavior_obs_len:]
        # else:
        #     robo_action = a
        #     if not self.soft_floor_env:
        #         env_pi_obs = np.concatenate((self.obs, robo_action))
        #         env_pi_obs_nn = utils.wrap(env_pi_obs, is_cuda=self.cuda_env)
        #         with torch.no_grad():
        #             _, env_action_nn, _, self.recurrent_hidden_states = self.dyn_actor_critic.act(
        #                 env_pi_obs_nn, self.recurrent_hidden_states, self.masks, deterministic=False
        #             )
        #         env_action = utils.unwrap(env_action_nn, is_cuda=self.cuda_env)

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_0 = root_pos[0]

        # post-action noise (unseen by the policy), unlike the "seen"
        # diversification applied via self.enlarge_act_range
        robo_action = np.clip(robo_action, -1.0, 1.0)
        if self.act_noise:
            robo_action = utils.perturb(robo_action, 0.05, self.np_random)

        if self.pretrain_dyn:
            # self.state_id = self._p.saveState()
            self.img_obs, pre_s_i = self.rollout_one_step_imaginary(
                env_action)  # takes the old self.obs
            # img_obs = self.rollout_one_step_imaginary_same_session()
            # self._p.restoreState(self.state_id)
            pre_s = self.robot.get_robot_raw_state_vec()
            # print(pre_s_i)
            # print(pre_s)
            assert np.allclose(pre_s, pre_s_i, atol=1e-5)

        for _ in range(self.control_skip):
            self.robot.apply_action(robo_action)
            self._p.stepSimulation()
            if self.render:
                time.sleep(self._ts * 0.5)
            self.timer += 1
        self.update_extended_observation()

        # print(self.obs[:self.behavior_obs_len - 5], "out real")

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_1 = root_pos[0]
        self.velx = (x_1 - x_0) / (self.control_skip * self._ts)

        y_1 = root_pos[1]
        height = root_pos[2]
        q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
        # print(np.max(np.abs(dq)))
        in_support = self.robot.is_root_com_in_support()
        rpy = self._p.getEulerFromQuaternion(self.obs[9:13])

        if not self.pretrain_dyn:
            reward = self.ab  # alive bonus
            tar = np.minimum(self.timer / 500, self.max_tar_vel)
            reward += np.minimum(self.velx, tar) * self.vel_r_weight
            # print("v", self.velx, "tar", tar)
            reward += -self.energy_weight * np.square(robo_action).sum()
            # print("act norm", -self.energy_weight * np.square(a).sum())

            pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
            q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
            joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
            reward += -self.jl_weight * joints_at_limit
            # print("jl", -self.jl_weight * joints_at_limit)

            reward += -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0)
            reward += -np.minimum(
                np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight,
                5.0)
            # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0))
            # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0))

            y_1 = root_pos[1]
            reward += -y_1 * 0.5
            # print("dev pen", -y_1*0.5)
        else:
            reward = self.calc_obs_dist_pretrain(
                self.img_obs[:-4], self.obs[:len(self.img_obs[:-4])])

        # print("______")
        # print(in_support)

        # print("h", height)
        # print("dq.", np.abs(dq))
        # print((np.abs(dq) < 50).all())

        # body = [0, 4, 8, 12, -1, 1, 5, 9, 13]
        # body_floor = False
        # for link in body:
        #     cps = self._p.getContactPoints(self.robot.go_id, self.floor_id, link, -1)
        #     if len(cps) > 0:
        #         print("body hit floor")
        #         body_floor = True
        #         break

        # print("------")
        not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0)
        # not_done = (abs(y_1) < 5.0) and (height > 0.1) and (height < 1.0) and (rpy[2] > 0.1)
        # not_done = True

        return self.obs, reward, not not_done, {}
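The pretrain_dyn branch doubles as a sanity check: the assertion verifies, to atol=1e-5, that the imaginary rollout started from the same raw robot state as the real simulation before calc_obs_dist_pretrain scores the discrepancy between the two resulting observations.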
Code example #16
    def step(self, a):
        # TODO: currently Laikago-specific; env_action is 12D (4 feet x 3D contact force, no wrench)
        if self.train_dyn:
            env_action = a
            robo_action = self.past_bact_array[0]  # after tanh
        else:
            robo_action = a
            robo_action = np.tanh(robo_action)
            # update past_bact after tanh
            utils.push_recent_value(self.past_bact_array, robo_action)

            env_pi_obs = utils.select_and_merge_from_s_a(
                s_mt=list(self.past_obs_array),
                a_mt=list(self.past_bact_array),
                s_idx=self.generator_past_obs_t_idx,
                a_idx=self.generator_past_act_t_idx)
            env_pi_obs_nn = utils.wrap(env_pi_obs, is_cuda=self.cuda_env)
            with torch.no_grad():
                _, env_action_nn, _, self.recurrent_hidden_states = self.dyn_actor_critic.act(
                    env_pi_obs_nn,
                    self.recurrent_hidden_states,
                    self.masks,
                    deterministic=False)
            env_action = utils.unwrap(env_action_nn, is_cuda=self.cuda_env)

        # if self.ratio is None:
        #     self.ratio = np.array([env_action / robo_action])
        # else:
        #     self.ratio = np.append(self.ratio, [env_action / robo_action], axis=0)
        # self.ratios = np.append(self.ratios, [env_action / robo_action], axis=0)
        #
        # env_pi_obs_feat = self.feat_select_func(self.obs)
        # dis_state = np.concatenate((env_pi_obs_feat, robo_action))
        # dis_state = utils.wrap(dis_state, is_cuda=self.cuda_env)

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_0 = root_pos[0]

        # post-action noise (unseen by the policy), unlike the "seen"
        # diversification applied via self.enlarge_act_range
        if self.act_noise:
            robo_action = utils.perturb(robo_action, 0.05, self.np_random)

        # must be built before the sim steps, as in v4 (s_t+1 is appended later);
        # used outside the env to construct the discriminator (D) input
        past_info = self.construct_past_traj_window()

        # # TODO
        # if self.pretrain_dyn:
        #     # self.state_id = self._p.saveState()
        #     self.img_obs, pre_s_i = self.rollout_one_step_imaginary()     # takes the old self.obs
        #     # img_obs = self.rollout_one_step_imaginary_same_session()
        #     # self._p.restoreState(self.state_id)
        #     pre_s = self.robot.get_robot_raw_state_vec()
        #     # print(pre_s_i)
        #     # print(pre_s)
        #     assert np.allclose(pre_s, pre_s_i, atol=1e-5)

        for _ in range(self.control_skip):
            self.robot.apply_action(robo_action)
            self.apply_scale_clip_conf_from_pi_new(env_action)
            self._p.stepSimulation()
            if self.render:
                time.sleep(self._ts * 1.0)
            self.timer += 1

        obs_new = self.get_extended_observation()  # and update past_obs_array
        past_info += [self.past_obs_array[0]]  # s_t+1

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_1 = root_pos[0]
        self.velx = (x_1 - x_0) / (self.control_skip * self._ts)

        y_1 = root_pos[1]
        height = root_pos[2]
        q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
        # print(np.max(np.abs(dq)))
        # in_support = self.robot.is_root_com_in_support()

        if not self.pretrain_dyn:
            reward = self.ab  # alive bonus
            tar = np.minimum(self.timer / 500, self.max_tar_vel)
            reward += np.minimum(self.velx, tar) * self.vel_r_weight
            # print("v", self.velx, "tar", tar)
            reward += -self.energy_weight * np.square(robo_action).sum()
            # print("act norm", -self.energy_weight * np.square(a).sum())

            pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
            q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
            joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
            reward += -self.jl_weight * joints_at_limit
            # print("jl", -self.jl_weight * joints_at_limit)

            reward += -np.minimum(
                np.sum(np.square(dq)) * self.dq_pen_weight, 5.0)
            weight = np.array([2.0, 1.0, 1.0] * 4)
            reward += -np.minimum(
                np.sum(np.square(q - self.robot.init_q) * weight) *
                self.q_pen_weight, 5.0)
            # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0))
            # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0))

            y_1 = root_pos[1]
            reward += -y_1 * 0.5
            # print("dev pen", -y_1*0.5)
        else:
            # reward = self.calc_obs_dist_pretrain(self.img_obs[:-4], self.obs[:len(self.img_obs[:-4])])
            reward = 0  # TODO

        # print("______")
        # print(in_support)

        # print("h", height)
        # print("dq.", np.abs(dq))
        # print((np.abs(dq) < 50).all())

        # print("------")
        # conf policy will not have body-in-contact flag
        not_done = (np.abs(dq) < 90).all() and (height > 0.2) and (height < 1.0)
        # not_done = (abs(y_1) < 5.0) and (height > 0.1) and (height < 1.0) and (rpy[2] > 0.1)
        # not_done = True
        #
        # if not not_done:
        #     print(self.ratio.shape)
        #     labels = list("123456789ABC")
        #     data = self.ratio
        #     from matplotlib import pyplot as plt
        #     width = 0.4
        #     fig, ax = plt.subplots()
        #     for i, l in enumerate(labels):
        #         x = np.ones(data.shape[0]) * i + (np.random.rand(data.shape[0]) * width - width / 2.)
        #         ax.scatter(x, data[:, i], s=25)
        #         median = np.median(data[:, i])
        #         ax.plot([i - width / 2., i + width / 2.], [median, median], color="k")
        #
        #     plt.ylim(-5, 5)
        #     ax.set_xticks(range(len(labels)))
        #     ax.set_xticklabels(labels)
        #     plt.show()
        #     self.ratio = None

        # if not self.train_dyn:
        #     dis_action = self.feat_select_func(self.obs)
        #     dis_action = utils.wrap(dis_action, is_cuda=self.cuda_env)
        #     d_score = self.discri.predict_prob_single_step(dis_state, dis_action)
        #     self.d_scores.append(utils.unwrap(d_score, is_cuda=self.cuda_env))
        #     # if len(self.d_scores) > 20 and np.mean(self.d_scores[-20:]) < 0.4:
        #     #     not_done = False
        #     # if not not_done or self.timer==1000:
        #     #     print(np.mean(self.d_scores))

        return obs_new, reward, not not_done, {"sas_window": past_info}
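Unlike the earlier variants, this version returns the pre-step trajectory window (with s_t+1 appended) under info["sas_window"], so the discriminator input can be assembled outside the environment as the comments describe.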
Code example #17
    def step(self, a):
        if self.act_noise and a is not None:
            a = utils.perturb(a, 0.05, self.np_random)

        if not self.use_gen_dyn:
            for _ in range(self.control_skip):
                # raw action may lie outside [-1, 1]; clip before applying
                if a is not None:
                    self.act = np.clip(a, -1.0, 1.0)
                    self.robot.apply_action(self.act)
                self._p.stepSimulation()
                if self.render:
                    time.sleep(self._ts * 0.5)
                self.timer += 1
            self.robot.update_x()
            self.update_extended_observation()
            obs_unnorm = np.array(self.obs) / self.robot.obs_scaling
        else:

            # gen_input = list(self.obs) + list(a) + [0.0] * (11+3)

            gen_input = list(self.obs) + list(a) + list(
                np.random.normal(0, 1, 14))  # TODO
            gen_input = utils.wrap(gen_input, is_cuda=False)  # TODO
            gen_output = self.gen_dyn(gen_input)
            gen_output = utils.unwrap(gen_output, is_cuda=False)

            self.obs = gen_output
            obs_unnorm = np.array(self.obs) / self.robot.obs_scaling
            self.robot.last_x = self.robot.x
            self.robot.x += obs_unnorm[5] * (self.control_skip * self._ts)

            if self.render:
                all_qs = [self.robot.x] + list(obs_unnorm[:5])
                all_qs[1] -= 1.5
                for ind in range(self.robot.n_total_dofs):
                    self._p.resetJointState(self.robot.hopper_id, ind,
                                            all_qs[ind], 0.0)
                time.sleep(self._ts * 5.0)

            if self.obs_noise:
                self.obs = utils.perturb(self.obs, 0.1, self.np_random)
            self.timer += self.control_skip

        reward = 2.0  # alive bonus
        reward += self.get_ave_dx()
        # print("v", self.get_ave_dx())
        reward += -0.1 * np.square(a).sum()
        # print("act norm", -0.1 * np.square(a).sum())

        q = np.array(obs_unnorm[2:5])
        pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
        q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
        joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
        reward += -2.0 * joints_at_limit
        # print("jl", -2.0 * joints_at_limit)

        dq = np.array(obs_unnorm[8:11])
        reward -= np.minimum(np.sum(np.abs(dq)) * 0.02, 5.0)  # almost like /23
        # print("vel pen", np.minimum(np.sum(np.abs(dq)) * 0.02, 5.0))

        height = obs_unnorm[0]
        # ang = self._p.getJointState(self.robot.hopper_id, 2)[0]

        # print(joints_dq)
        # print(height)
        # print("ang", ang)
        not_done = (np.abs(dq) < 50).all() and (height > .3) and (height < 1.8)

        return self.obs, reward, not not_done, {}
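For reference, the learned-dynamics branch above reduces to a single generator query per control step. A self-contained sketch (rollout_gen_dyn is a hypothetical name; gen_dyn is assumed to be a torch module mapping [obs, action, 14-dim Gaussian noise] to the next observation, as in the snippet):

    import numpy as np
    import torch

    def rollout_gen_dyn(gen_dyn, obs, action, noise_dim=14):
        # one transition of the learned dynamics: s_t+1 ~ G(s_t, a_t, z)
        z = np.random.normal(0, 1, noise_dim)
        gen_input = torch.tensor(np.concatenate([obs, action, z]),
                                 dtype=torch.float32)
        with torch.no_grad():
            next_obs = gen_dyn(gen_input)
        return next_obs.numpy()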