Example #1
    def apply_scale_clip_conf_from_pi(self, con_f):
        """Scale and clip the raw contact-force output of the policy and apply
        the result to the feet of the imaginary robot."""

        approx_mass = 26.0  # approximate robot mass (kg)
        max_fz = approx_mass * 9.81 * 2  # cap the normal force at 2 * m * g  # TODO

        for foot_ind, link in enumerate(self.robot.feet):
            this_con_f = con_f[foot_ind * 3:(foot_ind + 1) * 3]
            # first dim represents fz
            fz = np.interp(this_con_f[0], [-0.1, 5], [0.0, max_fz])
            # second dim represents fx
            fx = np.interp(this_con_f[1], [-2, 2], [-1.8 * fz, 1.8 * fz])
            # fx = np.sign(fx) * np.maximum(np.abs(fx) - 0.6 * max_fz, 0.0)   # mu<=1.2
            # third dim represents fy
            fy = np.interp(this_con_f[2], [-2, 2], [-1.8 * fz, 1.8 * fz])
            # fy = np.sign(fy) * np.maximum(np.abs(fy) - 0.6 * max_fz, 0.0)   # mu<=1.2

            utils.apply_external_world_force_on_local_point(
                self._imaginary_robot.go_id, link, [fx, fy, fz], [0, 0, 0],
                self._imaginary_p)
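
A quick standalone check of the interp-based scaling above (a minimal sketch, assuming only numpy; raw is a made-up stand-in for one foot's 3-D slice of con_f): np.interp clamps inputs that fall outside its x-range to the endpoint values, which is what gives this method its clipping behavior.

    import numpy as np

    approx_mass = 26.0
    max_fz = approx_mass * 9.81 * 2  # about 510 N, i.e. 2 * m * g

    raw = np.array([7.0, -3.0, 1.0])  # hypothetical raw policy output for one foot
    fz = np.interp(raw[0], [-0.1, 5], [0.0, max_fz])        # clamped to [0, max_fz]
    fx = np.interp(raw[1], [-2, 2], [-1.8 * fz, 1.8 * fz])  # clamped to +/- 1.8 * fz
    fy = np.interp(raw[2], [-2, 2], [-1.8 * fz, 1.8 * fz])
    print(fz, fx, fy)  # raw[0] > 5 -> fz == max_fz; raw[1] < -2 -> fx == -1.8 * fz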
Example #2
    def apply_scale_clip_conf_from_pi_new(self, con_f):
        """Variant that squashes the policy output with tanh and applies a
        normal force only when the foot is near the ground."""

        approx_mass = 26.0  # approximate robot mass (kg)
        max_fz = approx_mass * 9.81 * 2  # cap the normal force at 2 * m * g  # TODO

        for foot_ind, link in enumerate(self.robot.feet):

            # squash this foot's 3-D force command to [-1, 1]
            this_con_f = np.tanh(con_f[foot_ind * 3:(foot_ind + 1) * 3])

            pos, _ = self.robot.get_link_com_xyz_orn(link, fk=1)
            if pos[2] < 0.01:
                # foot near the ground: map the first dim from [-1, 1]
                # to a normal force in [0, max_fz]
                # fz = np.abs(this_con_f[0]) * max_fz
                fz = (this_con_f[0] + 1) / 2.0 * max_fz
            else:
                # foot in the air: apply no normal force
                fz = 0.0

            # tangential forces are bounded by +/- 1.5 * fz
            fx = this_con_f[1] * 1.5 * fz
            fy = this_con_f[2] * 1.5 * fz

            utils.apply_external_world_force_on_local_point(
                self.robot.go_id, link, [fx, fy, fz], [0, 0, 0], self._p)
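
A toy check of the tanh mapping above (a minimal standalone sketch, assuming only numpy; the input values are made up): a zero command on the normal channel yields half of max_fz, and the tangential channels scale with the realized fz.

    import numpy as np

    max_fz = 26.0 * 9.81 * 2
    u = np.tanh(np.array([0.0, 1.0, -1.0]))    # squashed per-foot command
    fz = (u[0] + 1) / 2.0 * max_fz             # tanh(0) = 0 -> 0.5 * max_fz
    fx, fy = u[1] * 1.5 * fz, u[2] * 1.5 * fz  # bounded by +/- 1.5 * fz
    print(fz, fx, fy)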
    def step(self, a):
        """Advance the simulation by control_skip inner steps under action a and
        return the gym tuple (obs, reward, done, info)."""

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_0 = root_pos[0]  # root x before stepping, used for velocity estimation

        a = np.clip(a, -1.0, 1.0)
        if self.act_noise:
            a = utils.perturb(a, 0.05, self.np_random)

        if self.low_power_env:
            # # for pi44
            # _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
            # max_force_ratio = np.clip(2 - dq/2.5, 0, 1)
            # a *= max_force_ratio
            # for pi43
            _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
            max_force_ratio = np.clip(2 - dq/2., 0, 1)  # per joint: 1 below 2 rad/s, 0 beyond 4 rad/s
            a *= max_force_ratio

        for _ in range(self.control_skip):
            # apply the same (already clipped) action at every inner sim step
            if a is not None:
                self.act = a
                self.robot.apply_action(a)

            if self.randomforce_train:
                # training-time robustness: hit each foot with a random external force
                for foot_ind, link in enumerate(self.robot.feet):
                    fz = np.random.uniform(-80, 80)
                    fx = np.random.uniform(-80, 80)
                    fy = np.random.uniform(-80, 80)

                    utils.apply_external_world_force_on_local_point(self.robot.go_id, link,
                                                                    [fx, fy, fz],
                                                                    [0, 0, 0],
                                                                    self._p)
            self._p.stepSimulation()
            if self.render:
                time.sleep(self._ts * 0.5)
            self.timer += 1

        root_pos, _ = self.robot.get_link_com_xyz_orn(-1)
        x_1 = root_pos[0]
        self.velx = (x_1 - x_0) / (self.control_skip * self._ts)  # average forward speed over this control step

        q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)

        reward = self.ab  # alive bonus
        tar = np.minimum(self.timer / 500, self.max_tar_vel)  # target speed ramps up, then saturates
        reward += np.minimum(self.velx, tar) * self.vel_r_weight  # reward forward speed only up to the target
        # print("v", self.velx, "tar", tar)
        reward += -self.energy_weight * np.square(a).sum()
        # print("act norm", -self.energy_weight * np.square(a).sum())

        pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
        q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
        joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
        reward += -self.jl_weight * joints_at_limit
        # print("jl", -self.jl_weight * joints_at_limit)

        reward += -np.minimum(np.sum(np.square(dq)) * self.dq_pen_weight, 5.0)  # capped joint-velocity penalty
        weight = np.array([2.0, 1.0, 1.0] * 4)  # per-joint weights, 4 legs x 3 DOFs
        reward += -np.minimum(np.sum(np.square(q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0)  # capped posture penalty

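        # --- alternative (norm-based) reward formulation below; disabled, kept for reference ---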
        # reward = self.ab
        # tar = np.minimum(self.timer / 500, self.max_tar_vel)
        # reward += np.minimum(self.velx, tar) * self.vel_r_weight
        # # print("v", self.velx, "tar", tar)
        #
        # # reward += np.maximum((self.max_tar_vel - tar) * self.vel_r_weight - 3.0, 0.0)     # alive bonus
        #
        # reward += -self.energy_weight * np.linalg.norm(a)
        # # print("act norm", -self.energy_weight * np.square(a).sum())
        #
        # q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs)
        # # print(np.max(np.abs(dq)))
        # pos_mid = 0.5 * (self.robot.ll + self.robot.ul)
        # q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll)
        # joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97)
        # reward += -self.jl_weight * joints_at_limit
        # # print("jl", -self.jl_weight * joints_at_limit)
        #
        # reward += -np.minimum(np.linalg.norm(dq) * self.dq_pen_weight, 5.0)
        # weight = np.array([2.0, 0.2, 1.0] * 4)
        # reward += -np.minimum(np.linalg.norm((q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0)
        # # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0))
        # # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0))

        y_1 = root_pos[1]
        reward += -y_1 * 0.5  # penalize root drift in +y
        # print("dev pen", -y_1*0.5)
        height = root_pos[2]

        in_support = self.robot.is_root_com_in_support()
        # print("______")
        # print(in_support)

        # print("h", height)
        # print("dq.", np.abs(dq))
        # print((np.abs(dq) < 50).all())

        # cps = self._p.getContactPoints(bodyA=self.robot.go_id, linkIndexA=0)
        # body_in_contact = (len(cps) > 0)

        # print("------")
        obs = self.get_extended_observation()
        # rpy = self._p.getEulerFromQuaternion(obs[8:12])

        # for data collection
        not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0)
        # TODO
        # not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0) and in_support
        # not_done = True

        return obs, reward, not not_done, {}  # gym tuple: (obs, reward, done, info)
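
For reference, a minimal sketch of the velocity-reward schedule in step() (standalone numpy; max_tar_vel and vel_r_weight are assumed values, set elsewhere in the real environment): the target speed grows linearly with the step counter, saturates at max_tar_vel, and the velocity reward is capped at the target.

    import numpy as np

    max_tar_vel = 2.0   # assumed value
    vel_r_weight = 4.0  # assumed value
    for timer, velx in [(100, 0.5), (500, 0.5), (2000, 3.0)]:
        tar = np.minimum(timer / 500, max_tar_vel)    # ramp, then saturate
        r_vel = np.minimum(velx, tar) * vel_r_weight  # no extra reward beyond the target
        print(timer, tar, r_vel)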