def apply_scale_clip_conf_from_pi(self, con_f): approx_mass = 26.0 max_fz = approx_mass * 9.81 * 2 # 2mg # TODO for foot_ind, link in enumerate(self.robot.feet): this_con_f = con_f[foot_ind * 3:(foot_ind + 1) * 3] # first dim represents fz fz = np.interp(this_con_f[0], [-0.1, 5], [0.0, max_fz]) # second dim represents fx fx = np.interp(this_con_f[1], [-2, 2], [-1.8 * fz, 1.8 * fz]) # fx = np.sign(fx) * np.maximum(np.abs(fx) - 0.6 * max_fz, 0.0) # mu<=1.2 # third dim represents fy fy = np.interp(this_con_f[2], [-2, 2], [-1.8 * fz, 1.8 * fz]) # fy = np.sign(fy) * np.maximum(np.abs(fy) - 0.6 * max_fz, 0.0) # mu<=1.2 utils.apply_external_world_force_on_local_point( self._imaginary_robot.go_id, link, [fx, fy, fz], [0, 0, 0], self._imaginary_p)
def apply_scale_clip_conf_from_pi_new(self, con_f): approx_mass = 26.0 max_fz = approx_mass * 9.81 * 2 # 2mg # TODO for foot_ind, link in enumerate(self.robot.feet): this_con_f = np.tanh(con_f[foot_ind * 3:(foot_ind + 1) * 3]) # [-1 ,1] pos, _ = self.robot.get_link_com_xyz_orn(link, fk=1) if pos[2] < 0.01: # first dim represents fz # fz = np.abs(this_con_f[0]) * max_fz fz = (this_con_f[0] + 1) / 2.0 * max_fz else: fz = 0.0 fx = this_con_f[1] * 1.5 * fz fy = this_con_f[2] * 1.5 * fz utils.apply_external_world_force_on_local_point( self.robot.go_id, link, [fx, fy, fz], [0, 0, 0], self._p)
def step(self, a): root_pos, _ = self.robot.get_link_com_xyz_orn(-1) x_0 = root_pos[0] a = np.clip(a, -1.0, 1.0) if self.act_noise: a = utils.perturb(a, 0.05, self.np_random) if self.low_power_env: # # for pi44 # _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs) # max_force_ratio = np.clip(2 - dq/2.5, 0, 1) # a *= max_force_ratio # for pi43 _, dq = self.robot.get_q_dq(self.robot.ctrl_dofs) max_force_ratio = np.clip(2 - dq/2., 0, 1) a *= max_force_ratio for _ in range(self.control_skip): # action is in not -1,1 if a is not None: self.act = a self.robot.apply_action(a) if self.randomforce_train: for foot_ind, link in enumerate(self.robot.feet): # first dim represents fz fz = np.random.uniform(-80, 80) # second dim represents fx fx = np.random.uniform(-80, 80) # third dim represents fy fy = np.random.uniform(-80, 80) utils.apply_external_world_force_on_local_point(self.robot.go_id, link, [fx, fy, fz], [0, 0, 0], self._p) self._p.stepSimulation() if self.render: time.sleep(self._ts * 0.5) self.timer += 1 root_pos, _ = self.robot.get_link_com_xyz_orn(-1) x_1 = root_pos[0] self.velx = (x_1 - x_0) / (self.control_skip * self._ts) q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs) reward = self.ab # alive bonus tar = np.minimum(self.timer / 500, self.max_tar_vel) reward += np.minimum(self.velx, tar) * self.vel_r_weight # print("v", self.velx, "tar", tar) reward += -self.energy_weight * np.square(a).sum() # print("act norm", -self.energy_weight * np.square(a).sum()) pos_mid = 0.5 * (self.robot.ll + self.robot.ul) q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll) joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97) reward += -self.jl_weight * joints_at_limit # print("jl", -self.jl_weight * joints_at_limit) reward += -np.minimum(np.sum(np.square(dq)) * self.dq_pen_weight, 5.0) weight = np.array([2.0, 1.0, 1.0] * 4) reward += -np.minimum(np.sum(np.square(q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0) # reward = self.ab # tar = np.minimum(self.timer / 500, self.max_tar_vel) # reward += np.minimum(self.velx, tar) * self.vel_r_weight # # print("v", self.velx, "tar", tar) # # # reward += np.maximum((self.max_tar_vel - tar) * self.vel_r_weight - 3.0, 0.0) # alive bonus # # reward += -self.energy_weight * np.linalg.norm(a) # # print("act norm", -self.energy_weight * np.square(a).sum()) # # q, dq = self.robot.get_q_dq(self.robot.ctrl_dofs) # # print(np.max(np.abs(dq))) # pos_mid = 0.5 * (self.robot.ll + self.robot.ul) # q_scaled = 2 * (q - pos_mid) / (self.robot.ul - self.robot.ll) # joints_at_limit = np.count_nonzero(np.abs(q_scaled) > 0.97) # reward += -self.jl_weight * joints_at_limit # # print("jl", -self.jl_weight * joints_at_limit) # # reward += -np.minimum(np.linalg.norm(dq) * self.dq_pen_weight, 5.0) # weight = np.array([2.0, 0.2, 1.0] * 4) # reward += -np.minimum(np.linalg.norm((q - self.robot.init_q) * weight) * self.q_pen_weight, 5.0) # # print("vel pen", -np.minimum(np.sum(np.abs(dq)) * self.dq_pen_weight, 5.0)) # # print("pos pen", -np.minimum(np.sum(np.square(q - self.robot.init_q)) * self.q_pen_weight, 5.0)) y_1 = root_pos[1] reward += -y_1 * 0.5 # print("dev pen", -y_1*0.5) height = root_pos[2] in_support = self.robot.is_root_com_in_support() # print("______") # print(in_support) # print("h", height) # print("dq.", np.abs(dq)) # print((np.abs(dq) < 50).all()) # cps = self._p.getContactPoints(bodyA=self.robot.go_id, linkIndexA=0) # body_in_contact = (len(cps) > 0) # print("------") obs = self.get_extended_observation() # rpy = self._p.getEulerFromQuaternion(obs[8:12]) # for data collection not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0) # TODO # not_done = (np.abs(dq) < 90).all() and (height > 0.3) and (height < 1.0) and in_support # not_done = True return obs, reward, not not_done, {}