Example No. 1
    def step(self, ctrl):

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        target_vel = 0.25
        t_x = target_vel * np.cos(self.target_yaw)
        t_y = target_vel * np.sin(self.target_yaw)

        velocity_rew_x = 1. / (abs(xd - t_x) + 1.) - 1. / (t_x + 1.)
        velocity_rew_y = 1. / (abs(yd - t_y) + 1.) - 1. / (t_y + 1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        # Smallest yaw error, accounting for the 2*pi wrap-around
        yaw_pen = np.min((abs((yaw % (2 * np.pi)) - (self.target_yaw % (2 * np.pi))),
                          abs(yaw - self.target_yaw)))

        r_pos = velocity_rew_x * 6 + velocity_rew_y * 6
        r_neg = np.square(roll) * 0.5 + \
                np.square(pitch) * 0.5 + \
                np.square(zd) * 0.5 + \
                np.square(yaw_pen) * 0.2

        r_neg = np.clip(r_neg, 0, 1) * 1.
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps

        # Binary foot-contact flags: external contact force on the six lower-leg bodies
        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        obs = np.concatenate([
            self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
            [self.target_yaw], contacts
        ])

        # Pose the target-direction marker (body 20 is assumed to be a visual indicator)
        self.model.body_quat[20] = my_utils.rpy_to_quat(0, 0, self.target_yaw)
        self.model.body_pos[20] = [x, y, 1]

        return obs, r, done, None
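
The velocity terms above reward tracking a commanded speed rather than maximizing it: the kernel peaks at the target and falls off on both sides. A minimal standalone sketch of its shape (function name and sample values are illustrative, not from the source):

import numpy as np

def velocity_reward(v, target_vel):
    # Maximum at v == target_vel, with value target_vel / (target_vel + 1);
    # decays smoothly on both sides, so overshooting is not rewarded.
    return 1. / (abs(v - target_vel) + 1.) - 1. / (target_vel + 1.)

for v in [0.0, 0.1, 0.25, 0.4, 0.5]:
    print(v, round(velocity_reward(v, target_vel=0.25), 3))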
Example No. 2
    def step(self, ctrl):

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()
        obs_dict = self.get_obs_dict()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        # Reward conditions
        target_vel = self.target_criteria_dict["vel"]
        velocity_rew = (1. / (abs(xd - target_vel) + 1.) - 1. /
                        (target_vel + 1.)) * (1. / target_vel)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])
        r_pos = velocity_rew

        r_neg = np.square(self.target_criteria_dict["height"] - z) * 100 + \
                np.square(roll) * 1. + \
                np.square(pitch) * 1. + \
                np.square(zd) * 1. + \
                np.square(yaw) * 5. + \
                np.square(y) * 1.

        #print(z, self.target_criteria_dict["height"], np.square(self.target_criteria_dict["height"] - z) * 100, velocity_rew)

        r_neg = np.clip(r_neg, 0, 2.)
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps  # or abs(roll) > 2 or abs(pitch) > 2

        obs = np.concatenate([
            self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], self.target_criteria_norm,
            [roll, pitch, yaw, y], contacts
        ])

        return obs, r, done, obs_dict
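
The extra 1. / target_vel factor in this variant normalizes the kernel so its peak, 1 / (target_vel + 1), stays roughly comparable across commanded speeds. A quick check of that algebra (a sketch, not source code):

def normalized_peak(target_vel):
    # Kernel value at perfect tracking, after the 1/target_vel scaling:
    # (1 - 1/(target_vel + 1)) / target_vel == 1 / (target_vel + 1)
    return (1. - 1. / (target_vel + 1.)) * (1. / target_vel)

for t in [0.1, 0.25, 0.5]:
    print(t, round(normalized_peak(t), 3))  # 0.909, 0.8, 0.667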
Example No. 3
    def step(self, ctrl):

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        target_vel = 0.30
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        r_pos = velocity_rew * 5

        r_neg = np.square(self.sim.data.actuator_force).mean() * 0.00001 + \
            np.square(ctrl).mean() * 0.01 + \
            np.square(roll) * 0.1 + \
            np.square(pitch) * 0.1 + \
            np.square(yaw) * 0.3 + \
            np.square(y) * 0.1

        r_neg = np.clip(r_neg, 0, 1) * 1
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps

        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)
        obs = np.concatenate([
            self.sim.get_state().qpos.tolist()[7:],
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
            contacts
        ])

        return obs, r, done, None
Example No. 4
    def step(self, ctrl):
        goal_dist_pre = np.abs(self.goal_pos -
                               self.sim.get_state().qpos.tolist()[:2]).sum()

        mem = ctrl[-self.mem_dim:]
        act = ctrl[:-self.mem_dim]
        sact = self.scale_action(act)

        self.sim.data.ctrl[:] = sact
        self.sim.step()
        self.step_ctr += 1

        goal_dist_post = np.abs(self.goal_pos -
                                self.sim.get_state().qpos.tolist()[:2]).sum()

        obs_c = self.get_obs()
        obs_dict = self.get_obs_dict()

        x, y, z, qw, qx, qy, qz = obs_c[:7]
        #angle = 2 * np.arccos(qw)
        _, _, yaw = my_utils.quat_to_rpy((qw, qx, qy, qz))

        target_angle = np.arctan2(self.goal_pos[1] - y, self.goal_pos[0] - x)
        target_err = abs(yaw - target_angle)

        # Reevaluate termination condition.
        done = self.step_ctr > self.max_steps  # or obs_dict['torso_contact'] > 0.1

        xd, yd, _, _, _, _ = obs_dict["root_vel"]

        ctrl_effort = np.square(ctrl[0:8]).mean() * 0.00
        target_progress = (goal_dist_pre - goal_dist_post) * 50
        #print(target_progress)

        obs = np.concatenate(
            (self.goal_pos - obs_c[:2], obs_c[2:], obs_dict["contacts"],
             [obs_dict['torso_contact']], mem))
        r = np.clip(target_progress, -1,
                    1) - ctrl_effort - min(target_err, 3) * 0.1

        return obs, r, done, obs_dict
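
This example augments the action with a memory tail: the policy emits mem_dim extra values that are never actuated but are appended to the next observation, giving a feedforward policy a crude recurrent state. A minimal sketch of the split, with sizes assumed for illustration:

import numpy as np

mem_dim = 4                                 # assumed size of the memory tail
policy_out = np.random.randn(18 + mem_dim)  # 18 motor commands + memory
mem = policy_out[-mem_dim:]                 # echoed back via the observation
act = policy_out[:-mem_dim]                 # scaled and sent to the actuators
assert act.shape == (18,) and mem.shape == (mem_dim,)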
Example No. 5
    def step(self, ctrl):
        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()
        obs_dict = self.get_obs_dict()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        ctrl_effort = np.square(ctrl).sum()
        target_vel = 0.25
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        r = velocity_rew * 5 - \
            np.square(self.sim.data.actuator_force).mean() * 0.0001

        r = np.clip(r, -2, 2)
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps  #or abs(roll) > 2 or abs(pitch) > 2

        obs = np.concatenate([
            np.array(self.sim.get_state().qpos.tolist()[7:]),
            [roll, pitch, yaw, xd, yd, thd, phid], obs_dict["contacts"]
        ])

        return obs, r, done, obs_dict
Example No. 6
    def step(self, ctrl):
        ctrl = np.clip(ctrl, -1, 1)
        ctrl_pen = np.square(ctrl).mean()
        #torques = self.sim.data.actuator_force
        #ctrl_pen = np.square(torques).mean()

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1
        #
        # if np.random.rand() < 0.002:
        #     self.model.geom_friction[0] = np.array([0.0001, 0.5, 0.5])
        #     print("Changed friction")

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]
        #xa, ya, za, tha, phia, psia = self.sim.data.qacc.tolist()[:6]

        self.vel_sum += xd

        # Reward conditions
        target_vel = 0.4

        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)
        velocity_rew = velocity_rew * (1 / (1 + 30 * np.square(yd)))

        roll, pitch, _ = my_utils.quat_to_rpy([qw, qx, qy, qz])
        #yaw_deviation = np.min((abs((yaw % 6.183) - (0 % 6.183)), abs(yaw - 0)))

        q_yaw = 2 * acos(qw)

        r_neg = np.square(y) * 0.2 + \
                np.square(q_yaw) * 0.5 + \
                np.square(pitch) * 0.5 + \
                np.square(roll) * 0.5 + \
                ctrl_pen * 0.01 + \
                np.square(zd) * 0.7

        r_pos = velocity_rew * 6  # + (abs(self.prev_yaw_deviation) - abs(yaw_deviation)) * 3. + (abs(self.prev_y_deviation) - abs(y)) * 3.
        r = r_pos - r_neg

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps  # or abs(y) > 0.3 or abs(yaw) > 0.6 or abs(roll) > 0.8 or abs(pitch) > 0.8
        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        if self.use_HF:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [qw, qx, qy, qz, y],
                contacts,
                self.get_local_hf(x, y).flatten()
            ])
        else:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [qw, qx, qy, qz, y],
                contacts
            ])

        return obs, r, done, None
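
The factor 1 / (1 + 30 * yd**2) above gates the forward-velocity reward by lateral drift: full reward at yd == 0, roughly halved near |yd| ≈ 0.18 (where 30 * yd**2 = 1). A quick look at the falloff:

import numpy as np

for yd in [0.0, 0.1, 0.18, 0.3, 0.5]:
    print(yd, round(1. / (1. + 30. * np.square(yd)), 3))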
Example No. 7
    def step(self, ctrl):

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        target_vel = 0.25
        dprev = np.sqrt((self.prev_xy[0] - self.goal_xy[0])**2 +
                        (self.prev_xy[1] - self.goal_xy[1])**2)
        dcur = np.sqrt((x - self.goal_xy[0])**2 + (y - self.goal_xy[1])**2)
        to_goal_vel = dprev - dcur
        to_goal_vel *= 50.
        velocity_rew = 1. / (abs(to_goal_vel - target_vel) +
                             1.) - 1. / (target_vel + 1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        tar_angle = np.arctan2(self.goal_xy[1] - y, self.goal_xy[0] - x)
        yaw_deviation = np.min(
            (abs((yaw % (2 * np.pi)) - (tar_angle % (2 * np.pi))),
             abs(yaw - tar_angle)))

        r_pos = velocity_rew * 3 + (self.prev_deviation - yaw_deviation) * 9
        r_neg = np.square(roll) * 1.2 + \
                np.square(pitch) * 1.2 + \
                np.square(zd) * 1.5

        self.prev_deviation = yaw_deviation

        r_neg = np.clip(r_neg, 0, 2) * 1.
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps or dcur < 0.15

        self.prev_xy = [x, y]

        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        obs = np.concatenate([
            self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
            [x - self.goal_xy[0], y - self.goal_xy[1]], contacts
        ])

        return obs, r, done, None
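
The wrapped-versus-unwrapped minimum used for yaw_deviation above avoids a large penalty when the two headings sit on opposite sides of the ±pi boundary. An equivalent, arguably cleaner formulation (a sketch, not the source's helper):

import numpy as np

def heading_deviation(yaw, target):
    # Smallest absolute angular difference, respecting the 2*pi wrap.
    d = (yaw - target) % (2 * np.pi)
    return min(d, 2 * np.pi - d)

print(heading_deviation(3.1, -3.1))  # ~0.083, not ~6.2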
Example No. 8
    def step(self, ctrl):
        # Take the classification logits off the tail before truncating ctrl,
        # otherwise they would be sliced from the motor commands instead.
        classif = ctrl[-self.n_envs:]
        ctrl = ctrl[:-self.n_envs]
        ctrl = np.clip(ctrl, -1, 1)
        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]
        xa, ya, za, tha, phia, psia = self.sim.data.qacc.tolist()[:6]

        if x > self.scaled_indeces_list[self.current_env_idx]:
            self.current_env_idx += 1
            self.current_env = self.envs[self.current_env_idx]

        with T.no_grad():
            env_label = T.tensor(self.env_list.index(
                self.current_env)).unsqueeze(0)
            env_pred = T.tensor(classif).unsqueeze(0)
            ce_loss = self.CE_loss(env_pred, env_label)

        # Reward conditions
        target_vel = 0.3
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        r_pos = velocity_rew * 6
        r_neg = np.square(roll) * 2. + \
                np.square(pitch) * 2. + \
                np.square(zd) * 2. + \
                np.square(yd) * 2. + \
                np.square(y) * 2. + \
                np.square(yaw) * 4.0 + \
                np.square(self.sim.data.actuator_force).mean() * 0.000 + \
                np.clip(np.square(np.array(self.sim.data.cfrc_ext[1])).sum(axis=0), 0, 1) * 0.4 + \
                ce_loss.numpy() * .1

        r_neg = np.clip(r_neg, 0, 1) * 1
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps
        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        if self.use_HF:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts,
                self.get_local_hf(x, y).flatten()
            ])
        else:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts
            ])

        return obs, r, done, None
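
The cross-entropy term folded into r_neg above is an auxiliary task: the last n_envs outputs of the policy must classify which terrain the robot is currently on. A minimal sketch of that computation, with the logit count and label assumed:

import torch as T

CE_loss = T.nn.CrossEntropyLoss()
env_pred = T.randn(1, 4)   # classifier-head logits; 4 terrains assumed
env_label = T.tensor([2])  # index of the true terrain
penalty = CE_loss(env_pred, env_label).item() * 0.1
print(penalty)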
Example No. 9
    def step(self, ctrl):
        ctrl = np.clip(ctrl, -1, 1)
        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        if x > self.scaled_indeces_list[self.current_env_idx]:
            self.current_env_idx += 1
            self.current_env = self.envs[self.current_env_idx]

        expert = self.expert_dict[self.current_env]
        expert_act = expert(my_utils.to_tensor(self.obs, True)).detach()
        expert_act = expert_act[0].numpy()

        # Reward conditions
        target_vel = 0.3
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        r_pos = velocity_rew * 6

        r_neg = np.square(roll) * 1. + \
                np.square(pitch) * 1. + \
                np.square(zd) * 1. + \
                np.square(y) * 1. + \
                np.square(yaw) * 1.0 + \
                np.square(ctrl - expert_act).mean() * 0.01

        #print(np.square(ctrl - expert_act).mean() * 0.01)

        r_neg = np.clip(r_neg, 0, 1) * 1
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps
        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        if self.use_HF:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts,
                self.get_local_hf(x, y).flatten()
            ])
        else:
            obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts
            ])

        self.obs = obs

        return obs, r, done, self.env_list.index(self.current_env)
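
Here a frozen per-terrain expert supplies a reference action each step, and the policy pays a small quadratic penalty for deviating from it, a light form of policy distillation. A sketch with stand-in arrays (the real expert_act comes from the expert network):

import numpy as np

ctrl = np.random.uniform(-1, 1, 18)        # agent's action; size assumed
expert_act = np.random.uniform(-1, 1, 18)  # frozen expert's action
imitation_pen = np.square(ctrl - expert_act).mean() * 0.01
print(imitation_pen)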
Example No. 10
    def step(self, ctrl):
        # Split off the gating logits before truncating the motor commands
        classif = ctrl[-(self.n_envs + 1):]
        ctrl = ctrl[:-(self.n_envs + 1)]

        if np.argmax(classif) < self.n_envs:  # an expert slot won; the last slot keeps the policy's own action
            policy = self.expert_list[np.argmax(classif[:self.n_envs])]
            act = policy(my_utils.to_tensor(self.obs, True)).detach()
            ctrl = act[0].numpy()

        ctrl = np.clip(ctrl, -1, 1)
        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]
        xa, ya, za, tha, phia, psia = self.sim.data.qacc.tolist()[:6]

        # Reward conditions
        target_vel = 0.3
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        r_pos = velocity_rew * 6
        r_neg = np.square(roll) * 2. + \
                np.square(pitch) * 2. + \
                np.square(zd) * 2. + \
                np.square(yd) * 2. + \
                np.square(y) * 2. + \
                np.square(yaw) * 4.0 + \
                np.square(self.sim.data.actuator_force).mean() * 0.000 + \
                np.clip(np.square(np.array(self.sim.data.cfrc_ext[1])).sum(axis=0), 0, 1) * 0.4

        r_neg = np.clip(r_neg, 0, 1) * 1
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps
        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        if self.use_HF:
            self.obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts,
                self.get_local_hf(x, y).flatten()
            ])
        else:
            self.obs = np.concatenate([
                self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
                self.sim.get_state().qvel.tolist()[6:],
                self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
                contacts
            ])

        return self.obs, r, done, None
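
Unlike the previous example, the classification head here gates control directly: if one of the first n_envs logits wins the argmax, the corresponding frozen expert acts for this step; otherwise the policy's own motor commands pass through. A sketch of the gating decision with illustrative sizes:

import numpy as np

n_envs = 3
classif = np.array([0.2, 1.5, -0.3, 0.1])  # n_envs + 1 gating logits
if np.argmax(classif) < n_envs:            # an expert slot won
    print("defer to expert", np.argmax(classif[:n_envs]))
else:                                      # the extra slot won
    print("keep the policy's own action")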
Example No. 11
    def step(self, ctrl):

        # Mute appropriate leg joints
        for i in range(6):
            if self.dead_leg_vector[i] == 1:
                ctrl[i * 3:i * 3 + 3] = np.zeros(3)  #np.random.randn(3) * 0.1

        if self.mem_dim == 0:
            mem = np.zeros(0)
            act = ctrl
            ctrl = self.scale_action(act)
        else:
            mem = ctrl[-self.mem_dim:]
            act = ctrl[:-self.mem_dim]
            ctrl = self.scale_action(act)

        self.prev_act = np.array(act)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()
        obs_dict = self.get_obs_dict()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]

        xd, yd, zd, _, _, _ = self.sim.get_state().qvel.tolist()[:6]
        angle = 2 * acos(qw)

        roll, pitch, yaw = my_utils.quat_to_rpy((qw, qx, qy, qz))

        # Reward conditions
        target_vel = 0.25
        velocity_rew = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                1.)

        r = velocity_rew * 10 - \
            np.square(self.sim.data.actuator_force).mean() * 0.0001 - \
            np.abs(roll) * 0.1 - \
            np.square(pitch) * 0.1 - \
            np.square(yaw) * .1 - \
            np.square(y) * 0.1 - \
            np.square(zd) * 0.01
        r = np.clip(r, -2, 2)

        self.cumulative_environment_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps  # or abs(y) > 0.3 or x < -0.2 or abs(yaw) > 0.8

        obs = np.concatenate([
            np.array(self.sim.get_state().qpos.tolist()[3:]), [xd, yd],
            obs_dict["contacts"], mem
        ])

        # if np.random.rand() < self.dead_leg_prob:
        #     idx = np.random.randint(0,6)
        #     self.dead_leg_vector[idx] = 1
        #     self.dead_leg_sums[idx] += 1
        #     self.model.geom_rgba[self.model._geom_name2id[self.leg_list[idx]]] = [1, 0, 0, 1]
        #     self.dead_leg_prob = 0.

        return obs, r, done, obs_dict
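
The dead-leg mechanism mutes all three joint commands of any disabled leg before actuation; the commented-out block at the bottom is the curriculum that injects such faults at random. A standalone sketch of the muting step:

import numpy as np

dead_leg_vector = [0, 1, 0, 0, 0, 0]  # leg 1 disabled; values assumed
ctrl = np.random.uniform(-1, 1, 18)   # 6 legs x 3 joints
for i in range(6):
    if dead_leg_vector[i] == 1:
        ctrl[i * 3:i * 3 + 3] = np.zeros(3)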
Example No. 12
    def step(self, ctrl):
        ctrl_penalty = np.mean(ctrl)
        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        self.used_energy += self.sim.data.actuator_force

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        target_vel = 0.30
        velocity_rew_x = 1. / (abs(xd - target_vel) + 1.) - 1. / (target_vel +
                                                                  1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        contacts = (np.abs(np.array(
            self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19]])).sum(axis=1)
                    > 0.05).astype(np.float32)

        # mean_used_energy = self.used_energy / self.step_ctr
        # mean_used_energy_coxa = mean_used_energy[0::3]
        # mean_used_energy_femur = mean_used_energy[1::3]
        # mean_used_energy_tibia = mean_used_energy[2::3]
        # coxa_penalty = np.mean(np.square(mean_used_energy_coxa - np.mean(mean_used_energy_coxa)))
        # femur_penalty = np.mean(np.square(mean_used_energy_femur - np.mean(mean_used_energy_femur)))
        # tibia_penalty = np.mean(np.square(mean_used_energy_tibia - np.mean(mean_used_energy_tibia)))

        r_pos = velocity_rew_x * 5.  # + np.mean(contacts) * 0.2
        r_neg = np.square(roll) * 1. + \
                np.square(pitch) * 1. + \
                np.square(zd) * 1. + \
                np.square(yd) * 4. + \
                np.square(y) * 7. + \
                np.square(yaw) * 4. + \
                np.square(self.sim.data.actuator_force).mean() * 0.000 + \
                ctrl_penalty * 0.0
        # + (coxa_penalty * .1 + femur_penalty * .1 + tibia_penalty * .1) * (self.step_ctr > 30)

        r_neg = np.clip(r_neg, 0, 1) * 1.
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps

        obs = np.concatenate([
            self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
            contacts
        ])

        return obs, r, done, None
Example No. 13
    def step(self, ctrl, render=False):
        ctrl = np.clip(ctrl, -1, 1)
        ctrl = self.scale_action(ctrl)
        self.sim.data.ctrl[:] = ctrl

        self.model.opt.timestep = 0.003
        for i in range(27):
            self.sim.forward()
            self.sim.step()
            if render:
                self.render()

        # Simulate read delay
        self.model.opt.timestep = 0.0008
        joints = []
        for i in range(18):
            joints.append(self.sim.get_state().qpos.tolist()[7 + i])
            self.sim.forward()
            self.sim.step()

        self.step_ctr += 1
        self.current_difficulty = np.minimum(
            self.current_difficulty + self.difficulty_per_step_increment, 1)
        torques = self.sim.data.actuator_force
        ctrl_pen = np.square(torques).mean()

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        self.xd_queue.append(xd)
        if len(self.xd_queue) > 15:
            self.xd_queue.pop(0)
        xd_av = sum(self.xd_queue) / len(self.xd_queue)

        velocity_rew = 1. / (abs(xd_av - self.target_vel) +
                             1.) - 1. / (self.target_vel + 1.)
        velocity_rew *= (0.3 / self.target_vel)

        roll, pitch, _ = my_utils.quat_to_rpy([qw, qx, qy, qz])
        q_yaw = 2 * acos(qw)

        # Deviation from zero target heading, respecting the 2*pi wrap
        yaw_deviation = np.min((q_yaw % (2 * np.pi), abs(q_yaw)))

        r_neg = np.square(q_yaw) * 0.5 + \
                np.square(pitch) * 1.5 * self.current_difficulty + \
                np.square(roll) * 1.5 * self.current_difficulty + \
                ctrl_pen * 0.00000 + \
                np.square(zd) * 1.5 * self.current_difficulty

        r_correction = np.clip(
            abs(self.prev_deviation) - abs(yaw_deviation), -1, 1)
        r_pos = velocity_rew * 7 + r_correction * 15
        r = np.clip(r_pos - r_neg, -3, 3)

        self.prev_deviation = yaw_deviation

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps
        # Contact flags mapped to {-1, +1} from the six foot force sensors
        contacts = (np.abs(np.array(
            self.sim.data.sensordata[0:6], dtype=np.float32))
                    > 0.05).astype(np.float32) * 2 - 1.

        # See if changing contacts affects policy
        #np.random.randint(0,2, size=6) * 2 - 1
        #contacts = -np.ones(6)

        #clipped_torques = np.clip(torques * 0.05, -1, 1)
        c_obs = self.scale_joints(joints)
        quat = obs[3:7]

        if self.use_contacts:
            c_obs = np.concatenate([c_obs, contacts])

        c_obs = np.concatenate([c_obs, quat])
        return c_obs, r, done, {}
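
The two-phase stepping above runs the control interval at a coarse timestep, then samples the 18 joint angles one at a time at a fine timestep, so each reading is slightly staler than the next, mimicking a serial servo bus. A MuJoCo-free sketch of the timing (StubSim stands in for mujoco_py's MjSim):

class StubSim:
    # Stand-in that only advances a clock.
    def __init__(self, dt):
        self.t, self.dt = 0.0, dt

    def step(self):
        self.t += self.dt

sim = StubSim(dt=0.003)
for _ in range(27):      # ~81 ms of physics per control step
    sim.step()
sim.dt = 0.0008
read_times = []
for _ in range(18):      # each joint is read ~0.8 ms after the previous one
    read_times.append(sim.t)
    sim.step()
print(read_times[0], read_times[-1])  # readings are staggered in sim time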
Example No. 14
    def step(self, ctrl, render=False):
        ctrl = np.clip(ctrl, -1, 1)
        ctrl = self.scale_action(ctrl)
        self.sim.data.ctrl[:] = ctrl

        self.sim.forward()
        self.sim.step()
        if render:
            self.render()
            time.sleep(0.001)

        self.step_ctr += 1

        torques = self.sim.data.actuator_force
        ctrl_pen = np.square(torques).mean()

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        self.xd_queue.append(xd)
        if len(self.xd_queue) > 15:
            self.xd_queue.pop(0)
        xd_av = sum(self.xd_queue) / len(self.xd_queue)

        velocity_rew = 1. / (abs(xd_av - self.target_vel) +
                             1.) - 1. / (self.target_vel + 1.)
        velocity_rew *= (0.3 / self.target_vel)

        roll, pitch, _ = my_utils.quat_to_rpy([qw, qx, qy, qz])
        q_yaw = 2 * acos(qw)

        # Deviation from zero target heading, respecting the 2*pi wrap
        yaw_deviation = np.min((q_yaw % (2 * np.pi), abs(q_yaw)))

        r_neg = np.square(q_yaw) * 0.2 + \
                np.square(pitch) * 0.5 + \
                np.square(roll) * 0.5 + \
                ctrl_pen * 0.00001 + \
                np.square(zd) * 0.7

        r_correction = np.clip(
            abs(self.prev_deviation) - abs(yaw_deviation), -1, 1)
        r_pos = velocity_rew * 4 + r_correction * 10
        r = np.clip(r_pos - r_neg, -3, 3)

        self.prev_deviation = yaw_deviation

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps
        contacts = (np.abs(np.array(
            self.sim.data.sensordata[0:6], dtype=np.float32))
                    > 0.05).astype(np.float32) * 2 - 1.

        #clipped_torques = np.clip(torques * 0.05, -1, 1)
        c_obs = self.scale_joints(self.sim.get_state().qpos.tolist()[7:])

        quat = obs[3:7]

        if self.use_contacts:
            c_obs = np.concatenate([c_obs, contacts])

        c_obs = np.concatenate([c_obs, quat])

        return c_obs, r, done, {}
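
Both of the last two examples smooth the tracked forward velocity with a short FIFO before the reward is computed, damping per-step noise in xd. A sketch of that averaging (sample values illustrative):

xd_queue = []
for xd in [0.1, 0.45, 0.32, 0.5, 0.28]:
    xd_queue.append(xd)
    if len(xd_queue) > 15:  # keep at most the 15 most recent samples
        xd_queue.pop(0)
    xd_av = sum(xd_queue) / len(xd_queue)
print(round(xd_av, 3))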