Example #1
    def step(self, action):
        z = action  # the incoming action is used as the latent input z to the predictive model
        obs = self.base_env.get_observation()
        # reshape the flat image observation and rescale pixel values to [0, 255]
        obs = obs["image"].reshape([self.img_dim, self.img_dim, 3]) * 255
        # decode the latent code into a sequence of low-level actions
        real_action, log_prob = self.predictive_model.predict(
            self.past[-self.past_length:], obs, z)
        total_reward = 0
        for i in range(self.num_execution_per_step):
            first_predicted_action = real_action[0, 0, 0, i]
            # unpack (x, y, z, theta); z_pos avoids shadowing the latent code z
            x, y, z_pos, theta = first_predicted_action[:4]
            a = np.array([x, y, z_pos, theta])
            # undo action normalization (mean and stddev come from outside this snippet)
            a = (a * stddev) + mean
            obs, reward, done, info = self.base_env.step(a)
            info["log_prob"] = log_prob
            total_reward += reward

            # append the new end-effector state (x, y, z, theta) to the history buffer
            quat = obs["state"][3:7]
            theta = bullet.quat_to_deg(quat)[2]
            state = np.array(
                [obs["state"][0], obs["state"][1], obs["state"][2],
                 theta]).reshape([1, self.state_dim])
            self.past = np.concatenate([self.past, state], axis=0)

        return obs, total_reward, done, info
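
For context, a wrapper like this is usually driven by an outer rollout loop that supplies the latent action z. The sketch below is illustrative only; the `policy` object and its `sample_latent()` method are assumptions made for the example, not part of the original code.

def collect_rollout(env, policy, max_steps=50):
    # `env` is assumed to be an instance of the wrapper whose step() is shown
    # above; `policy` is any object mapping observations to latent codes z.
    obs = env.reset()
    trajectory = []
    for _ in range(max_steps):
        z = policy.sample_latent(obs)           # latent action for the predictive model
        obs, reward, done, info = env.step(z)   # step() decodes z into low-level actions
        trajectory.append((obs, reward, done, info))
        if done:
            break
    return trajectory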
Example #2
    def get_vr_output():
        nonlocal prev_vr_theta
        ee_pos, ee_theta = bullet.get_link_state(env.robot_id,
                                                 env.end_effector_index)
        events = p.getVREvents()

        # detect input from controllers
        assert events, "no input from controller!"
        e = events[0]

        # obtain gripper state from controller trigger
        trigger = get_gripper_input(e)

        # pass controller position and orientation into the environment
        cont_pos = e[POSITION]
        cont_orient = bullet.deg_to_quat([180, 0, 0])
        if ORIENTATION_ENABLED:
            cont_orient = e[ORIENTATION]
            cont_orient = bullet.quat_to_deg(list(cont_orient))

        action = [
            cont_pos[0] - ee_pos[0], cont_pos[1] - ee_pos[1],
            cont_pos[2] - ee_pos[2]
        ]
        action = np.array(action) * 3.5  # to make grasp success < 20 timesteps

        grip = trigger
        # the first two orientation components are held at zero;
        # only the wrist rotation (z) is teleoperated
        for _ in range(2):
            action = np.append(action, 0)
        wrist_theta = cont_orient[2] - prev_vr_theta

        action = np.append(action, wrist_theta)
        action = np.append(action, grip)
        action = np.append(action, 0)  # the last action entry is always zero during teleop
        # ===========================================================
        # Add noise during actual data collection
        noise = 0.1
        noise_scalings = [noise] * 3 + [0.1 * noise] * 3 + [noise] * 2
        action += np.random.normal(scale=noise_scalings)
        # ===========================================================

        action = np.clip(action, -1 + EPSILON, 1 - EPSILON)
        prev_vr_theta = cont_orient[2]
        return action
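
For context, a helper like get_vr_output() is typically polled once per control step inside a teleoperated data-collection loop. The sketch below is illustrative only; the `env` object and the exact loop structure are assumptions, not part of the original script.

def collect_teleop_trajectory(env, get_vr_output, max_timesteps=100):
    # Repeatedly read the VR controller and forward the resulting 8-dim
    # action to the environment, recording transitions for the dataset.
    obs = env.reset()
    transitions = []
    for _ in range(max_timesteps):
        action = get_vr_output()
        next_obs, reward, done, info = env.step(action)
        transitions.append((obs, action, reward, next_obs, done))
        obs = next_obs
        if done:
            break
    return transitions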
Example #3
    def step(self, action):

        # TODO Clean this up
        if np.isnan(np.sum(action)):
            print('action', action)
            raise RuntimeError('Action has NaN entries')

        action = np.clip(action, -1, +1)  # TODO Clean this up

        xyz_action = action[:3]  # ee position actions
        abc_action = action[3:6]  # ee orientation actions
        gripper_action = action[6]
        neutral_action = action[7]

        ee_pos, ee_quat = bullet.get_link_state(
            self.robot_id, self.end_effector_index)
        joint_states, _ = bullet.get_joint_states(self.robot_id,
                                                  self.movable_joints)
        gripper_state = np.asarray([joint_states[-2], joint_states[-1]])

        # convert the normalized action deltas into absolute end-effector targets
        target_ee_pos = ee_pos + self.xyz_action_scale * xyz_action
        ee_deg = bullet.quat_to_deg(ee_quat)
        target_ee_deg = ee_deg + self.abc_action_scale * abc_action
        target_ee_quat = bullet.deg_to_quat(target_ee_deg)

        if self.control_mode == 'continuous':
            # continuous mode: the gripper fingers move proportionally to the action
            num_sim_steps = self.num_sim_steps
            target_gripper_state = gripper_state + \
                                   [-self.gripper_action_scale * gripper_action,
                                    self.gripper_action_scale * gripper_action]

        elif self.control_mode == 'discrete_gripper':
            # discrete mode: the gripper toggles between fully open and fully closed
            if gripper_action > 0.5 and not self.is_gripper_open:
                num_sim_steps = self.num_sim_steps_discrete_action
                target_gripper_state = GRIPPER_OPEN_STATE
                self.is_gripper_open = True  # TODO(avi): Clean this up

            elif gripper_action < -0.5 and self.is_gripper_open:
                num_sim_steps = self.num_sim_steps_discrete_action
                target_gripper_state = GRIPPER_CLOSED_STATE
                self.is_gripper_open = False  # TODO(avi): Clean this up
            else:
                num_sim_steps = self.num_sim_steps
                if self.is_gripper_open:
                    target_gripper_state = GRIPPER_OPEN_STATE
                else:
                    target_gripper_state = GRIPPER_CLOSED_STATE
                # target_gripper_state = gripper_state
        else:
            raise NotImplementedError

        target_ee_pos = np.clip(target_ee_pos, self.ee_pos_low,
                                self.ee_pos_high)
        target_gripper_state = np.clip(target_gripper_state, GRIPPER_LIMITS_LOW,
                                       GRIPPER_LIMITS_HIGH)

        bullet.apply_action_ik(
            target_ee_pos, target_ee_quat, target_gripper_state,
            self.robot_id,
            self.end_effector_index, self.movable_joints,
            lower_limit=JOINT_LIMIT_LOWER,
            upper_limit=JOINT_LIMIT_UPPER,
            rest_pose=RESET_JOINT_VALUES,
            joint_range=JOINT_RANGE,
            num_sim_steps=num_sim_steps)

        # optionally move the arm back to its neutral joint configuration
        if self.use_neutral_action and neutral_action > 0.5:
            if self.neutral_gripper_open:
                bullet.move_to_neutral(
                    self.robot_id,
                    self.reset_joint_indices,
                    RESET_JOINT_VALUES)
            else:
                bullet.move_to_neutral(
                    self.robot_id,
                    self.reset_joint_indices,
                    RESET_JOINT_VALUES_GRIPPER_CLOSED)

        info = self.get_info()
        reward = self.get_reward(info)
        done = False
        return self.get_observation(), reward, done, info
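
For context, this step() expects an 8-dimensional action: three end-effector position deltas, three orientation deltas, a gripper command, and a neutral-position flag, each clipped to [-1, 1]. The smoke test below is illustrative; constructing the environment itself is assumed and not shown in the original snippet.

import numpy as np

def random_rollout(env, num_steps=20):
    # Drive the environment with random actions just to exercise step().
    # Action layout: [dx, dy, dz, d_rx, d_ry, d_rz, gripper, neutral]
    obs = env.reset()
    for _ in range(num_steps):
        action = np.random.uniform(-1.0, 1.0, size=8)
        obs, reward, done, info = env.step(action)
        if done:
            break
    return obs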