Example #1
def test_viewer():
    model = load_model_from_path("mujoco_py/tests/test.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    for _ in range(100):
        sim.step()
        viewer.render()
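These snippets are excerpts from test and example files, so their imports are omitted. For reference, a header like the following (a sketch, assuming mujoco_py and its dependencies are installed) makes Example #1 and most of the later examples runnable:

import math
import os

import numpy as np
import pytest
from mujoco_py import (MjSim, MjViewer, ignore_mujoco_warnings,
                       load_model_from_path, load_model_from_xml)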
Example #2
def test_ignore_mujoco_warnings():
    # Two spheres on a plane need more than one contact, but nconmax is 1
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with ignore_mujoco_warnings():
        # This should raise an exception due to the mujoco warning callback,
        # but it's suppressed by the context manager.
        sim.step()

    sim.reset()
    with pytest.raises(Exception):
        # make sure the previous warning callback was restored.
        sim.step()
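The exception above comes from the contact buffer overflowing. A sketch of the fix (the value 8 is arbitrary): enlarging nconmax gives the solver enough contact slots, and the same scene then steps cleanly.

model_ok = load_model_from_xml(xml.replace('nconmax="1"', 'nconmax="8"'))
MjSim(model_ok).step()  # enough contact slots: no warning is triggered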
Example #3
def test_mj_sim_basics():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model, nsubsteps=2)

    sim.reset()
    sim.step()
    sim.reset()
    sim.forward()
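Here nsubsteps=2 makes each sim.step() call advance the physics by two internal substeps. A quick sanity check of that claim (a sketch; sim.data.time accumulates nsubsteps * timestep per call):

sim.reset()
sim.step()
assert abs(sim.data.time - 2 * sim.model.opt.timestep) < 1e-12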
Example #4
def test_mj_warning_raises():
    ''' Test that MuJoCo warnings cause exceptions. '''
    # Two spheres on a plane need more than one contact, but nconmax is 1
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1"> <joint type="free"/> <geom size="1"/> </body>
        <body pos="0 1 1"> <joint type="free"/> <geom size="1"/> </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)

    sim.reset()
    with pytest.raises(Exception):
        # This should raise an exception due to the mujoco warning callback
        sim.step()
Example #5
def test_xvelr():  # xvelr = rotational velocity in world frame
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="0.3"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/>
                    <geom name="geom2" pos="0 0 0" size="0.3"/>
                    <site name="site1" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    sim.forward()
    # Check that xvelr starts out at zero (since qvel is zero)
    site1_xvelr = sim.data.get_site_xvelr('site1')
    np.testing.assert_allclose(site1_xvelr, np.zeros(3))
    # Push the base body and step forward to get it moving
    sim.data.ctrl[0] = 1e9
    sim.step()
    sim.forward()
    # Check that the first body has nonzero xvelr
    body1_xvelr = sim.data.get_body_xvelr('body1')
    assert not np.allclose(body1_xvelr, np.zeros(3))
    # Check that the second body has zero xvelr (still)
    body2_xvelr = sim.data.get_body_xvelr('body2')
    np.testing.assert_allclose(body2_xvelr, np.zeros(3))
    # Check that this matches the batch (gathered) getter property
    np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
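The batch property is indexed by body id (index 2 here because id 0 is the world body and id 1 is body1). A sketch of the same check without the hard-coded index, using the model's name-to-id lookup:

body2_id = sim.model.body_name2id('body2')
np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[body2_id])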
Example #6
        </body>
    </worldbody>
    <actuator>
        <position joint="j" kp="2000"/>
    </actuator>
</mujoco>
"""

fn = '''
    #define SQUARE(a) ((a) * (a))
    void fun(const mjModel* m, mjData* d) {
        for (int i = d->ne; i < d->nefc; i++) {
            pos_sum += SQUARE(d->efc_pos[i]);
        }
    }
'''

sim = MjSim(load_model_from_xml(MODEL_XML), nsubsteps=50,
            substep_callback=fn, userdata_names=['pos_sum'])
t = 0
while t < 10:
    t += 1
    sim.data.ctrl[0] = .2
    print('t', t)
    sim.step()
    # verify that there are no contacts visible between steps
    assert sim.data.ncon == 0, 'No contacts should be detected here'
    # verify that contacts (and penetrations) are visible to substep_callback
    if t > 1:
        assert sim.data.get_userdata('pos_sum') > 0  # detected collision
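The userdata_names argument reserves named slots in mjData.userdata: the C callback sees each name as a macro over its slot, and Python reads the same slot back through get_userdata. For instance (a sketch), the accumulated metric can be inspected after the loop:

print('final pos_sum:', sim.data.get_userdata('pos_sum'))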
Example #7
            <joint axis="1 0 0" name="cylinder:slidex" type="slide"/>
            <joint axis="0 1 0" name="cylinder:slidey" type="slide"/>
        </body>
        <body name="box" pos="-0.8 0 0.2">
            <geom mass="0.1" size="0.15 0.15 0.15" type="box"/>
        </body>
        <body name="floor" pos="0 0 0.025">
            <geom condim="3" size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/>
        </body>
    </worldbody>
    <actuator>
        <motor gear="2000.0" joint="slide0"/>
        <motor gear="2000.0" joint="slide1"/>
    </actuator>
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)
t = 0
while True:
    data = sim.data
    sim.data.ctrl[0] = math.cos(t / 10.) * 0.01
    sim.data.ctrl[1] = math.sin(t / 10.) * 0.01
    t += 1
    sim.step()
    viewer.render()
    if t > 100 and os.getenv('TESTING') is not None:
        break
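On a headless machine the same rollout can be exercised without opening a window: MjSim.render draws into an offscreen buffer and returns an RGB array (a sketch; the sizes are arbitrary):

img = sim.render(width=320, height=240)  # uint8 array, shape (240, 320, 3)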
Example #8
class Environment():
    def __init__(self,
                 model_name,
                 goal_space_train,
                 goal_space_test,
                 project_state_to_end_goal,
                 end_goal_thresholds,
                 initial_state_space,
                 subgoal_bounds,
                 project_state_to_subgoal,
                 subgoal_thresholds,
                 max_actions=1200,
                 num_frames_skip=10,
                 show=False):

        self.name = model_name

        # Create Mujoco Simulation
        self.model = load_model_from_path("./mujoco_files/" + model_name)
        self.sim = MjSim(self.model)

        # Set dimensions and ranges of states, actions, and goals in order to configure actor/critic networks
        if model_name == "pendulum.xml":
            self.state_dim = 2 * len(self.sim.data.qpos) + len(
                self.sim.data.qvel)
        elif model_name == "reacher.xml":
            self.state_dim = len(self.sim.data.qpos) + len(
                self.sim.data.qvel) + 2  # fingertip x,y
        else:
            self.state_dim = len(self.sim.data.qpos) + len(
                self.sim.data.qvel
            )  # State will include (i) joint angles and (ii) joint velocities
        self.action_dim = len(self.sim.model.actuator_ctrlrange)  # low-level action dim
        self.action_bounds = self.sim.model.actuator_ctrlrange[:, 1]  # low-level action bounds
        self.action_offset = np.zeros(len(self.action_bounds))  # assumes symmetric low-level action ranges
        self.end_goal_dim = len(goal_space_test)
        self.subgoal_dim = len(subgoal_bounds)
        self.subgoal_bounds = subgoal_bounds

        # Projection functions
        self.project_state_to_end_goal = project_state_to_end_goal
        self.project_state_to_subgoal = project_state_to_subgoal

        # Convert subgoal bounds to symmetric bounds and offset.  Need these to properly configure subgoal actor networks
        self.subgoal_bounds_symmetric = np.zeros(len(self.subgoal_bounds))
        self.subgoal_bounds_offset = np.zeros(len(self.subgoal_bounds))

        for i in range(len(self.subgoal_bounds)):
            self.subgoal_bounds_symmetric[i] = (self.subgoal_bounds[i][1] -
                                                self.subgoal_bounds[i][0]) / 2
            self.subgoal_bounds_offset[i] = (self.subgoal_bounds[i][1] -
                                             self.subgoal_bounds_symmetric[i])

        # End goal/subgoal thresholds
        self.end_goal_thresholds = end_goal_thresholds
        self.subgoal_thresholds = subgoal_thresholds

        # Set initial state and goal state spaces
        self.initial_state_space = initial_state_space
        self.goal_space_train = goal_space_train
        self.goal_space_test = goal_space_test
        self.subgoal_colors = [
            "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon",
            "Gray", "White", "Black"
        ]

        self.max_actions = max_actions

        # Implement visualization if necessary
        self.visualize = show  # Visualization boolean
        if self.visualize:
            self.viewer = MjViewer(self.sim)
        self.num_frames_skip = num_frames_skip

    # Get state, which concatenates joint positions and velocities
    def get_state(self):

        if self.name == "pendulum.xml":
            return np.concatenate([
                np.cos(self.sim.data.qpos),
                np.sin(self.sim.data.qpos), self.sim.data.qvel
            ])
        elif self.name == "reacher.xml":
            return np.concatenate([
                self.sim.data.qpos, self.sim.data.qvel,
                np.reshape(
                    np.array(self.sim.data.get_body_xpos("fingertip")[:2]),
                    self.sim.data.qpos.shape)
            ])
        else:
            return np.concatenate((self.sim.data.qpos, self.sim.data.qvel))

    # Reset simulation to state within initial state specified by user
    def reset_sim(self):

        # Reset joint positions and velocities
        for i in range(len(self.sim.data.qpos)):
            self.sim.data.qpos[i] = np.random.uniform(
                self.initial_state_space[i][0], self.initial_state_space[i][1])

        for i in range(len(self.sim.data.qvel)):
            self.sim.data.qvel[i] = np.random.uniform(
                self.initial_state_space[len(self.sim.data.qpos) + i][0],
                self.initial_state_space[len(self.sim.data.qpos) + i][1])

        self.sim.step()

        # Return state
        return self.get_state()

    # Execute low-level action for number of frames specified by num_frames_skip
    def execute_action(self, action):

        self.sim.data.ctrl[:] = action
        for _ in range(self.num_frames_skip):
            self.sim.step()
            if self.visualize:
                self.viewer.render()

        return self.get_state()

    # Visualize end goal.  This function may need to be adjusted for new environments.
    def display_end_goal(self, end_goal):

        # Goal can be visualized by changing the location of the relevant site object.
        if self.name == "pendulum.xml":
            self.sim.data.mocap_pos[0] = np.array([
                0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6
            ])
        elif self.name == "ur5.xml":

            theta_1 = end_goal[0]
            theta_2 = end_goal[1]
            theta_3 = end_goal[2]

            # shoulder_pos_1 = np.array([0,0,0,1])
            upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
            forearm_pos_3 = np.array([0.425, 0, 0, 1])
            wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

            # Transformation matrix from shoulder to base reference frame
            T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.089159],
                              [0, 0, 0, 1]])

            # Transformation matrix from upper arm to shoulder reference frame
            T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                              [np.sin(theta_1),
                               np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                              [0, 0, 0, 1]])

            # Transformation matrix from forearm to upper arm reference frame
            T_3_2 = np.array([[np.cos(theta_2), 0,
                               np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                              [-np.sin(theta_2), 0,
                               np.cos(theta_2), 0], [0, 0, 0, 1]])

            # Transformation matrix from wrist 1 to forearm reference frame
            T_4_3 = np.array([[np.cos(theta_3), 0,
                               np.sin(theta_3), 0.425], [0, 1, 0, 0],
                              [-np.sin(theta_3), 0,
                               np.cos(theta_3), 0], [0, 0, 0, 1]])

            # Determine joint position relative to original reference frame
            # shoulder_pos = T_1_0.dot(shoulder_pos_1)
            upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
            forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
            wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                wrist_1_pos_4)[:3]

            joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
            """
            print("\nEnd Goal Joint Pos: ")
            print("Upper Arm Pos: ", joint_pos[0])
            print("Forearm Pos: ", joint_pos[1])
            print("Wrist Pos: ", joint_pos[2])
            """

            for i in range(3):
                self.sim.data.mocap_pos[i] = joint_pos[i]
        elif self.name == "reacher.xml":
            x, y = end_goal
            self.sim.data.mocap_pos[0] = np.array([x, y, .01])
        else:
            assert False, "Provide display end goal function in environment.py file"

    # Function returns an end goal
    def get_next_goal(self, test):

        end_goal = np.zeros((len(self.goal_space_test)))

        if self.name == "ur5.xml":

            goal_possible = False
            while not goal_possible:
                end_goal = np.zeros(shape=(self.end_goal_dim, ))
                end_goal[0] = np.random.uniform(self.goal_space_test[0][0],
                                                self.goal_space_test[0][1])

                end_goal[1] = np.random.uniform(self.goal_space_test[1][0],
                                                self.goal_space_test[1][1])
                end_goal[2] = np.random.uniform(self.goal_space_test[2][0],
                                                self.goal_space_test[2][1])

                # Next need to ensure chosen joint angles result in achievable task (i.e., desired end effector position is above ground)

                theta_1 = end_goal[0]
                theta_2 = end_goal[1]
                theta_3 = end_goal[2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                # Make sure wrist 1 pos is above ground so it can actually be reached
                if (np.absolute(end_goal[0]) > np.pi / 4
                        and forearm_pos[2] > 0.05 and wrist_1_pos[2] > 0.15):
                    goal_possible = True

        elif not test and self.goal_space_train is not None:
            for i in range(len(self.goal_space_train)):
                end_goal[i] = np.random.uniform(self.goal_space_train[i][0],
                                                self.goal_space_train[i][1])
        else:
            assert self.goal_space_test is not None, "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file"

            for i in range(len(self.goal_space_test)):
                end_goal[i] = np.random.uniform(self.goal_space_test[i][0],
                                                self.goal_space_test[i][1])

        # Visualize End Goal
        self.display_end_goal(end_goal)

        return end_goal

    # Visualize all subgoals
    def display_subgoals(self, subgoals):

        # Display up to 10 subgoals and end goal
        if len(subgoals) <= 11:
            subgoal_ind = 0
        else:
            subgoal_ind = len(subgoals) - 11

        for i in range(1, min(len(subgoals), 11)):
            if self.name == "pendulum.xml":
                self.sim.data.mocap_pos[i] = np.array([
                    0.5 * np.sin(subgoals[subgoal_ind][0]), 0,
                    0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6
                ])
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1

            elif self.name == "ur5.xml":

                theta_1 = subgoals[subgoal_ind][0]
                theta_2 = subgoals[subgoal_ind][1]
                theta_3 = subgoals[subgoal_ind][2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])

                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1),
                                   np.cos(theta_1), 0, 0], [0, 0, 1, 0],
                                  [0, 0, 0, 1]])

                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0,
                                   np.sin(theta_2), 0], [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0,
                                   np.cos(theta_2), 0], [0, 0, 0, 1]])

                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0,
                                   np.sin(theta_3), 0.425], [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0,
                                   np.cos(theta_3), 0], [0, 0, 0, 1]])

                # Determine joint position relative to original reference frame
                # shoulder_pos = T_1_0.dot(shoulder_pos_1)
                upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(
                    forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(
                    wrist_1_pos_4)[:3]

                joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
                """
                print("\nSubgoal %d Joint Pos: " % i)
                print("Upper Arm Pos: ", joint_pos[0])
                print("Forearm Pos: ", joint_pos[1])
                print("Wrist Pos: ", joint_pos[2])
                """

                # Designate site position for upper arm, forearm and wrist
                for j in range(3):
                    self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy(
                        joint_pos[j])
                    self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1

                # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3])

                subgoal_ind += 1
            else:
                # Visualize desired gripper position, which is elements 18-21 in subgoal vector
                self.sim.data.mocap_pos[i] = subgoals[subgoal_ind]
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
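A hypothetical instantiation sketch for the pendulum task. Every bound, threshold, and projection below is an illustrative placeholder, not a value from the original project:

project = lambda sim, state: state[:2]  # placeholder projection function
env = Environment(
    model_name="pendulum.xml",
    goal_space_train=[[-np.pi, np.pi], [-4.0, 4.0]],
    goal_space_test=[[-np.pi, np.pi], [-4.0, 4.0]],
    project_state_to_end_goal=project,
    end_goal_thresholds=np.array([0.1, 0.4]),
    initial_state_space=[[-np.pi, np.pi], [-0.05, 0.05]],  # one range per qpos/qvel entry
    subgoal_bounds=np.array([[-np.pi, np.pi], [-4.0, 4.0]]),
    project_state_to_subgoal=project,
    subgoal_thresholds=np.array([0.1, 0.4]),
)
state = env.reset_sim()
state = env.execute_action(np.zeros(env.action_dim))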
Example #9
class Square2dEnv(Env):
    # TODO make this into GoalEnv
    def __init__(self,
                 model_path='./square2d.xml',
                 distance_threshold=1e-1,
                 frame_skip=2,
                 goal=(0.3, 0.3),
                 horizon=100):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.model = load_model_from_path(fullpath)
        self.seed()
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.distance_threshold = distance_threshold
        self.frame_skip = frame_skip
        self.set_goal_location(goal)
        self.reward_type = 'dense'
        self.horizon = horizon
        self.time_step = 0
        obs = self.get_current_observation()
        self.action_space = spaces.Box(-1., 1., shape=(2, ))
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        self.set_ball_location([0., 0.])
        self.sim.forward()
        self.time_step = 0
        return self.get_current_observation()

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def viewer_setup(self):
        # x, y, z offset from the tracked object (applies when trackbodyid == -1)
        self.viewer.cam.lookat[0] = 0.0
        self.viewer.cam.lookat[1] = 0.0
        self.viewer.cam.lookat[2] = 0.0
        # camera rotation around the axis in the plane through the frame origin
        # (at 0 the scene degenerates to a line)
        self.viewer.cam.elevation = -90
        self.viewer.cam.azimuth = 90
        self.viewer.cam.distance = 1.5

    def set_goal_location(self, goalPos):
        # goal = [xLoc, yLoc]
        assert np.linalg.norm(np.asarray(goalPos) - np.asarray([0.3, 0.3]),
                              axis=-1) < 0.1
        self.sim.data.qpos[0] = goalPos[0]
        self.sim.data.qpos[1] = goalPos[1]

    def set_ball_location(self, ballPos):

        self.sim.data.qpos[2] = ballPos[0]
        self.sim.data.qpos[3] = ballPos[1]

    def set_distance_threshold(self, distance_threshold):
        self.distance_threshold = distance_threshold

    def set_frame_skip(self, frame_skip):
        self.frame_skip = frame_skip

    def get_frame_skip(self):
        return self.frame_skip

    def get_distance_threshold(self):
        return self.distance_threshold

    def get_ball_location(self):
        return self.sim.data.qpos[2:4]

    def get_goal_location(self):
        return self.sim.data.qpos[0:2]

    def get_ball_velocity(self):
        return self.sim.data.qvel[2:4]

    def send_control_command(self, xDirectionControl, yDirectionControl):

        self.sim.data.ctrl[0] = xDirectionControl
        self.sim.data.ctrl[1] = yDirectionControl

    def get_current_observation(self):
        obs = np.concatenate([
            self.get_goal_location(),
            self.get_ball_location(),
            self.get_ball_velocity()
        ]).ravel()
        return obs.copy()
        # obs = np.concatenate([self.get_ball_location(), self.get_ball_velocity()]).ravel()
        # desired_goal = self.get_goal_location()
        # achieved_goal = self.get_ball_location()
        # return {
        #     'observation': obs.copy(),
        #     'achieved_goal': achieved_goal.copy(),
        #     'desired_goal': desired_goal.copy()
        #
        # }

    def get_image_of_goal_observation(self, xLoc=None, yLoc=None):
        # test for None explicitly so a legitimate 0.0 coordinate isn't overwritten
        if xLoc is None:
            xLoc = self.sim.data.qpos[0]

        if yLoc is None:
            yLoc = self.sim.data.qpos[1]

        self.sim.data.qpos[0] = xLoc
        self.sim.data.qpos[1] = yLoc
        self.sim.data.qpos[2] = xLoc
        self.sim.data.qpos[3] = yLoc

        self.render()

    def do_simulation(self, ctrl, n_frames):
        self.send_control_command(ctrl[0], ctrl[1])
        for _ in range(n_frames):
            self.take_step()

    def step(self, ctrl):

        if np.linalg.norm(self.get_goal_location() - [0.3, 0.3],
                          axis=-1) > 0.1:
            print(self.get_goal_location())
            # assert False
        ctrl = np.clip(ctrl, -1., 1.)
        self.do_simulation(ctrl, self.frame_skip)
        obs = self.get_current_observation()
        info = {}
        reward = self.compute_reward(self.get_ball_location(),
                                     self.get_goal_location(), {})
        done = (reward == 1.0)
        self.time_step += 1
        if self.time_step >= self.horizon:
            done = True
        return obs, reward, done, info

    def take_step(self):
        self.sim.step()

    def render(self, mode='human'):
        self._get_viewer().render()

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Compute distance between goal and the achieved goal.
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def _is_success(self, achieved_goal, desired_goal):
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return (d < self.distance_threshold).astype(np.float32)

    def log_diagnostics(self, paths):
        pass

    def terminate(self):
        pass
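A minimal rollout sketch (assumes square2d.xml sits next to the module and numpy is imported as np):

env = Square2dEnv()
obs = env.reset()
for _ in range(20):
    obs, reward, done, info = env.step(np.random.uniform(-1., 1., size=2))
    if done:
        break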
Example #10
class MujocoEnv(metaclass=EnvMeta):
    """
    Initializes a Mujoco Environment.

    Args:
        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering.

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes
            in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes
            in camera. False otherwise.

        control_freq (float): how many control signals to receive
            in every simulated second. This sets the amount of simulation time
            that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

    Raises:
        ValueError: [Invalid renderer selection]
    """
    def __init__(self,
                 has_renderer=False,
                 has_offscreen_renderer=True,
                 render_camera="frontview",
                 render_collision_mesh=False,
                 render_visual_mesh=True,
                 control_freq=10,
                 horizon=1000,
                 ignore_done=False,
                 hard_reset=True):
        # First, verify that both the on- and off-screen renderers are not being used simultaneously
        if has_renderer is True and has_offscreen_renderer is True:
            raise ValueError(
                "The onscreen and offscreen renderers cannot be used simultaneously."
            )

        # Rendering-specific attributes
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_camera = render_camera
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.viewer = None

        # Simulation-specific attributes
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.hard_reset = hard_reset
        self.model = None
        self.cur_time = None
        self.model_timestep = None
        self.control_timestep = None
        self.deterministic_reset = False  # If True, skip randomized resetting of objects / robot joints

        # Load the model
        self._load_model()

        # Initialize the simulation
        self._initialize_sim()

        # Run all further internal (re-)initialization required
        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.

        Args:
            control_freq (float): Hz rate to run control loop at within the simulation
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("XML model defines a non-positive timestep")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq
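        # Worked example: with control_freq=10 and MuJoCo's default
        # 0.002 s timestep, control_timestep = 1/10 = 0.1 s, so step()
        # runs int(0.1 / 0.002) = 50 simulation substeps per env action.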

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flattened array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def _initialize_sim(self, xml_string=None):
        """
        Creates a MjSim object and stores it in self.sim. If @xml_string is specified, the MjSim object will be created
        from the specified xml_string. Else, it will pull from self.model to instantiate the simulation

        Args:
            xml_string (str): If specified, creates the MjSim object from this XML string
        """
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = load_model_from_xml(
            xml_string) if xml_string else self.model.get_model(
                mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure changes have propagated through sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.step()

        # Setup sim time based on control frequency
        self.initialize_time(self.control_freq)

    def reset(self):
        """
        Resets simulation.

        Returns:
            OrderedDict: Environment observation space after reset occurs
        """
        # TODO(yukez): investigate black screen of death
        # Use hard reset if requested
        if self.hard_reset and not self.deterministic_reset:
            self._destroy_viewer()
            self._load_model()
            self._initialize_sim()
        # Else, we only reset the sim internally
        else:
            self.sim.reset()
        # Reset necessary robosuite-centric variables
        self._reset_internal()
        self.sim.forward()
        return self._get_observation()

    def _reset_internal(self):
        """Resets simulation internal configurations."""

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.viewer.viewer.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

            # make sure mujoco-py doesn't block rendering frames
            # (see https://github.com/StanfordVL/robosuite/issues/39)
            self.viewer.viewer._render_every_frame = True

            # Set the camera angle for viewing
            if self.render_camera is not None:
                self.viewer.set_camera(camera_id=self.sim.model.camera_name2id(
                    self.render_camera))

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """
        Grabs observations from the environment.

        Returns:
            OrderedDict: OrderedDict containing observations [(name_string, np.array), ...]

        """
        return OrderedDict()

    def step(self, action):
        """
        Takes a step in simulation with control command @action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            4-tuple:

                - (OrderedDict) observations from the environment
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) misc information

        Raises:
            ValueError: [Steps past episode termination]

        """
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1

        # Since the env.step frequency is slower than the mjsim timestep frequency, the internal controller will output
        # multiple torque commands in between new high level action commands. Therefore, we need to denote via
        # 'policy_step' whether the current step we're taking is simply an internal update of the controller,
        # or an actual policy update
        policy_step = True

        # Loop through the simulation at the model timestep rate until we're ready to take the next policy step
        # (as defined by the control frequency specified at the environment level)
        for i in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self._pre_action(action, policy_step)
            self.sim.step()
            policy_step = False

        # Note: this is done all at once to avoid floating point inaccuracies
        self.cur_time += self.control_timestep

        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action, policy_step=False):
        """
        Do any preprocessing before taking an action.

        Args:
            action (np.array): Action to execute within the environment
            policy_step (bool): Whether this current loop is an actual policy step or internal sim update step
        """
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """
        Do any housekeeping after taking an action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            3-tuple:

                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method

        """
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """
        Reward should be a function of state and action

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            float: Reward from environment
        """
        raise NotImplementedError

    def render(self):
        """
        Renders to an on-screen window.
        """
        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.

        Returns:
            OrderedDict: Observations from the environment
        """
        observation = self._get_observation()
        return observation

    @property
    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    @property
    def action_dim(self):
        """
        Size of the action space
        Returns:
            int: Action space dimension
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """
        Reloads the environment from an XML description of the environment.

        Args:
            xml_string (str): XML string of the environment, loaded directly into the sim
        """

        # if there is an active viewer window, destroy it
        self.close()

        # Since we are reloading from an xml_string, we are deterministically resetting
        self.deterministic_reset = True

        # initialize sim from xml
        self._initialize_sim(xml_string=xml_string)

        # Now reset as normal
        self.reset()

        # Turn off deterministic reset
        self.deterministic_reset = False

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1 (list of str): a list of geom names
            geoms_2 (list of str): another list of geom names

        Returns:
            generator: iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0:self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_success(self):
        """
        Checks if the task has been completed. Should be implemented by subclasses

        Returns:
            bool: True if the task has been completed
        """
        raise NotImplementedError

    def _destroy_viewer(self):
        """
        Destroys the current mujoco renderer instance if it exists
        """
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
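A hypothetical minimal subclass sketch: _load_model must leave an object in self.model exposing get_model(mode="mujoco_py"), and reward() is the other required override. Everything named below (_XmlWrapper, ReachEnv, the XML) is illustrative, not part of robosuite, and the imports from Example #1's header are assumed:

MINIMAL_XML = """
<mujoco>
  <worldbody>
    <body>
      <joint name="slide_x" type="slide" axis="1 0 0"/>
      <geom size="0.05"/>
    </body>
  </worldbody>
  <actuator>
    <motor joint="slide_x"/>
  </actuator>
</mujoco>
"""

class _XmlWrapper:
    """Stand-in for a robosuite task model; only get_model() is needed here."""
    def __init__(self, xml):
        self.xml = xml

    def get_model(self, mode="mujoco_py"):
        return load_model_from_xml(self.xml)

class ReachEnv(MujocoEnv):
    def _load_model(self):
        self.model = _XmlWrapper(MINIMAL_XML)

    def reward(self, action):
        return 0.0  # placeholder reward

env = ReachEnv(has_offscreen_renderer=False)  # fully headless
obs, reward, done, info = env.step(np.zeros(1))  # one actuator -> 1-dim action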
Example #11
def test_jacobians():
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="1.0"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
                    <geom name="geom2" pos="1 1 1" size="0.5"/>
                    <site name="target" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in a bad array (integer dtype instead of double)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in a bad array (integer dtype instead of double)
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
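The flat buffers above are row-major (3, nv) Jacobians. Mapping the joint velocities through one gives the site's linear velocity in the world frame (a sketch):

jacp = sim.data.get_site_jacp('target').reshape(3, sim.model.nv)
site_linear_velocity = jacp.dot(sim.data.qvel)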
Example #12
class Aether:
    """
    Deus do espaço e do paraíso
    Faz a conexão entre o simulador e os módulos do programa
    """
    def __init__(self):
        # DEFINITIONS
        self.field_width = 640
        self.field_height = 480
        self.cascadeTime = 0
        self.cascadeLoops = 1
        self.cascadeLastTime = 0

        # SETUP
        model = load_model_from_path("simulator/scene.xml")
        self.sim = MjSim(model)
        self.viewer = Viewer(self.sim, self)
        self.ball_joint = self.sim.model.get_joint_qpos_addr("Ball")[0]
        self.robot_joints = [
            self.sim.model.get_joint_qpos_addr("Robot_01")[0],
            self.sim.model.get_joint_qpos_addr("Robot_02")[0],
            self.sim.model.get_joint_qpos_addr("Robot_03")[0],
            self.sim.model.get_joint_qpos_addr("Robot_04")[0],
            self.sim.model.get_joint_qpos_addr("Robot_05")[0],
            self.sim.model.get_joint_qpos_addr("Robot_06")[0]
        ]

        # EXECUTION
        # set up the modules
        self.enabled = [False] * 6  # [False, False, False, True, True, True]
        self.athena = [Athena(), Athena()]
        self.zeus = [Zeus(), Zeus()]
        Endless.setup(self.field_width, self.field_height)
        self.athena[0].setup(3, 0.8)
        self.athena[1].setup(3, 0.8)
        self.zeus[0].setup(3)
        self.zeus[1].setup(3)

        # start the data loop
        self.pause = False
        self.loopThread1 = threading.Thread(target=self.loopTeam, args=[0])
        self.loopThread2 = threading.Thread(target=self.loopTeam, args=[1])
        self.loopThread1.daemon = True
        self.loopThread2.daemon = True
        self.loopThread1.start()
        self.loopThread2.start()

        self.showInfos(0)
        self.showInfos(1)

    def run(self):
        while True:
            self.sim.step()
            self.viewer.render()

    def loopTeam(self, team):
        while True:
            time.sleep(0.0000000001)

            if self.pause or \
                    (not self.enabled[3 * team] and not self.enabled[3 * team + 1] and not self.enabled[3 * team + 2]):
                continue
            # run our modules
            positions = self.generatePositions(team)
            commands = self.athena[team].getTargets(positions)
            velocities = self.zeus[team].getVelocities(commands)
            # apply the results to the simulation
            if self.enabled[0 + 3 * team]:
                self.sim.data.ctrl[0 + 6 * team] = self.convertVelocity(
                    velocities[0]["vLeft"])
                self.sim.data.ctrl[1 + 6 * team] = self.convertVelocity(
                    velocities[0]["vRight"])
            if self.enabled[1 + 3 * team]:
                self.sim.data.ctrl[2 + 6 * team] = self.convertVelocity(
                    velocities[1]["vLeft"])
                self.sim.data.ctrl[3 + 6 * team] = self.convertVelocity(
                    velocities[1]["vRight"])
            if self.enabled[2 + 3 * team]:
                self.sim.data.ctrl[4 + 6 * team] = self.convertVelocity(
                    velocities[2]["vLeft"])
                self.sim.data.ctrl[5 + 6 * team] = self.convertVelocity(
                    velocities[2]["vRight"])

            # display results
            self.showInfos(team, positions, commands)
            # 3D indicators
            for i in range(3):
                position = positions[0][i]["position"]
                self.setObjectPose("indicator_" + str(i + 3 * team + 1),
                                   position, team, 0.2,
                                   velocities[i]["vector"])
                # self.setObjectPose("virtual_robot_1", velocities["virtualPos"], team=0)

    # HELPERS
    def showInfos(self, team, positions=None, commands=None):
        infos = []

        for i in range(3):
            if self.enabled[i + 3 * team] and positions and commands:
                # information every robot has
                robot = "X: " + "{:.1f}".format(positions[0][i]["position"][0])
                robot += ", Y: " + "{:.1f}".format(
                    positions[0][i]["position"][1])
                robot += ", O: " + "{:.1f}".format(
                    positions[0][i]["orientation"])
                robot += ", T: " + commands[i]["tactics"]
                robot += ", C: " + commands[i]["command"]

                if commands[i]["command"] == "lookAt":
                    if type(commands[i]["data"]["target"]) is tuple:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"][0]) + ", "
                        robot += "{:.1f}".format(
                            commands[i]["data"]["target"][1]) + ")"
                    else:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"]) + ")"

                elif commands[i]["command"] == "goTo":
                    robot += "(" + "{:.1f}".format(
                        commands[i]["data"]["target"]["position"][0]) + ", "
                    robot += "{:.1f}".format(
                        commands[i]["data"]["target"]["position"][1]) + ", "

                    if type(commands[i]["data"]["target"]
                            ["orientation"]) is tuple:
                        robot += "(" + "{:.1f}".format(
                            commands[i]["data"]["target"]["orientation"]
                            [0]) + ", "
                        robot += "{:.1f}".format(commands[i]["data"]["target"]
                                                 ["orientation"][1]) + ") )"
                    else:
                        robot += "{:.1f}".format(
                            commands[i]["data"]["target"]["orientation"]) + ")"

                elif commands[i]["command"] == "spin":
                    robot += "(" + commands[i]["data"]["direction"] + ")"

            else:
                robot = "[OFF]"

            infos.append(robot)

        self.viewer.infos["robots" + str(team + 1)] = infos

        if team == 0:
            if positions:
                self.viewer.infos["ball"] = "X: " + "{:.2f}".format(positions[2]["position"][0]) + ", Y: " + \
                                            "{:.2f}".format(positions[2]["position"][1])
                fps = self.getFPS()
                if fps:
                    self.viewer.infos["fps"] = fps

            if commands:
                # 3D indicators
                # print(commands[0]["futureBall"])
                self.setObjectPose("virtual_ball", commands[0]["futureBall"],
                                   0, 0.022)
                for i in range(3):
                    if commands[i]["command"] == "goTo":
                        target = commands[i]["data"]["target"]["position"]
                        targetOrientation = commands[i]["data"]["target"][
                            "orientation"]
                        if type(targetOrientation) is tuple:
                            position = positions[0][i]["position"]
                            targetOrientation = math.atan2(
                                position[1] - targetOrientation[1],
                                -(position[0] - targetOrientation[0]))

                        self.setObjectPose("target_" + str(i + 1), target, 0,
                                           0.01, targetOrientation)

    def getFPS(self):
        # compute the FPS and send it to the interface
        self.cascadeTime += time.time() - self.cascadeLastTime
        self.cascadeLoops += 1
        self.cascadeLastTime = time.time()
        if self.cascadeTime > 1:
            fps = self.cascadeLoops / self.cascadeTime
            self.cascadeTime = self.cascadeLoops = 0
            return "{:.2f}".format(fps)
        return None

    # FUNCTIONS
    def reset(self):
        for i in range(6):
            self.enabled[i] = False

        self.showInfos(0)
        self.showInfos(1)
        self.sim.reset()

    def startStop(self, pause):
        self.pause = pause

    def moveBall(self, direction, keepVel=False):
        if not keepVel:
            for i in range(6):
                self.sim.data.qvel[self.ball_joint + i] = 0

        if direction == 0:  # UP
            self.sim.data.qpos[self.ball_joint + 1] += 0.01
        elif direction == 1:  # DOWN
            self.sim.data.qpos[self.ball_joint + 1] -= 0.01
        elif direction == 2:  # LEFT
            self.sim.data.qpos[self.ball_joint] -= 0.01
        elif direction == 3:  # RIGHT
            self.sim.data.qpos[self.ball_joint] += 0.01

    def toggleRobot(self, robotId, moveOut=False):
        if self.sim.data.qpos[self.robot_joints[robotId] + 1] >= 1:
            self.enabled[robotId] = False
            if moveOut:
                self.sim.data.qpos[self.robot_joints[robotId] + 1] = 0
        elif moveOut:
            self.enabled[robotId] = False
            self.sim.data.qpos[
                self.robot_joints[robotId]] = -0.62 + 0.25 * robotId
            self.sim.data.qpos[self.robot_joints[robotId] + 1] = 1.5
            self.sim.data.qpos[self.robot_joints[robotId] + 2] = 0.04
            self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0
        elif self.enabled[robotId]:
            self.enabled[robotId] = False
            self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0
        else:
            self.enabled[robotId] = True

        self.showInfos(0 if robotId < 3 else 1)

    def convertPositionX(self, coord, team):
        """Traz o valor pra positivo e multiplica pela proporção (largura máxima)/(posição x máxima)

        Args:
             coord: Coordenada da posição no mundo da simulação a ser convertida
             team: Time que está pedindo a conversão (0 ou 1)
        Returns:
            Coordenada da posição na proporção utilizada pela estratégia
        """
        if team == 0:
            return (coord +
                    0.8083874182591296) * self.field_width / 1.6167748365182593
        else:
            return -(coord - 0.8083874182591296
                     ) * self.field_width / 1.6167748365182593

    def convertPositionY(self, coord, team):
        """Traz o valor pra positivo e multiplica pela proporção (altura máxima)/(posição y máxima)

        Args:
            coord: Coordenada da posição no mundo da simulação a ser convertida
            team: Time que está pedindo a conversão (0 ou 1)

        Returns:
            Coordenada da posição na proporção utilizada pela estratégia
        """
        if team == 0:
            return (coord + 0.58339083) * self.field_height / 1.16678166
        else:
            return -(coord - 0.58339083) * self.field_height / 1.16678166

    @staticmethod
    def convertVelocity(vel):
        return vel * 30

    def setObjectPose(self,
                      objectName,
                      newPos,
                      team=0,
                      height=0.04,
                      newOrientation=0):
        """Seta a posição e orientação de um objeto no simulador
        Args:
            objectName: Nome do objeto a ter a pose alterada. Esse nome deve ser de um mocap configurado na cena.
                        Se o objeto for virtual_robot_i, o robô é amarelo se i <= 3, azul caso contrário
            newPos: (x, y), 'x' e 'y' valores em pixels
            team: índice do time (valor em pixels inverte de acordo com o time)
            height: altura do objeto no universo
            newOrientation: orientação Z em radianos do objeto
        """
        if team == 0:
            x = (newPos[0] /
                 self.field_width) * 1.6167748365182593 - 0.8083874182591296
            y = (newPos[1] / self.field_height) * 1.16678166 - 0.58339083
        else:
            x = -(newPos[0] /
                  self.field_width) * 1.6167748365182593 + 0.8083874182591296
            y = -(newPos[1] / self.field_height) * 1.16678166 + 0.58339083

        # Euler angles to quaternion conversion (see Wikipedia)
        newQuat = [
            math.sin(newOrientation / 2), 0, 0,
            math.cos(newOrientation / 2)
        ]

        self.sim.data.set_mocap_quat(objectName, newQuat)
        self.sim.data.set_mocap_pos(objectName, np.array([x, y, height]))

    def generatePositions(self, team):
        """Cria o vetor de posições no formato esperado pela estratégia
        O 'sim.data.qpos' possui, em cada posição, o seguinte:
            0: pos X
            1: pos Y
            2: pos Z
            3: quat component w
            4: quat component x
            5: quat component y
            6: quat component z

        Returns:
            Vetor de posições no formato correto
        """
        def yaw(j):
            # Z orientation from the free-joint quaternion stored at qpos[j+3 : j+7]
            q = self.sim.data.qpos
            return math.pi * team - math.atan2(
                2 * (q[j + 3] * q[j + 6] + q[j + 4] * q[j + 6]),
                1 - 2 * (q[j + 5] * q[j + 5] + q[j + 6] * q[j + 6]))

        r1 = yaw(self.robot_joints[3 * team])
        r2 = yaw(self.robot_joints[1 + 3 * team])
        r3 = yaw(self.robot_joints[2 + 3 * team])
        return [
            [  # allied robots
                {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[3 * team]], team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[3 * team] + 1],
                         team)),
                    "orientation":
                    r1,
                    "robotLetter":
                    "A"
                }, {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[1 + 3 * team]],
                        team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[1 + 3 * team] +
                                            1], team)),
                    "orientation":
                    r2,
                    "robotLetter":
                    "B"
                }, {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[2 + 3 * team]],
                        team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[2 + 3 * team] +
                                            1], team)),
                    "orientation":
                    r3,
                    "robotLetter":
                    "C"
                }
            ],
            [  # opponent robots
                {
                    "position":
                    (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(3 + 3 * team) %
                                                             6]], team),
                     self.convertPositionY(
                         self.sim.data.qpos[self.robot_joints[(3 + 3 * team) %
                                                              6] + 1], team)),
                }, {
                    "position": (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(4 + 3 * team) %
                                                             6]], team),
                                 self.convertPositionY(
                                     self.sim.data.qpos[self.robot_joints[
                                         (4 + 3 * team) % 6] + 1], team)),
                }, {
                    "position": (self.convertPositionX(
                        self.sim.data.qpos[self.robot_joints[(5 + 3 * team) %
                                                             6]], team),
                                 self.convertPositionY(
                                     self.sim.data.qpos[self.robot_joints[
                                         (5 + 3 * team) % 6] + 1], team)),
                }
            ],
            {  # ball
                "position":
                (self.convertPositionX(self.sim.data.qpos[self.ball_joint],
                                       team),
                 self.convertPositionY(self.sim.data.qpos[self.ball_joint + 1],
                                       team))
            }
        ]
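
# A minimal usage sketch for the conversion helpers above (hedged: `simulator`
# stands for an instance of this class, and the mocap name "virtual_robot_0"
# is illustrative, not taken from this file):
#
#     simulator.setObjectPose("virtual_robot_0", newPos=(120, 65), team=0,
#                             height=0.04, newOrientation=math.pi / 2)
#     allies, opponents, ball = simulator.generatePositions(team=0)
#     print(allies[0]["position"], allies[0]["orientation"])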
Ejemplo n.º 13
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """
    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                    model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.buffer_size = (1600, 1280)

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': 60,
        }

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if (
            type(observation) is tuple) else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low, high = bounds[:, 0], bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        self.observation_space = spaces.Box(-high, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ------------------------------------------------------------------------

    def reset_model(self):
        """Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """Called when the viewer is initialized and after every reset.
        Optionally implement this method, if you need to tinker with camera
        position and so forth.
        """
        pass

    # ------------------------------------------------------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq, )
        assert qvel.shape == (self.model.nv, )
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer = None
            return

        if mode == 'rgb_array':
            self.viewer_setup()
            return _read_pixels(self.sim, *self.buffer_size)
        elif mode == 'human':
            self._get_viewer().render()

    def _get_viewer(self, mode='human'):
        if self.viewer is None and mode == 'human':
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])
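
# A minimal concrete subclass sketch for this superclass (hedged: the model
# file name and the zero reward are placeholders; __init__ above requires
# subclasses to provide _step and reset_model):
#
#     class PointEnv(MujocoEnv):
#         def __init__(self):
#             MujocoEnv.__init__(self, "point.xml", frame_skip=5)
#
#         def _step(self, action):
#             self.do_simulation(action, self.frame_skip)
#             return self.state_vector(), 0.0, False, {}
#
#         def reset_model(self):
#             self.set_state(self.init_qpos, self.init_qvel)
#             return self.state_vector()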
Ejemplo n.º 14
0
class MujocoEnv(metaclass=EnvMeta):
    """Initializes a Mujoco Environment."""

    parameters_spec = {}

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        use_camera_obs=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            has_renderer (bool): If true, render the simulation state in 
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes 
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes 
                in camera. False otherwise.

            control_freq (float): how many control signals to receive 
                in every simulated second. This sets the amount of simulation time 
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            use_camera_obs (bool): if True, every observation includes a 
                rendered image.

            camera_name (str): name of camera to be rendered. Must be 
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.viewer = None
        self.model = None

        # settings for camera observations
        self.use_camera_obs = use_camera_obs
        if self.use_camera_obs and not self.has_offscreen_renderer:
            raise ValueError(
                "Camera observations require an offscreen renderer.")
        self.camera_name = camera_name
        if self.use_camera_obs and self.camera_name is None:
            raise ValueError("Must specify camera name when using camera obs")
        self.camera_height = camera_height
        self.camera_width = camera_width
        self.camera_depth = camera_depth

        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def reset(self, **kwargs):
        """Resets simulation."""
        # TODO(yukez): investigate black screen of death
        # if there is an active viewer window, destroy it
        self._destroy_viewer()
        self.reset_props(**kwargs)
        self._reset_internal()

        self.sim.forward()
        return self._get_observation()

    def reset_props(self):
        print(
            'INFO from GZZ: this is the base class reset_props. This means the environment does not support domain randomization'
        )

    def init_viewer(self):
        print('init_viewer', self.viewer)

        if self.has_offscreen_renderer:
            print(
                'WARNING from GZZ: robosuite has a bug when simultaneously rendering offscreen (like camera obs) and onscreen (X window), and will give (at least) an onscreen `black screen of death` after a reset. Please check their issue: https://github.com/StanfordVL/robosuite/issues/25 . I was unable to help them debug this.'
            )

        self.viewer = MujocoPyRenderer(self.sim)
        self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh
                                                else 0)
        self.viewer.viewer.vopt.geomgroup[
            1] = 1 if self.render_visual_mesh else 0

        # hiding the overlay speeds up rendering significantly
        self.viewer.viewer._hide_overlay = True

    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # instantiate simulation from MJCF model
        self._load_model()
        self.mjpy_model = self.model.get_model(mode="mujoco_py")
        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.init_viewer()

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """Returns an OrderedDict containing observations [(name_string, np.array), ...]."""
        return OrderedDict()

    def step(self, action):
        """Takes a step in simulation with control command @action."""
        if self.done:
            raise ValueError("executing action in terminated episode")
        self.timestep += 1
        self._pre_action(action)
        end_time = self.cur_time + self.control_timestep
        while self.cur_time < end_time:
            self.sim.step()
            self.cur_time += self.model_timestep
        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action):
        """Do any preprocessing before taking an action."""
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """Do any housekeeping after taking an action."""
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """Reward should be a function of state and action."""
        return 0

    def render(self):
        """
        Renders to an on-screen window.
        """
        if self.viewer is None:
            self.has_renderer = True
            self.init_viewer()

        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys
        are the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        """
        observation = self._get_observation()
        return observation

        # observation_spec = OrderedDict()
        # for k, v in observation.items():
        #     observation_spec[k] = v.shape
        # return observation_spec

    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two numpy
        vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """Reloads the environment from an XML description of the environment."""

        # if there is an active viewer window, destroy it
        self.close()

        # load model from xml
        self.mjpy_model = load_model_from_xml(xml_string)

        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)
        if self.has_renderer and self.viewer is None:
            self.init_viewer()

        elif self.has_offscreen_renderer:
            render_context = MjRenderContextOffscreen(self.sim)
            render_context.vopt.geomgroup[
                0] = 1 if self.render_collision_mesh else 0
            render_context.vopt.geomgroup[
                1] = 1 if self.render_visual_mesh else 0
            self.sim.add_render_context(render_context)

        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # necessary to refresh MjData
        self.sim.forward()

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1: a list of geom names (string)
            geoms_2: another list of geom names (string)

        Returns:
            iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0:self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_contact(self):
        """Returns True if gripper is in contact with an object."""
        return False

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        return False

    def _destroy_viewer(self):
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            print('_destroy_viewer', self.viewer)
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
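
# A rough interaction-loop sketch for this base class (hedged: `MyTaskEnv` is
# a hypothetical subclass that implements _load_model, reward, etc.):
#
#     env = MyTaskEnv(has_renderer=True, has_offscreen_renderer=False)
#     obs = env.reset()
#     for _ in range(env.horizon):
#         action = np.zeros_like(env.sim.data.ctrl)
#         obs, reward, done, info = env.step(action)
#         env.render()
#         if done:
#             break
#     env.close()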
Ejemplo n.º 15
0
def test_inverse_kinematics():
    panda_kinematics = ikpy_panda_kinematics.panda_kinematics()
    model = load_model_from_path("robot.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    timestep = generatePatternedTrajectories.TIMESTEP
    control_freq = 1/timestep
    total_time = 3
    num_cycles = int(total_time * control_freq)
    qd_init = np.zeros(7)
    plt.ion()
    LW = 1.0
    fig = plt.figure(figsize=(4,15))
    axes = []
    lines = []
    goals = []
    min_vals = {'x': -0.5, 'y': -0.5, 'z': 0.2}
    max_vals = {'x': 0.5, 'y': 0.5, 'z': 0.7}
    ylabels = ["x(m)", "y(m)", "z(m)"]
    ylbounds = [min_vals['x'], min_vals['y'], min_vals['z']]
    yubounds = [max_vals['x'], max_vals['y'], max_vals['z']]
    for i in range(3):
        axes.append(fig.add_subplot(3,1,i+1))
        lines.append(axes[i].plot([],[],'b-', lw=LW)[0])
        goals.append(axes[i].plot([],[],'r-', lw=LW)[0])
        axes[i].set_ylim([ylbounds[i], yubounds[i]])
        axes[i].set_xlim([0,total_time])
        axes[i].set_ylabel(ylabels[i], fontsize=8)
        axes[i].set_xlabel("Time(s)", fontsize=8)

    for test in range(5):
        x_target = np.random.rand() * (max_vals['x'] - min_vals['x']) + min_vals['x']
        y_target = np.random.rand() * (max_vals['y'] - min_vals['y']) + min_vals['y']
        z_target = np.random.rand() * (max_vals['z'] - min_vals['z']) + min_vals['z']
        if not (min_vals['x'] <= x_target <= max_vals['x'] and
                min_vals['y'] <= y_target <= max_vals['y'] and
                min_vals['z'] <= z_target <= max_vals['z']):
            print("Error! 'xpos' out of range!")
            print("x = %.3f\ny = %.3f\nz = %.3f" % (x_target, y_target, z_target))
            print(max_vals['y'] - min_vals['y'])
            return
        # x_target, y_target, z_target = 0.088, -0.0, 0.926
        # roll = np.random.rand() * np.pi - np.pi/2
        # pitch = np.random.rand() * np.pi - np.pi/2
        # yaw = np.random.rand() * np.pi - np.pi/2
        roll = 0
        pitch = 0
        yaw = np.pi
        ee_goal = [x_target, y_target, z_target, roll, pitch, yaw]

        # temp = bounds.getRandPosInBounds()
        # ee_goal = panda_kinematics.forward_kinematics(temp)
        q_init = bounds.getRandPosInBounds()
        q_goal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q_init)

        for g in range(3):
            goals[g].set_ydata(np.ones(num_cycles) * ee_goal[g])
            goals[g].set_xdata(np.linspace(0,3,num_cycles))
        sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={}))
        sim.step()
        sim_time = 0
        for i in range(num_cycles):
            state = sim.get_state()
            q = state[1]
            qd = state[2]
            sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=q_goal)
            sim.step()
            viewer.render()
            if i % 70 == 0:
                xpos = panda_kinematics.forward_kinematics(q)
                for a in range(3):
                    lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time))
                    lines[a].set_ydata(np.append(lines[a].get_ydata(), xpos[a]))
                fig.canvas.draw()
                fig.canvas.flush_events()
                print("[q\t]:{}".format(np.around(q,3)))
                print("[qgoal\t]:{}".format(np.around(q_goal,3)))
                print("[qgoal2\t]:{}".format(np.around(panda_kinematics.inverse_kinematics(ee_goal[0:3], ee_goal[3:6]),3)))
                print("[EE\t]:{}".format(np.around(panda_kinematics.forward_kinematics(q),3)))
                print("[EEgoal\t]:{}".format(np.around(ee_goal,3)))
            sim_time += timestep
        # panda_kinematics.plot_stick_figure(q)
        for i in range(3):
            lines[i].set_xdata([])
            lines[i].set_ydata([])
        time.sleep(1)
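
# For reference, the shape of the PD law this test relies on (hedged sketch;
# the actual gains and signature live in the external `controllers` module,
# not in this file):
#
#     def pd_control(q, qd, qgoal, kp=80.0, kd=8.0):
#         # torque = Kp * position error - Kd * joint velocity
#         return kp * (qgoal - q) - kd * qd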
Ejemplo n.º 16
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--robot_file',
                        default='../../xml/simrobot/7dof/robot_mocap.xml',
                        type=str,
                        help='path to robot config')
    parser.add_argument('--save_dir',
                        default='../../xml/gen_xmls/simrobot/reacher',
                        type=str,
                        help='path to save initial joint poses')
    print('Program starts at: \033[92m %s \033[0m' %
          datetime.now().strftime("%Y-%m-%d %H:%M"))
    args = parser.parse_args()
    np.random.seed(args.seed)
    filename = 'poses.json'
    robot_file = args.robot_file
    save_dir = args.save_dir

    ini_bds = np.array([[0.3, 0.6], [-0.2, 0.2], [0.5, 0.7]])
    tgt_bds = np.array([[0.3, 0.6], [-0.3, 0.3], [-0.1, 0.3]])

    initial_num = [4, 5, 3]
    target_num = [4, 7, 5]
    target_state = np.mgrid[tgt_bds[0][0]:tgt_bds[0][1]:complex(target_num[0]),
                            tgt_bds[1][0]:tgt_bds[1][1]:complex(target_num[1]),
                            tgt_bds[2][0]:tgt_bds[2][1]:complex(target_num[2])]
    target_state = target_state.reshape(3, -1).T
    initial_state = np.mgrid[
        ini_bds[0][0]:ini_bds[0][1]:complex(initial_num[0]),
        ini_bds[1][0]:ini_bds[1][1]:complex(initial_num[1]),
        ini_bds[2][0]:ini_bds[2][1]:complex(initial_num[2])]
    initial_state = initial_state.reshape(3, -1).T

    np.random.shuffle(target_state)
    np.random.shuffle(initial_state)

    assert os.path.exists(robot_file)
    model = load_model_from_path(robot_file)
    sim = MjSim(model, nsubsteps=40)
    sim.reset()
    sim.step()
    site_xpos_cur = sim.data.site_xpos[0]
    print('site xpos:', site_xpos_cur)
    init_joint_angles = []
    goal_poses = []
    for idx in tqdm(range(initial_state.shape[0])):
        sim.reset()
        sim.step()
        site_xpos_target = initial_state[idx]
        delta = site_xpos_target - site_xpos_cur
        sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta
        for i in range(30):
            sim.step()
            dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target)
            if dist < 0.002:
                joint_angle = sim.data.qpos[:7]
                init_joint_angles.append(joint_angle.tolist())
                break

    for idx in tqdm(range(target_state.shape[0])):
        sim.reset()
        sim.step()
        site_xpos_target = target_state[idx]
        delta = site_xpos_target - site_xpos_cur
        sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta
        for i in range(30):
            sim.step()
            dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target)
            if dist < 0.002:
                goal_poses.append(site_xpos_target.tolist())
                break
    print('valid initial pose num: ', len(init_joint_angles))
    print('valid goal pose num: ', len(goal_poses))
    data = {'initial_joint_angles': init_joint_angles, 'ee_goal': goal_poses}
    os.makedirs(save_dir, exist_ok=True)
    save_file = os.path.join(save_dir, filename)
    print('saving to: ', save_file)
    with open(save_file, 'w') as f:
        json.dump(data, f, indent=2)

    print('Program ends at: \033[92m %s \033[0m' %
          datetime.now().strftime("%Y-%m-%d %H:%M"))
Ejemplo n.º 17
0
def test_mj_sim_buffers():
    model = load_model_from_xml(BASIC_MODEL_XML)

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    assert (sim.udd_state == {})

    sim.step()
    assert (sim.udd_state == {})

    # test with callback
    foo = 10
    d = {"foo": foo, "foo_2": np.array([foo, foo])}

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)
    assert (sim.udd_state is not None)
    assert (sim.udd_state["foo"] == foo)
    assert (sim.udd_state["foo_2"].shape[0] == 2)
    assert (sim.udd_state["foo_2"][0] == foo)

    foo = 11
    d = {"foo": foo, "foo_2": np.array([foo, foo])}
    sim.step()
    assert (sim.udd_state is not None)
    assert (sim.udd_state["foo"] == foo)
    assert (sim.udd_state["foo_2"][0] == foo)

    d = {}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo, "foo_2": np.array([foo, foo]), "foo_3": foo}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo, "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": "haha", "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()
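
# The assertions above pin down the udd_callback contract: the dict it returns
# must keep the same keys, array shapes, and value types on every step. A
# minimal well-behaved callback (hedged sketch):
#
#     def height_callback(sim):
#         # one stable key, one stable scalar type
#         return {"z": float(sim.data.qpos[2])}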
Ejemplo n.º 18
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except Exception:
            # viewer not created yet: set it up once, then render
            self.mj_viewer_setup()
            self.viewer._run_speed = 1.0
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------

    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=640, height=480, mode='offscreen',
                                             camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite( file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f"% (t1-t0))
    def train_POMDP(self):
        args = self.args
        ROOT_DIR = os.path.dirname(os.path.dirname(
            os.path.abspath(__file__)))  # corl2019
        PARENT_DIR = os.path.dirname(ROOT_DIR)  # research
        # Create the output directory if it does not exist
        output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp',
                                  args.output_dir)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        # write args to file
        with open(os.path.join(output_dir, 'args.txt'), 'w+') as f:
            json.dump(args.__dict__, f, indent=2)

        # Create our policy net and a target net
        self.policy_net_1 = DRQN(args.ftdim, args.outdim).to(args.device)
        self.policy_net_2 = DRQN(args.ftdim, args.outdim).to(args.device)
        if args.position:
            self.tactile_net_1 = TactileNet(args.indim - 6,
                                            args.ftdim).to(args.device)
            self.tactile_net_2 = TactileNet(args.indim - 6,
                                            args.ftdim).to(args.device)
        elif args.force:
            self.tactile_net_1 = TactileNet(args.indim - 390,
                                            args.ftdim).to(args.device)
            self.tactile_net_2 = TactileNet(args.indim - 390,
                                            args.ftdim).to(args.device)
        else:
            self.tactile_net_1 = TactileNet(args.indim,
                                            args.ftdim).to(args.device)
            self.tactile_net_2 = TactileNet(args.indim,
                                            args.ftdim).to(args.device)

        # Set up the optimizer
        self.policy_optimizer_1 = optim.RMSprop(self.policy_net_1.parameters(),
                                                lr=args.lr)
        self.policy_optimizer_2 = optim.RMSprop(self.policy_net_2.parameters(),
                                                lr=args.lr)
        self.tactile_optimizer_1 = optim.RMSprop(
            self.tactile_net_1.parameters(), lr=args.lr)
        self.tactile_optimizer_2 = optim.RMSprop(
            self.tactile_net_2.parameters(), lr=args.lr)
        self.memory = RecurrentMemory(800)
        self.steps_done = 0

        # Setup the state normalizer
        normalizer = Multimodal_Normalizer(num_inputs=args.indim,
                                           device=args.device)

        print_variables = {'durations': [], 'rewards': [], 'loss': []}
        start_episode = 0
        if args.weight_policy:
            if os.path.exists(args.weight_policy):
                checkpoint = torch.load(args.weight_policy)
                self.policy_net_1.load_state_dict(checkpoint['policy_net_1'])
                self.policy_net_2.load_state_dict(checkpoint['policy_net_2'])
                self.policy_optimizer_1.load_state_dict(
                    checkpoint['policy_optimizer_1'])
                self.policy_optimizer_2.load_state_dict(
                    checkpoint['policy_optimizer_2'])
                start_episode = checkpoint['epochs']
                self.steps_done = checkpoint['steps_done']
                with open(
                        os.path.join(os.path.dirname(args.weight_policy),
                                     'results_pomdp.pkl'), 'rb') as file:
                    plot_dict = pickle.load(file)
                    print_variables['durations'] = plot_dict['durations']
                    print_variables['rewards'] = plot_dict['rewards']

        if args.normalizer_file:
            if os.path.exists(args.normalizer_file):
                normalizer.restore_state(args.normalizer_file)

        if args.memory:
            if os.path.exists(args.memory):
                self.memory.load(args.memory)

        if args.weight_tactile:
            checkpoint = torch.load(args.weight_tactile)
            self.tactile_net_1.load_state_dict(checkpoint['tactile_net_1'])
            self.tactile_optimizer_1.load_state_dict(
                checkpoint['tactile_optimizer_1'])
            self.tactile_net_2.load_state_dict(checkpoint['tactile_net_2'])
            self.tactile_optimizer_2.load_state_dict(
                checkpoint['tactile_optimizer_2'])

        action_space = ActionSpace(dp=0.06, df=10)

        # Create robot, reset simulation and grasp handle
        model = load_model_from_path(args.model_path)
        sim = MjSim(model)
        sim_param = SimParameter(sim)
        sim.step()
        if args.render:
            viewer = MjViewer(sim)
        else:
            viewer = None

        robot = RobotSim(sim, viewer, sim_param, args.render,
                         self.break_threshold)

        tactile_obs_space = TactileObs(
            robot.get_gripper_xpos(),  # 24
            robot.get_all_touch_buffer(args.hap_sample))  # 30 x 6

        # Main training loop
        for ii in range(start_episode, args.epochs):
            self.steps_done += 1
            start_time = time.time()
            act_sequence = []
            act_length = []
            velcro_params = init_model(robot.mj_sim)
            robot.reset_simulation()
            ret = robot.grasp_handle()
            if not ret:
                continue

            # Local memory for current episode
            localMemory = []

            # Get current observation
            hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states(
                batch_size=1, device=args.device)
            hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states(
                batch_size=1, device=args.device)

            broken_so_far = 0

            # pick a random action initially
            action = random.randrange(0, 5)
            current_state = None
            next_state = None

            t = 0

            while t < args.max_iter:
                if not args.quiet and t == 0:
                    print("Running training episode: {}".format(ii, t))

                if args.position:
                    multistep_obs = np.empty((0, args.indim - 6))
                elif args.force:
                    multistep_obs = np.empty((0, args.indim - 390))
                else:
                    multistep_obs = np.empty((0, args.indim))

                prev_action = action

                for k in range(args.len_ub):
                    # Observe tactile features and stack them
                    tactile_obs = tactile_obs_space.get_state()
                    normalizer.observe(tactile_obs)
                    tactile_obs = normalizer.normalize(tactile_obs)

                    if args.position:
                        tactile_obs = tactile_obs[6:]
                    elif args.force:
                        tactile_obs = tactile_obs[:6]

                    multistep_obs = np.vstack((multistep_obs, tactile_obs))

                    # current jpos
                    current_pos = robot.get_gripper_jpos()[:3]

                    # Perform action
                    delta = action_space.get_action(
                        self.ACTIONS[action])['delta'][:3]
                    target_position = np.add(robot.get_gripper_jpos()[:3],
                                             np.array(delta))
                    target_pose = np.hstack(
                        (target_position, robot.get_gripper_jpos()[3:]))
                    robot.move_joint(target_pose,
                                     True,
                                     self.gripping_force,
                                     hap_sample=args.hap_sample)

                    # Observe new state
                    tactile_obs_space.update(
                        robot.get_gripper_xpos(),  # 24
                        robot.get_all_touch_buffer(args.hap_sample))  # 30x6

                    displacement = la.norm(robot.get_gripper_jpos()[:3] -
                                           current_pos)

                    if displacement / 0.06 < 0.7:
                        break

                # Feed the stitched multi-step tactile observation into the tactile net to generate a tactile feature
                action, hidden_state_1, cell_state_1 = self.select_action(
                    multistep_obs, hidden_state_1, cell_state_1)

                if t == 0:
                    next_state = multistep_obs.copy()
                else:
                    current_state = next_state.copy()
                    next_state = multistep_obs.copy()

                # record actions in this epoch
                act_sequence.append(prev_action)
                act_length.append(k)

                # Get reward
                done, num = robot.update_tendons()
                failure = robot.check_slippage()
                if num > broken_so_far:
                    reward = num - broken_so_far
                    broken_so_far = num
                else:
                    if failure:
                        reward = -20
                    else:
                        reward = 0

                t += k + 1
                # Set max number of iterations
                if t >= self.max_iter:
                    done = True

                if done or failure:
                    next_state = None

                # Push new Transition into memory
                if t > k + 1:
                    localMemory.append(
                        Transition(current_state, prev_action, next_state,
                                   reward))

                # Optimize the model
                if self.steps_done % 10 == 0:
                    self.optimize()

                # If we are done, reset the model
                if done or failure:
                    self.memory.push(localMemory)
                    if failure:
                        print_variables['durations'].append(self.max_iter)
                    else:
                        print_variables['durations'].append(t)
                    print_variables['rewards'].append(broken_so_far)
                    plot_variables(self.figure, print_variables,
                                   "Training POMDP")
                    print("Model parameters: {}".format(velcro_params))
                    print(
                        "Actions in this epoch ({} total): {}\nAction lengths: {}"
                        .format(len(act_sequence), act_sequence, act_length))
                    print("Epoch {} took {}s, total number broken: {}\n\n".
                          format(ii,
                                 time.time() - start_time, broken_so_far))

                    break

            # Save checkpoints every few iterations
            if ii % args.save_freq == 0:
                save_path = os.path.join(output_dir,
                                         'policy_' + str(ii) + '.pth')
                torch.save(
                    {
                        'epochs': ii,
                        'steps_done': self.steps_done,
                        'policy_net_1': self.policy_net_1.state_dict(),
                        'policy_net_2': self.policy_net_2.state_dict(),
                        'policy_optimizer_1':
                        self.policy_optimizer_1.state_dict(),
                        'policy_optimizer_2':
                        self.policy_optimizer_2.state_dict(),
                    }, save_path)
                save_path = os.path.join(output_dir,
                                         'tactile_' + str(ii) + '.pth')
                torch.save(
                    {
                        'tactile_net_1':
                        self.tactile_net_1.state_dict(),
                        'tactile_net_2':
                        self.tactile_net_2.state_dict(),
                        'tactile_optimizer_1':
                        self.tactile_optimizer_1.state_dict(),
                        'tactile_optimizer_2':
                        self.tactile_optimizer_2.state_dict(),
                    }, save_path)

                write_results(os.path.join(output_dir, 'results_pomdp.pkl'),
                              print_variables)

                self.memory.save_memory(
                    os.path.join(output_dir, 'memory.pickle'))

        if args.savefig_path:
            now = dt.datetime.now()
            self.figure[0].savefig(
                args.savefig_path +
                '{}_{}_{}.png'.format(now.month, now.day, now.hour),
                format='png')

        print('Training done')
        plt.show()
        return print_variables
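
# The replay entries pushed above follow the usual recurrent-DQN pattern
# (hedged: Transition is imported from elsewhere; a definition matching its
# use here would be):
#
#     from collections import namedtuple
#     Transition = namedtuple('Transition',
#                             ('state', 'action', 'next_state', 'reward'))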
Ejemplo n.º 20
0
def test_jacobians():
    xml = """
    <mujoco>
        <worldbody>
            <body name="body1" pos="0 0 0">
                <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
                <geom name="geom1" pos="0 0 0" size="1.0"/>
                <body name="body2" pos="0 0 1">
                    <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
                    <geom name="geom2" pos="1 1 1" size="0.5"/>
                    <site name="target" size="0.1"/>
                </body>
            </body>
        </worldbody>
        <actuator>
            <motor joint="a"/>
            <motor joint="b"/>
        </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in a bad array (int64 instead of double)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in a bad array (int64 instead of double)
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
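
# A common use of these jacobians (hedged sketch, reusing the model above):
# the site's linear velocity in the world frame is jacp @ qvel.
#
#     jacp = sim.data.get_site_jacp('target').reshape(3, sim.model.nv)
#     site_vel = jacp.dot(sim.data.qvel)  # should match get_site_xvelp('target')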
Ejemplo n.º 21
0
class BaseEnv:
    def __init__(
        self,
        robot_folders,
        robot_dir,
        substeps,
        tol=0.02,
        train=True,
        with_kin=None,
        with_dyn=None,
        multi_goal=False,
    ):
        self.with_kin = with_kin
        self.with_dyn = with_dyn
        self.multi_goal = multi_goal
        self.goal_dim = 3

        if self.with_dyn:
            norm_file = os.path.join(robot_dir, 'stats/dyn_stats.json')
            with open(norm_file, 'r') as f:
                stats = json.load(f)
            self.dyn_mu = np.array(stats['mu']).reshape(-1)
            self.dyn_sigma = np.array(stats['sigma']).reshape(-1)
            self.dyn_min = np.array(stats['min']).reshape(-1)
            self.dyn_max = np.array(stats['max']).reshape(-1)

        self.nsubsteps = substeps
        self.metadata = {}
        self.reward_range = (-np.inf, np.inf)
        self.spec = None
        self.dist_tol = tol
        self.viewer = None

        self.links = [
            'gl0', 'gl1_1', 'gl1_2', 'gl2', 'gl3_1', 'gl3_2', 'gl4', 'gl5_1',
            'gl5_2', 'gl6'
        ]
        self.bodies = [
            "connector_plate_base", "electric_gripper_base",
            "gripper_l_finger", "gripper_l_finger_tip", "gripper_r_finger",
            "gripper_r_finger_tip", "l0", "l1", "l2", "l3", "l4", "l5", "l6"
        ]
        self.site_set = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6']
        self.joint_set = self.site_set

        self.robots = []
        for folder in robot_folders:
            self.robots.append(os.path.join(robot_dir, folder))

        self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)}
        self.robot_num = len(self.robots)

        if train:
            self.test_robot_num = min(100, self.robot_num)
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(
                range(self.train_robot_num, self.robot_num))
            self.train_test_robot_num = min(100, self.train_robot_num)
            self.train_test_robot_ids = list(range(self.train_test_robot_num))
            self.train_test_conditions = self.train_test_robot_num
        else:
            self.test_robot_num = self.robot_num
            self.train_robot_num = self.robot_num - self.test_robot_num
            self.test_robot_ids = list(range(self.robot_num))

        self.test_conditions = self.test_robot_num

        print('Train robots: ', self.train_robot_num)
        print('Test robots: ', self.test_robot_num)
        print('Multi goal:', self.multi_goal)
        self.reset_robot(0)

        self.ob_dim = self.get_obs()[0].size
        print('Ob dim:', self.ob_dim)

        high = np.inf * np.ones(self.ob_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.ep_reward = 0
        self.ep_len = 0

    def reset(self, robot_id=None):
        raise NotImplementedError

    def step(self, action):
        raise NotImplementedError

    def update_action_space(self):
        actuators = self.sim.model._actuator_name2id.keys()
        valid_joints = [ac for ac in actuators if ac in self.joint_set]
        self.act_dim = len(valid_joints)
        bounds = self.sim.model.actuator_ctrlrange[:self.act_dim]
        self.ctrl_low = np.copy(bounds[:, 0])
        self.ctrl_high = np.copy(bounds[:, 1])
        self.action_space = spaces.Box(self.ctrl_low,
                                       self.ctrl_high,
                                       dtype=np.float32)

    def scale_action(self, action):
        act_k = (self.action_space.high - self.action_space.low) / 2.
        act_b = (self.action_space.high + self.action_space.low) / 2.
        return act_k * action + act_b

    def reset_robot(self, robot_id):
        self.robot_folder_id = self.dir2id[self.robots[robot_id]]
        robot_file = os.path.join(self.robots[robot_id], 'robot.xml')
        self.model = load_model_from_path(robot_file)
        self.sim = MjSim(self.model, nsubsteps=self.nsubsteps)
        self.update_action_space()
        self.sim.reset()
        self.sim.step()
        if self.viewer is not None:
            self.viewer = MjViewer(self.sim)

    def test_reset(self, cond):
        robot_id = self.test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def train_test_reset(self, cond):
        robot_id = self.train_test_robot_ids[cond]
        return self.reset(robot_id=robot_id)

    def cal_reward(self, s, goal, a):
        dist = np.linalg.norm(s - goal)

        if dist < self.dist_tol:
            done = True
            reward_dist = 1
        else:
            done = False
            reward_dist = -1
        reward = reward_dist
        reward -= 0.1 * np.square(a).sum()
        return reward, dist, done

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
        self.viewer.render()

    def get_obs(self):
        qpos = self.get_qpos(self.sim)
        qvel = self.get_qvel(self.sim)

        ob = np.concatenate([qpos, qvel])
        if self.with_kin:
            link_rela = self.get_xpos_xrot(self.sim)
            ob = np.concatenate([ob, link_rela])
        if self.with_dyn:
            body_mass = self.get_body_mass(self.sim)
            joint_friction = self.get_friction(self.sim)
            joint_damping = self.get_damping(self.sim)
            armature = self.get_armature(self.sim)
            dyn_vec = np.concatenate(
                (body_mass, joint_friction, joint_damping, armature))
            dyn_vec = np.divide((dyn_vec - self.dyn_min),
                                self.dyn_max - self.dyn_min)
            ob = np.concatenate([ob, dyn_vec])
        target_pos = self.sim.data.get_site_xpos('target').flatten()
        ob = np.concatenate([ob, target_pos])
        achieved_goal = self.sim.data.get_site_xpos('ref_pt')
        return ob, achieved_goal

    def get_link_length(self, sim):
        link_length = []
        for link in self.links:
            geom_id = sim.model._geom_name2id[link]
            link_length.append(sim.model.geom_size[geom_id][1])
        return np.asarray(link_length).reshape(-1)

    def get_qpos(self, sim):
        angle_noise_range = 0.02
        qpos = sim.data.qpos[:self.act_dim]
        qpos += np.random.uniform(-angle_noise_range, angle_noise_range,
                                  self.act_dim)
        qpos = np.pad(qpos, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qpos.reshape(-1)

    def get_qvel(self, sim):
        velocity_noise_range = 0.02
        qvel = sim.data.qvel[:self.act_dim]
        qvel += np.random.uniform(-velocity_noise_range, velocity_noise_range,
                                  self.act_dim)
        qvel = np.pad(qvel, (0, 7 - self.act_dim),
                      mode='constant',
                      constant_values=0)
        return qvel.reshape(-1)

    def get_xpos_xrot(self, sim):
        xpos = []
        xrot = []
        for joint_id in range(self.act_dim):
            joint = sim.model._actuator_id2name[joint_id]
            if joint == 'j0':
                pos1 = sim.data.get_body_xpos('base_link')
                mat1 = sim.data.get_body_xmat('base_link')
            else:
                prev_id = joint_id - 1
                prev_joint = sim.model._actuator_id2name[prev_id]
                pos1 = sim.data.get_site_xpos(prev_joint)
                mat1 = sim.data.get_site_xmat(prev_joint)
            pos2 = sim.data.get_site_xpos(joint)
            mat2 = sim.data.get_site_xmat(joint)
            relative_pos = pos2 - pos1
            rot_euler = self.relative_rotation(mat1, mat2)
            xpos.append(relative_pos)
            xrot.append(rot_euler)
        xpos = np.array(xpos).flatten()
        xrot = np.array(xrot).flatten()
        xpos = np.pad(xpos, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        xrot = np.pad(xrot, (0, (7 - self.act_dim) * 3),
                      mode='constant',
                      constant_values=0)
        ref_pt_xpos = self.sim.data.get_site_xpos('ref_pt')
        ref_pt_xmat = self.sim.data.get_site_xmat('ref_pt')
        relative_pos = ref_pt_xpos - pos2
        rot_euler = self.relative_rotation(mat2, ref_pt_xmat)
        xpos = np.concatenate((xpos, relative_pos.flatten()))
        xrot = np.concatenate((xrot, rot_euler.flatten()))
        pos_rot = np.concatenate((xpos, xrot))
        return pos_rot

    def get_damping(self, sim):
        damping = sim.model.dof_damping[:self.act_dim]
        damping = np.pad(damping, (0, 7 - self.act_dim),
                         mode='constant',
                         constant_values=0)
        return damping.reshape(-1)

    def get_friction(self, sim):
        friction = sim.model.dof_frictionloss[:self.act_dim]
        friction = np.pad(friction, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return friction.reshape(-1)

    def get_body_mass(self, sim):
        body_mass = []
        for body in self.bodies:
            body_id = sim.model._body_name2id[body]
            body_mass.append(sim.model.body_mass[body_id])
        return np.asarray(body_mass).reshape(-1)

    def get_armature(self, sim):
        armature = sim.model.dof_armature[:self.act_dim]
        armature = np.pad(armature, (0, 7 - self.act_dim),
                          mode='constant',
                          constant_values=0)
        return armature.reshape(-1)

    def relative_rotation(self, mat1, mat2):
        # Return the Euler x, y, z angles of the rotation from frame 2 to
        # frame 1, i.e. mat2 expressed in mat1's coordinate system (for
        # rotation matrices, inv(mat1) == mat1.T).
        rela_mat = np.dot(np.linalg.inv(mat1), mat2)
        return rotations.mat2euler(rela_mat)

    def close(self):
        pass
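
# --- Usage sketch (added for illustration; not from the original source) ---
# A standalone look at the min-max scaling used in get_obs() above: each
# dynamics parameter is mapped into [0, 1] via (x - min) / (max - min).
# The bound values below are made-up placeholders.
import numpy as np

dyn_vec = np.array([3.0, 0.4, 0.02])   # e.g. a mass, a damping, a friction
dyn_min = np.array([1.0, 0.0, 0.0])    # hypothetical lower bounds
dyn_max = np.array([5.0, 1.0, 0.1])    # hypothetical upper bounds
scaled = np.divide(dyn_vec - dyn_min, dyn_max - dyn_min)
print(scaled)                          # -> [0.5 0.4 0.2]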
Ejemplo n.º 22
0
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self._seed()

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False


        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)
        self.sim.forward()



    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def mj_viewer_setup(self):
        """
        Due to specifics of new mujoco rendering, the standard viewer cannot be used
        with this set-up. Instead we use this mujoco specific function.
        """
        pass

    def viewer_setup(self):
        """
        Does not work. Use mj_viewer_setup() instead
        """
        pass

    # -----------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        state.qpos[:] = qpos
        state.qvel[:] = qvel
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip
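
    # Example: with MuJoCo's default timestep of 0.002 s and frame_skip = 5,
    # dt = 0.01 s, so metadata reports video.frames_per_second = 100.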

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()
            if self.mujoco_render_frames is True:
                self.mj_render()

    def mj_render(self):
        try:
            self.viewer.render()
        except AttributeError:
            # the viewer has not been created yet; set it up first
            self.mj_viewer_setup()
            self.viewer._run_speed = 0.5
            #self.viewer._run_speed /= self.frame_skip
            self.viewer.render()

    def _get_viewer(self):
        return None

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([
            state.qpos.flat, state.qvel.flat])

    # -----------------------------


    def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'):
        self.mujoco_render_frames = True
        for ep in range(num_episodes):
            o = self.reset()
            d = False
            t = 0
            while t < horizon and d is False:
                # object-only policies see a suffix of the observation;
                # all other policies receive it whole
                if policy.type == 'object':
                    obs_used = o[self.object_start:]
                else:
                    obs_used = o
                a = policy.get_action(obs_used)[0] if mode == 'exploration' else policy.get_action(obs_used)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t + 1

        self.mujoco_render_frames = False

    def visualize_policy_offscreen(self, policy, horizon=1000,
                                   num_episodes=1,
                                   frame_size=(640,480),
                                   mode='exploration',
                                   save_loc='/tmp/',
                                   filename='newvid',
                                   camera_name=None):
        import skvideo.io
        for ep in range(num_episodes):
            print("Episode %d: rendering offline " % ep, end='', flush=True)
            o = self.reset()
            d = False
            t = 0
            arrs = []
            t0 = timer.time()
            while t < horizon and d is False:
                a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation']
                o, r, d, _ = self.step(a)
                t = t+1
                curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1],
                                             mode='offscreen', camera_name=camera_name, device_id=0)
                arrs.append(curr_frame[::-1,:,:])
                print(t, end=', ', flush=True)
            file_name = save_loc + filename + str(ep) + ".mp4"
            skvideo.io.vwrite(file_name, np.asarray(arrs))
            print("saved", file_name)
            t1 = timer.time()
            print("time taken = %f" % (t1 - t0))
Ejemplo n.º 23
0
class Dribble_Env(object):
    def __init__(self):
        self.model = load_model_from_path("./xml/world5.xml")
        self.sim = MjSim(self.model)
        self.viewer = MyMjViewer(self.sim)
        self.max_vel = [-1000, 1000]
        self.x_motor = 0
        self.y_motor = 0
        self.date_time = datetime.datetime.now()
        self.path = "./datas/path_date" + str(
            self.date_time.strftime("_%Y%m%d_%H%M%S"))
        os.mkdir(self.path)

    def step(self, action):
        self.x_motor = ((action % 3) - 1) * 200
        self.y_motor = ((action // 3) - 1) * 200
        self.sim.data.ctrl[0] = self.x_motor
        self.sim.data.ctrl[1] = self.y_motor
        self.sim.step()

    def get_state(self):
        robot_x, robot_y = self.sim.data.body_xpos[1][0:2]
        robot_xv, robot_yv = self.sim.data.qvel[0:2]
        ball_x, ball_y = self.sim.data.body_xpos[2][0:2]
        ball_xv, ball_yv = self.sim.data.qvel[2:4]
        ball_pos_local = ball_x - robot_x, ball_y - robot_y
        object1_x, object1_y = (self.sim.data.body_xpos[3][0] - robot_x,
                                self.sim.data.body_xpos[3][1] - robot_y)
        object2_x, object2_y = (self.sim.data.body_xpos[4][0] - robot_x,
                                self.sim.data.body_xpos[4][1] - robot_y)
        object3_x, object3_y = (self.sim.data.body_xpos[5][0] - robot_x,
                                self.sim.data.body_xpos[5][1] - robot_y)
        object4_x, object4_y = (self.sim.data.body_xpos[6][0] - robot_x,
                                self.sim.data.body_xpos[6][1] - robot_y)

        return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \
                robot_xv, robot_yv, object1_x, object1_y, object2_x, object2_y,\
                object3_x, object3_y, object4_x, object4_y, ball_x, ball_y, ball_xv, ball_yv]

    def check_done(self):
        ball_x, ball_y = self.get_state()[14:16]
        if ball_x > 80 and -25 < ball_y < 25:
            return True
        else:
            return False

    def check_wall(self):
        ball_x, ball_y = self.get_state()[14:16]
        if math.fabs(ball_y) > 51:
            return True
        elif ball_x > 81 and math.fabs(ball_y) > 25:
            return True
        else:
            return False

    def check_avoidaince(self, object_num=4):
        for i in range(object_num):
            if math.fabs(self.sim.data.qvel[5 + i * 3]) > 0.1 or math.fabs(
                    self.sim.data.qvel[6 + 3 * i]) > 0.1:
                return True
        return False

    def reset(self):
        self.x_motor = 0
        self.y_motor = 0

        self.robot_x_data = []
        self.robot_y_data = []
        self.ball_x_data = []
        self.ball_y_data = []
        self.sim.reset()
        # self.sim.data.qpos[0] = np.random.randint(-3,3)
        self.sim.data.qpos[1] = np.random.randint(-5, 5)

    def render(self):
        self.viewer.render()

    def plot_data(self, step, t, done, episode, flag, reward):
        self.field_x = [-90, -90, 90, 90, -90]
        self.field_y = [-60, 60, 60, -60, -60]
        self.robot_x_data.append(self.sim.data.body_xpos[1][0])
        self.robot_y_data.append(self.sim.data.body_xpos[1][1])
        self.ball_x_data.append(self.sim.data.body_xpos[2][0])
        self.ball_y_data.append(self.sim.data.body_xpos[2][1])

        datas = ' '.join(
            str(v) for v in (self.robot_x_data[-1], self.robot_y_data[-1],
                             self.ball_x_data[-1], self.ball_y_data[-1],
                             reward))
        with open(
                self.path + '/plotdata_' + str(episode + 1).zfill(4) + '.txt',
                'a') as f:
            f.write(datas + '\n')

        if (t >= step - 1 or done) and flag:
            fig1 = plt.figure()
            plt.ion()
            plt.show()
            plt.plot(self.ball_x_data,
                     self.ball_y_data,
                     marker='o',
                     markersize=2,
                     color="red",
                     label="ball")
            plt.plot(self.robot_x_data,
                     self.robot_y_data,
                     marker="o",
                     markersize=2,
                     color='blue',
                     label="robot")
            plt.plot(self.sim.data.body_xpos[3][0],
                     self.sim.data.body_xpos[3][1],
                     marker="o",
                     markersize=8,
                     color='black')
            plt.plot(self.sim.data.body_xpos[4][0],
                     self.sim.data.body_xpos[4][1],
                     marker="o",
                     markersize=8,
                     color='black')
            plt.plot(self.sim.data.body_xpos[5][0],
                     self.sim.data.body_xpos[5][1],
                     marker="o",
                     markersize=8,
                     color='black')
            plt.plot(self.sim.data.body_xpos[6][0],
                     self.sim.data.body_xpos[6][1],
                     marker="o",
                     markersize=8,
                     color='black')
            plt.plot(self.field_x, self.field_y, markersize=1, color="black")
            plt.plot(80, 0, marker="X", color="green", label="goal")
            plt.legend(loc="lower right")
            plt.draw()
            plt.pause(0.001)
            plt.close(1)

    def train_POMDP(self):
        args = self.args
        ROOT_DIR = os.path.dirname(os.path.dirname(
            os.path.abspath(__file__)))  # corl2019
        PARENT_DIR = os.path.dirname(ROOT_DIR)  # research
        # Create the output directory if it does not exist
        output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp',
                                  args.output_dir)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        # write args to file
        with open(os.path.join(output_dir, 'args.txt'), 'w+') as f:
            json.dump(args.__dict__, f, indent=2)

        # Create our policy net and a target net
        self.policy_net_1 = DRQN(args.indim, args.outdim).to(args.device)
        self.policy_net_2 = DRQN(args.indim, args.outdim).to(args.device)
        self.policy_net_3 = DRQN(args.indim, args.outdim).to(args.device)

        # Set up the optimizer
        self.optimizer_1 = optim.RMSprop(self.policy_net_1.parameters())
        self.optimizer_2 = optim.RMSprop(self.policy_net_2.parameters())
        self.optimizer_3 = optim.RMSprop(self.policy_net_3.parameters())
        self.memory = RecurrentMemory(800)
        self.steps_done = 0

        # Setup the state normalizer
        normalizer = Multimodal_Normalizer(num_inputs=args.indim,
                                           device=args.device)

        print_variables = {'durations': [], 'rewards': [], 'loss': []}
        start_episode = 0
        if args.checkpoint_file:
            if os.path.exists(args.checkpoint_file):
                checkpoint = torch.load(args.checkpoint_file)
                self.policy_net_1.load_state_dict(checkpoint['policy_net_1'])
                self.policy_net_2.load_state_dict(checkpoint['policy_net_2'])
                self.policy_net_3.load_state_dict(checkpoint['policy_net_3'])
                self.optimizer_1.load_state_dict(checkpoint['optimizer_1'])
                self.optimizer_2.load_state_dict(checkpoint['optimizer_2'])
                self.optimizer_3.load_state_dict(checkpoint['optimizer_3'])
                start_episode = checkpoint['epochs']
                self.steps_done = checkpoint['steps_done']
                with open(
                        os.path.join(os.path.dirname(args.checkpoint_file),
                                     'results_pomdp.pkl'), 'rb') as file:
                    plot_dict = pickle.load(file)
                    print_variables['durations'] = plot_dict['durations']
                    print_variables['rewards'] = plot_dict['rewards']

        if args.normalizer_file:
            if os.path.exists(args.normalizer_file):
                normalizer.restore_state(args.normalizer_file)

        if args.memory:
            if os.path.exists(args.memory):
                self.memory.load(args.memory)

        action_space = ActionSpace(dp=0.06, df=10)

        # Create robot, reset simulation and grasp handle
        model = load_model_from_path(args.model_path)
        sim = MjSim(model)
        sim_param = SimParameter(sim)
        sim.step()
        if args.render:
            viewer = MjViewer(sim)
        else:
            viewer = None

        robot = RobotSim(sim, viewer, sim_param, args.render,
                         self.break_threshold)

        # Main training loop
        for ii in range(start_episode, args.epochs):
            start_time = time.time()
            self.steps_done += 1
            act_sequence = []
            if args.sim:
                sim_params = init_model(robot.mj_sim)
                robot.reset_simulation()
                ret = robot.grasp_handle()
                if not ret:
                    continue

                # Local memory for current episode
                localMemory = []

                # Get current observation
                hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states(
                    batch_size=1, device=args.device)
                hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states(
                    batch_size=1, device=args.device)
                hidden_state_3, cell_state_3 = self.policy_net_3.init_hidden_states(
                    batch_size=1, device=args.device)
                observation_space = TactileObs(
                    robot.get_gripper_xpos(),  # 24
                    robot.get_all_touch_buffer(args.hap_sample))  # 30 x 7
                broken_so_far = 0

            for t in count():
                if not args.quiet and t % 50 == 0:
                    print("Running training episode: {}, iteration: {}".format(
                        ii, t))

                # Select action
                observation = observation_space.get_state()
                if args.position:
                    observation = observation[6:]
                if args.shear:
                    indices = np.ones(len(observation), dtype=bool)
                    indices[6:166] = False
                    observation = observation[indices]
                if args.force:
                    observation = observation[:166]
                normalizer.observe(observation)
                observation = normalizer.normalize(observation)
                action, hidden_state_1, cell_state_1 = self.select_action(
                    observation, hidden_state_1, cell_state_1)

                # record actions in this epoch
                act_sequence.append(action)

                # Perform action
                delta = action_space.get_action(
                    self.ACTIONS[action])['delta'][:3]
                target_position = np.add(robot.get_gripper_jpos()[:3],
                                         np.array(delta))
                target_pose = np.hstack(
                    (target_position, robot.get_gripper_jpos()[3:]))

                if args.sim:
                    robot.move_joint(target_pose,
                                     True,
                                     self.gripping_force,
                                     hap_sample=args.hap_sample)

                    # Get reward
                    done, num = robot.update_tendons()
                    failure = robot.check_slippage()
                    if num > broken_so_far:
                        reward = num - broken_so_far
                        broken_so_far = num
                    else:
                        reward = 0

                    # # Add a movement reward
                    # reward -= 0.05 * np.linalg.norm(target_position - robot.get_gripper_jpos()[:3]) / np.linalg.norm(delta)

                    # Observe new state
                    observation_space.update(
                        robot.get_gripper_xpos(),  # 24
                        robot.get_all_touch_buffer(args.hap_sample))  # 30x7

                # Set max number of iterations (`done`/`failure` are set in
                # the args.sim branch above; this loop assumes args.sim)
                if t >= self.max_iter:
                    done = True

                # Check if done
                if not done and not failure:
                    next_state = observation_space.get_state()
                    if args.position:
                        next_state = next_state[6:]
                    if args.shear:
                        indices = np.ones(len(next_state), dtype=bool)
                        indices[6:166] = False
                        next_state = next_state[indices]
                    if args.force:
                        next_state = next_state[:166]
                    normalizer.observe(next_state)
                    next_state = normalizer.normalize(next_state)
                else:
                    next_state = None

                # Push new Transition into memory
                localMemory.append(
                    Transition(observation, action, next_state, reward))

                # Optimize the model
                if t % 10 == 0:
                    loss = self.optimize_model()
                    # if loss:
                    #     print_variables['loss'].append(loss.item())

                # If we are done, reset the model
                if done or failure:
                    self.memory.push(localMemory)
                    if failure:
                        print_variables['durations'].append(self.max_iter)
                    else:
                        print_variables['durations'].append(t)
                    print_variables['rewards'].append(broken_so_far)
                    plot_variables(self.figure, print_variables,
                                   "Training POMDP")
                    print("Model parameters: {}".format(sim_params))
                    print("Actions in this epoch are: {}".format(act_sequence))
                    print("Epoch {} took {}s, total number broken: {}\n\n".
                          format(ii,
                                 time.time() - start_time, broken_so_far))

                    break

            # Save checkpoints every few iterations
            if ii % args.save_freq == 0:
                save_path = os.path.join(
                    output_dir, 'checkpoint_model_' + str(ii) + '.pth')
                torch.save(
                    {
                        'epochs': ii,
                        'steps_done': self.steps_done,
                        'policy_net_1': self.policy_net_1.state_dict(),
                        'policy_net_2': self.policy_net_2.state_dict(),
                        'policy_net_3': self.policy_net_3.state_dict(),
                        'optimizer_1': self.optimizer_1.state_dict(),
                        'optimizer_2': self.optimizer_2.state_dict(),
                        'optimizer_3': self.optimizer_3.state_dict(),
                    }, save_path)

            self.memory.save_memory(os.path.join(output_dir, 'memory.pickle'))

        if args.savefig_path:
            now = dt.datetime.now()
            self.figure[0].savefig(
                args.savefig_path +
                '{}_{}_{}.png'.format(now.month, now.day, now.hour),
                format='png')

        print('Training done')
        plt.show()
        return print_variables
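
# --- Sketch of the select_action() interface used above (assumption) -------
# train_POMDP() calls self.select_action(obs, hidden, cell), whose body is not
# shown in this example. A DRQN-style epsilon-greedy version could look like
# this, assuming the DRQN forward pass takes (obs, (h, c)) and returns
# (q_values, (h, c)); the epsilon schedule constants are hypothetical.
import math
import random

import torch

def select_action_sketch(policy_net, observation, hidden_state, cell_state,
                         n_actions, steps_done, device,
                         eps_start=0.9, eps_end=0.05, eps_decay=2000.0):
    # exponentially decaying exploration rate
    eps = eps_end + (eps_start - eps_end) * math.exp(-steps_done / eps_decay)
    obs = torch.as_tensor(observation, dtype=torch.float32,
                          device=device).view(1, 1, -1)  # (batch, seq, feat)
    with torch.no_grad():
        q, (hidden_state, cell_state) = policy_net(obs,
                                                   (hidden_state, cell_state))
    if random.random() > eps:
        action = int(q.argmax(dim=-1))        # greedy action
    else:
        action = random.randrange(n_actions)  # exploratory action
    return action, hidden_state, cell_state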
Ejemplo n.º 25
0
def test_mj_sim_buffers():
    model = load_model_from_xml(BASIC_MODEL_XML)

    # test no callback
    sim = MjSim(model, nsubsteps=2)
    assert(sim.udd_state == {})

    sim.step()
    assert(sim.udd_state == {})

    # test with callback
    foo = 10
    d = {"foo": foo,
         "foo_2": np.array([foo, foo])}

    def udd_callback(sim):
        return d

    sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback)

    assert(sim.udd_state is not None)
    assert(sim.udd_state["foo"] == foo)
    assert(sim.udd_state["foo_2"].shape[0] == 2)
    assert(sim.udd_state["foo_2"][0] == foo)

    foo = 11
    d = {"foo": foo,
         "foo_2": np.array([foo, foo])}
    sim.step()
    assert(sim.udd_state is not None)
    assert(sim.udd_state["foo"] == foo)
    assert(sim.udd_state["foo_2"][0] == foo)

    d = {}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo,
         "foo_2": np.array([foo, foo]),
         "foo_3": foo}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": foo,
         "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()

    d = {"foo": "haha",
         "foo_2": np.array([foo, foo, foo])}
    with pytest.raises(AssertionError):
        sim.step()
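
# --- udd_callback sketch (added for illustration) ---------------------------
# udd_state attaches user-defined derived data to each step; as the test above
# shows, the callback must return the same keys and array shapes every step.
# A minimal callback tracking the first body's center-of-mass height (assumes
# the numpy/mujoco_py imports used throughout these examples):
def com_height_callback(sim):
    # xipos holds body center-of-mass positions in world coordinates
    return {"com_z": np.array([sim.data.xipos[1][2]])}

model = load_model_from_xml(BASIC_MODEL_XML)
sim = MjSim(model, udd_callback=com_height_callback)
sim.step()
print(sim.udd_state["com_z"])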
Ejemplo n.º 26
0
class BlockPickAndPlaceEnv:
    def __init__(self,
                 num_objects,
                 num_colors,
                 img_dim,
                 include_z,
                 random_initialize=False,
                 view=False):
        self.asset_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '../mujoco_data/stl/')
        self.img_dim = img_dim
        self.include_z = include_z
        self.polygons = ['cube', 'horizontal_rectangle', 'tetrahedron'][:1]
        self.num_colors = num_colors
        self.num_objects = num_objects
        self.view = view

        # Hyper-parameters
        self.internal_steps_per_step = 2000
        self.drop_height = 5
        self.pick_height = 0.59
        self.bounds = {
            'x_min': -2.5,
            'x_max': 2.5,
            'y_min': 1.0,
            'y_max': 4.0,
            'z_min': 0.05,
            'z_max': 2.2
        }
        self.TOLERANCE = 0.2

        self.names = []
        self.blocks = []

        if random_initialize:
            self.reset()

    #### Env initialization functions
    def get_unique_name(self, polygon):
        i = 0
        while '{}_{}'.format(polygon, i) in self.names:
            i += 1
        name = '{}_{}'.format(polygon, i)
        self.names.append(name)
        return name

    def add_mesh(self, polygon, pos, quat, rgba):
        name = self.get_unique_name(polygon)
        self.blocks.append({
            'name': name,
            'polygon': polygon,
            'pos': np.array(pos),
            'quat': np.array(quat),
            'rgba': rgba,
            'material': name
        })

    def get_asset_material_str(self):
        asset_base = '<material name="{}" rgba="{}" specular="0" shininess="0" emission="0.25"/>'
        asset_list = [
            asset_base.format(a['name'], self.convert_to_str(a['rgba']))
            for a in self.blocks
        ]
        asset_str = '\n'.join(asset_list)
        return asset_str

    def get_asset_mesh_str(self):
        asset_base = '<mesh name="{}" scale="0.6 0.6 0.6" file="{}"/>'
        asset_list = [
            asset_base.format(
                a['name'], os.path.join(self.asset_path,
                                        a['polygon'] + '.stl'))
            for a in self.blocks
        ]
        asset_str = '\n'.join(asset_list)
        return asset_str

    def get_body_str(self):
        body_base = '''
          <body name='{}' pos='{}' quat='{}'>
            <joint type='free' name='{}'/>
            <geom name='{}' type='mesh' mesh='{}' pos='0 0 0' quat='1 0 0 0' material='{}' 
            condim='3' friction='1 1 1' solimp="0.998 0.998 0.001" solref="0.02 1"/>
          </body>
        '''
        body_list = [
            body_base.format(m['name'], self.convert_to_str(m['pos']),
                             self.convert_to_str(m['quat']), m['name'],
                             m['name'], m['name'], m['material'])
            for m in self.blocks
        ]
        body_str = '\n'.join(body_list)
        return body_str

    def convert_to_str(self, an_iterable):
        return ' '.join(str(an_item) for an_item in an_iterable)

    def get_random_pos(self, height=None):
        x = np.random.uniform(self.bounds['x_min'], self.bounds['x_max'])
        y = np.random.uniform(self.bounds['y_min'], self.bounds['y_max'])
        if height is None:
            z = np.random.uniform(self.bounds['z_min'], self.bounds['z_max'])
        else:
            z = height
        return np.array([x, y, z])

    def get_random_rbga(self, num_colors):
        rgb = list(pickRandomColor(num_colors))
        return rgb + [1]

    def initialize(self, use_cur_pos):
        tmp = MODEL_XML_BASE.format(self.get_asset_mesh_str(),
                                    self.get_asset_material_str(),
                                    self.get_body_str())
        model = load_model_from_xml(tmp)
        self.sim = MjSim(model)
        if self.view:
            self.viewer = MjViewer(self.sim)
        else:
            self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)

        self._get_starting_step(use_cur_pos)

    def _get_starting_step(self, use_cur_pos):
        prev_positions = {}
        for i, aname in enumerate(self.names):
            if use_cur_pos:
                prev_positions[aname] = self.get_block_info(aname)["pos"]
            self.add_block(aname, [-5 + i, -5 + i, -5])

        for aname in self.names:
            if use_cur_pos:
                tmp_pos = prev_positions[aname]
            else:
                tmp_pos = self.get_random_pos(self.drop_height)
            self.add_block(aname, tmp_pos)
            for i in range(self.internal_steps_per_step):
                self.internal_step()
                if self.view:
                    self.viewer.render()

    #### Env internal step functions
    def add_block(self, ablock, pos):
        #pos (x,y,z)
        self.set_block_info(ablock, {"pos": pos})

    def pick_block(self, pos):
        block_name = None
        for a_block in self.names:
            if self.intersect(a_block, pos):
                block_name = a_block

        if block_name is None:
            return False
        return block_name

    def intersect(self, a_block, pos):
        cur_pos = self.get_block_info(a_block)["pos"]
        return np.max(np.abs(cur_pos - pos)) < self.TOLERANCE
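    # An axis-aligned box test: the pick point must lie within TOLERANCE (0.2)
    # of the block center along every axis (Chebyshev distance). For example,
    # pos = center + [0.1, 0.0, 0.15] intersects; center + [0.25, 0, 0] does not.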

    def get_block_info(self, a_block):
        info = {}
        info["poly"] = a_block[:-2]
        info["pos"] = np.copy(self.sim.data.get_body_xpos(a_block))  #np array
        info["quat"] = np.copy(self.sim.data.get_body_xquat(a_block))
        info["vel"] = np.copy(self.sim.data.get_body_xvelp(a_block))
        info["rot_vel"] = np.copy(self.sim.data.get_body_xvelr(a_block))
        return info

    def set_block_info(self, a_block, info):
        # print(a_block, info)
        # print("Setting state: {}, {}".format(a_block, info))
        sim_state = self.sim.get_state()
        start_ind = self.sim.model.get_joint_qpos_addr(a_block)[0]
        if "pos" in info:
            sim_state.qpos[start_ind:start_ind + 3] = np.array(info["pos"])
        if "quat" in info:
            sim_state.qpos[start_ind + 3:start_ind + 7] = info["quat"]
        else:
            sim_state.qpos[start_ind + 3:start_ind + 7] = np.array(
                [1, 0, 0, 0])

        start_ind = self.sim.model.get_joint_qvel_addr(a_block)[0]
        if "vel" in info:
            sim_state.qvel[start_ind:start_ind + 3] = info["vel"]
        else:
            sim_state.qvel[start_ind:start_ind + 3] = np.zeros(3)
        if "rot_vel" in info:
            sim_state.qvel[start_ind + 3:start_ind + 6] = info["rot_vel"]
        else:
            sim_state.qvel[start_ind + 3:start_ind + 6] = np.zeros(3)
        self.sim.set_state(sim_state)

    def internal_step(self, action=None):
        ablock = False
        if action is None:
            self.sim.forward()
            self.sim.step()
        else:
            pick_place = action[:3]
            drop_place = action[3:]

            ablock = self.pick_block(pick_place)
            if (ablock):
                # print("Dropping: {} {}".format(ablock, drop_place))
                self.add_block(ablock, drop_place)
        # self.sim_state = self.sim.get_state()
        return ablock

    #### Env external step functions
    # Input: action (4) or (6)
    # Output: resultant observation after taking the action
    def step(self, action):
        action = self._pre_process_actions(action)
        ablock = self.internal_step(action)
        # print(self.get_env_info())
        #if ablock:
        for i in range(self.internal_steps_per_step):
            self.sim.forward()
            self.sim.step()
            # self.internal_step()
            if self.view:
                self.viewer.render()

        # self.give_down_vel()
        # for i in range(200):
        #     self.sim.forward()
        #     self.sim.step()
        # self.sim_state = self.sim.get_state()

        # for aname in self.names: #This looks incorrect TODO: CHECK THIS
        #     self.add_block(aname, self.get_block_info(aname)["pos"])
        return self.get_observation()

    # Input: action can either be (A) or (T, A) where we want to execute T actions in a row
    # Output: Single obs
    def try_step(self, actions):
        tmp = self.get_env_info()
        # cur_state = copy.deepcopy(self.sim.get_state())
        if len(actions.shape) == 1:
            self.step(actions)
        elif len(actions.shape) == 2:
            for action in actions:
                self.step(action)
        else:
            raise KeyError("Wrong shape for actions: {}".format(actions.shape))
        obs = self.get_observation()
        # self.sim.set_state(cur_state)
        self.set_env_info(tmp)
        return obs

    # Input: actions (B,A)
    # Output: Largest manhattan distance component to closest block (B)
    def get_action_error(self, actions):
        vals = np.ones(actions.shape[0]) * 10000
        for i, an_action in enumerate(actions):
            for a_block in self.names:
                vals[i] = min(
                    np.max(
                        np.abs(
                            self.get_block_info(a_block)["pos"][:2] -
                            an_action[:2])), vals[i])
        return vals

    # Resets the environment
    def reset(self):
        self.names = []
        self.blocks = []
        quat = [1, 0, 0, 0]
        for i in range(self.num_objects):
            poly = np.random.choice(self.polygons)
            pos = self.get_random_pos()
            pos[-2] += -2 * (i + 1)
            self.add_mesh(poly, pos, quat,
                          self.get_random_rbga(self.num_colors))
        self.initialize(False)
        return self.get_observation()

    # Returns an observation
    def get_observation(self):
        img = self.sim.render(
            self.img_dim, self.img_dim, camera_name="fixed"
        )  # img is upside down, values btwn 0-255 (D,D,3)
        img = img[::-1, :, :]  # flips image right side up (D,D,3)
        return np.ascontiguousarray(img)  # values btwn 0-255 (D,D,3)

    def get_obs_size(self):
        return [self.img_dim, self.img_dim]

    def get_actions_size(self):
        if self.include_z:
            return [6]
        else:
            return [4]

    # Inputs: actions (*,6)
    # Outputs: (*,6) if including z, (*,4) if not
    def _post_process_actions(self, actions):
        if self.include_z:
            return actions
        else:
            return np.array(actions)[..., [0, 1, 3, 4]]

    # Inputs: actions (*,4), or (*,6)
    # Outputs: actions (*,6)
    def _pre_process_actions(self, actions):
        if actions.shape[-1] == 6:
            return actions

        full_actions = np.zeros(list(actions.shape)[:-1] + [6])  # (*,6)
        full_actions[..., [0, 1, 3, 4]] = actions
        full_actions[..., 2] = self.pick_height
        full_actions[..., 5] = self.drop_height
        return full_actions
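
    # Example (illustrative): with pick_height = 0.59 and drop_height = 5, a
    # (4,) action [x1, y1, x2, y2] becomes [x1, y1, 0.59, x2, y2, 5].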

    # Inputs: None
    # Outputs: Returns name of picked block
    #   If self.include z: Pick any random block
    #   Else: Picks a random block which can be picked up with the z pick set to self.pick_height
    def _get_rand_block_byz(self):
        if len(self.names) == 0:
            raise KeyError("No blocks in _get_rand_block_byz()!")
        if self.include_z:
            aname = np.random.choice(self.names)
        else:
            z_lim = self.pick_height
            tmp = [
                aname for aname in self.names
                if abs(self.get_block_info(aname)["pos"][2] -
                       z_lim) < self.TOLERANCE
            ]
            aname = np.random.choice(tmp)
        return aname

    # Input: action_type
    # Output: Single action either (6) or (4)
    def sample_action(self,
                      action_type=None,
                      pick_noise_ratio=0.0,
                      place_noise_ratio=0.0):
        if len(self.names) == 1 and action_type == 'place_block':
            action_type = None

        if action_type == 'pick_block':  # pick a block, place it randomly
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]
            place = self.get_random_pos()
        elif action_type == 'place_block':  # pick a block, place it on another
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]
            names = copy.deepcopy(self.names)
            names.remove(aname)
            aname = np.random.choice(names)
            place = self.get_block_info(aname)["pos"]
            place[2] += 3  # Each block is roughly 1 unit wide
        elif action_type == 'remove_block':
            aname = self._get_rand_block_byz()
            pick = self.get_block_info(aname)["pos"]  # + np.random.randn(3)/50
            place = [
                0, 0, -5
            ]  # Place the block under the ground to remove it from scene
        # elif action_type == "noisy_pick":
        #     aname = self._get_rand_block_byz()
        #     pick = self.get_block_info(aname)["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 0.5
        #     place = self.get_random_pos()
        # elif action_type == "noisy_miss":
        #     aname = self._get_rand_block_byz()
        #     pick = self.get_block_info(aname)["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 5
        #     place = self.get_random_pos()
        elif action_type is None:
            if self.include_z:
                pick = self.get_random_pos()
                place = self.get_random_pos()
            else:
                pick = self.get_random_pos(self.pick_height)
                place = self.get_random_pos(self.drop_height)
        else:
            raise KeyError("Wrong input action_type!")
        ac = np.array(list(pick) + list(place))

        if pick_noise_ratio:
            ac[:3] += np.random.uniform(
                -self.TOLERANCE, self.TOLERANCE, size=3) * pick_noise_ratio
        if place_noise_ratio:
            ac[3:] += np.random.uniform(
                -self.TOLERANCE, self.TOLERANCE, size=3) * place_noise_ratio
        return self._post_process_actions(ac)

    # Input: mean (*), std (*), num_actions
    # Output: actions (num_actions, *)
    def sample_multiple_action_gaussian(self, mean, std, num_actions):
        # return np.stack([self.sample_action_gaussian(mean, std) for _ in range(num_actions)])
        ans = np.random.normal(mean,
                               std,
                               size=[num_actions] + list(mean.shape))

        ## Clip actions to stay in bounds
        if not self.include_z:
            ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'],
                                  self.bounds['y_max'])
            ans[..., 2] = np.clip(ans[..., 2], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 3] = np.clip(ans[..., 3], self.bounds['y_min'],
                                  self.bounds['y_max'])
        else:
            ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'],
                                  self.bounds['y_max'])
            ans[..., 2] = np.clip(ans[..., 2], self.bounds['z_min'],
                                  self.bounds['z_max'])
            ans[..., 3] = np.clip(ans[..., 3], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 4] = np.clip(ans[..., 4], self.bounds['y_min'],
                                  self.bounds['y_max'])
            ans[..., 5] = np.clip(ans[..., 5], self.bounds['z_min'],
                                  self.bounds['z_max'])
        return ans

    # Input: mean (*, 2/3), std (*, 2/3), num_actions
    # Output: actions (num_actions, *, 2/3)
    def sample_multiple_place_locs_gaussian(self, mean, std, num_actions):
        ans = np.random.normal(mean,
                               std,
                               size=[num_actions] + list(mean.shape))

        ## Clip actions to stay in bounds
        if not self.include_z:
            ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'],
                                  self.bounds['y_max'])
        else:
            ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'],
                                  self.bounds['x_max'])
            ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'],
                                  self.bounds['y_max'])
            ans[..., 2] = np.clip(ans[..., 2], self.bounds['z_min'],
                                  self.bounds['z_max'])
        return ans

    def move_blocks_side(self):
        # Move blocks to either side
        z = self.drop_height
        side_pos = [[-2.2, 1.5, z], [2.2, 1.5, z], [-2.2, 3.5, z],
                    [2.2, 3.5, z]]
        place_lst = []
        for i, block in enumerate(self.names):
            place = copy.deepcopy(self.get_block_info(block)["pos"])
            place[-1] = self.drop_height
            self.add_block(block, side_pos[i])
            place_lst.append(place)
            # true_actions.append(side_pos[i] + list(place))
            # Note: the pick & place z's might be slightly off

        for i in range(self.internal_steps_per_step):
            self.internal_step()
            if self.view:
                self.viewer.render()
        true_actions = []
        for i, block in enumerate(self.names):
            pick = self.get_block_info(block)["pos"]
            pick[-1] = 0.6
            place = place_lst[i]
            true_actions.append(np.concatenate([pick, place]))

        # sort by place height so the lowest block is placed first
        true_actions.sort(key=lambda x: x[5])
        # print(true_actions)

        return self._post_process_actions(true_actions)

    def create_tower_shape(self):
        def get_valid_width_pos(width):
            num_pos = len(self.heights)
            possible = []
            for i in range(num_pos):
                valid = True
                for k in range(max(i - width, 0), min(i + width + 1, num_pos)):
                    if self.types[k] == "tetrahedron":
                        valid = False
                        break
                    if self.heights[i] < self.heights[k]:
                        valid = False
                        break
                    if self.heights[i] >= 3:
                        valid = False
                        break
                if valid:
                    possible.append(i)
            return possible

        def get_drop_pos(index):
            delta_x = 1
            y_val = 3
            left_most_x = -2.5
            return [left_most_x + index * delta_x, y_val, 4]

        self.names = []
        self.blocks = []

        self.heights = [0, 0, 0, 0, 0]
        self.types = [None] * 5
        self.check_clear_width = {
            'cube': 1,
            'horizontal_rectangle': 1,
            'tetrahedron': 1
        }
        self.add_height_width = {
            'cube': 0,
            'horizontal_rectangle': 1,
            'tetrahedron': 0
        }

        tmp_polygons = copy.deepcopy(self.polygons)

        quat = [1, 0, 0, 0]
        colors = []
        for i in range(self.num_objects):
            poly = np.random.choice(tmp_polygons)
            tmp = get_valid_width_pos(self.check_clear_width[poly])
            if len(tmp) == 0:
                tmp_polygons.remove(poly)
                if len(tmp_polygons) == 0:
                    # print("DONE!")
                    break
                else:
                    continue

            tmp_polygons = copy.deepcopy(self.polygons)
            ind = np.random.choice(tmp)
            # print(poly, tmp, ind)
            self.update_tower_info(ind, poly)
            tmp_pos = get_drop_pos(ind)
            while True:
                color = self.get_random_rbga(self.num_colors)
                # accept only colors sufficiently far from all previous ones
                if len(colors) == 0 or np.linalg.norm(
                        np.array(color[:3]) - np.array(colors)[:, :3],
                        axis=1).min() > 0.3:
                    break
            colors.append(color)
            self.add_mesh(poly, tmp_pos, quat, color)
        self.num_objects = len(self.names)
        self.initialize(True)

    def update_tower_info(self, ind, poly):
        self.types[ind] = poly
        width = self.add_height_width[poly]
        new_height = self.heights[ind] + 1
        for i in range(max(ind - width, 0),
                       min(ind + width + 1, len(self.heights))):
            self.heights[i] = new_height

        for i in range(1, 4):
            if self.heights[i - 1] == self.heights[
                    i + 1] and new_height == self.heights[i - 1]:
                self.heights[i] = self.heights[i - 1]

    def get_env_info(self):
        env_info = {}
        env_info["names"] = copy.deepcopy(self.names)
        env_info["blocks"] = copy.deepcopy(self.blocks)
        for i, aname in enumerate(self.names):
            info = self.get_block_info(aname)
            env_info["blocks"][i]["pos"] = copy.deepcopy(info["pos"])
            env_info["blocks"][i]["quat"] = copy.deepcopy(info["quat"])
        return env_info

    def set_env_info(self, env_info):
        self.names = env_info["names"]
        self.blocks = env_info["blocks"]
        self.initialize(True)

    # Output: If check_all_in_bounds is false, return actions(N,3)
    #  Else return true if all the boxes are in bounds, false otherwise
    def get_block_locs(self, check_all_in_bounds=False):
        ans = []
        for a_block in self.names:
            ans.append(self.get_block_info(a_block)["pos"])  # (3)
        ans = np.array(ans)  # (Num_blocks,3)
        if check_all_in_bounds:
            x_max = np.max(ans[:, 0])
            x_min = np.min(ans[:, 0])
            y_max = np.max(ans[:, 1])
            y_min = np.min(ans[:, 1])
            if x_max > self.bounds['x_max'] or x_min < self.bounds['x_min'] or y_max > self.bounds['y_max'] or \
                    y_min < self.bounds['y_min']:
                return False
            else:
                return True
        return ans

    # Computes accuracy (#blocks correct / total #) of the current environment
    # given the true data and threshold
    def compute_accuracy(self, true_data, threshold=0.2):
        mjc_data = copy.deepcopy(true_data)

        max_err = -float('inf')
        data = self.get_env_info()

        correct = 0
        for pred_datum in data['blocks']:
            err, mjc_match, err_pos, err_rgb = self._best_obj_match(
                pred_datum, mjc_data['blocks'])
            # del mjc_data[mjc_match]

            # print(err)
            if err > max_err:
                max_err = err
                max_pos = err_pos
                max_rgb = err_rgb

            if len(mjc_data) == 0:
                break

            if err < threshold:
                correct += 1

        correct /= float(len(data['blocks']))
        return correct

    def _best_obj_match(self, pred, targs):
        def np_mse(x1, x2):
            return np.square(x1 - x2).mean()

        pos = pred['pos']
        rgb = pred['rgba']

        best_err = float('inf')
        for obj_data in targs:
            obj_name = obj_data['name']
            obj_pos = obj_data['pos']
            obj_rgb = obj_data['rgba']

            pos_err = np_mse(pos, obj_pos)
            rgb_err = np_mse(np.array(rgb), np.array(obj_rgb))
            err = pos_err + rgb_err

            if err < best_err:
                best_err = err
                best_obj = obj_name
                best_pos = pos_err
                best_rgb = rgb_err

        return best_err, best_obj, best_pos, best_rgb
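
# --- Usage sketch (added for illustration; not from the original source) ---
# How the pieces above compose: build a random scene, sample a pick-and-place
# action, and step to get an image observation. The constructor arguments are
# guesses based on __init__'s signature.
env = BlockPickAndPlaceEnv(num_objects=3, num_colors=3, img_dim=64,
                           include_z=False, random_initialize=True)
action = env.sample_action('pick_block')  # shape (4,) since include_z=False
obs = env.step(action)                    # (64, 64, 3) uint8 image
print(obs.shape, env.get_block_locs(check_all_in_bounds=True))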