Example #1
0
    def _ParseCameraConfig(self, camera_config):
        # extract serial number
        serial_no = camera_config["serial_no"]

        # construct the transformation matrix
        world_transform = camera_config["world_transform"]
        X_WCamera = tf.euler_matrix(world_transform["roll"],
                                    world_transform["pitch"],
                                    world_transform["yaw"])
        X_WCamera[:3, 3] = \
            [world_transform["x"], world_transform["y"], world_transform["z"]]

        # construct the transformation matrix
        internal_transform = camera_config["internal_transform"]
        X_CameraDepth = tf.euler_matrix(internal_transform["roll"],
                                        internal_transform["pitch"],
                                        internal_transform["yaw"])
        X_CameraDepth[:3, 3] = ([internal_transform["x"],
                                 internal_transform["y"],
                                 internal_transform["z"]])

        # construct the camera info
        camera_info_data = camera_config["camera_info"]
        if "fov_y" in camera_info_data:
            camera_info = CameraInfo(camera_info_data["width"],
                                     camera_info_data["height"],
                                     camera_info_data["fov_y"])
        else:
            camera_info = CameraInfo(
                camera_info_data["width"], camera_info_data["height"],
                camera_info_data["focal_x"], camera_info_data["focal_y"],
                camera_info_data["center_x"], camera_info_data["center_y"])

        return serial_no, X_WCamera, X_CameraDepth, camera_info
Example #2
0
 def test_depth_image_to_point_cloud_api(self):
     camera_info = CameraInfo(width=640, height=480, fov_y=np.pi / 4)
     dut = mut.DepthImageToPointCloud(camera_info=camera_info)
     self.assertIsInstance(dut.depth_image_input_port(), InputPort)
     self.assertIsInstance(dut.point_cloud_output_port(), OutputPort)
     dut = mut.DepthImageToPointCloud(
         camera_info=camera_info,
         pixel_type=PixelType.kDepth16U,
         scale=0.001)
Example #3
0
 def _add_renderer(self):
     renderer_name = "renderer"
     self.scenegraph.AddRenderer(renderer_name, MakeRenderEngineVtk(RenderEngineVtkParams()))
     # Add a camera
     depth_camera = DepthRenderCamera(
         RenderCameraCore(
             renderer_name, 
             CameraInfo(width=640, height=480, fov_y=np.pi/4),
             ClippingRange(0.01, 10.0),
             RigidTransform()),
         DepthRange(0.01,10.0)
     )
     world_id = self.plant.GetBodyFrameIdOrThrow(self.plant.world_body().index())
     X_WB = xyz_rpy_deg([4,0,0],[-90,0,90])
     sensor = RgbdSensor(world_id, X_PB=X_WB, depth_camera=depth_camera)
     self.builder.AddSystem(sensor)
     self.builder.Connect(self.scenegraph.get_query_output_port(), sensor.query_object_input_port())
Example #4
0
 def test_rgbd_sensor_registration(self):
     X_PC = RigidTransform(p=[1, 2, 3])
     station = ManipulationStation(time_step=0.001)
     station.SetupManipulationClassStation()
     plant = station.get_multibody_plant()
     color_camera = ColorRenderCamera(
         RenderCameraCore("renderer", CameraInfo(10, 10, np.pi / 4),
                          ClippingRange(0.1, 10.0), RigidTransform()),
         False)
     depth_camera = DepthRenderCamera(color_camera.core(),
                                      DepthRange(0.1, 9.5))
     station.RegisterRgbdSensor("single_sensor", plant.world_frame(), X_PC,
                                depth_camera)
     station.RegisterRgbdSensor("dual_sensor", plant.world_frame(), X_PC,
                                color_camera, depth_camera)
     station.Finalize()
     camera_poses = station.GetStaticCameraPosesInWorld()
     # The three default cameras plus the two just added.
     self.assertEqual(len(camera_poses), 5)
     self.assertTrue("single_sensor" in camera_poses)
     self.assertTrue("dual_sensor" in camera_poses)
Example #5
0
    def add_rgbd_sensor(self, camera_name, sensor_config):
        """
        Adds Rgbd camera to the diagram
        """
        builder = self._builder
        if self._renderer_name is None:
            self._renderer_name = "vtk_renderer"
            self._sg.AddRenderer(self._renderer_name,
                                 MakeRenderEngineVtk(RenderEngineVtkParams()))

        width = sensor_config['width']
        height = sensor_config['height']
        fov_y = sensor_config['fov_y']
        z_near = sensor_config['z_near']
        z_far = sensor_config['z_far']

        # This is in right-down-forward convention
        X_W_camera = transform_from_dict(sensor_config)
        color_camera = ColorRenderCamera(
            RenderCameraCore(self._renderer_name,
                             CameraInfo(width, height, fov_y),
                             ClippingRange(z_near, z_far), RigidTransform()),
            False)
        depth_camera = DepthRenderCamera(color_camera.core(),
                                         DepthRange(z_near, z_far))

        # add camera system
        camera = builder.AddSystem(
            RgbdSensor(parent_id=self._sg.world_frame_id(),
                       X_PB=X_W_camera,
                       color_camera=color_camera,
                       depth_camera=depth_camera))
        builder.Connect(self._sg.get_query_output_port(),
                        camera.query_object_input_port())

        self._rgbd_sensors[camera_name] = camera
Example #6
0
    def test_render_engine_api(self):
        class DummyRenderEngine(mut.render.RenderEngine):
            """Mirror of C++ DummyRenderEngine."""

            # See comment below about `rgbd_sensor_test.cc`.
            latest_instance = None

            def __init__(self, render_label=None):
                mut.render.RenderEngine.__init__(self)
                # N.B. We do not hide these because this is a test class.
                # Normally, you would want to hide this.
                self.force_accept = False
                self.registered_geometries = set()
                self.updated_ids = {}
                self.include_group_name = "in_test"
                self.X_WC = RigidTransform_[float]()
                self.color_count = 0
                self.depth_count = 0
                self.label_count = 0
                self.color_camera = None
                self.depth_camera = None
                self.label_camera = None

            def UpdateViewpoint(self, X_WC):
                DummyRenderEngine.latest_instance = self
                self.X_WC = X_WC

            def ImplementGeometry(self, shape, user_data):
                DummyRenderEngine.latest_instance = self

            def DoRegisterVisual(self, id, shape, properties, X_WG):
                DummyRenderEngine.latest_instance = self
                mut.GetRenderLabelOrThrow(properties)
                if self.force_accept or properties.HasGroup(
                    self.include_group_name
                ):
                    self.registered_geometries.add(id)
                    return True
                return False

            def DoUpdateVisualPose(self, id, X_WG):
                DummyRenderEngine.latest_instance = self
                self.updated_ids[id] = X_WG

            def DoRemoveGeometry(self, id):
                DummyRenderEngine.latest_instance = self
                self.registered_geometries.remove(id)

            def DoClone(self):
                DummyRenderEngine.latest_instance = self
                new = DummyRenderEngine()
                new.force_accept = copy.copy(self.force_accept)
                new.registered_geometries = copy.copy(
                    self.registered_geometries)
                new.updated_ids = copy.copy(self.updated_ids)
                new.include_group_name = copy.copy(self.include_group_name)
                new.X_WC = copy.copy(self.X_WC)
                new.color_count = copy.copy(self.color_count)
                new.depth_count = copy.copy(self.depth_count)
                new.label_count = copy.copy(self.label_count)
                new.color_camera = copy.copy(self.color_camera)
                new.depth_camera = copy.copy(self.depth_camera)
                new.label_camera = copy.copy(self.label_camera)
                return new

            def DoRenderColorImage(self, camera, color_image_out):
                DummyRenderEngine.latest_instance = self
                self.color_count += 1
                self.color_camera = camera

            def DoRenderDepthImage(self, camera, depth_image_out):
                DummyRenderEngine.latest_instance = self
                self.depth_count += 1
                self.depth_camera = camera

            def DoRenderLabelImage(self, camera, label_image_out):
                DummyRenderEngine.latest_instance = self
                self.label_count += 1
                self.label_camera = camera

        engine = DummyRenderEngine()
        self.assertIsInstance(engine, mut.render.RenderEngine)
        self.assertIsInstance(engine.Clone(), DummyRenderEngine)

        # Test implementation of C++ interface by using RgbdSensor.
        renderer_name = "renderer"
        builder = DiagramBuilder()
        scene_graph = builder.AddSystem(mut.SceneGraph())
        # N.B. This passes ownership.
        scene_graph.AddRenderer(renderer_name, engine)
        sensor = builder.AddSystem(RgbdSensor(
            parent_id=scene_graph.world_frame_id(),
            X_PB=RigidTransform(),
            depth_camera=mut.render.DepthRenderCamera(
                mut.render.RenderCameraCore(
                    renderer_name, CameraInfo(640, 480, np.pi/4),
                    mut.render.ClippingRange(0.1, 5.0), RigidTransform()),
                mut.render.DepthRange(0.1, 5.0))))
        builder.Connect(
            scene_graph.get_query_output_port(),
            sensor.query_object_input_port(),
        )
        diagram = builder.Build()
        diagram_context = diagram.CreateDefaultContext()
        sensor_context = sensor.GetMyContextFromRoot(diagram_context)
        image = sensor.color_image_output_port().Eval(sensor_context)
        # N.B. Because there's context cloning going on under the hood, we
        # won't be interacting with our originally registered instance.
        # See `rgbd_sensor_test.cc` as well.
        current_engine = DummyRenderEngine.latest_instance
        self.assertIsNot(current_engine, engine)
        self.assertIsInstance(image, ImageRgba8U)
        self.assertEqual(current_engine.color_count, 1)

        image = sensor.depth_image_32F_output_port().Eval(sensor_context)
        self.assertIsInstance(image, ImageDepth32F)
        self.assertEqual(current_engine.depth_count, 1)

        image = sensor.depth_image_16U_output_port().Eval(sensor_context)
        self.assertIsInstance(image, ImageDepth16U)
        self.assertEqual(current_engine.depth_count, 2)

        image = sensor.label_image_output_port().Eval(sensor_context)
        self.assertIsInstance(image, ImageLabel16I)
        self.assertEqual(current_engine.label_count, 1)
Example #7
0
    def test_query_object(self, T):
        RigidTransform = RigidTransform_[float]
        SceneGraph = mut.SceneGraph_[T]
        QueryObject = mut.QueryObject_[T]
        SceneGraphInspector = mut.SceneGraphInspector_[T]
        FramePoseVector = mut.FramePoseVector_[T]

        # First, ensure we can default-construct it.
        model = QueryObject()
        self.assertIsInstance(model, QueryObject)

        scene_graph = SceneGraph()
        source_id = scene_graph.RegisterSource("source")
        frame_id = scene_graph.RegisterFrame(
            source_id=source_id, frame=mut.GeometryFrame("frame"))
        geometry_id = scene_graph.RegisterGeometry(
            source_id=source_id, frame_id=frame_id,
            geometry=mut.GeometryInstance(X_PG=RigidTransform(),
                                          shape=mut.Sphere(1.), name="sphere"))
        render_params = mut.render.RenderEngineVtkParams()
        renderer_name = "test_renderer"
        scene_graph.AddRenderer(renderer_name,
                                mut.render.MakeRenderEngineVtk(render_params))

        context = scene_graph.CreateDefaultContext()
        pose_vector = FramePoseVector()
        pose_vector.set_value(frame_id, RigidTransform_[T]())
        scene_graph.get_source_pose_port(source_id).FixValue(
            context, pose_vector)
        query_object = scene_graph.get_query_output_port().Eval(context)

        self.assertIsInstance(query_object.inspector(), SceneGraphInspector)
        self.assertIsInstance(
            query_object.GetPoseInWorld(frame_id=frame_id), RigidTransform_[T])
        self.assertIsInstance(
            query_object.GetPoseInParent(frame_id=frame_id),
            RigidTransform_[T])
        self.assertIsInstance(
            query_object.GetPoseInWorld(geometry_id=geometry_id),
            RigidTransform_[T])

        # Proximity queries -- all of these will produce empty results.
        results = query_object.ComputeSignedDistancePairwiseClosestPoints()
        self.assertEqual(len(results), 0)
        results = query_object.ComputePointPairPenetration()
        self.assertEqual(len(results), 0)
        results = query_object.ComputeSignedDistanceToPoint(p_WQ=(1, 2, 3))
        self.assertEqual(len(results), 0)
        results = query_object.FindCollisionCandidates()
        self.assertEqual(len(results), 0)
        self.assertFalse(query_object.HasCollisions())

        # ComputeSignedDistancePairClosestPoints() requires two valid geometry
        # ids. There are none in this SceneGraph instance. Rather than
        # populating the SceneGraph, we look for the exception thrown in
        # response to invalid ids as evidence of correct binding.
        with self.assertRaisesRegex(
            RuntimeError,
            r"The geometry given by id \d+ does not reference a geometry"
                + " that can be used in a signed distance query"):
            query_object.ComputeSignedDistancePairClosestPoints(
                geometry_id_A=mut.GeometryId.get_new_id(),
                geometry_id_B=mut.GeometryId.get_new_id())

        # Confirm rendering API returns images of appropriate type.
        camera_core = mut.render.RenderCameraCore(
            renderer_name=renderer_name,
            intrinsics=CameraInfo(width=10, height=10, fov_y=pi/6),
            clipping=mut.render.ClippingRange(0.1, 10.0),
            X_BS=RigidTransform())
        color_camera = mut.render.ColorRenderCamera(
            core=camera_core, show_window=False)
        depth_camera = mut.render.DepthRenderCamera(
            core=camera_core, depth_range=mut.render.DepthRange(0.1, 5.0))
        image = query_object.RenderColorImage(
                camera=color_camera, parent_frame=SceneGraph.world_frame_id(),
                X_PC=RigidTransform())
        self.assertIsInstance(image, ImageRgba8U)
        image = query_object.RenderDepthImage(
            camera=depth_camera, parent_frame=SceneGraph.world_frame_id(),
            X_PC=RigidTransform())
        self.assertIsInstance(image, ImageDepth32F)
        image = query_object.RenderLabelImage(
            camera=color_camera, parent_frame=SceneGraph.world_frame_id(),
            X_PC=RigidTransform())
        self.assertIsInstance(image, ImageLabel16I)
    def test_unimplemented_rendering(self):
        """The RenderEngine API throws exceptions for derived implementations
        that don't override DoRender*Image (or Render*Image for the deprecated
        API). This test confirms that behavior propagates down to Python."""
        class MinimalEngine(mut.render.RenderEngine):
            """Minimal implementation of the RenderEngine virtual API"""
            def UpdateViewpoint(self, X_WC):
                pass

            def DoRegisterVisual(self, id, shape, properties, X_WG):
                pass

            def DoUpdateVisualPose(self, id, X_WG):
                pass

            def DoRemoveGeometry(self, id):
                pass

            def DoClone(self):
                pass

        class ColorOnlyEngine(MinimalEngine):
            """Rendering Depth and Label images should throw"""
            def DoRenderColorImage(self, camera, image_out):
                pass

        class DepthOnlyEngine(MinimalEngine):
            """Rendering Color and Label images should throw"""
            def DoRenderDepthImage(self, camera, image_out):
                pass

        class LabelOnlyEngine(MinimalEngine):
            """Rendering Color and Depth images should throw"""
            def DoRenderLabelImage(self, camera, image_out):
                pass

        identity = RigidTransform_[float]()
        intrinsics = CameraInfo(10, 10, pi / 4)
        core = mut.render.RenderCameraCore("n/a", intrinsics,
                                           mut.render.ClippingRange(0.1, 10),
                                           identity)
        color_cam = mut.render.ColorRenderCamera(core, False)
        depth_cam = mut.render.DepthRenderCamera(
                        core, mut.render.DepthRange(0.1, 9))
        color_image = ImageRgba8U(intrinsics.width(), intrinsics.height())
        depth_image = ImageDepth32F(intrinsics.width(), intrinsics.height())
        label_image = ImageLabel16I(intrinsics.width(), intrinsics.height())

        color_only = ColorOnlyEngine()
        color_only.RenderColorImage(color_cam, color_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            color_only.RenderDepthImage(depth_cam, depth_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            color_only.RenderLabelImage(color_cam, label_image)

        depth_only = DepthOnlyEngine()
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            depth_only.RenderColorImage(color_cam, color_image)
        depth_only.RenderDepthImage(depth_cam, depth_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            depth_only.RenderLabelImage(color_cam, label_image)

        label_only = LabelOnlyEngine()
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            label_only.RenderColorImage(color_cam, color_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            label_only.RenderDepthImage(depth_cam, depth_image)
        label_only.RenderLabelImage(color_cam, label_image)
Example #9
0
    def test_unimplemented_rendering(self):
        # Test that a derived RenderEngine has implementations of
        # DoRender*Image that throw something suggestive of "not implemented"
        # and that they are overridable.
        class MinimalEngine(mut.render.RenderEngine):
            def UpdateViewpoint(self, X_WC):
                pass

            def DoRegisterVisual(self, id, shae, properties, X_WG):
                pass

            def DoUpdateVisualPose(self, id, X_WG):
                pass

            def DoRemoveGeometry(self, id):
                pass

            def DoClone(self):
                pass

        class ColorOnlyEngine(MinimalEngine):
            def DoRenderColorImage(self, camera, image_out):
                pass

        class DepthOnlyEngine(MinimalEngine):
            def DoRenderDepthImage(self, camera, image_out):
                pass

        class LabelOnlyEngine(MinimalEngine):
            def DoRenderLabelImage(self, camera, image_out):
                pass

        identity = RigidTransform_[float]()
        intrinsics = CameraInfo(10, 10, np.pi / 4)
        core = mut.render.RenderCameraCore("n/a", intrinsics,
                                           mut.render.ClippingRange(0.1, 10),
                                           identity)
        color_cam = mut.render.ColorRenderCamera(core, False)
        depth_cam = mut.render.DepthRenderCamera(core,
                                                 mut.render.DepthRange(0.1, 9))
        color_image = ImageRgba8U(intrinsics.width(), intrinsics.height())
        depth_image = ImageDepth32F(intrinsics.width(), intrinsics.height())
        label_image = ImageLabel16I(intrinsics.width(), intrinsics.height())

        color_only = ColorOnlyEngine()
        color_only.RenderColorImage(color_cam, color_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            color_only.RenderDepthImage(depth_cam, depth_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            color_only.RenderLabelImage(color_cam, label_image)

        depth_only = DepthOnlyEngine()
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            depth_only.RenderColorImage(color_cam, color_image)
        depth_only.RenderDepthImage(depth_cam, depth_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            depth_only.RenderLabelImage(color_cam, label_image)

        label_only = LabelOnlyEngine()
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            label_only.RenderColorImage(color_cam, color_image)
        with self.assertRaisesRegex(RuntimeError, ".+pure virtual function.+"):
            label_only.RenderDepthImage(depth_cam, depth_image)
        label_only.RenderLabelImage(color_cam, label_image)
Example #10
0
iiwa_2 = parser.AddModelFromFile(iiwa_file, model_name="iiwa_2")
plant.WeldFrames(plant.world_frame(),
                 plant.GetFrameByName("iiwa_link_0", iiwa_2),
                 X_AB=xyz_rpy_deg([0, 1, 0], [0, 0, 0]))

# Adding the renderer / vtk stuff?
renderer_name = "renderer"
scene_graph.AddRenderer(renderer_name,
                        MakeRenderEngineVtk(RenderEngineVtkParams()))

# Now a camera with color and depth (values chosen arbitrarily)

# So here we leverage the renderer attached to the scene graph to power a camera for our robot
depth_camera = DepthRenderCamera(
    RenderCameraCore(renderer_name,
                     CameraInfo(width=640, height=480, fov_y=np.pi / 4),
                     ClippingRange(0.01, 10.0), RigidTransform()),
    DepthRange(0.01, 10.0))

world_id = plant.GetBodyFrameIdOrThrow(plant.world_body().index())
X_WB = xyz_rpy_deg([4, 0, 0], [-90, 0, 90])
sensor = RgbdSensor(world_id, X_PB=X_WB, depth_camera=depth_camera)

builder.AddSystem(sensor)
builder.Connect(scene_graph.get_query_output_port(),
                sensor.query_object_input_port())

# So here we are adding the drake visualizer into the mix.
# IMPORTANT: Need to actually run "drake visualizer" from the build.
DrakeVisualizer.AddToBuilder(builder, scene_graph)
Example #11
0
def perform_iou_testing(model_file, test_specific_temp_directory,
                        pose_directory):

    random_poses = {}
    # Read camera translation calculated and applied on gazebo
    # we read the random positions file as it contains everything:
    with open(
            test_specific_temp_directory + "/pics/" + pose_directory +
            "/poses.txt", "r") as datafile:
        for line in datafile:
            if line.startswith("Translation:"):
                line_split = line.split(" ")
                # we make the value negative since gazebo moved the robot
                # and in drakewe move the camera
                trans_x = float(line_split[1])
                trans_y = float(line_split[2])
                trans_z = float(line_split[3])
            elif line.startswith("Scaling:"):
                line_split = line.split(" ")
                scaling = float(line_split[1])
            else:
                line_split = line.split(" ")
                if line_split[1] == "nan":
                    random_poses[line_split[0][:-1]] = 0
                else:
                    random_poses[line_split[0][:-1]] = float(line_split[1])

    builder = DiagramBuilder()
    plant, scene_graph = AddMultibodyPlantSceneGraph(builder, 0.0)

    parser = Parser(plant)
    model = make_parser(plant).AddModelFromFile(model_file)

    model_bodies = me.get_bodies(plant, {model})
    frame_W = plant.world_frame()
    frame_B = model_bodies[0].body_frame()
    if len(plant.GetBodiesWeldedTo(plant.world_body())) < 2:
        plant.WeldFrames(frame_W,
                         frame_B,
                         X_PC=plant.GetDefaultFreeBodyPose(frame_B.body()))

    # Creating cameras:
    renderer_name = "renderer"
    scene_graph.AddRenderer(renderer_name,
                            MakeRenderEngineVtk(RenderEngineVtkParams()))

    # N.B. These properties are chosen arbitrarily.
    depth_camera = DepthRenderCamera(
        RenderCameraCore(
            renderer_name,
            CameraInfo(
                width=960,
                height=540,
                focal_x=831.382036787,
                focal_y=831.382036787,
                center_x=480,
                center_y=270,
            ),
            ClippingRange(0.01, 10.0),
            RigidTransform(),
        ),
        DepthRange(0.01, 10.0),
    )

    world_id = plant.GetBodyFrameIdOrThrow(plant.world_body().index())

    # Creating perspective cam
    X_WB = xyz_rpy_deg(
        [
            1.6 / scaling + trans_x, -1.6 / scaling + trans_y,
            1.2 / scaling + trans_z
        ],
        [-120, 0, 45],
    )
    sensor_perspective = create_camera(builder, world_id, X_WB, depth_camera,
                                       scene_graph)
    # Creating top cam
    X_WB = xyz_rpy_deg([0 + trans_x, 0 + trans_y, 2.2 / scaling + trans_z],
                       [-180, 0, -90])
    sensor_top = create_camera(builder, world_id, X_WB, depth_camera,
                               scene_graph)
    # Creating front cam
    X_WB = xyz_rpy_deg([2.2 / scaling + trans_x, 0 + trans_y, 0 + trans_z],
                       [-90, 0, 90])
    sensor_front = create_camera(builder, world_id, X_WB, depth_camera,
                                 scene_graph)
    # Creating side cam
    X_WB = xyz_rpy_deg([0 + trans_x, 2.2 / scaling + trans_y, 0 + trans_z],
                       [-90, 0, 180])
    sensor_side = create_camera(builder, world_id, X_WB, depth_camera,
                                scene_graph)
    # Creating back cam
    X_WB = xyz_rpy_deg([-2.2 / scaling + trans_x, 0 + trans_y, 0 + trans_z],
                       [-90, 0, -90])
    sensor_back = create_camera(builder, world_id, X_WB, depth_camera,
                                scene_graph)

    DrakeVisualizer.AddToBuilder(builder, scene_graph)

    # Remove gravity to avoid extra movements of the model when running the simulation
    plant.gravity_field().set_gravity_vector(
        np.array([0, 0, 0], dtype=np.float64))

    # Switch off collisions to avoid problems with random positions
    collision_filter_manager = scene_graph.collision_filter_manager()
    model_inspector = scene_graph.model_inspector()
    geometry_ids = GeometrySet(model_inspector.GetAllGeometryIds())
    collision_filter_manager.Apply(
        CollisionFilterDeclaration().ExcludeWithin(geometry_ids))

    plant.Finalize()
    diagram = builder.Build()

    simulator = Simulator(diagram)
    simulator.Initialize()

    dofs = plant.num_actuated_dofs()
    if dofs != plant.num_positions():
        raise ValueError(
            "Error on converted model: Num positions is not equal to num actuated dofs."
        )

    if pose_directory == "random_pose":
        joint_positions = [0] * dofs
        for joint_name, pose in random_poses.items():
            # check if NaN
            if pose != pose:
                pose = 0
            # drake will add '_joint' when there's a name collision
            if plant.HasJointNamed(joint_name):
                joint = plant.GetJointByName(joint_name)
            else:
                joint = plant.GetJointByName(joint_name + "_joint")
            joint_positions[joint.position_start()] = pose
        sim_plant_context = plant.GetMyContextFromRoot(
            simulator.get_mutable_context())
        plant.get_actuation_input_port(model).FixValue(sim_plant_context,
                                                       np.zeros((dofs, 1)))
        plant.SetPositions(sim_plant_context, model, joint_positions)

        simulator.AdvanceTo(1)

    generate_images_and_iou(simulator, sensor_perspective,
                            test_specific_temp_directory, pose_directory, 1)
    generate_images_and_iou(simulator, sensor_top,
                            test_specific_temp_directory, pose_directory, 2)
    generate_images_and_iou(simulator, sensor_front,
                            test_specific_temp_directory, pose_directory, 3)
    generate_images_and_iou(simulator, sensor_side,
                            test_specific_temp_directory, pose_directory, 4)
    generate_images_and_iou(simulator, sensor_back,
                            test_specific_temp_directory, pose_directory, 5)