Example #1
    def calcDistance(self):
        self.newObjArray = Detection3DArray()

        for obj in self.lastObj.detections:
            # keep old values
            newObj = Detection3D()
            newObj.bbox.center.position.x = obj.bbox.center.x
            newObj.bbox.center.position.y = obj.bbox.center.y
            newObj.bbox.size.x = obj.bbox.size_x
            newObj.bbox.size.y = obj.bbox.size_y

            # calculate values
            angle, angle_min, angle_max = self.calcAngle(
                obj.bbox.center.x, obj.bbox.size_x)
            distance = self.getDistance(angle, angle_min, angle_max)
            if distance is None:
                distance = 0

            # add calculated values
            newObj.bbox.center.orientation.w = angle
            newObj.bbox.center.position.z = distance

            self.newObjArray.detections.append(newObj)

        self.lastPub = self.newObjArray
        self.distance_pub.publish(self.newObjArray)
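The helpers `calcAngle` and `getDistance` are not part of the snippet. A minimal sketch of what they could look like, assuming the linear pixel-to-angle model implied by Example #2 (`angleFactor = fov / image_width`, `cam_width` in pixels) and the cached `LaserScan` in `self.lastScan`; the index math and the averaging are assumptions (requires `import math`):

    def calcAngle(self, center_x, size_x):
        # Map the bbox center and edges from pixel column to an angle,
        # measured from the optical axis (assumed linear model)
        offset = self.cam_width / 2.0
        angle = (center_x - offset) * self.angleFactor
        angle_min = (center_x - size_x / 2.0 - offset) * self.angleFactor
        angle_max = (center_x + size_x / 2.0 - offset) * self.angleFactor
        return angle, angle_min, angle_max

    def getDistance(self, angle, angle_min, angle_max):
        # Average the scan ranges covering the bbox; None if no valid return
        scan = self.lastScan
        i_min = max(int((math.radians(angle_min) - scan.angle_min) / scan.angle_increment), 0)
        i_max = max(int((math.radians(angle_max) - scan.angle_min) / scan.angle_increment), i_min)
        ranges = [r for r in scan.ranges[i_min:i_max + 1]
                  if scan.range_min <= r <= scan.range_max]
        return sum(ranges) / len(ranges) if ranges else None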
Example #2
    def __init__(self):
        # init Node
        rospy.init_node("distance_node")
        rospy.loginfo("Starting DistanceNode.")

        # Subscriber/Publisher
        rospy.Subscriber("/scan", LaserScan, self.lidar_calback)
        rospy.Subscriber("/objectDedector/objects", Detection2DArray,
                         self.obj_callback)
        self.distance_pub = rospy.Publisher("/objectDedector/3Dobjects",
                                            Detection3DArray,
                                            queue_size=1)

        # get params
        self.cam_width = float(rospy.get_param("/usb_cam/image_width"))
        self.flip_image = bool(
            rospy.get_param("/usb_cam/flip_image", default=True))
        cam_fov = float(rospy.get_param("/usb_cam/fov"))
        self.angleFactor = cam_fov / self.cam_width

        # init messages
        self.lastScan = LaserScan()
        self.lastScan.angle_increment = 1
        self.lastObj = Detection2DArray()
        self.lastPub = Detection3DArray()

        # publish image with distances
        rospy.Subscriber("/objectDedector/overlayImage", Image,
                         self.img_callback)
        self.img_pub = rospy.Publisher("/objectDedector/overlayImage3D",
                                       Image,
                                       queue_size=1)
        self.bridge = CvBridge()
        self.font = cv2.FONT_HERSHEY_SIMPLEX
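The subscriber callbacks themselves are not shown. At minimum they would cache the incoming messages; triggering `calcDistance` from `obj_callback` is an assumption, and the `lidar_calback` spelling is kept exactly as registered above:

    def lidar_calback(self, msg):
        # Cache the latest LaserScan for getDistance()
        self.lastScan = msg

    def obj_callback(self, msg):
        # Cache the latest 2D detections and recompute/publish 3D detections
        self.lastObj = msg
        self.calcDistance()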
Example #3
    def filter_dbscan(self, detections_msg):

        # Get frame_id for drone body frame: 'drone_N/camera_array' -> 'drone_N'
        frame_id = detections_msg.header.frame_id.split('/')[0] + '/base_link'

        # Return early if we only have zero or one detection
        if len(detections_msg.detections) <= 1:
            detections_msg.header.frame_id = frame_id
            return detections_msg

        positions = []
        for detection in detections_msg.detections:
            p = detection.bbox.center.position
            position = np.array([p.x, p.y]) if self.dimensions == 2 \
                else np.array([p.x, p.y, p.z])
            positions.append(position)
        positions = np.array(positions)

        # Greedily find all positions with distance less than threshold
        self.dbscan.fit(positions)

        # Merge close positions as the average of all cluster positions
        labels = np.array(self.dbscan.labels_)
        indices = set(labels)
        filtered = np.array(
            [positions[labels == i].mean(axis=0) for i in indices])

        filtered_detections = Detection3DArray()
        filtered_detections.header.frame_id = frame_id
        filtered_detections.header.stamp = detections_msg.header.stamp
        for filtered_detection in filtered:
            detection = Detection3D()
            detection.header = detections_msg.header
            detection.bbox.center.position.x = filtered_detection[0]
            detection.bbox.center.position.y = filtered_detection[1]
            if self.dimensions == 3:
                detection.bbox.center.position.z = filtered_detection[2]
            filtered_detections.detections.append(detection)

        return filtered_detections
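`self.dbscan` is used here as a pre-configured clusterer. With scikit-learn it could be set up in `__init__` as below; `min_samples=1` matters because the merge step above treats every label as a cluster, and the `eps` value is an assumption:

        from sklearn.cluster import DBSCAN

        # min_samples=1: every detection forms at least a singleton cluster,
        # so no point is labelled as noise (-1); eps is the merge distance in
        # the detection frame's units (assumed value)
        self.dbscan = DBSCAN(eps=0.5, min_samples=1)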
Example #4
    def callback_detect_3d(self, req):
        try:
            image = rospy.wait_for_message("~input", Image, timeout=2)
        except rospy.ROSException as e:
            s = "could not get image from '" + rospy.resolve_name(
                "~input") + "'"
            rospy.logerr(s)
            raise rospy.ServiceException(s)

        resp = Detect3DResponse()

        detections = self._get_detections(image)
        for d2d in detections.detections:
            d3d = Detection3D()
            d3d.header = d2d.header
            d3d.results = d2d.results
            # TODO: calculate the real bbox poses
            d3d.bbox.center.position.x = d2d.bbox.center.x - 0.5
            d3d.bbox.center.position.y = d2d.bbox.center.y - 0.5
            d3d.bbox.center.position.z = 1

            d3d.bbox.center.orientation.w = 1

            d3d.bbox.size.x = 0.1
            d3d.bbox.size.y = 0.1
            d3d.bbox.size.z = 0.1

            resp.detections.append(d3d)

        if len(detections.detections) > 0 and self.publish_detections:
            msg = Detection3DArray()
            msg.header = d3d.header
            msg.detections = resp.detections
            self.pub.publish(msg)

        return resp
Example #5
    def detections_callback(self, raw_msg):

        use_dbscan = self.config['use_dbscan']
        use_gmphd = self.config['use_gmphd']

        # Filter detections of same object from multiple cameras using DBSCAN
        # DBSCAN guarantees that we only have one observation per true object
        filtered_msg = self.filter_dbscan(raw_msg) if use_dbscan else raw_msg

        # Filter detections with GM-PHD filter
        # GM-PHD requires at most one observation per true object
        filtered_msg = self.filter_gmphd(
            filtered_msg) if use_gmphd else filtered_msg

        num_filtered = len(filtered_msg.detections)

        # Publish everything
        self.detections_pub.publish(filtered_msg)
        self.num_target_pub.publish(Int32(num_filtered))
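`self.config` is read under several keys across Examples #3, #5, and #7. One plausible way to populate it from the parameter server in `__init__`; the key names are taken from the snippets, the default values are assumptions:

        self.config = {
            'use_dbscan': rospy.get_param('~use_dbscan', True),
            'use_gmphd': rospy.get_param('~use_gmphd', True),
            'birth_weight': rospy.get_param('~birth_weight', 0.1),
            'trunc_thresh': rospy.get_param('~trunc_thresh', 1e-3),
            'merge_thresh': rospy.get_param('~merge_thresh', 1.0),
            'max_components': rospy.get_param('~max_components', 100),
            'weight_thresh': rospy.get_param('~weight_thresh', 0.5),
        }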
Example #6
    def inference(self, rgb, depth, pcl_msg):
        ## 1. Get rgb, depth, point cloud
        start_time = time.time()
        camera_header = rgb.header
        rgb = self.bridge.imgmsg_to_cv2(rgb, desired_encoding='bgr8')
        rgb = PIL.Image.fromarray(np.uint8(rgb), mode="RGB")
        rgb_img = np.uint8(rgb.resize((self.params["width"], self.params["height"]), PIL.Image.BICUBIC))

        # rgb_rect = self.bridge.imgmsg_to_cv2(rgb_rect, desired_encoding='bgr8')
        # rgb_rect = PIL.Image.fromarray(np.uint8(rgb_rect), mode="RGB")
        # rgb_rect_img = np.uint8(rgb_rect.resize((self.params["width"], self.params["height"]), PIL.Image.BICUBIC))

        depth = self.bridge.imgmsg_to_cv2(depth, desired_encoding='32FC1')
        depth = cv2.resize(depth, dsize=(self.params["width"], self.params["height"]), interpolation=cv2.INTER_AREA)

        rgb = self.rgb_transform(rgb_img).unsqueeze(0)
        print("-------------------------------------")
        # print("=> Get Input \t {}".format(time.time()-start_time))

        ## 2. Instance Segmentation using Mask R-CNN
        is_results = self.model(rgb.to(self.device))[0]
        pred_masks = is_results["masks"].cpu().detach().numpy()
        pred_boxes = is_results['boxes'].cpu().detach().numpy()
        pred_labels = is_results['labels'].cpu().detach().numpy()
        pred_scores = is_results['scores'].cpu().detach().numpy()
        boxes, scores, is_obj_ids, rgb_crops, is_masks = [], [], [], [], []
        for i, (label, (x1, y1, x2, y2), score, mask) in enumerate(zip(pred_labels, pred_boxes, pred_scores, pred_masks)):
            if score < self.params["is_thresh"]:
                continue
            if x1 < self.roi[0] or x2 > self.roi[1] or y1 < self.roi[2] or y2 > self.roi[3]:
                continue
            boxes.append(np.array([x1, y1, x2, y2]))
            scores.append(score)
            label = label-1
            is_obj_ids.append(label)
            is_masks.append(mask[0])
            if self.use_masked_crop[label]:
                mask[mask >= 0.5] = 1
                mask[mask < 0.5] = 0.3
                mask = np.repeat(mask, 3, 0)
                mask = np.transpose(mask, (1, 2, 0))
                masked_img = rgb_img * mask 
            else:
                masked_img = rgb_img 
            x = int(x1)
            y = int(y1)
            h = int(y2 - y1)
            w = int(x2 - x1)
            if self.use_sameWH_crop[label]:
                w_offset = int(np.maximum(h, w) * self.pad_factors[label])
                h_offset = int(np.maximum(h, w) * self.pad_factors[label])
            else:
                w_offset = int(w * self.pad_factors[label])
                h_offset = int(h * self.pad_factors[label])
            left = np.maximum(x + w//2 - w_offset//2, 0)
            right = x + w//2 + w_offset//2
            top = np.maximum(y + h//2 - h_offset//2, 0)
            bottom = y + h//2 + h_offset//2
            rgb_crop = masked_img[top:bottom, left:right].copy()
            if self.params["black_borders"]:
                rgb_crop[:(y-top),:] = 0
                rgb_crop[(y+h-top):,:] = 0
                rgb_crop[:,:(x-left)] = 0
                rgb_crop[:,(x+w-left):] = 0
            rgb_crop = cv2.resize(rgb_crop, (128, 128), interpolation=cv2.INTER_NEAREST)
            rgb_crops.append(rgb_crop)
        # print("=> Inst Seg \t {}".format(time.time()-start_time))

        ## 3. 6D Object Pose Estimation using MPAAE
        if len(is_masks) == 0:
            return
        all_pose_estimates = []
        for i, (box, score, label, rgb_crop) in enumerate(zip(boxes, scores, is_obj_ids, rgb_crops)):
            box_xywh = [box[0], box[1], box[2]-box[0], box[3]-box[1]]
            H_aae = np.eye(4)
            class_idx = self.pe_class_names.index(self.is_class_names[label])
            R_est, t_est, _ = self.codebook.auto_pose6d(self.sess, 
                                                    rgb_crop, 
                                                    box_xywh, 
                                                    self.intrinsic_matrix, 
                                                    1, 
                                                    self.train_args,
                                                    self.codebook._get_codebook_name(self.ply_centered_paths[class_idx]),
                                                    refine=False)
            H_aae[:3,:3] = R_est
            H_aae[:3, 3] = t_est 
            all_pose_estimates.append(H_aae)     
        # print("=> Pose Est \t {}".format(time.time()-start_time))

        ## 4. 6D Object Pose Refinement using ICP 
        if "merged" in self.params["point"]:
            cloud_map = convert_ros_to_o3d(pcl_msg) 
            cloud_cam = do_transform_o3d_cloud(copy.deepcopy(cloud_map), self.transform_map_to_cam)
        else:
            cloud_cam = convert_ros_to_o3d(pcl_msg, remove_nans=True) 

        aae_trans = []
        aae_rot = []
        icp_trans = []
        icp_rot = []
        for i, (pose_estimate, label) in enumerate(zip(all_pose_estimates, is_obj_ids)):
            # crop cloud with instance mask   
            is_mask = is_masks[i].copy()
            is_mask[is_mask < 0.5] = 0
            cloud_cam_obj = crop_o3d_cloud_with_mask(cloud_cam, is_mask, camera_info=self.camera_info)
            pe_obj_id = self.pe_class_names.index(self.is_class_names[label])
            # cloud_cam_obj, index = cloud_cam_obj.remove_statistical_outlier(nb_neighbors = 50, std_ratio=0.2)
            
            # transform cloud_obj to the origin of camera frame
            cloud_obj = copy.deepcopy(self.cloud_objs[pe_obj_id])
            H_obj2cam = np.eye(4)
            # add centroids for misaligned CAD
            H_obj2cam[:3, 3] = - cloud_obj.get_center() + self.centroids[pe_obj_id]
            cloud_obj = cloud_obj.transform(H_obj2cam)
            # transform cloud_obj to the estimated 6d pose
            H_aae_cam2obj = np.eye(4)
            H_aae_cam2obj[:3, :3] = pose_estimate[:3, :3]
            H_aae_cam2obj[:3, 3] = pose_estimate[:3, 3]   # align scale
            cloud_obj = cloud_obj.transform(H_aae_cam2obj)
            # translate cloud_obj to the centroid of cloud cam
            H_obj_to_cam_centroid = cloud_cam_obj.get_center() - cloud_obj.get_center()
            # print(H_aae_cam2obj[:3, 3][2], H_obj_to_cam_centroid[2])
            H_aae_cam2obj[:3, 3] = H_aae_cam2obj[:3, 3] + H_obj_to_cam_centroid
            all_pose_estimates[i][:3, 3] = H_aae_cam2obj[:3, 3] * 1000
            cloud_obj = cloud_obj.translate(H_obj_to_cam_centroid)
            # o3d.visualization.draw_geometries([cloud_obj, cloud_cam_obj])
            
            # icp refinement
            icp_result, residual = icp_refinement_with_ppf_match(
                cloud_obj, cloud_cam_obj,
                n_points=self.params["n_points"], n_iter=self.params["n_iter"],
                tolerance=self.params["tolerance"], num_levels=self.params["num_levels"])
            # print("{}: \t res: {}".format(self.is_class_names[label], residual))
            if residual < 1:
                H_refined_cam2obj = np.matmul(icp_result, H_aae_cam2obj)
            else:
                H_refined_cam2obj = H_aae_cam2obj

            all_pose_estimates[i][:3, :3] = H_refined_cam2obj[:3, :3]
            T_centroid = np.eye(4)
            T_centroid[:3, 3] = self.centroids[pe_obj_id]

            translation = H_aae_cam2obj[:3, 3] 
            rotation = tf_trans.quaternion_from_matrix(H_aae_cam2obj)
            aae_trans.append(translation)
            aae_rot.append(rotation)

            translation = H_refined_cam2obj[:3, 3] 
            rotation = tf_trans.quaternion_from_matrix(H_refined_cam2obj)
            icp_trans.append(translation)
            icp_rot.append(rotation)
        # print("=> ICP \t {}".format(time.time()-start_time))

        # 5. calculate reprojection error (VSD) and publish the detection3D
        pe_obj_ids, pe_masks, pe_depth = self.reproject_pe(self.pose_aae_vis_pub, all_pose_estimates, is_obj_ids, boxes, scores, rgb_img)
        vsd_errors = self.get_vsd_error(self.vis_vsd_costmap, is_masks, is_obj_ids, depth, pe_masks, pe_obj_ids, pe_depth)
        cou_errors = self.get_cou_error(self.vis_cou_costmap, is_masks, is_obj_ids, depth, pe_masks, pe_obj_ids, pe_depth)
        reproj_errors = np.mean(np.vstack([vsd_errors, cou_errors]), axis=0)
        aae_detections_array = Detection3DArray()
        aae_detections_array.header = camera_header
        icp_detections_array = Detection3DArray()
        icp_detections_array.header = camera_header
        # print("=> Get reprojErr \t {}".format(time.time()-start_time))
        for i, is_obj_id in enumerate(is_obj_ids):
            class_id = self.pe_class_names.index(self.is_class_names[is_obj_id])
            aae_pose_msg, aae_detection = self.gather_pose_results(camera_header, class_id, aae_trans[i], aae_rot[i], reproj_errors[i])
            icp_pose_msg, icp_detection = self.gather_pose_results(camera_header, class_id, icp_trans[i], icp_rot[i], reproj_errors[i])
            aae_detections_array.detections.append(aae_detection)
            icp_detections_array.detections.append(icp_detection)

        self.publish_vis_is(rgb_img, is_masks, boxes, is_obj_ids, scores)
        self.aae_detections_pub.publish(aae_detections_array)
        self.icp_detections_pub.publish(icp_detections_array)
        self.publish_markers(self.aae_markers_pub, aae_detections_array, [13, 255, 128])
        self.publish_markers(self.icp_markers_pub, icp_detections_array, [128, 128, 255])
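`convert_ros_to_o3d` is project-specific. A minimal sketch of the conversion under the assumption that only the xyz fields are needed; `remove_nans` mirrors the call above:

import numpy as np
import open3d as o3d
import sensor_msgs.point_cloud2 as pc2

def convert_ros_to_o3d(pcl_msg, remove_nans=False):
    # Read xyz from a sensor_msgs/PointCloud2 into an Open3D point cloud
    points = np.array(list(pc2.read_points(
        pcl_msg, field_names=("x", "y", "z"), skip_nans=remove_nans)))
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)
    return cloud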
Example #7
    def filter_gmphd(self, detections_msg):

        # Check when the last detection message arrived
        if self.last_detections_msg is None:
            self.last_detections_msg = detections_msg
            rospy.loginfo('GMPHD filter: last detection message initialized')
            return detections_msg

        # Compute time difference (dt) from last message timestamp
        last_detection_time = self.last_detections_msg.header.stamp.to_sec()
        this_detection_time = detections_msg.header.stamp.to_sec()
        dt = this_detection_time - last_detection_time
        self.last_detections_msg = detections_msg

        # Set dt for those GM-PHD filters that have it (e.g. EK-PHD)
        if hasattr(self.gmphd, 'dt'):
            self.gmphd.dt = dt

        # Update process noise covariance matrix (which depends on dt)
        self.gmphd.Q = self._compute_process_noise_covariance(dt)

        # Generate birth components for PHD filter
        birth_components = []
        for detection in detections_msg.detections:
            birth_component = deepcopy(self.gmphd.birth_component)
            p = detection.bbox.center.position
            birth_component.weight = self.config['birth_weight']
            birth_component.mean[0] = p.x
            birth_component.mean[1] = p.y
            if self.dimensions == 3:
                birth_component.mean[2] = p.z
            birth_components.append(birth_component)
        self.gmphd.birth_components = birth_components

        # Convert observations from cartesian into spherical coordinates
        observations = []
        for detection in detections_msg.detections:
            p = detection.bbox.center.position
            observation = cartesian2polar([p.x, p.y]) if self.dimensions == 2 \
                else cartesian2spherical([p.x, p.y, p.z])
            observations.append(observation)
        observations = np.array(observations)

        # Prepare control inputs (estimated velocity)
        control_inputs = np.zeros(self.gmphd.dim_x)
        if self.velocity is not None:
            control_inputs[:self.dimensions] = self.velocity[:self.dimensions]
        control_inputs = control_inputs[..., np.newaxis]

        # Update filter
        self.gmphd.filter(observations, control_inputs)

        # Prune
        self.gmphd.prune(trunc_thresh=self.config['trunc_thresh'],
                         merge_thresh=self.config['merge_thresh'],
                         max_components=self.config['max_components'])

        # Get frame_id for drone body frame: 'drone_N/camera_array' -> 'drone_N'
        frame_id = detections_msg.header.frame_id.split('/')[0] + '/base_link'

        # Publish the biggest component as a PoseWithCovarianceStamped (for visualization only!)
        if len(self.gmphd.components) > 0:
            comp = self.gmphd.components[0]
            pose_cov_msg = PoseWithCovarianceStamped()
            pose_cov_msg.header.frame_id = frame_id
            pose_cov_msg.header.stamp = rospy.Time.now()

            # Use mean as position
            pose_cov_msg.pose.pose.position.x = comp.mean[0]
            pose_cov_msg.pose.pose.position.y = comp.mean[1]
            pose_cov_msg.pose.pose.position.z = comp.mean[2]

            # Prepare covariance matrix (6x6 row-major); only the position
            # covariance is needed
            covariance = np.zeros((6, 6))
            dimz = self.gmphd.dim_z
            covariance[:dimz, :dimz] = comp.cov[:dimz, :dimz]
            pose_cov_msg.pose.covariance = list(covariance.flatten())

            self.pose_cov_pub.publish(pose_cov_msg)

        filtered_detections = Detection3DArray()
        filtered_detections.header.frame_id = frame_id
        filtered_detections.header.stamp = rospy.Time.now()
        for comp in self.gmphd.components:

            # Only report components above weight threshold as detections
            if comp.weight < self.config['weight_thresh']:
                continue

            detection = Detection3D()
            detection.header = detections_msg.header
            detection.bbox.center.position.x = comp.mean[0]
            detection.bbox.center.position.y = comp.mean[1]
            detection.bbox.size.x = 2 * np.sqrt(comp.cov[0, 0])
            detection.bbox.size.y = 2 * np.sqrt(comp.cov[1, 1])
            detection.bbox.size.z = 0.1  # rviz complains about scale 0 otherwise
            if self.dimensions == 3:
                detection.bbox.center.position.z = comp.mean[2]
                detection.bbox.size.z = 2 * np.sqrt(comp.cov[2, 2])

            filtered_detections.detections.append(detection)

        return filtered_detections
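`_compute_process_noise_covariance` is not shown. A sketch assuming a constant-velocity state layout `[positions, velocities]` with the standard discrete white-noise-acceleration model; the `process_noise` config key is hypothetical:

    def _compute_process_noise_covariance(self, dt):
        # Discrete white-noise-acceleration Q for a [positions, velocities] state
        q = self.config.get('process_noise', 1.0)  # acceleration variance (assumed key)
        d = self.dimensions
        Q = np.zeros((2 * d, 2 * d))
        Q[:d, :d] = np.eye(d) * dt**4 / 4.0
        Q[:d, d:] = np.eye(d) * dt**3 / 2.0
        Q[d:, :d] = np.eye(d) * dt**3 / 2.0
        Q[d:, d:] = np.eye(d) * dt**2
        return Q * q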
Example #8
    def detections_callback(self, detections_msg):
        rospy.loginfo_once('Got detections')

        out_detections_msg = Detection3DArray()
        out_detections_msg.header.stamp = rospy.Time.now()
        out_detections_msg.header.frame_id = detections_msg.header.frame_id

        for detection in detections_msg.detections:

            # Get localizer based on detection frame_id
            frame_id = detection.header.frame_id  # drone_X/camera_X_optical
            camera_ns = frame_id.split('/')[-1].replace('_optical', '')  # camera_X
            localizer = self.localizers[camera_ns]

            # Calculate unit-norm bearing from bounding box and physical object size
            bbox_center = (detection.bbox.center.x, detection.bbox.center.y)
            bbox_size = (detection.bbox.size_x, detection.bbox.size_y)
            object_width = self.config['object_width']
            object_height = self.config['object_height']
            object_depth = self.config['object_depth']
            object_size = (object_width, object_height, object_depth)
            bearing = localizer.detection_to_bearing(bbox_center, bbox_size,
                                                     object_size)

            # Transform bearing vector from camera/optical frame to body/base_link frame
            source_frame = detection.header.frame_id  # drone_X/camera_X_optical
            target_frame = source_frame.split('/')[0] + '/base_link'

            try:
                transform = self.buffer.lookup_transform(
                    target_frame=target_frame,
                    source_frame=source_frame,
                    time=rospy.Time(0))
            except Exception as e:
                rospy.logwarn(e)
                continue

            point = PointStamped()
            point.header.frame_id = source_frame
            point.header.stamp = rospy.Time.now()
            point.point.x = bearing[0]
            point.point.y = bearing[1]
            point.point.z = bearing[2]

            point_tf = tf2_geometry_msgs.do_transform_point(point, transform)

            out_detection = Detection3D()
            out_detection.header = point_tf.header

            out_detection.bbox.center.position.x = point_tf.point.x
            out_detection.bbox.center.position.y = point_tf.point.y
            out_detection.bbox.center.position.z = point_tf.point.z

            out_detection.bbox.size.x = object_depth
            out_detection.bbox.size.y = object_width
            out_detection.bbox.size.z = object_height

            out_detections_msg.detections.append(out_detection)

        self.detections_pub.publish(out_detections_msg)
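`localizer.detection_to_bearing` is not shown either. A pinhole-camera sketch of the idea: estimate range from the apparent width via similar triangles, then scale the back-projected ray. The intrinsics attributes `fx`, `fy`, `cx`, `cy` are assumptions:

    def detection_to_bearing(self, bbox_center, bbox_size, object_size):
        # Range from similar triangles: focal length * real width / pixel width
        distance = self.fx * object_size[0] / bbox_size[0]
        # Back-project the bbox center into a ray in the optical frame
        ray = np.array([(bbox_center[0] - self.cx) / self.fx,
                        (bbox_center[1] - self.cy) / self.fy,
                        1.0])
        return ray / np.linalg.norm(ray) * distance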
Example #9
    def image_callback(self, image_msg, camera_info):
        """Image callback"""

        img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8")
        # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # for debugging

        # Update camera matrix and distortion coefficients
        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        detection_array.header = image_msg.header

        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                loc = result["location"]
                ori = result["quaternion"]

                # transform orientation
                transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                pose_msg.header = image_msg.header
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]

                
                # Publish
                self.pubs[m].publish(pose_msg)
                self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points']:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                np.array(im)[..., ::-1],
                "bgr8"
            )
        )
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)

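`rotate_vector` appears in Examples #9 through #12 without a definition. One common implementation with `tf.transformations`, rotating a 3-vector by an `(x, y, z, w)` quaternion:

import numpy as np
import tf.transformations as tft

def rotate_vector(vector, quaternion):
    # q * v * q^-1 with the vector embedded as a pure quaternion
    q = np.asarray(quaternion, dtype=np.float64)
    v = np.append(np.asarray(vector, dtype=np.float64), 0.0)
    rotated = tft.quaternion_multiply(tft.quaternion_multiply(q, v),
                                      tft.quaternion_conjugate(q))
    return rotated[:3]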
Example #10
    def run_on_image(self, image_filename, category_names_to_id, output_image_filepath):
        
        camera_info = self.camera_info

        if not self.info_manager.isCalibrated():
            rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!')
        img = cv2.imread(image_filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Update camera matrix and distortion coefficients
        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        detection_array.header.frame_id = "camera"
        annotations = []
        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )
            print("results : {}".format(results))

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                loc = result["location"]
                ori = result["quaternion"]

                # transform orientation
                transformed_ori = quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                pose_msg.header.stamp = rospy.Time.now()
                pose_msg.header.frame_id = "camera"
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]

                annotations.append({
                                'location' : loc,
                                'quaternion_xyzw' : transformed_ori.tolist(),
                                'category_id' : category_names_to_id[m],
                                'id' : i_r
                            })
                # Publish
                self.pubs[m].publish(pose_msg)
                # self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points']:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        final_im = np.array(im)[..., ::-1]
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                final_im,
                "bgr8"
            )
        )
        cv2.imwrite(output_image_filepath, final_im)
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)
        return annotations
Example #11
    def run_on_image_icp(self, image_filename, category_names_to_id,
                         cloud_in, output_image_filepath, publish_cloud=True):
        """Run DOPE on a single image and refine each pose with ICP."""

        camera_info = self.camera_info

        if not self.info_manager.isCalibrated():
            rospy.logwarn('Camera is not calibrated, please supply a valid camera_info_url parameter!')
        img = cv2.imread(image_filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Update camera matrix and distortion coefficients
        if self.input_is_rectified:
            P = np.matrix(camera_info.P, dtype='float64')
            P.resize((3, 4))
            camera_matrix = P[:, :3]
            dist_coeffs = np.zeros((4, 1))
        else:
            camera_matrix = np.matrix(camera_info.K, dtype='float64')
            camera_matrix.resize((3, 3))
            dist_coeffs = np.matrix(camera_info.D, dtype='float64')
            dist_coeffs.resize((len(camera_info.D), 1))

        # Downscale image if necessary
        height, width, _ = img.shape
        scaling_factor = float(self.downscale_height) / height
        if scaling_factor < 1.0:
            camera_matrix[:2] *= scaling_factor
            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))

        for m in self.models:
            self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
            self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)

        # Copy and draw image
        img_copy = img.copy()
        im = Image.fromarray(img_copy)
        draw = Draw(im)

        detection_array = Detection3DArray()
        detection_array.header = "camera"
        annotations = []

        start_time = time.time()

        for m in self.models:
            # Detect object
            results = ObjectDetector.detect_object_in_image(
                self.models[m].net,
                self.pnp_solvers[m],
                img,
                self.config_detect
            )

            # Publish pose and overlay cube on image
            for i_r, result in enumerate(results):
                if result["location"] is None:
                    continue
                print(result)
                loc = result["location"]
                ori = result["quaternion"]
                CONVERT_SCALE_CM_TO_METERS = 100
                pose_frame = "camera"

                rospy.logwarn("Doing ICP for result : {}, {}".format(i_r, result["name"]))
                loc_scale = np.array(loc) / CONVERT_SCALE_CM_TO_METERS
                R = tf.transformations.quaternion_matrix(ori)
                T = tf.transformations.translation_matrix(loc_scale)

                total_transform = tf.transformations.concatenate_matrices(T, R)
                cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform)
                cloud_color = np.zeros(cloud_filtered_array.shape[0])
                ros_msg = self.xyzrgb_array_to_pointcloud2(
                    cloud_filtered_array, cloud_color, rospy.Time.now(), "camera"
                )
                # self.pub_pose_cloud.publish(ros_msg)
                
                # rospy.logwarn("Num points after downsample and filter : {}".format(cloud_filtered_array.shape[0]))
                
                cloud_pose = pcl.PointCloud()
                cloud_pose.from_array(cloud_filtered_array)

                icp = cloud_pose.make_GeneralizedIterativeClosestPoint()
                converged, transf, estimate, fitness = icp.gicp(cloud_pose, cloud_in, max_iter=25)
                print('has converged: {} score: {}'.format(converged, fitness))
                print(transf)
                total_transform_icp = tf.transformations.concatenate_matrices(transf, total_transform)
                print(total_transform_icp)

                loc = tf.transformations.translation_from_matrix(total_transform_icp) * 100
                ori = tf.transformations.quaternion_from_matrix(total_transform_icp)
                pose_frame = "camera"

                if publish_cloud:
                    cloud_filtered_array = self.transform_cloud(self.mesh_clouds[result["name"]], mat=total_transform_icp)
                    cloud_color = np.zeros(cloud_filtered_array.shape[0])
                    ros_msg = self.xyzrgb_array_to_pointcloud2(
                        cloud_filtered_array, cloud_color, rospy.Time.now(), "camera"
                    )
                    self.pub_pose_cloud.publish(ros_msg)

                # transform orientation
                transformed_ori = tf.transformations.quaternion_multiply(ori, self.model_transforms[m])

                # rotate bbox dimensions if necessary
                # (this only works properly if model_transform is in 90 degree angles)
                dims = rotate_vector(vector=self.dimensions[m], quaternion=self.model_transforms[m])
                dims = np.absolute(dims)
                dims = tuple(dims)

                pose_msg = PoseStamped()
                pose_msg.header.stamp = rospy.Time.now()
                pose_msg.header.frame_id = pose_frame
                pose_msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                pose_msg.pose.orientation.x = transformed_ori[0]
                pose_msg.pose.orientation.y = transformed_ori[1]
                pose_msg.pose.orientation.z = transformed_ori[2]
                pose_msg.pose.orientation.w = transformed_ori[3]


                annotations.append({
                                'location' : loc,
                                'quaternion_xyzw' : transformed_ori.tolist(),
                                'category_id' : category_names_to_id[m],
                                'id' : i_r
                            })
                # Publish
                self.pubs[m].publish(pose_msg)
                self.pub_dimension[m].publish(str(dims))

                # Add to Detection3DArray
                detection = Detection3D()
                hypothesis = ObjectHypothesisWithPose()
                hypothesis.id = self.class_ids[result["name"]]
                hypothesis.score = result["score"]
                hypothesis.pose.pose = pose_msg.pose
                detection.results.append(hypothesis)
                detection.bbox.center = pose_msg.pose
                detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                detection_array.detections.append(detection)

                # Draw the cube
                if None not in result['projected_points'] and publish_cloud:
                    points2d = []
                    for pair in result['projected_points']:
                        points2d.append(tuple(pair))
                    draw.draw_cube(points2d, self.draw_colors[m])

        # Publish the image with results overlaid
        runtime = time.time() - start_time
        final_im = np.array(im)[..., ::-1]
        self.pub_rgb_dope_points.publish(
            CvBridge().cv2_to_imgmsg(
                final_im,
                "bgr8"
            )
        )
        cv2.imwrite(output_image_filepath, final_im)
        self.pub_detections.publish(detection_array)
        self.publish_markers(detection_array)

        return annotations, runtime
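`xyzrgb_array_to_pointcloud2` is also project-specific. A simplified sketch that packs only the xyz part into a `PointCloud2`; the `colors` argument is accepted but deliberately ignored here:

    def xyzrgb_array_to_pointcloud2(self, points, colors, stamp, frame_id):
        # Build a PointCloud2 from an Nx3 float array (colors dropped in this sketch)
        from std_msgs.msg import Header
        from sensor_msgs.msg import PointField
        import sensor_msgs.point_cloud2 as pc2
        header = Header(stamp=stamp, frame_id=frame_id)
        fields = [PointField('x', 0, PointField.FLOAT32, 1),
                  PointField('y', 4, PointField.FLOAT32, 1),
                  PointField('z', 8, PointField.FLOAT32, 1)]
        return pc2.create_cloud(header, fields, points.astype(np.float32))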
Example #12
def run_dope_node(params, freq=5):
    '''Starts ROS node to listen to image topic, run DOPE, and publish DOPE results'''

    global g_img
    global g_draw

    pubs = {}
    models = {}
    pnp_solvers = {}
    pub_dimension = {}
    draw_colors = {}
    class_ids = {}
    model_transforms = {}
    dimensions = {}
    mesh_scales = {}
    meshes = {}

    # Initialize parameters
    matrix_camera = np.zeros((3,3))
    matrix_camera[0,0] = params["camera_settings"]['fx']
    matrix_camera[1,1] = params["camera_settings"]['fy']
    matrix_camera[0,2] = params["camera_settings"]['cx']
    matrix_camera[1,2] = params["camera_settings"]['cy']
    matrix_camera[2,2] = 1
    dist_coeffs = np.zeros((4,1))

    if "dist_coeffs" in params["camera_settings"]:
        dist_coeffs = np.array(params["camera_settings"]['dist_coeffs'])
    config_detect = lambda: None  # cheap attribute container for detector settings
    config_detect.mask_edges = 1
    config_detect.mask_faces = 1
    config_detect.vertex = 1
    config_detect.threshold = 0.5
    config_detect.softmax = 1000
    config_detect.thresh_angle = params['thresh_angle']
    config_detect.thresh_map = params['thresh_map']
    config_detect.sigma = params['sigma']
    config_detect.thresh_points = params["thresh_points"]

    # For each object to detect, load network model, create PNP solver, and start ROS publishers
    for model in params['weights']:
        models[model] =\
            ModelData(
                model, 
                g_path2package + "/weights/" + params['weights'][model]
            )
        models[model].load_net_model()
        mesh_scales[model] = 1.0
        model_transforms[model] = np.array([0.0, 0.0, 0.0, 1.0], dtype='float64')
        draw_colors[model] = \
            tuple(params["draw_colors"][model])
        class_ids[model] = \
            (params["class_ids"][model])
        dimensions[model] = tuple(params["dimensions"][model])
        pnp_solvers[model] = \
            CuboidPNPSolver(
                model,
                matrix_camera,
                Cuboid3d(params['dimensions'][model]),
                dist_coeffs=dist_coeffs
            )
        pubs[model] = \
            rospy.Publisher(
                '{}/pose_{}'.format(params['topic_publishing'], model), 
                PoseStamped, 
                queue_size=10
            )
        pub_dimension[model] = \
            rospy.Publisher(
                '{}/dimension_{}'.format(params['topic_publishing'], model),
                String, 
                queue_size=10
            )

    # Start ROS publisher
    pub_rgb_dope_points = \
        rospy.Publisher(
            params['topic_publishing']+"/rgb_points", 
            ImageSensor_msg, 
            queue_size=10
        )
    pub_detections = \
        rospy.Publisher(
            'detected_objects',
            Detection3DArray,
            queue_size=10
        )
    pub_markers = \
        rospy.Publisher(
            'markers',
            MarkerArray,
            queue_size=10
        )
    
    # Start ROS listener (camera topic name assumed to come from params)
    topic_cam = params['topic_camera']
    rospy.Subscriber(
        topic_cam,
        ImageSensor_msg,
        __image_callback
    )

    # Initialize ROS node
    rospy.init_node('dope_vis', anonymous=True)
    rate = rospy.Rate(freq)

    print ("Running DOPE...  (Listening to camera topic: '{}')".format(topic_cam)) 
    print ("Ctrl-C to stop")

    while not rospy.is_shutdown():
        if g_img is not None:
            # Copy and draw image
            img_copy = g_img.copy()
            im = Image.fromarray(img_copy)
            g_draw = ImageDraw.Draw(im)

            detection_array = Detection3DArray()
            detection_array.header.frame_id = params["frame_id"]
            detection_array.header.stamp = rospy.Time.now()

            for m in models:
                # Detect object
                results = ObjectDetector.detect_object_in_image(
                            models[m].net, 
                            pnp_solvers[m],
                            g_img,
                            config_detect
                            )
                
                # Publish pose and overlay cube on image
                for i_r, result in enumerate(results):
                    if result["location"] is None:
                        continue
                    loc = result["location"]
                    ori = result["quaternion"]  

                    # transform orientation
                    transformed_ori = tf.transformations.quaternion_multiply(ori, model_transforms[m])

                    # rotate bbox dimensions if necessary
                    # (this only works properly if model_transform is in 90 degree angles)
                    dims = rotate_vector(vector=dimensions[m], quaternion=model_transforms[m])
                    dims = np.absolute(dims)
                    dims = tuple(dims)
                                    
                    msg = PoseStamped()
                    msg.header.frame_id = params["frame_id"]
                    msg.header.stamp = rospy.Time.now()
                    CONVERT_SCALE_CM_TO_METERS = 100
                    msg.pose.position.x = loc[0] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.position.y = loc[1] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.position.z = loc[2] / CONVERT_SCALE_CM_TO_METERS
                    msg.pose.orientation.x = transformed_ori[0]
                    msg.pose.orientation.y = transformed_ori[1]
                    msg.pose.orientation.z = transformed_ori[2]
                    msg.pose.orientation.w = transformed_ori[3]

                    # Publish
                    pubs[m].publish(msg)
                    pub_dimension[m].publish(str(params['dimensions'][m]))

                    # Add to Detection3DArray
                    detection = Detection3D()
                    hypothesis = ObjectHypothesisWithPose()
                    hypothesis.id = class_ids[result["name"]]
                    hypothesis.score = result["score"]
                    hypothesis.pose.pose = msg.pose
                    detection.results.append(hypothesis)
                    detection.bbox.center = msg.pose
                    detection.bbox.size.x = dims[0] / CONVERT_SCALE_CM_TO_METERS
                    detection.bbox.size.y = dims[1] / CONVERT_SCALE_CM_TO_METERS
                    detection.bbox.size.z = dims[2] / CONVERT_SCALE_CM_TO_METERS
                    detection_array.detections.append(detection)

                    # Draw the cube
                    if None not in result['projected_points']:
                        points2d = []
                        for pair in result['projected_points']:
                            points2d.append(tuple(pair))
                        DrawCube(points2d, draw_colors[m])
                
            # Publish the image with results overlaid
            pub_rgb_dope_points.publish(
                CvBridge().cv2_to_imgmsg(
                    np.array(im)[...,::-1], 
                    "bgr8"
                )
            )

            # Delete all existing markers
            markers = MarkerArray()
            marker = Marker()
            marker.action = Marker.DELETEALL
            markers.markers.append(marker)
            pub_markers.publish(markers)

            # Object markers
            class_id_to_name = {class_id: name for name, class_id in class_ids.items()}
            markers = MarkerArray()
            for i, det in enumerate(detection_array.detections):
                name = class_id_to_name[det.results[0].id]
                color = draw_colors[name]

                # cube marker
                marker = Marker()
                marker.header = detection_array.header
                marker.action = Marker.ADD
                marker.pose = det.bbox.center
                marker.color.r = color[0] / 255.0
                marker.color.g = color[1] / 255.0
                marker.color.b = color[2] / 255.0
                marker.color.a = 0.4
                marker.ns = "bboxes"
                marker.id = i
                marker.type = Marker.CUBE
                marker.scale = det.bbox.size
                markers.markers.append(marker)
            
            pub_markers.publish(markers)
            pub_detections.publish(detection_array)

        rate.sleep()
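A minimal entry point for `run_dope_node`, assuming the parameters live in a DOPE-style YAML file; the file name and loading mechanism are assumptions:

if __name__ == '__main__':
    import yaml

    # Hypothetical config path; run_dope_node calls rospy.init_node itself
    with open('config/config_pose.yaml') as f:
        params = yaml.safe_load(f)
    try:
        run_dope_node(params)
    except rospy.ROSInterruptException:
        pass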