def getResult(predictions, classes):

    boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
    if boxes is None:
        return None

    if predictions.has("pred_masks"):
        masks = np.asarray(predictions.pred_masks)
    else:
        return None

    result_msg = Result()
    # result_msg.header = self._header
    result_msg.class_ids = predictions.pred_classes.numpy() if predictions.has(
        "pred_classes") else None
    if result_msg.class_ids is None:
        return None
    result_msg.class_names = np.array(classes)[result_msg.class_ids]
    result_msg.scores = predictions.scores if predictions.has(
        "scores") else None

    for i, (x1, y1, x2, y2) in enumerate(boxes):
        mask = np.zeros(masks[i].shape, dtype="uint8")
        mask[masks[i, :, :]] = 255
        mask = br.cv2_to_imgmsg(mask)  # br: a CvBridge instance defined elsewhere
        result_msg.masks.append(mask)

        box = RegionOfInterest()
        box.x_offset = np.uint32(x1)
        box.y_offset = np.uint32(y1)
        box.height = np.uint32(y2 - y1)
        box.width = np.uint32(x2 - x1)
        result_msg.boxes.append(box)

    return result_msg
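
A minimal usage sketch for getResult(), assuming a configured Detectron2 DefaultPredictor, a BGR image from OpenCV, and that br above is a module-level CvBridge instance (Result comes from this package's own message definitions):

import cv2

# class_names would normally come from MetadataCatalog (thing_classes)
img = cv2.imread("frame.png")  # any BGR test image
outputs = predictor(img)  # predictor: a DefaultPredictor(cfg)
predictions = outputs["instances"].to("cpu")  # move predictions off the GPU
msg = getResult(predictions, class_names)
if msg is not None:
    print(msg.class_names, msg.scores)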
Example #2
    def detections_callback(self, msg):
        # Receive detections and sort them according to y-offset
        temp = self.sort_detections(msg)

        # Reorganize back into Result() object type
        # TODO: Change the rest of the code to use the by-object organization
        # above. The current layout works; reorganizing might speed things up.
        self.detections = Result()
        for class_id, class_name, score, box, mask in temp:
            self.detections.class_ids.append(class_id)
            self.detections.class_names.append(class_name)
            self.detections.scores.append(score)
            self.detections.boxes.append(box)
            self.detections.masks.append(mask)
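
sort_detections() itself is not included in this example; a minimal sketch consistent with the tuple layout unpacked above (class_id, class_name, score, box, mask), sorting by bounding-box y-offset, could look like this:

    def sort_detections(self, msg):
        # Sketch: flatten the Result message into per-object tuples and sort
        # them top-to-bottom by bounding-box y-offset (an assumption; the
        # original implementation is not shown here)
        detections = list(
            zip(msg.class_ids, msg.class_names, msg.scores, msg.boxes,
                msg.masks))
        detections.sort(key=lambda det: det[3].y_offset)
        return detections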
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0

    register_coco_instances(
        "train_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances(
        "test_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")

    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")

    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set",)
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary solution: load weights from an absolute path. After retraining,
    # the dynamically set path under cfg.OUTPUT_DIR can be used again.
    cfg.MODEL.WEIGHTS = "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth"
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set",)
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [name, id]
    #          -------------
    #          [Plate,   0]
    #          [Carrot,  1]
    #          [Celery,  2]
    #          [Pretzel, 3]
    #          [Gripper, 4]

    # Colors = [blue, green, red]
    color_plate = [0, 255, 0]  # green
    color_carrot = [255, 200, 0]  # blue
    color_celery = [0, 0, 255]  # red
    color_pretzel = [0, 220, 255]  # yellow
    color_gripper = [204, 0, 150]  # purple
    colors = list([
        color_plate, color_carrot, color_celery, color_pretzel, color_gripper
    ])

    alpha = .4

    run = updateClasses()

    rospy.sleep(1)
    print("Running...")

    clicked = False

    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()

        # If no image, return to start of loop
        if img is None:
            continue

        # Get detections
        outputs = predictor(img)
        predictions = outputs["instances"].to("cpu")

        # Get results; getResult() returns None when nothing was detected
        unsorted = run.getResult(predictions, class_names)

        # If no detections, return to start of loop
        if unsorted is None:
            continue

        # Sort detections by x and y offsets
        detections = run.multisort(unsorted)

        # Reorganize back into Result() object type
        # TODO: Change the rest of the code to use the by-object organization
        # above. The current layout works; reorganizing might speed things up.
        result = Result()
        for det in detections:
            result.class_ids.append(det.id)
            result.class_names.append(det.name)
            result.scores.append(det.score)
            result.boxes.append(det.box)
            result.masks.append(det.mask)

        result_cls = result.class_names
        result_clsId = result.class_ids
        result_scores = result.scores
        result_masks = result.masks

        # Create copies of the original image
        im = img.copy()
        output = img.copy()

        # Compute Masks
        im = run.compute_masks(im, result, colors)

        # Draw object masks on image
        cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

        # Draw object bbox, class label, and score on image
        for i in range(len(result_clsId)):
            box = result.boxes[i]
            # Draw Bounding boxes
            start_point = (box.x_offset, box.y_offset)
            end_point = (box.x_offset + box.width, box.y_offset + box.height)
            start_point2 = (box.x_offset + 2, box.y_offset + 2)
            end_point2 = (box.x_offset + box.width - 2, box.y_offset + 12)
            # color = colors[result_clsId[i]]
            color = [0, 0, 0]
            box_thickness = 1

            name = result_cls[i]
            score = result_scores[i]
            conf = round(score.item() * 100, 1)

            # Test strategy for updating model classes: flag detections below
            # 90% confidence with a "???" label
            if score > .9:
                string = f"{name}:{conf}%"
            else:
                string = f"{name}??? - {conf}%"

            try:
                win = run.get_active_window()
                win_name = win.get_wm_name()
                win_bbox = run.get_window_bbox(win)
            except Xlib.error.BadWindow:
                # The active window can vanish between queries; skip this
                # object so win_name and win_bbox are never used unbound
                print("Window vanished")
                continue

            rel_cursor, click = run.get_rel_cursor(win_bbox, win_name)

            # if cursor is within object bbox and a click, do something
            if rel_cursor[0] is not None and rel_cursor[1] is not None:
                if (start_point[0] <= rel_cursor[0] <= end_point[0]
                        and start_point[1] <= rel_cursor[1] <= end_point[1]):
                    color = [0, 220, 255]  # yellow
                    box_thickness = 2
                    contain_id = i
                else:
                    color = [0, 0, 0]
                    contain_id = None
                    box_thickness = 1

                if contain_id is not None and click == "Yes":
                    print("Object " + str(i) + " (" + str(name) +
                          ") was clicked!!!")
                    rospy.sleep(.05)  # prevent multiple clicks detected
                    # centroids, avoid = run.compute_updates(output, contain_id, result)
                    clicked = True
                    selected_id = contain_id

            # Draw
            font = cv2.FONT_HERSHEY_SIMPLEX
            org = (box.x_offset + 2, box.y_offset + 10)
            fontScale = .3
            text_thickness = 1
            output = cv2.rectangle(output, start_point, end_point, color,
                                   box_thickness)
            output = cv2.rectangle(output, start_point2, end_point2, color,
                                   -1)  # Text box
            output = cv2.putText(output, string, org, font, fontScale,
                                 [255, 255, 255], text_thickness, cv2.LINE_AA,
                                 False)

        if clicked:
            # Change selected bbox color to green
            box = result.boxes[selected_id]
            output = cv2.rectangle(
                output, (box.x_offset, box.y_offset),
                (box.x_offset + box.width, box.y_offset + box.height),
                [0, 255, 0], 2)

            # Display options to the right of the bbox
            start_x = box.x_offset + box.width + 20
            start_y = box.y_offset - 20

            x = start_x
            y = start_y + 20

            for i in range(len(class_names)):
                y += 20
                output = cv2.rectangle(output, (x, y), (x + 40, y + 12),
                                       [255, 255, 255], -1)
                output = cv2.putText(output, class_names[i], (x + 2, y + 8),
                                     font, fontScale, [0, 0, 0],
                                     text_thickness, cv2.LINE_AA, False)

            # The below is potentially for a more robust way of displaying class update options
            # for i in range(len(centroids)):
            #     if i == selected_id:
            #         circle_color = [0, 0, 255]
            #     else:
            #         circle_color = [0, 255, 0]
            #     output = cv2.circle(output, (int(centroids[i][0]), int(centroids[i][1])), 2, circle_color, 2)
            # output = cv2.circle(output, avoid, 2, [255, 0, 0], 2)
            # output = cv2.arrowedLine(output, avoid, (int(centroids[selected_id][0]), int(centroids[selected_id][1])), [255,0,0], 1)  # y-axis (tag frame)

        im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
        im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")

        # Display Image Counter
        # image_counter = image_counter + 1
        # if (image_counter % 11) == 10:
        #     rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time))

        run.publish(im_msg, result)

    return 0
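
Neither multisort() nor compute_masks() is shown in this example. The sketches below are assumptions inferred from their call sites above: multisort() is assumed to flatten a Result message into per-object records (with id, name, score, box, and mask attributes) sorted top-to-bottom then left-to-right, and compute_masks() is assumed to recolor mask pixels per class, mirroring the inline mask-coloring loop in Example #4 below.

from collections import namedtuple

import numpy as np

# Hypothetical per-object record matching the attribute names used above
Detection = namedtuple("Detection", ["id", "name", "score", "box", "mask"])

def multisort(self, msg):
    # Sketch: group the parallel Result arrays into per-object records and
    # sort by bounding-box y-offset, then x-offset
    detections = [
        Detection(*fields) for fields in zip(
            msg.class_ids, msg.class_names, msg.scores, msg.boxes, msg.masks)
    ]
    return sorted(detections,
                  key=lambda det: (det.box.y_offset, det.box.x_offset))

def compute_masks(self, im, result, colors):
    # Sketch: paint every mask pixel with its class color; self.bridge is
    # assumed to be a CvBridge instance held by the class
    for i in range(len(result.class_ids)):
        current_mask = self.bridge.imgmsg_to_cv2(result.masks[i])
        indices = np.where(current_mask == 255)
        im[indices[0], indices[1], :] = colors[result.class_ids[i]]
    return im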
Example #4
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0

    register_coco_instances(
        "train_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances(
        "test_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")

    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")

    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set",)
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary solution: load weights from an absolute path. After retraining,
    # the dynamically set path under cfg.OUTPUT_DIR can be used again.
    cfg.MODEL.WEIGHTS = "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth"
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set",)
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [name, id]
    #          -------------
    #          [Plate,   0]
    #          [Carrot,  1]
    #          [Celery,  2]
    #          [Pretzel, 3]
    #          [Gripper, 4]

    # Colors = [blue, green, red]
    color_plate = [0, 255, 0]  # green
    color_carrot = [255, 200, 0]  # blue
    color_celery = [0, 0, 255]  # red
    color_pretzel = [0, 220, 255]  # yellow
    color_gripper = [204, 0, 150]  # purple
    colors = list([
        color_plate, color_carrot, color_celery, color_pretzel, color_gripper
    ])

    alpha = .4

    run = maskRCNN()
    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()

        if img is not None:
            outputs = predictor(img)
            predictions = outputs["instances"].to("cpu")

            # Get results; getResult() returns None when nothing was detected
            unsorted = run.getResult(predictions, class_names)
            if unsorted is None:
                continue

            # Sort detections by x and y
            detections = run.sort_detections(unsorted)

            result = Result()
            for class_id, class_name, score, box, mask in detections:
                result.class_ids.append(class_id)
                result.class_names.append(class_name)
                result.scores.append(score)
                result.boxes.append(box)
                result.masks.append(mask)

            # Visualize using detectron2 built in visualizer
            # v = Visualizer(im[:, :, ::-1],
            #             metadata=train_metadata,
            #             scale=1.0
            #             # instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
            # )
            # v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            # im = v.get_image()[:, :, ::-1]
            # im_msg = bridge.cv2_to_imgmsg(im, encoding="bgr8")

            # Visualize using custom cv2 code
            if result is not None:
                result_cls = result.class_names
                result_clsId = result.class_ids
                result_scores = result.scores
                result_masks = result.masks

                # Create copies of the original image
                im = img.copy()
                output = img.copy()

                # Collect each object's mask and its pixel indices
                masks = []
                masks_indices = []
                for i in range(len(result_clsId)):
                    # Obtain current object mask as a numpy array
                    # (black and white mask of a single object)
                    current_mask = bridge.imgmsg_to_cv2(result_masks[i])
                    masks.append(current_mask)

                    # Find and store the current mask indices
                    masks_indices.append(np.where(current_mask == 255))

                if len(masks) > 0:
                    # Create composite mask
                    composite_mask = sum(masks)

                    # Clip composite mask between 0 and 255
                    composite_mask = composite_mask.clip(0, 255)

                for i in range(len(result_clsId)):
                    # Select correct object color
                    color = colors[result_clsId[i]]

                    # Change the color of the current mask object
                    im[masks_indices[i][0], masks_indices[i][1], :] = color

                # Apply alpha scaling to image to adjust opacity
                cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

                for i in range(len(result_clsId)):
                    box = result.boxes[i]
                    # Draw Bounding boxes
                    start_point = (box.x_offset, box.y_offset)
                    end_point = (box.x_offset + box.width,
                                 box.y_offset + box.height)
                    start_point2 = (box.x_offset + 2, box.y_offset + 2)
                    end_point2 = (box.x_offset + box.width - 2,
                                  box.y_offset + 12)
                    color = colors[result_clsId[i]]
                    box_thickness = 1

                    name = result_cls[i]
                    score = result_scores[i]
                    conf = round(score.item() * 100, 1)
                    string = f"{name}:{conf}%"
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    org = (box.x_offset + 2, box.y_offset + 10)
                    fontScale = .3
                    text_thickness = 1
                    output = cv2.rectangle(output, start_point, end_point,
                                           color, box_thickness)
                    output = cv2.rectangle(output, start_point2, end_point2,
                                           color, -1)  # Text box
                    output = cv2.putText(output, string, org, font, fontScale,
                                         [0, 0, 0], text_thickness,
                                         cv2.LINE_AA, False)

                im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
                im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")

                # The goal of the code below is to get N random points on the
                # mask in 3D and publish them on the cloud samples topic for GPD
                item_ids = result_clsId
                # Food items are class ids 1-3 (Carrot, Celery, Pretzel)
                idx = [i for i, e in enumerate(item_ids) if 0 < e < 4]
                numFoodItems = len(idx)

                # If no food items were detected, skip this frame
                if numFoodItems == 0:
                    continue

                mask = bridge.imgmsg_to_cv2(result_masks[idx[0]])
                coord = cv2.findNonZero(
                    mask)  # Coordinates of the mask that are on the food item

                # Pick 3 random points on the object mask
                sample_list = list()
                for ii in range(3):
                    point = Point()
                    # Sample one mask pixel so the (x, y) pair actually lies on
                    # the mask; findNonZero() returns (column, row) pairs, which
                    # is why the two indices look reversed here
                    px = random.choice(coord)[0]
                    x = px[1]
                    y = px[0]
                    depth = (run.depth_array[y, x]) / 1000  # mm to meters
                    # Deproject pixels and depth to 3D coordinates (camera frame)
                    X, Y, Z = run.convert_depth_to_phys_coord_using_realsense(
                        y, x, depth, run.cam_info)
                    # print("(x,y,z) to convert: ("+str(y)+", "+str(x)+", "+str(depth)+")")
                    # print("(X,Y,Z) converted: ("+str(X)+", "+str(Y)+", "+str(Z)+")")
                    point.x = X
                    point.y = Y
                    point.z = Z
                    sample_list.append(point)

                # print(sample_list)

                cam_source = Int64()
                cam_source.data = 0

                cloud_source = CloudSources()
                cloud_source.cloud = run.pointCloud
                cloud_source.camera_source = [cam_source]
                view_point = Point()
                view_point.x = 0.640
                view_point.y = 0.828
                view_point.z = 0.505
                # view_point.x = 0; view_point.y = 0; view_point.z = 0
                cloud_source.view_points = [view_point]

                cloud_samples = CloudSamples()
                cloud_samples.cloud_sources = cloud_source
                cloud_samples.samples = sample_list

                # Print publish info
                # print(type(cloud_source.cloud))
                # print(cloud_source.camera_source)
                # print(cloud_source.view_points)
                # print("")
                # print(type(cloud_samples.cloud_sources))
                # print(cloud_samples.samples)
                # print("-------------------------\n")

            # Display Image Counter
            # image_counter = image_counter + 1
            # if (image_counter % 11) == 10:
            #     rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time))

            run.publish(im_msg, result, cloud_samples)

    return 0
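
convert_depth_to_phys_coord_using_realsense() is not shown either. A common sketch builds pyrealsense2 intrinsics from the ROS CameraInfo message and calls rs2_deproject_pixel_to_point(); the field mapping below is an assumption to verify against the actual camera:

import pyrealsense2 as rs2

def convert_depth_to_phys_coord_using_realsense(self, x, y, depth, cameraInfo):
    # Sketch: populate RealSense intrinsics from sensor_msgs/CameraInfo and
    # deproject pixel (x, y) at the given depth (meters) to camera-frame X, Y, Z
    intrinsics = rs2.intrinsics()
    intrinsics.width = cameraInfo.width
    intrinsics.height = cameraInfo.height
    intrinsics.ppx = cameraInfo.K[2]
    intrinsics.ppy = cameraInfo.K[5]
    intrinsics.fx = cameraInfo.K[0]
    intrinsics.fy = cameraInfo.K[4]
    intrinsics.model = rs2.distortion.none  # assumes an undistorted stream
    intrinsics.coeffs = list(cameraInfo.D)
    X, Y, Z = rs2.rs2_deproject_pixel_to_point(intrinsics, [x, y], depth)
    return X, Y, Z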