def getResult(predictions, classes): boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None if predictions.has("pred_masks"): masks = np.asarray(predictions.pred_masks) #print(type(masks)) else: return result_msg = Result() # result_msg.header = self._header result_msg.class_ids = predictions.pred_classes if predictions.has( "pred_classes") else None result_msg.class_names = np.array(classes)[result_msg.class_ids.numpy()] result_msg.scores = predictions.scores if predictions.has( "scores") else None for i, (x1, y1, x2, y2) in enumerate(boxes): mask = np.zeros(masks[i].shape, dtype="uint8") mask[masks[i, :, :]] = 255 mask = br.cv2_to_imgmsg(mask) result_msg.masks.append(mask) box = RegionOfInterest() box.x_offset = np.uint32(x1) box.y_offset = np.uint32(y1) box.height = np.uint32(y2 - y1) box.width = np.uint32(x2 - x1) result_msg.boxes.append(box) return result_msg
def detections_callback(self, msg): # Receive detections and sort them according to y-offset temp = self.sort_detections(msg) # Reorganize back into Result() object type # TODO: Change the rest of the code to use the above organization (by object). It works well for now, it just might speed it up. self.detections = Result() for i in range(len(temp)): self.detections.class_ids.append(temp[i][0]) self.detections.class_names.append(temp[i][1]) self.detections.scores.append(temp[i][2]) self.detections.boxes.append(temp[i][3]) self.detections.masks.append(temp[i][4])
def main(): """ Mask RCNN Object Detection with Detectron2 """ rospy.init_node("mask_rcnn", anonymous=True) bridge = CvBridge() start_time = time.time() image_counter = 0 register_coco_instances( "train_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train") register_coco_instances( "test_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test") train_metadata = MetadataCatalog.get("train_set") print(train_metadata) dataset_dicts_train = DatasetCatalog.get("train_set") test_metadata = MetadataCatalog.get("test_set") print(test_metadata) dataset_dicts_test = DatasetCatalog.get("test_set") cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) cfg.DATASETS.TRAIN = ("train_set") cfg.DATASETS.TEST = () # no metrics implemented for this dataset cfg.DATALOADER.NUM_WORKERS = 4 cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" ) # initialize from model zoo cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.01 cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough, but you can certainly train longer cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( 128) # faster, and good enough for this toy dataset cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5 # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper) # Temporary Solution. If I train again I think I can use the dynamically set path again cfg.MODEL.WEIGHTS = os.path.join( cfg.OUTPUT_DIR, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth" ) # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 # set the testing threshold for this model cfg.DATASETS.TEST = ("test_set") predictor = DefaultPredictor(cfg) class_names = MetadataCatalog.get("train_set").thing_classes # Set up custom cv2 visualization parameters # Classes: [name, id] # - # [Plate, 0] # [Carrot, 1] # [Celery, 2] # [Pretzel, 3] # [Gripper, 4] # Colors = [blue, green, red] color_plate = [0, 255, 0] # green color_carrot = [255, 200, 0] # blue color_celery = [0, 0, 255] # red color_pretzel = [0, 220, 255] # yellow color_gripper = [204, 0, 150] # purple colors = list([ color_plate, color_carrot, color_celery, color_pretzel, color_gripper ]) alpha = .4 run = updateClasses() rospy.sleep(1) print("Running...") clicked = False while not rospy.is_shutdown(): # Get images img = run.get_img() # If no image, return to start of loop if img is None: continue # Get detections outputs = predictor(img) predictions = outputs["instances"].to("cpu") # Get results unsorted = run.getResult(predictions, class_names) # Sort detections by x and y offsets sorted = run.multisort(unsorted) # Reorganize back into Result() object type # TODO: Change the rest of the code to use the above organization (by object). It works well for now, it just might speed it up. result = Result() for i in range(len(sorted)): result.class_ids.append(sorted[i].id) result.class_names.append(sorted[i].name) result.scores.append(sorted[i].score) result.boxes.append(sorted[i].box) result.masks.append(sorted[i].mask) # If no detections, return to start of loop if result is None: continue result_cls = result.class_names result_clsId = result.class_ids result_scores = result.scores result_masks = result.masks # Create copies of the original image im = img.copy() output = img.copy() # Compute Masks im = run.compute_masks(im, result, colors) # Draw object masks on image cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output) # Draw object bbox, class label, and score on image for i in range(len(result_clsId)): # Draw Bounding boxes start_point = (result.boxes[i].x_offset, result.boxes[i].y_offset) end_point = (result.boxes[i].x_offset + result.boxes[i].width, result.boxes[i].y_offset + result.boxes[i].height) start_point2 = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 2) end_point2 = (result.boxes[i].x_offset + result.boxes[i].width - 2, result.boxes[i].y_offset + 12) # color = colors[result_clsId[i]] color = [0, 0, 0] box_thickness = 1 name = result_cls[i] score = result_scores[i] conf = round(score.item() * 100, 1) # Test strategy for updating model classes if score > .9: string = str(name) + ":" + str(conf) + "%" else: string = str(name) + "??? - " + str(conf) + "%" try: win = run.get_active_window() win_name = win.get_wm_name() win_bbox = run.get_window_bbox(win) # print(win_name, win_bbox) except Xlib.error.BadWindow: print("Window vanished") rel_cursor, click = run.get_rel_cursor(win_bbox, win_name) # if cursor is within object bbox and a click, do something if rel_cursor[0] is not None and rel_cursor[1] is not None: if rel_cursor[0] >= start_point[0] and rel_cursor[ 0] <= end_point[0] and rel_cursor[1] >= start_point[ 1] and rel_cursor[1] <= end_point[1]: color = [0, 220, 255] # yellow box_thickness = 2 contain_id = i else: color = [0, 0, 0] contain_id = None box_thickness = 1 if contain_id is not None and click == "Yes": print("Object " + str(i) + " (" + str(name) + ") was clicked!!!") rospy.sleep(.05) # prevent multiple clicks detected # centroids, avoid = run.compute_updates(output, contain_id, result) clicked = True selected_id = contain_id # Draw font = cv2.FONT_HERSHEY_SIMPLEX org = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 10) fontScale = .3 text_thickness = 1 output = cv2.rectangle(output, start_point, end_point, color, box_thickness) output = cv2.rectangle(output, start_point2, end_point2, color, -1) # Text box output = cv2.putText(output, string, org, font, fontScale, [255, 255, 255], text_thickness, cv2.LINE_AA, False) if clicked: # Change selected bbox color to green output = cv2.rectangle(output, (result.boxes[selected_id].x_offset, result.boxes[selected_id].y_offset), \ (result.boxes[selected_id].x_offset + result.boxes[selected_id].width, result.boxes[selected_id].y_offset + result.boxes[selected_id].height), \ [0, 255, 0], 2) # Display options to the right of the bbox start_x = result.boxes[selected_id].x_offset + result.boxes[ selected_id].width + 20 start_y = result.boxes[selected_id].y_offset - 20 x = start_x y = start_y + 20 for i in range(len(class_names)): y += 20 output = cv2.rectangle(output, (x, y), (x + 40, y + 12), [255, 255, 255], -1) output = cv2.putText(output, class_names[i], (x + 2, y + 8), font, fontScale, [0, 0, 0], text_thickness, cv2.LINE_AA, False) # The below is potentially for a more robust way of displaying class update options # for i in range(len(centroids)): # if i == selected_id: # circle_color = [0, 0, 255] # else: # circle_color = [0, 255, 0] # output = cv2.circle(output, (int(centroids[i][0]), int(centroids[i][1])), 2, circle_color, 2) # output = cv2.circle(output, avoid, 2, [255, 0, 0], 2) # output = cv2.arrowedLine(output, avoid, (int(centroids[selected_id][0]), int(centroids[selected_id][1])), [255,0,0], 1) # y-axis (tag frame) im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8") # # Display Image Counter # # image_counter = image_counter + 1 # # if (image_counter % 11) == 10: # # rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time)) run.publish(im_msg, result) return 0
def main(): """ Mask RCNN Object Detection with Detectron2 """ rospy.init_node("mask_rcnn", anonymous=True) bridge = CvBridge() start_time = time.time() image_counter = 0 register_coco_instances( "train_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train") register_coco_instances( "test_set", {}, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json", "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test") train_metadata = MetadataCatalog.get("train_set") print(train_metadata) dataset_dicts_train = DatasetCatalog.get("train_set") test_metadata = MetadataCatalog.get("test_set") print(test_metadata) dataset_dicts_test = DatasetCatalog.get("test_set") cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) cfg.DATASETS.TRAIN = ("train_set") cfg.DATASETS.TEST = () # no metrics implemented for this dataset cfg.DATALOADER.NUM_WORKERS = 4 cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" ) # initialize from model zoo cfg.SOLVER.IMS_PER_BATCH = 4 cfg.SOLVER.BASE_LR = 0.01 cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough, but you can certainly train longer cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = ( 128) # faster, and good enough for this toy dataset cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5 # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper) # Temporary Solution. If I train again I think I can use the dynamically set path again cfg.MODEL.WEIGHTS = os.path.join( cfg.OUTPUT_DIR, "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth" ) # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4 # set the testing threshold for this model cfg.DATASETS.TEST = ("test_set") predictor = DefaultPredictor(cfg) class_names = MetadataCatalog.get("train_set").thing_classes # Set up custom cv2 visualization parameters # Classes: [name, id] # - # [Plate, 0] # [Carrot, 1] # [Celery, 2] # [Pretzel, 3] # [Gripper, 4] # Colors = [blue, green, red] color_plate = [0, 255, 0] # green color_carrot = [255, 200, 0] # blue color_celery = [0, 0, 255] # red color_pretzel = [0, 220, 255] # yellow color_gripper = [204, 0, 150] # purple colors = list([ color_plate, color_carrot, color_celery, color_pretzel, color_gripper ]) alpha = .4 run = maskRCNN() while not rospy.is_shutdown(): # Get images img = run.get_img() if img is not None: outputs = predictor(img) predictions = outputs["instances"].to("cpu") # Get results unsorted = run.getResult(predictions, class_names) # Sort detections by x and y sorted = run.sort_detections(unsorted) result = Result() for i in range(len(sorted)): result.class_ids.append(sorted[i][0]) result.class_names.append(sorted[i][1]) result.scores.append(sorted[i][2]) result.boxes.append(sorted[i][3]) result.masks.append(sorted[i][4]) # Visualize using detectron2 built in visualizer # v = Visualizer(im[:, :, ::-1], # metadata=train_metadata, # scale=1.0 # # instance_mode=ColorMode.IMAGE_BW # remove the colors of unsegmented pixels # ) # v = v.draw_instance_predictions(outputs["instances"].to("cpu")) # im = v.get_image()[:, :, ::-1] # im_msg = bridge.cv2_to_imgmsg(im, encoding="bgr8") # Visualize using custom cv2 code if result is not None: result_cls = result.class_names result_clsId = result.class_ids result_scores = result.scores result_masks = result.masks # Create copies of the original image im = img.copy() output = img.copy() # Initialize lists masks = [] masks_indices = [] for i in range(len(result_clsId)): # Obtain current object mask as a numpy array (black and white mask of single object) current_mask = bridge.imgmsg_to_cv2(result_masks[i]) # Find current mask indices mask_indices = np.where(current_mask == 255) # Add to mask indices list if len(masks_indices) > len(result_clsId): masks_indices = [] else: masks_indices.append(mask_indices) # Add to mask list if len(masks) > len(result_clsId): masks = [] else: masks.append(current_mask) if len(masks) > 0: # Create composite mask composite_mask = sum(masks) # Clip composite mask between 0 and 255 composite_mask = composite_mask.clip(0, 255) for i in range(len(result_clsId)): # Select correct object color color = colors[result_clsId[i]] # Change the color of the current mask object im[masks_indices[i][0], masks_indices[i][1], :] = color # Apply alpha scaling to image to adjust opacity cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output) for i in range(len(result_clsId)): # Draw Bounding boxes start_point = (result.boxes[i].x_offset, result.boxes[i].y_offset) end_point = (result.boxes[i].x_offset + result.boxes[i].width, result.boxes[i].y_offset + result.boxes[i].height) start_point2 = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 2) end_point2 = (result.boxes[i].x_offset + result.boxes[i].width - 2, result.boxes[i].y_offset + 12) color = colors[result_clsId[i]] box_thickness = 1 name = result_cls[i] score = result_scores[i] conf = round(score.item() * 100, 1) string = str(name) + ":" + str(conf) + "%" font = cv2.FONT_HERSHEY_SIMPLEX org = (result.boxes[i].x_offset + 2, result.boxes[i].y_offset + 10) fontScale = .3 text_thickness = 1 output = cv2.rectangle(output, start_point, end_point, color, box_thickness) output = cv2.rectangle(output, start_point2, end_point2, color, -1) # Text box output = cv2.putText(output, string, org, font, fontScale, [0, 0, 0], text_thickness, cv2.LINE_AA, False) im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8") ##### The entire goal of the below code is to get N random points on the mask in 3D ##### and publish on cloud samples topic for GPD item_ids = result_clsId idx = [i for i, e in enumerate(item_ids) if e > 0 and e < 4] numFoodItems = len(idx) mask = bridge.imgmsg_to_cv2(result_masks[idx[0]]) coord = cv2.findNonZero( mask) # Coordinates of the mask that are on the food item # Pick 3 random points on the object mask sample_list = list() for ii in range(3): point = Point() x = random.choice( coord[:, 0, 1]) # x and y reversed for some reason y = random.choice( coord[:, 0, 0]) # x and y reversed for some reason depth = (run.depth_array[y, x]) / 1000 # Deproject pixels and depth to 3D coordinates (camera frame) X, Y, Z = run.convert_depth_to_phys_coord_using_realsense( y, x, depth, run.cam_info) # print("(x,y,z) to convert: ("+str(y)+", "+str(x)+", "+str(depth)+")") # print("(X,Y,Z) converted: ("+str(X)+", "+str(Y)+", "+str(Z)+")") point.x = X point.y = Y point.z = Z sample_list.append(point) # print(sample_list) cam_source = Int64() cam_source.data = 0 cloud_source = CloudSources() cloud_source.cloud = run.pointCloud cloud_source.camera_source = [cam_source] view_point = Point() view_point.x = 0.640 view_point.y = 0.828 view_point.z = 0.505 # view_point.x = 0; view_point.y = 0; view_point.z = 0 cloud_source.view_points = [view_point] cloud_samples = CloudSamples() cloud_samples.cloud_sources = cloud_source cloud_samples.samples = sample_list # Print publish info # print(type(cloud_source.cloud)) # print(cloud_source.camera_source) # print(cloud_source.view_points) # print("") # print(type(cloud_samples.cloud_sources)) # print(cloud_samples.samples) # print("-------------------------\n") # Display Image Counter # image_counter = image_counter + 1 # if (image_counter % 11) == 10: # rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time)) run.publish(im_msg, result, cloud_samples) return 0