def viz(args):
    """Write side-by-side prediction / ground-truth images for the validation set.

    For every ``(image, target, idx)`` yielded by the ground-truth dataset,
    the model's top predictions and the ground-truth target are each passed
    to ``draw_on_image``; the raw image, the prediction rendering and the
    ground-truth rendering are concatenated along axis 1 and saved as
    ``<output_dir>/<idx>.png``.

    Args:
        args: Namespace providing ``config_file``, ``weight_file`` and
            ``confidence_threshold``.
    """
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(["TEST.IMS_PER_BATCH", "1", "MODEL.WEIGHT", args.weight_file])
    coco_demo = COCODemo(
        cfg,
        min_image_size=cfg.INPUT.MIN_SIZE_TEST,
        confidence_threshold=float(args.confidence_threshold),
    )

    val_data_dir = '../datasets/coco/timeline/val'
    ann_file = '../datasets/coco/timeline/val/coco_format.json'

    # Output directory is named "<config stem>-<weight stem>", where a stem is
    # the file name up to its first dot.
    config_stem = args.config_file.split('/')[-1].split('.')[0]
    weight_stem = args.weight_file.split('/')[-1].split('.')[0]
    output_dir = '../output/' + config_stem + '-' + weight_stem
    # makedirs also creates a missing '../output/' parent and, with
    # exist_ok=True, does not race with a concurrent creation.
    os.makedirs(output_dir, exist_ok=True)

    gt_dataset = coco.COCODataset(ann_file, val_data_dir, True)
    for image, target, idx in gt_dataset:
        image = np.array(image)

        top_prediction = coco_demo.compute_prediction(image)
        top_prediction = coco_demo.select_top_predictions(top_prediction)
        masked_image = draw_on_image(image, top_prediction)

        # The ground truth gets a 'mask' field so it can be rendered by the
        # same helper as the predictions.
        target.add_field('mask', get_mask_array(target))
        gt_image = draw_on_image(image, target)

        cat_img = np.concatenate([image, masked_image, gt_image], axis=1)
        # NOTE(review): argument order (array, path) is kept from the original;
        # confirm it matches the signature of the `imsave` in scope.
        imsave(cat_img, os.path.join(output_dir, '{}.png'.format(idx)))
        print('finish {}'.format(idx))
class MaskRCNN(object):
    """Wrapper around ``COCODemo`` that returns per-instance image crops and masks."""

    def __init__(self, confidence_threshold=0.7):
        """Load the model configuration and build the ``COCODemo`` wrapper.

        Args:
            confidence_threshold: Forwarded to ``COCODemo`` as its
                ``confidence_threshold``.
        """
        cfg.merge_from_file('e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml')
        cfg.freeze()
        self.model_wrapper = COCODemo(
            cfg,
            confidence_threshold=confidence_threshold,
        )

    def get_chips_and_masks(self, img, label_index=COCO_PERSON_INDEX):
        '''
        Params
        ------
        img : nd array like, RGB
        label_index : int, index of label wanted

        Returns
        -------
        list of tuple (chip, mask)
        - chip is a ndarray: bounding-box crop of ``img`` (all channels kept)
        - mask is a ndarray covering the same rows and columns as chip, with a
          single trailing channel (presumably 0/1 per pixel, marking whether
          the pixel belongs to the instance -- confirm against the masker).

        Boxes whose integer height or width is not positive are skipped.
        '''
        preds = self.model_wrapper.compute_prediction(img)
        top_preds = self.model_wrapper.select_top_predictions(preds)

        labels = top_preds.get_field('labels')
        wanted = (labels == label_index).numpy().astype(bool)
        masks = top_preds.get_field('mask').numpy()[wanted]
        bboxes = top_preds.bbox.to(torch.int64).numpy()[wanted]

        results = []
        for mask, (left, top, right, bottom) in zip(masks, bboxes):
            if bottom - top <= 0 or right - left <= 0:
                continue
            # Box corners are treated as inclusive, hence the +1 on the far edges.
            rows = slice(top, bottom + 1)
            cols = slice(left, right + 1)
            content = img[rows, cols, :]
            # Take the first plane of the mask and keep a trailing channel axis.
            minimask = mask[0, rows, cols, None]
            results.append((content, minimask))
        return results
def detect_person(cfg, image):
    """Return 1 if any top prediction for ``image`` is labelled 'person', else 0.

    A new ``COCODemo`` (min image size 800, confidence threshold 0.7) is built
    from ``cfg`` on every call.
    """
    demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.7,
    )
    kept = demo.select_top_predictions(demo.compute_prediction(image))
    # Translate the numeric label ids into category names before the lookup.
    names = [demo.CATEGORIES[idx] for idx in kept.get_field("labels").tolist()]
    return 1 if 'person' in names else 0
def single_predict():
    """Run the detector on one hard-coded sample image and show the result.

    The original image and a copy with the predicted boxes, class names and
    scores drawn on it are displayed side by side via ``show_images``.

    Raises:
        FileNotFoundError: If the sample image cannot be read.
    """
    import cv2
    import numpy as np
    import torch
    from process_image import draw_bb, show_images

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config_file = "configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"

    # update the config options with the config file
    cfg.merge_from_file(config_file)
    # manual override some options
    # cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.5,
    )

    image_path = '/home/jianfw/data/sample_images/TaylorSwift.jpg'
    im = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('could not read image: {}'.format(image_path))

    predictions = coco_demo.compute_prediction(im)
    predictions = coco_demo.select_top_predictions(predictions)

    scores = predictions.get_field("scores").tolist()
    labels = [coco_demo.CATEGORIES[i] for i in predictions.get_field("labels").tolist()]
    # Each row is [x of first corner, y of first corner, x of second, y of second],
    # truncated to integers.
    rects = predictions.bbox.to(torch.int64).tolist()

    im_mask = np.copy(im)
    draw_bb(im_mask, rects, labels, scores)
    show_images([im, im_mask], 1, 2)
class MaskRcnnRos():  # RosCppCommunicator):
    """ROS service that runs Mask R-CNN on an image and returns a label image.

    A ``MaskRcnnService`` service is registered; each call runs inference on
    the request image, replies with a single-channel ``mono8`` image whose
    pixel values are label indices, and publishes a brightened copy of that
    image on ``mask_rcnn_img``.
    """

    def __init__(
            self,
            config_path=os.path.dirname(__file__) + "/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml"):
        """Load the model and register the ROS service and publisher.

        Args:
            config_path: Path of the model configuration file merged into ``cfg``.
        """
        self.model_config_path = config_path
        cfg.merge_from_file(self.model_config_path)
        cfg.freeze()

        self._greyscale_palette = (2 * 25 - 1)

        # prepare object that handles inference plus adds predictions on top of image
        self.coco_demo = COCODemo(cfg,
                                  confidence_threshold=0.8,
                                  show_mask_heatmaps=False,
                                  masks_per_dim=1,
                                  min_image_size=800)
        self._greyscale_colours = self._generate_grayscale_values()

        # Create the publisher before registering the service: the service
        # callback publishes, so it must never run without the publisher.
        self.mask_rcnn_publisher = rospy.Publisher('mask_rcnn_img', Image, queue_size=10)
        self.mask_rcnn_service = rospy.Service("MaskRcnnService", MaskRcnn, self.mask_rcnn_service_callback)
        print("Ready to receive mask rcnn service calls.")

    @torch.no_grad()
    def mask_rcnn_service_callback(self, req):
        """Handle one service request.

        Args:
            req: Service request carrying ``input_image``.

        Returns:
            A ``MaskRcnnResponse`` with ``success``, ``output_mask``,
            ``labels`` and ``label_indexs`` filled in.
        """
        response = MaskRcnnResponse()
        input_image = ros_numpy.numpify(req.input_image)
        response_image, labels, label_indexs = self.analyse_image(input_image)

        # Brightened copy for visualisation only. The label image is uint8,
        # so this product wraps around for label indices above 5.
        display_image = response_image * 48

        output_image_msg = ros_numpy.msgify(Image, response_image, encoding='mono8')
        display_image_msg = ros_numpy.msgify(Image, display_image, encoding='mono8')
        self.mask_rcnn_publisher.publish(display_image_msg)

        response.success = True
        response.output_mask = output_image_msg
        response.labels = labels
        response.label_indexs = label_indexs
        return response

    @torch.no_grad()
    def analyse_image(self, image):
        """Run inference on ``image`` and return ``create_pixel_masks`` of the top predictions."""
        predictions = self.coco_demo.compute_prediction(image)
        top_predictions = self.coco_demo.select_top_predictions(predictions)
        return self.create_pixel_masks(image, top_predictions)

    def create_pixel_masks(self, image, predictions):
        """
        Builds a single-channel label image from the predicted instance masks.

        Every pixel covered by an instance mask is set to that instance's
        label index; uncovered pixels stay 0. Where masks overlap, the
        instance that comes first in ``predictions`` keeps the pixel.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV; only its
                first two dimensions are used, to size the output.
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`. May be None.

        Returns:
            (composite, labels, label_indexs): the uint8 label image, the
            category names and the label indices of the predictions. For
            ``predictions is None`` the image is all zeros and both lists
            are empty.
        """
        rows = image.shape[0]
        cols = image.shape[1]
        composite = np.zeros((rows, cols), np.uint8)

        if predictions is None:
            return composite, [], []

        # np.asarray accepts a CPU tensor as well as an ndarray; the code
        # below relies on NumPy semantics.
        masks = np.asarray(predictions.get_field("mask"))
        label_indexs = predictions.get_field("labels").numpy()
        labels = self.convert_label_index_to_string(label_indexs)

        if masks.ndim < 3:
            # A bare 2-D mask is lifted to the (instance, plane, row, col)
            # layout that the loop below indexes.
            masks = np.expand_dims(masks, axis=0)
            masks = np.expand_dims(masks, axis=0)

        # TODO: make sure there is a border around each mask so that they are
        # definitely considered separate objects
        for mask, semantic_index in zip(masks, label_indexs):
            # Assign instead of accumulating: summing label indices where
            # masks overlap would create labels that belong to neither
            # instance. Earlier predictions take precedence (presumably the
            # higher-scoring ones -- verify select_top_predictions ordering).
            free_and_covered = (mask[0, :, :] != 0) & (composite == 0)
            composite[free_and_covered] = semantic_index

        return composite, labels, label_indexs

    def convert_label_index_to_string(self, labels):
        """Map an iterable of label indices to their category names."""
        return [self.coco_demo.CATEGORIES[i] for i in labels]

    def get_single_label_from_index(self, label):
        """Return the category name for one label index."""
        return self.coco_demo.CATEGORIES[label]

    def get_greyscale_colours(self, label_index):
        """Return the precomputed greyscale value(s) for ``label_index``."""
        return self._greyscale_colours[label_index]

    def _generate_grayscale_values(self):
        """Generate one greyscale value per label index for visualisation.

        Index ``i`` maps to ``(i * self._greyscale_palette) % 255``. This
        should be used for visualisation purposes only, as VDOSLAM just needs
        a distinct value.

        Returns:
            np.ndarray: uint8 array with ``len(CATEGORIES) + 1`` entries.
        """
        numer_of_cats = len(self.coco_demo.CATEGORIES)
        categories_index = np.arange(numer_of_cats + 1)
        colors = (categories_index * self._greyscale_palette) % 255
        return colors.astype("uint8")
cfg, confidence_threshold=args.confidence_threshold, show_mask_heatmaps=args.show_mask_heatmaps, masks_per_dim=args.masks_per_dim, # min_image_size=args.min_image_size, ) cam = cv2.VideoCapture(0) cv2_win = 'MASKRCNN (COCO)' cv2.namedWindow(cv2_win, cv2.WINDOW_NORMAL) while True: ret, img = cam.read() preds = coco_demo.compute_prediction(img) top_preds = coco_demo.select_top_predictions(preds) masks = top_predictions.get_field('mask') labels = top_predictions.get_field('labels') bboxes = top_predictions.bbox person_bool_mask = (labels == COCO_PERSON_INDEX).numpy().astype(bool) img_show = deepcopy(img) cv2.imshow(cv2_win, img_show) if cv2.waitKey(1) == ord('q'): break # esc to quit cv2.destroyAllWindows()
def _suffixed_name(image_name, suffix):
    """Insert ``suffix`` between the stem and the extension of ``image_name``."""
    stem, ext = os.path.splitext(image_name)
    return stem + suffix + ext


def _render_overlays(coco_demo, image, boxlist, draw_class_names):
    """Return a copy of ``image`` with the overlays for ``boxlist`` drawn on it."""
    canvas = image.copy()
    if coco_demo.show_mask_heatmaps:
        return coco_demo.create_mask_montage(canvas, boxlist)
    canvas = coco_demo.overlay_boxes(canvas, boxlist)
    if coco_demo.cfg.MODEL.MASK_ON:
        canvas = coco_demo.overlay_mask(canvas, boxlist)
    if coco_demo.cfg.MODEL.KEYPOINT_ON:
        canvas = coco_demo.overlay_keypoints(canvas, boxlist)
    if draw_class_names:
        canvas = coco_demo.overlay_class_names(canvas, boxlist)
    return canvas


def _save_under_labels(picture, root, label_names, file_name):
    """Save ``picture`` as ``root/<label>/file_name`` for each label, never overwriting."""
    for label_name in label_names:
        out = os.path.join(root, label_name, file_name)
        os.makedirs(os.path.dirname(out), exist_ok=True)
        if not os.path.exists(out):
            picture.save(out)


def main():
    """Render ground truth and predictions for every test dataset.

    For each confidence threshold in 0.1, 0.3, 0.5, 0.7, 0.9 and each dataset
    in ``cfg.DATASETS.TEST``, three images are written per sample under
    ``<cfg.OUTPUT_DIR>/inference_<threshold>/<dataset>/<ground-truth label>/``:
    the ground truth (``<stem>_GT<ext>``), the top predictions (original file
    name) and the ``best_only`` predictions (``<stem>_best<ext>``). Files that
    already exist are left untouched.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # update the config options with the config file
    cfg.merge_from_file(args.config_file)
    # manual override some options; command-line opts are merged last
    cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # The catalog does not depend on the threshold, so it is loaded once.
    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog

    for conf_thresh in [0.1, 0.3, 0.5, 0.7, 0.9]:
        coco_demo = COCODemo(
            cfg,
            min_image_size=800,
            confidence_threshold=conf_thresh,
        )

        for dataset_name in cfg.DATASETS.TEST:
            print(dataset_name)
            dataset = DatasetCatalog.get(dataset_name)
            print(dataset)
            dataset = FolderDataset(dataset['args']['data_dir'],
                                    dataset['args']['split'])
            # Category names come from the dataset being visualised.
            COCODemo.CATEGORIES = dataset.CLASSES

            out_root = os.path.join(cfg.OUTPUT_DIR,
                                    f"inference_{conf_thresh}", dataset_name)

            for image, target, index in tqdm(dataset):
                image_name = os.path.basename(dataset.img_files[index])
                image = np.array(image)
                all_labels = [
                    coco_demo.CATEGORIES[i]
                    for i in target.get_field("labels").tolist()
                ]
                if len(all_labels) > 1:
                    print(all_labels)

                # GROUND TRUTH
                result = _render_overlays(coco_demo, image, target,
                                          draw_class_names=False)
                if not coco_demo.show_mask_heatmaps:
                    for box, label in zip(target.bbox, all_labels):
                        x, y = box[:2]
                        # cv2.putText needs plain integer pixel coordinates.
                        cv2.putText(result, label, (int(x), int(y)),
                                    cv2.FONT_HERSHEY_SIMPLEX, 5,
                                    (255, 255, 255), 1)
                _save_under_labels(Image.fromarray(result), out_root,
                                   all_labels,
                                   _suffixed_name(image_name, "_GT"))

                # PREDICTION
                predictions = coco_demo.compute_prediction(image)
                top_predictions = coco_demo.select_top_predictions(predictions)
                result = _render_overlays(coco_demo, image, top_predictions,
                                          draw_class_names=True)
                _save_under_labels(Image.fromarray(result), out_root,
                                   all_labels, image_name)

                # PREDICTION BEST only (reuses the predictions computed above)
                top_predictions = coco_demo.select_top_predictions(
                    predictions, best_only=True)
                result = _render_overlays(coco_demo, image, top_predictions,
                                          draw_class_names=True)
                _save_under_labels(Image.fromarray(result), out_root,
                                   all_labels,
                                   _suffixed_name(image_name, "_best"))
def main():
    """Run the detector on a ZED camera (or SVO file) stream and display the result.

    Every grabbed frame is run through ``COCODemo``; predictions are
    restricted to label 1, overlaid on the left image together with the
    values produced by ``get_boxes3d`` / ``get_masks3d`` / ``get_humans3d``,
    and shown next to the depth view. The loop ends on a failed grab or when
    ESC is pressed. The camera is closed and the windows destroyed on every
    exit path.

    Raises:
        SystemExit: With status 1 if the camera cannot be opened or
            positional tracking cannot be enabled.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.6,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=256,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument("--svo-filename",
                        help="Optional SVO input filepath",
                        default=None)
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    init_cap_params = sl.InitParameters()
    if args.svo_filename:
        print("Loading SVO file " + args.svo_filename)
        init_cap_params.set_from_svo_file(args.svo_filename)
        init_cap_params.svo_real_time_mode = True
    init_cap_params.camera_resolution = sl.RESOLUTION.HD720
    init_cap_params.depth_mode = sl.DEPTH_MODE.ULTRA
    init_cap_params.coordinate_units = sl.UNIT.METER
    init_cap_params.depth_stabilization = True
    init_cap_params.camera_image_flip = sl.FLIP_MODE.AUTO
    init_cap_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Y_UP

    cap = sl.Camera()
    if not cap.is_opened():
        print("Opening ZED Camera...")
    status = cap.open(init_cap_params)
    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        # Non-zero status so callers can tell the camera failed to open.
        raise SystemExit(1)

    try:
        display = True
        keep_people_only = True
        people_coco_label = 1

        runtime = sl.RuntimeParameters()
        left = sl.Mat()
        ptcloud = sl.Mat()
        depth_img = sl.Mat()
        depth = sl.Mat()
        res = sl.Resolution(1280, 720)

        # First create a Transform object for TrackingParameters object
        py_transform = sl.Transform()
        tracking_parameters = sl.PositionalTrackingParameters(
            init_pos=py_transform)
        tracking_parameters.set_as_static = True
        err = cap.enable_positional_tracking(tracking_parameters)
        if err != sl.ERROR_CODE.SUCCESS:
            raise SystemExit(1)

        if coco_demo.cfg.MODEL.MASK_ON:
            print("Mask enabled!")
        if coco_demo.cfg.MODEL.KEYPOINT_ON:
            print("Keypoints enabled!")

        while True:
            start_time = time.time()
            err_code = cap.grab(runtime)
            if err_code != sl.ERROR_CODE.SUCCESS:
                break

            cap.retrieve_image(left, sl.VIEW.LEFT, resolution=res)
            cap.retrieve_image(depth_img, sl.VIEW.DEPTH, resolution=res)
            cap.retrieve_measure(depth, sl.MEASURE.DEPTH, resolution=res)
            cap.retrieve_measure(ptcloud, sl.MEASURE.XYZ, resolution=res)
            ptcloud_np = np.array(ptcloud.get_data())

            img = cv2.cvtColor(left.get_data(), cv2.COLOR_RGBA2RGB)
            prediction = coco_demo.select_top_predictions(
                coco_demo.compute_prediction(img))

            # Keep people only
            if keep_people_only:
                labels_tmp = prediction.get_field("labels")
                keep = torch.nonzero(labels_tmp == people_coco_label).squeeze(1)
                prediction = prediction[keep]

            composite = img.copy()
            humans_3d = None
            masks_3d = None
            if coco_demo.show_mask_heatmaps:
                composite = coco_demo.create_mask_montage(composite, prediction)
            composite = coco_demo.overlay_boxes(composite, prediction)
            if coco_demo.cfg.MODEL.MASK_ON:
                masks_3d = get_masks3d(prediction, depth)
                composite = coco_demo.overlay_mask(composite, prediction)
            if coco_demo.cfg.MODEL.KEYPOINT_ON:
                # Extract 3D skeleton from the ZED depth
                humans_3d = get_humans3d(prediction, ptcloud_np)
                composite = coco_demo.overlay_keypoints(composite, prediction)

            overlay_distances(prediction, get_boxes3d(prediction, ptcloud_np),
                              composite, humans_3d, masks_3d)
            composite = coco_demo.overlay_class_names(composite, prediction)

            print(" Time: {:.2f} s".format(time.time() - start_time))

            if display:
                cv2.imshow("COCO detections", composite)
                cv2.imshow("ZED Depth", depth_img.get_data())
                key = cv2.waitKey(10)
                if key == 27:
                    break  # esc to quit
    finally:
        # Release the camera and the display windows however the loop ended.
        cap.close()
        cv2.destroyAllWindows()