def handle_boxes_img(raw_image, predictor, obj_box, sub_box, union_box):
    """Prepare one image and its object/subject/union boxes for the model.

    Args:
        raw_image: Image array indexed as (height, width, channel); it is
            passed to the predictor's transform and then transposed with
            ``(2, 0, 1)``.
        predictor: Object exposing
            ``transform_gen.get_transform(image).apply_image(image)``.
        obj_box: Tensor of boxes; concatenated first.
        sub_box: Tensor of boxes; concatenated second.
        union_box: Tensor of boxes; concatenated third.

    Returns:
        tuple: ``(image, boxes_all)`` where ``image`` is the transformed
        image as a float32 tensor in (channel, height, width) order and
        ``boxes_all`` is a ``Boxes`` wrapping the three inputs concatenated
        with ``torch.cat`` and moved to CUDA.
    """
    boxes_all = Boxes(torch.cat((obj_box, sub_box, union_box)).cuda())

    # NOTE(review): the returned boxes are in *raw-image* coordinates while
    # the returned image has been through the predictor's transform. The
    # previous revision built a rescaled clone of the boxes
    # (scale_x = new_width / raw_width, scale_y = new_height / raw_height)
    # and then discarded it; that dead work is removed here and the return
    # value is deliberately left unchanged. Callers that need boxes in the
    # transformed image's frame must rescale them - confirm whether the
    # scaled boxes were the intended return value.
    with torch.no_grad():
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        # HWC array -> CHW float32 tensor.
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

    return image, boxes_all
def test_mock_rcnn_inference(self):
    """Check ``MockRCNNInference`` output with and without post-processing.

    Without post-processing the predicted boxes and keypoints of the first
    result must match the ground truth as given. With post-processing they
    must match the ground truth scaled by ``(1080 / 224, 1920 / 398)``,
    i.e. the x factor is the ratio of the second entries of
    ``image_size`` / ``resize_size`` and the y factor the ratio of the first.
    """
    image_size = (1920, 1080)
    resize_size = (398, 224)
    scale_x, scale_y = 1080.0 / 224, 1920.0 / 398
    tolerance = 1e-4

    def stack_keypoints(first, second):
        # Two instances, 21 identical (x, y, visibility) keypoints each.
        return torch.Tensor(first * 21 + second * 21).reshape(2, 21, 3)

    gt_boxes = Boxes(torch.Tensor([[50, 40, 100, 80], [150, 60, 200, 120]]))
    gt_kpts = stack_keypoints([75, 60, 1.0], [175, 90, 1.0])

    # Build the inputs and the mock model under test.
    batched_inputs = rh.get_batched_inputs(2, image_size, resize_size, gt_boxes)
    model = rh.MockRCNNInference(image_size, resize_size)

    # Raw output: predictions are expected unchanged.
    raw_result = model(batched_inputs, None, do_postprocess=False)[0]
    self.assertArrayAllClose(
        raw_result.pred_boxes.tensor, gt_boxes.tensor, atol=tolerance
    )
    self.assertArrayAllClose(raw_result.pred_keypoints, gt_kpts, atol=tolerance)

    # Post-processed output: predictions are expected in scaled coordinates.
    processed = model(batched_inputs, None, do_postprocess=True)[0]["instances"]

    gt_boxes_scaled = gt_boxes.clone()
    gt_boxes_scaled.scale(scale_x, scale_y)
    gt_kpts_scaled = stack_keypoints(
        [75 * scale_x, 60 * scale_y, 1.0],
        [175 * scale_x, 90 * scale_y, 1.0],
    )

    self.assertArrayAllClose(
        processed.pred_boxes.tensor, gt_boxes_scaled.tensor, atol=tolerance
    )
    self.assertArrayAllClose(
        processed.pred_keypoints, gt_kpts_scaled, atol=tolerance
    )
def doit_boxes(raw_image, predictor, raw_boxes):
    """Return pooled RoI features for caller-supplied boxes on one image.

    Args:
        raw_image: Image array indexed as (height, width, channel).
        predictor: Object exposing ``transform_gen`` and ``model`` (the model
            must provide ``preprocess_image``, ``backbone`` and ``roi_heads``).
        raw_boxes: Array-like of boxes in raw-image coordinates; converted
            with ``np.asarray`` and moved to CUDA.

    Returns:
        numpy.ndarray: One row of features per box, obtained by averaging the
        RoI-head output over dimensions 2 and 3.
    """
    source_boxes = Boxes(torch.from_numpy(np.asarray(raw_boxes)).cuda())

    with torch.no_grad():
        model = predictor.model
        src_height, src_width = raw_image.shape[:2]

        # Apply the predictor's transform, then move the boxes into the
        # transformed image's coordinate frame.
        resized = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        dst_height, dst_width = resized.shape[:2]

        scaled_boxes = source_boxes.clone()
        scaled_boxes.scale(
            scale_x=1. * dst_width / src_width,
            scale_y=1. * dst_height / src_height,
        )

        # HWC array -> CHW float32 tensor, wrapped as a single-image batch.
        chw_image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        batch = model.preprocess_image(
            [{"image": chw_image, "height": src_height, "width": src_width}]
        )

        # Backbone pass (Res1-Res4 according to the original author's note).
        backbone_out = model.backbone(batch.tensor)

        # The given boxes stand in for RPN proposals.
        feature_maps = [backbone_out[name] for name in model.roi_heads.in_features]
        roi_features = model.roi_heads._shared_roi_transform(
            feature_maps, [scaled_boxes]
        )

        # Collapse the spatial dimensions to 1x1.
        pooled = roi_features.mean(dim=[2, 3])
        return pooled.to('cpu').numpy()
def _build_model_inputs(detector, raw_images):
    """Apply the detector's transform to each raw image and wrap it as a model input dict."""
    inputs = []
    for raw_image in raw_images:
        resized = detector.transform_gen.get_transform(raw_image).apply_image(raw_image)
        # HWC array -> CHW float32 tensor.
        chw_image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        inputs.append({
            "image": chw_image,
            "height": raw_image.shape[0],
            "width": raw_image.shape[1],
        })
    return inputs


def _pool_box_features(detector, features, proposal_boxes):
    """Run the RoI head's shared transform on the boxes and average over dims 2 and 3."""
    roi_heads = detector.model.roi_heads
    feature_maps = [features[name] for name in roi_heads.in_features]
    box_features = roi_heads._shared_roi_transform(feature_maps, proposal_boxes)
    return box_features.mean(dim=[2, 3])  # pooled to 1x1


def _extract_for_given_boxes(detector, raw_images, images, features, given_boxes):
    """Compute class scores and pooled features for caller-supplied boxes, grouped per image."""
    proposal_boxes = []
    original_boxes = []
    box_ids = []
    for i, boxes_data in enumerate(given_boxes):
        coords = []
        curr_box_ids = []
        for bid, bbox in boxes_data:
            coords.append(bbox)
            curr_box_ids.append(bid)
        raw_boxes = Boxes(torch.tensor(coords, device=images.tensor.device))

        # raw_images[i] is indexed (height, width, channel); images[i] is
        # read as (channel, height, width).
        raw_height, raw_width = raw_images[i].shape[:2]
        new_height, new_width = images[i].shape[1:]

        # Move the boxes into the transformed image's frame; the unscaled
        # boxes are kept so the output reports raw-image coordinates.
        scaled_boxes = raw_boxes.clone()
        scaled_boxes.scale(
            scale_x=1. * new_width / raw_width,
            scale_y=1. * new_height / raw_height,
        )

        proposal_boxes.append(scaled_boxes)
        original_boxes.append(raw_boxes)
        box_ids.append(curr_box_ids)

    feature_pooled = _pool_box_features(detector, features, proposal_boxes)

    # Predict classes for each box; the predicted deltas are not used here.
    pred_class_logits, _ = detector.model.roi_heads.box_predictor(feature_pooled)
    pred_class_prob = torch.softmax(pred_class_logits, -1)
    # Zero out the last class (background per the original author's note).
    pred_class_prob[:, -1] = 0.0

    # The pooled rows cover all images back to back; slice them per image.
    outputs = []
    offset = 0
    for batch_idx, raw_image in enumerate(raw_images):
        num_boxes = len(given_boxes[batch_idx])
        rows = slice(offset, offset + num_boxes)
        outputs.append(Instances(
            image_size=raw_image.shape[:2],
            pred_boxes=original_boxes[batch_idx],
            scores=pred_class_prob[rows],
            features=feature_pooled[rows],
            box_ids=box_ids[batch_idx],
        ))
        offset += num_boxes
    return outputs


def _extract_for_rpn_proposals(args, detector, inputs, images, features):
    """Generate proposals, select detections with a relaxing NMS threshold, and attach their features."""
    model = detector.model

    # Generate proposals and pool a feature vector for each of them.
    proposals, _ = model.proposal_generator(images, features, None)
    proposal_boxes = [p.proposal_boxes for p in proposals]
    feature_pooled = _pool_box_features(detector, features, proposal_boxes)

    # Predict classes and box deltas for each proposal.
    pred_class_logits, pred_proposal_deltas = model.roi_heads.box_predictor(feature_pooled)
    rcnn_outputs = FastRCNNOutputs(
        model.roi_heads.box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        model.roi_heads.smooth_l1_beta,
    )

    # Fixed-number NMS: try thresholds 0.5, 0.6, ... 0.9 and stop at the first
    # one that keeps at least args.min_boxes detections. If none does, the
    # result of the last threshold tried is used.
    instances_list, ids_list = [], []
    probs_list = rcnn_outputs.predict_probs()
    boxes_list = rcnn_outputs.predict_boxes()
    for probs, boxes, image_size in zip(probs_list, boxes_list, images.image_sizes):
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes, probs, image_size,
                nms_thresh=nms_thresh, topk_per_image=args.max_boxes
            )
            if len(ids) >= args.min_boxes:
                break
        instances_list.append(instances)
        ids_list.append(ids)

    # Split the pooled features per image and keep only the selected rows.
    features_list = feature_pooled.split(rcnn_outputs.num_preds_per_image)
    roi_features_list = [
        per_image[ids].detach() for ids, per_image in zip(ids_list, features_list)
    ]

    # Rescale detections to the raw image size and attach the features.
    raw_instances_list = []
    for batch_idx, (instances, input_per_image, image_size) in enumerate(
        zip(instances_list, inputs, images.image_sizes)
    ):
        height = input_per_image.get("height", image_size[0])
        width = input_per_image.get("width", image_size[1])
        # This project's detector_postprocess returns a second value that is
        # used to index the feature rows (presumably the detections kept).
        raw_instances, nonempty = detector_postprocess(instances, height, width)
        raw_instances.features = roi_features_list[batch_idx][nonempty]
        raw_instances_list.append(raw_instances)
    return raw_instances_list


def extract_features(args, detector, raw_images, given_boxes=None):
    """Extract region features for a batch of images.

    Args:
        args: Object with ``max_boxes`` and ``min_boxes`` attributes; only
            read when no boxes are given.
        detector: Object exposing ``transform_gen`` and ``model`` (the model
            must provide ``preprocess_image``, ``backbone``,
            ``proposal_generator`` and ``roi_heads``).
        raw_images: Sequence of image arrays indexed (height, width, channel).
        given_boxes: Optional per-image collections of ``(box_id, bbox)``
            pairs in raw-image coordinates. When falsy (``None`` or empty),
            regions come from the model's proposal generator instead.

    Returns:
        list: One ``Instances`` per image. With ``given_boxes`` each carries
        ``pred_boxes`` (the unscaled input boxes), ``scores`` (softmax class
        probabilities with the last class zeroed), ``features`` and
        ``box_ids``. Otherwise each is the post-processed detection result
        with a ``features`` attribute attached.
    """
    with torch.no_grad():
        inputs = _build_model_inputs(detector, raw_images)
        images = detector.model.preprocess_image(inputs)

        # Backbone pass (Res1-Res4 according to the original author's note).
        features = detector.model.backbone(images.tensor)

        if given_boxes:
            return _extract_for_given_boxes(
                detector, raw_images, images, features, given_boxes
            )
        return _extract_for_rpn_proposals(args, detector, inputs, images, features)