def doit(detector, raw_images):
    """Detect objects in a batch of images and gather pooled RoI features.

    Args:
        detector: object exposing ``transform_gen`` and ``model`` (with
            ``preprocess_image``, ``backbone``, ``proposal_generator`` and
            ``roi_heads``) as used below.
        raw_images: iterable of images; each must support ``.shape``,
            ``.astype`` and is transposed from (H, W, C) to (C, H, W).

    Returns:
        A tuple ``(raw_instances_list, roi_features_list)`` with one entry
        per input image: the detections rescaled to the raw image size and
        the pooled features of the boxes kept by NMS. Both lists are empty
        when ``raw_images`` is empty.
    """
    # Preprocessing (pure array work, so it does not need the no_grad scope).
    inputs = []
    for raw_image in raw_images:
        image = detector.transform_gen.get_transform(raw_image).apply_image(
            raw_image)
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs.append({
            "image": image,
            "height": raw_image.shape[0],
            "width": raw_image.shape[1],
        })

    # An empty batch cannot be turned into an image list by the model;
    # return the natural empty result instead of crashing.
    if not inputs:
        return [], []

    with torch.no_grad():
        model = detector.model
        roi_heads = model.roi_heads

        images = model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = model.proposal_generator(images, features, None)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        # (sum_proposals, 2048), pooled to 1x1
        feature_pooled = box_features.mean(dim=[2, 3])

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = roi_heads.box_predictor(
            feature_pooled)
        rcnn_outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )

        # Fixed-number NMS: relax the NMS threshold step by step until at
        # least MIN_BOXES boxes survive (the last attempt is kept otherwise).
        instances_list, ids_list = [], []
        probs_list = rcnn_outputs.predict_probs()
        boxes_list = rcnn_outputs.predict_boxes()
        for probs, boxes, image_size in zip(probs_list, boxes_list,
                                            images.image_sizes):
            for nms_thresh in np.arange(0.3, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image_size,
                    score_thresh=0.2,
                    nms_thresh=nms_thresh,
                    topk_per_image=MAX_BOXES)
                if len(ids) >= MIN_BOXES:
                    break
            instances_list.append(instances)
            ids_list.append(ids)

        # Post processing for features
        # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)]
        features_list = feature_pooled.split(rcnn_outputs.num_preds_per_image)
        roi_features_list = [
            per_image[ids].detach()
            for ids, per_image in zip(ids_list, features_list)
        ]

        # Post processing for bounding boxes (rescale to raw_image)
        raw_instances_list = []
        for instances, input_per_image, image_size in zip(
                instances_list, inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            raw_instances_list.append(
                detector_postprocess(instances, height, width))

        return raw_instances_list, roi_features_list
def get_image_features(img_paths, folder):
    """Extract cascade-RCNN boxes and box features and save them per image.

    For each path in ``img_paths`` one compressed ``.npz`` file is written to
    ``folder + <basename> + '.npz'``. The file holds a single dict with keys
    ``'boxes'`` (predicted boxes) and ``'x'`` (the per-image features returned
    by ``fast_rcnn_inference``).

    Args:
        img_paths: sequence of image file paths ('/'-separated).
        folder: output prefix. It is concatenated directly with the file
            name, so it must already end with a path separator.

    Raises:
        IOError: if an image cannot be read by ``cv2.imread``.

    Relies on the module-level ``image_model`` and ``cfg``.
    """
    # Nothing to do for an empty batch (the model cannot build an empty
    # image list).
    if len(img_paths) == 0:
        return

    # The resize transform only depends on the config: build it once.
    transform_gen = T.ResizeShortestEdge(
        [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST],
        cfg.INPUT.MAX_SIZE_TEST)

    with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
        inputs = []
        for img_path in img_paths:
            original_image = cv2.imread(img_path)
            if original_image is None:
                # cv2.imread signals failure by returning None.
                raise IOError("Could not read image: {}".format(img_path))
            height, width = original_image.shape[:2]
            image = transform_gen.get_transform(original_image).apply_image(
                original_image)
            image = torch.as_tensor(
                image.astype("float32").transpose(2, 0, 1))
            inputs.append({"image": image, "height": height, "width": width})

        images = image_model.preprocess_image(inputs)
        predictions = image_model.backbone(images.tensor.cuda())
        proposals, _ = image_model.proposal_generator(images, predictions,
                                                      None)

        roi_heads = image_model.roi_heads
        num_stages = roi_heads.num_cascade_stages
        features = [predictions[f] for f in roi_heads.in_features]

        head_outputs = []
        image_sizes = [x.image_size for x in proposals]
        for k in range(num_stages):
            if k > 0:
                # The output boxes of the previous stage are the input
                # proposals of the next stage
                proposals = roi_heads._create_proposals_from_boxes(
                    head_outputs[-1].predict_boxes(), image_sizes)
            box_features = roi_heads.box_pooler(
                features, [x.proposal_boxes for x in proposals])
            box_features = _ScaleGradient.apply(box_features,
                                                1.0 / num_stages)
            box_features = roi_heads.box_head[k](box_features)
            pred_class_logits, pred_proposal_deltas = \
                roi_heads.box_predictor[k](box_features)
            head_outputs.append(
                FastRCNNOutputs(
                    roi_heads.box2box_transform[k],
                    pred_class_logits,
                    pred_proposal_deltas,
                    proposals,
                    roi_heads.smooth_l1_beta,
                ))

        # Average the class scores over all cascade stages.
        scores_per_stage = [h.predict_probs() for h in head_outputs]
        scores = [
            sum(list(scores_per_image)) * (1.0 / num_stages)
            for scores_per_image in zip(*scores_per_stage)
        ]
        boxes = head_outputs[-1].predict_boxes()

        # NOTE(review): this call passes box_features and indexes the result
        # at [2], so it presumably targets a project-modified
        # fast_rcnn_inference -- confirm against its definition.
        pred_instances = fast_rcnn_inference(
            boxes, scores, image_sizes, roi_heads.test_score_thresh,
            roi_heads.test_nms_thresh, roi_heads.test_detections_per_img,
            box_features)
        processed = GeneralizedRCNN._postprocess(pred_instances[0], inputs,
                                                 images.image_sizes)
        feats_per_image = pred_instances[2]

    for path, instance, feats in zip(img_paths, processed, feats_per_image):
        d = {
            'boxes': instance['instances'].pred_boxes.tensor.cpu().numpy(),
            'x': feats.cpu().numpy(),
        }
        # The dict is stored as a single positional array (key 'arr_0').
        np.savez_compressed(folder + path.split('/')[-1] + '.npz', d)
def doit(raw_image, predictor, num_objects=36):
    """Return boxes and pooled RoI features for a single image.

    Args:
        raw_image: image array; ``shape[:2]`` is read as (height, width) and
            the array is transposed from (H, W, C) to (C, H, W).
        predictor: object exposing ``transform_gen`` and ``model`` as used
            below. Its box predictor must return three values (class logits,
            attribute logits, box deltas).
        num_objects: maximum number of boxes kept by NMS; the NMS threshold
            is relaxed until exactly this many boxes are obtained (or the
            threshold range is exhausted). Defaults to 36.

    Returns:
        ``(img_bboxes, roi_features)`` as numpy arrays: the kept boxes
        rescaled to the raw image size and their pooled features.
    """
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]

        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(
            raw_image)
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(
            images, features, None)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        roi_heads = predictor.model.roi_heads
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1

        # Predict classes and boxes for each proposal. The attribute logits
        # are not needed for the returned values, so they are discarded.
        pred_class_logits, _attr_logits, pred_proposal_deltas = \
            roi_heads.box_predictor(feature_pooled)
        outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # Note: BUTD uses raw RoI predictions,
        #       we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor

        # NMS: loosen the threshold until the requested number of boxes is
        # reached; otherwise the result of the last threshold is used.
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.2,
                nms_thresh=nms_thresh,
                topk_per_image=num_objects)
            if len(ids) == num_objects:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()

        img_bboxes = instances.get_fields()['pred_boxes'].tensor.cpu().numpy()
        roi_features = roi_features.cpu().numpy()
        return img_bboxes, roi_features
proposal = proposals[0] print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape) # Run RoI head for each proposal (RoI Pooling + Res5) proposal_boxes = [x.proposal_boxes for x in proposals] features = [features[f] for f in predictor.model.roi_heads.in_features] box_features = predictor.model.roi_heads._shared_roi_transform( features, proposal_boxes) feature_pooled = box_features.mean(dim=[2, 3]) # pooled to 1x1 print('Pooled features size:', feature_pooled.shape) # Predict classes and boxes for each proposal. pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor( feature_pooled) outputs = FastRCNNOutputs(predictor.model.roi_heads.box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, predictor.model.roi_heads.smooth_l1_beta) probs = outputs.predict_probs()[0] boxes = outputs.predict_boxes()[0] attr_prob = pred_attr_logits[..., :-1].softmax(-1) max_attr_prob, max_attr_label = attr_prob.max(-1) # Note: BUTD uses raw RoI predictions, we use the predicted boxes instead. # boxes = proposal_boxes[0].tensor # NMS for nms_thresh in np.arange(0.5, 0.7, 0.1): instances, ids = fast_rcnn_inference_single_image( boxes, probs,
def doit(self, raw_image, autocasting=False):
    """Detect objects in one image and return instances plus RoI features.

    Args:
        raw_image: anything accepted by ``self.get_cv2_image``; the converted
            image's ``shape[:2]`` is read as (height, width).
        autocasting: when true, proposal boxes and backbone features are cast
            to ``torch.cuda.HalfTensor`` before the RoI head runs.

    Returns:
        ``(instances, roi_features)``: detections rescaled to the raw image
        size (with ``attr_scores`` / ``attr_classes`` attached) and the
        pooled features of the kept boxes.
    """
    raw_image = self.get_cv2_image(raw_image)

    from detectron2.modeling.postprocessing import detector_postprocess
    from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs, fast_rcnn_inference_single_image

    predictor = self.predictor
    with torch.no_grad():
        max_boxes = 36
        model = predictor.model
        roi_heads = model.roi_heads

        raw_height, raw_width = raw_image.shape[:2]
        resized = predictor.transform_gen.get_transform(raw_image).apply_image(
            raw_image)
        image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        batch = [{"image": image, "height": raw_height, "width": raw_width}]
        images = model.preprocess_image(batch)

        # Backbone (Res1-Res4) followed by the RPN.
        backbone_out = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, backbone_out, None)
        proposal = proposals[0]  # not used below

        proposal_boxes = [p.proposal_boxes for p in proposals]
        features = [backbone_out[name] for name in roi_heads.in_features]

        if autocasting:
            # Half precision casting for fp16
            for box in proposal_boxes:
                box.tensor = box.tensor.type(torch.cuda.HalfTensor)
            features = [f.type(torch.cuda.HalfTensor) for f in features]

        # RoI head per proposal (RoI Pooling + Res5), then pool to 1x1.
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])

        # Class / attribute / box-delta predictions for every proposal.
        class_logits, attr_logits, proposal_deltas = roi_heads.box_predictor(
            feature_pooled)
        outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            class_logits,
            proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # The last attribute column is dropped before the softmax.
        attr_prob = attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)

        # Note: BUTD uses raw RoI predictions; the predicted boxes are used
        # here instead.

        # NMS with a progressively looser threshold until max_boxes is hit.
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.2,
                nms_thresh=nms_thresh,
                topk_per_image=max_boxes)
            if len(ids) == max_boxes:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        instances.attr_scores = max_attr_prob[ids].detach()
        instances.attr_classes = max_attr_label[ids].detach()

        return instances, roi_features
def _get_image_features(self,
                        imgs,
                        score_thresh=0.2,
                        min_num_image=10,
                        max_regions=36,
                        tokens=None):
    """Extract region features, boxes and classes for a batch of images.

    Args:
        imgs: tensor (batch, H, W, C), per the original comment; each image
            is permuted to (C, H, W) and moved to
            ``self.detectron2_gpu_device``.
        score_thresh: score threshold forwarded to the per-image NMS routine.
        min_num_image: minimum number of boxes wanted per image; the NMS
            threshold is relaxed until this many boxes are kept.
        max_regions: maximum number of boxes kept per image.
        tokens: forwarded to ``fast_rcnn_inference_single_image``; ``None``
            (the default) is treated as an empty list.

    Returns:
        ``(roi_features_list, boxes, num_boxes, classes)``, one entry per
        image. Each feature tensor and each box tensor carries one extra
        leading row (mean feature / synthetic first box), which is why
        ``num_boxes`` stores ``len(ids) + 1``.
    """
    # Avoid sharing one mutable default list between calls.
    if tokens is None:
        tokens = []

    device = self.detectron2_gpu_device
    with torch.no_grad():
        inputs = []
        for img in imgs:
            raw_img = img.permute(2, 0, 1)
            raw_img = raw_img.to(device)
            (_, height, width) = raw_img.shape
            inputs.append({
                "image": raw_img,
                "height": height,
                "width": width,
            })

        # Normalize the image by subtracting the mean.
        # Moves the image to device (already in device).
        images = self.detector.model.preprocess_image(inputs)
        sizes = images.image_sizes

        # Features from the backbone
        features = self.detector.model.backbone(images.tensor)

        # Get RPN proposals.
        # proposal_generator inputs are the images, features, gt_instances;
        # for detection no gt instances are needed.
        proposals, _ = self.detector.model.proposal_generator(
            images, features, None)
        images = None  # drop the reference once it is no longer needed

        # The C4 model uses Res5ROIHeads where pooled feature can be extracted
        roi_heads = self.detector.model.roi_heads
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        features = None

        # Pooled features to use in the agent
        feature_pooled = box_features.mean(dim=[2, 3])

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = \
            roi_heads.box_predictor(feature_pooled)
        rcnn_outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        proposals = None

        # Filter proposals using Non-Maximum Suppression (NMS)
        instances_list, ids_list = [], []
        probs_list = rcnn_outputs.predict_probs()
        boxes_list = rcnn_outputs.predict_boxes()
        num_boxes = []
        for probs, boxes, image_size in zip(probs_list, boxes_list, sizes):
            # We need to get topk_per_image boxes so we gradually increase
            # the tolerance of the nms_thresh if we don't have enough boxes
            for nms_thresh in np.arange(0.3, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image_size,
                    score_thresh=score_thresh,
                    nms_thresh=nms_thresh,
                    topk_per_image=max_regions,
                    device=device,
                    preferred_labels=self.class_names,
                    tokens=tokens,
                    tokenizer=self.tokenizer)
                if len(ids) >= min_num_image:
                    break
            num_boxes.append(len(ids) + 1)
            instances_list.append(instances)
            ids_list.append(ids)

        # Post processing for features
        # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)]
        features_list = feature_pooled.split(rcnn_outputs.num_preds_per_image)
        feature_pooled = None
        roi_features_list = []
        for ids, features in zip(ids_list, features_list):
            kept = features[ids]
            # First row is the mean of the kept region features.
            head_box = torch.sum(kept, axis=0) / len(kept)
            head_box = head_box.unsqueeze(0)
            roi_features_list.append(torch.cat((head_box, kept), 0))

        # Post processing for bounding boxes (rescale to raw_image)
        boxes = []
        classes = []
        for instances, input_per_image, image_size in zip(
                instances_list, inputs, sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            raw_instances = detector_postprocess(instances, height, width)

            box = torch.zeros((len(raw_instances) + 1, 5), device=device)
            box[0] = torch.tensor([[0, 0, 1, 1, 1]], device=device).float()
            box[1:, :4] = raw_instances.pred_boxes.tensor
            # NOTE(review): the divisions below also rescale row 0, which was
            # just set to the already-normalised box [0, 0, 1, 1, 1], and the
            # area column divides normalised extents by height * width a
            # second time. Left unchanged because downstream consumers may
            # rely on these exact values -- confirm before fixing.
            box[:, 0] /= float(width)
            box[:, 1] /= float(height)
            box[:, 2] /= float(width)
            box[:, 3] /= float(height)
            box[:, 4] = (box[:, 3] - box[:, 1]) * (box[:, 2] - box[:, 0]) / \
                (float(height) * float(width))
            boxes.append(box)
            classes.append(raw_instances.pred_classes)

        # features, boxes, image_mask
        return roi_features_list, boxes, num_boxes, classes
def inference(self, batched_inputs, do_postprocess=True):
    """
    Run inference on the given inputs.

    Class logits come from this module's own ``box_head`` /
    ``box_predictor`` applied to the transformed box features, while the box
    deltas come from the RoI heads' predictor on the untransformed features.

    Args:
        batched_inputs (list[dict]): same as in :meth:`forward`. When the
            model has no proposal generator, every dict must contain a
            "proposals" entry.
        do_postprocess (bool): whether to apply post-processing on the
            outputs.

    Returns:
        With ``do_postprocess`` a list of ``{"instances": ...}`` dicts
        rescaled to each input's "height"/"width" (falling back to the
        preprocessed image size); otherwise the raw inference results.
    """
    assert not self.training

    images = self.preprocess_image(batched_inputs)
    features = self.backbone(images.tensor)

    if self.attention is not None:
        # Re-weight res4 with the attention map, repeated along dim 1.
        res4 = features['res4']
        attn_weights = self.attention(res4)
        features['res4'] = res4 * attn_weights.repeat(1, res4.size(1), 1, 1)

    if self.proposal_generator:
        proposals, _ = self.proposal_generator(images, features, None)
    else:
        assert "proposals" in batched_inputs[0]
        proposals = [x["proposals"].to(self.device) for x in batched_inputs]

    proposals, raw_box_features = self.roi_heads.transform_forward(
        images, features, proposals)

    # Branch 0: RoI heads' own head and predictor (only the deltas are used).
    head_features = self.roi_heads.box_head(raw_box_features)
    _, deltas_from_roi_heads = self.roi_heads.box_predictor(head_features)

    # Branch 1: residual transformation, then this module's head/predictor
    # (only the class logits are used).
    transformed = raw_box_features + self.transformation(raw_box_features)
    head_features = self.box_head(transformed)
    logits_from_transformed, _ = self.box_predictor(head_features)

    outputs = FastRCNNOutputs(
        self.roi_heads.box2box_transform,
        logits_from_transformed,
        deltas_from_roi_heads,
        proposals,
        self.roi_heads.smooth_l1_beta,
    )
    results, _ = outputs.inference(self.roi_heads.test_score_thresh,
                                   self.roi_heads.test_nms_thresh,
                                   self.roi_heads.test_detections_per_img)

    if not do_postprocess:
        return results

    processed_results = []
    for per_image, per_input, size in zip(results, batched_inputs,
                                          images.image_sizes):
        out_height = per_input.get("height", size[0])
        out_width = per_input.get("width", size[1])
        rescaled = detector_postprocess(per_image, out_height, out_width)
        processed_results.append({"instances": rescaled})
    return processed_results
def doit(self, raw_image):
    """Detect objects in one image and build RoI and spatial features.

    Args:
        raw_image: image array; ``shape[:2]`` is read as (height, width) and
            the array is transposed from (H, W, C) to (C, H, W).

    Returns:
        ``(instances, roi_features, oscar_features)``: detections rescaled to
        the raw image size (with ``attr_scores`` / ``attr_classes``
        attached), pooled features of the kept boxes, and the same features
        concatenated with six box-geometry values scaled by the transformed
        image size.
    """
    with torch.no_grad():
        model = self.predictor.model
        roi_heads = model.roi_heads
        raw_height, raw_width = raw_image.shape[:2]

        # Preprocessing
        resized = self.predictor.transform_gen.get_transform(
            raw_image).apply_image(raw_image)
        image_height, image_width = resized.shape[:2]
        image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        batch = [{"image": image, "height": raw_height, "width": raw_width}]
        images = model.preprocess_image(batch)

        # Backbone (Res1-Res4) followed by the RPN.
        backbone_out = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, backbone_out, None)
        proposal = proposals[0]  # not used below

        # RoI head per proposal (RoI Pooling + Res5), then pool to 1x1.
        proposal_boxes = [p.proposal_boxes for p in proposals]
        features = [backbone_out[name] for name in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])

        # Class / attribute / box-delta predictions for every proposal.
        class_logits, attr_logits, proposal_deltas = roi_heads.box_predictor(
            feature_pooled)
        outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            class_logits,
            proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # Box geometry relative to the transformed image size, built from the
        # first four columns of the predicted boxes.
        scaled_width = (boxes[:, 2] - boxes[:, 0]) / image_width
        scaled_height = (boxes[:, 3] - boxes[:, 1]) / image_height
        scaled_x = boxes[:, 0] / image_width
        scaled_y = boxes[:, 1] / image_height
        spatial_features = torch.stack(
            (scaled_x, scaled_y, scaled_x + scaled_width,
             scaled_y + scaled_height, scaled_width, scaled_height),
            dim=1)
        oscar_features = torch.cat((feature_pooled, spatial_features), 1)

        # The last attribute column is dropped before the softmax.
        attr_prob = attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)

        # Note: BUTD uses raw RoI predictions; the predicted boxes are used
        # here instead.

        # NMS with a progressively looser threshold until enough boxes.
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.2,
                nms_thresh=nms_thresh,
                topk_per_image=self.NUM_OBJECTS)
            if len(ids) >= self.NUM_OBJECTS:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        oscar_features = oscar_features[ids].detach()
        instances.attr_scores = max_attr_prob[ids].detach()
        instances.attr_classes = max_attr_label[ids].detach()

        return instances, roi_features, oscar_features
def doit(self):
    """Build the VG attribute detector and extract RoI features.

    Reads the object and attribute vocabularies from ``data/genome/``,
    registers them on the "vg" metadata entry, builds a ``DefaultPredictor``
    from the VG Faster R-CNN config and runs it on ``self.raw_image``.
    Intermediate sizes, the final instances and the feature size are
    printed.

    Returns:
        The pooled RoI features of the boxes kept by NMS.
    """
    data_path = 'data/genome/'

    def read_vocab(filename):
        # One entry per line: text before the first comma, lower-cased and
        # stripped.
        with open(os.path.join(data_path, filename)) as f:
            return [line.split(',')[0].lower().strip() for line in f]

    vg_classes = read_vocab('objects_vocab.txt')
    vg_attrs = read_vocab('attributes_vocab.txt')

    MetadataCatalog.get("vg").thing_classes = vg_classes
    MetadataCatalog.get("vg").attr_classes = vg_attrs

    cfg = get_cfg()
    cfg.merge_from_file(
        "./configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml")
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
    # VG Weight
    cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr_original.pkl"
    predictor = DefaultPredictor(cfg)

    with torch.no_grad():
        model = predictor.model
        roi_heads = model.roi_heads

        raw_height, raw_width = self.raw_image.shape[:2]
        print("Original image size: ", (raw_height, raw_width))

        # Preprocessing
        resized = predictor.transform_gen.get_transform(
            self.raw_image).apply_image(self.raw_image)
        print("Transformed image size: ", resized.shape[:2])
        image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        batch = [{"image": image, "height": raw_height, "width": raw_width}]
        images = model.preprocess_image(batch)

        # Backbone (Res1-Res4) followed by the RPN.
        backbone_out = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, backbone_out, None)
        first_proposal = proposals[0]
        print('Proposal Boxes size:',
              first_proposal.proposal_boxes.tensor.shape)

        # RoI head per proposal (RoI Pooling + Res5), then pool to 1x1.
        proposal_boxes = [p.proposal_boxes for p in proposals]
        features = [backbone_out[name] for name in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])
        print('Pooled features size:', feature_pooled.shape)

        # Class / attribute / box-delta predictions for every proposal.
        class_logits, attr_logits, proposal_deltas = roi_heads.box_predictor(
            feature_pooled)
        outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            class_logits,
            proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # The last attribute column is dropped before the softmax.
        attr_prob = attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)

        # Note: BUTD uses raw RoI predictions; the predicted boxes are used
        # here instead.

        # NMS with a progressively looser threshold until enough boxes.
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.2,
                nms_thresh=nms_thresh,
                topk_per_image=self.NUM_OBJECTS)
            if len(ids) >= self.NUM_OBJECTS:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        instances.attr_scores = max_attr_prob[ids].detach()
        instances.attr_classes = max_attr_label[ids].detach()

        print(instances)
        print((roi_features).size())
        return roi_features
def extract_featrue(predictor, split, dataset_name, data, th=0.2):
    """Extract bottom-up RoI features for a dataset split and save as HDF5.

    For every distinct image file name in ``data`` one
    ``<image_filename>.hdf5`` file is written below the (hard-coded) feature
    directory, with datasets ``'image_features'`` and ``'image_boxes'``.
    Images that yield fewer than ``NUM_OBJECTS`` boxes are delegated to
    ``extract_single`` instead.

    Args:
        predictor: object exposing ``transform_gen`` and ``model`` as used
            below. Its box predictor must return three values (class logits,
            attribute logits, box deltas).
        split: split name, used as the last directory component.
        dataset_name: dataset name, used to build the output directory.
        data: sequence of dicts with an ``'image_path'`` key.
        th: score threshold passed to NMS.

    Raises:
        IOError: if an image cannot be read by ``cv2.imread``.
    """
    save_path = f'/home/sunjiamei/work/ImageCaptioning/dataset/{dataset_name}_bu_features/{split}'
    os.makedirs(save_path, exist_ok=True)

    # A set gives O(1) duplicate detection on the file names.
    seen_filenames = set()
    print(len(data))
    for idx, item in enumerate(data):
        print(idx, len(data))
        img_path = item['image_path']
        image_filename = img_path.split('/')[-1]
        # Skip duplicates before touching the disk.
        if image_filename in seen_filenames:
            continue
        seen_filenames.add(image_filename)

        raw_image = cv2.imread(img_path)
        if raw_image is None:
            # cv2.imread signals failure by returning None.
            raise IOError(f'Could not read image: {img_path}')

        with torch.no_grad():
            raw_height, raw_width = raw_image.shape[:2]

            # Preprocessing
            image = predictor.transform_gen.get_transform(
                raw_image).apply_image(raw_image)
            image = torch.as_tensor(
                image.astype("float32").transpose(2, 0, 1))
            inputs = [{
                "image": image,
                "height": raw_height,
                "width": raw_width
            }]
            images = predictor.model.preprocess_image(inputs)

            # Run Backbone Res1-Res4
            features = predictor.model.backbone(images.tensor)

            # Generate proposals with RPN
            proposals, _ = predictor.model.proposal_generator(
                images, features, None)

            # Run RoI head for each proposal (RoI Pooling + Res5)
            roi_heads = predictor.model.roi_heads
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [features[f] for f in roi_heads.in_features]
            box_features = roi_heads._shared_roi_transform(
                features, proposal_boxes)
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1

            # Predict classes and boxes for each proposal. The attribute
            # logits do not influence anything that is saved, so they are
            # discarded.
            pred_class_logits, _attr_logits, pred_proposal_deltas = \
                roi_heads.box_predictor(feature_pooled)
            outputs = FastRCNNOutputs(
                roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                roi_heads.smooth_l1_beta,
            )
            probs = outputs.predict_probs()[0]
            boxes = outputs.predict_boxes()[0]

            # Note: we use the predicted boxes instead.
            # boxes = proposal_boxes[0].tensor

            # NMS: loosen the threshold until NUM_OBJECTS boxes are kept.
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image.shape[1:],
                    score_thresh=th,
                    nms_thresh=nms_thresh,
                    topk_per_image=NUM_OBJECTS)
                if len(ids) == NUM_OBJECTS:
                    break

            instances = detector_postprocess(instances, raw_height,
                                             raw_width)
            roi_features = feature_pooled[ids].detach()

            if roi_features.size(0) < NUM_OBJECTS:
                # Too few boxes: fall back to the single-image extractor.
                extract_single(img_path, save_path, image_filename)
                continue

            # The context manager closes the file even if a write fails.
            h5_path = os.path.join(save_path, image_filename + '.hdf5')
            with h5py.File(h5_path, 'w') as h5_save_file:
                h5_save_file.create_dataset(
                    'image_features', data=roi_features.cpu().numpy())
                h5_save_file.create_dataset(
                    'image_boxes',
                    data=instances.pred_boxes.tensor.cpu().numpy())
def run_detector(raw_image, predictor, num_objects=8000, verbose=True):
    """Run the detector on one image and return the post-processed instances.

    Args:
        raw_image: image array; ``shape[:2]`` is read as (height, width) and
            the array is transposed from (H, W, C) to (C, H, W).
        predictor: object exposing ``transform_gen`` and ``model`` as used
            below. Its box predictor must return two values (class logits,
            box deltas).
        num_objects: maximum number of boxes kept by NMS.
        verbose: when true, progress information is written via
            ``tqdm.write``.

    Returns:
        The detected instances rescaled to the raw image size.
    """

    def log(message):
        # Emit a progress line only in verbose mode.
        if verbose:
            tqdm.write(message)

    with torch.no_grad():
        model = predictor.model
        roi_heads = model.roi_heads

        raw_height, raw_width = raw_image.shape[:2]
        log("Original image size: " + str((raw_height, raw_width)))

        # Preprocessing
        resized = predictor.transform_gen.get_transform(raw_image).apply_image(
            raw_image)
        log("Transformed image size: " + str(resized.shape[:2]))
        image = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        batch = [{"image": image, "height": raw_height, "width": raw_width}]
        images = model.preprocess_image(batch)

        # Backbone (Res1-Res4) followed by the RPN.
        backbone_out = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, backbone_out, None)
        first_proposal = proposals[0]
        if verbose:
            tqdm.write('Proposal Boxes size: ' +
                       str(first_proposal.proposal_boxes.tensor.shape))

        # RoI head per proposal (RoI Pooling + Res5), then pool to 1x1.
        proposal_boxes = [p.proposal_boxes for p in proposals]
        features = [backbone_out[name] for name in roi_heads.in_features]
        box_features = roi_heads._shared_roi_transform(features,
                                                       proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])
        if verbose:
            tqdm.write('Pooled features size: ' + str(feature_pooled.shape))

        # Class and box-delta predictions for every proposal.
        class_logits, proposal_deltas = roi_heads.box_predictor(
            feature_pooled)
        outputs = FastRCNNOutputs(
            roi_heads.box2box_transform,
            class_logits,
            proposal_deltas,
            proposals,
            roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # Note: BUTD uses raw RoI predictions; the predicted boxes are used
        # here instead.

        # NMS with a progressively looser threshold until num_objects boxes
        # are kept; the score threshold is 0.0 here.
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.0,
                nms_thresh=nms_thresh,
                topk_per_image=num_objects)
            if len(ids) == num_objects:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        if verbose:
            tqdm.write(str(instances))

        return instances