def _process_feature_extraction_v2(self, output, im_scales, im_infos, feature_name="fc6", conf_thresh=0):
    """Keep the best-scoring boxes of every image and return features + metadata.

    For each image, NMS is run once per class (column 0 of the scores is
    skipped as the background class) on the rescaled proposals. Every box
    that survives NMS for a class remembers the highest class probability
    seen so far; the ``self.num_features`` boxes with the largest such
    probability are kept.

    Args:
        output: Indexable whose first element is a mapping holding
            ``"proposals"`` (one box container per image), ``"scores"``
            and ``feature_name``; the two tensors are split per image
            according to the proposal counts.
        im_scales: Per-image scale; proposal boxes are divided by it.
        im_infos: Per-image mappings with ``"width"`` and ``"height"``.
        feature_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Kept for interface compatibility only. The threshold
            comparison is disabled in this variant, so the value is ignored.

    Returns:
        Tuple ``(feat_list, info_list)``. ``feat_list[i]`` holds the selected
        feature rows of image ``i``; ``info_list[i]`` is a dict with the keys
        ``"bbox"``, ``"image_width"``, ``"image_height"``, ``"cls_prob"``
        (probabilities of the kept boxes, all columns) and ``"max_features"``
        (how many of the kept boxes have a non-zero best probability).
    """
    from maskrcnn_benchmark.layers import nms

    proposals = output[0]["proposals"]
    n_boxes_per_image = [len(boxes) for boxes in proposals]
    score_list = [
        torch.nn.functional.softmax(x, -1)
        for x in output[0]["scores"].split(n_boxes_per_image)
    ]
    feats = output[0][feature_name].split(n_boxes_per_image)

    start_index = 1  # Column 0 of the scores matrix is for the background class
    feat_list = []
    info_list = []
    for i, boxes in enumerate(proposals):
        dets = boxes.bbox / im_scales[i]
        scores = score_list[i]
        max_conf = torch.zeros(scores.shape[0], device=scores.device)

        for cls_ind in range(start_index, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            kept_scores = cls_scores[keep]
            kept_best = max_conf[keep]
            # Only raise the stored confidence; boxes suppressed for this
            # class are not touched.
            max_conf[keep] = torch.where(kept_scores > kept_best, kept_scores, kept_best)

        sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
        num_boxes = (sorted_scores[:self.num_features] != 0).sum()
        keep_boxes = sorted_indices[:self.num_features]

        feat_list.append(feats[i][keep_boxes])
        bbox = boxes[keep_boxes].bbox / im_scales[i]

        info_list.append({
            "bbox": bbox.cpu().numpy(),
            "image_width": im_infos[i]["width"],
            "image_height": im_infos[i]["height"],
            "cls_prob": scores[keep_boxes].cpu().numpy(),
            "max_features": num_boxes.item(),
        })

    return feat_list, info_list
def _process_feature_extraction(
    self, output, im_scales, feat_name="fc6", conf_thresh=0.2
):
    """Return, per image, the features of the 100 most confident boxes.

    The raw scores in ``output[0]["scores"]`` are split per image using the
    proposal counts and turned into probabilities with a softmax. For every
    class column except column 0, ``nms`` is run on the rescaled boxes and
    each surviving box keeps the largest class probability it has seen.
    The boxes are then ranked by that value and the first 100 are kept.

    ``conf_thresh`` is accepted but not used by this implementation.

    Returns:
        A list with one tensor of selected ``feat_name`` rows per image.
    """
    proposals = output[0]["proposals"]
    num_images = len(proposals)
    counts = [len(image_boxes) for image_boxes in proposals]

    raw_scores = output[0]["scores"].split(counts)
    probs_per_image = [torch.nn.functional.softmax(chunk, -1) for chunk in raw_scores]
    feats_per_image = output[0][feat_name].split(counts)
    device = probs_per_image[0].device

    selected = []
    for img_idx in range(num_images):
        rescaled = proposals[img_idx].bbox / im_scales[img_idx]
        probs = probs_per_image[img_idx]
        best = torch.zeros((probs.shape[0])).to(device)

        num_classes = probs.shape[1]
        for cls_ind in range(1, num_classes):
            class_probs = probs[:, cls_ind]
            survivors = nms(rescaled, class_probs, 0.5)
            challenger = class_probs[survivors]
            incumbent = best[survivors]
            best[survivors] = torch.where(challenger > incumbent, challenger, incumbent)

        ranking = torch.argsort(best, descending=True)
        top = ranking[:100]
        selected.append(feats_per_image[img_idx][top])

    return selected
def process_feature_extraction(output, im_scales, max_boxes=100, get_boxes=False, feat_name='fc6', conf_thresh=0.2):
    """Select the ``max_boxes`` most confident proposals of every image.

    Scores are split per image and soft-maxed. For each class column except
    column 0, ``nms`` is run on the rescaled boxes; a box that survives for a
    class and scores higher than its stored best takes over that class'
    probability (and, with ``get_boxes``, the class index and box).

    Args:
        output: Indexable whose first element is a mapping with
            ``"proposals"``, ``"scores"`` and ``feat_name``.
        im_scales: Per-image scale; proposal boxes are divided by it.
        max_boxes: Number of top-ranked boxes kept per image.
        get_boxes: When false, only the feature list is returned. When true,
            the winning box coordinates, class indices and confidences of the
            kept boxes are returned as well.
        feat_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Accepted but not used by this implementation.

    Returns:
        ``feat_list`` when ``get_boxes`` is false, otherwise
        ``[boxes_list, feat_list, classes_list, conf_list]``. Every list has
        one entry per image of the batch.
    """
    proposals = output[0]["proposals"]
    n_boxes_per_image = [len(boxes) for boxes in proposals]
    score_list = [
        torch.nn.functional.softmax(x, -1)
        for x in output[0]["scores"].split(n_boxes_per_image)
    ]
    feats = output[0][feat_name].split(n_boxes_per_image)
    cur_device = score_list[0].device

    feat_list = []
    boxes_list = []
    classes_list = []
    conf_list = []

    for i, boxes in enumerate(proposals):
        # The proposal boxes may live on another device than the scores.
        dets = boxes.bbox.to(cur_device) / im_scales[i]
        scores = score_list[i]
        n_boxes = scores.shape[0]

        max_conf = torch.zeros(n_boxes, device=cur_device)
        if get_boxes:
            max_cls = torch.zeros(n_boxes, dtype=torch.long, device=cur_device)
            max_box = torch.zeros((n_boxes, 4), device=cur_device)

        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            kept_scores = cls_scores[keep]
            # Evaluate the comparison once, before max_conf is overwritten,
            # so class, box and confidence are updated consistently.
            better = kept_scores > max_conf[keep]
            if get_boxes:
                kept_cls = max_cls[keep]
                max_cls[keep] = torch.where(
                    better, torch.full_like(kept_cls, cls_ind), kept_cls)
                max_box[keep] = torch.where(
                    better.unsqueeze(1), dets[keep], max_box[keep])
            max_conf[keep] = torch.where(better, kept_scores, max_conf[keep])

        keep_boxes = torch.argsort(max_conf, descending=True)[:max_boxes]
        feat_list.append(feats[i][keep_boxes])
        if get_boxes:
            conf_list.append(max_conf[keep_boxes])
            boxes_list.append(max_box[keep_boxes])
            classes_list.append(max_cls[keep_boxes])

    # Return only after the whole batch has been processed so that every
    # image contributes an entry to each list.
    if not get_boxes:
        return feat_list
    return [boxes_list, feat_list, classes_list, conf_list]
def _process_feature_extraction(self, output, im_scales, im_infos, feature_name="fc6", conf_thresh=0):
    """Return the features of every proposal plus per-image box metadata.

    This variant keeps all proposals of an image: no box is filtered out or
    re-ordered. The per-class NMS pass of the sibling implementations only
    fed a confidence vector that was never read here, so it is not run.

    Args:
        output: Indexable whose first element is a mapping with
            ``"proposals"`` (one box container per image, supporting
            ``len()``, ``.resize((width, height))`` and ``.bbox``),
            ``"scores"`` and ``feature_name``.
        im_scales: Unused; kept for interface compatibility.
        im_infos: Per-image mappings with ``"width"`` and ``"height"``.
        feature_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(feat_list, info_list)`` with one entry per image.
        ``info_list[i]`` has the keys ``"bbox"``, ``"num_boxes"``,
        ``"objects"``, ``"image_width"``, ``"image_height"`` and
        ``"cls_prob"``.
    """
    proposals = output[0]["proposals"]
    n_boxes_per_image = [len(boxes) for boxes in proposals]
    score_list = [
        torch.nn.functional.softmax(x, -1)
        for x in output[0]["scores"].split(n_boxes_per_image)
    ]
    feats = output[0][feature_name].split(n_boxes_per_image)

    # Column 0 of the scores matrix is for the background class; it takes
    # part in the label prediction only when explicitly requested.
    start_index = 0 if self.args.background else 1

    feat_list = []
    info_list = []
    for i, boxes in enumerate(proposals):
        scores = score_list[i]
        width = im_infos[i]["width"]
        height = im_infos[i]["height"]

        feat_list.append(feats[i])
        bbox = boxes.resize((width, height)).bbox

        # Predicted label is an index into the columns from start_index on,
        # i.e. it is shifted by one when the background column is skipped.
        objects = torch.argmax(scores[:, start_index:], dim=1)

        info_list.append({
            "bbox": bbox.cpu().numpy(),
            "num_boxes": len(feats[i]),
            "objects": objects.cpu().numpy(),
            "image_width": width,
            "image_height": height,
            "cls_prob": scores.cpu().numpy(),
        })

    return feat_list, info_list
def _process_feature_extraction(self, output, im_scales, im_infos, feature_name="fc6", conf_thresh=0):
    """Return the top boxes' features and their normalised coordinates.

    For each image, NMS is run once per class on the rescaled proposals. A
    surviving box takes over a class probability only if it beats both the
    box's stored best and ``conf_thresh``. The ``self.args.num_features``
    boxes with the highest stored probability are kept.

    Args:
        output: Indexable whose first element is a mapping with
            ``"proposals"``, ``"scores"`` and ``feature_name``.
        im_scales: Per-image scale; proposal boxes are divided by it.
        im_infos: Per-image mappings with ``"width"`` and ``"height"``.
        feature_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Minimum class probability a box must exceed to have
            its confidence recorded.

    Returns:
        Tuple ``(feat_list, info_list)`` with one tensor per image each:
        the selected feature rows, and the matching boxes with columns 0/2
        divided by the image width and columns 1/3 by the image height.
    """
    proposals = output[0]["proposals"]
    n_boxes_per_image = [len(boxes) for boxes in proposals]
    score_list = [
        torch.nn.functional.softmax(x, -1)
        for x in output[0]["scores"].split(n_boxes_per_image)
    ]
    feats = output[0][feature_name].split(n_boxes_per_image)
    cur_device = score_list[0].device

    feat_list = []
    info_list = []
    for i, boxes in enumerate(proposals):
        dets = boxes.bbox / im_scales[i]
        scores = score_list[i]
        max_conf = torch.zeros(scores.shape[0]).to(cur_device)
        conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)

        # Column 0 of the scores matrix is for the background class
        start_index = 0 if self.args.background else 1

        for cls_ind in range(start_index, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.5)
            kept_scores = cls_scores[keep]
            kept_best = max_conf[keep]
            # Better than the best so far and strictly above conf_thresh.
            improved = (kept_scores > kept_best) & (kept_scores > conf_thresh_tensor[keep])
            max_conf[keep] = torch.where(improved, kept_scores, kept_best)

        keep_boxes = torch.argsort(max_conf, descending=True)[:self.args.num_features]
        feat_list.append(feats[i][keep_boxes])

        # The division yields a fresh tensor, so the in-place normalisation
        # below does not write into the proposals.
        bbox = boxes[keep_boxes].bbox / im_scales[i]

        # Normalize the boxes (to 0 ~ 1)
        img_h, img_w = im_infos[i]['height'], im_infos[i]['width']
        bbox[:, [0, 2]] /= img_w
        bbox[:, [1, 3]] /= img_h
        info_list.append(bbox)

    return feat_list, info_list
def _features_extraction(self, output, im_scales, feature_name='fc6', conf_thresh=0.5):
    """Return, per image, the features of the 100 most confident boxes.

    Args:
        output: Indexable whose first element is a mapping with
            ``"proposals"`` (one box container per image), ``"scores"``
            and ``feature_name``.
        im_scales: Per-image scale; proposal boxes are divided by it to
            bring them back to the original image coordinates.
        feature_name: Key of the feature tensor inside ``output[0]``.
        conf_thresh: Accepted but not applied: boxes are ranked by their
            best class probability and no threshold is enforced.

    Returns:
        A list with one tensor of selected feature rows per image.
    """
    proposals = output[0]["proposals"]
    # Number of boxes of each image, used to split the batched tensors.
    n_boxes_per_image = [len(boxes) for boxes in proposals]
    # One (n_boxes, num_classes) probability tensor per image.
    score_list = [
        torch.nn.functional.softmax(x, -1)
        for x in output[0]["scores"].split(n_boxes_per_image)
    ]
    # One (n_boxes, feature_dim) tensor per image.
    features = output[0][feature_name].split(n_boxes_per_image)

    features_list = []
    for i, boxes in enumerate(proposals):
        dets = boxes.bbox / im_scales[i]
        scores = score_list[i]

        # max_conf[j] records the highest class probability for which box j
        # survived NMS; it stays 0 for boxes that never survive. It is
        # created on the scores' own device so the indexed updates below
        # cannot hit a device mismatch.
        max_conf = torch.zeros(scores.shape[0], device=scores.device)

        # Column 0 is skipped (presumably the background class).
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            # Indices of the boxes kept by NMS for this class.
            keep = nms(dets, cls_scores, 0.5)
            kept_scores = cls_scores[keep]
            kept_best = max_conf[keep]
            max_conf[keep] = torch.where(kept_scores > kept_best, kept_scores, kept_best)

        # Keep the 100 boxes with the highest recorded probability.
        keep_boxes = torch.argsort(max_conf, descending=True)[:100]
        features_list.append(features[i][keep_boxes])

    return features_list
def _process_feature_extraction(self, output, im_scales, feature_name="fc6", conf_thresh=0.2):
    """Pick the ``self.NUM_FEATURES`` most confident boxes of each image.

    Scores are split per image and soft-maxed. For every class column except
    column 0, ``nms`` is run on the rescaled boxes and each surviving box
    keeps the largest class probability seen. Boxes are ranked by that value.

    ``conf_thresh`` is accepted but not used by this implementation.

    Returns:
        Tuple ``(feat_list, info_list)``. ``info_list[i]`` holds ``"bbox"``
        (rescaled boxes), ``"objects"`` (argmax over all score columns of
        the kept boxes) and ``"image_width"`` / ``"image_height"`` (the
        proposal container's ``size`` divided by the image scale).
    """
    proposals = output[0]["proposals"]
    num_images = len(proposals)
    counts = [len(boxes) for boxes in proposals]

    raw_scores = output[0]["scores"].split(counts)
    probs_per_image = [torch.nn.functional.softmax(chunk, -1) for chunk in raw_scores]
    feats_per_image = output[0][feature_name].split(counts)
    device = probs_per_image[0].device

    feat_list = []
    info_list = []
    for idx in range(num_images):
        image_boxes = proposals[idx]
        scale = im_scales[idx]
        rescaled = image_boxes.bbox / scale
        probs = probs_per_image[idx]

        best = torch.zeros((probs.shape[0])).to(device)
        for cls_ind in range(1, probs.shape[1]):
            class_probs = probs[:, cls_ind]
            survivors = nms(rescaled, class_probs, 0.5)
            challenger = class_probs[survivors]
            incumbent = best[survivors]
            best[survivors] = torch.where(challenger > incumbent, challenger, incumbent)

        top = torch.argsort(best, descending=True)[:self.NUM_FEATURES]
        feat_list.append(feats_per_image[idx][top])

        kept_boxes = image_boxes[top].bbox / scale
        labels = torch.argmax(probs[top], dim=1)
        width, height = (image_boxes.size[axis] / scale for axis in (0, 1))

        info_list.append({
            "bbox": kept_boxes.cpu().numpy(),
            "objects": labels.cpu().numpy(),
            "image_width": width,
            "image_height": height,
        })

    return feat_list, info_list
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
    """Filter detections by confidence, then apply non-maximum suppression.

    For every image, rows whose column 4 is not strictly greater than
    ``conf_thres`` are dropped, the first four columns are converted with
    ``xywh2xyxy`` and the remaining rows are passed through NMS.

    Args:
        prediction: Iterable of 2-D tensors, one per image. Columns 0-3 are
            the box in the format ``xywh2xyxy`` expects and column 4 is the
            confidence; any further columns are carried along unchanged.
        conf_thres: Confidence threshold applied to column 4.
        nms_thres: Threshold forwarded to ``nms`` / ``soft_nms``.
        method: ``-1`` selects ``nms``; any other value is forwarded to
            ``soft_nms`` as its ``method`` argument.

    Returns:
        A list with one entry per image: the surviving rows (with columns
        0-3 converted by ``xywh2xyxy``), or ``None`` when nothing survives.
        The input tensors are not modified.
    """
    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # Boolean-mask indexing copies the selected rows, so the in-place
        # coordinate conversion below never touches the caller's tensor.
        pred = pred[pred[:, 4] > conf_thres]

        # If none are remaining => process next image
        if pred.shape[0] == 0:
            continue

        # From (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        if method == -1:
            nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
        else:
            dets = pred[:, :5].detach().clone().contiguous().cpu().numpy()
            nms_indices = soft_nms(dets, Nt=nms_thres, method=method)

        det_max = pred[nms_indices]
        if len(det_max) > 0:
            # Each image index is visited once, so the slot is still empty.
            output[image_i] = det_max

    return output
def _process_feature_extraction(self, output, im_scales, feat_name='fc6'):
    """Return features and boxes of the 100 most confident proposals.

    Only the first image is handled: the boxes come from
    ``output[0]['proposals'][0]`` and the scale from ``im_scales[0]``.
    For every class column except column 0, ``nms`` is run on the rescaled
    boxes and each surviving box keeps the largest class probability seen;
    the boxes are then ranked by that value.

    Args:
        output: Indexable whose first element is a mapping with
            ``'proposals'``, ``'scores'`` and ``feat_name``.
        im_scales: Sequence whose first element is the image scale.
        feat_name: Key of the feature tensor inside ``output[0]``.

    Returns:
        Tuple ``(features, boxes)`` for the kept proposals. The boxes are
        returned in the proposals' own coordinates (not divided by the
        image scale).
    """
    bboxes = output[0]['proposals'][0].bbox
    scores = F.softmax(output[0]['scores'], dim=1)
    # Honour the requested feature layer instead of always reading 'fc6'.
    feats = output[0][feat_name]

    dets = bboxes / im_scales[0]

    # Allocate next to the scores so the function works on any device.
    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.5)
        kept_scores = cls_scores[keep]
        kept_best = max_conf[keep]
        max_conf[keep] = torch.where(kept_scores > kept_best, kept_scores, kept_best)

    keep_boxes = torch.argsort(max_conf, descending=True)[:100]
    return feats[keep_boxes], bboxes[keep_boxes]
def _process_nms_bbox(self, output, point, im_scale, feat_name):
    """Return features and boxes of the NMS-surviving proposals containing a point.

    Only the first image of ``output`` is handled. Proposals are rescaled,
    restricted to those whose box contains ``point``, and then reduced to
    the boxes that survive ``nms`` for their own highest-scoring class
    (column 0 of the scores is ignored). The result is sorted by that
    highest class probability, best first.

    Args:
        output: Indexable whose first element is a mapping with
            ``'proposals'``, ``'scores'`` and ``feat_name``.
        point: Mapping with ``'x'`` and ``'y'`` in rescaled coordinates.
        im_scale: Scale the proposal boxes are divided by.
        feat_name: Key of the feature tensor inside ``output[0]``.

    Returns:
        Tuple ``(features, boxes)``. When no box contains the point, zero
        tensors with 100 rows are returned instead.
    """
    bboxes = output[0]['proposals'][0].bbox / im_scale
    scores = F.softmax(output[0]['scores'], dim=1)
    feat = output[0][feat_name]

    xmin, ymin, xmax, ymax = bboxes.split(1, dim=1)
    filt_pt = ((point['x'] >= xmin) & (point['x'] <= xmax) &
               (point['y'] >= ymin) & (point['y'] <= ymax)).flatten()
    bboxes, scores, feat = bboxes[filt_pt], scores[filt_pt], feat[filt_pt]

    # check if no bounding boxes contain that point
    if bboxes.nelement() == 0:
        # The feature width follows the selected layer rather than a fixed
        # size. NOTE(review): these placeholders are allocated without an
        # explicit device, unlike the regular return values - confirm that
        # callers expect this.
        return torch.zeros(100, *feat.shape[1:]), torch.zeros(100, 4)

    # perform nms
    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(bboxes, cls_scores, 0.5)
        kept_scores = cls_scores[keep]
        kept_best = max_conf[keep]
        max_conf[keep] = torch.where(kept_scores > kept_best, kept_scores, kept_best)

    # A box is retained only if its recorded confidence equals its best
    # non-background probability, i.e. it survived NMS for its top class.
    max_scores, _ = scores[:, 1:].max(dim=-1)
    filt_nms = (max_scores == max_conf)
    bboxes, scores, feat = bboxes[filt_nms], scores[filt_nms], feat[filt_nms]

    max_scores, _ = scores[:, 1:].max(dim=-1)
    sorted_boxes = torch.argsort(max_scores, descending=True)
    return feat[sorted_boxes], bboxes[sorted_boxes]
def _process_feature_extraction(
    self,
    output: torch.Tensor,
    im_scales: List[float],
    im_infos: List[Dict[str, int]],
    feature_name: str = "fc6",
    conf_thresh: int = 0,
):
    """
    Turn raw detector output into per-image features and box metadata.

    For each image, NMS is run once per non-background class on the
    rescaled proposals; a surviving box takes over a class probability only
    if it beats both the box's stored best and ``conf_thresh``. The
    ``self.num_features`` boxes with the highest stored probability are kept.

    :param output: detector output; ``output[0]`` is read as a mapping with
        ``"proposals"``, ``"scores"`` and ``feature_name`` entries
    :param im_scales: per-image scales the proposal boxes are divided by
    :param im_infos: per-image dicts providing ``"width"`` and ``"height"``
    :param feature_name: key of the feature tensor to extract
    :param conf_thresh: minimum class probability a box must exceed to have
        its confidence recorded
    :return (feature_list, info_list): selected features per image, and a
        dict per image with ``"bbox"``, ``"num_boxes"``, ``"objects"``,
        ``"cls_prob"``, ``"image_width"`` and ``"image_height"``
    """
    from maskrcnn_benchmark.layers import nms

    proposals = output[0]["proposals"]
    num_images = len(proposals)
    counts = [len(image_boxes) for image_boxes in proposals]

    raw_scores = output[0]["scores"].split(counts)
    probs_per_image = [torch.nn.functional.softmax(chunk, -1) for chunk in raw_scores]
    feats_per_image = output[0][feature_name].split(counts)
    device = probs_per_image[0].device

    # Column 0 of the scores matrix is for the background class
    first_class = 1

    feature_list = []
    info_list = []
    for idx in range(num_images):
        image_boxes = proposals[idx]
        scale = im_scales[idx]
        rescaled = image_boxes.bbox / scale
        probs = probs_per_image[idx]

        best = torch.zeros(probs.shape[0]).to(device)
        threshold = torch.full_like(best, conf_thresh)

        for cls_ind in range(first_class, probs.shape[1]):
            class_probs = probs[:, cls_ind]
            survivors = nms(rescaled, class_probs, 0.5)
            challenger = class_probs[survivors]
            incumbent = best[survivors]
            # Better than the best so far and strictly above conf_thresh.
            accept = (challenger > incumbent) & (challenger > threshold[survivors])
            best[survivors] = torch.where(accept, challenger, incumbent)

        ranked_conf, ranking = torch.sort(best, descending=True)
        top = ranking[: self.num_features]
        non_zero = (ranked_conf[: self.num_features] != 0).sum()

        feature_list.append(feats_per_image[idx][top])
        kept_boxes = image_boxes[top].bbox / scale

        # Class probabilities of the kept boxes, background column removed;
        # the predicted label is the argmax over these columns.
        kept_probs = probs[top][:, first_class:]
        labels = torch.argmax(kept_probs, dim=1)

        info_list.append(
            {
                "bbox": kept_boxes.cpu().numpy(),
                "num_boxes": non_zero.item(),
                "objects": labels.cpu().numpy(),
                "cls_prob": kept_probs.cpu().numpy(),
                "image_width": im_infos[idx]["width"],
                "image_height": im_infos[idx]["height"],
            }
        )

    return feature_list, info_list