def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode predicted loc/cls back to box corners, class labels and scores.

    Args:
      loc_preds: (tensor) predicted offsets, one row per default box; columns
        [:2] are used as centre offsets and columns [2:] as log-size offsets.
      cls_preds: (tensor) per-class scores, one row per default box. Column 0
        is skipped (presumably the background class - confirm with the model).
      score_thresh: (float) a box is kept for a class only if its score for
        that class is strictly greater than this value.
      nms_thresh: (float) threshold forwarded to box_nms.

    Returns:
      boxes: (tensor) corner-form boxes, sized [#obj,4].
      labels: (LongTensor) class index per box (column index minus one),
        sized [#obj,].
      scores: (tensor) score per box, sized [#obj,].
      When no score passes the threshold all three are empty (#obj == 0)
      instead of raising from torch.cat on an empty list.
    '''
    variances = (0.1, 0.2)
    default_xy = self.default_boxes[:, :2]
    default_wh = self.default_boxes[:, 2:]

    # Undo the variance-scaled encoding, then go from centre/size to corners.
    xy = loc_preds[:, :2] * variances[0] * default_wh + default_xy
    wh = torch.exp(loc_preds[:, 2:] * variances[1]) * default_wh
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue

        box = box_preds[mask.nonzero().squeeze(1)]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        kept_boxes = box[keep]
        boxes.append(kept_boxes)
        labels.append(torch.LongTensor(len(kept_boxes)).fill_(i))
        scores.append(score[keep])

    if not boxes:
        # torch.cat rejects an empty list; zero-row slices keep dtype/device.
        return box_preds[:0], torch.LongTensor(0), cls_preds[:0, 0]

    return torch.cat(boxes, 0), torch.cat(labels, 0), torch.cat(scores, 0)
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decode box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
      Both are empty (#obj == 0) when no anchor scores above the class
      threshold; box_nms is not called in that case.
    '''
    CLS_THRESH = 0.5
    NMS_THRESH = 0.5

    input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)  # xywh

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]

    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]

    # squeeze(1), not squeeze(): with exactly one hit a bare squeeze() yields
    # a 0-d index and boxes[ids] would come back as [4] instead of [1,4].
    ids = (score > CLS_THRESH).nonzero().squeeze(1)  # [#obj,]
    if ids.numel() == 0:
        return boxes[ids], labels[ids]

    cand_boxes = boxes[ids]
    keep = box_nms(cand_boxes, score[ids], threshold=NMS_THRESH)
    return cand_boxes[keep], labels[ids][keep]
def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode predicted loc/cls back to real box locations and class labels.

    Args:
      loc_preds: (tensor) predicted loc, sized [8732,4].
      cls_preds: (tensor) predicted conf, sized [8732,21].
      score_thresh: (float) threshold for object confidence score.
      nms_thresh: (float) threshold for box nms.

    Returns:
      boxes: (tensor) bbox locations, sized [#obj,4].
      labels: (tensor) class labels, sized [#obj,].
      scores: (tensor) confidence per box, sized [#obj,].
      All three are None when no score exceeds score_thresh.
    '''
    variances = (0.1, 0.2)
    default_xy = self.default_boxes[:, :2]
    default_wh = self.default_boxes[:, 2:]

    xy = loc_preds[:, :2] * variances[0] * default_wh + default_xy
    wh = torch.exp(loc_preds[:, 2:] * variances[1]) * default_wh
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue

        # squeeze(1), not squeeze(): a single hit must still index a [1,4]
        # slice rather than collapse to a 0-d index and a [4] vector.
        box = box_preds[mask.nonzero().squeeze(1)]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        kept_boxes = box[keep]
        boxes.append(kept_boxes)
        labels.append(torch.LongTensor(len(kept_boxes)).fill_(i))
        scores.append(score[keep])

    # Explicit "nothing detected" check instead of catching whatever
    # torch.cat raises; real errors are no longer silently swallowed.
    if not boxes:
        return None, None, None

    return torch.cat(boxes, 0), torch.cat(labels, 0), torch.cat(scores, 0)
def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode predicted loc/cls back to real box locations and class labels.

    Args:
      loc_preds: (tensor) predicted loc, sized [#anchors,4].
      cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
      score_thresh: (float) threshold for object confidence score.
      nms_thresh: (float) threshold for box nms.

    Returns:
      boxes: (tensor) bbox locations, sized [#obj,4].
      labels: (tensor) class labels, sized [#obj,].
      scores: (tensor) confidence per box, sized [#obj,].
      When no score exceeds score_thresh all three are empty (#obj == 0)
      instead of raising from torch.cat on an empty list.
    '''
    # self.anchor_boxes is passed through change_box_order with mode
    # 'xyxy2xywh'; the result is used below as centre (cols :2) / size (2:).
    anchor_boxes = change_box_order(self.anchor_boxes, 'xyxy2xywh')
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

    boxes = []
    labels = []
    scores = []
    num_classes = cls_preds.size(1)
    for i in range(num_classes - 1):
        score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
        mask = score > score_thresh
        if not mask.any():
            continue

        box = box_preds[mask]
        score = score[mask]

        keep = box_nms(box, score, nms_thresh)
        boxes.append(box[keep])
        labels.append(torch.empty_like(keep).fill_(i))
        scores.append(score[keep])

    if not boxes:
        # torch.cat rejects an empty list; zero-row slices keep dtype/device.
        # Labels are given an integer dtype here - NOTE(review): assumes
        # box_nms returns long indices, as the non-empty path relies on.
        empty_labels = torch.empty_like(box_preds[:0, 0], dtype=torch.long)
        return box_preds[:0], empty_labels, cls_preds[:0, 0]

    return torch.cat(boxes, 0), torch.cat(labels, 0), torch.cat(scores, 0)
def decode(self,
           loc_preds,
           cls_preds,
           input_size=conf.input_size,
           cls_thred=conf.cls_thred,
           max_output_size=conf.max_output_size,
           nms_thred=conf.nms_thred,
           return_score=False,
           tf_box_order=True):
    """Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w, h), should be the same.
      cls_thred: class score threshold
      max_output_size: max output nums after nms (only forwarded to
        tf.image.non_max_suppression, i.e. when tf_box_order is True)
      nms_thred: non-maximum suppression threshold
      return_score: (bool) indicate whether to return score value.
      tf_box_order: (bool) True: [ymin, xmin, ymax, xmax]
                          False: [xmin, ymin, xmax, ymax]

    Returns:
      boxes: (tensor) decode box locations, sized [#obj, 4]. order determined
        by param: tf_box_order
      labels: (tensor) class labels for each box, sized [#obj, ].
      scores: (tensor) score for each box, sized [#obj, ]; only present when
        return_score is True.
      If no anchor scores above cls_thred, the single highest-scoring anchor
      is used instead, so the candidate set is never empty.
      NOTE: #obj == min(#detected_objs, #max_output_size)
    """
    assert len(loc_preds.get_shape().as_list()) == 2, \
        'Ensure the location input shape to be [#anchors, 4]'
    assert len(cls_preds.get_shape().as_list()) == 2, \
        'Ensure the class input shape to be [#anchors, #classes]'

    input_size = _make_list_input_size(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]

    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = tf.exp(loc_wh) * anchor_boxes[:, 2:]
    boxes = tf.concat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors, 4]

    labels = tf.argmax(cls_preds, 1)  # [#anchors, ]
    score = tf.reduce_max(tf.sigmoid(cls_preds), 1)

    ids = tf.where(score > cls_thred)  # [#hits, 1]
    # Test the number of hits, not the truthiness of the index values:
    # an index of 0 is a valid hit and must not read as "nothing found".
    if ids.shape[0] == 0:
        # Fail to detect, choose the max score
        ids = tf.expand_dims(tf.argmax(score), axis=-1)
    else:
        ids = tf.squeeze(ids, -1)

    if tf_box_order:
        # [ymin, xmin, ymax, xmax]
        boxes = tf.gather(boxes, [1, 0, 3, 2], axis=1)
        cand_boxes = tf.gather(boxes, ids)
        keep = tf.image.non_max_suppression(
            cand_boxes,
            tf.gather(score, ids),
            max_output_size=max_output_size,
            iou_threshold=nms_thred)
    else:
        # [xmin, ymin, xmax, ymax]
        cand_boxes = tf.gather(boxes, ids)
        keep = box_nms(cand_boxes, tf.gather(score, ids), threshold=nms_thred)

    def _select(t):
        """Pick the thresholded candidates, then the NMS survivors."""
        return tf.gather(tf.gather(t, ids), keep)

    out_boxes = tf.gather(cand_boxes, keep)
    out_labels = _select(labels)
    if return_score:
        return out_boxes, out_labels, _select(score)
    return out_boxes, out_labels
def decode(self,
           loc_preds,
           cls_preds,
           input_size=conf.input_size,
           output_size=None,
           cls_thred=conf.cls_thred,
           max_output_size=conf.max_output_size,
           nms_thred=conf.nms_thred,
           return_score=True,
           tf_box_order=conf.tf_box_order):
    """Turn raw network outputs into boxes, class labels and scores.

    The regression targets follow the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class logits, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w, h).
      output_size: accepted but not used by this method.
      cls_thred: class score threshold.
      max_output_size: max number of boxes kept by
        tf.image.non_max_suppression (tf_box_order=True path only).
      nms_thred: non-maximum suppression IoU threshold.
      return_score: (bool) whether to also return the score tensor.
      tf_box_order: (bool) True runs NMS through tf.image.non_max_suppression
        on [ymin, xmin, ymax, xmax] boxes; False runs box_nms on
        [xmin, ymin, xmax, ymax] boxes.

    Returns:
      (boxes, labels, scores) when return_score is True, else (boxes, labels).
      boxes is sized [#obj, 4], labels and scores [#obj, ].
      NOTE(review): with tf_box_order=True the two return paths disagree on
      column order - return_score=True swaps the columns back to
      [xmin, ymin, xmax, ymax], return_score=False returns them as
      [ymin, xmin, ymax, xmax]. Kept as-is; confirm which one callers expect.
    """
    size_list = _make_list_input_size(input_size)
    anchors = self._get_anchor_boxes(size_list)
    anchor_xy = anchors[:, :2]
    anchor_wh = anchors[:, 2:]

    centers = loc_preds[:, :2] * anchor_wh + anchor_xy
    sizes = tf.exp(loc_preds[:, 2:]) * anchor_wh
    half = sizes / 2
    boxes = tf.concat([centers - half, centers + half], 1)  # [#anchors, 4]

    labels = tf.argmax(cls_preds, 1)  # [#anchors, ]
    score = tf.sigmoid(tf.reduce_max(cls_preds, 1))

    hits = tf.where(score > cls_thred)  # [#hits, 1]
    if hits.shape[0] == 0:
        # Nothing passed the threshold: fall back to the best-scoring anchor.
        ids = tf.expand_dims(tf.argmax(score), axis=-1)
        print(
            "!!! Box decode: Fail to detect, choose the max score !!!!!!!!!!!!!!!!!!"
        )
    else:
        ids = tf.squeeze(hits, -1)

    if tf_box_order:
        # Swap to [ymin, xmin, ymax, xmax] for the TensorFlow NMS op.
        boxes = tf.gather(boxes, [1, 0, 3, 2], axis=1)
        keep = tf.image.non_max_suppression(
            tf.gather(boxes, ids),
            tf.gather(score, ids),
            max_output_size=max_output_size,
            iou_threshold=nms_thred)
    else:
        # Boxes stay [xmin, ymin, xmax, ymax].
        keep = box_nms(tf.gather(boxes, ids),
                       tf.gather(score, ids),
                       threshold=nms_thred)

    def _pick(t):
        """Apply the threshold selection, then the NMS selection."""
        return tf.gather(tf.gather(t, ids), keep)

    picked_boxes = _pick(boxes)
    picked_labels = _pick(labels)
    if not return_score:
        return picked_boxes, picked_labels

    if tf_box_order:
        # Undo the column swap made for NMS.
        picked_boxes = tf.gather(picked_boxes, [1, 0, 3, 2], axis=1)
    return picked_boxes, picked_labels, _pick(score)
def decode___(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
    '''Decode per-level loc/cls feature maps back to boxes, labels and scores.

    Args:
      loc_preds: (sequence of tensors) one regression map per pyramid level;
        after squeeze() each is indexed as [row, col, anchor, 4].
      cls_preds: (sequence of tensors) one logit map per pyramid level; after
        squeeze() each must be 3-d [rows, cols, #anchors]. Sigmoid is applied
        here; the caller's sequence is left untouched.
      score_thresh: (float) an anchor is kept only if its sigmoid score is
        strictly greater than this value.
      nms_thresh: (float) threshold forwarded to box_nms.

    Returns:
      boxes: (tensor) corner-form boxes, sized [#obj,4].
      labels: (LongTensor) sized [#obj,], see the NOTE near the end.
      scores: (tensor) sigmoid score per box, sized [#obj,].
      All three are None when no anchor passes score_thresh.

    Side effects:
      Overwrites self.steps, self.box_sizes and self.aspect_ratios.
    '''
    self.steps = (4, 8, 16, 32, 64, 128, 256, 512)
    self.box_sizes = (17.92, 35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8,
                      537.6)
    self.aspect_ratios = ((), (2, ), (2, ), (2, ), (2, ), (2, ), (2, ), (2, ))
    variances = (1, 1)

    def _decode_prior(loc, cx, cy, w, h):
        """Apply one [4] offset vector to the prior (cx, cy, w, h)."""
        loc = loc.unsqueeze(0)
        prior = torch.Tensor([cx, cy, w, h]).unsqueeze(0)
        xy = loc[:, :2] * variances[0] * prior[:, 2:] + prior[:, :2]
        wh = torch.exp(loc[:, 2:] * variances[1]) * prior[:, 2:]
        return torch.cat([xy - wh / 2, xy + wh / 2], 1)

    # Work on a local list so the caller's cls_preds is not modified.
    probs = [torch.sigmoid(level.squeeze()) for level in cls_preds]

    boxes = []
    score = []
    for i in range(len(loc_preds)):
        oreg = loc_preds[i].squeeze().data.cpu()
        ocls = probs[i].data.cpu()
        FH, FW, _ = ocls.size()  # feature map size
        step = self.steps[i]

        # Prior (w, h) per anchor slot, in the order the slots are indexed:
        # 0: square of this level's size, 1: square of the geometric mean
        # with the next size, then a wide/tall pair per aspect ratio.
        s = self.box_sizes[i]
        s_mid = math.sqrt(self.box_sizes[i] * self.box_sizes[i + 1])
        prior_sizes = [(s, s), (s_mid, s_mid)]
        for ar in self.aspect_ratios[i]:
            prior_sizes.append((s * math.sqrt(ar), s / math.sqrt(ar)))
            prior_sizes.append((s / math.sqrt(ar), s * math.sqrt(ar)))

        for hindex in range(FH):
            cy = (hindex + 0.5) * step
            for windex in range(FW):
                cx = (windex + 0.5) * step
                for a, (pw, ph) in enumerate(prior_sizes):
                    prob = ocls[hindex, windex, a]
                    if prob > score_thresh:
                        boxes.append(
                            _decode_prior(oreg[hindex, windex, a, :], cx, cy,
                                          pw, ph))
                        score.append(prob)

    # Explicit "nothing detected" check instead of a bare except around
    # torch.cat, which would also hide unrelated errors.
    if not boxes:
        return None, None, None

    box = torch.cat(boxes, 0)
    score = torch.Tensor(score)

    keep = box_nms(box, score, nms_thresh)
    kept_boxes = box[keep]

    # NOTE(review): the original filled labels with the loop variable left
    # over from the level loop, i.e. the index of the last level. That value
    # is preserved here; it looks like a placeholder for a single-class
    # detector - confirm before relying on it.
    label_value = len(loc_preds) - 1
    labels = torch.LongTensor(len(kept_boxes)).fill_(label_value)
    return kept_boxes, labels, score[keep]