def gt_test_one_sentence(sentence, rois, rcnn, k_fold_index, th_iou_nms,
                         score_threshold=th_score):
    """Classify the given ROIs of one sentence and run per-class NMS on them.

    The model only produces class scores here; the ROIs themselves are used
    as the boxes (no regression output is consumed).

    Args:
        sentence: Model input, forwarded unchanged to ``rcnn``.
        rois: Indexable collection of ROIs exposing ``.shape[0]`` and
            per-row ``.tolist()``; each row is stored as a 2-value box.
        rcnn: Callable invoked as ``rcnn(sentence, rois, ridx)`` that returns
            a score tensor accepted by ``F.softmax(..., dim=1)``.
        k_fold_index: Unused by this function; kept for interface
            compatibility.
        th_iou_nms: IoU threshold forwarded to ``non_maximum_suppression``.
        score_threshold: Score threshold forwarded to
            ``non_maximum_suppression``.

    Returns:
        ``(result_bbox, result_cls)`` as NumPy arrays: the boxes returned by
        NMS for every class ``1..classes_num`` and the class id of each box.
        Both arrays are empty when ``rois`` contains no rows.
    """
    roi_num = rois.shape[0]
    if roi_num == 0:
        # Without ROIs there is nothing to score. The original code built a
        # 1-D empty array here and then failed on 3-D indexing.
        return np.array([]), np.array([])

    # Every ROI belongs to the single sentence in the batch (index 0).
    ridx = np.zeros(roi_num).astype(int)
    pred_cls_score = rcnn(sentence, rois, ridx)
    # Softmax score for each row.
    pred_cls_score = F.softmax(pred_cls_score, dim=1).data.cpu().numpy()
    predicted_cls = np.argmax(pred_cls_score, axis=1)

    # One (classes_num + 1, 2) table per ROI: the ROI is placed in the slot
    # of its predicted class and all other slots stay zero. ROIs predicted
    # as class 0 contribute an all-zero table.
    new_cbbox = np.zeros((len(predicted_cls), classes_num + 1, 2))
    for index, cls_id in enumerate(predicted_cls):
        if cls_id != 0:
            new_cbbox[index, cls_id] = rois[index].tolist()

    result_bbox = []
    result_cls = []
    for c in range(1, classes_num + 1):
        c_cls_score = pred_cls_score[:, c]
        c_bboxs = new_cbbox[:, c, :]
        # NOTE(review): other call sites in this file pass an extra ROI
        # argument and unpack four return values from
        # non_maximum_suppression -- confirm this call matches the current
        # signature.
        boxes = non_maximum_suppression(c_cls_score,
                                        c_bboxs,
                                        iou_threshold=th_iou_nms,
                                        score_threshold=score_threshold)
        result_bbox.extend(boxes)
        result_cls.extend([c] * len(boxes))
    return np.array(result_bbox), np.array(result_cls)
def _test_one_sentence(file, test_arguments, sentence, rois, rcnn, fold_index,
                       this_sentence_len, info):
    """Run the detector on one sentence and apply per-class NMS.

    Args:
        file: Output handle forwarded to ``output_file_result`` when
            ``test_arguments.output_flow`` is truthy.
        test_arguments: Options object; this function reads ``normalize``,
            ``dx_compute_method``, ``output_flow``, ``th_nms_iou`` and
            ``score_threshold``.
        sentence: Model input, forwarded unchanged to ``rcnn``.
        rois: ROI array; row-indexed with integer index arrays below.
        rcnn: Callable returning ``(class_scores, regression_output)``.
        fold_index: Forwarded to ``denorm`` when normalisation is enabled.
        this_sentence_len: Forwarded to the regression-to-box converter.
        info: Per-sentence metadata; ``info["gt_str"]`` is read here and the
            whole object is forwarded to NMS and the file writer.

    Returns:
        Six NumPy arrays: kept boxes, kept class ids, dropped boxes, dropped
        class ids, and the third and fourth values returned by
        ``non_maximum_suppression`` (presumably the source ROIs and scores of
        the kept boxes -- confirm against that helper).
    """
    n_rois = rois.shape[0]
    # All ROIs belong to the single sentence in the batch (index 0).
    batch_index = np.zeros(n_rois).astype(int)
    cls_logits, reg_output = rcnn(sentence, rois, batch_index)

    # Softmax score for each row.
    pred_cls_score = F.softmax(cls_logits, dim=1).data.cpu().numpy()
    pred_tbbox = reg_output.data.cpu().numpy()
    if test_arguments.normalize:
        pred_tbbox = denorm(pred_tbbox, fold_index, test_arguments)

    # Pick the converter that matches how the offsets were computed.
    if test_arguments.dx_compute_method == "left_boundary":
        to_bbox = lb_reg_to_bbox
    else:
        to_bbox = reg_to_bbox
    pred_bbox = to_bbox(this_sentence_len, pred_tbbox, rois)

    # Copy of the converted boxes taken before selection; not read below.
    raw_pred_bbox = pred_bbox.copy()
    pred_bbox, argmax_softmax = select_meaningful_bbox_regression(
        pred_bbox, pred_cls_score)

    # Per-ROI dump of the selected boxes. The return value of int_bbox is
    # discarded, so it presumably adjusts output_bbox in place -- confirm.
    output_bbox = pred_bbox.T.copy()
    output_result_cls = np.argmax(pred_cls_score, axis=1).copy()
    int_bbox(output_bbox, len(info["gt_str"]))
    if test_arguments.output_flow:
        output_file_result(file, output_bbox, output_result_cls, rois, info,
                           np.max(pred_cls_score, axis=1))

    kept_boxes, kept_cls = [], []
    dropped_boxes, dropped_cls = [], []
    kept_rois, kept_scores = [], []

    # The original comment states pred_bbox has shape 2 x n_roi, i.e. ROIs
    # are indexed along the second axis.
    for cls_id in range(1, classes_num + 1):
        member_rows = np.where(argmax_softmax == cls_id)[0]
        if len(member_rows) == 0:
            # No ROI was assigned to this class.
            continue

        boxes, rejected, roi, c_score = non_maximum_suppression(
            pred_cls_score[member_rows, cls_id],
            pred_bbox[:, member_rows].T,
            rois[member_rows, :],
            iou_threshold=test_arguments.th_nms_iou,
            score_threshold=test_arguments.score_threshold,
            info=info)

        kept_boxes.extend(boxes)
        dropped_boxes.extend(rejected)
        kept_rois.extend(roi)
        kept_scores.extend(c_score)
        kept_cls.extend([cls_id] * len(boxes))
        dropped_cls.extend([cls_id] * len(rejected))

    return (np.array(kept_boxes), np.array(kept_cls),
            np.array(dropped_boxes), np.array(dropped_cls),
            np.array(kept_rois), np.array(kept_scores))
def _test_one_sentence_all_regression(test_arguments, sentence, rois, rcnn,
                                      fold_index, this_sentence_len, info):
    """Run the detector on one sentence, applying NMS to every class column.

    Every ROI is passed to NMS once per class, using that class's score
    column and that class's slice of the regressed boxes. If the first pass
    keeps nothing, the module-level ``lower_than_th_count[0]`` counter is
    incremented and a second pass is run with the score threshold divided
    by 6.

    Args:
        test_arguments: Options object; this function reads ``normalize``,
            ``dx_compute_method``, ``th_nms_iou`` and ``score_threshold``.
        sentence: Model input, forwarded unchanged to ``rcnn``.
        rois: ROI array; a fresh ``rois.copy()`` is handed to each NMS call.
        rcnn: Callable returning ``(class_scores, regression_output)``.
        fold_index: Forwarded to ``denorm`` when normalisation is enabled.
        this_sentence_len: Forwarded to the regression-to-box converter.
        info: Per-sentence metadata forwarded to NMS.

    Returns:
        Six NumPy arrays: kept boxes, kept class ids, dropped boxes, dropped
        class ids, and the third and fourth values returned by the NMS helper
        (presumably source ROIs and scores of kept boxes -- confirm against
        that helper).
    """
    n_rois = rois.shape[0]
    # All ROIs belong to the single sentence in the batch (index 0).
    batch_index = np.zeros(n_rois).astype(int)
    cls_logits, reg_output = rcnn(sentence, rois, batch_index)

    # Softmax score for each row.
    pred_cls_score = F.softmax(cls_logits, dim=1).data.cpu().numpy()
    pred_tbbox = reg_output.data.cpu().numpy()
    if test_arguments.normalize:
        pred_tbbox = denorm(pred_tbbox, fold_index, test_arguments)

    if test_arguments.dx_compute_method == "left_boundary":
        to_bbox = lb_reg_to_bbox
    else:
        to_bbox = reg_to_bbox
    pred_bbox = to_bbox(this_sentence_len, pred_tbbox, rois)

    def run_pass(relaxed):
        """Collect NMS output for every class; ``relaxed`` selects pass 2."""
        boxes_kept, cls_kept = [], []
        boxes_dropped, cls_dropped = [], []
        rois_kept, scores_kept = [], []
        for cls_id in range(1, classes_num + 1):
            scores_c = pred_cls_score[:, cls_id]
            boxes_c = pred_bbox[:, :, cls_id].T
            if relaxed:
                # NOTE(review): the fallback calls non_maximum_suppression
                # while the first pass calls
                # non_maximum_suppression_all_regression -- confirm this
                # difference is intentional.
                outcome = non_maximum_suppression(
                    scores_c,
                    boxes_c,
                    rois.copy(),
                    iou_threshold=test_arguments.th_nms_iou,
                    score_threshold=test_arguments.score_threshold / 6,
                    info=info)
            else:
                outcome = non_maximum_suppression_all_regression(
                    scores_c,
                    boxes_c,
                    rois.copy(),
                    iou_threshold=test_arguments.th_nms_iou,
                    score_threshold=test_arguments.score_threshold,
                    info=info)
            boxes, rejected, roi, c_score = outcome
            boxes_kept.extend(boxes)
            boxes_dropped.extend(rejected)
            rois_kept.extend(roi)
            scores_kept.extend(c_score)
            cls_kept.extend([cls_id] * len(boxes))
            cls_dropped.extend([cls_id] * len(rejected))
        return (boxes_kept, cls_kept, boxes_dropped, cls_dropped, rois_kept,
                scores_kept)

    (result_bbox, result_cls, drop_bbox, drop_cls, original_roi_ls,
     c_score_ls) = run_pass(relaxed=False)

    if not result_cls:
        # Nothing was detected in this sentence: count it and retry with a
        # lower score criterion. Only the dropped lists start over; the
        # other lists keep whatever the first pass put in them.
        lower_than_th_count[0] += 1
        (extra_bbox, extra_cls, drop_bbox, drop_cls, extra_rois,
         extra_scores) = run_pass(relaxed=True)
        result_bbox.extend(extra_bbox)
        result_cls.extend(extra_cls)
        original_roi_ls.extend(extra_rois)
        c_score_ls.extend(extra_scores)

    return (np.array(result_bbox), np.array(result_cls),
            np.array(drop_bbox), np.array(drop_cls),
            np.array(original_roi_ls), np.array(c_score_ls))
def process_bboxes(self,
                   predictions,
                   image_info,
                   confidence_threshold=0.01,
                   overlap_threshold=0.5,
                   nms=True,
                   detector_indices=(0, 1, 2)):
    """Turn raw detector outputs into filtered (and optionally NMS'd) boxes.

    For each selected detector output, every prediction row is scored as
    ``row[4] * max(softmax(row[5:]))``. Rows above ``confidence_threshold``
    have their first four columns scaled by the detector's stride, converted
    with ``xywh2xyxy`` and clamped using the image padding. Detections are
    then grouped per image (ascending image index, detector order within an
    image) and, if requested, reduced by per-class non-maximum suppression.

    Reads ``self.num_features``, ``self.strides``, ``self.image_size`` and
    ``self.device``. The input tensors are not modified.

    Args:
        predictions: Sequence of 4-D tensors, one per detector; each is
            permuted with ``(0, 2, 3, 1)`` and flattened to rows of
            ``self.num_features`` values (presumably laid out as
            batch x features x H x W -- confirm against the model).
        image_info: Mapping with ``'padding'`` (indexed ``[side][image]``)
            and ``'id'`` (indexed by image position in the batch).
        confidence_threshold: Rows must score strictly above this value.
        overlap_threshold: Forwarded to ``non_maximum_suppression`` as
            ``overlap``.
        nms: When true, apply per-class NMS within each image.
        detector_indices: Positions in ``predictions`` to use; the default
            matches the previously hard-coded ``[0, 1, 2]``.

    Returns:
        ``(bboxes, classes, confidence, image_idx)``: an ``(N, 4)`` tensor,
        two flat tensors of length ``N``, and a list of ``N`` image ids.
        When nothing passes the filter, three empty tensors on
        ``self.device`` and an empty list are returned.
    """
    # image position -> (boxes, classes, scores) lists in detector order.
    per_image = {}
    for i, predictions_ in enumerate(predictions):
        if i not in detector_indices:
            continue
        predictions_ = predictions_.permute(0, 2, 3, 1)
        for j, prediction in enumerate(predictions_):
            prediction = prediction.contiguous().view(-1, self.num_features)
            # Kept in a separate tensor: writing the softmax back into
            # `prediction` could alter the caller's data when the view
            # shares storage with the input.
            class_probs = F.softmax(prediction[:, 5:], dim=-1)
            classes = torch.argmax(class_probs, dim=-1)
            top_prob = class_probs.gather(1, classes.unsqueeze(1)).squeeze(1)
            confidence = prediction[:, 4] * top_prob
            mask = confidence > confidence_threshold
            # Tensor-level check; builtin sum() would walk the mask one
            # element at a time.
            if not mask.any():
                continue

            bboxes = prediction[mask, :4].clone()
            bboxes *= self.strides[i]
            bboxes = xywh2xyxy(bboxes)
            # Even columns are clamped against padding sides 0/2 and
            # image_size[0], odd columns against sides 1/3 and image_size[1].
            bboxes[:, ::2] = torch.clamp(
                bboxes[:, ::2],
                min=image_info['padding'][0][j] + 1,
                max=self.image_size[0] - image_info['padding'][2][j])
            bboxes[:, 1::2] = torch.clamp(
                bboxes[:, 1::2],
                min=image_info['padding'][1][j] + 1,
                max=self.image_size[1] - image_info['padding'][3][j])

            boxes_acc, classes_acc, conf_acc = per_image.setdefault(
                j, ([], [], []))
            boxes_acc.append(bboxes)
            classes_acc.append(classes[mask])
            conf_acc.append(confidence[mask])

    out_boxes = []
    out_classes = []
    out_conf = []
    image_idx = []
    for j in sorted(per_image):
        boxes_acc, classes_acc, conf_acc = per_image[j]
        img_boxes = torch.cat(boxes_acc)
        img_classes = torch.cat(classes_acc)
        img_conf = torch.cat(conf_acc)
        if nms:
            for c in torch.unique(img_classes):
                cls_mask = (img_classes == c).nonzero().flatten()
                keep = non_maximum_suppression(img_boxes[cls_mask],
                                               img_conf[cls_mask],
                                               overlap=overlap_threshold)
                kept_boxes = img_boxes[cls_mask][keep]
                out_boxes.append(kept_boxes)
                out_classes.append(img_classes[cls_mask][keep])
                out_conf.append(img_conf[cls_mask][keep])
                image_idx.extend([image_info['id'][j]] * len(kept_boxes))
        else:
            out_boxes.append(img_boxes)
            out_classes.append(img_classes)
            out_conf.append(img_conf)
            image_idx.extend([image_info['id'][j]] * len(img_boxes))

    if not out_boxes:
        return torch.tensor([], device=self.device), \
            torch.tensor([], dtype=torch.long, device=self.device), \
            torch.tensor([], device=self.device), \
            []

    return (torch.cat(out_boxes).view(-1, 4),
            torch.cat(out_classes).flatten(),
            torch.cat(out_conf).flatten(),
            image_idx)