def _inference_nms(pred_boxes, config):
    """Threshold raw predictions and apply the NMS variant named in *config*.

    Args:
        pred_boxes: 2-D predictions whose last dimension is a multiple of 6
            (one or more 6-value box records per row).
        config: object providing ``test_nms_method``, ``pred_cls_threshold``
            and, for ``'normal_nms'``, ``test_nms``.

    Returns:
        The surviving box records, one per row.  For ``'set_nms'`` each row
        carries a 7th column holding the index of the row it came from.

    Raises:
        ValueError: if ``config.test_nms_method`` is not one of
            ``'set_nms'``, ``'normal_nms'`` or ``'none'``.
    """
    if config.test_nms_method == 'set_nms':
        assert pred_boxes.shape[-1] > 6, "Not EMD Network! Using normal_nms instead."
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        top_k = pred_boxes.shape[-1] // 6
        n = pred_boxes.shape[0]
        pred_boxes = pred_boxes.reshape(-1, 6)
        # Tag every box with the index of the prediction row it came from so
        # that set NMS can tell boxes of the same row apart.
        idents = np.tile(np.arange(n)[:, None], (1, top_k)).reshape(-1, 1)
        pred_boxes = np.hstack((pred_boxes, idents))
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
        keep = nms_utils.set_cpu_nms(pred_boxes, 0.5)
        pred_boxes = pred_boxes[keep]
    elif config.test_nms_method == 'normal_nms':
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        pred_boxes = pred_boxes.reshape(-1, 6)
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
        keep = nms_utils.cpu_nms(pred_boxes, config.test_nms)
        pred_boxes = pred_boxes[keep]
    elif config.test_nms_method == 'none':
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        pred_boxes = pred_boxes.reshape(-1, 6)
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
    else:
        raise ValueError('Unknown NMS method.')
    return pred_boxes


def inference(config, network, model_file, device, dataset, start, end, result_queue):
    """Run the detector over ``dataset.records[start:end]`` on one GPU.

    For every sample a result dict (``ID``, ``height``, ``width``,
    ``dtboxes``, ``gtboxes``) is pushed onto *result_queue*.

    Args:
        config: test configuration (``test_nms_method``,
            ``pred_cls_threshold``, ``test_nms``).
        network: zero-argument callable that builds the model.
        model_file: checkpoint path; the loaded object must contain a
            ``'state_dict'`` entry.
        device: CUDA device passed to ``.cuda(...)``.
        dataset: dataset exposing a sliceable ``records`` attribute.
        start, end: slice bounds of the records handled by this call.
        result_queue: queue receiving the per-image result dicts via
            ``put_nowait``.

    Raises:
        ValueError: if ``config.test_nms_method`` is unknown.
    """
    torch.set_default_tensor_type('torch.FloatTensor')
    torch.multiprocessing.set_sharing_strategy('file_system')
    # init model
    net = network()
    net.cuda(device)
    net = net.eval()
    check_point = torch.load(model_file)
    net.load_state_dict(check_point['state_dict'])
    # init data
    # NOTE(review): this narrows the caller's dataset object in place; that is
    # only harmless if every call gets its own copy (e.g. one process per
    # shard) -- confirm against the caller.
    dataset.records = dataset.records[start:end]
    data_iter = torch.utils.data.DataLoader(dataset=dataset, shuffle=False)
    # inference
    for (image, gt_boxes, im_info, ID) in data_iter:
        # Pure inference: make sure no autograd state is recorded.
        with torch.no_grad():
            pred_boxes = net(image.cuda(device), im_info.cuda(device))
        scale = im_info[0, 2]
        pred_boxes = _inference_nms(pred_boxes, config)
        # NOTE: a per-image cap based on config.detection_per_image was
        # commented out in the original code and is not applied here.
        # recover the scale, then turn the second corner into an offset from
        # the first (columns 2:4 minus columns 0:2)
        pred_boxes[:, :4] /= scale
        pred_boxes[:, 2:4] -= pred_boxes[:, :2]
        gt_boxes = gt_boxes[0].numpy()
        gt_boxes[:, 2:4] -= gt_boxes[:, :2]
        result_dict = dict(ID=ID[0],
                           height=int(im_info[0, -3]),
                           width=int(im_info[0, -2]),
                           dtboxes=boxes_dump(pred_boxes),
                           gtboxes=boxes_dump(gt_boxes))
        result_queue.put_nowait(result_dict)
def post_process(pred_boxes, config, scale):
    """Filter raw predictions for visualisation and rescale their coordinates.

    The predictions are reshaped to 6-value box records, thresholded on
    column 4 with ``config.pred_cls_threshold``, passed through the NMS
    variant selected by ``config.test_nms_method``, divided by *scale* in
    their first four columns and finally thresholded again with
    ``config.visulize_threshold``.

    Args:
        pred_boxes: 2-D predictions whose last dimension is a multiple of 6.
        config: object providing ``test_nms_method``, ``pred_cls_threshold``,
            ``visulize_threshold`` and, for ``'normal_nms'``, ``test_nms``.
        scale: factor the first four columns are divided by.

    Returns:
        The surviving box records, one per row (with an extra 7th identity
        column when ``test_nms_method == 'set_nms'``).

    Raises:
        ValueError: if ``config.test_nms_method`` is not ``'set_nms'``,
            ``'normal_nms'`` or ``'none'``.  Previously such a value fell
            through silently and the un-reshaped predictions were scaled and
            filtered as if they were box records.
    """
    if config.test_nms_method == 'set_nms':
        assert pred_boxes.shape[-1] > 6, "Not EMD Network! Using normal_nms instead."
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        top_k = pred_boxes.shape[-1] // 6
        n = pred_boxes.shape[0]
        pred_boxes = pred_boxes.reshape(-1, 6)
        # Remember which prediction row each box came from for set NMS.
        idents = np.tile(np.arange(n)[:, None], (1, top_k)).reshape(-1, 1)
        pred_boxes = np.hstack((pred_boxes, idents))
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
        keep = nms_utils.set_cpu_nms(pred_boxes, 0.5)
        pred_boxes = pred_boxes[keep]
    elif config.test_nms_method == 'normal_nms':
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        pred_boxes = pred_boxes.reshape(-1, 6)
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
        keep = nms_utils.cpu_nms(pred_boxes, config.test_nms)
        pred_boxes = pred_boxes[keep]
    elif config.test_nms_method == 'none':
        assert pred_boxes.shape[-1] % 6 == 0, "Prediction dim Error!"
        pred_boxes = pred_boxes.reshape(-1, 6)
        keep = pred_boxes[:, 4] > config.pred_cls_threshold
        pred_boxes = pred_boxes[keep]
    else:
        raise ValueError('Unknown NMS method.')
    # Map the coordinates back to the un-scaled image.
    pred_boxes[:, :4] /= scale
    keep = pred_boxes[:, 4] > config.visulize_threshold
    pred_boxes = pred_boxes[keep]
    return pred_boxes
def evaluate_on_cpu(y_pred, y_true, num_classes, calc_now=True, max_boxes=50,
                    score_thresh=0.5, iou_thresh=0.5):
    """Accumulate recall/precision statistics for one batch.

    Args:
        y_pred: indexable with ``[0]`` boxes, ``[1]`` confidences and ``[2]``
            class probabilities; each is sliced per image along axis 0.
        y_true: three ground-truth arrays (one per feature map), images along
            axis 0.  In the last axis channels ``0:4`` are read as
            (x_center, y_center, w, h) and channels ``5:-1`` as per-class
            probabilities.  The final channel is ignored here (presumably a
            per-box weight -- TODO confirm).
        num_classes: number of classes; sizes the per-class counters.
        calc_now: if true return ``(recall, precision)``, otherwise return
            the raw counters.
        max_boxes, score_thresh: forwarded to ``cpu_nms``.
        iou_thresh: forwarded to ``cpu_nms`` and also the minimum IoU for a
            prediction to match a ground-truth box.

    Returns:
        ``(recall, precision)`` when *calc_now* is true, else
        ``(true_positive_dict, true_labels_dict, pred_labels_dict)``, each
        mapping class index to a count.

    A ground-truth box counts as a true positive once, no matter how many
    predictions match it.  Images with predictions but no ground truth
    contribute their predictions as false positives; the previous code
    broke on that case.
    """
    num_images = y_true[0].shape[0]
    true_labels_dict = {i: 0 for i in range(num_classes)}  # {class: count}
    pred_labels_dict = {i: 0 for i in range(num_classes)}
    true_positive_dict = {i: 0 for i in range(num_classes)}

    for i in range(num_images):
        true_labels_list, true_boxes_list = [], []
        for j in range(3):  # three feature maps
            # class probabilities of every cell/anchor
            true_probs_temp = y_true[j][i][..., 5:-1]
            # (x_center, y_center, w, h) of every cell/anchor
            true_boxes_temp = y_true[j][i][..., 0:4]

            # True where a cell/anchor is assigned to an object.
            object_mask = true_probs_temp.sum(axis=-1) > 0

            # [V, n_class_channels], V: ground-truth count on this feature map
            true_probs_temp = true_probs_temp[object_mask]
            # [V, 4]
            true_boxes_temp = true_boxes_temp[object_mask]

            # [V], labels
            true_labels_list += np.argmax(true_probs_temp, axis=-1).tolist()
            # [V, 4] (x_center, y_center, w, h)
            true_boxes_list += true_boxes_temp.tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # [1, xxx, 4]
        pred_boxes = y_pred[0][i:i + 1]
        pred_confs = y_pred[1][i:i + 1]
        pred_probs = y_pred[2][i:i + 1]

        # pred_boxes: [N, 4]
        # pred_confs: [N]
        # pred_labels: [N]
        # N: detected box number of the current image
        pred_boxes, pred_confs, pred_labels = cpu_nms(
            pred_boxes, pred_confs * pred_probs, num_classes,
            max_boxes=max_boxes, score_thresh=score_thresh,
            iou_thresh=iou_thresh)

        # len: N
        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if not pred_labels_list:
            continue

        # Every detection counts towards the precision denominator.
        for label in pred_labels_list:
            pred_labels_dict[label] += 1

        if not true_labels_list:
            # Nothing to match against: all detections are false positives.
            continue

        # [V, 4] (x_center, y_center, w, h) -> (xmin, ymin, xmax, ymax)
        gt = np.array(true_boxes_list)
        xy_min = gt[:, 0:2] - gt[:, 2:4] / 2.
        xy_max = xy_min + gt[:, 2:4]
        true_boxes = np.concatenate([xy_min, xy_max], axis=-1)

        # calc iou, [N, V]
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        # [N], best-overlapping ground-truth box per detection
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # Ground-truth indices hit by at least one correct detection.
        matched_gt = set()
        for k, match_idx in enumerate(max_iou_idx):
            if (iou_matrix[k, match_idx] > iou_thresh
                    and true_labels_list[match_idx] == pred_labels_list[k]):
                matched_gt.add(int(match_idx))

        for t in matched_gt:
            true_positive_dict[true_labels_list[t]] += 1

    if calc_now:
        # avoid division by 0
        recall = sum(true_positive_dict.values()) / (
            sum(true_labels_dict.values()) + 1e-6)
        precision = sum(true_positive_dict.values()) / (
            sum(pred_labels_dict.values()) + 1e-6)
        return recall, precision
    else:
        return true_positive_dict, true_labels_dict, pred_labels_dict
def evaluate_on_cpu(y_pred, y_true, num_classes, calc_now=True,
                    score_thresh=0.5, iou_thresh=0.5):
    """Accumulate recall/precision statistics for one batch.

    Args:
        y_pred: indexable with ``[0]`` boxes, ``[1]`` confidences and ``[2]``
            class probabilities; each is sliced per image along axis 0.
        y_true: three ground-truth arrays (one per feature map), images along
            axis 0.  In the last axis channels ``0:4`` are read as
            (x_center, y_center, w, h) and channels ``5:`` as per-class
            probabilities.
        num_classes: number of classes; sizes the per-class counters.
        calc_now: if true return ``(recall, precision)``, otherwise return
            the raw counters.
        score_thresh: forwarded to ``cpu_nms``.
        iou_thresh: forwarded to ``cpu_nms`` and also the minimum IoU for a
            prediction to match a ground-truth box.

    Returns:
        ``(recall, precision)`` when *calc_now* is true, else
        ``(true_positive_dict, true_labels_dict, pred_labels_dict)``, each
        mapping class index to a count.

    A ground-truth box counts as a true positive once, no matter how many
    predictions match it.  Images with predictions but no ground truth
    contribute their predictions as false positives; the previous code
    broke on that case.
    """
    # Number of images in the batch (axis 0 of the first feature map).
    num_images = y_true[0].shape[0]
    true_labels_dict = {i: 0 for i in range(num_classes)}  # {class: count}
    pred_labels_dict = {i: 0 for i in range(num_classes)}
    true_positive_dict = {i: 0 for i in range(num_classes)}

    for i in range(num_images):
        true_labels_list, true_boxes_list = [], []
        for j in range(3):  # three feature maps
            # class probabilities of every cell/anchor
            true_probs_temp = y_true[j][i][..., 5:]
            # (x_center, y_center, w, h) of every cell/anchor
            true_boxes_temp = y_true[j][i][..., 0:4]

            # True where a cell/anchor is assigned to an object.
            object_mask = true_probs_temp.sum(axis=-1) > 0

            # [V, n_class_channels], V: boxes with an object on this map
            true_probs_temp = true_probs_temp[object_mask]
            # [V, 4]
            true_boxes_temp = true_boxes_temp[object_mask]

            # [V], class labels
            true_labels_list += np.argmax(true_probs_temp, axis=-1).tolist()
            # [V, 4] (x_center, y_center, w, h)
            true_boxes_list += true_boxes_temp.tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # [1, xxx, 4]
        pred_boxes = y_pred[0][i:i + 1]
        pred_confs = y_pred[1][i:i + 1]
        pred_probs = y_pred[2][i:i + 1]

        # pred_boxes: [N, 4]
        # pred_confs: [N]
        # pred_labels: [N]
        # N: detected box number of the current image
        pred_boxes, pred_confs, pred_labels = cpu_nms(
            pred_boxes, pred_confs * pred_probs, num_classes,
            score_thresh=score_thresh, iou_thresh=iou_thresh)

        # len: N
        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if not pred_labels_list:
            continue

        # Every detection counts towards the precision denominator.
        for label in pred_labels_list:
            pred_labels_dict[label] += 1

        if not true_labels_list:
            # Nothing to match against: all detections are false positives.
            continue

        # [V, 4] (x_center, y_center, w, h) -> (xmin, ymin, xmax, ymax)
        gt = np.array(true_boxes_list)
        xy_min = gt[:, 0:2] - gt[:, 2:4] / 2.
        xy_max = xy_min + gt[:, 2:4]
        true_boxes = np.concatenate([xy_min, xy_max], axis=-1)

        # calc iou, [N, V]
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        # [N], best-overlapping ground-truth box per detection
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # Ground-truth indices hit by at least one correct detection.
        matched_gt = set()
        for k, match_idx in enumerate(max_iou_idx):
            if (iou_matrix[k, match_idx] > iou_thresh
                    and true_labels_list[match_idx] == pred_labels_list[k]):
                matched_gt.add(int(match_idx))

        for t in matched_gt:
            true_positive_dict[true_labels_list[t]] += 1

    if calc_now:
        # The epsilon keeps the denominators from being 0.
        recall = sum(true_positive_dict.values()) / (
            sum(true_labels_dict.values()) + 1e-6)
        precision = sum(true_positive_dict.values()) / (
            sum(pred_labels_dict.values()) + 1e-6)
        return recall, precision
    else:
        return true_positive_dict, true_labels_dict, pred_labels_dict
# Single-image detection step: preprocess img_ori, run the session, then map
# the resulting boxes back to the original image size.
# NOTE(review): img_ori, args, sess, input_data, boxes/scores/labels,
# pred_boxes/pred_scores, dw, dh and resize_ratio are all bound outside this
# fragment.

# Remember the original size so boxes can be rescaled afterwards.
height_ori, width_ori = img_ori.shape[:2]
# Resize to args.new_size, convert BGR -> RGB, cast to float32, add a leading
# axis and divide by 255.
img = cv2.resize(img_ori, tuple(args.new_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

# Time the session run (plus cpu_nms on the CPU path).
start_time = time.time()
if args.use_gpu:
    # Fetches the final boxes/scores/labels straight from the session.
    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
else:
    # Fetch the raw predictions and run NMS on the host instead.
    pred_boxes_, pred_scores_ = sess.run([pred_boxes, pred_scores], feed_dict={input_data: img})
    # NOTE(review): if cpu_nms can return None when nothing is detected, the
    # rescaling below would fail -- confirm its contract.
    boxes_, scores_, labels_ = cpu_nms(pred_boxes_, pred_scores_, args.num_class, max_boxes=200, score_thresh=0.3, iou_thresh=0.45)
end_time = time.time()

# rescale the coordinates to the original image
if args.letterbox_resize:
    # NOTE(review): dw, dh and resize_ratio are not assigned in this fragment,
    # and img above comes from a plain cv2.resize -- confirm a letterbox
    # resize that defines them happens before this point.
    boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
    boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
else:
    # Columns 0/2 are scaled by the width ratio, columns 1/3 by the height ratio.
    boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
    boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

# Unpack each box into its four coordinates (the loop body may continue
# beyond this fragment).
for i in range(len(boxes_)):
    x0, y0, x1, y1 = boxes_[i]
# Run detection on the images "0.jpg" .. "4.jpg" (resolved relative to the
# current working directory) and print the resulting boxes and scores.
# NOTE(review): timer, args, sess, input_data, pred_boxes and pred_scores are
# bound outside this fragment.
for y in range(5):
    # time1 is not read within this fragment.
    time1 = timer()
    input_image = str(y) + ".jpg"
    # NOTE(review): cv2.imread returns None for a missing/unreadable file, in
    # which case the .shape access on the next line fails.
    img_ori = cv2.imread(input_image)
    height_ori, width_ori = img_ori.shape[:2]
    # Resize to args.new_size, convert BGR -> RGB, cast to float32, add a
    # leading axis and divide by 255.
    img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    # CPU
    # NOTE(review): this rebinds `boxes` and `scores` to the fetched values;
    # the commented-out GPU line below fetches names `boxes`/`scores`/`labels`
    # and would be affected by that rebinding if re-enabled as is.
    boxes, scores = sess.run([pred_boxes, pred_scores], feed_dict={input_data: img})
    boxes_, scores_, labels_ = cpu_nms(boxes, scores, args.num_class, score_thresh=0.4, iou_thresh=0.5)
    #GPU
    #boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

    # Rescale from the network input size back to the original image size:
    # columns 0/2 by the width ratio, columns 1/3 by the height ratio.
    boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
    boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    print("box coords:")
    print(boxes_)
    print('*' * 30)
    print("scores:")
    print(scores_)
    print('*' * 30)