import torch
from torch.utils.data import DataLoader

# Project-local helpers (assumed module layout: config.py, dataset.py, utils.py).
import config
from dataset import YOLODataset
from utils import cells_to_bboxes, nms, plot_image


def test():
    anchors = config.ANCHORS
    transform = config.test_transforms
    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    # Dividing by 1/S is equivalent to multiplying by S: rescale the anchors
    # (given as fractions of the image size) into grid-cell units at each scale.
    scaled_anchors = torch.tensor(anchors) / (
        1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2))
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
    for x, y in loader:
        boxes = []
        # One target tensor per scale; y[0].shape[1] is the number of anchors (3).
        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(
                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
            )[0]
        # iou_threshold=1 effectively disables suppression; these are
        # ground-truth targets, not overlapping predictions.
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
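# For reference, the scaled_anchors expression above is a multiplication in
# disguise: dividing by 1/S maps each anchor's (width, height), given as a
# fraction of the image size, into grid-cell units at that scale. A minimal
# sketch with made-up anchor values (not the real config.ANCHORS):
def _demo_scaled_anchors():
    anchors = [
        [(0.28, 0.22), (0.38, 0.48), (0.90, 0.78)],  # hypothetical 13x13 anchors
        [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],  # hypothetical 26x26 anchors
        [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],  # hypothetical 52x52 anchors
    ]
    S = [13, 26, 52]
    # (3, 3, 2) anchors * (3, 1, 1)->(3, 3, 2) grid sizes, elementwise.
    scaled = torch.tensor(anchors) * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    print(scaled[0])  # e.g. (0.28, 0.22) -> (3.64, 2.86) on the 13x13 grid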
import logging
import time

import cv2
import numpy as np
from PIL import Image

# NUM_CHANNELS, IMG_W, IMG_H, and the SSD-style nms() are assumed to come from
# this project's settings/utility modules.


def run_inference(image, model, sess, mode, sign_map):
    """
    Run inference on a given image

    Arguments:
        * image: Numpy array representing a single RGB image
        * model: Dict of tensor references returned by SSDModel()
        * sess: TensorFlow session reference
        * mode: String of either "image", "video", or "demo"
        * sign_map: Dict mapping class index to class (sign) name

    Returns:
        * Numpy array representing annotated image
    """
    # Save original image in memory
    image = np.asarray(image)
    image_orig = np.copy(image)

    # Get relevant tensors
    x = model['x']
    is_training = model['is_training']
    preds_conf = model['preds_conf']
    preds_loc = model['preds_loc']
    probs = model['probs']

    # Convert image to PIL Image, resize it, convert to grayscale (if necessary),
    # then convert back to a numpy array
    image = Image.fromarray(image)
    orig_w, orig_h = image.size
    logging.info("Original image width: {:d}, height: {:d}".format(orig_w, orig_h))
    if NUM_CHANNELS == 1:
        image = image.convert('L')  # 8-bit grayscale
    image = image.resize((IMG_W, IMG_H), Image.LANCZOS)  # high-quality downsampling filter
    logging.info("Resized image size: {}".format(image.size))
    image = np.asarray(image)

    images = np.array([image])  # create a "batch" of 1 image
    if NUM_CHANNELS == 1:
        # grayscale needs an extra channel dimension of size 1
        images = np.expand_dims(images, axis=-1)

    # Perform object detection
    t0 = time.time()  # keep track of duration of object detection + NMS
    preds_conf_val, preds_loc_val, probs_val = sess.run(
        [preds_conf, preds_loc, probs],
        feed_dict={x: images, is_training: False})
    if mode != 'video':
        logging.info('Inference took %.1f ms (%.2f fps)' %
                     ((time.time() - t0) * 1000, 1 / (time.time() - t0)))

    # Gather class predictions and confidence values
    y_pred_conf = preds_conf_val[0]  # batch size of 1, so just take [0]
    y_pred_conf = y_pred_conf.astype('float32')
    prob = probs_val[0]

    # Gather localization predictions
    y_pred_loc = preds_loc_val[0]

    # Perform NMS
    boxes = nms(y_pred_conf, y_pred_loc, prob)
    # logging.info("Boxes: {}".format(boxes))
    if mode != 'video':
        logging.info('Inference + NMS took %.1f ms (%.2f fps)' %
                     ((time.time() - t0) * 1000, 1 / (time.time() - t0)))

    # Rescale boxes' coordinates back to the original image's dimensions
    # Recall boxes = [[x1, y1, x2, y2, cls, cls_prob], [...], ...]
    scale = np.array(
        [orig_w / IMG_W, orig_h / IMG_H, orig_w / IMG_W, orig_h / IMG_H])
    if len(boxes) > 0:
        boxes[:, :4] = boxes[:, :4] * scale
    # logging.info("Boxes (rescaled): {}".format(boxes))

    # Draw and annotate boxes over the original image, and return the annotated image
    image = image_orig
    for box in boxes:
        # Get box parameters
        box_coords = [int(round(x)) for x in box[:4]]
        cls = int(box[4])
        cls_prob = box[5]

        # Annotate image
        image = cv2.rectangle(image, tuple(box_coords[:2]),
                              tuple(box_coords[2:]), (0, 255, 0))
        label_str = '%s %.3f' % (sign_map[cls], cls_prob)
        image = cv2.putText(image, label_str, (box_coords[0], box_coords[1]),
                            0, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

    return image
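# A minimal usage sketch for run_inference(), assuming the TF1-style SSDModel()
# factory named in the docstring above; the checkpoint path, image path, and
# sign_map contents here are hypothetical:
def _demo_run_inference():
    import tensorflow as tf  # TF1 session API, matching sess.run() above

    sign_map = {0: 'background', 1: 'stop', 2: 'yield'}  # hypothetical class map
    model = SSDModel()  # dict of tensor references consumed by run_inference()
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './model.ckpt')  # hypothetical checkpoint
        image = np.asarray(Image.open('test.png').convert('RGB'))
        annotated = run_inference(image, model, sess, 'image', sign_map)
        Image.fromarray(annotated).save('test_annotated.png')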