def predict(self, img):
    # img is expected to be a BGR numpy frame; convert it to a float CUDA tensor for the net.
    img = torch.from_numpy(img.copy()).float()
    img = img.cuda()

    img_h, img_w = img.shape[0], img.shape[1]
    img_trans = FastBaseTransform()(img.unsqueeze(0))
    net_outs = self.net(img_trans)
    nms_outs = NMS(net_outs, 0)
    results = after_nms(nms_outs, img_h, img_w, crop_masks=not self.args.no_crop,
                        visual_thre=self.args.visual_thre)
    torch.cuda.synchronize()

    # Keep a running average of per-frame latency to report FPS on the rendered frame.
    temp = self.time_here
    self.time_here = time.time()
    self.frame_times.add(self.time_here - temp)
    fps = 1 / self.frame_times.get_avg()

    frame_numpy = draw_img(results, img, self.args, class_color=True, fps=fps)
    return frame_numpy
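
# Usage sketch (not from the original code): `predict` takes a BGR numpy frame and returns an
# annotated frame, so it can be driven by a simple capture loop. `Detector` here is a
# hypothetical wrapper assumed to hold the `net`, `args`, `time_here` and `frame_times`
# attributes the method uses.
import cv2

detector = Detector()                    # hypothetical wrapper exposing predict()
cap = cv2.VideoCapture(0)                # webcam index or a video file path
while True:
    ok, frame = cap.read()
    if not ok:
        break
    annotated = detector.predict(frame)  # numpy image with boxes, masks and FPS drawn
    cv2.imshow('YOLACT', annotated)
    if cv2.waitKey(1) == 27:             # Esc to quit
        break
cap.release()
cv2.destroyAllWindows()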
def prep_benchmark(dets_out, h, w):
    with timer.env('After NMS'):
        # after_nms takes (h, w) like the other call sites in this code, not (w, h).
        results = after_nms(dets_out, h, w)

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.visual_top_k].cpu().numpy() for x in results]

    with timer.env('Sync'):
        torch.cuda.synchronize()
num = len(images)
for i, one_img in enumerate(images):
    img_name = one_img.split('/')[-1]
    img_origin = torch.from_numpy(cv2.imread(one_img)).cuda().float()
    img_h, img_w = img_origin.shape[0], img_origin.shape[1]
    img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
    net_outs = net(img_trans)
    nms_outs = NMS(net_outs, args.traditional_nms)

    show_lincomb = bool(args.show_lincomb and args.image_path)
    with timer.env('after nms'):
        results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb,
                            crop_masks=not args.no_crop, visual_thre=args.visual_thre,
                            img_name=img_name)
        torch.cuda.synchronize()

    img_numpy = draw_img(results, img_origin, args)
    cv2.imwrite(f'results/images/{img_name}', img_numpy)
    print(f'\r{i + 1}/{num}', end='')

print('\nDone.')

# detect videos
elif args.video is not None:
def prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, image_id, make_json, cocoapi):
    """ Returns a list of APs for this image, with each element being for a class  """
    with timer.env('After NMS'):
        class_ids, classes, boxes, masks = after_nms(nms_outs, h, w)
        if class_ids.size(0) == 0:
            return

        class_ids = list(class_ids.cpu().numpy().astype(int))
        classes = list(classes.cpu().numpy().astype(float))
        masks = masks.view(-1, h * w).cuda() if cuda else masks.view(-1, h * w)
        boxes = boxes.cuda() if cuda else boxes

    if cocoapi:
        with timer.env('Output json'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()

            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
                    make_json.add_bbox(image_id, class_ids[i], boxes[i, :], classes[i])
                    make_json.add_mask(image_id, class_ids[i], masks[i, :, :], classes[i])
        return

    with timer.env('Prepare gt'):
        gt_boxes = torch.Tensor(gt[:, :4])
        gt_boxes[:, [0, 2]] *= w
        gt_boxes[:, [1, 3]] *= h
        gt_classes = list(gt[:, 4].astype(int))
        gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

        if num_crowd > 0:
            split = lambda x: (x[-num_crowd:], x[:-num_crowd])
            crowd_boxes, gt_boxes = split(gt_boxes)
            crowd_masks, gt_masks = split(gt_masks)
            crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Eval Setup'):
        num_pred = len(class_ids)
        num_gt = len(gt_classes)

        mask_iou_cache = mask_iou(masks, gt_masks)
        bbox_iou_cache = bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item()),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item())]

    timer.start('Main loop')
    for _class in set(class_ids + gt_classes):
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func in iou_types:
                gt_used = [False] * len(gt_classes)
                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in range(num_pred):
                    if class_ids[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)
                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(classes[i], True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)
                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(classes[i], False)
    timer.stop('Main loop')
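
# For reference, a minimal sketch (an assumption, not the project's actual class) of the
# APDataObject-style accumulator that prep_metrics pushes into: it collects
# (confidence, matched?) pairs per class and IoU threshold and integrates precision over recall.
class APDataObjectSketch:
    def __init__(self):
        self.data_points = []        # (confidence score, matched a ground truth?)
        self.num_gt_positives = 0

    def push(self, score, is_true):
        self.data_points.append((score, is_true))

    def add_gt_positives(self, num):
        self.num_gt_positives += num

    def get_ap(self):
        if self.num_gt_positives == 0:
            return 0
        # Sweep detections by descending confidence to build the precision-recall curve.
        self.data_points.sort(key=lambda x: -x[0])
        precisions, recalls, tp, fp = [], [], 0, 0
        for _, is_true in self.data_points:
            tp, fp = (tp + 1, fp) if is_true else (tp, fp + 1)
            precisions.append(tp / (tp + fp))
            recalls.append(tp / self.num_gt_positives)
        # Make precision monotonically non-increasing, then integrate over recall.
        for k in range(len(precisions) - 2, -1, -1):
            precisions[k] = max(precisions[k], precisions[k + 1])
        ap, prev_r = 0.0, 0.0
        for p, r in zip(precisions, recalls):
            ap += p * (r - prev_r)
            prev_r = r
        return ap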
def evaluate(net, cfg, step=None):
    dataset = COCODetection(cfg, mode='val')
    data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False,
                                  pin_memory=True, collate_fn=val_collate)
    ds = len(data_loader)
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    ap_data = {'box': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres],
               'mask': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres]}

    with torch.no_grad():
        for i, (img, gt, gt_masks, img_h, img_w) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img, gt, gt_masks = img.cuda(), gt.cuda(), gt_masks.cuda()

            with timer.counter('forward'):
                class_p, box_p, coef_p, proto_p, anchors = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w)
            if ids_p is None:
                continue

            with timer.counter('metric'):
                ids_p = list(ids_p.cpu().numpy().astype(int))
                class_p = list(class_p.cpu().numpy().astype(float))

                if cfg.coco_api:
                    boxes_p = boxes_p.cpu().numpy()
                    masks_p = masks_p.cpu().numpy()

                    for j in range(masks_p.shape[0]):
                        if (boxes_p[j, 3] - boxes_p[j, 1]) * (boxes_p[j, 2] - boxes_p[j, 0]) > 0:
                            make_json.add_bbox(dataset.ids[i], ids_p[j], boxes_p[j, :], class_p[j])
                            make_json.add_mask(dataset.ids[i], ids_p[j], masks_p[j, :, :], class_p[j])
                else:
                    prep_metrics(ap_data, ids_p, class_p, boxes_p, masks_p, gt, gt_masks,
                                 img_h, img_w, iou_thres)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_me = timer.get_times(['batch', 'data', 'forward',
                                                                    'nms', 'after_nms', 'metric'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_metric: {t_me:.3f}', end='')

    if cfg.coco_api:
        make_json.dump()
        print(f'\nJson files dumped, saved in: \'results/\', start evaluating.')

        gt_annotations = COCO(cfg.val_ann)
        bbox_dets = gt_annotations.loadRes(f'results/bbox_detections.json')
        mask_dets = gt_annotations.loadRes(f'results/mask_detections.json')

        print('\nEvaluating BBoxes:')
        bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
        bbox_eval.evaluate()
        bbox_eval.accumulate()
        bbox_eval.summarize()

        print('\nEvaluating Masks:')
        mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
        mask_eval.evaluate()
        mask_eval.accumulate()
        mask_eval.summarize()
    else:
        table, box_row, mask_row = calc_map(ap_data, iou_thres, len(cfg.class_names), step=step)
        print(table)
        return table, box_row, mask_row
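
# `evaluate` relies on a module-level `iou_thres` (and helpers like `make_json` and `calc_map`)
# that are not shown in this excerpt. COCO-style evaluation sweeps IoU thresholds from 0.5 to
# 0.95 in steps of 0.05, so a plausible definition (an assumption, not copied from the source) is:
iou_thres = [x / 100 for x in range(50, 100, 5)]   # 0.50, 0.55, ..., 0.95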
def process():
    try:
        destFile = ""
        if request.method == 'POST':
            file = request.files['file']
            if file and allowed_file(file.filename):
                # File uploaded directly in the request.
                filename = secure_filename(file.filename)
                destFile = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(destFile)
                app.logger.warning('filename=(%s)', filename)
            else:
                # Otherwise expect a URL in the form data and download the image.
                app.logger.warning("Request dictionary data: {}".format(request.data))
                app.logger.warning("Request dictionary form: {}".format(request.form))
                url = request.form["url"]
                print("url:", url)
                destFile = download_file(url)

        app.logger.warning('destFile=(%s)', destFile)
        img_name = destFile.split('/')[-1]
        app.logger.warning('img_name=(%s)', img_name)

        img_origin = cv2.imread(destFile)
        img_tensor = torch.from_numpy(img_origin).float()
        if cuda:
            img_tensor = img_tensor.cuda()

        img_h, img_w = img_tensor.shape[0], img_tensor.shape[1]
        img_trans = FastBaseTransform()(img_tensor.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, args.traditional_nms)

        app.logger.warning('img_h=(%s)', img_h)
        app.logger.warning('img_w=(%s)', img_w)
        app.logger.warning('cuda=(%s)', cuda)
        app.logger.warning('args.show_lincomb=(%s)', args.show_lincomb)
        app.logger.warning('args.no_crop=(%s)', args.no_crop)
        app.logger.warning('args.visual_thre=(%s)', args.visual_thre)
        app.logger.warning('args=(%s)', args)

        show_lincomb = bool(args.show_lincomb)
        with timer.env('after nms'):
            results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb,
                                crop_masks=not args.no_crop, visual_thre=args.visual_thre,
                                img_name=img_name)
            if cuda:
                torch.cuda.synchronize()

        img_numpy = draw_img(results, img_origin, img_name, args)
        cv2.imwrite(f'results/images/{img_name}', img_numpy)

        # Return the rendered image as a JPEG response.
        try:
            im = Image.open(f'results/images/{img_name}')
            io = BytesIO()
            im.save(io, format='JPEG')
            return Response(io.getvalue(), mimetype='image/jpeg')
        except IOError:
            abort(404)
    except Exception:
        traceback.print_exc()
        return {'message': 'input error'}, 400
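
# The handler above assumes a Flask `app` with an UPLOAD_FOLDER config and a route registration
# that are not shown in this excerpt. A minimal setup could look like the sketch below; the
# '/process' path and the 'uploads' folder are assumptions for illustration, not the project's
# actual values.
from flask import Flask

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'

# Register the view function defined above for both direct uploads and URL requests.
app.add_url_rule('/process', view_func=process, methods=['GET', 'POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)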
img_h, img_w = img_origin.shape[0:2]

with timer.counter('forward'):
    class_p, box_p, coef_p, proto_p, anchors = net(img)

with timer.counter('nms'):
    ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, anchors, cfg)

with timer.counter('after_nms'):
    ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                 img_h, img_w, cfg, img_name=img_name)

with timer.counter('save_img'):
    img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name)
    cv2.imwrite(f'results/images/{img_name}', img_numpy)
if cuda:
    img = img.cuda()

img_h, img_w = img_origin.shape[0:2]

with timer.counter('forward'):
    net_outs = net(img)

with timer.counter('nms'):
    nms_outs = nms(cfg, net_outs)

with timer.counter('after_nms'):
    results = after_nms(nms_outs, img_h, img_w, cfg, img_name=img_name)

with timer.counter('save_img'):
    img_numpy = draw_img(results, img_origin, cfg, img_name=img_name)
    cv2.imwrite(f'results/images/{img_name}', img_numpy)

aa = time.perf_counter()
if i > 0:
    batch_time = aa - temp
    timer.add_batch_time(batch_time)
temp = aa
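
# The `timer.counter(...)`, `add_batch_time` and `get_times` calls used throughout these snippets
# come from a small profiling utility that is not shown here. A minimal sketch of such a helper
# (an assumption, not the project's actual `timer` module; the real one also derives a 'data'
# time from the gaps between measured sections) could be:
import time
from collections import defaultdict
from contextlib import contextmanager

class SimpleTimer:
    def __init__(self):
        self.reset()

    def reset(self):
        self.totals = defaultdict(float)   # accumulated seconds per section name
        self.counts = defaultdict(int)     # number of measurements per section name

    def start(self):
        # Called after warm-up so the first (slow) iteration is excluded from the averages.
        self.totals.clear()
        self.counts.clear()

    @contextmanager
    def counter(self, name):
        begin = time.perf_counter()
        yield
        self.totals[name] += time.perf_counter() - begin
        self.counts[name] += 1

    def add_batch_time(self, t):
        self.totals['batch'] += t
        self.counts['batch'] += 1

    def get_times(self, names):
        # Mean duration per recorded call for each requested section.
        return [self.totals[n] / max(self.counts[n], 1) for n in names]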
def main():
    parser = argparse.ArgumentParser(description='YOLACT Detection.')
    parser.add_argument('--weight', default='weights/best_30.5_res101_coco_392000.pth', type=str)
    parser.add_argument('--image', default=None, type=str, help='The folder of images for detecting.')
    parser.add_argument('--video', default=None, type=str, help='The path of the video to evaluate.')
    parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')
    parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
    parser.add_argument('--hide_mask', default=False, action='store_true', help='Hide masks in results.')
    parser.add_argument('--hide_bbox', default=False, action='store_true', help='Hide boxes in results.')
    parser.add_argument('--hide_score', default=False, action='store_true', help='Hide scores in results.')
    parser.add_argument('--cutout', default=False, action='store_true', help='Cut out each object and save.')
    parser.add_argument('--save_lincomb', default=False, action='store_true', help='Show the generating process of masks.')
    parser.add_argument('--no_crop', default=False, action='store_true',
                        help='Do not crop the output masks with the predicted bounding box.')
    parser.add_argument('--real_time', default=False, action='store_true', help='Show the detection results real-timely.')
    parser.add_argument('--visual_thre', default=0.3, type=float,
                        help='Detections with a score under this threshold will be removed.')

    args = parser.parse_args()
    prefix = re.findall(r'best_\d+\.\d+_', args.weight)[0]
    suffix = re.findall(r'_\d+\.pth', args.weight)[0]
    args.cfg = args.weight.split(prefix)[-1].split(suffix)[0]
    cfg = get_config(args, mode='detect')

    net = Yolact(cfg)
    net.load_weights(cfg.weight, cfg.cuda)
    net.eval()

    if cfg.cuda:
        cudnn.benchmark = True
        cudnn.fastest = True
        net = net.cuda()

    # detect images
    if cfg.image is not None:
        dataset = COCODetection(cfg, mode='detect')
        data_loader = data.DataLoader(dataset, 1, num_workers=2, shuffle=False,
                                      pin_memory=True, collate_fn=detect_collate)
        ds = len(data_loader)
        assert ds > 0, 'No .jpg images found.'
        progress_bar = ProgressBar(40, ds)
        timer.reset()

        for i, (img, img_origin, img_name) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img = img.cuda()

            img_h, img_w = img_origin.shape[0:2]

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w, cfg, img_name=img_name)

            with timer.counter('save_img'):
                img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name)
                cv2.imwrite(f'results/images/{img_name}', img_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward',
                                                                    'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        print('\nFinished, saved in: results/images.')

    # detect videos
    elif cfg.video is not None:
        vid = cv2.VideoCapture(cfg.video)

        target_fps = round(vid.get(cv2.CAP_PROP_FPS))
        frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        name = cfg.video.split('/')[-1]
        video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"),
                                       target_fps, (frame_width, frame_height))

        progress_bar = ProgressBar(40, num_frames)
        timer.reset()
        t_fps = 0

        for i in range(num_frames):
            if i == 1:
                timer.start()

            frame_origin = vid.read()[1]
            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg.img_size)
            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w, cfg)

            with timer.counter('save_img'):
                frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin, cfg, fps=t_fps)

            if cfg.real_time:
                cv2.imshow('Detection', frame_numpy)
                cv2.waitKey(1)
            else:
                video_writer.write(frame_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward',
                                                                    'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rDetecting: {bar_str} {i + 1}/{num_frames}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        if not cfg.real_time:
            print(f'\n\nFinished, saved in: results/videos/{name}')

        vid.release()
        video_writer.release()
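
# Entry-point guard (not shown in the excerpt above; the script and folder names in the
# example invocation are assumptions):
if __name__ == '__main__':
    main()

# e.g.  python detect.py --image images --weight weights/best_30.5_res101_coco_392000.pth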