def evaluate(net, dataset, max_num=-1, during_training=False, cocoapi=False, traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    # For each class and iou, stores tuples (score, isPositive).
    # Index: ap_data[type][iouIdx][classIdx]
    ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
               'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
    make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)
            batch = img.unsqueeze(0)
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)

        prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                     dataset.ids[image_idx], make_json, cocoapi)

        # The first couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include them in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing: %s %d / %d (%.2f%%) %.2f fps' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps), end='')

    table, box_row, mask_row = calc_map(ap_data)
    print(table)
    return table, box_row, mask_row
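# The ap_data grid above is indexed as ap_data[type][iou_idx][class_idx], and each cell
# accumulates (score, is_positive) pairs for one class at one IoU threshold. A minimal
# sketch of such an accumulator (hypothetical re-implementation; the real APDataObject
# lives in the repo's eval utilities and may interpolate the PR curve differently):
class APDataObjectSketch:
    def __init__(self):
        self.data_points = []       # (confidence, whether the detection matched a GT)
        self.num_gt_positives = 0   # denominator for recall

    def push(self, score: float, is_true: bool):
        self.data_points.append((score, is_true))

    def add_gt_positives(self, num: int):
        self.num_gt_positives += num

    def get_ap(self) -> float:
        if self.num_gt_positives == 0:
            return 0.0
        # Walk detections in order of descending confidence, tracing the PR curve.
        self.data_points.sort(key=lambda x: -x[0])
        precisions, recalls, tp, fp = [], [], 0, 0
        for _, is_true in self.data_points:
            tp, fp = tp + is_true, fp + (not is_true)
            precisions.append(tp / (tp + fp))
            recalls.append(tp / self.num_gt_positives)
        # Area under the PR curve, taken here as a simple left Riemann sum.
        return sum(p * (r - r_prev)
                   for p, r_prev, r in zip(precisions, [0.0] + recalls[:-1], recalls))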
def savevideo(net: Yolact, in_path: str, out_path: str):
    vid = cv2.VideoCapture(in_path)
    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                          target_fps, (frame_width, frame_height))

    frame_freq = 10
    transform = FastBaseTransform()
    frame_times = MovingAverage()
    progress_bar = ProgressBar(30, num_frames)
    preds = None

    try:
        for i in range(num_frames):
            timer.reset()
            with timer.env('Video'):
                # Run the network only on every frame_freq-th frame; care is taken that
                # the first frame (i == 0) is always processed, so preds is never None.
                frame = torch.from_numpy(vid.read()[1]).cuda().float()
                # TODO: needs adjusting for multi-frame batches
                if i % frame_freq == 0:
                    batch = transform(frame.unsqueeze(0))
                    preds = net(batch)
                current_preds = make_copy(preds)
                processed = prep_display(current_preds, frame, None, None,
                                         undo_transform=False, class_color=True)
                out.write(processed)

            if i > 1:
                frame_times.add(timer.total_time())
                fps = 1 / frame_times.get_avg()
                progress = (i + 1) / num_frames * 100
                progress_bar.set_val(i + 1)
                print('\rProcessing Frames %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                      (repr(progress_bar), i + 1, num_frames, progress, fps), end='')
    except KeyboardInterrupt:
        print('Stopping early.')

    vid.release()
    out.release()
    print()
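# make_copy is used above (and in the ROI variant later in this file) but never defined
# in these snippets; its job is to detach the cached predictions so that drawing on the
# in-between frames cannot mutate tensors that are reused until the next keyframe.
# A plausible stand-in (hypothetical; the real helper may copy more or fewer fields):
def make_copy(preds):
    if torch.is_tensor(preds):
        return preds.detach().clone()
    if isinstance(preds, dict):
        return {k: make_copy(v) for k, v in preds.items()}
    if isinstance(preds, (list, tuple)):
        return type(preds)(make_copy(v) for v in preds)
    return preds  # leave non-tensor leaves (ints, strings, None) as-is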
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    # TODO: Fast Mask Re-scoring is currently not supported in evalimage, evalimages, and evalvideo.
    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive).
        # Index: ap_data[type][iouIdx][classIdx]
        ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
                   'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids.
        #
        # On Python 3.5, dictionary key order is random, while on 3.6+ it's insertion
        # order, so there the images would come in the order they appear in the
        # annotations file. For some reason, the first images in the annotations file
        # are the hardest. To combat this, use a hard-coded hash function on the image
        # ids to shuffle the indices. That way, no matter the Python version or how
        # pycocotools handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        dataset.batch_size = 1
        dataset.num_workers = 1
        for it, batch in enumerate(dataset):
            timer.reset()
            image_idx, img, gt, gt_masks, h, w, num_crowd = batch[0]
            if not args.benchmark:
                gt = gt.numpy()
                gt_masks = gt_masks.numpy()
            batch = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])

            with timer.env('Network Extra'):
                preds = net(batch)

            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd,
                             dataset.ids[image_idx], detections)

            # The first couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include them in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                fps = 1 / frame_times.get_avg() if it > 1 else 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                      (repr(progress_bar), it + 1, dataset_size, progress, fps), end='')

            # Force pending Jittor ops to finish so the timing above is meaningful.
            jt.sync_all(True)

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections...')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data...')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)
                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' % (1 / avg_seconds, 1000 * avg_seconds))
    except KeyboardInterrupt:
        print('Stopping...')
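# badhash is the hard-coded hash mentioned in the deterministic-shuffle comment above.
# In YOLACT it is an integer bit-mixing function of roughly this shape (reproduced from
# memory; verify against the repo's eval.py before relying on exact values):
def badhash(x):
    # A quick-and-dirty 32-bit integer mixer, good enough for a deterministic shuffle.
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = ((x >> 16) ^ x) & 0xFFFFFFFF
    return x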
args = parser.parse_args()
args.cfg = re.findall(r'res.+_[a-z]+', args.weight)[0]
cfg = get_config(args, mode='detect')

sess = ort.InferenceSession(cfg.weight)
input_name = sess.get_inputs()[0].name

# detect images
if cfg.image is not None:
    dataset = COCODetection(cfg, mode='detect')
    data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False,
                                  pin_memory=True, collate_fn=detect_onnx_collate)
    ds = len(data_loader)
    assert ds > 0, 'No .jpg images found.'
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    for i, (img, img_origin, img_name) in enumerate(data_loader):
        if i == 1:
            timer.start()

        img_h, img_w = img_origin.shape[0:2]

        with timer.counter('forward'):
            class_p, box_p, coef_p, proto_p, anchors = sess.run(None, {input_name: img})

        with timer.counter('nms'):
            ids_p, class_p, box_p, coef_p, proto_p = nms_numpy(class_p, box_p, coef_p,
                                                               proto_p, anchors, cfg)

        with timer.counter('after_nms'):
            # The snippet is truncated here in the source; the remaining arguments are
            # assumed to mirror the torch variant, after_nms(..., proto_p, img_h, img_w, cfg).
            ids_p, class_p, boxes_p, masks_p = after_nms_numpy(ids_p, class_p, box_p, coef_p,
                                                               proto_p, img_h, img_w, cfg)
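# ort.InferenceSession above lets onnxruntime pick an execution provider on its own.
# To pin the device explicitly (assuming onnxruntime-gpu is installed), the session can
# be created with an ordered provider list; this is standard onnxruntime API:
sess = ort.InferenceSession(cfg.weight,
                            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])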
args = parser.parse_args()
cfg = get_config(args)

model = PAA(cfg)
model.train().cuda()

# broadcast_buffers needs to be True if BN is used
model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank,
            broadcast_buffers=False)
# if cfg.MODEL.USE_SYNCBN:  # TODO: figure this out
#     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

optim = Optimizer(model, cfg)
checkpointer = Checkpointer(cfg, model.module, optim.optimizer)
start_iter = int(cfg.resume.split('_')[-1].split('.')[0]) if cfg.resume else 0
data_loader = make_data_loader(cfg, start_iter=start_iter)
max_iter = len(data_loader) - 1
timer.reset()

main_gpu = dist.get_rank() == 0
num_gpu = dist.get_world_size()

for i, (img_list_batch, box_list_batch) in enumerate(data_loader, start_iter):
    if main_gpu and i == start_iter + 1:
        timer.start()

    optim.update_lr(step=i)

    img_tensor_batch = torch.stack([aa.img for aa in img_list_batch], dim=0).cuda()
    for box_list in box_list_batch:
        box_list.to_cuda()

    with timer.counter('for+loss'):
        category_loss, box_loss, iou_loss = model(img_tensor_batch, box_list_batch)
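# The snippet above calls dist.get_rank()/get_world_size() without showing the process
# group setup, so it assumes torch.distributed was initialized earlier (e.g. by a
# launcher such as torchrun passing --local_rank). A typical setup, sketched:
import torch
import torch.distributed as dist

dist.init_process_group(backend='nccl')   # one process per GPU, NCCL for CUDA tensors
torch.cuda.set_device(args.local_rank)    # bind this process to its own GPU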
def evaluate(net, cfg, step=None):
    dataset = COCODetection(cfg, mode='val')
    data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False,
                                  pin_memory=True, collate_fn=val_collate)
    ds = len(data_loader)
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    ap_data = {'box': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres],
               'mask': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres]}

    with torch.no_grad():
        for i, (img, gt, gt_masks, img_h, img_w) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img, gt, gt_masks = img.cuda(), gt.cuda(), gt_masks.cuda()

            with timer.counter('forward'):
                class_p, box_p, coef_p, proto_p, anchors = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p,
                                                             proto_p, anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p,
                                                             proto_p, img_h, img_w)
                if ids_p is None:
                    continue

            with timer.counter('metric'):
                ids_p = list(ids_p.cpu().numpy().astype(int))
                class_p = list(class_p.cpu().numpy().astype(float))

                if cfg.coco_api:
                    boxes_p = boxes_p.cpu().numpy()
                    masks_p = masks_p.cpu().numpy()

                    for j in range(masks_p.shape[0]):
                        # Skip degenerate boxes with zero or negative area.
                        if (boxes_p[j, 3] - boxes_p[j, 1]) * (boxes_p[j, 2] - boxes_p[j, 0]) > 0:
                            make_json.add_bbox(dataset.ids[i], ids_p[j], boxes_p[j, :], class_p[j])
                            make_json.add_mask(dataset.ids[i], ids_p[j], masks_p[j, :, :], class_p[j])
                else:
                    prep_metrics(ap_data, ids_p, class_p, boxes_p, masks_p,
                                 gt, gt_masks, img_h, img_w, iou_thres)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_me = timer.get_times(
                    ['batch', 'data', 'forward', 'nms', 'after_nms', 'metric'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_metric: {t_me:.3f}', end='')

    if cfg.coco_api:
        make_json.dump()
        print('\nJson files dumped, saved in: \'results/\', start evaluating.')

        gt_annotations = COCO(cfg.val_ann)
        bbox_dets = gt_annotations.loadRes('results/bbox_detections.json')
        mask_dets = gt_annotations.loadRes('results/mask_detections.json')

        print('\nEvaluating BBoxes:')
        bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
        bbox_eval.evaluate()
        bbox_eval.accumulate()
        bbox_eval.summarize()

        print('\nEvaluating Masks:')
        mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
        mask_eval.evaluate()
        mask_eval.accumulate()
        mask_eval.summarize()
    else:
        table, box_row, mask_row = calc_map(ap_data, iou_thres, len(cfg.class_names), step=step)
        print(table)
        return table, box_row, mask_row
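# Make_json above is assumed to accumulate records in the standard COCO results format
# before dumping them to results/*.json. For reference, a bbox record looks like this
# (to_coco_bbox_record is a hypothetical helper, not part of the repo):
def to_coco_bbox_record(image_id, category_id, box_xyxy, score):
    x1, y1, x2, y2 = [float(v) for v in box_xyxy]
    return {'image_id': int(image_id),
            'category_id': int(category_id),       # a COCO category id, not a 0-based index
            'bbox': [x1, y1, x2 - x1, y2 - y1],    # COCO uses [x, y, width, height]
            'score': float(score)}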
def evaluate(net: STMask, dataset):
    net.detect.use_fast_nms = args.fast_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) / args.batch_size) if args.max_images < 0 \
        else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()
    data_loader = data.DataLoader(dataset, args.batch_size, shuffle=False,
                                  collate_fn=detection_collate, pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()
            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
                pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images, img_meta=images_meta, ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

            # Perform the meat of the operation here depending on our mode.
            if it == dataset_size - 1:
                batch_size = len(dataset) % args.batch_size
            else:
                batch_size = images.size(0)

            for batch_id in range(batch_size):
                if args.display:
                    img_id = (images_meta[batch_id]['video_id'],
                              images_meta[batch_id]['frame_id'])
                    if not cfg.display_mask_single:
                        img_numpy = prep_display(preds[batch_id], images[batch_id], pad_h, pad_w,
                                                 img_meta=images_meta[batch_id], img_ids=img_id)
                    else:
                        # Display each detected instance on its own image.
                        for p in range(preds[batch_id]['detection']['box'].size(0)):
                            preds_single = {'detection': {}}
                            for k in preds[batch_id]['detection']:
                                if preds[batch_id]['detection'][k] is not None and k not in {'proto'}:
                                    preds_single['detection'][k] = preds[batch_id]['detection'][k][p]
                                else:
                                    preds_single['detection'][k] = None
                            preds_single['net'] = preds[batch_id]['net']
                            preds_single['detection']['box_ids'] = torch.tensor(-1)

                            img_numpy = prep_display(preds_single, images[batch_id], pad_h, pad_w,
                                                     img_meta=images_meta[batch_id], img_ids=img_id)
                            plt.imshow(img_numpy)
                            plt.axis('off')
                            plt.savefig(''.join([args.mask_det_file[:-12], 'out_single/',
                                                 str(img_id), '_', str(p), '.png']))
                            plt.clf()
                else:
                    cfg.preserve_aspect_ratio = True
                    preds_cur = postprocess_ytbvis(preds[batch_id], pad_h, pad_w,
                                                   images_meta[batch_id],
                                                   score_threshold=cfg.eval_conf_thresh)
                    segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                    results.append(segm_results)

            # The first couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include them in the FPS calculations.
            if it > 1:
                if batch_size == 0:
                    # Guard against a full final batch (len(dataset) % batch_size == 0),
                    # mirroring the same check in validation() below.
                    batch_size = 1
                frame_times.add(timer.total_time() / batch_size)

            if args.display and not cfg.display_mask_single:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.axis('off')
                plt.title(str(img_id))

                root_dir = ''.join([args.mask_det_file[:-12], 'out/',
                                    str(images_meta[batch_id]['video_id']), '/'])
                if not os.path.exists(root_dir):
                    os.makedirs(root_dir)
                plt.savefig(''.join([root_dir, str(images_meta[batch_id]['frame_id']), '.png']))
                plt.clf()
            elif not args.no_bar:
                fps = 1 / frame_times.get_avg() if it > 1 else 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                      (repr(progress_bar), it + 1, dataset_size, progress, fps), end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_json:
                print('Dumping detections...')
                results2json_videoseg(dataset, results, args.mask_det_file)

                if cfg.use_valid_sub or cfg.use_train_sub:
                    if cfg.use_valid_sub:
                        print('calculate evaluation metrics ...')
                        ann_file = cfg.valid_sub_dataset.ann_file
                    else:
                        print('calculate train_sub metrics ...')
                        ann_file = cfg.train_dataset.ann_file
                    dt_file = args.mask_det_file
                    metrics = calc_metrics(ann_file, dt_file)
                    return metrics
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' % (1 / avg_seconds, 1000 * avg_seconds))
    except KeyboardInterrupt:
        print('Stopping...')
def validation(net: STMask, valid_data=False, output_metrics_file=None):
    cfg.mask_proto_debug = args.mask_proto_debug
    if not valid_data:
        cfg.valid_sub_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_sub_dataset)
    else:
        cfg.valid_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_dataset)

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) / args.batch_size) if args.max_images < 0 \
        else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()
    data_loader = data.DataLoader(dataset, args.batch_size, shuffle=False,
                                  collate_fn=detection_collate, pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()
            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
                pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images, img_meta=images_meta, ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

            if it == dataset_size - 1:
                batch_size = len(dataset) % args.batch_size
            else:
                batch_size = images.size(0)

            for batch_id in range(batch_size):
                cfg.preserve_aspect_ratio = True
                preds_cur = postprocess_ytbvis(preds[batch_id], pad_h, pad_w,
                                               images_meta[batch_id],
                                               score_threshold=cfg.eval_conf_thresh)
                segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                results.append(segm_results)

            # The first couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include them in the FPS calculations.
            if it > 1:
                if batch_size == 0:
                    batch_size = 1
                frame_times.add(timer.total_time() / batch_size)

            if it > 1 and frame_times.get_avg() > 0:
                fps = 1 / frame_times.get_avg()
            else:
                fps = 0
            progress = (it + 1) / dataset_size * 100
            progress_bar.set_val(it + 1)
            print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                  (repr(progress_bar), it + 1, dataset_size, progress, fps), end='')

        print()
        print('Dumping detections...')
        if not valid_data:
            results2json_videoseg(dataset, results, args.mask_det_file)
            print('calculate evaluation metrics ...')
            ann_file = cfg.valid_sub_dataset.ann_file
            dt_file = args.mask_det_file
            calc_metrics(ann_file, dt_file, output_file=output_metrics_file)
        else:
            results2json_videoseg(dataset, results,
                                  output_metrics_file.replace('.txt', '.json'))
    except KeyboardInterrupt:
        print('Stopping...')
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            savevideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive).
        # Index: ap_data[type][iouIdx][classIdx]
        ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
                   'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Deterministic shuffle based on the image ids (see the badhash note earlier).
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        for it, image_idx in enumerate(dataset_indices):
            timer.reset()

            with timer.env('Load Data'):
                img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

                # Test flag, do not upvote
                if cfg.mask_proto_debug:
                    with open('scripts/info.txt', 'w') as f:
                        f.write(str(dataset.ids[image_idx]))
                    np.save('scripts/gt.npy', gt_masks)

                batch = Variable(img.unsqueeze(0))
                if args.cuda:
                    batch = batch.cuda()

            with timer.env('Network Extra'):
                preds = net(batch)

            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd,
                             dataset.ids[image_idx], detections)

            # The first couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include them in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                fps = 1 / frame_times.get_avg() if it > 1 else 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                      (repr(progress_bar), it + 1, dataset_size, progress, fps), end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections...')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data...')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)
                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' % (1 / avg_seconds, 1000 * avg_seconds))
    except KeyboardInterrupt:
        print('Stopping...')
def inference(model, cfg, during_training=False):
    model.eval()
    predictions, coco_results = {}, []
    val_loader = make_data_loader(cfg, during_training=during_training)
    dataset = val_loader.dataset
    dl = len(val_loader)
    bar = ProgressBar(length=40, max_val=dl)
    timer.reset()

    with torch.no_grad():
        for i, (img_list_batch, _) in enumerate(val_loader):
            if i == 1:
                timer.start()

            with timer.counter('forward'):
                img_tensor_batch = torch.stack([aa.img for aa in img_list_batch], dim=0).cuda()
                c_pred, box_pred, iou_pred, anchors = model(img_tensor_batch)

            with timer.counter('post_process'):
                resized_size = [aa.resized_size for aa in img_list_batch]
                pred_batch = post_process(cfg, c_pred, box_pred, iou_pred, anchors, resized_size)

            with timer.counter('accumulate'):
                for pred in pred_batch:
                    pred.to_cpu()

                for img_list, pred in zip(img_list_batch, pred_batch):
                    if pred.box.shape[0] == 0:
                        continue

                    original_id = dataset.id_img_map[img_list.id]
                    pred.resize(img_list.ori_size)
                    pred.convert_mode('x1y1wh')  # COCO expects [x, y, width, height] boxes

                    boxes = pred.box.tolist()
                    score = pred.score.tolist()
                    label = pred.label.tolist()
                    mapped_labels = [dataset.to_category_id[lab] for lab in label]

                    coco_results.extend([{'image_id': original_id,
                                          'category_id': mapped_labels[k],
                                          'bbox': box,
                                          'score': score[k]} for k, box in enumerate(boxes)])

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
                time_name = ['batch', 'data', 'forward', 'post_process', 'accumulate']
                t_t, t_d, t_f, t_pp, t_acc = timer.get_times(time_name)
                fps, t_fps = 1 / (t_d + t_f + t_pp), 1 / t_t
                bar_str = bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{dl}, fps: {fps:.2f} | total fps: {t_fps:.2f} | t_t: {t_t:.3f} | '
                      f't_d: {t_d:.3f} | t_f: {t_f:.3f} | t_pp: {t_pp:.3f} | t_acc: {t_acc:.3f}', end='')
            temp = aa

    print('\n\nTest ended, doing evaluation...')

    json_name = cfg.weight.split('/')[-1].split('.')[0]
    file_path = f'results/{json_name}.json'
    with open(file_path, 'w') as f:
        json.dump(coco_results, f)

    coco_dt = dataset.coco.loadRes(file_path)

    if cfg.val_api == 'Improved COCO':
        from my_cocoeval.cocoeval import SelfEval
        bbox_eval = SelfEval(dataset.coco, coco_dt, all_points=True)
    else:
        from pycocotools.cocoeval import COCOeval
        bbox_eval = COCOeval(dataset.coco, coco_dt, iouType='bbox')

    bbox_eval.evaluate()
    bbox_eval.accumulate()
    bbox_eval.summarize()

    if not during_training:
        if cfg.val_api == 'Improved COCO':
            bbox_eval.draw_curve()
        else:
            compute_thre_per_class(bbox_eval)
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    # TODO: Fast Mask Re-scoring is currently not supported in evalimage, evalimages, and evalvideo.
    if args.image is not None:
        if ":" in args.image:
            inp, out = args.image.split(":")
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(":")
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ":" in args.video:
            inp, out = args.video.split(":")
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive).
        # Index: ap_data[type][iouIdx][classIdx]
        ap_data = {
            "box": [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
            "mask": [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
        }
        detections = Detections()
    else:
        timer.disable("Load Data")

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids.
        #
        # On Python 3.5, dictionary key order is random, while on 3.6+ it's insertion
        # order, so there the images would come in the order they appear in the
        # annotations file. For some reason, the first images in the annotations file
        # are the hardest. To combat this, use a hard-coded hash function on the image
        # ids to shuffle the indices. That way, no matter the Python version or how
        # pycocotools handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        for it, image_idx in enumerate(dataset_indices):
            timer.reset()

            with timer.env("Load Data"):
                img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

                # Test flag, do not upvote
                if cfg.mask_proto_debug:
                    with open("scripts/info.txt", "w") as f:
                        f.write(str(dataset.ids[image_idx]))
                    np.save("scripts/gt.npy", gt_masks)

                batch = Variable(img.unsqueeze(0))
                if args.cuda:
                    batch = batch.cuda()

            with timer.env("Network Extra"):
                preds = net(batch)

            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd,
                             dataset.ids[image_idx], detections)

            # The first couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include them in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print("Avg FPS: %.4f" % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                fps = 1 / frame_times.get_avg() if it > 1 else 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print("\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps "
                      % (repr(progress_bar), it + 1, dataset_size, progress, fps), end="")

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print("Dumping detections...")
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print("Saving data...")
                    with open(args.ap_data_file, "wb") as f:
                        pickle.dump(ap_data, f)
                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print("Stats for the last frame:")
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print("Average: %5.2f fps, %5.2f ms" % (1 / avg_seconds, 1000 * avg_seconds))
    except KeyboardInterrupt:
        print("Stopping...")
def main():
    parser = argparse.ArgumentParser(description='YOLACT Detection.')
    parser.add_argument('--weight', default='weights/best_30.5_res101_coco_392000.pth', type=str)
    parser.add_argument('--image', default=None, type=str, help='The folder of images for detecting.')
    parser.add_argument('--video', default=None, type=str, help='The path of the video to evaluate.')
    parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')
    parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
    parser.add_argument('--hide_mask', default=False, action='store_true', help='Hide masks in results.')
    parser.add_argument('--hide_bbox', default=False, action='store_true', help='Hide boxes in results.')
    parser.add_argument('--hide_score', default=False, action='store_true', help='Hide scores in results.')
    parser.add_argument('--cutout', default=False, action='store_true', help='Cut out each object and save.')
    parser.add_argument('--save_lincomb', default=False, action='store_true', help='Show the generating process of masks.')
    parser.add_argument('--no_crop', default=False, action='store_true',
                        help='Do not crop the output masks with the predicted bounding box.')
    parser.add_argument('--real_time', default=False, action='store_true', help='Show the detection results in real time.')
    parser.add_argument('--visual_thre', default=0.3, type=float,
                        help='Detections with a score under this threshold will be removed.')

    args = parser.parse_args()
    prefix = re.findall(r'best_\d+\.\d+_', args.weight)[0]
    suffix = re.findall(r'_\d+\.pth', args.weight)[0]
    args.cfg = args.weight.split(prefix)[-1].split(suffix)[0]
    cfg = get_config(args, mode='detect')

    net = Yolact(cfg)
    net.load_weights(cfg.weight, cfg.cuda)
    net.eval()

    if cfg.cuda:
        cudnn.benchmark = True
        cudnn.fastest = True
        net = net.cuda()

    # detect images
    if cfg.image is not None:
        dataset = COCODetection(cfg, mode='detect')
        data_loader = data.DataLoader(dataset, 1, num_workers=2, shuffle=False,
                                      pin_memory=True, collate_fn=detect_collate)
        ds = len(data_loader)
        assert ds > 0, 'No .jpg images found.'
        progress_bar = ProgressBar(40, ds)
        timer.reset()

        for i, (img, img_origin, img_name) in enumerate(data_loader):
            if i == 1:
                timer.start()

            if cfg.cuda:
                img = img.cuda()
            img_h, img_w = img_origin.shape[0:2]

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p,
                                                             proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p,
                                                             proto_p, img_h, img_w, cfg,
                                                             img_name=img_name)

            with timer.counter('save_img'):
                img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg,
                                     img_name=img_name)
                cv2.imwrite(f'results/images/{img_name}', img_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(
                    ['batch', 'data', 'forward', 'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        print('\nFinished, saved in: results/images.')

    # detect videos
    elif cfg.video is not None:
        vid = cv2.VideoCapture(cfg.video)

        target_fps = round(vid.get(cv2.CAP_PROP_FPS))
        frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        name = cfg.video.split('/')[-1]
        video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"),
                                       target_fps, (frame_width, frame_height))

        progress_bar = ProgressBar(40, num_frames)
        timer.reset()
        t_fps = 0

        for i in range(num_frames):
            if i == 1:
                timer.start()

            frame_origin = vid.read()[1]
            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg.img_size)

            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p,
                                                             proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p,
                                                             proto_p, img_h, img_w, cfg)

            with timer.counter('save_img'):
                frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin,
                                       cfg, fps=t_fps)

            if cfg.real_time:
                cv2.imshow('Detection', frame_numpy)
                cv2.waitKey(1)
            else:
                video_writer.write(frame_numpy)

            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(
                    ['batch', 'data', 'forward', 'nms', 'after_nms', 'save_img'])
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rDetecting: {bar_str} {i + 1}/{num_frames}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')

        if not cfg.real_time:
            print(f'\n\nFinished, saved in: results/videos/{name}')

        vid.release()
        video_writer.release()
def savevideo(net: Yolact, in_path: str, out_path: str):
    vid = cv2.VideoCapture(in_path)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    print('target_fps:{} frame_width:{} frame_height:{} num_frames:{}'.format(
        target_fps, frame_width, frame_height, num_frames))

    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                          target_fps, (frame_width, frame_height))

    frame_freq = 10
    transform = FastBaseTransform()
    frame_times = MovingAverage()
    progress_bar = ProgressBar(30, num_frames)
    preds = None

    drawing = True
    start_pt = None
    end_pt = None

    # Load the first frame here so the user can pick a region of interest.
    ret, frame = vid.read()

    # Mouse callback: the first left click sets the top-left corner of the ROI,
    # the second sets the bottom-right corner and closes the window.
    def draw(event, x, y, flags, params):
        nonlocal start_pt, end_pt, drawing
        if event == 1:  # cv2.EVENT_LBUTTONDOWN
            print('event 1 fired')
            if start_pt is None:
                start_pt = [x, y]
            else:
                end_pt = [x, y]
                cv2.destroyAllWindows()
                drawing = False
            print(x, y)

    # Make a window for the image and attach the mouse callback.
    cv2.namedWindow('Window')
    cv2.setMouseCallback('Window', draw)
    cv2.imshow('Window', frame)
    cv2.waitKey(0)
    print(start_pt, end_pt)

    try:
        for i in range(num_frames):
            timer.reset()
            with timer.env('Video'):
                # Run the network only on every frame_freq-th frame; care is taken
                # that the first frame is always processed.
                ret, full_frame = vid.read()
                if ret:
                    frame = full_frame[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0]].copy()
                    frame = torch.from_numpy(frame).cuda().float()
                    # TODO: needs adjusting for multi-frame batches
                    if i % frame_freq == 0:
                        batch = transform(frame.unsqueeze(0))
                        preds = net(batch)
                    current_preds = make_copy(preds)
                    processed = prep_display(current_preds, frame, None, None,
                                             undo_transform=False, class_color=True)
                    # Paste the processed crop back into the full frame before writing.
                    full_frame[start_pt[1]:end_pt[1], start_pt[0]:end_pt[0]] = processed
                    out.write(full_frame)

            if i > 1:
                frame_times.add(timer.total_time())
                fps = 1 / frame_times.get_avg()
                progress = (i + 1) / num_frames * 100
                progress_bar.set_val(i + 1)
                print('\rProcessing Frames %s %6d / %6d (%5.2f%%) %5.2f fps ' %
                      (repr(progress_bar), i + 1, num_frames, progress, fps), end='')
    except KeyboardInterrupt:
        print('Stopping early.')

    vid.release()
    out.release()
    print()
# Not sure if this helps.
# shuffle must be False if a sampler is specified.
data_loader = data.DataLoader(dataset, cfg.bs_per_gpu, num_workers=cfg.bs_per_gpu // 2,
                              shuffle=(train_sampler is None), collate_fn=train_collate,
                              pin_memory=False, sampler=train_sampler)

epoch_seed = 0
map_tables = []
training = True
timer.reset()
step = start_step
val_step = start_step
writer = SummaryWriter(f'tensorboard_log/{cfg_name}')

try:  # try-except can shut down all processes after Ctrl + C.
    while training:
        if train_sampler:
            epoch_seed += 1
            train_sampler.set_epoch(epoch_seed)

        for images, targets, masks in data_loader:
            if cfg.warmup_until > 0 and step <= cfg.warmup_until:  # warm up learning rate
                for param_group in optimizer.param_groups:
                    param_group['lr'] = (cfg.lr - cfg.warmup_init) * (step / cfg.warmup_until) + cfg.warmup_init
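# The warm-up branch above linearly ramps the learning rate from cfg.warmup_init up to
# cfg.lr over the first cfg.warmup_until steps. The same schedule as a standalone
# function (a sketch; the training loop applies it in place instead):
def warmup_lr(step, base_lr, warmup_init, warmup_until):
    if warmup_until <= 0 or step > warmup_until:
        return base_lr
    return warmup_init + (base_lr - warmup_init) * step / warmup_until

# e.g. warmup_lr(0, 1e-2, 1e-4, 500) gives 1e-4, and warmup_lr(500, 1e-2, 1e-4, 500) gives 1e-2.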
def evaluate(net, dataset, max_num=-1, during_training=False, benchmark=False,
             cocoapi=False, traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    if benchmark:
        timer.disable('Data loading')
    else:
        # For each class and iou, stores tuples (score, isPositive).
        # Index: ap_data[type][iouIdx][classIdx]
        ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
                   'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
        make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)
            batch = Variable(img.unsqueeze(0))
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)

        if benchmark:
            prep_benchmark(nms_outs, h, w)
        else:
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # The first couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include them in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing: %s %d / %d (%.2f%%) %.2f fps' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps), end='')

    if benchmark:
        print('\n\nStats for the last frame:')
        timer.print_stats()
        avg_seconds = frame_times.get_avg()
        print('Average: %5.2f fps, %5.2f ms' % (1 / avg_seconds, 1000 * avg_seconds))
    else:
        if cocoapi:
            make_json.dump()
            print(f'\nJson files dumped, saved in: {json_path}, start evaluating.')

            gt_annotations = COCO(cfg.dataset.valid_info)
            bbox_dets = gt_annotations.loadRes(f'{json_path}/bbox_detections.json')
            mask_dets = gt_annotations.loadRes(f'{json_path}/mask_detections.json')

            print('\nEvaluating BBoxes:')
            bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
            bbox_eval.evaluate()
            bbox_eval.accumulate()
            bbox_eval.summarize()

            print('\nEvaluating Masks:')
            mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
            mask_eval.evaluate()
            mask_eval.accumulate()
            mask_eval.summarize()
            return

        table, mask_row = calc_map(ap_data)
        print(table)
        return table, mask_row