def post_processing(self, output, padding_info, resize):
    """Decode raw network output into per-image detection arrays.

    Returns a list with one entry per image in the batch. Each entry is
    an array of shape [n, 6] whose columns are
    x1, y1, x2, y2, confidence, class_id.
    """
    per_image = []
    for det in ctdet_decode(*output):
        class_ids = det[:, -1]
        confidences = torch.sigmoid(det[:, 4])
        # Restore boxes to the size of the network's input image.
        boxes = det[:, :4] * cfg.down_ratio

        class_ids = class_ids.detach().cpu().numpy()
        confidences = confidences.detach().cpu().numpy()
        boxes = boxes.detach().cpu().numpy()
        # Restore boxes to the original image size.
        boxes = (boxes - padding_info) / resize

        # Start from an empty (0, 6) array so an image without any
        # detections still yields a well-formed result.
        chunks = [np.zeros((0, 6))]
        for class_index in range(1, len(class_names)):
            mask = class_ids == class_index
            chunks.append(np.hstack((
                boxes[mask, :],
                confidences[mask].reshape(-1, 1),
                class_ids[mask].reshape(-1, 1),
            )))
        per_image.append(np.vstack(chunks))
    return per_image
def val_map(epoch):
    """Evaluate the global model on the validation loader for ``epoch``.

    Detections from every test scale are decoded, mapped back to image
    coordinates, merged per class and limited to the 100 best-scoring
    boxes per image. The result of ``val_dataset.run_eval`` is printed and
    its first entry is logged under ``val_mAP/mAP``.
    """
    print('\n Val@Epoch: %d' % epoch)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100

    results = {}
    with torch.no_grad():
        for sample in val_loader:
            img_id, inputs = sample[0]
            detections = []
            for scale in inputs:
                meta = inputs[scale]
                meta['image'] = meta['image'].to(cfg.device)
                output = model(meta['image'])[-1]

                dets = ctdet_decode(*output, K=cfg.test_topk)
                num_cols = dets.shape[2]
                dets = dets.detach().cpu().numpy().reshape(-1, num_cols)

                # Map both box corners from feature-map space back to
                # image space.
                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

                clses = dets[:, -1]
                top_preds = {}
                for j in range(val_dataset.num_classes):
                    picked = dets[clses == j, :5].astype(np.float32)
                    picked[:, :4] /= scale
                    # Result keys are 1-based class ids.
                    top_preds[j + 1] = picked
                detections.append(top_preds)

            class_ids = range(1, val_dataset.num_classes + 1)
            bbox_and_scores = {}
            for j in class_ids:
                bbox_and_scores[j] = np.concatenate(
                    [d[j] for d in detections], axis=0)

            scores = np.hstack([bbox_and_scores[j][:, 4] for j in class_ids])
            if len(scores) > max_per_image:
                # Keep only boxes scoring at least as high as the
                # max_per_image-th best score.
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in class_ids:
                    keep_inds = bbox_and_scores[j][:, 4] >= thresh
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print(eval_results)
    summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
def Evaluate(epoch, model):
    """Evaluate ``model`` on the global ``data_loader`` and report speed.

    Args:
        epoch: Epoch number; only used in the progress message.
        model: Network to evaluate. It is switched to eval mode.

    Returns:
        The first entry of ``dataset.run_eval(...)`` (presumably mAP;
        confirm against ``run_eval``).
    """
    print('\n Evaluate@Epoch: %d' % epoch)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start_time = time.perf_counter()
    print('Start time %s Seconds' % start_time)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100
    num_images = 0
    results = {}
    with torch.no_grad():
        for sample in data_loader:
            img_id, inputs, img_path = sample[0]
            detections = []
            for scale in inputs:
                meta = inputs[scale]
                meta['image'] = meta['image'].to(cfg.device)
                # The last element of the model output holds the tensors
                # expected by ctdet_decode.
                output = model(meta['image'])[-1]
                # Last axis of dets: box (4), score, class id.
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                # Map both box corners back to image coordinates.
                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

                clses = dets[:, -1]
                top_preds = {}
                for j in range(dataset.num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale
                detections.append(top_preds)

            bbox_and_scores = {
                j: np.concatenate([d[j] for d in detections], axis=0)
                for j in range(1, dataset.num_classes + 1)
            }
            scores = np.hstack([
                bbox_and_scores[j][:, 4]
                for j in range(1, dataset.num_classes + 1)
            ])
            if len(scores) > max_per_image:
                # Keep only the max_per_image best-scoring boxes.
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores
            num_images += 1

    # Timing stops before run_eval so FPS covers inference only.
    end_time = time.perf_counter()
    eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print(eval_results)
    print('End time %s Seconds' % end_time)
    Run_time = end_time - start_time
    # Use the number of images actually processed instead of a
    # hard-coded 100.
    FPS = num_images / Run_time if Run_time > 0 else float('inf')
    print('FPS %s ' % FPS)
    return eval_results[0]
def main():
    """Run the detector on every frame found in ``cfg.img_dir``.

    Detections scoring above ``cfg.detect_thres`` are drawn on the frame,
    and written to ``cfg.output_text_dir`` as
    ``frame,index,x,y,width,height,score`` lines. Every readable frame is
    shown in a window and appended to the video at
    ``cfg.output_video_dir``. Pressing ``q`` in the window stops early.
    Frames that cannot be read are skipped.

    Raises:
        NotImplementedError: if ``cfg.arch`` is not an hourglass variant.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    is_coco = cfg.dataset == 'coco'
    num_classes = 80 if is_coco else 4
    names = COCO_NAMES if is_coco else DETRAC_NAMES
    mean = np.array(COCO_MEAN if is_coco else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if is_coco else DETRAC_STD,
                   dtype=np.float32)[None, None, :]
    padding = 127 if 'hourglass' in cfg.arch else 31

    # Output video configuration.
    video_out = cv2.VideoWriter(cfg.output_video_dir,
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                cfg.video_fps,
                                (cfg.video_width, cfg.video_height))
    speed_list = []
    try:
        with open(cfg.output_text_dir, 'w') as text_out:
            print('Creating model and recover from checkpoint ...')
            if 'hourglass' in cfg.arch:
                model = exkp(n=5, nstack=2,
                             dims=[256, 256, 384, 384, 384, 512],
                             modules=[2, 2, 2, 2, 2, 4],
                             num_classes=num_classes)
            else:
                raise NotImplementedError
            model = load_demo_model(model, cfg.ckpt_dir)
            model = model.to(cfg.device)
            model.eval()

            frame_list = sorted(os.listdir(cfg.img_dir))
            for frame_id, frame_name in enumerate(frame_list):
                image_path = os.path.join(cfg.img_dir, frame_name)
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread returns None for unreadable/non-image
                    # files; skip them instead of crashing.
                    print('Skipping unreadable frame:', image_path)
                    continue
                output_image = image.copy()
                height, width = image.shape[0:2]

                imgs = {}
                for scale in cfg.test_scales:
                    new_height = int(height * scale)
                    new_width = int(width * scale)
                    if cfg.img_size > 0:
                        img_height, img_width = cfg.img_size, cfg.img_size
                        center = np.array(
                            [new_width / 2., new_height / 2.],
                            dtype=np.float32)
                        scaled_size = max(height, width) * 1.0
                        scaled_size = np.array([scaled_size, scaled_size],
                                               dtype=np.float32)
                    else:
                        # Round each side up to the next multiple of
                        # (padding + 1).
                        img_height = (new_height | padding) + 1
                        img_width = (new_width | padding) + 1
                        center = np.array(
                            [new_width // 2, new_height // 2],
                            dtype=np.float32)
                        scaled_size = np.array([img_width, img_height],
                                               dtype=np.float32)

                    img = cv2.resize(image, (new_width, new_height))
                    trans_img = get_affine_transform(
                        center, scaled_size, 0, [img_width, img_height])
                    img = cv2.warpAffine(img, trans_img,
                                         (img_width, img_height))
                    img = img.astype(np.float32) / 255.
                    img -= mean
                    img /= std
                    # From [H, W, C] to [1, C, H, W].
                    img = img.transpose(2, 0, 1)[None, :, :, :]

                    imgs[scale] = {
                        'image': torch.from_numpy(img).float(),
                        'center': np.array(center),
                        'scale': np.array(scaled_size),
                        'fmap_h': np.array(img_height // 4),
                        'fmap_w': np.array(img_width // 4),
                    }

                with torch.no_grad():
                    detections = []
                    start_time = time.time()
                    for scale in imgs:
                        meta = imgs[scale]
                        meta['image'] = meta['image'].to(cfg.device)
                        output = model(meta['image'])[-1]
                        dets = ctdet_decode(*output, K=cfg.test_topk)
                        dets = dets.detach().cpu().numpy().reshape(
                            1, -1, dets.shape[2])[0]

                        fmap_size = (meta['fmap_w'], meta['fmap_h'])
                        dets[:, :2] = transform_preds(
                            dets[:, 0:2], meta['center'], meta['scale'],
                            fmap_size)
                        dets[:, 2:4] = transform_preds(
                            dets[:, 2:4], meta['center'], meta['scale'],
                            fmap_size)

                        cls = dets[:, -1]
                        top_preds = {}
                        for j in range(num_classes):
                            inds = (cls == j)
                            top_preds[j + 1] = dets[inds, :5].astype(
                                np.float32)
                            top_preds[j + 1][:, :4] /= scale
                        detections.append(top_preds)

                    bbox_and_scores = {}
                    for j in range(1, num_classes + 1):
                        bbox_and_scores[j] = np.concatenate(
                            [d[j] for d in detections], axis=0)
                        if len(cfg.test_scales) > 1:
                            soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
                    scores = np.hstack([bbox_and_scores[j][:, 4]
                                        for j in range(1, num_classes + 1)])

                    if len(scores) > max_per_image:
                        # Keep only the max_per_image best-scoring boxes.
                        kth = len(scores) - max_per_image
                        thresh = np.partition(scores, kth)[kth]
                        for j in range(1, num_classes + 1):
                            keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                            bbox_and_scores[j] = \
                                bbox_and_scores[j][keep_inds]

                speed_list.append(time.time() - start_time)

                counter = 1
                for lab in bbox_and_scores:
                    if is_coco and names[lab] not in DETRAC_compatible_names:
                        continue
                    for x1, y1, x2, y2, score in bbox_and_scores[lab]:
                        if score <= cfg.detect_thres:
                            continue
                        cv2.rectangle(output_image,
                                      pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 255, 0), thickness=2)
                        # One line per detection: frame number (1-based),
                        # running index, x, y, width, height, score.
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                            str(frame_id + 1), counter, x1, y1,
                            x2 - x1, y2 - y1, score)
                        counter += 1
                        text_out.write(new_line)

                cv2.imshow('Frames', output_image)
                video_out.write(output_image)
                if cv2.waitKey(5) & 0xFF == ord('q'):
                    break
    finally:
        # Finalise the video container even if processing fails midway.
        video_out.release()

    if speed_list:
        print('Test frame rate:', 1. / np.mean(speed_list))
    else:
        print('Test frame rate: no frames were processed')
def main():
    """Evaluate the damage detector and save one annotated image per sample.

    For every validation image the detections are decoded, merged across
    test scales, limited to the 100 best boxes and passed to
    ``dataset.run_eval``. Boxes scoring above 0.2 are also drawn on the
    image, which is saved under ``data/damage/Predict_images``.

    Raises:
        NotImplementedError: if ``cfg.arch`` matches none of the supported
            architectures.
    """
    logger = create_logger(save_dir=cfg.log_dir)
    log = logger.info
    log(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    Dataset_eval = Damage_eval  # your own data set
    dataset = Dataset_eval(cfg.data_dir, split='val',
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                              shuffle=False, num_workers=1,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    log('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64,
                                    num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'res_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64,
                                         num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64,
                                        num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18, head_conv=64,
                                       num_classes=3)
    else:
        # Without this branch an unknown arch failed later with an
        # unbound ``model`` instead of a clear error.
        raise NotImplementedError(cfg.arch)

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    save_root = 'data/damage/Predict_images'
    # savefig does not create missing directories.
    os.makedirs(save_root, exist_ok=True)
    colors = COCO_COLORS
    names = COCO_NAMES

    def save_prediction_image(img_path, bbox_and_scores):
        """Draw boxes scoring above 0.2 on the image and save the figure."""
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None when the file cannot be read.
            log('could not read image: %s', img_path)
            return
        fig = plt.figure(0)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        for lab in bbox_and_scores:
            for x1, y1, x2, y2, score in bbox_and_scores[lab]:
                # NOTE(review): clipping to 0..511 presumably assumes
                # 512x512 images; confirm against the dataset.
                x1 = max(x1, 0)
                y1 = max(y1, 0)
                x2 = min(x2, 511)
                y2 = min(y2, 511)
                if score > 0.2:
                    plt.gca().add_patch(
                        Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  edgecolor=colors[lab], facecolor='none'))
                    plt.text(x1 - 12, y1 - 12, names[lab],
                             bbox=dict(facecolor=colors[lab], alpha=0.5),
                             fontsize=7, color='k')
        fig.patch.set_visible(False)
        # The output file name is the last 10 characters of the path.
        # NOTE(review): presumably relies on fixed-length file names.
        save_path = os.path.join(save_root, img_path[-10:])
        plt.axis('off')
        plt.savefig(save_path, dpi=400, transparent=True,
                    bbox_inches="tight", pad_inches=0.1)
        plt.close(0)

    results = {}
    with torch.no_grad():
        for sample in tqdm(data_loader):
            img_id, inputs, img_path = sample[0]
            log('id%s ', img_id)

            detections = []
            for scale in inputs:
                meta = inputs[scale]
                meta['image'] = meta['image'].to(cfg.device)
                output = model(meta['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

                cls = dets[:, -1]
                top_preds = {}
                for j in range(dataset.num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale
                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                bbox_and_scores[j] = np.concatenate(
                    [d[j] for d in detections], axis=0)
                if len(dataset.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, dataset.num_classes + 1)])

            if len(scores) > max_per_image:
                # Keep only the max_per_image best-scoring boxes.
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            save_prediction_image(img_path, bbox_and_scores)
            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    log(eval_results)
def main():
    """Run the detector on the single image ``cfg.fn_image``.

    The raw (pre-transform) detections are handed to the debugger, then
    the boxes are mapped back to image coordinates, grouped per class and
    limited to the 100 best-scoring ones before the debugger shows all
    collected images.
    """
    cfg = get_cfg()
    max_per_image = 100
    num_classes = cfg.num_classes

    print('Loading model...')
    model, shift_buffer = load_network_arch(cfg.arch, cfg.num_classes,
                                            cfg.head_conv, pretrained=False)
    model = load_model(model, cfg.model_path, is_nested=False,
                       map_location='cpu')
    model = model.to(cfg.device)
    model.eval()

    debugger = Debugger(dataset=cfg.dataset, ipynb=False, theme='black')
    all_inputs = [load_and_transform_image(cfg.fn_image, cfg.img_size)]
    class_ids = range(1, num_classes + 1)

    results = {}
    with torch.no_grad():
        img_id, inputs = all_inputs[0]
        detections = []
        for scale in [1.]:
            meta = inputs[scale]
            img_numpy = meta['image']
            img = torch.from_numpy(img_numpy).to(cfg.device)

            output = model(img)[-1]
            dets = ctdet_decode(*output, K=cfg.test_topk)
            num_cols = dets.shape[2]
            dets = dets.detach().cpu().numpy().reshape(-1, num_cols)

            # The debug image uses dets before they are mapped back to
            # image coordinates.
            add_debug_image(debugger, img_numpy, dets, output, scale)

            fmap_size = (meta['fmap_w'], meta['fmap_h'])
            dets[:, :2] = transform_preds(
                dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

            cls = dets[:, -1]
            top_preds = {}
            for j in range(num_classes):
                picked = dets[cls == j, :5].astype(np.float32)
                picked[:, :4] /= scale
                # Result keys are 1-based class ids.
                top_preds[j + 1] = picked
            detections.append(top_preds)

        bbox_and_scores = {
            j: np.concatenate([d[j] for d in detections], axis=0)
            for j in class_ids
        }
        scores = np.hstack([bbox_and_scores[j][:, 4] for j in class_ids])

        if len(scores) > max_per_image:
            # Keep only the max_per_image best-scoring boxes.
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in class_ids:
                keep_inds = bbox_and_scores[j][:, 4] >= thresh
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        results[img_id] = bbox_and_scores

    debugger.show_all_imgs(pause=True)
class CenterNet(object):
    """Training and evaluation wrapper around a CenterNet detector."""

    def __init__(self, cfg):
        """Build the network, optionally load weights, create the optimiser.

        Args:
            cfg: Configuration object. This constructor reads ``arch``,
                ``num_classes``, ``pretrained_weights``, ``save_folder``,
                ``num_gpu``, ``device``, ``ckpt_dir`` and ``lr``.
        """
        self.cfg = cfg

        if cfg.arch == 'resnet50':
            network = get_pose_net(50, 64, cfg.num_classes)
        else:
            network = get_hourglass(cfg.arch, num_classes=cfg.num_classes)
        self.model = network

        if cfg.pretrained_weights is not None:
            weight_file = os.path.join(cfg.save_folder,
                                       cfg.pretrained_weights)
            load_model(self.model, weight_file)
            print("load pretrain mode:{}".format(weight_file))

        multi_gpu = cfg.num_gpu > 1 and torch.cuda.is_available()
        if multi_gpu:
            self.model = torch.nn.DataParallel(self.model).cuda()
        else:
            self.model = self.model.to(cfg.device)

        self.save_folder = cfg.ckpt_dir
        self.optim = optim.Adam(self.model.parameters(), lr=cfg.lr)
        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optim,
                                                          gamma=0.99)

    def train(self, data_counts, data_loader, eval_loder, n_epochs):
        """Train for ``n_epochs`` and checkpoint whenever the mAP improves.

        A checkpoint is written only when the first COCO bbox statistic
        exceeds the best value so far, which starts at 0.28. The learning
        rate scheduler advances once per epoch.
        """
        best_map = 0.28
        for epoch in range(n_epochs):
            evaluator = self.train_epoch(data_counts, data_loader,
                                         eval_loder, epoch, n_epochs)
            eval_map = evaluator.coco_eval['bbox'].stats[0]
            if eval_map > best_map:
                best_map = eval_map
                ckpt_name = 'centernet_Epoch{0}_map{1}.pth'.format(
                    epoch, best_map)
                ckpt_path = os.path.join(self.save_folder, ckpt_name)
                torch.save(self.model.state_dict(), ckpt_path)
                print('weights {0} saved success!'.format(ckpt_path))
            self.scheduler.step()

    def train_epoch(self, data_counts, data_loader, eval_loder, epoch,
                    n_epochs):
        """Run one optimisation pass over ``data_loader``, then evaluate.

        Returns:
            The evaluator returned by ``_evaluate(eval_loder)``.
        """
        progress = tqdm.tqdm(total=data_counts,
                             desc=f'Epoch {epoch}/{n_epochs}',
                             unit='img', ncols=150)
        with progress as pbar:
            for batch in data_loader:
                tick = time.time()
                for key in batch:
                    batch[key] = batch[key].to(device=self.cfg.device,
                                               non_blocking=True)

                outputs = self.model(batch['image'])
                hmap, regs, w_h_ = zip(*outputs)

                inds = batch['inds']
                regs = [_tranpose_and_gather_feature(r, inds) for r in regs]
                w_h_ = [_tranpose_and_gather_feature(r, inds) for r in w_h_]

                hmap_loss = _neg_loss(hmap, batch['hmap'])
                reg_loss = _reg_loss(regs, batch['regs'],
                                     batch['ind_masks'])
                w_h_loss = _reg_loss(w_h_, batch['w_h_'],
                                     batch['ind_masks'])
                loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss

                self.model.zero_grad()
                loss.backward()
                self.optim.step()

                batch_time = time.time() - tick
                pbar.set_postfix(
                    hmap_loss=hmap_loss.item(),
                    reg_loss=reg_loss.item(),
                    w_h_loss=w_h_loss.item(),
                    LR=self.optim.param_groups[0]['lr'],
                    Batchtime=batch_time,
                )
                pbar.update(batch['image'].shape[0])

        cons_acc = self._evaluate(eval_loder)
        return cons_acc

    @torch.no_grad()
    def _evaluate(self, data_loader):
        """Evaluate the current weights with the COCO bbox evaluator.

        A fresh network is built, loaded with the training model's state
        dict and run over ``data_loader``.

        Returns:
            The ``CocoEvaluator`` after accumulate and summarize.
        """
        coco = convert_to_coco_api(data_loader.dataset, bbox_fmt='coco')
        coco_evaluator = CocoEvaluator(coco, iou_types=["bbox"],
                                       bbox_fmt='coco')
        device = self.cfg.device

        if self.cfg.arch == 'resnet50':
            eval_net = get_pose_net(50, 64, self.cfg.num_classes)
        else:
            eval_net = get_hourglass(self.cfg.arch,
                                     num_classes=self.cfg.num_classes,
                                     is_training=False)

        # Wrap the evaluation network the same way as the training model
        # before copying the state dict across.
        if self.cfg.num_gpu > 1 and torch.cuda.is_available():
            eval_net = torch.nn.DataParallel(eval_net).cuda()
        else:
            eval_net = eval_net.to(device)
        eval_net.load_state_dict(self.model.state_dict())
        eval_net = eval_net.to(device)
        eval_net.eval()

        for inputs, targets in data_loader:
            targets = [{k: v.to(device) for k, v in t.items()}
                       for t in targets]
            model_input = torch.stack(inputs, 0).to(device)
            output = eval_net(model_input)[-1]
            dets = ctdet_decode(*output, K=self.cfg.test_topk)

            res = {}
            for target, det in zip(targets, dets):
                boxes = det[:, :4]
                # Transform [x1, y1, x2, y2] to [x1, y1, w, h].
                boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]
                boxes = boxes.reshape((boxes.shape[0], 1, 4))
                res[target["image_id"].item()] = {
                    "boxes": boxes,
                    "scores": det[:, 4],
                    "labels": det[:, -1],
                }
            coco_evaluator.update(res)

        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        del eval_net
        return coco_evaluator
def main():
    """Detect objects in the single image ``cfg.img_dir`` and plot them.

    The image is preprocessed once per entry of ``cfg.test_scales``.
    Detections are merged across scales and limited to the 100 best boxes.
    Boxes scoring above 0.3 are drawn with matplotlib; the figure is saved
    to ``data/demo_results.png`` and shown.

    Raises:
        FileNotFoundError: if the image cannot be read.
        NotImplementedError: if ``cfg.arch`` is neither an hourglass nor a
            resdcn variant.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    is_coco = cfg.dataset == 'coco'
    # Single source of truth for the class count; the per-class split
    # used a hard-coded 80 regardless of the dataset.
    num_classes = 80 if is_coco else 20
    mean = np.array(COCO_MEAN if is_coco else VOC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if is_coco else VOC_STD,
                   dtype=np.float32)[None, None, :]

    image = cv2.imread(cfg.img_dir)
    if image is None:
        # cv2.imread returns None instead of raising on failure.
        raise FileNotFoundError(
            'could not read image: {}'.format(cfg.img_dir))
    height, width = image.shape[0:2]
    padding = 127 if 'hourglass' in cfg.arch else 31

    imgs = {}
    for scale in cfg.test_scales:
        new_height = int(height * scale)
        new_width = int(width * scale)

        if cfg.img_size > 0:
            img_height, img_width = cfg.img_size, cfg.img_size
            center = np.array([new_width / 2., new_height / 2.],
                              dtype=np.float32)
            scaled_size = max(height, width) * 1.0
            scaled_size = np.array([scaled_size, scaled_size],
                                   dtype=np.float32)
        else:
            # Round each side up to the next multiple of (padding + 1).
            img_height = (new_height | padding) + 1
            img_width = (new_width | padding) + 1
            center = np.array([new_width // 2, new_height // 2],
                              dtype=np.float32)
            scaled_size = np.array([img_width, img_height],
                                   dtype=np.float32)

        img = cv2.resize(image, (new_width, new_height))
        trans_img = get_affine_transform(center, scaled_size, 0,
                                         [img_width, img_height])
        img = cv2.warpAffine(img, trans_img, (img_width, img_height))

        img = img.astype(np.float32) / 255.
        img -= mean
        img /= std
        # From [H, W, C] to [1, C, H, W].
        img = img.transpose(2, 0, 1)[None, :, :, :]

        if cfg.test_flip:
            # Append a horizontally flipped copy along the batch axis.
            img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

        imgs[scale] = {
            'image': torch.from_numpy(img).float(),
            'center': np.array(center),
            'scale': np.array(scaled_size),
            'fmap_h': np.array(img_height // 4),
            'fmap_w': np.array(img_width // 4),
        }

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    with torch.no_grad():
        detections = []
        for scale in imgs:
            meta = imgs[scale]
            meta['image'] = meta['image'].to(cfg.device)

            output = model(meta['image'])[-1]
            dets = ctdet_decode(*output, K=cfg.test_topk)
            dets = dets.detach().cpu().numpy().reshape(
                1, -1, dets.shape[2])[0]

            fmap_size = (meta['fmap_w'], meta['fmap_h'])
            dets[:, :2] = transform_preds(
                dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

            cls = dets[:, -1]
            top_preds = {}
            for j in range(num_classes):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale
            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, num_classes + 1):
            bbox_and_scores[j] = np.concatenate(
                [d[j] for d in detections], axis=0)
            if len(cfg.test_scales) > 1:
                soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack([bbox_and_scores[j][:, 4]
                            for j in range(1, num_classes + 1)])

        if len(scores) > max_per_image:
            # Keep only the max_per_image best-scoring boxes.
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

    fig = plt.figure(0)
    colors = COCO_COLORS if is_coco else VOC_COLORS
    names = COCO_NAMES if is_coco else VOC_NAMES
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    for lab in bbox_and_scores:
        for x1, y1, x2, y2, score in bbox_and_scores[lab]:
            if score > 0.3:
                plt.gca().add_patch(
                    Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                              edgecolor=colors[lab], facecolor='none'))
                plt.text(x1 + 3, y1 + 3, names[lab] + '%.2f' % score,
                         bbox=dict(facecolor=colors[lab], alpha=0.5),
                         fontsize=7, color='k')

    fig.patch.set_visible(False)
    plt.axis('off')
    plt.savefig('data/demo_results.png', dpi=300, transparent=True)
    plt.show()
def main():
    """Render a video comparing detections with DETRAC ground truth.

    Ground-truth boxes for ``cfg.video_name`` are read from the DETRAC
    test annotation XML and drawn in green; detections scoring above
    ``cfg.detect_thres`` are drawn in red (colors are BGR). Each frame is
    shown in a window and appended to ``<video_name>_compare.mkv`` under
    ``cfg.root_dir``. Pressing ``q`` stops early. Frames without
    annotations get no ground-truth boxes; unreadable frames are skipped.

    Raises:
        FileNotFoundError: if the annotation folder does not exist.
        NotImplementedError: if ``cfg.arch`` is not an hourglass variant.
    """
    # Load the ground-truth boxes of every annotated frame.
    data_type = 'Test'
    annotation_folder = 'DETRAC-{}-Annotations-XML'.format(data_type)
    annotation_path = os.path.join(cfg.label_dir, annotation_folder)
    if not os.path.exists(annotation_path):
        print('annotation_path not exist')
        raise FileNotFoundError(annotation_path)

    label_file = os.path.join(annotation_path, cfg.video_name + '.xml')
    root = ET.parse(label_file).getroot()

    Box_dict = {}
    for frame in root.iter('frame'):
        frame_num = int(frame.attrib['num'])
        target_list = frame.find('target_list')
        boxes = []
        # find() returns None when a frame has no <target_list> child.
        for target in (target_list if target_list is not None else ()):
            bbox = target.find('box').attrib
            left = float(bbox['left'])
            top = float(bbox['top'])
            box_width = float(bbox['width'])
            box_height = float(bbox['height'])
            # Stored as x1, y1, x2, y2.
            boxes.append([left, top, left + box_width, top + box_height])
        Box_dict[frame_num] = boxes

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 150

    is_coco = cfg.dataset == 'coco'
    num_classes = 80 if is_coco else 4
    names = COCO_NAMES if is_coco else DETRAC_NAMES
    mean = np.array(COCO_MEAN if is_coco else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if is_coco else DETRAC_STD,
                   dtype=np.float32)[None, None, :]
    padding = 127 if 'hourglass' in cfg.arch else 31

    # Output video configuration.
    video_out = cv2.VideoWriter(
        os.path.join(cfg.root_dir, cfg.video_name + '_compare.mkv'),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), cfg.video_fps,
        (cfg.video_width, cfg.video_height))
    speed_list = []
    try:
        print('Creating model and recover from checkpoint ...')
        if 'hourglass' in cfg.arch:
            model = exkp(n=5, nstack=2,
                         dims=[256, 256, 384, 384, 384, 512],
                         modules=[2, 2, 2, 2, 2, 4],
                         num_classes=num_classes)
        else:
            raise NotImplementedError
        model = load_demo_model(model, cfg.ckpt_dir)
        model = model.to(cfg.device)
        model.eval()

        frame_dir = os.path.join(cfg.img_dir, cfg.video_name)
        frame_list = sorted(os.listdir(frame_dir))
        for frame_id, frame_name in enumerate(frame_list):
            frame_n = frame_id + 1
            image_path = os.path.join(frame_dir, frame_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None for unreadable/non-image files.
                print('Skipping unreadable frame:', image_path)
                continue
            output_image = image.copy()
            height, width = image.shape[0:2]

            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)
                if cfg.img_size > 0:
                    img_height, img_width = cfg.img_size, cfg.img_size
                    center = np.array([new_width / 2., new_height / 2.],
                                      dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size],
                                           dtype=np.float32)
                else:
                    # Round each side up to the next multiple of
                    # (padding + 1).
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2],
                                      dtype=np.float32)
                    scaled_size = np.array([img_width, img_height],
                                           dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(
                    center, scaled_size, 0, [img_width, img_height])
                img = cv2.warpAffine(img, trans_img,
                                     (img_width, img_height))
                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                # From [H, W, C] to [1, C, H, W].
                img = img.transpose(2, 0, 1)[None, :, :, :]

                imgs[scale] = {
                    'image': torch.from_numpy(img).float(),
                    'center': np.array(center),
                    'scale': np.array(scaled_size),
                    'fmap_h': np.array(img_height // 4),
                    'fmap_w': np.array(img_width // 4),
                }

            with torch.no_grad():
                detections = []
                start_time = time.time()
                for scale in imgs:
                    meta = imgs[scale]
                    meta['image'] = meta['image'].to(cfg.device)
                    output = model(meta['image'])[-1]
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    fmap_size = (meta['fmap_w'], meta['fmap_h'])
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], meta['center'], meta['scale'],
                        fmap_size)
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], meta['center'], meta['scale'],
                        fmap_size)

                    cls = dets[:, -1]
                    top_preds = {}
                    for j in range(num_classes):
                        inds = (cls == j)
                        top_preds[j + 1] = dets[inds, :5].astype(
                            np.float32)
                        top_preds[j + 1][:, :4] /= scale
                    detections.append(top_preds)

                bbox_and_scores = {}
                for j in range(1, num_classes + 1):
                    bbox_and_scores[j] = np.concatenate(
                        [d[j] for d in detections], axis=0)
                    if len(cfg.test_scales) > 1:
                        soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
                scores = np.hstack([bbox_and_scores[j][:, 4]
                                    for j in range(1, num_classes + 1)])

                if len(scores) > max_per_image:
                    # Keep only the max_per_image best-scoring boxes.
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            speed_list.append(time.time() - start_time)

            # Ground truth in green. Frames absent from the XML simply
            # have no boxes instead of raising KeyError.
            for rect in Box_dict.get(frame_n, []):
                x1, y1, x2, y2 = (float(rect[0]), float(rect[1]),
                                  float(rect[2]), float(rect[3]))
                cv2.rectangle(output_image,
                              pt1=(int(x1), int(y1)),
                              pt2=(int(x2), int(y2)),
                              color=(0, 255, 0), thickness=2)

            # Detections in red.
            for lab in bbox_and_scores:
                if is_coco and names[lab] not in DETRAC_compatible_names:
                    continue
                for x1, y1, x2, y2, score in bbox_and_scores[lab]:
                    if score > cfg.detect_thres:
                        cv2.rectangle(output_image,
                                      pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 0, 255), thickness=2)

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Finalise the video container even if processing fails midway.
        video_out.release()

    if speed_list:
        print('Test frame rate:', 1. / np.mean(speed_list))
    else:
        print('Test frame rate: no frames were processed')
def main():
    """Evaluate a pretrained detector on the COCO or Pascal VOC val split.

    Detections from every test scale are decoded, mapped back to image
    coordinates, merged per class (with soft-NMS when more than one scale
    is used) and limited to the 100 best-scoring boxes per image before
    ``dataset.run_eval`` is called and its result logged.

    Raises:
        NotImplementedError: if ``cfg.arch`` is neither an hourglass nor a
            resdcn variant.
    """
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False
    max_per_image = 100

    if cfg.dataset == 'coco':
        Dataset_eval = COCO_eval
    else:
        Dataset_eval = PascalVOC_eval
    dataset = Dataset_eval(cfg.data_dir, split='val',
                           img_size=cfg.img_size,
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                              shuffle=False, num_workers=1,
                                              pin_memory=False,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        num_layers = int(cfg.arch.split('_')[-1])
        model = get_pose_net(num_layers=num_layers,
                             num_classes=dataset.num_classes)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    results = {}
    with torch.no_grad():
        for sample in data_loader:
            img_id, inputs = sample[0]

            detections = []
            for scale in inputs:
                meta = inputs[scale]
                meta['image'] = meta['image'].to(cfg.device)
                output = model(meta['image'])[-1]

                dets = ctdet_decode(*output, K=cfg.test_topk)
                num_cols = dets.shape[2]
                dets = dets.detach().cpu().numpy().reshape(-1, num_cols)

                # Map both box corners back to image coordinates.
                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], meta['center'], meta['scale'], fmap_size)
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], meta['center'], meta['scale'], fmap_size)

                cls = dets[:, -1]
                top_preds = {}
                for j in range(dataset.num_classes):
                    picked = dets[cls == j, :5].astype(np.float32)
                    picked[:, :4] /= scale
                    # Result keys are 1-based class ids.
                    top_preds[j + 1] = picked
                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                merged = np.concatenate([d[j] for d in detections], axis=0)
                bbox_and_scores[j] = merged
                if len(dataset.test_scales) > 1:
                    soft_nms(merged, Nt=0.5, method=2)

            scores = np.hstack([
                bbox_and_scores[j][:, 4]
                for j in range(1, dataset.num_classes + 1)
            ])
            if len(scores) > max_per_image:
                # Keep only the max_per_image best-scoring boxes.
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = bbox_and_scores[j][:, 4] >= thresh
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)