def initLatent(loader, model, Y, nViews, S, AVG=False):
    model.eval()
    nIters = len(loader)
    N = loader.dataset.nImages
    M = np.zeros((N, ref.J, 3))
    bar = Bar('==>', max=nIters)
    sum_sigma2 = 0
    cnt_sigma2 = 1
    for i, (input, target, meta) in enumerate(loader):
        output = (model(torch.autograd.Variable(input)).data).cpu().numpy()
        G = output.shape[0] // nViews
        output = output.reshape(G, nViews, ref.J, 3)
        if AVG:
            # Average all views after rigidly aligning each one to the first view.
            for g in range(G):
                id = int(meta[g * nViews, 1])
                for j in range(nViews):
                    RR, tt = horn87(output[g, j].transpose(), output[g, 0].transpose())
                    MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
                    M[id] += MM.copy() / nViews
        else:
            # Pick the view best supported by the shape dictionary Y (stride S).
            for g in range(G):
                # assert meta[g * nViews, 0] > 1 + ref.eps
                p = np.zeros(nViews)
                sigma2 = 0.1
                for j in range(nViews):
                    for kk in range(Y.shape[0] // S):
                        k = kk * S
                        d = Dis(Y[k], output[g, j])
                        sum_sigma2 += d
                        cnt_sigma2 += 1
                        p[j] += np.exp(-d / 2 / sigma2)
                id = int(meta[g * nViews, 1])
                M[id] = output[g, p.argmax()]
                if DEBUG and g == 0:
                    print('M[id]', id, M[id], p.argmax())
                    debugger = Debugger()
                    for j in range(nViews):
                        RR, tt = horn87(output[g, j].transpose(),
                                        output[g, p.argmax()].transpose())
                        MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
                        debugger.addPoint3D(MM, 'b')
                        debugger.addImg(input[g * nViews + j].numpy().transpose(1, 2, 0), j)
                    debugger.showAllImg()
                    debugger.addPoint3D(M[id], 'r')
                    debugger.show3D()
        Bar.suffix = 'Init : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Dis: {dis:.6f}'.format(
            i, nIters, total=bar.elapsed_td, eta=bar.eta_td,
            dis=sum_sigma2 / cnt_sigma2)
        bar.next()
    bar.finish()
    # print('mean sigma2', sum_sigma2 / cnt_sigma2)
    return M

def main():
    opt = opts().parse()
    if opt.loadModel == '':
        opt.loadModel = '../models/Pascal3D-cpu.pth'
    model = torch.load(opt.loadModel)
    img = cv2.imread(opt.demo)
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    img = Crop(img, c, s, 0, ref.inputRes).astype(np.float32).transpose(2, 0, 1) / 256.
    input = torch.from_numpy(img.copy()).float()
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    if opt.GPU > -1:
        model = model.cuda(opt.GPU)
        input_var = input_var.cuda(opt.GPU)
    output = model(input_var)
    hm = output[-1].data.cpu().numpy()
    debugger = Debugger()
    img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    inp = img.copy()
    star = (cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255)
    star[star > 255] = 255
    star[star < 0] = 0
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)
    ps = parseHeatmap(hm[0], thresh=0.1)
    canonical, pred, color, score = [], [], [], []
    for k in range(len(ps[0])):
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
        canonical.append([x, y, z])
        pred.append([ps[1][k], ref.outputRes - dep, ref.outputRes - ps[0][k]])
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                      1.0 * z / ref.outputRes))
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 4, (255, 255, 255), -1)
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 2,
                   (int(z * 4), int(y * 4), int(x * 4)), -1)
    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)
    pointS = canonical * 1.0 / ref.outputRes
    pointT = pred * 1.0 / ref.outputRes
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)
    rotated_pred = s * np.dot(R, canonical.transpose()).transpose() + t * ref.outputRes
    debugger.addImg(inp, 'inp')
    debugger.addImg(star, 'star')
    debugger.addImg(img, 'nms')
    debugger.addPoint3D(canonical / ref.outputRes - 0.5, c=color, marker='^')
    debugger.addPoint3D(pred / ref.outputRes - 0.5, c=color, marker='x')
    debugger.addPoint3D(rotated_pred / ref.outputRes - 0.5, c=color, marker='*')
    debugger.showAllImg(pause=True)
    debugger.show3D()

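# horn87, used throughout these scripts, is assumed to solve the closed-form
# absolute-orientation problem of Horn (1987): given two 3xN point sets, find
# the rotation that best aligns them (with weights, the repo's version also
# returns translation and scale). A minimal, repo-independent sketch of the
# rotation part via SVD (Kabsch), for reference only:
import numpy as np

def rigid_rotation(src, dst):
    """Rotation R minimizing ||R @ src - dst||^2 for centered 3xN point sets."""
    H = src @ dst.T                          # 3x3 cross-covariance
    U, _, Vt = np.linalg.svd(H)
    d = np.sign(np.linalg.det(Vt.T @ U.T))   # guard against reflections
    return Vt.T @ np.diag([1.0, 1.0, d]) @ U.T
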
def debug(self, batch, output, iter_id, dataset):
    opt = self.opt
    if 'pre_hm' in batch:
        output.update({'pre_hm': batch['pre_hm']})
    dets = generic_decode(output, K=opt.K, opt=opt)
    for k in dets:
        dets[k] = dets[k].detach().cpu().numpy()
    dets_gt = batch['meta']['gt_det']
    for i in range(1):
        debugger = Debugger(opt=opt, dataset=dataset)
        img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        if 'pre_img' in batch:
            pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * dataset.std + dataset.mean) * 255),
                              0, 255).astype(np.uint8)
            debugger.add_img(pre_img, 'pre_img_pred')
            debugger.add_img(pre_img, 'pre_img_gt')
            if 'pre_hm' in batch:
                pre_hm = debugger.gen_colormap(batch['pre_hm'][i].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')
        debugger.add_img(img, img_id='out_pred')
        if 'ltrb_amodal' in opt.heads:
            debugger.add_img(img, img_id='out_pred_amodal')
            debugger.add_img(img, img_id='out_gt_amodal')
        # Predictions
        for k in range(len(dets['scores'][i])):
            if dets['scores'][i, k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets['bboxes'][i, k] * opt.down_ratio,
                                       dets['clses'][i, k],
                                       dets['scores'][i, k], img_id='out_pred')
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] * opt.down_ratio,
                                           dets['clses'][i, k],
                                           dets['scores'][i, k],
                                           img_id='out_pred_amodal')
                if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                    debugger.add_coco_hp(dets['hps'][i, k] * opt.down_ratio,
                                         img_id='out_pred')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_pred')
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_pred')
        # Ground truth
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt['scores'][i])):
            if dets_gt['scores'][i][k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets_gt['bboxes'][i][k] * opt.down_ratio,
                                       dets_gt['clses'][i][k],
                                       dets_gt['scores'][i][k], img_id='out_gt')
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] * opt.down_ratio,
                                           dets_gt['clses'][i, k],
                                           dets_gt['scores'][i, k],
                                           img_id='out_gt_amodal')
                if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                    debugger.add_coco_hp(dets_gt['hps'][i][k] * opt.down_ratio,
                                         img_id='out_gt')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_gt')
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_gt')
        if 'hm_hp' in opt.heads:
            pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')
            debugger.add_blend_img(img, gt, 'gt_hmhp')
        if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
            dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
            calib = batch['meta']['calib'].detach().numpy() \
                if 'calib' in batch['meta'] else None
            det_pred = generic_post_process(
                opt, dets, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(),
                output['hm'].shape[2], output['hm'].shape[3],
                self.opt.num_classes, calib)
            det_gt = generic_post_process(
                opt, dets_gt, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(),
                output['hm'].shape[2], output['hm'].shape[3],
                self.opt.num_classes, calib)
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i],
                                      det_pred[i], calib[i],
                                      vis_thresh=opt.vis_thresh, img_id='add_pred')
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i],
                                      det_gt[i], calib[i],
                                      vis_thresh=opt.vis_thresh, img_id='add_gt')
            debugger.add_bird_views(det_pred[i], det_gt[i],
                                    vis_thresh=opt.vis_thresh, img_id='bird_pred_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)

def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    crit = torch.nn.MSELoss()
    crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)
    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    edges_3d = data_loader.dataset.edges_3d
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format
    Loss, Loss3D = AverageMeter(), AverageMeter()
    Acc, MPJPE = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    time_str = ''
    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
        gt_2d = batch['meta']['pts_crop'].cuda(
            device=opt.device, non_blocking=True).float() / opt.output_h
        output = model(batch['input'])
        loss = crit(output[-1]['hm'], batch['target'])
        loss_3d = crit_3d(output[-1]['depth'], batch['reg_mask'],
                          batch['reg_ind'], batch['reg_target'], gt_2d)
        for k in range(opt.num_stacks - 1):
            loss += crit(output[k], batch['target'])
            loss_3d = crit_3d(output[-1]['depth'], batch['reg_mask'],
                              batch['reg_ind'], batch['reg_target'], gt_2d)
        loss += loss_3d
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test: average the outputs of the image and its mirror.
            input_ = batch['input'].cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(
                device=opt.device, non_blocking=True)
            output_flip_ = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
            output_flip = output_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            output_depth_flip = shuffle_lr(
                flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref)
            output_depth_flip = output_depth_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            output_flip = torch.from_numpy(output_flip).cuda(
                device=opt.device, non_blocking=True)
            output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
            # pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            # preds.append(convert_eval_format(pred, conf, meta)[0])
        Loss.update(loss.item(), batch['input'].size(0))
        Loss3D.update(loss_3d.item(), batch['input'].size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            batch['target'].detach().cpu().numpy(), acc_idxs))
        mpeje_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(),
                                       output[-1]['depth'].detach().cpu().numpy(),
                                       batch['meta']['gt_3d'].detach().numpy(),
                                       convert_func=convert_eval_format)
        MPJPE.update(mpeje_batch, mpjpe_cnt)
        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} ' \
                     '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}' \
                     '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}' \
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split=split, time_str=time_str,
                                         MPJPE=MPJPE, loss_3d=Loss3D)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt, amb_idx = get_preds(batch['target'].cpu().numpy())
            gt *= 4
            pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            pred *= 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(batch['meta']['gt_3d'].detach().numpy()[0], 'r',
                                  edges=edges_3d)
            pred_3d, ignore_idx = get_preds_3d(
                output[-1]['hm'].detach().cpu().numpy(),
                output[-1]['depth'].detach().cpu().numpy(), amb_idx)
            debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b', edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()
    bar.finish()
    return {
        'loss': Loss.avg,
        'acc': Acc.avg,
        'mpjpe': MPJPE.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds

import _init_paths
import numpy as np
from opts import opts
from datasets.dataset.yolo import YOLO
from utils.debugger import Debugger

if __name__ == '__main__':
    opt = opts().parse()
    dataset = YOLO(opt.data_dir, opt.flip, opt.vflip, opt.rotate, opt.scale,
                   opt.shear, opt, 'train')
    opt = opts().update_dataset_info_and_set_heads(opt, dataset)
    for i in range(len(dataset)):
        debugger = Debugger(dataset=opt.names)
        data = dataset[i]
        img = data['input'].transpose(1, 2, 0)
        hm = data['hm']
        dets_gt = data['meta']['gt_det']
        dets_gt[:, :4] *= opt.down_ratio
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(hm)
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets_gt)):
            debugger.add_coco_bbox(dets_gt[k, :4], dets_gt[k, -1], dets_gt[k, 4],
                                   img_id='out_pred')

# Excerpt from the viewpoint evaluation loop: convert binned angle
# predictions back to radians, then measure rotation error against the
# ground-truth viewpoint.
pred[1] = (pred[1] - opt.numBins / 2) * PI / (opt.numBins / 2.)
pred[2] = (pred[2] - opt.numBins / 2) * PI / (opt.numBins / 2.)
bestR = angle2dcm(pred)
R_gt = angle2dcm(gt_view)
# geodesic distance ||log(R1^T R2)||_F / sqrt(2), converted to degrees
err_ = ((logm(np.dot(np.transpose(bestR), R_gt)) ** 2).sum()) ** 0.5 \
    / (2. ** 0.5) * 180 / PI
num[class_name] += 1
acc[class_name] += 1 if err_ <= 30. else 0
err[class_name].append(err_)
if DEBUG:
    input, target, mask, view = dataset[index]
    debugger = Debugger()
    img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    debugger.addImg(img)
    debugger.showAllImg(pause=False)

# After the loop: aggregate per-class accuracy and median error.
accAll = 0.
numAll = 0.
mid = {}
err_all = []
for k, v in ref.pascalClassName.items():
    accAll += acc[v]
    numAll += num[v]
    acc[v] = 1.0 * acc[v] / num[v]
    mid[v] = np.sort(np.array(err[v]))[len(err[v]) // 2]
    err_all = err_all + err[v]
print('Acc', acc)

def demo(opt):
    # create output folders for detection results and visualizations
    os.mkdir('../Detection/bboxes_{}'.format(opt.arch))
    os.mkdir('../visualization/{}'.format(opt.arch))
    class_map = {1: 1, 2: 2}  # color for bounding box
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.debug = max(opt.debug, 1)
    Detector = detector_factory[opt.task]
    detector = Detector(opt)
    assert os.path.isdir(opt.demo), 'Need path to videos directory.'
    video_paths = [
        os.path.join(opt.demo, video_name)
        for video_name in os.listdir(opt.demo)
        if video_name.split('.')[-1] == 'mp4'
    ]
    # video_paths = [os.path.join(opt.demo, 'cam_2.mp4')]
    debugger = Debugger(dataset=opt.dataset, theme=opt.debugger_theme)
    for video_path in sorted(video_paths):
        bboxes = []
        video = cv2.VideoCapture(video_path)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        bbox_video = cv2.VideoWriter(
            filename='../visualization/{}/'.format(opt.arch) + os.path.basename(video_path),
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(30),
            frameSize=(width, height),
            isColor=True)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in tqdm(range(num_frames)):
            # optionally skip frames
            if opt.skip_frame > 0:
                if i % opt.skip_frame == 0:
                    continue
            _, img = video.read()
            ret = detector.run(img)
            bboxes.append(ret['results'])
            debugger.add_img(img, img_id='default')
            for class_id in class_map.keys():
                for bbox in ret['results'][class_id]:
                    if bbox[4] > opt.vis_thresh:
                        debugger.add_coco_bbox(bbox[:4], class_map[class_id],
                                               bbox[4], img_id='default')
            bbox_img = debugger.imgs['default']
            bbox_video.write(bbox_img)
        with open('../Detection/bboxes_{}'.format(opt.arch) + '/' +
                  os.path.basename(video_path) + '.pkl', 'wb') as f:
            pickle.dump(bboxes, f)

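# Hedged usage sketch for demo() above (the task name and flags follow the
# CenterNet-style opts this file appears to assume; verify against opts.py):
#
#   python demo.py ctdet --demo /path/to/videos --arch dla_34 --skip_frame 2
#
# It expects ../Detection/ and ../visualization/ to exist one level up, and
# processes every *.mp4 directly under --demo.
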
def debug(self, batch, output, iter_id):
    opt = self.opt
    detections = self.decode(
        output['hm_t'], output['hm_l'], output['hm_b'],
        output['hm_r'], output['hm_c']).detach().cpu().numpy()
    detections[:, :, :4] *= opt.input_res / opt.output_res
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
        gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
        for p in self.parts:
            tag = 'hm_{}'.format(p)
            pred = debugger.gen_colormap(output[tag][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch[tag][i].detach().cpu().numpy())
            if p != 'c':
                pred_hm = np.maximum(pred_hm, pred)
                gt_hm = np.maximum(gt_hm, gt)
            if p == 'c' or opt.debug > 2:
                debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
                debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
        debugger.add_blend_img(img, pred_hm, 'pred')
        debugger.add_blend_img(img, gt_hm, 'gt')
        debugger.add_img(img, img_id='out')
        for k in range(len(detections[i])):
            if detections[i, k, 4] > 0.1:
                debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1],
                                       detections[i, k, 4], img_id='out')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)

def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset,
                        ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            print("no pre_processed")
            # intrinsic = [338.158, 0, 319.077, 0, 0, 338.158, 242.885, 0, 0, 0, 1, 0]  # depth
            intrinsic = [614.678, 0, 318.892, 0,
                         0, 614.93, 240.121, 0,
                         0, 0, 1, 0]
            calib = np.array(intrinsic, dtype=np.float32)
            calib = calib.reshape(3, 4)
            images, meta = self.pre_process(image, scale, calib)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.opt.debug >= 1:
        image_id = str(1234321)
        self.show_results(debugger, image, results, image_id)
    return {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

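# For reference, the flat `intrinsic` list above reshapes row-major into the
# 3x4 camera projection matrix (fx, fy on the diagonal, cx, cy in the last
# column of the 3x3 part):
#
#   [[614.678,   0.0  , 318.892, 0.0],
#    [  0.0  , 614.93 , 240.121, 0.0],
#    [  0.0  ,   0.0  ,   1.0  , 0.0]]
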
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    crit = torch.nn.MSELoss()
    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format
    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        input, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = input.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)
        output = model(input_var)
        loss = crit(output[-1]['hm'], target_var)
        for k in range(opt.num_stacks - 1):
            loss += crit(output[k], target_var)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # flip test: average predictions of the image and its mirror
            input_ = input.cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(
                device=opt.device, non_blocking=True)
            output_flip = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
            output_flip = output_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2
            output_flip = torch.from_numpy(output_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            pred, conf = get_preds(output[-1]['hm'].detach().cpu().numpy(), True)
            preds.append(convert_eval_format(pred, conf, meta)[0])
        Loss.update(loss.item(), input.size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            target_var.detach().cpu().numpy(), acc_idxs))
        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        else:
            time_str = ''
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}' \
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split=split, time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt = get_preds(target.cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)
    bar.finish()
    return {
        'loss': Loss.avg,
        'acc': Acc.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds

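# Note on the eval branches in the two step() variants above: both use the
# standard flip-test trick. The horizontally mirrored image is run through
# the network, the predicted heatmaps are mirrored back, left/right joint
# channels are swapped (shuffle_lr), and the result is averaged with the
# unflipped prediction.
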
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()

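# Hedged usage sketch for demo_image (the checkpoint path and model-loading
# call are assumptions; `mean`/`std` are module-level constants in this file):
#
#   image = cv2.imread('path/to/image.jpg')
#   model = torch.load('path/to/model.pth', map_location=opt.device).eval()
#   demo_image(image, model, opt)
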
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    # dets = ctdet_decode(
    #     output['hm'], output['wh'], reg=reg,
    #     cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = gridneighbordet_decode(output['hm'], output['wh'], opt.point_flags,
                                  reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)

def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    # print(output)
    dets = circledet_decode(output['hm'], output['cl'], reg=reg,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    # print(dets)
    if opt.filter_boarder:
        output_h = self.opt.default_resolution[0] // self.opt.down_ratio  # hard coded
        output_w = self.opt.default_resolution[1] // self.opt.down_ratio  # hard coded
        # zero out the score of any circle whose radius crosses the output border
        for i in range(dets.shape[1]):
            cp = [0, 0]
            cp[0] = dets[0, i, 0]
            cp[1] = dets[0, i, 1]
            cr = dets[0, i, 2]
            if cp[0] - cr < 0 or cp[0] + cr > output_w:
                dets[0, i, 3] = 0
                continue
            if cp[1] - cr < 0 or cp[1] + cr > output_h:
                dets[0, i, 3] = 0
                continue
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :3] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :3] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            # print('risk = %f' % dets[i, k, 3])
            if dets[i, k, 3] > opt.center_thresh:
                debugger.add_coco_circle(dets[i, k, :3], dets[i, k, -1],
                                         dets[i, k, 3], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 3] > opt.center_thresh:
                debugger.add_coco_circle(dets_gt[i, k, :3], dets_gt[i, k, -1],
                                         dets_gt[i, k, 3], img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)

def stepLatent(loader, model, M_, Y, nViews, lamb, mu, S):
    model.eval()
    nIters = len(loader)
    if nIters == 0:
        return None
    N = loader.dataset.nImages
    M = np.zeros((N, ref.J, 3))
    bar = Bar('==>', max=nIters)
    ids = []
    Mij = np.zeros((N, ref.J, 3))
    err, num = 0, 0
    for i, (input, target, meta) in enumerate(loader):
        output = (model(torch.autograd.Variable(input)).data).cpu().numpy()
        G = output.shape[0] // nViews
        output = output.reshape(G, nViews, ref.J, 3)
        for g in range(G):
            # assert meta[g * nViews, 0] > 1 + ref.eps
            id = int(meta[g * nViews, 1])
            ids.append(id)
            # debugger = Debugger()
            for j in range(nViews):
                Rij, tt = horn87(output[g, j].transpose(), M_[id].transpose())
                Mj = (np.dot(Rij, output[g, j].transpose()).copy()).transpose().copy()
                err += ((Mj - M_[id]) ** 2).sum()
                num += 1
                Mij[id] = Mij[id] + Mj / nViews
                # print('id, j, nViews', id, j, nViews)
                # debugger.addPoint3D(Mj, 'b')
                # debugger.addPoint3D(M_[id], 'r')
                # debugger.show3D()
        Bar.suffix = 'Step Mij: [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err : {err:.6f}'.format(
            i, nIters, total=bar.elapsed_td, eta=bar.eta_td, err=err / num)
        bar.next()
    bar.finish()
    if mu < ref.eps:
        for id in ids:
            M[id] = Mij[id]
        return M
    Mi = np.zeros((N, ref.J, 3))
    bar = Bar('==>', max=len(ids))
    err, num = 0, 0
    for i, id in enumerate(ids):
        dis = np.ones((Y.shape[0])) * oo
        for kk in range(Y.shape[0] // S):
            k = kk * S
            dis[k] = Dis(Y[k], M_[id])
        minK = np.argmin(dis)
        Ri, tt = horn87(Y[minK].transpose(), M_[id].transpose())
        Mi_ = (np.dot(Ri, Y[minK].transpose())).transpose()
        Mi[id] = Mi[id] + Mi_
        err += dis[minK]
        num += 1
        Bar.suffix = 'Step Mi : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err: {err:.6f}'.format(
            i, len(ids), total=bar.elapsed_td, eta=bar.eta_td, err=err / num)
        bar.next()
    bar.finish()
    tI = np.zeros((Y.shape[0] // S, 3))
    MI = np.zeros((N, ref.J, 3))
    cnt = np.zeros(N)
    bar = Bar('==>', max=Y.shape[0] // S)
    err, num = 0, 0
    for kk in range(Y.shape[0] // S):
        k = kk * S
        dis = np.ones((N)) * oo
        for id in ids:
            dis[id] = Dis(Y[k], M_[id])
        minI = np.argmin(dis)
        RI, tt = horn87(Y[k].transpose(1, 0), M_[minI].transpose(1, 0))
        MI_ = (np.dot(RI, Y[k].transpose())).transpose()
        err += ((MI_ - M_[minI]) ** 2).sum()
        num += 1
        MI[minI] = MI[minI] + MI_
        cnt[minI] += 1
        Bar.suffix = 'Step MI : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err: {err:.6f}'.format(
            kk, Y.shape[0] // S, total=bar.elapsed_td, eta=bar.eta_td, err=err / num)
        bar.next()
    bar.finish()
    K = Y.shape[0] // S
    for id in ids:
        M[id] = (Mij[id] * (lamb / mu) + Mi[id] + MI[id] / K * len(ids)) \
            / (lamb / mu + 1 + cnt[id] / K * len(ids))
    if DEBUG:
        for id in ids:
            debugger = Debugger()
            debugger.addPoint3D(M[id], 'b')
            debugger.addPoint3D(M_[id], 'r')
            debugger.show3D()
    return M

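# Per shape id, the closed-form update at the end of stepLatent is a weighted
# average of three terms, with K = Y.shape[0] // S:
#
#          (lamb/mu) * Mij  +  Mi  +  (len(ids)/K) * MI
#   M  =  -----------------------------------------------
#          (lamb/mu)  +  1  +  (len(ids)/K) * cnt
#
# i.e. the multi-view consensus Mij, the nearest dictionary shape Mi, and the
# dictionary-to-shape assignments MI, traded off by the ratio lamb/mu.
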
def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset,
                        ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        # pre_processed = False
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        # feed the images to the network and collect the processed outputs
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.opt.debug == 1:
        self.show_results(debugger, image, results)
    if self.opt.debug == 2:
        self.generate_results(debugger, image, results)
    # if debug == -2, only output the bounding-box coordinates, without showing images
    # if self.opt.debug == -2:
    #     self.generate_results(debugger, image, results)
    return {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

def run(self, image_or_path_or_tensor_l, image_or_path_or_tensor_r,
        mono_est, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset,
                        ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor_l, np.ndarray):
        image = image_or_path_or_tensor_l
    elif type(image_or_path_or_tensor_l) == type(''):
        self.image_path = image_or_path_or_tensor_l
        image_l = cv2.imread(image_or_path_or_tensor_l)
        image_r = cv2.imread(image_or_path_or_tensor_r)
        calib_path = os.path.join(self.opt.calib_dir,
                                  image_or_path_or_tensor_l[-10:-3] + 'txt')
        calib = self.read_clib(calib_path)
        calib = torch.from_numpy(calib).unsqueeze(0).to(self.opt.device)
        calib3 = self.read_clib3(calib_path)
        calib3 = torch.from_numpy(calib3).unsqueeze(0).to(self.opt.device)
    else:
        image = image_or_path_or_tensor_l['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor_l
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    # cv2.imshow('s', image_l)
    # cv2.waitKey(0)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images_l, meta = self.pre_process(image_l, scale, meta)
            images_r, _ = self.pre_process(image_r, scale, meta)
            meta['imag_name'] = image_or_path_or_tensor_l[-10:-3]
            meta['trans_output_l'] = meta['trans_output_l'].to(self.opt.device)
            meta['trans_output_r'] = meta['trans_output_r'].to(self.opt.device)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        meta['calib_l'] = calib
        meta['calib_r'] = calib3
        images_l = images_l.to(self.opt.device)
        images_r = images_r.to(self.opt.device)
        self.read_est_from_mono(mono_est, meta)
        torch.cuda.synchronize()
        meta['input'] = images_l
        meta['input_r'] = images_r
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(meta, return_time=True)
        net_time += forward_time  # - pre_process_time
        torch.cuda.synchronize()
        decode_time = time.time()
        dec_time += decode_time - pre_process_time
        if self.opt.debug >= 2:
            self.debug(debugger, images_l, dets, output, scale)
        # dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    # results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.opt.debug >= 1:
        self.show_results(debugger, image_l, image_r, dets, calib)
    return {
        'results': dets, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset,
                        ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    try:
        _, _, _ = image.shape
    except AttributeError:
        print("Nonetype image at {}".format(image_or_path_or_tensor))
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        if 'cpu' not in self.opt.device.type:
            torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(images, return_time=True)
        if 'cpu' not in self.opt.device.type:
            torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        if 'cpu' not in self.opt.device.type:
            torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    if 'cpu' not in self.opt.device.type:
        torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    # parse to our general format:
    cate_dict = OrderedDict()
    num_classes = 80
    if self.opt.dataset == 'semantic_line_kaist':
        num_classes = 14
    for i in range(num_classes):
        cate_dict.update({str(i): debugger.names[i]})
    xml_pth = self.opt.save_path.replace("Images", "Preds")
    if not os.path.exists(xml_pth):
        os.makedirs(xml_pth)
    if self.opt.task == 'ctdet_line':
        bboxesToxml = parseLineCenterNet(results, image)
        bboxes_dict_to_xml = parseLineDict(bboxesToxml['detection_boxes'],
                                           bboxesToxml['detection_classes'],
                                           bboxesToxml['detection_scores'],
                                           bboxesToxml['detection_directs'],
                                           cate_dict,
                                           min_score_thresh=self.opt.vis_thresh)
        writeXml_line(box_dict_in=bboxes_dict_to_xml,
                      image_filename=image_or_path_or_tensor,
                      image_in=image,
                      image_dir=image_or_path_or_tensor.split('/')[-2],
                      image_dst_in=self.opt.save_path,
                      xml_dst_in=xml_pth)
    else:
        bboxesToxml = parseBBoxesCornerNetLite(results, image)
        bboxes_dict_to_xml = parseBBoxDict(bboxesToxml['detection_boxes'],
                                           bboxesToxml['detection_classes'],
                                           bboxesToxml['detection_scores'],
                                           cate_dict,
                                           min_score_thresh=self.opt.vis_thresh)
        writeXml(box_dict_in=bboxes_dict_to_xml,
                 image_filename=image_or_path_or_tensor,
                 image_in=image,
                 image_dir=image_or_path_or_tensor.split('/')[-2],
                 image_dst_in=self.opt.save_path,
                 xml_dst_in=xml_pth)
    if self.opt.debug >= 1:
        self.show_results(debugger, image, results, self.image_counter,
                          image_or_path_or_tensor)
    self.image_counter += 1
    return {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)
    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()
        output = model(input_var)
        reg = output[opt.nStack]
        if opt.DEBUG >= 2:
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))
        loss = FusionCriterion(opt.regWeight, opt.varWeight)(reg, target3D_var)
        Loss3D.update(loss.data[0], input.size(0))
        for k in range(opt.nStack):
            loss += criterion(output[k], target2D_var)
        Loss.update(loss.data[0], input.size(0))
        Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                            (target2D_var.data).cpu().numpy()))
        mpjpe, num3D = MPJPE((output[opt.nStack - 1].data).cpu().numpy(),
                             (reg.data).cpu().numpy(), meta, opt)
        if num3D > 0:
            Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Loss3D {loss3d.avg:.6f} | Acc {Acc.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td,
            loss=Loss, Acc=Acc, split=split, Mpjpe=Mpjpe, loss3d=Loss3D)
        bar.next()
    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg

def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset,
                        ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):  # not used for now
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        # after demo.py's handling, image_or_path_or_tensor is the path to a single image
        # print("image_or_path_or_tensor:", image_or_path_or_tensor)
        image = cv2.imread(image_or_path_or_tensor)
    else:  # not used for now
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    # self.scales = 1; 2 or 0.5 also work, but 1 gives the best detections
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        # the low-level detection step, operating on the numpy array
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    # if self.opt.debug == 0:  # added by zengyuan
    #     pass
    # print("self.opt.debug", self.opt.debug)
    if self.opt.debug >= 1:
        # shows the result image after each detection; comment these two
        # lines out to keep detecting without pausing
        self.show_results(debugger, image, results)
    return {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger((self.cfg.DEBUG == 3),
                        theme=self.cfg.DEBUG_THEME,
                        num_classes=self.cfg.MODEL.NUM_CLASSES,
                        dataset=self.cfg.SAMPLE_METHOD,
                        down_ratio=self.cfg.MODEL.DOWN_RATIO)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(torch.device('cuda'))
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.cfg.DEBUG >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.cfg.DEBUG >= 1:
        self.show_results(debugger, image, results)
    return {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }

class Detector(object): def __init__(self, opt): if opt.gpus[0] >= 0: opt.device = torch.device('cuda') else: opt.device = torch.device('cpu') print('Creating model...') self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt) self.model = load_model(self.model, opt.load_model, opt) self.model = self.model.to(opt.device) self.model.eval() self.opt = opt self.trained_dataset = get_dataset(opt.dataset) self.mean = np.array(self.trained_dataset.mean, dtype=np.float32).reshape(1, 1, 3) self.std = np.array(self.trained_dataset.std, dtype=np.float32).reshape(1, 1, 3) self.pause = not opt.no_pause self.rest_focal_length = self.trained_dataset.rest_focal_length \ if self.opt.test_focal_length < 0 else self.opt.test_focal_length self.flip_idx = self.trained_dataset.flip_idx self.cnt = 0 self.pre_images = None self.pre_image_ori = None self.tracker = Tracker(opt) self.debugger = Debugger(opt=opt, dataset=self.trained_dataset) def run(self, image_or_path_or_tensor, meta={}): load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 merge_time, track_time, tot_time, display_time = 0, 0, 0, 0 self.debugger.clear() start_time = time.time() # read image pre_processed = False if isinstance(image_or_path_or_tensor, np.ndarray): image = image_or_path_or_tensor elif type(image_or_path_or_tensor) == type(''): image = cv2.imread(image_or_path_or_tensor) else: image = image_or_path_or_tensor['image'][0].numpy() pre_processed_images = image_or_path_or_tensor pre_processed = True loaded_time = time.time() load_time += (loaded_time - start_time) detections = [] # for multi-scale testing for scale in self.opt.test_scales: scale_start_time = time.time() if not pre_processed: # not prefetch testing or demo images, meta = self.pre_process(image, scale, meta) else: # prefetch testing images = pre_processed_images['images'][scale][0] meta = pre_processed_images['meta'][scale] meta = {k: v.numpy()[0] for k, v in meta.items()} if 'pre_dets' in pre_processed_images['meta']: meta['pre_dets'] = pre_processed_images['meta']['pre_dets'] if 'cur_dets' in pre_processed_images['meta']: meta['cur_dets'] = pre_processed_images['meta']['cur_dets'] images = images.to(self.opt.device, non_blocking=self.opt.non_block_test) # initializing tracker pre_hms, pre_inds = None, None if self.opt.tracking: # initialize the first frame if self.pre_images is None: print('Initialize tracking!') self.pre_images = images self.tracker.init_track(meta['pre_dets'] if 'pre_dets' in meta else []) if self.opt.pre_hm: # render input heatmap from tracker status # pre_inds is not used in the current version. # We used pre_inds for learning an offset from previous image to # the current image. 
pre_hms, pre_inds = self._get_additional_inputs( self.tracker.tracks, meta, with_hm=not self.opt.zero_pre_hm) pre_process_time = time.time() pre_time += pre_process_time - scale_start_time # run the network # output: the output feature maps, only used for visualizing # dets: output tensors after extracting peaks output, dets, forward_time = self.process(images, self.pre_images, pre_hms, pre_inds, return_time=True) net_time += forward_time - pre_process_time decode_time = time.time() dec_time += decode_time - forward_time # convert the cropped and 4x downsampled output coordinate system # back to the input image coordinate system result = self.post_process(dets, meta, scale) post_process_time = time.time() post_time += post_process_time - decode_time detections.append(result) if self.opt.debug >= 2: self.debug(self.debugger, images, result, output, scale, pre_images=self.pre_images if not self.opt.no_pre_img else None, pre_hms=pre_hms) # merge multi-scale testing results results = self.merge_outputs(detections) torch.cuda.synchronize() end_time = time.time() merge_time += end_time - post_process_time # if self.opt.tracking: # # public detection mode in MOT challenge # public_det = meta['cur_dets'] if self.opt.public_det else None # # add tracking id to results # results = self.tracker.step(results, public_det) # self.pre_images = images tracking_time = time.time() track_time += tracking_time - end_time tot_time += tracking_time - start_time # if self.opt.debug >= 1: # self.show_results(self.debugger, image, results) # self.cnt += 1 show_results_time = time.time() display_time += show_results_time - end_time # return results and run time ret = { 'results': results, 'tot': tot_time, 'load': load_time, 'pre': pre_time, 'net': net_time, 'dec': dec_time, 'post': post_time, 'merge': merge_time, 'track': track_time, 'display': display_time } if self.opt.save_video: try: # return debug image for saving video ret.update({'generic': self.debugger.imgs['generic']}) except: pass return ret def _transform_scale(self, image, scale=1): ''' Prepare input image in different testing modes. Currently support: fix short size/ center crop to a fixed size/ keep original resolution but pad to a multiplication of 32 ''' height, width = image.shape[0:2] new_height = int(height * scale) new_width = int(width * scale) if self.opt.fix_short > 0: if height < width: inp_height = self.opt.fix_short inp_width = (int(width / height * self.opt.fix_short) + 63) // 64 * 64 else: inp_height = (int(height / width * self.opt.fix_short) + 63) // 64 * 64 inp_width = self.opt.fix_short c = np.array([width / 2, height / 2], dtype=np.float32) s = np.array([width, height], dtype=np.float32) elif self.opt.fix_res: inp_height, inp_width = self.opt.input_h, self.opt.input_w c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) s = max(height, width) * 1.0 # s = np.array([inp_width, inp_height], dtype=np.float32) else: inp_height = (new_height | self.opt.pad) + 1 inp_width = (new_width | self.opt.pad) + 1 c = np.array([new_width // 2, new_height // 2], dtype=np.float32) s = np.array([inp_width, inp_height], dtype=np.float32) resized_image = cv2.resize(image, (new_width, new_height)) return resized_image, c, s, inp_width, inp_height, height, width def pre_process(self, image, scale, input_meta={}): ''' Crop, resize, and normalize image. Gather meta data for post processing and tracking. 
''' resized_image, c, s, inp_width, inp_height, height, width = \ self._transform_scale(image) trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) out_height = inp_height // self.opt.down_ratio out_width = inp_width // self.opt.down_ratio trans_output = get_affine_transform(c, s, 0, [out_width, out_height]) inp_image = cv2.warpAffine(resized_image, trans_input, (inp_width, inp_height), flags=cv2.INTER_LINEAR) inp_image = ((inp_image / 255. - self.mean) / self.std).astype( np.float32) images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) if self.opt.flip_test: images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) images = torch.from_numpy(images) meta = {'calib': np.array(input_meta['calib'], dtype=np.float32) \ if 'calib' in input_meta else \ self._get_default_calib(width, height)} meta.update({ 'c': c, 's': s, 'height': height, 'width': width, 'out_height': out_height, 'out_width': out_width, 'inp_height': inp_height, 'inp_width': inp_width, 'trans_input': trans_input, 'trans_output': trans_output }) if 'pre_dets' in input_meta: meta['pre_dets'] = input_meta['pre_dets'] if 'cur_dets' in input_meta: meta['cur_dets'] = input_meta['cur_dets'] return images, meta def _trans_bbox(self, bbox, trans, width, height): ''' Transform bounding boxes according to image crop. ''' bbox = np.array(copy.deepcopy(bbox), dtype=np.float32) bbox[:2] = affine_transform(bbox[:2], trans) bbox[2:] = affine_transform(bbox[2:], trans) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1) return bbox def _get_additional_inputs(self, dets, meta, with_hm=True): ''' Render input heatmap from previous trackings. ''' trans_input, trans_output = meta['trans_input'], meta['trans_output'] inp_width, inp_height = meta['inp_width'], meta['inp_height'] out_width, out_height = meta['out_width'], meta['out_height'] input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32) output_inds = [] for det in dets: if det['score'] < self.opt.pre_thresh or det['active'] == 0: continue bbox = self._trans_bbox(det['bbox'], trans_input, inp_width, inp_height) bbox_out = self._trans_bbox(det['bbox'], trans_output, out_width, out_height) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if (h > 0 and w > 0): radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) if with_hm: draw_umich_gaussian(input_hm[0], ct_int, radius) ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2, (bbox_out[1] + bbox_out[3]) / 2], dtype=np.int32) output_inds.append(ct_out[1] * out_width + ct_out[0]) if with_hm: input_hm = input_hm[np.newaxis] if self.opt.flip_test: input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]), axis=0) input_hm = torch.from_numpy(input_hm).to(self.opt.device) output_inds = np.array(output_inds, np.int64).reshape(1, -1) output_inds = torch.from_numpy(output_inds).to(self.opt.device) return input_hm, output_inds def _get_default_calib(self, width, height): calib = np.array([[self.rest_focal_length, 0, width / 2, 0], [0, self.rest_focal_length, height / 2, 0], [0, 0, 1, 0]]) return calib def _sigmoid_output(self, output): if 'hm' in output: output['hm'] = output['hm'].sigmoid_() if 'hm_bdd' in output: output['hm_bdd'] = output['hm_bdd'].sigmoid_() if 'hm_tl' in output: output['hm_tl'] = output['hm_tl'].sigmoid_() if 'hm_hp' in output: output['hm_hp'] = output['hm_hp'].sigmoid_() if 'dep' in output: 
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1. output['dep'] *= self.opt.depth_scale return output def _flip_output(self, output): average_flips = ['hm', 'wh', 'dep', 'dim'] neg_average_flips = ['amodel_offset'] single_flips = [ 'ltrb', 'nuscenes_att', 'velocity', 'ltrb_amodal', 'reg', 'hp_offset', 'rot', 'tracking', 'pre_hm' ] for head in output: if head in average_flips: output[head] = (output[head][0:1] + flip_tensor(output[head][1:2])) / 2 if head in neg_average_flips: flipped_tensor = flip_tensor(output[head][1:2]) flipped_tensor[:, 0::2] *= -1 output[head] = (output[head][0:1] + flipped_tensor) / 2 if head in single_flips: output[head] = output[head][0:1] if head == 'hps': output['hps'] = (output['hps'][0:1] + flip_lr_off( output['hps'][1:2], self.flip_idx)) / 2 if head == 'hm_hp': output['hm_hp'] = (output['hm_hp'][0:1] + \ flip_lr(output['hm_hp'][1:2], self.flip_idx)) / 2 return output def process(self, images, pre_images=None, pre_hms=None, pre_inds=None, return_time=False): with torch.no_grad(): torch.cuda.synchronize() output = self.model(images, pre_images, pre_hms)[-1] output = self._sigmoid_output(output) output.update({'pre_inds': pre_inds}) if self.opt.flip_test: output = self._flip_output(output) torch.cuda.synchronize() forward_time = time.time() dets = generic_decode_custom_tl(output, K=self.opt.K, opt=self.opt) torch.cuda.synchronize() for k in dets: dets[k] = dets[k].detach().cpu().numpy() if return_time: return output, dets, forward_time else: return output, dets def post_process(self, dets, meta, scale=1): dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']], meta['out_height'], meta['out_width'], self.opt.num_classes, [meta['calib']], meta['height'], meta['width']) self.this_calib = meta['calib'] if scale != 1: for i in range(len(dets[0])): for k in ['bbox', 'hps']: if k in dets[0][i]: dets[0][i][k] = (np.array(dets[0][i][k], np.float32) / scale).tolist() return dets[0] def merge_outputs(self, detections): assert len(self.opt.test_scales) == 1, 'multi_scale not supported!' 
    results = []
    for i in range(len(detections[0])):
      if detections[0][i]['score'] > self.opt.out_thresh:
        results.append(detections[0][i])
    return results

  def debug(self, debugger, images, dets, output, scale=1,
            pre_images=None, pre_hms=None):
    img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
    img = np.clip(
      ((img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
    pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
    debugger.add_blend_img(img, pred, 'pred_hm')
    if 'hm_hp' in output:
      pred = debugger.gen_colormap_hp(
        output['hm_hp'][0].detach().cpu().numpy())
      debugger.add_blend_img(img, pred, 'pred_hmhp')
    if pre_images is not None:
      pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
      pre_img = np.clip(
        ((pre_img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
      debugger.add_img(pre_img, 'pre_img')
      if pre_hms is not None:
        pre_hm = debugger.gen_colormap(pre_hms[0].detach().cpu().numpy())
        debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

  def show_results(self, debugger, image, results):
    debugger.add_img(image, img_id='generic')
    if self.opt.tracking:
      debugger.add_img(
        self.pre_image_ori if self.pre_image_ori is not None else image,
        img_id='previous')
      self.pre_image_ori = image
    for j in range(len(results)):
      if results[j]['score'] > self.opt.vis_thresh:
        if 'active' in results[j] and results[j]['active'] == 0:
          continue
        item = results[j]
        if ('bbox' in item):
          sc = item['score'] if self.opt.demo == '' or \
            not ('tracking_id' in item) else item['tracking_id']
          sc = item['tracking_id'] if self.opt.show_track_color else sc
          debugger.add_coco_bbox(
            item['bbox'], item['class'] - 1, sc, img_id='generic')
        if 'tracking' in item:
          debugger.add_arrow(item['ct'], item['tracking'], img_id='generic')
        tracking_id = item['tracking_id'] if 'tracking_id' in item else -1
        if 'tracking_id' in item and self.opt.demo == '' and \
          not self.opt.show_track_color:
          debugger.add_tracking_id(
            item['ct'], item['tracking_id'], img_id='generic')
        if (item['class'] in [1, 2]) and 'hps' in item:
          debugger.add_coco_hp(
            item['hps'], tracking_id=tracking_id, img_id='generic')
    if len(results) > 0 and \
      'dep' in results[0] and 'alpha' in results[0] and 'dim' in results[0]:
      debugger.add_3d_detection(
        image if not self.opt.qualitative else cv2.resize(
          debugger.imgs['pred_hm'], (image.shape[1], image.shape[0])),
        False, results, self.this_calib,
        vis_thresh=self.opt.vis_thresh, img_id='ddd_pred')
      debugger.add_bird_view(
        results, vis_thresh=self.opt.vis_thresh,
        img_id='bird_pred', cnt=self.cnt)
      if self.opt.show_track_color and self.opt.debug == 4:
        del debugger.imgs['generic'], debugger.imgs['bird_pred']
      if 'ddd_pred' in debugger.imgs:
        debugger.imgs['generic'] = debugger.imgs['ddd_pred']
    if self.opt.debug == 4:
      debugger.save_all_imgs(self.opt.debug_dir, prefix='{}'.format(self.cnt))
    else:
      pass  # debugger.show_all_imgs(pause=self.pause)

  def reset_tracking(self):
    self.tracker.reset()
    self.pre_images = None
    self.pre_image_ori = None
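# NOTE: `_get_additional_inputs` above relies on the CenterNet-style helpers
# `gaussian_radius` and `draw_umich_gaussian` to splat previous detections
# into a prior heatmap. A minimal NumPy sketch of that splat, assuming the
# common sigma = diameter / 6 choice (an illustration, not necessarily this
# repository's exact implementation):
import numpy as np

def _example_draw_gaussian(heatmap, center, radius):
  # Splat an unnormalized 2D Gaussian onto `heatmap`, keeping the
  # element-wise maximum where splats overlap.
  diameter = 2 * radius + 1
  sigma = diameter / 6.
  xs = np.arange(-radius, radius + 1)
  gaussian = np.exp(-(xs[None, :] ** 2 + xs[:, None] ** 2) / (2 * sigma ** 2))
  cx, cy = int(center[0]), int(center[1])
  h, w = heatmap.shape
  # Clip the splat window at the heatmap borders.
  left, right = min(cx, radius), min(w - cx, radius + 1)
  top, bottom = min(cy, radius), min(h - cy, radius + 1)
  masked = heatmap[cy - top:cy + bottom, cx - left:cx + right]
  patch = gaussian[radius - top:radius + bottom, radius - left:radius + right]
  np.maximum(masked, patch, out=masked)
  return heatmap

# usage: a peak of 1.0 at (64, 40) on an output-resolution grid
# hm = np.zeros((128, 128), dtype=np.float32)
# _example_draw_gaussian(hm, center=(64, 40), radius=6)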
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
  if split == 'train':
    model.train()
  else:
    model.eval()
  Loss, Acc = AverageMeter(), AverageMeter()
  preds = []
  nIters = len(dataLoader)
  bar = Bar('{}'.format(opt.expID), max=nIters)
  for i, (input, targets, action, meta) in enumerate(dataLoader):
    input_var = torch.autograd.Variable(input).float().cuda(opt.GPU)
    target_var = []
    for t in range(len(targets)):
      target_var.append(
        torch.autograd.Variable(targets[t]).float().cuda(opt.GPU))
    z = []
    for k in range(opt.numNoise):
      noise = torch.autograd.Variable(
        torch.randn((input_var.shape[0], 1, 64, 64))).cuda(opt.GPU)
      z.append(noise)
    output, samples = model(input_var, z, action)
    pred_sample = maximumExpectedUtility(samples, criterion)
    target = maximumExpectedUtility(target_var, criterion)
    if opt.DEBUG >= 2:
      gt = getPreds(target.cpu().numpy()) * 4
      pred = getPreds((pred_sample.data).cpu().numpy()) * 4
      debugger = Debugger()
      img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
      debugger.addImg(img)
      debugger.addPoint2D(pred[0], (255, 0, 0))
      debugger.addPoint2D(gt[0], (0, 0, 255))
      debugger.showAllImg(pause=True)
    loss = DiscoLoss(output, samples, target_var, criterion)
    Loss.update(loss.item(), input.size(0))
    Acc.update(Accuracy((pred_sample.data).cpu().numpy(),
                        (target.data).cpu().numpy()))
    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
      input_ = input.cpu().numpy()
      input_[0] = Flip(input_[0]).copy()
      inputFlip_var = torch.autograd.Variable(
        torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                      ref.inputRes)).float().cuda(opt.GPU)
      _, samplesFlip = model(inputFlip_var, z, action)
      pred_sample_flip = maximumExpectedUtility(samplesFlip, criterion)
      outputFlip = ShuffleLR(
        Flip((pred_sample_flip.data).cpu().numpy()[0])).reshape(
          1, ref.nJoints, ref.outputRes, ref.outputRes)
      output_ = old_div(((pred_sample.data).cpu().numpy() + outputFlip), 2)
      preds.append(finalPreds(
        output_, meta['center'], meta['scale'], meta['rotate'])[0])
    Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
      epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
      Acc=Acc, split=split)
    bar.next()
  bar.finish()
  return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
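# NOTE: `step` above assumes an `AverageMeter` helper for the running loss
# and accuracy. The repository's version is not shown in this file; a minimal
# sketch of the conventional implementation, matching the val / avg / update
# interface used in the Bar.suffix format string:
class _ExampleAverageMeter(object):
  """Tracks the most recent value and the running average."""
  def __init__(self):
    self.val, self.sum, self.count, self.avg = 0., 0., 0, 0.

  def update(self, val, n=1):
    self.val = val        # last observed value
    self.sum += val * n   # weighted sum (n is typically the batch size)
    self.count += n
    self.avg = self.sum / self.count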
    num[class_name] += 1
    acc30[class_name] += 1 if err_ <= 30. else 0
    acc10[class_name] += 1 if err_ <= 10. else 0
    err[class_name].append(err_)
    Acc30 += 1 if err_ <= 30. else 0
    Acc10 += 1 if err_ <= 10. else 0
    bar.suffix = '[{0}/{1}]|Total: {total:} | ETA: {eta:} | Acc_10: {Acc10:.6f} | Acc_30: {Acc30:.6f}'.format(
      idx, n, total=bar.elapsed_td, eta=bar.eta_td,
      Acc10=Acc10 / (idx + 1.), Acc30=Acc30 / (idx + 1.))
    bar.next()
    if DEBUG:
      debugger = Debugger()
      input, target, mask = dataset[index]
      img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
      star = (cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255)
      star[star > 255] = 255
      star[star < 0] = 0
      star = star.astype(np.uint8)
      for k in range(len(ps[0])):
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                   ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) *
               ref.outputRes).astype(np.int32)
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                      1.0 * z / ref.outputRes))
        # white marker at each parsed peak, as in the demo visualization
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 6, (255, 255, 255), -1)
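# NOTE: the loop above accumulates Acc_10 / Acc_30, the fraction of samples
# whose error falls under 10 and 30 degrees, both overall and per class. The
# same statistics computed offline from a collected error list (a sketch;
# `errors` is a hypothetical per-sample error array in degrees):
import numpy as np

errors = np.array([4.2, 12.9, 28.0, 55.1, 8.3])
acc10 = float((errors <= 10.).mean())  # fraction under 10 degrees
acc30 = float((errors <= 30.).mean())  # fraction under 30 degrees
med = float(np.median(errors))         # median error, a common companion metric
print('Acc10 {:.4f} | Acc30 {:.4f} | MedErr {:.1f}'.format(acc10, acc30, med))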
def _debug(image, t_heat, l_heat, b_heat, r_heat, ct_heat):
  debugger = Debugger(num_classes=3)
  k = 0
  t_heat = torch.sigmoid(t_heat)
  l_heat = torch.sigmoid(l_heat)
  b_heat = torch.sigmoid(b_heat)
  r_heat = torch.sigmoid(r_heat)
  aggr_weight = 0.1
  t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
  print("[exkp.py _debug] final t_heat", t_heat.shape)
  l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
  b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
  r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
  t_heat[t_heat > 1] = 1
  l_heat[l_heat > 1] = 1
  b_heat[b_heat > 1] = 1
  r_heat[r_heat > 1] = 1
  ct_heat = torch.sigmoid(ct_heat)
  t_hm = debugger.gen_colormap(t_heat[k].cpu().data.numpy())
  l_hm = debugger.gen_colormap(l_heat[k].cpu().data.numpy())
  b_hm = debugger.gen_colormap(b_heat[k].cpu().data.numpy())
  r_hm = debugger.gen_colormap(r_heat[k].cpu().data.numpy())
  ct_hm = debugger.gen_colormap(ct_heat[k].cpu().data.numpy())
  hms = np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm))
  # debugger.add_img(hms, 'hms')
  if image is not None:
    mean = np.array([0.40789654, 0.44719302, 0.47026115],
                    dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.28863828, 0.27408164, 0.27809835],
                   dtype=np.float32).reshape(3, 1, 1)
    img = (image[k].cpu().data.numpy() * std + mean) * 255
    img = img.astype(np.uint8).transpose(1, 2, 0)
    debugger.add_img(img, 'img')
    # debugger.add_blend_img(img, t_hm, 't_hm')
    # debugger.add_blend_img(img, l_hm, 'l_hm')
    # debugger.add_blend_img(img, b_hm, 'b_hm')
    # debugger.add_blend_img(img, r_hm, 'r_hm')
    debugger.add_blend_img(img, hms, 'extreme')
    debugger.add_blend_img(img, ct_hm, 'center')
  debugger.show_all_imgs(pause=False)
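# NOTE: `add_blend_img` above overlays a colorized heatmap on the image.
# A sketch of the alpha blend it presumably performs (the 0.8 weight mirrors
# the transparency used elsewhere in this codebase; `gen_colormap`'s own
# resizing and coloring are out of scope here):
import numpy as np

def _example_blend(img, heat_color, alpha=0.8):
  # img, heat_color: HxWx3 uint8 arrays of the same size.
  out = alpha * heat_color.astype(np.float32) + \
        (1. - alpha) * img.astype(np.float32)
  return np.clip(out, 0, 255).astype(np.uint8)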
  def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    hm_hp = output['hm_hp'] if opt.hm_hp else None
    hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
    dets = multi_pose_decode(
      output['hm'], output['wh'], output['hps'],
      reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.input_res / opt.output_res
    dets[:, :, 5:39] *= opt.input_res / opt.output_res
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.input_res / opt.output_res
    dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
    for i in range(1):
      debugger = Debugger(
        dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
      img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
      img = np.clip(
        ((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
      pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
      gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
      debugger.add_blend_img(img, pred, 'pred_hm')
      debugger.add_blend_img(img, gt, 'gt_hm')
      debugger.add_img(img, img_id='out_pred')
      for k in range(len(dets[i])):
        if dets[i, k, 4] > opt.center_thresh:
          debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                 dets[i, k, 4], img_id='out_pred')
          debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
      debugger.add_img(img, img_id='out_gt')
      for k in range(len(dets_gt[i])):
        if dets_gt[i, k, 4] > opt.center_thresh:
          debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                 dets_gt[i, k, 4], img_id='out_gt')
          debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
      if opt.hm_hp:
        pred = debugger.gen_colormap_hp(
          output['hm_hp'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap_hp(
          batch['hm_hp'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hmhp')
        debugger.add_blend_img(img, gt, 'gt_hmhp')
      if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
      else:
        debugger.show_all_imgs(pause=True)
  def debug(self, batch, output, iter_id):
    cfg = self.cfg
    reg = output[3] if cfg.LOSS.REG_OFFSET else None
    hm_hp = output[4] if cfg.LOSS.HM_HP else None
    hp_offset = output[5] if cfg.LOSS.REG_HP_OFFSET else None
    dets = multi_pose_decode(
      output[0], output[1], output[2],
      reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=cfg.TEST.TOPK)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets_gt[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    for i in range(1):
      debugger = Debugger(
        dataset=cfg.SAMPLE_METHOD, ipynb=(cfg.DEBUG == 3),
        theme=cfg.DEBUG_THEME)
      img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
      img = np.clip(((img * np.array(cfg.DATASET.STD).reshape(
        1, 1, 3).astype(np.float32) + cfg.DATASET.MEAN) * 255.),
        0, 255).astype(np.uint8)
      pred = debugger.gen_colormap(output[0][i].detach().cpu().numpy())
      gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
      debugger.add_blend_img(img, pred, 'pred_hm')
      debugger.add_blend_img(img, gt, 'gt_hm')
      debugger.add_img(img, img_id='out_pred')
      for k in range(len(dets[i])):
        if dets[i, k, 4] > cfg.MODEL.CENTER_THRESH:
          debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                 dets[i, k, 4], img_id='out_pred')
          debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
      debugger.add_img(img, img_id='out_gt')
      for k in range(len(dets_gt[i])):
        if dets_gt[i, k, 4] > cfg.MODEL.CENTER_THRESH:
          debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                 dets_gt[i, k, 4], img_id='out_gt')
          debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
      if cfg.LOSS.HM_HP:
        pred = debugger.gen_colormap_hp(output[4][i].detach().cpu().numpy())
        gt = debugger.gen_colormap_hp(
          batch['hm_hp'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hmhp')
        debugger.add_blend_img(img, gt, 'gt_hmhp')
      if cfg.DEBUG == 4:
        debugger.save_all_imgs(cfg.LOG_DIR, prefix='{}'.format(iter_id))
      else:
        debugger.show_all_imgs(pause=True)
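# NOTE: both debug variants above rescale the decoded detections from the
# output feature grid back to input pixels: columns 0-3 are the box corners,
# column 4 the score, and columns 5-38 hold 17 (x, y) keypoints, so both
# slices are multiplied by input_res / output_res (typically 4). The same
# step in isolation (the array shape here is hypothetical):
import numpy as np

down_ratio = 4                                     # input_res / output_res
dets_example = np.zeros((1, 100, 40), np.float32)  # batch x K x fields
dets_example[:, :, :4] *= down_ratio               # bbox x1, y1, x2, y2
dets_example[:, :, 5:39] *= down_ratio             # 17 keypoints x (x, y)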
  def debug(self, batch, output, iter_id):
    opt = self.opt
    wh = output['wh'] if opt.reg_bbox else None
    reg = output['reg'] if opt.reg_offset else None
    dets = ddd_decode(output['hm'], output['rot'], output['dep'],
                      output['dim'], wh=wh, reg=reg, K=opt.K)
    # x, y, score, r1-r8, depth, dim1-dim3, cls
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    calib = batch['meta']['calib'].detach().numpy()
    # x, y, score, rot, depth, dim1, dim2, dim3
    # if opt.dataset == 'gta':
    #   dets[:, 12:15] /= 3
    dets_pred = ddd_post_process(
      dets.copy(), batch['meta']['c'].detach().numpy(),
      batch['meta']['s'].detach().numpy(), calib, opt)
    dets_gt = ddd_post_process(
      batch['meta']['gt_det'].detach().numpy().copy(),
      batch['meta']['c'].detach().numpy(),
      batch['meta']['s'].detach().numpy(), calib, opt)
    # for i in range(input.size(0)):
    for i in range(1):
      debugger = Debugger(
        dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
      img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
      img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
      pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
      gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
      debugger.add_blend_img(img, pred, 'hm_pred')
      debugger.add_blend_img(img, gt, 'hm_gt')
      # decode
      debugger.add_ct_detection(
        img, dets[i], show_box=opt.reg_bbox,
        center_thresh=opt.center_thresh, img_id='det_pred')
      debugger.add_ct_detection(
        img, batch['meta']['gt_det'][i].cpu().numpy().copy(),
        show_box=opt.reg_bbox, img_id='det_gt')
      debugger.add_3d_detection(
        batch['meta']['image_path'][i], dets_pred[i], calib[i],
        center_thresh=opt.center_thresh, img_id='add_pred')
      debugger.add_3d_detection(
        batch['meta']['image_path'][i], dets_gt[i], calib[i],
        center_thresh=opt.center_thresh, img_id='add_gt')
      # debugger.add_bird_view(
      #   dets_pred[i], center_thresh=opt.center_thresh, img_id='bird_pred')
      # debugger.add_bird_view(dets_gt[i], img_id='bird_gt')
      debugger.add_bird_views(
        dets_pred[i], dets_gt[i],
        center_thresh=opt.center_thresh, img_id='bird_pred_gt')
      # debugger.add_blend_img(img, pred, 'out', white=True)
      debugger.compose_vis_add(
        batch['meta']['image_path'][i], dets_pred[i], calib[i],
        opt.center_thresh, pred, 'bird_pred_gt', img_id='out')
      # debugger.add_img(img, img_id='out')
      if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
      else:
        debugger.show_all_imgs(pause=True)
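# NOTE: the ddd pipeline converts a 2D center plus a predicted depth into a
# 3D location through the calibration matrix; `_get_default_calib` earlier
# shows the assumed pinhole form [[f, 0, cx, 0], [0, f, cy, 0], [0, 0, 1, 0]].
# A sketch of that unprojection (standard pinhole algebra, not necessarily
# this repository's exact post-processing):
import numpy as np

def _example_unproject_2d_to_3d(pt_2d, depth, calib):
  # Invert u = (f * X + cx * Z + tx) / w, v = (f * Y + cy * Z + ty) / w
  # for P = [[f, 0, cx, tx], [0, f, cy, ty], [0, 0, 1, tz]], with depth = w.
  z = depth - calib[2, 3]
  x = (pt_2d[0] * depth - calib[0, 3] - calib[0, 2] * z) / calib[0, 0]
  y = (pt_2d[1] * depth - calib[1, 3] - calib[1, 2] * z) / calib[1, 1]
  return np.array([x, y, z], dtype=np.float32)

# usage with a KITTI-like calibration (values for illustration only):
# calib = np.array([[721., 0., 609., 44.9],
#                   [0., 721., 172., 0.2],
#                   [0., 0., 1., 0.003]])
# print(_example_unproject_2d_to_3d((500., 180.), 20., calib))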
  scale = 1.0 * h / mheight
  # ANTIALIAS is an alias of LANCZOS and was removed in Pillow 10
  new_im = image.resize((int(w / scale), int(h / scale)), Image.ANTIALIAS)
  new_im.save(filename)
  new_im.close()

#opt = opts().parse()
imageName = './images/test3.jpg'
#process_image(imageName)
model = torch.load('../model/Stage3/model_10.pth',
                   map_location=lambda storage, loc: storage)
img = cv2.imread(imageName)
print(type(np.array(img)))
input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
input = input.view(1, input.size(0), input.size(1), input.size(2))
input_var = torch.autograd.Variable(input).float()
output = model(input_var)
pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
print(pred, (reg + 1) / 2. * 256)
debugger = Debugger()
debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
debugger.addPoint2D(pred, (255, 0, 0))
debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
debugger.showImg(pause=True)
debugger.show3D()
  def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    image_name = ''  # fallback so save_results below always has a name
    if isinstance(image_or_path_or_tensor, np.ndarray):
      image = image_or_path_or_tensor
    elif isinstance(image_or_path_or_tensor, str):
      # an image path or a folder given on the command line is converted
      # to individual image paths in demo.py
      image = cv2.imread(image_or_path_or_tensor)
      image_name = image_or_path_or_tensor.split('/')[-1]
    else:
      image = image_or_path_or_tensor['image'][0].numpy()
      pre_processed_images = image_or_path_or_tensor
      pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
      scale_start_time = time.time()
      if not pre_processed:
        images, meta = self.pre_process(image, scale, meta)
      else:
        # import pdb; pdb.set_trace()
        images = pre_processed_images['images'][scale][0]
        meta = pre_processed_images['meta'][scale]
        meta = {k: v.numpy()[0] for k, v in meta.items()}
      images = images.to(self.opt.device)
      torch.cuda.synchronize()
      pre_process_time = time.time()
      pre_time += pre_process_time - scale_start_time
      output, dets, forward_time = self.process(images, return_time=True)
      torch.cuda.synchronize()
      net_time += forward_time - pre_process_time
      decode_time = time.time()
      dec_time += decode_time - forward_time
      if self.opt.debug >= 2:
        self.debug(debugger, images, dets, output, scale)
      dets = self.post_process(dets, meta, scale)
      torch.cuda.synchronize()
      post_process_time = time.time()
      post_time += post_process_time - decode_time
      detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.opt.debug >= 1:
      self.save_results(debugger, image, results, image_name)
    return {'results': results, 'tot': tot_time, 'load': load_time,
            'pre': pre_time, 'net': net_time, 'dec': dec_time,
            'post': post_time, 'merge': merge_time}
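# NOTE: `run` brackets every stage with torch.cuda.synchronize() before
# reading time.time(), because CUDA kernels launch asynchronously and would
# otherwise still be running when the timestamp is taken. The pattern in
# isolation (a sketch; `fn` stands in for any stage, such as the model):
import time
import torch

def _example_timed(fn, *args):
  # Wait for pending kernels before both the start and end timestamps.
  if torch.cuda.is_available():
    torch.cuda.synchronize()
  start = time.time()
  out = fn(*args)
  if torch.cuda.is_available():
    torch.cuda.synchronize()
  return out, time.time() - start

# usage: output, net_time = _example_timed(model, images)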
  def run(self, image_or_path_or_tensor, out, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    # image loading: decide whether the argument is an image, a path,
    # or a tensor
    if isinstance(image_or_path_or_tensor, np.ndarray):
      image = image_or_path_or_tensor
    elif isinstance(image_or_path_or_tensor, str):
      image = cv2.imread(image_or_path_or_tensor)
    else:
      image = image_or_path_or_tensor['image'][0].numpy()
      pre_processed_images = image_or_path_or_tensor
      pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)  # image loading time
    detections = []
    # self.scales presumably rescales the image by each factor before
    # detecting again
    for scale in self.scales:
      # print(self.scales)
      scale_start_time = time.time()
      if not pre_processed:  # raw images and paths still need pre-processing
        images, meta = self.pre_process(image, scale, meta)
      else:
        # import pdb; pdb.set_trace()
        images = pre_processed_images['images'][scale][0]
        meta = pre_processed_images['meta'][scale]
        meta = {k: v.numpy()[0] for k, v in meta.items()}
      images = images.to(self.opt.device)  # move the images to the GPU
      torch.cuda.synchronize()  # sync all kernels to measure the true time
      pre_process_time = time.time()
      pre_time += pre_process_time - scale_start_time  # pre-processing time
      # forward pass: returns the predictions, the bounding boxes, and the
      # current time; dets holds one N x 5 ndarray per class (80 classes)
      output, dets, forward_time = self.process(images, return_time=True)
      torch.cuda.synchronize()
      net_time += forward_time - pre_process_time  # heatmap prediction time
      decode_time = time.time()
      dec_time += decode_time - forward_time  # heatmap decoding time
      if self.opt.debug >= 2:
        # with debug >= 2, output three images: the prediction, the resized
        # prediction, and the heatmap
        self.debug(debugger, images, dets, output, scale)
      dets = self.post_process(dets, meta, scale)
      torch.cuda.synchronize()
      post_process_time = time.time()
      post_time += post_process_time - decode_time  # coordinate regression time
      detections.append(dets)
      # print(detections)
    results = self.merge_outputs(detections)  # map back to image coordinates
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time  # merge time
    tot_time += end_time - start_time  # total time
    # emotion recognition interface
    # emotion_labels = {'0': 'angry', '1': 'disgust', '2': 'fear',
    #                   '3': 'happy', '4': 'sad', '5': 'surprise',
    #                   '6': 'neutral'}
    # emotion_model_path = '../models/emotion_models/simple_CNN.985-0.66.hdf5'
    # emotion_classifier = load_keras_model(emotion_model_path)
    # img = cv2.imread('depressed_412.jpg')
    # img = cv2.resize(img, (224, 224))
    # img = transforms.ToTensor()(img)
    # with torch.no_grad():
    #   pt = self.model1(img)
    visualize_model(self.model1, num_images=2)
    # print(pt)
    # for detection in detections:
    #   faces = self.gray_preprocess(results)
    #   print(faces)
    #   for face in faces:
    #     emotion_predict = self.model1(face)
    #     print(emotion_predict)
    #     emotion_text = emotion_labels[emotion_predict]
    if self.opt.debug >= 1:
      self.show_results(debugger, image, results, out=out)
    return {'results': results, 'tot': tot_time, 'load': load_time,
            'pre': pre_time, 'net': net_time, 'dec': dec_time,
            'post': post_time, 'merge': merge_time}