def main():
    """Run human-pose detection on the image(s) matched by --fn.

    For every matched image the pose network predicts person boxes plus 17
    COCO keypoints per person; detections scoring below 0.3 are skipped.
    Annotated copies are written to <output>/hpdet.<basename>.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--fn', default='assets/demo.jpg', type=str)
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--inres', default='512,512', type=str)
    args, _ = parser.parse_known_args()
    args.inres = tuple(int(x) for x in args.inres.split(','))
    os.makedirs(args.output, exist_ok=True)
    kwargs = {
        'num_stacks': 2,
        'cnv_dim': 256,
        'weights': 'hpdet_coco',
        'inres': args.inres,
    }
    # Output head sizes of the pose network.
    heads = {
        'hm': 1,         # person-center heatmap
        'hm_hp': 17,     # per-joint keypoint heatmaps
        'hp_offset': 2,  # keypoint sub-pixel offsets
        'hps': 34,       # (x, y) regression for the 17 joints
        'reg': 2,        # center sub-pixel offset
        'wh': 2,         # box width / height
    }
    model = HourglassNetwork(heads=heads, **kwargs)
    model = HpDetDecode(model)
    drawer = COCODrawer()
    # The transformer depends only on --inres, so build it once instead of
    # re-creating it for every image (was constructed inside the loop).
    letterbox_transformer = LetterboxTransformer(args.inres[0], args.inres[1])
    fns = sorted(glob(args.fn))
    for fn in tqdm(fns):
        img = cv2.imread(fn)
        pimg = letterbox_transformer(img)
        pimg = normalize_image(pimg)
        pimg = np.expand_dims(pimg, 0)
        detections = model.predict(pimg)[0]
        for d in detections:
            score, cl = d[4], d[-1]
            # break (not continue): detections are assumed sorted by score.
            if score < 0.3:
                break
            x1, y1, x2, y2 = d[:4]
            # d[5:-1] holds 17 x-coords followed by 17 y-coords.
            kps = d[5:-1]
            kps_x = kps[:17]
            kps_y = kps[17:]
            # Map keypoints and box from letterboxed coords back onto the
            # original image.
            kps = letterbox_transformer.correct_coords(
                np.vstack([kps_x, kps_y])).T
            x1, y1, x2, y2 = letterbox_transformer.correct_box(x1, y1, x2, y2)
            img = drawer.draw_pose(img, kps)
            img = drawer.draw_box(img, x1, y1, x2, y2, cl)
        out_fn = os.path.join(args.output, 'hpdet.' + os.path.basename(fn))
        cv2.imwrite(out_fn, img)
        print("Image saved to: %s" % out_fn)
def main():
    """Run 80-class COCO object detection on the image(s) matched by --fn.

    Detections scoring below 0.3 are skipped; annotated copies are written
    to <output>/ctdet.<basename>.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--fn', default='assets/demo.jpg', type=str)
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--inres', default='512,512', type=str)
    args, _ = parser.parse_known_args()
    args.inres = tuple(int(x) for x in args.inres.split(','))
    os.makedirs(args.output, exist_ok=True)
    kwargs = {
        'num_stacks': 2,
        'cnv_dim': 256,
        'weights': 'ctdet_coco',
        'inres': args.inres,
    }
    # Output head sizes of the detection network.
    heads = {
        'hm': 80,  # per-class center heatmaps
        'reg': 2,  # center sub-pixel offset
        'wh': 2,   # box width / height
    }
    model = HourglassNetwork(heads=heads, **kwargs)
    model = CtDetDecode(model)
    drawer = COCODrawer()
    # The transformer depends only on --inres, so build it once instead of
    # re-creating it for every image (was constructed inside the loop).
    letterbox_transformer = LetterboxTransformer(args.inres[0], args.inres[1])
    fns = sorted(glob(args.fn))
    for fn in tqdm(fns):
        img = cv2.imread(fn)
        pimg = letterbox_transformer(img)
        pimg = normalize_image(pimg)
        pimg = np.expand_dims(pimg, 0)
        detections = model.predict(pimg)[0]
        for d in detections:
            x1, y1, x2, y2, score, cl = d
            # break (not continue): detections are assumed sorted by score.
            if score < 0.3:
                break
            # Map the box from letterboxed coords back onto the original image.
            x1, y1, x2, y2 = letterbox_transformer.correct_box(x1, y1, x2, y2)
            img = drawer.draw_box(img, x1, y1, x2, y2, cl)
        out_fn = os.path.join(args.output, 'ctdet.' + os.path.basename(fn))
        cv2.imwrite(out_fn, img)
        print("Image saved to: %s" % out_fn)
def main():
    """Evaluate the HpDet pose model on a COCO keypoints split.

    Predictions for every image in --data are dumped to a JSON file in
    --output, then scored with the official COCO keypoints evaluator.
    Returns the COCOeval stats array (or None if nothing was predicted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--data', default='val2017', type=str)
    parser.add_argument('--annotations', default='annotations', type=str)
    parser.add_argument('--inres', default='512,512', type=str)
    parser.add_argument('--no-full-resolution', action='store_true')
    args, _ = parser.parse_known_args()
    args.inres = tuple(int(x) for x in args.inres.split(','))
    # Default mode is full resolution: the network input size is decided
    # per image, so inres is cleared to (None, None).
    if not args.no_full_resolution:
        args.inres = (None, None)
    os.makedirs(args.output, exist_ok=True)
    kwargs = {
        'num_stacks': 2,
        'cnv_dim': 256,
        'weights': 'hpdet_coco',
        'inres': args.inres,
    }
    # Pose-network output head sizes.
    heads = {
        'hm': 1,
        'hm_hp': 17,
        'hp_offset': 2,
        'hps': 34,
        'reg': 2,
        'wh': 2,
    }
    out_fn_keypoints = os.path.join(
        args.output,
        args.data + '_keypoints_results_%s_%s.json' % (args.inres[0],
                                                       args.inres[1]))
    model = HpDetDecode(HourglassNetwork(heads=heads, **kwargs))
    if args.no_full_resolution:
        letterbox_transformer = LetterboxTransformer(args.inres[0],
                                                     args.inres[1])
    else:
        letterbox_transformer = LetterboxTransformer(mode='testing',
                                                     max_stride=128)
    results = []
    for fn in tqdm(sorted(glob(os.path.join(args.data, '*.jpg')))):
        # COCO file names are zero-padded image ids.
        image_id = int(os.path.splitext(os.path.basename(fn))[0])
        img = cv2.imread(fn)
        batch = np.expand_dims(normalize_image(letterbox_transformer(img)), 0)
        for d in model.predict(batch)[0]:
            score = float(d[4])
            x1, y1, x2, y2 = letterbox_transformer.correct_box(*d[:4])
            x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)
            # d[5:-1] is 17 x-coords followed by 17 y-coords; map them back
            # onto the original image.
            joints = d[5:-1]
            coords = letterbox_transformer.correct_coords(
                np.vstack([joints[:17], joints[17:]])).T
            # Append a constant visibility flag of 1 for every joint.
            coords = np.concatenate(
                [coords, np.ones((17, 1), dtype='float32')], -1)
            results.append({
                'image_id': image_id,
                'category_id': 1,
                'score': score,
                'bbox': [x1, y1, (x2 - x1), (y2 - y1)],
                'keypoints': list(map(float, coords.flatten())),
            })
    if not results:
        print("No predictions were generated.")
        return
    # Persist predictions, then score them with the COCO evaluator.
    with open(out_fn_keypoints, 'w') as f:
        json.dump(results, f, indent=2)
    print("Predictions saved to: %s" % out_fn_keypoints)
    gt_fn = os.path.join(args.annotations,
                         'person_keypoints_%s.json' % args.data)
    print("Loading GT: %s" % gt_fn)
    coco_true = COCO(gt_fn)
    coco_pred = coco_true.loadRes(out_fn_keypoints)
    coco_eval = COCOeval(coco_true, coco_pred, 'keypoints')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats
def main():
    """Run human-pose detection on a video (or webcam) and save an annotated copy.

    --video 'webcam' opens camera 0; anything else is treated as a file path.
    The annotated stream is written as XVID .avi into --output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', default='webcam', type=str)
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--inres', default='512,512', type=str)
    parser.add_argument('--outres', default='1080,1920', type=str)
    parser.add_argument('--max-frames', default=1000000, type=int)
    parser.add_argument('--fps', default=25.0 * 1.0, type=float)
    args, _ = parser.parse_known_args()
    args.inres = tuple(int(x) for x in args.inres.split(','))
    args.outres = tuple(int(x) for x in args.outres.split(','))
    os.makedirs(args.output, exist_ok=True)
    kwargs = {
        'num_stacks': 2,
        'cnv_dim': 256,
        'weights': 'hpdet_coco',
        'inres': args.inres,
    }
    # Output head sizes of the pose network.
    heads = {
        'hm': 1,         # person-center heatmap
        'hm_hp': 17,     # per-joint keypoint heatmaps
        'hp_offset': 2,  # keypoint sub-pixel offsets
        'hps': 34,       # (x, y) regression for the 17 joints
        'reg': 2,        # center sub-pixel offset
        'wh': 2,         # box width / height
    }
    model = HourglassNetwork(heads=heads, **kwargs)
    model = HpDetDecode(model)
    drawer = COCODrawer()
    letterbox_transformer = LetterboxTransformer(args.inres[0], args.inres[1])
    cap = cv2.VideoCapture(0 if args.video == 'webcam' else args.video)
    out_fn = os.path.join(args.output, 'hpdet.'
                          + os.path.basename(args.video)).replace(
                              '.mp4', '.avi')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # --outres is parsed as (height, width); VideoWriter expects (width, height).
    out = cv2.VideoWriter(out_fn, fourcc, args.fps, args.outres[::-1])
    try:
        k = 0
        tic = time.time()
        while cap.isOpened():
            # '>=' (was '>', which processed one extra frame) so at most
            # --max-frames frames are handled.
            if k >= args.max_frames:
                print("Bye")
                break
            if k > 0 and k % 100 == 0:
                toc = time.time()
                duration = toc - tic
                print("[%05d]: %.3f seconds / 100 iterations" % (k, duration))
                tic = toc
            k += 1
            ret, img = cap.read()
            if not ret:
                print("Done")
                break
            pimg = letterbox_transformer(img)
            pimg = normalize_image(pimg)
            pimg = np.expand_dims(pimg, 0)
            detections = model.predict(pimg)[0]
            for d in detections:
                score, cl = d[4], d[-1]
                # break (not continue): detections are assumed sorted by score.
                if score < 0.3:
                    break
                x1, y1, x2, y2 = d[:4]
                # d[5:-1] holds 17 x-coords followed by 17 y-coords.
                kps = d[5:-1]
                kps_x = kps[:17]
                kps_y = kps[17:]
                kps = letterbox_transformer.correct_coords(
                    np.vstack([kps_x, kps_y])).T
                x1, y1, x2, y2 = letterbox_transformer.correct_box(
                    x1, y1, x2, y2)
                img = drawer.draw_pose(img, kps)
                img = drawer.draw_box(img, x1, y1, x2, y2, cl)
            # NOTE(review): frames are written unscaled; if the capture size
            # differs from --outres the writer may silently drop them — confirm.
            out.write(img)
    finally:
        # Release capture and writer so the output file is finalized
        # (previously both were leaked).
        cap.release()
        out.release()
    print("Video saved to: %s" % out_fn)
def main():
    """Evaluate the CtDet detector on a COCO split and report box AP.

    Predictions for every image in --data are dumped to a JSON file in
    --output, then scored with the official COCO bbox evaluator.
    Returns the COCOeval stats array (or None if nothing was predicted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--data', default='val2017', type=str)
    parser.add_argument('--annotations', default='annotations', type=str)
    parser.add_argument('--inres', default='512,512', type=str)
    parser.add_argument('--no-full-resolution', action='store_true')
    args, _ = parser.parse_known_args()
    args.inres = tuple(int(x) for x in args.inres.split(','))
    # Default mode is full resolution: the network input size is decided
    # per image, so inres is cleared to (None, None).
    if not args.no_full_resolution:
        args.inres = (None, None)
    os.makedirs(args.output, exist_ok=True)
    kwargs = {
        'num_stacks': 2,
        'cnv_dim': 256,
        'weights': 'ctdet_coco',
        'inres': args.inres,
    }
    # Detection-network output head sizes.
    heads = {'hm': 80, 'reg': 2, 'wh': 2}
    out_fn_box = os.path.join(
        args.output,
        args.data + '_bbox_results_%s_%s.json' % (args.inres[0],
                                                  args.inres[1]))
    model = CtDetDecode(HourglassNetwork(heads=heads, **kwargs))
    if args.no_full_resolution:
        letterbox_transformer = LetterboxTransformer(args.inres[0],
                                                     args.inres[1])
    else:
        letterbox_transformer = LetterboxTransformer(mode='testing',
                                                     max_stride=128)
    results = []
    for fn in tqdm(sorted(glob(os.path.join(args.data, '*.jpg')))):
        # COCO file names are zero-padded image ids.
        image_id = int(os.path.splitext(os.path.basename(fn))[0])
        img = cv2.imread(fn)
        batch = np.expand_dims(normalize_image(letterbox_transformer(img)), 0)
        # Every detection is kept: the COCO evaluator handles low scores.
        for x1, y1, x2, y2, score, cl in model.predict(batch)[0]:
            x1, y1, x2, y2 = letterbox_transformer.correct_box(x1, y1, x2, y2)
            x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)
            results.append({
                'image_id': image_id,
                'category_id': COCO_IDS[int(cl) + 1],
                'score': float(score),
                'bbox': [x1, y1, (x2 - x1), (y2 - y1)],
            })
    if not results:
        print("No predictions were generated.")
        return
    # Persist predictions, then score them with the COCO evaluator.
    with open(out_fn_box, 'w') as f:
        json.dump(results, f, indent=2)
    print("Predictions saved to: %s" % out_fn_box)
    gt_fn = os.path.join(args.annotations, 'instances_%s.json' % args.data)
    print("Loading GT: %s" % gt_fn)
    coco_true = COCO(gt_fn)
    coco_pred = coco_true.loadRes(out_fn_box)
    coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats