def evaluate(self, outs, cur_sample_idx):
    annots = self.datalist
    sample_num = len(outs)
    eval_result = {'joint_out': [], 'mesh_out': []}
    for n in range(sample_num):
        annot = annots[cur_sample_idx + n]
        out = outs[n]

        # x,y: resize to input image space and perform bbox to image affine transform
        mesh_out_img = out['mesh_coord_img']
        mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        mesh_out_img_xy1 = np.concatenate((mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
        mesh_out_img[:, :2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth
        root_joint_depth = annot['root_joint_depth']
        mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
        mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

        # camera back-projection
        cam_param = annot['cam_param']
        focal, princpt = cam_param['focal'], cam_param['princpt']
        mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)
        if cfg.stage == 'param':
            mesh_out_cam = out['mesh_coord_cam']
        joint_out_cam = np.dot(self.joint_regressor, mesh_out_cam)

        eval_result['mesh_out'].append(mesh_out_cam.tolist())
        eval_result['joint_out'].append(joint_out_cam.tolist())

        vis = False
        if vis:
            filename = annot['img_path'].split('/')[-1][:-4]
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_out_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_out_cam, self.mano.face, filename + '.obj')

    return eval_result
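# For reference: pixel2cam above back-projects (x, y) pixel coordinates with an
# absolute depth z into camera space using the camera intrinsics. A minimal
# sketch of this standard pinhole back-projection, consistent with how it is
# called here (the repository ships its own version, which may differ in detail):
def pixel2cam(pixel_coord, f, c):
    x = (pixel_coord[:, 0] - c[0]) / f[0] * pixel_coord[:, 2]
    y = (pixel_coord[:, 1] - c[1]) / f[1] * pixel_coord[:, 2]
    z = pixel_coord[:, 2]
    return np.stack((x, y, z), 1)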
def evaluate(self, preds):
    print()
    print('Evaluation start...')
    gts = self.datalist
    preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
    assert len(gts) == len(preds_joint_coord)
    sample_num = len(gts)

    mpjpe_sh = [[] for _ in range(self.joint_num * 2)]
    mpjpe_ih = [[] for _ in range(self.joint_num * 2)]
    mrrpe = []
    acc_hand_cls = 0
    hand_cls_cnt = 0
    for n in range(sample_num):
        data = gts[n]
        bbox, cam_param, joint, gt_hand_type, hand_type_valid = data['bbox'], data['cam_param'], data['joint'], data['hand_type'], data['hand_type_valid']
        focal = cam_param['focal']
        princpt = cam_param['princpt']
        gt_joint_coord = joint['cam_coord']
        joint_valid = joint['valid']

        # restore xy coordinates to original image space
        pred_joint_coord_img = preds_joint_coord[n].copy()
        pred_joint_coord_img[:, 0] = pred_joint_coord_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        pred_joint_coord_img[:, 1] = pred_joint_coord_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        for j in range(self.joint_num * 2):
            pred_joint_coord_img[j, :2] = trans_point2d(pred_joint_coord_img[j, :2], inv_trans[n])
        # restore depth to original camera space
        pred_joint_coord_img[:, 2] = (pred_joint_coord_img[:, 2] / cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size / 2)

        # mrrpe
        if gt_hand_type == 'interacting' and joint_valid[self.root_joint_idx['left']] and joint_valid[self.root_joint_idx['right']]:
            pred_rel_root_depth = (preds_rel_root_depth[n] / cfg.output_root_hm_shape * 2 - 1) * (cfg.bbox_3d_size_root / 2)

            pred_left_root_img = pred_joint_coord_img[self.root_joint_idx['left']].copy()
            pred_left_root_img[2] += data['abs_depth']['right'] + pred_rel_root_depth
            pred_left_root_cam = pixel2cam(pred_left_root_img[None, :], focal, princpt)[0]

            pred_right_root_img = pred_joint_coord_img[self.root_joint_idx['right']].copy()
            pred_right_root_img[2] += data['abs_depth']['right']
            pred_right_root_cam = pixel2cam(pred_right_root_img[None, :], focal, princpt)[0]

            pred_rel_root = pred_left_root_cam - pred_right_root_cam
            gt_rel_root = gt_joint_coord[self.root_joint_idx['left']] - gt_joint_coord[self.root_joint_idx['right']]
            mrrpe.append(float(np.sqrt(np.sum((pred_rel_root - gt_rel_root) ** 2))))

        # add root joint depth
        pred_joint_coord_img[self.joint_type['right'], 2] += data['abs_depth']['right']
        pred_joint_coord_img[self.joint_type['left'], 2] += data['abs_depth']['left']

        # back-project to camera coordinate system
        pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

        # root joint alignment
        for h in ('right', 'left'):
            pred_joint_coord_cam[self.joint_type[h]] = pred_joint_coord_cam[self.joint_type[h]] - pred_joint_coord_cam[self.root_joint_idx[h], None, :]
            gt_joint_coord[self.joint_type[h]] = gt_joint_coord[self.joint_type[h]] - gt_joint_coord[self.root_joint_idx[h], None, :]

        # mpjpe
        for j in range(self.joint_num * 2):
            if joint_valid[j]:
                if gt_hand_type == 'right' or gt_hand_type == 'left':
                    mpjpe_sh[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j]) ** 2)))
                else:
                    mpjpe_ih[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j]) ** 2)))

        # handedness accuracy
        if hand_type_valid:
            if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
                acc_hand_cls += 1
            elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
                acc_hand_cls += 1
            elif gt_hand_type == 'interacting' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] > 0.5:
                acc_hand_cls += 1
            hand_cls_cnt += 1

        vis = False
        if vis:
            img_path = data['img_path']
            cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            _img = cvimg[:, :, ::-1].transpose(2, 0, 1)
            vis_kps = pred_joint_coord_img.copy()
            vis_valid = joint_valid.copy()
            capture = str(data['capture'])
            cam = str(data['cam'])
            frame = str(data['frame'])
            filename = 'out_' + str(n) + '_' + gt_hand_type + '.jpg'
            vis_keypoints(_img, vis_kps, vis_valid, self.skeleton, filename)

        vis = False
        if vis:
            filename = 'out_' + str(n) + '_3d.jpg'
            vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton, filename)

    if hand_cls_cnt > 0:
        print('Handedness accuracy: ' + str(acc_hand_cls / hand_cls_cnt))
    if len(mrrpe) > 0:
        print('MRRPE: ' + str(sum(mrrpe) / len(mrrpe)))
    print()

    tot_err = []
    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num * 2):
        tot_err_j = np.mean(np.concatenate((np.stack(mpjpe_sh[j]), np.stack(mpjpe_ih[j]))))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % tot_err_j)
        tot_err.append(tot_err_j)
    print(eval_summary)
    print('MPJPE for all hand sequences: %.2f' % (np.mean(tot_err)))
    print()

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num * 2):
        mpjpe_sh[j] = np.mean(np.stack(mpjpe_sh[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe_sh[j])
    print(eval_summary)
    print('MPJPE for single hand sequences: %.2f' % (np.mean(mpjpe_sh)))
    print()

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num * 2):
        mpjpe_ih[j] = np.mean(np.stack(mpjpe_ih[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe_ih[j])
    print(eval_summary)
    print('MPJPE for interacting hand sequences: %.2f' % (np.mean(mpjpe_ih)))
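# For reference: trans_point2d used above applies a 2x3 affine matrix (here
# inv_trans, mapping the network input crop back to the original image) to one
# 2D point. A minimal sketch under that assumption:
def trans_point2d(pt_2d, trans):
    src_pt = np.array([pt_2d[0], pt_2d[1], 1.])  # homogeneous coordinates
    dst_pt = np.dot(trans, src_pt)               # trans is a 2x3 affine matrix
    return dst_pt[0:2]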
def evaluate(self, outs, cur_sample_idx):
    annots = self.datalist
    sample_num = len(outs)
    eval_result = {'mpjpe_lixel': [], 'pa_mpjpe_lixel': [], 'mpjpe_param': [], 'pa_mpjpe_param': []}
    for n in range(sample_num):
        annot = annots[cur_sample_idx + n]
        out = outs[n]

        # h36m joint from gt mesh
        mesh_gt_cam = out['mesh_coord_cam_target']
        pose_coord_gt_h36m = np.dot(self.h36m_joint_regressor, mesh_gt_cam)
        depth_gt_h36m = pose_coord_gt_h36m[self.h36m_root_joint_idx, 2]
        pose_coord_gt_h36m = pose_coord_gt_h36m - pose_coord_gt_h36m[self.h36m_root_joint_idx, None]  # root-relative
        pose_coord_gt_h36m = pose_coord_gt_h36m[self.h36m_eval_joint, :]

        # mesh from lixel
        # x,y: resize to input image space and perform bbox to image affine transform
        mesh_out_img = out['mesh_coord_img']
        mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        mesh_out_img_xy1 = np.concatenate((mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
        mesh_out_img[:, :2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth
        if cfg.use_gt_info:
            root_joint_depth = depth_gt_h36m
        else:
            root_joint_depth = annot['root_joint_depth']
        mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
        mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

        # camera back-projection
        cam_param = annot['cam_param']
        focal, princpt = cam_param['focal'], cam_param['princpt']
        mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

        # h36m joint from lixel mesh
        pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
        pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx, None]  # root-relative
        pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint, :]
        pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
        eval_result['mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m) ** 2, 1)).mean() * 1000)  # meter -> millimeter
        eval_result['pa_mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m) ** 2, 1)).mean() * 1000)  # meter -> millimeter

        # h36m joint from parameter mesh
        if cfg.stage == 'param':
            mesh_out_cam = out['mesh_coord_cam']
            pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
            pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx, None]  # root-relative
            pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint, :]
            pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
            eval_result['mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m) ** 2, 1)).mean() * 1000)  # meter -> millimeter
            eval_result['pa_mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m) ** 2, 1)).mean() * 1000)  # meter -> millimeter

        vis = False
        if vis:
            seq_name = annot['img_path'].split('/')[-2]
            img_name = annot['img_path'].split('/')[-1][:-4]
            filename = seq_name + '_' + img_name + '_' + str(n)
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_out_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_out_cam, self.smpl.face, filename + '.obj')

    return eval_result
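# For reference: rigid_align above performs a Procrustes (similarity) alignment
# of the prediction onto the ground truth before computing PA-MPJPE. A minimal
# Umeyama-style sketch consistent with how it is called here; the repository's
# own implementation may differ in details:
def rigid_align(A, B):
    # A, B: (N, 3) joint arrays; returns A after the optimal scale/rotation/translation
    mu_A, mu_B = A.mean(0), B.mean(0)
    A0, B0 = A - mu_A, B - mu_B
    n = A.shape[0]
    H = A0.T @ B0 / n                          # 3x3 cross-covariance
    U, s, Vt = np.linalg.svd(H)
    d = np.sign(np.linalg.det(Vt.T @ U.T))     # guard against reflections
    D = np.diag([1.0, 1.0, d])
    R = Vt.T @ D @ U.T                         # optimal rotation
    var_A = (A0 ** 2).sum() / n
    c = np.trace(np.diag(s) @ D) / var_A       # optimal scale
    t = mu_B - c * R @ mu_A                    # optimal translation
    return c * A @ R.T + t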
mesh_lixel_img = out['mesh_coord_img'][0].cpu().numpy()
mesh_param_cam = out['mesh_coord_cam'][0].cpu().numpy()

# restore mesh_lixel_img to original image space and continuous depth space
mesh_lixel_img[:, 0] = mesh_lixel_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
mesh_lixel_img[:, 1] = mesh_lixel_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
mesh_lixel_img[:, :2] = np.dot(bb2img_trans, np.concatenate((mesh_lixel_img[:, :2], np.ones_like(mesh_lixel_img[:, :1])), 1).transpose(1, 0)).transpose(1, 0)
mesh_lixel_img[:, 2] = (mesh_lixel_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)

# root-relative 3D coordinates -> absolute 3D coordinates
focal = (1500, 1500)
princpt = (original_img_width / 2, original_img_height / 2)
root_xy = np.dot(joint_regressor, mesh_lixel_img)[root_joint_idx, :2]
root_depth = 11250.5732421875  # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
root_depth /= 1000  # output of RootNet is millimeter; convert it to meter
root_img = np.array([root_xy[0], root_xy[1], root_depth])
root_cam = pixel2cam(root_img[None, :], focal, princpt)
mesh_lixel_img[:, 2] += root_depth
mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
mesh_param_cam += root_cam.reshape(1, 3)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
vis_img = vis_mesh(vis_img, mesh_lixel_img)
cv2.imwrite('output_mesh_lixel.jpg', vis_img)

# visualize param mesh in 2D space
vis_img = original_img.copy()
mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
vis_img = vis_mesh(vis_img, mesh_param_img)
cv2.imwrite('output_mesh_param.jpg', vis_img)
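# For reference: cam2pixel above is the inverse of pixel2cam, projecting camera
# coordinates to pixel coordinates while keeping depth in the third column.
# A minimal pinhole-projection sketch, consistent with its usage here:
def cam2pixel(cam_coord, f, c):
    x = cam_coord[:, 0] / cam_coord[:, 2] * f[0] + c[0]
    y = cam_coord[:, 1] / cam_coord[:, 2] * f[1] + c[1]
    z = cam_coord[:, 2]
    return np.stack((x, y, z), 1)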
def evaluate(self, preds):
    print()
    print('Evaluation start...')
    gts = self.datalist
    preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
    assert len(gts) == len(preds_joint_coord)
    sample_num = len(gts)

    mpjpe = [[] for _ in range(self.joint_num)]  # treat right and left hands identically
    acc_hand_cls = 0
    for n in range(sample_num):
        data = gts[n]
        bbox, cam_param, joint, gt_hand_type = data['bbox'], data['cam_param'], data['joint'], data['hand_type']
        focal = cam_param['focal']
        princpt = cam_param['princpt']
        gt_joint_coord = joint['cam_coord']
        joint_valid = joint['valid']

        # restore coordinates to original space
        pred_joint_coord_img = preds_joint_coord[n].copy()
        pred_joint_coord_img[:, 0] = pred_joint_coord_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        pred_joint_coord_img[:, 1] = pred_joint_coord_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        for j in range(self.joint_num * 2):
            pred_joint_coord_img[j, :2] = trans_point2d(pred_joint_coord_img[j, :2], inv_trans[n])
        pred_joint_coord_img[:, 2] = (pred_joint_coord_img[:, 2] / cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size / 2)
        pred_joint_coord_img[:, 2] = pred_joint_coord_img[:, 2] + data['abs_depth']

        # back-project to camera coordinate system
        pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

        # root joint alignment
        pred_joint_coord_cam[self.joint_type['right']] = pred_joint_coord_cam[self.joint_type['right']] - pred_joint_coord_cam[self.root_joint_idx['right'], None, :]
        pred_joint_coord_cam[self.joint_type['left']] = pred_joint_coord_cam[self.joint_type['left']] - pred_joint_coord_cam[self.root_joint_idx['left'], None, :]
        gt_joint_coord[self.joint_type['right']] = gt_joint_coord[self.joint_type['right']] - gt_joint_coord[self.root_joint_idx['right'], None, :]
        gt_joint_coord[self.joint_type['left']] = gt_joint_coord[self.joint_type['left']] - gt_joint_coord[self.root_joint_idx['left'], None, :]

        # select right or left hand using groundtruth hand type
        pred_joint_coord_cam = pred_joint_coord_cam[self.joint_type[gt_hand_type]]
        gt_joint_coord = gt_joint_coord[self.joint_type[gt_hand_type]]
        joint_valid = joint_valid[self.joint_type[gt_hand_type]]

        # mpjpe save
        for j in range(self.joint_num):
            if joint_valid[j]:
                mpjpe[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j]) ** 2)))

        if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
            acc_hand_cls += 1
        elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
            acc_hand_cls += 1

        vis = False
        if vis:
            img_path = data['img_path']
            cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            _img = cvimg[:, :, ::-1].transpose(2, 0, 1)
            vis_kps = pred_joint_coord_img.copy()
            vis_valid = joint_valid.copy()
            filename = 'out_' + str(n) + '.jpg'
            vis_keypoints(_img, vis_kps, vis_valid, self.skeleton[:self.joint_num], filename)

        vis = False
        if vis:
            filename = 'out_' + str(n) + '_3d.png'
            vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton[:self.joint_num], filename)

    print('Handedness accuracy: ' + str(acc_hand_cls / sample_num))

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num):
        mpjpe[j] = np.mean(np.stack(mpjpe[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe[j])
    print(eval_summary)
    print('MPJPE: %.2f' % (np.mean(mpjpe)))
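# The handedness checks in both evaluate() functions above threshold the two
# predicted hand probabilities (index 0: right, index 1: left) independently at
# 0.5. A hypothetical helper (name and structure are illustrative, not part of
# the repository) that makes the same decision rule explicit:
def classify_hand_type(hand_prob, thr=0.5):
    right, left = hand_prob[0] > thr, hand_prob[1] > thr
    if right and left:
        return 'interacting'
    if right:
        return 'right'
    if left:
        return 'left'
    return 'none'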
def main():
    # input_size = 416
    # iou_threshold = 0.45
    # score_threshold = 0.3
    # Yolo = Load_Yolo_model()
    times = []
    output_path = "output"

    vid = cv2.VideoCapture(0)
    vid.set(3, 1280)
    vid.set(4, 1024)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    focal = (1500, 1500)
    princpt = (width / 2, height / 2)
    print(f"Width {width} Height {height}")
    bbox = [0, 0, width, height]
    bbox = process_bbox(bbox, width, height)
    root_depth = 11250.5732421875  # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
    root_depth /= 1000  # output of RootNet is millimeter; convert it to meter

    with torch.no_grad():
        while True:
            _, frame = vid.read()

            t1 = time.time()
            try:
                # the network expects RGB input; OpenCV captures BGR
                original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            except:
                break

            # image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
            # image_data = image_data[np.newaxis, ...].astype(np.float32)

            # if YOLO_FRAMEWORK == "tf":
            #     pred_bbox = Yolo.predict(image_data)
            # elif YOLO_FRAMEWORK == "trt":
            #     batched_input = tf.constant(image_data)
            #     result = Yolo(batched_input)
            #     pred_bbox = []
            #     for key, value in result.items():
            #         value = value.numpy()
            #         pred_bbox.append(value)

            # pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            # pred_bbox = tf.concat(pred_bbox, axis=0)

            # bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
            # bboxes = nms(bboxes, iou_threshold, method='nms')

            # frame = draw_bbox(original_frame, bboxes)

            # ---------------------------------------- i2l meshnet ----------------------------------------
            # original_img_height, original_img_width = original_frame.shape[:2]
            # bbox = bboxes[0][:4]
            img, img2bb_trans, bb2img_trans = generate_patch_image(
                original_frame, bbox, 1.0, 0.0, False, cfg.input_img_shape)
            img = transform(img.astype(np.float32)) / 255
            img = img.cuda()[None, :, :, :]

            # forward
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}
            out = model(inputs, targets, meta_info, 'test')

            img = img[0].cpu().numpy().transpose(1, 2, 0)  # cfg.input_img_shape[1], cfg.input_img_shape[0], 3
            mesh_lixel_img = out['mesh_coord_img'][0].cpu().numpy()
            mesh_param_cam = out['mesh_coord_cam'][0].cpu().numpy()

            # restore mesh_lixel_img to original image space and continuous depth space
            mesh_lixel_img[:, 0] = mesh_lixel_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_lixel_img[:, 1] = mesh_lixel_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_lixel_img[:, :2] = np.dot(
                bb2img_trans,
                np.concatenate((mesh_lixel_img[:, :2], np.ones_like(mesh_lixel_img[:, :1])), 1).transpose(1, 0)
            ).transpose(1, 0)
            mesh_lixel_img[:, 2] = (mesh_lixel_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)

            # root-relative 3D coordinates -> absolute 3D coordinates
            root_xy = np.dot(joint_regressor, mesh_lixel_img)[root_joint_idx, :2]
            root_img = np.array([root_xy[0], root_xy[1], root_depth])
            root_cam = pixel2cam(root_img[None, :], focal, princpt)
            mesh_lixel_img[:, 2] += root_depth
            mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
            mesh_param_cam += root_cam.reshape(1, 3)

            # visualize lixel mesh in 2D space
            # vis_img = frame.copy()
            # vis_img = vis_mesh(vis_img, mesh_lixel_img)
            # cv2.imwrite('output_mesh_lixel.jpg', vis_img)

            # visualize param mesh in 2D space
            # vis_img = frame.copy()
            # mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
            # vis_img = vis_mesh(vis_img, mesh_param_img)
            # cv2.imwrite('output_mesh_param.jpg', vis_img)

            # save mesh (obj)
            # save_obj(mesh_lixel_cam, face, 'output_mesh_lixel.obj')
            # save_obj(mesh_param_cam, face, 'output_mesh_param.obj')

            # render mesh from lixel
            vis_img = frame.copy()
            rendered_img = render_mesh(vis_img, mesh_lixel_cam, face, {'focal': focal, 'princpt': princpt})
            # cv2.imwrite('rendered_mesh_lixel.jpg', rendered_img)
            cv2.imshow('output', rendered_img / 255)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

            # render mesh from param
            # vis_img = frame.copy()
            # rendered_img = render_mesh(vis_img, mesh_param_cam, face, {'focal': focal, 'princpt': princpt})
            # cv2.imwrite('rendered_mesh_param.jpg', rendered_img)
            # ---------------------------------------- i2l meshnet ----------------------------------------

            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]

            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms
            print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
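# For reference: process_bbox called in main() prepares the crop region before
# generate_patch_image. A sketch of an aspect-ratio-preserving bbox expansion
# consistent with its usage above; the 1.25 margin factor is an assumption, and
# cfg.input_img_shape is taken to be (height, width) as elsewhere in this code:
def process_bbox(bbox, img_width, img_height):
    x, y, w, h = bbox
    c_x, c_y = x + w / 2., y + h / 2.
    aspect_ratio = cfg.input_img_shape[1] / cfg.input_img_shape[0]  # width / height
    # grow the shorter side so the crop matches the network input aspect ratio
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    else:
        w = h * aspect_ratio
    w, h = w * 1.25, h * 1.25  # assumed margin around the subject
    return np.array([c_x - w / 2., c_y - h / 2., w, h], dtype=np.float32)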