def test_net(tester, dets, det_range, gpu_id):
    dump_results = []
    start_time = time.time()

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        img_end = img_start + 1
        im_info = dets[img_start]
        while img_end < det_range[1] and dets[img_end]['image_id'] == im_info['image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = dets[img_start:img_end]
        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # forward
            heatmap = tester.predict_one([imgs])[0]

            if cfg.flip_test:
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]

                # flip the heatmaps back and swap symmetric keypoint channels
                flip_heatmap = flip_heatmap[:, :, ::-1, :]
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap_w, flip_heatmap_q = flip_heatmap[:, :, :, w].copy(), flip_heatmap[:, :, :, q].copy()
                    flip_heatmap[:, :, :, q], flip_heatmap[:, :, :, w] = flip_heatmap_w, flip_heatmap_q
                # shift by one pixel along the width to compensate for the flip
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                for j in range(cfg.num_kps):
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)

                    # quarter-pixel refinement toward the higher neighboring activation
                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
                    if 1 < px < cfg.output_shape[1] - 1 and 1 < py < cfg.output_shape[0] - 1:
                        diff = np.array([
                            hm_j[py][px + 1] - hm_j[py][px - 1],
                            hm_j[py + 1][px] - hm_j[py - 1][px]
                        ])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    kps_result[image_id, j, :2] = (x * cfg.input_shape[1] / cfg.output_shape[1],
                                                   y * cfg.input_shape[0] / cfg.output_shape[0])
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] - crop_info[1])
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) and area > 96 ** 2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(osp.join(cfg.vis_dir, str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map keypoints back to the original image coordinates
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] / cfg.input_shape[1] * (
                        crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) + \
                        crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] / cfg.input_shape[0] * (
                        crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1]) + \
                        crop_infos[image_id - start_id][1]

                area_save[image_id] = (crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) * (
                    crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1])

        # vis
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # rescoring and oks nms
        if cfg.dataset == 'COCO':
            rescored_score = np.zeros((len(score_result)))
            for i in range(len(score_result)):
                score_mask = score_result[i] > cfg.score_thr
                if np.sum(score_mask) > 0:
                    rescored_score[i] = np.mean(score_result[i][score_mask]) * cropped_data[i]['score']
            score_result = rescored_score
            keep = oks_nms(kps_result, score_result, area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep]
                area_save = area_save[keep]
        elif cfg.dataset == 'PoseTrack':
            keep = oks_nms(kps_result, np.mean(score_result, axis=1), area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep, :]
                area_save = area_save[keep]

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(score_result[i], 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)

    return dump_results
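# Note: oks_nms() above is imported from the repo's NMS utilities and is not shown in this
# section. For reference only, a minimal sketch of greedy OKS-based suppression under the
# standard COCO OKS formulation is given below; the helper names (oks_iou_sketch,
# oks_nms_sketch) and the sigmas argument default are assumptions, not the repo's exact code.
def oks_iou_sketch(g, d, a_g, a_d, sigmas):
    # g, d: flattened keypoints [x1, y1, v1, ..., xK, yK, vK]; a_g, a_d: instance areas
    variances = (sigmas * 2) ** 2
    xg, yg = g[0::3], g[1::3]
    xd, yd = d[0::3], d[1::3]
    dx, dy = xd - xg, yd - yg
    e = (dx ** 2 + dy ** 2) / variances / ((a_g + a_d) / 2 + np.spacing(1)) / 2
    return np.sum(np.exp(-e)) / e.shape[0]

def oks_nms_sketch(kpts, scores, areas, thresh, sigmas):
    # greedily keep the highest-scoring pose and drop others whose OKS with it exceeds thresh
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ious = np.array([oks_iou_sketch(kpts[i], kpts[j], areas[i], areas[j], sigmas)
                         for j in order[1:]])
        order = order[1:][ious <= thresh]
    return keep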
def generate_batch(d, stage='train', add_paf=False):
    #print(os.path.join(cfg.img_path, d['imgpath']))
    img = cv2.imread(os.path.join(cfg.img_path, d['imgpath']),
                     cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if img is None:
        print('cannot read ' + os.path.join(cfg.img_path, d['imgpath']))
        assert 0

    bbox = np.array(d['bbox']).astype(np.float32)

    x, y, w, h = bbox
    aspect_ratio = cfg.input_shape[1] / cfg.input_shape[0]
    center = np.array([x + w * 0.5, y + h * 0.5])
    # pad the bbox to the network aspect ratio, then enlarge it by 1.25x
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w, h]) * 1.25
    rotation = 0

    if stage == 'train':
        joints = np.array(d['joints']).reshape(-1, cfg.num_kps, 3)
        estimated_joints = np.array(d['estimated_joints']).reshape(-1, cfg.num_kps, 3) \
            if 'estimated_joints' in d else np.empty([0, cfg.num_kps, 3])
        near_joints = np.array(d['near_joints']).reshape(-1, cfg.num_kps, 3) \
            if 'near_joints' in d else np.empty([0, cfg.num_kps, 3])
        total_joints = np.concatenate([joints, estimated_joints, near_joints], axis=0)

        # data augmentation
        scale = scale * np.clip(np.random.randn() * cfg.scale_factor + 1,
                                1 - cfg.scale_factor, 1 + cfg.scale_factor)
        rotation = np.clip(np.random.randn() * cfg.rotation_factor,
                           -cfg.rotation_factor * 2, cfg.rotation_factor * 2) \
            if random.random() <= 0.6 else 0
        if random.random() <= 0.5:
            img = img[:, ::-1, :]
            center[0] = img.shape[1] - 1 - center[0]
            total_joints[:, :, 0] = img.shape[1] - 1 - total_joints[:, :, 0]
            for (q, w) in cfg.kps_symmetry:
                total_joints_q, total_joints_w = total_joints[:, q, :].copy(), total_joints[:, w, :].copy()
                total_joints[:, w, :], total_joints[:, q, :] = total_joints_q, total_joints_w

        trans = get_affine_transform(center, scale, rotation, (cfg.input_shape[1], cfg.input_shape[0]))
        cropped_img = cv2.warpAffine(img, trans, (cfg.input_shape[1], cfg.input_shape[0]),
                                     flags=cv2.INTER_LINEAR)
        if cfg.voc_augment:
            # the occluder augmentation expects RGB, so flip channels around the call
            cropped_img = cropped_img[:, :, ::-1]
            cropped_img = occlude_with_objects(cropped_img, occluder)
            cropped_img = cropped_img[:, :, ::-1]
        cropped_img = cfg.normalize_input(cropped_img)

        # transform all joint sets into the crop and invalidate joints that fall outside it
        for i in range(len(total_joints)):
            for j in range(cfg.num_kps):
                if total_joints[i, j, 2] > 0:
                    total_joints[i, j, :2] = affine_transform(total_joints[i, j, :2], trans)
                    total_joints[i, j, 2] *= ((total_joints[i, j, 0] >= 0) &
                                              (total_joints[i, j, 0] < cfg.input_shape[1]) &
                                              (total_joints[i, j, 1] >= 0) &
                                              (total_joints[i, j, 1] < cfg.input_shape[0]))

        joints = total_joints[0]
        estimated_joints = total_joints[1] if estimated_joints.shape[0] > 0 else total_joints[0]
        near_joints = total_joints[2:] if near_joints.shape[0] > 0 else total_joints[0]

        # bbox area measured in the cropped coordinate frame
        xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]
        pt1 = affine_transform(np.array([xmin, ymin]), trans)
        pt2 = affine_transform(np.array([xmax, ymin]), trans)
        pt3 = affine_transform(np.array([xmax, ymax]), trans)
        area = math.sqrt(pow(pt2[0] - pt1[0], 2) + pow(pt2[1] - pt1[1], 2)) * \
               math.sqrt(pow(pt3[0] - pt2[0], 2) + pow(pt3[1] - pt2[1], 2))

        # input pose synthesize
        synth_joints = synthesize_pose(joints, estimated_joints, near_joints, area,
                                       d['overlap'] if 'overlap' in d else 0)

        target_coord = joints[:, :2]
        target_valid = joints[:, 2]
        input_pose_coord = synth_joints[:, :2]
        input_pose_valid = synth_joints[:, 2]

        # for debug
        vis = False
        if vis:
            filename = str(random.randrange(1, 500))
            tmpimg = cropped_img.astype(np.float32).copy()
            tmpimg = cfg.denormalize_input(tmpimg)
            tmpimg = tmpimg.astype(np.uint8).copy()
            tmpkps = np.zeros((3, cfg.num_kps))
            tmpkps[:2, :] = target_coord.transpose(1, 0)
            tmpkps[2, :] = target_valid
            tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, filename + '_gt.jpg'), tmpimg)

            tmpimg = cropped_img.astype(np.float32).copy()
            tmpimg = cfg.denormalize_input(tmpimg)
            tmpimg = tmpimg.astype(np.uint8).copy()
            tmpkps = np.zeros((3, cfg.num_kps))
            tmpkps[:2, :] = input_pose_coord.transpose(1, 0)
            tmpkps[2, :] = input_pose_valid
            tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, filename + '_input_pose.jpg'), tmpimg)

        if add_paf:
            paf, paf_valid = render_paf(target_coord, target_valid, cfg.kps_lines,
                                        cfg.input_shape, cfg.output_shape)
            #print('sum check B', np.sum(paf[:,20,6]), 'paf shape', paf.shape)
            return [cropped_img, target_coord, input_pose_coord,
                    (target_valid > 0), (input_pose_valid > 0), paf, paf_valid]
        else:
            return [cropped_img, target_coord, input_pose_coord,
                    (target_valid > 0), (input_pose_valid > 0)]

    else:
        trans = get_affine_transform(center, scale, rotation, (cfg.input_shape[1], cfg.input_shape[0]))
        cropped_img = cv2.warpAffine(img, trans, (cfg.input_shape[1], cfg.input_shape[0]),
                                     flags=cv2.INTER_LINEAR)
        #cropped_img = cropped_img[:, :, ::-1]
        cropped_img = cfg.normalize_input(cropped_img)

        estimated_joints = np.array(d['estimated_joints']).reshape(cfg.num_kps, 3)
        for i in range(cfg.num_kps):
            if estimated_joints[i, 2] > 0:
                estimated_joints[i, :2] = affine_transform(estimated_joints[i, :2], trans)
                estimated_joints[i, 2] *= ((estimated_joints[i, 0] >= 0) &
                                           (estimated_joints[i, 0] < cfg.input_shape[1]) &
                                           (estimated_joints[i, 1] >= 0) &
                                           (estimated_joints[i, 1] < cfg.input_shape[0]))

        input_pose_coord = estimated_joints[:, :2]
        input_pose_valid = np.array([1 if i not in cfg.ignore_kps else 0 for i in range(cfg.num_kps)])
        input_pose_score = d['estimated_score']
        crop_info = np.asarray([center[0] - scale[0] * 0.5, center[1] - scale[1] * 0.5,
                                center[0] + scale[0] * 0.5, center[1] + scale[1] * 0.5])

        return [cropped_img, input_pose_coord, input_pose_valid, input_pose_score, crop_info]
def test_net(tester, input_pose, det_range, gpu_id):
    dump_results = []
    start_time = time.time()

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    test_count = 0
    while img_start < det_range[1]:
        record_id = img_start
        img_end = img_start + 1
        im_info = input_pose[img_start]
        while img_end < det_range[1] and input_pose[img_end]['image_id'] == im_info['image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = input_pose[img_start:img_end]
        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            input_pose_coords = []
            input_pose_valids = []
            input_pose_scores = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, input_pose_coord, input_pose_valid, input_pose_score, crop_info = \
                    generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                input_pose_coords.append(input_pose_coord)
                input_pose_valids.append(input_pose_valid)
                input_pose_scores.append(input_pose_score)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            input_pose_coords = np.array(input_pose_coords)
            input_pose_valids = np.array(input_pose_valids)
            input_pose_scores = np.array(input_pose_scores)
            crop_infos = np.array(crop_infos)

            #if test_count < 2100:
            #    test_count += imgs.shape[0]
            #    continue
            #test_count += imgs.shape[0]

            # forward
            coord, heatmaps = tester.predict_one([imgs, input_pose_coords, input_pose_valids])
            #np.savez('temp/imgs_{:d}_{:d}.npz'.format(record_id, batch_id), imgs=imgs, heatmaps=heatmaps, coord=coord)

            if cfg.flip_test:
                # flip the images and the input pose, swapping symmetric keypoints
                flip_imgs = imgs[:, :, ::-1, :]
                flip_input_pose_coords = input_pose_coords.copy()
                flip_input_pose_coords[:, :, 0] = cfg.input_shape[1] - 1 - flip_input_pose_coords[:, :, 0]
                flip_input_pose_valids = input_pose_valids.copy()
                for (q, w) in cfg.kps_symmetry:
                    flip_input_pose_coords_w, flip_input_pose_coords_q = \
                        flip_input_pose_coords[:, w, :].copy(), flip_input_pose_coords[:, q, :].copy()
                    flip_input_pose_coords[:, q, :], flip_input_pose_coords[:, w, :] = \
                        flip_input_pose_coords_w, flip_input_pose_coords_q
                    flip_input_pose_valids_w, flip_input_pose_valids_q = \
                        flip_input_pose_valids[:, w].copy(), flip_input_pose_valids[:, q].copy()
                    flip_input_pose_valids[:, q], flip_input_pose_valids[:, w] = \
                        flip_input_pose_valids_w, flip_input_pose_valids_q

                flip_coord = tester.predict_one([flip_imgs, flip_input_pose_coords,
                                                 flip_input_pose_valids])[0]

                # flip the predicted coordinates back and average with the original prediction
                flip_coord[:, :, 0] = cfg.input_shape[1] - 1 - flip_coord[:, :, 0]
                for (q, w) in cfg.kps_symmetry:
                    flip_coord_w, flip_coord_q = flip_coord[:, w, :].copy(), flip_coord[:, q, :].copy()
                    flip_coord[:, q, :], flip_coord[:, w, :] = flip_coord_w, flip_coord_q

                coord += flip_coord
                coord /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):
                kps_result[image_id, :, :2] = coord[image_id - start_id]
                kps_result[image_id, :, 2] = input_pose_scores[image_id - start_id]

                vis = True
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] - crop_info[1])
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) and area > 96 ** 2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps, kp_thresh=0.05)
                    cv2.imwrite(osp.join(cfg.vis_dir, str(img_id) + '_output.jpg'), _tmpimg)
                    temp_heatmaps = heatmaps[image_id - start_id]
                    save_mergedHeatmaps(temp_heatmaps,
                                        osp.join(cfg.vis_dir, str(img_id) + '_hm.jpg'),
                                        softmax=True)
                    img_id += 1

                # map keypoints back to the original image coordinates
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] / cfg.input_shape[1] * (
                        crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) + \
                        crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] / cfg.input_shape[0] * (
                        crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1]) + \
                        crop_infos[image_id - start_id][1]

                area_save[image_id] = (crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) * (
                    crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1])

        # vis
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # oks nms
        if cfg.dataset in ['COCO', 'PoseTrack']:
            nms_kps = np.delete(kps_result, cfg.ignore_kps, 1)
            nms_score = np.mean(nms_kps[:, :, 2], axis=1)
            nms_kps[:, :, 2] = 1
            nms_kps = nms_kps.reshape(len(kps_result), -1)
            nms_sigmas = np.delete(cfg.kps_sigmas, cfg.ignore_kps)
            keep = oks_nms(nms_kps, nms_score, area_save, cfg.oks_nms_thr, nms_sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :, :]
                area_save = area_save[keep]

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(np.mean(score_result[i]), 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)

    return dump_results
def generate_batch(d, stage='train'):
    img = cv2.imread(os.path.join(cfg.img_path, d['imgpath']),
                     cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if img is None:
        return None
        #print('cannot read ' + os.path.join(cfg.img_path, d['imgpath']))
        #assert 0

    bbox = np.array(d['bbox']).astype(np.float32)

    x, y, w, h = bbox
    aspect_ratio = cfg.input_shape[1] / cfg.input_shape[0]
    center = np.array([x + w * 0.5, y + h * 0.5])
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w, h]) * 1.25
    rotation = 0

    if stage == 'train':
        joints = np.array(d['joints']).reshape(cfg.num_kps, 3).astype(np.float32)

        # data augmentation
        scale = scale * np.clip(np.random.randn() * cfg.scale_factor + 1,
                                1 - cfg.scale_factor, 1 + cfg.scale_factor)
        rotation = np.clip(np.random.randn() * cfg.rotation_factor,
                           -cfg.rotation_factor * 2, cfg.rotation_factor * 2) \
            if random.random() <= 0.6 else 0
        if random.random() <= 0.5:
            img = img[:, ::-1, :]
            center[0] = img.shape[1] - 1 - center[0]
            joints[:, 0] = img.shape[1] - 1 - joints[:, 0]
            for (q, w) in cfg.kps_symmetry:
                joints_q, joints_w = joints[q, :].copy(), joints[w, :].copy()
                joints[w, :], joints[q, :] = joints_q, joints_w

        trans = get_affine_transform(center, scale, rotation, (cfg.input_shape[1], cfg.input_shape[0]))
        cropped_img = cv2.warpAffine(img, trans, (cfg.input_shape[1], cfg.input_shape[0]),
                                     flags=cv2.INTER_LINEAR)
        #cropped_img = cropped_img[:, :, ::-1]
        cropped_img = cfg.normalize_input(cropped_img)

        for i in range(cfg.num_kps):
            if joints[i, 2] > 0:
                joints[i, :2] = affine_transform(joints[i, :2], trans)
                joints[i, 2] *= ((joints[i, 0] >= 0) & (joints[i, 0] < cfg.input_shape[1]) &
                                 (joints[i, 1] >= 0) & (joints[i, 1] < cfg.input_shape[0]))

        target_coord = joints[:, :2]
        target_valid = joints[:, 2]

        # for debug
        vis = True
        if vis:
            filename = str(random.randrange(1, 500))
            tmpimg = cropped_img.astype(np.float32).copy()
            tmpimg = cfg.denormalize_input(tmpimg)
            tmpimg = tmpimg.astype(np.uint8).copy()
            tmpkps = np.zeros((3, cfg.num_kps))
            tmpkps[:2, :] = target_coord.transpose(1, 0)
            tmpkps[2, :] = target_valid
            tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, filename + '_gt.jpg'), tmpimg)

        return [cropped_img, target_coord, (target_valid > 0)]

    else:
        trans = get_affine_transform(center, scale, rotation, (cfg.input_shape[1], cfg.input_shape[0]))
        cropped_img = cv2.warpAffine(img, trans, (cfg.input_shape[1], cfg.input_shape[0]),
                                     flags=cv2.INTER_LINEAR)
        #cropped_img = cropped_img[:, :, ::-1]
        cropped_img = cfg.normalize_input(cropped_img)
        crop_info = np.asarray([center[0] - scale[0] * 0.5, center[1] - scale[1] * 0.5,
                                center[0] + scale[0] * 0.5, center[1] + scale[1] * 0.5])

        return [cropped_img, crop_info]
def test_net(tester, dets, det_range, gpu_id):
    dump_results = []
    start_time = time.time()

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        img_end = img_start + 1
        im_info = dets[img_start]
        while img_end < det_range[1] and dets[img_end]['image_id'] == im_info['image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = dets[img_start:img_end]
        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # forward
            heatmap = tester.predict_one([imgs])[0]

            if cfg.flip_test:
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]

                flip_heatmap = flip_heatmap[:, :, ::-1, :]
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap_w, flip_heatmap_q = flip_heatmap[:, :, :, w].copy(), flip_heatmap[:, :, :, q].copy()
                    flip_heatmap[:, :, :, q], flip_heatmap[:, :, :, w] = flip_heatmap_w, flip_heatmap_q
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                for j in range(cfg.num_kps):
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)

                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
                    if 1 < px < cfg.output_shape[1] - 1 and 1 < py < cfg.output_shape[0] - 1:
                        diff = np.array([
                            hm_j[py][px + 1] - hm_j[py][px - 1],
                            hm_j[py + 1][px] - hm_j[py - 1][px]
                        ])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    kps_result[image_id, j, :2] = (x * cfg.input_shape[1] / cfg.output_shape[1],
                                                   y * cfg.input_shape[0] / cfg.output_shape[0])
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] - crop_info[1])
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) and area > 96 ** 2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(osp.join(cfg.vis_dir, str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map keypoints back to the original image coordinates
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] / cfg.input_shape[1] * (
                        crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) + \
                        crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] / cfg.input_shape[0] * (
                        crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1]) + \
                        crop_infos[image_id - start_id][1]

                area_save[image_id] = (crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) * (
                    crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1])

        # vis: this variant only draws and prints upper-body geometry for the live image;
        # it does not run NMS or append detections, so dump_results stays empty.
        vis = True
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            # the original frame is carried in the detection record
            tmpimg = dets[0]['img']
            #tmpimg = cv2.imread(os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]

                # collect the upper-body keypoints (COCO order) into a readable dict:
                # x, y are pixel coordinates, w is the keypoint confidence
                kps = {}
                kps["nose"] = {"x": tmpkps[0][0], "y": tmpkps[1][0], "w": tmpkps[2][0]}
                kps["eye_l"] = {"x": tmpkps[0][1], "y": tmpkps[1][1], "w": tmpkps[2][1]}
                kps["eye_r"] = {"x": tmpkps[0][2], "y": tmpkps[1][2], "w": tmpkps[2][2]}
                kps["ear_l"] = {"x": tmpkps[0][3], "y": tmpkps[1][3], "w": tmpkps[2][3]}
                kps["ear_r"] = {"x": tmpkps[0][4], "y": tmpkps[1][4], "w": tmpkps[2][4]}
                kps["shldr_l"] = {"x": tmpkps[0][5], "y": tmpkps[1][5], "w": tmpkps[2][5]}
                kps["shldr_r"] = {"x": tmpkps[0][6], "y": tmpkps[1][6], "w": tmpkps[2][6]}
                kps["elbw_l"] = {"x": tmpkps[0][7], "y": tmpkps[1][7], "w": tmpkps[2][7]}
                kps["elbw_r"] = {"x": tmpkps[0][8], "y": tmpkps[1][8], "w": tmpkps[2][8]}
                kps["wrst_l"] = {"x": tmpkps[0][9], "y": tmpkps[1][9], "w": tmpkps[2][9]}
                kps["wrst_r"] = {"x": tmpkps[0][10], "y": tmpkps[1][10], "w": tmpkps[2][10]}

                print("\nNose \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["nose"]["x"], kps["nose"]["y"], kps["nose"]["w"]))
                print("L Eye \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["eye_l"]["x"], kps["eye_l"]["y"], kps["eye_l"]["w"]))
                print("R Eye \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["eye_r"]["x"], kps["eye_r"]["y"], kps["eye_r"]["w"]))
                print("L Ear \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["ear_l"]["x"], kps["ear_l"]["y"], kps["ear_l"]["w"]))
                print("R Ear \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["ear_r"]["x"], kps["ear_r"]["y"], kps["ear_r"]["w"]))
                print("L Shldr\t{:.0f}\t{:.0f}\t{:.2f}".format(kps["shldr_l"]["x"], kps["shldr_l"]["y"], kps["shldr_l"]["w"]))
                print("R Shldr\t{:.0f}\t{:.0f}\t{:.2f}".format(kps["shldr_r"]["x"], kps["shldr_r"]["y"], kps["shldr_r"]["w"]))
                print("L Elbw \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["elbw_l"]["x"], kps["elbw_l"]["y"], kps["elbw_l"]["w"]))
                print("R Elbw \t{:.0f}\t{:.0f}\t{:.2f}".format(kps["elbw_r"]["x"], kps["elbw_r"]["y"], kps["elbw_r"]["w"]))
                print("L Wrist\t{:.0f}\t{:.0f}\t{:.2f}".format(kps["wrst_l"]["x"], kps["wrst_l"]["y"], kps["wrst_l"]["w"]))
                print("R Wrist\t{:.0f}\t{:.0f}\t{:.2f}".format(kps["wrst_r"]["x"], kps["wrst_r"]["y"], kps["wrst_r"]["w"]))

                # default all derived measurements to a sentinel value (99 = not computable)
                nose_ratio = 99
                nose_shoulder_perp = 99
                eye_shldr_angle = 99
                arm_angle_left = 99
                arm_angle_right = 99
                ear_eye_left = 99
                ear_eye_right = 99

                if (kps["shldr_l"]["w"] > 0.4 and kps["shldr_r"]["w"] > 0.4 and kps["nose"]["w"] > 0.4
                        and kps["eye_l"]["w"] > 0.4 and kps["eye_r"]["w"] > 0.4):
                    shoulder_mid = mid(kps["shldr_l"]["x"], kps["shldr_l"]["y"],
                                       kps["shldr_r"]["x"], kps["shldr_r"]["y"])
                    nose_elevation = cdist(kps["nose"]["x"], kps["nose"]["y"], shoulder_mid[0], shoulder_mid[1])
                    eye_spacing = cdist(kps["eye_l"]["x"], kps["eye_l"]["y"], kps["eye_r"]["x"], kps["eye_r"]["y"])
                    nose_ratio = nose_elevation / eye_spacing
                    print("\nNose Angle Ratio\t{:.1f}".format(nose_ratio))

                if (kps["shldr_l"]["w"] > 0.4 and kps["shldr_r"]["w"] > 0.4 and kps["nose"]["w"] > 0.4
                        and kps["eye_l"]["w"] > 0.4 and kps["eye_r"]["w"] > 0.4):
                    shoulder_spacing = cdist(kps["shldr_l"]["x"], kps["shldr_l"]["y"],
                                             kps["shldr_r"]["x"], kps["shldr_r"]["y"])
                    shoulder_nose_left = cdist(kps["shldr_l"]["x"], kps["shldr_l"]["y"],
                                               kps["nose"]["x"], kps["nose"]["y"])
                    shoulder_nose_right = cdist(kps["shldr_r"]["x"], kps["shldr_r"]["y"],
                                                kps["nose"]["x"], kps["nose"]["y"])
                    nose_shoulder_perp = tri_height(shoulder_nose_left, shoulder_spacing,
                                                    shoulder_nose_right) / eye_spacing
                    print("Nose Perp Angle Ratio\t{:.1f}".format(nose_shoulder_perp))

                if (kps["shldr_l"]["w"] > 0.4 and kps["shldr_r"]["w"] > 0.4
                        and kps["eye_l"]["w"] > 0.4 and kps["eye_r"]["w"] > 0.4):
                    eye_slope = math.degrees(math.atan((kps["eye_l"]["y"] - kps["eye_r"]["y"]) /
                                                       (kps["eye_l"]["x"] - kps["eye_r"]["x"])))
                    shldr_slope = math.degrees(math.atan((kps["shldr_l"]["y"] - kps["shldr_r"]["y"]) /
                                                         (kps["shldr_l"]["x"] - kps["shldr_r"]["x"])))
                    eye_shldr_angle = eye_slope - shldr_slope
                    print("Eye Shldr Angle\t\t{:.1f}".format(eye_shldr_angle))

                # note: the arm-angle blocks reuse shoulder_spacing computed in the face-visible
                # branch above, so they assume that branch has already run
                if (kps["shldr_l"]["w"] > 0.4 and kps["shldr_r"]["w"] > 0.4 and kps["elbw_l"]["w"] > 0.4):
                    arm_left = cdist(kps["shldr_l"]["x"], kps["shldr_l"]["y"], kps["elbw_l"]["x"], kps["elbw_l"]["y"])
                    diag_left = cdist(kps["elbw_l"]["x"], kps["elbw_l"]["y"], kps["shldr_r"]["x"], kps["shldr_r"]["y"])
                    arm_angle_left = cos_angle(arm_left, shoulder_spacing, diag_left)

                if (kps["shldr_l"]["w"] > 0.4 and kps["shldr_r"]["w"] > 0.4 and kps["elbw_r"]["w"] > 0.4):
                    arm_right = cdist(kps["shldr_r"]["x"], kps["shldr_r"]["y"], kps["elbw_r"]["x"], kps["elbw_r"]["y"])
                    diag_right = cdist(kps["elbw_r"]["x"], kps["elbw_r"]["y"], kps["shldr_l"]["x"], kps["shldr_l"]["y"])
                    arm_angle_right = cos_angle(arm_right, shoulder_spacing, diag_right)
                print("Left Arm Angle\t\t{:.1f}".format(arm_angle_left))
                print("Right Arm Angle\t\t{:.1f}".format(arm_angle_right))

                if kps["eye_l"]["w"] > 0.4 and kps["ear_l"]["w"] > 0.4:
                    ear_eye_left = math.degrees(math.atan((kps["eye_l"]["y"] - kps["ear_l"]["y"]) /
                                                          (kps["eye_l"]["x"] - kps["ear_l"]["x"])))
                if kps["eye_r"]["w"] > 0.4 and kps["ear_r"]["w"] > 0.4:
                    ear_eye_right = math.degrees(math.atan((kps["eye_r"]["y"] - kps["ear_r"]["y"]) /
                                                           (kps["ear_r"]["x"] - kps["eye_r"]["x"])))
                print("Left E-E Angle\t\t{:.1f}".format(ear_eye_left))
                print("Right E-E Angle\t\t{:.1f}".format(ear_eye_right))

                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)

            #cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            cv2.imshow('vis', tmpimg)
            cv2.waitKey(0)
            img_id2 += 1

    return dump_results
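# Note: the geometric analysis above relies on four small helpers (mid, cdist, tri_height,
# cos_angle) that are defined elsewhere in the repo and not shown in this section. As a
# reference only, a minimal sketch consistent with how they are called here might look like
# the following; the exact implementations in the repo may differ.
def mid_sketch(x1, y1, x2, y2):
    # midpoint of the segment (x1, y1)-(x2, y2)
    return ((x1 + x2) / 2.0, (y1 + y2) / 2.0)

def cdist_sketch(x1, y1, x2, y2):
    # Euclidean distance between two points
    return math.hypot(x2 - x1, y2 - y1)

def tri_height_sketch(a, base, c):
    # height of a triangle with side lengths (a, base, c), measured onto `base`
    # (Heron's formula for the area, then h = 2 * area / base)
    s = (a + base + c) / 2.0
    area = math.sqrt(max(s * (s - a) * (s - base) * (s - c), 0.0))
    return 2.0 * area / base

def cos_angle_sketch(a, b, c):
    # angle in degrees between sides a and b, opposite side c (law of cosines)
    cos_val = (a ** 2 + b ** 2 - c ** 2) / (2.0 * a * b)
    return math.degrees(math.acos(max(-1.0, min(1.0, cos_val))))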