def get_mask(i_subject, i_seq, i_cam, i_frame):
    chroma_frame = improc.imread_jpeg(
        f'{paths.DATA_ROOT}/3dhp/S{i_subject}/Seq{i_seq}/FGmasks/img_{i_cam}_{i_frame:06d}.jpg')
    person_box = get_box(i_subject, i_seq, i_cam, i_frame)

    is_fg = chroma_frame[..., 0] > 100
    n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
        is_fg.astype(np.uint8), 4, cv2.CV_32S)
    component_boxes = stats[:, :4]
    ious = [boxlib.iou(component_box, person_box) for component_box in component_boxes]
    ious[0] = 0  # label 0 is the background component
    person_label = np.argmax(ious)
    mask = (labels == person_label).astype(np.uint8)

    # Remove foreground pixels that are far from the person box
    intbox = boxlib.intersect(
        boxlib.full_box((2048, 2048)), boxlib.expand(person_box, 1.3)).astype(int)
    mask[:intbox[1]] = 0
    mask[:, :intbox[0]] = 0
    mask[:, intbox[0] + intbox[2]:] = 0
    mask[intbox[1] + intbox[3]:] = 0
    return improc.encode_mask(mask)
def get_connected_component_with_highest_iou(mask, person_box):
    """Finds the 4-connected component in `mask` with the highest bbox IoU with `person_box`."""
    mask = mask.astype(np.uint8)
    _, labels, stats, _ = cv2.connectedComponentsWithStats(mask, 4, cv2.CV_32S)
    component_boxes = stats[:, :4]
    ious = [boxlib.iou(component_box, person_box) for component_box in component_boxes]
    ious[0] = 0  # never pick label 0, the background component (same as in get_mask)
    person_label = np.argmax(ious)
    return improc.encode_mask(labels == person_label)
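# The box convention assumed throughout this file is [left, top, width, height]:
# cv2.connectedComponentsWithStats stores exactly these four values in stats[:, :4].
# The helper below is an illustrative stand-in for what boxlib.iou presumably
# computes under that convention, not the project's actual implementation.
def _iou_xywh(box1, box2):
    """Intersection over union of two [x, y, w, h] boxes."""
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])
    intersection = max(0, right - left) * max(0, bottom - top)
    union = box1[2] * box1[3] + box2[2] * box2[3] - intersection
    return intersection / union if union > 0 else 0.0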
def get_bbox(im_coords, image_relpath, detections_all):
    joint_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.05)
    relpath_in_dataset = os.path.join(*util.split_path(image_relpath)[1:])
    if relpath_in_dataset in detections_all and detections_all[relpath_in_dataset]:
        most_confident_detection = max(detections_all[relpath_in_dataset], key=lambda x: x[4])
        detection_box = np.array(most_confident_detection[:4])
        union_box = boxlib.box_hull(detection_box, joint_box)
        # Sanity check: only use the detection if its hull still overlaps the joints well
        if boxlib.iou(union_box, joint_box) > 0.5:
            return union_box
    return joint_box
def get_crop_boxes(i_frame, camera, tracks, detections):
    # Tracks that are long enough and were updated recently
    live_tracks = [
        track for track in tracks
        if len(track) > 30 and i_frame - track[-1][0] < 10]
    last_live_poses = np.array([track[-1][1] for track in live_tracks])
    shadow_boxes = [
        boxlib.expand(boxlib.bb_of_points(camera.camera_to_image(p)), 1.2)
        for p in last_live_poses]

    # Add a shadow box only if no current detection already covers it
    crop_boxes = list(detections)
    for shadow_box in shadow_boxes:
        for box in crop_boxes:
            if boxlib.iou(box[:4], shadow_box[:4]) > 0.65:
                break
        else:
            crop_boxes.append([*shadow_box, 0])  # confidence 0 marks a shadow box

    if not crop_boxes:
        return np.zeros((0, 5), np.float32)
    return np.array(crop_boxes, np.float32)
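# Minimal standalone demo (toy numbers) of the for-else pattern used in
# get_crop_boxes above, since it is easily misread: the else branch runs only
# when the inner loop completes without hitting `break`.
def _forelse_demo():
    existing = [[0, 0, 10, 10]]
    candidate = [100, 100, 5, 5]
    for box in existing:
        if abs(box[0] - candidate[0]) < 5:  # stand-in for the IoU > 0.65 overlap test
            break  # candidate is already covered by an existing box
    else:
        existing.append(candidate)  # reached only if no box triggered `break`
    return existing  # the far-away candidate was appended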
def make_muco():
    joint_info, selected_joints = make_joint_info()

    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    root_muco = f'{paths.DATA_ROOT}/muco'
    sample_info = np.load(f'{root_muco}/composite_frame_origins.npy')
    n_all_joints = 28
    valid_indices = list(np.load(f'{root_muco}/valid_composite_frame_indices.npy'))
    all_detections = util.load_pickle(f'{root_muco}/yolov3_detections.pkl')
    all_detections = np.array([all_detections[k] for k in sorted(all_detections.keys())])
    all_visible_boxes = np.load(f'{root_muco}/visible_boxes.npy')
    matloader = functools.lru_cache(1024)(matlabfile.load)

    @functools.lru_cache(1024)
    def get_world_coords(i_subject, i_seq, i_cam, anno_name):
        seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
        anno_file = matloader(f'{seqpath}/annot.mat')
        camcoords = anno_file[anno_name][i_cam].reshape(
            [-1, n_all_joints, 3])[:, selected_joints]
        camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]
        world_coords = [camera.camera_to_world(c) for c in camcoords]
        return world_coords

    examples = []
    with util.BoundedPool(None, 120) as pool:
        for i_sample, people, detections, visible_boxes in zip(
                util.progressbar(valid_indices), sample_info[valid_indices],
                all_detections[valid_indices], all_visible_boxes[valid_indices]):
            detections = [box for box in detections if box[-1] > 0.1]
            if not detections:
                continue

            filename = f'{i_sample + 1:06d}.jpg'
            image_relpath = f'unaugmented_set_001/{filename[:2]}/{filename[:4]}/{filename}'

            gt_people = []
            for i_person, ((i_subject, i_seq, i_cam, i_frame), visible_box) in enumerate(
                    zip(people, visible_boxes)):
                seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
                world_coords = get_world_coords(i_subject, i_seq, i_cam, 'annot3')[i_frame]
                univ_world_coords = get_world_coords(
                    i_subject, i_seq, i_cam, 'univ_annot3')[i_frame]
                camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]

                im_coords = camera.world_to_image(world_coords)
                coord_bbox = boxlib.expand(boxlib.intersect(
                    boxlib.bb_of_points(im_coords),
                    boxlib.full_box([2048, 2048])), 1.05)
                bbox = boxlib.intersect_vertical(visible_box, coord_bbox)

                ex = p3ds.Pose3DExample(
                    image_relpath, world_coords, bbox, camera,
                    mask=None, univ_coords=univ_world_coords)
                gt_people.append(ex)

            if not gt_people:
                continue

            iou_matrix = np.array([[boxlib.iou(gt_person.bbox, box[:4])
                                    for box in detections]
                                   for gt_person in gt_people])
            gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, det_indices):
                gt_box = gt_people[i_gt].bbox
                det_box = detections[i_det]
                if (iou_matrix[i_gt, i_det] > 0.1 and
                        boxlib.area(det_box) < 2 * boxlib.area(gt_box)):
                    ex = gt_people[i_gt]
                    ex.bbox = np.array(detections[i_det][:4])
                    pool.apply_async(make_efficient_example, (ex, root_muco, i_gt),
                                     callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)
    return p3ds.Pose3DDataset(joint_info, examples)
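# Standalone illustration (toy numbers, not project data) of the matching step
# shared by make_muco, make_mpii_yolo and make_mupots_yolo: negating the IoU
# matrix turns scipy's minimum-cost assignment into a maximum-total-IoU,
# one-to-one matching between ground-truth people and detector boxes.
def _matching_demo():
    import scipy.optimize

    iou_matrix = np.array([
        [0.75, 0.40, 0.05],   # ground-truth person 0 vs. detections 0..2
        [0.35, 0.60, 0.00]])  # ground-truth person 1 vs. detections 0..2
    gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)
    for i_gt, i_det in zip(gt_indices, det_indices):
        if iou_matrix[i_gt, i_det] > 0.1:  # same minimum-overlap threshold as above
            print(f'gt {i_gt} <- detection {i_det} (IoU {iou_matrix[i_gt, i_det]:.2f})')
    # Prints: gt 0 <- detection 0 (IoU 0.75)
    #         gt 1 <- detection 1 (IoU 0.60)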
def make_mpii_yolo():
    joint_info_full = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,pelv,thor,neck,head,rwri,relb,rsho,lsho,lelb,lwri',
        'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,neck-head,pelv-thor')
    joint_info_used = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,rwri,relb,lelb,lwri',
        'lelb-lwri,relb-rwri,lhip-lkne-lank,rhip-rkne-rank')
    selected_joints = [joint_info_full.ids[name] for name in joint_info_used.names]

    mat_path = f'{paths.DATA_ROOT}/mpii/mpii_human_pose_v1_u12_1.mat'
    s = matlabfile.load(mat_path).RELEASE
    annolist = np.atleast_1d(s.annolist)
    all_boxes = util.load_pickle(f'{paths.DATA_ROOT}/mpii/yolov3_detections.pkl')

    examples = []
    with util.BoundedPool(None, 120) as pool:
        for anno_id, (anno, is_train) in enumerate(
                zip(annolist, util.progressbar(s.img_train))):
            if not is_train:
                continue

            image_path = f'{paths.DATA_ROOT}/mpii/images/{anno.image.name}'

            annorect = np.atleast_1d(anno.annorect)
            gt_people = []
            for rect_id, rect in enumerate(annorect):
                if 'annopoints' not in rect or len(rect.annopoints) == 0:
                    continue

                coords = np.full(
                    shape=[joint_info_full.n_joints, 2], fill_value=np.nan, dtype=np.float32)
                for joint in np.atleast_1d(rect.annopoints.point):
                    coords[joint.id] = [joint.x, joint.y]

                bbox = boxlib.expand(boxlib.bb_of_points(coords), 1.25)
                coords = coords[selected_joints]
                ex = Pose2DExample(image_path, coords, bbox=bbox)
                gt_people.append(ex)

            if not gt_people:
                continue

            image_relpath = os.path.relpath(f'images/{anno.image.name}')
            boxes = [box for box in all_boxes[image_relpath] if box[-1] > 0.5]
            if not boxes:
                continue

            iou_matrix = np.array([[boxlib.iou(gt_person.bbox, box[:4])
                                    for box in boxes]
                                   for gt_person in gt_people])
            gt_indices, box_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, box_indices):
                if iou_matrix[i_gt, i_det] > 0.1:
                    ex = gt_people[i_gt]
                    ex.bbox = np.array(boxes[i_det][:4])
                    new_im_path = image_path.replace('mpii', 'mpii_downscaled_yolo')
                    without_ext, ext = os.path.splitext(new_im_path)
                    new_im_path = f'{without_ext}_{i_gt:02d}{ext}'
                    pool.apply_async(make_efficient_example, (ex, new_im_path),
                                     callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)

    def n_valid_joints(example):
        return np.count_nonzero(np.all(~np.isnan(example.coords), axis=-1))

    examples = [ex for ex in examples if n_valid_joints(ex) > 6]
    return Pose2DDataset(joint_info_used, examples)
def make_coco(single_person=True):
    joint_info = JointInfo(
        'nose,leye,reye,lear,rear,lsho,rsho,lelb,relb,lwri,rwri,lhip,rhip,lkne,rkne,lank,rank',
        'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,lear-leye-nose-reye-rear')
    n_joints = joint_info.n_joints
    learning_phase_shortnames = {TRAIN: 'train', VALID: 'val', TEST: 'test'}
    UNLABELED = 0
    OCCLUDED = 1
    VISIBLE = 2
    iou_threshold = 0.1 if single_person else 0.5
    suffix = '' if single_person else '_multi'

    examples_per_phase = {TRAIN: [], VALID: []}
    with util.BoundedPool(None, 120) as pool:
        for example_phase in (TRAIN, VALID):
            phase_shortname = learning_phase_shortnames[example_phase]
            coco_filepath = (
                f'{paths.DATA_ROOT}/coco/annotations/'
                f'person_keypoints_{phase_shortname}2014.json')
            coco = pycocotools.coco.COCO(coco_filepath)

            impath_to_examples = {}
            for ann in coco.anns.values():
                filename = coco.imgs[ann['image_id']]['file_name']
                image_path = f'{paths.DATA_ROOT}/coco/{phase_shortname}2014/{filename}'

                joints = np.array(ann['keypoints']).reshape([-1, 3])
                visibilities = joints[:, 2]
                coords = joints[:, :2].astype(np.float32).copy()
                n_visible_joints = np.count_nonzero(visibilities == VISIBLE)
                n_occluded_joints = np.count_nonzero(visibilities == OCCLUDED)
                n_labeled_joints = n_occluded_joints + n_visible_joints

                if n_visible_joints >= n_joints / 3 and n_labeled_joints >= n_joints / 2:
                    coords[visibilities == UNLABELED] = np.nan
                    bbox_pt1 = np.array(ann['bbox'][0:2], np.float32)
                    bbox_wh = np.array(ann['bbox'][2:4], np.float32)
                    bbox = np.array([*bbox_pt1, *bbox_wh])
                    ex = Pose2DExample(image_path, coords, bbox=bbox)
                    impath_to_examples.setdefault(image_path, []).append(ex)

            n_images = len(impath_to_examples)
            for impath, examples in util.progressbar(
                    impath_to_examples.items(), total=n_images):
                for i_example, example in enumerate(examples):
                    box = boxlib.expand(boxlib.bb_of_points(example.coords), 1.25)
                    if np.max(box[2:]) < 200:
                        continue

                    if single_person:
                        other_boxes = [boxlib.expand(boxlib.bb_of_points(e.coords), 1.25)
                                       for e in examples if e is not example]
                        ious = np.array([boxlib.iou(b, box) for b in other_boxes])
                        usable = np.all(ious < iou_threshold)
                    else:
                        usable = True

                    if usable:
                        new_im_path = impath.replace('coco', 'coco_downscaled' + suffix)
                        without_ext, ext = os.path.splitext(new_im_path)
                        new_im_path = f'{without_ext}_{i_example:02d}{ext}'
                        pool.apply_async(
                            make_efficient_example, (example, new_im_path),
                            callback=examples_per_phase[example_phase].append)

    examples_per_phase[TRAIN].sort(key=lambda ex: ex.image_path)
    examples_per_phase[VALID].sort(key=lambda ex: ex.image_path)
    return Pose2DDataset(joint_info, examples_per_phase[TRAIN], examples_per_phase[VALID])
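# Toy example (made-up values) of the COCO keypoint encoding handled above:
# ann['keypoints'] is a flat [x1, y1, v1, x2, y2, v2, ...] list where, per the
# COCO spec, v=0 means unlabeled, v=1 labeled but occluded, v=2 labeled and visible.
def _coco_keypoints_demo():
    keypoints = [120, 80, 2, 0, 0, 0, 135, 95, 1]
    joints = np.array(keypoints).reshape([-1, 3])
    coords = joints[:, :2].astype(np.float32)
    coords[joints[:, 2] == 0] = np.nan  # unlabeled joints become NaN, as in make_coco
    return coords
    # array([[120.,  80.],
    #        [ nan,  nan],
    #        [135.,  95.]], dtype=float32)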
def make_mupots_yolo():
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe'.split(','))

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    joint_names = [all_short_names[j] for j in selected_joints]
    j = p3ds.JointInfo.make_id_map(joint_names)
    edges = [
        (j.htop, j.head), (j.head, j.neck), (j.neck, j.lsho), (j.lsho, j.lelb),
        (j.lelb, j.lwri), (j.neck, j.rsho), (j.rsho, j.relb), (j.relb, j.rwri),
        (j.neck, j.spin), (j.spin, j.pelv), (j.pelv, j.lhip), (j.lhip, j.lkne),
        (j.lkne, j.lank), (j.pelv, j.rhip), (j.rhip, j.rkne), (j.rkne, j.rank)]
    joint_info = p3ds.JointInfo(j, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')

    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrix, distortion_coeffs=None,
            world_up=(0, -1, 0))

        n_frames = annotations.shape[0]
        n_people = annotations.shape[1]
        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            for detection in detections_frame:
                if detection[4] > 0.1:
                    ex = p3ds.Pose3DExample(
                        image_path, dummy_coords, detection[:4], camera,
                        mask=None, univ_coords=dummy_coords, scene_name=f'TS{i_seq}')
                    examples_test.append(ex)

            gt_people = []
            for i_person in range(n_people):
                world_coords = np.array(
                    annotations[i_frame, i_person].annot3.T[order_joints], dtype=np.float32)
                univ_world_coords = np.array(
                    annotations[i_frame, i_person].univ_annot3.T[order_joints],
                    dtype=np.float32)

                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                ex = p3ds.Pose3DExample(
                    image_path, world_coords, gt_box, camera,
                    mask=None, univ_coords=univ_world_coords, scene_name=f'TS{i_seq}')
                gt_people.append(ex)

            confident_detections = [det for det in detections_frame if det[-1] > 0.1]
            if confident_detections:
                iou_matrix = np.array([[boxlib.iou(gt_person.bbox, box[:4])
                                        for box in confident_detections]
                                       for gt_person in gt_people])
                gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(
                    -iou_matrix)
                for i_gt, i_det in zip(gt_indices, detection_indices):
                    if iou_matrix[i_gt, i_det] > 0.1:
                        ex = gt_people[i_gt]
                        ex.bbox = np.array(confident_detections[i_det][:4])
                        examples_val.append(ex)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)