def get_mask(i_subject, i_seq, i_cam, i_frame):
    """Build the encoded foreground mask of the annotated person in one 3DHP frame.

    The chroma-key mask image of the frame is thresholded and split into 4-connected
    components. The component whose bounding box has the highest IoU with the person box
    (from `get_box`) is kept, and everything outside the person box expanded by a factor of
    1.3 is cleared.

    Args:
        i_subject: subject index, substituted into the mask image path.
        i_seq: sequence index, substituted into the mask image path.
        i_cam: camera index, substituted into the mask image file name.
        i_frame: frame index, substituted into the mask image file name.

    Returns:
        The result of `improc.encode_mask` applied to the uint8 person mask. The mask is
        all zeros if no foreground component overlaps the person box.
    """
    chroma_frame = improc.imread_jpeg(
        f'{paths.DATA_ROOT}/3dhp/S{i_subject}/Seq{i_seq}/FGmasks/img_{i_cam}_{i_frame:06d}.jpg')
    person_box = get_box(i_subject, i_seq, i_cam, i_frame)

    is_fg = chroma_frame[..., 0] > 100
    _, labels, stats, _ = cv2.connectedComponentsWithStats(
        is_fg.astype(np.uint8), 4, cv2.CV_32S)
    # The first four stats columns are the bounding box of each component.
    component_boxes = stats[:, :4]
    ious = [boxlib.iou(component_box, person_box) for component_box in component_boxes]
    # Label 0 is the background component, it must never be selected as the person.
    ious[0] = 0
    person_label = np.argmax(ious)

    if ious[person_label] > 0:
        mask = (labels == person_label).astype(np.uint8)
    else:
        # No component overlaps the person box. argmax then falls back to index 0, which
        # would mark the *background* as the person, so return an empty mask instead.
        mask = np.zeros(labels.shape, np.uint8)

    # Remove foreground pixels that are far from the person box
    intbox = boxlib.intersect(
        boxlib.full_box((2048, 2048)), boxlib.expand(person_box, 1.3)).astype(int)
    mask[:intbox[1]] = 0
    mask[:, :intbox[0]] = 0
    mask[:, intbox[0] + intbox[2]:] = 0
    mask[intbox[1] + intbox[3]:] = 0
    return improc.encode_mask(mask)
def make_efficient_example(ex, further_expansion_factor=1, further_scale_up=1, dir_suffix=''):
    """Return a version of `ex` that refers to a cropped and downscaled copy of its image.

    The crop is larger than the tight bounding box so that later geometric augmentations
    (rotation, scaling, shifting) still find image content. The new image file is written
    only if it is missing, older than a fixed timestamp or unreadable.

    Args:
        ex: example with `image_path`, `bbox`, `camera`, `world_coords`, `activity_name`.
        further_expansion_factor: extra multiplier on the crop expansion.
        further_scale_up: extra multiplier on the allowed scale-up factor.
        dir_suffix: appended to the 'h36m_downscaled' directory name of the output path.

    Returns:
        A new `ps3d.Pose3DExample` with the new relative image path, box and camera.
    """
    # Margins required by the geometric augmentations
    base_dst_side = 256
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    scale_up_factor = 1 / 0.85 * further_scale_up

    # Square around the box center that covers the box when rotated by max_rotate
    sin_rot = np.sin(max_rotate)
    cos_rot = np.cos(max_rotate)
    box_width, box_height = ex.bbox[2:]
    rotated_side = max(
        cos_rot * box_width + sin_rot * box_height, cos_rot * box_height + sin_rot * box_width)
    rotated_box = boxlib.box_around(boxlib.center(ex.bbox), rotated_side)

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = (
        padding_factor * shift_factor * scale_down_factor * further_expansion_factor)
    crop_box = boxlib.intersect(
        boxlib.expand(rotated_box, expansion_factor), np.array([0, 0, 1000, 1000]))

    # Camera that views only the crop, at the reduced resolution and without distortion
    crop_camera = copy.deepcopy(ex.camera)
    crop_camera.intrinsic_matrix[:2, 2] -= crop_box[:2]
    crop_camera.scale_output(scale_factor)
    crop_camera.undistort()

    new_im_relpath = ex.image_path.replace('h36m', f'h36m_downscaled{dir_suffix}')
    new_im_path = f'{paths.DATA_ROOT}/{new_im_relpath}'
    is_up_to_date = (
        util.is_file_newer(new_im_path, "2019-11-14T23:33:14")
        and improc.is_image_readable(new_im_path))
    if not is_up_to_date:
        original_image = improc.imread_jpeg(ex.image_path)
        dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])
        cropped_image = cameralib.reproject_image(
            original_image, ex.camera, crop_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_image)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, crop_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])
    return ps3d.Pose3DExample(
        new_im_relpath, ex.world_coords, new_bbox, crop_camera,
        activity_name=ex.activity_name)
def make_efficient_example(ex, root_muco, i_person):
    """Return a version of `ex` backed by a cropped, downscaled image and a matching mask.

    Args:
        ex: example with `image_path`, `bbox` (indexed as a NumPy array), `camera`, `mask`,
            `world_coords` and `univ_coords`.
        root_muco: root directory of the source images; the output goes to
            `{root_muco}_downscaled`.
        i_person: person index, appended to the output image file name.

    Returns:
        A new `p3ds.Pose3DExample` with float32 coordinates and box, the crop camera and the
        re-encoded mask (`None` if the mask is `False`).
    """
    image_relpath = ex.image_path

    # Margins required by the geometric augmentations
    base_dst_side = 256
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2

    # Square around the box center that covers the box when rotated by max_rotate
    sin_rot = np.sin(max_rotate)
    cos_rot = np.cos(max_rotate)
    rotated_extents = (
        np.array([[cos_rot, sin_rot], [sin_rot, cos_rot]]) @ ex.bbox[2:, np.newaxis])[:, 0]
    side = np.max(rotated_extents)
    rotated_box = boxlib.box_around(boxlib.center(ex.bbox), np.array([side, side]))

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    crop_box = boxlib.intersect(
        boxlib.expand(rotated_box, expansion_factor), boxlib.full_box([2048, 2048]))

    # Camera that views only the crop, at the reduced resolution and without distortion
    crop_camera = ex.camera.copy()
    crop_camera.intrinsic_matrix[:2, 2] -= crop_box[:2]
    crop_camera.scale_output(scale_factor)
    crop_camera.undistort()
    dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])

    new_im_path = f'{root_muco}_downscaled/{image_relpath[:-4]}_{i_person:01d}.jpg'
    if not util.is_file_newer(new_im_path, "2020-02-15T23:28:26"):
        original_image = improc.imread_jpeg(f'{root_muco}/{image_relpath}')
        cropped_image = cameralib.reproject_image(
            original_image, ex.camera, crop_camera, dst_shape, antialias_factor=4)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_image, quality=95)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, crop_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])

    # Use the mask of the example if it has one, otherwise load it from the masks directory
    mask = ex.mask
    if mask is None:
        mask_name = os.path.splitext(image_relpath[:-4])[0]
        mask_name = mask_name.replace('unaugmented_set_001/', '')
        mask = improc.decode_mask(util.load_pickle(f'{root_muco}/masks/{mask_name}.pkl'))

    new_mask_encoded = None
    if mask is not False:
        reprojected_mask = cameralib.reproject_image(mask, ex.camera, crop_camera, dst_shape)
        new_mask_encoded = improc.encode_mask(reprojected_mask)

    return p3ds.Pose3DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT),
        ex.world_coords.astype(np.float32),
        new_bbox.astype(np.float32),
        crop_camera,
        mask=new_mask_encoded,
        univ_coords=ex.univ_coords.astype(np.float32))
def get_bbox(im_coords, image_relpath, detections_all):
    """Choose a bounding box from the joint coordinates and, if plausible, a detection.

    Args:
        im_coords: image coordinates of the joints.
        image_relpath: image path; its first path component is dropped before the lookup.
        detections_all: mapping from that shortened path to detections, each indexable as
            four box values followed by a confidence.

    Returns:
        The hull of the most confident detection and the joint box if its IoU with the joint
        box is above 0.5, otherwise the joint box (expanded by a factor of 1.05).
    """
    joint_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.05)
    relpath_in_dataset = os.path.join(*util.split_path(image_relpath)[1:])

    if relpath_in_dataset not in detections_all:
        return joint_box
    candidates = detections_all[relpath_in_dataset]
    if not candidates:
        return joint_box

    best_detection = max(candidates, key=lambda detection: detection[4])
    hull = boxlib.box_hull(np.array(best_detection[:4]), joint_box)
    # Sanity check: only trust the detection if the hull stays close to the joint box
    is_plausible = boxlib.iou(hull, joint_box) > 0.5
    return hull if is_plausible else joint_box
def get_expanded_crop_box(bbox, full_box, further_expansion_factor):
    """Compute the image region that augmented crops of `bbox` may need.

    A square around the center of `bbox` that covers the box when rotated by 30 degrees is
    expanded by the padding, shift and scale-down margins (and `further_expansion_factor`)
    and then intersected with `full_box`.

    Args:
        bbox: box whose last two elements are unpacked as width and height.
        full_box: box to which the result is clipped.
        further_expansion_factor: extra multiplier on the expansion.

    Returns:
        The expanded and clipped box.
    """
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1

    sin_rot = np.sin(max_rotate)
    cos_rot = np.cos(max_rotate)
    width, height = bbox[2:]
    center = boxlib.center(bbox)
    rotated_side = max(cos_rot * width + sin_rot * height, cos_rot * height + sin_rot * width)
    rotated_box = boxlib.box_around(center, rotated_side)

    expansion_factor = (
        padding_factor * shift_factor * scale_down_factor * further_expansion_factor)
    enlarged_box = boxlib.expand(rotated_box, expansion_factor)
    return boxlib.intersect(enlarged_box, full_box)
def make_efficient_example(ex, rect_id):
    """Return a 2D example that refers to a cropped and downscaled copy of its image.

    The stored region covers the margins needed for rotation, the usual padding around the
    box, scale (shrink) augmentation and shifting. The new image file is written only if it
    is missing, older than a fixed timestamp or unreadable.

    Args:
        ex: example with `image_path`, `bbox` and `coords`.
        rect_id: index that is appended (two digits) to the output file name.

    Returns:
        A new `Pose2DExample` with the path relative to `paths.DATA_ROOT` and with the
        coordinates and box expressed in the new image.
    """
    base_dst_side = 256
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    hopeful_factor = 0.9
    max_rotate = np.pi / 6
    rot_factor = np.sin(max_rotate) + np.cos(max_rotate)

    scale_factor = min(base_dst_side / ex.bbox[3] * scale_up_factor, 1)
    expansion_factor = (
        rot_factor * padding_factor * shift_factor * scale_down_factor * hopeful_factor)
    square_box = boxlib.expand_to_square(ex.bbox)
    crop_box = boxlib.expand(square_box, expansion_factor)
    extents = improc.image_extents(ex.image_path)
    crop_box = boxlib.intersect(crop_box, np.array([0, 0, extents[0], extents[1]]))

    # Express the crop and the rescaling as a change between two 2D cameras
    old_camera = cameralib.Camera.create2D()
    new_camera = old_camera.copy()
    new_camera.shift_image(-crop_box[:2])
    new_camera.scale_output(scale_factor)
    dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])

    without_ext, ext = os.path.splitext(ex.image_path.replace('mpii', 'mpii_downscaled'))
    new_im_path = f'{without_ext}_{rect_id:02d}{ext}'
    is_up_to_date = (
        util.is_file_newer(new_im_path, "2019-11-12T17:54:06")
        and improc.is_image_readable(new_im_path))
    if not is_up_to_date:
        original_image = improc.imread_jpeg(ex.image_path)
        cropped_image = cameralib.reproject_image(
            original_image, old_camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_image)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], old_camera, new_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])
    new_coords = cameralib.reproject_image_points(ex.coords, old_camera, new_camera)
    return Pose2DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT), new_coords, bbox=new_bbox)
def get_crop_boxes(i_frame, camera, tracks, detections):
    """Combine the detections of a frame with boxes predicted from recently seen tracks.

    A track is considered live if it has more than 30 entries and its last entry is less
    than 10 frames old. The last pose of each live track is projected with `camera`, and the
    resulting box (expanded by a factor of 1.2) is added with confidence 0 unless an existing
    box already overlaps it with an IoU above 0.65.

    Args:
        i_frame: index of the current frame.
        camera: object whose `camera_to_image` projects a pose to image points.
        tracks: sequences of entries indexed as `(frame index, pose)`.
        detections: boxes of the current frame, each with five values.

    Returns:
        A float32 array with one row of five values per box, of shape (0, 5) if empty.
    """
    def is_live(track):
        return len(track) > 30 and i_frame - track[-1][0] < 10

    last_live_poses = np.array([track[-1][1] for track in tracks if is_live(track)])
    shadow_boxes = [
        boxlib.expand(boxlib.bb_of_points(camera.camera_to_image(pose)), 1.2)
        for pose in last_live_poses]

    crop_boxes = list(detections)
    for shadow_box in shadow_boxes:
        is_covered = any(
            boxlib.iou(box[:4], shadow_box[:4]) > 0.65 for box in crop_boxes)
        if not is_covered:
            crop_boxes.append([*shadow_box, 0])

    if not crop_boxes:
        return np.zeros((0, 5), np.float32)
    return np.array(crop_boxes, np.float32)
def get_examples(phase, pool):
    """Queue a downscaling job for every usable person annotation of a phase.

    Every sequence pickle of `phase` is read. For each person and frame with a valid camera
    pose and non-zero 2D pose, an example is built and handed to `make_efficient_example`
    through `pool.apply_async`. Relies on `root` and `selected_joints` from the enclosing
    scope.

    Args:
        phase: name of the subdirectory of `sequenceFiles` to read.
        pool: object providing `apply_async(func, args, callback=...)`.

    Returns:
        The list that the job callbacks append to. NOTE(review): it may still be filling up
        when returned; presumably the caller waits for the pool first, confirm.
    """
    result = []
    for filepath in glob.glob(f'{root}/sequenceFiles/{phase}/*.pkl'):
        with open(filepath, 'rb') as f:
            seq = pickle.load(f, encoding='latin1')

        seq_name = seq['sequence']
        intrinsics = seq['cam_intrinsics']
        extrinsics_per_frame = seq['cam_poses']

        per_person = zip(
            seq['jointPositions'], seq['poses2d'], seq['trans'], seq['campose_valid'])
        for i_person, (coord_seq, coords2d_seq, trans_seq, camvalid_seq) in enumerate(
                per_person):
            per_frame = zip(
                coord_seq, coords2d_seq, trans_seq, extrinsics_per_frame, camvalid_seq)
            for i_frame, (coords, coords2d, _trans, extrinsics, campose_valid) in enumerate(
                    per_frame):
                if not campose_valid or np.all(coords2d == 0):
                    continue

                impath = f'{root}/imageFiles/{seq_name}/image_{i_frame:05d}.jpg'
                camera = cameralib.Camera(
                    extrinsic_matrix=extrinsics, intrinsic_matrix=intrinsics,
                    world_up=(0, 1, 0))
                # Both the camera translation and the joints are multiplied by 1000
                camera.t *= 1000
                world_coords = (coords.reshape(-1, 3))[selected_joints] * 1000

                # The stored example holds camera-space coordinates, so it gets a camera
                # that has only the intrinsics.
                camera2 = cameralib.Camera(intrinsic_matrix=intrinsics, world_up=(0, -1, 0))
                camcoords = camera.world_to_camera(world_coords)
                imcoords = camera.world_to_image(world_coords)
                bbox = boxlib.expand(boxlib.bb_of_points(imcoords), 1.15)
                ex = p3ds.Pose3DExample(impath, camcoords, bbox=bbox, camera=camera2)

                noext = os.path.splitext(os.path.relpath(impath, root))[0]
                new_image_relpath = f'tdpw_downscaled/{noext}_{i_person:03d}.jpg'
                pool.apply_async(
                    make_efficient_example,
                    (ex, new_image_relpath, 1, False, "2021-07-09T12:28:07"),
                    callback=result.append)
    return result
def make_muco():
    """Build the MuCo dataset from the composite frames and the YOLOv3 detections.

    For every valid composite frame, a ground-truth example is created per person from the
    3DHP annotations. Ground-truth people are matched to detections (confidence above 0.1)
    by maximizing the total IoU. A matched person whose IoU is above 0.1 and whose detection
    area is below twice the ground-truth box area gets the detection box and is sent to
    `make_efficient_example` in a worker pool.

    Returns:
        A `p3ds.Pose3DDataset` with the examples sorted by image path.
    """
    joint_info, selected_joints = make_joint_info()
    root_3dhp = f'{paths.DATA_ROOT}/3dhp'
    root_muco = f'{paths.DATA_ROOT}/muco'

    sample_info = np.load(f'{root_muco}/composite_frame_origins.npy')
    n_all_joints = 28
    valid_indices = list(np.load(f'{root_muco}/valid_composite_frame_indices.npy'))
    detections_by_name = util.load_pickle(f'{root_muco}/yolov3_detections.pkl')
    # Keep the per-image detections in a plain list. The number of detections differs from
    # image to image, and building an ndarray from such ragged lists raises a ValueError
    # in NumPy >= 1.24.
    all_detections = [detections_by_name[k] for k in sorted(detections_by_name.keys())]
    all_visible_boxes = np.load(f'{root_muco}/visible_boxes.npy')
    matloader = functools.lru_cache(1024)(matlabfile.load)

    @functools.lru_cache(1024)
    def get_world_coords(i_subject, i_seq, i_cam, anno_name):
        """Load the world coordinates of all frames for one camera of a 3DHP sequence."""
        seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
        anno_file = matloader(f'{seqpath}/annot.mat')
        camcoords = anno_file[anno_name][i_cam].reshape(
            [-1, n_all_joints, 3])[:, selected_joints]
        camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]
        world_coords = [camera.camera_to_world(c) for c in camcoords]
        return world_coords

    examples = []
    with util.BoundedPool(None, 120) as pool:
        valid_detections = [all_detections[i] for i in valid_indices]
        for i_sample, people, detections, visible_boxes in zip(
                util.progressbar(valid_indices), sample_info[valid_indices],
                valid_detections, all_visible_boxes[valid_indices]):
            detections = [box for box in detections if box[-1] > 0.1]
            if not detections:
                continue

            filename = f'{i_sample + 1:06d}.jpg'
            image_relpath = f'unaugmented_set_001/{filename[:2]}/{filename[:4]}/{filename}'

            gt_people = []
            for i_person, ((i_subject, i_seq, i_cam, i_frame), visible_box) in enumerate(
                    zip(people, visible_boxes)):
                seqpath = f'{root_3dhp}/S{i_subject}/Seq{i_seq}'
                world_coords = get_world_coords(i_subject, i_seq, i_cam, 'annot3')[i_frame]
                univ_world_coords = get_world_coords(
                    i_subject, i_seq, i_cam, 'univ_annot3')[i_frame]
                camera = load_cameras(f'{seqpath}/camera.calibration')[i_cam]
                im_coords = camera.world_to_image(world_coords)
                coord_bbox = boxlib.expand(boxlib.intersect(
                    boxlib.bb_of_points(im_coords), boxlib.full_box([2048, 2048])), 1.05)
                bbox = boxlib.intersect_vertical(visible_box, coord_bbox)
                ex = p3ds.Pose3DExample(
                    image_relpath, world_coords, bbox, camera, mask=None,
                    univ_coords=univ_world_coords)
                gt_people.append(ex)

            if not gt_people:
                continue

            iou_matrix = np.array([[boxlib.iou(gt_person.bbox, box[:4])
                                    for box in detections]
                                   for gt_person in gt_people])
            # Negated because linear_sum_assignment minimizes the total cost
            gt_indices, det_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, det_indices):
                gt_box = gt_people[i_gt].bbox
                det_box = detections[i_det]
                if (iou_matrix[i_gt, i_det] > 0.1 and
                        boxlib.area(det_box) < 2 * boxlib.area(gt_box)):
                    ex = gt_people[i_gt]
                    ex.bbox = np.array(det_box[:4])
                    pool.apply_async(
                        make_efficient_example, (ex, root_muco, i_gt),
                        callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)
    return p3ds.Pose3DDataset(joint_info, examples)
def load_and_transform3d(ex, joint_info, learning_phase, rng=None):
    """Load the image of a 3D pose example and produce an (optionally augmented) crop.

    A virtual camera is pointed at the (possibly shifted) box center, zoomed so that the box
    fills the output, and optionally zoomed, rotated, shifted and flipped for augmentation.
    The image is reprojected into this camera, and the joints are expressed in it.

    Args:
        ex: example with `bbox`, `camera`, `image_path`, `world_coords`, `univ_coords`,
            `activity_name`, `scene_name` and optionally `mask`.
        joint_info: object whose `mirror_mapping` reorders the joints after a flip.
        learning_phase: compared with `TRAIN` to decide which augmentations apply.
        rng: seed source for the independent random streams created via `util.new_rng`.

    Returns:
        Tuple of: image path, processed image, float32 camera coordinates, float32 image
        coordinates (NaNs replaced in both), inverse intrinsics, rotation to the original
        camera, rotation to the world, camera position, joint validity mask, per-joint
        in-field-of-view flags as float32, activity name, scene name.
    """
    # Independent random streams. The order of creation is part of the seeding.
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    box = ex.bbox
    if '3dhp' in ex.image_path:
        box = boxlib.expand(box, 1.05)

    if FLAGS.partial_visibility:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    # Shift augmentation by turning the camera. It is drawn from the dedicated geometric
    # stream (previously the parent `rng` was used, see the TODO this resolves), both in
    # training and in test-time augmentation.
    if (FLAGS.geom_aug and FLAGS.shift_aug_by_rot
            and (learning_phase == TRAIN or FLAGS.test_aug)):
        center_point += util.random_uniform_disc(
            geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # Two points on opposite sides of the box along its longer dimension. Their distance
    # in the new camera defines the size of the crop.
    if box[2] < box[3]:
        delta_y = np.array([0, box[3] / 2])
        sidepoints = np.stack([center_point - delta_y, center_point + delta_y])
    else:
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = np.stack([center_point - delta_x, center_point + delta_x])

    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    world_sidepoints = ex.camera.image_to_world(sidepoints)
    cam_sidepoints = cam.world_to_image(world_sidepoints)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        r = FLAGS.rot_aug * np.pi / 180
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        cam.rotate(roll=geom_rng.uniform(-r, r))

        if not FLAGS.shift_aug_by_rot:
            # Shift by moving the principal point instead of turning the camera
            sh = FLAGS.shift_aug / 100 * crop_side
            cam.intrinsic_matrix[:2, 2] += geom_rng.uniform(-sh, sh, size=2)

    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        # After the flip, left and right joints swap roles
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)
    if '3dhp' in ex.image_path and re.match('.+/(TS[1-4]|S[1-8])/', ex.image_path):
        origsize_im = improc.adjust_gamma(origsize_im, 0.67, inplace=True)
        origsize_im = improc.white_balance(origsize_im, 110, 145)

    im = cameralib.reproject_image(origsize_im, ex.camera, cam, output_imshape)

    if (FLAGS.background_aug_prob and hasattr(ex, 'mask') and ex.mask is not None and
            background_rng.rand() < FLAGS.background_aug_prob and
            (learning_phase == TRAIN or FLAGS.test_aug)):
        fgmask = improc.decode_mask(ex.mask)
        fgmask = cameralib.reproject_image(fgmask, ex.camera, cam, output_imshape)
        im = augmentation.background.augment_background(im, fgmask, background_rng)

    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    is_joint_in_fov = ~np.logical_or(
        np.any(imcoords < 0, axis=-1), np.any(imcoords >= FLAGS.proc_side, axis=-1))
    # Joints with NaN coordinates are invalid
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T
    inv_intrinsics = np.linalg.inv(cam.intrinsic_matrix)

    return (
        ex.image_path, im, np.nan_to_num(camcoords).astype(np.float32),
        np.nan_to_num(imcoords).astype(np.float32), inv_intrinsics.astype(np.float32),
        rot_to_orig_cam.astype(np.float32), rot_to_world.astype(np.float32),
        cam.t.astype(np.float32), joint_validity_mask, np.float32(is_joint_in_fov),
        ex.activity_name, ex.scene_name)
def make_efficient_example(ex):
    """Return a version of `ex` backed by a cropped, downscaled image and a cached mask.

    The crop is larger than the tight bounding box so that later geometric augmentations
    still find image content. The image is rewritten only if it is missing, older than a
    fixed timestamp or unreadable. The mask is loaded from a pickle next to the downscaled
    data if that is recent enough, otherwise it is computed from the mask image and cached.

    Args:
        ex: example with `image_path` (relative to `paths.DATA_ROOT`), `bbox`, `camera`,
            `world_coords` and `univ_coords`.

    Returns:
        A new `p3ds.Pose3DExample` with the new relative image path, box, camera and mask.
    """
    image_relpath = ex.image_path

    # Margins required by the geometric augmentations
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256

    # Square around the box center that covers the box when rotated by max_rotate
    box_center = boxlib.center(ex.bbox)
    s, c = np.sin(max_rotate), np.cos(max_rotate)
    w, h = ex.bbox[2:]
    rot_bbox_side = max(c * w + s * h, c * h + s * w)
    rot_bbox = boxlib.box_around(box_center, rot_bbox_side)

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    expanded_bbox = boxlib.expand(rot_bbox, expansion_factor)
    expanded_bbox = boxlib.intersect(expanded_bbox, np.array([0, 0, 2048, 2048]))

    # Camera that views only the crop, at the reduced resolution and without distortion
    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= expanded_bbox[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_im_relpath = ex.image_path.replace('3dhp', '3dhp_downscaled')
    new_im_path = os.path.join(paths.DATA_ROOT, new_im_relpath)
    if not (util.is_file_newer(new_im_path, "2019-11-14T23:32:07") and
            improc.is_image_readable(new_im_path)):
        im = improc.imread_jpeg(f'{paths.DATA_ROOT}/{image_relpath}')
        new_im = cameralib.reproject_image(im, ex.camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im)

    new_bbox_topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])

    # Derive the cache path from the *relative* image path. Starting from the absolute path
    # only worked because os.path.join drops DATA_ROOT when the second argument is absolute,
    # and the replacements could also alter the DATA_ROOT part of the path.
    mask_rle_relpath = new_im_relpath.replace('Images', 'FGmaskImages').replace(
        '.jpg', '.pkl')
    mask_rle_path = os.path.join(paths.DATA_ROOT, mask_rle_relpath)
    if util.is_file_newer(mask_rle_path, "2020-03-11T20:46:46"):
        mask_runlength = util.load_pickle(mask_rle_path)
    else:
        mask_relpath = ex.image_path.replace('Images', 'FGmaskImages').replace(
            '.jpg', '.png')
        mask = imageio.imread(os.path.join(paths.DATA_ROOT, mask_relpath))
        mask_reproj = cameralib.reproject_image(mask, ex.camera, new_camera, dst_shape)
        mask_runlength = get_mask_with_highest_iou(mask_reproj, new_bbox)
        util.dump_pickle(mask_runlength, mask_rle_path)

    return p3ds.Pose3DExample(
        new_im_relpath, ex.world_coords, new_bbox, new_camera,
        mask=mask_runlength, univ_coords=ex.univ_coords)
def make_mpii_yolo():
    """Build the MPII 2D pose dataset with boxes taken from YOLOv3 detections.

    For every training image, the annotated people are matched to the detections with a
    confidence above 0.5 by maximizing the total IoU. Each matched person with an IoU above
    0.1 gets the detection box and is sent to `make_efficient_example` in a worker pool.
    Finally only examples with more than 6 valid (non-NaN) joints are kept.

    Returns:
        A `Pose2DDataset` with the reduced joint set and the examples sorted by image path.
    """
    joint_info_full = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,pelv,thor,neck,head,rwri,relb,rsho,lsho,lelb,lwri',
        'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,neck-head,pelv-thor')
    joint_info_used = JointInfo(
        'rank,rkne,rhip,lhip,lkne,lank,rwri,relb,lelb,lwri',
        'lelb-lwri,relb-rwri,lhip-lkne-lank,rhip-rkne-rank')
    selected_joints = [joint_info_full.ids[name] for name in joint_info_used.names]

    mat_path = f'{paths.DATA_ROOT}/mpii/mpii_human_pose_v1_u12_1.mat'
    s = matlabfile.load(mat_path).RELEASE
    annolist = np.atleast_1d(s.annolist)
    all_boxes = util.load_pickle(f'{paths.DATA_ROOT}/mpii/yolov3_detections.pkl')

    def n_valid_joints(example):
        return np.count_nonzero(np.all(~np.isnan(example.coords), axis=-1))

    examples = []
    with util.BoundedPool(None, 120) as pool:
        for anno, is_train in zip(annolist, util.progressbar(s.img_train)):
            if not is_train:
                continue

            image_path = f'{paths.DATA_ROOT}/mpii/images/{anno.image.name}'

            # Collect the annotated people of this image
            gt_people = []
            for rect in np.atleast_1d(anno.annorect):
                if 'annopoints' not in rect or len(rect.annopoints) == 0:
                    continue

                coords = np.full(
                    shape=[joint_info_full.n_joints, 2], fill_value=np.nan, dtype=np.float32)
                for joint in np.atleast_1d(rect.annopoints.point):
                    coords[joint.id] = [joint.x, joint.y]

                # The box is computed from all annotated joints, before the joint selection
                bbox = boxlib.expand(boxlib.bb_of_points(coords), 1.25)
                gt_people.append(Pose2DExample(image_path, coords[selected_joints], bbox=bbox))

            if not gt_people:
                continue

            image_relpath = os.path.relpath(f'images/{anno.image.name}')
            boxes = [box for box in all_boxes[image_relpath] if box[-1] > 0.5]
            if not boxes:
                continue

            iou_matrix = np.array(
                [[boxlib.iou(gt_person.bbox, box[:4]) for box in boxes]
                 for gt_person in gt_people])
            # Negated because linear_sum_assignment minimizes the total cost
            gt_indices, box_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)

            for i_gt, i_det in zip(gt_indices, box_indices):
                if not iou_matrix[i_gt, i_det] > 0.1:
                    continue
                ex = gt_people[i_gt]
                ex.bbox = np.array(boxes[i_det][:4])
                without_ext, ext = os.path.splitext(
                    image_path.replace('mpii', 'mpii_downscaled_yolo'))
                new_im_path = f'{without_ext}_{i_gt:02d}{ext}'
                pool.apply_async(
                    make_efficient_example, (ex, new_im_path), callback=examples.append)

    examples.sort(key=lambda ex: ex.image_path)
    examples = [ex for ex in examples if n_valid_joints(ex) > 6]
    return Pose2DDataset(joint_info_used, examples)
def make_coco(single_person=True):
    """Build the COCO 2D pose dataset from the 2014 person keypoint annotations.

    Annotations are kept if at least a third of the joints are visible and at least half are
    labeled. People whose joint box (expanded by a factor of 1.25) has a longer side below
    200 are skipped. With `single_person`, a person is also skipped if the box of any other
    kept person in the same image overlaps it with an IoU of 0.1 or more. Every remaining
    example is sent to `make_efficient_example` in a worker pool.

    Args:
        single_person: whether to discard people that overlap with others. It also selects
            the output directory suffix ('' or '_multi').

    Returns:
        A `Pose2DDataset` with the training and validation examples sorted by image path.
    """
    joint_info = JointInfo(
        'nose,leye,reye,lear,rear,lsho,rsho,lelb,relb,lwri,rwri,lhip,rhip,lkne,rkne,lank,rank',
        'lsho-lelb-lwri,rsho-relb-rwri,lhip-lkne-lank,rhip-rkne-rank,lear-leye-nose-reye-rear')
    n_joints = joint_info.n_joints
    learning_phase_shortnames = {TRAIN: 'train', VALID: 'val', TEST: 'test'}
    # Values of the visibility flag stored with every keypoint
    UNLABELED = 0
    OCCLUDED = 1
    VISIBLE = 2
    iou_threshold = 0.1 if single_person else 0.5
    suffix = '' if single_person else '_multi'

    def joint_box(example):
        return boxlib.expand(boxlib.bb_of_points(example.coords), 1.25)

    examples_per_phase = {TRAIN: [], VALID: []}
    with util.BoundedPool(None, 120) as pool:
        for example_phase in (TRAIN, VALID):
            phase_shortname = learning_phase_shortnames[example_phase]
            coco_filepath = (
                f'{paths.DATA_ROOT}/coco/annotations/person_keypoints_{phase_shortname}2014.json')
            coco = pycocotools.coco.COCO(coco_filepath)

            # Group the sufficiently labeled people by image
            impath_to_examples = {}
            for ann in coco.anns.values():
                filename = coco.imgs[ann['image_id']]['file_name']
                image_path = f'{paths.DATA_ROOT}/coco/{phase_shortname}2014/{filename}'
                joints = np.array(ann['keypoints']).reshape([-1, 3])
                visibilities = joints[:, 2]
                coords = joints[:, :2].astype(np.float32).copy()
                n_visible_joints = np.count_nonzero(visibilities == VISIBLE)
                n_occluded_joints = np.count_nonzero(visibilities == OCCLUDED)
                n_labeled_joints = n_occluded_joints + n_visible_joints

                if not (n_visible_joints >= n_joints / 3 and n_labeled_joints >= n_joints / 2):
                    continue
                coords[visibilities == UNLABELED] = np.nan
                bbox_pt1 = np.array(ann['bbox'][0:2], np.float32)
                bbox_wh = np.array(ann['bbox'][2:4], np.float32)
                bbox = np.array([*bbox_pt1, *bbox_wh])
                ex = Pose2DExample(image_path, coords, bbox=bbox)
                impath_to_examples.setdefault(image_path, []).append(ex)

            n_images = len(impath_to_examples)
            for impath, examples in util.progressbar(
                    impath_to_examples.items(), total=n_images):
                for i_example, example in enumerate(examples):
                    box = joint_box(example)
                    if np.max(box[2:]) < 200:
                        continue

                    usable = True
                    if single_person:
                        other_boxes = [joint_box(e) for e in examples if e is not example]
                        ious = np.array([boxlib.iou(b, box) for b in other_boxes])
                        usable = np.all(ious < iou_threshold)
                    if not usable:
                        continue

                    without_ext, ext = os.path.splitext(
                        impath.replace('coco', 'coco_downscaled' + suffix))
                    new_im_path = f'{without_ext}_{i_example:02d}{ext}'
                    pool.apply_async(
                        make_efficient_example, (example, new_im_path),
                        callback=examples_per_phase[example_phase].append)

    for phase in (TRAIN, VALID):
        examples_per_phase[phase].sort(key=lambda ex: ex.image_path)
    return Pose2DDataset(joint_info, examples_per_phase[TRAIN], examples_per_phase[VALID])
def get_box(i_subject, i_seq, i_cam, i_frame):
    """Return the box of the person in a frame, clipped to a 2048x2048 image.

    The box is the bounding box of the frame's entry in the third element returned by
    `get_coords(..., 'annot3')`, expanded by a factor of 1.05.

    Args:
        i_subject, i_seq, i_cam: forwarded to `get_coords`.
        i_frame: index of the frame within the sequence.

    Returns:
        The clipped box as a float32 array.
    """
    coords_per_frame = get_coords(i_subject, i_seq, i_cam, 'annot3')[2]
    imcoords = coords_per_frame[i_frame]
    loose_box = boxlib.expand(boxlib.bb_of_points(imcoords), 1.05)
    clipped_box = boxlib.intersect(boxlib.full_box((2048, 2048)), loose_box)
    return clipped_box.astype(np.float32)
def make_mupots_yolo():
    """Build the MuPoTS dataset with boxes taken from YOLOv3 detections.

    Test examples: one per detection with confidence above 0.1, with dummy coordinates.
    Validation examples: ground-truth people matched to the confident detections by
    maximizing the total IoU, kept if the IoU is above 0.1, with the detection as the box.

    Returns:
        A `p3ds.Pose3DDataset` with the validation and test examples.
    """
    all_short_names = (
        'thor,spi4,spi2,spin,pelv,neck,head,htop,lcla,lsho,lelb,lwri,lhan,rcla,rsho,relb,rwri,'
        'rhan,lhip,lkne,lank,lfoo,ltoe,rhip,rkne,rank,rfoo,rtoe').split(',')

    # originally: [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 4, 3, 6]
    selected_joints = [7, 5, 14, 15, 16, 9, 10, 11, 23, 24, 25, 18, 19, 20, 3, 6, 4]
    order_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 14]
    joint_names = [all_short_names[i_joint] for i_joint in selected_joints]
    ids = p3ds.JointInfo.make_id_map(joint_names)
    edges = [
        (ids.htop, ids.head), (ids.head, ids.neck), (ids.neck, ids.lsho),
        (ids.lsho, ids.lelb), (ids.lelb, ids.lwri), (ids.neck, ids.rsho),
        (ids.rsho, ids.relb), (ids.relb, ids.rwri), (ids.neck, ids.spin),
        (ids.spin, ids.pelv), (ids.pelv, ids.lhip), (ids.lhip, ids.lkne),
        (ids.lkne, ids.lank), (ids.pelv, ids.rhip), (ids.rhip, ids.rkne),
        (ids.rkne, ids.rank)]
    joint_info = p3ds.JointInfo(ids, edges)

    root = f'{paths.DATA_ROOT}/mupots'
    intrinsic_matrices = util.load_json(f'{root}/camera_intrinsics.json')
    dummy_coords = np.ones((joint_info.n_joints, 3))
    detections_all = util.load_pickle(f'{root}/yolov3_detections.pkl')

    examples_val = []
    examples_test = []
    for i_seq in range(1, 21):
        scene_name = f'TS{i_seq}'
        annotations = matlabfile.load(f'{root}/TS{i_seq}/annot.mat')['annotations']
        intrinsic_matrix = intrinsic_matrices[f'TS{i_seq}']
        camera = cameralib.Camera(
            np.zeros(3), np.eye(3), intrinsic_matrix, distortion_coeffs=None,
            world_up=(0, -1, 0))

        n_people = annotations.shape[1]
        n_frames = annotations.shape[0]
        for i_frame in range(n_frames):
            image_relpath = f'TS{i_seq}/img_{i_frame:06d}.jpg'
            detections_frame = detections_all[image_relpath]
            image_path = f'{root}/{image_relpath}'

            # Test examples come straight from the detections
            examples_test.extend(
                p3ds.Pose3DExample(
                    image_path, dummy_coords, detection[:4], camera, mask=None,
                    univ_coords=dummy_coords, scene_name=scene_name)
                for detection in detections_frame if detection[4] > 0.1)

            # Ground-truth people of this frame
            gt_people = []
            for i_person in range(n_people):
                person_anno = annotations[i_frame, i_person]
                world_coords = np.array(person_anno.annot3.T[order_joints], dtype=np.float32)
                univ_world_coords = np.array(
                    person_anno.univ_annot3.T[order_joints], dtype=np.float32)
                im_coords = camera.world_to_image(world_coords)
                gt_box = boxlib.expand(boxlib.bb_of_points(im_coords), 1.1)
                gt_people.append(p3ds.Pose3DExample(
                    image_path, world_coords, gt_box, camera, mask=None,
                    univ_coords=univ_world_coords, scene_name=scene_name))

            confident_detections = [det for det in detections_frame if det[-1] > 0.1]
            if not confident_detections:
                continue

            iou_matrix = np.array(
                [[boxlib.iou(gt_person.bbox, box[:4]) for box in confident_detections]
                 for gt_person in gt_people])
            # Negated because linear_sum_assignment minimizes the total cost
            gt_indices, detection_indices = scipy.optimize.linear_sum_assignment(-iou_matrix)
            for i_gt, i_det in zip(gt_indices, detection_indices):
                if iou_matrix[i_gt, i_det] > 0.1:
                    matched = gt_people[i_gt]
                    matched.bbox = np.array(confident_detections[i_det][:4])
                    examples_val.append(matched)

    return p3ds.Pose3DDataset(
        joint_info, valid_examples=examples_val, test_examples=examples_test)