def get_coco_dataset(insize, image_root, annotations, min_num_keypoints=1,
                     use_cache=False, do_augmentation=False):
    """Load a COCO person-keypoint annotation file into a KeypointDataset2D.

    A synthetic 'neck' keypoint (midpoint of the shoulders) is prepended to
    the 17 default COCO keypoints; keypoints are stored in (y, x) order.

    Args:
        insize: network input size forwarded to KeypointDataset2D.
        image_root: directory containing the COCO images.
        annotations: path to the COCO-format JSON annotation file.
        min_num_keypoints: skip annotations with fewer labeled keypoints.
        use_cache: forwarded to KeypointDataset2D.
        do_augmentation: forwarded to KeypointDataset2D.

    Returns:
        A KeypointDataset2D over all annotated (non-crowd, person) images.

    NOTE(review): a later ``get_coco_dataset`` definition in this file
    shadows this one at import time — confirm which is intended.
    """
    cat_id = 1  # COCO category id 1 is 'person'
    dataset_type = 'coco'
    # Bug fix: use a context manager so the annotation file is closed
    # (the original json.load(open(...)) leaked the file descriptor).
    with open(annotations, 'r') as f:
        dataset = json.load(f)
    cat = dataset['categories'][cat_id - 1]
    assert cat['keypoints'] == DEFAULT_KEYPOINT_NAMES

    # image_id => (filename, keypoints, bbox, is_visible, is_labeled)
    images = {}
    for image in dataset['images']:
        images[image['id']] = image['file_name'], [], [], [], []

    # Hoisted out of the loop: these lookups are loop-invariant.
    left_shoulder_idx = DEFAULT_KEYPOINT_NAMES.index('left_shoulder')
    right_shoulder_idx = DEFAULT_KEYPOINT_NAMES.index('right_shoulder')

    for anno in dataset['annotations']:
        if anno['num_keypoints'] < min_num_keypoints:
            continue
        if anno['category_id'] != cat_id:
            continue
        if anno['iscrowd'] != 0:  # iscrowd == 0 means a single object
            continue
        image_id = anno['image_id']
        # COCO stores keypoints as a flat [x, y, v, x, y, v, ...] list.
        d = np.array(anno['keypoints'], dtype='float32').reshape(-1, 3)

        # Define 'neck' as the midpoint of left_shoulder and right_shoulder,
        # labeled only when both shoulders are labeled (v >= 1).
        left_shoulder, left_v = d[left_shoulder_idx][:2], d[left_shoulder_idx][2]
        right_shoulder, right_v = d[right_shoulder_idx][:2], d[right_shoulder_idx][2]
        if left_v >= 1 and right_v >= 1:
            neck = (left_shoulder + right_shoulder) / 2.
            labeled = 1
            d = np.vstack([np.array([*neck, labeled]), d])
        else:
            labeled = 0
            # Insert dummy data corresponding to `neck`.
            d = np.vstack([np.array([0.0, 0.0, labeled]), d])

        keypoints = d[:, [1, 0]]  # array of (y, x)
        bbox = anno['bbox']
        is_visible = d[:, 2] == 2   # v == 2: labeled and visible
        is_labeled = d[:, 2] >= 1   # v >= 1: labeled (maybe occluded)

        entry = images[image_id]
        entry[1].append(keypoints)
        entry[2].append(bbox)
        entry[3].append(is_visible)
        entry[4].append(is_labeled)

    # Filter out images that ended up with no annotations.
    image_paths = []
    keypoints = []
    bbox = []
    is_visible = []
    is_labeled = []
    for filename, k, b, v, l in images.values():
        if len(k) == 0:
            continue
        image_paths.append(filename)
        bbox.append(b)
        keypoints.append(k)
        is_visible.append(v)
        is_labeled.append(l)

    return KeypointDataset2D(dataset_type=dataset_type,
                             insize=insize,
                             keypoint_names=KEYPOINT_NAMES,
                             edges=np.array(EDGES),
                             flip_indices=FLIP_INDICES,
                             keypoints=keypoints,
                             bbox=bbox,
                             is_visible=is_visible,
                             is_labeled=is_labeled,
                             image_paths=image_paths,
                             image_root=image_root,
                             use_cache=use_cache,
                             do_augmentation=do_augmentation)
def get_mpii_dataset(insize, image_root, annotations, train_size=0.5,
                     min_num_keypoints=1, use_cache=False, seed=0):
    """Load MPII annotations into train/test KeypointDataset2D splits.

    Debug variant: only annotations whose image file actually exists under
    ``image_root`` are kept (used to overfit on a small image subset).

    Args:
        insize: network input size forwarded to KeypointDataset2D.
        image_root: directory containing the MPII images.
        annotations: path to the MPII-format JSON annotation file.
        train_size: fraction of images assigned to the training split.
        min_num_keypoints: skip annotations with fewer visible keypoints.
        use_cache: forwarded to KeypointDataset2D.
        seed: seed for the random train/test split.

    Returns:
        Tuple ``(train_set, test_set)`` of KeypointDataset2D.

    NOTE(review): a second ``get_mpii_dataset`` defined later in this file
    shadows this one at import time — confirm which is intended.
    """
    dataset_type = 'mpii'
    # Bug fix: context manager so the annotation file is closed.
    with open(annotations, 'r') as f:
        annotations = json.load(f)

    # filename => (keypoints, bbox, is_visible, is_labeled, scale, position)
    images = {}

    # DEBUGGING OVERFITTING: restrict to images actually present on disk.
    # Perf fix: a set gives O(1) membership tests instead of O(n) per lookup.
    from os import listdir
    files = set(listdir(image_root))
    for filename in np.unique([anno['filename'] for anno in annotations]):
        if filename in files:
            images[filename] = [], [], [], [], [], []  # incl. scale, position

    for anno in annotations:
        if anno['filename'] in files:
            is_visible = [anno['is_visible'][k] for k in KEYPOINT_NAMES[1:]]
            if sum(is_visible) < min_num_keypoints:
                continue
            keypoints = [
                anno['joint_pos'][k][::-1] for k in KEYPOINT_NAMES[1:]
            ]
            x1, y1, x2, y2 = anno['head_rect']
            entry = images[anno['filename']]
            entry[0].append(np.array(keypoints))  # array of (y, x)
            entry[1].append(np.array([x1, y1, x2 - x1, y2 - y1]))  # x, y, w, h
            # Bug fix: np.bool was removed in NumPy 1.20+; use builtin bool.
            entry[2].append(np.array(is_visible, dtype=bool))
            is_labeled = np.ones(len(is_visible), dtype=bool)
            entry[3].append(is_labeled)
            entry[4].append(np.array(anno['scale']))
            entry[5].append(np.array(anno['position']))

    # Split the image filenames into train/test subsets.
    train_images, test_images = split_dataset_random(
        list(images.keys()), int(len(images) * train_size), seed=seed)
    train_set = KeypointDataset2D(
        dataset_type=dataset_type,
        insize=insize,
        keypoint_names=KEYPOINT_NAMES,
        edges=np.array(EDGES),
        flip_indices=FLIP_INDICES,
        keypoints=[images[i][0] for i in train_images],
        bbox=[images[i][1] for i in train_images],
        is_visible=[images[i][2] for i in train_images],
        is_labeled=[images[i][3] for i in train_images],
        scale=[images[i][4] for i in train_images],
        position=[images[i][5] for i in train_images],
        image_paths=train_images,
        image_root=image_root,
        use_cache=use_cache,
        do_augmentation=False  # TODO must be True
    )
    test_set = KeypointDataset2D(
        dataset_type=dataset_type,
        insize=insize,
        keypoint_names=KEYPOINT_NAMES,
        edges=np.array(EDGES),
        flip_indices=FLIP_INDICES,
        keypoints=[images[i][0] for i in test_images],
        bbox=[images[i][1] for i in test_images],
        is_visible=[images[i][2] for i in test_images],
        is_labeled=[images[i][3] for i in test_images],
        scale=[images[i][4] for i in test_images],
        position=[images[i][5] for i in test_images],
        image_paths=test_images,
        image_root=image_root,
        use_cache=use_cache,
        do_augmentation=False)
    return train_set, test_set
def get_mpii_dataset(insize, image_root, annotations, train_size=0.5,
                     min_num_keypoints=1, use_cache=False, seed=0):
    """Load MPII annotations into train/test KeypointDataset2D splits.

    Args:
        insize: network input size forwarded to KeypointDataset2D.
        image_root: directory containing the MPII images.
        annotations: path to the MPII-format JSON annotation file.
        train_size: fraction of images assigned to the training split.
        min_num_keypoints: skip annotations with fewer visible keypoints.
        use_cache: forwarded to KeypointDataset2D.
        seed: seed for the random train/test split.

    Returns:
        Tuple ``(train_set, test_set)`` of KeypointDataset2D.

    NOTE(review): this redefines ``get_mpii_dataset`` and shadows the
    earlier debug variant in this file — confirm which is intended.
    """
    dataset_type = 'mpii'
    # Bug fix: context manager so the annotation file is closed.
    with open(annotations, 'r') as f:
        annotations = json.load(f)

    # filename => (keypoints, bbox, is_visible, is_labeled)
    images = {}
    for filename in np.unique([anno['filename'] for anno in annotations]):
        images[filename] = [], [], [], []

    for anno in annotations:
        # KEYPOINT_NAMES[0] is presumably a synthetic point (e.g. 'neck')
        # not present in the raw annotations, hence the [1:] slice.
        is_visible = [anno['is_visible'][k] for k in KEYPOINT_NAMES[1:]]
        if sum(is_visible) < min_num_keypoints:
            continue
        keypoints = [anno['joint_pos'][k][::-1] for k in KEYPOINT_NAMES[1:]]
        x1, y1, x2, y2 = anno['head_rect']
        entry = images[anno['filename']]
        entry[0].append(np.array(keypoints))  # array of (y, x)
        entry[1].append(np.array([x1, y1, x2 - x1, y2 - y1]))  # x, y, w, h
        # Bug fix: np.bool was removed in NumPy 1.20+; use builtin bool.
        entry[2].append(np.array(is_visible, dtype=bool))
        entry[3].append(np.ones(len(is_visible), dtype=bool))

    # Split the image filenames into train/test subsets.
    train_images, test_images = split_dataset_random(
        list(images.keys()), int(len(images) * train_size), seed=seed)
    train_set = KeypointDataset2D(
        dataset_type=dataset_type,
        insize=insize,
        keypoint_names=KEYPOINT_NAMES,
        edges=np.array(EDGES),
        flip_indices=FLIP_INDICES,
        keypoints=[images[i][0] for i in train_images],
        bbox=[images[i][1] for i in train_images],
        is_visible=[images[i][2] for i in train_images],
        is_labeled=[images[i][3] for i in train_images],
        image_paths=train_images,
        image_root=image_root,
        use_cache=use_cache,
        do_augmentation=True)
    test_set = KeypointDataset2D(
        dataset_type=dataset_type,
        insize=insize,
        keypoint_names=KEYPOINT_NAMES,
        edges=np.array(EDGES),
        flip_indices=FLIP_INDICES,
        keypoints=[images[i][0] for i in test_images],
        bbox=[images[i][1] for i in test_images],
        is_visible=[images[i][2] for i in test_images],
        is_labeled=[images[i][3] for i in test_images],
        image_paths=test_images,
        image_root=image_root,
        use_cache=use_cache,
        do_augmentation=False)
    return train_set, test_set
def get_coco_dataset(insize, image_root, annotations, min_num_keypoints=1,
                     use_cache=False, do_augmentation=False):
    """Load COCO person-keypoint annotations keyed per annotation (not image).

    Variant that works with pre-cropped, per-annotation images: each kept
    annotation maps to an image named ``<stem>_annid<id>.<ext>`` and its
    keypoints/bbox are shifted by the crop offset read from the
    ``xy_offset`` JSON file under ``image_root``.

    Args:
        insize: network input size forwarded to KeypointDataset2D.
        image_root: directory containing cropped images and ``xy_offset``.
        annotations: path to the COCO-format JSON annotation file.
        min_num_keypoints: skip annotations with fewer labeled keypoints.
        use_cache: forwarded to KeypointDataset2D.
        do_augmentation: forwarded to KeypointDataset2D.

    Returns:
        A KeypointDataset2D with one entry per kept annotation.

    NOTE(review): this redefines ``get_coco_dataset`` and shadows the
    earlier definition in this file — confirm which is intended.
    """
    cat_id = 1  # just persons
    dataset_type = 'coco'
    # Bug fix: context managers so both JSON files are closed.
    with open(annotations, 'r') as f:
        dataset = json.load(f)
    # Per-annotation (x, y) crop offsets, keyed by stringified annotation id.
    with open(os.path.join(image_root, 'xy_offset')) as f:
        xy_offset = json.load(f)
    cat = dataset['categories'][cat_id - 1]  # get just persons
    assert cat['keypoints'] == DEFAULT_KEYPOINT_NAMES

    # image_id => (filename, keypoints, bbox, is_visible, is_labeled, names)
    images = {}
    annotations_dict = {}
    for image in dataset['images']:
        images[image['id']] = image['file_name'], [], [], [], [], []

    # Hoisted out of the loop: these lookups are loop-invariant.
    left_shoulder_idx = DEFAULT_KEYPOINT_NAMES.index('left_shoulder')
    right_shoulder_idx = DEFAULT_KEYPOINT_NAMES.index('right_shoulder')

    for anno in dataset['annotations']:
        if anno['num_keypoints'] < min_num_keypoints:
            continue
        if anno['category_id'] != cat_id:
            continue
        # iscrowd filter intentionally disabled (iscrowd 0 = single object):
        # if anno['iscrowd'] != 0:
        #     continue
        # Skip small persons (bbox height <= 150 px).
        if anno['bbox'][3] <= 150:
            continue
        annotations_dict[anno['id']] = anno['image_id'], [], [], [], [], []
        # Annotations missing from the offset file are treated as uncropped.
        x_offset, y_offset = xy_offset.get(str(anno['id']), (0, 0))
        image_id = anno['image_id']
        # COCO stores keypoints as a flat [x, y, v, x, y, v, ...] list.
        d = np.array(anno['keypoints'], dtype='float32').reshape(-1, 3)

        # Define 'neck' as the midpoint of left_shoulder and right_shoulder,
        # labeled only when both shoulders are labeled (v >= 1).
        left_shoulder, left_v = d[left_shoulder_idx][:2], d[left_shoulder_idx][2]
        right_shoulder, right_v = d[right_shoulder_idx][:2], d[right_shoulder_idx][2]
        if left_v >= 1 and right_v >= 1:
            neck = (left_shoulder + right_shoulder) / 2.
            labeled = 1
            d = np.vstack([np.array([*neck, labeled]), d])
        else:
            labeled = 0
            # Insert dummy data corresponding to `neck`.
            d = np.vstack([np.array([0.0, 0.0, labeled]), d])

        keypoints = d[:, [1, 0]]  # array of (y, x)
        # Shift keypoints into the cropped image's coordinate frame.
        # Simplification: plain broadcasting replaces the original
        # np.multiply(np.ones_like(keypoints), corr_scalar) round-trip.
        keypoints_corr = keypoints - np.array([y_offset, x_offset])
        bbox = anno['bbox']
        # NOTE: mutates the bbox list inside `dataset` in place, as before.
        bbox[0] -= x_offset
        bbox[1] -= y_offset
        is_visible = d[:, 2] == 2   # v == 2: labeled and visible
        is_labeled = d[:, 2] >= 1   # v >= 1: labeled (maybe occluded)

        entry = annotations_dict[anno['id']]
        entry[1].append(np.asarray(keypoints_corr))
        entry[2].append(np.asarray(bbox))
        # Bug fix: np.bool was removed in NumPy 1.20+; use builtin bool.
        entry[3].append(np.asarray(is_visible).astype(bool))
        entry[4].append(np.asarray(is_labeled).astype(bool))
        # Fifth slot holds the per-annotation cropped file name:
        # '<stem>_annid<annotation id>.<extension>'.
        file_name = images[image_id][0].split('.')
        entry[5].append('{}_annid{}.{}'.format(file_name[0], anno['id'],
                                               file_name[1]))

    # Filter out entries that ended up with no keypoints.
    image_paths = []
    keypoints = []
    bbox = []
    is_visible = []
    is_labeled = []
    for _image_id, k, b, v, l, new_filename in annotations_dict.values():
        if len(k) == 0:
            continue
        image_paths.append(new_filename[0])
        bbox.append(b)
        keypoints.append(k)
        is_visible.append(v)
        is_labeled.append(l)

    return KeypointDataset2D(
        dataset_type=dataset_type,
        insize=insize,
        keypoint_names=KEYPOINT_NAMES,
        edges=np.array(EDGES),
        flip_indices=FLIP_INDICES,
        keypoints=keypoints,
        bbox=bbox,
        is_visible=is_visible,
        is_labeled=is_labeled,
        image_paths=image_paths,
        image_root=image_root,
        use_cache=use_cache,
        do_augmentation=do_augmentation
    )