def test_round_trip(self):
    """Ingest with RoundTripAdapter, then verify the data reads back intact
    via chunk iteration, random access by id, and again after appending
    additional gulps.
    """
    # First, write some garbage in.
    out_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
    GulpIngestor(RoundTripAdapter(), out_dir, 2, 1)()

    # Then, read it and make sure the garbage came back out.
    gulp_directory = GulpDirectory(out_dir)
    first_chunk = next(gulp_directory.chunks())
    shapes_per_video = [[(4, 1, 3), (3, 1, 3), (2, 1, 3), (1, 1, 3)]]
    metas_per_video = [{'name': 'bunch of numpy arrays'}]
    for idx, (frames, meta) in enumerate(first_chunk):
        self.assertEqual(metas_per_video[idx], meta)
        self.assertEqual(shapes_per_video[idx],
                         [np.array(f).shape for f in frames])

    # Check that random_access works.
    wanted_frames = [np.ones((rows, 1, 3), dtype='uint8')
                     for rows in (4, 3, 2, 1)]
    got_frames, got_meta = gulp_directory[1]
    for wanted, got in zip(wanted_frames, got_frames):
        npt.assert_array_equal(wanted, np.array(got))
    self.assertEqual(metas_per_video[0], got_meta)

    # Now append/extend the gulps.
    GulpIngestor(RoundTripAdapter(ids=[3, 4, 5]), out_dir, 2, 1)()

    # Then, read it again, using __iter__.
    gulp_directory = GulpDirectory(out_dir)
    chunk = next(iter(gulp_directory))
    expected_shapes = [(4, 1, 3), (3, 1, 3), (2, 1, 3), (1, 1, 3)]
    expected_meta = {'name': 'bunch of numpy arrays'}
    for frames, meta in chunk:
        self.assertEqual(expected_meta, meta)
        self.assertEqual(expected_shapes,
                         [np.array(f).shape for f in frames])
def main():
    """Gulp the CrossTask dataset described by the CLI ``args`` into
    ``args.output_folder``; returns a zero exit code on completion.
    """
    crosstask_adapter = CrossTaskGulpIO(args.dataset_path,
                                        args.annotations_path)
    # Construct and immediately run the ingestor.
    GulpIngestor(crosstask_adapter,
                 args.output_folder,
                 args.videos_per_gulp,
                 args.num_workers)()
    return 0
def setUp(self, mock_adapter):
    """Wire a GulpIngestor to the injected mock adapter with a throwaway
    output folder, one video per chunk and a single worker.
    """
    super().setUp()
    self.adapter = mock_adapter
    self.videos_per_chunk = 1
    self.num_workers = 1
    self.output_folder = os.path.join(self.temp_dir, 'ANY_OUTPUT_FOLDER')
    self.gulp_ingestor = GulpIngestor(self.adapter,
                                      self.output_folder,
                                      self.videos_per_chunk,
                                      self.num_workers)
def test_one_out_of_one_duplicate(self):
    """An entry whose id already exists in the gulp directory must raise
    DuplicateIdException.
    """
    out_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
    GulpIngestor(DummyVideosAdapter(3), out_dir, 2, 1)()
    # id 1 collides with one of the three already-ingested dummy videos.
    duplicate_entry = {
        'meta': {'name': 'new_video'},
        'frames': [np.ones((4, 1, 3), dtype='uint8')],
        'id': 1,
    }
    with self.assertRaises(DuplicateIdException):
        remove_entries_with_duplicate_ids(out_dir, [duplicate_entry])
def test_init(self):
    """After ingestion, GulpDirectory must expose the output dir, the
    per-chunk meta dicts, the id-to-chunk lookup table and the merged
    meta dict.
    """
    out_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
    GulpIngestor(RoundTripAdapter(), out_dir, 2, 1)()
    gulp_directory = GulpDirectory(out_dir)
    self.assertEqual(gulp_directory.output_dir, out_dir)

    # Expected per-video metadata: frame_info rows are
    # [offset, padding, size] triples, one per frame.
    video_1 = OrderedDict([
        ('frame_info', [[0, 1, 632], [632, 1, 632],
                        [1264, 1, 632], [1896, 1, 632]]),
        ('meta_data', [OrderedDict([('name', 'bunch of numpy arrays')])]),
    ])
    video_2 = OrderedDict([
        ('frame_info', [[0, 1, 632], [632, 1, 632]]),
        ('meta_data', [OrderedDict([('name', 'shorter_video')])]),
    ])

    self.assertEqual(gulp_directory.all_meta_dicts,
                     [OrderedDict([('1', video_1)]),
                      OrderedDict([('2', video_2)])])
    self.assertEqual(gulp_directory.chunk_lookup, {'1': 0, '2': 1})
    self.assertEqual(gulp_directory.merged_meta_dict,
                     {'1': video_1, '2': video_2})
def test_no_duplicates(self):
    """Entries whose ids are absent from the gulp directory must pass
    through remove_entries_with_duplicate_ids unchanged.
    """
    out_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
    GulpIngestor(DummyVideosAdapter(3), out_dir, 2, 1)()
    # id 3 is not among the ingested ids, so nothing should be filtered.
    fresh_entries = [{
        'meta': {'name': 'new_video'},
        'frames': [np.ones((4, 1, 3), dtype='uint8')],
        'id': 3,
    }]
    filtered = remove_entries_with_duplicate_ids(out_dir, fresh_entries)
    self.assertEqual(fresh_entries, filtered)
def test_random_access(self):
    """Every id ingested via DummyVideosAdapter must appear in the chunk
    lookup table and be retrievable through __getitem__.
    """
    # Ingest dummy videos.
    adapter = DummyVideosAdapter(num_videos=25)
    out_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
    GulpIngestor(adapter, out_dir, 2, 1)()
    # Create gulp directory and check all videos can be accessed.
    gulp_directory = GulpDirectory(out_dir)
    for video_id in adapter.ids:
        with self.subTest(id_=video_id):
            # The id must be present in the lookup table.
            self.assertIn(video_id, gulp_directory.chunk_lookup)
            # The video must be readable and carry a matching meta id.
            img, meta = gulp_directory[video_id]
            self.assertEqual(meta['id'], video_id)
# NOTE(review): fragment of a larger function. The first section (through
# `ingestor()`) appears to be the body of a per-split loop whose `for`
# header is outside this view — the later `for split in splits:` suggests
# an enclosing loop over `splits`. Indentation reconstructed; TODO confirm
# against the full file.
print('Gulping from [{}] split'.format(split))
split_tsv = os.path.join(splits_dir, split + '.txt')
output_dir_split = output_dir + '/{}'.format(split)
# Only the 'train' split keeps the caller-supplied shuffle flag.
shuffle = False if split != 'train' else shuffle
adapter = GulpTGIFAdapter(data_gulp, split_tsv, videos_dir,
                          output_dir_split, shuffle=shuffle,
                          frame_size=frame_size, frame_rate=frame_rate,
                          shm_dir_path=shm_dir, label_name=label_name,
                          remove_duplicate_ids=remove_duplicates)
ingestor = GulpIngestor(adapter, output_dir_split, videos_per_chunk,
                        num_workers)
ingestor()
# Update TSV with num_frames from gulped data
uid2nfrms = dict()
for split in splits:
    uid2nfrms_split = retrieve_nfrms_from_gulp(
        os.path.join(output_dir, split))
    uid2nfrms.update(uid2nfrms_split)
# Rewrite the data file, appending the gulped frame count per uid
# (-1 when the uid was not found in any gulped split).
with open(data_file, 'w') as outfile:
    for item in data:
        outfile.write('{}\t{}\t{}\n'.format(item[0], item[1], \
            uid2nfrms[get_uid(item[0])] if get_uid(item[0]) in uid2nfrms else -1))
if not viz:
    # Ingest train/val image pairs into gulp chunks.
    for subset in ['train', 'val']:
        subset_cfg = config[args.name][subset]
        check_existing_dataset(subset_cfg['gulpio_dir'])
        pair_adapter = ImagePairListAdapter(
            FlickrPro(subset_cfg['root_dir'],
                      subset_cfg['meta'],
                      subset_cfg['download']),
            # args.shuffle
        )
        # Call to trigger ingestion.
        GulpIngestor(pair_adapter,
                     subset_cfg['gulpio_dir'],
                     images_per_chunk,
                     num_workers)()
else:
    # Visual inspection mode: step through full/crop pairs one at a time.
    pair_dataset = ImagePairVisDataset(args.root_folder)
    pair_loader = DataLoader(pair_dataset, batch_size=1, shuffle=False,
                             num_workers=0, drop_last=False)
    print("data size:", len(pair_dataset))
    for full_img, crop_img in pair_loader:
        cv2.imshow("Image Full", full_img[0])
        cv2.imshow("Image Crop", crop_img[0])
        # Stop on ESC.
        if cv2.waitKey() == 27:
            break
# NOTE(review): these two defs take `self` and so presumably sit inside an
# adapter class whose header is outside this view — confirm indentation
# against the full file.
def iter_data(self, slice_element=None):
    """Yield ``{'id', 'frames', 'meta'}`` dicts for the metadata rows
    selected by *slice_element* (defaults to the whole table).
    """
    window = slice_element or slice(0, len(self))
    step = 1 if window.step is None else window.step
    for row_idx in range(window.start, window.stop, step):
        row = self.metadata.iloc[row_idx].to_dict()
        sample_key = row['SAMPLE_KEY']
        yield {'id': sample_key,
               'frames': self.load_img(sample_key),
               'meta': row}

def load_img(self, key):
    """Load ``<key>.npz`` from the data dir and split its 'sample' array
    (520 x 696 x 5) into a list of five single-channel images.
    """
    stack = np.load(os.path.join(self.datadir, key + ".npz"))["sample"]
    return [stack[:, :, channel] for channel in range(5)]


if __name__ == "__main__":
    adapter = MolChemAdapter(
        datadir='../data/images/',
        metafile=['../data/metadata/datasplit1-train.csv',
                  '../data/metadata/datasplit1-val.csv',
                  '../data/metadata/datasplit1-test.csv'])
    # Construct and immediately run the ingestor.
    GulpIngestor(adapter,
                 output_folder='../data/gulpio/',
                 videos_per_chunk=1000,
                 num_workers=16)()