Exemple #1
0
    def test_round_trip(self):
        """Ingest RoundTripAdapter data, read it back, then extend and re-read.

        Three phases are exercised against the same output directory:
        reading the first chunk through ``chunks()``, random access through
        ``gulp_directory[1]``, and reading the first chunk through
        ``__iter__`` after a second ingestion with ``ids=[3, 4, 5]``.
        """
        target_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
        frame_shapes = [(4, 1, 3), (3, 1, 3), (2, 1, 3), (1, 1, 3)]
        video_meta = {'name': 'bunch of numpy arrays'}

        # Phase 0: write the adapter's data (2 videos per chunk, 1 worker).
        GulpIngestor(RoundTripAdapter(), target_dir, 2, 1)()

        # Phase 1: the first chunk must give back the same shapes and meta.
        gulp_directory = GulpDirectory(target_dir)
        first_chunk = next(gulp_directory.chunks())
        shapes_per_video = [frame_shapes]
        meta_per_video = [video_meta]
        for position, (frames, meta) in enumerate(first_chunk):
            self.assertEqual(meta_per_video[position], meta)
            observed_shapes = [np.array(frame).shape for frame in frames]
            self.assertEqual(shapes_per_video[position], observed_shapes)

        # Phase 2: random access by id.
        wanted_frames = [np.ones(shape, dtype='uint8')
                         for shape in frame_shapes]
        got_frames, got_meta = gulp_directory[1]
        # zip stops at the shorter sequence, so only paired frames are
        # compared here.
        for wanted, got in zip(wanted_frames, got_frames):
            npt.assert_array_equal(wanted, np.array(got))
        self.assertEqual(meta_per_video[0], got_meta)

        # Phase 3: append/extend the gulps with further ids ...
        GulpIngestor(RoundTripAdapter(ids=[3, 4, 5]), target_dir, 2, 1)()

        # ... and read again, this time through __iter__.
        gulp_directory = GulpDirectory(target_dir)
        first_chunk = next(iter(gulp_directory))
        for frames, meta in first_chunk:
            self.assertEqual(video_meta, meta)
            observed_shapes = [np.array(frame).shape for frame in frames]
            self.assertEqual(frame_shapes, observed_shapes)
Exemple #2
0
def main():
    """Run a gulp ingestion configured by ``args``.

    ``args`` is not a parameter; it is looked up in the enclosing scope and
    must provide ``dataset_path``, ``annotations_path``, ``output_folder``,
    ``videos_per_gulp`` and ``num_workers``.

    Returns:
        int: always 0.
    """
    run_ingestion = GulpIngestor(
        CrossTaskGulpIO(args.dataset_path, args.annotations_path),
        args.output_folder,
        args.videos_per_gulp,
        args.num_workers,
    )
    # Calling the ingestor object performs the ingestion.
    run_ingestion()
    return 0
Exemple #3
0
 def setUp(self, mock_adapter):
     """Create a GulpIngestor fixture around the supplied adapter.

     Args:
         mock_adapter: adapter object stored as ``self.adapter`` and handed
             to the ingestor (presumably injected by a patch decorator that
             is not visible here -- confirm).
     """
     super().setUp()
     chunk_size, worker_count = 1, 1
     target = os.path.join(self.temp_dir, 'ANY_OUTPUT_FOLDER')

     self.adapter = mock_adapter
     self.output_folder = target
     self.videos_per_chunk = chunk_size
     self.num_workers = worker_count
     self.gulp_ingestor = GulpIngestor(
         mock_adapter, target, chunk_size, worker_count)
Exemple #4
0
 def test_one_out_of_one_duplicate(self):
     """A single incoming entry with a clashing id must raise.

     After ingesting ``DummyVideosAdapter(3)``, an entry with id 1 is
     offered to ``remove_entries_with_duplicate_ids``; the test expects
     ``DuplicateIdException``.
     """
     target_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
     GulpIngestor(DummyVideosAdapter(3), target_dir, 2, 1)()

     clashing_entry = {
         'meta': {'name': 'new_video'},
         'frames': [np.ones((4, 1, 3), dtype='uint8')],
         'id': 1,
     }
     with self.assertRaises(DuplicateIdException):
         remove_entries_with_duplicate_ids(target_dir, [clashing_entry])
Exemple #5
0
    def test_init(self):
        """GulpDirectory exposes the expected metadata after an ingestion.

        Checks ``output_dir``, ``all_meta_dicts``, ``chunk_lookup`` and
        ``merged_meta_dict`` of a directory written from RoundTripAdapter.
        """
        def video_entry(frame_count, name):
            # One expected per-video record: a [offset, 1, 632] triple per
            # frame with offsets advancing by 632, plus the video's meta.
            return OrderedDict([
                ('frame_info',
                 [[632 * k, 1, 632] for k in range(frame_count)]),
                ('meta_data', [OrderedDict([('name', name)])]),
            ])

        target_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
        GulpIngestor(RoundTripAdapter(), target_dir, 2, 1)()

        gulp_directory = GulpDirectory(target_dir)
        self.assertEqual(gulp_directory.output_dir, target_dir)

        # One ordered mapping per entry of all_meta_dicts, keyed by video id.
        per_chunk_expected = [
            OrderedDict([('1', video_entry(4, 'bunch of numpy arrays'))]),
            OrderedDict([('2', video_entry(2, 'shorter_video'))]),
        ]
        self.assertEqual(gulp_directory.all_meta_dicts, per_chunk_expected)

        self.assertEqual(gulp_directory.chunk_lookup, {'1': 0, '2': 1})

        # The merged view holds the same records in a single mapping.
        merged_expected = {
            '1': video_entry(4, 'bunch of numpy arrays'),
            '2': video_entry(2, 'shorter_video'),
        }
        self.assertEqual(gulp_directory.merged_meta_dict, merged_expected)
Exemple #6
0
 def test_no_duplicates(self):
     """Entries whose id does not clash are returned unchanged.

     After ingesting ``DummyVideosAdapter(3)``, an entry with id 3 is
     offered to ``remove_entries_with_duplicate_ids``; the returned value
     must equal the input list.
     """
     target_dir = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
     GulpIngestor(DummyVideosAdapter(3), target_dir, 2, 1)()

     incoming = [{
         'meta': {'name': 'new_video'},
         'frames': [np.ones((4, 1, 3), dtype='uint8')],
         'id': 3,
     }]
     filtered = remove_entries_with_duplicate_ids(target_dir, incoming)
     self.assertEqual(incoming, filtered)
Exemple #7
0
    def test_random_access(self):
        """Every id produced by the adapter is retrievable by random access.

        Ingests 25 dummy videos (2 videos per chunk, 1 worker), then, for
        each id in ``adapter.ids``, checks that the id is present in
        ``chunk_lookup`` and that indexing the directory with it returns
        metadata carrying the same id. Each id runs in its own subTest so a
        failure reports which id broke.
        """
        # ingest dummy videos
        adapter = DummyVideosAdapter(num_videos=25)
        output_directory = os.path.join(self.temp_dir, "ANY_OUTPUT_DIR")
        ingestor = GulpIngestor(adapter, output_directory, 2, 1)
        ingestor()

        # create gulp directory
        gulp_directory = GulpDirectory(output_directory)

        # check all videos can be accessed
        for id_ in adapter.ids:
            with self.subTest(id_=id_):
                # assertIn reports both the id and the lookup table on
                # failure, unlike assertTrue(id_ in ...).
                self.assertIn(id_, gulp_directory.chunk_lookup)
                # the frames are not inspected; only the access and the
                # returned metadata matter here
                _, meta = gulp_directory[id_]
                self.assertEqual(meta['id'], id_)
Exemple #8
0
        # Ingest one split: derive its split file and output directory,
        # build the adapter, then run the ingestor.
        print('Gulping from [{}] split'.format(split))
        split_tsv = os.path.join(splits_dir, split + '.txt')
        output_dir_split = output_dir + '/{}'.format(split)
        # Shuffling is forced off for every split except 'train'; for
        # 'train' the current value of `shuffle` is kept.
        # NOTE(review): this rebinds `shuffle` itself. If this code runs in
        # a loop over splits (the loop header is not visible here), any
        # non-train split processed before 'train' leaves `shuffle` False
        # for 'train' as well -- confirm the split order.
        shuffle = False if split != 'train' else shuffle

        adapter = GulpTGIFAdapter(data_gulp,
                                  split_tsv,
                                  videos_dir,
                                  output_dir_split,
                                  shuffle=shuffle,
                                  frame_size=frame_size,
                                  frame_rate=frame_rate,
                                  shm_dir_path=shm_dir,
                                  label_name=label_name,
                                  remove_duplicate_ids=remove_duplicates)
        ingestor = GulpIngestor(adapter, output_dir_split, videos_per_chunk,
                                num_workers)
        # Calling the ingestor object performs the ingestion.
        ingestor()

    # Update TSV with num_frames from gulped data
    # Merge the per-split results of retrieve_nfrms_from_gulp into a single
    # mapping; later splits overwrite earlier ones on key collisions.
    uid2nfrms = dict()
    for split in splits:
        uid2nfrms_split = retrieve_nfrms_from_gulp(
            os.path.join(output_dir, split))
        uid2nfrms.update(uid2nfrms_split)

    # Rewrite data_file with one tab-separated row per item: item[0],
    # item[1], and the value looked up under get_uid(item[0]), or -1 when
    # that uid is missing from the mapping.
    with open(data_file, 'w') as outfile:
        for item in data:
            outfile.write('{}\t{}\t{}\n'.format(item[0], item[1], \
              uid2nfrms[get_uid(item[0])] if get_uid(item[0]) in uid2nfrms else -1))
Exemple #9
0
    # Two modes: preview image pairs interactively when `viz` is truthy,
    # otherwise ingest the 'train' and 'val' subsets into gulp format.
    if viz:
        dataset = ImagePairVisDataset(args.root_folder)
        # One sample per batch, in dataset order, loaded in the main process.
        loader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0,
                            drop_last=False)
        print("data size:", len(dataset))

        for img_full, img_crop in loader:
            # Index 0 picks the single sample out of the size-1 batch.
            cv2.imshow("Image Full", img_full[0])
            cv2.imshow("Image Crop", img_crop[0])
            # Wait for a key press; 27 is the ASCII code of Esc and stops
            # the preview, any other key advances to the next pair.
            key = cv2.waitKey()
            if key == 27:
                break
    else:
        for subset in ['train', 'val']:
            # Per-subset settings are read from config[args.name][subset].
            check_existing_dataset(config[args.name][subset]['gulpio_dir'])

            adapter = ImagePairListAdapter(
                FlickrPro(config[args.name][subset]['root_dir'],
                          config[args.name][subset]['meta'],
                          config[args.name][subset]['download']),
                # args.shuffle
            )

            ingestor = GulpIngestor(adapter,
                                    config[args.name][subset]['gulpio_dir'],
                                    images_per_chunk, num_workers)
            ingestor()  # call to trigger ingestion
Exemple #10
0
    def iter_data(self, slice_element=None):
        """Yield one ingestion record per metadata row in the requested range.

        Args:
            slice_element: optional ``slice`` over row positions of
                ``self.metadata``. When omitted, every row is visited.
                Open-ended or negative bounds are resolved against
                ``len(self)`` with standard slice semantics.

        Yields:
            dict: ``'id'`` is the row's ``SAMPLE_KEY`` value, ``'frames'``
            is the result of ``self.load_img`` for that key, and ``'meta'``
            is the row converted to a dict.
        """
        s = slice_element or slice(0, len(self))
        # slice.indices fills in None start/stop/step and clamps the bounds
        # to len(self); range(None, ...) would raise TypeError for slices
        # such as slice(None, 5).
        for idx in range(*s.indices(len(self))):
            sample = self.metadata.iloc[idx].to_dict()
            sample_key = sample['SAMPLE_KEY']
            frames = self.load_img(sample_key)
            yield {'id': sample_key, 'frames': frames, 'meta': sample}

    def load_img(self, key):
        """Load the sample stored under *key* and split it into 5 planes.

        Reads ``<self.datadir>/<key>.npz``, takes its ``"sample"`` array and
        returns the first five slices along the last of its three axes.

        Args:
            key: file name (without the ``.npz`` suffix) inside
                ``self.datadir``.

        Returns:
            list: five 2-D arrays, ``sample[:, :, 0]`` .. ``sample[:, :, 4]``.
        """
        path = os.path.join(self.datadir, key + ".npz")
        # np.load on an .npz archive keeps the file open until the returned
        # object is closed; the context manager releases the handle once
        # the array has been read into memory.
        with np.load(path) as archive:
            img = archive["sample"]  # Shape 520 x 696 x 5
        return [img[:, :, idx] for idx in range(5)]


if __name__ == "__main__":
    # Script entry point: build the adapter over the three datasplit1
    # metadata files and ingest everything into ../data/gulpio/.
    split_files = [
        '../data/metadata/datasplit1-train.csv',
        '../data/metadata/datasplit1-val.csv',
        '../data/metadata/datasplit1-test.csv',
    ]
    mol_adapter = MolChemAdapter(datadir='../data/images/',
                                 metafile=split_files)
    run_ingestion = GulpIngestor(
        mol_adapter,
        output_folder='../data/gulpio/',
        videos_per_chunk=1000,
        num_workers=16,
    )
    # Calling the ingestor object performs the ingestion.
    run_ingestion()