Example 1
 def get_test_image_resource_descriptions(
         self, filenames, size=None, min_num_images_for_batch_loading=100):
     """Map each test-image filename to an S3 resource description.

     For 'scaled_256', filenames are grouped by their test batch: when at
     least ``min_num_images_for_batch_loading`` images come from one batch,
     they are all served from that batch's shared pickle; otherwise each
     image is fetched as an individual object. For 'scaled_500' every image
     is fetched individually. Other sizes are not supported.

     Raises:
         NotImplementedError: for any ``size`` other than the two above.
     """
     if not filenames:
         return {}
     descriptions = {}
     if size == 'scaled_256':
         # Bucket the requested filenames by the test batch they belong to.
         by_batch = {}
         for name in filenames:
             by_batch.setdefault(self.test_batch_by_filename[name], []).append(name)
         for batch_name, batch_files in by_batch.items():
             if len(batch_files) >= min_num_images_for_batch_loading:
                 # Enough hits in this batch: load the whole batch pickle once.
                 pickle_key = 'imagenet-test-batches/' + batch_name + '.pickle'
                 for name in batch_files:
                     descriptions[name] = utils.S3BatchResource(
                         pickle_key, name, 'pickle_dict', 50 * 1000 * 1000)
             else:
                 # Too few hits: fetch each image object on its own.
                 for name in batch_files:
                     descriptions[name] = utils.S3BatchResource(
                         'imagenet-test-scaled/' + name, None,
                         'object_bytes', 50 * 1000)
     elif size == 'scaled_500':
         for name in filenames:
             descriptions[name] = utils.S3BatchResource(
                 'imagenet-test/' + name, None, 'object_bytes', 150 * 1000)
     else:
         raise NotImplementedError()
     # Sanity check: exactly one description per requested filename.
     assert len(descriptions) == len(filenames)
     assert all(name in descriptions for name in filenames)
     return descriptions
Example 2
 def get_train_image_resource_descriptions(
         self, filenames, size=None, min_num_images_for_batch_loading=100):
     """Map each training-image filename to an S3 resource description.

     Only 'scaled_256' is supported. Filenames are grouped by WordNet id;
     when at least ``min_num_images_for_batch_loading`` images share a
     wnid, they are all served from that wnid's scaled tarball, otherwise
     each image is fetched as an individual object.

     Raises:
         NotImplementedError: if ``size`` is anything but 'scaled_256'.
     """
     if not filenames:
         return {}
     if size != 'scaled_256':
         raise NotImplementedError()
     # Bucket the requested filenames by their WordNet id.
     by_wnid = {}
     for name in filenames:
         by_wnid.setdefault(self.get_wnid_from_train_filename(name), []).append(name)
     descriptions = {}
     for wnid, wnid_files in by_wnid.items():
         if len(wnid_files) >= min_num_images_for_batch_loading:
             # Enough hits for this wnid: read them out of the shared tarball.
             tarball_key = 'imagenet-train/' + wnid + '-scaled.tar'
             for name in wnid_files:
                 descriptions[name] = utils.S3BatchResource(
                     tarball_key, wnid + '/' + name, 'tarball',
                     50 * 1300 * 1000)
         else:
             # Too few hits: fetch each image object on its own.
             for name in wnid_files:
                 descriptions[name] = utils.S3BatchResource(
                     'imagenet-train-individual/' + wnid + '/' + name, None,
                     'object_bytes', 50 * 1000)
     # Sanity check: exactly one description per requested filename.
     assert len(descriptions) == len(filenames)
     assert all(name in descriptions for name in filenames)
     return descriptions
Example 3
 def get_image_resource_descriptions(self, candidate_ids, size=None, min_num_images_for_batch_loading=100000):
     """Map each candidate id to an S3 resource description for its image.

     For 'scaled_256', candidates that carry a 'batch' entry are grouped by
     that batch; a batch with at least ``min_num_images_for_batch_loading``
     requested candidates is served from its shared pickle, smaller groups
     and batch-less candidates are fetched as individual objects. The
     'scaled_500' and 'original' sizes always fetch individual objects.

     Raises:
         AssertionError: if any id is not a known candidate.
         NotImplementedError: for an unsupported ``size``.
     """
     if not candidate_ids:
         return {}
     # Every requested id must be a known candidate.
     assert all(cid in self.all_candidates for cid in candidate_ids)
     descriptions = {}
     if size == 'scaled_256':
         # Split the ids into batch-grouped and batch-less candidates.
         by_batch = {}
         unbatched = []
         for cid in candidate_ids:
             info = self.all_candidates[cid]
             if 'batch' in info:
                 by_batch.setdefault(info['batch'], []).append(cid)
             else:
                 unbatched.append(cid)
         for batch_name, batch_cids in by_batch.items():
             if len(batch_cids) >= min_num_images_for_batch_loading:
                 # Enough hits in this batch: load the shared batch pickle.
                 pickle_key = 'imagenet2candidates_batches/' + batch_name + '.pickle'
                 for cid in batch_cids:
                     descriptions[cid] = utils.S3BatchResource(
                         pickle_key, cid, 'pickle_dict', 1000 * 50 * 1000)
             else:
                 # Too few hits: fetch each scaled image on its own.
                 for cid in batch_cids:
                     descriptions[cid] = utils.S3BatchResource(
                         'imagenet2candidates_scaled/' + cid + '.jpg', None,
                         'object_bytes', 50 * 1000)
         for cid in unbatched:
             descriptions[cid] = utils.S3BatchResource(
                 'imagenet2candidates_scaled/' + cid + '.jpg', None,
                 'object_bytes', 150 * 1000)
     elif size == 'scaled_500':
         for cid in candidate_ids:
             descriptions[cid] = utils.S3BatchResource(
                 'imagenet2candidates_mturk/' + cid + '.jpg', None,
                 'object_bytes', 150 * 1000)
     elif size == 'original':
         for cid in candidate_ids:
             descriptions[cid] = utils.S3BatchResource(
                 'imagenet2candidates_original/' + cid + '.jpg', None,
                 'object_bytes', 2 * 1000 * 1000)
     else:
         raise NotImplementedError()
     # Sanity check: exactly one description per requested candidate id.
     assert len(descriptions) == len(candidate_ids)
     assert all(cid in descriptions for cid in candidate_ids)
     return descriptions
Example 4
 def get_test_feature_resource_descriptions(self, filenames):
     """Map each test-image filename to the S3 resource holding its features.

     Every feature file is an individually stored .npy object.
     """
     if not filenames:
         return {}
     descriptions = {
         name: utils.S3BatchResource(
             'imagenet-test-featurized/' + name + '.npy', None,
             'numpy_bytes', 8 * 4096)
         for name in filenames
     }
     # Sanity check: exactly one description per requested filename.
     assert len(descriptions) == len(filenames)
     assert all(name in descriptions for name in filenames)
     return descriptions
Example 5
 def get_feature_resource_descriptions(self, candidate_ids):
     """Map each candidate id to the S3 resource holding its features.

     Every feature file is an individually stored .npy object.

     Raises:
         AssertionError: if any id is not a known candidate.
     """
     if not candidate_ids:
         return {}
     # Every requested id must be a known candidate.
     assert all(cid in self.all_candidates for cid in candidate_ids)
     descriptions = {
         cid: utils.S3BatchResource(
             'imagenet2candidates_featurized/' + cid + '.npy', None,
             'numpy_bytes', 8 * 4096)
         for cid in candidate_ids
     }
     # Sanity check: exactly one description per requested candidate id.
     assert len(descriptions) == len(candidate_ids)
     assert all(cid in descriptions for cid in candidate_ids)
     return descriptions
Example 6
 def get_val_feature_resource_descriptions(self, filenames):
     """Map each validation filename to the S3 resource with its features.

     Features are stored per wnid in pickle dicts keyed by the filename
     stem (extension stripped), so every file from one wnid shares the
     same batch pickle.
     """
     if not filenames:
         return {}
     # Bucket the requested filenames by their WordNet id.
     by_wnid = {}
     for name in filenames:
         by_wnid.setdefault(self.wnid_by_val_filename[name], []).append(name)
     descriptions = {}
     for wnid, wnid_files in by_wnid.items():
         batch_key = 'imagenet-validation-featurized/val-' + wnid + '-fc7.pkl'
         for name in wnid_files:
             # The pickle dict is keyed by the filename without extension.
             descriptions[name] = utils.S3BatchResource(
                 batch_key, pathlib.Path(name).stem, 'pickle_dict',
                 32 * 50 * 1000)
     # Sanity check: exactly one description per requested filename.
     assert len(descriptions) == len(filenames)
     assert all(name in descriptions for name in filenames)
     return descriptions