def _get_fextractor_instance(self, fextractor_type):
    fextractor_class = FeatureExtractor.find_subclass(fextractor_type)
    fextractor = fextractor_class(assets=self.assets,
                                  logger=self.logger,
                                  fifo_mode=self.fifo_mode,
                                  delete_workdir=self.delete_workdir,
                                  result_store=self.result_store,
                                  optional_dict=self.optional_dict)
    return fextractor
def _get_atom_features(self, fextractor_type):
    if self.feature_dict[fextractor_type] == 'all':
        fextractor_class = FeatureExtractor.find_subclass(fextractor_type)
        atom_features = fextractor_class.ATOM_FEATURES + \
            (fextractor_class.DERIVED_ATOM_FEATURES
             if hasattr(fextractor_class, 'DERIVED_ATOM_FEATURES') else [])
    else:
        atom_features = self.feature_dict[fextractor_type]
    return atom_features
def run(self):
    """
    Do all the calculation here.
    :return:
    """
    # for each FeatureExtractor_type key in feature_dict, find the subclass
    # of FeatureExtractor, run it (in parallel), and put results in a dict
    for fextractor_type in self.feature_dict:
        fextractor_class = FeatureExtractor.find_subclass(fextractor_type)
        _, results = run_executors_in_parallel(
            fextractor_class,
            assets=self.assets,
            fifo_mode=self.fifo_mode,
            delete_workdir=self.delete_workdir,
            parallelize=self.parallelize,
            result_store=self.result_store,
            optional_dict=self.optional_dict,
            optional_dict2=self.optional_dict2,
        )
        self.type2results_dict[fextractor_type] = results

    # assemble an output dict with the demanded atom features
    result_dicts = [dict() for _ in self.assets]
    for fextractor_type in self.feature_dict:
        assert fextractor_type in self.type2results_dict
        for atom_feature in self._get_atom_features(fextractor_type):
            scores_key = self._get_scores_key(fextractor_type, atom_feature)
            for result_index, result in enumerate(
                    self.type2results_dict[fextractor_type]):
                result_dicts[result_index][scores_key] = result[scores_key]

    # one BasicResult per asset, in the same order as self.assets
    self.results = [
        BasicResult(asset, result_dict)
        for asset, result_dict in zip(self.assets, result_dicts)
    ]
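# A minimal usage sketch for the assembler's run() method above, assuming the
# enclosing class is something like FeatureAssembler with a constructor that
# mirrors the attributes used in run() (feature_dict, assets, parallelize,
# result_store, optional_dict, optional_dict2). The class name, constructor
# signature, and the feature-type keys below are assumptions, not confirmed
# by this snippet.

def _example_assemble_features(assets):
    # request all atom features from one extractor type and an explicit
    # subset from another (keys and atom feature names are illustrative)
    feature_dict = {
        'VMAF_feature': 'all',
        'Moment_feature': ['1st', '2nd'],
    }
    assembler = FeatureAssembler(   # assumed class name
        feature_dict=feature_dict,
        assets=assets,
        logger=None,
        fifo_mode=True,
        delete_workdir=True,
        result_store=None,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
    )
    assembler.run()
    # run() leaves one BasicResult per asset on assembler.results, keyed by
    # the scores_key of each demanded atom feature
    return assembler.results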
def main():
    args = parser.parse_args()

    # phases to be processed
    phases = [phase.strip() for phase in args.phases.split(',')]

    # annotation files to be processed
    if sorted(phases) == sorted(['train', 'val', 'test']) and args.ann_files == '':
        tmplt = 'data/annotations/captions_%s2017.json'
        ann_files = [tmplt % 'train', tmplt % 'val', '']
    else:
        ann_files = [ann_file.strip() for ann_file in args.ann_files.split(',')]

    # batch size for extracting feature vectors
    batch_size = args.batch_size
    # maximum caption length in words; captions longer than max_length are dropped
    max_length = args.max_length
    # words occurring fewer than word_count_threshold times in the training set
    # are mapped to the special unknown token
    word_count_threshold = args.word_count_threshold
    vocab_size = args.vocab_size

    for phase, ann_file in zip(phases, ann_files):
        _process_caption_data(phase, ann_file=ann_file, max_length=max_length)

        if phase == 'train':
            captions_data = load_json('./data/train/captions_train2017.json')
            word_to_idx = _build_vocab(captions_data, threshold=word_count_threshold,
                                       vocab_size=vocab_size)
            save_json(word_to_idx, './data/word_to_idx.json')

            new_captions_data = _build_caption_vector(captions_data, word_to_idx=word_to_idx,
                                                      max_length=max_length)
            save_json(new_captions_data, ann_file)

    print('Finished processing caption data')

    feature_extractor = FeatureExtractor(model_name='resnet101', layer=3)
    for phase in phases:
        if not os.path.isdir('./data/%s/feats/' % phase):
            os.makedirs('./data/%s/feats/' % phase)

        image_paths = os.listdir('./image/%s/' % phase)
        dataset = CocoImageDataset(root='./image/%s/' % phase, image_paths=image_paths)
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                  num_workers=8)

        for batch_paths, batch_images in tqdm(data_loader):
            feats = feature_extractor(batch_images).data.cpu().numpy()
            # collapse the two middle dimensions, keeping the last dimension intact
            feats = feats.reshape(-1, feats.shape[1] * feats.shape[2], feats.shape[-1])
            for j in range(len(feats)):
                np.save('./data/%s/feats/%s.npy' % (phase, batch_paths[j]), feats[j])
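# main() above relies on a module-level argparse `parser` and on third-party
# imports that are not shown in this snippet. Below is a minimal sketch of
# what they might look like: the argument names are implied by main()'s use
# of `args`, but the default values and help strings are assumptions. The
# project helpers (_process_caption_data, _build_vocab, _build_caption_vector,
# load_json, save_json, FeatureExtractor, CocoImageDataset) are defined
# elsewhere in the repository.
import argparse
import os

import numpy as np
import torch
from tqdm import tqdm

parser = argparse.ArgumentParser(
    description='Preprocess COCO captions and extract ResNet image features.')
parser.add_argument('--phases', type=str, default='train,val,test',
                    help='comma-separated dataset splits to process')
parser.add_argument('--ann_files', type=str, default='',
                    help='comma-separated annotation files, one per phase '
                         '(empty uses the captions_*2017.json defaults)')
parser.add_argument('--batch_size', type=int, default=64,
                    help='batch size for feature extraction (assumed default)')
parser.add_argument('--max_length', type=int, default=15,
                    help='captions longer than this many words are dropped (assumed default)')
parser.add_argument('--word_count_threshold', type=int, default=1,
                    help='words seen fewer times than this map to the unknown token (assumed default)')
parser.add_argument('--vocab_size', type=int, default=10000,
                    help='maximum vocabulary size (assumed default)')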
def test_get_fextractor_subclasses(self):
    # importing this module registers its FeatureExtractor subclasses, so
    # they are included in the recursive subclass count below
    from core.noref_feature_extractor import NorefFeatureExtractor
    fextractor_subclasses = FeatureExtractor.get_subclasses_recursively()
    self.assertEquals(len(fextractor_subclasses), 7)
    self.assertTrue(VmafFeatureExtractor in fextractor_subclasses)
    self.assertTrue(MomentFeatureExtractor in fextractor_subclasses)
def _get_scores_key(self, fextractor_type, atom_feature):
    fextractor_subclass = FeatureExtractor.find_subclass(fextractor_type)
    scores_key = fextractor_subclass.get_scores_key(atom_feature)
    return scores_key
def test_get_fextractor_subclasses(self):
    fextractor_subclasses = FeatureExtractor.get_subclasses_recursively()
    self.assertEquals(len(fextractor_subclasses), 3)
    self.assertTrue(VmafFeatureExtractor in fextractor_subclasses)
    self.assertTrue(MomentFeatureExtractor in fextractor_subclasses)