def __init__(self, dataset_type, imdb_file_index, config, *args, **kwargs):
    """Set up the "vqa2" dataset: image database, features and fast cache.

    Args:
        dataset_type: Key selecting the split. It must be present in
            ``imdb_files`` of the dataset config (and is also used to
            index ``image_features`` when that option is configured).
        imdb_file_index: Index into the per-split lists of imdb files and
            image-feature directories.
        config: Dataset configuration. It is forwarded to the parent
            constructor; settings are then read from ``self.config``, and
            ``fast_dir`` is read from this argument directly.
        *args: Accepted but not used here.
        **kwargs: Stored unchanged on ``self.kwargs``.

    Raises:
        ValueError: If ``dataset_type`` is not a key of ``imdb_files``.
    """
    super().__init__("vqa2", dataset_type, config)

    imdb_files = self.config.imdb_files
    if dataset_type not in imdb_files:
        raise ValueError(
            "Dataset type {} is not present in "
            "imdb_files of dataset config".format(dataset_type))

    self.imdb_file = imdb_files[dataset_type][imdb_file_index]
    self.imdb_file = self._get_absolute_path(self.imdb_file)
    self.imdb = ImageDatabase(self.imdb_file)

    self.kwargs = kwargs
    self.image_depth_first = self.config.image_depth_first
    self._should_fast_read = self.config.fast_read
    # Read once; the original assigned these two attributes a second time
    # at the end of the constructor with the same values.
    self.use_ocr = self.config.use_ocr
    self.use_ocr_info = self.config.use_ocr_info

    self._use_features = False
    if hasattr(self.config, "image_features"):
        self._use_features = True
        self.features_max_len = self.config.features_max_len
        self._return_info = self.config.get("return_info", True)

        all_image_feature_dirs = self.config.image_features[dataset_type]
        curr_image_features_dir = all_image_feature_dirs[imdb_file_index]
        # One config entry may name several comma-separated directories,
        # so _get_absolute_path receives a list here.
        curr_image_features_dir = curr_image_features_dir.split(",")
        curr_image_features_dir = self._get_absolute_path(
            curr_image_features_dir)

        self.features_db = FeaturesDataset(
            "coco",
            directories=curr_image_features_dir,
            depth_first=self.image_depth_first,
            max_features=self.features_max_len,
            fast_read=self._should_fast_read,
            imdb=self.imdb,
            return_info=self._return_info,
        )

    # NOTE(review): the source this was restored from had lost its
    # indentation; the fast-cache setup is assumed to run regardless of
    # whether image features are configured -- confirm.
    # self._dataset_type is presumably set by the parent constructor.
    self.fast_dir = os.path.join(config.fast_dir, self._dataset_type)
    # exist_ok avoids the exists()/mkdir() race when several processes
    # construct the dataset concurrently, and also creates missing parents.
    os.makedirs(self.fast_dir, exist_ok=True)
    # Each entry name minus its last two characters is parsed as an int.
    # Assumes every file in fast_dir follows that naming -- TODO confirm
    # against the code that writes the cache.
    self.fasted = {int(entry[:-2]) for entry in os.listdir(self.fast_dir)}
def __init__(self, dataset_type, imdb_file_index, config, *args, **kwargs):
    """Set up the "vqa_introspect" dataset for one split.

    Resolves the imdb file for the split, tags the instance with a
    ``dataset`` label derived from the imdb file name, loads the image
    database and, when ``image_features`` is configured, builds the
    feature reader.

    Args:
        dataset_type: Key selecting the split; must be present in
            ``imdb_files`` of the dataset config.
        imdb_file_index: Index into the per-split lists of imdb files and
            image-feature directories.
        config: Dataset configuration, forwarded to the parent constructor.
        *args: Accepted but not used here.
        **kwargs: Stored unchanged on ``self.kwargs``.

    Raises:
        ValueError: If ``dataset_type`` is not a key of ``imdb_files``.
    """
    super().__init__("vqa_introspect", dataset_type, config)

    available = self.config.imdb_files
    if dataset_type not in available:
        message = (
            "Dataset type {} is not present in "
            "imdb_files of dataset config"
        ).format(dataset_type)
        raise ValueError(message)

    self.imdb_file = available[dataset_type][imdb_file_index]
    self.imdb_file = self._get_absolute_path(self.imdb_file)

    # ``dataset`` defaults to the split name. For train/val it is replaced
    # by a label chosen from substrings of the imdb path; VQA markers are
    # checked before the introspect markers.
    vqa_markers = (
        'imdb_train2014.npy',
        'imdb_val2014.npy',
        'imdb_vqa2014_val_reasoning_questions',
    )
    introspect_markers = ('train_introspect.npy', 'train_binary.npy')

    self.dataset = dataset_type
    if dataset_type in ('train', 'val'):
        if any(marker in self.imdb_file for marker in vqa_markers):
            self.dataset = 'train_vqa'
        elif any(marker in self.imdb_file for marker in introspect_markers):
            self.dataset = 'train_introspect'

    self.imdb = ImageDatabase(self.imdb_file)
    self.kwargs = kwargs
    self.image_depth_first = self.config.image_depth_first
    self._should_fast_read = self.config.fast_read
    self.use_ocr = self.config.use_ocr
    self.use_ocr_info = self.config.use_ocr_info

    self._use_features = hasattr(self.config, "image_features")
    if not self._use_features:
        # No feature directories configured: features_db is not created.
        return

    self.features_max_len = self.config.features_max_len
    self._return_info = self.config.get("return_info", True)

    # The selected entry may list several comma-separated directories.
    feature_dirs = self.config.image_features[dataset_type][imdb_file_index]
    feature_dirs = self._get_absolute_path(feature_dirs.split(","))

    self.features_db = FeaturesDataset(
        "coco",
        directories=feature_dirs,
        depth_first=self.image_depth_first,
        max_features=self.features_max_len,
        fast_read=self._should_fast_read,
        imdb=self.imdb,
        return_info=self._return_info,
    )
def __init__(self, dataset_type, imdb_file_index, config, *args, **kwargs):
    """Set up the "vqa2" dataset for one split.

    Loads the image database for the selected imdb file and, when
    ``image_features`` is present in the config, builds the feature reader
    for the matching feature directories.

    Args:
        dataset_type: Key selecting the split; must be present in
            ``imdb_files`` of the dataset config.
        imdb_file_index: Index into the per-split lists of imdb files and
            image-feature directories.
        config: Dataset configuration, forwarded to the parent constructor.
        *args: Accepted but not used here.
        **kwargs: Stored unchanged on ``self.kwargs``.

    Raises:
        ValueError: If ``dataset_type`` is not a key of ``imdb_files``.
    """
    super().__init__("vqa2", dataset_type, config)

    split_to_imdbs = self.config.imdb_files
    if dataset_type not in split_to_imdbs:
        raise ValueError(
            "Dataset type {} is not present in "
            "imdb_files of dataset config".format(dataset_type)
        )

    # --- image database -------------------------------------------------
    self.imdb_file = split_to_imdbs[dataset_type][imdb_file_index]
    self.imdb_file = self._get_absolute_path(self.imdb_file)
    self.imdb = ImageDatabase(self.imdb_file)

    # --- plain settings copied from the config --------------------------
    self.kwargs = kwargs
    self.image_depth_first = self.config.image_depth_first
    self._should_fast_read = self.config.fast_read
    self.use_ocr = self.config.use_ocr
    self.use_ocr_info = self.config.use_ocr_info

    # --- optional image features ----------------------------------------
    self._use_features = hasattr(self.config, "image_features")
    if self._use_features:
        self.features_max_len = self.config.features_max_len

        # A single entry can hold several comma-separated directories, so
        # a list is handed to _get_absolute_path.
        raw_entry = self.config.image_features[dataset_type][imdb_file_index]
        directories = self._get_absolute_path(raw_entry.split(","))

        self.features_db = FeaturesDataset(
            "coco",
            directories=directories,
            depth_first=self.image_depth_first,
            max_features=self.features_max_len,
            fast_read=self._should_fast_read,
            imdb=self.imdb,
        )
def __init__(self, dataset_type, imdb_file_index, config, *args, **kwargs):
    """Set up the "vqa2" dataset, optionally generating a one-entry imdb.

    When the selected imdb file name ends with ``imdb_karpathy_test.npy``
    and the global config value ``imdb_file_content`` is not ``None``, a
    two-element object array (a metadata dict plus one sample dict that
    points at ``imdb_file_content``) is written next to the configured
    imdb file and used in its place.

    Args:
        dataset_type: Key selecting the split; must be present in
            ``imdb_files`` of the dataset config.
        imdb_file_index: Index into the per-split lists of imdb files and
            image-feature directories.
        config: Dataset configuration, forwarded to the parent constructor.
        *args: Accepted but not used here.
        **kwargs: Stored unchanged on ``self.kwargs``.

    Raises:
        ValueError: If ``dataset_type`` is not a key of ``imdb_files``.
    """
    super().__init__("vqa2", dataset_type, config)

    imdb_files = self.config.imdb_files
    if dataset_type not in imdb_files:
        raise ValueError(
            "Dataset type {} is not present in "
            "imdb_files of dataset config".format(dataset_type))

    self.imdb_file = imdb_files[dataset_type][imdb_file_index]

    replaces_karpathy_test = self.imdb_file.endswith("imdb_karpathy_test.npy")
    if (replaces_karpathy_test
            and self._global_config['imdb_file_content'] is not None):
        content = self._global_config['imdb_file_content']

        # Place the generated file in the configured file's directory,
        # adding the "data/" prefix when the configured path lacks it.
        prefix = "" if self.imdb_file.startswith("data") else "data/"
        self.imdb_file = (
            prefix + os.path.dirname(self.imdb_file) + "/imdb_" + content)

        # Slot 0 holds the metadata dict, slot 1 the single sample.
        stub = np.zeros(2, dtype=object)
        stub[0] = {'metadata': 'coco'}
        stub[1] = {
            'feature_path': content,
            'image_id': 0,
            'reference_tokens': [['<s>']],
        }
        # NOTE(review): np.save appends ".npy" when the target name lacks
        # it; this presumably relies on imdb_file_content already ending in
        # ".npy" so that the saved file matches self.imdb_file -- confirm.
        np.save(self.imdb_file, stub)

    print('imdb_files', imdb_files, flush=True)

    if not self.imdb_file.startswith("data"):
        self.imdb_file = 'data/' + self.imdb_file
    # The path goes to ImageDatabase as built above; it is not passed
    # through self._get_absolute_path (that call was commented out in the
    # original).
    self.imdb = ImageDatabase(self.imdb_file)

    self.kwargs = kwargs
    self.image_depth_first = self.config.image_depth_first
    self._should_fast_read = self.config.fast_read
    self.use_ocr = self.config.use_ocr
    self.use_ocr_info = self.config.use_ocr_info

    self._use_features = hasattr(self.config, "image_features")
    if not self._use_features:
        # No feature directories configured: features_db is not created.
        return

    self.features_max_len = self.config.features_max_len

    # The selected entry may list several comma-separated directories.
    per_split_dirs = self.config.image_features[dataset_type]
    feature_dirs = per_split_dirs[imdb_file_index].split(",")
    feature_dirs = self._get_absolute_path(feature_dirs)

    self.features_db = FeaturesDataset(
        "coco",
        directories=feature_dirs,
        depth_first=self.image_depth_first,
        max_features=self.features_max_len,
        fast_read=self._should_fast_read,
        imdb=self.imdb,
    )