def get_dataset(self, input_fname, config):
    """Look up the cached artifacts for ``input_fname`` under ``config``.

    Returns:
        ``None`` when the cached metadata JSON does not exist. Otherwise a
        5-tuple ``(valid, metadata, training_path, test_path,
        validation_path)``, where ``valid`` is truthy only if the cache key
        equals the checksum stored in the metadata and the training path
        exists. The test and validation paths are returned without checking
        whether they exist.
    """
    cache_key = self.get_cache_key(input_fname, config)
    metadata_path = self.get_cache_path(input_fname, cache_key, 'meta', 'json')

    # Without the metadata file there is nothing to validate against.
    if not path_exists(metadata_path):
        return None

    metadata = data_utils.load_json(metadata_path)

    training_path = self.get_cache_path(input_fname, cache_key, TRAINING)
    test_path = self.get_cache_path(input_fname, cache_key, TEST)
    validation_path = self.get_cache_path(input_fname, cache_key, VALIDATION)

    # The training path is only probed when the checksum already matches.
    checksum_matches = cache_key == metadata.get(CHECKSUM)
    is_valid = checksum_matches and path_exists(training_path)

    return is_valid, metadata, training_path, test_path, validation_path
def delete_dataset(self, input_fname, config):
    """Delete every existing cache artifact for ``input_fname`` under ``config``.

    The metadata JSON and the training, test and validation paths are
    resolved first; each one that currently exists is then deleted.
    """
    cache_key = self.get_cache_key(input_fname, config)

    # Trailing arguments passed to get_cache_path for each artifact.
    artifact_args = (
        ('meta', 'json'),
        (TRAINING,),
        (TEST,),
        (VALIDATION,),
    )
    artifact_paths = [
        self.get_cache_path(input_fname, cache_key, *extra)
        for extra in artifact_args
    ]

    for artifact_path in artifact_paths:
        if not path_exists(artifact_path):
            continue
        delete(artifact_path)
def get(self):
    """Return the cached artifacts referenced by ``self.cache_map``.

    Returns:
        ``None`` when the metadata path does not exist. Otherwise a 5-tuple
        ``(valid, metadata, training, test, validation)``, where each split
        entry is its path from ``cache_map`` if that path exists, else
        ``None``. ``valid`` is truthy only if ``self.checksum`` equals the
        checksum stored in the metadata and the training entry exists.
    """
    metadata_path = self.cache_map[META]
    if not path_exists(metadata_path):
        return None

    metadata = data_utils.load_json(metadata_path)

    def existing_or_none(split):
        # Map a split to its cached path, or None when nothing is on disk.
        return self.cache_map[split] if path_exists(self.cache_map[split]) else None

    training_entry, test_entry, validation_entry = [
        existing_or_none(split) for split in (TRAINING, TEST, VALIDATION)
    ]

    is_valid = (
        self.checksum == metadata.get(CHECKSUM)
        and training_entry is not None
    )
    return is_valid, metadata, training_entry, test_entry, validation_entry
def get_image_from_path(
    src_path: Union[str, torch.Tensor], img_entry: Union[str, bytes], ret_bytes: bool = False
) -> Union[BytesIO, BinaryIO, TextIO, bytes, str]:
    """Resolve an image entry to a path, a URL, raw bytes or a binary stream.

    Resolution order:

    1. A non-``str`` ``img_entry`` is returned unchanged.
    2. If ``is_http(img_entry)`` is true: with ``ret_bytes`` the result of
       ``get_image_from_http_bytes`` is returned, otherwise the URL string
       itself.
    3. If ``src_path`` is truthy or ``img_entry`` is an absolute path, the
       result of ``get_abs_path(src_path, img_entry)`` is returned
       (``ret_bytes`` is not consulted on this branch).
    4. If ``img_entry`` exists as a path, the file is read in binary mode.
       With ``ret_bytes`` the content is returned as ``bytes``; otherwise it
       is returned wrapped in a ``BytesIO``.
    5. Otherwise ``img_entry`` is returned encoded as UTF-8 ``bytes``.

    Args:
        src_path: Base location handed to ``get_abs_path``.
        img_entry: The image reference or already-loaded image data.
        ret_bytes: Whether to return image content instead of a stream/URL.

    Returns:
        One of the values described in the resolution order above.
    """
    if not isinstance(img_entry, str):
        return img_entry

    if is_http(img_entry):
        if ret_bytes:
            # Returns BytesIO.
            return get_image_from_http_bytes(img_entry)
        return img_entry

    if src_path or os.path.isabs(img_entry):
        return get_abs_path(src_path, img_entry)

    if not path_exists(img_entry):
        return bytes(img_entry, "utf-8")

    # Read while the handle is still open. Returning the handle itself from
    # inside the ``with`` block would hand the caller an already-closed file,
    # so the non-bytes case returns an in-memory stream over the content.
    with open_file(img_entry, "rb") as f:
        content = f.read()
    if ret_bytes:
        return content
    return BytesIO(content)
def delete(self):
    """Delete every path in ``self.cache_map`` that currently exists."""
    for cached_path in self.cache_map.values():
        if not path_exists(cached_path):
            continue
        # A cache entry may be a directory rather than a single file
        # (presumably the case for Parquet datasets), so delete recursively.
        delete(cached_path, recursive=True)
def delete(self):
    """Recursively delete every path in ``self.cache_map`` that exists."""
    for cached_path in self.cache_map.values():
        if not path_exists(cached_path):
            continue
        # Parquet entries in the cache_map can be pointers to directories.
        delete(cached_path, recursive=True)