def __init__(self, split="train", size256=False, transform=None): dataset_name = "ImageNet256" if size256 else "ImageNet" assert (split in ("train", "val")) or (split.find("train_subset") != -1) self.split = split self.name = f"{dataset_name}_Split_" + self.split data_dir = _IMAGENET256_DATASET_DIR if size256 else _IMAGENET_DATASET_DIR print(f"==> Loading {dataset_name} dataset - split {self.split}") print(f"==> {dataset_name} directory: {data_dir}") self.transform = transform print(f"==> transform: {self.transform}") train_dir = os.path.join(data_dir, "train") val_dir = os.path.join(data_dir, "val") split_dir = train_dir if (self.split.find("train") != -1) else val_dir self.data = datasets.ImageFolder(split_dir, self.transform) self.labels = [item[1] for item in self.data.imgs] if self.split.find("train_subset") != -1: subsetK = int(self.split[len("train_subset"):]) assert subsetK > 0 self.split = "train" label2ind = utils.build_label_index(self.data.targets) all_indices = [] for label, img_indices in label2ind.items(): assert len(img_indices) >= subsetK all_indices += img_indices[:subsetK] self.data.imgs = [self.data.imgs[idx] for idx in all_indices] self.data.samples = [self.data.samples[idx] for idx in all_indices] self.data.targets = [self.data.targets[idx] for idx in all_indices] self.labels = [self.labels[idx] for idx in all_indices]
def __init__(self, data_dir, image_split="train", phase="train"): # data_dir: path to the directory with the saved ImageNet features. # image_split: the image split of the ImageNet that will be loaded. # phase: whether the dataset will be used for training, validating, or # testing the few-shot model model. assert image_split in ("train", "val") assert phase in ("train", "val", "test") self.phase = phase self.image_split = image_split self.name = (f"ImageNetLowShotFeatures_ImageSplit_{self.image_split}" f"_Phase_{self.phase}") dataset_file = os.path.join(data_dir, "ImageNet_" + self.image_split + ".h5") self.data_file = h5py.File(dataset_file, "r") self.count = self.data_file["count"][0] self.features = self.data_file["all_features"][...] self.labels = self.data_file["all_labels"][:self.count].tolist() # *********************************************************************** data_tmp = datasets.ImageFolder( os.path.join(_IMAGENET_DATASET_DIR, "train"), None) ( base_classes, base_classes_val, base_classes_test, novel_classes_val, novel_classes_test, ) = load_ImageNet_fewshot_split(data_tmp.classes) # *********************************************************************** self.label2ind = utils.build_label_index(self.labels) self.labelIds = sorted(self.label2ind.keys()) self.num_cats = len(self.labelIds) assert self.num_cats == 1000 self.labelIds_base = base_classes self.num_cats_base = len(self.labelIds_base) if self.phase == "val" or self.phase == "test": self.labelIds_novel = (novel_classes_val if (self.phase == "val") else novel_classes_test) self.num_cats_novel = len(self.labelIds_novel) intersection = set(self.labelIds_base) & set(self.labelIds_novel) assert len(intersection) == 0 self.base_classes_eval_split = (base_classes_val if (self.phase == "val") else base_classes_test)
def __init__(self, phase="train", split="train", do_not_use_random_transf=False): assert phase in ("train", "test", "val") assert split in ("train", "val") use_aug = (phase == "train") and (do_not_use_random_transf == False) super().__init__(split=split, use_geometric_aug=use_aug, use_color_aug=use_aug) self.phase = phase self.split = split self.name = "ImageNetLowShot_Phase_" + phase + "_Split_" + split print(f"==> Loading ImageNet few-shot benchmark - phase {phase}") # *********************************************************************** ( base_classes, _, _, novel_classes_val, novel_classes_test, ) = load_ImageNet_fewshot_split(self.data.classes) # *********************************************************************** self.label2ind = utils.build_label_index(self.labels) self.labelIds = sorted(self.label2ind.keys()) self.num_cats = len(self.labelIds) assert self.num_cats == 1000 self.labelIds_base = base_classes self.num_cats_base = len(self.labelIds_base) if self.phase == "val" or self.phase == "test": self.labelIds_novel = (novel_classes_val if (self.phase == "val") else novel_classes_test) self.num_cats_novel = len(self.labelIds_novel) intersection = set(self.labelIds_base) & set(self.labelIds_novel) assert len(intersection) == 0
def __init__(self, phase="train", do_not_use_random_transf=False): assert phase in ("train", "val", "test") self.phase = phase self.name = "CIFAR100FewShot_" + phase normalize = transforms.Normalize(mean=_CIFAR_MEAN_PIXEL, std=_CIFAR_STD_PIXEL) if (self.phase == "test" or self.phase == "val") or (do_not_use_random_transf == True): self.transform = transforms.Compose( [lambda x: np.asarray(x), transforms.ToTensor(), normalize]) else: self.transform = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), lambda x: np.asarray(x), transforms.ToTensor(), normalize, ]) cifar100_metadata_path = os.path.join(_CIFAR_DATASET_DIR, "cifar-100-python", "meta") all_category_names = pickle.load(open(cifar100_metadata_path, "rb"))["fine_label_names"] def read_categories(filename): with open(filename) as f: categories = f.readlines() categories = [x.strip() for x in categories] return categories def get_label_ids(category_names): label_ids = [ all_category_names.index(cname) for cname in category_names ] return label_ids train_category_names = read_categories( os.path.join(_CIFAR_CATEGORY_SPLITS_DIR, "train.txt")) val_category_names = read_categories( os.path.join(_CIFAR_CATEGORY_SPLITS_DIR, "val.txt")) test_category_names = read_categories( os.path.join(_CIFAR_CATEGORY_SPLITS_DIR, "test.txt")) train_category_ids = get_label_ids(train_category_names) val_category_ids = get_label_ids(val_category_names) test_category_ids = get_label_ids(test_category_names) print(f"Loading CIFAR-100 FewShot dataset - phase {phase}") if self.phase == "train": self.data_train = datasets.__dict__["CIFAR100"]( _CIFAR_DATASET_DIR, train=True, download=True, transform=self.transform) self.labels = self.data_train.targets self.images = self.data_train.data self.label2ind = utils.build_label_index(self.labels) self.labelIds = sorted(self.label2ind.keys()) self.num_cats = len(self.labelIds) self.labelIds_base = train_category_ids self.num_cats_base = len(self.labelIds_base) elif self.phase == "val" or self.phase == "test": self.data_train = datasets.__dict__["CIFAR100"]( _CIFAR_DATASET_DIR, train=True, download=True, transform=self.transform) labels_train = self.data_train.targets images_train = self.data_train.data label2ind_train = utils.build_label_index(labels_train) self.labelIds_novel = (val_category_ids if (self.phase == "val") else test_category_ids) labels_novel = [] images_novel = [] for label_id in self.labelIds_novel: indices = label2ind_train[label_id] images_novel.append(images_train[indices]) labels_novel += [labels_train[index] for index in indices] images_novel = np.concatenate(images_novel, axis=0) assert images_novel.shape[0] == len(labels_novel) self.data_test = datasets.__dict__["CIFAR100"]( _CIFAR_DATASET_DIR, train=False, download=True, transform=self.transform) labels_test = self.data_test.targets images_test = self.data_test.data label2ind_test = utils.build_label_index(labels_test) self.labelIds_base = train_category_ids labels_base = [] images_base = [] for label_id in self.labelIds_base: indices = label2ind_test[label_id] images_base.append(images_test[indices]) labels_base += [labels_test[index] for index in indices] images_base = np.concatenate(images_base, axis=0) assert images_base.shape[0] == len(labels_base) self.images = np.concatenate([images_base, images_novel], axis=0) self.labels = labels_base + labels_novel assert self.images.shape[0] == len(self.labels) self.num_cats_base = len(self.labelIds_base) self.num_cats_novel = len(self.labelIds_novel) 
        intersection = set(self.labelIds_base) & set(self.labelIds_novel)
        assert len(intersection) == 0

        self.label2ind_base = utils.build_label_index(labels_base)
        assert len(self.label2ind_base) == self.num_cats_base
        self.label2ind_novel = utils.build_label_index(labels_novel)
        assert len(self.label2ind_novel) == self.num_cats_novel

        self.label2ind = utils.build_label_index(self.labels)
        assert len(self.label2ind) == self.num_cats_base + self.num_cats_novel
        self.labelIds = sorted(self.label2ind.keys())
        self.num_cats = len(self.labelIds)
    else:
        raise ValueError(f"Invalid phase {self.phase}")
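
# Usage sketch (an assumption: the enclosing class is presumed to be named
# `CIFAR100FewShot`; the category splits under _CIFAR_CATEGORY_SPLITS_DIR
# must follow the train.txt / val.txt / test.txt layout read above):
#
#     # Training phase: all training-split images, with the base categories
#     # from train.txt exposed via labelIds_base.
#     train_set = CIFAR100FewShot(phase="train")
#     # Test phase: base-class images from the CIFAR-100 test split plus
#     # novel-class images (test.txt categories) from the training split.
#     test_set = CIFAR100FewShot(phase="test")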
def __init__(
    self,
    transform_test,
    transform_train,
    phase="train",
    load_single_file_split=False,
    file_split=None,
    do_not_use_random_transf=False,
):
    data_dir = _MINIIMAGENET_DATASET_DIR
    print(f"==> Loading MiniImageNet dataset from {data_dir}")

    file_train_categories_train_phase = os.path.join(
        data_dir, "miniImageNet_category_split_train_phase_train.pickle")
    file_train_categories_val_phase = os.path.join(
        data_dir, "miniImageNet_category_split_train_phase_val.pickle")
    file_train_categories_test_phase = os.path.join(
        data_dir, "miniImageNet_category_split_train_phase_test.pickle")
    file_val_categories_val_phase = os.path.join(
        data_dir, "miniImageNet_category_split_val.pickle")
    file_test_categories_test_phase = os.path.join(
        data_dir, "miniImageNet_category_split_test.pickle")

    self.phase = phase
    if load_single_file_split:
        assert file_split in (
            "category_split_train_phase_train",
            "category_split_train_phase_val",
            "category_split_train_phase_test",
            "category_split_val",
            "category_split_test",
        )
        self.name = "MiniImageNet_" + file_split
        print(f"==> Loading mini ImageNet dataset - split {file_split}")

        file_to_load = os.path.join(data_dir,
                                    f"miniImageNet_{file_split}.pickle")
        data = utils.load_pickle_data(file_to_load)
        self.data = data["data"]
        self.labels = data["labels"]
        self.label2ind = utils.build_label_index(self.labels)
        self.labelIds = sorted(self.label2ind.keys())
        self.num_cats = len(self.labelIds)
    else:
        assert (phase in ("train", "val", "test", "trainval")
                or "train_subset" in phase)
        self.name = "MiniImageNet_" + phase
        print(f"==> Loading mini ImageNet dataset - phase {phase}")

        if self.phase == "train":
            # Loads the training classes (and their data) as base classes.
            data_train = utils.load_pickle_data(
                file_train_categories_train_phase)
            self.data = data_train["data"]
            self.labels = data_train["labels"]

            self.label2ind = utils.build_label_index(self.labels)
            self.labelIds = sorted(self.label2ind.keys())
            self.num_cats = len(self.labelIds)
            self.labelIds_base = self.labelIds
            self.num_cats_base = len(self.labelIds_base)
        elif self.phase == "trainval":
            # Loads the training + validation classes (and their data) as
            # base classes.
            data_train = utils.load_pickle_data(
                file_train_categories_train_phase)
            data_val = utils.load_pickle_data(file_val_categories_val_phase)

            self.data = np.concatenate(
                [data_train["data"], data_val["data"]], axis=0)
            self.labels = data_train["labels"] + data_val["labels"]

            self.label2ind = utils.build_label_index(self.labels)
            self.labelIds = sorted(self.label2ind.keys())
            self.num_cats = len(self.labelIds)
            self.labelIds_base = self.labelIds
            self.num_cats_base = len(self.labelIds_base)
        elif self.phase.find("train_subset") != -1:
            subsetK = int(self.phase[len("train_subset"):])
            assert subsetK > 0
            # Loads the training classes as base classes. For each class it
            # loads only the first `subsetK` images.
            data_train = utils.load_pickle_data(
                file_train_categories_train_phase)

            label2ind = utils.build_label_index(data_train["labels"])
            all_indices = []
            for label, img_indices in label2ind.items():
                assert len(img_indices) >= subsetK
                all_indices += img_indices[:subsetK]

            labels_semi = [data_train["labels"][idx] for idx in all_indices]
            data_semi = data_train["data"][all_indices]

            self.data = data_semi
            self.labels = labels_semi

            self.label2ind = utils.build_label_index(self.labels)
            self.labelIds = sorted(self.label2ind.keys())
            self.num_cats = len(self.labelIds)
            self.labelIds_base = self.labelIds
            self.num_cats_base = len(self.labelIds_base)
            self.phase = "train"
        elif self.phase == "val" or self.phase == "test":
            # Uses the validation / test classes (and their data) as the
            # novel class data, and the validation / test image split of
            # the training classes as the base class data.
            if self.phase == "test":
                # Load data that will be used for evaluating the
                # recognition accuracy of the base classes.
                data_base = utils.load_pickle_data(
                    file_train_categories_test_phase)
                # Load data that will be used for evaluating the few-shot
                # recognition accuracy on the novel classes.
                data_novel = utils.load_pickle_data(
                    file_test_categories_test_phase)
            else:  # phase == "val"
                # Load data that will be used for evaluating the
                # recognition accuracy of the base classes.
                data_base = utils.load_pickle_data(
                    file_train_categories_val_phase)
                # Load data that will be used for evaluating the few-shot
                # recognition accuracy on the novel classes.
                data_novel = utils.load_pickle_data(
                    file_val_categories_val_phase)

            self.data = np.concatenate(
                [data_base["data"], data_novel["data"]], axis=0)
            self.labels = data_base["labels"] + data_novel["labels"]

            self.label2ind = utils.build_label_index(self.labels)
            self.labelIds = sorted(self.label2ind.keys())
            self.num_cats = len(self.labelIds)

            self.labelIds_base = utils.build_label_index(
                data_base["labels"]).keys()
            self.labelIds_novel = utils.build_label_index(
                data_novel["labels"]).keys()
            self.num_cats_base = len(self.labelIds_base)
            self.num_cats_novel = len(self.labelIds_novel)
            intersection = set(self.labelIds_base) & set(self.labelIds_novel)
            assert len(intersection) == 0
        else:
            raise ValueError(f"Invalid phase {self.phase}")

    self.transform_test = transform_test
    self.transform_train = transform_train
    if (self.phase in ("test", "val")) or do_not_use_random_transf:
        self.transform = self.transform_test
    else:
        self.transform = self.transform_train
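
# Usage sketch (an assumption: the enclosing class is presumed to be named
# `MiniImageNet`; both transforms are supplied by the caller, and the crop
# size / normalization values below are only illustrative defaults for
# 84x84 miniImageNet images):
#
#     import torchvision.transforms as transforms
#
#     normalize = transforms.Normalize(
#         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     transform_train = transforms.Compose([
#         transforms.RandomCrop(84, padding=8),
#         transforms.RandomHorizontalFlip(),
#         transforms.ToTensor(),
#         normalize,
#     ])
#     transform_test = transforms.Compose(
#         [transforms.ToTensor(), normalize])
#     # "val" phase: base classes plus the novel validation classes,
#     # evaluated with the deterministic test transform.
#     dataset = MiniImageNet(transform_test, transform_train, phase="val")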