def train_val_dataset_for_bias(self, train_dataset):
    """Build train/val datasets (new-class data + exemplars) for bias correction.

    Both the stored exemplars and the incoming training indices are split
    with stratified sampling — 500 samples held out from each side for
    validation — then each split is merged with its exemplar counterpart.

    Returns:
        (train_dataset, val_dataset) tuple of augmented datasets.
    """
    # Stratified split of the stored exemplars: 500 go to validation.
    all_exemplar_idx = np.hstack(self.exemplar_sets)
    all_exemplar_lbl = np.hstack(
        [[cls] * len(idxs) for cls, idxs in enumerate(self.exemplar_sets)])
    ex_train_idx, ex_val_idx = train_test_split(
        all_exemplar_idx, stratify=all_exemplar_lbl, test_size=500)

    # Same stratified split for the new-task training indices.
    new_task_idx = train_dataset.indices
    _, new_task_lbl = self.dataset.get_items_of(new_task_idx)
    new_train_idx, new_val_idx = train_test_split(
        new_task_idx, stratify=new_task_lbl, test_size=500)

    # Wrap the selected exemplars with the *training* transform (index 0).
    ex_train_imgs, ex_train_lbls = self.dataset.get_items_of(ex_train_idx)
    ex_val_imgs, ex_val_lbls = self.dataset.get_items_of(ex_val_idx)
    ex_train_set = ExemplarSet(ex_train_imgs, ex_train_lbls,
                               utils.get_train_eval_transforms()[0])
    ex_val_set = ExemplarSet(ex_val_imgs, ex_val_lbls,
                             utils.get_train_eval_transforms()[0])

    # Merge each new-task split with its exemplar counterpart.
    merged_train = utils.create_augmented_dataset(
        Subset(self.dataset, new_train_idx), ex_train_set)
    merged_val = utils.create_augmented_dataset(
        Subset(self.dataset, new_val_idx), ex_val_set)
    return merged_train, merged_val
def herding_construct_exemplar_set(self, indexes, images, label, m):
    """Select ``m`` exemplars for class ``label`` via iCaRL herding.

    Greedily picks, at each step, the image whose normalized feature —
    averaged with the features already chosen — lies closest to the
    class mean.  The selected dataset indices are appended to
    ``self.exemplar_sets`` *in selection order*, and the corresponding
    un-normalized features are handed to ``self.generator``.

    Args:
        indexes: dataset indices of the candidate images (parallel to ``images``).
        images: candidate images for this class.
        label: integer class label of the candidates.
        m: number of exemplars to select.
    """
    exemplar_set = ExemplarSet(images, [label] * len(images),
                               utils.get_train_eval_transforms()[1])
    loader = utils.get_eval_loader(exemplar_set, self.batch_size)
    self.net.eval()
    flatten_features = []
    not_normalized_features = []
    with torch.no_grad():
        for batch, _ in loader:  # renamed: don't shadow the `images` argument
            batch = batch.to(self.device)
            features = self._extract_features(batch, normalize=False)
            not_normalized_features.append(features.detach())
            # L2-normalize per sample for the herding distance computation.
            features = features / features.norm(dim=1).unsqueeze(1)
            flatten_features.append(features)

        # Un-normalized features are later used to fit the generator.
        not_normalized_features = torch.cat(
            not_normalized_features).cpu().numpy()

        # Class mean of the normalized features, re-normalized (iCaRL).
        flatten_features = torch.cat(flatten_features).cpu().numpy()
        class_mean = np.mean(flatten_features, axis=0)
        class_mean = class_mean / np.linalg.norm(class_mean)
        flatten_features = torch.from_numpy(flatten_features).to(self.device)

        exemplars = []          # selected dataset indices, in selection order
        chosen = set()          # O(1) membership test to avoid duplicates
        exemplar_feature = []   # normalized features of the selected exemplars
        feature_to_generalize = []
        for k in range(m):
            # Sum of the features already selected (0 on the first step).
            S = 0 if k == 0 else torch.stack(exemplar_feature).sum(0)
            mu_p = ((flatten_features + S) / (k + 1)).cpu().numpy()
            # NOTE(review): this divides every row by the *matrix* (Frobenius)
            # norm, not per-row — preserved as-is since changing it would
            # alter which exemplars get selected; confirm intent.
            mu_p = mu_p / np.linalg.norm(mu_p)
            distances = np.sqrt(np.sum((class_mean - mu_p) ** 2, axis=1))
            # Pick the closest candidate that was not already selected.
            for i in np.argsort(distances):
                if indexes[i] not in chosen:
                    chosen.add(indexes[i])
                    exemplars.append(indexes[i])
                    exemplar_feature.append(flatten_features[i])
                    feature_to_generalize.append(not_normalized_features[i])
                    break
        assert len(exemplars) == m
        # BUGFIX: previously `exemplars` was a set and `list(set)` was stored,
        # which discards the herding selection order (sets iterate in hash
        # order).  An ordered list keeps exemplars ranked by priority, which
        # exemplar-set reduction relies on.
        self.exemplar_sets.append(exemplars)
        self.generator.add_data(feature_to_generalize,
                                [label] * len(feature_to_generalize))
def _nme(self, images):
    """Nearest-mean-of-exemplars classification (iCaRL).

    Args:
        images: batch of input images already on-device (or movable by
            ``self._extract_features``).

    Returns:
        1-D tensor of predicted class indices, one per input image —
        the class whose cached exemplar feature mean is closest in L2
        distance to the image's feature vector.
    """
    if self.compute_means:
        # Delegate to the shared helper instead of duplicating the
        # mean-computation logic; it caches self.exemplar_means and
        # clears the compute_means flag.
        self._compute_means()
    exemplar_means = self.exemplar_means
    means = torch.stack(exemplar_means)         # (n_classes, feature_size)
    means = torch.stack([means] * len(images))  # (batch, n_classes, feature_size)
    means = means.transpose(1, 2)               # (batch, feature_size, n_classes)
    with torch.no_grad():
        feature = self._extract_features(images)
        feature = feature.unsqueeze(2)          # (batch, feature_size, 1)
        feature = feature.expand_as(means)      # (batch, feature_size, n_classes)
        # BUGFIX: dropped the trailing .squeeze() — with a batch of one it
        # collapsed dists to 1-D, making dists.min(1) raise.
        dists = (feature - means).pow(2).sum(1)  # (batch, n_classes)
        _, preds = dists.min(1)
        return preds
def _compute_means(self):
    """Compute and cache the normalized feature mean of every exemplar set.

    Stores the per-class means in ``self.exemplar_means`` and clears the
    ``self.compute_means`` flag.
    """
    means = []
    eval_transform = utils.get_train_eval_transforms()[1]
    for class_indexes in self.exemplar_sets:
        imgs, labs = self.dataset.get_items_of(class_indexes)
        loader = utils.get_eval_loader(
            ExemplarSet(imgs, labs, eval_transform), self.batch_size)
        with torch.no_grad():
            batches = [self._extract_features(batch.to(self.device))
                       for batch, _ in loader]
        # Mean feature vector of the class, L2-normalized.
        class_mean = torch.cat(batches).to(self.device).mean(0)
        means.append(class_mean / class_mean.norm())
    self.compute_means = False
    self.exemplar_means = means
def combine_trainset_exemplars(self, train_dataset: Cifar100):
    """Return ``train_dataset`` augmented with all stored exemplars.

    Gathers every exemplar index across classes, wraps the corresponding
    images/labels with the training transform, and merges them with the
    given dataset.
    """
    all_indexes = np.hstack(self.exemplar_sets)
    imgs, lbls = self.dataset.get_items_of(all_indexes)
    exemplar_dataset = ExemplarSet(
        imgs, lbls, utils.get_train_eval_transforms()[0])
    return utils.create_augmented_dataset(train_dataset, exemplar_dataset)