def segmentation_task(tmpdir):
    model = nn.Sequential(nn.Conv2d(3, 32, 3),
                          nn.ReLU(),
                          nn.Conv2d(32, 64, 3),
                          nn.MaxPool2d(2),
                          nn.Conv2d(64, 64, 3),
                          Dropout2d(),
                          nn.ConvTranspose2d(64, 10, 3, 1))
    model = ModelWrapper(model, nn.CrossEntropyLoss())
    test = datasets.CIFAR10(tmpdir, train=False, download=True,
                            transform=transforms.ToTensor())
    return model, test

def classification_task(tmpdir):
    model = nn.Sequential(nn.Conv2d(3, 32, 3),
                          nn.ReLU(),
                          nn.Conv2d(32, 64, 3),
                          nn.MaxPool2d(2),
                          nn.AdaptiveAvgPool2d((7, 7)),
                          Flatten(),
                          nn.Linear(7 * 7 * 64, 128),
                          Dropout(),
                          nn.Linear(128, 10))
    model = ModelWrapper(model, nn.CrossEntropyLoss())
    test = datasets.CIFAR10(tmpdir, train=False, download=True,
                            transform=transforms.ToTensor())
    return model, test

def segmentation_task(tmpdir):
    model = nn.Sequential(nn.Conv2d(3, 32, 3),
                          nn.ReLU(),
                          nn.Conv2d(32, 64, 3),
                          nn.MaxPool2d(2),
                          nn.Conv2d(64, 64, 3),
                          Dropout2d(),
                          nn.ConvTranspose2d(64, 10, 3, 1))
    model = ModelWrapper(model, nn.CrossEntropyLoss())
    test = SimpleDataset()
    return model, test

def classification_task(tmpdir):
    model = nn.Sequential(nn.Conv2d(3, 32, 3),
                          nn.ReLU(),
                          nn.Conv2d(32, 64, 3),
                          nn.MaxPool2d(2),
                          nn.AdaptiveAvgPool2d((7, 7)),
                          Flatten(),
                          nn.Linear(7 * 7 * 64, 128),
                          Dropout(),
                          nn.Linear(128, 10))
    model = ModelWrapper(model, nn.CrossEntropyLoss())
    test = SimpleDataset()
    return model, test

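# The two fixtures above return a `SimpleDataset` that is not defined in this
# excerpt. A minimal sketch of such a fixture is given below; the shapes
# (3x32x32 inputs, 10 integer classes) are assumptions chosen to match the
# models above, not the original implementation.
import torch
from torch.utils.data import Dataset

class SimpleDataset(Dataset):
    """Tiny synthetic dataset of random images with integer labels."""

    def __init__(self, n_items=100, n_classes=10):
        self.x = torch.randn(n_items, 3, 32, 32)
        self.y = torch.randint(0, n_classes, (n_items,))

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]
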
def val_on_loader(self, loader, savedir=None):
    val_data = loader.dataset
    if self.calibrate:
        self.calibrator.calibrate(self.calib_set, self.valid_set,
                                  batch_size=16, epoch=10,
                                  use_cuda=True, double_fit=True)
        calibrated_model = ModelWrapper(self.calibrator.calibrated_model, None)
        self.loop.get_probabilities = calibrated_model.predict_on_dataset
    self.loop.step()
    self.wrapper.test_on_dataset(val_data, batch_size=self.batch_size,
                                 use_cuda=True, average_predictions=20)
    metrics = self.wrapper.metrics
    mets = self._format_metrics(metrics, 'test')
    mets.update({'num_samples': len(self.active_dataset)})
    return mets

class ActiveLearning(torch.nn.Module):
    def __init__(self, exp_dict):
        super().__init__()
        self.backbone = models.vgg16(
            pretrained=exp_dict["imagenet_pretraining"], progress=True)
        num_ftrs = self.backbone.classifier[-1].in_features
        self.backbone.classifier[-1] = torch.nn.Linear(num_ftrs,
                                                       exp_dict["num_classes"])
        # Change dropout layers to MCDropout so they stay active at test time.
        self.backbone = patch_module(self.backbone)
        self.initial_weights = deepcopy(self.backbone.state_dict())
        self.backbone.cuda()

        self.batch_size = exp_dict['batch_size']
        self.calibrate = exp_dict.get('calibrate', False)
        self.learning_epoch = exp_dict['learning_epoch']
        self.optimizer = torch.optim.SGD(self.backbone.parameters(),
                                         lr=exp_dict['lr'],
                                         weight_decay=5e-4,
                                         momentum=0.9,
                                         nesterov=True)
        self.criterion = CrossEntropyLoss()
        shuffle_prop = exp_dict.get('shuffle_prop', 0.0)
        max_sample = -1
        self.heuristic = get_heuristic(exp_dict['heuristic'],
                                       shuffle_prop=shuffle_prop)
        self.wrapper = ModelWrapper(self.backbone, criterion=self.criterion)
        self.wrapper.add_metric(
            'cls_report', lambda: ClassificationReport(exp_dict["num_classes"]))
        self.wrapper.add_metric('accuracy', lambda: Accuracy())
        self.loop = ActiveLearningLoop(None,
                                       self.wrapper.predict_on_dataset,
                                       heuristic=self.heuristic,
                                       ndata_to_label=exp_dict['query_size'],
                                       batch_size=self.batch_size,
                                       iterations=exp_dict['iterations'],
                                       use_cuda=True,
                                       max_sample=max_sample)
        self.calib_set = get_dataset('calib', exp_dict['dataset'])
        self.valid_set = get_dataset('val', exp_dict['dataset'])
        self.calibrator = DirichletCalibrator(self.wrapper,
                                              exp_dict["num_classes"],
                                              lr=0.001,
                                              reg_factor=exp_dict['reg_factor'],
                                              mu=exp_dict['mu'])
        self.active_dataset = None
        self.active_dataset_settings = None

    def train_on_loader(self, loader: DataLoader):
        self.wrapper.load_state_dict(self.initial_weights)
        if self.active_dataset is None:
            self.active_dataset = loader.dataset
            if self.active_dataset_settings is not None:
                self.active_dataset.load_state_dict(self.active_dataset_settings)
            self.loop.dataset = self.active_dataset
        self.criterion.train()
        self.wrapper.train_on_dataset(self.active_dataset,
                                      self.optimizer,
                                      self.batch_size,
                                      epoch=self.learning_epoch,
                                      use_cuda=True)
        metrics = self.wrapper.metrics
        return self._format_metrics(metrics, 'train')

    def val_on_loader(self, loader, savedir=None):
        val_data = loader.dataset
        self.loop.step()
        self.criterion.eval()
        self.wrapper.test_on_dataset(val_data,
                                     batch_size=self.batch_size,
                                     use_cuda=True,
                                     average_predictions=20)
        metrics = self.wrapper.metrics
        mets = self._format_metrics(metrics, 'test')
        mets.update({'num_samples': len(self.active_dataset)})
        return mets

    def on_train_end(self, savedir, epoch):
        h5_path = pjoin(savedir, 'ckpt.h5')
        labelled = self.active_dataset.state_dict()['labelled']
        with h5py.File(h5_path, 'a') as f:
            if f'epoch_{epoch}' not in f:
                g = f.create_group(f'epoch_{epoch}')
                # np.bool is deprecated; use the builtin bool dtype.
                g.create_dataset('labelled', data=labelled.astype(bool))

    def _format_metrics(self, metrics, step):
        mets = {k: v.value for k, v in metrics.items() if step in k}
        mets_unpacked = {}
        for k, v in mets.items():
            if isinstance(v, float):
                mets_unpacked[k] = v
            elif isinstance(v, np.ndarray):
                mets_unpacked[k] = v.mean()
            else:
                mets_unpacked.update(
                    {f"{k}_{ki}": np.mean(vi) for ki, vi in v.items()})
        return mets_unpacked

    def get_state_dict(self):
        state = {}
        state["model"] = self.backbone.state_dict()
        state["optimizer"] = self.optimizer.state_dict()
        if self.active_dataset is None:
            state['dataset'] = None
        else:
            state["dataset"] = self.active_dataset.state_dict()
        return state

    def set_state_dict(self, state_dict):
        self.backbone.load_state_dict(state_dict["model"])
        self.optimizer.load_state_dict(state_dict["optimizer"])
        self.active_dataset_settings = state_dict["dataset"]
        if self.active_dataset is not None:
            self.active_dataset.load_state_dict(self.active_dataset_settings)

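# A minimal sketch of how the ActiveLearning module above might be driven.
# The exp_dict values and the loader names (train_loader wrapping an
# ActiveLearningDataset, val_loader wrapping a validation set) are
# assumptions based on the fields the class reads, not the original script.
exp_dict = {
    "imagenet_pretraining": True, "num_classes": 10, "batch_size": 32,
    "learning_epoch": 10, "lr": 0.001, "heuristic": "bald",
    "query_size": 100, "iterations": 20, "dataset": "cifar10",
    "reg_factor": 0.01, "mu": 0.01,
}
trainer = ActiveLearning(exp_dict)
for al_step in range(10):
    train_metrics = trainer.train_on_loader(train_loader)
    val_metrics = trainer.val_on_loader(val_loader)  # also labels new items
    trainer.on_train_end(savedir="/tmp/exp", epoch=al_step)
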
def main():
    args = parse_args()
    batch_size = args.batch_size
    use_cuda = torch.cuda.is_available()
    hyperparams = vars(args)
    pprint(hyperparams)

    active_set, test_set = get_datasets(hyperparams["initial_pool"],
                                        hyperparams["data_path"])

    # We will use the FocalLoss
    criterion = FocalLoss(gamma=2, alpha=0.25)

    # Our model is a simple Unet
    model = smp.Unet(
        encoder_name="resnext50_32x4d",
        encoder_depth=5,
        encoder_weights="imagenet",
        decoder_use_batchnorm=False,
        classes=len(pascal_voc_ids),
    )
    # Add a Dropout layer to use MC-Dropout
    add_dropout(model, classes=len(pascal_voc_ids), activation=None)

    # This will enable Dropout at test time.
    model = MCDropoutModule(model)

    # Put everything on GPU.
    if use_cuda:
        model.cuda()

    # Make an optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=hyperparams["lr"], momentum=0.9, weight_decay=5e-4)
    # Keep a copy of the original weights
    initial_weights = deepcopy(model.state_dict())

    # Add metrics
    model = ModelWrapper(model, criterion)
    model.add_metric("cls_report", lambda: ClassificationReport(len(pascal_voc_ids)))

    # Which heuristic do you want to use?
    # We will use our custom reduction function.
    heuristic = get_heuristic(hyperparams["heuristic"], reduction=mean_regions)

    # The ALLoop is in charge of predicting the uncertainty and
    # the labelling process.
    loop = ActiveLearningLoop(
        active_set,
        model.predict_on_dataset_generator,
        heuristic=heuristic,
        query_size=hyperparams["query_size"],
        # Instead of predicting on the entire pool, only a subset is used.
        max_sample=1000,
        batch_size=batch_size,
        iterations=hyperparams["iterations"],
        use_cuda=use_cuda,
    )
    acc = []
    for epoch in tqdm(range(args.al_step)):
        # Following Gal et al. 2016, we reset the weights.
        model.load_state_dict(initial_weights)
        # Train 50 epochs before sampling.
        model.train_on_dataset(active_set, optimizer, batch_size,
                               hyperparams["learning_epoch"], use_cuda)

        # Validation!
        model.test_on_dataset(test_set, batch_size, use_cuda)
        should_continue = loop.step()
        metrics = model.metrics

        val_loss = metrics["test_loss"].value
        logs = {
            "val": val_loss,
            "epoch": epoch,
            "train": metrics["train_loss"].value,
            "labeled_data": active_set.labelled,
            "Next Training set size": len(active_set),
            "cls_report": metrics["test_cls_report"].value,
        }
        pprint(logs)
        acc.append(logs)
        if not should_continue:
            break

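# The heuristic above receives a custom reduction, mean_regions, which is not
# shown in this excerpt. The sketch below is a plausible reconstruction, not
# the original function: it averages the per-pixel uncertainty map inside the
# cells of a grid and scores each image by its most uncertain region.
import torch

def mean_regions(n, grid_size=16):
    # `n` is assumed to be a numpy uncertainty map of shape [batch, H, W].
    n = torch.from_numpy(n)
    # [batch, 1, grid_size, grid_size]: mean uncertainty per region.
    n = torch.nn.functional.adaptive_avg_pool2d(n.unsqueeze(1), grid_size)
    # Score each image by its most uncertain region.
    return n.view(n.shape[0], -1).max(-1)[0].numpy()
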
def main():
    args = parse_args()
    use_cuda = torch.cuda.is_available()
    torch.backends.cudnn.benchmark = True
    random.seed(1337)
    torch.manual_seed(1337)
    if not use_cuda:
        print("Warning: this experiment will take a very long time on CPU.")
    hyperparams = vars(args)

    active_set, test_set = get_datasets(hyperparams["initial_pool"])

    heuristic = get_heuristic(hyperparams["heuristic"], hyperparams["shuffle_prop"])
    criterion = CrossEntropyLoss()
    model = vgg16(pretrained=False, num_classes=10)
    weights = load_state_dict_from_url(
        "https://download.pytorch.org/models/vgg16-397923af.pth")
    weights = {k: v for k, v in weights.items() if "classifier.6" not in k}
    model.load_state_dict(weights, strict=False)

    # Change dropout layers to MCDropout.
    model = patch_module(model)

    if use_cuda:
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=hyperparams["lr"], momentum=0.9)

    # Wraps the model into a usable API.
    model = ModelWrapper(model, criterion)

    logs = {}
    logs["epoch"] = 0

    # For prediction we use a smaller batch size since it is slower.
    active_loop = ActiveLearningLoop(
        active_set,
        model.predict_on_dataset,
        heuristic,
        hyperparams.get("query_size", 1),
        batch_size=10,
        iterations=hyperparams["iterations"],
        use_cuda=use_cuda,
    )
    # We will reset the weights at each active learning step.
    init_weights = deepcopy(model.state_dict())

    for epoch in tqdm(range(args.epoch)):
        # Load the initial weights.
        model.load_state_dict(init_weights)
        model.train_on_dataset(
            active_set,
            optimizer,
            hyperparams["batch_size"],
            hyperparams["learning_epoch"],
            use_cuda,
        )

        # Validation!
        model.test_on_dataset(test_set, hyperparams["batch_size"], use_cuda)
        metrics = model.metrics
        should_continue = active_loop.step()
        if not should_continue:
            break

        val_loss = metrics["test_loss"].value
        logs = {
            "val": val_loss,
            "epoch": epoch,
            "train": metrics["train_loss"].value,
            "labeled_data": active_set.labelled,
            "Next Training set size": len(active_set),
        }
        print(logs)

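# get_datasets is not defined in this excerpt. A minimal sketch consistent
# with the vgg16/num_classes=10 setup above is given below; the CIFAR-10
# choice, transforms, and paths are assumptions, not the original helper.
import torchvision.transforms as transforms
from torchvision import datasets
from baal.active import ActiveLearningDataset

def get_datasets(initial_pool):
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    test_transform = transforms.ToTensor()
    train_ds = datasets.CIFAR10(".", train=True,
                                transform=transform, download=True)
    test_set = datasets.CIFAR10(".", train=False,
                                transform=test_transform, download=True)
    # The unlabelled pool is evaluated with the test-time transform.
    active_set = ActiveLearningDataset(
        train_ds, pool_specifics={"transform": test_transform})
    active_set.label_randomly(initial_pool)
    return active_set, test_set
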
class DirichletCalibrator(object):
    """
    Adds a linear layer on top of a trained classifier and trains this new
    layer until convergence. Together with the linear layer, the model is
    now calibrated.

    Source: https://arxiv.org/abs/1910.12656
    Code inspired from: https://github.com/dirichletcal/experiments_neurips

    References:
        @article{kull2019beyond,
            title={Beyond temperature scaling: Obtaining well-calibrated
                   multi-class probabilities with Dirichlet calibration},
            author={Kull, Meelis and Perello-Nieto, Miquel and K{\"a}ngsepp,
                    Markus and Silva Filho, Telmo and Song, Hao and Flach, Peter},
            year={2019}
        }

    Args:
        wrapper (ModelWrapper): Provides training and testing methods.
        num_classes (int): Number of classes in the classification task.
        lr (float): Learning rate.
        reg_factor (float): Regularization factor for the linear layer weights.
        mu (float): Regularization factor for the linear layer biases.
            If not given, defaults to ``reg_factor``.
    """

    def __init__(
        self,
        wrapper: ModelWrapper,
        num_classes: int,
        lr: float,
        reg_factor: float,
        mu: float = None,
    ):
        self.num_classes = num_classes
        self.criterion = nn.CrossEntropyLoss()
        self.lr = lr
        self.reg_factor = reg_factor
        self.mu = mu or reg_factor
        self.dirichlet_linear = nn.Linear(self.num_classes, self.num_classes)
        self.model = nn.Sequential(wrapper.model, self.dirichlet_linear)
        self.wrapper = ModelWrapper(self.model, self.criterion)
        self.wrapper.add_metric("ece", lambda: ECE())
        # Use a distinct key so the per-class metric does not overwrite "ece".
        self.wrapper.add_metric("ece_per_cls", lambda: ECE_PerCLs(num_classes))

    def l2_reg(self):
        """Use the trainable layer's parameters for L2 regularization.

        Returns:
            The regularization term for the linear layer.
        """
        weight_p, bias_p = self.dirichlet_linear.parameters()
        w_l2_factor = weight_p.norm(2)
        b_l2_factor = bias_p.norm(2)
        return self.reg_factor * w_l2_factor + self.mu * b_l2_factor

    def calibrate(self, train_set: Dataset, test_set: Dataset,
                  batch_size: int, epoch: int,
                  use_cuda: bool, double_fit: bool = False, **kwargs):
        """
        Train the linear layer given a training set and a validation set.
        The training set should be different from what the model was
        trained on.

        Args:
            train_set (Dataset): The training set.
            test_set (Dataset): The validation set.
            batch_size (int): Batch size used.
            epoch (int): Number of epochs to train the linear layer for.
            use_cuda (bool): If True, will use the GPU.
            double_fit (bool): If True, will fit twice on the train set.
            kwargs (dict): Remaining parameters for
                baal.ModelWrapper.train_and_test_on_datasets().

        Returns:
            loss_history (list[float]): List of loss values for each epoch.
            model.state_dict (dict): Model weights.
        """
        # Reinitialize the Dirichlet calibration layer to the identity.
        self.dirichlet_linear.weight.data.copy_(
            torch.eye(self.dirichlet_linear.weight.shape[0]))
        self.dirichlet_linear.bias.data.copy_(
            torch.zeros(*self.dirichlet_linear.bias.shape))
        if use_cuda:
            self.dirichlet_linear.cuda()

        optimizer = Adam(self.dirichlet_linear.parameters(), lr=self.lr)

        loss_history, weights = self.wrapper.train_and_test_on_datasets(
            train_set, test_set, optimizer, batch_size, epoch, use_cuda,
            regularizer=self.l2_reg, return_best_weights=True,
            patience=None, **kwargs)
        self.model.load_state_dict(weights)

        if double_fit:
            lr = self.lr / 10
            optimizer = Adam(self.dirichlet_linear.parameters(), lr=lr)
            loss_history, weights = self.wrapper.train_and_test_on_datasets(
                train_set, test_set, optimizer, batch_size, epoch, use_cuda,
                regularizer=self.l2_reg, return_best_weights=True,
                patience=None, **kwargs)
            self.model.load_state_dict(weights)

        return loss_history, self.model.state_dict()

    @property
    def calibrated_model(self):
        return self.model

    @property
    def metrics(self):
        return self.wrapper.metrics

al_dataset = ActiveLearningDataset(
    train_ds, pool_specifics={"transform": test_transform})
al_dataset.label_randomly(200)  # Start with 200 items labelled.

# Creates an MLP to classify MNIST
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 512),
    nn.Dropout(),
    nn.Linear(512, 512),
    nn.Dropout(),
    nn.Linear(512, 10),
)
model = patch_module(model)  # Set dropout layers for MC-Dropout.
if use_cuda:
    model = model.cuda()
wrapper = ModelWrapper(model=model, criterion=nn.CrossEntropyLoss())
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                      weight_decay=5e-4)

# We will use BALD as our heuristic as it is a great tradeoff between
# performance and efficiency.
bald = BALD()
# Setup our active learning loop for our experiments
al_loop = ActiveLearningLoop(
    dataset=al_dataset,
    get_probabilities=wrapper.predict_on_dataset,
    heuristic=bald,
    query_size=100,  # We will label 100 examples per step.
    # KWARGS for predict_on_dataset
    iterations=20,  # 20 sampling for MC-Dropout
    use_cuda=use_cuda,
)

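# The snippet above stops at the loop setup. A typical continuation is
# sketched below as an assumption: the step count and epoch values are
# illustrative, and test_ds stands for an assumed held-out split.
initial_weights = deepcopy(model.state_dict())
for step in range(10):  # assumed number of active learning steps
    model.load_state_dict(initial_weights)  # reset weights each step
    wrapper.train_on_dataset(al_dataset, optimizer, batch_size=32,
                             epoch=10, use_cuda=use_cuda)
    wrapper.test_on_dataset(test_ds, batch_size=32, use_cuda=use_cuda)
    if not al_loop.step():  # label `query_size` new items from the pool
        break
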
# You might need to add the path to the list of python system paths.
al_dataset = ActiveLearningDataset(FeatureDataset("./data/CASP.csv"))
test_ds = FeatureDataset("./data/CASP.csv", split="test")
al_dataset.label_randomly(1000)  # Start with 1000 items labelled.

# Creates an MLP for regression on the CASP features.
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(9, 16),
                      nn.Dropout(),
                      nn.Linear(16, 8),
                      nn.ReLU(),
                      nn.Linear(8, 1))
model = patch_module(model)  # Set dropout layers for MC-Dropout.
model.apply(weight_init_normal)

if use_cuda:
    model = model.cuda()
wrapper = ModelWrapper(model=model, criterion=nn.L1Loss())
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                      weight_decay=5e-4)

# We will use Variance as our heuristic for regression problems.
variance = Variance()

# Setup our active learning loop for our experiments
al_loop = ActiveLearningLoop(
    dataset=al_dataset,
    get_probabilities=wrapper.predict_on_dataset,
    heuristic=variance,
    query_size=250,  # We will label 250 examples per step.
    # KWARGS for predict_on_dataset
    iterations=20,  # Number of MC-Dropout samples (assumed value).
    use_cuda=use_cuda,
)

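# weight_init_normal is not defined in this excerpt. A minimal sketch of
# such an initializer is given below; the distribution parameters are
# assumptions, not the original values.
import torch.nn as nn

def weight_init_normal(m):
    # Draw Linear weights from a small normal distribution, zero the biases.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=0.05)
        nn.init.zeros_(m.bias)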