def training_deconvolution(self):
    """Train the model using the reconstruction loss on half an image.

    Every epoch trains with ``self.train_unsup_only``, appends the learning
    rate helper, runs ``self.test`` and logs the collected metrics.

    :return: list of performance estimators that observed performance on the
        last epoch run (an empty PerformanceList when no epoch ran).
    """
    wrote_header = False
    learning_rate_tracker = LearningRateHelper(scheduler=self.scheduler_train,
                                               learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    last_test_perfs = None
    epoch_perfs = PerformanceList()
    print("Training with unsupervised set..")

    epochs = range(self.start_epoch, self.start_epoch + self.args.num_epochs)
    for epoch in epochs:
        epoch_perfs = PerformanceList()
        epoch_perfs += self.train_unsup_only(epoch)
        epoch_perfs += [learning_rate_tracker]

        if last_test_perfs is None or self.epoch_is_test_epoch(epoch):
            epoch_perfs += self.test(epoch)

        if not wrote_header:
            # The header is derived from the first epoch's estimators.
            wrote_header = True
            self.log_performance_header(epoch_perfs)

        stop_requested, epoch_perfs = self.log_performance_metrics(epoch, epoch_perfs)
        if stop_requested:
            # early stopping requested.
            break

    return epoch_perfs
def train_with_reconstructed_half(self):
    """Train the model using two half images.

    One half is the original from the training set, the other is
    reconstructed with the encoder/generator trained on the unsup set.
    ``self.optimizer`` is cleared and a dedicated Adam optimizer is handed to
    ``self.train_with_two_halves`` on every epoch.

    :return: performance estimators observed on the last epoch that ran (an
        empty PerformanceList when no epoch ran).
    """
    wrote_header = False
    self.optimizer = None
    adam = torch.optim.Adam(self.net.parameters(),
                            lr=self.args.lr,
                            betas=(0.5, 0.999),
                            weight_decay=self.args.L2)
    learning_rate_tracker = LearningRateHelper(scheduler=self.scheduler_train,
                                               learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    last_test_perfs = None
    epoch_perfs = PerformanceList()
    print("Training with unsupervised half..")

    epochs = range(self.start_epoch, self.start_epoch + self.args.num_epochs)
    for epoch in epochs:
        epoch_perfs = PerformanceList()
        epoch_perfs += self.train_with_two_halves(epoch, adam)
        epoch_perfs += [learning_rate_tracker]

        if last_test_perfs is None or self.epoch_is_test_epoch(epoch):
            epoch_perfs += self.test_acc(epoch)

        if not wrote_header:
            wrote_header = True
            self.log_performance_header(epoch_perfs)

        stop_requested, epoch_perfs = self.log_performance_metrics(epoch, epoch_perfs)
        if stop_requested:
            # early stopping requested.
            break

    return epoch_perfs
def test_optimize_with_train_model_two_passes(self):
    """Train a tiny two-layer linear model with ureg in two passes per epoch.

    Each of the 100 epochs first calls ``train`` with ``regularize=False``
    and then ``regularize`` separately. Afterwards the predictions on the
    test problem are printed and checked against two thresholds:

    * examples whose second feature is 0.6 must score above 0.8;
    * the example (0.45, 0.4) must score below 0.6.
    """
    test_problem = TestProblem()
    model_trainer = TrainModel(DummyArgs(ureg=True, lr=0.001, shave_lr=0.01),
                               problem=test_problem, use_cuda=False)
    model_trainer.init_model(
        create_model_function=lambda name: torch.nn.Sequential(
            torch.nn.Linear(2, 2), torch.nn.Linear(2, 1)))
    model_trainer.ureg.set_num_examples(100, 100)

    for epoch in range(0, 100):
        estimators = PerformanceList()
        estimators += [LossHelper("train_loss")]
        estimators += [LossHelper("reg_loss")]
        model_trainer.train(epoch=epoch,
                            performance_estimators=estimators,
                            train_supervised_model=True,
                            train_ureg=True,
                            regularize=False)
        model_trainer.regularize(epoch=epoch)
        print("train_loss {:3f} ureg loss={:3f}".format(
            estimators.get_metric("train_loss"),
            estimators.get_metric("reg_loss")))

    eps = 0.001
    print("\n")
    # First pass over the test set: only report the predictions.
    for features, true_target in test_problem.test_loader():
        features = Variable(features)
        result = model_trainer.net(features)
        print(
            "test_inputs: ({:.3f}, {:.3f}) predicted target: {:.3f} true target: {:.1f} "
            .format(features.data[0, 0], features.data[0, 1],
                    result.data[0, 0], true_target[0, 0]))
    sys.stdout.flush()

    # Second pass: check the predictions on the examples of interest.
    for features, true_target in test_problem.test_loader():
        features = Variable(features)
        result = model_trainer.net(features)
        predicted = result.data[0, 0]
        if abs(features.data[0, 1] - 0.6) < eps:
            # assertGreater reports both operands on failure; the message
            # now states the threshold that is actually enforced.
            self.assertGreater(
                predicted, 0.8,
                msg="probability must be larger than 0.8 on true signal when ureg is enabled")
        if abs(features.data[0, 0] - 0.45) < eps and abs(features.data[0, 1] - 0.4) < eps:
            self.assertLess(
                predicted, 0.6,
                msg="probability must be smaller than 0.6 on biased signal when ureg is enabled")
def train(self, epoch, confusion_data):
    """Run one training pass of the confusion model.

    ``confusion_data`` is shuffled in place, truncated to at most
    ``args.max_training`` records and consumed in mini-batches. For each
    record the image is fetched from the training set when
    ``trained_with`` is set and from the test set otherwise.

    :param epoch: epoch number (not used inside this method).
    :param confusion_data: list of confusion records exposing
        ``predicted_label``, ``true_label``, ``trained_with``,
        ``example_index`` and ``train_loss``.
    :return: PerformanceList holding the "train_loss" estimator.
    """
    args = self.args
    optimizer = self.optimizer
    problem = self.problem

    estimators = PerformanceList()
    estimators += [FloatHelper("train_loss")]
    for estimator in estimators:
        estimator.init_performance_metrics()

    self.model.train()
    shuffle(confusion_data)
    num_used = min(args.max_training, len(confusion_data))
    confusion_data = confusion_data[:num_used]

    for batch_idx, records in enumerate(batch(confusion_data, args.mini_batch_size)):
        batch_size = min(len(records), args.mini_batch_size)
        images = [None] * batch_size
        targets = torch.zeros(batch_size)
        optimizer.zero_grad()
        train_loss_column = torch.zeros(batch_size, 1)
        trained_with_column = torch.zeros(batch_size, 1)

        for row, record in enumerate(records):
            num_classes = problem.num_classes()
            targets[row] = class_label(num_classes, record.predicted_label, record.true_label)
            if record.trained_with:
                source = problem.train_set()
            else:
                source = problem.test_set()
            images[row], _ = source[record.example_index]
            train_loss_column[row] = record.train_loss
            trained_with_column[row] = 1.0 if record.trained_with else 0.0

        image_input = Variable(torch.stack(images, dim=0), requires_grad=True)
        train_loss_column = Variable(train_loss_column, requires_grad=True)
        trained_with_column = Variable(trained_with_column, requires_grad=True)
        targets = Variable(targets, requires_grad=False).type(torch.LongTensor)
        if self.use_cuda:
            image_input = image_input.cuda()
            train_loss_column = train_loss_column.cuda()
            trained_with_column = trained_with_column.cuda()
            targets = targets.cuda()

        outputs = self.model(train_loss_column, trained_with_column, image_input)
        loss = self.criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        estimators.set_metric(batch_idx, "train_loss", loss.data[0])
        if args.progress_bar:
            message = " ".join(estimator.progress_message() for estimator in estimators)
            progress_bar(batch_idx * batch_size, len(confusion_data), message)

    return estimators
def training_mixup(self):
    """Train the model with unsupervised mixup.

    After every epoch the mixup settings stored on ``self.args`` are
    adjusted: ``unsup_proportion`` is clamped to 1 when above 1 and to 0 when
    below 1E-5, ``alpha`` is divided by ``increase_decrease`` whenever one of
    these clamps applies, and ``alpha`` is finally kept within [0, 1].

    :return: list of performance estimators that observed performance on the
        last epoch run.
    """
    wrote_header = False
    learning_rate_tracker = LearningRateHelper(scheduler=self.scheduler_train,
                                               learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    last_test_perfs = None
    epoch_perfs = PerformanceList()

    epochs = range(self.start_epoch, self.start_epoch + self.args.num_epochs)
    for epoch in epochs:
        epoch_perfs = PerformanceList()
        epoch_perfs += self.train_mixup(epoch,
                                        train_supervised_model=True,
                                        alpha=self.args.alpha,
                                        ratio_unsup=self.args.unsup_proportion)

        # The two clamps are mutually exclusive: once the proportion has been
        # set to 1 it cannot be below 1E-5.
        if self.args.unsup_proportion > 1:
            self.args.unsup_proportion = 1
            self.args.alpha *= 1. / self.args.increase_decrease
        elif self.args.unsup_proportion < 1E-5:
            self.args.unsup_proportion = 0
            self.args.alpha *= 1. / self.args.increase_decrease

        if self.args.alpha < 0:
            self.args.alpha = 0
        elif self.args.alpha > 1:
            self.args.alpha = 1

        epoch_perfs += [learning_rate_tracker]
        if last_test_perfs is None or self.epoch_is_test_epoch(epoch):
            epoch_perfs += self.test(epoch)

        if not wrote_header:
            wrote_header = True
            self.log_performance_header(epoch_perfs)

        stop_requested, epoch_perfs = self.log_performance_metrics(epoch, epoch_perfs)
        if stop_requested:
            # early stopping requested.
            break

    return epoch_perfs
def test_acc(self, epoch, performance_estimators=None):
    """Evaluate ``self.net`` on the validation range and step the LR schedule.

    For every batch the first image half is encoded and regenerated with
    ``self.image_encoder`` / ``self.image_generator``. What is fed to the
    network depends on ``self.args.mode``:

    * ``"separate"``: the original inputs (the generated image is not used);
    * ``"average"``: the mean of the original inputs and the generated image;
    * ``"uonly"``: the generated image only.

    The confusion matrix accumulated over the batches is stored, transposed,
    in ``self.confusion_matrix``.

    :param epoch: epoch number, used for logging and for the scheduler step.
    :param performance_estimators: estimators to fill; when None, a
        "test_loss" LossHelper and a "test_" AccuracyHelper are created.
    :return: the performance estimators.
    :raises ValueError: if ``self.args.mode`` is not one of the modes above.
        The original code left ``outputs`` unbound in that case, or silently
        reused the previous batch's outputs.
    """
    print('\nTesting, epoch: %d' % epoch)
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_loss"), AccuracyHelper("test_")]

    self.net.eval()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()

    cm = ConfusionMeter(self.problem.num_classes(), normalized=False)

    for batch_idx, (inputs, targets) in enumerate(self.problem.test_loader_range(0, self.args.num_validation)):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        image1, image2 = half_images(inputs, slope=get_random_slope(), cuda=self.use_cuda)
        encoded = self.image_encoder(image1)
        unsup_image = self.image_generator(encoded)

        mode = self.args.mode
        if mode == "separate":
            inputs, targets = Variable(inputs, volatile=True), Variable(targets, volatile=True)
        elif mode == "average":
            inputs = (inputs + unsup_image.data) / 2
            inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)
        elif mode == "uonly":
            inputs = unsup_image.data
            inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)
        else:
            raise ValueError(
                "unsupported mode for test_acc: {!r} "
                "(expected 'separate', 'average' or 'uonly')".format(mode))
        outputs = self.net(inputs)

        loss = self.criterion(outputs, targets)
        # accumulate the confusion matrix:
        _, predicted = torch.max(outputs.data, 1)
        cm.add(predicted=predicted, target=targets.data)

        performance_estimators.set_metric_with_outputs(batch_idx, "test_loss", loss.data[0], outputs, targets)
        performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                     performance_estimators.progress_message(["test_loss", "test_accuracy"]))

        if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
            break

    # Apply learning rate schedule:
    test_accuracy = performance_estimators.get_metric("test_accuracy")
    assert test_accuracy is not None, "test_accuracy must be found among estimated performance metrics"
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_accuracy, epoch)
    self.confusion_matrix = cm.value().transpose()
    return performance_estimators
def test(self, epoch, performance_estimators=None):
    """Evaluate ``self.net`` on the validation range and step the LR schedule.

    Networks carrying an ``is_dual`` attribute are called with a second
    ``None`` input and return a triple whose first element is used. In
    "capsules" mode the loss comes from ``self.net.loss`` and the outputs are
    replaced by the capsule norms before predictions are taken. The
    confusion matrix is stored, transposed, in ``self.confusion_matrix``.

    :param epoch: epoch number, used for logging and for the scheduler step.
    :param performance_estimators: estimators to fill; when None, a
        "test_loss" LossHelper and a "test_" AccuracyHelper are created.
    :return: the performance estimators.
    """
    print('\nTesting, epoch: %d' % epoch)
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_loss"), AccuracyHelper("test_")]

    self.net.eval()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    confusion_meter = ConfusionMeter(self.problem.num_classes(), normalized=False)
    validation_loader = self.problem.test_loader_range(0, self.args.num_validation)

    for batch_idx, (inputs, targets) in enumerate(validation_loader):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets, volatile=True)

        if hasattr(self.net, 'is_dual'):
            outputs, _, _ = self.net(inputs, None)
        else:
            outputs = self.net(inputs)

        if self.args.mode == "capsules":
            one_hot_targets = Variable(self.problem.one_hot(targets.data), volatile=True)
            loss, capsule_loss, _ = self.net.loss(inputs, outputs, one_hot_targets)
            # ||vc|| also known as norm:
            capsule_norms = torch.sqrt((outputs ** 2).sum(dim=2, keepdim=True))
            outputs = capsule_norms.view(capsule_norms.size()[0], -1)
        else:
            loss = self.criterion(outputs, targets)

        # accumulate the confusion matrix:
        _, predicted = torch.max(outputs.data, 1)
        confusion_meter.add(predicted=predicted, target=targets.data)

        batch_loss = loss.data[0]
        performance_estimators.set_metric_with_outputs(batch_idx, "test_loss", batch_loss, outputs, targets)
        performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", batch_loss, outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                     performance_estimators.progress_message(["test_loss", "test_accuracy"]))

        if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
            break

    # Apply learning rate schedule:
    test_accuracy = performance_estimators.get_metric("test_accuracy")
    assert test_accuracy is not None, "test_accuracy must be found among estimated performance metrics"
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_accuracy, epoch)
    self.confusion_matrix = confusion_meter.value().transpose()
    return performance_estimators
def training_fm_loss(self):
    """Train the model with ``self.train_with_fm_loss``.

    Before training, ``LossEstimator_sim`` is installed as ``loss_estimator``
    on every module visited by ``self.net.apply`` that already has that
    attribute, and a fresh SGD optimizer is stored in
    ``self.optimizer_training``. ``self.args.gamma`` is passed to
    ``train_with_fm_loss`` on every epoch.

    :return: list of performance estimators that observed performance on the
        last epoch run.
    """
    wrote_header = False
    replacement_estimator = LossEstimator_sim

    # replace the loss function of the dual model:
    def install_loss_estimator(module):
        if hasattr(module, 'loss_estimator'):
            module.loss_estimator = replacement_estimator

    self.net.apply(install_loss_estimator)
    self.optimizer_training = torch.optim.SGD(self.net.parameters(),
                                              lr=self.args.lr,
                                              momentum=self.args.momentum,
                                              weight_decay=self.args.L2)
    learning_rate_tracker = LearningRateHelper(scheduler=self.scheduler_train,
                                               learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    last_test_perfs = None
    epoch_perfs = PerformanceList()

    epochs = range(self.start_epoch, self.start_epoch + self.args.num_epochs)
    for epoch in epochs:
        epoch_perfs = PerformanceList()
        epoch_perfs += self.train_with_fm_loss(epoch, self.args.gamma)
        epoch_perfs += [learning_rate_tracker]

        if last_test_perfs is None or self.epoch_is_test_epoch(epoch):
            epoch_perfs += self.test(epoch)

        if not wrote_header:
            wrote_header = True
            self.log_performance_header(epoch_perfs)

        stop_requested, epoch_perfs = self.log_performance_metrics(epoch, epoch_perfs)
        if stop_requested:
            # early stopping requested.
            break

    return epoch_perfs
def training_supervised(self):
    """Train the model in a completely supervised manner.

    In "capsules" mode an Adam optimizer replaces
    ``self.optimizer_training`` and each epoch is trained with
    ``self.train_capsules``; otherwise ``self.train`` is used.

    :return: list of performance estimators that observed performance on the
        last epoch run.
    """
    wrote_header = False
    learning_rate_tracker = LearningRateHelper(scheduler=self.scheduler_train,
                                               learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    last_test_perfs = None
    epoch_perfs = PerformanceList()

    if self.args.mode == "capsules":
        self.optimizer_training = Adam(self.net.parameters(),
                                       lr=self.args.lr,
                                       betas=(0.5, 0.999))

    epochs = range(self.start_epoch, self.start_epoch + self.args.num_epochs)
    for epoch in epochs:
        epoch_perfs = PerformanceList()
        if self.args.mode == "capsules":
            trained = self.train_capsules(epoch)
        else:
            trained = self.train(epoch, train_supervised_model=True)
        epoch_perfs += trained
        epoch_perfs += [learning_rate_tracker]

        if last_test_perfs is None or self.epoch_is_test_epoch(epoch):
            epoch_perfs += self.test(epoch)

        if not wrote_header:
            wrote_header = True
            self.log_performance_header(epoch_perfs)

        stop_requested, epoch_perfs = self.log_performance_metrics(epoch, epoch_perfs)
        if stop_requested:
            # early stopping requested.
            break

    return epoch_perfs
def test(self, epoch, performance_estimators=None):
    """Measure the reconstruction error of the encoder/generator pair.

    The first half of each validation image is encoded and regenerated, and
    the MSE between the generated image and the full input is recorded as
    "test_loss". Images of the first batch are saved with
    ``self.save_images``. The learning rate scheduler is stepped with the
    resulting test loss unless constant learning rates are requested.

    :param epoch: epoch number, used for logging, image saving and the
        scheduler step.
    :param performance_estimators: estimators to fill; when None, a
        "test_loss" LossHelper and a "test_" AccuracyHelper are created.
    :return: the performance estimators.
    """
    mse = MSELoss()
    print('\nTesting, epoch: %d' % epoch)

    for module in (self.net, self.image_generator, self.image_encoder):
        module.eval()

    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_loss"), AccuracyHelper("test_")]
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    # we used unsup set to train, use training to validate:
    validation_loader = self.problem.train_loader_subset(range(0, self.args.num_validation))
    for batch_idx, (inputs, _) in enumerate(validation_loader):
        if self.use_cuda:
            inputs = inputs.cuda()

        first_half, second_half = half_images(inputs, slope=get_random_slope(), cuda=self.use_cuda)
        # regenerate an image from the encoding of the first half:
        generated = self.image_generator(self.image_encoder(first_half))
        if batch_idx == 0:
            self.save_images(epoch, first_half, second_half, generated_image2=generated)

        loss = mse(generated, Variable(inputs, requires_grad=False))
        performance_estimators.set_metric(batch_idx, "test_loss", loss.data[0])

        progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                     performance_estimators.progress_message(["test_loss"]))

        if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
            break

    # Apply learning rate schedule:
    test_loss = performance_estimators.get_metric("test_loss")
    assert test_loss is not None, "test_loss must be found among estimated performance metrics"
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_loss, epoch)
    return performance_estimators
def train_capsules(self, epoch,
                   performance_estimators=None,
                   train_supervised_model=True,
                   ):
    """Train the capsule network for one epoch.

    With ``self.published_reconstruction_loss`` set, the combined loss
    returned by ``self.net.loss`` is optimized in one backward pass.
    Otherwise the mean margin loss is back-propagated first (keeping the
    graph) and the input gradient it produced is handed to
    ``self.net.focused_reconstruction_loss``, whose result is
    back-propagated before the optimizer step.

    :param epoch: epoch number, used for logging.
    :param performance_estimators: estimators to fill; a default set is
        created when None.
    :param train_supervised_model: not used by this method.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        for estimator in (LossHelper("optimized_loss"),
                          LossHelper("capsule_loss"),
                          LossHelper("reconstruction_loss"),
                          AccuracyHelper("train_"),
                          FloatHelper("train_grad_norm")):
            performance_estimators += [estimator]

    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    training_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    for batch_idx, (inputs, targets) in enumerate(training_loader):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # inputs require gradients: the focused reconstruction loss reads inputs.grad.
        inputs = Variable(inputs, requires_grad=True)
        targets = Variable(targets, requires_grad=False)

        self.net.train()
        self.net.zero_grad()
        self.optimizer_training.zero_grad()
        outputs = self.net(inputs)
        one_hot_targets = Variable(self.problem.one_hot(targets.data), requires_grad=False)

        if self.published_reconstruction_loss:
            optimized_loss, capsule_loss, reconstruction_loss = self.net.loss(
                inputs, outputs, one_hot_targets)
            optimized_loss.backward()
            self.optimizer_training.step()
        else:
            margin_loss = self.net.margin_loss(outputs, one_hot_targets).mean()
            # keep the graph alive for the second backward pass below.
            margin_loss.backward(retain_graph=True)
            reconstruction_loss = self.net.focused_reconstruction_loss(
                inputs, inputs.grad, outputs, one_hot_targets)
            reconstruction_loss.backward()
            self.optimizer_training.step()
            capsule_loss = margin_loss
            optimized_loss = margin_loss + reconstruction_loss

        decoder_grad_norm = grad_norm(self.net.decoder.parameters())
        performance_estimators.set_metric(batch_idx, "train_grad_norm", decoder_grad_norm)
        performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
        performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
        performance_estimators.set_metric(batch_idx, "capsule_loss", capsule_loss.data[0])

        progress_bar(batch_idx * self.mini_batch_size, self.max_training_examples,
                     performance_estimators.progress_message(
                         ["optimized_loss", "capsule_loss", "reconstruction_loss"]))

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
def train_with_fm_loss(self, epoch,
                       gamma=1E-5,
                       performance_estimators=None):
    """Train for one epoch on ``criterion + gamma * fm_loss``.

    Each supervised batch is paired with a batch drawn from the (cycled)
    regularization loader; the network is called with both and returns the
    supervised outputs, the unsupervised outputs and ``fm_loss``.

    :param epoch: epoch number, used for logging.
    :param gamma: weight applied to ``fm_loss`` in the optimized loss.
    :param performance_estimators: estimators to fill; a default set is
        created when None.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        for estimator in (LossHelper("optimized_loss"),
                          LossHelper("train_loss"),
                          FloatHelper("fm_loss"),
                          AccuracyHelper("train_"),
                          FloatHelper("train_grad_norm")):
            performance_estimators += [estimator]

    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    supervised_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    unsup_loader = self.problem.reg_loader_subset_range(0, self.args.num_shaving)
    unsup_batches = itertools.cycle(unsup_loader)

    for batch_idx, ((inputs, targets), (uinputs, _)) in enumerate(zip(supervised_loader, unsup_batches)):
        if self.use_cuda:
            inputs = inputs.cuda()
            uinputs = uinputs.cuda()
            targets = targets.cuda()

        self.net.train()
        self.net.zero_grad()
        self.optimizer_training.zero_grad()

        inputs = Variable(inputs)
        targets = Variable(targets, requires_grad=False)
        uinputs = Variable(uinputs, requires_grad=True)
        if self.use_cuda:
            inputs, targets, uinputs = inputs.cuda(), targets.cuda(), uinputs.cuda()

        outputs, outputu, fm_loss = self.net(inputs, uinputs)
        supervised_loss = self.criterion(outputs, targets)
        optimized_loss = supervised_loss + gamma * fm_loss
        optimized_loss.backward()
        self.optimizer_training.step()

        total_grad_norm = grad_norm(self.net.parameters())
        performance_estimators.set_metric(batch_idx, "train_grad_norm", total_grad_norm)
        performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
        performance_estimators.set_metric(batch_idx, "fm_loss", fm_loss.data[0])
        for metric_name in ("train_loss", "train_accuracy"):
            performance_estimators.set_metric_with_outputs(
                batch_idx, metric_name, supervised_loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     min(self.max_regularization_examples, self.max_training_examples),
                     performance_estimators.progress_message(
                         ["optimized_loss", "train_loss", "train_accuracy"]))

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
def train_mixup(self, epoch,
                performance_estimators=None,
                train_supervised_model=True,
                alpha=0.5,
                ratio_unsup=0,
                ):
    """Train for one epoch on mixed-up pairs of examples.

    Two passes over the training subset provide the pairs. With probability
    ``ratio_unsup`` the second input of a pair is replaced by a batch from
    the (cycled) regularization loader. ``self.mixup_inputs_targets``
    produces the mixed inputs and targets, and the loss is
    ``self.criterion_multi_label``.

    :param epoch: epoch number, used for logging and as the index of the
        "alpha" and "unsup_proportion" metrics.
    :param performance_estimators: estimators to fill; a default set is
        created when None.
    :param train_supervised_model: when true, optimize the supervised loss.
    :param alpha: mixup parameter forwarded to ``mixup_inputs_targets``.
    :param ratio_unsup: probability of mixing with an unsupervised batch.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        for estimator in (LossHelper("optimized_loss"),
                          LossHelper("train_loss"),
                          FloatHelper("train_grad_norm"),
                          FloatHelper("alpha"),
                          FloatHelper("unsup_proportion")):
            performance_estimators += [estimator]

    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    first_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    second_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    unsup_loader = self.problem.reg_loader_subset_range(0, self.args.num_shaving)
    unsup_batches = itertools.cycle(unsup_loader)

    performance_estimators.set_metric(epoch, "alpha", alpha)
    performance_estimators.set_metric(epoch, "unsup_proportion", ratio_unsup)

    triples = zip(first_loader, second_loader, unsup_batches)
    for batch_idx, ((inputs1, targets1), (inputs2, targets2), (uinputs1, _)) in enumerate(triples):
        if random() < ratio_unsup:
            # use an example from the unsupervised set to mixup with inputs1:
            inputs2 = uinputs1

        if self.use_cuda:
            inputs1 = inputs1.cuda()
            inputs2 = inputs2.cuda()

        inputs, targets = self.mixup_inputs_targets(alpha, inputs1, inputs2, targets1, targets2)

        self.net.train()
        self.net.zero_grad()
        self.optimizer_training.zero_grad()
        outputs = self.net(inputs)

        if train_supervised_model:
            supervised_loss = self.criterion_multi_label(outputs, targets)
            optimized_loss = supervised_loss
            optimized_loss.backward()
            self.optimizer_training.step()

            total_grad_norm = grad_norm(self.net.parameters())
            performance_estimators.set_metric(batch_idx, "train_grad_norm", total_grad_norm)
            performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
            performance_estimators.set_metric_with_outputs(batch_idx, "train_loss", supervised_loss.data[0],
                                                           outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     min(self.max_regularization_examples, self.max_training_examples),
                     performance_estimators.progress_message(["train_loss", "train_accuracy"]))

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
def train(self, epoch,
          performance_estimators=None,
          train_supervised_model=True,
          ):
    """Train ``self.net`` for one epoch over the training subset.

    In "supervised" mode (with ``train_supervised_model`` set) the criterion
    loss is optimized and train loss/accuracy are recorded. In "capsules"
    mode the combined loss returned by ``self.net.loss`` is optimized.

    :param epoch: epoch number, used for logging.
    :param performance_estimators: estimators to fill; a default set is
        created when None.
    :param train_supervised_model: enables the "supervised" branch.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        for estimator in (LossHelper("optimized_loss"),
                          LossHelper("train_loss"),
                          AccuracyHelper("train_"),
                          FloatHelper("train_grad_norm")):
            performance_estimators += [estimator]

    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    training_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    for batch_idx, (inputs, targets) in enumerate(training_loader):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)

        self.net.train()
        self.optimizer_training.zero_grad()
        outputs = self.net(inputs)

        if train_supervised_model and self.args.mode == "supervised":
            supervised_loss = self.criterion(outputs, targets)
            optimized_loss = supervised_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            for metric_name in ("train_accuracy", "train_loss"):
                performance_estimators.set_metric_with_outputs(
                    batch_idx, metric_name, supervised_loss.data[0], outputs, targets)
        elif self.args.mode == "capsules":
            one_hot_targets = Variable(self.problem.one_hot(targets.data), requires_grad=False)
            optimized_loss, capsule_loss, reconstruction_loss = self.net.loss(
                inputs, outputs, one_hot_targets)
            optimized_loss.backward()
            self.optimizer_training.step()

        total_grad_norm = grad_norm(self.net.parameters())
        performance_estimators.set_metric(batch_idx, "train_grad_norm", total_grad_norm)
        performance_estimators.set_metric_with_outputs(batch_idx, "optimized_loss", optimized_loss.data[0],
                                                       outputs, targets)

        message = " ".join(estimator.progress_message() for estimator in performance_estimators)
        progress_bar(batch_idx * self.mini_batch_size, self.max_training_examples, message)

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
print("Loaded {} lines of confusion data".format(len(confusion_data))) print("Loading pre-trained image model from {}".format(args.checkpoint_key)) image_model = TrainModelSplit(args, problem, use_cuda).load_checkpoint() helper = ConfusionTrainingHelper(image_model, problem, args, use_cuda) random.shuffle(confusion_data) train_split = confusion_data[0:int(len(confusion_data) * 2 / 3)] test_split = confusion_data[int(len(confusion_data) / 3):len(confusion_data)] best_loss = sys.maxsize no_improvement = 0 distinct_training_losses = set([cd.train_loss for cd in confusion_data]) distinct_validation_losses = set([cd.val_loss for cd in confusion_data]) for epoch in range(0, args.num_epochs): perfs = PerformanceList() perfs += [helper.train(epoch, train_split)] perfs += [helper.test(epoch, test_split)] train_loss = perfs.get_metric("train_loss") test_loss = perfs.get_metric("test_loss") print("epoch {} train_loss={} test_loss={}".format(epoch, train_loss, test_loss)) if test_loss < best_loss: best_loss = test_loss helper.save_confusion_model(epoch, test_loss, distinct_training_losses, distinct_validation_losses) no_improvement = 0 else: no_improvement += 1 if no_improvement > 20:
def training_supervised(self, unsup_only=False):
    """Train the model in a completely supervised manner.

    Returns the performance obtained at the end of the configured training
    run. When ``unsup_only`` is set, ``self.best_model`` first labels the
    unsupervised set: predicted class indices select entries of
    ``self.best_model_confusion_matrix`` along dim 0, the selection is passed
    through ``torch.renorm(p=1, dim=1, maxnorm=1)`` and stored per example
    index. Training then continues from ``self.best_model`` with
    ``self.train_unsup_only``.

    When ``self.args.rollback_when_worse`` is set, the checkpoint is reloaded
    whenever the test loss is worse than the best one seen so far and more
    than 5 epochs have passed since the previous rollback.

    :param unsup_only: Set to true to train with dreamed-up labels on the
        unsupervised examples only.
    :return: list of performance estimators that observed performance on the
        last epoch run.
    """
    header_written = False
    lr_train_helper = LearningRateHelper(scheduler=self.scheduler_train,
                                         learning_rate_name="train_lr")
    # Never reassigned below, so the test pass runs on every epoch.
    previous_test_perfs = None
    perfs = PerformanceList()
    best_test_loss = sys.maxsize
    num_rollbacks = 0
    epochs_since_rollback = 0

    if unsup_only:
        assert self.best_model is not None, "best model cannot be None to continue training with unsup only."
        # scan the unsupervised set to calculate labels using the previously trained best model:
        print("Calculating labels for unsupervised set..")
        unsup_index_to_labels = {}
        unsup_set_loader = self.problem.loader_for_dataset(self.problem.unsup_set())
        mini_batch_size = self.problem.mini_batch_size()
        for batch_idx, (inputs, _) in enumerate(unsup_set_loader):
            if self.use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs, volatile=True)
            predicted = self.best_model(inputs)
            if predicted.size()[1] == self.problem.num_classes():
                # need to take the argmax to find the index of predicted class.
                _, predicted = torch.max(predicted.data, 1)
            predicted = predicted.type(torch.cuda.LongTensor) if self.use_cuda \
                else predicted.type(torch.LongTensor)

            select = torch.index_select(self.best_model_confusion_matrix, dim=0, index=predicted)
            select = select.type(torch.cuda.FloatTensor) if self.use_cuda \
                else select.type(torch.FloatTensor)
            confusion_labels = torch.renorm(select, p=1, dim=1, maxnorm=1)

            start_of_range = batch_idx * mini_batch_size
            # Iterate over the rows actually present: the last batch may hold
            # fewer than mini_batch_size examples, and indexing past it raised
            # an IndexError.
            for offset in range(confusion_labels.size(0)):
                unsup_index_to_labels[start_of_range + offset] = confusion_labels[offset]

            progress_bar(batch_idx, self.args.num_shaving / mini_batch_size)
            if batch_idx * mini_batch_size > self.args.num_shaving:
                break

        self.net = self.best_model
        print("Training with unsupervised set..")

    for epoch in range(self.start_epoch, self.start_epoch + self.args.num_epochs):
        perfs = PerformanceList()
        perfs += self.train_unsup_only(epoch, unsup_index_to_label=unsup_index_to_labels) if unsup_only else \
            self.train(epoch, train_supervised_model=True)

        perfs += [lr_train_helper]
        if previous_test_perfs is None or self.epoch_is_test_epoch(epoch):
            perfs += self.test(epoch)
            test_loss = perfs.get_metric("test_loss")

        if not header_written:
            header_written = True
            self.log_performance_header(perfs)

        early_stop, perfs = self.log_performance_metrics(epoch, perfs)
        if early_stop:
            # early stopping requested.
            return perfs

        if self.args.rollback_when_worse and epochs_since_rollback > 5 and test_loss > best_test_loss:
            self.net = self.load_checkpoint()
            if self.use_cuda:
                self.net.cuda()
            print("Rolled-back")
            num_rollbacks += 1
            epochs_since_rollback = 0
        else:
            # Only record an improvement. The original overwrote the value
            # with the latest loss, so "best" tracked the previous epoch.
            if test_loss is not None and test_loss < best_test_loss:
                best_test_loss = test_loss
            epochs_since_rollback += 1

    print("best test loss={} rolled-back {} times.".format(best_test_loss, num_rollbacks))
    return perfs
def train_unsup_only(self, epoch, unsup_index_to_label, performance_estimators=None):
    """Run one epoch of training on the unsupervised set, using supplied labels.

    The first ``self.args.num_shaving`` unsupervised examples are visited in order
    (no shuffling). A new SGD optimizer is created on every call and stored in
    ``self.optimizer_training``. The loss is ``BCELoss`` between the labels and
    ``torch.renorm(torch.exp(outputs), p=1, maxnorm=1, dim=1)``.

    NOTE(review): a second ``train_unsup_only`` with a different signature is
    defined further down; if both live in the same class the later one shadows
    this one - confirm.

    :param epoch: epoch number, only used for the console message.
    :param unsup_index_to_label: map from index of the unsupervised example to label
        (in one hot encoding format, one element per class)
    :param performance_estimators: estimators to update; defaults to optimized_loss,
        train_loss and train_grad_norm helpers.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [
            LossHelper("optimized_loss"),
            LossHelper("train_loss"),
            FloatHelper("train_grad_norm"),
        ]

    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    def label_of(index):
        # labels come from the caller-provided map.
        return unsup_index_to_label[index]

    labelled_subset = SubsetDataset(self.problem.unsup_set(),
                                    range(0, self.args.num_shaving),
                                    get_label=label_of)
    num_examples = len(labelled_subset)
    loader = torch.utils.data.DataLoader(labelled_subset,
                                         batch_size=self.problem.mini_batch_size(),
                                         shuffle=False,
                                         num_workers=0)
    self.optimizer_training = torch.optim.SGD(self.net.parameters(),
                                              lr=self.args.lr,
                                              momentum=self.args.momentum,
                                              weight_decay=self.args.L2)
    self.net.train()
    # binary cross-entropy against the (soft) labels from unsup_index_to_label.
    bce = BCELoss()

    for batch_idx, (inputs, targets) in enumerate(loader):
        if self.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
        inputs = Variable(inputs)
        targets = Variable(targets, requires_grad=False)

        self.optimizer_training.zero_grad()
        # renormalize exp(outputs) before comparing with the labels.
        outputs = torch.renorm(torch.exp(self.net(inputs)), p=1, maxnorm=1, dim=1)
        loss = bce(outputs, targets)
        loss.backward()
        self.optimizer_training.step()

        performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                          grad_norm(self.net.parameters()))
        # the optimized loss and the training loss are the same quantity here.
        for metric_name in ("optimized_loss", "train_loss"):
            performance_estimators.set_metric_with_outputs(
                batch_idx, metric_name, loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     num_examples,
                     performance_estimators.progress_message(["train_loss", "train_accuracy"]))

    return performance_estimators
def regularize(self, epoch, performance_estimators=None,
               previous_ureg_loss=1.0, previous_training_loss=1.0):
    """
    Performs training vs test regularization/shaving phase.

    For each of ``self.num_shaving_epochs`` shaving steps, batches of unsupervised
    examples are paired with batches read from ``self.trainloader`` (labels are
    ignored). Each pair yields a regularization loss through
    ``self.estimate_regularization_loss``; the loss is scaled by
    ``self.args.ureg_alpha`` and optimized with ``self.optimizer_reg``. The ureg
    model is trained on every pair as well.

    :param epoch: epoch number, only used for the console message.
    :param performance_estimators: estimators for performance metrics to collect.
        Defaults to reg_grad_norm, reg_loss and ureg_alpha helpers.
    :param previous_ureg_loss: not used by this method.
    :param previous_training_loss: not used by this method.
    :return: the performance estimators.
    """
    print('\nRegularizing, epoch: %d' % epoch)
    self.net.train()
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators.append(FloatHelper("reg_grad_norm"))
        performance_estimators.append(LossHelper("reg_loss"))
        performance_estimators.append(FloatHelper("ureg_alpha"))

    trainiter = iter(self.trainloader)
    train_examples_used = 0
    use_max_shaving_records = self.args.num_shaving
    # make sure we process the entire training set, but limit how many regularization_examples
    # we scan (randomly from the entire set):
    if self.max_examples_per_epoch > self.args.num_training:
        max_loop_index = min(self.max_examples_per_epoch, self.max_regularization_examples)
    else:
        max_loop_index = self.args.num_training

    performance_estimators.init_performance_metrics()
    unsuper_records_to_be_seen = min(max_loop_index, use_max_shaving_records)
    # max_loop_index is the number of times training examples are seen,
    # use_max_shaving_records is the number of times unsupervised examples are seen,
    # estimate weights:
    a = unsuper_records_to_be_seen / max_loop_index
    b = 1
    weight_s = a / (a + b)
    weight_u = 1 / (a + b)
    print("weight_s={} weight_u={} unsuper_records_to_be_seen={} max_loop_index={}".format(
        weight_s, weight_u, unsuper_records_to_be_seen, max_loop_index))

    for shaving_index in range(self.num_shaving_epochs):
        print("Shaving step {}".format(shaving_index))
        # obtain a loader over the unsupervised samples for this shaving step:
        unsupsampler = self.problem.reg_loader_subset_range(0, use_max_shaving_records)
        performance_estimators.init_performance_metrics()
        performance_estimators.set_metric(1, "ureg_alpha", self.ureg._alpha)

        for batch_idx, (inputs, _) in enumerate(unsupsampler):
            if self.use_cuda:
                inputs = inputs.cuda()

            self.optimizer_reg.zero_grad()
            uinputs = Variable(inputs)

            # don't use more training examples than allowed (-n) even if we don't use
            # their labels:
            # NOTE(review): train_examples_used counts batches while num_training looks like
            # an example count - confirm the intended unit.
            if train_examples_used > self.args.num_training:
                trainiter = iter(self.trainloader)
                train_examples_used = 0
            try:
                # read a minibatch from the training loader (labels are discarded):
                features, _ = next(trainiter)
            except StopIteration:
                trainiter = iter(self.trainloader)
                features, _ = next(trainiter)
            train_examples_used += 1

            if self.use_cuda:
                features = features.cuda()

            # then use it to calculate the unsupervised regularization contribution to the loss:
            inputs = Variable(features)

            regularization_loss = self.estimate_regularization_loss(inputs, uinputs,
                                                                    weight_s=weight_s,
                                                                    weight_u=weight_u)
            if regularization_loss is not None:
                regularization_loss = regularization_loss * self.args.ureg_alpha
                optimized_loss = regularization_loss.data[0]
                performance_estimators.set_metric(batch_idx, "reg_loss", optimized_loss)
                regularization_loss.backward()
                # measure the gradient norm only after backward(): before it the gradients
                # were just cleared by zero_grad() and say nothing about this step.
                reg_grad_norm = grad_norm(self.net.parameters())
                performance_estimators.set_metric(batch_idx, "reg_grad_norm", reg_grad_norm)
                self.optimizer_reg.step()
            else:
                print("Found None in regularize")
                optimized_loss = 0

            # NOTE(review): index 0 is the reg_grad_norm helper in the default list, yet it is
            # fed the loss here - confirm which estimator was meant.
            performance_estimators[0].observe_performance_metric(batch_idx, optimized_loss,
                                                                 inputs, uinputs)

            # keep training the ureg model while regularizing, this is needed to keep ureg
            # relevant to the regularized weights:
            self.ureg.train_ureg(inputs, uinputs)
            progress_bar(batch_idx * self.mini_batch_size, max_loop_index,
                         " ".join([performance_estimator.progress_message()
                                   for performance_estimator in performance_estimators]))

            if ((batch_idx + 1) * self.mini_batch_size) > max_loop_index:
                break

        print()

    return performance_estimators
def train_with_two_halves(self, epoch, optimizer_training,
                          performance_estimators=None,
                          train_supervised_model=True,
                          ):
    """Run one training epoch that mixes original images with reconstructed ones.

    For each training batch, ``half_images`` produces an image half which
    ``self.image_encoder`` and ``self.image_generator`` (both in eval mode) turn
    into a reconstructed image. ``self.args.mode`` selects how it is used:

    * ``"separate"``: one optimizer step on the reconstructed image, then a second
      step on the original inputs;
    * ``"average"``: one step on the average of the inputs and the reconstruction;
    * ``"uonly"``: one step on the reconstruction only.

    :param epoch: epoch number, only used for the console message.
    :param optimizer_training: optimizer used to update ``self.net``.
    :param performance_estimators: estimators to update; a default list is built
        when None.
    :param train_supervised_model: not used by this method.
    :return: the performance estimators.
    :raises ValueError: when ``self.args.mode`` is not one of the modes above.
    """
    separate_mode = self.args.mode == "separate"
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("supervised_loss")]
        if separate_mode:
            performance_estimators += [LossHelper("unsup_loss")]
        performance_estimators += [AccuracyHelper("train_")]
        performance_estimators += [FloatHelper("supervised_grad_norm")]
        if separate_mode:
            performance_estimators += [FloatHelper("unsup_grad_norm")]

    print('\nTraining, epoch: %d' % epoch)

    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()

    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    # the encoder/generator pair is only used for inference here.
    self.image_encoder.eval()
    self.image_generator.eval()
    self.net.train()

    for batch_idx, (inputs, targets) in enumerate(train_loader_subset):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        self.net.zero_grad()
        optimizer_training.zero_grad()

        image1, _ = half_images(inputs, slope=get_random_slope(), cuda=self.use_cuda)
        encoded = self.image_encoder(image1)
        unsup_image = self.image_generator(encoded)

        if self.args.mode == "separate":
            inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)
            # first step: train on the reconstructed image, detached so that no gradient
            # flows back into the encoder/generator.
            outputs = self.net(unsup_image.detach())
            unsup_loss = self.criterion(outputs, targets)
            unsup_loss.backward()
            unsup_grad_norm = grad_norm(self.net.parameters())
            optimizer_training.step()

            # second step: train on the original inputs.
            self.net.zero_grad()
            optimizer_training.zero_grad()
            outputs = self.net(inputs)
            supervised_loss = self.criterion(outputs, targets)
            supervised_loss.backward()
            # measured after backward() so it reflects the supervised gradients; the
            # reconstructed-image norm recorded above is left untouched.
            supervised_grad_norm = grad_norm(self.net.parameters())
            optimizer_training.step()
        elif self.args.mode == "average":
            inputs = (inputs + unsup_image.data) / 2
            inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)
            outputs = self.net(inputs)
            supervised_loss = self.criterion(outputs, targets)
            supervised_loss.backward()
            supervised_grad_norm = grad_norm(self.net.parameters())
            optimizer_training.step()
        elif self.args.mode == "uonly":
            inputs = unsup_image.data
            inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)
            outputs = self.net(inputs)
            supervised_loss = self.criterion(outputs, targets)
            supervised_loss.backward()
            supervised_grad_norm = grad_norm(self.net.parameters())
            optimizer_training.step()
        else:
            # without this the metric code below would fail on unbound names.
            raise ValueError("Unsupported mode: {}".format(self.args.mode))

        performance_estimators.set_metric(batch_idx, "supervised_grad_norm", supervised_grad_norm)
        if separate_mode:
            performance_estimators.set_metric(batch_idx, "unsup_grad_norm", unsup_grad_norm)
            performance_estimators.set_metric_with_outputs(batch_idx, "unsup_loss",
                                                           unsup_loss.data[0], outputs, targets)
        performance_estimators.set_metric_with_outputs(batch_idx, "supervised_loss",
                                                       supervised_loss.data[0], outputs, targets)
        performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy",
                                                       supervised_loss.data[0], outputs, targets)
        performance_estimators.set_metric_with_outputs(batch_idx, "train_loss",
                                                       supervised_loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     min(self.max_regularization_examples, self.max_training_examples),
                     " ".join([performance_estimator.progress_message()
                               for performance_estimator in performance_estimators]))

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
def train_ureg_to_convergence(self, problem, train_dataset, unsup_dataset,
                              performance_estimators=None,
                              epsilon=0.01,
                              max_epochs=30,
                              max_examples=None):
    """Train the ureg model for a number of epochs until improvements in the loss are minor.

    Each epoch zips a shuffled pass over ``train_dataset`` with a shuffled pass over
    ``unsup_dataset`` (the shorter of the two is cycled) and calls ``self.train_ureg``
    on every pair of batches. Training stops when the average loss improves by less
    than ``epsilon``, when it gets worse and no scheduler is configured, or after
    ``max_epochs`` epochs.

    :param problem passed through to ``self.shuffling_iter``.
    :param train_dataset source of supervised examples.
    :param unsup_dataset source of unsupervised examples.
    :param performance_estimators estimators to update; defaults to ureg_loss and
        ureg_accuracy helpers. The first estimator is read as the loss average
        used for the stopping condition.
    :param epsilon used to determine convergence.
    :param max_epochs maximum number of epochs before stopping
    :param max_examples maximum number of examples to scan per epoch. When None, it is
        derived from the longer of the two datasets.
    :return list of performance estimators
    """
    from itertools import cycle

    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("ureg_loss"), FloatHelper("ureg_accuracy")]

    len_supervised = len(train_dataset)
    len_unsupervised = len(unsup_dataset)
    print("Training ureg to convergence with {} training and {} unsupervised samples,"
          " using at most {} shuffled combinations of examples per training epoch"
          .format(len_supervised * self._mini_batch_size,
                  len_unsupervised * self._mini_batch_size,
                  max_examples))
    self._adjust_learning_rate(self._learning_rate)
    previous_average_loss = sys.maxsize

    for ureg_epoch in range(0, max_epochs):
        # reset metric at each ureg training epoch (we use the loss average as stopping condition):
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        # cycle whichever side is shorter so that zip() covers the longer one.
        supervised_iter = iter(self.shuffling_iter(problem, train_dataset))
        if len_supervised < len_unsupervised:
            supervised_iter = iter(cycle(supervised_iter))
        # the unsupervised side must draw from unsup_dataset in both branches; the
        # previous code cycled train_dataset here, pairing training data with itself.
        unsupervised_iter = iter(self.shuffling_iter(problem, unsup_dataset))
        if len_unsupervised < len_supervised:
            unsupervised_iter = iter(cycle(unsupervised_iter))

        if max_examples is None:
            max_examples = max(len_supervised, len_unsupervised) * self._mini_batch_size

        num_batches = 0
        batch_pairs = zip(supervised_iter, unsupervised_iter)
        for batch_idx, ((s_input, _), (u_input, _)) in enumerate(batch_pairs):
            xs = Variable(s_input)
            xu = Variable(u_input)
            if self._use_cuda:
                xs = xs.cuda()
                xu = xu.cuda()

            weight_s, weight_u = self.loss_weights(None, None)
            loss = self.train_ureg(xs, xu, weight_s, weight_u)
            if loss is not None:
                num_batches += 1
                performance_estimators.set_metric_with_outputs(batch_idx, "ureg_loss",
                                                               loss.data[0], None, None)
                performance_estimators.set_metric(batch_idx, "ureg_accuracy",
                                                  self.ureg_accuracy())

            epoch_ = "epoch " + str(ureg_epoch) + " "
            progress_bar(batch_idx * self._mini_batch_size, max_examples,
                         epoch_ + " ".join([performance_estimator.progress_message()
                                            for performance_estimator in performance_estimators]))

            if (batch_idx + 1) * self._mini_batch_size > max_examples:
                break

        average_loss = performance_estimators[0].estimates_of_metric()[0]
        if average_loss > previous_average_loss:
            # the loss got worse: let the scheduler react, or give up without one.
            if self._scheduler is not None:
                self.schedule(epoch=ureg_epoch, val_loss=average_loss)
            else:
                break
        if average_loss < previous_average_loss and abs(average_loss - previous_average_loss) < epsilon:
            # improvement smaller than epsilon: converged.
            break
        previous_average_loss = average_loss

    return performance_estimators
def train_unsup_only(self, epoch, performance_estimators=None):
    """Run one epoch of image-reconstruction training on the unsupervised set.

    For each shuffled batch of the first ``self.args.num_shaving`` unsupervised
    examples, ``half_images`` yields two image halves; the first half is encoded
    with ``self.image_encoder`` and decoded with ``self.image_generator``. The
    mean squared error between the generated image and the full input is
    optimized with ``self.optimizer``.

    :param epoch: epoch number, used for the console message and when saving images.
    :param performance_estimators: estimators to update; defaults to train_loss,
        encoder_grad_norm, generator_grad_norm and net_grad_norm helpers.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [
            LossHelper("train_loss"),
            FloatHelper("encoder_grad_norm"),
            FloatHelper("generator_grad_norm"),
            FloatHelper("net_grad_norm"),
        ]

    print('\nTraining, epoch: %d' % epoch)
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    # labels are irrelevant for reconstruction, every example gets the constant 1.
    unsup_subset = SubsetDataset(self.problem.unsup_set(),
                                 range(0, self.args.num_shaving),
                                 get_label=lambda index: 1)
    num_examples = len(unsup_subset)
    loader = torch.utils.data.DataLoader(unsup_subset,
                                         batch_size=self.problem.mini_batch_size(),
                                         shuffle=True,
                                         num_workers=0)

    # reconstruction quality is scored with mean squared error.
    reconstruction_loss = MSELoss()
    self.net.train()
    self.image_generator.train()
    self.image_encoder.train()

    for batch_idx, (inputs, _) in enumerate(loader):
        if self.use_cuda:
            inputs = inputs.cuda()

        self.optimizer.zero_grad()
        image1, image2 = half_images(inputs, slope=get_random_slope(), cuda=self.use_cuda)

        # encode the first half, then generate an image from the encoding:
        output = self.image_generator(self.image_encoder(image1))
        full_image = Variable(inputs, requires_grad=False)
        optimized_loss = reconstruction_loss(output, full_image)
        optimized_loss.backward()
        self.optimizer.step()

        if batch_idx == 0:
            # keep a visual sample of the first batch of the epoch.
            self.save_images(epoch, image1, image2, generated_image2=output, prefix="train")

        norms = (
            ("encoder_grad_norm", grad_norm(self.image_encoder.parameters())),
            ("generator_grad_norm", grad_norm(self.image_generator.parameters())),
            ("net_grad_norm", grad_norm(self.net.parameters())),
        )
        for metric_name, value in norms:
            performance_estimators.set_metric(batch_idx, metric_name, value)
        performance_estimators.set_metric(batch_idx, "train_loss", optimized_loss.data[0])

        progress_bar(batch_idx * self.mini_batch_size,
                     num_examples,
                     performance_estimators.progress_message(["train_loss", "train_accuracy"]))

    return performance_estimators
def train(self, epoch,
          performance_estimators=None,
          train_supervised_model=True,
          ):
    """Run one supervised training epoch on training data plus made-up-label examples.

    The training data is the concatenation of the first ``self.args.num_training``
    training examples and ``int(self.args.unsup_proportion * self.args.num_training)``
    unsupervised examples drawn at random (with replacement) among the first
    ``self.args.num_shaving``. The unsupervised examples receive a uniformly random
    class label. Only ``--label-strategy RANDOM_UNIFORM`` is supported; any other
    value prints a message and exits the process with status 1.

    :param epoch: epoch number, only used for the console message.
    :param performance_estimators: estimators to update; defaults to optimized_loss,
        train_loss, train_ accuracy and train_grad_norm helpers.
    :param train_supervised_model: when false, the forward pass still runs but no
        loss is computed, optimized or recorded.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("optimized_loss")]
        performance_estimators += [LossHelper("train_loss")]
        performance_estimators += [AccuracyHelper("train_")]
        performance_estimators += [FloatHelper("train_grad_norm")]

    print('\nTraining, epoch: %d' % epoch)

    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()

    # numpy.random.random_integers is deprecated; randint with an exclusive upper
    # bound of num_shaving draws from the same inclusive range [0, num_shaving - 1].
    unsup_examples = numpy.random.randint(
        0, self.args.num_shaving,
        int(self.args.unsup_proportion * self.args.num_training))

    if self.args.label_strategy == "RANDOM_UNIFORM":
        def made_up_label(index):
            # uniformly random class, independent of the example index.
            return randint(0, self.problem.num_classes() - 1)
    else:
        print("Unsupported --label-strategy: " + self.args.label_strategy +
              " only RANDOM_UNIFORM is supported with this mode.")
        exit(1)

    training_dataset = ConcatDataset(datasets=[
        SubsetDataset(self.problem.train_set(), range(0, self.args.num_training)),
        SubsetDataset(self.problem.unsup_set(), unsup_examples, get_label=made_up_label),
    ])
    length = len(training_dataset)
    train_loader_subset = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=self.problem.mini_batch_size(),
                                                      shuffle=True,
                                                      num_workers=0)

    for batch_idx, (inputs, targets) in enumerate(train_loader_subset):
        if self.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs), Variable(targets, requires_grad=False)

        self.net.train()
        self.optimizer_training.zero_grad()
        outputs = self.net(inputs)

        if train_supervised_model:
            supervised_loss = self.criterion(outputs, targets)
            optimized_loss = supervised_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            supervised_grad_norm = grad_norm(self.net.parameters())

            # metrics are recorded here because the losses only exist in this branch.
            performance_estimators.set_metric(batch_idx, "train_grad_norm", supervised_grad_norm)
            performance_estimators.set_metric_with_outputs(batch_idx, "optimized_loss",
                                                           optimized_loss.data[0], outputs, targets)
            performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy",
                                                           supervised_loss.data[0], outputs, targets)
            performance_estimators.set_metric_with_outputs(batch_idx, "train_loss",
                                                           supervised_loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     length,
                     performance_estimators.progress_message(["train_loss", "train_accuracy"]))

        if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
            break

    return performance_estimators
def train_linear_combination(self, epoch,
                             performance_estimators=None,
                             train_supervised_model=True,
                             train_ureg=True,
                             ):
    """Run one epoch optimizing a linear combination of supervised and regularization loss.

    Each training batch is paired with a batch from the unsupervised loader, which
    restarts when exhausted. With ``train_ureg`` the ureg model is trained on the
    pair and its learning rate schedule is updated. With ``train_supervised_model``
    the network is optimized on
    ``supervised_loss * (1 - alpha) + regularization_loss * alpha`` where ``alpha`` is
    ``self.args.ureg_alpha``, or on the supervised loss alone when no regularization
    loss is available.

    :param epoch: epoch number, used for the console message and the ureg schedule.
    :param performance_estimators: estimators to update; a default list is built
        when None.
    :param train_supervised_model: optimize ``self.net`` when true.
    :param train_ureg: fetch unsupervised batches and train the ureg model when true.
    :return: the performance estimators.
    """
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("train_loss"), AccuracyHelper("train_")]
        performance_estimators += [LossHelper("reg_loss")]
        if train_ureg:
            performance_estimators += [LossHelper("ureg_loss"), FloatHelper("ureg_accuracy")]
        performance_estimators += [FloatHelper("train_grad_norm")]

    print('\nTraining, epoch: %d' % epoch)

    self.net.train()
    for estimator in performance_estimators:
        estimator.init_performance_metrics()

    supervised_loader = self.problem.train_loader_subset_range(0, self.args.num_training)
    unsup_loader = self.problem.reg_loader_subset_range(0, self.args.num_shaving)
    unsup_batches = iter(unsup_loader)

    for batch_idx, (inputs, targets) in enumerate(supervised_loader):
        if self.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
        inputs = Variable(inputs)
        targets = Variable(targets, requires_grad=False)

        # the supervised outputs are computed before any ureg/regularization work:
        self.net.train()
        self.optimizer_training.zero_grad()
        outputs = self.net(inputs)

        if train_ureg:
            # fetch the next unsupervised minibatch, restarting the loader when it runs out:
            try:
                ufeatures, _ = next(unsup_batches)
            except StopIteration:
                unsup_batches = iter(unsup_loader)
                ufeatures, _ = next(unsup_batches)
            if self.use_cuda:
                ufeatures = ufeatures.cuda()
            uinputs = Variable(ufeatures)

        performance_estimators.set_metric(batch_idx, "ureg_alpha", self.ureg._alpha)

        if train_ureg:
            ureg_loss = self.ureg.train_ureg(inputs, uinputs)
            if ureg_loss is not None:
                performance_estimators.set_metric(batch_idx, "ureg_loss", ureg_loss.data[0])
                performance_estimators.set_metric(batch_idx, "ureg_accuracy",
                                                  self.ureg.ureg_accuracy())
                # adjust ureg model learning rate as needed:
                self.ureg.schedule(ureg_loss.data[0], epoch)

        if train_supervised_model:
            alpha = self.args.ureg_alpha
            supervised_loss = self.criterion(outputs, targets)
            regularization_loss = self.estimate_regularization_loss(inputs, uinputs, 1., 1.)
            if regularization_loss is None:
                optimized_loss = supervised_loss
            else:
                optimized_loss = supervised_loss * (1. - alpha) + regularization_loss * alpha
                performance_estimators.set_metric(batch_idx, "reg_loss",
                                                  regularization_loss.data[0])
            optimized_loss.backward()
            performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                              grad_norm(self.net.parameters()))
            self.optimizer_training.step()
            for metric_name in ("train_loss", "train_accuracy"):
                performance_estimators.set_metric_with_outputs(
                    batch_idx, metric_name, optimized_loss.data[0], outputs, targets)

        progress_bar(batch_idx * self.mini_batch_size,
                     min(self.max_regularization_examples, self.max_training_examples),
                     " ".join([estimator.progress_message()
                               for estimator in performance_estimators]))

        examples_seen = (batch_idx + 1) * self.mini_batch_size
        if (examples_seen > self.max_regularization_examples
                or examples_seen > self.max_training_examples):
            break

    print("\n")
    return performance_estimators