def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
    """Get a `Loss` for the given forward pass and resulting rewards/labels.

    Take a look at the `AuxiliaryTask` class for more info.

    NOTE: This is the same simplified version of EWC used throughout the
    other examples: the loss is the p-norm between the current weights and
    the weights as they were at the beginning of the task.

    Also note that this particular example doesn't actually use the provided
    arguments.
    """
    if self.previous_task is None:
        # We're in the first task: do nothing.
        return Loss(name=self.name)

    old_weights: Dict[str, Tensor] = self.previous_model_weights
    new_weights: Dict[str, Tensor] = dict(self.model.named_parameters())

    loss = 0.0
    for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
        loss += torch.dist(new_w, old_w.type_as(new_w), p=self.options.distance_norm)

    ewc_loss = Loss(name=self.name, loss=loss)
    return ewc_loss
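# The `dict_intersection` helper used above isn't defined in this section.
# A minimal sketch of what it is assumed to do: yield (key, (value_a, value_b))
# pairs for the keys the two dicts have in common.
from typing import Dict, Iterable, Tuple
from torch import Tensor

def dict_intersection(a: Dict[str, Tensor], b: Dict[str, Tensor]) -> Iterable[Tuple[str, Tuple[Tensor, Tensor]]]:
    for key in a.keys() & b.keys():
        yield key, (a[key], b[key])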
def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor, y: Tensor = None) -> Loss:
    if y is None:
        return Loss()

    if not y_pred.requires_grad:
        # Can't evaluate the IRM score when `y_pred` doesn't require grad!
        # Recompute the predictions with gradients enabled.
        with torch.enable_grad():
            y_pred = self.classifier(h_x)
            loss = self.penalty(y_pred, y)
        return Loss(loss)

    loss = self.penalty(y_pred, y)
    return Loss(loss)
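# `self.penalty` isn't shown in this section. A plausible implementation,
# following the IRMv1 penalty from Arjovsky et al. (2019): the squared norm of
# the gradient of the risk with respect to a dummy classifier scale of 1.0.
import torch
import torch.nn.functional as F
from torch import Tensor

def irm_penalty(logits: Tensor, y: Tensor) -> Tensor:
    scale = torch.ones(1, device=logits.device, requires_grad=True)
    loss = F.cross_entropy(logits * scale, y)
    grad = torch.autograd.grad(loss, [scale], create_graph=True)[0]
    return (grad ** 2).sum()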
def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor, y: Tensor = None) -> Loss:
    batch_size = x.shape[0]
    # Mixing happens between pairs of samples, so if the batch has an odd
    # number of samples, drop the last one.
    if batch_size % 2 != 0:
        h_x = h_x[:-1]
        y_pred = y_pred[:-1]

    mix_coeff = torch.rand(batch_size // 2, dtype=x.dtype, device=x.device)

    h1 = h_x[0::2]
    h2 = h_x[1::2]
    mix_h_x = mixup(h1, h2, mix_coeff)

    y_pred_1 = y_pred[0::2]
    y_pred_2 = y_pred[1::2]
    y_pred_mix = mixup(y_pred_1, y_pred_2, mix_coeff)

    mix_y_pred = self.classifier(mix_h_x)

    loss = torch.dist(y_pred_mix, mix_y_pred)
    loss_info = Loss(
        name=self.name,
        total_loss=loss,
        y_pred=y_pred_mix,
        y=mix_y_pred,
    )
    return loss_info
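# A minimal sketch of the `mixup` helper assumed above: a per-pair convex
# combination, with the coefficient broadcast over the non-batch dimensions.
from torch import Tensor

def mixup(a: Tensor, b: Tensor, coeff: Tensor) -> Tensor:
    # Reshape coeff from [B] to [B, 1, ..., 1] so it broadcasts over a and b.
    coeff = coeff.view([coeff.shape[0]] + [1] * (a.dim() - 1))
    return coeff * a + (1 - coeff) * b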
def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
    x = forward_pass["x"]
    h_x = forward_pass["h_x"]
    h_x = h_x.view([h_x.shape[0], -1])

    mu, logvar = self.mu(h_x), self.logvar(h_x)
    z = self.reparameterize(mu, logvar)
    x_hat = self.decoder(z)

    recon_loss = self.reconstruction_loss(x_hat, x)
    kl_loss = self.options.beta * self.kl_divergence_loss(mu, logvar)

    loss = Loss(self.name, tensors=dict(mu=mu, logvar=logvar, z=z, x_hat=x_hat))
    loss += Loss("recon", loss=recon_loss)
    loss += Loss("kl", loss=kl_loss)
    return loss
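# `reparameterize` and `kl_divergence_loss` are not shown in this section.
# Sketches using the standard VAE formulas (Kingma & Welling, 2014); the
# actual methods on the class may differ, e.g. in reduction (sum vs. mean).
import torch
from torch import Tensor

def reparameterize(mu: Tensor, logvar: Tensor) -> Tensor:
    # z = mu + sigma * eps, with eps ~ N(0, I).
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std

def kl_divergence_loss(mu: Tensor, logvar: Tensor) -> Tensor:
    # KL(N(mu, sigma^2) || N(0, I)), summed over latent dims, averaged over the batch.
    return (-0.5 * (1 + logvar - mu.pow(2) - logvar.exp()).sum(dim=-1)).mean()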
def get_loss_for_arg(self, x: Tensor, h_x: Tensor, fn_arg: Any, alpha: Tensor) -> Loss:
    alpha = alpha.to(x.device)
    # TODO: Transform before or after the `preprocess_inputs` function?
    x = fix_channels(x)

    # Transform x using the function.
    x_t = self.function(x, fn_arg)
    # Get the code for the transformed x.
    h_x_t = self.encode(x_t)

    aux_layer_input = h_x_t
    if self.options.compare_with_original:
        aux_layer_input = torch.cat([h_x, h_x_t], dim=-1)

    # Get the predicted argument of the transformation.
    alpha_t = self.auxiliary_layer(aux_layer_input)

    # Get the metrics for this particular argument (accuracy, MSE, etc.).
    if isinstance(fn_arg, int):
        name = f"{fn_arg}"
    else:
        name = f"{fn_arg:.3f}"
    loss = Loss(name)
    loss.loss = self.loss(alpha_t, alpha)
    loss.metrics[name] = get_metrics(x=x_t, h_x=h_x_t, y_pred=alpha_t, y=alpha)

    # Save some tensors for debugging purposes:
    loss.tensors["x_t"] = x_t
    loss.tensors["h_x_t"] = h_x_t
    loss.tensors["alpha_t"] = alpha_t
    return loss
def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
    """Gets the EWC loss."""
    if self._model.training:
        self.observation_collector.append(forward_pass.observations)

    if self.previous_task is None or not self.enabled or self._shared_net is None:
        # We're in the first task: do nothing.
        return Loss(name=self.name)

    loss = 0.0
    v_current = PVector.from_model(self._shared_net)
    for fim in self.fisher_information_matrices:
        diff = v_current - self.previous_model_weights
        loss += fim.vTMv(diff)
    self._i += 1

    ewc_loss = Loss(name=self.name, loss=loss)
    return ewc_loss
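# For context, a sketch of how `self.fisher_information_matrices` and
# `self.previous_model_weights` might be populated at a task boundary with
# `nngeometry` (hedged: `on_task_boundary`, `self.n_outputs`, and
# `dataset_loader` are hypothetical names, not part of the code above, and the
# `FIM` keyword arguments follow the version of the library this code appears
# to target).
from nngeometry.metrics import FIM
from nngeometry.object.pspace import PMatDiag
from nngeometry.object.vector import PVector

def on_task_boundary(self, dataset_loader):
    # Snapshot the shared weights: the EWC loss above penalizes movement away
    # from this point, weighted by the Fisher information.
    self.previous_model_weights = PVector.from_model(self._shared_net).clone()
    fim = FIM(model=self._shared_net,
              loader=dataset_loader,
              representation=PMatDiag,  # diagonal approximation of the FIM
              n_output=self.n_outputs,
              device=self.device)
    self.fisher_information_matrices.append(fim)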
def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor, y: Tensor = None) -> Loss:
    # Select only unlabelled examples, as in ICT: https://arxiv.org/pdf/1903.03825.pdf
    # TODO: fix this, y may be None, which would break this.
    batch_size = x.shape[0]
    # Mixing happens between pairs of samples, so if the batch has an odd
    # number of samples, drop the last one.
    if batch_size % 2 != 0:
        x = x[:-1]
        y_pred = y_pred[:-1]

    loss = Loss(name=self.name)
    if self.epoch_in_task < self.options.consistency_rampup_starts:
        mixup_consistency_weight = 0.0
    else:
        mixup_consistency_weight = self.get_current_consistency_weight(
            self.epoch_in_task,
            step_in_epoch=self.update_number,
            total_steps_in_epoch=self.epoch_length)

    if batch_size > 0 and mixup_consistency_weight > 0:
        # Use the mean teacher to get the h_x and y_pred for the unlabeled data.
        h_x = self.mean_encode(x)
        y_pred_ema = self.mean_logits(h_x)
        mix_x, y_pred_mix, lam = mixup_data(
            x,
            y_pred_ema.detach(),
            self.options.mixup_usup_alpha)
        loss.tensors["mix_x"] = mix_x.detach()

        mix_h_x = self.encode(mix_x)
        mix_y_pred = self.classifier(mix_h_x)

        loss.tensors["y_pred_mix"] = y_pred_mix.detach()
        consistency_loss = self.consistency_criterion(mix_y_pred, y_pred_mix) / batch_size
        loss.total_loss = mixup_consistency_weight * consistency_loss
    else:
        loss.total_loss = torch.zeros(1, device=self.device, requires_grad=True)
    return loss
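# A plausible implementation of the `mixup_data` helper used above, following
# the ICT reference code: each sample is mixed with another random sample from
# the same batch, with a single coefficient drawn from Beta(alpha, alpha).
from typing import Tuple
import numpy as np
import torch
from torch import Tensor

def mixup_data(x: Tensor, y: Tensor, alpha: float) -> Tuple[Tensor, Tensor, float]:
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0
    index = torch.randperm(x.shape[0], device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    mixed_y = lam * y + (1 - lam) * y[index]
    return mixed_x, mixed_y, lam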
def get_loss(self,
             forward_pass: ForwardPass,
             rewards: Rewards = None,
             loss_name: str = "") -> Loss:
    """Gets a Loss given the results of the forward pass and the reward.

    Args:
        forward_pass (Dict[str, Tensor]): Results of the forward pass.
        rewards (Rewards, optional): The reward that resulted from the action
            chosen in the forward pass. Defaults to None.
        loss_name (str, optional): The name for the resulting Loss.
            Defaults to "".

    Returns:
        Loss: a Loss object containing the loss tensor, associated metrics
        and sublosses.

    This could look a bit like this, for example:

    ```
    action = forward_pass["action"]
    predicted_reward = forward_pass["predicted_reward"]
    nce = self.loss_fn(predicted_reward, reward)
    loss = Loss(loss_name, loss=nce)
    return loss
    ```
    """
    assert loss_name
    # Create an 'empty' Loss object with the given name, so that we always
    # return a Loss object, even when `y` is None and we can't get the loss
    # from the output head.
    total_loss = Loss(name=loss_name)
    if rewards:
        assert rewards.y is not None
        # TODO: If we decide to re-organize the forward pass object to also
        # contain the predictions of the self-supervised tasks (atm they
        # perform their 'forward pass' in their `get_loss` methods), then we
        # could change 'actions' to be a dict, and index it with the name of
        # each output head, like so:
        # actions_of_head = forward_pass.actions[self.output_head.name]
        # rewards_of_head = forward_pass.rewards[self.output_head.name]

        # For now though, we only have one "prediction" in the actions:
        actions = forward_pass.actions
        # So far we only use 'y' from the rewards in the output head.
        supervised_loss = self.output_head_loss(forward_pass,
                                                actions=actions,
                                                rewards=rewards)
        total_loss += supervised_loss
    return total_loss
def get_knn_performance(
    x_t: np.ndarray,
    y_t: np.ndarray,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
    loss_name: str = "KNN",
) -> Loss:
    # Flatten the inputs to two dimensions only.
    x_t = x_t.reshape(x_t.shape[0], -1)
    assert len(x_t.shape) == 2
    x_t = scaler.transform(x_t)

    y_t_prob = knn_classifier.predict_proba(x_t)
    classes = knn_classifier.classes_
    # Make sure the classes are sorted:
    assert np.array_equal(sorted(classes), classes)

    if y_t_prob.shape[-1] == num_classes:
        y_t_logits = y_t_prob
    else:
        # Not all classes were encountered, so we need to 'expand' the
        # predicted logits to the right shape.
        logger.info(f"{y_t_prob.shape} {num_classes}")
        num_classes = max(num_classes, y_t_prob.shape[-1])
        y_t_logits = np.zeros([y_t_prob.shape[0], num_classes], dtype=y_t_prob.dtype)
        for i, logits in enumerate(y_t_prob):
            for label, logit in zip(classes, logits):
                y_t_logits[i][label - 1] = logit

    ## We were constructing this to reorder the classes in case the ordering
    ## was not the same between the KNN's internal `classes_` attribute and
    ## the task classes. However, I'm not sure if this is necessary anymore.
    # y_t_logits = np.zeros((y_t.size, y_t.max() + 1))
    # for i, label in enumerate(classes):
    #     y_t_logits[:, label] = y_t_prob[:, i]

    # We get the cross-entropy using the scikit-learn function, but we could
    # instead get it using pytorch's function (maybe even inside the Loss
    # object!)
    nce_t = log_loss(y_true=y_t, y_pred=y_t_prob, labels=classes)
    # BUG: There is sometimes a case where some classes aren't present in
    # `classes_`, and as such the ClassificationMetrics object created in the
    # Loss constructor has an error.
    test_loss = Loss(loss_name, loss=nce_t, y_pred=y_t_logits, y=y_t)
    return test_loss
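# A sketch of the `fit_knn` counterpart used by `evaluate_knn` below (hedged:
# `options.n_neighbors` is an assumption about the options object, and `Loss`
# and `np` are taken from the surrounding module's imports).
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

def fit_knn(x: np.ndarray, y: np.ndarray, options, num_classes: int,
            loss_name: str = "knn/train") -> Tuple[Loss, StandardScaler, KNeighborsClassifier]:
    x = x.reshape(x.shape[0], -1)
    scaler = StandardScaler().fit(x)
    knn_classifier = KNeighborsClassifier(n_neighbors=options.n_neighbors)
    knn_classifier.fit(scaler.transform(x), y)
    # Reuse `get_knn_performance` (above) to score the training set; it
    # applies the scaler internally, so we pass the unscaled features.
    train_loss = get_knn_performance(x_t=x, y_t=y, scaler=scaler,
                                     knn_classifier=knn_classifier,
                                     num_classes=num_classes,
                                     loss_name=loss_name)
    return train_loss, scaler, knn_classifier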
def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
    x = forward_pass["x"]
    h_x = forward_pass["h_x"]
    # y_pred = forward_pass["y_pred"]
    z = h_x.view([h_x.shape[0], -1])

    if self.decoder is None or self.decoder.output_shape != x.shape:
        self.decoder = self.create_decoder(x.shape)

    x_hat = self.decoder(z)
    assert x_hat.shape == x.shape, (
        f"reconstructed x should have same shape as original x! "
        f"({x_hat.shape} != {x.shape})")

    recon_loss = self.reconstruction_loss(x_hat, x)
    loss_info = Loss(name=self.name, loss=recon_loss)
    return loss_info
def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
    x = forward_pass["x"]
    # TODO: is there a more efficient way to do this than with a list
    # comprehension? (torch multiprocessing-ish?)
    # Concatenate the augmented views of all the x's into a single batch.
    x_t = torch.cat([self.augment(x_i) for x_i in x.cpu()], dim=0)        # [2*B, C, H, W]
    h_t = self.encode(x_t.to(self.device)).flatten(start_dim=1)           # [2*B, repr_dim]
    z = self.projector(h_t)                                               # [2*B, proj_dim]

    loss = self.loss(z, self.hparams.xent_temp)
    loss_object = Loss(name=self.name, loss=loss)
    self.model.log("simclr_loss", loss_object.loss, prog_bar=True)
    return loss_object
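# `self.loss` isn't shown in this section. A sketch of the standard SimCLR
# NT-Xent loss (Chen et al., 2020), assuming the two views of sample k sit at
# rows 2k and 2k+1 of `z`, which matches how `x_t` is built above if
# `self.augment` returns two views per sample.
import torch
import torch.nn.functional as F
from torch import Tensor

def nt_xent_loss(z: Tensor, temperature: float) -> Tensor:
    z = F.normalize(z, dim=1)
    sim = z @ z.t() / temperature       # [2B, 2B] pairwise similarities
    sim.fill_diagonal_(float("-inf"))   # exclude self-similarity
    # The positive for row 2k is row 2k+1 and vice versa (XOR flips the last bit).
    targets = torch.arange(z.shape[0], device=z.device) ^ 1
    return F.cross_entropy(sim, targets)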
def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
    loss_info = Loss(self.name)
    batch_size = x.shape[0]
    assert self.alphas is not None, "set the `self.alphas` attribute in the base class."
    assert self.function_args is not None, "set the `self.function_args` attribute in the base class."

    # Get the loss for each transformation argument.
    for fn_arg, alpha in zip(self.function_args, self.alphas):
        loss_i = self.get_loss_for_arg(x=x, h_x=h_x, fn_arg=fn_arg, alpha=alpha)
        loss_info += loss_i
        # print(f"{self.name}_{fn_arg}", loss_i.metrics)

    # Fuse all the sub-metrics into a total metric: for instance, the
    # "rotate_0", "rotate_90", "rotate_180", etc. metrics all get added up
    # into a single "overall" metric.
    metrics = loss_info.metrics
    total_metrics = sum(loss_info.metrics.values(), Metrics())
    metrics.clear()
    metrics[self.name] = total_metrics
    return loss_info
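# A hedged illustration of how a subclass might set the attributes asserted
# above, for a RotNet-style rotation-prediction task. The attribute values and
# the `rotate` function are hypothetical, not taken from the actual subclasses.
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.function = rotate                  # hypothetical: rotates a batch by k * 90 degrees
    self.function_args = [0, 1, 2, 3]       # the four rotation "arguments"
    self.alphas = [torch.tensor(i) for i in range(4)]  # per-argument target labels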
def get_loss(self,
             forward_pass: Dict[str, Tensor],
             rewards: Optional[Rewards] = None,
             loss_name: str = "") -> Loss:
    """Trains the model on a batch of (potentially partially labeled) data.

    Args:
        forward_pass (Dict[str, Tensor]): WIP: The results of the forward
            pass (processed input, predictions, etc.)
        rewards (Optional[Rewards]): Rewards whose `y` attribute can be:
            - None: fully unlabeled batch
            - Tensor: fully labeled batch
            - List[Optional[Tensor]]: Partially labeled batch.
        loss_name (str, optional): Name of the resulting loss object.
            Defaults to "".

    Returns:
        Loss: a loss object made from both the unsupervised and supervised
        losses.
    """
    # TODO: We could also just use '-1' as the 'no-label' value instead: this
    # would make it a bit simpler than having both numpy arrays and tensors
    # in the batch.
    y: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = rewards.y
    if y is None or all(y_i is not None for y_i in y):
        # Fully labeled/unlabeled batch.
        # NOTE: Tensors can't have None items, so if we get a Tensor, that
        # means that we have all the labels.
        return super().get_loss(forward_pass, rewards, loss_name=loss_name)

    # Batch is a mix of labeled and unlabeled data.
    is_labeled: np.ndarray = np.asarray([y_i is not None for y_i in y])
    labeled_y = y[is_labeled]
    # TODO: Might have to somehow re-order the results based on the indices?
    # TODO: Join (merge) the metrics? Or keep them separate?
    labeled_forward_pass = {k: v[is_labeled] for k, v in forward_pass.items()}
    unlabeled_forward_pass = {k: v[~is_labeled] for k, v in forward_pass.items()}

    labeled_ratio = len(labeled_y) / len(y)
    logger.debug(f"Labeled ratio: {labeled_ratio}")

    # Create the 'total' loss for the batch, with the required name. We then
    # create two 'sublosses', one named 'unsupervised' and one named
    # 'supervised', each containing the respective losses and metrics.
    # TODO: Make sure that this doesn't make it harder to get the metrics
    # from the Loss object. If it does, then we could maybe just fuse the
    # labeled and unlabeled losses and metrics, but that might also cause
    # issues.
    loss = Loss(name=loss_name)
    if unlabeled_forward_pass:
        # TODO: Setting a different loss name for this is definitely going
        # to cause trouble!
        unsupervised_loss = super().get_loss(
            unlabeled_forward_pass,
            rewards=None,
            loss_name="unsupervised",
        )
        loss += unsupervised_loss

    if labeled_forward_pass:
        supervised_loss = super().get_loss(
            labeled_forward_pass,
            rewards=labeled_y,
            loss_name="supervised",
        )
        loss += supervised_loss

    return loss
def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
    """Evaluate the representations with a KNN in the context of CL.

    We shorten the train dataloaders to take only the first `knn_samples`
    samples in order to save some compute.

    TODO: Figure out a way to cleanly add the metrics from the callback to
    the ``log dict'' which is returned by the model. Right now they are only
    printed / logged to wandb directly from here.
    """
    setting = model.datamodule
    assert isinstance(setting, Setting)
    # TODO: Remove this if we want to use this for something other than a
    # Continual setting in the future.
    assert isinstance(setting, ClassIncrementalSetting)

    num_classes = setting.num_classes

    # Check whether the method has access to the task labels at train/test time.
    task_labels_at_test_time: bool = False
    from sequoia.settings import TaskIncrementalSetting
    if isinstance(setting, TaskIncrementalSetting):
        if setting.task_labels_at_test_time:
            task_labels_at_test_time = True

    # TODO: Figure out a way to make sure that we get at least one example
    # of each class to fit the KNN.
    self.knn_samples = max(self.knn_samples, num_classes ** 2)
    self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)

    logger.info(f"Number of classes: {num_classes}")
    logger.info(f"Number of KNN samples: {self.knn_samples}")
    logger.debug(f"Taking a maximum of {self.max_num_batches} batches from each dataloader.")

    train_loaders: List[DataLoader] = self.get_dataloaders(model, mode="train")
    valid_loaders: List[DataLoader] = self.get_dataloaders(model, mode="val")
    test_loaders: List[DataLoader] = self.get_dataloaders(model, mode="test")

    # Only take the first `knn_samples` samples from each dataloader.
    def shorten(dataloader: DataLoader):
        return take(dataloader, n=self.max_num_batches)

    if self.max_num_batches:
        train_loaders = list(map(shorten, train_loaders))
        valid_loaders = list(map(shorten, valid_loaders))
        test_loaders = list(map(shorten, test_loaders))

    # Create an iterator that alternates between each of the train dataloaders.
    # NOTE: we shortened each of the dataloaders just to be sure that we get
    # at least one example of each class.
    train_loader = roundrobin(*train_loaders)

    h_x, y = get_hidden_codes_array(model=model,
                                    dataloader=train_loader,
                                    description="KNN (Train)")
    train_loss, scaler, knn_classifier = fit_knn(x=h_x,
                                                 y=y,
                                                 options=self.knn_options,
                                                 num_classes=num_classes,
                                                 loss_name="knn/train")
    logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
    self.log(train_loss)

    total_valid_loss = Loss("knn/valid")
    # Save the current task ID so we can reset it after testing.
    starting_task_id = model.setting.current_task_id

    for i, dataloader in enumerate(valid_loaders):
        if task_labels_at_test_time:
            model.on_task_switch(i, training=False)
        loss_i = evaluate(model=model,
                          dataloader=dataloader,
                          loss_name=f"[{i}]",
                          scaler=scaler,
                          knn_classifier=knn_classifier,
                          num_classes=setting.num_classes_in_task(i))
        # We use `.absorb(loss_i)` here so that the metrics get merged.
        # That way, if we access `total_valid_loss.accuracy`, this gives the
        # accuracy over all the validation tasks.
        # If we instead used `+= loss_i`, then `loss_i` would become a subloss
        # of `total_valid_loss`, since they have different names.
        # TODO: Explain this in more detail somewhere else.
        total_valid_loss.absorb(loss_i)
        logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}")
        self.log(loss_i)

    logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")
    self.log(total_valid_loss)

    total_test_loss = Loss("knn/test")
    for i, dataloader in enumerate(test_loaders):
        if task_labels_at_test_time:
            model.on_task_switch(i, training=False)
        # TODO: Should we set the number of classes to be the number of
        # classes in the current task?
        loss_i = evaluate(
            model=model,
            dataloader=dataloader,
            loss_name=f"[{i}]",
            scaler=scaler,
            knn_classifier=knn_classifier,
            num_classes=num_classes,
        )
        total_test_loss.absorb(loss_i)
        logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}")
        self.log(loss_i)

    if task_labels_at_test_time:
        model.on_task_switch(starting_task_id, training=False)

    logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
    self.log(total_test_loss)
    return total_valid_loss, total_test_loss
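# Sketches of the iterator helpers assumed above: `take` truncates an iterable
# after n items, and `roundrobin` follows the classic itertools recipe,
# alternating between the given iterables until all are exhausted.
from itertools import cycle, islice

def take(iterable, n: int):
    return islice(iterable, n)

def roundrobin(*iterables):
    # roundrobin('ABC', 'D', 'EF') --> A D E B F C
    num_active = len(iterables)
    nexts = cycle(iter(it).__next__ for it in iterables)
    while num_active:
        try:
            for next_fn in nexts:
                yield next_fn()
        except StopIteration:
            num_active -= 1
            nexts = cycle(islice(nexts, num_active))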
def log(self, loss_object: Loss):
    if self.trainer.logger:
        self.trainer.logger.log_metrics(loss_object.to_log_dict())