def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
    super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

    self.sigma = True
    device = next(self.net.parameters()).device
    self.net.model.output_layer = cosine_linear.CosineLinear(in_features=self.latent_dim,
                                                             out_features=n_cla_per_tsk[0],
                                                             sigma=self.sigma).to(device)
    self.reset_optimizer_and_scheduler()
    self.old_net = copy_freeze(self.net)  # type: Union[ResNet, ResNetCIFAR]

    self.batch_size = config["batch_size"]
    self.lambda_base = config["lucir_lambda"]
    self.lambda_cur = self.lambda_base
    self.K = 2
    self.margin_1 = config["lucir_margin_1"]
    self.margin_2 = config["lucir_margin_2"]

    # setup losses
    # self.loss_classification = nn.CrossEntropyLoss(reduction="mean")
    self.loss_classification = nn.BCEWithLogitsLoss(reduction="mean")
    self.loss_distill = nn.CosineEmbeddingLoss(reduction="mean")
    # several losses to allow for the use of different margins
    self.loss_mr_1 = nn.MarginRankingLoss(margin=self.margin_1, reduction="mean")
    self.loss_mr_2 = nn.MarginRankingLoss(margin=self.margin_2, reduction="mean")

    self.method_variables.extend(["lambda_base", "lambda_cur", "K", "margin_1", "margin_2", "sigma"])
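# Note: the sketch below is not part of the original file. It illustrates, as an assumption, what a
# cosine-similarity output layer such as cosine_linear.CosineLinear typically computes in LUCIR-style
# methods: logits are cosine similarities between L2-normalized features and L2-normalized class
# weights, optionally scaled by a learnable scalar sigma. The class name is hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F

class CosineLinearSketch(nn.Module):
    def __init__(self, in_features: int, out_features: int, sigma: bool = True):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.kaiming_uniform_(self.weight)
        # learnable scale applied to the cosine logits (only if sigma is enabled)
        self.sigma = nn.Parameter(torch.ones(1)) if sigma else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # cosine similarity = dot product of normalized features and normalized weights
        out = F.linear(F.normalize(x, p=2, dim=1), F.normalize(self.weight, p=2, dim=1))
        return self.sigma * out if self.sigma is not None else out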
def _prepare_model_for_new_task(self, **kwargs) -> None:
    """
    A method-specific function that runs before the starting epoch of each new task (called from
    the prepare_model_for_task function). It copies the old network and freezes its gradients,
    and extends the output layer with the new classes.
    """
    self.old_net = copy_freeze(self.net)
    self.old_net.eval()

    cur_task_id = self.cur_task_id
    num_old_classes = int(sum(self.n_cla_per_tsk[:cur_task_id]))
    num_new_classes = self.n_cla_per_tsk[cur_task_id]
    device = next(self.net.parameters()).device

    # Extend the last layer, carrying over the previously learned class weights
    if cur_task_id > 0:
        output_layer = cosine_linear.SplitCosineLinear(in_features=self.latent_dim,
                                                       out_features1=num_old_classes,
                                                       out_features2=num_new_classes,
                                                       sigma=self.sigma).to(device)
        if cur_task_id == 1:
            output_layer.fc1.weight.data = self.net.model.output_layer.weight.data
        else:
            out_features1 = self.net.model.output_layer.fc1.out_features
            output_layer.fc1.weight.data[:out_features1] = self.net.model.output_layer.fc1.weight.data
            output_layer.fc1.weight.data[out_features1:] = self.net.model.output_layer.fc2.weight.data
        self.net.model.output_layer = output_layer
    elif cur_task_id != 0:
        raise ValueError("task id cannot be negative")

    self.reset_optimizer_and_scheduler()
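# Note: illustrative sketch only (not the library code) of how a split cosine head such as
# cosine_linear.SplitCosineLinear is usually structured: fc1 holds the old-class weights, fc2 the
# new-class weights, both share a single sigma, and their cosine logits are concatenated. It reuses
# the hypothetical CosineLinearSketch defined above.
class SplitCosineLinearSketch(nn.Module):
    def __init__(self, in_features: int, out_features1: int, out_features2: int, sigma: bool = True):
        super().__init__()
        # sub-heads without their own sigma; the scale is applied once after concatenation
        self.fc1 = CosineLinearSketch(in_features, out_features1, sigma=False)  # old classes
        self.fc2 = CosineLinearSketch(in_features, out_features2, sigma=False)  # new classes
        self.sigma = nn.Parameter(torch.ones(1)) if sigma else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = torch.cat((self.fc1(x), self.fc2(x)), dim=1)
        return self.sigma * out if self.sigma is not None else out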
def _prepare_model_for_new_task(self, **kwargs) -> None:
    """
    A method-specific function that runs before the starting epoch of each new task (called from
    the prepare_model_for_task function). It copies the old network and freezes its gradients.
    """
    self.old_net = copy_freeze(self.net)
    self.old_net.eval()
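# Note: illustrative sketch only. copy_freeze is assumed to behave roughly as below: deep-copy the
# network and disable gradients on the copy, so it can serve as a fixed teacher for distillation.
import copy
import torch.nn as nn

def copy_freeze_sketch(net: nn.Module) -> nn.Module:
    frozen = copy.deepcopy(net)
    for param in frozen.parameters():
        param.requires_grad = False
    return frozen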
def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
    super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

    self.old_net = copy_freeze(self.net)

    # setup losses
    self.bce = nn.BCEWithLogitsLoss(reduction="mean")
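# Note: illustrative sketch, not the library's training code. With nn.BCEWithLogitsLoss and a frozen
# old_net, one common (iCaRL-style) way to build distillation targets is to keep the one-hot labels
# for the new classes and replace the old-class entries with the old network's sigmoid outputs. The
# helper name is hypothetical.
import torch

def build_bce_targets_sketch(labels_one_hot: torch.Tensor,
                             old_logits: torch.Tensor,
                             num_old_classes: int) -> torch.Tensor:
    targets = labels_one_hot.clone().float()
    # soft targets for previously seen classes come from the frozen old network
    targets[:, :num_old_classes] = torch.sigmoid(old_logits[:, :num_old_classes])
    return targets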
def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
    super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

    self.old_net = copy_freeze(self.net)

    # setup losses
    self.bce = nn.BCEWithLogitsLoss(reduction="mean")

    # stores the most recent class means (should be updated whenever the model changes)
    self.class_means = None  # type: Optional[torch.Tensor]
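# Note: illustrative sketch only. self.class_means is assumed to hold one normalized feature mean per
# class, recomputed from exemplar features whenever the network changes, for nearest-class-mean
# prediction. The helper name and signature are hypothetical.
import torch
import torch.nn.functional as F

def compute_class_means_sketch(features: torch.Tensor, labels: torch.Tensor, num_classes: int) -> torch.Tensor:
    # features: (N, latent_dim) exemplar features; labels: (N,) integer class ids
    means = torch.zeros(num_classes, features.shape[1])
    for c in range(num_classes):
        class_feats = features[labels == c]
        if len(class_feats) > 0:
            means[c] = F.normalize(class_feats, p=2, dim=1).mean(dim=0)
    return F.normalize(means, p=2, dim=1)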
def _prepare_model_for_new_task(self, task_data: Dataset, dist_args: Optional[dict] = None, **kwargs) -> None:
    """
    A method-specific function that runs before the starting epoch of each new task (called from
    the prepare_model_for_task function). It copies the old network and freezes its gradients.
    It also extends the output layer, imprints weights for the extended nodes, and changes the
    trainable parameters.

    Args:
        task_data (Dataset): The new task dataset
        dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
            multiple gpus (ex: rank of the device) (default: None)
    """
    self.old_net = copy_freeze(self.net)
    self.old_net.eval()

    cur_task_id = self.cur_task_id
    num_old_classes = int(sum(self.n_cla_per_tsk[:cur_task_id]))
    num_new_classes = self.n_cla_per_tsk[cur_task_id]
    device = next(self.net.parameters()).device

    # Extend the last layer, carrying over the previously learned class weights and sigma
    if cur_task_id > 0:
        output_layer = cosine_linear.SplitCosineLinear(in_features=self.latent_dim,
                                                       out_features1=num_old_classes,
                                                       out_features2=num_new_classes,
                                                       sigma=self.sigma).to(device)
        if cur_task_id == 1:
            output_layer.fc1.weight.data = self.net.model.output_layer.weight.data
        else:
            out_features1 = self.net.model.output_layer.fc1.out_features
            output_layer.fc1.weight.data[:out_features1] = self.net.model.output_layer.fc1.weight.data
            output_layer.fc1.weight.data[out_features1:] = self.net.model.output_layer.fc2.weight.data
        output_layer.sigma.data = self.net.model.output_layer.sigma.data
        self.net.model.output_layer = output_layer
        # scale the less-forget weight with the ratio of old to new classes
        self.lambda_cur = self.lambda_base * math.sqrt(num_old_classes * 1.0 / num_new_classes)
        print_msg(f"Lambda for less forget is set to {self.lambda_cur}")
    elif cur_task_id != 0:
        raise ValueError("task id cannot be negative")

    # Imprint weights for the new classes
    with task_data.disable_augmentations():
        if cur_task_id > 0:
            print_msg("Imprinting weights")
            self.net = self._imprint_weights(task_data, self.net, dist_args)

    # Fix parameters of FC1 for less forget and reset optimizer/scheduler
    if cur_task_id > 0:
        trainable_parameters = [param for name, param in self.net.named_parameters()
                                if "output_layer.fc1" not in name]
    else:
        trainable_parameters = self.net.parameters()
    self.reset_optimizer_and_scheduler(trainable_parameters)
def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
    super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

    self.sigma = True
    device = next(self.net.parameters()).device
    self.net.model.output_layer = cosine_linear.CosineLinear(in_features=self.latent_dim,
                                                             out_features=n_cla_per_tsk[0],
                                                             sigma=self.sigma).to(device)
    self.reset_optimizer_and_scheduler()
    self.old_net = copy_freeze(self.net)  # type: Optional[Union[ResNet, ResNetCIFAR]]

    # setup losses
    self.bce = nn.BCEWithLogitsLoss(reduction="mean")

    self.method_variables.extend(["sigma"])