Example #1
    def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]], class_names_to_idx: Dict[str, int], config: Dict):
        super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

        self.sigma = True
        device = next(self.net.parameters()).device
        self.net.model.output_layer = cosine_linear.CosineLinear(in_features=self.latent_dim,
                                                                 out_features=n_cla_per_tsk[0],
                                                                 sigma=self.sigma).to(device)
        self.reset_optimizer_and_scheduler()
        self.old_net = copy_freeze(self.net)  # type: Union[ResNet, ResNetCIFAR]

        self.batch_size = config["batch_size"]

        self.lambda_base = config["lucir_lambda"]
        self.lambda_cur = self.lambda_base
        self.K = 2
        self.margin_1 = config["lucir_margin_1"]
        self.margin_2 = config["lucir_margin_2"]

        # setup losses
        # self.loss_classification = nn.CrossEntropyLoss(reduction="mean")
        self.loss_classification = nn.BCEWithLogitsLoss(reduction="mean")
        self.loss_distill = nn.CosineEmbeddingLoss(reduction="mean")
        # several losses to allow for the use of different margins
        self.loss_mr_1 = nn.MarginRankingLoss(margin=self.margin_1, reduction="mean")
        self.loss_mr_2 = nn.MarginRankingLoss(margin=self.margin_2, reduction="mean")

        self.method_variables.extend(["lambda_base", "lambda_cur", "K", "margin_1", "margin_2", "sigma"])
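The cosine_linear.CosineLinear layer assigned above is only referenced, not shown, in these examples. Below is a minimal sketch of a cosine-normalized linear layer with an optional learnable scale, in the spirit of LUCIR; the class name CosineLinearSketch and the initialization details are assumptions, and the real class in the source repository may differ.

# Hedged sketch of a cosine-normalized linear layer with an optional learnable
# scale (sigma), assumed to behave like cosine_linear.CosineLinear above.
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class CosineLinearSketch(nn.Module):
    def __init__(self, in_features: int, out_features: int, sigma: bool = True):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        # learnable scale applied to the cosine similarities
        self.sigma = nn.Parameter(torch.ones(1)) if sigma else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # cosine similarity between L2-normalized features and class weights
        out = F.linear(F.normalize(x, p=2, dim=1),
                       F.normalize(self.weight, p=2, dim=1))
        if self.sigma is not None:
            out = self.sigma * out
        return out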
Example #2
    def _prepare_model_for_new_task(self, **kwargs) -> None:
        """
        A method-specific function that takes place before the starting epoch of each new task (runs from the
        prepare_model_for_task function).
        It copies the old network and freezes its gradients. It also extends the output layer.
        """
        self.old_net = copy_freeze(self.net)
        self.old_net.eval()

        cur_task_id = self.cur_task_id
        num_old_classes = int(sum(self.n_cla_per_tsk[:cur_task_id]))
        num_new_classes = self.n_cla_per_tsk[cur_task_id]
        device = next(self.net.parameters()).device

        # Extend last layer
        if cur_task_id > 0:
            output_layer = cosine_linear.SplitCosineLinear(
                in_features=self.latent_dim,
                out_features1=num_old_classes,
                out_features2=num_new_classes,
                sigma=self.sigma).to(device)
            if cur_task_id == 1:
                output_layer.fc1.weight.data = self.net.model.output_layer.weight.data
            else:
                out_features1 = self.net.model.output_layer.fc1.out_features
                output_layer.fc1.weight.data[:out_features1] = self.net.model.output_layer.fc1.weight.data
                output_layer.fc1.weight.data[out_features1:] = self.net.model.output_layer.fc2.weight.data
            self.net.model.output_layer = output_layer
        elif cur_task_id != 0:
            raise ValueError("task id cannot be negative")
        self.reset_optimizer_and_scheduler()
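For reference, here is a minimal sketch of a split cosine head matching the way cosine_linear.SplitCosineLinear is used above (fc1 holds the old-class weights, fc2 the new-class weights, with one shared sigma). The class name SplitCosineLinearSketch is an assumption, and the real implementation may differ.

# Hedged sketch of a split cosine head: two weight matrices (old/new classes)
# whose cosine scores are concatenated and scaled by one shared sigma.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SplitCosineLinearSketch(nn.Module):
    def __init__(self, in_features: int, out_features1: int,
                 out_features2: int, sigma: bool = True):
        super().__init__()
        self.fc1 = nn.Linear(in_features, out_features1, bias=False)  # old classes
        self.fc2 = nn.Linear(in_features, out_features2, bias=False)  # new classes
        self.sigma = nn.Parameter(torch.ones(1)) if sigma else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = F.normalize(x, p=2, dim=1)
        out1 = F.linear(x, F.normalize(self.fc1.weight, p=2, dim=1))
        out2 = F.linear(x, F.normalize(self.fc2.weight, p=2, dim=1))
        out = torch.cat([out1, out2], dim=1)
        if self.sigma is not None:
            out = self.sigma * out
        return out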
Example #3
    def _prepare_model_for_new_task(self, **kwargs) -> None:
        """
        A method-specific function that takes place before the starting epoch of each new task (runs from the
        prepare_model_for_task function).
        It copies the old network and freezes its gradients.
        """
        self.old_net = copy_freeze(self.net)
        self.old_net.eval()
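The copy_freeze helper is likewise only referenced in these snippets. A plausible minimal sketch, assuming it simply deep-copies the network and disables gradients on the copy so it can serve as a fixed teacher; the function name copy_freeze_sketch is an assumption.

# Hedged sketch of a copy_freeze-style helper: deep-copy the network and
# freeze the copy's parameters.
import copy
import torch.nn as nn

def copy_freeze_sketch(net: nn.Module) -> nn.Module:
    frozen = copy.deepcopy(net)
    for param in frozen.parameters():
        param.requires_grad = False
    return frozen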
Example #4
    def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]],
                 class_names_to_idx: Dict[str, int], config: Dict):
        super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

        self.old_net = copy_freeze(self.net)

        # setup losses
        self.bce = nn.BCEWithLogitsLoss(reduction="mean")
Example #5
    def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]],
                 class_names_to_idx: Dict[str, int], config: Dict):
        super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

        self.old_net = copy_freeze(self.net)

        # setup losses
        self.bce = nn.BCEWithLogitsLoss(reduction="mean")

        # stores the most recent class means (should be updated whenever the model changes)
        self.class_means = None  # type: Optional[torch.Tensor]
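The class_means buffer is typically refreshed after each task by averaging the normalized feature embeddings per class (iCaRL-style) and is then used for nearest-mean classification. A hedged sketch under that assumption; the function name and the features/labels inputs are not part of the original code.

# Hedged sketch of recomputing per-class feature means for nearest-mean
# classification; `features` and `labels` are assumed inputs.
import torch
import torch.nn.functional as F

@torch.no_grad()
def compute_class_means_sketch(features: torch.Tensor, labels: torch.Tensor,
                               num_classes: int) -> torch.Tensor:
    # features: (N, latent_dim) embeddings, labels: (N,) integer class ids
    means = torch.zeros(num_classes, features.shape[1], device=features.device)
    for c in range(num_classes):
        class_feats = features[labels == c]
        if class_feats.numel() > 0:
            mean = F.normalize(class_feats, p=2, dim=1).mean(dim=0)
            means[c] = F.normalize(mean, p=2, dim=0)  # keep means on the unit sphere
    return means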
Example #6
    def _prepare_model_for_new_task(self, task_data: Dataset, dist_args: Optional[dict] = None,
                                    **kwargs) -> None:
        """
        A method-specific function that takes place before the starting epoch of each new task (runs from the
        prepare_model_for_task function).
        It copies the old network and freezes its gradients.
        It also extends the output layer, imprints weights for the extended nodes, and changes the trainable parameters.

        Args:
            task_data (Dataset): The new task dataset
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of multiple GPUs
                (e.g., the rank of the device) (default: None)
        """
        self.old_net = copy_freeze(self.net)
        self.old_net.eval()

        cur_task_id = self.cur_task_id
        num_old_classes = int(sum(self.n_cla_per_tsk[: cur_task_id]))
        num_new_classes = self.n_cla_per_tsk[cur_task_id]
        device = next(self.net.parameters()).device

        # Extend last layer
        if cur_task_id > 0:
            output_layer = cosine_linear.SplitCosineLinear(in_features=self.latent_dim,
                                                           out_features1=num_old_classes,
                                                           out_features2=num_new_classes,
                                                           sigma=self.sigma).to(device)
            if cur_task_id == 1:
                output_layer.fc1.weight.data = self.net.model.output_layer.weight.data
            else:
                out_features1 = self.net.model.output_layer.fc1.out_features
                output_layer.fc1.weight.data[:out_features1] = self.net.model.output_layer.fc1.weight.data
                output_layer.fc1.weight.data[out_features1:] = self.net.model.output_layer.fc2.weight.data
            output_layer.sigma.data = self.net.model.output_layer.sigma.data
            self.net.model.output_layer = output_layer
            self.lambda_cur = self.lambda_base * math.sqrt(num_old_classes * 1.0 / num_new_classes)
            print_msg(f"Lambda for less forget is set to {self.lambda_cur}")
        elif cur_task_id != 0:
            raise ValueError("task id cannot be negative")

        # Imprint weights
        with task_data.disable_augmentations():
            if cur_task_id > 0:
                print_msg("Imprinting weights")
                self.net = self._imprint_weights(task_data, self.net, dist_args)

        # Fix parameters of FC1 for less forget and reset optimizer/scheduler
        if cur_task_id > 0:
            trainable_parameters = [param for name, param in self.net.named_parameters() if
                                    "output_layer.fc1" not in name]
        else:
            trainable_parameters = self.net.parameters()
        self.reset_optimizer_and_scheduler(trainable_parameters)
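The _imprint_weights call above is not shown in these examples. In LUCIR-style implementations, weight imprinting initializes the new head (fc2) from the mean embedding of each new class, rescaled to the average norm of the old-class weights. A hedged sketch under those assumptions; the function name and its inputs are illustrative, and the real _imprint_weights may differ.

# Hedged sketch of weight imprinting for the new cosine head: each new-class
# weight is the normalized mean embedding of that class, rescaled to the
# average norm of the old-class weights.
import torch
import torch.nn.functional as F

@torch.no_grad()
def imprint_fc2_weights_sketch(fc1_weight: torch.Tensor, features: torch.Tensor,
                               labels: torch.Tensor, num_old_classes: int,
                               num_new_classes: int) -> torch.Tensor:
    # fc1_weight: (num_old_classes, latent_dim), features: (N, latent_dim)
    avg_old_norm = fc1_weight.norm(dim=1).mean()
    fc2_weight = torch.zeros(num_new_classes, fc1_weight.shape[1],
                             device=fc1_weight.device)
    for c in range(num_new_classes):
        class_feats = features[labels == num_old_classes + c]
        mean = F.normalize(class_feats, p=2, dim=1).mean(dim=0)
        fc2_weight[c] = F.normalize(mean, p=2, dim=0) * avg_old_norm
    return fc2_weight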
Example #7
    def __init__(self, n_cla_per_tsk: Union[np.ndarray, List[int]],
                 class_names_to_idx: Dict[str, int], config: Dict):
        super(Model, self).__init__(n_cla_per_tsk, class_names_to_idx, config)

        self.sigma = True
        device = next(self.net.parameters()).device
        self.net.model.output_layer = cosine_linear.CosineLinear(
            in_features=self.latent_dim,
            out_features=n_cla_per_tsk[0],
            sigma=self.sigma).to(device)
        self.reset_optimizer_and_scheduler()
        self.old_net = copy_freeze(self.net)  # type: Optional[Union[ResNet, ResNetCIFAR]]

        # setup losses
        self.bce = nn.BCEWithLogitsLoss(reduction="mean")

        self.method_variables.extend(["sigma"])