Example #1
    def test_cwrstar(self):
        # SIT scenario
        model, optimizer, criterion, my_nc_benchmark = self.init_sit()
        last_fc_name, _ = get_last_fc_layer(model)
        strategy = CWRStar(model,
                           optimizer,
                           criterion,
                           last_fc_name,
                           train_mb_size=64,
                           device=self.device)
        self.run_strategy(my_nc_benchmark, strategy)

        # MT scenario
        strategy = CWRStar(model,
                           optimizer,
                           criterion,
                           last_fc_name,
                           train_mb_size=64,
                           device=self.device)
        benchmark = self.load_benchmark(use_task_labels=True)
        self.run_strategy(benchmark, strategy)
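
    # Hedged sketch (not in the source): a plausible implementation of the
    # ``run_strategy`` helper used above -- train on each experience in the
    # benchmark's train stream, then evaluate on the test stream. The real
    # helper in the test suite may differ.
    def run_strategy(self, benchmark, strategy):
        for experience in benchmark.train_stream:
            strategy.train(experience)
        strategy.eval(benchmark.test_stream)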
Example #2
    def __init__(
        self,
        criterion=None,
        lr: float = 0.001,
        momentum=0.9,
        l2=0.0005,
        train_epochs: int = 4,
        init_update_rate: float = 0.01,
        inc_update_rate=0.00005,
        max_r_max=1.25,
        max_d_max=0.5,
        inc_step=4.1e-05,
        rm_sz: int = 1500,
        freeze_below_layer: str = "lat_features.19.bn.beta",
        latent_layer_num: int = 19,
        ewc_lambda: float = 0,
        train_mb_size: int = 128,
        eval_mb_size: int = 128,
        device=None,
        plugins: Optional[Sequence[SupervisedPlugin]] = None,
        evaluator: EvaluationPlugin = default_evaluator,
        eval_every=-1,
    ):
        """
        Creates an instance of the AR1 strategy.

        :param criterion: The loss criterion to use. Defaults to None, in which
            case the cross entropy loss is used.
        :param lr: The learning rate (SGD optimizer).
        :param momentum: The momentum (SGD optimizer).
        :param l2: The L2 penalty used for weight decay.
        :param train_epochs: The number of training epochs. Defaults to 4.
        :param init_update_rate: The initial update rate of BatchReNorm layers.
        :param inc_update_rate: The incremental update rate of BatchReNorm
            layers.
        :param max_r_max: The maximum r value of BatchReNorm layers.
        :param max_d_max: The maximum d value of BatchReNorm layers.
        :param inc_step: The incremental step of r and d values of BatchReNorm
            layers.
        :param rm_sz: The size of the replay buffer. The replay buffer is shared
            across classes. Defaults to 1500.
        :param freeze_below_layer: The name of the layer used as the freezing
            boundary: all layers below it (nearest to the input) are frozen.
            The given layer itself is not frozen (the bound is exclusive).
        :param latent_layer_num: The number of the layer to use as the Latent
            Replay Layer. Usually this is the same as `freeze_below_layer`.
        :param ewc_lambda: The Synaptic Intelligence lambda term. Defaults to
            0, which means that the Synaptic Intelligence regularization
            will not be applied.
        :param train_mb_size: The train minibatch size. Defaults to 128.
        :param eval_mb_size: The eval minibatch size. Defaults to 128.
        :param device: The device to use. Defaults to None (cpu).
        :param plugins: (optional) list of SupervisedPlugins.
        :param evaluator: (optional) instance of EvaluationPlugin for logging
            and metric computations.
        :param eval_every: the frequency of the calls to `eval` inside the
            training loop.
                if -1: no evaluation during training.
                if  0: calls `eval` after the final epoch of each training
                    experience.
                if >0: calls `eval` every `eval_every` epochs and at the end
                    of all the epochs for a single experience.
        """

        warnings.warn("The AR1 strategy implementation is in an alpha stage "
                      "and is not perfectly aligned with the paper "
                      "implementation. Please use at your own risk!")

        if plugins is None:
            plugins = []

        # Model setup
        model = MobilenetV1(pretrained=True, latent_layer_num=latent_layer_num)
        # Replace Batch Normalization with Batch Renormalization, which
        # behaves better with the small, non-i.i.d. minibatches seen during
        # continual training.
        replace_bn_with_brn(
            model,
            momentum=init_update_rate,
            r_d_max_inc_step=inc_step,
            max_r_max=max_r_max,
            max_d_max=max_d_max,
        )

        fc_name, fc_layer = get_last_fc_layer(model)

        if ewc_lambda != 0:
            # Synaptic Intelligence is not applied to the last fully
            # connected layer (nor, implicitly, to the "freeze below" layers).
            plugins.append(
                SynapticIntelligencePlugin(ewc_lambda,
                                           excluded_parameters=[fc_name]))

        # CWR* consolidates per-class weights in the last fully connected
        # layer. freeze_remaining_model is False because AR1 freezes the
        # lower layers itself (see `freeze_below_layer`).
        self.cwr_plugin = CWRStarPlugin(model,
                                        cwr_layer_name=fc_name,
                                        freeze_remaining_model=False)
        plugins.append(self.cwr_plugin)

        optimizer = SGD(model.parameters(),
                        lr=lr,
                        momentum=momentum,
                        weight_decay=l2)

        if criterion is None:
            criterion = CrossEntropyLoss()

        self.ewc_lambda = ewc_lambda
        self.freeze_below_layer = freeze_below_layer
        self.rm_sz = rm_sz
        self.inc_update_rate = inc_update_rate
        self.max_r_max = max_r_max
        self.max_d_max = max_d_max
        self.lr = lr
        self.momentum = momentum
        self.l2 = l2
        self.rm = None  # external replay memory (latent activations, labels)
        self.cur_acts: Optional[Tensor] = None  # latent acts of current exp.
        self.replay_mb_size = 0  # replay patterns drawn per minibatch

        super().__init__(
            model,
            optimizer,
            criterion,
            train_mb_size=train_mb_size,
            train_epochs=train_epochs,
            eval_mb_size=eval_mb_size,
            device=device,
            plugins=plugins,
            evaluator=evaluator,
            eval_every=eval_every,
        )
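
# Hedged usage sketch (not part of the source): how an AR1 instance might be
# constructed and driven over a benchmark. Import paths follow recent
# Avalanche releases and may differ across versions; CORe50 is assumed here
# because the bundled MobilenetV1 backbone targets CORe50-sized RGB inputs.
if __name__ == "__main__":
    from avalanche.benchmarks.classic import CORe50
    from avalanche.training.supervised import AR1

    benchmark = CORe50(scenario="nicv2_391")  # one of the CORe50 NICv2 protocols
    strategy = AR1(train_epochs=4, train_mb_size=128, eval_mb_size=128)

    for experience in benchmark.train_stream:
        strategy.train(experience)            # train on one experience
        strategy.eval(benchmark.test_stream)  # evaluate after each experience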