def before_training_exp(self, strategy, **kwargs):
    if self.freeze_remaining_model and strategy.training_exp_counter > 0:
        self.freeze_other_layers()

    # Count current classes and number of samples for each of them.
    data = strategy.experience.dataset
    self.model.cur_j = examples_per_class(data.targets)
    self.cur_class = [
        cls
        for cls in set(self.model.cur_j.keys())
        if self.model.cur_j[cls] > 0
    ]

    self.reset_weights(self.cur_class)
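# The method above relies on an `examples_per_class` helper that maps each
# class id to the number of samples carrying that label in the current
# experience. The sketch below is a minimal, assumed implementation of such a
# counter for illustration; it is not necessarily the library's own version.
from collections import defaultdict


def examples_per_class(targets):
    """Count how many samples belong to each class label in `targets`."""
    counts = defaultdict(int)
    for target in targets:
        counts[int(target)] += 1
    return dict(counts)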
def _before_training_exp(self, **kwargs):
    self.model.eval()
    self.model.end_features.train()
    self.model.output.train()

    if self.clock.train_exp_counter > 0:
        # In AR1, batch 0 is treated differently, as the feature extractor is
        # left more free to learn.
        # This branch is executed for batch > 0, in which we freeze layers
        # below "self.freeze_below_layer" (which usually is the latent
        # replay layer!) and we also change the parameters of BatchReNorm
        # layers to a more conservative configuration.

        # "freeze_up_to" will freeze layers below "freeze_below_layer".
        # Beware that Batch ReNorm layers are not frozen!
        freeze_up_to(
            self.model,
            freeze_until_layer=self.freeze_below_layer,
            layer_filter=AR1.filter_bn_and_brn,
        )

        # Adapt the parameters of BatchReNorm layers
        change_brn_pars(
            self.model,
            momentum=self.inc_update_rate,
            r_d_max_inc_step=0,
            r_max=self.max_r_max,
            d_max=self.max_d_max,
        )

        # Adapt the model and optimizer
        self.model = self.model.to(self.device)
        self.optimizer = SGD(
            self.model.parameters(),
            lr=self.lr,
            momentum=self.momentum,
            weight_decay=self.l2,
        )

    # super()... will run the S.I. and CWR* plugin callbacks
    super()._before_training_exp(**kwargs)

    # Update cur_j of CWR* to consider latent patterns
    if self.clock.train_exp_counter > 0:
        for class_id, count in examples_per_class(self.rm[1]).items():
            self.model.cur_j[class_id] += count
        self.cwr_plugin.cur_class = [
            cls
            for cls in set(self.model.cur_j.keys())
            if self.model.cur_j[cls] > 0
        ]
        self.cwr_plugin.reset_weights(self.cwr_plugin.cur_class)
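# The freezing step above can be pictured as follows: walk the model's
# registered modules in order, disable gradients for each of them, and stop
# once the layer named `freeze_until_layer` is reached, while a filter
# callback exempts BatchNorm-like layers so their statistics keep adapting.
# The sketch below illustrates that idea under those assumptions; the names
# `sketch_freeze_up_to` and `sketch_filter_bn_and_brn` are hypothetical and
# this is not the library's exact implementation of `freeze_up_to`.
import torch.nn as nn


def sketch_filter_bn_and_brn(module: nn.Module) -> bool:
    """Return False for layers that should stay trainable (BatchNorm-like)."""
    return not isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d))


def sketch_freeze_up_to(model: nn.Module, freeze_until_layer: str, layer_filter=None):
    """Freeze every module registered before `freeze_until_layer` (sketch)."""
    for name, module in model.named_modules():
        if name == "":
            continue  # skip the root module itself
        if name == freeze_until_layer:
            break  # modules from here on (e.g. the latent replay layer) stay trainable
        if layer_filter is not None and not layer_filter(module):
            continue  # e.g. leave BatchNorm / BatchReNorm layers adaptable
        for param in module.parameters(recurse=False):
            param.requires_grad = False


# Example usage (hypothetical layer name):
# sketch_freeze_up_to(model, "lat_features", layer_filter=sketch_filter_bn_and_brn)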