def __init__(self, image_size, image_channels, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True,
             fc_nl="relu", gated=False, z_dim=20, dataset="mnist"):
    """Set up a fully-connected variational auto-encoder (VAE) with a classifier head.

    Args:
        image_size: image size; used as ``image_size**2`` by default, or indexed as
            ``image_size[0] * image_size[1]`` when ``dataset`` is "ckplus" or "affectnet".
        image_channels: number of image channels.
        classes: number of outputs of the classifier head.
        fc_layers: number of fully-connected layers (must be >= 1).
        fc_units: width of the hidden layers.
        fc_drop: forwarded as ``drop`` to the encoder/decoder ``MLP``.
        fc_bn: forwarded as ``batch_norm`` to the ``MLP`` blocks (and to ``fromZ`` if there are hidden layers).
        fc_nl: forwarded as ``nl`` to the ``MLP`` blocks (and to ``fromZ`` if there are hidden layers).
        gated: forwarded as ``gated`` to the ``MLP`` blocks.
        z_dim: size of the latent representation.
        dataset: dataset name; only selects how the flattened input size is computed.

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.label = "VAE"
    self.image_size = image_size
    self.image_channels = image_channels
    self.classes = classes
    self.fc_layers = fc_layers
    self.z_dim = z_dim
    self.fc_units = fc_units

    # --- weights of the different components of the loss function ---
    self.lamda_rcl = 1.
    self.lamda_vl = 1.
    # -> when used as "classifier with feedback-connections", this should be set to 1.
    self.lamda_pl = 0.
    # -> makes that [reconL] and [variatL] are both divided by number of input-pixels
    self.average = True

    # A VAE without any fully-connected layer is not supported.
    if fc_layers < 1:
        raise ValueError("VAE cannot have 0 fully-connected layers!")

    has_hidden = fc_layers > 1
    # Keyword arguments shared by the encoder- and decoder-MLP.
    shared_mlp_kwargs = dict(layers=fc_layers - 1, hid_size=fc_units, drop=fc_drop, batch_norm=fc_bn,
                             nl=fc_nl, gated=gated)

    ######------SPECIFY MODEL------######

    ##>----Encoder (= q[z|x])----<##
    # -flatten image to 2D-tensor
    self.flatten = utils.Flatten()
    if dataset in ("ckplus", "affectnet"):
        self.input_size = image_size[0] * image_size[1] * image_channels
    else:
        self.input_size = image_channels * image_size ** 2
    # -fully connected hidden layers
    self.fcE = MLP(input_size=self.input_size, output_size=fc_units, **shared_mlp_kwargs)
    encoder_width = fc_units if has_hidden else self.input_size
    # -to z
    self.toZ = fc_layer_split(encoder_width, z_dim, nl_mean='none', nl_logvar='none')

    ##>----Classifier----<##
    self.classifier = fc_layer(encoder_width, classes, excit_buffer=True, nl='none')

    ##>----Decoder (= p[x|z])----<##
    # -from z (non-linearity and batch-norm only if hidden layers follow)
    from_z_nl = fc_nl if has_hidden else "none"
    self.fromZ = fc_layer(z_dim, encoder_width, batch_norm=(has_hidden and fc_bn), nl=from_z_nl)
    # -fully connected hidden layers
    self.fcD = MLP(input_size=fc_units, output_size=self.input_size, output='BCE', **shared_mlp_kwargs)
    # -to image-shape
    self.to_image = utils.Reshape(image_channels=image_channels)
def __init__(self, image_size, image_channels, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=False,
             fc_nl="relu", gated=False, bias=True, excitability=False, excit_buffer=False, binaryCE=False,
             binaryCE_distill=False, AGEM=False, experiment='splitMNIST'):
    """Set up a classifier whose feature extractor depends on ``experiment``.

    For 'CIFAR10', 'CIFAR100' and 'CUB2011' the feature extractor is ``rn.resnet32`` with its
    ``linear`` attribute replaced by an identity; for 'ImageNet' it is ``resnet18(pretrained=True)``
    with its ``fc`` attribute replaced by an identity; for any other value it is an ``MLP`` on the
    flattened image.

    Args:
        image_size: image size, used as ``image_size ** 2`` (MLP branch only).
        image_channels: number of image channels (MLP branch only).
        classes: number of outputs of the classification layer.
        fc_layers: number of fully-connected layers (must be >= 1).
        fc_units: width of the hidden layers of the MLP.
        fc_drop: forwarded as ``drop`` to the MLP and to the classification layer.
        fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded to the MLP.
        binaryCE: use binary (instead of multiclass) prediction error.
        binaryCE_distill: for classes from previous tasks, use the probabilities predicted by the
            previous model as binary targets (only in Class-IL with binaryCE).
        AGEM: use gradient of replayed data as inequality constraint for (instead of adding it to)
            the gradient of the current data (as in A-GEM, see Chaudry et al., 2019; ICLR).
        experiment: name of the experiment; selects the feature extractor.

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.classes = classes
    self.label = "Classifier"
    self.fc_layers = fc_layers

    # --- settings for training ---
    self.binaryCE = binaryCE
    self.binaryCE_distill = binaryCE_distill
    self.AGEM = AGEM

    # --- online mem distillation ---
    self.is_offline_training = False
    self.is_ready_distill = False
    self.alpha_t = 0.5

    # At least one fully-connected layer is required.
    if fc_layers < 1:
        raise ValueError("The classifier needs to have at least 1 fully-connected layer.")

    ######------SPECIFY MODEL------######
    self.experiment = experiment
    if self.experiment in ('CIFAR10', 'CIFAR100', 'CUB2011'):
        self.fcE = rn.resnet32(classes, pretrained=False)
        self.fcE.linear = nn.Identity()
        feature_width = 64
    elif self.experiment == 'ImageNet':
        ResNet.name = 'ResNet-18'
        self.fcE = resnet18(pretrained=True)
        self.fcE.fc = nn.Identity()
        feature_width = 512
    else:
        flat_inputs = image_channels * image_size ** 2
        # flatten image to 2D-tensor
        self.flatten = utils.Flatten()
        # fully connected hidden layers
        self.fcE = MLP(input_size=flat_inputs, output_size=fc_units, layers=fc_layers - 1,
                       hid_size=fc_units, drop=fc_drop, batch_norm=fc_bn, nl=fc_nl, bias=bias,
                       excitability=excitability, excit_buffer=excit_buffer, gated=gated)
        feature_width = fc_units if fc_layers > 1 else flat_inputs

    # classifier (built after the feature extractor in every branch)
    self.classifier = fc_layer(feature_width, classes, excit_buffer=True, nl='none', drop=fc_drop)
def __init__(self, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=False, fc_nl="relu", gated=False,
             bias=True, excitability=False, excit_buffer=False, binaryCE=False, binaryCE_distill=False,
             AGEM=False):
    """Set up a classifier that consists of a single classification layer on 128 input features.

    Args:
        classes: number of outputs of the classification layer.
        fc_layers: stored as ``self.fc_layers``; not otherwise used here.
        fc_drop: forwarded as ``drop`` to the classification layer.
        binaryCE: use binary (instead of multiclass) prediction error.
        binaryCE_distill: for classes from previous tasks, use the probabilities predicted by the
            previous model as binary targets (only in Class-IL with binaryCE).
        AGEM: use gradient of replayed data as inequality constraint for (instead of adding it to)
            the gradient of the current data (as in A-GEM, see Chaudry et al., 2019; ICLR).
        fc_units, fc_bn, fc_nl, gated, bias, excitability, excit_buffer: accepted but not used
            by this constructor.
    """
    super().__init__()

    # --- configuration ---
    self.label = "Classifier"
    self.classes = classes
    self.fc_layers = fc_layers

    # --- settings for training ---
    self.binaryCE = binaryCE
    self.binaryCE_distill = binaryCE_distill
    self.AGEM = AGEM

    # --- classifier ---
    feature_width = 128
    self.classifier = fc_layer(feature_width, classes, excit_buffer=True, nl='none', drop=fc_drop)
def __init__(self, image_size, image_channels, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True,
             fc_nl="relu", gated=False, bias=True, excitability=False, excit_buffer=False, binaryCE=False,
             binaryCE_distill=False):
    """Set up a fully-connected classifier: flatten -> MLP -> classification layer.

    Args:
        image_size: image size, used as ``image_size**2``.
        image_channels: number of image channels.
        classes: number of outputs of the classification layer.
        fc_layers: number of fully-connected layers (must be >= 1).
        fc_units: width of the hidden layers.
        fc_drop: forwarded as ``drop`` to the MLP and to the classification layer.
        fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded to the MLP.
        binaryCE, binaryCE_distill: training settings, stored on the instance.

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.classes = classes
    self.label = "Classifier"
    self.fc_layers = fc_layers

    # --- settings for training ---
    self.binaryCE = binaryCE
    self.binaryCE_distill = binaryCE_distill

    # At least one fully-connected layer is required.
    if fc_layers < 1:
        raise ValueError("The classifier needs to have at least 1 fully-connected layer.")

    ######------SPECIFY MODEL------######
    flat_inputs = image_channels * image_size**2

    # flatten image to 2D-tensor
    self.flatten = utils.Flatten()

    # fully connected hidden layers
    encoder_kwargs = dict(
        input_size=flat_inputs, output_size=fc_units, layers=fc_layers - 1, hid_size=fc_units,
        drop=fc_drop, batch_norm=fc_bn, nl=fc_nl, bias=bias, excitability=excitability,
        excit_buffer=excit_buffer, gated=gated,
    )
    self.fcE = MLP(**encoder_kwargs)

    # classifier (attached to the raw flattened input if there are no hidden layers)
    if fc_layers > 1:
        head_inputs = fc_units
    else:
        head_inputs = flat_inputs
    self.classifier = fc_layer(head_inputs, classes, excit_buffer=True, nl='none', drop=fc_drop)
def __init__(self, image_size, image_channels, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True,
             fc_nl="relu", z_dim=20):
    """Set up a fully-connected VAE with a classifier on the final hidden layer of the encoder.

    Args:
        image_size: image size, used as ``image_size**2``.
        image_channels: number of image channels.
        classes: number of outputs of the classifier.
        fc_layers: number of fully-connected layers (must be >= 1).
        fc_units: width of the hidden layers.
        fc_drop: dropout probability for the classifier; also forwarded as ``drop`` to both MLPs.
        fc_bn: forwarded as ``batch_norm`` to both MLPs (and to ``fromZ`` if there are hidden layers).
        fc_nl: forwarded as ``nl`` to both MLPs (and to ``fromZ`` if there are hidden layers).
        z_dim: size of the latent representation.

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.label = "VAE"
    self.image_size = image_size
    self.image_channels = image_channels
    self.classes = classes
    self.fc_layers = fc_layers
    self.z_dim = z_dim
    self.fc_units = fc_units

    # --- training related components that should be set before training ---
    # -criterion for reconstruction
    self.recon_criterion = None
    # -weights of different components of the loss function
    self.lamda_rcl = 1.
    self.lamda_vl = 1.
    # --> when used as "classifier with feedback-connections", this should be set to 1.
    self.lamda_pl = 0.

    # A VAE without any fully-connected layer is not supported.
    if fc_layers < 1:
        raise ValueError("VAE cannot have 0 fully-connected layers!")

    has_hidden = fc_layers > 1
    flat_size = image_channels * image_size**2
    # Width at the encoder output / decoder input: hidden width, or the flat image if no hidden layers.
    bottleneck_width = fc_units if has_hidden else flat_size

    ######------SPECIFY MODEL------######

    # encoder: flatten image to 2D-tensor
    self.flatten = utils.Flatten()
    # encoder: fully connected hidden layers
    self.fcE = linear_nets.MLP(
        input_size=flat_size, output_size=fc_units, layers=fc_layers - 1, hid_size=fc_units,
        drop=fc_drop, batch_norm=fc_bn, nl=fc_nl, final_nl=True,
    )

    # classifier (from final hidden layer of encoder)
    head_dropout = nn.Dropout(fc_drop)
    head_linear = eM.LinearExcitability(bottleneck_width, classes)
    self.classifier = nn.Sequential(head_dropout, head_linear)

    # reparametrization ("to Z and back")
    self.toZ = nn.Linear(bottleneck_width, z_dim)        # estimating mean
    self.toZlogvar = nn.Linear(bottleneck_width, z_dim)  # estimating log(SD**2)
    self.fromZ = linear_nets.fc_layer(
        z_dim, bottleneck_width,
        batch_norm=(has_hidden and fc_bn),
        nl=fc_nl if has_hidden else "none",
    )

    # decoder: fully connected hidden layers (with no non-linearity or batchnorm in final layer!)
    self.fcD = linear_nets.MLP(
        input_size=fc_units, output_size=flat_size, layers=fc_layers - 1, hid_size=fc_units,
        drop=fc_drop, batch_norm=fc_bn, nl=fc_nl, final_nl=False,
    )
    # decoder: reshape to image
    self.reshapeD = utils.ToImage(image_channels=image_channels)
def __init__(self, image_size, image_channels, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=False,
             fc_nl="relu", gated=False, bias=True, excitability=False, excit_buffer=False, binaryCE=False,
             binaryCE_distill=False, AGEM=False):
    """Set up a fully-connected classifier (flatten -> MLP -> classification layer) with A-GEM flag.

    Args:
        image_size: image size, used as ``image_size**2``.
        image_channels: number of image channels.
        classes: number of outputs of the classification layer.
        fc_layers: number of fully-connected layers (must be >= 1).
        fc_units: width of the hidden layers.
        fc_drop: forwarded as ``drop`` to the MLP and to the classification layer.
        fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded to the MLP.
        binaryCE: use binary (instead of multiclass) prediction error.
        binaryCE_distill: for classes from previous tasks, use the probabilities predicted by the
            previous model as binary targets (only in Class-IL with binaryCE).
        AGEM: use gradient of replayed data as inequality constraint for (instead of adding it to)
            the gradient of the current data (as in A-GEM, see Chaudry et al., 2019; ICLR).

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.classes = classes
    self.label = "Classifier"
    self.fc_layers = fc_layers

    # --- settings for training ---
    self.binaryCE = binaryCE
    self.binaryCE_distill = binaryCE_distill
    self.AGEM = AGEM

    # At least one fully-connected layer is required.
    if fc_layers < 1:
        raise ValueError("The classifier needs to have at least 1 fully-connected layer.")

    ######------SPECIFY MODEL------######
    n_hidden = fc_layers - 1
    n_pixels = image_channels * image_size**2

    # flatten image to 2D-tensor
    self.flatten = utils.Flatten()

    # fully connected hidden layers
    self.fcE = MLP(
        input_size=n_pixels,
        output_size=fc_units,
        layers=n_hidden,
        hid_size=fc_units,
        drop=fc_drop,
        batch_norm=fc_bn,
        nl=fc_nl,
        bias=bias,
        excitability=excitability,
        excit_buffer=excit_buffer,
        gated=gated,
    )

    # classifier (reads the flattened image directly when there are no hidden layers)
    classifier_inputs = n_pixels if fc_layers <= 1 else fc_units
    self.classifier = fc_layer(classifier_inputs, classes, excit_buffer=True, nl='none', drop=fc_drop)
def __init__(self, num_features, num_seq, classes, fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True,
             fc_nl="relu", gated=False, bias=True, excitability=None, excit_buffer=False, binaryCE=False,
             binaryCE_distill=False, experiment='splitMNIST', cls_type='mlp', args=None):
    """Set up a classifier for sequence/sensor data with an MLP or LSTM body and optional GEM memory.

    Args:
        num_features: number of features per time step.
        num_seq: number of time steps per sample.
        classes: number of output classes.
        fc_layers: number of fully-connected layers (must be >= 1); ``fc_layers - 1`` is used as
            the number of hidden MLP layers / LSTM layers.
        fc_units: width of the hidden layers / LSTM hidden size.
        fc_drop: forwarded as ``drop`` to the MLP and classification layer; also used as LSTM
            dropout unless the LSTM has exactly one layer.
        fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded to the MLP.
        binaryCE, binaryCE_distill: training settings, stored on the instance.
        experiment: experiment name; 'sensor' uses ``num_seq * num_features`` flattened inputs,
            anything else uses ``num_seq * num_features**2``.
        cls_type: 'mlp' or 'lstm'.
        args: optional namespace-like object. Attributes read here: ``cuda``, ``device``,
            ``weights_per_class``, ``input_drop`` (LSTM only), ``gem`` and, when ``gem`` is truthy,
            ``memory_strength``, ``n_memories`` and ``num_classes_per_task_l``. When ``None``,
            the derived attributes are set to ``None``, LSTM input dropout is 0 and GEM is disabled.

    Raises:
        ValueError: if ``fc_layers`` is smaller than 1.
    """
    super().__init__()

    # --- configuration ---
    self.num_features = num_features
    self.num_seq = num_seq
    self.classes = classes
    self.label = "Classifier"
    self.fc_layers = fc_layers
    self.hidden_dim = fc_units
    self.layer_dim = fc_layers - 1

    # NOTE(review): if the base class is torch.nn.Module, this instance attribute shadows the
    # `cuda()` method -- kept for backward compatibility; confirm no caller uses `model.cuda()`.
    self.cuda = None if args is None else args.cuda
    # `args` defaults to None, so every read of it has to be guarded (not only some of them).
    self.device = None if args is None else args.device
    self.weights_per_class = None if args is None else torch.FloatTensor(
        args.weights_per_class).to(args.device)

    # --- settings for training ---
    self.binaryCE = binaryCE
    self.binaryCE_distill = binaryCE_distill

    # At least one fully-connected layer is required.
    if fc_layers < 1:
        raise ValueError("The classifier needs to have at least 1 fully-connected layer.")

    ######------SPECIFY MODEL------######
    self.cls_type = cls_type
    self.experiment = experiment

    # flatten input to 2D-tensor
    self.flatten = utils.Flatten()

    # Size of the flattened input; also the classifier's input width when there are no hidden
    # layers, so it must match what the encoder actually receives for this experiment.
    if experiment == 'sensor':
        flat_inputs = num_seq * num_features
    else:
        flat_inputs = num_seq * num_features**2

    mlp_kwargs = dict(input_size=flat_inputs, output_size=fc_units, layers=fc_layers - 1,
                      hid_size=fc_units, drop=fc_drop, batch_norm=fc_bn, nl=fc_nl, bias=bias,
                      excitability=excitability, excit_buffer=excit_buffer, gated=gated)

    # fully connected hidden layers / recurrent body
    if experiment == 'sensor':
        if self.cls_type == 'mlp':
            self.fcE = MLP(**mlp_kwargs)
        elif self.cls_type == 'lstm':
            input_drop = 0.0 if args is None else args.input_drop
            self.lstm_input_dropout = nn.Dropout(input_drop)
            # Inter-layer dropout is disabled for a single-layer LSTM.
            self.lstm = nn.LSTM(input_size=num_features, hidden_size=fc_units,
                                num_layers=fc_layers - 1,
                                dropout=0.0 if (fc_layers - 1) == 1 else fc_drop,
                                batch_first=True)
    else:
        self.fcE = MLP(**mlp_kwargs)

    # classifier
    if self.cls_type == 'mlp':
        mlp_output_size = fc_units if fc_layers > 1 else flat_inputs
        self.classifier = fc_layer(mlp_output_size, classes, excit_buffer=True, nl='none', drop=fc_drop)
    elif self.cls_type == 'lstm':
        self.lstm_fc = nn.Linear(fc_units, classes)

    #################
    # +++++ GEM +++++
    #################
    if args is not None and args.gem:
        print('this is test for GEM ')
        self.margin = args.memory_strength
        self.ce = nn.CrossEntropyLoss()
        self.n_outputs = classes
        self.n_memories = args.n_memories
        self.gpu = args.cuda
        n_tasks = len(args.num_classes_per_task_l)

        # allocate episodic memory (one slot of `n_memories` samples per task)
        self.memory_data = torch.FloatTensor(n_tasks, self.n_memories, self.num_seq, self.num_features)
        self.memory_labs = torch.LongTensor(n_tasks, self.n_memories)
        if args.cuda:
            self.memory_data = self.memory_data.to(self.device)
            self.memory_labs = self.memory_labs.to(self.device)

        # allocate temporary synaptic memory (one column of flattened gradients per task)
        self.grad_dims = [param.data.numel() for param in self.parameters()]
        self.grads = torch.Tensor(sum(self.grad_dims), n_tasks)
        if args.cuda:
            self.grads = self.grads.to(self.device)

        # allocate counters
        self.observed_tasks = []
        self.old_task = -1
        self.mem_cnt = 0