예제 #1
0
    def __init__(self, image_size, image_channels, classes,
                 fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True, fc_nl="relu", gated=False, z_dim=20,
                 dataset="mnist"):
        """Set up a fully-connected variational auto-encoder (VAE) with a classifier head.

        Args:
            image_size: side length of a square image, or -- for the "ckplus" and
                "affectnet" datasets -- an indexable (size_0, size_1) pair.
            image_channels: number of image channels.
            classes: number of outputs of the classifier head.
            fc_layers: number of fully-connected layers; must be at least 1.
            fc_units: width of the hidden fully-connected layers.
            fc_drop: dropout setting forwarded to the MLPs.
            fc_bn: batch-norm flag forwarded to the MLPs and, when there are
                hidden layers, to the from-z layer.
            fc_nl: non-linearity name forwarded to the MLPs.
            gated: gating flag forwarded to the MLPs.
            z_dim: size of the latent code.
            dataset: dataset name; selects how the flattened input size is computed.

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """
        super().__init__()

        # Guard clause: at least one fully-connected layer is required.
        if fc_layers < 1:
            raise ValueError("VAE cannot have 0 fully-connected layers!")

        # ---- configuration ----
        self.label = "VAE"
        self.image_size, self.image_channels = image_size, image_channels
        self.classes = classes
        self.fc_layers, self.fc_units = fc_layers, fc_units
        self.z_dim = z_dim

        # ---- weights of the separate loss terms ----
        self.lamda_rcl = 1.
        self.lamda_vl = 1.
        # per the original note: set to 1 when the model is used as a
        # "classifier with feedback-connections"
        self.lamda_pl = 0.

        # per the original note: reconstruction and variational loss are both
        # divided by the number of input pixels when this flag is set
        self.average = True

        ######------SPECIFY MODEL------######

        ##>----Encoder (= q[z|x])----<##
        # flatten the image into a 2D tensor
        self.flatten = utils.Flatten()

        # size of one flattened input
        if dataset in ("ckplus", "affectnet"):
            size_0, size_1 = image_size[0], image_size[1]
            self.input_size = size_0 * size_1 * image_channels
        else:
            self.input_size = image_channels * image_size ** 2

        # hidden layers of the encoder
        hidden_layer_count = fc_layers - 1
        self.fcE = MLP(
            input_size=self.input_size,
            output_size=fc_units,
            layers=hidden_layer_count,
            hid_size=fc_units,
            drop=fc_drop,
            batch_norm=fc_bn,
            nl=fc_nl,
            gated=gated,
        )

        # Width of the encoder features, plus the batch-norm / non-linearity
        # settings of the layer that maps z back to that width.
        if fc_layers > 1:
            feature_size = fc_units
            from_z_bn, from_z_nl = fc_bn, fc_nl
        else:
            feature_size = self.input_size
            from_z_bn, from_z_nl = False, "none"

        # encoder features -> latent code
        self.toZ = fc_layer_split(feature_size, z_dim, nl_mean='none', nl_logvar='none')

        ##>----Classifier----<##
        self.classifier = fc_layer(feature_size, classes, excit_buffer=True, nl='none')

        ##>----Decoder (= p[x|z])----<##
        # latent code -> decoder features
        self.fromZ = fc_layer(z_dim, feature_size, batch_norm=from_z_bn, nl=from_z_nl)
        # hidden layers of the decoder
        self.fcD = MLP(
            input_size=fc_units,
            output_size=self.input_size,
            layers=hidden_layer_count,
            hid_size=fc_units,
            drop=fc_drop,
            batch_norm=fc_bn,
            nl=fc_nl,
            gated=gated,
            output='BCE',
        )
        # back to image shape
        self.to_image = utils.Reshape(image_channels=image_channels)
예제 #2
0
    def __init__(self, image_size, image_channels, classes,
                 fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=False, fc_nl="relu", gated=False,
                 bias=True, excitability=False, excit_buffer=False, binaryCE=False, binaryCE_distill=False, AGEM=False,
                 experiment='splitMNIST'):
        """Set up a classifier whose feature extractor depends on ``experiment``.

        'CIFAR10', 'CIFAR100' and 'CUB2011' use ``rn.resnet32`` with its
        ``linear`` layer replaced by an identity; 'ImageNet' uses
        ``resnet18(pretrained=True)`` with its ``fc`` layer replaced by an
        identity; any other value flattens the image and uses an MLP.

        Args:
            image_size: side length of a square image (MLP branch only).
            image_channels: number of image channels (MLP branch only).
            classes: number of outputs of the classifier head.
            fc_layers: number of fully-connected layers; must be at least 1.
            fc_units: hidden width of the MLP.
            fc_drop: dropout setting for the MLP and the classifier head.
            fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded
                to the MLP.
            binaryCE: use binary instead of multiclass prediction error.
            binaryCE_distill: for classes of previous tasks, use the previous
                model's predicted probabilities as binary targets (only in
                Class-IL with binaryCE).
            AGEM: use the gradient of replayed data as an inequality
                constraint rather than adding it to the gradient of the
                current data (A-GEM; Chaudhry et al., 2019, ICLR).
            experiment: experiment name selecting the feature extractor.

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """
        super().__init__()

        # ---- configuration ----
        self.label = "Classifier"
        self.classes = classes
        self.fc_layers = fc_layers

        # ---- training settings ----
        self.binaryCE, self.binaryCE_distill = binaryCE, binaryCE_distill
        self.AGEM = AGEM

        # ---- online memory distillation ----
        self.is_offline_training = False
        self.is_ready_distill = False
        self.alpha_t = 0.5

        # at least one fully-connected layer is required
        if fc_layers < 1:
            raise ValueError("The classifier needs to have at least 1 fully-connected layer.")

        ######------SPECIFY MODEL------######
        self.experiment = experiment

        # Each branch builds the feature extractor and records the width of
        # the features handed to the classifier head.
        if experiment == 'ImageNet':
            ResNet.name = 'ResNet-18'
            self.fcE = resnet18(pretrained=True)
            self.fcE.fc = nn.Identity()
            feature_size = 512
        elif experiment in ('CIFAR10', 'CIFAR100', 'CUB2011'):
            self.fcE = rn.resnet32(classes, pretrained=False)
            self.fcE.linear = nn.Identity()
            feature_size = 64
        else:
            # flatten the image into a 2D tensor
            self.flatten = utils.Flatten()

            flat_size = image_channels * image_size ** 2
            # fully connected hidden layers
            self.fcE = MLP(
                input_size=flat_size,
                output_size=fc_units,
                layers=fc_layers - 1,
                hid_size=fc_units,
                drop=fc_drop,
                batch_norm=fc_bn,
                nl=fc_nl,
                bias=bias,
                excitability=excitability,
                excit_buffer=excit_buffer,
                gated=gated,
            )
            feature_size = fc_units if fc_layers > 1 else flat_size

        # classifier head on top of the extracted features
        self.classifier = fc_layer(feature_size, classes, excit_buffer=True, nl='none', drop=fc_drop)
예제 #3
0
    def __init__(self,
                 classes,
                 fc_layers=3,
                 fc_units=1000,
                 fc_drop=0,
                 fc_bn=False,
                 fc_nl="relu",
                 gated=False,
                 bias=True,
                 excitability=False,
                 excit_buffer=False,
                 binaryCE=False,
                 binaryCE_distill=False,
                 AGEM=False):
        """Set up a classifier that consists only of an output layer on 128 features.

        Args:
            classes: number of outputs of the classifier layer.
            fc_layers: stored on the instance; not otherwise used here.
            fc_drop: dropout setting of the classifier layer.
            fc_units, fc_bn, fc_nl, gated, bias, excitability, excit_buffer:
                accepted but not used in this constructor.
            binaryCE: use binary instead of multiclass prediction error.
            binaryCE_distill: for classes of previous tasks, use the previous
                model's predicted probabilities as binary targets (only in
                Class-IL with binaryCE).
            AGEM: use the gradient of replayed data as an inequality
                constraint rather than adding it to the gradient of the
                current data (A-GEM; Chaudhry et al., 2019, ICLR).
        """
        super().__init__()

        # ---- configuration ----
        self.label = "Classifier"
        self.classes, self.fc_layers = classes, fc_layers

        # ---- training settings ----
        self.binaryCE, self.binaryCE_distill = binaryCE, binaryCE_distill
        self.AGEM = AGEM

        # ---- classifier: single output layer on a fixed 128-d input ----
        head_options = dict(excit_buffer=True, nl='none', drop=fc_drop)
        self.classifier = fc_layer(128, classes, **head_options)
예제 #4
0
    def __init__(self,
                 image_size,
                 image_channels,
                 classes,
                 fc_layers=3,
                 fc_units=1000,
                 fc_drop=0,
                 fc_bn=True,
                 fc_nl="relu",
                 gated=False,
                 bias=True,
                 excitability=False,
                 excit_buffer=False,
                 binaryCE=False,
                 binaryCE_distill=False):
        """Set up a fully-connected classifier: flatten, MLP, output layer.

        Args:
            image_size: side length of a square input image.
            image_channels: number of image channels.
            classes: number of outputs of the classifier layer.
            fc_layers: number of fully-connected layers; must be at least 1.
            fc_units: hidden width of the MLP.
            fc_drop: dropout setting for the MLP and the classifier layer.
            fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded
                to the MLP.
            binaryCE: stored as a training setting.
            binaryCE_distill: stored as a training setting.

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """
        super().__init__()

        # ---- configuration ----
        self.label = "Classifier"
        self.classes = classes
        self.fc_layers = fc_layers

        # ---- training settings ----
        self.binaryCE, self.binaryCE_distill = binaryCE, binaryCE_distill

        # at least one fully-connected layer is required
        if fc_layers < 1:
            raise ValueError(
                "The classifier needs to have at least 1 fully-connected layer."
            )

        ######------SPECIFY MODEL------######

        # number of values in one flattened image
        flat_size = image_channels * image_size**2

        # flatten the image into a 2D tensor
        self.flatten = utils.Flatten()

        # fully connected hidden layers
        encoder_options = {
            "input_size": flat_size,
            "output_size": fc_units,
            "layers": fc_layers - 1,
            "hid_size": fc_units,
            "drop": fc_drop,
            "batch_norm": fc_bn,
            "nl": fc_nl,
            "bias": bias,
            "excitability": excitability,
            "excit_buffer": excit_buffer,
            "gated": gated,
        }
        self.fcE = MLP(**encoder_options)

        # width of what the classifier layer receives
        if fc_layers > 1:
            feature_size = fc_units
        else:
            feature_size = flat_size

        # classifier
        self.classifier = fc_layer(feature_size, classes, excit_buffer=True, nl='none', drop=fc_drop)
예제 #5
0
    def __init__(self, image_size, image_channels, classes,
                 fc_layers=3, fc_units=1000, fc_drop=0, fc_bn=True, fc_nl="relu", z_dim=20):
        """Set up a fully-connected VAE with a classifier on the encoder features.

        Args:
            image_size: side length of a square input image.
            image_channels: number of image channels.
            classes: number of outputs of the classifier.
            fc_layers: number of fully-connected layers; must be at least 1.
            fc_units: width of the hidden fully-connected layers.
            fc_drop: dropout setting for the MLPs and the classifier.
            fc_bn: batch-norm flag for the MLPs and, when there are hidden
                layers, for the from-z layer.
            fc_nl: non-linearity name for the MLPs.
            z_dim: size of the latent code.

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """
        super().__init__()

        # ---- configuration ----
        self.label = "VAE"
        self.image_size, self.image_channels = image_size, image_channels
        self.classes = classes
        self.fc_layers, self.fc_units = fc_layers, fc_units
        self.z_dim = z_dim

        # ---- training-related components, to be set before training ----
        # criterion for reconstruction
        self.recon_criterion = None
        # weights of the separate loss terms
        self.lamda_rcl = 1.
        self.lamda_vl = 1.
        # per the original note: set to 1 when the model is used as a
        # "classifier with feedback-connections"
        self.lamda_pl = 0.

        # at least one fully-connected layer is required
        if fc_layers < 1:
            raise ValueError("VAE cannot have 0 fully-connected layers!")

        ######------SPECIFY MODEL------######

        pixel_count = image_channels * image_size ** 2
        hidden_layer_count = fc_layers - 1

        # encoder: flatten image to 2D-tensor
        self.flatten = utils.Flatten()
        # encoder: fully connected hidden layers
        self.fcE = linear_nets.MLP(
            input_size=pixel_count,
            output_size=fc_units,
            layers=hidden_layer_count,
            hid_size=fc_units,
            drop=fc_drop,
            batch_norm=fc_bn,
            nl=fc_nl,
            final_nl=True,
        )

        # Encoder output width and decoder input width are the same value;
        # the from-z layer only gets batch-norm / a non-linearity when there
        # are hidden layers.
        if fc_layers > 1:
            bottleneck_size = fc_units
            from_z_bn, from_z_nl = fc_bn, fc_nl
        else:
            bottleneck_size = pixel_count
            from_z_bn, from_z_nl = False, "none"

        # classifier (from final hidden layer of encoder)
        self.classifier = nn.Sequential(
            nn.Dropout(fc_drop),
            eM.LinearExcitability(bottleneck_size, classes),
        )

        # reparametrization ("to Z and back")
        self.toZ = nn.Linear(bottleneck_size, z_dim)        # estimating mean
        self.toZlogvar = nn.Linear(bottleneck_size, z_dim)  # estimating log(SD**2)
        self.fromZ = linear_nets.fc_layer(z_dim, bottleneck_size, batch_norm=from_z_bn, nl=from_z_nl)

        # decoder: fully connected hidden layers (no non-linearity or batchnorm in final layer)
        self.fcD = linear_nets.MLP(
            input_size=fc_units,
            output_size=pixel_count,
            layers=hidden_layer_count,
            hid_size=fc_units,
            drop=fc_drop,
            batch_norm=fc_bn,
            nl=fc_nl,
            final_nl=False,
        )
        # decoder: reshape to image
        self.reshapeD = utils.ToImage(image_channels=image_channels)
예제 #6
0
    def __init__(self,
                 image_size,
                 image_channels,
                 classes,
                 fc_layers=3,
                 fc_units=1000,
                 fc_drop=0,
                 fc_bn=False,
                 fc_nl="relu",
                 gated=False,
                 bias=True,
                 excitability=False,
                 excit_buffer=False,
                 binaryCE=False,
                 binaryCE_distill=False,
                 AGEM=False):
        """Set up a fully-connected classifier (flatten, MLP, output layer) with A-GEM flag.

        Args:
            image_size: side length of a square input image.
            image_channels: number of image channels.
            classes: number of outputs of the classifier layer.
            fc_layers: number of fully-connected layers; must be at least 1.
            fc_units: hidden width of the MLP.
            fc_drop: dropout setting for the MLP and the classifier layer.
            fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded
                to the MLP.
            binaryCE: use binary instead of multiclass prediction error.
            binaryCE_distill: for classes of previous tasks, use the previous
                model's predicted probabilities as binary targets (only in
                Class-IL with binaryCE).
            AGEM: use the gradient of replayed data as an inequality
                constraint rather than adding it to the gradient of the
                current data (A-GEM; Chaudhry et al., 2019, ICLR).

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """
        super().__init__()

        # at least one fully-connected layer is required
        if fc_layers < 1:
            raise ValueError(
                "The classifier needs to have at least 1 fully-connected layer."
            )

        # ---- configuration ----
        self.classes = classes
        self.label = "Classifier"
        self.fc_layers = fc_layers

        # ---- training settings ----
        self.binaryCE = binaryCE
        self.binaryCE_distill = binaryCE_distill
        self.AGEM = AGEM

        ######------SPECIFY MODEL------######

        has_hidden_layers = fc_layers > 1
        flattened_size = image_channels * image_size**2

        # flatten the image into a 2D tensor
        self.flatten = utils.Flatten()

        # fully connected hidden layers
        self.fcE = MLP(
            input_size=flattened_size,
            output_size=fc_units,
            layers=fc_layers - 1,
            hid_size=fc_units,
            drop=fc_drop,
            batch_norm=fc_bn,
            nl=fc_nl,
            bias=bias,
            excitability=excitability,
            excit_buffer=excit_buffer,
            gated=gated,
        )

        # classifier: fed by the hidden width, or by the raw flattened input
        # when there are no hidden layers
        head_input_size = fc_units if has_hidden_layers else flattened_size
        self.classifier = fc_layer(
            head_input_size,
            classes,
            excit_buffer=True,
            nl='none',
            drop=fc_drop,
        )
    def __init__(self,
                 num_features,
                 num_seq,
                 classes,
                 fc_layers=3,
                 fc_units=1000,
                 fc_drop=0,
                 fc_bn=True,
                 fc_nl="relu",
                 gated=False,
                 bias=True,
                 excitability=None,
                 excit_buffer=False,
                 binaryCE=False,
                 binaryCE_distill=False,
                 experiment='splitMNIST',
                 cls_type='mlp',
                 args=None):
        """Set up an MLP- or LSTM-based classifier, optionally with GEM buffers.

        Args:
            num_features: feature count of the input. Used as the LSTM input
                size; the MLP input size is ``num_seq * num_features`` for the
                'sensor' experiment and ``num_seq * num_features**2`` otherwise.
            num_seq: multiplier of the MLP input size (see ``num_features``)
                and third dimension of the GEM memory buffer.
            classes: number of outputs of the classifier.
            fc_layers: number of fully-connected layers; must be at least 1.
                The LSTM gets ``fc_layers - 1`` layers.
            fc_units: hidden width of the MLP / hidden size of the LSTM.
            fc_drop: dropout setting for the MLP, the classifier layer and
                (when it has more than one layer) the LSTM.
            fc_bn, fc_nl, gated, bias, excitability, excit_buffer: forwarded
                to the MLP.
            binaryCE: stored as a training setting.
            binaryCE_distill: stored as a training setting.
            experiment: experiment name; 'sensor' selects the sequence-input
                variants of the feature extractor.
            cls_type: 'mlp' or 'lstm'.
            args: optional namespace of run options. When given, ``cuda``,
                ``device``, ``weights_per_class`` and ``gem`` are read; with
                ``gem`` set, also ``memory_strength``, ``n_memories`` and
                ``num_classes_per_task_l``. The 'sensor' LSTM additionally
                reads ``args.input_drop``, so ``args`` is required there.

        Raises:
            ValueError: if ``fc_layers`` is smaller than 1.
        """

        # configurations
        super().__init__()
        self.num_features = num_features
        self.num_seq = num_seq
        self.classes = classes
        self.label = "Classifier"
        self.fc_layers = fc_layers
        self.hidden_dim = fc_units
        self.layer_dim = fc_layers - 1
        # NOTE(review): if this class derives from torch.nn.Module, the
        # attribute below shadows Module.cuda() on the instance -- confirm
        # that no caller relies on calling `.cuda()` on this model.
        self.cuda = None if args is None else args.cuda
        # `args` defaults to None, so guard this the same way as its neighbours
        self.device = None if args is None else args.device
        self.weights_per_class = None if args is None else torch.FloatTensor(
            args.weights_per_class).to(args.device)

        # settings for training
        self.binaryCE = binaryCE
        self.binaryCE_distill = binaryCE_distill

        # check whether there is at least 1 fc-layer
        if fc_layers < 1:
            raise ValueError(
                "The classifier needs to have at least 1 fully-connected layer."
            )

        ######------SPECIFY MODEL------######
        self.cls_type = cls_type
        self.experiment = experiment
        # flatten image to 2D-tensor
        self.flatten = utils.Flatten()

        # Input width of the MLP feature extractor; stays None when no MLP is
        # built (i.e. the 'sensor' experiment with a non-'mlp' classifier).
        mlp_input_size = None

        # feature extractor
        if experiment == 'sensor':
            if self.cls_type == 'mlp':
                mlp_input_size = num_seq * num_features
                self.fcE = MLP(input_size=mlp_input_size,
                               output_size=fc_units,
                               layers=fc_layers - 1,
                               hid_size=fc_units,
                               drop=fc_drop,
                               batch_norm=fc_bn,
                               nl=fc_nl,
                               bias=bias,
                               excitability=excitability,
                               excit_buffer=excit_buffer,
                               gated=gated)
            elif self.cls_type == 'lstm':
                self.lstm_input_dropout = nn.Dropout(args.input_drop)
                # inter-layer dropout is disabled for a single-layer LSTM
                lstm_layers = fc_layers - 1
                self.lstm = nn.LSTM(input_size=num_features,
                                    hidden_size=fc_units,
                                    num_layers=lstm_layers,
                                    dropout=0.0 if lstm_layers == 1 else fc_drop,
                                    batch_first=True)
        else:
            mlp_input_size = num_seq * num_features**2
            self.fcE = MLP(input_size=mlp_input_size,
                           output_size=fc_units,
                           layers=fc_layers - 1,
                           hid_size=fc_units,
                           drop=fc_drop,
                           batch_norm=fc_bn,
                           nl=fc_nl,
                           bias=bias,
                           excitability=excitability,
                           excit_buffer=excit_buffer,
                           gated=gated)

        # classifier
        if self.cls_type == 'mlp':
            # Without hidden layers the classifier is sized to the MLP's own
            # input width, which differs between 'sensor' and other experiments.
            mlp_output_size = fc_units if fc_layers > 1 else mlp_input_size
            self.classifier = fc_layer(mlp_output_size,
                                       classes,
                                       excit_buffer=True,
                                       nl='none',
                                       drop=fc_drop)
        elif self.cls_type == 'lstm':
            self.lstm_fc = nn.Linear(fc_units, classes)

        #################
        # +++++ GEM +++++
        #####
        # GEM state is only allocated when run options were given and ask for it
        if args is not None and args.gem:
            print('this is test for GEM ')
            self.margin = args.memory_strength
            self.ce = nn.CrossEntropyLoss()
            self.n_outputs = classes
            self.n_memories = args.n_memories
            self.gpu = args.cuda
            n_tasks = len(args.num_classes_per_task_l)
            # allocate episodic memory (contents are uninitialised)
            self.memory_data = torch.FloatTensor(n_tasks, self.n_memories,
                                                 self.num_seq,
                                                 self.num_features)
            self.memory_labs = torch.LongTensor(n_tasks, self.n_memories)
            if args.cuda:
                self.memory_data = self.memory_data.to(self.device)
                self.memory_labs = self.memory_labs.to(self.device)

            # allocate temporary synaptic memory: one column per task, one row
            # per scalar parameter of the model
            self.grad_dims = [param.data.numel() for param in self.parameters()]
            self.grads = torch.Tensor(sum(self.grad_dims), n_tasks)
            if args.cuda:
                self.grads = self.grads.to(self.device)

            # allocate counters
            self.observed_tasks = []
            self.old_task = -1
            self.mem_cnt = 0