Example #1
    def test_multihead(self, task_id, testloader):
        self.net.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(self.device), targets.to(
                    self.device)
                inputs = Variable(inputs)
                targets = Variable(targets)

                outputs, _ = self.net(inputs)
                loss = self.criterion(outputs, targets)

                test_loss += loss.item()
                # argmax over the current task's slice of the logits only
                _, predicted = outputs[:, args.classes_per_task *
                                       task_id:args.classes_per_task *
                                       (task_id + 1)].max(1)
                total += targets.size(0)
                # map head-local predictions back to global class indices
                correct += (
                    predicted +
                    args.classes_per_task * task_id).eq(targets).sum().item()

                progress_bar(
                    batch_idx, len(testloader),
                    'Loss:%.3f|Acc:%.3f%% (%d/%d)--Test' %
                    (test_loss /
                     (batch_idx + 1), 100. * correct / total, correct, total))

        return correct / total

    def test(self, testloader):
        self.net.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                inputs = Variable(inputs)
                targets = Variable(targets)

                outputs = self.net(inputs)
                loss = self.criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                progress_bar(batch_idx, len(testloader), 'Loss:%.3f|Acc:%.3f%% (%d/%d)--Test'
                             % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
            #
            # print('target', targets)
            # print('predicted', predicted)
        return correct/total
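
The multi-head test above restricts the argmax to the current task's slice of the logits and then shifts the prediction back to a global class index before comparing with the targets. A small, self-contained sketch of that indexing (the tensors below are made up for illustration and are not part of the example):

import torch

classes_per_task = 2          # stands in for args.classes_per_task
task_id = 1                   # second task -> global classes 2 and 3
outputs = torch.tensor([[0.1, 0.2, 1.5, 0.3],    # logits over all 4 classes
                        [0.0, 0.9, 0.2, 2.0]])
targets = torch.tensor([2, 3])                   # global labels for task 1

head = outputs[:, classes_per_task * task_id:classes_per_task * (task_id + 1)]
_, predicted = head.max(1)                       # 0/1 within the task head
correct = (predicted + classes_per_task * task_id).eq(targets).sum().item()
print(correct)  # 2
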
Example #3
    def train(self, epoch, trainloader):
        if args.resume:
            # Load checkpoint (note: with args.resume set, this block runs at
            # the start of every epoch and reloads the saved weights each time).
            print('==> Resuming from checkpoint..')
            assert os.path.isdir(
                'checkpoint'), 'Error: no checkpoint directory found!'
            checkpoint = torch.load('./checkpoint/ckpt.t7')
            self.net.load_state_dict(checkpoint['net'])
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']

        print('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        self.scheduler.step()
        self.net.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            inputs_var = Variable(inputs)
            targets_var = Variable(targets)

            self.optimizer.zero_grad()

            outputs, _ = self.net(inputs_var)
            loss = self.criterion(outputs, targets_var)

            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            self.loss = train_loss
            progress_bar(
                batch_idx, len(trainloader),
                'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' %
                (train_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))
        return correct / total
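
The train and test methods in these examples assume the surrounding class already provides self.net, self.criterion, self.optimizer, self.scheduler and self.device. A minimal setup sketch consistent with the SGD/StepLR construction used later in train_with_mask_with_EWC (the class name, the cross-entropy criterion and the default hyperparameters here are assumptions, not the original code):

import torch
import torch.nn as nn
import torch.optim as optim

class Trainer:
    def __init__(self, net, lr=0.1, step_size=30, gamma=0.1, weight_decay=5e-4):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.net = net.to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.net.parameters(), lr=lr,
                                   momentum=0.9, weight_decay=weight_decay)
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer,
                                                   step_size=step_size,
                                                   gamma=gamma)
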
    def train_fc(self, epoch, trainloader):
        for name, param in self.net.named_parameters():
            if re.search('conv', name) or re.search('bn', name):
                param.requires_grad = False
            elif re.search('linear', name):
                param.requires_grad = True

        self.optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                          self.net.parameters()),
                                   lr=args.lr,
                                   momentum=0.9,
                                   weight_decay=args.weight_decay)

        logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        # dummy optimizer.step() before scheduler.step(), which avoids the
        # "scheduler.step() before optimizer.step()" warning in PyTorch >= 1.1
        self.optimizer.step()
        self.scheduler.step()
        self.net.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            inputs_var = Variable(inputs)
            targets_var = Variable(targets)
            self.optimizer.zero_grad()
            outputs = self.net(inputs_var)
            loss = self.criterion(outputs, targets_var)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            self.loss = train_loss
            progress_bar(
                batch_idx, len(trainloader),
                'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' %
                (train_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))
        return correct / total

    def train_with_mask_with_KD(self, epoch, trainloader, KD_target_list, len_onehot):
        # save_mask_file / save_mask_fileR are module-level paths; the loaded
        # masks are not used further in the code shown here
        mask_dict = pickle.load(open(save_mask_file, "rb"))
        mask_reverse_dict = pickle.load(open(save_mask_fileR, "rb"))

        lr_list = self.scheduler.get_lr()
        logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        self.scheduler.step()
        self.net.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            targets_KD = self.make_one_hot(targets, len_onehot)
            w = KD_target_list[batch_idx].shape[1]
            targets_KD[:, 0:w] = KD_target_list[batch_idx][:, 0:w]

            inputs, targets_KD, targets = inputs.to(self.device), targets_KD.to(self.device), targets.to(self.device)

            inputs_var = Variable(inputs)
            targets_var_KD = Variable(targets_KD)

            self.optimizer.zero_grad()
            # print(targets_var_KD.shape)
            outputs = self.net(inputs_var)/args.temperature
            loss = self.xentropy_cost(outputs, targets_var_KD)
            # break
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets_var_KD.size(0)
            correct += predicted.eq(targets).sum().item()
            self.loss = train_loss

            progress_bar(batch_idx, len(trainloader), 'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        return correct / total
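
train_with_mask_with_KD relies on two helpers that are not shown, self.make_one_hot and self.xentropy_cost. A plausible sketch of both, assuming the distillation targets are soft label distributions (the original implementations may differ):

import torch.nn.functional as F

def make_one_hot(targets, num_classes):
    # (batch,) integer labels -> (batch, num_classes) one-hot float matrix
    return F.one_hot(targets, num_classes).float()

def xentropy_cost(outputs, soft_targets):
    # cross-entropy against soft targets: batch mean of -sum_c q_c * log_softmax(logits)_c
    log_probs = F.log_softmax(outputs, dim=1)
    return -(soft_targets * log_probs).sum(dim=1).mean()
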
    def train(self, epoch, trainloader):
        logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        self.net.train()
        train_loss = 0.0
        correct = 0
        total = 0
        self.optimizer.step()

        self.scheduler.step()

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            inputs_var = Variable(inputs)
            targets_var = Variable(targets)

            self.optimizer.zero_grad()
            outputs = self.net(inputs_var)
            loss = self.criterion(outputs, targets_var)

            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            self.loss = train_loss
            acc = 100. * correct / total
            progress_bar(
                batch_idx, len(trainloader),
                'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' %
                (train_loss / (batch_idx + 1), acc, correct, total))

        if epoch == args.epoch - 1:
            self.save_checkpoint_t7(epoch, acc, train_loss)
        return correct / total
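
save_checkpoint_t7 is not shown in these examples. The resume block in the first train method expects ./checkpoint/ckpt.t7 to contain 'net', 'acc' and 'epoch' keys, so a matching sketch could look like the following (the loss/suffix/path_postfix arguments mirror the calls above, but how they are handled is an assumption):

import os
import torch

def save_checkpoint_t7(self, epoch, acc, loss, suffix='', path_postfix=''):
    # persist the model weights plus the bookkeeping values the resume code expects
    state = {'net': self.net.state_dict(), 'acc': acc, 'epoch': epoch, 'loss': loss}
    ckpt_dir = './checkpoint' + path_postfix
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(state, os.path.join(ckpt_dir, 'ckpt%s.t7' % suffix))
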
    def train_with_mask_with_EWC(self, current_trainloader, previous_trainloader,  fisher_estimation_sample_size=256): # retrain percentage
        all_loader = []
        all_loader.append(previous_trainloader)
        all_loader.append(current_trainloader)

        for task_id, trainloader in enumerate(all_loader):
            # self.initialization(args.lr_mutant, args.lr_mutant_step_size, args.weight_decay_2)
            self.optimizer = optim.SGD(self.net.parameters(), lr=args.lr_mutant, momentum=0.9, weight_decay=args.weight_decay_2)
            self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=args.lr_mutant_step_size, gamma=args.lr_gamma)
            # self.save_mutant(55, 1)
            train_acc = np.zeros([1, args.num_epoch])
            test_acc = np.zeros([1, args.num_epoch])

            for epoch in range(args.num_epoch):

                lr_list = self.scheduler.get_lr()
                logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
                self.scheduler.step()
                self.net.train()
                train_loss = 0.0
                correct = 0
                total = 0

                for batch_idx, (inputs, targets) in enumerate(trainloader):
                    # print(batch_idx)
                    # targets =  self.make_one_hot(targets, len_onehot)
                    # print(target)
                    inputs, targets = inputs.to(self.device), targets.to(self.device)

                    inputs_var = Variable(inputs)
                    targets_var = Variable(targets)

                    self.optimizer.zero_grad()
                    outputs = self.net(inputs_var)
                    loss = self.criterion(outputs, targets_var)

                    if args.ewc and task_id > 0:
                        ewc_loss = self.net.ewc_loss(cuda=self.device)
                    else:
                        ewc_loss = 0.0

                    loss = loss + ewc_loss
                    loss.backward()
                    self.optimizer.step()

                    _, predicted = outputs.max(1)
                    total += targets.size(0)
                    correct += predicted.eq(targets).sum().item()
                    self.loss = loss

                # progress bar refreshed once per epoch, using the last batch's stats
                progress_bar(batch_idx, len(trainloader), ' | Loss:%.3f| ewc_loss:%.3f | Acc:%.3f%% (%d/%d) -- Train Task(%d/%d)'
                     % (loss, ewc_loss, 100.*correct/total, correct, total, task_id, len(all_loader)))
                train_acc[0, epoch] = correct / total
                # test_acc[0, epoch] = self.test(trainloader)


            # note: task_id < len(all_loader) always holds here, so the else
            # branch below is reached only when args.consolidate is False
            if args.consolidate and task_id < len(all_loader):
                # estimate the fisher information of the parameters and consolidate
                # them in the network.
                print(
                    '=> Estimating diagonals of the fisher information matrix...',
                    end='', flush=True
                )
                self.net.consolidate(self.net.estimate_fisher(
                    trainloader, fisher_estimation_sample_size
                ))
                print(' Done!')
            else:
                logging.info('No consolidate/EWC loss available')

        return test_acc
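
train_with_mask_with_EWC assumes the network implements estimate_fisher, consolidate and ewc_loss. A compact sketch of that interface, following the standard diagonal-Fisher penalty (lambda / 2) * sum_i F_i * (theta_i - theta_i*)^2; buffer names, the lambda default and other details are assumptions:

import torch
import torch.nn.functional as F

class EWCMixin:
    """Mixin for an nn.Module subclass; forward() is assumed to return the logits."""

    def estimate_fisher(self, loader, sample_size):
        # diagonal Fisher estimate from squared gradients of the data log-likelihood
        device = next(self.parameters()).device
        fisher = {n: torch.zeros_like(p) for n, p in self.named_parameters()
                  if p.requires_grad}
        seen = 0
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            self.zero_grad()
            F.cross_entropy(self(inputs), targets).backward()
            for n, p in self.named_parameters():
                if p.requires_grad and p.grad is not None:
                    fisher[n] += p.grad.detach() ** 2 * inputs.size(0)
            seen += inputs.size(0)
            if seen >= sample_size:
                break
        return {n: f / max(seen, 1) for n, f in fisher.items()}

    def consolidate(self, fisher):
        # store the current parameters and their Fisher values as buffers
        for n, p in self.named_parameters():
            if n in fisher:
                key = n.replace('.', '__')
                self.register_buffer(key + '_mean', p.detach().clone())
                self.register_buffer(key + '_fisher', fisher[n].clone())

    def ewc_loss(self, cuda=None, lam=1.0):
        # quadratic penalty keeping parameters close to the consolidated ones;
        # the cuda argument only mirrors the call signature used above
        losses = []
        for n, p in self.named_parameters():
            key = n.replace('.', '__')
            if hasattr(self, key + '_mean'):
                mean = getattr(self, key + '_mean')
                fisher = getattr(self, key + '_fisher')
                losses.append((fisher * (p - mean) ** 2).sum())
        if not losses:
            return torch.zeros((), device=next(self.parameters()).device)
        return (lam / 2.0) * sum(losses)
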
Example #8
    def train_with_frozen_filter(self, epoch, trainloader, mask_dict,
                                 mask_dict_R):
        param_old_dict = OrderedDict([(k, None)
                                      for k in self.net.state_dict().keys()])
        for layer_name, param in self.net.state_dict().items():
            param_old_dict[layer_name] = param.clone()

        print('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        self.scheduler.step()
        self.net.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(trainloader):

            inputs, targets = inputs.to(self.device), targets.to(self.device)

            inputs_var = Variable(inputs)
            targets_var = Variable(targets)
            self.optimizer.zero_grad()
            outputs, _ = self.net(inputs_var)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # apply mask
            param_processed = OrderedDict([
                (k, None) for k in self.net.state_dict().keys()
            ])
            for layer_name, param_new in self.net.state_dict().items():
                param_new = param_new.type(torch.cuda.FloatTensor)
                param_old_dict[layer_name] = param_old_dict[layer_name].type(
                    torch.cuda.FloatTensor)

                if re.search('conv', layer_name):
                    param_processed[layer_name] = Variable(
                        torch.mul(param_old_dict[layer_name],
                                  mask_dict[layer_name]) +
                        torch.mul(param_new, mask_dict_R[layer_name]),
                        requires_grad=True)

                elif re.search('shortcut', layer_name):
                    if len(param_new.shape) == 4:  # conv in shortcut
                        param_processed[layer_name] = Variable(
                            torch.mul(param_old_dict[layer_name],
                                      mask_dict[layer_name]) +
                            torch.mul(param_new, mask_dict_R[layer_name]),
                            requires_grad=True)
                    else:
                        param_processed[layer_name] = Variable(
                            param_new, requires_grad=True)
                elif re.search('linear', layer_name):
                    param_processed[layer_name] = Variable(
                        torch.mul(param_old_dict[layer_name],
                                  mask_dict[layer_name]) +
                        torch.mul(param_new, mask_dict_R[layer_name]),
                        requires_grad=True)

                else:
                    param_processed[layer_name] = Variable(
                        param_new, requires_grad=True)  # num_batches_tracked
                    # raise ValueError('some parameters are skipped, plz check {}'.format(layer_name))  # num_batches_tracked
            self.net.load_state_dict(param_processed)
            progress_bar(
                batch_idx, len(trainloader),
                'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' %
                (train_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))
        return correct / total
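
Both train_with_frozen_filter variants only require that mask_dict and mask_dict_R hold complementary 0/1 tensors shaped like the conv, shortcut-conv and linear weights: the optimizer still updates every parameter, but re-loading the merged state dict after each step pins the weights where the mask is 1 to their old values, while positions where the reverse mask is 1 keep the new values. The original masks come from pickle files; a hypothetical way to build such a pair (the magnitude-based criterion is an assumption):

import re
import torch

def build_masks(net, keep_ratio=0.5):
    # freeze the largest-magnitude fraction of each relevant layer's weights
    mask_dict, mask_dict_R = {}, {}
    for name, param in net.state_dict().items():
        conv_like = re.search('conv', name) or re.search('linear', name) \
            or (re.search('shortcut', name) and param.dim() == 4)
        if not conv_like:
            continue
        k = max(1, int(param.numel() * keep_ratio))
        flat = param.abs().flatten()
        threshold = flat.kthvalue(param.numel() - k + 1).values
        frozen = (param.abs() >= threshold).float()    # 1 -> keep the old (frozen) value
        mask_dict[name] = frozen
        mask_dict_R[name] = 1.0 - frozen               # 1 -> value stays trainable
    return mask_dict, mask_dict_R
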
    def train_with_frozen_filter(self,
                                 epoch,
                                 trainloader,
                                 mask_dict,
                                 mask_dict_R,
                                 path_postfix=''):

        param_old_dict = OrderedDict([(k, None)
                                      for k in self.net.state_dict().keys()])
        for layer_name, param in self.net.state_dict().items():
            param_old_dict[layer_name] = param.clone()

        self.net.train()
        logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
        train_loss = 0.0
        correct = 0
        total = 0
        self.optimizer.step()
        self.scheduler.step()

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            inputs_var = Variable(inputs)
            targets_var = Variable(targets)

            self.optimizer.zero_grad()
            outputs = self.net(inputs_var)
            loss = self.criterion(outputs, targets)

            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            acc = 100. * correct / total
            # apply mask
            param_processed = OrderedDict([
                (k, None) for k in self.net.state_dict().keys()
            ])
            for layer_name, param_new in self.net.state_dict().items():
                param_new = param_new.type(torch.cuda.FloatTensor)
                param_old_dict[layer_name] = param_old_dict[layer_name].type(
                    torch.cuda.FloatTensor)
                # print(layer_name)
                if re.search('conv', layer_name):
                    param_processed[layer_name] = Variable(
                        torch.mul(param_old_dict[layer_name],
                                  mask_dict[layer_name]) +
                        torch.mul(param_new, mask_dict_R[layer_name]),
                        requires_grad=True)

                    # print('new\n', param_new[0:3, 0, :, :])

                elif re.search('shortcut', layer_name):
                    if len(param_new.shape) == 4:  # conv in shortcut
                        param_processed[layer_name] = Variable(
                            torch.mul(param_old_dict[layer_name],
                                      mask_dict[layer_name]) +
                            torch.mul(param_new, mask_dict_R[layer_name]),
                            requires_grad=True)
                    else:
                        param_processed[layer_name] = Variable(
                            param_new, requires_grad=True)
                elif re.search('linear', layer_name):
                    param_processed[layer_name] = Variable(
                        torch.mul(param_old_dict[layer_name],
                                  mask_dict[layer_name]) +
                        torch.mul(param_new, mask_dict_R[layer_name]),
                        requires_grad=True)

                else:
                    param_processed[layer_name] = Variable(
                        param_new, requires_grad=True)  # num_batches_tracked

            # print('old\n', param_old_dict['conv1.weight'][0:3, 0, :, :])
            # print('mask\n', mask_dict['conv1.weight'][0:3, 0, :, :])
            # print('mask_R\n', mask_dict_R['conv1.weight'][0:3, 0, :, :])
            # print('param_processed\n', param_processed['conv1.weight'][0:3, 0, :, :])

            self.net.load_state_dict(param_processed)
            progress_bar(
                batch_idx, len(trainloader),
                'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train' %
                (train_loss / (batch_idx + 1), acc, correct, total))

        if epoch == 0 or epoch == args.epoch_edge - 1 or epoch == args.epoch_edge // 2:
            self.save_checkpoint_t7(
                epoch,
                acc,
                train_loss,
                '_edge_model',
                path_postfix,
            )
        return correct / total
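
A hypothetical driver for the edge-model variant above, mirroring how masks are unpickled in train_with_mask_with_KD and looping over the epochs (function and variable names here are illustrative, not from the original code):

import pickle

def train_edge_model(trainer, trainloader, mask_path, mask_path_R, num_epochs, postfix=''):
    # load the complementary masks once, then reuse them every epoch
    with open(mask_path, 'rb') as f:
        mask_dict = pickle.load(f)
    with open(mask_path_R, 'rb') as f:
        mask_dict_R = pickle.load(f)
    accs = []
    for epoch in range(num_epochs):
        acc = trainer.train_with_frozen_filter(epoch, trainloader,
                                               mask_dict, mask_dict_R,
                                               path_postfix=postfix)
        accs.append(acc)
    return accs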