Esempio n. 1
0
class iCaRL(BaseLearner):
    """Class-incremental learner trained with a sigmoid/BCE objective.

    Once a previous network exists, the BCE targets for the already-known
    classes are replaced by the sigmoid outputs of that network, so a single
    loss covers both classification and distillation.

    Hyper-parameters (``epochs``, ``lrate``, ``milestones``, ``lrate_decay``,
    ``batch_size``, ``weight_decay``, ``num_workers``) are read from
    module-level names defined outside this class.
    """

    def __init__(self, args):
        """Create the learner; ``args['convnet_type']`` is forwarded to ``IncrementalNet``."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], False)

    def after_task(self):
        """Store ``copy().freeze()`` of the network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Train on the next task and then rebuild the rehearsal memory.

        Args:
            data_manager: object providing ``get_task_size`` and
                ``get_dataset``; it is also handed to
                ``build_rehearsal_memory``.
        """
        self._cur_task += 1
        added_classes = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + added_classes
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        def make_loader(dataset, shuffle):
            # Both loaders share batch size and worker count.
            return DataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              num_workers=num_workers)

        # Training data: classes of the current task plus the stored memory.
        new_class_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_class_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = make_loader(train_dataset, True)

        # Test data: every class seen so far.
        seen_class_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(seen_class_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = make_loader(test_dataset, False)

        # Wrap in DataParallel only for the duration of training / memory
        # construction, then restore the bare module.
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self._train(self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Move the networks to the device, build SGD + MultiStepLR, and train."""
        self._network.to(self._device)
        previous = self._old_network
        if previous is not None:
            previous.to(self._device)

        sgd = optim.SGD(self._network.parameters(),
                        lr=lrate,
                        momentum=0.9,
                        weight_decay=weight_decay)
        lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                     milestones=milestones,
                                                     gamma=lrate_decay)
        self._update_representation(train_loader, test_loader, sgd,
                                    lr_schedule)

    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):
        """Run ``epochs`` epochs of BCE training and log the last epoch's summary.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader passed to ``_compute_accuracy`` every epoch.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
        """
        prog_bar = tqdm(range(epochs))
        for epoch in prog_bar:
            self._network.train()
            running_loss = 0.
            correct, total = 0, 0

            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)['logits']
                bce_targets = target2onehot(targets, self._total_classes)

                if self._old_network is not None:
                    # Replace the columns of the known classes with the
                    # previous network's sigmoid outputs (distillation).
                    old_probs = torch.sigmoid(
                        self._old_network(inputs)['logits'].detach())
                    bce_targets = bce_targets.clone()
                    bce_targets[:, :self._known_classes] = old_probs
                loss = F.binary_cross_entropy_with_logits(logits, bce_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # Running training accuracy from the logits of this batch.
                preds = torch.max(logits, dim=1)[1]
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs,
                running_loss / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info)

        logging.info(info)
class End2End(BaseLearner):
    """Incremental learner with cross-entropy plus per-task distillation.

    Every task after the first is trained in two stages: a training stage on
    new data plus memory, followed by a fine-tuning stage on the memory only.

    Hyper-parameters (``epochs*``, ``lrate*``, ``milestones*``,
    ``lrate_decay``, ``weight_decay``, ``batch_size``, ``num_workers``, ``T``)
    are read from module-level names defined outside this class.
    """

    def __init__(self, args):
        """Create the learner; ``args['convnet_type']`` is forwarded to ``IncrementalNet``."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], False)
        # Number of classes introduced by each task, in task order.
        self._seen_classes = []

    def after_task(self):
        """Store ``copy().freeze()`` of the network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Train on the next task and then rebuild the rehearsal memory.

        Args:
            data_manager: object providing ``get_task_size`` and
                ``get_dataset``; also passed on to the exemplar helpers.
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        self._seen_classes.append(self.task_size)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Training data: classes of the current task plus the stored memory.
        new_class_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_class_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)

        # Test data: every class seen so far.
        seen_class_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(seen_class_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=num_workers)

        # DataParallel wrapper is applied only while training / building
        # the memory, then removed again.
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self._train(data_manager, self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def _train(self, data_manager, train_loader, test_loader):
        """Run the training stage and, after the first task, the fine-tuning stage.

        Note that the loaders actually used for the training stage are
        ``self.train_loader`` / ``self.test_loader``.
        """

        def sgd_with_schedule(learning_rate, steps):
            # Fresh SGD + MultiStepLR pair over the current network parameters.
            sgd = optim.SGD(self._network.parameters(),
                            lr=learning_rate,
                            momentum=0.9,
                            weight_decay=weight_decay)
            schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                      milestones=steps,
                                                      gamma=lrate_decay)
            return sgd, schedule

        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)

        # First task: a single plain training stage with its own schedule.
        if self._cur_task == 0:
            optimizer, scheduler = sgd_with_schedule(lrate_init,
                                                     milestones_init)
            self._is_finetuning = False
            self._run(self.train_loader, self.test_loader, epochs_init,
                      optimizer, scheduler, 'Training')
            return

        # Stage 1: new classes + exemplars.
        optimizer, scheduler = sgd_with_schedule(lrate, milestones)
        self._is_finetuning = False
        self._run(self.train_loader, self.test_loader, epochs, optimizer,
                  scheduler, 'Training')

        # Stage 2 preparation: build the exemplar set used for fine-tuning.
        if self._fixed_memory:
            per_class = self._memory_per_class
            self._construct_exemplar_unified(data_manager, per_class)
        else:
            per_class = self._memory_size // self._known_classes
            self._reduce_exemplar(data_manager, per_class)
            self._construct_exemplar(data_manager, per_class)

        # The network just trained becomes the distillation reference.
        if len(self._multiple_gpus) > 1:
            stage_one_net = self._network.module
        else:
            stage_one_net = self._network
        self._old_network = stage_one_net.copy().freeze()

        finetune_train_dataset = data_manager.get_dataset(
            [], source='train', mode='train', appendent=self._get_memory())
        finetune_train_loader = DataLoader(finetune_train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers)

        # Original author's note: fine-tuning all weights was slightly better
        # than updating the FC layer only.
        optimizer, scheduler = sgd_with_schedule(lrate_finetune,
                                                 milestones_finetune)
        self._is_finetuning = True
        self._run(finetune_train_loader, self.test_loader, epochs_finetune,
                  optimizer, scheduler, 'Finetuning')

        # Drop the temporary exemplars of the new classes again.
        if self._fixed_memory:
            n_temporary = self._memory_per_class * self.task_size
            self._data_memory = self._data_memory[:-n_temporary]
            self._targets_memory = self._targets_memory[:-n_temporary]
            # Only targets of previously known classes may remain.
            unexpected = np.setdiff1d(self._targets_memory,
                                      np.arange(0, self._known_classes))
            assert len(unexpected) == 0, 'Exemplar error!'

    def _run(self, train_loader, test_loader, epochs_, optimizer, scheduler,
             process):
        """Train for ``epochs_`` epochs with CE + distillation and log the last summary.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader passed to ``_compute_accuracy`` every epoch.
            epochs_: number of epochs to run.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
            process: label prefixed to the progress / log message.
        """

        def sharpen(block_logits):
            # Softmax, raise to 1/T, then renormalise each row.
            probs = F.softmax(block_logits, dim=1)**(1 / T)
            return probs / probs.sum(1).view(-1, 1)

        prog_bar = tqdm(range(epochs_))
        for epoch in prog_bar:
            self._network.train()
            running_loss = 0.
            correct, total = 0, 0

            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)['logits']

                clf_loss = F.cross_entropy(logits, targets)

                if self._cur_task == 0:
                    distill_loss = torch.zeros(1, device=self._device)
                else:
                    # During fine-tuning the current task's block is
                    # distilled as well.
                    n_blocks = self._cur_task
                    if self._is_finetuning:
                        n_blocks += 1

                    distill_loss = 0.
                    old_logits = self._old_network(inputs)['logits']
                    for block in range(n_blocks):
                        lo = sum(self._seen_classes[:block])
                        hi = sum(self._seen_classes[:block + 1])
                        distill_loss += F.binary_cross_entropy(
                            sharpen(logits[:, lo:hi]),
                            sharpen(old_logits[:, lo:hi]))

                    # Average over the distilled task blocks.
                    distill_loss *= 1 / n_blocks

                loss = clf_loss + distill_loss
                running_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Running training accuracy from the logits of this batch.
                preds = torch.max(logits, dim=1)[1]
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = '{} => '.format(process)
            info2 = 'Task {}, Epoch {}/{}, Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs_,
                running_loss / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info1 + info2)

        logging.info(info1 + info2)
class End2End(BaseLearner):
    """Incremental learner with cross-entropy plus per-task distillation.

    In this variant the network is called directly for logits, the base class
    is initialised without arguments, and weight decay (``1e-3``) and the
    loader worker count (``4``) are fixed in the code.

    Other hyper-parameters (``epochs*``, ``lrate*``, ``milestones*``,
    ``lrate_decay``, ``batch_size``, ``memory_size``, ``T``) are read from
    module-level names defined outside this class.
    """

    def __init__(self, args):
        """Create the learner from ``args['convnet_type']`` and ``args['device']``."""
        super().__init__()
        self._network = IncrementalNet(args['convnet_type'], False)
        self._device = args['device']
        # Number of classes introduced by each task, in task order.
        self._seen_classes = []

    def after_task(self):
        """Store ``copy().freeze()`` of the network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes

    def incremental_train(self, data_manager):
        """Train on the next task, then shrink and extend the exemplar memory.

        Args:
            data_manager: object providing ``get_task_size`` and
                ``get_dataset``; also passed on to the exemplar helpers.
        """
        self._cur_task += 1
        added_classes = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + added_classes
        self._network.update_fc(self._total_classes)
        self._seen_classes.append(added_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Training data: classes of the current task plus the stored memory.
        new_class_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_class_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4)

        # Test data: every class seen so far.
        seen_class_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(seen_class_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=4)

        self._train(data_manager, self.train_loader, self.test_loader)
        # Re-balance the memory over all classes seen so far.
        self._reduce_exemplar(data_manager,
                              memory_size // self._total_classes)
        self._construct_exemplar(data_manager,
                                 memory_size // self._total_classes)

    def _train(self, data_manager, train_loader, test_loader):
        """Run the training stage and, after the first task, the fine-tuning stage.

        Note that the loaders actually used for the training stage are
        ``self.train_loader`` / ``self.test_loader``.
        """

        def sgd_with_schedule(learning_rate, steps):
            # Fresh SGD + MultiStepLR pair over the current network parameters.
            sgd = optim.SGD(self._network.parameters(),
                            lr=learning_rate,
                            momentum=0.9,
                            weight_decay=1e-3)
            schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                      milestones=steps,
                                                      gamma=lrate_decay)
            return sgd, schedule

        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)

        # First task: a single plain training stage with its own schedule.
        if self._cur_task == 0:
            optimizer, scheduler = sgd_with_schedule(lrate_init,
                                                     milestones_init)
            self._is_finetuning = False
            self._run(self.train_loader, self.test_loader, epochs_init,
                      optimizer, scheduler, 'Training')
            return

        # Stage 1: new classes + exemplars.
        optimizer, scheduler = sgd_with_schedule(lrate, milestones)
        self._is_finetuning = False
        self._run(self.train_loader, self.test_loader, epochs, optimizer,
                  scheduler, 'Training')

        # Stage 2: fine-tune on the exemplar memory only, using the network
        # just trained as the distillation reference.
        per_class = memory_size // self._known_classes
        self._reduce_exemplar(data_manager, per_class)
        self._construct_exemplar(data_manager, per_class)
        self._old_network = self._network.copy().freeze()

        finetune_train_dataset = data_manager.get_dataset(
            [], source='train', mode='train', appendent=self._get_memory())
        finetune_train_loader = DataLoader(finetune_train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
        optimizer, scheduler = sgd_with_schedule(lrate_finetune,
                                                 milestones_finetune)
        self._is_finetuning = True
        self._run(finetune_train_loader, self.test_loader, epochs_finetune,
                  optimizer, scheduler, 'Finetuning')

    def _run(self, train_loader, test_loader, epochs_, optimizer, scheduler,
             process):
        """Train for ``epochs_`` epochs with CE + distillation and log the last summary.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader passed to ``_compute_accuracy`` every epoch.
            epochs_: number of epochs to run.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
            process: label prefixed to the progress / log message.
        """
        prog_bar = tqdm(range(epochs_))
        for epoch in prog_bar:
            self._network.train()
            running_loss = 0.
            correct, total = 0, 0

            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)

                clf_loss = F.cross_entropy(logits, targets)

                if self._cur_task == 0:
                    distill_loss = torch.zeros(1, device=self._device)
                else:
                    # During fine-tuning the current task's block is
                    # distilled as well.
                    n_blocks = self._cur_task
                    if self._is_finetuning:
                        n_blocks += 1

                    # Sum (not average) of the per-task distillation terms.
                    distill_loss = 0.
                    old_logits = self._old_network(inputs)
                    for block in range(n_blocks):
                        lo = sum(self._seen_classes[:block])
                        hi = sum(self._seen_classes[:block + 1])
                        new_probs = F.softmax(logits[:, lo:hi] / T, dim=1)
                        old_probs = F.softmax(old_logits[:, lo:hi] / T, dim=1)
                        distill_loss += F.binary_cross_entropy(
                            new_probs, old_probs)

                loss = clf_loss + distill_loss
                running_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Running training accuracy from the logits of this batch.
                preds = torch.max(logits, dim=1)[1]
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = '{} => '.format(process)
            info2 = 'Task {}, Epoch {}/{}, Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs_,
                running_loss / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info1 + info2)

        logging.info(info1 + info2)
class LwM(BaseLearner):
    """Incremental learner combining classification, logit distillation and
    Grad-CAM based attention distillation against the previous network.

    Hyper-parameters (``epochs``, ``lrate``, ``milestones``, ``lrate_decay``,
    ``weight_decay``, ``batch_size``, ``num_workers``, ``distill_ratio``,
    ``attention_ratio``) are read from module-level names defined outside
    this class.
    """

    def __init__(self, args):
        """Create the learner with a Grad-CAM enabled ``IncrementalNet``."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'],
                                       pretrained=False,
                                       gradcam=True)

    def after_task(self):
        """Snapshot the network as ``_old_network`` and advance the class count.

        The Grad-CAM hook is removed before copying and re-installed on both
        the current network and the copy afterwards.
        """
        # Clear gradients and remove the hook before copying
        # (presumably so the copy starts without hook state -- TODO confirm).
        self._network.zero_grad()
        self._network.unset_gradcam_hook()
        self._old_network = self._network.copy().eval()
        self._network.set_gradcam_hook()
        self._old_network.set_gradcam_hook()

        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Extend the classifier for the next task and train on it.

        Args:
            data_manager: object providing ``get_task_size`` and
                ``get_dataset``.
        """
        self._cur_task += 1
        self._total_classes = self._known_classes + data_manager.get_task_size(
            self._cur_task)
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Loader
        # Training set: classes of the current task plus whatever
        # ``_get_memory()`` returns; test set: all classes seen so far.
        train_dataset = data_manager.get_dataset(np.arange(
            self._known_classes, self._total_classes),
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)
        test_dataset = data_manager.get_dataset(np.arange(
            0, self._total_classes),
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=num_workers)

        # Procedure
        # Wrap in DataParallel only while training, then restore the module.
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self._train(self.train_loader, self.test_loader)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Move the networks to the device, build SGD + MultiStepLR, and run training."""
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)
        optimizer = optim.SGD(self._network.parameters(),
                              lr=lrate,
                              momentum=0.9,
                              weight_decay=weight_decay)
        # optimizer = optim.Adam(self._network.parameters(), lr=lrate, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                   milestones=milestones,
                                                   gamma=lrate_decay)
        self._run(train_loader, test_loader, optimizer, scheduler)

    def _run(self, train_loader, test_loader, optimizer, scheduler):
        """Train for ``epochs`` epochs and log per-epoch loss/accuracy.

        Without an old network only cross entropy is used. Otherwise the loss
        is the sum of cross entropy on the new-class logits, a ``_KD_loss``
        term on the old-class logits (scaled by ``distill_ratio``) and a
        ``gradcam_distillation`` term (scaled by ``attention_ratio``).

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader passed to ``_compute_accuracy`` every epoch.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
        """
        for epoch in range(1, epochs + 1):
            self._network.train()
            clf_losses = 0.  # cross entropy
            distill_losses = 0.  # distillation
            attention_losses = 0.  # attention distillation
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                outputs = self._network(inputs)
                logits = outputs['logits']
                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                if self._old_network is None:
                    clf_loss = F.cross_entropy(logits, targets)
                    clf_losses += clf_loss.item()
                    loss = clf_loss
                else:
                    self._old_network.zero_grad()
                    old_outputs = self._old_network(inputs)
                    old_logits = old_outputs['logits']

                    # Classification loss
                    # if no old samples saved, only calculate loss for new logits
                    # Targets are shifted so they index into the new-class slice.
                    clf_loss = F.cross_entropy(logits[:, self._known_classes:],
                                               targets - self._known_classes)
                    clf_losses += clf_loss.item()

                    # Distillation loss
                    # if no old samples saved, only calculate distillation loss for old logits
                    '''
                    distill_loss = F.binary_cross_entropy_with_logits(
                        logits[:, :self._known_classes], torch.sigmoid(old_logits.detach())
                    ) * distill_ratio
                    '''
                    distill_loss = _KD_loss(logits[:, :self._known_classes],
                                            old_logits.detach(),
                                            T=2) * distill_ratio
                    distill_losses += distill_loss.item()

                    # Attention distillation loss
                    # One-hot of the highest-scoring old class under the
                    # current network, used as the backward gradient for both
                    # networks.
                    top_base_indices = logits[:, :self._known_classes].argmax(
                        dim=1)
                    onehot_top_base = target2onehot(
                        top_base_indices, self._known_classes).to(self._device)

                    # Auxiliary backward passes; retain_graph keeps the
                    # current network's graph for the real loss.backward()
                    # below. (Presumably these populate the
                    # 'gradcam_gradients' entries via the hooks -- TODO confirm.)
                    logits[:, :self._known_classes].backward(
                        gradient=onehot_top_base, retain_graph=True)
                    old_logits.backward(gradient=onehot_top_base)

                    attention_loss = gradcam_distillation(
                        outputs['gradcam_gradients'][0],
                        old_outputs['gradcam_gradients'][0].detach(),
                        outputs['gradcam_activations'][0],
                        old_outputs['gradcam_activations']
                        [0].detach()) * attention_ratio
                    attention_losses += attention_loss.item()

                    # Integration
                    loss = clf_loss + distill_loss + attention_loss

                    # Discard the gradients accumulated by the auxiliary
                    # backward passes above.
                    self._old_network.zero_grad()
                    self._network.zero_grad()

                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            # ``i`` is the last batch index, so ``i + 1`` is the batch count.
            info1 = 'Task {}, Epoch {}/{} => clf_loss {:.2f}, '.format(
                self._cur_task, epoch, epochs, clf_losses / (i + 1))
            info2 = 'distill_loss {:.2f}, attention_loss {:.2f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                distill_losses / (i + 1), attention_losses / (i + 1),
                train_acc, test_acc)
            logging.info(info1 + info2)
Esempio n. 5
0
class iCaRL(BaseLearner):
    """Class-incremental learner trained with a sigmoid/BCE objective.

    In this variant the network is called directly for logits, the base class
    is initialised without arguments, and weight decay (``1e-5``) and the
    loader worker count (``4``) are fixed in the code. Evaluation goes
    through ``_eval_ncm`` with the stored class means.

    Other hyper-parameters (``epochs``, ``lrate``, ``milestones``,
    ``lrate_decay``, ``batch_size``, ``memory_size``) are read from
    module-level names defined outside this class.
    """

    def __init__(self, args):
        """Create the learner from ``args['convnet_type']`` and ``args['device']``."""
        super().__init__()
        self._network = IncrementalNet(args['convnet_type'], False)
        self._device = args['device']

    def after_task(self):
        """Store ``copy().freeze()`` of the network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes

    def eval_task(self):
        """Return ``accuracy`` of the ``_eval_ncm`` predictions on the test loader."""
        y_pred, y_true = self._eval_ncm(self.test_loader, self._class_means)
        return accuracy(y_pred, y_true, self._known_classes)

    def incremental_train(self, data_manager):
        """Train on the next task, then shrink and extend the exemplar memory.

        Args:
            data_manager: object providing ``get_task_size`` and
                ``get_dataset``; also passed on to the exemplar helpers.
        """
        self._cur_task += 1
        added_classes = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + added_classes
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Training data: classes of the current task plus the stored memory.
        new_class_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_class_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4)

        # Test data: every class seen so far.
        seen_class_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(seen_class_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=4)

        # Train with the train / test loaders.
        self._train(self.train_loader, self.test_loader)
        # Trim the existing exemplar sets.
        self._reduce_exemplar(data_manager,
                              memory_size // self._total_classes)
        # Build exemplar sets for the samples of the new classes.
        self._construct_exemplar(data_manager,
                                 memory_size // self._total_classes)

    def _train(self, train_loader, test_loader):
        """Move the network to the device, build SGD + MultiStepLR, and train."""
        self._network.to(self._device)
        sgd = optim.SGD(self._network.parameters(),
                        lr=lrate,
                        momentum=0.9,
                        weight_decay=1e-5)
        lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                     milestones=milestones,
                                                     gamma=lrate_decay)
        self._update_representation(train_loader, test_loader, sgd,
                                    lr_schedule)

    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):
        """Update the feature representation for ``epochs`` epochs.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader passed to ``_compute_accuracy`` every epoch.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
        """
        prog_bar = tqdm(range(epochs))
        for epoch in prog_bar:
            self._network.train()
            running_loss = 0.

            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                # Forward pass of the current network on this batch.
                logits = self._network(inputs)
                # Labels -> one-hot over all classes seen so far.
                bce_targets = target2onehot(targets, self._total_classes)

                # With no previous network this is plain classification.
                if self._old_network is not None:
                    # Predictions of the previous network on the same inputs,
                    # used as distillation targets.
                    old_probs = torch.sigmoid(
                        self._old_network(inputs).detach())
                    # Original author's note: the ground-truth bit of the
                    # one-hot vector lies after ``:_known_classes``, so
                    # overwriting the leading columns keeps it intact. The
                    # new network is thus matched against the ground truth
                    # on the new positions and against the old network's
                    # outputs on the old positions, so that the old
                    # behaviour is not forgotten.
                    bce_targets = bce_targets.clone()
                    bce_targets[:, :self._known_classes] = old_probs
                loss = F.binary_cross_entropy_with_logits(logits, bce_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            scheduler.step()
            train_acc = self._compute_accuracy(self._network, train_loader)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.3f}, Test_accy {:.3f}'.format(
                self._cur_task, epoch + 1, epochs,
                running_loss / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info)

        logging.info(info)
Esempio n. 6
0
class DR(BaseLearner):
    """Class-incremental learner trained by distillation and retrospection.

    For every task a fresh expert network is trained on the new classes only.
    On the first task (``_cur_task == 0``) the main network is a copy of that
    expert. On later tasks the main network is optimised so that its
    new-class logits follow the frozen expert and its old-class logits follow
    the frozen network kept from the previous task.
    """

    def __init__(self, args):
        """Create the main network from ``args['convnet_type']``.

        ``args`` is also forwarded unchanged to the base-class constructor.
        """
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], False)

        self.convnet_type = args['convnet_type']
        # Expert network; (re)created by ``_train_expert`` for every task.
        self.expert = None

    def after_task(self):
        """Freeze a copy of the current network and mark all classes as known."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    @staticmethod
    def _make_loader(dataset, shuffle):
        """Wrap ``dataset`` in a DataLoader using the module-level batch settings."""
        return DataLoader(dataset,
                          batch_size=batch_size,
                          shuffle=shuffle,
                          num_workers=num_workers)

    def incremental_train(self, data_manager):
        """Run one incremental step: train the expert, then the main network.

        Args:
            data_manager: provides ``get_task_size`` and ``get_dataset`` for
                the current task; also passed to ``build_rehearsal_memory``.
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Loaders for the main network: new-class training data plus whatever
        # ``_get_memory`` returns, evaluated on every class seen so far.
        train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='train',
            mode='train',
            appendent=self._get_memory())
        self.train_loader = self._make_loader(train_dataset, shuffle=True)
        test_dataset = data_manager.get_dataset(
            np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = self._make_loader(test_dataset, shuffle=False)

        # Loaders for the expert: new classes only, no appended memory.
        expert_train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='train',
            mode='train')
        self.expert_train_loader = self._make_loader(expert_train_dataset,
                                                     shuffle=True)
        expert_test_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='test',
            mode='test')
        self.expert_test_loader = self._make_loader(expert_test_dataset,
                                                    shuffle=False)

        # Procedure
        logging.info('Training the expert CNN...')
        self._train_expert(self.expert_train_loader, self.expert_test_loader)
        if self._cur_task == 0:
            # Nothing to retain yet: the expert itself becomes the network.
            self._network = self.expert.copy()
        else:
            self.expert = self.expert.freeze()
            logging.info('Training the updated CNN...')
            if len(self._multiple_gpus) > 1:
                self._network = nn.DataParallel(self._network,
                                                self._multiple_gpus)
            self._train(self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        # Undo the DataParallel wrapping, which is only applied after task 0.
        if len(self._multiple_gpus) > 1 and self._cur_task > 0:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Train the main network against the expert and the old network.

        The loss is the sum of two ``_KD_loss`` terms: the new-class logits
        against the expert's logits (third argument ``T1``) and the old-class
        logits against the previous network's logits (third argument ``T2``).
        Train accuracy is accumulated from the training batches themselves.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)
        optimizer = optim.SGD(self._network.parameters(),
                              lr=lrate,
                              momentum=0.9,
                              weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=lrate_decay)

        prog_bar = tqdm(range(epochs))
        for epoch in prog_bar:
            self._network.train()
            losses = 0.
            correct, total = 0, 0
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)['logits']
                # Both teachers only supply targets; only ``self._network``
                # is registered with the optimizer, so no graph is needed.
                with torch.no_grad():
                    exp_logits = self.expert(inputs)['logits']
                    old_logits = self._old_network(inputs)['logits']

                # Distillation: new-class outputs follow the expert.
                dist_term = _KD_loss(logits[:, self._known_classes:],
                                     exp_logits, T1)
                # Retrospection: old-class outputs follow the old network.
                retr_term = _KD_loss(logits[:, :self._known_classes],
                                     old_logits, T2)

                loss = dist_term + retr_term
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Running train accuracy from the logits computed above.
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Updated CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.2f}, Test accy {:.2f}'.format(
                epoch + 1, epochs, losses / len(train_loader), train_acc,
                test_acc)
            prog_bar.set_description(info)

        # Only the summary of the final epoch goes to the log.
        logging.info(info)

    def _train_expert(self, train_loader, test_loader):
        """Create and train a fresh expert on the classes of the current task.

        The expert has ``self.task_size`` outputs, so labels are shifted down
        by ``self._known_classes`` before the cross-entropy loss and before
        the accuracy computation.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self.expert = IncrementalNet(self.convnet_type, False)
        self.expert.update_fc(self.task_size)
        if len(self._multiple_gpus) > 1:
            self.expert = nn.DataParallel(self.expert, self._multiple_gpus)
        self.expert.to(self._device)
        optimizer = optim.SGD(self.expert.parameters(),
                              lr=lrate_expert,
                              momentum=0.9,
                              weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones_expert, gamma=lrate_decay_expert)

        prog_bar = tqdm(range(epochs_expert))
        for epoch in prog_bar:
            self.expert.train()
            losses = 0.
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = (targets - self._known_classes).to(self._device)
                logits = self.expert(inputs)['logits']

                loss = F.cross_entropy(logits, targets)
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            scheduler.step()
            train_acc = self._compute_accuracy(self.expert, train_loader,
                                               self._known_classes)
            test_acc = self._compute_accuracy(self.expert, test_loader,
                                              self._known_classes)
            info = 'Expert CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.2f}, Test accy {:.2f}'.format(
                epoch + 1, epochs_expert, losses / len(train_loader),
                train_acc, test_acc)
            prog_bar.set_description(info)

        # Only the summary of the final epoch goes to the log.
        logging.info(info)
        # Keep the bare module so later ``copy``/``freeze`` calls reach it.
        if len(self._multiple_gpus) > 1:
            self.expert = self.expert.module

    def _compute_accuracy(self, model, loader, offset=0):
        """Return the top-1 accuracy of ``model`` on ``loader`` in percent.

        Args:
            model: callable whose output is indexed with ``['logits']``.
            loader: iterable yielding ``(_, inputs, targets)`` batches.
            offset: value subtracted from every label before comparison, for
                models whose outputs start at a later class index.

        Returns:
            The percentage of correct predictions, rounded to two decimals.
        """
        model.eval()
        correct, total = 0, 0
        for _, inputs, targets in loader:
            inputs = inputs.to(self._device)
            # Shift out-of-place: ``targets -= offset`` would modify the
            # tensor object handed out by the loader.
            shifted = targets - offset
            with torch.no_grad():
                outputs = model(inputs)['logits']
            predicts = torch.max(outputs, dim=1)[1]
            correct += (predicts.cpu() == shifted).sum()
            total += len(shifted)

        return np.around(tensor2numpy(correct) * 100 / total, decimals=2)
Esempio n. 7
0
class DR(BaseLearner):
    def __init__(self, args):
        """Set up the learner from the ``args`` configuration mapping.

        Reads ``args['convnet_type']`` (backbone of the main and expert
        networks) and ``args['device']`` (where tensors and models are moved).
        """
        super().__init__()
        backbone = args['convnet_type']
        self._network = IncrementalNet(backbone, False)  # main network
        self._device = args['device']

        self.convnet_type = backbone
        self.expert = None  # expert network, created in _train_expert

    def after_task(self):
        """Keep a frozen copy of the network and mark all classes as known."""
        snapshot = self._network.copy()
        # Network of the step that just finished; read back in ``_train``.
        self._old_network = snapshot.freeze()
        self._known_classes = self._total_classes

    def eval_task(self):
        """Evaluate on ``self.test_loader`` and return the ``accuracy`` result.

        Predictions come from ``self._eval_ncm`` called with
        ``self._class_means``; they are scored by ``accuracy`` together with
        the true labels and ``self._known_classes``.
        """
        predictions, labels = self._eval_ncm(self.test_loader,
                                             self._class_means)
        return accuracy(predictions, labels, self._known_classes)

    def incremental_train(self, data_manager):
        """Run one incremental step: expert first, then the main network.

        Args:
            data_manager: provides ``get_task_size`` and ``get_dataset`` for
                the current task; also passed to the exemplar routines.
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Settings shared by all four loaders below.
        loader_opts = dict(batch_size=batch_size, num_workers=4)

        # Main network: new-class training data plus whatever
        # ``_get_memory`` returns, tested on every class seen so far.
        train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='train',
            mode='train',
            appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       shuffle=True,
                                       **loader_opts)
        test_dataset = data_manager.get_dataset(
            np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      shuffle=False,
                                      **loader_opts)

        # Expert network: classes of the current task only.
        expert_train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='train',
            mode='train')
        self.expert_train_loader = DataLoader(expert_train_dataset,
                                              shuffle=True,
                                              **loader_opts)
        expert_test_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='test',
            mode='test')
        self.expert_test_loader = DataLoader(expert_test_dataset,
                                             shuffle=False,
                                             **loader_opts)

        # Procedure
        logging.info('Training the expert CNN...')
        # The expert is retrained for every task on the new classes only.
        self._train_expert(self.expert_train_loader, self.expert_test_loader)
        if self._cur_task != 0:
            self.expert = self.expert.freeze()
            logging.info('Training the updated CNN...')
            # Incremental learning against the expert and the old network.
            self._train(self.train_loader, self.test_loader)
        else:
            # First task: the network simply starts as a copy of the expert.
            self._network = self.expert.copy()

        per_class = memory_size // self._total_classes
        # Shrink the stored exemplar sets, then build those of the new classes.
        self._reduce_exemplar(data_manager, per_class)
        self._construct_exemplar(data_manager, per_class)

    def _train(self, train_loader, test_loader):
        """Train the main network against the expert and the old network.

        The loss is the sum of two ``_KD_loss`` terms: the new-class outputs
        against the expert's outputs (third argument ``T1``) and the old-class
        outputs against the previous network's outputs (third argument
        ``T2``). The old network covers the previously known classes, the
        expert the classes of the current task, and the trained network both.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self._network.to(self._device)
        optimizer = optim.SGD(self._network.parameters(),
                              lr=lrate,
                              momentum=0.9,
                              weight_decay=1e-5)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=lrate_decay)

        prog_bar = tqdm(range(epochs))
        for epoch in prog_bar:
            self._network.train()
            losses = 0.
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)
                # Both teachers only supply targets; only ``self._network``
                # is registered with the optimizer, so no graph is needed.
                with torch.no_grad():
                    exp_logits = self.expert(inputs)
                    old_logits = self._old_network(inputs)

                # Distillation: new-class outputs should follow the expert.
                dist_term = _KD_loss(logits[:, self._known_classes:],
                                     exp_logits, T1)
                # Retrospection: old-class outputs should follow the old net.
                retr_term = _KD_loss(logits[:, :self._known_classes],
                                     old_logits, T2)

                loss = dist_term + retr_term
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            scheduler.step()
            train_acc = self._compute_accuracy(self._network, train_loader)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Updated CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.3f}, Test accy {:.3f}'.format(
                epoch + 1, epochs, losses / len(train_loader), train_acc,
                test_acc)
            prog_bar.set_description(info)

        # Only the summary of the final epoch goes to the log.
        logging.info(info)

    def _train_expert(self, train_loader, test_loader):
        """Create and train a fresh expert on the classes of the current task.

        Args:
            train_loader: iterable yielding ``(_, inputs, targets)`` batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        expert = IncrementalNet(self.convnet_type, False)
        self.expert = expert
        expert.update_fc(self.task_size)
        expert.to(self._device)
        optimizer = optim.SGD(expert.parameters(),
                              lr=lrate_expert,
                              momentum=0.9,
                              weight_decay=1e-5)
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones_expert, gamma=lrate_decay_expert)

        summary = 'Expert CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.3f}, Test accy {:.3f}'
        # Labels are shifted by this amount because the expert only has
        # ``self.task_size`` outputs while labels are numbered over all classes.
        label_shift = self._known_classes

        prog_bar = tqdm(range(epochs_expert))
        for epoch in prog_bar:
            self.expert.train()
            losses = 0.
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = (targets - label_shift).to(self._device)
                logits = self.expert(inputs)

                loss = F.cross_entropy(logits, targets)
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            scheduler.step()
            train_acc = self._compute_accuracy(self.expert, train_loader,
                                               self._known_classes)
            test_acc = self._compute_accuracy(self.expert, test_loader,
                                              self._known_classes)
            info = summary.format(epoch + 1, epochs_expert,
                                  losses / len(train_loader), train_acc,
                                  test_acc)
            prog_bar.set_description(info)

        # Only the summary of the final epoch goes to the log.
        logging.info(info)

    def _compute_accuracy(self, model, loader, offset=0):
        model.eval()
        correct, total = 0, 0
        for i, (_, inputs, targets) in enumerate(loader):
            inputs = inputs.to(self._device)
            targets -= offset
            with torch.no_grad():
                outputs = model(inputs)
            predicts = torch.max(outputs, dim=1)[1]
            correct += (predicts.cpu() == targets).sum()
            total += len(targets)

        return np.around(tensor2numpy(correct) / total, decimals=3)