Example #1
0
    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):
        prog_bar = tqdm(range(epochs))
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                logits = self._network(inputs)['logits']
                onehots = target2onehot(targets, self._total_classes)

                if self._old_network is None:
                    loss = F.binary_cross_entropy_with_logits(logits, onehots)
                else:
                    old_onehots = torch.sigmoid(
                        self._old_network(inputs)['logits'].detach())
                    new_onehots = onehots.clone()
                    new_onehots[:, :self._known_classes] = old_onehots
                    loss = F.binary_cross_entropy_with_logits(
                        logits, new_onehots)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs, losses / len(train_loader),
                train_acc, test_acc)
            prog_bar.set_description(info)

        logging.info(info)
Example #2
0
    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):  #特征表示更新
        prog_bar = tqdm(range(epochs))
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.
            for i, (_, inputs, targets) in enumerate(train_loader):  #新类
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                logits = self._network(inputs)  #新类的输入和forward
                onehots = target2onehot(targets,
                                        self._total_classes)  #新类标签->onehot

                if self._old_network is None:  #没有原始网络 单纯分类
                    loss = F.binary_cross_entropy_with_logits(logits, onehots)
                else:
                    old_onehots = torch.sigmoid(
                        self._old_network(
                            inputs).detach())  #原来的网络对新样本的预测(用于计算蒸馏损失)
                    new_onehots = onehots.clone()  #新的onehot
                    new_onehots[:, :self.
                                _known_classes] = old_onehots  #由于gt这个onehot向量的label位肯定在 :_known_classes后面
                    loss = F.binary_cross_entropy_with_logits(
                        logits, new_onehots)  #因此新网络的输出既可以与新样本的新位置的gt算CELoss
                    #也可以与原来的网络的预测结果算CELoss 以求不要忘记原来网络的输出
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses += loss.item()

            scheduler.step()
            train_acc = self._compute_accuracy(self._network, train_loader)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.3f}, Test_accy {:.3f}'.format(
                self._cur_task, epoch + 1, epochs, losses / len(train_loader),
                train_acc, test_acc)
            prog_bar.set_description(info)

        logging.info(info)
    def _run(self, train_loader, test_loader, optimizer, scheduler):
        for epoch in range(1, epochs + 1):
            self._network.train()
            ce_losses = 0.
            lf_losses = 0.
            is_losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                outputs = self._network(inputs)
                logits = outputs[
                    'logits']  # Final outputs after scaling  (bs, nb_classes)
                features = outputs[
                    'features']  # Features before fc layer  (bs, 64)
                ce_loss = F.cross_entropy(logits,
                                          targets)  # Cross entropy loss

                lf_loss = 0.  # Less forgetting loss
                is_loss = 0.  # Inter-class speration loss
                if self._old_network is not None:
                    old_outputs = self._old_network(inputs)
                    old_features = old_outputs[
                        'features']  # Features before fc layer
                    lf_loss = F.cosine_embedding_loss(
                        features, old_features.detach(),
                        torch.ones(inputs.shape[0]).to(
                            self._device)) * self.lamda

                    scores = outputs[
                        'new_scores']  # Scores before scaling  (bs, nb_new)
                    old_scores = outputs[
                        'old_scores']  # Scores before scaling  (bs, nb_old)
                    old_classes_mask = np.where(
                        tensor2numpy(targets) < self._known_classes)[0]
                    if len(old_classes_mask) != 0:
                        scores = scores[old_classes_mask]  # (n, nb_new)
                        old_scores = old_scores[
                            old_classes_mask]  # (n, nb_old)

                        # Ground truth targets
                        gt_targets = targets[old_classes_mask]  # (n)
                        old_bool_onehot = target2onehot(
                            gt_targets, self._known_classes).type(torch.bool)
                        anchor_positive = torch.masked_select(
                            old_scores, old_bool_onehot)  # (n)
                        anchor_positive = anchor_positive.view(-1, 1).repeat(
                            1, K)  # (n, K)

                        # Top K hard
                        anchor_hard_negative = scores.topk(K,
                                                           dim=1)[0]  # (n, K)

                        is_loss = F.margin_ranking_loss(anchor_positive,
                                                        anchor_hard_negative,
                                                        torch.ones(K).to(
                                                            self._device),
                                                        margin=margin)

                loss = ce_loss + lf_loss + is_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                ce_losses += ce_loss.item()
                lf_losses += lf_loss.item() if self._cur_task != 0 else lf_loss
                is_losses += is_loss.item() if self._cur_task != 0 and len(
                    old_classes_mask) != 0 else is_loss

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = 'Task {}, Epoch {}/{} => '.format(self._cur_task, epoch,
                                                      epochs)
            info2 = 'CE_loss {:.3f}, LF_loss {:.3f}, IS_loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                ce_losses / (i + 1), lf_losses / (i + 1), is_losses / (i + 1),
                train_acc, test_acc)
            logging.info(info1 + info2)
    def _run(self, train_loader, test_loader, optimizer, scheduler):
        for epoch in range(1, epochs + 1):
            self._network.train()
            clf_losses = 0.  # cross entropy
            distill_losses = 0.  # distillation
            attention_losses = 0.  # attention distillation
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                outputs = self._network(inputs)
                logits = outputs['logits']
                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                if self._old_network is None:
                    clf_loss = F.cross_entropy(logits, targets)
                    clf_losses += clf_loss.item()
                    loss = clf_loss
                else:
                    self._old_network.zero_grad()
                    old_outputs = self._old_network(inputs)
                    old_logits = old_outputs['logits']

                    # Classification loss
                    # if no old samples saved, only calculate loss for new logits
                    clf_loss = F.cross_entropy(logits[:, self._known_classes:],
                                               targets - self._known_classes)
                    clf_losses += clf_loss.item()

                    # Distillation loss
                    # if no old samples saved, only calculate distillation loss for old logits
                    '''
                    distill_loss = F.binary_cross_entropy_with_logits(
                        logits[:, :self._known_classes], torch.sigmoid(old_logits.detach())
                    ) * distill_ratio
                    '''
                    distill_loss = _KD_loss(logits[:, :self._known_classes],
                                            old_logits.detach(),
                                            T=2) * distill_ratio
                    distill_losses += distill_loss.item()

                    # Attention distillation loss
                    top_base_indices = logits[:, :self._known_classes].argmax(
                        dim=1)
                    onehot_top_base = target2onehot(
                        top_base_indices, self._known_classes).to(self._device)

                    logits[:, :self._known_classes].backward(
                        gradient=onehot_top_base, retain_graph=True)
                    old_logits.backward(gradient=onehot_top_base)

                    attention_loss = gradcam_distillation(
                        outputs['gradcam_gradients'][0],
                        old_outputs['gradcam_gradients'][0].detach(),
                        outputs['gradcam_activations'][0],
                        old_outputs['gradcam_activations']
                        [0].detach()) * attention_ratio
                    attention_losses += attention_loss.item()

                    # Integration
                    loss = clf_loss + distill_loss + attention_loss

                    self._old_network.zero_grad()
                    self._network.zero_grad()

                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = 'Task {}, Epoch {}/{} => clf_loss {:.2f}, '.format(
                self._cur_task, epoch, epochs, clf_losses / (i + 1))
            info2 = 'distill_loss {:.2f}, attention_loss {:.2f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                distill_losses / (i + 1), attention_losses / (i + 1),
                train_acc, test_acc)
            logging.info(info1 + info2)
    def _run(self, train_loader, test_loader, optimizer, scheduler):
        for epoch in range(1, epochs + 1):
            self._network.train()  # set train mode
            ce_losses = 0.
            lf_losses = 0.
            is_losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                outputs = self._network(inputs)
                logits = outputs[
                    'logits']  # Final outputs after scaling  (bs, nb_classes), |* i.e., befroe probs=softmax(logits)
                features = outputs[
                    'features']  # Features before fc layer  (bs, 64) |* i.e., feature vector from feature extractor(backbone)
                ce_loss = F.cross_entropy(
                    logits, targets
                )  # Cross entropy loss |* cross_entrophy implicityly implement softmax, so its input is logits.

                lf_loss = 0.  # Less forgetting loss
                is_loss = 0.  # Inter-class speration loss, i.e. margin ranking loss. Eq 8.
                if self._old_network is not None:
                    old_outputs = self._old_network(inputs)
                    old_features = old_outputs[
                        'features']  # Features before fc layer
                    lf_loss = F.cosine_embedding_loss(
                        features, old_features.detach(),
                        torch.ones(inputs.shape[0]).to(
                            self._device)) * self.lamda  # Eq 6.

                    scores = outputs[
                        'new_scores']  # Scores before scaling  (bs, nb_new)
                    old_scores = outputs[
                        'old_scores']  # Scores before scaling  (bs, nb_old)
                    '''@Author:defeng
                        24 May 2021 (Monday)
                        see Line 45 here, we know ucir uses CosineincNet and CosineincNet uses (Split)CosineLinearLayer.
                        Line 93 forward function of SplitCosineLinearLayer, "out" times(X) the scaling factor eta while out1/2 doesn't.
                        (CosineLinearLayer does not have the new/old_scores.)
                    '''
                    old_classes_mask = np.where(
                        tensor2numpy(targets) < self._known_classes)[0]
                    if len(old_classes_mask) != 0:
                        scores = scores[old_classes_mask]  # (n, nb_new)
                        old_scores = old_scores[
                            old_classes_mask]  # (n, nb_old)

                        # Ground truth targets
                        gt_targets = targets[old_classes_mask]  # (n)
                        old_bool_onehot = target2onehot(
                            gt_targets, self._known_classes).type(torch.bool)
                        anchor_positive = torch.masked_select(
                            old_scores, old_bool_onehot
                        )  # *(n)*   |* i.e. select GT class correspoding scores.
                        anchor_positive = anchor_positive.view(-1, 1).repeat(
                            1, K
                        )  # *(n, K)*   |* i.e., <\bar{\theta}, \bar(f(x))>
                        '''@Author:defeng
                            torch.repeat is different from numpy.repeat.
                            see for details: https://pytorch.org/docs/stable/tensors.html?highlight=repeat#torch.Tensor.repeat
                        '''

                        # Top K hard
                        anchor_hard_negative = scores.topk(
                            K, dim=1
                        )[0]  # *(n, K)* |* i.e., <\bar{\theta_{k}}, \bar(f(x))>

                        is_loss = F.margin_ranking_loss(anchor_positive,
                                                        anchor_hard_negative,
                                                        torch.ones(K).to(
                                                            self._device),
                                                        margin=margin)
                        '''@Author:defeng
                            here, the params "torch.ones(K).to(self._device)" for margin_ranking_loss follows the params \
                            requirements in pytorch documentation(specifically, ones(K) is the variable y).
                            see for details: https://pytorch.org/docs/stable/generated/torch.nn.MarginRankingLoss.html#torch.nn.MarginRankingLoss
                        '''

                loss = ce_loss + lf_loss + is_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                ce_losses += ce_loss.item()
                lf_losses += lf_loss.item() if self._cur_task != 0 else lf_loss
                is_losses += is_loss.item() if self._cur_task != 0 and len(
                    old_classes_mask) != 0 else is_loss

                # acc(classification)
                _, preds = torch.max(
                    logits, dim=1
                )  # pred is the indexs/location of the max value in dim1.
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = 'Task {}, Epoch {}/{} => '.format(self._cur_task, epoch,
                                                      epochs)
            info2 = 'CE_loss {:.3f}, LF_loss {:.3f}, IS_loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                ce_losses / (i + 1), lf_losses / (i + 1), is_losses / (i + 1),
                train_acc, test_acc)
            logging.info(info1 + info2)