Example #1
    def f(self, x, return_acc=False):  # x = [target layer index, alpha]
        if x.size == 1:
            # only the layer index was supplied: pad with a default alpha value
            x = np.append(x, 0.32)
        x = x.reshape(1, 2)
        target = int(x[:, 0])
        print("Start run ", target)
        start_time = default_timer()

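        # Rebuild the base network (ResNet-50 with a 60-class head) for this
        # run and wrap it in DataParallel when a GPU is available.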
        self.net = resnet50(60).cuda()
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        if device == 'cuda':
            self.net = torch.nn.DataParallel(self.net)
            cudnn.benchmark = True

        # restore the pretrained base-task weights, then replace the fc head
        # with one sized for the current incremental task (30 or 10 classes)
        self.net.load_state_dict(torch.load(checkpoint), strict=True)
        if self.inc_index == 1:
            self.net.module.fc = nn.Linear(512 * 4, 30).cuda()
        else:
            self.net.module.fc = nn.Linear(512 * 4, 10).cuda()

        self.net.train()

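        # Walk the modules and freeze everything before the target block:
        # conv parameters are frozen and counted into cur_wc, and each
        # BatchNorm2d layer encountered advances the block counter.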
        cur_wc = 0
        count = 0
        for m in self.net.modules():
            if target == count:
                break
            elif isinstance(m, nn.Conv2d):
                for param in m.parameters():
                    cur_wc += param.numel()
                    param.requires_grad = False
            elif isinstance(m, nn.BatchNorm2d):
                for param in m.parameters():
                    param.requires_grad = False
                count += 1

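        # ICIFAR100_60_30_10: CIFAR-100 split into a 60-class base set plus
        # 30- and 10-class increments, stored as ImageFolder directory trees.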
        BASE_DATA_ROOT = '/home/bbboming/HDD/Paper/datasets_object/ICIFAR100_60_30_10/BASE/'
        DATA_ROOT = '/home/bbboming/HDD/Paper/datasets_object/ICIFAR100_60_30_10/INC%d/' % self.inc_index
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(settings.CIFAR100_TRAIN_MEAN,
                                 settings.CIFAR100_TRAIN_STD),
        ])
        trainset = datasets.ImageFolder(os.path.join(DATA_ROOT, 'train'),
                                        train_transform)
        cifar100_training_loader = torch.utils.data.DataLoader(
            trainset,
            batch_size=self.batch_size,
            pin_memory=True,
            num_workers=4,
            shuffle=self.shuffle)

        test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(settings.CIFAR100_TRAIN_MEAN,
                                 settings.CIFAR100_TRAIN_STD),
        ])

        testset = datasets.ImageFolder(os.path.join(DATA_ROOT, 'test'),
                                       test_transform)
        cifar100_test_loader = torch.utils.data.DataLoader(
            testset,
            batch_size=self.batch_size,
            pin_memory=True,
            num_workers=4,
            shuffle=False)

        base_testset = datasets.ImageFolder(
            os.path.join(BASE_DATA_ROOT, 'test'), test_transform)
        cifar100_base_test_loader = torch.utils.data.DataLoader(
            base_testset,
            batch_size=self.batch_size,
            pin_memory=True,
            num_workers=4,
            shuffle=False)

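        # Standard CIFAR recipe: SGD with momentum and weight decay, a
        # plateau scheduler driven by the test loss, and linear warmup for
        # the first self.warm epochs.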
        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.net.parameters(),
                              lr=self.lr,
                              momentum=0.9,
                              weight_decay=5e-4)
        train_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min')
        iter_per_epoch = len(cifar100_training_loader)
        warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * self.warm)
        checkpoint_path = os.path.join(settings.CHECKPOINT_PATH,
                                       'resnet50_inc%d' % self.inc_index,
                                       settings.TIME_NOW)

        #create checkpoint folder to save model
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)
        checkpoint_path = os.path.join(checkpoint_path,
                                       '{net}-{target}-{type}.pth')

        best_acc = 0.0
        best_base_acc = 0.0
        best_inc_acc = 0.0
        for epoch in range(1, settings.EPOCH):
            self.net.train()
            # train(epoch)
            for batch_index, (images,
                              labels) in enumerate(cifar100_training_loader):
                # Variable is a no-op since PyTorch 0.4; move tensors to GPU directly
                images = images.cuda()
                labels = labels.cuda()

                optimizer.zero_grad()
                outputs = self.net(images)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()

                if epoch <= self.warm:
                    warmup_scheduler.step()
                n_iter = (epoch -
                          1) * len(cifar100_training_loader) + batch_index + 1

            #print('[Target {target}] [Training Epoch: {epoch}/{total_epoch}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            #    loss.item(),
            #    optimizer.param_groups[0]['lr'],
            #    target=target,
            #    epoch=epoch,
            #    total_epoch=settings.EPOCH
            #))

            #Evaluation Accuracy
            self.net.eval()
            self.basenet.eval()

            test_loss = 0.0  # cost function error
            correct = 0.0

            #INC Testset
            soft_layer = nn.Softmax(dim=1).cuda()  # build once, reused for both test sets
            for (images, labels) in cifar100_test_loader:
                images = images.cuda()
                labels = labels.cuda()

                base_outputs = self.basenet(images)
                outputs = self.net(images)

                loss = loss_function(outputs, labels)
                test_loss += loss.item()

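                # Combine both heads: concatenate base (classes 0-59) and
                # incremental softmax outputs, then shift incremental labels
                # past the 60 base classes so they index the joint vector.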
                soft_base = soft_layer(base_outputs)
                soft_inc = soft_layer(outputs)
                softmax = torch.cat([soft_base, soft_inc], dim=1)
                labels_all = labels + 60
                _, preds = softmax.max(1)
                correct += preds.eq(labels_all).sum()

            #Base Testset
            correct_base = 0.0
            for (images, labels) in cifar100_base_test_loader:
                images = images.cuda()
                labels = labels.cuda()

                base_outputs = self.basenet(images)
                outputs = self.net(images)

                soft_base = soft_layer(base_outputs)
                soft_inc = soft_layer(outputs)
                softmax = torch.cat([soft_base, soft_inc], dim=1)
                labels_all = labels
                _, preds = softmax.max(1)
                correct_base += preds.eq(labels_all).sum()

            avg_loss = test_loss / len(cifar100_test_loader.dataset)
            base_acc = correct_base.float() / len(
                cifar100_base_test_loader.dataset)
            inc_acc = correct.float() / len(cifar100_test_loader.dataset)
            acc = (correct.float() + correct_base.float()) / (
                len(cifar100_test_loader.dataset) +
                len(cifar100_base_test_loader.dataset))

            print(
                'Test set: Average loss: {:.4f}, Accuracy: {:.4f} (BaseAcc {:.4f} IncAcc {:.4f})'
                .format(avg_loss, acc, base_acc, inc_acc))

            train_scheduler.step(avg_loss)

            # start saving the best-performing model only after the early epochs
            if epoch > 10 and best_acc < acc:
                torch.save(
                    self.net.state_dict(),
                    checkpoint_path.format(target=target,
                                           net='resnet50',
                                           type='best'))
                best_acc = acc
                best_inc_acc = inc_acc
                best_base_acc = base_acc

        # share_ratio = target / self.count
        best_dict[str(target)] = best_acc.detach().cpu().item()

        memory_efficiency = cur_wc / self.total_wc
        obj_acc = best_acc.detach().cpu().item()
        alpha = x[:, 1].item()
        threshold = 0.02
        target_mem_eff = 0.70
        #Objective Function
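        # obj_f is minimized when the drop from the best known accuracy
        # (self.max_acc) equals `threshold`, i.e. roughly 2% accuracy is
        # traded for the memory saved by the frozen shared layers.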
        obj_f = np.abs((self.max_acc - obj_acc) - threshold)
        print_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " x= {x}, alpha= {alpha} Memory_Efficiency= {memory_efficiency}, combined_classification_acc= {best_acc}, obj_acc= {obj_acc}, OBJ_F= {obj_f}" \
                        .format(x=target, alpha=alpha,best_acc=best_acc, obj_acc=obj_acc, memory_efficiency=memory_efficiency, obj_f=obj_f)
        with open("history.log", "a") as f_hist:
            f_hist.write(print_str + "\n")
        print(print_str)

        if self.min_acc != 0:
            csv.write("%d, %d, %f, %f, %f, %f, %f\n" %
                      (self.iteration, target, obj_acc, threshold, obj_f,
                       self.min_acc, self.max_acc))
            self.iteration += 1

        end_time = default_timer()
        print("operation time: ", (end_time - start_time))

        if return_acc:
            return (best_acc.detach().cpu().item())
        return (obj_f)
Example #2
class Trainer:
    def __init__(self,
                 model: Module,
                 train_loader: DataLoader,
                 test_loader: DataLoader,
                 device=DEFAULT_DEVICE,
                 lr=DEFAULT_LR,
                 momentum=DEFAULT_MOMENTUM,
                 epochs=DEFAULT_EPOCHS,
                 batch_size=DEFAULT_BATCH_SIZE,
                 parallelism=DEFAULT_PARALLELISM,
                 milestones=MILESTONES,
                 gamma=0.2,
                 warm_phases=WARM_PHASES,
                 criterion=loss.CrossEntropyLoss()):
        print("initialize trainer")
        # keep the test loader around for per-epoch evaluation
        self.test_loader = test_loader

        if torch.cuda.device_count() > 1 and parallelism:
            print(f"using {torch.cuda.device_count()} GPUs")
            self.model = nn.DataParallel(model)
        else:
            self.model = model
        self.model.to(device)

        optimizer = optim.SGD(
            # optimize only the parameters that still require gradients
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=lr,
            momentum=momentum,
            weight_decay=5e-4)

        train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=milestones,
                                                         gamma=gamma)

        # warm phases
        self.warm_phases = warm_phases
        # warmup learning rate
        self.warmup_scheduler = WarmUpLR(optimizer,
                                         len(train_loader) * self.warm_phases)

        self.hp = HyperParameter(scheduler=train_scheduler,
                                 optimizer=optimizer,
                                 criterion=criterion,
                                 batch_size=batch_size,
                                 epochs=epochs,
                                 device=device)

        self.train_loader = train_loader
        print("initialize finished")
        print(f"hyper parameter: {self.hp}")

    def train(self,
              save_path,
              attack=False,
              attacker=None,
              params: Dict = None):
        self._init_attacker(attack, attacker, params)

        batch_number = len(self.train_loader)
        # get current learning rate
        now_lr = self.hp.optimizer.state_dict().get("param_groups")[0].get(
            "lr")
        # record best accuracy
        best_acc = 0

        for ep in range(1, self.hp.epochs + 1):
            # test() puts the model into eval mode, so restore train mode here
            self.model.train()

            training_acc, running_loss = 0, .0
            start_time = time.process_time()

            for index, data in enumerate(self.train_loader):
                inputs, labels = data[0].to(self.hp.device), data[1].to(
                    self.hp.device)

                self.hp.optimizer.zero_grad()
                if attack:
                    # generate adversarial examples first: the attacker runs
                    # its own backward passes and would clobber the gradients
                    adv_inputs = self.attacker.calc_perturbation(
                        inputs, labels)
                    # clear any gradients left over from the attacker
                    self.hp.optimizer.zero_grad()
                    outputs = self.model(inputs)
                    adv_outputs = self.model(adv_inputs)
                    _loss = self.hp.criterion(outputs,
                                              labels) + self.hp.criterion(
                                                  adv_outputs, labels)
                else:
                    outputs = self.model(inputs)
                    _loss = self.hp.criterion(outputs, labels)

                _loss.backward()
                self.hp.optimizer.step()

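                # accumulate the mean accuracy of each batch; normalized by
                # batch_number when the epoch summary is printed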
                outputs: torch.Tensor
                training_acc += (outputs.argmax(
                    dim=1) == labels).float().mean().item()

                # warm up learning rate
                if ep <= self.warm_phases:
                    self.warmup_scheduler.step()

                # detect learning rate change
                new_lr = self.hp.optimizer.state_dict().get(
                    "param_groups")[0].get("lr")
                if new_lr != now_lr:
                    now_lr = new_lr
                    print(f"learning rate changes to {now_lr:.6f}")

                running_loss += _loss.item()

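                # index is 0-based, so this fires on the last batch of the
                # epoch: evaluate on the test set and checkpoint the best model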
                if index % batch_number == batch_number - 1:
                    end_time = time.process_time()

                    acc = self.test(self.model,
                                    test_loader=self.test_loader,
                                    device=self.hp.device)
                    print(
                        f"epoch: {ep}   loss: {(running_loss / batch_number):.6f}   train accuracy: {training_acc / batch_number}   "
                        f"test accuracy: {acc}   time: {end_time - start_time:.2f}s"
                    )

                    if best_acc < acc:
                        best_acc = acc
                        self._save_best_model(save_path, ep, acc)

            # advance the MultiStepLR schedule once per epoch (passing the
            # epoch to step() is deprecated in recent PyTorch)
            self.hp.scheduler.step()
        torch.save(self.model.state_dict(), f"{save_path}-latest")
        print("finished training")
        print(f"best accuracy on test set: {best_acc}")

    @staticmethod
    def test(model: Module, test_loader, device, debug=False):

        correct = 0
        model.eval()  # deterministic BN/dropout behaviour while evaluating
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                _, y_hats = model(inputs).max(1)
                correct += (y_hats == labels).sum().item()

        if debug:
            print(f"Testing: {len(test_loader.dataset)}")
            print(f"correct: {correct}")
            print(f"accuracy: {100*correct/len(test_loader.dataset):.3f}%")

        return correct / len(test_loader.dataset)

    def _init_attacker(self, attack, attacker, params):
        self.attack = attack
        if attack:
            print(f"robustness training with {attacker.__name__}")
            self.attacker = attacker(self.model, **params)
            self.attacker.print_params()
        else:
            print("normal training")

    def _save_best_model(self, save_path, current_epochs, accuracy):
        """save best model with current info"""
        info = {
            "current_epochs": current_epochs,
            "total_epochs": self.hp.epochs,
            "accuracy": accuracy
        }
        if self.attack:
            info.update({
                "attack": self.attack,
                "attacker": type(self.attacker).__name__,
                "epsilons": self.attacker.epsilon,
            })
        with open(os.path.join(os.path.dirname(save_path), "info.json"),
                  "w",
                  encoding="utf8") as f:
            json.dump(info, f)
        torch.save(self.model.state_dict(), f"{save_path}-best")

    @staticmethod
    def train_tl(origin_model_path,
                 save_path,
                 train_loader,
                 test_loader,
                 device,
                 choice="resnet50"):
        print(f"transform learning on model: {origin_model_path}")
        model = TLResNet.create_model(choice)
        model.load_model(origin_model_path)
        trainer = Trainer(model=model,
                          train_loader=train_loader,
                          test_loader=test_loader,
                          device=device)
        trainer.train(save_path)
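
# A minimal usage sketch (hypothetical names: the data loaders and the
# resnet50() constructor come from the surrounding project, not this snippet):
#
#   trainer = Trainer(model=resnet50(), train_loader=train_loader,
#                     test_loader=test_loader, device="cuda")
#   trainer.train(save_path="./checkpoints/resnet50")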