Example #1
    def load_network(loc, masked=False):
        """Rebuild a WideResNet from the checkpoint at `loc` and return it
        together with the epoch at which it was saved."""
        net_checkpoint = torch.load(loc)
        start_epoch = net_checkpoint['epoch']
        SavedConv, SavedBlock = what_conv_block(net_checkpoint['conv'],
                                                net_checkpoint['blocktype'],
                                                net_checkpoint['module'])

        net = WideResNet(args.wrn_depth,
                         args.wrn_width,
                         SavedConv,
                         SavedBlock,
                         num_classes=num_classes,
                         dropRate=0,
                         masked=masked).cuda()

        if masked:
            # The masked variant renames its parameters, so copy the saved
            # tensors across by position rather than by key.
            new_sd = net.state_dict()
            old_sd = net_checkpoint['net']
            for new_key, old_key in zip(new_sd, old_sd):
                new_sd[new_key] = old_sd[old_key]

            net.load_state_dict(new_sd)
        else:
            net.load_state_dict(net_checkpoint['net'])
        return net, start_epoch
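
    # Usage sketch (hypothetical path; assumes the checkpoint dict was
    # written with 'epoch', 'conv', 'blocktype', 'module' and 'net' keys):
    #
    #     net, start_epoch = load_network('checkpoints/wrn.t7')
    #     net.eval()  # the returned network already lives on the GPU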
def main(args):
    harakiri = Harakiri()
    harakiri.set_max_plateau(20)
    train_loss_meter = Meter()
    val_loss_meter = Meter()
    val_accuracy_meter = Meter()
    log = JsonLogger(args.log_path, rand_folder=True)
    log.update(args.__dict__)
    state = args.__dict__
    state['exp_dir'] = os.path.dirname(log.path)
    state['start_lr'] = state['lr']
    print(state)

    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    train_dataset = ImageList(args.root_folder,
                              args.train_listfile,
                              transform=transforms.Compose([
                                  transforms.Resize(256),
                                  transforms.RandomCrop(224),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.ToTensor(),
                                  transforms.Normalize(imagenet_mean,
                                                       imagenet_std)
                              ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               pin_memory=False,
                                               num_workers=args.num_workers)
    val_dataset = ImageList(args.root_folder,
                            args.val_listfile,
                            transform=transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize(imagenet_mean,
                                                     imagenet_std)
                            ]))
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=False,
                                             num_workers=args.num_workers)
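
    # Sanity check on the two pipelines above: both yield normalised
    # 3x224x224 float tensors (synthetic probe image, illustrative only):
    #
    #     from PIL import Image
    #     probe = Image.new('RGB', (500, 375))
    #     tf = transforms.Compose([transforms.Resize(256),
    #                              transforms.CenterCrop(224),
    #                              transforms.ToTensor(),
    #                              transforms.Normalize(imagenet_mean,
    #                                                   imagenet_std)])
    #     print(tf(probe).shape)  # torch.Size([3, 224, 224])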

    if args.attention_depth == 0:
        from models.wide_resnet import WideResNet
        model = WideResNet().finetune(args.nlabels).cuda()
    else:
        from models.wide_resnet_attention import WideResNetAttention
        model = WideResNetAttention(args.nlabels, args.attention_depth,
                                    args.attention_width, args.has_gates,
                                    args.reg_weight).finetune(args.nlabels)

    # if args.load != "":
    #     net.load_state_dict(torch.load(args.load), strict=False)
    #     net = net.cuda()

    optimizer = optim.SGD([{
        'params': model.get_base_params(),
        'lr': args.lr * 0.1
    }, {
        'params': model.get_classifier_params()
    }],
                          lr=args.lr,
                          weight_decay=1e-4,
                          momentum=0.9,
                          nesterov=True)
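
    # The trunk group above runs at args.lr * 0.1 while the classifier group
    # omits 'lr' and so inherits the default args.lr: the pretrained base is
    # fine-tuned ten times more slowly than the new head. Standalone sketch
    # (toy modules, illustrative names only):
    #
    #     base, head = torch.nn.Linear(4, 4), torch.nn.Linear(4, 2)
    #     opt = optim.SGD([{'params': base.parameters(), 'lr': 0.01},
    #                      {'params': head.parameters()}],  # default lr
    #                     lr=0.1, momentum=0.9)
    #     print([g['lr'] for g in opt.param_groups])  # [0.01, 0.1]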

    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, range(args.ngpu)).cuda()
    else:
        model = model.cuda()
    # NLLLoss expects log-probabilities, i.e. a model ending in log_softmax.
    criterion = torch.nn.NLLLoss().cuda()

    def train():
        """Run one training epoch and record the mean loss in `state`."""
        model.train()
        for data, label in train_loader:
            data = data.cuda(non_blocking=True)
            label = label.cuda(non_blocking=True)
            optimizer.zero_grad()
            if args.attention_depth > 0:
                output, loss = model(data)
                if args.reg_weight > 0:
                    loss = loss.mean()
                else:
                    loss = 0
            else:
                loss = 0
                output = model(data)
            loss += F.nll_loss(output, label)
            loss.backward()
            optimizer.step()
            train_loss_meter.update(loss.item(), data.size(0))
        state['train_loss'] = train_loss_meter.mean()

    def val():
        """Evaluate on the validation set and record the mean loss and
        accuracy in `state`."""
        model.eval()
        with torch.no_grad():
            for data, label in val_loader:
                data = data.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)
                if args.attention_depth > 0:
                    output, _ = model(data)  # the attention loss is unused here
                else:
                    output = model(data)
                loss = F.nll_loss(output, label)
                val_loss_meter.update(loss.item(), data.size(0))
                preds = output.max(1)[1]
                val_accuracy_meter.update(
                    (preds == label).float().sum().item(), data.size(0))
        state['val_loss'] = val_loss_meter.mean()
        state['val_accuracy'] = val_accuracy_meter.mean()

    best_accuracy = 0
    counter = 0  # epochs since the last improvement in validation accuracy
    for epoch in range(args.epochs):
        train()
        val()
        harakiri.update(epoch, state['val_accuracy'])
        if state['val_accuracy'] > best_accuracy:
            counter = 0
            best_accuracy = state['val_accuracy']
            if args.save:
                torch.save(model.state_dict(),
                           os.path.join(state["exp_dir"], "model.pytorch"))
        else:
            counter += 1
        state['epoch'] = epoch + 1
        log.update(state)
        print(state)
        if (epoch + 1) in args.schedule:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
            state['lr'] *= 0.1
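            # e.g. with hypothetical args.schedule = [30, 60] and
            # args.lr = 0.1, epochs 1-30 train at 0.1, 31-60 at 0.01 and
            # 61 onwards at 0.001; scaling every param_group preserves the
            # 10:1 classifier/trunk ratio set up above.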

Example #2

        test_loss += loss.item()  # accumulate the batch loss into test_loss
    avg_test_loss = test_loss / len(test_loader)  # mean loss over batches
    avg_test_acc = test_acc / len(test_loader.dataset)  # accuracy over all samples

    # print log
    print('Epoch [{}/{}], train_loss: {loss:.8f}, train_acc: {train_acc:.4f}'.
          format(epoch + 1,
                 EPOCHS,
                 loss=avg_train_loss,
                 train_acc=avg_train_acc))
    print('Epoch [{}/{}], test_loss: {loss:.8f}, test_acc: {test_acc:.4f}'.
          format(epoch + 1, EPOCHS, loss=avg_test_loss,
                 test_acc=avg_test_acc))

    # append to the lists used to plot curves after training
    train_loss_list.append(avg_train_loss)
    train_acc_list.append(avg_train_acc)
    test_loss_list.append(avg_test_loss)
    test_acc_list.append(avg_test_acc)
    wandb.log({"epoch": epoch + 1, "train accuracy": avg_train_acc})
    wandb.log({"epoch": epoch + 1, "train accuracy": avg_train_acc})
    wandb.log({"epoch": epoch + 1, "test accuracy": avg_test_acc})
    wandb.log({"epoch": epoch + 1, "train loss": avg_train_loss})
    wandb.log({"epoch": epoch + 1, "test loss": avg_test_loss})

end_time = time.time()
print('elapsed time: {:.4f} s'.format(end_time - start_time))

torch.save(model.state_dict(), "result/model_weight/" + run_name + '.pth')
wandb.save("result/model_weight/" + run_name + '.pth')
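
To reuse the saved weights later, load them back into a freshly constructed network. A minimal sketch, where build_model() is a hypothetical stand-in for however the architecture is actually built:

    model = build_model()  # hypothetical constructor, not part of this script
    weights = torch.load("result/model_weight/" + run_name + '.pth',
                         map_location='cpu')
    model.load_state_dict(weights)
    model.eval()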