Example 1
def test(i, key, shape, rand=False, randFactor=256):
    global best_acc
    test_loss = 0
    correct = 0
    if (not rand) or (len(shape) != 4):
        model = nin.Net()
        pretrained_model = torch.load(args.pretrained)
        best_acc = pretrained_model['best_acc']
        model.load_state_dict(pretrained_model['state_dict'])
        model.to(device)
        bin_op = util.BinOp(model)
        model.eval()
        bin_op.binarization()
        state_dict = model.state_dict()

    if len(shape) == 4:
        size1 = shape[1]
        size2 = shape[2]
        size3 = shape[3]
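        # i is a flat index into the 4-D weight tensor; the nested int(...) terms
        # below decode it into (out_channel, in_channel, row, col) coordinates, and
        # mul_(-1) flips the sign of that single binarized weight in place.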
        if rand:
            if (int(i / (size2 * size3)) % int(size1)) == torch.randint(
                    0, size1 - 1, [1]):
                model = nin.Net()
                pretrained_model = torch.load(args.pretrained)
                model.load_state_dict(pretrained_model['state_dict'])
                model.to(device)
                bin_op = util.BinOp(model)
                model.eval()
                bin_op.binarization()
                state_dict = model.state_dict()
                (state_dict[key][int(i / size1 / size2 / size3)][int(
                    i / size2 / size3 % size1)][int(i / size3 % size2)][int(
                        i % size3)]).mul_(-1)
            else:
                return 100
        else:
            (state_dict[key][int(i / size1 / size2 / size3)][int(
                i / size2 / size3 % size1)][int(i / size3 % size2)][int(
                    i % size3)]).mul_(-1)

    if len(shape) == 1:
        state_dict[key][i].mul_(-1)

    if len(shape) == 2:
        size = state_dict[key].shape[1]
        (state_dict[key][int(i / size)][i % size]).mul_(-1)

    with torch.no_grad():
        for data, target in testloader:
            data, target = Variable(data.to(device)), Variable(
                target.to(device))

            output = model(data)
            test_loss += criterion(output, target).data.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    bin_op.restore()
    acc = 100. * float(correct) / len(testloader.dataset)
    return acc
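The function above reports accuracy after flipping one binarized weight. A minimal driver sketch (illustrative only, not from the original repository; the layer name and variable names are assumptions) would sweep the flat index over a single layer:

# Hypothetical sweep: one accuracy value per flipped weight of the chosen layer.
ref_state = torch.load(args.pretrained)['state_dict']   # same checkpoint test() loads
layer_key = 'conv1.weight'                               # assumed layer name
shape = list(ref_state[layer_key].shape)
accuracies = [test(i, layer_key, shape)
              for i in range(ref_state[layer_key].numel())]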
Example 2
def test():
    import torch
    from torch.autograd import Variable
    model = vgg16XNOR()
    print(model)
    bin_range = [1, 11]
    bin_op = util.BinOp(model, bin_range)
    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    feature, output = model(img)
    print(output.size())
    print(feature.size())
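Both examples rely on the same binarize / restore protocol. A minimal sketch of it, assuming only the util.BinOp methods already used in these snippets (binarization() and restore()):

# Sketch: run inference with binarized weights, then restore full precision.
bin_op = util.BinOp(model)
bin_op.binarization()        # swap the targeted weights for their binarized values
with torch.no_grad():
    output = model(torch.rand(2, 3, 224, 224))
bin_op.restore()             # put the saved full-precision weights back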
Example 3
def model_components():
    print('==> building model', args.arch, '...')

    model = resnet('ResNet_imagenet',
                   pretrained=args.pretrained,
                   num_classes=1000,
                   depth=18,
                   dataset='imagenet')
    #load model
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            new_params = model.state_dict()
            new_params.update(checkpoint['state_dict'])
            model.load_state_dict(new_params)
            del checkpoint
        else:
            raise Exception(args.resume + ' is not found.')
    else:
        print('==> Initializing model parameters ...')
        for m in model.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                c = float(m.weight.data[0].nelement())
                m.weight.data.normal_(0, 1. / c)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    m.weight.data = m.weight.data.zero_().add(1.0)

    #data parallel
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define solver and criterion
    ps = filter(lambda x: x.requires_grad, model.parameters())

    optimizer = optim.Adam(ps, lr=args.lr, weight_decay=0.00001)

    criterion = nn.CrossEntropyLoss().cuda()
    criterion_seperated = nn.CrossEntropyLoss(reduce=False).cuda()
    # define the binarization operator
    bin_op = util.BinOp(model, 'FL_Full')
    return model, optimizer, criterion, criterion_seperated, bin_op
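A hypothetical call site simply unpacks the returned tuple:

model, optimizer, criterion, criterion_seperated, bin_op = model_components()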
Example 4
def test():
    import torch
    from torch.autograd import Variable
    model, bin_range = vgg16_mix3()
    model.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1470),
    )
    print(model.classifier[6])
    print(model)
    print(bin_range)
    bin_op = util.BinOp(model, bin_range)
    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    output = model(img)
    print(output.size())
Example 5
    print(model)
    param_dict = dict(
        model.named_parameters())  # model.named_parameters() returns a generator
    params = []

    base_lr = 0.1

    for key, value in param_dict.items():  # value is a torch Parameter object
        params += [{
            'params': [value],
            'lr': args.lr,  # specify hyperparameters for each parameter individually
            'weight_decay': args.weight_decay,
            'key': key
        }]

    optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)

    criterion = nn.CrossEntropyLoss()

    # define the binarization operator
    bin_op = util.BinOp(model)

    if args.evaluate:
        test(evaluate=True)  # evaluation mode: do not save the model
        exit()

    for epoch in range(1, args.epochs + 1):  # training mode
        adjust_learning_rate(optimizer, epoch)
        train(epoch)
        test()
Example 6
                print(count)
                para.requires_grad = True
            else:
                para.requires_grad = False

    for name, para in model.named_parameters():
        print(name, para.requires_grad)

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=args.lr,
                           weight_decay=0.00001)
    criterion = nn.CrossEntropyLoss()

    # define the binarization operator
    bin_op = util.BinOp(model, True if args.main else False)

    print(len(bin_op.target_modules))

    # start training
    global writer, name_rec
    name_rec = 'NIN_Bin_initial_weight' if not args.layer else 'NIN_Bin_fintune_{:}_{:}_lr_{:}'.format(
        'weight' if args.main else 'mask', args.layer, args.lr)
    print(name_rec)

    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    logdir = os.path.join('runs', current_time + '_' + socket.gethostname())
    writer = SummaryWriter(logdir + name_rec)

    test(0, model)
    for epoch in range(1, args.epochs + 1):
Example 7

net = vgg_bin_prune.VGG()
print(net)

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),
                      lr=opt.lr,
                      momentum=0.9,
                      weight_decay=6e-4)

# define the binarization operator
bin_op = util.BinOp(net)


# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
Example 8
print(model)

# define solver and criterion
base_lr = float(args.lr)
param_dict = dict(model.named_parameters())
params = []

for key, value in param_dict.items():
    params += [{'params': [value], 'lr': base_lr, 'weight_decay': 0.00001}]

optimizer = optim.Adam(params, lr=0.10, weight_decay=0.00001)

criterion = nn.CrossEntropyLoss()
criterion_seperated = nn.CrossEntropyLoss(reduce=False)
# define the binarization operator
bin_op = util.BinOp(model, 'nin')


def update_weights(softmax_output, target, sample_weights):
    print("start updating..")
    pred_numpy = softmax_output
    target_numpy = target
    pred_numpy = torch.squeeze(pred_numpy)
    miss = torch.Tensor([int(x) for x in (pred_numpy != target_numpy)])
    miss2 = torch.Tensor([x if x == 1 else -1 for x in miss])
    miss = miss.unsqueeze(1)

    err_m = torch.mm(torch.t(sample_weights), miss) / torch.sum(sample_weights)
    alpha_m = 0.5 * torch.log((1 - err_m) / float(err_m))
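    # AdaBoost-style update: err_m is the weighted error rate of the current
    # predictions, and alpha_m = 0.5 * ln((1 - err_m) / err_m) is the
    # corresponding weight applied below through the +/-1 miss indicator miss2.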

    prior_exp = torch.t(torch.Tensor(alpha_m * miss2))
Example 9
    def __init__(self,
                 mlb_path='data/mlb_cell.npy',
                 num_ctrl=100,
                 num_sc=415,
                 num_merge=20,
                 upper_bound_pre=0.2,
                 upper_bound=0.5,
                 arch='fc_ae_1layer',
                 alpha=0.008,
                 epoches=300,
                 batch_size=16,
                 lr=0.01,
                 wd=1e-5,
                 seed=208):
        # self.mlb = np.load(mlb_path)
        self.mlb = None
        self.num_ctrl = num_ctrl
        self.num_sc = num_sc
        self.num_merge = num_merge
        self.upper_bound_pre = upper_bound_pre
        self.upper_bound = upper_bound
        self.epoches = epoches
        self.alpha = alpha
        self.seed = seed
        self._get_device()
        self._set_random_seed()
        self.writer = SummaryWriter('runs')

        # pre-merge to generate training data
        # self.merge_pre()
        # exit()
        # self.data = np.load('data/data_{}_rotate.npy'.format(self.num_merge))
        # self.data = (np.abs(self.data).sum(axis=2) != 0).astype(float)
        self.data = np.load('data/data_stochastic.npy')
        # self.data = np.load('data/data_1.npy')
        # self.data = (np.abs(self.data).sum(axis=2) != 0).astype(float)

        logging.info('The size of dataset is {}'.format(self.data.shape[0]))
        specified_percentage = self.data.sum() / (self.data.shape[0] *
                                                  self.num_sc)
        logging.info(
            'Specified scan chain percentage after merging is {:.2f}% ({:.2f}).'
            .format(100. * specified_percentage,
                    specified_percentage * self.num_sc))

        # Training dataset and its loader
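        # rescale the (presumably {0, 1}-valued) entries to {-1, +1}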
        self.data = 2 * self.data - 1

        self.train_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(self.data).float())
        self.train_loader = torch.utils.data.DataLoader(self.train_dataset,
                                                        batch_size=32,
                                                        shuffle=True)

        # Define models
        if arch == 'fc_ae':
            self.model = FCAutoEncoder(num_sc, num_ctrl)
            self.bin_op = util.BinOp(self.model)
        elif arch == 'fc_ae_1layer':
            self.model = FCAutoEncoder1Layer(num_sc, num_ctrl)
            self.bin_op = util.BinOp(self.model)
        else:
            raise NotImplementedError

        # Define optimizer
        # self.optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, 40, 0.1)

        # Define loss function
        self.criterion = nn.MSELoss()
Example 10
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory" + args.data + "does not exits")
            print("==> Please specify the correct data path by")
            print("==>     --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(meanfile=args.data +
                                         '/imagenet_mean.binaryproto')

        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Example 11
        if args.mixnet:
            model, foo = vgg16_mix3(pretrained=False)
            model = torch.nn.DataParallel(model)
            model.cuda()
            checkpoint = torch.load(
                './experiment/vgg16mix/2019_04_08/checkpoint.pth.tar')
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model = model_list.vgg(pretrained=False)
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
            checkpoint = torch.load(
                './experiment/vgg16xnor/model_best.pth.tar')
            model.load_state_dict(checkpoint['state_dict'])
        bin_range = [10, 11]
        bin_op = util.BinOp(model, bin_range)
        bin_op.binarization()

    print(model)
    model.eval()
    count = 0
    for image_path in tqdm(image_list):
        result = predict_gpu(
            model,
            image_path,
            root_path='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/'
        )  #result[[left_up,right_bottom,class_name,image_path],]
        for (x1, y1), (x2, y2), class_name, image_id, prob in result:
            preds[class_name].append([image_id, prob, x1, y1, x2, y2])

    print('---start evaluate---')
Example 12

    for key, value in student_param_dict.items():
        student_params += [{'params':[value], 'lr': args.studlr, 'weight_decay':0.00001}]

    criterion = nn.CrossEntropyLoss()

    if args.losstype == 'gan':
        GANLoss = torch.nn.BCEWithLogitsLoss()
    else:
        GANLoss = torch.nn.MSELoss()

    student_optimizer = optim.Adam(student_params, lr=args.studlr, weight_decay=0.00001)
    netD_optimizer = optim.Adam(netD.parameters(), lr=args.netDlr, weight_decay=0.00001)

    # define the binarization operator
    bin_op = util.BinOp(student)

    # do the evaluation if specified
    if args.evaluate:
        test(student)
        exit(0)

    best_acc = 0

    # start training
    test(teacher, best_acc, False)
    print("Now testing dumb student")
    test(student, best_acc)

    writer = SummaryWriter()
Example 13
    params = []

    for key, value in param_dict.items():
        params += [{'params':[value], 'lr': base_lr,
            'weight_decay':0.00001}]

    optimizer = optim.Adam(params, lr=0.10, weight_decay=0.00001)
    criterion = nn.CrossEntropyLoss()

    flip_mat = [0] * 7
    flip_mat_sum = [0] * 7
    target_modules_last = [0] * 7
    flip_mat_mask = [0] * 7

    # define the binarization operator
    bin_op = util.BinOp(model, flip_mat_mask)

    # do the evaluation if specified
    if args.evaluate:
        test()
        exit(0)

    # start training
    for epoch in range(1, 320):
        start = time.perf_counter()
        adjust_learning_rate(optimizer, epoch)
        train(epoch)
        test()

        # # ### new
        if epoch > 1:
Example 14
def main():

    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch=='alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 224
    elif args.arch=='vgg16':
        model = model_list.vgg_net(pretrained=args.pretrained)
        input_size = 224
    elif args.arch=='vgg15_bwn':
        model = model_list.vgg_15(pretrained=args.pretrained)
        input_size = 224
    elif args.arch=='vgg15_bn_XNOR':
        model = model_list.vgg15_bn_XNOR(pretrained=args.pretrained)
        input_size = 224
    elif args.arch=='vgg15ab':
        model = model_list.vgg15ab(pretrained=args.pretrained)
        input_size = 224
    elif args.arch=='sq':
        model = model_list.squeezenet1_1()
        input_size = 224
    else:
        raise Exception('Model not supported yet')

    # if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
    #     pass
    #     model.features = torch.nn.DataParallel(model.features)
    #     model.cuda()
    # else:
    # model = torch.nn.DataParallel(model).cuda()
    model.cuda()
    # model.features = torch.nn.DataParallel(model.features)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                              # betas=(0.0, 0.999),
    #                              weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.0, 0.999),
                                weight_decay=args.weight_decay)
# scratch
#     for m in model.modules():
#         if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
#             c = float(m.weight.data[0].nelement())
#             m.weight.data = m.weight.data.normal_(0, 2.0/c)
#         elif isinstance(m, nn.BatchNorm2d):
#             m.weight.data = m.weight.data.zero_().add(1.0)
#             m.bias.data = m.bias.data.zero_()

    # optionally resume from a checkpoint
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.pretrained)
    #         # TODO: Temporary remake
    #         # args.start_epoch = 0
    #         # best_prec1 = 0.0
    #         # model.features = torch.nn.DataParallel(model.features)
    try:
        args.start_epoch = checkpoint['epoch']
        if args.pretrained:
            best_prec1 = 0
        model = torch.nn.DataParallel(model)
        model.load_state_dict(checkpoint['state_dict'])
    except KeyError:
        model.load_state_dict(checkpoint)
        pass
    #
    #
    #
    #
    #         # optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded checkpoint '{}' (epoch {})"
    #               .format(args.resume, args.start_epoch))
    #         del checkpoint
    #     else:
    #         print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd+'/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data+'/imagenet_mean.binaryproto'):
            print("==> Data directory"+args.data+"does not exits")
            print("==> Please specify the correct data path by")
            print("==>     --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(
                meanfile=args.data+'/imagenet_mean.binaryproto')


        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(args.data, transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
    elif args.cifar:
        import torchvision.transforms as transforms
        import torchvision
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                download=True, transform=transform)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                                  shuffle=True, num_workers=2)

        testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                               download=True, transform=transform)
        val_loader = torch.utils.data.DataLoader(testset, batch_size=100,
                                                 shuffle=False, num_workers=2)

        classes = ('plane', 'car', 'bird', 'cat',
                   'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        # traindir = os.path.join(args.data, 'train')
        # valdir = os.path.join(args.data, 'test')
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        if True:
        #     train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        # else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.batch_size//2 if args.arch.startswith('vgg') else args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
    # print (model)

    # define the binarization operator
    global bin_op

    bin_op = util.BinOp(model)


    if args.evaluate:
        if args.binarize:
            bin_op.binarization()
            save_checkpoint(model.state_dict(), False, filename='{}/{}_bin_'.format(args.workdir, args.arch))
            bin_op.restore()
        # bin_op.binarization()
        # save_checkpoint(model.state_dict(), False, 'vgg_binarized')
        # bin_op.restore()
        validate(val_loader, model, criterion)
        return
    val_prec_list = []
    writer = SummaryWriter(args.workdir+'/runs/loss_graph')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, writer)

        # evaluate on validation set
        prec1, prec5 = validate(val_loader, model, criterion)
        val_prec_list.append(prec1)
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filename='{}/{}_'.format(args.workdir, args.arch))
        writer.add_scalar('top1 accuracy', prec1, epoch)
        writer.add_scalar('top5 accuracy', prec5, epoch)
        writer.add_scalar('learning rate', args.lr, epoch)
    print(val_prec_list)
Example 15
          0.001,
          momentum=0.9,
          weight_decay=3e-4)
    
    else:
        print('unsupported learning rate scheduler: ', args.lr_scheduler)
        exit(1)

    criterion = nn.CrossEntropyLoss()

    # define the binarization operator
    # quantization:
    # bwn => only model quantization
    # joint_bwn => joint data and model quantization
    # none => no model and no data quantization
    # data => only data quantization
    if quanModel:
        bin_op = util.BinOp(model,
                            binarize_first_layer=args.binarize_first_layer,
                            binarize_last_layer=args.binarize_last_layer)

    # do the evaluation if specified
    if args.evaluate:
        test(testloader,model,criterion)
        exit(0)


    ############################### MODEL TRAINING ###############################
    for epoch in range(0, args.epochs):
        if args.lr_scheduler == 'Adam':
            adjust_learning_rate(optimizer, epoch)
        train(epoch, trainloader, model, criterion, optimizer)
        test(epoch, testloader, model, criterion)
Example 16
def main():
    '''Parse argument.'''
    parser = argparse.ArgumentParser(description='Pytorch XNOR-YOLO Training')

    parser.add_argument('--epochs',
                        default=300,
                        type=int,
                        metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--lr',
                        '--learning-rate',
                        default=0.001,
                        type=float,
                        metavar='LR',
                        help='initial learning rate')
    parser.add_argument('--l',
                        '--wd',
                        default=1e-5,
                        type=float,
                        metavar='W',
                        help='weight decay (default: 1e-5)')
    parser.add_argument('--pretrained',
                        dest='pretrained',
                        action='store_true',
                        default=False,
                        help='use pre-trained model')
    parser.add_argument('--mixnet',
                        dest='mixnet',
                        action='store_true',
                        default=False,
                        help='use mixnet model')
    parser.add_argument('--start-epoch',
                        default=0,
                        type=int,
                        metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-b',
                        '--batch-size',
                        default=8,
                        type=int,
                        metavar='N',
                        help='mini-batch size (default: 256)')
    global args
    args = parser.parse_args()
    '''Data loading module'''
    train_dataset = yoloDataset(
        root='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/',
        list_file=['./meta/voc2007.txt', './meta/voc2012.txt'],
        train=True,
        transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=4)

    test_dataset = yoloDataset(
        root='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/',
        list_file='./meta/voc2007test.txt',
        train=False,
        transform=[transforms.ToTensor()])

    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=4)
    '''Create model.'''
    teacher_model = vgg16(pretrained=False)
    student_model = vgg16XNOR(pretrained=False)

    teacher_model = torch.nn.DataParallel(teacher_model)
    student_model.features = torch.nn.DataParallel(student_model.features)
    teacher_model.cuda()
    student_model.cuda()
    '''Define loss function, i.e. YoloLoss, and optimizer, i.e. Adam.'''
    gt_criterion = yoloLoss(7, 2, 5, 0.5)
    mm_criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(student_model.parameters(),
                                 args.lr,
                                 weight_decay=args.l)
    '''weight initialization'''
    for m in student_model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 2.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)
            m.bias.data = m.bias.data.zero_()
    '''weight loading'''
    teacher_model.load_state_dict(
        torch.load('./experiment/vgg16fp/checkpoint.pth'))
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            student_model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    print(student_model, teacher_model)
    '''Define binarization operator.'''
    global bin_op
    bin_range = [1, 11]
    bin_op = util.BinOp(student_model, bin_range)

    best_loss = 100

    for epoch in range(args.start_epoch, args.epochs):

        adjust_learning_rate(optimizer, epoch)
        '''Train& validate for one epoch.'''
        train(train_loader, student_model, teacher_model, gt_criterion,
              mm_criterion, optimizer, epoch)
        val_loss = validate(test_loader, student_model, teacher_model,
                            gt_criterion)

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': student_model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best)