def test(i, key, shape, rand=False, randFactor=256):
    # Flip the sign of one (binarized) weight of parameter `key`, selected by the
    # flat index `i`, then evaluate the model and return the test accuracy.
    global best_acc
    test_loss = 0
    correct = 0
    if (not rand) or (len(shape) != 4):
        model = nin.Net()
        pretrained_model = torch.load(args.pretrained)
        best_acc = pretrained_model['best_acc']
        model.load_state_dict(pretrained_model['state_dict'])
        model.to(device)
        bin_op = util.BinOp(model)
        model.eval()
        bin_op.binarization()
        state_dict = model.state_dict()
    if len(shape) == 4:
        size1 = shape[1]
        size2 = shape[2]
        size3 = shape[3]
        if rand:
            if (int(i / (size2 * size3)) % int(size1)) == torch.randint(0, size1 - 1, [1]):
                model = nin.Net()
                pretrained_model = torch.load(args.pretrained)
                model.load_state_dict(pretrained_model['state_dict'])
                model.to(device)
                bin_op = util.BinOp(model)
                model.eval()
                bin_op.binarization()
                state_dict = model.state_dict()
                (state_dict[key][int(i / size1 / size2 / size3)]
                                [int(i / size2 / size3 % size1)]
                                [int(i / size3 % size2)]
                                [int(i % size3)]).mul_(-1)
            else:
                return 100
        else:
            (state_dict[key][int(i / size1 / size2 / size3)]
                            [int(i / size2 / size3 % size1)]
                            [int(i / size3 % size2)]
                            [int(i % size3)]).mul_(-1)
    if len(shape) == 1:
        state_dict[key][i].mul_(-1)
    if len(shape) == 2:
        size = state_dict[key].shape[1]
        (state_dict[key][int(i / size)][i % size]).mul_(-1)

    with torch.no_grad():
        for data, target in testloader:
            data, target = Variable(data.to(device)), Variable(target.to(device))
            output = model(data)
            test_loss += criterion(output, target).data.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    bin_op.restore()
    acc = 100. * float(correct) / len(testloader.dataset)
    return acc
def test():
    import torch
    from torch.autograd import Variable

    model = vgg16XNOR()
    print(model)

    bin_range = [1, 11]
    bin_op = util.BinOp(model, bin_range)

    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    feature, output = model(img)
    print(output.size())
    print(feature.size())
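# --- Illustrative sketch (not part of the original sources) -----------------
# Every snippet in this section relies on a `util.BinOp` helper whose
# implementation is not shown. The class below is a minimal sketch, under the
# assumption that the operator collects the Conv2d/Linear weights it targets,
# saves the real-valued copies, replaces them in place with sign(w) scaled by
# the mean absolute value, and restores them afterwards. Names here
# (BinOpSketch, saved) are illustrative, not the actual util.BinOp API.
import torch.nn as nn


class BinOpSketch:
    def __init__(self, model):
        # collect the weight tensors of all Conv2d/Linear modules
        self.target_modules = [m.weight for m in model.modules()
                               if isinstance(m, (nn.Conv2d, nn.Linear))]
        self.saved = [None] * len(self.target_modules)

    def binarization(self):
        # save full-precision weights, then binarize in place
        for idx, w in enumerate(self.target_modules):
            self.saved[idx] = w.data.clone()
            scale = w.data.abs().mean()      # per-tensor scaling factor
            w.data = w.data.sign() * scale

    def restore(self):
        # put the saved full-precision weights back
        for idx, w in enumerate(self.target_modules):
            w.data.copy_(self.saved[idx])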
def model_components():
    print('==> building model', args.arch, '...')
    model = resnet('ResNet_imagenet',
                   pretrained=args.pretrained,
                   num_classes=1000,
                   depth=18,
                   dataset='imagenet')

    # load model
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            new_params = model.state_dict()
            new_params.update(checkpoint['state_dict'])
            model.load_state_dict(new_params)
            del checkpoint
        else:
            raise Exception(args.resume + ' is not found.')
    else:
        print('==> Initializing model parameters ...')
        for m in model.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                c = float(m.weight.data[0].nelement())
                m.weight.data.normal_(0, 1. / c)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    m.weight.data = m.weight.data.zero_().add(1.0)

    # data parallel
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define solver and criterion
    ps = filter(lambda x: x.requires_grad, model.parameters())
    optimizer = optim.Adam(ps, lr=args.lr, weight_decay=0.00001)
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_seperated = nn.CrossEntropyLoss(reduction='none').cuda()

    # define the binarization operator
    bin_op = util.BinOp(model, 'FL_Full')

    return model, optimizer, criterion, criterion_seperated, bin_op
def test():
    import torch
    from torch.autograd import Variable

    model, bin_range = vgg16_mix3()
    model.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1470),
    )
    print(model.classifier[6])
    print(model)
    print(bin_range)

    bin_op = util.BinOp(model, bin_range)

    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    output = model(img)
    print(output.size())
print(model)
# model.named_parameters() returns a generator of (name, Parameter) pairs
param_dict = dict(model.named_parameters())
params = []
base_lr = 0.1

for key, value in param_dict.items():  # value is a torch Parameter
    params += [{
        'params': [value],
        'lr': args.lr,  # hyperparameters can be set per parameter group
        'weight_decay': args.weight_decay,
        'key': key
    }]

optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
criterion = nn.CrossEntropyLoss()

# define the binarization operator
bin_op = util.BinOp(model)

if args.evaluate:
    test(evaluate=True)  # evaluation mode: the model is not saved
    exit()

for epoch in range(1, args.epochs + 1):  # training mode
    adjust_learning_rate(optimizer, epoch)
    train(epoch)
    test()
        print(count)
        para.requires_grad = True
    else:
        para.requires_grad = False

for name, para in model.named_parameters():
    print(name, para.requires_grad)

optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr,
                       weight_decay=0.00001)
criterion = nn.CrossEntropyLoss()

# define the binarization operator
bin_op = util.BinOp(model, True if args.main else False)
print(len(bin_op.target_modules))

# start training
global writer, name_rec
name_rec = 'NIN_Bin_initial_weight' if not args.layer else 'NIN_Bin_finetune_{:}_{:}_lr_{:}'.format(
    'weight' if args.main else 'mask', args.layer, args.lr)
print(name_rec)
current_time = datetime.now().strftime('%b%d_%H-%M-%S')
logdir = os.path.join('runs', current_time + '_' + socket.gethostname())
writer = SummaryWriter(logdir + name_rec)

test(0, model)
for epoch in range(1, args.epochs + 1):
net = vgg_bin_prune.VGG()
print(net)

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=6e-4)

# define the binarization operator
bin_op = util.BinOp(net)


# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor
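# --- Illustrative sketch (assumption, not the original train loop) ----------
# The train() above is truncated and does not show how bin_op is used per
# batch. A typical XNOR-style step binarizes the weights before the forward
# pass, restores the full-precision copies before the optimizer update, and,
# where the helper provides it, rescales the gradients of the binarized
# layers. `updateBinaryGradWeight` is assumed here; the call order follows the
# common XNOR-Net-PyTorch pattern rather than this repository's exact code.
def train_step(net, bin_op, optimizer, criterion, inputs, targets):
    bin_op.binarization()                  # swap in binarized weights
    outputs = net(inputs)                  # forward pass with binary weights
    loss = criterion(outputs, targets)

    optimizer.zero_grad()
    loss.backward()                        # gradients w.r.t. binarized weights

    bin_op.restore()                       # bring back full-precision weights
    if hasattr(bin_op, 'updateBinaryGradWeight'):
        bin_op.updateBinaryGradWeight()    # optional gradient rescaling
    optimizer.step()                       # update the full-precision weights
    return loss.item()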
print(model)

# define solver and criterion
base_lr = float(args.lr)
param_dict = dict(model.named_parameters())
params = []
for key, value in param_dict.items():
    params += [{'params': [value], 'lr': base_lr, 'weight_decay': 0.00001}]

optimizer = optim.Adam(params, lr=0.10, weight_decay=0.00001)
criterion = nn.CrossEntropyLoss()
criterion_seperated = nn.CrossEntropyLoss(reduction='none')

# define the binarization operator
bin_op = util.BinOp(model, 'nin')


def update_weights(softmax_output, target, sample_weights):
    # AdaBoost-style re-weighting: err_m is the weighted error of the current
    # learner and alpha_m = 0.5 * ln((1 - err_m) / err_m) is its vote weight.
    print("start updating..")
    pred_numpy = softmax_output
    target_numpy = target
    pred_numpy = torch.squeeze(pred_numpy)
    miss = torch.Tensor([int(x) for x in (pred_numpy != target_numpy)])
    miss2 = torch.Tensor([x if x == 1 else -1 for x in miss])
    miss = miss.unsqueeze(1)
    err_m = torch.mm(torch.t(sample_weights), miss) / torch.sum(sample_weights)
    alpha_m = 0.5 * torch.log((1 - err_m) / float(err_m))
    prior_exp = torch.t(torch.Tensor(alpha_m * miss2))
def __init__(self,
             mlb_path='data/mlb_cell.npy',
             num_ctrl=100,
             num_sc=415,
             num_merge=20,
             upper_bound_pre=0.2,
             upper_bound=0.5,
             arch='fc_ae_1layer',
             aplha=0.008,
             epoches=300,
             batch_size=16,
             lr=0.01,
             wd=1e-5,
             seed=208):
    # self.mlb = np.load(mlb_path)
    self.mlb = None
    self.num_ctrl = num_ctrl
    self.num_sc = num_sc
    self.num_merge = num_merge
    self.upper_bound_pre = upper_bound_pre
    self.upper_bound = upper_bound
    self.epoches = epoches
    self.alpha = aplha
    self.seed = seed
    self._get_device()
    self._set_random_seed()
    self.writer = SummaryWriter('runs')

    # pre-merge to generate training data
    # self.merge_pre()
    # exit()
    # self.data = np.load('data/data_{}_rotate.npy'.format(self.num_merge))
    # self.data = (np.abs(self.data).sum(axis=2) != 0).astype(float)
    self.data = np.load('data/data_stochastic.npy')
    # self.data = np.load('data/data_1.npy')
    # self.data = (np.abs(self.data).sum(axis=2) != 0).astype(float)
    logging.info('The size of dataset is {}'.format(self.data.shape[0]))
    specified_percentage = self.data.sum() / (self.data.shape[0] * self.num_sc)
    logging.info(
        'Specified scan chain percentage after merging is {:.2f}% ({:.2f}).'
        .format(100. * specified_percentage, specified_percentage * self.num_sc))

    # Training dataset and its loader
    # (note: the batch_size argument is not used here; the loader batch size is fixed at 32)
    self.data = 2 * self.data - 1
    self.train_dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(self.data).float())
    self.train_loader = torch.utils.data.DataLoader(self.train_dataset,
                                                    batch_size=32,
                                                    shuffle=True)

    # Define models
    if arch == 'fc_ae':
        self.model = FCAutoEncoder(num_sc, num_ctrl)
        self.bin_op = util.BinOp(self.model)
    elif arch == 'fc_ae_1layer':
        self.model = FCAutoEncoder1Layer(num_sc, num_ctrl)
        self.bin_op = util.BinOp(self.model)
    else:
        raise NotImplementedError

    # Define optimizer
    # self.optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd)
    self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
    # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, 40, 0.1)

    # Define loss function
    self.criterion = nn.MSELoss()
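# --- Illustrative sketch (assumption, not the original fit routine) ---------
# The constructor above only builds the data loader, model, BinOp, optimizer
# and MSE criterion; the training method itself is not shown. A minimal
# reconstruction-style epoch for this binary autoencoder setup could look like
# the following (device handling omitted; fit_one_epoch is a hypothetical name).
def fit_one_epoch(self):
    self.model.train()
    total_loss = 0.0
    for (batch,) in self.train_loader:        # TensorDataset yields 1-tuples
        self.bin_op.binarization()            # binarize the autoencoder weights
        recon = self.model(batch)
        loss = self.criterion(recon, batch)   # reconstruct the +/-1 input
        self.optimizer.zero_grad()
        loss.backward()
        self.bin_op.restore()
        self.optimizer.step()
        total_loss += loss.item()
    return total_loss / len(self.train_loader)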
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms

        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory " + args.data + " does not exist")
            print("==> Please specify the correct data path by")
            print("==> --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(
            meanfile=args.data + '/imagenet_mean.binaryproto')

        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(args.data,
                                 transforms.Compose([
                                     transforms.ToTensor(),
                                     normalize,
                                     transforms.CenterCrop(input_size),
                                 ]),
                                 Train=False),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets

        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir,
                                 transforms.Compose([
                                     transforms.Resize(256),
                                     transforms.CenterCrop(input_size),
                                     transforms.ToTensor(),
                                     normalize,
                                 ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)

    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
if args.mixnet:
    model, foo = vgg16_mix3(pretrained=False)
    model = torch.nn.DataParallel(model)
    model.cuda()
    checkpoint = torch.load(
        './experiment/vgg16mix/2019_04_08/checkpoint.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
else:
    model = model_list.vgg(pretrained=False)
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    checkpoint = torch.load('./experiment/vgg16xnor/model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    bin_range = [10, 11]
    bin_op = util.BinOp(model, bin_range)
    bin_op.binarization()

print(model)
model.eval()

count = 0
for image_path in tqdm(image_list):
    result = predict_gpu(
        model,
        image_path,
        root_path='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/')
    # result: [[left_up, right_bottom, class_name, image_path], ...]
    for (x1, y1), (x2, y2), class_name, image_id, prob in result:
        preds[class_name].append([image_id, prob, x1, y1, x2, y2])

print('---start evaluate---')
for key, value in student_param_dict.items():
    student_params += [{
        'params': [value],
        'lr': args.studlr,
        'weight_decay': 0.00001
    }]

criterion = nn.CrossEntropyLoss()
if args.losstype == 'gan':
    GANLoss = torch.nn.BCEWithLogitsLoss()
else:
    GANLoss = torch.nn.MSELoss()

student_optimizer = optim.Adam(student_params, lr=args.studlr,
                               weight_decay=0.00001)
netD_optimizer = optim.Adam(netD.parameters(), lr=args.netDlr,
                            weight_decay=0.00001)

# define the binarization operator
bin_op = util.BinOp(student)

# do the evaluation if specified
if args.evaluate:
    test(student)
    exit(0)

best_acc = 0

# start training
test(teacher, best_acc, False)
print("Now testing dumb student")
test(student, best_acc)
writer = SummaryWriter()
params = []
for key, value in param_dict.items():
    params += [{'params': [value], 'lr': base_lr, 'weight_decay': 0.00001}]

optimizer = optim.Adam(params, lr=0.10, weight_decay=0.00001)
criterion = nn.CrossEntropyLoss()

flip_mat = [0] * 7
flip_mat_sum = [0] * 7
target_modules_last = [0] * 7
flip_mat_mask = [0] * 7

# define the binarization operator
bin_op = util.BinOp(model, flip_mat_mask)

# do the evaluation if specified
if args.evaluate:
    test()
    exit(0)

# start training
for epoch in range(1, 320):
    start = time.perf_counter()
    adjust_learning_rate(optimizer, epoch)
    train(epoch)
    test()
    # ### new
    if epoch > 1:
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 224
    elif args.arch == 'vgg16':
        model = model_list.vgg_net(pretrained=args.pretrained)
        input_size = 224
    elif args.arch == 'vgg15_bwn':
        model = model_list.vgg_15(pretrained=args.pretrained)
        input_size = 224
    elif args.arch == 'vgg15_bn_XNOR':
        model = model_list.vgg15_bn_XNOR(pretrained=args.pretrained)
        input_size = 224
    elif args.arch == 'vgg15ab':
        model = model_list.vgg15ab(pretrained=args.pretrained)
        input_size = 224
    elif args.arch == 'sq':
        model = model_list.squeezenet1_1()
        input_size = 224
    else:
        raise Exception('Model not supported yet')

    # if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
    #     model.features = torch.nn.DataParallel(model.features)
    #     model.cuda()
    # else:
    #     model = torch.nn.DataParallel(model).cuda()
    model.cuda()
    # model.features = torch.nn.DataParallel(model.features)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.0, 0.999),
                                 weight_decay=args.weight_decay)

    # initialization when training from scratch
    # for m in model.modules():
    #     if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
    #         c = float(m.weight.data[0].nelement())
    #         m.weight.data = m.weight.data.normal_(0, 2.0 / c)
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data = m.weight.data.zero_().add(1.0)
    #         m.bias.data = m.bias.data.zero_()

    # optionally resume from a checkpoint
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.pretrained)
    # TODO: Temporary remake
    # args.start_epoch = 0
    # best_prec1 = 0.0
    # model.features = torch.nn.DataParallel(model.features)
    try:
        args.start_epoch = checkpoint['epoch']
        if args.pretrained:
            best_prec1 = 0
        model = torch.nn.DataParallel(model)
        model.load_state_dict(checkpoint['state_dict'])
    except KeyError:
        model.load_state_dict(checkpoint)
    # optimizer.load_state_dict(checkpoint['optimizer'])
    # print("=> loaded checkpoint '{}' (epoch {})"
    #       .format(args.resume, args.start_epoch))
    # del checkpoint
    # else:
    #     print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms

        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory " + args.data + " does not exist")
            print("==> Please specify the correct data path by")
            print("==> --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(
            meanfile=args.data + '/imagenet_mean.binaryproto')

        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(args.data,
                                 transforms.Compose([
                                     transforms.ToTensor(),
                                     normalize,
                                     transforms.CenterCrop(input_size),
                                 ]),
                                 Train=False),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
    elif args.cifar:
        import torchvision.transforms as transforms
        import torchvision

        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                download=True,
                                                transform=transform)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                                   shuffle=True, num_workers=2)
        testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                               download=True,
                                               transform=transform)
        val_loader = torch.utils.data.DataLoader(testset, batch_size=100,
                                                 shuffle=False, num_workers=2)
        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog',
                   'horse', 'ship', 'truck')
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets

        # traindir = os.path.join(args.data, 'train')
        # valdir = os.path.join(args.data, 'test')
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        if True:
            # train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
            # else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir,
                                 transforms.Compose([
                                     transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     normalize,
                                 ])),
            batch_size=args.batch_size // 2 if args.arch.startswith('vgg') else args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)

    # print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        if args.binarize:
            bin_op.binarization()
            save_checkpoint(model.state_dict(), False,
                            filename='{}/{}_bin_'.format(args.workdir, args.arch))
            bin_op.restore()
        # bin_op.binarization()
        # save_checkpoint(model.state_dict(), False, 'vgg_binarized')
        # bin_op.restore()
        validate(val_loader, model, criterion)
        return

    val_prec_list = []
    writer = SummaryWriter(args.workdir + '/runs/loss_graph')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, writer)

        # evaluate on validation set
        prec1, prec5 = validate(val_loader, model, criterion)
        val_prec_list.append(prec1)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            filename='{}/{}_'.format(args.workdir, args.arch))

        writer.add_scalar('top1 accuracy', prec1, epoch)
        writer.add_scalar('top5 accuracy', prec5, epoch)
        writer.add_scalar('learning rate', args.lr, epoch)

    print(val_prec_list)
                          0.001,
                          momentum=0.9,
                          weight_decay=3e-4)
else:
    print('unsupported learning rate scheduler: ', args.lr_scheduler)
    exit(1)

criterion = nn.CrossEntropyLoss()

# define the binarization operator
# quantization:
#   bwn       => only model quantization
#   joint_bwn => joint data and model quantization
#   none      => no model and no data quantization
#   data      => only data quantization
if quanModel == True:
    bin_op = util.BinOp(model,
                        binarize_first_layer=args.binarize_first_layer,
                        binarize_last_layer=args.binarize_last_layer)

# do the evaluation if specified
if args.evaluate:
    test(testloader, model, criterion)
    exit(0)

############################### MODEL TRAINING ###############################
for epoch in range(0, args.epochs):
    if args.lr_scheduler == 'Adam':
        adjust_learning_rate(optimizer, epoch)
    train(epoch, trainloader, model, criterion, optimizer)
    test(epoch, testloader, model, criterion)
def main():
    '''Parse arguments.'''
    parser = argparse.ArgumentParser(description='Pytorch XNOR-YOLO Training')
    parser.add_argument('--epochs', default=300, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--l', '--wd', default=1e-5, type=float, metavar='W',
                        help='weight decay (default: 1e-5)')
    parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                        default=False, help='use pre-trained model')
    parser.add_argument('--mixnet', dest='mixnet', action='store_true',
                        default=False, help='use mixnet model')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-b', '--batch-size', default=8, type=int, metavar='N',
                        help='mini-batch size (default: 8)')
    global args
    args = parser.parse_args()

    '''Data loading module.'''
    train_dataset = yoloDataset(
        root='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/',
        list_file=['./meta/voc2007.txt', './meta/voc2012.txt'],
        train=True,
        transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=4)
    test_dataset = yoloDataset(
        root='/mnt/lustre/share/DSK/datasets/VOC07+12/JPEGImages/',
        list_file='./meta/voc2007test.txt',
        train=False,
        transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=4)

    '''Create model.'''
    teacher_model = vgg16(pretrained=False)
    student_model = vgg16XNOR(pretrained=False)
    teacher_model = torch.nn.DataParallel(teacher_model)
    student_model.features = torch.nn.DataParallel(student_model.features)
    teacher_model.cuda()
    student_model.cuda()

    '''Define loss function (yoloLoss) and optimizer (Adam).'''
    gt_criterion = yoloLoss(7, 2, 5, 0.5)
    mm_criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(student_model.parameters(), args.lr,
                                 weight_decay=args.l)

    '''Weight initialization.'''
    for m in student_model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 2.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)
            m.bias.data = m.bias.data.zero_()

    '''Weight loading.'''
    teacher_model.load_state_dict(
        torch.load('./experiment/vgg16fp/checkpoint.pth'))

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            student_model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    print(student_model, teacher_model)

    '''Define binarization operator.'''
    global bin_op
    bin_range = [1, 11]
    bin_op = util.BinOp(student_model, bin_range)

    best_loss = 100
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        '''Train & validate for one epoch.'''
        train(train_loader, student_model, teacher_model, gt_criterion,
              mm_criterion, optimizer, epoch)
        val_loss = validate(test_loader, student_model, teacher_model,
                            gt_criterion)

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': student_model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best)