def forward(self, input):
    """Run a linear layer whose parameters are quantized on the fly.

    ``self.weight`` (and ``self.bias`` when it is set) are passed through
    ``quantize_weights_bias_gemm`` on every call; ``input`` is forwarded
    untouched.

    Args:
        input: Tensor handed directly to ``F.linear``.

    Returns:
        The output of ``F.linear`` computed with the quantized parameters.
    """
    quantized_weight = quantize_weights_bias_gemm(self.weight)
    # A layer built without a bias keeps ``None`` so F.linear skips the add.
    quantized_bias = (
        None if self.bias is None else quantize_weights_bias_gemm(self.bias)
    )
    return F.linear(input, quantized_weight, quantized_bias)
def forward(self, input):
    """Run a 2-D convolution with quantized parameters and quantized input.

    The weight and optional bias go through ``quantize_weights_bias_gemm``
    and the incoming activation goes through ``quantize_activations_gemm``
    before ``F.conv2d`` is called with this layer's stride, padding,
    dilation and groups.

    Args:
        input: Activation tensor to be quantized and convolved.

    Returns:
        The output of ``F.conv2d`` on the quantized operands.
    """
    quantized_weight = quantize_weights_bias_gemm(self.weight)
    # A layer built without a bias keeps ``None`` so F.conv2d skips the add.
    quantized_bias = (
        None if self.bias is None else quantize_weights_bias_gemm(self.bias)
    )
    quantized_input = quantize_activations_gemm(input)
    return F.conv2d(
        quantized_input,
        quantized_weight,
        quantized_bias,
        self.stride,
        self.padding,
        self.dilation,
        self.groups,
    )
def forward(self, input):
    """Run a 2-D convolution with quantized weight and bias.

    The key point is to call the function ``F.conv2d`` rather than use the
    module ``nn.Conv2d``, so that the quantized tensors can be supplied as
    the convolution parameters. ``input`` itself is not quantized here.

    Args:
        input: Activation tensor handed directly to ``F.conv2d``.

    Returns:
        The output of ``F.conv2d`` computed with the quantized parameters.
    """
    quantized_weight = quantize_weights_bias_gemm(self.weight)
    # A layer built without a bias keeps ``None`` so F.conv2d skips the add.
    quantized_bias = (
        None if self.bias is None else quantize_weights_bias_gemm(self.bias)
    )
    return F.conv2d(
        input,
        quantized_weight,
        quantized_bias,
        self.stride,
        self.padding,
        self.dilation,
        self.groups,
    )
def forward(self, input, scale, bias):
    """Convolve with a quantized weight, then apply an external affine map.

    The key point is to call the function ``F.conv2d`` rather than use the
    module ``nn.Conv2d``. The convolution itself runs without a bias term;
    the result is multiplied by ``scale`` and ``bias`` is added afterwards.

    Args:
        input: Activation tensor handed directly to ``F.conv2d``.
        scale: Multiplier applied to the convolution output.
        bias: Offset added after scaling.

    Returns:
        ``F.conv2d(...) * scale + bias``.
    """
    quantized_weight = quantize_weights_bias_gemm(self.weight)
    conv_out = F.conv2d(
        input,
        quantized_weight,
        None,  # no bias inside the convolution; it is added below
        self.stride,
        self.padding,
        self.dilation,
        self.groups,
    )
    return conv_out * scale + bias
# 根据训练经验, 设为 2.5 self.scalar = nn.Parameter( torch.tensor([s], requires_grad=True, dtype=torch.float)) def forward(self, i): return self.scalar * i if __name__ == "__main__": qconv = QWConv2D(1, 1, 3) qconv.zero_grad() x = torch.ones(1, 1, 3, 3, requires_grad=True).float() y = qconv(x) y.backward() print("QConv2D 权重梯度", qconv.weight.grad) # 直接求梯度 a = torch.ones(3, 3, requires_grad=True).float() w = nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1) qw = quantize_weights_bias_gemm(w) # qw = quantize_weights_bias_tanh(w) z = (qw * a).sum() z.backward() print("求权重梯度", w.grad) # 验证量化梯度 qa = quantize_weights_bias_gemm(a).sum() # qa = quantize_weights_bias_tanh(a).sum() qa.backward() print("直接求量化权重梯度", a.grad)
def main():
    """Build, optionally restore, and then train or evaluate a model.

    The behaviour is driven entirely by the module-level ``args`` object:

    * ``args.mode`` selects how the model is constructed:
        0 -- full-precision model from ``models``, trained from scratch;
        1 -- weight-quantized model initialised from the pre-trained model;
        2 -- activation-quantized model initialised from a weight-quantized
             checkpoint (``args.weight_quantized``); returns early if that
             file does not exist;
        3 -- activation-quantized model initialised from the pre-trained
             model (only keys present in both state dicts are copied);
        4 -- delegates to ``quantize_guided.guided(args)`` and returns.
    * ``args.gpu`` / ``args.distributed`` select single-GPU, distributed or
      ``DataParallel`` placement.
    * ``args.resume`` restores ``checkpoint.pth.tar`` from ``args.save_dir``.
    * ``args.evaluate`` loads the given checkpoint, runs ``validate`` once
      and returns.

    Otherwise the function trains from ``args.start_epoch`` to
    ``args.epochs``, validating and checkpointing after every epoch and
    tracking the best top-1 precision in the global ``best_prec1``.

    Raises:
        Exception: if ``args.mode`` is not one of 0-4.
    """
    global best_prec1
    print("\n"
          "=> arch {: <20}\n"
          "=> init_lr {: <20}\n"
          "=> lr-step {: <20}\n"
          "=> momentum {: <20}\n"
          "=> weight-decay {: <20}\n"
          "=> batch-size {: <20}\n"
          "=> balance {: <20}\n"
          "=> save-dir {: <20}\n".format(
              args.arch, args.lr, args.lr_step, args.momentum,
              args.weight_decay, args.batch_size, args.balance,
              args.save_dir))

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably!, You may see unexpected behavior'
                      ' when restarting from checkpoints.')

    # As the warning below says, naming a GPU id disables multi-GPU training.
    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU, This will completely disable data parallelism.')

    # Multi-machine (cluster) training, as opposed to one machine with
    # several cards.
    args.distributed = args.world_size > 1
    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Build the model according to the training mode.
    if args.mode == 0:
        print("=> training mode {}: full precision training from scratch\n".format(args.mode))
        model = models.__dict__[args.arch]()

    elif args.mode == 1:
        print("=> training mode {}: quantize weight only\n".format(args.mode))
        print("=> loading imageNet pre-trained model {}".format(args.arch))
        model = net_quantize_weight.__dict__[args.arch]()
        model_dict = model.state_dict()
        init_model = models.__dict__[args.arch](pretrained=True)
        model_dict.update(init_model.state_dict())
        model.load_state_dict(model_dict)
        print("=> loaded imageNet pre-trained model {}".format(args.arch))

    elif args.mode == 2:
        print("=> training mode {}: quantize activation using quantized weight\n".format(args.mode))
        model = net_quantize_activation.__dict__[args.arch]()
        if not os.path.isfile(args.weight_quantized):
            warnings.warn("=> no weight quantized model found at '{}'".format(args.weight_quantized))
            return

        print("=> loading weight quantized model '{}'".format(args.weight_quantized))
        model_dict = model.state_dict()
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        quantized_model = torch.load(args.weight_quantized)
        init_dict = {}
        prefix = "module."
        for k, v in quantized_model['state_dict'].items():
            # Checkpoints saved from a DataParallel-wrapped model carry a
            # "module." prefix that the freshly built (unwrapped) model does
            # not have. Strip it *before* the membership test; testing the
            # prefixed key never matches and silently loads nothing.
            name = k[len(prefix):] if k.startswith(prefix) else k
            if name not in model_dict:
                continue
            if "conv" in name or "fc" in name:
                init_dict[name] = quantize_weights_bias_gemm(v)
            else:
                init_dict[name] = v
        model_dict.update(init_dict)
        model.load_state_dict(model_dict)
        print("=> loaded weight_quantized '{}'".format(args.weight_quantized))

    elif args.mode == 3:
        print("=> training mode {}: quantize weight and activation simultaneously\n".format(args.mode))
        print("=> loading imageNet pre-trained model '{}'".format(args.arch))
        # Use the pre-trained network to initialise a model whose weights
        # and activations are quantized at the same time.
        model = net_quantize_activation.__dict__[args.arch]()
        # Copy over only the pre-trained parameters the new model also has.
        model_dict = model.state_dict()
        init_model = models.__dict__[args.arch](pretrained=True)
        init_dict = {k: v for k, v in init_model.state_dict().items()
                     if k in model_dict}
        model_dict.update(init_dict)
        model.load_state_dict(model_dict)

    elif args.mode == 4:
        print("=> Training mode {}: guided quantize weight and activation "
              "from pre-trained imageNet model {}\n ".format(args.mode, args.arch))
        quantize_guided.guided(args)
        return

    else:
        raise Exception("invalid mode, valid mode is 0~4!!")

    if args.gpu is not None:
        # A specific GPU was requested.
        model = model.cuda(args.gpu)
    elif args.distributed:
        # Cluster training (multiple machines).
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        # Single-machine training (one or several cards).
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # Multi-GPU on one machine, using the listed device ids.
            # Before wrapping in DataParallel the parameter names look like
            # "conv1.weight"; afterwards they look like "module.conv1.weight".
            # Training with DataParallel and validating on a specific GPU
            # therefore produces mismatching keys, which the specific-GPU
            # path has to reconcile.
            model = torch.nn.DataParallel(model, args.device_ids).cuda()

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_step)

    # Optionally resume from a checkpoint.
    if args.resume:
        print("\n=> resume training from checkpoint")
        checkpoint_filename = os.path.join(args.save_dir, "checkpoint.pth.tar")

        if os.path.isfile(checkpoint_filename):
            print("=> loading checkpoint '{}'".format(checkpoint_filename))
            checkpoint = torch.load(checkpoint_filename)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(checkpoint_filename, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(checkpoint_filename))

    cudnn.benchmark = True

    val_loader = load_val_data(args.data, args.batch_size, args.workers)

    if args.evaluate:
        if os.path.isfile(args.evaluate):
            print("Loading evaluate model '{}'".format(args.evaluate))
            checkpoint = torch.load(args.evaluate)
            if "state_dict" in checkpoint:
                model.load_state_dict(checkpoint['state_dict'])
                print("epoch: {} ".format(checkpoint['epoch']))
            else:
                # A bare state dict: add the DataParallel "module." prefix
                # where it is missing, and keep keys that already have it
                # (they used to be dropped, leaving the model incomplete).
                # NOTE(review): this assumes the model is wrapped in
                # DataParallel; with args.gpu set it is not -- confirm.
                checkpoint = {
                    (k if k.startswith("module") else ''.join(("module.", k))): v
                    for k, v in checkpoint.items()
                }
                model.load_state_dict(checkpoint)
            print("Loaded evaluate model '{}'".format(args.evaluate))
        else:
            print("No evaluate mode found at '{}'".format(args.evaluate))
            return
        validate(model, val_loader, criterion, args.gpu)
        return

    train_loader, train_sampler = load_train_data(
        args.data, args.batch_size, args.workers, args.distributed)

    summary_writer = SummaryWriter(args.save_dir)
    try:
        for epoch in range(args.start_epoch, args.epochs):
            if args.distributed:
                train_sampler.set_epoch(epoch)
            # NOTE(review): the scheduler is stepped before the epoch's
            # optimizer steps and is not fast-forwarded on resume; recent
            # PyTorch releases warn about this ordering -- confirm intent.
            lr_scheduler.step()

            # Train for one epoch.
            train(model, train_loader, criterion, optimizer, args.gpu,
                  epoch, summary_writer)

            # Evaluate on the validation set.
            prec1 = validate(model, val_loader, criterion, args.gpu,
                             epoch, summary_writer)

            # Remember the best prec@1 and save a checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.save_dir)
    finally:
        # Flush and close the event file even if training is interrupted.
        summary_writer.close()
# Restore the best checkpoint into `model` and report its recorded precision.
init_model = torch.load('resnet/resnet18_cifar100/model_best.pth.tar')
model.load_state_dict(init_model['state_dict'])
print(init_model['best_prec1'])

state_dict = model.state_dict()
merge_state_dict = merge_model.state_dict()

# The final linear layer is handled separately: its parameters are quantized
# and written into the merged model's state dict.
weight = state_dict["module.linear.weight"]
bias = state_dict["module.linear.bias"]
qweight = quantize_weights_bias_gemm(weight)
qbias = quantize_weights_bias_gemm(bias)
merge_state_dict["module.linear.weight"] = qweight
merge_state_dict["module.linear.bias"] = qbias

# Remove the linear entries so only the remaining parameters are grouped.
for linear_key in ("module.linear.weight", "module.linear.bias"):
    del state_dict[linear_key]

# Arrange the remaining parameter names in rows of six.
params = np.array(list(state_dict)).reshape(-1, 6)

# Layer-index markers of the form ".<n>." for the listed indices.
l_vgg = [
    '.{}.'.format(layer_index)
    for layer_index in (0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28)
]
def forward(self, input, scale, bias):
    """Convolve quantized input with a quantized weight, then scale and shift.

    The weight goes through ``quantize_weights_bias_gemm`` and the input
    through ``quantize_activations_gemm``. The convolution runs without a
    bias term; its result is multiplied by ``scale`` and ``bias`` is added.

    Args:
        input: Activation tensor to be quantized and convolved.
        scale: Multiplier applied to the convolution output.
        bias: Offset added after scaling.

    Returns:
        ``F.conv2d(...) * scale + bias``.
    """
    quantized_weight = quantize_weights_bias_gemm(self.weight)
    quantized_input = quantize_activations_gemm(input)
    conv_out = F.conv2d(
        quantized_input,
        quantized_weight,
        None,  # no bias inside the convolution; it is added below
        self.stride,
        self.padding,
        self.dilation,
        self.groups,
    )
    return conv_out * scale + bias