def test_load_quantized():
    """Check that a quantized MLP's state dict survives a save/load round trip.

    The model's dense weights are re-quantized, a reference prediction is
    taken and the state dict is serialized into an in-memory buffer. The
    weights are then overwritten with different quantization scales, the
    saved state is loaded back, and the new prediction must match the
    reference within ``atol=5e-6``.
    """
    from megengine.core.tensor import dtype

    def as_qint8_param(weight, scale):
        # Re-express ``weight`` as a Parameter holding qint8 data at ``scale``.
        return Parameter(weight.astype(dtype.qint8(scale)).numpy())

    sample = tensor(np.random.random((2, 28)), dtype="float32")
    sample = sample.astype(dtype.qint8(0.1))

    net = MLP()
    quantize_qat(net)
    quantize(net)
    net.dense0.weight = as_qint8_param(net.dense0.weight, 0.001)
    net.dense1.weight = as_qint8_param(net.dense1.weight, 0.0002)
    net.eval()
    reference = net(sample)

    with BytesIO() as buffer:
        mge.save(net.state_dict(), buffer)
        buffer.seek(0)
        restored_state = mge.load(buffer)
        # Overwrite the weights so that a successful load is observable.
        net.dense0.weight = as_qint8_param(net.dense0.weight, 0.00001)
        net.dense1.weight = as_qint8_param(net.dense1.weight, 0.2)
        net.load_state_dict(restored_state)
        reloaded = net(sample)

    np.testing.assert_allclose(
        reference.astype("float32").numpy(),
        reloaded.astype("float32").numpy(),
        atol=5e-6,
    )
def test_quantized_module_user_naming_param(symbolic):
    """Check the input names of the dumped MatrixMul op after quantization.

    A small module with user-named linear weight and bias is quantized,
    dumped and reloaded via ``_dump_and_load``. Every input variable of the
    single ``simple.linear.MatrixMul`` op must be named either
    ``simple.quant.TypeCvt`` or ``simple.linear.user-weight``.

    ``symbolic`` is forwarded unchanged to ``_dump_and_load``.
    """

    class Simple(M.Module):
        def __init__(self, name):
            super().__init__(name=name)
            self.quant = M.QuantStub()
            self.linear = M.Linear(3, 3, bias=True)
            self.dequant = M.DequantStub()
            # User-chosen parameter names that the dumped graph should keep.
            self.linear.weight.name = "user-weight"
            self.linear.bias.name = "user-bias"

        def forward(self, x):
            quantized = self.quant(x)
            projected = self.linear(quantized)
            return self.dequant(projected)

    net = Simple("simple")
    quantize_qat(net)
    quantize(net)
    net.eval()

    dumped_ops = _dump_and_load(net, symbolic)
    matmul_ops = [op for op in dumped_ops if op.name == "simple.linear.MatrixMul"]
    # Unpacking fails unless exactly one MatrixMul op was found.
    (matrix_mul_op,) = matmul_ops

    allowed_names = ("simple.quant.TypeCvt", "simple.linear.user-weight")
    for var in matrix_mul_op.inputs:
        assert var.name in allowed_names
def test_quantized_module_user_naming(symbolic):
    """Check op names in the dumped graph when a submodule has a user name.

    The linear layer is created with ``name="user-linear"``; after
    quantization and a dump/load round trip the leading ops must carry the
    expected ``simple.*`` names in order.

    ``symbolic`` is forwarded unchanged to ``_dump_and_load``.
    """

    class Simple(M.Module):
        def __init__(self, name):
            super().__init__(name=name)
            self.quant = M.QuantStub()
            self.linear = M.Linear(3, 3, bias=True, name="user-linear")
            self.dequant = M.DequantStub()

        def forward(self, x):
            quantized = self.quant(x)
            projected = self.linear(quantized)
            return self.dequant(projected)

    net = Simple("simple")
    quantize_qat(net)
    quantize(net)
    net.eval()

    dumped_ops = _dump_and_load(net, symbolic)
    expected_names = (
        "x",
        "simple.quant.TypeCvt",
        "simple.user-linear.MatrixMul",
        "simple.user-linear.ADD",
        "simple.user-linear.TypeCvt",
        "simple.dequant.TypeCvt",
    )
    # zip stops at the shorter sequence, so only the common prefix is compared.
    for dumped_op, expected in zip(dumped_ops, expected_names):
        assert dumped_op.name == expected
def test_load_quantized():
    """Check that a quantized MLP's state dict survives a save/load round trip.

    Same flow as a plain checkpoint test: take a reference prediction, save
    the state dict to an in-memory buffer, overwrite the dense weights with
    different qint8 scales, load the saved state back and compare the new
    prediction against the reference with ``max_err=5e-6``.
    """

    def as_qint8_param(weight, scale):
        # Re-express ``weight`` as a Parameter holding qint8 data at ``scale``.
        return Parameter(weight.astype(mgb.dtype.qint8(scale)).numpy())

    sample = tensor(np.random.random((2, 28)), dtype="float32")
    sample = sample.astype(mgb.dtype.qint8(0.1))

    net = MLP()
    quantize_qat(net)
    quantize(net)
    net.dense0.weight = as_qint8_param(net.dense0.weight, 0.001)
    net.dense1.weight = as_qint8_param(net.dense1.weight, 0.0002)
    net.eval()
    reference = net(sample)

    with BytesIO() as buffer:
        mge.save(net.state_dict(), buffer)
        buffer.seek(0)
        restored_state = mge.load(buffer)
        # Overwrite the weights so that a successful load is observable.
        net.dense0.weight = as_qint8_param(net.dense0.weight, 0.00001)
        net.dense1.weight = as_qint8_param(net.dense1.weight, 0.2)
        net.load_state_dict(restored_state)
        reloaded = net(sample)

    assertTensorClose(
        reference.astype("float32").numpy(),
        reloaded.astype("float32").numpy(),
        max_err=5e-6,
    )
def worker(world_size, args):
    """Build the requested model and evaluate it on the ImageNet validation set.

    Depending on ``args.mode`` the model is left as is ("normal"), converted
    with ``quantize_qat`` (any other mode) and additionally with ``quantize``
    ("quantized"). An optional checkpoint is loaded between the two
    conversions. The evaluation step and data loader are handed to ``infer``
    and rank 0 logs the two accuracy values it returns.

    Args:
        world_size: number of participating processes; only used to decide
            whether to log the process-group message.
        args: parsed command-line namespace; reads ``arch``, ``mode``,
            ``checkpoint``, ``data`` and ``workers``.
    """
    # pylint: disable=too-many-statements
    rank = dist.get_rank()
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))

    model = models.__dict__[args.arch]()

    if args.mode != "normal":
        quantize_qat(model, qconfig=Q.ema_fakequant_qconfig)

    if args.checkpoint:
        logger.info("Load pretrained weights from %s", args.checkpoint)
        state = mge.load(args.checkpoint)
        # Checkpoints may wrap the weights under a "state_dict" key.
        if "state_dict" in state:
            state = state["state_dict"]
        model.load_state_dict(state, strict=False)

    if args.mode == "quantized":
        quantize(model)

    def _mean_over_ranks(value):
        # all_reduce_sum followed by division by the world size.
        return dist.functional.all_reduce_sum(value) / dist.get_world_size()

    # Define valid graph
    def valid_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        if dist.is_distributed():
            loss = _mean_over_ranks(loss)
            acc1 = _mean_over_ranks(acc1)
            acc5 = _mean_over_ranks(acc5)
        return loss, acc1, acc5

    # Build valid datasets
    logger.info("preparing dataset..")
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(
        valid_dataset, batch_size=100, drop_last=False
    )
    preprocessing = T.Compose(
        [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
    )
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=preprocessing,
        num_workers=args.workers,
    )

    _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
    if rank == 0:
        logger.info("TEST %f, %f", valid_acc, valid_acc5)
def test_quantize():
    """Check that ``quantize`` converts each submodule to its ``Q`` counterpart.

    The network from ``init_qat_net`` is converted with ``inplace=False`` and
    the resulting stubs and both linear layers are type-checked.
    """
    converted = quantize(init_qat_net(), inplace=False)

    assert isinstance(converted.quant, Q.QuantStub)
    assert isinstance(converted.linear[0], Q.Linear)
    assert isinstance(converted.linear[1], Q.Linear)
    assert isinstance(converted.dequant, Q.DequantStub)
def test_quantize_batchmatmul_activation():
    """Compare float, QAT, quantized and dumped outputs of BatchMatMulActivation.

    For ``bias`` in ``(True, False)``:
      * with fake-quant disabled, the QAT net must match the float net in
        both train and eval mode;
      * with fake-quant enabled, the quantized net must match the QAT net
        within ``atol=1e-6``;
      * the traced and dumped quantized net must reproduce the quantized
        outputs within ``atol=1e-6``.
    """
    batch = 4
    in_features = 8
    out_features = 4

    class TestNet(Module):
        def __init__(self, bias):
            super().__init__()
            self.quant = QuantStub()
            self.dequant = DequantStub()
            self.batch_mm = BatchMatMulActivation(
                batch, in_features, out_features, bias=bias
            )

        def forward(self, inp):
            hidden = self.quant(inp)
            hidden = self.batch_mm(hidden)
            hidden = expand_dims(hidden, -1)
            return self.dequant(hidden)

    inputs = tensor(
        np.random.randn(batch, in_features, out_features).astype(np.float32)
    )

    for bias in (True, False):
        net = TestNet(bias)
        net.train()
        qat_net = quantize_qat(net, inplace=False)
        disable_fake_quant(qat_net)

        # Train mode, fake-quant off: QAT must equal the float network.
        float_out = net(inputs)
        qat_out = qat_net(inputs)
        np.testing.assert_allclose(float_out.numpy(), qat_out.numpy())

        # Eval mode, fake-quant off: same expectation.
        net.eval()
        float_out = net(inputs)
        qat_net.eval()
        qat_out = qat_net(inputs)
        np.testing.assert_allclose(float_out.numpy(), qat_out.numpy())

        # Fake-quant on: the converted network must track the QAT network.
        enable_fake_quant(qat_net)
        qat_out = qat_net(inputs)
        qnet = quantize(qat_net, inplace=False)
        qnet.eval()
        quantized_out = qnet(inputs)
        np.testing.assert_allclose(
            qat_out.numpy(), quantized_out.numpy(), atol=1e-6
        )

        # NOTE(review): the dump check is assumed to run once per ``bias``
        # value (i.e. inside this loop) - confirm against the original layout.
        @jit.trace(capture_as_const=True)
        def f(x):
            qnet.eval()
            return qnet(x)

        f(inputs)
        dump_buffer = io.BytesIO()
        f.dump(dump_buffer, enable_nchw4=True)
        dump_buffer.seek(0)
        dumped_out = cgtools.load_and_inference(dump_buffer, [inputs])[0]
        np.testing.assert_allclose(quantized_out.numpy(), dumped_out, atol=1e-6)
def worker(world_size, args):
    """Calibrate a pretrained model, quantize it, evaluate it and save it.

    Steps, in order: load the checkpoint given by ``args.checkpoint``, build
    the ImageNet validation loader, disable quantization on ``model.fc``,
    convert with ``quantize_qat`` using ``Q.calibration_qconfig``, run one
    pass through ``infer`` with observers enabled, convert with ``quantize``,
    evaluate again through ``infer`` and write the resulting state dict to
    ``checkpoint-calibration.pkl`` inside the save directory.

    Args:
        world_size: number of participating processes; only used to decide
            whether to log the process-group message.
        args: parsed command-line namespace; reads ``save``, ``arch``,
            ``checkpoint``, ``data`` and ``workers``.

    Raises:
        AssertionError: if ``args.checkpoint`` is falsy.
    """
    # pylint: disable=too-many-statements
    rank = dist.get_rank()
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))

    save_dir = os.path.join(args.save, args.arch + "." + "calibration")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    model = models.__dict__[args.arch]()

    # load calibration model
    assert args.checkpoint
    logger.info("Load pretrained weights from %s", args.checkpoint)
    state = mge.load(args.checkpoint)
    # Checkpoints may wrap the weights under a "state_dict" key.
    if "state_dict" in state:
        state = state["state_dict"]
    model.load_state_dict(state, strict=False)

    # Build valid datasets
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(
        valid_dataset, batch_size=100, drop_last=False
    )
    preprocessing = T.Compose(
        [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
    )
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=preprocessing,
        num_workers=args.workers,
    )

    # calibration
    model.fc.disable_quantize()
    model = quantize_qat(model, qconfig=Q.calibration_qconfig)

    def _mean_over_ranks(value):
        # all_reduce_sum followed by division by the world size.
        return dist.functional.all_reduce_sum(value) / dist.get_world_size()

    # calculate scale
    def calculate_scale(image, label):
        model.eval()
        enable_observer(model)
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        if dist.is_distributed():
            loss = _mean_over_ranks(loss)
            acc1 = _mean_over_ranks(acc1)
            acc5 = _mean_over_ranks(acc5)
        return loss, acc1, acc5

    infer(calculate_scale, valid_queue, args)

    # quantized
    model = quantize(model)

    # eval quantized model
    def eval_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        if dist.is_distributed():
            loss = _mean_over_ranks(loss)
            acc1 = _mean_over_ranks(acc1)
            acc5 = _mean_over_ranks(acc5)
        return loss, acc1, acc5

    _, valid_acc, valid_acc5 = infer(eval_func, valid_queue, args)
    logger.info("TEST %f, %f", valid_acc, valid_acc5)

    # save quantized model
    checkpoint_path = os.path.join(save_dir, "checkpoint-calibration.pkl")
    mge.save({"step": -1, "state_dict": model.state_dict()}, checkpoint_path)
    logger.info("save in {}".format(checkpoint_path))
def main():
    """Classify a single image with a (optionally quantized) model.

    Command-line options:
        -a/--arch        key into ``models.__dict__`` (default "resnet18").
        -c/--checkpoint  optional checkpoint to load into the model.
        -i/--image       image path; defaults to ``assets/cat.jpg`` under the
                         directory resolved from this file's location.
        -m/--mode        "normal", "qat" or "quantized" (default).
        --dump           also dump the traced function and the state dict.

    The model is converted according to ``--mode``, the image is read with
    OpenCV, preprocessed and passed through a traced inference function, and
    the top-5 classes are printed with their probabilities.

    Raises:
        OSError: if OpenCV cannot read the image at the resolved path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--arch", default="resnet18", type=str)
    parser.add_argument("-c", "--checkpoint", default=None, type=str)
    parser.add_argument("-i", "--image", default=None, type=str)
    parser.add_argument(
        "-m",
        "--mode",
        default="quantized",
        type=str,
        choices=["normal", "qat", "quantized"],
        help="Quantization Mode\n"
        "normal: no quantization, using float32\n"
        "qat: quantization aware training, simulate int8\n"
        "quantized: convert mode to int8 quantized, inference only",
    )
    parser.add_argument("--dump", action="store_true", help="Dump quantized model")
    args = parser.parse_args()

    model = models.__dict__[args.arch]()

    if args.mode != "normal":
        quantize_qat(model, qconfig=Q.ema_fakequant_qconfig)

    if args.mode == "quantized":
        quantize(model)

    if args.checkpoint:
        logger.info("Load pretrained weights from %s", args.checkpoint)
        ckpt = mge.load(args.checkpoint)
        # Checkpoints may wrap the weights under a "state_dict" key.
        ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
        model.load_state_dict(ckpt, strict=False)

    rpath = os.path.realpath(__file__ + "/../../")
    if args.image is None:
        path = rpath + "/assets/cat.jpg"
    else:
        path = args.image
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside the transform.
    if image is None:
        raise OSError("Failed to read image: {}".format(path))

    transform = T.Compose(
        [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
    )

    @trace(symbolic=True, capture_as_const=True)
    def infer_func(processed_img):
        model.eval()
        logits = model(processed_img)
        probs = F.softmax(logits)
        return probs

    # Add a leading axis so the single image forms a batch of one.
    processed_img = transform.apply(image)[np.newaxis, :]
    processed_img = mge.tensor(processed_img, dtype="float32")

    probs = infer_func(processed_img)
    top_probs, classes = F.topk(probs, k=5, descending=True)

    if args.dump:
        output_file = ".".join([args.arch, args.mode, "megengine"])
        logger.info("Dump to {}".format(output_file))
        infer_func.dump(output_file, arg_names=["data"])
        mge.save(model.state_dict(), output_file.replace("megengine", "pkl"))

    with open(rpath + "/assets/imagenet_class_info.json") as fp:
        imagenet_class_index = json.load(fp)

    for rank, (prob, classid) in enumerate(
        zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1))
    ):
        print(
            "{}: class = {:20s} with probability = {:4.1f} %".format(
                rank, imagenet_class_index[str(classid)][1], 100 * prob
            )
        )
def quantized_resnet18(**kwargs):
    """Build a resnet18 and apply ``quantize_qat`` then ``quantize`` to it.

    All keyword arguments are forwarded to ``resnet18``. The return values of
    the two conversion calls are ignored; the object created by ``resnet18``
    is what gets returned.
    """
    net = resnet18(**kwargs)
    for convert in (quantize_qat, quantize):
        convert(net)
    return net
def worker(rank, world_size, args):
    """Calibrate a pretrained model, quantize it, evaluate it and save it.

    When ``world_size > 1`` a process group is initialised first. The
    function then loads the checkpoint from ``args.checkpoint``, builds the
    ImageNet validation loader, disables quantization on ``model.fc``,
    converts with ``quantize_qat`` using ``Q.calibration_qconfig``, runs one
    traced pass with observers enabled, converts with ``quantize``, evaluates
    again and writes the state dict to ``checkpoint-calibration.pkl`` in the
    save directory.

    Args:
        rank: index of this process; also passed as ``dev`` to
            ``dist.init_process_group``.
        world_size: number of participating processes.
        args: parsed command-line namespace; reads ``save``, ``arch``,
            ``mode``, ``checkpoint``, ``data`` and ``workers``.

    Raises:
        AssertionError: if ``args.checkpoint`` is falsy.
    """
    # pylint: disable=too-many-statements
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )

    save_dir = os.path.join(args.save, args.arch + "." + args.mode)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    model = models.__dict__[args.arch]()
    cfg = config.get_finetune_config(args.arch)

    # The schedule values below are not used later in this function; they are
    # kept because they read and update attributes on ``cfg``.
    cfg.LEARNING_RATE *= world_size  # scale learning rate in distributed training
    total_batch_size = cfg.BATCH_SIZE * world_size
    steps_per_epoch = 1280000 // total_batch_size
    total_steps = steps_per_epoch * cfg.EPOCHS

    # load calibration model
    assert args.checkpoint
    logger.info("Load pretrained weights from %s", args.checkpoint)
    state = mge.load(args.checkpoint)
    # Checkpoints may wrap the weights under a "state_dict" key.
    if "state_dict" in state:
        state = state["state_dict"]
    model.load_state_dict(state, strict=False)

    # Build valid datasets
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(
        valid_dataset, batch_size=100, drop_last=False
    )
    preprocessing = T.Compose(
        [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
    )
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=preprocessing,
        num_workers=args.workers,
    )

    # calibration
    model.fc.disable_quantize()
    model = quantize_qat(model, qconfig=Q.calibration_qconfig)

    def _mean_over_ranks(value, key):
        # all_reduce_sum under ``key`` followed by division by the world size.
        return dist.all_reduce_sum(value, key) / dist.get_world_size()

    # calculate scale
    @jit.trace(symbolic=True)
    def calculate_scale(image, label):
        model.eval()
        enable_observer(model)
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        if dist.is_distributed():
            loss = _mean_over_ranks(loss, "valid_loss")
            acc1 = _mean_over_ranks(acc1, "valid_acc1")
            acc5 = _mean_over_ranks(acc5, "valid_acc5")
        return loss, acc1, acc5

    infer(calculate_scale, valid_queue, args)

    # quantized
    model = quantize(model)

    # eval quantized model
    @jit.trace(symbolic=True)
    def eval_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
        acc1, acc5 = F.accuracy(logits, label, (1, 5))
        if dist.is_distributed():
            loss = _mean_over_ranks(loss, "valid_loss")
            acc1 = _mean_over_ranks(acc1, "valid_acc1")
            acc5 = _mean_over_ranks(acc5, "valid_acc5")
        return loss, acc1, acc5

    _, valid_acc, valid_acc5 = infer(eval_func, valid_queue, args)
    logger.info("TEST %f, %f", valid_acc, valid_acc5)

    # save quantized model
    checkpoint_path = os.path.join(save_dir, "checkpoint-calibration.pkl")
    mge.save({"step": -1, "state_dict": model.state_dict()}, checkpoint_path)
    logger.info("save in {}".format(checkpoint_path))