def test_resnet():
    """Quantize the quantizable torchvision ResNet18 and compare it against
    the plain float ResNet18 via ``quantize_and_run``."""
    # Five synthetic (image, label) pairs; the labels look like placeholders
    # for a calibration API that only consumes the images — TODO confirm.
    calibration_data = []
    for _ in range(5):
        image = torch.rand(1, 3, 224, 224, dtype=torch.float)
        label = torch.randint(0, 1, (2, ), dtype=torch.long)
        calibration_data.append((image, label))

    quantizable_model = qresnet.resnet18(pretrained=True).eval()
    float_model = models.resnet.resnet18(pretrained=True).eval()
    quantize_and_run(quantizable_model, float_model, calibration_data, True)
def test_qnn_mergecomposite():
    """Post-training-quantize ResNet18 with fbgemm, trace it, and feed the
    traced module to the QNN MergeComposite test helper."""
    from torchvision.models.quantization import resnet as qresnet

    net = qresnet.resnet18(pretrained=True)
    net.eval()
    dummy_input = torch.zeros((1, 3, 224, 224))

    # Standard eager-mode PTQ flow: fuse, attach qconfig, calibrate, convert.
    net.fuse_model()
    net.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    torch.quantization.prepare(net, inplace=True)
    net(dummy_input)  # single calibration pass on a zero image
    torch.quantization.convert(net, inplace=True)

    traced = torch.jit.trace(net, dummy_input).eval()
    input_name = "image"
    run_qnn_mergecomposite(traced, input_name, dummy_input.shape)
def test_quantized_imagenet():
    """End-to-end check of quantized torchvision models through TVM.

    Downloads a real test image, quantizes each model with both per-tensor
    and per-channel schemes, runs the traced module through TVM, prints
    diagnostics, and asserts that PyTorch and TVM agree on the top-3 labels.
    """

    def get_transform():
        # Standard ImageNet preprocessing: resize, center-crop, normalize.
        import torchvision.transforms as transforms
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        return transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])

    def get_real_image(im_height, im_width):
        # Use a cached real photo (not random data) so the top-3 label
        # comparison below is meaningful.
        repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
        img_name = 'elephant-299.jpg'
        image_url = os.path.join(repo_base, img_name)
        img_path = download_testdata(image_url, img_name, module='data')
        return Image.open(img_path).resize((im_height, im_width))

    def get_imagenet_input():
        # Returns a preprocessed NCHW float numpy array with batch dim 1.
        im = get_real_image(224, 224)
        preprocess = get_transform()
        pt_tensor = preprocess(im)
        return np.expand_dims(pt_tensor.numpy(), 0)

    from torchvision.models.quantization import resnet as qresnet
    from torchvision.models.quantization import mobilenet as qmobilenet
    from torchvision.models.quantization import inception as qinception
    from torchvision.models.quantization import googlenet as qgooglenet

    qmodels = []
    for per_channel in [False, True]:
        qmodels += [
            ("resnet18", qresnet.resnet18(pretrained=True), per_channel),
            ("mobilenet_v2", qmobilenet.mobilenet_v2(pretrained=True), per_channel),
            # disable inception test for now, since loading it takes ~5min on torchvision-0.5
            #("inception_v3", qinception.inception_v3(pretrained=True), per_channel),
            ("googlenet", qgooglenet(pretrained=True), per_channel),
        ]

    results = []

    for (model_name, raw_model, per_channel) in qmodels:
        raw_model.eval()

        if per_channel:
            model_name += ", per channel quantization"
        else:
            model_name += ", per tensor quantization"

        inp = get_imagenet_input()
        pt_inp = torch.from_numpy(inp)

        quantize_model(raw_model, pt_inp, per_channel=per_channel, dummy=False)
        script_module = torch.jit.trace(raw_model, pt_inp).eval()

        with torch.no_grad():
            pt_result = script_module(pt_inp).numpy()

        input_name = get_graph_input_names(script_module)[0]
        runtime = get_tvm_runtime(script_module, input_name, (1, 3, 224, 224))
        runtime.set_input(input_name, inp)
        runtime.run()
        tvm_result = runtime.get_output(0).asnumpy()

        results.append((model_name, pt_result[0], tvm_result[0]))

    for (model_name, pt_result, tvm_result) in results:
        max_abs_diff = np.max(np.abs(tvm_result - pt_result))
        mean_abs_diff = np.mean(np.abs(tvm_result - pt_result))
        num_identical = np.sum(tvm_result == pt_result)
        pt_top3_labels = np.argsort(pt_result)[::-1][:3]
        # BUG FIX: previously computed from pt_result, which made the
        # assertion below trivially true; the TVM top-3 labels must come
        # from the TVM output.
        tvm_top3_labels = np.argsort(tvm_result)[::-1][:3]

        print("\nModel name: %s" % model_name)
        print("PyTorch top3 label:", pt_top3_labels)
        print("TVM top3 label:", tvm_top3_labels)
        print("max abs diff:", max_abs_diff)
        print("mean abs_diff:", mean_abs_diff)
        print("%d in 1000 raw outputs identical." % num_identical)

        assert set(pt_top3_labels) == set(tvm_top3_labels)

# sample outputs
def test_quantized_imagenet():
    """Quantize several torchvision models (per-channel), trace them, run the
    traced modules through TVM on a real test image, and check that PyTorch
    and TVM agree on the top-3 predicted labels."""

    def get_transform():
        import torchvision.transforms as transforms

        # Standard ImageNet preprocessing pipeline.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        return transforms.Compose(
            [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize]
        )

    def get_real_image(im_height, im_width):
        # Download (and cache) a real photo so the label comparison is meaningful.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        img_name = "elephant-299.jpg"
        image_url = os.path.join(repo_base, img_name)
        img_path = download_testdata(image_url, img_name, module="data")
        return Image.open(img_path).resize((im_height, im_width))

    def get_imagenet_input():
        # NCHW float numpy array with a batch dimension of one.
        img = get_real_image(224, 224)
        tensor = get_transform()(img)
        return np.expand_dims(tensor.numpy(), 0)

    from torchvision.models.quantization import resnet as qresnet
    from torchvision.models.quantization import mobilenet as qmobilenet
    from torchvision.models.quantization import inception as qinception
    from torchvision.models.quantization import googlenet as qgooglenet
    from torchvision.models.quantization import mobilenet_v3_large as qmobilenet_v3_large

    per_channel = True
    qmodels = [
        ("resnet18", qresnet.resnet18(pretrained=True), per_channel),
        ("mobilenet_v2", qmobilenet.mobilenet_v2(pretrained=True), per_channel),
        ("inception_v3", qinception.inception_v3(pretrained=True), per_channel),
        # tracing quantized googlenet broken as of v1.6
        # ("googlenet", qgooglenet(pretrained=True), per_channel),
        # As of v1.10, quantized mobilenet v3 has a weird segfault issue
        # during make_conv_packed_param
        # See https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/ci-docker-staging/192
        # ("mobilenet_v3_large", qmobilenet_v3_large(pretrained=True, quantize=True).eval(), True)
    ]

    results = []

    for (model_name, raw_model, per_channel) in qmodels:
        raw_model.eval()
        model_name += ", per channel quantization" if per_channel else ", per tensor quantization"

        inp = get_imagenet_input()
        pt_inp = torch.from_numpy(inp)

        if "mobilenet_v3_large" not in model_name:
            # mv3 was qat-ed, quantize=True option above makes it already quantized
            quantize_model(raw_model, pt_inp, per_channel=per_channel)

        script_module = torch.jit.trace(raw_model, pt_inp).eval()
        with torch.no_grad():
            pt_result = script_module(pt_inp).numpy()

        input_name = "image"
        runtime = get_tvm_runtime(script_module, input_name, (1, 3, 224, 224))
        runtime.set_input(input_name, inp)
        runtime.run()
        tvm_result = runtime.get_output(0).numpy()

        results.append((model_name, pt_result[0], tvm_result[0]))

    for (model_name, pt_result, tvm_result) in results:
        abs_diff = np.abs(tvm_result - pt_result)
        max_abs_diff = np.max(abs_diff)
        mean_abs_diff = np.mean(abs_diff)
        num_identical = np.sum(tvm_result == pt_result)
        pt_top3_labels = np.argsort(pt_result)[::-1][:3]
        tvm_top3_labels = np.argsort(tvm_result)[::-1][:3]

        print("\nModel name: %s" % model_name)
        print("PyTorch top3 label:", pt_top3_labels)
        print("TVM top3 label:", tvm_top3_labels)
        print("max abs diff:", max_abs_diff)
        print("mean abs_diff:", mean_abs_diff)
        print("%d in 1000 raw outputs identical." % num_identical)

        assert set(pt_top3_labels) == set(tvm_top3_labels)

# sample outputs
def test_quantized_imagenet():
    """Run per-tensor and per-channel quantized torchvision models through
    TVM on a real test image and assert that the PyTorch and TVM top-3
    predicted labels agree."""

    def get_transform():
        import torchvision.transforms as transforms

        # Resize -> center crop -> tensor -> ImageNet normalization.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        steps = [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize]
        return transforms.Compose(steps)

    def get_real_image(im_height, im_width):
        # A cached real photo makes the label comparison below meaningful.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        img_name = "elephant-299.jpg"
        image_url = os.path.join(repo_base, img_name)
        img_path = download_testdata(image_url, img_name, module="data")
        return Image.open(img_path).resize((im_height, im_width))

    def get_imagenet_input():
        # Preprocessed NCHW float numpy input, batch size one.
        photo = get_real_image(224, 224)
        preprocessed = get_transform()(photo)
        return np.expand_dims(preprocessed.numpy(), 0)

    from torchvision.models.quantization import resnet as qresnet
    from torchvision.models.quantization import mobilenet as qmobilenet
    from torchvision.models.quantization import inception as qinception
    from torchvision.models.quantization import googlenet as qgooglenet

    qmodels = []
    for per_channel in [False, True]:
        qmodels += [
            ("resnet18", qresnet.resnet18(pretrained=True), per_channel),
            ("mobilenet_v2", qmobilenet.mobilenet_v2(pretrained=True), per_channel),
            # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug
            # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756
            # ("inception_v3", qinception.inception_v3(pretrained=True), per_channel),
            # tracing quantized googlenet broken as of v1.6
            # ("googlenet", qgooglenet(pretrained=True), per_channel),
        ]

    if is_version_greater_than("1.7.1"):
        from torchvision.models.quantization import mobilenet_v3_large as qmobilenet_v3_large

        qmodels.append(
            ("mobilenet_v3_large", qmobilenet_v3_large(pretrained=True, quantize=True).eval(), True)
        )

    results = []

    for (model_name, raw_model, per_channel) in qmodels:
        raw_model.eval()

        if per_channel:
            model_name += ", per channel quantization"
        else:
            model_name += ", per tensor quantization"

        inp = get_imagenet_input()
        pt_inp = torch.from_numpy(inp)

        if "mobilenet_v3_large" not in model_name:
            # mv3 was qat-ed, quantize=True option above makes it already quantized
            quantize_model(raw_model, pt_inp, per_channel=per_channel)

        script_module = torch.jit.trace(raw_model, pt_inp).eval()
        with torch.no_grad():
            pt_result = script_module(pt_inp).numpy()

        input_name = "image"
        runtime = get_tvm_runtime(script_module, input_name, (1, 3, 224, 224))
        runtime.set_input(input_name, inp)
        runtime.run()
        tvm_result = runtime.get_output(0).asnumpy()

        results.append((model_name, pt_result[0], tvm_result[0]))

    for (model_name, pt_result, tvm_result) in results:
        difference = np.abs(tvm_result - pt_result)
        max_abs_diff = np.max(difference)
        mean_abs_diff = np.mean(difference)
        num_identical = np.sum(tvm_result == pt_result)
        pt_top3_labels = np.argsort(pt_result)[::-1][:3]
        tvm_top3_labels = np.argsort(tvm_result)[::-1][:3]

        print("\nModel name: %s" % model_name)
        print("PyTorch top3 label:", pt_top3_labels)
        print("TVM top3 label:", tvm_top3_labels)
        print("max abs diff:", max_abs_diff)
        print("mean abs_diff:", mean_abs_diff)
        print("%d in 1000 raw outputs identical." % num_identical)

        assert set(pt_top3_labels) == set(tvm_top3_labels)

# sample outputs
def load_model(model_file):
    """Build a float ResNet18, restore its weights from *model_file*
    (a saved state dict), and return the model on CPU."""
    net = resnet18(pretrained=False)
    net.load_state_dict(torch.load(model_file))
    net.to("cpu")
    return net
data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=eval_batch_size, sampler=test_sampler) return data_loader, data_loader_test data_path = '~/my_imagenet/' train_batch_size = 30 eval_batch_size = 50 data_loader, data_loader_test = prepare_data_loaders(data_path) criterion = nn.CrossEntropyLoss() float_model = resnet18(pretrained=True) float_model.eval() # deepcopy the model since we need to keep the original model around import copy model_to_quantize = copy.deepcopy(float_model) model_to_quantize.eval() """ Prepare models """ # Note that this is temporary, we'll expose these functions to torch.quantization after official releasee from torch.quantization.quantize_fx import prepare_fx, convert_fx
pt_model(inp) t1 = time.time() for i in range(n_repeat): pt_model(inp) t2 = time.time() print("Torch elapsed ms:", (t2 - t1) * 1e3 / n_repeat) msg = """ Loading inception v3 models on torch 1.4 + torchvision 0.5 takes a very long time (~5min). Remove "inception_v3" below to speed up testing. """ logging.warning(msg) # Mobilenet v2 was trained using QAT, post training calibration is disabled qmodels = [ ("resnet18", False, qresnet.resnet18(pretrained=True).eval()), ("resnet50", False, qresnet.resnet50(pretrained=True).eval()), ("mobilenet_v2", True, qmobilenet.mobilenet_v2(pretrained=True).eval()), ("inception_v3", False, qinception.inception_v3(pretrained=True).eval()), ("googlenet", False, qgooglenet(pretrained=True).eval()), ] if torch_version_check(): print("Adding Mobilenet v3 test") import sys sys.path.append("../models") from qmobilenet_v3 import load_model model_file = "../data/mobilenetv3small-f3be529c.pth" qmodels.append( ("mobilenet_v3 small", False, load_model(model_file).eval()))
from torchvision.models.quantization.resnet import resnet18 from torch.ao.quantization.experimental.quantization_helper import ( evaluate, prepare_data_loaders, training_loop ) # training and validation dataset: full ImageNet dataset data_path = '~/my_imagenet/' train_batch_size = 30 eval_batch_size = 50 data_loader, data_loader_test = prepare_data_loaders(data_path) criterion = nn.CrossEntropyLoss() float_model = resnet18(pretrained=True) float_model.eval() # deepcopy the model since we need to keep the original model around import copy model_to_quantize = copy.deepcopy(float_model) model_to_quantize.eval() """ Prepare model QAT for specified qconfig for torch.nn.Linear """ def prepare_qat_linear(qconfig): qconfig_dict = {"object_type": [(torch.nn.Linear, qconfig)]} prepared_model = prepare_fx(copy.deepcopy(float_model), qconfig_dict) # fuse modules and insert observers training_loop(prepared_model, criterion, data_loader)