import torch
from torch.quantization import MovingAverageMinMaxObserver, default_weight_observer

def get_qconfig(per_channel):
    # fbgemm's default qconfig quantizes weights per-channel; the fallback
    # quantizes weights per-tensor and pairs them with a moving-average
    # min/max activation observer.
    if per_channel:
        return torch.quantization.get_default_qconfig('fbgemm')
    else:
        act = MovingAverageMinMaxObserver.with_args(reduce_range=False)
        return torch.quantization.QConfig(activation=act,
                                          weight=default_weight_observer)
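# A quick sketch (not part of the original walkthrough) comparing the two
# QConfigs this helper can return; printing them is a handy sanity check
# before assigning one to a model's .qconfig attribute.
print(get_qconfig(per_channel=True))   # fbgemm default: per-channel weight quantization
print(get_qconfig(per_channel=False))  # per-tensor weights with a MinMax observer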
print('\n Inverted Residual Block: After fusion\n\n', float_model.features[1].conv)

print("Size of baseline model")
original_size = print_size_of_model(float_model)

top1_fp32, top5_fp32 = eval_accuracy(float_model, data_dir)
print('\nFP32 accuracy: %2.2f' % top1_fp32.avg)

per_tensor_quantized_model = load_model(saved_model_dir + float_model_file).to('cpu')
per_tensor_quantized_model.eval()

# Fuse Conv, BatchNorm and ReLU modules before quantization
per_tensor_quantized_model.fuse_model()

# Per-tensor quantization: a single scale/zero-point per tensor
# (equivalent to get_qconfig(per_channel=False) above)
act = MovingAverageMinMaxObserver.with_args(reduce_range=False)
per_tensor_quantized_model.qconfig = torch.quantization.QConfig(
    activation=act, weight=default_weight_observer)
print(per_tensor_quantized_model.qconfig)

# Insert observers so activation statistics can be collected during calibration
torch.quantization.prepare(per_tensor_quantized_model, inplace=True)
print('Post Training Quantization Prepare: Inserting Observers')
print('\n Inverted Residual Block: After observer insertion\n\n',
      per_tensor_quantized_model.features[1].conv)

# Calibrate with the training set
for image, _ in get_train_loader("imagenet_1k"):
    per_tensor_quantized_model(image)
print('Post Training Quantization: Calibration done')
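# Sketch of the step that typically follows calibration (an assumption; this
# part is not shown in the excerpt above): convert the observed model to a
# quantized one, then reuse the print_size_of_model and eval_accuracy helpers
# from earlier to compare the INT8 model against the FP32 baseline.
torch.quantization.convert(per_tensor_quantized_model, inplace=True)
print('Post Training Quantization: Convert done')

print("Size of model after quantization")
print_size_of_model(per_tensor_quantized_model)

top1_int8, top5_int8 = eval_accuracy(per_tensor_quantized_model, data_dir)
print('Per-tensor quantized accuracy: %2.2f' % top1_int8.avg)
print('FP32 baseline accuracy: %2.2f' % top1_fp32.avg)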