예제 #1
0
def get_qconfig(per_channel):
    """Select the quantization configuration to attach to a model.

    Args:
        per_channel: When truthy, PyTorch's default ``'fbgemm'`` qconfig is
            returned unchanged. When falsy, a custom ``QConfig`` is built
            from a ``MovingAverageMinMaxObserver`` (``reduce_range=False``)
            for activations and ``default_weight_observer`` for weights.

    Returns:
        A ``torch.quantization.QConfig`` instance.
    """
    if not per_channel:
        # Hand-assembled config: moving-average min/max statistics for
        # activations, the library's default observer for weights.
        activation_observer = MovingAverageMinMaxObserver.with_args(
            reduce_range=False)
        return torch.quantization.QConfig(
            activation=activation_observer,
            weight=default_weight_observer,
        )

    # Otherwise defer entirely to the backend's stock configuration.
    return torch.quantization.get_default_qconfig('fbgemm')
예제 #2
0
# Print the conv sub-module of the float model's features[1] block.
# NOTE(review): the message says "After fusion"; presumably float_model was
# fused earlier in the file -- confirm against the preceding code.
print('\n Inverted Residual Block: After fusion\n\n',
      float_model.features[1].conv)

# Baseline numbers for the float model: size first, then accuracy.
print("Size of baseline model")
original_size = print_size_of_model(float_model)

top1_fp32, top5_fp32 = eval_accuracy(float_model, data_dir)
print('\nFP32 accuracy: %2.2f' % top1_fp32.avg)

# Load the model from the saved float checkpoint, move it to CPU and switch
# it to eval mode before preparing it for quantization.
per_tensor_quantized_model = load_model(saved_model_dir +
                                        float_model_file).to('cpu')
per_tensor_quantized_model.eval()

# Fuse Conv, bn and relu
per_tensor_quantized_model.fuse_model()

# Quantization config: moving-average min/max observer (reduce_range=False)
# for activations, the library's default observer for weights.
act = MovingAverageMinMaxObserver.with_args(reduce_range=False)
per_tensor_quantized_model.qconfig = torch.quantization.QConfig(
    activation=act, weight=default_weight_observer)
print(per_tensor_quantized_model.qconfig)
torch.quantization.prepare(per_tensor_quantized_model, inplace=True)

# prepare() above has already run in place; report it and show the same
# block again so the result can be compared with the earlier printout.
print('Post Training Quantization Prepare: Inserting Observers')
print('\n Inverted Residual Block:After observer insertion \n\n',
      per_tensor_quantized_model.features[1].conv)

# Calibrate with the training set. The model outputs are discarded, so
# gradients are never needed: disable autograd tracking to avoid building
# a graph for every calibration batch.
with torch.no_grad():
    for image, _ in get_train_loader("imagenet_1k"):
        per_tensor_quantized_model(image)

print('Post Training Quantization: Calibration done')