def main(args):
    """Run the mnist quantization sample end to end.

    Measures top-1 accuracy of the float model, performs weights and
    activation calibration through amct, saves the quantized models and
    reports the fake-quant model's top-1 accuracy for comparison.

    :param args: parsed CLI namespace providing ``model_file``,
        ``weights_file``, ``iterations``, ``gpu_id`` and ``cpu_mode``.
    """
    proto_file = args.model_file
    caffemodel_file = args.weights_file

    # amct.set_cpu_mode()/set_gpu_mode() only selects where the weights
    # calibration runs; activation calibration follows the pycaffe mode
    # (caffe.set_mode_cpu()/set_mode_gpu()). The amct mode must be set
    # before the whole calibration process; CPU is the default.
    # amct.set_gpu_mode() does not pick a GPU card — choose one either
    # with pycaffe's set_device(gpu_id) or via CUDA_VISIBLE_DEVICES.
    use_gpu = args.gpu_id is not None and not args.cpu_mode
    if use_gpu:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        amct.set_gpu_mode()
    else:
        caffe.set_mode_cpu()

    # Baseline accuracy of the original float model.
    ori_top1 = do_benchmark_test(proto_file, caffemodel_file,
                                 args.iterations)

    # Quantize configurations.
    batch_num = 2
    skip_layers = []
    config_json_file = os.path.join(TMP, 'config.json')
    amct.create_quant_config(config_json_file, proto_file,
                             caffemodel_file, skip_layers, batch_num)

    # Phase0: initialize the amct task.
    scale_offset_record_file = os.path.join(TMP, 'record.txt')
    graph = amct.init(config_json_file, proto_file, caffemodel_file,
                      scale_offset_record_file)

    # Phase1: conv+bn+scale fusion, weights calibration and fake quant;
    # quant and dequant layers are inserted here.
    modified_model_file = os.path.join(TMP, 'modified_model.prototxt')
    modified_weights_file = os.path.join(TMP, 'modified_model.caffemodel')
    amct.quantize_model(graph, modified_model_file, modified_weights_file)

    # Phase2: forward the modified model to calibrate activations.
    do_benchmark_test(modified_model_file, modified_weights_file, batch_num)

    # Phase3: save the final models — one fake-quant model for caffe
    # verification and one deploy model for GE.
    amct.save_model(graph, 'Both', os.path.join(RESULT, 'mnist'))

    # Phase4: accuracy of the final fake-quant model.
    fake_quant_model = os.path.join(RESULT,
                                    'mnist_fake_quant_model.prototxt')
    fake_quant_weights = os.path.join(
        RESULT, 'mnist_fake_quant_weights.caffemodel')
    quant_top1 = do_benchmark_test(
        fake_quant_model, fake_quant_weights, args.iterations)

    print('[AMCT][INFO] mnist top1 before quantize is {}, after quantize '
          'is {}'.format(ori_top1, quant_top1))
    print('[AMCT][INFO]Run mnist sample with quantize success!')
def main():
    """Quantize the yolov3 caffe model to int8, calibrating on video frames."""
    parser = argparse.ArgumentParser(
        description='Example of convert yolov3 caffe model to int8')
    parser.add_argument('--prototxt', type=str,
                        default='model/yolov3_pp.prototxt',
                        help='path to yolov3_pp.prototxt')
    parser.add_argument('--caffemodel', type=str,
                        default='model/yolov3.caffemodel',
                        help='path to yolov3.caffemodel')
    parser.add_argument('--tmp_dir', type=str, default='./tmp',
                        help='path to save temp files')
    parser.add_argument('--output_dir', type=str, default='model/',
                        help='path to save output model file')
    parser.add_argument('--output_model_name', type=str,
                        default='yolov3_int8',
                        help='prefix of output model files')
    parser.add_argument('--calib_video', action='append', required=True,
                        help='videos used in calibration, it can be '
                             'specified multiple times.')
    parser.add_argument('--calib_frame', type=int, default=200,
                        help='number of frames used in each video file')
    args = parser.parse_args()

    work_dir = args.tmp_dir
    mkdir(work_dir)

    # All calibration work runs on CPU here.
    caffe.set_mode_cpu()

    # Build the quantization configuration: no layers skipped, one
    # calibration batch.
    config_json_file = os.path.join(work_dir, 'config.json')
    layers_to_skip = []
    calib_batch_num = 1
    amct.create_quant_config(config_json_file, args.prototxt,
                             args.caffemodel, layers_to_skip,
                             calib_batch_num)

    # Initialize the amct task with a record file for scales/offsets.
    record_file = os.path.join(work_dir, 'scale_offset_record.txt')
    graph = amct.init(config_json_file, args.prototxt, args.caffemodel,
                      record_file)
    print("done init")

    # Fuse, calibrate weights and insert quant/dequant layers.
    fused_proto = os.path.join(work_dir, 'modified_model.prototxt')
    fused_weights = os.path.join(work_dir, 'modified_model.caffemodel')
    amct.quantize_model(graph, fused_proto, fused_weights)
    print("done quantize")

    # Activation calibration on frames drawn from the supplied videos.
    net = caffe.Net(fused_proto, fused_weights, caffe.TEST)
    calibration(net, args.calib_video, args.calib_frame)

    # Persist both the fake-quant model and the deploy model.
    amct.save_model(graph, 'Both',
                    os.path.join(args.output_dir, args.output_model_name))
    print("done save")
def main():
    """Quantize faster_rcnn and report VOC2007 mAP before/after.

    With ``--pre_test`` the function only evaluates the float model and
    returns early; otherwise it runs the full amct calibration pipeline
    and evaluates the resulting fake-quant model.
    """
    args = parse_args()
    faster_rcnn_args_check(args)
    if args.cpu_mode:
        # A gpu_id of None forces every later stage onto the CPU.
        args.gpu_id = None

    # User model files.
    proto_file = args.model_file
    caffemodel_file = args.weights_file

    # Guard clause: evaluate the unquantized model only, then stop.
    if args.pre_test:
        caffe_test(proto_file, caffemodel_file, args.iterations,
                   gpu_id=args.gpu_id, calibration=False,
                   data_dir='./datasets', is_quantize=False)
        print('[AMCT][INFO]Run faster_rcnn without quantize success!')
        m_ap = do_voc2007_benchmark_test(proto_file, caffemodel_file,
                                         args.gpu_id)
        print(
            '[AMCT][INFO]Run faster_rcnn without quantize success, and mAP is {}'
            .format(m_ap))
        return

    # amct.set_cpu_mode()/set_gpu_mode() only selects where the weights
    # calibration runs; activation calibration follows the pycaffe mode
    # (caffe.set_mode_cpu()/set_mode_gpu()). The amct mode must be set
    # before the whole calibration process; CPU is the default.
    # amct.set_gpu_mode() does not pick a GPU card — choose one either
    # with pycaffe's set_device(gpu_id) or via CUDA_VISIBLE_DEVICES.
    if args.gpu_id is not None:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        amct.set_gpu_mode()

    # Quantize configurations.
    config_json_file = './config.json'
    layers_to_skip = []
    calib_batch_num = 1
    amct.create_quant_config(config_json_file, proto_file,
                             caffemodel_file, layers_to_skip,
                             calib_batch_num)

    # Phase0: initialize the amct task.
    record_file = './tmp/scale_offset_record/record.txt'
    graph = amct.init(config_json_file, proto_file, caffemodel_file,
                      record_file)

    # Phase1: conv+bn+scale fusion, weights calibration and fake quant;
    # quant and dequant layers are inserted here.
    fused_proto = os.path.realpath('./tmp/modified_model.prototxt')
    fused_weights = os.path.realpath('./tmp/modified_model.caffemodel')
    amct.quantize_model(graph, fused_proto, fused_weights)

    # Phase2: forward the modified model to calibrate activations.
    caffe_test(fused_proto, fused_weights, calib_batch_num,
               gpu_id=args.gpu_id, calibration=True,
               data_dir='./datasets', is_quantize=False)

    # Phase3: save the final models — one fake-quant model for caffe
    # verification and one deploy model for GE.
    amct.save_model(graph, 'Both', './results/faster_rcnn')

    # Phase4: evaluate the final fake-quant model.
    fake_quant_model = './results/faster_rcnn_fake_quant_model.prototxt'
    fake_quant_weights = './results/faster_rcnn_fake_quant_weights.caffemodel'
    caffe_test(fake_quant_model, fake_quant_weights, args.iterations,
               gpu_id=args.gpu_id, calibration=False,
               data_dir='./datasets', is_quantize=True)
    print('[AMCT][INFO]Run faster_rcnn with quantize success!')
    m_ap = do_voc2007_benchmark_test(fake_quant_model, fake_quant_weights,
                                     args.gpu_id)
    print('[AMCT][INFO]Run faster_rcnn with quantize success, and mAP is {}!'.
          format(m_ap))
def main(args):
    """Run the ResNet-50 quantization sample.

    With ``--pre_test`` only the float model is evaluated; otherwise the
    full amct pipeline runs (config, init, quantize, activation
    calibration, save) and the fake-quant model is evaluated.

    :param args: parsed CLI namespace providing ``model_file``,
        ``weights_file``, ``iterations``, ``gpu_id``, ``cpu_mode``,
        ``pre_test``, ``benchmark`` and ``cfg_define``.
    """
    args_check(args)
    mkdir(TMP)
    mkdir(RESULT)

    def _evaluate(proto, weights, iterations):
        # Either raw forward passes or the benchmark-dataset test,
        # depending on the --benchmark flag.
        if args.benchmark:
            do_benchmark_test(args, proto, weights, iterations)
        else:
            run_caffe_model(proto, weights, iterations)

    # set_cpu_mode or set_gpu_mode only selects where the weights
    # calibration runs; activation calibration follows the pycaffe mode
    # (caffe.set_mode_cpu()/set_mode_gpu()). The amct mode must be set
    # before the whole calibration process; CPU is the default.
    # amct.set_gpu_mode() does not pick a GPU card — choose one either
    # with pycaffe's set_device(gpu_id) or via CUDA_VISIBLE_DEVICES.
    if args.gpu_id is not None and not args.cpu_mode:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        amct.set_gpu_mode()
    else:
        caffe.set_mode_cpu()

    # Guard clause: evaluate the unquantized model only, then stop.
    if args.pre_test:
        _evaluate(args.model_file, args.weights_file, args.iterations)
        print('[AMCT][INFO]Run ResNet-50 without quantize success!')
        return

    # Quantize configurations.
    config_json_file = os.path.join(TMP, 'config.json')
    layers_to_skip = []
    calib_batch_num = 2
    if args.cfg_define is not None:
        # Weights calibration with a non-uniform quantize configuration.
        amct.create_quant_config(config_json_file, args.model_file,
                                 args.weights_file, layers_to_skip,
                                 calib_batch_num,
                                 config_defination=args.cfg_define)
    else:
        amct.create_quant_config(config_json_file, args.model_file,
                                 args.weights_file, layers_to_skip,
                                 calib_batch_num)

    # Phase0: initialize the amct task.
    record_file = os.path.join(TMP, 'scale_offset_record.txt')
    graph = amct.init(config_json_file, args.model_file,
                      args.weights_file, record_file)

    # Phase1: conv+bn+scale fusion, weights calibration and fake quant;
    # quant and dequant layers are inserted here.
    fused_proto = os.path.join(TMP, 'modified_model.prototxt')
    fused_weights = os.path.join(TMP, 'modified_model.caffemodel')
    amct.quantize_model(graph, fused_proto, fused_weights)

    # Phase2: forward the modified model to calibrate activations.
    _evaluate(fused_proto, fused_weights, calib_batch_num)

    # Phase3: save the final models — one fake-quant model for caffe
    # verification and one deploy model for GE.
    amct.save_model(graph, 'Both', os.path.join(RESULT, 'ResNet50'))

    # Phase4: evaluate the final fake-quant model.
    fake_quant_model = os.path.join(RESULT,
                                    'ResNet50_fake_quant_model.prototxt')
    fake_quant_weights = os.path.join(
        RESULT, 'ResNet50_fake_quant_weights.caffemodel')
    _evaluate(fake_quant_model, fake_quant_weights, args.iterations)
    print('[AMCT][INFO]Run ResNet-50 with quantize success!')