def test_warning(self):
    path = "./dynamic_outscale_infer_model_with_warnings/lenet"
    imperative_out_scale = ImperativeQuantAware()
    with fluid.dygraph.guard():
        lenet = ImperativeLenet()

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

    warning_message = "Warning: No Layer of the model while to be " \
                      "saved contains the out_threshold attribute, so the " \
                      "generated inference model would not contain the " \
                      "out_threshold."
    num = get_vaild_warning_num(warning_message, w)
    assert num == 1
def func_out_scale_acc(self):
    seed = 1000
    lr = 0.001

    weight_quantize_type = 'abs_max'
    activation_quantize_type = 'moving_average_abs_max'
    imperative_out_scale = ImperativeQuantAware(
        weight_quantize_type=weight_quantize_type,
        activation_quantize_type=activation_quantize_type)

    with fluid.dygraph.guard():
        np.random.seed(seed)
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

        lenet = ImperativeLenet()
        lenet = fix_model_dict(lenet)
        imperative_out_scale.quantize(lenet)

        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=32,
                              drop_last=True)
        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        loss_list = train_lenet(lenet, reader, adam)
        lenet.eval()

    param_save_path = "test_save_quantized_model/lenet.pdparams"
    save_dict = lenet.state_dict()
    paddle.save(save_dict, param_save_path)

    save_path = "./dynamic_outscale_infer_model/lenet"
    imperative_out_scale.save_quantized_model(
        layer=lenet,
        path=save_path,
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                    dtype='float32')
        ])

    for i in range(len(loss_list) - 1):
        self.assertTrue(loss_list[i] > loss_list[i + 1],
                        msg='Failed to do the imperative qat.')
def test_save_quantized_model(self):
    lr = 0.001

    load_param_path = "test_save_quantized_model/lenet.pdparams"
    save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"

    weight_quantize_type = 'abs_max'
    activation_quantize_type = 'moving_average_abs_max'
    imperative_out_scale = ImperativeQuantAware(
        weight_quantize_type=weight_quantize_type,
        activation_quantize_type=activation_quantize_type)

    with fluid.dygraph.guard():
        lenet = ImperativeLenet()
        load_dict = paddle.load(load_param_path)
        imperative_out_scale.quantize(lenet)
        lenet.set_dict(load_dict)

        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=32,
                              drop_last=True)
        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        loss_list = train_lenet(lenet, reader, adam)
        lenet.eval()

    imperative_out_scale.save_quantized_model(
        layer=lenet,
        path=save_path,
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                    dtype='float32')
        ])

    for i in range(len(loss_list) - 1):
        self.assertTrue(loss_list[i] > loss_list[i + 1],
                        msg='Failed to do the imperative qat.')
class TestImperativeQatAmp(unittest.TestCase):
    """
    Test the combination of qat and amp.
    """

    @classmethod
    def setUpClass(cls):
        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        cls.root_path = os.path.join(os.getcwd(),
                                     "imperative_qat_amp_" + timestamp)
        cls.save_path = os.path.join(cls.root_path, "model")

        cls.download_path = 'dygraph_int8/download'
        cls.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
                                              cls.download_path)

        cls.lenet_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/lenet_pretrained.tar.gz"
        cls.lenet_md5 = "953b802fb73b52fae42896e3c24f0afb"

        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

    @classmethod
    def tearDownClass(cls):
        try:
            shutil.rmtree(cls.root_path)
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                cls.root_path, str(e)))

    def cache_unzipping(self, target_folder, zip_path):
        if not os.path.exists(target_folder):
            cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
                target_folder, zip_path)
            os.system(cmd)

    def download_model(self, data_url, data_md5, folder_name):
        download(data_url, self.download_path, data_md5)
        file_name = data_url.split('/')[-1]
        zip_path = os.path.join(self.cache_folder, file_name)
        print('Data is downloaded at {0}'.format(zip_path))

        data_cache_folder = os.path.join(self.cache_folder, folder_name)
        self.cache_unzipping(data_cache_folder, zip_path)
        return data_cache_folder

    def set_vars(self):
        self.qat = ImperativeQuantAware()

        self.train_batch_num = 30
        self.train_batch_size = 32
        self.test_batch_num = 100
        self.test_batch_size = 32
        self.eval_acc_top1 = 0.99

    def model_train(self, model, batch_num=-1, batch_size=32, use_amp=False):
        model.train()
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size)

        adam = paddle.optimizer.Adam(learning_rate=0.001,
                                     parameters=model.parameters())
        scaler = paddle.amp.GradScaler(init_loss_scaling=500)

        for batch_id, data in enumerate(train_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            if use_amp:
                with paddle.amp.auto_cast():
                    out = model(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                scaled_loss = scaler.scale(avg_loss)
                scaled_loss.backward()

                scaler.minimize(adam, scaled_loss)
                adam.clear_gradients()
            else:
                out = model(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()

                adam.minimize(avg_loss)
                model.clear_gradients()

            if batch_id % 100 == 0:
                _logger.info("Train | step {}: loss = {:}, acc = {:}".format(
                    batch_id, avg_loss.numpy(), acc.numpy()))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break

    def model_test(self, model, batch_num=-1, batch_size=32, use_amp=False):
        model.eval()
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=batch_size)

        acc_top1_list = []
        for batch_id, data in enumerate(test_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            with paddle.amp.auto_cast(use_amp):
                out = model(img)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

            acc_top1_list.append(float(acc_top1.numpy()))
            if batch_id % 100 == 0:
                _logger.info(
                    "Test | At step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break

        acc_top1 = sum(acc_top1_list) / len(acc_top1_list)
        return acc_top1

    def ptq(self):
        start_time = time.time()

        self.set_vars()

        params_path = self.download_model(self.lenet_url, self.lenet_md5,
                                          "lenet")
        params_path += "/lenet_pretrained/lenet.pdparams"

        with fluid.dygraph.guard():
            model = ImperativeLenet()
            model_state_dict = paddle.load(params_path)
            model.set_state_dict(model_state_dict)

            _logger.info("Test fp32 model")
            fp32_acc_top1 = self.model_test(model, self.test_batch_num,
                                            self.test_batch_size)

            self.qat.quantize(model)

            use_amp = True
            self.model_train(model, self.train_batch_num,
                             self.train_batch_size, use_amp)

            _logger.info("Test int8 model")
            int8_acc_top1 = self.model_test(model, self.test_batch_num,
                                            self.test_batch_size, use_amp)

            _logger.info('fp32_acc_top1: %f, int8_acc_top1: %f' %
                         (fp32_acc_top1, int8_acc_top1))
            self.assertTrue(int8_acc_top1 > fp32_acc_top1 - 0.01,
                            msg='fp32_acc_top1: %f, int8_acc_top1: %f' %
                            (fp32_acc_top1, int8_acc_top1))

            input_spec = [
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ]
            paddle.jit.save(layer=model,
                            path=self.save_path,
                            input_spec=input_spec)
            print('Quantized model saved in %s' % self.save_path)

        end_time = time.time()
        print("total time: %ss" % (end_time - start_time))

    def test_ptq(self):
        self.ptq()
        with _test_eager_guard():
            self.ptq()
def setUp(self):
    _logger.info("test weight_quantize")
    self.imperative_qat = ImperativeQuantAware(
        weight_quantize_layer=CustomQAT)
def setUp(self):
    _logger.info("test weight_preprocess")
    self.imperative_qat = ImperativeQuantAware(
        weight_preprocess_layer=PACT)
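These setUp variants plug user-defined layers (CustomQAT, PACT) into ImperativeQuantAware; the tests pass the class itself, not an instance, so the framework instantiates it where needed. For orientation, below is a minimal sketch of what such a pluggable layer can look like. The class name DemoClipPreprocess, its parameter, and the clipping rule are illustrative assumptions about the Layer-in/tensor-out contract, not the actual PACT or CustomQAT implementations.

import paddle


class DemoClipPreprocess(paddle.nn.Layer):
    """A hypothetical weight-preprocess layer: clips weights to a
    learnable symmetric range before the fake-quant op sees them.
    Only the tensor-in/tensor-out forward contract mirrors what
    weight_preprocess_layer expects."""

    def __init__(self):
        super(DemoClipPreprocess, self).__init__()
        # Learnable clipping threshold, initialized to 1.0 (assumed value).
        self.alpha = self.create_parameter(
            shape=[1],
            default_initializer=paddle.nn.initializer.Constant(1.0))

    def forward(self, weight):
        # Input: the raw weight; output: the weight handed to the
        # fake-quant op.
        return paddle.clip(weight, -self.alpha, self.alpha)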
def func_out_scale_acc(self):
    paddle.disable_static()
    seed = 1000
    lr = 0.1

    qat = ImperativeQuantAware()

    np.random.seed(seed)
    reader = paddle.batch(paddle.dataset.mnist.test(),
                          batch_size=512,
                          drop_last=True)

    lenet = ImperativeLenetWithSkipQuant()
    lenet = fix_model_dict(lenet)
    qat.quantize(lenet)

    adam = AdamOptimizer(learning_rate=lr,
                         parameter_list=lenet.parameters())
    dynamic_loss_rec = []
    lenet.train()
    loss_list = train_lenet(lenet, reader, adam)

    lenet.eval()

    path = "./save_dynamic_quant_infer_model/lenet"
    save_dir = "./save_dynamic_quant_infer_model"

    qat.save_quantized_model(layer=lenet,
                             path=path,
                             input_spec=[
                                 paddle.static.InputSpec(
                                     shape=[None, 1, 28, 28],
                                     dtype='float32')
                             ])

    paddle.enable_static()

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)

    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)
    model_ops = inference_program.global_block().ops

    conv2d_count, matmul_count = 0, 0
    conv2d_skip_count, matmul_skip_count = 0, 0
    find_conv2d = False
    find_matmul = False
    for i, op in enumerate(model_ops):
        if op.type == 'conv2d':
            find_conv2d = True
            if op.has_attr("skip_quant"):
                conv2d_skip_count += 1
            if conv2d_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            conv2d_count += 1

        if op.type == 'matmul':
            find_matmul = True
            if op.has_attr("skip_quant"):
                matmul_skip_count += 1
            if matmul_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            matmul_count += 1

    if find_conv2d:
        self.assertTrue(conv2d_skip_count == 1)
    if find_matmul:
        self.assertTrue(matmul_skip_count == 1)
def test_qat_save(self):
    imperative_qat = ImperativeQuantAware(
        weight_quantize_type='abs_max',
        activation_quantize_type='moving_average_abs_max',
        quantizable_layer_type=[
            'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
            'Swish'
        ])

    with fluid.dygraph.guard():
        lenet = ImperativeLenet()
        imperative_qat.quantize(lenet)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=lenet.parameters())
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=32,
                                    drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=32)

        epoch_num = 1
        for epoch in range(epoch_num):
            lenet.train()
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)
                out = lenet(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                if batch_id % 100 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                        format(epoch, batch_id, avg_loss.numpy(),
                               acc.numpy()))

            lenet.eval()
            for batch_id, data in enumerate(test_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                acc_top1 = fluid.layers.accuracy(input=out,
                                                 label=label,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=out,
                                                 label=label,
                                                 k=5)

                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                        format(epoch, batch_id, acc_top1.numpy(),
                               acc_top5.numpy()))

        # save weights
        model_dict = lenet.state_dict()
        fluid.save_dygraph(model_dict, "save_temp")

        # test the correctness of `paddle.jit.save`
        data = next(test_reader())
        test_data = np.array([x[0].reshape(1, 28, 28)
                              for x in data]).astype('float32')
        test_img = fluid.dygraph.to_variable(test_data)
        lenet.eval()
        before_save = lenet(test_img)

    # save inference quantized model
    path = "./qat_infer_model/lenet"
    save_dir = "./qat_infer_model"
    paddle.jit.save(layer=lenet,
                    path=path,
                    input_spec=[
                        paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                                dtype='float32')
                    ])
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)
    after_save, = exe.run(inference_program,
                          feed={feed_target_names[0]: test_data},
                          fetch_list=fetch_targets)
    self.assertTrue(np.allclose(after_save, before_save.numpy()),
                    msg='Failed to save the inference quantized model.')
class QAT(object):
    """
    Quant Aware Training (QAT): Add the fake quant logic for the given
    quantizable layers, namely add the quant_dequant computational logic
    both for activation inputs and weight inputs.
    """

    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the
                default config is used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weight.
                Using this can quickly test if a user-defined quantization
                method works or not. The class should define both the
                quantization and the dequantization process, that is, its
                input is the non-quantized weight and it returns the
                dequantized weight. If None, the quantization op defined by
                'weight_quantize_type' is used. Default: None.
            act_quantize(class, optional): Defines how to quantize
                activation. Using this can quickly test if a user-defined
                quantization method works or not. The class should define
                both the quantization and the dequantization process, that
                is, its input is the non-quantized activation and it returns
                the dequantized activation. If None, the quantization op
                defined by 'activation_quantize_type' is used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess
                weight before quantization. Using this can quickly test if a
                user-defined preprocess method works or not. The class's
                input is the non-quantized weight and it returns the
                processed weight to be quantized. If None, the preprocess
                method defined by 'weight_preprocess_type' is used.
                Default: None.
            act_preprocess(class, optional): Defines how to preprocess
                activation before quantization. Using this can quickly test
                if a user-defined preprocess method works or not. The class's
                input is the non-quantized activation and it returns the
                processed activation to be quantized. If None, the preprocess
                method defined by 'activation_preprocess_type' is used.
                Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess \
            if weight_preprocess is not None else self.weight_preprocess
        self.act_preprocess = act_preprocess \
            if act_preprocess is not None else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'],
            weight_preprocess_layer=self.weight_preprocess,
            act_preprocess_layer=self.act_preprocess,
            weight_quantize_layer=self.weight_quantize,
            act_quantize_layer=self.act_quantize)

    def quantize(self, model):
        self.imperative_qat.quantize(model)

    def save_quantized_model(self, model, path, input_spec=None):
        if self.weight_preprocess is not None or \
                self.act_preprocess is not None:
            model = self._remove_preprocess(model)

        self.imperative_qat.save_quantized_model(layer=model,
                                                 path=path,
                                                 input_spec=input_spec)

    def _remove_preprocess(self, model):
        # Capture the trained weights before rebuilding the model without
        # the preprocess layers.
        state_dict = model.state_dict()
        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'])

        with paddle.utils.unique_name.guard():
            if hasattr(model, "_layers"):
                model = model._layers
            model.__init__()
            self.imperative_qat.quantize(model)
            # Restore the weights captured above; re-reading
            # model.state_dict() at this point would make the restore
            # a no-op.
            model.set_state_dict(state_dict)
        return model
def main():
    # create model
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
            model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=not FLAGS.resume)

    # quantize model
    if FLAGS.enable_quant:
        if not FLAGS.use_naive_api:
            print("use slim api")
            quant_config = {
                'weight_quantize_type': FLAGS.weight_quantize_type,
            }
            dygraph_qat = QAT(quant_config)
        else:
            print("use naive api")
            dygraph_qat = ImperativeQuantAware(
                weight_quantize_type=FLAGS.weight_quantize_type, )
        dygraph_qat.quantize(model)

    # prepare
    model = paddle.Model(model)
    if FLAGS.resume is not None:
        print("Resume from " + FLAGS.resume)
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(FLAGS.data, mode='train')
    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    optim = make_optimizer(
        np.ceil(
            float(len(train_dataset)) / FLAGS.batch_size /
            ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    # test
    if FLAGS.eval_only:
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    # train
    output_dir = os.path.join(FLAGS.output_dir, "checkpoint",
                              FLAGS.arch + "_checkpoint",
                              time.strftime('%Y-%m-%d-%H-%M',
                                            time.localtime()))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)

    # save
    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch,
                                        "model")
        input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                             dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        print("save all checkpoints in " + output_dir)
        print("save quantized inference model in " + quant_output_dir)
def test_out_scale_acc(self):
    seed = 1000
    lr = 0.1

    imperative_out_scale = ImperativeQuantAware()

    np.random.seed(seed)
    reader = paddle.batch(paddle.dataset.mnist.test(),
                          batch_size=32,
                          drop_last=True)
    lenet = ImperativeLenet()
    fixed_state = {}
    for name, param in lenet.named_parameters():
        p_shape = param.numpy().shape
        p_value = param.numpy()
        if name.endswith("bias"):
            value = np.zeros_like(p_value).astype('float32')
        else:
            value = np.random.normal(loc=0.0,
                                     scale=0.01,
                                     size=np.product(p_shape)).reshape(
                                         p_shape).astype('float32')
        fixed_state[name] = value
    lenet.set_dict(fixed_state)

    imperative_out_scale.quantize(lenet)
    adam = AdamOptimizer(learning_rate=lr,
                         parameter_list=lenet.parameters())
    dynamic_loss_rec = []
    lenet.train()
    for batch_id, data in enumerate(reader()):
        x_data = np.array([x[0].reshape(1, 28, 28)
                           for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data
                           ]).astype('int64').reshape(-1, 1)

        img = fluid.dygraph.to_variable(x_data)
        label = fluid.dygraph.to_variable(y_data)

        out = lenet(img)
        loss = fluid.layers.cross_entropy(out, label)
        avg_loss = fluid.layers.mean(loss)
        avg_loss.backward()
        adam.minimize(avg_loss)
        lenet.clear_gradients()
        dynamic_loss_rec.append(avg_loss.numpy()[0])
        if batch_id % 100 == 0:
            _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

    lenet.eval()

    path = "./save_dynamic_quant_infer_model/lenet"
    save_dir = "./save_dynamic_quant_infer_model"

    imperative_out_scale.save_quantized_model(
        layer=lenet,
        path=path,
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                    dtype='float32')
        ])

    paddle.enable_static()

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)
    model_ops = inference_program.global_block().ops

    conv2d_count, mul_count = 0, 0
    for i, op in enumerate(model_ops):
        if op.type == 'conv2d':
            if conv2d_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            conv2d_count += 1

        if op.type == 'mul':
            if mul_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            mul_count += 1
def test_out_scale_acc(self):
    def _build_static_lenet(main, startup, is_test=False, seed=1000):
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                main.random_seed = seed
                startup.random_seed = seed
                img = fluid.layers.data(name='image',
                                        shape=[1, 28, 28],
                                        dtype='float32')
                label = fluid.layers.data(name='label',
                                          shape=[1],
                                          dtype='int64')
                prediction = StaticLenet(img)
                if not is_test:
                    loss = fluid.layers.cross_entropy(input=prediction,
                                                      label=label)
                    avg_loss = fluid.layers.mean(loss)
                else:
                    avg_loss = prediction
        return img, label, avg_loss

    reader = paddle.batch(paddle.dataset.mnist.test(),
                          batch_size=32,
                          drop_last=True)
    weight_quantize_type = 'abs_max'
    activation_quant_type = 'moving_average_abs_max'
    param_init_map = {}
    seed = 1000
    lr = 0.1
    dynamic_out_scale_list = []
    static_out_scale_list = []

    # imperative train
    _logger.info(
        "--------------------------dynamic graph qat--------------------------"
    )
    imperative_out_scale = ImperativeQuantAware()

    with fluid.dygraph.guard():
        np.random.seed(seed)
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

        lenet = ImperativeLenet()
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(loc=0.0,
                                         scale=0.01,
                                         size=np.product(p_shape)).reshape(
                                             p_shape).astype('float32')
            fixed_state[name] = value
            param_init_map[param.name] = value
        lenet.set_dict(fixed_state)

        imperative_out_scale.quantize(lenet)
        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape(-1, 1)

            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

        lenet.eval()

        path = "./dynamic_outscale_infer_model/lenet"
        dynamic_save_dir = "./dynamic_outscale_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

    _logger.info(
        "--------------------------static graph qat--------------------------"
    )
    static_loss_rec = []
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)

    main = fluid.Program()
    infer = fluid.Program()
    startup = fluid.Program()
    static_img, static_label, static_loss = _build_static_lenet(
        main, startup, False, seed)
    infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                  seed)
    with fluid.unique_name.guard():
        with fluid.program_guard(main, startup):
            opt = AdamOptimizer(learning_rate=lr)
            opt.minimize(static_loss)

    scope = core.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    for param in main.all_parameters():
        param_tensor = scope.var(param.name).get_tensor()
        param_tensor.set(param_init_map[param.name], place)

    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quantize_type,
        quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
    transform_pass.apply(main_graph)
    transform_pass.apply(infer_graph)
    outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
    outscale_pass.apply(main_graph)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=static_loss.name, build_strategy=build_strategy)
    feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                              place=place)
    with fluid.scope_guard(scope):
        for batch_id, data in enumerate(reader()):
            loss_v, = exe.run(binary,
                              feed=feeder.feed(data),
                              fetch_list=[static_loss])
            static_loss_rec.append(loss_v[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', loss_v))
    scale_inference_pass = OutScaleForInferencePass(scope=scope)
    scale_inference_pass.apply(infer_graph)

    save_program = infer_graph.to_program()
    static_save_dir = "./static_outscale_infer_model"
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            dirname=static_save_dir,
            feeded_var_names=[infer_img.name],
            target_vars=[infer_pre],
            executor=exe,
            main_program=save_program,
            model_filename="lenet" + INFER_MODEL_SUFFIX,
            params_filename="lenet" + INFER_PARAMS_SUFFIX)

    rtol = 1e-05
    atol = 1e-08
    for i, (loss_d,
            loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
        diff = np.abs(loss_d - loss_s)
        if diff > (atol + rtol * np.abs(loss_s)):
            _logger.info(
                "diff({}) at {}, dynamic loss = {}, static loss = {}".format(
                    diff, i, loss_d, loss_s))
            break

    self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                np.array(static_loss_rec),
                                rtol=rtol,
                                atol=atol,
                                equal_nan=True),
                    msg='Failed to do the imperative qat.')

    # load dynamic model
    [dynamic_inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=dynamic_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)
    # load static model
    [static_inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=static_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)

    dynamic_ops = dynamic_inference_program.global_block().ops
    static_ops = static_inference_program.global_block().ops

    for op in dynamic_ops[:]:
        if op.type == "flatten2" or 'fake' in op.type:
            dynamic_ops.remove(op)

    for op in static_ops[:]:
        if 'fake' in op.type:
            static_ops.remove(op)

    for i in range(len(dynamic_ops)):
        if dynamic_ops[i].has_attr("out_threshold"):
            self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
            self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                            static_ops[i].attr("out_threshold"))
class QAT(object):
    """
    Quant Aware Training (QAT): Add the fake quant logic for the given
    quantizable layers, namely add the quant_dequant computational logic
    both for activation inputs and weight inputs.
    """

    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the
                default config is used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weight.
                Using this can quickly test if a user-defined quantization
                method works or not. The class should define both the
                quantization and the dequantization process, that is, its
                input is the non-quantized weight and it returns the
                dequantized weight. If None, the quantization op defined by
                'weight_quantize_type' is used. Default: None.
            act_quantize(class, optional): Defines how to quantize
                activation. Using this can quickly test if a user-defined
                quantization method works or not. The class should define
                both the quantization and the dequantization process, that
                is, its input is the non-quantized activation and it returns
                the dequantized activation. If None, the quantization op
                defined by 'activation_quantize_type' is used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess
                weight before quantization. Using this can quickly test if a
                user-defined preprocess method works or not. The class's
                input is the non-quantized weight and it returns the
                processed weight to be quantized. If None, the preprocess
                method defined by 'weight_preprocess_type' is used.
                Default: None.
            act_preprocess(class, optional): Defines how to preprocess
                activation before quantization. Using this can quickly test
                if a user-defined preprocess method works or not. The class's
                input is the non-quantized activation and it returns the
                processed activation to be quantized. If None, the preprocess
                method defined by 'activation_preprocess_type' is used.
                Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess \
            if weight_preprocess is not None else self.weight_preprocess
        self.act_preprocess = act_preprocess \
            if act_preprocess is not None else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'],
            weight_preprocess_layer=self.weight_preprocess,
            act_preprocess_layer=self.act_preprocess,
            weight_quantize_layer=self.weight_quantize,
            act_quantize_layer=self.act_quantize)

    def quantize(self, model, inplace=True):
        """
        Quantize the input model.

        Args:
            model(paddle.nn.Layer): The model to be quantized.
            inplace(bool): Whether to apply quantization to the input model
                in place. Default: True.
        Returns:
            quantized_model(paddle.nn.Layer): The quantized model.
        """
        assert isinstance(model, paddle.nn.Layer), \
            "The model must be the instance of paddle.nn.Layer."

        self._model = copy.deepcopy(model)

        if inplace:
            self.imperative_qat.quantize(model)
            quant_model = model
        else:
            quant_model = copy.deepcopy(model)
            self.imperative_qat.quantize(quant_model)
        return quant_model

    def save_quantized_model(self, model, path, input_spec=None,
                             onnx_format=False):
        """
        Save the quantized inference model.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the
                input of the saved model's forward method, which can be
                described by InputSpec or example Tensor. If None, all input
                variables of the original Layer's forward method would be the
                inputs of the saved model. Default: None.
            onnx_format (bool, optional): Whether to export the quantized
                model with the ONNX-compatible quantization format.
                Default: False.

        Returns:
            None
        """
        if self.weight_preprocess is not None or \
                self.act_preprocess is not None:
            training = model.training
            model = self._remove_preprocess(model)
            if training:
                model.train()
            else:
                model.eval()

        self.imperative_qat.save_quantized_model(layer=model,
                                                 path=path,
                                                 input_spec=input_spec,
                                                 onnx_format=onnx_format)

    def _remove_preprocess(self, model):
        state_dict = model.state_dict()
        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'])

        with paddle.utils.unique_name.guard():
            if hasattr(model, "_layers"):
                model = model._layers
            # Rebuild from the float copy saved in quantize(), then
            # re-quantize without the preprocess layers and restore the
            # trained weights.
            model = self._model
            self.imperative_qat.quantize(model)
            model.set_state_dict(state_dict)
        return model
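Compared with the earlier QAT variant above, this version keeps a deepcopy of the float model and adds inplace=False and ONNX-format export. A minimal usage sketch, again assuming a paddleslim-style import path and paddle.vision.models.LeNet as a stand-in model (both are assumptions):

import paddle
# Assumed import location; adjust to wherever this QAT class is defined.
from paddleslim.dygraph.quant import QAT

model = paddle.vision.models.LeNet()  # placeholder float32 model
qat = QAT()  # falls back to _quant_config_default

# inplace=False leaves `model` untouched and returns a quantized copy,
# handy for comparing fp32 and quantized accuracy side by side.
quant_model = qat.quantize(model, inplace=False)

# ... fine-tune quant_model here ...

# onnx_format=True saves the quantize/dequantize representation that is
# compatible with ONNX export.
qat.save_quantized_model(
    quant_model,
    path="./quant_infer/lenet",
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ],
    onnx_format=True)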
def func_qat(self):
    self.set_vars()

    imperative_qat = ImperativeQuantAware(
        weight_quantize_type=self.weight_quantize_type,
        activation_quantize_type=self.activation_quantize_type,
        fuse_conv_bn=self.fuse_conv_bn)

    with fluid.dygraph.guard():
        # For CI coverage
        conv1 = Conv2D(in_channels=3,
                       out_channels=2,
                       kernel_size=3,
                       stride=1,
                       padding=1,
                       padding_mode='replicate')
        quant_conv1 = QuantizedConv2D(conv1)
        data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
        quant_conv1(fluid.dygraph.to_variable(data))

        conv_transpose = Conv2DTranspose(4, 6, (3, 3))
        quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
        x_var = paddle.uniform((2, 4, 8, 8),
                               dtype='float32',
                               min=-1.0,
                               max=1.0)
        quant_conv_transpose(x_var)

        seed = 1
        np.random.seed(seed)
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

        lenet = ImperativeLenet()
        lenet = fix_model_dict(lenet)
        imperative_qat.quantize(lenet)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=lenet.parameters())

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=32,
                                    drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=32)

        epoch_num = 1
        for epoch in range(epoch_num):
            lenet.train()
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)
                out = lenet(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                if batch_id % 100 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                        format(epoch, batch_id, avg_loss.numpy(),
                               acc.numpy()))
                if batch_id == 500:  # For shortening CI time
                    break

            lenet.eval()
            eval_acc_top1_list = []
            for batch_id, data in enumerate(test_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                acc_top1 = fluid.layers.accuracy(input=out,
                                                 label=label,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=out,
                                                 label=label,
                                                 k=5)

                if batch_id % 100 == 0:
                    eval_acc_top1_list.append(float(acc_top1.numpy()))
                    _logger.info(
                        "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                        format(epoch, batch_id, acc_top1.numpy(),
                               acc_top5.numpy()))

            # check eval acc
            eval_acc_top1 = sum(eval_acc_top1_list) / len(
                eval_acc_top1_list)
            print('eval_acc_top1', eval_acc_top1)
            self.assertTrue(eval_acc_top1 > 0.9,
                            msg="The test acc {%f} is less than 0.9." %
                            eval_acc_top1)

        # test the correctness of `paddle.jit.save`
        data = next(test_reader())
        test_data = np.array([x[0].reshape(1, 28, 28)
                              for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data
                           ]).astype('int64').reshape(-1, 1)
        test_img = fluid.dygraph.to_variable(test_data)
        label = fluid.dygraph.to_variable(y_data)
        lenet.eval()
        fp32_out = lenet(test_img)
        fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy()

    with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
        # save inference quantized model
        imperative_qat.save_quantized_model(
            layer=lenet,
            path=os.path.join(tmpdir, "lenet"),
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ],
            onnx_format=self.onnx_format)
        print('Quantized model saved in %s' % tmpdir)

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(
             dirname=tmpdir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX)
        quant_out, = exe.run(inference_program,
                             feed={feed_target_names[0]: test_data},
                             fetch_list=fetch_targets)
        paddle.disable_static()
        quant_out = fluid.dygraph.to_variable(quant_out)
        quant_acc = fluid.layers.accuracy(quant_out, label).numpy()
        paddle.enable_static()
        delta_value = fp32_acc - quant_acc
        self.assertLess(delta_value, self.diff_threshold)
def test_qat_acc(self):
    def _build_static_lenet(main, startup, is_test=False, seed=1000):
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                main.random_seed = seed
                startup.random_seed = seed
                img = fluid.layers.data(name='image',
                                        shape=[1, 28, 28],
                                        dtype='float32')
                label = fluid.layers.data(name='label',
                                          shape=[1],
                                          dtype='int64')
                prediction = StaticLenet(img)
                if not is_test:
                    loss = fluid.layers.cross_entropy(input=prediction,
                                                      label=label)
                    avg_loss = fluid.layers.mean(loss)
                else:
                    avg_loss = prediction
        return img, label, avg_loss

    reader = paddle.batch(paddle.dataset.mnist.test(),
                          batch_size=32,
                          drop_last=True)
    weight_quantize_type = 'abs_max'
    activation_quant_type = 'moving_average_abs_max'
    param_init_map = {}
    seed = 1000
    lr = 0.001

    # imperative train
    _logger.info(
        "--------------------------dynamic graph qat--------------------------"
    )
    imperative_qat = ImperativeQuantAware(
        weight_quantize_type=weight_quantize_type,
        activation_quantize_type=activation_quant_type,
        quantizable_layer_type=[
            'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
            'Swish'
        ])

    with fluid.dygraph.guard():
        np.random.seed(seed)
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

        lenet = ImperativeLenet()
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(loc=0.0,
                                         scale=0.01,
                                         size=np.product(p_shape)).reshape(
                                             p_shape).astype('float32')
            fixed_state[name] = value
            param_init_map[param.name] = value
        lenet.set_dict(fixed_state)

        imperative_qat.quantize(lenet)
        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape(-1, 1)

            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
            if batch_id > 500:
                break
        lenet.eval()

        paddle.jit.save(layer=lenet,
                        path="./dynamic_mnist/model",
                        input_spec=[
                            paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                                    dtype='float32')
                        ])

    # static graph train
    _logger.info(
        "--------------------------static graph qat--------------------------"
    )
    static_loss_rec = []
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)

    main = fluid.Program()
    infer = fluid.Program()
    startup = fluid.Program()
    static_img, static_label, static_loss = _build_static_lenet(
        main, startup, False, seed)
    infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                  seed)
    with fluid.unique_name.guard():
        with fluid.program_guard(main, startup):
            opt = AdamOptimizer(learning_rate=lr)
            opt.minimize(static_loss)

    scope = core.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    for param in main.all_parameters():
        param_tensor = scope.var(param.name).get_tensor()
        param_tensor.set(param_init_map[param.name], place)

    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quantize_type,
        quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
    add_quant_dequant_pass = AddQuantDequantPass(
        scope=scope,
        place=place,
        quantizable_op_type=[
            'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
        ])
    transform_pass.apply(main_graph)
    transform_pass.apply(infer_graph)
    add_quant_dequant_pass.apply(main_graph)
    add_quant_dequant_pass.apply(infer_graph)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=static_loss.name, build_strategy=build_strategy)
    feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                              place=place)
    with fluid.scope_guard(scope):
        for batch_id, data in enumerate(reader()):
            loss_v, = exe.run(binary,
                              feed=feeder.feed(data),
                              fetch_list=[static_loss])
            static_loss_rec.append(loss_v[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', loss_v))

    save_program = infer_graph.to_program()
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model("./static_mnist", [infer_img.name],
                                      [infer_pre], exe, save_program)

    rtol = 1e-08
    atol = 1e-10
    for i, (loss_d,
            loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
        diff = np.abs(loss_d - loss_s)
        if diff > (atol + rtol * np.abs(loss_s)):
            _logger.info(
                "diff({}) at {}, dynamic loss = {}, static loss = {}".format(
                    diff, i, loss_d, loss_s))
            break

    self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                np.array(static_loss_rec),
                                rtol=rtol,
                                atol=atol,
                                equal_nan=True),
                    msg='Failed to do the imperative qat.')
def test_save_quantized_model(self):
    weight_quantize_type = 'abs_max'
    activation_quantize_type = 'moving_average_abs_max'
    load_param_path = "test_save_quantized_model/lenet.pdparams"
    path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"
    dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint"
    static_model_save_dir = "./static_outscale_infer_model"

    imperative_out_scale = ImperativeQuantAware(
        weight_quantize_type=weight_quantize_type,
        activation_quantize_type=activation_quantize_type)

    with fluid.dygraph.guard():
        lenet = ImperativeLenet()
        load_dict = paddle.load(load_param_path)
        imperative_out_scale.quantize(lenet)
        lenet.set_dict(load_dict)

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)

    # load dynamic model
    [dynamic_inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=dynamic_model_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)
    # load static model
    [static_inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model(
         dirname=static_model_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX)

    dynamic_ops = dynamic_inference_program.global_block().ops
    static_ops = static_inference_program.global_block().ops

    for op in dynamic_ops[:]:
        if op.type == "flatten2" or 'fake' in op.type:
            dynamic_ops.remove(op)

    for op in static_ops[:]:
        if 'fake' in op.type:
            static_ops.remove(op)

    op_count = 0
    for i in range(len(dynamic_ops)):
        if dynamic_ops[i].has_attr("out_threshold"):
            op_count += 1
            self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
            self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                            static_ops[i].attr("out_threshold"))

    _logger.info("op_count: {}".format(op_count))
    self.assertTrue(op_count == 14)