def quant_embedding(program, place, config=None, scope=None): """quantize lookup_table op parameters Args: program(paddle.static.Program): infer program scope(paddle.static.Scope, optional): Scope records the mapping between variable names and variables, similar to brackets in programming languages. Usually users can use `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ . When ``None`` will use `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_. Default : ``None``. place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents the executor run on which device. config(dict, optional): config to quantize. The keys are 'quantize_op_types'. For op in quantize_op_types, you can define 'quantize_type', \ 'quantize_bits', 'dtype', 'threshold'. \ ``quantize_type`` is quantize type, supported types are ['abs_max'], default is "abs_max". ``quantize_bits`` supported bits are [8] and default is 8. ``dtype`` is quantize dtype, supported dtype are ['int8'], default is 'int8'. ``threshold`` is threshold to clip tensor before quant. When threshold is not set, \ tensor will not be clipped. Returns: None """ config = config or {} config = _merge_config(copy.deepcopy(_default_config), config) scope = paddle.static.global_scope() if scope is None else scope graph = IrGraph(core.Graph(program.desc), for_test=True) quantize_params_map = {} all_op = graph.all_op_nodes() for op in all_op: if op.inputs == [] and op.outputs == []: continue op_type = op.name() if op_type in config['quantize_op_types']: weight_name = op.input('W')[0] if weight_name in quantize_params_map.values(): continue embedding_node = graph._find_node_by_name(op.inputs, op.input('W')[0]) for op_node in embedding_node.outputs: if op_node.name() == 'fused_embedding_seq_pool': _split_embedding_seq_pool(graph, op_node) if config[op_type]['quantize_type'] == 'abs_max': _quant_embedding_abs_max(graph, scope, place, config[op_type], weight_name, embedding_node) elif config[op_type]['quantize_type'] == 'log': _quant_embedding_log(graph, scope, place, config[op_type], weight_name, embedding_node) quantize_params_map[weight_name] = _get_quant_var_name(weight_name) for op in all_op: if op.name() == 'fused_embedding_seq_pool': graph.safe_remove_nodes(op) return graph.to_program()
def test_graph_functions(self): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): loss = residual_block(2) opt = fluid.optimizer.Adam(learning_rate=0.001) opt.minimize(loss) graph = IrGraph(core.Graph(main.desc), for_test=False) marked_nodes = set() for op in graph.all_op_nodes(): if op.name().find('conv2d') > -1: marked_nodes.add(op) graph.draw('.', 'residual', marked_nodes) self.assertFalse(graph.has_circle()) self.assertEqual(graph.graph_num(), 1) nodes = graph.topology_sort() self.assertEqual(len(nodes), len(graph.all_op_nodes())) nodes_map = graph.build_adjacency_list() self.assertEqual(len(nodes_map), len(graph.all_op_nodes())) nodes_num = len(graph.all_nodes()) graph.safe_remove_nodes(marked_nodes) self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
def graph_apis(self, use_cuda=False, for_ci=True): main = fluid.Program() startup = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, startup): feeds, loss = conv_block() opt = fluid.optimizer.Adam(learning_rate=0.001) opt.minimize(loss) graph = IrGraph(core.Graph(main.desc), for_test=False) backup_graph = graph.clone() self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes())) build_strategy = fluid.BuildStrategy() build_strategy.memory_optimize = False build_strategy.enable_inplace = False origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy) backup_binary = fluid.CompiledProgram( backup_graph.graph).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup) iters = 5 batch_size = 8 train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) def _train(binary): for _ in range(iters): data = next(train_reader()) loss_v = exe.run(binary, feed=feeder.feed(data), fetch_list=[loss.name]) if not for_ci: print('{}: {}'.format('loss', loss_v)) _train(origin_binary) _train(backup_binary) checkponit_dir = "checkpoint_gpu" if use_cuda else "checkpoint_cpu" def _set_zero(var_name, scope, place): var = scope.find_var(var_name).get_tensor() var_array = np.zeros(var._get_dims()).astype("float32") var.set(var_array, place) sum_before = np.sum( np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( ))) fluid.io._save_persistable_nodes(exe, checkponit_dir, graph) _set_zero('conv2d_1.w_0', fluid.global_scope(), place) set_after = np.sum( np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( ))) self.assertEqual(set_after, 0) fluid.io._load_persistable_nodes(exe, checkponit_dir, graph) sum_after = np.sum( np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( ))) self.assertEqual(sum_before, sum_after) marked_nodes = set() for op in graph.all_op_nodes(): if op.name().find('conv2d') > -1: marked_nodes.add(op) if not for_ci: graph.draw('.', 'residual', marked_nodes) backup_marked_nodes = set() for op in backup_graph.all_op_nodes(): if op.name().find('conv2d') > -1: backup_marked_nodes.add(op) backup_graph.draw('./origin', 'backup', backup_marked_nodes) self.assertFalse(graph.has_circle()) self.assertEqual(graph.graph_num(), 1) nodes = graph.topology_sort() self.assertEqual(len(nodes), len(graph.all_op_nodes())) nodes_map = graph.build_adjacency_list() self.assertEqual(len(nodes_map), len(graph.all_op_nodes())) nodes_num = len(graph.all_nodes()) graph.safe_remove_nodes(marked_nodes) self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))