def _append_ops(self, block):
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
    # Infer the data type from the inputs and outputs for this test case.
    self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
    inputs = append_input_output(block, op_proto, self.inputs, True,
                                 self.dtype)
    outputs = append_input_output(block, op_proto, self.outputs, False,
                                  self.dtype)
    if hasattr(self, "cache_name_list"):
        for name in self.cache_name_list:
            inputs[name] = block.create_var(
                name=name,
                persistable=True,
                type=core.VarDesc.VarType.RAW,
                stop_gradient=True)
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=self.attrs if hasattr(self, "attrs") else dict())
    # Infer variable type and shape at compile time.
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)
    return op
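# A minimal sketch of the test case _append_ops expects: judging by the
# attributes read above, the test defines op_type, inputs, outputs, and
# optionally attrs before _append_ops runs. The OpTest base class and the
# field names follow Paddle's op_test convention; this exact test is an
# assumed illustration, not a real test from the suite.
import numpy as np

class TestElementwiseAddSketch(OpTest):
    def setUp(self):
        self.op_type = "elementwise_add"
        x = np.random.random((2, 3)).astype("float32")
        y = np.random.random((2, 3)).astype("float32")
        self.inputs = {'X': x, 'Y': y}
        self.outputs = {'Out': x + y}
        self.attrs = {'axis': -1}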
def _calc_output(self, place):
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
    program = Program()
    block = program.global_block()
    inputs = append_input_output(block, op_proto, self.inputs, True)
    outputs = append_input_output(block, op_proto, self.outputs, False)
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=self.attrs if hasattr(self, "attrs") else dict())
    # Infer variable type and shape at compile time.
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)
    fetch_list = []
    # iteritems() is Python 2 only; items() works on both.
    for var_name, var in outputs.items():
        if var_name in self.outputs:
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)
    feed_map = self.feed_var(inputs, place)
    exe = Executor(place)
    outs = exe.run(program,
                   feed=feed_map,
                   fetch_list=fetch_list,
                   return_numpy=False)
    return outs, fetch_list
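# A standalone sketch of the same build-and-run pattern without the
# op_test helpers: build a Program, add an op through the layers API, then
# feed numpy arrays and fetch outputs with an Executor. The op ("relu")
# and the shapes are arbitrary choices for illustration.
import numpy as np
import paddle.fluid as fluid

sketch_prog = fluid.Program()
with fluid.program_guard(sketch_prog):
    x = fluid.data(name="x", shape=[2, 3], dtype="float32")
    out = fluid.layers.relu(x)

exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(sketch_prog,
               feed={"x": np.random.random((2, 3)).astype("float32")},
               fetch_list=[out])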
def _append_ops(self, block):
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
    # Infer the data type from the inputs and outputs for this test case.
    self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
    inputs = append_input_output(block, op_proto, self.inputs, True,
                                 self.dtype)
    outputs = append_input_output(block, op_proto, self.outputs, False,
                                  self.dtype)
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=self.attrs if hasattr(self, "attrs") else dict())
    # Infer variable type and shape at compile time.
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)
def _remove_no_need_ops(auto_parallel_main_prog, dist_context, rank_id):
    """Remove unneeded ops in the main program."""
    not_remove_op_ref = [
        "create_py_reader", "create_double_buffer_reader", "read"
    ]
    remove_op_idx = []
    block = auto_parallel_main_prog.global_block()
    ops = block.ops
    vars = block.vars
    for idx, op in enumerate(ops):
        # Handle the read op specially in the pipeline scene; it will be
        # removed in the future.
        if op.type == "read":
            dim_list = []
            for var_name in op.output_arg_names:
                dim_list.extend(vars[var_name].shape)
            for i in range(idx, -1, -1):
                if ops[i].type == "create_py_reader":
                    ops[i]._set_attr("shape_concat", dim_list)
                    break
            continue
        # Replace the input and output of the c_sync_comm_stream op in the
        # pipeline scene.
        if op.type == "c_sync_comm_stream":
            need_save = []
            for var_name in op.input_arg_names:
                process_mesh = dist_context.get_tensor_dist_attr_for_program(
                    vars[var_name]).process_mesh
                if rank_id in process_mesh.processes:
                    need_save.append(var_name)
            if not need_save:
                remove_op_idx.append(idx)
                continue
            proto = OpProtoHolder.instance().get_op_proto(op.type)
            op.desc.set_input(proto.inputs[0].name, need_save)
            op.desc.set_output(proto.outputs[0].name, need_save)
            continue
        # Decide whether any other op should be removed.
        op_dist_attr = dist_context.get_op_dist_attr_for_program(op)
        if op_dist_attr is not None:
            op_process_mesh = op_dist_attr.process_mesh
            if (rank_id not in op_process_mesh.processes
                    and op.type not in not_remove_op_ref):
                remove_op_idx.append(idx)
    # Remove in reverse so earlier removals don't shift pending indices.
    for idx in remove_op_idx[::-1]:
        block._remove_op(idx)
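# A toy sketch of the collect-then-remove-in-reverse pattern used above:
# removing ops front to back would shift the indices of ops not yet
# removed, so passes gather indices first and call block._remove_op from
# the back. The program below is an arbitrary example built for this demo.
import paddle
paddle.enable_static()

toy_prog = paddle.static.Program()
with paddle.static.program_guard(toy_prog):
    x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
    y = paddle.nn.functional.relu(x)
    z = paddle.scale(y, scale=2.0)

toy_block = toy_prog.global_block()
idx_to_remove = [i for i, op in enumerate(toy_block.ops) if op.type == "relu"]
for idx in idx_to_remove[::-1]:
    toy_block._remove_op(idx)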
import os

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import OpProtoHolder

# export LD_LIBRARY_PATH=/work/scripts/custom_op/new:$( python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'):$LD_LIBRARY_PATH

paddle.disable_static()

fluid.core.load_custom_op('relu2_op.so')
OpProtoHolder.instance().update_op_proto()
op_proto = OpProtoHolder.instance().get_op_proto("relu2")
print(op_proto)


def relu2(x, name=None):
    # The type of relu2 must match the type the OP was registered with.
    helper = LayerHelper("relu2", **locals())
    # Create the output Variable.
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type="relu2", inputs={"X0": [x]}, outputs={"Out": out})
    return out


paddle.set_device('cpu')
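# Dynamic-graph usage sketch for the custom op defined above; it assumes
# relu2_op.so was built against this Paddle version and implements a
# standard ReLU, so negative entries become zero.
x = paddle.to_tensor(np.array([[-1.0, 0.0, 2.0]], dtype=np.float32))
y = relu2(x)
print(y.numpy())  # expected [[0. 0. 2.]] if relu2 behaves like ReLU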
def remove_no_need_in_startup(auto_parallel_main_prog,
                              auto_parallel_startup_prog):
    """Remove unneeded vars and ops in the startup program."""
    main_input_vars = set()
    main_ops = auto_parallel_main_prog.global_block().ops
    for op in main_ops:
        for var_name in op.input_arg_names:
            main_input_vars.add(var_name)

    startup_block = auto_parallel_startup_prog.global_block()
    startup_output_vars = set()
    startup_ops = startup_block.ops
    for op in startup_ops:
        # Skip the c_sync_comm_stream op.
        if op.type == "c_sync_comm_stream":
            continue
        for var_name in op.output_arg_names:
            startup_output_vars.add(var_name)

    need_vars = set()
    for var_name in startup_output_vars:
        if var_name in main_input_vars:
            need_vars.add(var_name)

    startup_ops = startup_block.ops
    actual_need_vars = set()
    for idx, op in enumerate(startup_ops):
        is_need_op = False
        if op.type == "c_sync_comm_stream":
            continue
        for var_name in op.output_arg_names:
            if var_name in need_vars:
                is_need_op = True
                break
        if is_need_op:
            for var_name in op.output_arg_names:
                actual_need_vars.add(var_name)
            for var_name in op.input_arg_names:
                actual_need_vars.add(var_name)

    remove_vars = set()
    for var_name in startup_block.vars:
        if var_name not in actual_need_vars:
            remove_vars.add(var_name)
    for var in remove_vars:
        startup_block._remove_var(var)

    remove_op_idx = []
    vars = startup_block.vars
    for idx, op in enumerate(startup_block.ops):
        is_no_need_op = False
        if op.type == "c_sync_comm_stream":
            var_names = []
            for var_name in op.input_arg_names:
                if var_name in vars:
                    var_names.append(var_name)
            if not var_names:
                remove_op_idx.append(idx)
            else:
                proto = OpProtoHolder.instance().get_op_proto(op.type)
                op.desc.set_input(proto.inputs[0].name, var_names)
                op.desc.set_output(proto.outputs[0].name, var_names)
            continue
        for var_name in op.output_arg_names:
            if var_name not in vars:
                is_no_need_op = True
                break
        if is_no_need_op:
            remove_op_idx.append(idx)
    for idx in remove_op_idx[::-1]:
        startup_block._remove_op(idx)
import time  # needed for the timing in the non-numpy branch below


def eval_fluid_op(self, no_check_set, return_numpy):
    """Run a Paddle program containing only the op under test.

    Returns the output values after running.
    """
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)

    # Create a new paddle scope and program.
    place = core.CPUPlace()
    exe = Executor(place)
    scope = core.Scope()
    with scope_guard(scope):
        program = Program()
        self.block = program.global_block()

        # Inputs and outputs used by the op that need to be persisted
        # in the global block.
        persistable = self.persistable if hasattr(self, "persistable") else []

        # Add input and output variables to the global block.
        inputs = self.append_input_output(self.block, op_proto, self.inputs,
                                          persistable, True)
        outputs = self.append_input_output(self.block, op_proto, self.outputs,
                                           persistable, False)

        # Append the op.
        self.op = self.block.append_op(
            type=self.op_type,
            inputs=inputs,
            outputs=outputs,
            attrs=self.attrs if hasattr(self, "attrs") else dict())

        # Infer the var type and shape of the op based on the block's
        # inputs and outputs.
        self.op.desc.infer_var_type(self.block.desc)
        self.op.desc.infer_shape(self.block.desc)

        # Outputs that should not be used as outputs of the ONNX node.
        ignored_outputs = self.ignored_outputs if hasattr(
            self, "ignored_outputs") else []

        # Construct a unique list of outputs to fetch.
        self.fetch_list = []
        for var_name, var in outputs.items():
            if var_name in no_check_set:
                continue
            if var_name in self.outputs and var_name not in ignored_outputs:
                if isinstance(var, list):
                    for v in var:
                        self.fetch_list.append(v)
                else:
                    self.fetch_list.append(var)

        self.feed_map = self.feed_var(inputs, place)

        if return_numpy:
            outs = exe.run(program,
                           feed=self.feed_map,
                           fetch_list=self.fetch_list,
                           return_numpy=True)
        else:
            start_time = time.time()
            outs = self.run_executor_return_tensor(exe, program)
            end_time = time.time()
        return outs
def __init__(self, op_name, is_show=True):
    self.place = fluid.CPUPlace()
    self.op_name = op_name
    self.op_proto = OpProtoHolder.instance().get_op_proto(self.op_name)
    if is_show:
        self.show()
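# Inspection sketch: OpProto objects expose the op's input and output
# slots, which the passes above rely on (proto.inputs[0].name,
# proto.outputs[0].name). "relu" is an arbitrary built-in op chosen for
# illustration.
proto = OpProtoHolder.instance().get_op_proto("relu")
print(proto.type)                             # "relu"
print([slot.name for slot in proto.inputs])   # e.g. ["X"]
print([slot.name for slot in proto.outputs])  # e.g. ["Out"]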
def load_custom_op(so_name):
    fluid.core.load_custom_op(so_name)
    OpProtoHolder.instance().update_op_proto()
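# Usage sketch, assuming the shared library was compiled for this Paddle
# build: loading registers the new op with the framework, and
# update_op_proto() refreshes the cached protos so get_op_proto can see it.
load_custom_op("relu2_op.so")
print(OpProtoHolder.instance().get_op_proto("relu2"))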