def get_test_files(name):
    """Download and unpack a model archive, then load its test tensors.

    The archive registered under ``name`` in ``URLS`` is downloaded into
    ``CURR_PATH`` and extracted there.  Every sub-directory of the extracted
    ``<CURR_PATH>/<name>`` folder is treated as one test case that contains
    ``input_0.pb`` and ``output_0.pb``.

    Parameters
    ----------
    name : str
        Key into ``URLS``; also the name of the extracted directory.

    Returns
    -------
    tuple
        ``(model_path, inputs, outputs)`` where ``model_path`` points at
        ``model.onnx`` and ``inputs`` / ``outputs`` are equally long lists of
        numpy arrays, one entry per test-case directory.
    """
    base_dir = str(CURR_PATH)
    tar_name = download(URLS.get(name), dirname=base_dir)

    # The context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts member paths inside the archive; only
    # use this with archives from trusted URLs.
    tar_path = os.path.join(CURR_PATH, tar_name)
    with tarfile.open(str(tar_path), "r:*") as tar:
        tar.extractall(path=base_dir)

    def read_tensor(path):
        # Parse one serialized TensorProto file into a numpy array.
        proto = TensorProto()
        with open(path, 'rb') as proto_file:
            proto.ParseFromString(proto_file.read())
        return numpy_helper.to_array(proto)

    data_dir = os.path.join(CURR_PATH, name)
    model_path = os.path.join(data_dir, 'model.onnx')

    inputs = []
    outputs = []
    # Sorted so that the order of the test cases is reproducible; os.listdir
    # returns entries in arbitrary order.
    for test_file in sorted(os.listdir(data_dir)):
        case_dir = os.path.join(data_dir, test_file)
        # skip the non-dir files (e.g. model.onnx itself)
        if not os.path.isdir(case_dir):
            continue
        inputs.append(read_tensor(os.path.join(case_dir, 'input_0.pb')))
        outputs.append(read_tensor(os.path.join(case_dir, 'output_0.pb')))

    return model_path, inputs, outputs
def test_make_tensor(self):  # type: () -> None
    """Check helper.make_tensor for typed values, raw bytes and strings."""
    expected = np.random.randn(2, 3).astype(np.float32)
    flat = expected.reshape(6)

    # Values passed as a plain Python list.
    from_values = helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=flat.tolist(),
    )
    self.assertEqual(from_values.name, 'test')
    np.testing.assert_equal(expected, numpy_helper.to_array(from_values))

    # Same data, but handed over as bytes with raw=True.
    from_raw = helper.make_tensor(
        name='test',
        data_type=TensorProto.FLOAT,
        dims=(2, 3),
        vals=flat.tobytes(),
        raw=True,
    )
    np.testing.assert_equal(expected, numpy_helper.to_array(from_raw))

    # String tensors must round-trip through the string_data field.
    encoded_names = [
        s.encode('ascii') for s in ['Amy', 'Billy', 'Cindy', 'David']
    ]
    string_tensor = helper.make_tensor(
        name='test',
        data_type=TensorProto.STRING,
        dims=(2, 2),
        vals=encoded_names,
        raw=False,
    )
    self.assertEqual(encoded_names, list(string_tensor.string_data))
def _parse_array(self, tensor_proto):
    """Convert an ONNX TensorProto into an ``nd`` array.

    Tensors that declare dims are reshaped to those dims; tensors without
    dims (scalars) are wrapped into a one-element array.
    """
    try:
        from onnx.numpy_helper import to_array
    except ImportError:
        raise ImportError("Onnx and protobuf need to be installed. "
                          + "Instructions to install - https://github.com/onnx/onnx")

    dims = tuple(tensor_proto.dims)
    if dims:
        values = to_array(tensor_proto).reshape(dims)
    else:
        # Scalar parameter stored without any dims.
        values = np.array([to_array(tensor_proto)])
    return nd.array(values)
def run_generated_test(model_file, data_dir, device='CPU'):
    """Run a model on stored inputs and compare with the stored outputs.

    ``data_dir`` is expected to hold ``input_<i>.pb`` / ``output_<i>.pb``
    files numbered from 0.  The number of files matching each glob pattern
    decides how many of them are loaded.
    """
    model = onnx.load(model_file)

    def load_numbered(pattern, template):
        # Count the matching files, then load them by index so that the
        # resulting list is ordered 0..count-1.
        count = len(glob.glob(os.path.join(data_dir, pattern)))
        arrays = []
        for idx in range(count):
            path = os.path.join(data_dir, template.format(idx))
            arrays.append(
                numpy_helper.to_array(load_tensor_as_numpy_array(path)))
        return arrays

    inputs = load_numbered("input_*.pb", "input_{}.pb")
    outputs = load_numbered("output_*.pb", "output_{}.pb")

    prepared = c2.prepare(model, device=device)
    assert_similar(outputs, prepared.run(inputs))
def prepare(
        cls,
        model,  # type: ModelProto
        device,  # type: singa device
        **kwargs  # type: Any
):  # type: (...) -> Optional[BackendRep]
    """
    Build a backend representation from the Constant nodes of a model.

    Args:
        model: onnx model proto
        device: singa device
    Return:
        SingaBackendRep instance
    """
    super(SingaBackend, cls).prepare(model, device, **kwargs)

    name2tensor = {}
    constant_nodes = (n for n in model.graph.node if n.op_type == "Constant")
    for node in constant_nodes:
        attrs = node.attribute
        data = helper.get_attribute_value(attrs[0])
        if len(attrs) == 3:
            # The two extra attributes carry the gradient flags.
            requires_grad = helper.get_attribute_value(attrs[1])
            stores_grad = helper.get_attribute_value(attrs[2])
        else:
            requires_grad = True
            stores_grad = True
        name2tensor[node.output[0]] = tensor.Tensor(
            device=device,
            data=numpy_helper.to_array(data),
            requires_grad=requires_grad,
            stores_grad=stores_grad,
        )
    return SingaBackendRep(model, device, name2tensor)
def _parse_array(self, tensor_proto):
    """Convert an ONNX TensorProto into a ``tvm.nd`` array with its dims."""
    try:
        from onnx.numpy_helper import to_array
    except ImportError as e:
        raise ImportError(
            "Unable to import onnx which is required {}".format(e))

    values = to_array(tensor_proto)
    target_shape = tuple(tensor_proto.dims)
    return tvm.nd.array(values.reshape(target_shape))
def _parse_array(self, tensor_proto):
    """Convert an ONNX TensorProto into an ``nd`` array with its dims."""
    try:
        from onnx.numpy_helper import to_array
    except ImportError:
        raise ImportError("Onnx and protobuf need to be installed. "
                          + "Instructions to install - https://github.com/onnx/onnx")

    values = to_array(tensor_proto)
    target_shape = tuple(tensor_proto.dims)
    return nd.array(values.reshape(target_shape))
def test_fuse_bn_into_conv_simple(self):  # type: () -> None
    """Fusing BatchNormalization into Conv must fold scale/shift into W, B.

    Runs once for float32 and once for float64 and compares the optimized
    initializers against the analytically folded weights and bias.
    """
    type_pairs = [(TensorProto.FLOAT, np.float32),
                  (TensorProto.DOUBLE, np.float64)]
    for tensor_type, np_type in type_pairs:
        conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"])
        bn = helper.make_node("BatchNormalization",
                              ["Y", "scale", "b", "mean", "var"], ["Z"])

        def shifted_randn(*shape):
            # Random values moved away from zero.
            return np.random.randn(*shape).astype(np_type) + 2

        # Draw order matters for reproducibility under a fixed seed.
        W = shifted_randn(3, 2, 5, 5)
        B = shifted_randn(3)
        scale = shifted_randn(3)
        b = shifted_randn(3)
        mean = shifted_randn(3)
        var = np.abs(np.random.randn(3).astype(np_type)) + 2

        named_params = [('W', W), ('B', B), ('scale', scale), ('b', b),
                        ('mean', mean), ('var', var)]

        initializers = []
        for name, npa in named_params:
            initializers.append(
                helper.make_tensor(name, tensor_type, npa.shape,
                                   npa.tobytes(), raw=True))

        graph_inputs = [
            helper.make_tensor_value_info("X", tensor_type, (5, 2, 28, 28)),
            helper.make_tensor_value_info("W", tensor_type, (3, 2, 5, 5)),
        ]
        for name in ("B", "scale", "b", "mean", "var"):
            graph_inputs.append(
                helper.make_tensor_value_info(name, tensor_type, (3,)))

        graph = helper.make_graph(
            [conv, bn],
            "test",
            graph_inputs,
            [helper.make_tensor_value_info("Z", tensor_type, (3,))],
            initializer=initializers,
            value_info=[
                helper.make_tensor_value_info("Y", tensor_type, (3,))
            ],
        )
        optimized_model = self._optimized(graph, ["fuse_bn_into_conv"])
        optimized_graph = optimized_model.graph

        # Only the fused Conv and its two initializers may remain.
        self.assertEqual(len(optimized_graph.node), 1)
        self.assertEqual(optimized_graph.node[0].op_type, 'Conv')
        self.assertEqual(len(optimized_graph.initializer), 2)
        new_W = numpy_helper.to_array(optimized_graph.initializer[0])
        new_b = numpy_helper.to_array(optimized_graph.initializer[1])

        f = scale / np.sqrt(var + 1e-5)
        expected_b = (B - mean) * f + b
        expected_W = W * f[:, np.newaxis, np.newaxis, np.newaxis]
        np.testing.assert_almost_equal(expected_b, new_b)
        np.testing.assert_almost_equal(expected_W, new_W)
def _test_numpy_helper_int_type(self, dtype):  # type: (np.number) -> None
    """Round-trip a random integer array of ``dtype`` through numpy_helper."""
    limits = np.iinfo(dtype)
    original = np.random.randint(
        limits.min, limits.max, dtype=dtype, size=(13, 37))

    proto = numpy_helper.from_array(original, "test")
    self.assertEqual(proto.name, "test")

    recovered = numpy_helper.to_array(proto)
    np.testing.assert_equal(original, recovered)
def get_model_input(model_dir):
    """Collect the sample inputs stored next to a model.

    Two layouts are read from ``model_dir``:

    * ``test_data_*.npz`` archives, whose ``inputs`` entry is used, and
    * ``test_data_set*`` directories containing ``input_<i>.pb`` files
      numbered from 0, each holding a serialized TensorProto.

    Both globs are processed in sorted order so the result is reproducible.

    Parameters
    ----------
    model_dir : str
        Directory that holds the test data.

    Returns
    -------
    list
        Input arrays.  Empty if no test data is found.
    """
    import onnx
    from onnx import numpy_helper

    model_inputs = []
    npz_pattern = os.path.join(model_dir, 'test_data_*.npz')
    for test_data_npz in sorted(glob.glob(npz_pattern)):
        # np.load returns an NpzFile that keeps the archive open; the
        # context manager closes it once the arrays are materialised.
        with np.load(test_data_npz, encoding='bytes') as test_data:
            # NOTE(review): each archive replaces (does not extend) the list,
            # so only the last archive's inputs survive - confirm intended.
            model_inputs = list(test_data['inputs'])

    set_pattern = os.path.join(model_dir, "test_data_set*")
    for test_data_dir in sorted(glob.glob(set_pattern)):
        inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))
        for i in range(inputs_num):
            input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))
            tensor = onnx.TensorProto()
            with open(input_file, 'rb') as f:
                tensor.ParseFromString(f.read())
            model_inputs.append(numpy_helper.to_array(tensor))
    return model_inputs
def getInputNodes(self, nodeName):
    """Get names of the inputs of a node that are produced by other nodes

    Each input of the node is classified as follows:

    * if some node in ``self.graph.node`` lists it as an output, its name is
      added to the returned list;
    * otherwise, if it names an entry of ``self.graph.initializer``, that
      initializer is converted to a numpy array and stored in
      ``self.constantMap`` under the input name (side effect);
    * otherwise it is ignored.

    Args:
        nodeName (str): Name of node

    Returns:
        (list of str): Names of the inputs that are outputs of other nodes

    :meta private:
    """
    node = self.getNode(nodeName)
    inNodes = []
    for inp in node.input:
        # any() stops at the first producer instead of building a full list.
        if any(inp in nde.output for nde in self.graph.node):
            inNodes.append(inp)
            continue
        # First initializer with a matching name, if there is one.
        init = next(
            (cand for cand in self.graph.initializer if cand.name == inp),
            None)
        if init is not None:
            self.constantMap[inp] = numpy_helper.to_array(init)
    return inNodes
def fuse_add_bias_skip_layer_norm(self):
    """Fold a bias ``Add`` into the following SkipLayerNormalization node.

    For every 4-input SkipLayerNormalization node whose parent path is
    ``Add <- MatMul``, and where one input of the ``Add`` is a 1-D
    initializer (the bias), the ``Add`` and the original node are replaced
    by a new ``com.microsoft`` SkipLayerNormalization node that takes the
    bias as a fifth input.
    """
    input_name_to_nodes = self.input_name_to_nodes()
    output_name_to_node = self.output_name_to_node()

    nodes_to_remove = []
    nodes_to_add = []

    skip_layer_norm_nodes = self.get_nodes_by_op_type(
        "SkipLayerNormalization")
    for node in skip_layer_norm_nodes:
        if len(node.input) != 4:
            continue

        # match_parent_path is expected to record, per matched parent, the
        # input index it was found at (asserted below).
        return_indice = []
        nodes = self.match_parent_path(node, ['Add', 'MatMul'], [None, None],
                                       None, return_indice)
        if nodes is None:
            continue
        assert len(return_indice) == 2
        add_input_index = return_indice[0]
        if add_input_index >= 2:
            continue

        (add, matmul) = nodes

        # bias should be one dimension
        # Reset per node: without this the first miss raised
        # UnboundLocalError and later misses silently reused the bias found
        # for a previous node.
        bias_index = -1
        bias_weight = None
        for i, input_name in enumerate(add.input):
            initializer = self.get_initializer(input_name)
            if initializer is None:
                continue
            bias_index = i
            bias_weight = numpy_helper.to_array(initializer)
            break
        if bias_weight is None:
            logger.debug("Bias weight not found")
            continue
        if len(bias_weight.shape) != 1:
            logger.debug("Bias weight is not 1D")
            continue

        subgraph_nodes = [node, add]
        if not self.is_safe_to_fuse_nodes(subgraph_nodes, [node.output[0]],
                                          input_name_to_nodes,
                                          output_name_to_node):
            logger.debug(
                "Skip fusing SkipLayerNormalization with Bias since it is not safe"
            )
            continue

        nodes_to_remove.extend(subgraph_nodes)
        new_node = onnx.helper.make_node(
            "SkipLayerNormalization",
            inputs=[
                # The input of the original node that did not come from Add.
                node.input[1 - add_input_index],
                matmul.output[0],
                node.input[2],
                node.input[3],
                add.input[bias_index],
            ],
            outputs=node.output,
            name=self.create_node_name("SkipLayerNormalization",
                                       "SkipLayerNorm_AddBias_"))
        new_node.domain = "com.microsoft"
        nodes_to_add.append(new_node)

    if len(nodes_to_add) > 0:
        logger.info(
            f"Fused SkipLayerNormalization with Bias count:{len(nodes_to_add)}"
        )

    self.remove_nodes(nodes_to_remove)
    self.add_nodes(nodes_to_add)
def fuse_reshape(self):
    """Replace dynamically computed Reshape shapes with a constant shape.

    Looks for ``Reshape`` nodes whose shape input is a ``Concat`` of 3 or 4
    pieces where the first two pieces are ``Unsqueeze(Gather(Shape(x)))``
    selecting dimensions 0 and 1.  The remaining pieces are either
    initializers (copied into the new shape) or a recognised sub-graph
    (replaced by ``-1``).  When every ``Shape`` node reads the Reshape's own
    data input, the Concat sub-graph is replaced by a single ``Constant``
    node holding ``[0, 0, ...]``.
    """
    output_name_to_node = self.output_name_to_node()

    nodes_to_remove = []
    nodes_to_add = []

    def initializer_dims(initializer):
        # to_array always returns an ndarray; flatten it to plain Python ints
        # so that `shape` stays a flat list (appending the array itself made
        # the list ragged and broke `-1 in shape` / np.asarray below).
        values = numpy_helper.to_array(initializer)
        return np.asarray(values).reshape(-1).tolist()

    for reshape_node in self.get_nodes_by_op_type('Reshape'):
        if reshape_node.input[1] not in output_name_to_node:
            continue
        concat_node = output_name_to_node[reshape_node.input[1]]
        if (concat_node.op_type != 'Concat' or len(concat_node.input) < 3
                or len(concat_node.input) > 4):
            continue

        path0 = self.match_parent_path(concat_node,
                                       ['Unsqueeze', 'Gather', 'Shape'],
                                       [0, 0, 0], output_name_to_node)
        if path0 is None:
            continue
        (_, gather_0, shape_0) = path0

        path1 = self.match_parent_path(concat_node,
                                       ['Unsqueeze', 'Gather', 'Shape'],
                                       [1, 0, 0], output_name_to_node)
        if path1 is None:
            continue
        (_, gather_1, shape_1) = path1

        # Dimensions 0 and 1 are copied from the input, encoded as 0.
        shape = []
        gather_value = self.get_constant_value(gather_0.input[1])
        if gather_value == 0:
            shape.append(0)

        gather_value = self.get_constant_value(gather_1.input[1])
        if gather_value == 1:
            shape.append(0)

        if len(shape) != 2:
            continue

        path2 = []
        path3 = []
        shape_nodes = [shape_0, shape_1]
        if len(concat_node.input) == 3 and self.get_initializer(
                concat_node.input[2]) is None:
            path2 = self.match_parent_path(
                concat_node, ['Unsqueeze', 'Mul', 'Gather', 'Shape'],
                [2, 0, 0, 0], output_name_to_node)
            if path2 is None:
                # GPT2 exported by PyTorch 1.4 with opset_version=11
                path2 = self.match_parent_path(
                    concat_node,
                    ['Unsqueeze', 'Mul', 'Squeeze', 'Slice', 'Shape'],
                    [2, 0, 0, 0, 0], output_name_to_node)
                if path2 is None:
                    continue

            path3 = self.match_parent_path(
                concat_node, ['Unsqueeze', 'Mul', 'Gather', 'Shape'],
                [2, 0, 1, 0], output_name_to_node)
            if path3 is None:
                # GPT2 exported by PyTorch 1.4 with opset_version=11
                path3 = self.match_parent_path(
                    concat_node,
                    ['Unsqueeze', 'Mul', 'Squeeze', 'Slice', 'Shape'],
                    [2, 0, 1, 0, 0], output_name_to_node)
                if path3 is None:
                    continue

            shape_nodes.extend([path2[-1], path3[-1]])
            shape.append(-1)
        elif len(concat_node.input) > 2:
            concat_2 = self.get_initializer(concat_node.input[2])
            if concat_2 is None:
                continue
            shape.extend(initializer_dims(concat_2))

        if len(concat_node.input) == 4 and self.get_initializer(
                concat_node.input[3]) is None:
            # Only one dimension of a Reshape may be inferred (-1).
            if -1 in shape:
                continue

            path2 = self.match_parent_path(
                concat_node, ['Unsqueeze', 'Div', 'Gather', 'Shape'],
                [3, 0, 0, 0], output_name_to_node)
            if path2 is None:
                # GPT2 exported by PyTorch 1.4 with opset_version=11
                path2 = self.match_parent_path(
                    concat_node,
                    ['Unsqueeze', 'Div', 'Squeeze', 'Slice', 'Shape'],
                    [3, 0, 0, 0, 0], output_name_to_node)
                if path2 is None:
                    continue
            shape_nodes.extend([path2[-1]])
            shape.append(-1)
        elif len(concat_node.input) > 3:
            concat_3 = self.get_initializer(concat_node.input[3])
            if concat_3 is None:
                continue
            shape.extend(initializer_dims(concat_3))

        # Every Shape node must inspect the tensor that is being reshaped.
        root_input = reshape_node.input[0]
        same_shape_input = all(
            shape_node.input[0] == root_input for shape_node in shape_nodes)
        if not same_shape_input:
            continue

        shape_value = np.asarray(shape, dtype=np.int64)

        constant_shape_name = self.create_node_name('Constant',
                                                    'constant_shape')
        new_node = onnx.helper.make_node(
            'Constant',
            inputs=[],
            outputs=[constant_shape_name],
            value=onnx.helper.make_tensor(name='const_tensor',
                                          data_type=TensorProto.INT64,
                                          dims=shape_value.shape,
                                          vals=shape_value))
        reshape_node.input[1] = constant_shape_name
        reshape_node.name = self.create_node_name('Reshape', 'Reshape_Fuse')
        nodes_to_remove.extend([concat_node])
        nodes_to_remove.extend(path0)
        nodes_to_remove.extend(path1)
        nodes_to_remove.extend(path2)
        nodes_to_remove.extend(path3)
        nodes_to_add.append(new_node)

    logger.info(f"Fused Reshape count:{len(nodes_to_add)}")

    self.remove_nodes(nodes_to_remove)
    self.add_nodes(nodes_to_add)
def convert_tensor(tensor):
    """Rescale ``tensor`` in place to +/-127 and cast it to ``dest_np_type``.

    The values are divided by the largest magnitude in the tensor and
    multiplied by 127 before the cast.  Empty and all-zero tensors are only
    cast: there is nothing to scale, and scaling would divide by zero.
    The proto is overwritten with the converted data and keeps its name.
    """
    a = numpy_helper.to_array(tensor)
    # np.max / np.min raise on empty arrays, so guard on size first.
    peak = max(abs(np.max(a)), abs(np.min(a))) if a.size else 0
    if peak != 0:
        a = a / peak * 127
    a = a.astype(dest_np_type)
    tensor.CopyFrom(numpy_helper.from_array(a, name=tensor.name))
def test_attention_streamingmemory(tmpdir):
    """Compare an attention model across many session-option variants.

    The same model (``numLayers`` blocks of matmul + attention followed by
    an L1 loss) is trained for 10 steps under each entry of
    ``test_variants``.  Variant 0 is the ground truth: for every other
    variant the returned anchors and the initializers of the saved ONNX
    model must be close to those of variant 0.

    ``tmpdir`` is combined with ``/`` and a file name to store each
    variant's model as ``streamingmemory_attention_<index>.onnx``.
    """
    np.random.seed(0XDEAD1337)
    batches_per_step = 5
    batch_size = 8
    hidden_size = 16
    sequence_length = 8
    attention_heads = 4
    # True division: this is a float (4.0), passed on to attention_onnx.
    qkv_length = hidden_size / attention_heads

    input_shape = [batch_size * sequence_length, hidden_size]
    mask_shape = [batch_size, 1, 1, sequence_length]

    qkv_data = np.random.normal(
        0, 0.02, [hidden_size, hidden_size * 3]).astype(np.float32)

    # One mask per batch of the step: 0 where the position index is smaller
    # than the batch index i, -1000 everywhere else.
    r = np.arange(0, sequence_length)
    r = np.reshape(batch_size * [r], mask_shape)
    masks = []
    for i in range(batches_per_step):
        masks.append(np.less(r, i).astype(np.float32))
    mask_data = (1 - np.stack(masks)) * -1000.0

    input_data = np.random.normal(0, 0.02, [batches_per_step] +
                                  input_shape).astype(np.float32)

    def run_test(index, options):
        # Build, train and save one variant; returns its anchor arrays.
        per_replica_batch_size = batch_size / options["replication"]
        # Each replica sees only its share of the leading dimension.
        model_input_shape = input_shape[:]
        model_input_shape[0] = int(model_input_shape[0] /
                                   options["replication"])
        model_mask_shape = mask_shape[:]
        model_mask_shape[0] = int(model_mask_shape[0] / options["replication"])

        # Default phase stride, unless the variant sets a truthy "stride".
        stride = 2 // options["stages"]
        if "stride" in options and options["stride"]:
            stride = options["stride"]

        builder = popart.Builder(opsets={
            "ai.onnx": 9,
            "ai.onnx.ml": 1,
            "ai.graphcore": 1
        })

        mask = builder.addInputTensor(
            popart.TensorInfo("FLOAT", model_mask_shape), "mask")
        x_in = builder.addInputTensor(
            popart.TensorInfo("FLOAT", model_input_shape), "x_in")

        anchors = {}
        x = x_in
        for i in range(options["numLayers"]):
            qkv = builder.addInitializedInputTensor(qkv_data, f"qkv_{i}")
            # Anchor the gradient of every layer's weight.
            anchors[popart.reservedGradientPrefix() +
                    qkv] = popart.AnchorReturnType("All")

            vgid = (i % options["stages"]) if options["phasedExecution"] else i

            with builder.virtualGraph(vgid), builder.executionPhase(i * stride):
                x = builder.aiOnnx.matmul([x, qkv])
                x = attention_onnx(builder, x, mask, per_replica_batch_size,
                                   sequence_length, hidden_size,
                                   attention_heads, qkv_length)

        # The loss is placed where the last layer was placed.
        vgid = ((options["numLayers"] - 1) % options["stages"]
                ) if options["phasedExecution"] else options["numLayers"] - 1

        with builder.virtualGraph(vgid), builder.executionPhase(
            (options["numLayers"] - 1) * stride):
            l1 = builder.aiGraphcore.l1loss([x], 0.2, popart.ReductionType.Sum)

        proto = builder.getModelProto()

        # Remember which anchors are gradients before adding the output.
        gradient_keys = list(anchors.keys())
        anchors[x] = popart.AnchorReturnType("All")

        dataFlow = popart.DataFlow(batches_per_step, anchors)

        opts = popart.SessionOptions()
        opts.executionPhaseSettings.stages = options["stages"]
        opts.executionPhaseSettings.phases = (
            options["numLayers"] * stride if options["phasedExecution"] else 0)
        opts.enableOutlining = options["outlining"]

        if "phaseSchedule" in options:
            opts.executionPhaseSettings.schedule = options["phaseSchedule"]

        # Phased execution currently does its own recompute annotations
        opts.autoRecomputation = (popart.RecomputationType.Standard
                                  if options["explicitRecomputation"] else
                                  popart.RecomputationType.NoRecompute)

        opts.outlineThreshold = -np.inf
        opts.enableOutliningCopyCostPruning = False
        opts.virtualGraphMode = (popart.VirtualGraphMode.ExecutionPhases
                                 if options["phasedExecution"] else
                                 popart.VirtualGraphMode.Manual)
        opts.explicitRecomputation = options["explicitRecomputation"]
        opts.aliasZeroCopy = options["aliasZeroCopy"]

        opts.batchSerializationSettings.factor = options["batchSerialize"]
        if "batchSchedule" in options:
            opts.batchSerializationSettings.batchSchedule = options[
                "batchSchedule"]
        if "batchConcat" in options:
            # Do not concatenate the batch across phases and virtual graphs
            # (causes more, smaller transfers but allows for individual
            # sub-batch elements to be transferred)
            opts.batchSerializationSettings.concatOnVirtualGraphChange = options[
                "batchConcat"]
            opts.batchSerializationSettings.concatOnExecutionPhaseChange = options[
                "batchConcat"]
            # Wait with loading activations until they are required
            opts.executionPhaseSettings.activationIOSchedule = popart.ExecutionPhaseIOSchedule.OnDemand

        # A generic location setting applies to all four tensor classes...
        if "tensorLocationSettings" in options and options[
                "tensorLocationSettings"]:
            opts.activationTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.weightTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.optimizerStateTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.accumulatorTensorLocationSettings = options[
                "tensorLocationSettings"]
        # ...and a weight-specific setting overrides it for weights only.
        if "weightTensorLocationSettings" in options and options[
                "weightTensorLocationSettings"]:
            opts.weightTensorLocationSettings = options[
                "weightTensorLocationSettings"]

        if options["replication"] > 1:
            opts.replicatedGraphCount = options["replication"]
            opts.enableReplicatedGraphs = True
        if "ioTiles" in options:
            opts.numIOTiles = options["ioTiles"]

        pat = popart.Patterns(popart.PatternsLevel.Default)
        # Device count: one per stage when phased, otherwise one per layer
        # plus one; multiplied by the replication factor.
        if options["phasedExecution"]:
            numIpus = options["stages"]
        else:
            numIpus = options["numLayers"] + 1

        if options["replication"] > 1:
            numIpus = numIpus * options["replication"]
        device = tu.create_test_device(numIpus,
                                       pattern=popart.SyncPattern.Full)

        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=dataFlow,
                                         userOptions=opts,
                                         loss=l1,
                                         optimizer=popart.ConstSGD(0.1),
                                         patterns=pat,
                                         deviceInfo=device)

        session.prepareDevice()
        session.weightsFromHost()
        # From here on `anchors` holds the result arrays, not return types.
        anchors = session.initAnchorArrays()
        for k, v in anchors.items():
            print(f"anchor_before {k}={v.shape}")

        inputs = {x_in: input_data, mask: mask_data}
        stepio = popart.PyStepIO(inputs, anchors)

        for __ in range(10):
            session.run(stepio)

        session.modelToHost(
            str(tmpdir / f"streamingmemory_attention_{index}.onnx"))

        if options["replication"] > 1:
            for k, v in anchors.items():
                if k in gradient_keys:
                    # The gradient anchors will have an additional replication axis.
                    anchors[k] = np.sum(v, 1 if batches_per_step > 1 else 0)
                else:
                    # Output tensor needs reshaping.
                    anchors[k] = np.reshape(anchors[k], [
                        batches_per_step, sequence_length * batch_size,
                        hidden_size
                    ])
            for k, v in anchors.items():
                print(f"anchor_after {k}={v.shape}")
        return anchors

    test_results = []

    # AliasZeroCopy only supported with explicit recomputation, but not with
    # standard recomputation
    # Phased execution only supported with explicit recomputation, but not with
    # standard recomputation
    test_variants = []

    # Off-chip storage, loaded and stored through the compute tiles.
    defaultOffChip = popart.TensorLocationSettings(
        location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.Off),
        minElementsForOffChip=0,
        minElementsForReplicatedTensorSharding=2)

    # Off-chip storage, loaded and stored through the IO tiles.
    ioOffChip = popart.TensorLocationSettings(
        location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.IO,
            storageTileSet=popart.TileSet.IO,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.Off),
        minElementsForOffChip=0,
        minElementsForReplicatedTensorSharding=2)

    # Ground truth variant
    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": False,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": False,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 4,
        "replication": 1,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    # Test batch serialized single device per replica execution, where all
    # streaming memory traffic goes through IO tiles, and activations are
    # stored and loaded one-by-one
    test_variants.append({
        "stages": 1,
        "stride": 4,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 4,
        "batchConcat": False,
        "replication": 2,
        "tensorLocationSettings": ioOffChip,
        "ioTiles": 192
    })

    # Test batch serialized single device per replica execution, where all
    # streaming memory traffic goes through IO tiles, and loading of the next
    # phase happens before storing the current phase
    test_variants.append({
        "stages": 1,
        "stride": 1,
        "numLayers": 3,
        "phasedExecution": True,
        "phaseSchedule": popart.ExecutionPhaseSchedule.BatchClusteredIO,
        "outlining": False,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 4,
        "batchConcat": True,
        "replication": 2,
        "tensorLocationSettings": ioOffChip,
        "ioTiles": 192
    })

    # Test a variety of batch serialisation schedules.
    for batchSchedule in [
            popart.BatchSerializationBatchSchedule.Scheduler,
            popart.BatchSerializationBatchSchedule.Isomorphic,
            popart.BatchSerializationBatchSchedule.OverlapOnIo,
            popart.BatchSerializationBatchSchedule.OverlapOnCompute,
    ]:

        test_variants.append({
            "stages": 1,
            "stride": 4,
            "numLayers": 3,
            "phasedExecution": True,
            "outlining": False,
            "explicitRecomputation": True,
            "aliasZeroCopy": True,
            "batchSerialize": 4,
            "batchSchedule": batchSchedule,
            "batchConcat": False,
            "replication": 2,
            "tensorLocationSettings": ioOffChip,
            "ioTiles": 192
        })

    # Test replicated tensor sharding + on chip (no outlining).
    test_variants.append({
        "stages":
        2,
        "numLayers":
        3,
        "phasedExecution":
        True,
        "outlining":
        False,
        "explicitRecomputation":
        False,
        "aliasZeroCopy":
        False,
        "batchSerialize":
        1,
        "replication":
        2,
        "tensorLocationSettings":
        defaultOffChip,
        "weightTensorLocationSettings":
        popart.TensorLocationSettings(location=popart.TensorLocation(
            storage=popart.TensorStorage.OnChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.On),
                                      minElementsForOffChip=0,
                                      minElementsForReplicatedTensorSharding=2)
    })

    # Test replicated tensor sharding + off chip (no outlining).
    test_variants.append({
        "stages":
        2,
        "numLayers":
        3,
        "phasedExecution":
        True,
        "outlining":
        False,
        "explicitRecomputation":
        False,
        "aliasZeroCopy":
        False,
        "batchSerialize":
        1,
        "replication":
        2,
        "tensorLocationSettings":
        defaultOffChip,
        "weightTensorLocationSettings":
        popart.TensorLocationSettings(location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.On),
                                      minElementsForOffChip=0,
                                      minElementsForReplicatedTensorSharding=2)
    })

    # Run every variant; the index doubles as the saved model's file suffix.
    index = 0
    for test_option in test_variants:
        print(f"Running {index}: {test_option}")
        test_results.append(run_test(index, test_option))
        index += 1

    gt_onnx = onnx.load(str(tmpdir / f"streamingmemory_attention_0.onnx"))

    # Compare every other variant against the ground truth (variant 0).
    for i in range(1, index):
        print(f"Testing run {i}: {test_variants[i]}")
        for key in test_results[0].keys():
            assert np.all(
                np.isclose(test_results[0][key],
                           test_results[i][key],
                           equal_nan=False))

        val_onnx = onnx.load(
            str(tmpdir / f"streamingmemory_attention_{i}.onnx"))
        # Initializers are compared position by position.
        for j in range(len(gt_onnx.graph.initializer)):
            print(f"Checking initializer {j}")
            gt = gt_onnx.graph.initializer[j]
            gt = numpy_helper.to_array(gt)
            val = val_onnx.graph.initializer[j]
            val = numpy_helper.to_array(val)
            assert np.allclose(gt, val, equal_nan=False)
def test_bool(self):
    """Round-trip a random boolean array through numpy_helper."""
    # np.bool_ is the NumPy boolean scalar type; the bare `np.bool` alias was
    # removed in NumPy 1.24 and raises AttributeError there.
    a = np.random.randint(2, size=(13, 37)).astype(np.bool_)
    tensor_def = numpy_helper.from_array(a, "test")
    self.assertEqual(tensor_def.name, "test")
    a_recover = numpy_helper.to_array(tensor_def)
    np.testing.assert_equal(a, a_recover)
# Read the sample inputs and the matching reference outputs from the
# numbered test-data directories (test_data_set_0, test_data_set_1, ...).
test_data_dir = 'test_data_set'
test_data_num = 3

import glob
import os

# Inputs: one serialized TensorProto per test-data directory.
inputs = []
for i in range(test_data_num):
    case_dir = test_data_dir + '_{}'.format(i)
    input_file = os.path.join(case_dir, 'input_0.pb')
    with open(input_file, 'rb') as f:
        serialized = f.read()
    tensor = onnx.TensorProto()
    tensor.ParseFromString(serialized)
    inputs.append(numpy_helper.to_array(tensor))

print('Loaded {} inputs successfully.'.format(test_data_num))

# Reference outputs: same layout, different file name.
ref_outputs = []
for i in range(test_data_num):
    case_dir = test_data_dir + '_{}'.format(i)
    output_file = os.path.join(case_dir, 'output_0.pb')
    with open(output_file, 'rb') as f:
        serialized = f.read()
    tensor = onnx.TensorProto()
    tensor.ParseFromString(serialized)
    ref_outputs.append(numpy_helper.to_array(tensor))

print('Loaded {} reference outputs successfully.'.format(test_data_num))
def convert_operations(onnx_model, batch_dim=0):
    """
    Convert onnx model operations. Yields onnx's operator_id, operator_name
    and converted pytorch operator.

    Note that the model is modified while iterating: when a ``MatMul`` with
    an initializer weight is directly followed by an ``Add`` with an
    initializer, the ``Add`` is folded into the generated ``nn.Linear`` as
    its bias, removed from ``onnx_model.graph.node``, and its outputs become
    the outputs of the ``MatMul`` node.

    Parameters
    ----------
    onnx_model: onnx.ModelProto
        Loaded onnx model.
    batch_dim: int
        Usually 0 for computer vision models and 1 for NLP models.

    Returns
    -------
    iterator: (op_id, op_name, op)

    Raises
    ------
    NotImplementedError
        If an op type has no explicit conversion and no function of the same
        (lower-cased) name exists in ``torch``, or if a ``Reshape`` takes its
        shape from something other than an initializer.
    """
    weights = {tensor.name: tensor for tensor in onnx_model.graph.initializer}

    for i, node in enumerate(onnx_model.graph.node):
        # extract only useful inputs
        params = [
            weights[par_name] for par_name in node.input
            if par_name in weights
        ]

        if node.op_type == "Conv":
            op = convert_layer(node, "Conv", params)
        elif node.op_type == "Relu":
            op = nn.ReLU(inplace=True)
        elif node.op_type == "LeakyRelu":
            op = nn.LeakyReLU(**extract_attributes(node), inplace=True)
        elif node.op_type == "Sigmoid":
            op = nn.Sigmoid()
        elif node.op_type == "MaxPool":
            op = convert_layer(node, "MaxPool")
        elif node.op_type == "AveragePool":
            op = convert_layer(node, "AvgPool")
        elif node.op_type == "Flatten":
            op = Flatten(**extract_attributes(node))
        elif node.op_type == "Gemm":
            op = convert_linear_layer(node, params)
            op.feature_dim = batch_dim + 1  # Necessary for transformers
        elif node.op_type == "BatchNormalization":
            op = convert_batch_norm_layer(node, params=params)
        elif node.op_type == "InstanceNormalization":
            op = convert_instance_norm_layer(node, params=params)
        elif node.op_type == "Concat":
            op = Concat(**extract_attributes(node))
        elif node.op_type == "Constant":
            # How Constant ops are handled: the stored value is wrapped so it
            # can be used like any other operator.
            op = value_wrapper(
                torch.from_numpy(extract_attributes(node)["constant"]))
        elif node.op_type == "Reshape":
            # The target shape must be available as an initializer.
            shape_proto = next(
                (init for init in onnx_model.graph.initializer
                 if init.name == node.input[1]),
                None)
            if shape_proto is None:
                # Previously this failed with an opaque TypeError from
                # tuple(None).
                raise NotImplementedError(
                    "Conversion not implemented for Reshape with a "
                    "non-initializer shape input.")
            op = Reshape(tuple(numpy_helper.to_array(shape_proto)))
        elif node.op_type == "Shape":
            op = Shape()
        elif node.op_type == "Gather":
            op = Gather(**extract_attributes(node))
        elif node.op_type == "Squeeze":
            op = Squeeze(**extract_attributes(node))
        elif node.op_type == "Unsqueeze":
            op = partial(torch.unsqueeze, **extract_attributes(node))
        elif node.op_type == "ConstantOfShape":
            op = ConstantOfShape(**extract_attributes(node))
        elif node.op_type == "Slice":
            op = Slice(**extract_attributes(node))
        elif node.op_type == "Cast":
            op = Cast(**extract_attributes(node))
        elif node.op_type == "Where":
            op = Where()
        elif node.op_type == "Equal":
            op = torch.eq
        elif node.op_type == "Mul":
            op = Mul(**extract_attributes(node))
        elif node.op_type == "Div":
            op = torch.true_divide
        elif node.op_type == "MatMul":
            if params:
                weight = torch.from_numpy(numpy_helper.to_array(params[0]))
                op = nn.Linear(weight.shape[0], weight.shape[1], bias=False)
                op.weight.data = weight.t()

                # check if next node Add to add bias; a MatMul that is the
                # last node of the graph has no successor to inspect.
                if i + 1 < len(onnx_model.graph.node):
                    next_node = onnx_model.graph.node[i + 1]
                    next_params = [
                        weights[par_name] for par_name in next_node.input
                        if par_name in weights
                    ]
                    if next_params and next_node.op_type == "Add":
                        bias = torch.from_numpy(
                            numpy_helper.to_array(next_params[0]))
                        op.bias = nn.Parameter(bias)
                        # The Linear now produces what the Add produced.
                        node.output.pop()
                        node.output.extend(next_node.output)
                        onnx_model.graph.node.pop(i + 1)  # remove next node
            else:
                op = Matmul()
        elif node.op_type == "Sub":
            op = torch.sub
        elif node.op_type == "Pow":
            op = torch.pow
        elif node.op_type == "Sqrt":
            op = torch.sqrt
        elif node.op_type == "Softmax":
            op = nn.Softmax(dim=1)
        elif node.op_type == "Transpose":
            op = partial(torch.Tensor.permute, **extract_attributes(node))
        elif node.op_type == "Split":
            kwargs = extract_attributes(node)
            # if the split_size_or_sections is not in node attributes,
            # the number_of_splits becomes the number of node outputs
            if "split_size_or_sections" not in kwargs:
                kwargs["number_of_splits"] = len(node.output)
            op = Split(**kwargs)
        elif node.op_type == "ReduceMean":
            kwargs = dict(keepdim=True)
            kwargs.update(extract_attributes(node))
            op = partial(torch.mean, **kwargs)
        elif node.op_type == "Add":
            op = Add()
        elif node.op_type == "GlobalAveragePool":
            op = GlobalAveragePool()
        elif node.op_type == "ConvTranspose":
            op = convert_layer(node, "ConvTranspose", params)
        elif node.op_type == "Identity":
            op = nn.Identity()
        elif node.op_type == "Resize":
            op = Resize(**extract_attributes(node))
        elif node.op_type == "Upsample":
            op = Upsample(**extract_attributes(node))
        elif node.op_type == "OneHot":
            op = OneHot(**extract_attributes(node))
        elif node.op_type == "Pad":
            op = Pad(**extract_attributes(node))
        elif node.op_type == "Clip":
            op = Clamp(**extract_attributes(node))
        elif node.op_type == "Tanh":
            op = torch.tanh
        elif node.op_type == "Erf":
            op = torch.erf
        elif node.op_type == "Log":
            op = torch.log
        elif node.op_type == "Exp":
            op = torch.exp
        elif node.op_type == "LRN":
            op = nn.LocalResponseNorm(**extract_attributes(node))
        elif node.op_type == "Dropout":
            # NOTE(review): p=1.0 ignores the node's ratio attribute and
            # zeroes everything in training mode - confirm this is intended.
            op = nn.Dropout(p=1.0)
        else:
            # Fall back to a torch function with the same lower-cased name.
            op = getattr(torch, node.op_type.lower(), None)
            if op is None:
                raise NotImplementedError(
                    "Conversion not implemented for op_type={}.".format(
                        node.op_type))
            else:
                print("Automatic inference of operator: {}".format(
                    node.op_type.lower()))

        op_name = "{}_{}".format(node.op_type, node.output[0])
        op_id = node.output[0]
        yield op_id, op_name, op
def from_onnx(filename, value_dtypes=None,
              default_placeholder_dtype=dtype_list.int32,
              default_variable_dtype=dtype_list.int32,
              default_constant_dtype=dtype_list.int32,
              default_operator_dtype=dtype_list.int32,
              default_scale_dtype=dtype_list.int32,
              default_bias_dtype=dtype_list.int32,
              onnx_input_layout='NCHW',
              onnx_filter_layout='OIHW',
              disable_fusion=False):
    """
    Convert ONNX model to NNgen model

    Parameters
    ----------
    filename : str
        File name of ONNX model

    value_dtypes : dict
        dtype_info dictionary by name

    default_placeholder_dtype : nngen.dtype_info
        Default dtype for placeholder

    default_variable_dtype : nngen.dtype_info
        Default dtype for variable

    default_constant_dtype : nngen.dtype_info
        Default dtype for constant

    default_operator_dtype : nngen.dtype_info
        Default dtype for operator

    default_scale_dtype : nngen.dtype_info
        Default dtype for scale

    default_bias_dtype : nngen.dtype_info
        Default dtype for bias

    onnx_input_layout : str
        Layout of ONNX input values

    onnx_filter_layout : str
        Layout of ONNX filter (weight) values

    disable_fusion : bool
        Disable operator fusion

    Returns
    -------
    outputs : collections.OrderedDict
        Dict of output values

    placeholders : collections.OrderedDict
        Dictionary of placeholders

    variables : collections.OrderedDict
        Dictionary of variables

    constants : collections.OrderedDict
        Dictionary of constants

    operators : collections.OrderedDict
        Dictionary of operators

    Raises
    ------
    ImportError
        If the onnx package cannot be imported.
    """

    # Only a failed import is translated; anything else (including
    # KeyboardInterrupt) must propagate unchanged instead of being masked.
    try:
        import onnx
        from onnx import numpy_helper
    except ImportError:
        raise ImportError('onnx is required.')

    if value_dtypes is None:
        value_dtypes = {}

    # load model
    model = onnx.load(filename)

    # input/output node dict
    input_nodes = collections.OrderedDict(
        (input_var.name, input_var) for input_var in model.graph.input)
    output_nodes = collections.OrderedDict(
        (output_var.name, output_var) for output_var in model.graph.output)

    # variable ndarray dict
    variable_values = collections.OrderedDict(
        (weight.name, numpy_helper.to_array(weight))
        for weight in model.graph.initializer)

    # constant ndarray dict
    constant_values = collections.OrderedDict()
    for node in model.graph.node:
        if node.op_type == 'Constant':
            constant_values[util.get_name(node)] = numpy_helper.to_array(
                node.attribute[0].t)

    # placeholders
    placeholders = _to_placeholders(input_nodes, output_nodes,
                                    variable_values, constant_values,
                                    value_dtypes,
                                    default_placeholder_dtype,
                                    default_variable_dtype,
                                    default_constant_dtype,
                                    default_operator_dtype)

    # variables
    variables = _to_variables(input_nodes, output_nodes,
                              variable_values, constant_values,
                              value_dtypes,
                              default_placeholder_dtype,
                              default_variable_dtype,
                              default_constant_dtype,
                              default_operator_dtype)

    # constants: the raw ndarray values are used directly
    constants = constant_values

    # producer/consumer table
    producers = collections.defaultdict(list)
    consumers = collections.defaultdict(list)

    for node in model.graph.node:
        node_name = util.get_name(node)
        for arg in node.input:
            if arg not in producers[node_name]:
                producers[node_name].append(arg)
            if node_name not in consumers[arg]:
                consumers[arg].append(node_name)

    # operators
    operators = collections.OrderedDict()
    visitor = _OperatorVisitor(model,
                               placeholders, variables, constants, operators,
                               producers, consumers,
                               value_dtypes,
                               default_placeholder_dtype,
                               default_variable_dtype,
                               default_constant_dtype,
                               default_operator_dtype,
                               default_scale_dtype,
                               default_bias_dtype,
                               onnx_input_layout,
                               onnx_filter_layout,
                               disable_fusion)

    # The visitor owns the authoritative dictionaries from here on.
    placeholders = visitor.placeholders
    variables = visitor.variables
    constants = visitor.constants
    operators = visitor.operators

    for name in output_nodes:
        visitor.visit(name)

    # outputs: look each graph output up in priority order
    outputs = collections.OrderedDict()

    for name in output_nodes:
        if name in operators:
            outputs[name] = operators[name]
        elif name in placeholders:
            outputs[name] = placeholders[name]
        elif name in variables:
            outputs[name] = variables[name]
        elif name in constants:
            outputs[name] = constants[name]

    return outputs, placeholders, variables, constants, operators
def prepare_model(model):
    """
    Walk an ONNX graph once, recording tensor shapes and folding constants.

    Initializers and ``Constant`` nodes are passed through ``nchw_to_nhwc``
    before being stored, and 4-element Reshape/Expand targets are reordered
    as ``[0, 2, 3, 1]`` (presumably so shapes are tracked channels-last --
    TODO confirm against ``nchw_to_nhwc``).

    Arguments
    ---------
    model : object exposing ``graph.initializer``, ``graph.input`` and
        ``graph.node`` (an ONNX ModelProto, judging by the fields used)

    Return
    ------
    (shape_map, constants_map, output_node_map, input_node_map,
     placeholdernames) : (dict, dict, dict, dict, list)
        shape_map maps a tensor name to its shape.
        constants_map maps a tensor name to a known value: initializers,
        ``Constant`` nodes, and results that could be computed because the
        inputs were already constant.
        output_node_map maps ``node.output[0]`` to the node producing it.
        input_node_map maps an input name to a node consuming it (a later
        consumer overwrites an earlier one) or to a graph input entry.
        placeholdernames lists the names of all ``model.graph.input`` entries.

    Raises
    ------
    AssertionError
        If a node has an op_type that none of the branches below handles.
    """
    shape_map = {}
    constants_map = {}
    output_node_map = {}
    input_node_map = {}

    # Initializers are constants with a known shape.
    for initial in model.graph.initializer:
        const = nchw_to_nhwc(numpy_helper.to_array(initial)).copy()
        constants_map[initial.name] = const
        shape_map[initial.name] = const.shape

    placeholdernames = []
    for input in model.graph.input:
        placeholdernames.append(input.name)
        # Graph inputs that are also initializers already have a shape.
        if input.name not in shape_map:
            shape_map[input.name] = onnxshape_to_intlist(
                input.type.tensor_type.shape)
            input_node_map[input.name] = input

    for node in model.graph.node:
        #print(node)
        # Only the first output of every node is tracked.
        output_node_map[node.output[0]] = node
        for input in node.input:
            input_node_map[input] = node
        if node.op_type == "Constant":
            const = node.attribute
            const = nchw_to_nhwc(numpy_helper.to_array(const[0].t))
            constants_map[node.output[0]] = const
            shape_map[node.output[0]] = const.shape

        elif node.op_type in ["MatMul", "Gemm"]:
            transA = 0
            transB = 0
            for attribute in node.attribute:
                if 'transA' == attribute.name:
                    transA = attribute.i
                elif 'transB' == attribute.name:
                    transB = attribute.i
            # The transpose flags select which axis of each operand survives.
            M = shape_map[node.input[0]][transA]
            if len(shape_map[node.input[1]]) == 1 and transB == 0:
                N = 1
            else:
                N = shape_map[node.input[1]][1 - transB]
            shape_map[node.output[0]] = [M, N]

        elif node.op_type in ["Add", "Sub", "Mul"]:
            # Output shape is taken from the first operand only.
            shape_map[node.output[0]] = shape_map[node.input[0]]
            # Fold the operation when both operands are known constants.
            if node.input[0] in constants_map and node.input[
                    1] in constants_map:
                if node.op_type == "Add":
                    result = np.add(constants_map[node.input[0]],
                                    constants_map[node.input[1]])
                elif node.op_type == "Sub":
                    result = np.subtract(constants_map[node.input[0]],
                                         constants_map[node.input[1]])
                elif node.op_type == "Mul":
                    result = np.multiply(constants_map[node.input[0]],
                                         constants_map[node.input[1]])
                constants_map[node.output[0]] = result

        elif node.op_type in ["Conv", "MaxPool", "AveragePool"]:
            output_shape = []
            input_shape = shape_map[node.input[0]]

            # Pooling has no filter input, so kernel_shape must come from
            # the node attributes; Conv derives it from the filter shape.
            require_kernel_shape = node.op_type in ["MaxPool", "AveragePool"]
            if not require_kernel_shape:
                filter_shape = shape_map[node.input[1]]
                kernel_shape = filter_shape[1:-1]

            # Defaults used when the attribute is absent from the node.
            strides = [1, 1]
            padding = [0, 0, 0, 0]
            auto_pad = 'NOTSET'
            dilations = [1, 1]
            group = 1
            ceil_mode = 0
            for attribute in node.attribute:
                if attribute.name == 'strides':
                    strides = attribute.ints
                elif attribute.name == 'pads':
                    padding = attribute.ints
                elif attribute.name == 'auto_pad':
                    auto_pad = attribute.s
                elif attribute.name == 'kernel_shape':
                    kernel_shape = attribute.ints
                elif attribute.name == 'dilations':
                    dilations = attribute.ints
                elif attribute.name == 'group':
                    group = attribute.i
                elif attribute.name == 'ceil_mode':
                    ceil_mode = attribute.i

            # Dilation stretches the kernel footprint.
            effective_kernel_shape = [(kernel_shape[i] - 1) * dilations[i] + 1
                                      for i in range(len(kernel_shape))]

            # First dimension is copied through unchanged.
            output_shape.append(input_shape[0])

            for i in range(len(kernel_shape)):
                # Pads are read as all "begin" values followed by all "end"
                # values.
                effective_input_size = input_shape[1 + i]
                effective_input_size += padding[i]
                effective_input_size += padding[i + len(kernel_shape)]
                if ceil_mode == 1:
                    strided_kernel_positions = int(
                        np.ceil(
                            (effective_input_size - effective_kernel_shape[i])
                            / float(strides[i])))
                else:
                    strided_kernel_positions = int(
                        np.floor(
                            (effective_input_size - effective_kernel_shape[i])
                            / strides[i]))
                output_shape.append(1 + strided_kernel_positions)

            # Last dimension: unchanged for pooling, taken from the filter
            # for Conv.
            if require_kernel_shape:
                output_shape.append(input_shape[1])
            else:
                output_shape.append(filter_shape[0])

            shape_map[node.output[0]] = output_shape

        elif node.op_type in ["Relu", "Sigmoid", "Tanh", "Softmax"]:
            # Element-wise: shape is preserved.
            shape_map[node.output[0]] = shape_map[node.input[0]]

        # Gather is for the moment solely for shapes
        elif node.op_type == "Gather":
            axis = 0
            # The last attribute visited determines the axis.
            for attribute in node.attribute:
                axis = attribute.i
            if node.input[0] in constants_map and node.input[
                    1] in constants_map:
                data = constants_map[node.input[0]]
                indexes = constants_map[node.input[1]]
                constants_map[node.output[0]] = np.take(data, indexes, axis)

            if node.input[0] in shape_map and node.input[1] in shape_map:
                r = len(shape_map[node.input[0]])
                q = len(shape_map[node.input[1]])
                out_rank = q + r - 1
                if out_rank == 0:
                    shape_map[node.output[0]] = shape_map[node.input[1]]
                else:
                    output_shape = []
                    for i in range(out_rank):
                        if i < axis:
                            output_shape.append(
                                shape_map[node.input[0]][i])  # i < axis < r
                        elif i >= axis and i < axis + q:
                            # NOTE(review): this indexes the data shape
                            # (input 0), not the indices shape (input 1) --
                            # confirm this is intended.
                            output_shape.append(
                                shape_map[node.input[0]][i -
                                                         axis])  # i - axis < q
                        else:
                            output_shape.append(shape_map[node.input[0]][
                                i - q + 1])  # i < out_rank < q + r - 1
                    shape_map[node.output[0]] = output_shape

        elif node.op_type == "Shape":
            if node.input[0] in shape_map:
                # The shape itself becomes a constant value.
                constants_map[node.output[0]] = shape_map[node.input[0]]
                shape_map[node.output[0]] = [len(shape_map[node.input[0]])]

        elif node.op_type == "Reshape":
            if node.input[1] in constants_map:
                total = 1
                replace_index = -1
                for index in range(len(constants_map[node.input[1]])):
                    if constants_map[node.input[1]][index] == -1:
                        replace_index = index
                    else:
                        total *= constants_map[node.input[1]][index]

                # Resolve a -1 entry in place: the stored target-shape
                # constant itself is modified.
                if replace_index != -1:
                    constants_map[node.input[1]][replace_index] = np.prod(
                        shape_map[node.input[0]]) / total

                # 4-element targets are reordered as [0, 2, 3, 1].
                if len(constants_map[node.input[1]]) == 4:
                    shape_map[node.output[0]] = [
                        constants_map[node.input[1]][0],
                        constants_map[node.input[1]][2],
                        constants_map[node.input[1]][3],
                        constants_map[node.input[1]][1]
                    ]
                else:
                    shape_map[node.output[0]] = constants_map[node.input[1]]

        elif node.op_type == "Unsqueeze":
            if node.input[0] in shape_map:
                axis = node.attribute[0].ints
                output_shape = list(shape_map[node.input[0]])
                if node.input[0] in constants_map:
                    constants_map[node.output[0]] = constants_map[
                        node.input[0]]
                # Insert each new axis into the shape and, when the value is
                # known, into the constant as well.
                for i in axis:
                    output_shape.insert(i, 1)
                    if node.input[0] in constants_map:
                        constants_map[node.output[0]] = np.expand_dims(
                            constants_map[node.output[0]], axis=i)
                shape_map[node.output[0]] = output_shape

        elif node.op_type == "Concat":
            all_constant = True
            axis = node.attribute[0].i
            for input in node.input:
                if not input in constants_map:
                    all_constant = False
                    break
            if all_constant:
                constants_map[node.output[0]] = np.concatenate(
                    [constants_map[input] for input in node.input], axis=axis)
            all_shape_known = True
            for input in node.input:
                if not input in shape_map:
                    all_shape_known = False
                    break
            if all_shape_known:
                # Sizes add up along the concat axis; other axes come from
                # the first input.
                new_axis_size = 0
                for input in node.input:
                    new_axis_size += shape_map[input][axis]
                shape_map[node.output[0]] = [
                    shape_map[node.input[0]][i]
                    if i != axis else new_axis_size
                    for i in range(len(shape_map[node.input[0]]))
                ]

        elif node.op_type == "Expand":
            if node.input[1] in constants_map:
                # Same [0, 2, 3, 1] reordering as in the Reshape branch.
                if len(constants_map[node.input[1]]) == 4:
                    shape_map[node.output[0]] = [
                        constants_map[node.input[1]][0],
                        constants_map[node.input[1]][2],
                        constants_map[node.input[1]][3],
                        constants_map[node.input[1]][1]
                    ]
                else:
                    shape_map[node.output[0]] = constants_map[node.input[1]]

                # Broadcast the first input into a zero array of the target
                # shape.
                result = np.zeros(
                    shape_map[node.output[0]]) + constants_map[node.input[0]]
                constants_map[node.output[0]] = result
        else:
            assert 0, "Operations of type " + node.op_type + " are not yet supported."

    #print('const_map')
    #print(constants_map)
    #print('shape_map')
    #print(shape_map)
    return shape_map, constants_map, output_node_map, input_node_map, placeholdernames
total_existing_data_set = 0 print('Verifying model {} with existing test data...'.format(onnx_model_name)) for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')): test_data = np.load(f, encoding='bytes') inputs = list(test_data['inputs']) ref_outputs = list(test_data['outputs']) onnx_verify(onnx_model, inputs, ref_outputs) total_existing_data_set += 1 for f in glob.glob(os.path.join(onnx_model_dir, 'test_data_set*')): inputs = [] inputs_num = len(glob.glob(os.path.join(f, 'input_*.pb'))) for i in range(inputs_num): tensor = onnx.TensorProto() with open(os.path.join(f, 'input_{}.pb'.format(i)), 'rb') as pf: tensor.ParseFromString(pf.read()) inputs.append(numpy_helper.to_array(tensor)) ref_outputs = [] ref_outputs_num = len(glob.glob(os.path.join(f, 'output_*.pb'))) for i in range(ref_outputs_num): tensor = onnx.TensorProto() with open(os.path.join(f, 'output_{}.pb'.format(i)), 'rb') as pf: tensor.ParseFromString(pf.read()) ref_outputs.append(numpy_helper.to_array(tensor)) onnx_verify(onnx_model, inputs, ref_outputs) total_existing_data_set += 1 starting_index = 0 while os.path.exists(os.path.join(onnx_model_dir, 'test_data_set_{}'.format(starting_index))): starting_index += 1 if total_existing_data_set == 0 and add_test_data == 0:
def _asarray(proto):
    """Convert a tensor proto to a ``jnp`` array reshaped to ``proto.dims``."""
    np_value = numpy_helper.to_array(proto)
    target_shape = tuple(proto.dims)
    return jnp.asarray(np_value.reshape(target_shape))
def convert_model_float32_to_float16(self, cast_input_output=True):
    """Convert a graph to FLOAT16. By default, we will keep data types of inputs and outputs.
       For decoder model with past_key_values, it is recommended to set cast_input_output=False for better performance.

    When the installed onnxconverter_common is newer than 1.7.0 the whole
    conversion is delegated to it; otherwise the graph is rewritten here.

    Args:
        cast_input_output (bool, optional): keep data type of inputs and outputs, and add Cast nodes to convert float32 inputs to float16, and float16 to float32 for outputs. Defaults to True.
    """
    from packaging.version import Version
    import onnxconverter_common as oc

    if Version(oc.__version__) > Version("1.7.0"):
        self.model = oc.float16.convert_float_to_float16(self.model, keep_io_types=cast_input_output)
        return

    def add_cast(source_name, target_name, to_type):
        # Append a Cast node converting source_name into target_name.
        cast_node = helper.make_node('Cast', inputs=[source_name], outputs=[target_name])
        cast_node.attribute.extend([helper.make_attribute("to", int(to_type))])
        self.add_node(cast_node)

    graph = self.model.graph

    # Re-encode every float32 initializer as float16, keeping its name.
    for initializer in graph.initializer:
        if initializer.data_type != TensorProto.FLOAT:
            continue
        half_values = numpy_helper.to_array(initializer).astype(np.float16)
        initializer.CopyFrom(numpy_helper.from_array(half_values, initializer.name))

    for node in graph.node:
        if node.op_type in ['Constant', 'ConstantOfShape']:
            # Embedded float32 constant tensors become float16 as well.
            for att in node.attribute:
                if att.name == 'value' and att.t.data_type == TensorProto.FLOAT:
                    half_values = numpy_helper.to_array(att.t).astype(np.float16)
                    att.CopyFrom(helper.make_attribute("value", numpy_helper.from_array(half_values)))
        elif node.op_type == 'Cast':
            # Casts that targeted float32 now target float16.
            for att in node.attribute:
                if att.name == 'to' and att.i == TensorProto.FLOAT:
                    att.CopyFrom(helper.make_attribute("to", int(TensorProto.FLOAT16)))

    if not cast_input_output:
        self.change_input_output_float32_to_float16()
        return

    # Below assumes that we keep input and output data types.
    # Add Cast node to convert input from float32 to float16.
    for input_value_info in graph.input:
        tensor_type = input_value_info.type.tensor_type
        if tensor_type.elem_type != TensorProto.FLOAT:
            continue
        if self.get_initializer(input_value_info.name) is not None:
            # for compatibility for old converter/exporter
            tensor_type.elem_type = TensorProto.FLOAT16
            continue
        float32_name = input_value_info.name
        float16_name = input_value_info.name + '_float16'
        self.replace_input_of_all_nodes(float32_name, float16_name)
        add_cast(float32_name, float16_name, TensorProto.FLOAT16)

    # Add Cast node to convert output from float16 back to float32.
    for output_value_info in graph.output:
        if output_value_info.type.tensor_type.elem_type != TensorProto.FLOAT:
            continue
        float16_name = output_value_info.name + '_float16'
        float32_name = output_value_info.name
        self.replace_output_of_all_nodes(float32_name, float16_name)
        self.replace_input_of_all_nodes(float32_name, float16_name)
        add_cast(float16_name, float32_name, TensorProto.FLOAT)
def create_attention_node(self, mask_index, q_matmul, k_matmul, v_matmul, q_add, k_add, v_add, input, output):
    """Add a fused ``Attention`` node built from separate Q/K/V projections.

    The weight initializers of the three MatMul nodes and the bias
    initializers of the three Add nodes are stacked into one weight tensor
    and one bias tensor. Both are registered as initializers and as graph
    inputs, and an ``Attention`` node in the ``com.microsoft`` domain that
    consumes them is appended to the model.
    """
    hidden = self.hidden_size

    q_weight = self.get_initializer(q_matmul.input[1])
    k_weight = self.get_initializer(k_matmul.input[1])
    v_weight = self.get_initializer(v_matmul.input[1])
    q_bias = self.get_initializer(q_add.input[1])
    k_bias = self.get_initializer(k_add.input[1])
    v_bias = self.get_initializer(v_add.input[1])

    # Every projection weight must be a square hidden x hidden matrix.
    weight_arrays = []
    for initializer in (q_weight, k_weight, v_weight):
        array = numpy_helper.to_array(initializer)
        assert array.shape == (hidden, hidden)
        weight_arrays.append(array)
    qkv_weight = np.stack(weight_arrays, axis=-2)

    # Every projection bias must be a vector of length hidden.
    bias_arrays = []
    for initializer in (q_bias, k_bias, v_bias):
        array = numpy_helper.to_array(initializer)
        assert array.shape == (hidden, )
        bias_arrays.append(array)
    qkv_bias = np.stack(bias_arrays, axis=-2)

    attention_node_name = self.create_node_name('Attention')
    weight_name = attention_node_name + '_qkv_weight'
    bias_name = attention_node_name + '_qkv_bias'
    weight_dims = [hidden, 3 * hidden]
    bias_dims = [3 * hidden]

    weight = onnx.helper.make_tensor(name=weight_name,
                                     data_type=TensorProto.FLOAT,
                                     dims=weight_dims,
                                     vals=qkv_weight.flatten().tolist())
    self.add_initializer(weight)
    self.add_input(
        onnx.helper.make_tensor_value_info(weight.name, TensorProto.FLOAT,
                                           [hidden, 3 * hidden]))

    bias = onnx.helper.make_tensor(name=bias_name,
                                   data_type=TensorProto.FLOAT,
                                   dims=bias_dims,
                                   vals=qkv_bias.flatten().tolist())
    self.add_initializer(bias)
    self.add_input(
        onnx.helper.make_tensor_value_info(bias.name, TensorProto.FLOAT,
                                           [3 * hidden]))

    attention_node = onnx.helper.make_node(
        'Attention',
        inputs=[input, weight_name, bias_name, mask_index],
        outputs=[output],
        name=attention_node_name)
    attention_node.domain = "com.microsoft"
    attention_node.attribute.extend(
        [onnx.helper.make_attribute("num_heads", self.num_heads)])
    self.add_node(attention_node)
def _test_numpy_helper_float_type(self, dtype):  # type: (np.number) -> None
    """Round-trip a random 13x37 array of ``dtype`` through a TensorProto."""
    original = np.random.rand(13, 37).astype(dtype)
    proto = numpy_helper.from_array(original, "test")
    self.assertEqual(proto.name, "test")
    recovered = numpy_helper.to_array(proto)
    np.testing.assert_equal(original, recovered)
def test_bool(self):  # type: () -> None
    """Round-trip a random 13x37 boolean array through a TensorProto."""
    # np.bool (an alias of the builtin) was removed in NumPy 1.24;
    # np.bool_ is the NumPy boolean scalar type and works on all versions.
    a = np.random.randint(2, size=(13, 37)).astype(np.bool_)
    tensor_def = numpy_helper.from_array(a, "test")
    self.assertEqual(tensor_def.name, "test")
    a_recover = numpy_helper.to_array(tensor_def)
    np.testing.assert_equal(a, a_recover)
def fuse_embedding(self, node, output_name_to_node):
    """Try to fuse the embedding subgraph feeding a LayerNormalization node.

    The word, position and segment embedding tables are located by walking
    up from ``node``. If all three are found, the embedding subgraph is
    created through ``create_embedding_subgraph``.

    Args:
        node: the LayerNormalization node to start matching from.
        output_name_to_node: mapping handed to the parent-matching helpers.

    Returns:
        bool: True when the embedding subgraph was created, False when any
        part of the expected pattern could not be matched.
    """
    assert node.op_type == 'LayerNormalization'
    logger.debug(
        f"start fusing embedding from node with output={node.output[0]}..."
    )

    # --- word embedding: LayerNorm <- Add <- Add <- Gather ---
    word_embed_path = self.match_parent_path(node, ['Add', 'Add', 'Gather'],
                                             [0, 0, 0], output_name_to_node)
    if word_embed_path is None:
        logger.debug("failed to match word_embed_path")
        return False

    skip_node, add_node, gather_node = word_embed_path

    word_initializer = self.get_initializer(gather_node.input[0])
    if word_initializer is None:
        logger.debug("failed to get word initializer")
        return False

    temp = numpy_helper.to_array(word_initializer)
    if len(temp.shape) != 2:
        logger.info(
            "Failed to find word embedding. name:{}, shape:{}".format(
                word_initializer.name, temp.shape))
        return False
    logger.info("Found word embedding. name:{}, shape:{}".format(
        word_initializer.name, temp.shape))
    word_embedding = word_initializer.name

    # --- position embedding: either a (1, N, H) initializer added
    # directly, or a table reached through Gather <- Slice ---
    pos_initializer = self.get_initializer(add_node.input[1])
    if pos_initializer is not None:
        temp = numpy_helper.to_array(pos_initializer)
        if len(temp.shape) == 3 and temp.shape[0] == 1:
            # Drop the leading unit dimension and register the 2-D table.
            tensor = numpy_helper.from_array(
                temp.reshape((temp.shape[1], temp.shape[2])),
                "position_embedding")
            self.add_initializer(tensor)
            logger.info(
                "Found position embedding. name:{}, shape:{}".format(
                    pos_initializer.name, temp.shape[1:]))
            position_embedding = "position_embedding"
        else:
            logger.info(
                "Failed to find position embedding. name:{}, shape:{}".
                format(pos_initializer.name, temp.shape))
            return False
    else:
        pos_embed_path = self.match_parent_path(add_node,
                                                ['Gather', 'Slice'], [1, 1],
                                                output_name_to_node)
        if pos_embed_path is None:
            logger.debug("failed to match pos_embed_path")
            return False

        pos_gather, pos_slice = pos_embed_path
        pos_initializer = self.get_initializer(pos_gather.input[0])
        if pos_initializer is None:
            logger.debug("failed to get pos initializer")
            return False

        temp = numpy_helper.to_array(pos_initializer)
        if len(temp.shape) == 2:
            # This branch handles the position table, so the message names
            # it as such (it previously said "word embedding").
            logger.info(
                "Found position embedding. name:{}, shape:{}".format(
                    pos_initializer.name, temp.shape))
            position_embedding = pos_initializer.name
        else:
            logger.info(
                "Failed to find position embedding. name:{}, shape:{}".
                format(pos_initializer.name, temp.shape))
            return False

    # --- segment embedding: second parent of the outer Add ---
    gather = self.get_parent(skip_node, 1, output_name_to_node)
    if gather is None or gather.op_type != "Gather":
        logger.debug("failed to get gather")
        return False

    segment_initializer = self.get_initializer(gather.input[0])
    if segment_initializer is None:
        logger.debug("failed to get segment initializer")
        return False

    temp = numpy_helper.to_array(segment_initializer)
    if len(temp.shape) != 2:
        logger.info(
            "Failed to find segment embedding. name:{}, shape:{}".format(
                segment_initializer.name, temp.shape))
        return False
    logger.info("Found segment embedding. name:{}, shape:{}".format(
        segment_initializer.name, temp.shape))
    segment_embedding = segment_initializer.name

    logger.info("Create Embedding node")
    self.create_embedding_subgraph(node, word_embedding, segment_embedding,
                                   position_embedding)
    return True
def boxprop(box, model):
    """Replay the layers of an ONNX graph on ``box``, node by node.

    Initializers are looked up by the last dot-separated component of each
    node input name (``weight``, ``bias``, ``running_mean``,
    ``running_var``). Shape-manipulation nodes are skipped; any other
    unknown op type raises ``ValueError``.
    """
    resources = {
        initial.name: numpy_helper.to_array(initial)
        for initial in model.graph.initializer
    }

    ignoredNodes = {
        'Constant', 'Reshape', 'Concat', 'Unsqueeze', 'Shape', 'Gather'
    }

    def tensors_by_suffix(node, wanted):
        # Map each wanted suffix to the matching input's tensor (or None).
        found = dict.fromkeys(wanted)
        for inp in node.input:
            suffix = inp.split('.')[-1]
            if suffix in found:
                found[suffix] = resources[inp]
        return found

    for node in model.graph.node:
        kind = node.op_type

        if kind == 'Relu':
            box.relu()

        elif kind == 'Tanh':
            box.tanh()

        elif kind == 'Conv':
            tensors = tensors_by_suffix(node, ('weight', 'bias'))
            weight = tensors['weight']
            bias = tensors['bias']
            c_out = weight.shape[0]

            # Only the first entry of each attribute is used.
            kernel_size, stride, padding = 1, 1, 0
            for attr in node.attribute:
                if attr.name == 'kernel_shape':
                    kernel_size = attr.ints[0]
                elif attr.name == 'strides':
                    stride = attr.ints[0]
                elif attr.name == 'pads':
                    padding = attr.ints[0]

            box.conv2d(weight, c_out, kernel_size, stride, padding, bias)

        elif kind == 'ConvTranspose':
            weight = tensors_by_suffix(node, ('weight',))['weight']
            c_out = weight.shape[1]

            # Unlike Conv, the kernel size is passed as a two-element list.
            kernel_size = [0, 0]
            stride, padding = 1, 0
            for attr in node.attribute:
                if attr.name == 'kernel_shape':
                    for position, extent in enumerate(attr.ints):
                        kernel_size[position] = extent
                elif attr.name == 'strides':
                    stride = attr.ints[0]
                elif attr.name == 'pads':
                    padding = attr.ints[0]

            box.convTranspose2d(weight, c_out, kernel_size, stride, padding)

        elif kind == 'BatchNormalization':
            tensors = tensors_by_suffix(
                node, ('weight', 'bias', 'running_mean', 'running_var'))

            eps = 0
            for attr in node.attribute:
                if attr.name == 'epsilon':
                    eps = attr.f

            box.batchNorm2d(tensors['running_mean'], tensors['running_var'],
                            eps, tensors['weight'], tensors['bias'])

        elif kind == 'MaxPool':
            kernel_size = 1
            for attr in node.attribute:
                if attr.name == 'kernel_shape':
                    kernel_size = attr.ints[0]
            box.maxpool2d(kernel_size)

        elif kind == 'Gemm':
            tensors = tensors_by_suffix(node, ('weight', 'bias'))
            box.linear(tensors['weight'], tensors['bias'])

        elif kind not in ignoredNodes:
            raise ValueError('Cannot handle layer of type ' + node.op_type)
def quantitize_graph(g, verbose=False):
    """Quantitize graph.

    Float32 initializers with at least 32 elements are replaced by their
    eight-bit form plus the helper weights and nodes produced by
    ``_compose_quantitize``. The graph is modified in place and returned.
    """
    helper_weights = []
    quantized = []
    composed_nodes = []
    renamed = {}
    stale_initializers = []

    for position, weight in enumerate(g.initializer):
        # only quantitize float32
        if weight.data_type != onnx_pb.TensorProto.FLOAT:
            continue
        values = numpy_helper.to_array(weight)
        # only look at sizes >= 32 elements
        if values.size < 32:
            continue

        # weights we want to quantitize
        stale_initializers.append(position)
        name = weight.name
        if verbose:
            logger.info("quantitizing %s", name)
        w_quant, zp, scale = eight_bit_quantitize(values)
        quantized_tensor = numpy_helper.from_array(w_quant, name=name)
        if verbose:
            # Report how much of the tensor deviates by more than each
            # percentage threshold after a quantize/dequantize round trip.
            restored = eight_bit_dequantitize(w_quant, zp, scale)
            deviation = np.abs(restored - values)
            summary = {}
            for percent in [1.0, 5.0, 10.0, 20.0]:
                fraction = np.sum(
                    deviation > np.abs(percent * values / 100.)) / values.size
                summary["> " + str(percent) + "%"] = "{:.2f}".format(
                    100. * fraction)
            logger.info("above_rtol: %s", str(summary))
            logger.info("raw: %s", stats(values))
            logger.info("quant: %s", stats(restored))
        renamed[name] = _compose_quantitize(composed_nodes, helper_weights,
                                            zp, scale, name)
        quantized.append(quantized_tensor)

    # few things to do to initializers and graph inputs:

    # 1. remove initializers that got quantitized (back to front so the
    #    remaining indices stay valid)
    for position in reversed(stale_initializers):
        del g.initializer[position]

    # 2. add quantitized to initializers
    g.initializer.extend(helper_weights)
    g.initializer.extend(quantized)

    # 3. modify the type of weights that we quantitized
    quantized_by_name = {tensor.name: tensor for tensor in quantized}
    replacement_inputs = []
    stale_inputs = []
    for position, graph_input in enumerate(g.input):
        tensor = quantized_by_name.get(graph_input.name)
        if tensor is None:
            continue
        replacement_inputs.append(
            helper.make_tensor_value_info(tensor.name, tensor.data_type,
                                          tensor.dims))
        stale_inputs.append(position)
    for position in reversed(stale_inputs):
        del g.input[position]

    # 4. add new weights as inputs
    for tensor in helper_weights:
        replacement_inputs.append(
            helper.make_tensor_value_info(tensor.name, tensor.data_type,
                                          tensor.dims))
    g.input.extend(replacement_inputs)

    # 5. rewrite consumers of the quantitized weights
    for node in g.node:
        for slot, input_name in enumerate(node.input):
            replacement = renamed.get(input_name)
            if replacement is not None:
                node.input[slot] = replacement

    # 6. add composed nodes to graph, new nodes in the front
    composed_nodes.extend(g.node)
    del g.node[:]
    g.node.extend(composed_nodes)
    return g
import onnx
from onnx import numpy_helper

# Deserialize the sample input tensor and expose it as a numpy array.
with open('input_0.pb', 'rb') as f:
    itensor = onnx.TensorProto()
    itensor.ParseFromString(f.read())
npitensor = numpy_helper.to_array(itensor)

# Same for the reference output tensor.
with open('output_0.pb', 'rb') as f:
    otensor = onnx.TensorProto()
    otensor.ParseFromString(f.read())
npotensor = numpy_helper.to_array(otensor)
# Preprocessing: create a Numpy array
numpy_array = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=float)
# NumPy 1.14 changed array printing; request the legacy layout there so the
# output is the same on old and new versions.
if LooseVersion(numpy.version.version) < LooseVersion('1.14'):
    print('Original Numpy array:\n{}\n'.format(
        numpy.array2string(numpy_array)))
else:
    print('Original Numpy array:\n{}\n'.format(
        numpy.array2string(numpy_array, legacy='1.13')))

# Convert the Numpy array to a TensorProto
tensor = numpy_helper.from_array(numpy_array)
print('TensorProto:\n{}'.format(tensor))

# Convert the TensorProto to a Numpy array
new_array = numpy_helper.to_array(tensor)
# Print the recovered array (not the original) so the output actually shows
# the result of the round trip.
if LooseVersion(numpy.version.version) < LooseVersion('1.14'):
    print('After round trip, Numpy array:\n{}\n'.format(
        numpy.array2string(new_array)))
else:
    print('After round trip, Numpy array:\n{}\n'.format(
        numpy.array2string(new_array, legacy='1.13')))

# Save the TensorProto
with open(os.path.join('resources', 'tensor.pb'), 'wb') as f:
    f.write(tensor.SerializeToString())

# Load the TensorProto
new_tensor = onnx.TensorProto()
with open(os.path.join('resources', 'tensor.pb'), 'rb') as f:
    new_tensor.ParseFromString(f.read())
def from_onnx(onnx_string_or_file):
    """
    Constructs a CrypTen model or module from an ONNX Protobuf string or file.

    The ONNX graph must have exactly one non-parameter input and exactly one
    output. When the converted graph holds a single module, that module is
    returned on its own; otherwise the whole graph is returned.
    """
    # file-like objects are rewound and loaded; anything else is treated as
    # a serialized model string
    is_file_like = hasattr(onnx_string_or_file, "seek")
    if is_file_like:
        onnx_string_or_file.seek(0)
        onnx_model = onnx.load(onnx_string_or_file)
    else:
        onnx_model = onnx.load_model_from_string(onnx_string_or_file)
    onnx_graph = onnx_model.graph

    # create dict of all parameters, inputs, and outputs:
    all_parameters = {}
    for initializer in onnx_graph.initializer:
        all_parameters[initializer.name] = torch.from_numpy(
            numpy_helper.to_array(initializer))
    declared_inputs = [entry.name for entry in onnx_graph.input]
    output_names = [entry.name for entry in onnx_graph.output]
    # parameters are not inputs
    input_names = [
        name for name in declared_inputs if name not in all_parameters.keys()
    ]
    assert len(input_names) == 1, "number of inputs should be 1"
    assert len(output_names) == 1, "number of outputs should be 1"

    # create graph by looping over nodes:
    crypten_model = Graph(input_names[0], output_names[0])
    for node in onnx_graph.node:

        # retrieve inputs, outputs, attributes, and parameters for this node:
        node_output_name = list(node.output)[0]
        raw_input_names = list(node.input)  # includes parameters

        # all the parameters for the current module
        parameters = {}
        for name in raw_input_names:
            if name in all_parameters and name not in input_names:
                parameters[get_parameter_name(name)] = all_parameters[name]

        node_input_names = [
            name for name in raw_input_names
            if get_parameter_name(name) not in parameters
        ]

        attributes = {}
        for attr in node.attribute:
            attributes[attr.name] = get_attribute_value(attr)

        # get operator type (Conv is resolved by kernel rank):
        if node.op_type == "Conv":
            dims = len(attributes["kernel_shape"])
            if dims == 1:
                cls = Conv1d
            elif dims == 2:
                cls = Conv2d
            else:
                raise ValueError("CrypTen does not support op Conv%dd." % dims)
        elif node.op_type not in ONNX_TO_CRYPTEN:
            raise ValueError("CrypTen does not support op %s." % node.op_type)
        else:
            cls = ONNX_TO_CRYPTEN[node.op_type]

        # add CrypTen module to graph:
        crypten_module = cls.from_onnx(parameters=parameters,
                                       attributes=attributes)
        crypten_model.add_module(node_output_name, crypten_module,
                                 node_input_names)

    # return model (or module when there is only one module):
    num_modules = len(list(crypten_model.modules()))
    if num_modules != 1:
        return crypten_model
    for crypten_module in crypten_model.modules():
        return crypten_module
name = 'var' + name return name data_dir = os.path.dirname(sys.argv[1]) input_names = sys.argv[2].split(',') output_names = sys.argv[3].split(',') squeeze_data = len(sys.argv) > 4 # Load inputs inputs = [] for fn in glob(os.path.join(data_dir, 'input_*.pb')): tensor = onnx.TensorProto() with open(fn, 'rb') as f: tensor.ParseFromString(f.read()) tensor = numpy_helper.to_array(tensor) while squeeze_data and tensor.ndim > 4 and tensor.shape[0] == 1: tensor = tensor.squeeze(0) inputs.append(tensor) # Load outputs outputs = [] for fn in glob(os.path.join(data_dir, 'output_*.pb')): tensor = onnx.TensorProto() with open(fn, 'rb') as f: tensor.ParseFromString(f.read()) tensor = numpy_helper.to_array(tensor) while squeeze_data and tensor.ndim > 2 and tensor.shape[0] == 1: tensor = tensor.squeeze(0) outputs.append(tensor)
def test_onnx_model(model_name, round_trip):
    """Run an ONNX model in CNTK and compare against its reference data.

    Every sub-directory of the model directory is treated as one data set
    holding ``input_<i>.pb`` / ``output_<i>.pb`` tensors. With
    ``round_trip`` the model is first re-saved as ONNX and loaded again.
    """
    if model_name in skip_model_names and not round_trip:
        pytest.skip('Skip onnx model test. ')
    if model_name in skip_round_trip_model_names and round_trip:
        pytest.skip('Skip onnx model round trip test. ')

    model_dir = os.path.join(onnx_base_dir, model_name)
    model = C.Function.load(os.path.join(model_dir, 'model.onnx'),
                            format=C.ModelFormat.ONNX)

    if round_trip:
        resave_model_path = 'model_resave.onnx'
        model.save(resave_model_path, format=C.ModelFormat.ONNX)
        model = C.Function.load(resave_model_path, format=C.ModelFormat.ONNX)

    # `entry` rather than `dir`, to avoid shadowing the builtin.
    data_dirs = [
        os.path.join(model_dir, entry) for entry in os.listdir(model_dir)
        if os.path.isdir(os.path.join(model_dir, entry))
    ]

    for data_dir in data_dirs:
        inputs = []
        ref_outputs = []
        tensor = onnx.TensorProto()

        # Files are addressed by index so that they load in numeric order
        # (input_0, input_1, ...) regardless of directory listing order.
        input_filenames = [
            filename for filename in os.listdir(data_dir)
            if input_filename_pattern.match(filename)
        ]
        input_files_sorted = [
            os.path.join(data_dir, 'input_{:d}.pb'.format(i))
            for i in range(len(input_filenames))
        ]
        output_filenames = [
            filename for filename in os.listdir(data_dir)
            if output_filename_pattern.match(filename)
        ]
        output_files_sorted = [
            os.path.join(data_dir, 'output_{:d}.pb'.format(i))
            for i in range(len(output_filenames))
        ]

        for input_file in input_files_sorted:
            with open(input_file, 'rb') as f:
                tensor.ParseFromString(f.read())
            inputs.append(numpy_helper.to_array(tensor))

        for output_file in output_files_sorted:
            with open(output_file, 'rb') as f:
                tensor.ParseFromString(f.read())
            ref_outputs.append(numpy_helper.to_array(tensor))

        cntk_input = {
            model.arguments[i]: inputs[i] for i in range(len(inputs))
        }
        cntk_res = [model.eval(cntk_input)]

        # np.bool was removed in NumPy 1.24; np.bool_ compares equal to a
        # boolean dtype on every NumPy version.
        if ref_outputs[0].dtype == np.bool_:
            cntk_res = [cntk_res[0].astype("bool")]

        outputs = list(cntk_res)

        np.testing.assert_equal(len(ref_outputs), len(outputs))
        for i in range(len(outputs)):
            np.testing.assert_equal(ref_outputs[i].dtype, outputs[i].dtype)
            np.testing.assert_allclose(ref_outputs[i], outputs[i],
                                       rtol=1e-3, atol=1e-4)
def _main_predict(args) -> None:
    """Send one prediction request built from CLI arguments and report the result.

    Inputs come either from serialized TensorProto files (``args.pb_in`` paired
    with ``args.pb_in_names``) or from a JSON file (``args.json_in``) mapping
    each input name to an object with ``values`` and ``type`` entries. Outputs
    are optionally written as ``output_<i>.pb`` files below ``args.pb_out``
    and/or as JSON to ``args.json_out``.

    Raises:
        NotImplementedError: If neither ``pb_in`` nor ``json_in`` is given.
        SystemExit: With status 1 when prediction fails and ``args.verbose``
            is false; with ``args.verbose`` set the original exception
            propagates instead.
    """
    if args.pb_in:
        inputs = {}
        for pb_path, pb_name in zip(args.pb_in, args.pb_in_names):
            proto = onnx.TensorProto()
            with open(pb_path, 'rb') as pb_file:
                proto.ParseFromString(pb_file.read())
            print('Using {} as "{}" input'.format(pb_path, pb_name))
            inputs[pb_name] = numpy_helper.to_array(proto)
    elif args.json_in:
        with open(args.json_in) as json_file:
            payload = json.load(json_file)
        inputs = {
            json_name: np.asarray(spec['values'], dtype=spec['type'])
            for json_name, spec in payload.items()
        }
    else:
        raise NotImplementedError

    if args.verbose:
        print('Inputs:')
        for shown_name, shown_arr in inputs.items():
            print(' {}: dtype={} shape={}'.format(shown_name,
                                                  shown_arr.dtype,
                                                  shown_arr.shape))

    enclave_signing_key = None
    if args.enclave_signing_key_file:
        with open(args.enclave_signing_key_file) as key_file:
            enclave_signing_key = key_file.read()

    # A hash file, when given, takes precedence over the inline hash argument.
    enclave_model_hash = args.enclave_model_hash
    if args.enclave_model_hash_file:
        with open(args.enclave_model_hash_file) as hash_file:
            enclave_model_hash = hash_file.read()

    client = Client(url=args.url,
                    auth=get_auth(args),
                    enclave_signing_key=enclave_signing_key,
                    enclave_hash=args.enclave_hash,
                    enclave_model_hash=enclave_model_hash,
                    enclave_allow_debug=args.enclave_allow_debug)

    try:
        outputs = client.predict(inputs)
    except Exception as e:
        if not args.verbose:
            print(f'{C.FAIL}{C.BOLD}ERROR: {e}{C.END}')
            sys.exit(1)
        # Verbose mode: let the full traceback surface.
        raise

    if args.verbose:
        print('Outputs:')
        for shown_name, shown_arr in outputs.items():
            print(' {}: dtype={} shape={}'.format(shown_name,
                                                  shown_arr.dtype,
                                                  shown_arr.shape))

    if args.pb_out:
        os.makedirs(args.pb_out, exist_ok=True)
        for index, (output_name, output_arr) in enumerate(outputs.items()):
            filename = 'output_{}.pb'.format(index)
            print('Saving "{}" output as {}'.format(output_name, filename))
            serialized = numpy_helper.from_array(
                output_arr, output_name).SerializeToString()
            with open(os.path.join(args.pb_out, filename), 'wb') as out_file:
                out_file.write(serialized)

    if args.json_out:
        print('Saving inference results to {}'.format(args.json_out))
        with open(args.json_out, 'w') as out_file:
            json.dump(outputs, out_file, cls=NumpyEncoder, sort_keys=True)
def from_onnx(graph):  # type: (GraphProto) -> Graph
    """Build a ``Graph`` from an ONNX ``GraphProto``.

    Initializers are decoded to numpy arrays and attached to the nodes that
    consume them. The remaining inputs are used to link each node to its
    producers (``parents``) and consumers (``children``). Shapes found in
    ``value_info``, ``input`` and ``output`` are collected into a dict keyed by
    value name; entries whose shape has no dimensions are left out.
    """
    initializer_arrays = {
        init.name: numpy_helper.to_array(init) for init in graph.initializer
    }

    converted = []
    consumers = {}  # type: Dict[Text, List[Node]]
    producers = {}
    for proto_node in graph.node:
        current = Node.from_onnx(proto_node)
        for in_name in current.inputs:
            if in_name in initializer_arrays:
                current.input_tensors[in_name] = initializer_arrays[in_name]
            else:
                consumers.setdefault(in_name, []).append(current)
        for out_name in current.outputs:
            producers[out_name] = current
        converted.append(current)

    # Graph inputs that are backed by an initializer are weights, not inputs.
    inputs = [
        _input_from_onnx_input(entry) for entry in graph.input
        if entry.name not in initializer_arrays
    ]
    outputs = [_input_from_onnx_input(entry) for entry in graph.output]

    for current in converted:
        for in_name in current.inputs:
            if in_name in producers:
                current.parents.append(producers[in_name])
        for out_name in current.outputs:
            if out_name in consumers:
                current.children.extend(consumers[out_name])

    # Dictionary to hold the "value_info" field from ONNX graph
    shape_dict = {}  # type: Dict[Text,Tuple[int,...]]
    for info_list in (graph.value_info, graph.input, graph.output):
        for value_info in info_list:
            dims = tuple(
                int(dim.dim_value)
                for dim in value_info.type.tensor_type.shape.dim)
            if dims:
                shape_dict[value_info.name] = dims

    return Graph(converted, inputs, outputs, shape_dict)
def fuse_attention(self):
    """Replace matched attention subgraphs with fused Attention nodes.

    Candidate anchors are every SkipLayerNormalization node plus, for each
    LayerNormalization node, the Add found as its input 0. From each anchor
    the method walks parent paths to find the output projection (qkv), the
    V, QK, Q and K branches and the mask branch. If any pattern fails to
    match, that anchor is skipped. When ``check_attention_input`` accepts the
    candidate, an Attention node is created through
    ``self.attention_fusion.create_attention_node`` and the matched nodes are
    queued for removal.

    Mutates the graph in place (``add_node``, ``add_initializer``,
    ``remove_nodes``, ``update_graph``) and logs the number of fused nodes.
    Returns nothing.
    """
    output_name_to_node = self.output_name_to_node()

    nodes_to_remove = []
    attention_count = 0

    start_nodes = []
    skip_layer_norm_nodes = self.get_nodes_by_op_type(
        "SkipLayerNormalization")
    layer_norm_nodes = self.get_nodes_by_op_type("LayerNormalization")
    # Sometimes SkipLayerNormalization cannot be fused because the Add before
    # LayerNormalization has an output that is used by nodes outside of it.
    # Conceptually the Add before LayerNormalization is treated like a
    # SkipLayerNormalization node since they share the same pattern.
    start_nodes.extend(skip_layer_norm_nodes)
    start_nodes.extend(layer_norm_nodes)

    for normalize_node in start_nodes:
        # SkipLayerNormalization has two inputs, and one of them is the root input for attention.
        if normalize_node.op_type == 'LayerNormalization':
            add_before_layernorm = self.match_parent(
                normalize_node, 'Add', 0)
            if add_before_layernorm is not None:
                normalize_node = add_before_layernorm
            else:
                continue
        # Try input 1 first, then input 0, as the attention root candidate.
        parent = self.get_parent(normalize_node, 1)
        if parent is None or parent.op_type not in [
                "SkipLayerNormalization", "LayerNormalization", "Reshape"
        ]:
            parent = self.get_parent(normalize_node, 0)
            if parent is None or parent.op_type not in [
                    "SkipLayerNormalization", "LayerNormalization",
                    "Reshape"
            ]:
                logger.debug("Failed to match parent of normalize_node")
                continue

        # Output-projection path; three alternative layouts are tried in turn.
        qkv_nodes = self.match_parent_path(
            normalize_node,
            ['Add', 'MatMul', 'Reshape', 'Transpose', 'MatMul'],
            [0, 0, 0, 0, 0])
        if qkv_nodes is None:
            qkv_nodes = self.match_parent_path(
                normalize_node,
                ['MatMul', 'Reshape', 'Transpose', 'MatMul'],
                [1, 0, 0, 0])
            if qkv_nodes is None:
                qkv_nodes = self.match_parent_path(
                    normalize_node, ['Add', 'Einsum', 'Einsum'],
                    [0, 0, 0])
                if qkv_nodes is None:
                    logger.debug("Failed to match qkv nodes")
                    continue

        matmul_qkv = qkv_nodes[-1]
        # V branch feeds input 1 of the last node of the qkv path.
        v_nodes = self.match_parent_path(
            matmul_qkv, ['Transpose', 'Reshape', 'Add', 'MatMul'],
            [1, 0, 0, 0])
        if v_nodes is None:
            v_nodes = self.match_parent_path(matmul_qkv,
                                             ['Add', 'Einsum'], [1, 0])
            if v_nodes is None:
                logger.debug("Failed to match v path")
                continue

        add_v = v_nodes[-2]
        matmul_v = v_nodes[-1]

        # QK (attention score) branch feeds input 0.
        qk_nodes = self.match_parent_path(
            matmul_qkv, ['Softmax', 'Add', "Mul", 'MatMul'], [0, 0, 0, 0])
        if qk_nodes is None:
            qk_nodes = self.match_parent_path(matmul_qkv,
                                              ['Softmax', 'Add', 'Einsum'],
                                              [0, 0, 0])
            if qk_nodes is None:
                logger.debug("Failed to match qk_paths")
                continue
        matmul_qk = qk_nodes[-1]

        q_nodes = self.match_parent_path(
            matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
            [0, 0, 0, 0])
        if q_nodes is None:
            q_nodes = self.match_parent_path(matmul_qk, ['Add', 'Einsum'],
                                             [0, 0])
            if q_nodes is None:
                logger.debug("Failed to match q path")
                continue

        add_q = q_nodes[-2]
        matmul_q = q_nodes[-1]

        k_nodes = self.match_parent_path(
            matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
            [1, 0, 0, 0])
        if k_nodes is None:
            k_nodes = self.match_parent_path(matmul_qk,
                                             ['Mul', 'Add', 'Einsum'],
                                             [1, 0, 0])
            if k_nodes is None:
                logger.debug("Failed to match k path")
                continue
        add_k = k_nodes[-2]
        matmul_k = k_nodes[-1]

        # The mask is searched starting from the Add node of the QK path.
        mask_nodes = self.match_mask_path(qk_nodes[1])

        if mask_nodes is None:
            logger.debug("Cannot find mask_nodes.")
            continue

        if not self.has_constant_input(mask_nodes[1], 1):
            logger.debug(
                "Sub node expected to have an input with constant value 1.0."
            )
            continue

        # add a squeeze node to convert a 3-d mask to 2-d
        squeeze_node = self.match_parent_path(
            mask_nodes[-1], ['Squeeze'], [0]) or self.match_parent_path(
                mask_nodes[-1], ['Expand'], [0])
        squeeze_node_name = "Squeeze_3d_to_2d_mask"
        squeeze_output_name = squeeze_node_name + "_output"
        if squeeze_node is None and len(
                mask_nodes) == 5 and self.find_graph_input(
                    mask_nodes[-1].input[0]) is None:
            # NOTE(review): the node name and output name are fixed strings,
            # so taking this branch for more than one anchor would add
            # identically named Squeeze nodes - confirm this is intended.
            mask_input = mask_nodes[-1].input[1]
            self.add_node(
                helper.make_node("Squeeze", [mask_input],
                                 [squeeze_output_name],
                                 squeeze_node_name,
                                 axes=[1]))
            mask_nodes[-1].input[0] = squeeze_output_name

        is_same_root = self.check_attention_input(matmul_q, matmul_k,
                                                  matmul_v, parent,
                                                  output_name_to_node)
        if is_same_root:
            mask_index = self.attention_mask.process_mask(
                mask_nodes[-1].input[0])
            logger.debug("Create an Attention node.")

            # For tf models, q and v are flipped.
            # NOTE(review): the call below passes k before q (matmul_k,
            # matmul_q and add_k, add_q); verify the comment against the
            # create_attention_node signature.
            attention_node = self.attention_fusion.create_attention_node(
                mask_index, matmul_k, matmul_q, matmul_v, add_k, add_q,
                add_v, self.num_heads, self.hidden_size, parent.output[0],
                qkv_nodes[2].output[0])
            if attention_node is None:
                continue

            if qkv_nodes[1].op_type == 'Einsum':
                # add reshape before einsum
                tensor = helper.make_tensor(
                    name=qkv_nodes[1].name + "_newshape",
                    data_type=TensorProto.INT64,
                    dims=[4],
                    vals=np.int64([[
                        0, 0, self.num_heads,
                        int(self.hidden_size / self.num_heads)
                    ]]).tobytes(),
                    raw=True)
                self.add_initializer(tensor)
                reshape_ = helper.make_node(
                    "Reshape",
                    inputs=[
                        attention_node.output[0],
                        qkv_nodes[1].name + "_newshape"
                    ],
                    outputs=[qkv_nodes[1].name + "_reshape_output"],
                    name=qkv_nodes[1].name + "_reshape")
                qkv_nodes[1].input[
                    0] = qkv_nodes[1].name + "_reshape_output"
                self.add_node(reshape_)
            if parent.op_type == 'Reshape':
                # Temporary work around: we require the skiplayernorm and attention op be fed with 3-d input
                # Element 1 of the Reshape's existing shape initializer is
                # reused as the last dimension of the new 3-element shape.
                hidden_size = numpy_helper.to_array(
                    self.get_initializer(parent.input[1]))[1]
                tensor = helper.make_tensor(
                    name=parent.name + "_modified",
                    data_type=TensorProto.INT64,
                    dims=[3],
                    vals=np.int64([[1, -1, hidden_size]]).tobytes(),
                    raw=True)
                self.add_initializer(tensor)
                parent.input[1] = parent.name + "_modified"

            self.add_node(attention_node)
            attention_count += 1

            # The first two qkv nodes are kept; everything that fed the
            # fused computation is removed.
            nodes_to_remove.extend(qkv_nodes[2:])
            nodes_to_remove.extend(qk_nodes)
            nodes_to_remove.extend(q_nodes)
            nodes_to_remove.extend(k_nodes)
            nodes_to_remove.extend(v_nodes)
            nodes_to_remove.extend(mask_nodes)
        else:
            logger.debug("Root node not matched.")
            continue
    self.remove_nodes(nodes_to_remove)
    self.update_graph()
    logger.info(f"Fused Attention count:{attention_count}")
from yolo3 import YoloBody


def get_anchors(anchors_path):
    """Read YOLO anchors from the first line of a text file.

    The line is a comma-separated list of floats. The values are reshaped to
    ``(-1, 3, 2)`` and the first axis is reversed before returning.

    Args:
        anchors_path: Path of the anchors text file.

    Returns:
        numpy array of shape ``(groups, 3, 2)`` with the group order reversed.
    """
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]


# Script: copy the initializers of an exported ONNX model into a freshly built
# YoloBody and save the resulting state dict.
num_classes = 3
# Raw strings: sequences such as "\P" or "\y" are invalid escapes in a normal
# literal (a warning today, an error in a future Python). The path values are
# unchanged.
anchors_path = r'Practice\Pytorch\EPT\yolo_anchors.txt'
anchors = get_anchors(anchors_path)
model = YoloBody(anchors, num_classes)

onnx_model = onnx.load(r'Practice\Pytorch\EPT\Yolov3_55.onnx')
graph = onnx_model.graph
# Initializer name -> numpy array. Parameter names of the model are looked up
# in this dict, so they must match the ONNX initializer names.
initalizers = {
    init.name: numpy_helper.to_array(init) for init in graph.initializer
}

for name, p in model.named_parameters():
    p.data = (torch.from_numpy(initalizers[name])).data

torch.save(model.state_dict(), '999.pth')
def _test_numpy_helper_float_type(self, dtype):
    """Round-trip a random 13x37 array of ``dtype`` through a TensorProto.

    Checks that ``numpy_helper.from_array`` keeps the given name and that
    ``numpy_helper.to_array`` returns an array equal to the original.
    """
    expected = np.random.rand(13, 37).astype(dtype)
    proto = numpy_helper.from_array(expected, "test")
    self.assertEqual(proto.name, "test")
    np.testing.assert_equal(expected, numpy_helper.to_array(proto))
def fuse(self, reshape_node, input_name_to_nodes, output_name_to_node):
    """Replace a dynamically computed Reshape target with a constant shape.

    The Reshape's shape input must come from a 4-input Concat whose first
    input is ``Unsqueeze(Gather(Shape(root)))``. Two layouts are handled:

    * only input 0 is derived from the root shape and inputs 1..3 are
      one-element initializers (``-1`` followed by two positive values);
    * inputs 0 and 1 are both derived from the root shape and inputs 2..3
      are one-element positive initializers.

    A dimension copied from the root is encoded as ``0`` in the new shape.
    The rewrite happens only when the Reshape's data path leads back to the
    same root tensor and every Concat input produced exactly one shape entry.

    Args:
        reshape_node: Reshape node to inspect.
        input_name_to_nodes: Unused here; part of the fusion interface.
        output_name_to_node: Mapping from output name to producing node.
    """
    if reshape_node.input[1] not in output_name_to_node:
        return

    concat_node = output_name_to_node[reshape_node.input[1]]
    if concat_node.op_type != 'Concat' or len(concat_node.input) != 4:
        return

    path0 = self.model.match_parent_path(concat_node,
                                         ['Unsqueeze', 'Gather', 'Shape'],
                                         [0, 0, 0], output_name_to_node)
    if path0 is None:
        return
    (_, gather_0, shape_0) = path0

    shape = []
    gather_value = self.model.get_constant_value(gather_0.input[1])
    if gather_value == 0:
        shape.append(0)

    path1 = self.model.match_parent_path(concat_node,
                                         ['Unsqueeze', 'Gather', 'Shape'],
                                         [1, 0, 0], output_name_to_node)
    if path1 is None:
        input_1_proto = self.model.get_initializer(concat_node.input[1])
        input_2_proto = self.model.get_initializer(concat_node.input[2])
        input_3_proto = self.model.get_initializer(concat_node.input[3])
        if input_1_proto is None or input_2_proto is None or input_3_proto is None:
            return

        input_1 = numpy_helper.to_array(input_1_proto)
        input_2 = numpy_helper.to_array(input_2_proto)
        input_3 = numpy_helper.to_array(input_3_proto)
        if len(input_1) != 1 or len(input_2) != 1 or len(input_3) != 1:
            return

        if not (input_1[0] == -1 and input_2[0] > 0 and input_3[0] > 0):
            return

        shape.extend(input_1)
        shape.extend(input_2)
        shape.extend(input_3)
        gemm_path = self.model.match_parent_path(reshape_node,
                                                 ['Add', 'MatMul'], [0, 1],
                                                 output_name_to_node)
        if gemm_path is None:
            return

        top_matmul = gemm_path[-1]
        root_input = top_matmul.input[0]
        if shape_0.input[0] != root_input:
            return
    else:
        (_, gather_1, shape_1) = path1

        gather_value = self.model.get_constant_value(gather_1.input[1])
        if gather_value == 1:
            shape.append(0)

        input_2_proto = self.model.get_initializer(concat_node.input[2])
        input_3_proto = self.model.get_initializer(concat_node.input[3])
        if input_2_proto is None or input_3_proto is None:
            return

        input_2 = numpy_helper.to_array(input_2_proto)
        input_3 = numpy_helper.to_array(input_3_proto)
        if len(input_2) != 1 or len(input_3) != 1:
            return

        if not (input_2[0] > 0 and input_3[0] > 0):
            return

        shape.extend(input_2)
        shape.extend(input_3)
        gemm_path = self.model.match_parent_path(reshape_node,
                                                 ['Mul', 'Add', 'MatMul'],
                                                 [0, 0, 1],
                                                 output_name_to_node)
        if gemm_path is None:
            return

        top_matmul = gemm_path[-1]
        root_input = top_matmul.input[0]
        if shape_0.input[0] != root_input or shape_1.input[0] != root_input:
            return

    # A Gather index other than the expected constant contributes no entry,
    # which would leave the target shorter than the Concat it replaces.
    # Skip the fusion instead of writing a wrong-rank shape.
    if len(shape) != len(concat_node.input):
        return

    self.replace_reshape_node(shape, reshape_node, concat_node)
def resize_model(self):
    """Rewrite the model in place from ``old_parameters`` to ``new_parameters``.

    Steps, in order:

    1. Graph inputs whose dimension 1 equals the old ``seq_len`` get the new one.
    2. Scalar and one-element initializers equal to a known old value
       (num_heads, seq_len, size_per_head, hidden_size, 4x / 3x hidden_size
       and, for scalars only, ``1/sqrt(size_per_head)`` and
       ``sqrt(size_per_head)``) are overwritten with the new value. The first
       matching candidate wins.
    3. Initializers with a dimension tied to hidden size, word dictionary
       size or max word position are recorded, then replaced by a resized
       copy named ``<name>_resize`` (built by ``self.resize_weight``).
    4. Split nodes are re-created with the new hidden size, replacement
       Constant nodes are added for num_heads / sqrt(size_per_head), and
       every non-Constant node is named ``<op_type>_<index>``.
    5. Dimensions 2 and 4 of every graph input and output after the first,
       and dimension 2 of the first output, are set to the new sizes.

    Reads the module-level ``old_parameters`` and ``new_parameters`` dicts and
    prints every change it makes.
    """
    graph = self.model.graph
    initializers = graph.initializer

    for graph_input in graph.input:
        seq_dim = graph_input.type.tensor_type.shape.dim[1]
        if seq_dim.dim_value == old_parameters["seq_len"]:
            print("input", graph_input.name,
                  graph_input.type.tensor_type.shape)
            seq_dim.dim_value = new_parameters["seq_len"]
            print("=>", graph_input.type.tensor_type.shape)

    supported_dtypes = {
        TensorProto.FLOAT: np.float32,
        TensorProto.INT32: np.int32,
        TensorProto.INT64: np.int64,
    }

    old_hidden = old_parameters["hidden_size"]
    new_hidden = new_parameters["hidden_size"]
    # (old value, new value) candidates, in the order they must be tested.
    integer_pairs = [
        (old_parameters["num_heads"], new_parameters["num_heads"]),
        (old_parameters["seq_len"], new_parameters["seq_len"]),
        (old_parameters["size_per_head"], new_parameters["size_per_head"]),
        (old_hidden, new_hidden),
        (4 * old_hidden, 4 * new_hidden),
        (3 * old_hidden, 3 * new_hidden),
    ]
    # Attention scaling constants; only matched against 0-d initializers.
    scalar_only_pairs = [
        (1.0 / np.sqrt(old_parameters["size_per_head"]),
         1.0 / np.sqrt(new_parameters["size_per_head"])),
        (np.sqrt(old_parameters["size_per_head"]),
         np.sqrt(new_parameters["size_per_head"])),
    ]

    reshapes = {}
    for initializer in initializers:
        tensor = numpy_helper.to_array(initializer)

        dtype = supported_dtypes.get(initializer.data_type)
        if dtype is None:
            # Previously this printed `dtype`, which is unbound on the first
            # iteration and stale afterwards. Report the real type and keep
            # the tensor's own dtype for any replacement value.
            print("data type not supported by this tool:",
                  initializer.data_type)
            dtype = tensor.dtype

        is_single_element_vector = (len(tensor.shape) == 1
                                    and tensor.shape[0] == 1)
        is_scalar = len(tensor.shape) == 0
        if is_single_element_vector:
            candidates = integer_pairs
        elif is_scalar:
            candidates = integer_pairs + scalar_only_pairs
        else:
            candidates = []

        for old_value, new_value in candidates:
            if tensor == old_value:
                if is_scalar:
                    print("initializer type={}".format(initializer.data_type),
                          initializer.name, old_value, "=>", new_value)
                    replacement = np.asarray(new_value, dtype=dtype)
                else:
                    print("initializer type={}".format(initializer.data_type),
                          initializer.name, old_value, "=>[", new_value, "]")
                    replacement = np.asarray([new_value], dtype=dtype)
                initializer.CopyFrom(
                    numpy_helper.from_array(replacement, initializer.name))
                break

        new_shape = []
        shape_changed = False
        for dim in tensor.shape:
            if dim == old_hidden:
                new_shape.append(new_hidden)
                shape_changed = True
            elif dim == 4 * old_hidden:
                new_shape.append(4 * new_hidden)
                shape_changed = True
            elif dim == 3 * old_hidden:
                new_shape.append(3 * new_hidden)
                shape_changed = True
            # NOTE(review): membership test - presumably the old
            # "word_dict_size" entry is a collection of accepted sizes; confirm.
            elif dim in old_parameters["word_dict_size"]:
                new_shape.append(new_parameters["word_dict_size"])
                shape_changed = True
            elif dim == old_parameters["max_word_position"]:
                new_shape.append(new_parameters["max_word_position"])
                shape_changed = True
            else:
                new_shape.append(dim)
        if shape_changed:
            reshapes[initializer.name] = new_shape
            print("initializer", initializer.name, tensor.shape, "=>",
                  new_shape)

    for initializer_name in reshapes:
        self.replace_input_of_all_nodes(initializer_name,
                                        initializer_name + '_resize')
        tensor = self.resize_weight(initializer_name,
                                    reshapes[initializer_name])
        self.model.graph.initializer.extend([tensor])

    # Add node name, replace split node attribute.
    nodes_to_add = []
    nodes_to_remove = []
    for i, node in enumerate(graph.node):
        if node.op_type == "Split":
            split_sizes = [new_hidden, new_hidden, new_hidden]
            nodes_to_add.append(
                onnx.helper.make_node('Split',
                                      node.input,
                                      node.output,
                                      name="Split_{}".format(i),
                                      axis=2,
                                      split=split_sizes))
            nodes_to_remove.append(node)
            print("update split", split_sizes)
        if node.op_type == "Constant":
            # NOTE(review): the replaced Constant node is not queued in
            # nodes_to_remove, so the old and new node share an output
            # name - confirm whether the old node should be removed.
            for att in node.attribute:
                if att.name == 'value':
                    if numpy_helper.to_array(
                            att.t) == old_parameters["num_heads"]:
                        nodes_to_add.append(
                            onnx.helper.make_node(
                                'Constant',
                                inputs=node.input,
                                outputs=node.output,
                                value=onnx.helper.make_tensor(
                                    name=att.t.name,
                                    data_type=TensorProto.INT64,
                                    dims=[],
                                    vals=[new_parameters["num_heads"]])))
                        print("constant", att.t.name,
                              old_parameters["num_heads"], "=>",
                              new_parameters["num_heads"])
                    if numpy_helper.to_array(att.t) == np.sqrt(
                            old_parameters["size_per_head"]):
                        nodes_to_add.append(
                            onnx.helper.make_node(
                                'Constant',
                                inputs=node.input,
                                outputs=node.output,
                                value=onnx.helper.make_tensor(
                                    name=att.t.name,
                                    data_type=TensorProto.FLOAT,
                                    dims=[],
                                    vals=[
                                        np.sqrt(
                                            new_parameters["size_per_head"])
                                    ])))
                        print("constant", att.t.name,
                              np.sqrt(old_parameters["size_per_head"]), "=>",
                              np.sqrt(new_parameters["size_per_head"]))
        else:
            node.name = node.op_type + "_" + str(i)
    for node in nodes_to_remove:
        graph.node.remove(node)
    graph.node.extend(nodes_to_add)

    # Every graph input/output after the first gets dims 2 and 4 rewritten.
    for i, graph_input in enumerate(self.model.graph.input):
        if i > 0:
            dim_proto = graph_input.type.tensor_type.shape.dim[2]
            dim_proto.dim_value = new_parameters["num_heads"]
            dim_proto = graph_input.type.tensor_type.shape.dim[4]
            dim_proto.dim_value = new_parameters["size_per_head"]

    for i, output in enumerate(self.model.graph.output):
        if i == 0:
            dim_proto = output.type.tensor_type.shape.dim[2]
            dim_proto.dim_value = new_parameters["hidden_size"]
        if i > 0:
            dim_proto = output.type.tensor_type.shape.dim[2]
            dim_proto.dim_value = new_parameters["num_heads"]
            dim_proto = output.type.tensor_type.shape.dim[4]
            dim_proto.dim_value = new_parameters["size_per_head"]
def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
    """Try to fuse the attention subgraph that ends at ``normalize_node``.

    Walks parent paths from ``normalize_node`` to match the output
    projection, the V branch, the fully connected QKV projection, the QK
    branch with its mask (two alternative layouts), and the Q and K branches.
    It then matches the past/present state tensors. On success it calls
    ``self.create_attention_node`` and sets ``self.prune_graph = True`` so
    that the old subgraph is cleaned up later. On any mismatch it logs a
    message and returns without changing the graph.

    Args:
        normalize_node: Node whose input 0 is expected to lead to the
            attention output path.
        input_name_to_nodes: Mapping passed on to ``match_present``.
        output_name_to_node: Mapping from output name to producing node.
    """
    past = None
    present = None
    return_indice = []
    qkv_nodes = self.model.match_parent_path(
        normalize_node,
        ['Add', 'Reshape', 'Gemm', 'Reshape', 'Reshape', 'Transpose', 'MatMul'],
        [0, None, 0, 0, 0, 0, 0],
        output_name_to_node=output_name_to_node,
        return_indice=return_indice
        ) # yapf: disable
    if qkv_nodes is None:
        return
    (add_qkv, reshape_qkv, gemm_qkv, reshape_1, reshape_2, transpose_qkv,
     matmul_qkv) = qkv_nodes

    # return_indice[0] is used as the Add input index on the matched path;
    # the other Add input is the residual connection.
    another_input = add_qkv.input[1 - return_indice[0]]

    v_nodes = self.model.match_parent_path(
        matmul_qkv, ['Concat', 'Transpose', 'Reshape', 'Split'],
        [1, 1, 0, 0])
    if v_nodes is None:
        logger.debug("fuse_attention: failed to match v path")
        return
    (concat_v, transpose_v, reshape_v, split_fc) = v_nodes

    # Fully connected projection feeding the Split: Gemm layout first, then
    # the Add + MatMul layout.
    fc_nodes = self.model.match_parent_path(
        split_fc, ['Reshape', 'Gemm', 'Reshape', 'LayerNormalization'],
        [0, 0, 0, 0], output_name_to_node)
    if fc_nodes is None:
        fc_nodes = self.model.match_parent_path(
            split_fc, ['Add', 'MatMul', 'LayerNormalization'],
            [0, None, 0], output_name_to_node)
        if fc_nodes is None:
            logger.debug("fuse_attention: failed to match fc path")
            return
        fc_weight = fc_nodes[1].input[1]
        i, _ = self.model.get_constant_input(fc_nodes[0])
        fc_bias = fc_nodes[0].input[i]
    else:
        fc_weight = fc_nodes[1].input[1]
        fc_bias = fc_nodes[1].input[2]

    layernorm_before_attention = fc_nodes[-1]

    if not another_input in layernorm_before_attention.input:
        logger.debug(
            "Add and LayerNormalization shall have one same input")
        return

    is_unidirectional = True
    slice_mask = None
    input_mask_nodes = None
    concat_k_to_match = None
    qk_nodes = self.model.match_parent_path(
        matmul_qkv, ['Softmax', 'Sub', 'Mul', 'Div', 'MatMul'],
        [0, 0, 0, 0, 0])
    if qk_nodes is not None:
        (softmax_qk, sub_qk, mul_qk, div_qk, matmul_qk) = qk_nodes
        mask_nodes = self.model.match_parent_path(
            sub_qk,
            ['Mul', 'Sub', 'Slice', 'Slice', 'Unsqueeze', 'Sub', 'Squeeze', 'Slice', 'Shape', 'Div'],
            [1, 0, 1, 0, 1, 0, 0, 0, 0, 0])  # yapf: disable
        if mask_nodes is None:
            logger.debug(
                "fuse_attention: failed to match unidirectional mask path")
            return
        div_mask = mask_nodes[-1]
        slice_mask = mask_nodes[3]

        # The mask path must end at the same Div node as the QK path.
        if div_qk != div_mask:
            logger.debug("fuse_attention: skip since div_qk != div_mask")
            return
    else:
        # New pattern for gpt2 from PyTorch 1.5.0 and Transformers 2.9.0.
        i, qk_nodes, _ = self.model.match_parent_paths(
            matmul_qkv,
            [(['Softmax', 'Where', 'Div', 'MatMul'], [0, 0, 1, 0]),
             (['Softmax', 'Add', 'Where', 'Div', 'MatMul'
               ], [0, 0, None, 1, 0])], output_name_to_node)
        if qk_nodes is None:
            logger.debug("fuse_attention: failed to match qk nodes")
            return

        where_qk = qk_nodes[-3]
        div_qk = qk_nodes[-2]
        matmul_qk = qk_nodes[-1]

        # i == 1 means the second pattern matched, which has an extra Add
        # whose parent path is matched as the input attention mask.
        if i == 1:
            add_qk = qk_nodes[1]
            _, input_mask_nodes, _ = self.model.match_parent_paths(
                add_qk,
                [
                    (['Mul', 'Sub', 'Cast', 'Unsqueeze', 'Unsqueeze', 'Reshape'], [None, 0, 1, 0, 0, 0]),
                    (['Mul', 'Sub', 'Unsqueeze', 'Unsqueeze', 'Reshape'], [None, 0, 1, 0, 0]),
                    (['Mul', 'Sub', 'Unsqueeze', 'Unsqueeze'], [None, 0, 1, 0]),  # useless cast and reshape are removed.
                ],
                output_name_to_node)  # yapf: disable
            if input_mask_nodes is None:
                logger.debug(
                    "fuse_attention: failed to match input attention mask path"
                )
                return

        mask_nodes = self.model.match_parent_path(
            where_qk,
            ['Cast', 'Slice', 'Slice', 'Unsqueeze', 'Sub', 'Squeeze', 'Slice', 'Shape'],
            [ 0, 0, 0, 1, 0, 0, 0, 0],
            output_name_to_node)  # yapf: disable
        if mask_nodes is None:
            # TODO: match mask path for GPT2LMHeadModel_BeamSearchStep.
            logger.debug("fuse_attention: failed to match mask path")
            return

        slice_mask = mask_nodes[2]

        # NOTE(review): div_or_concat is dereferenced without a None check -
        # confirm get_parent cannot return None here.
        div_or_concat = self.model.get_parent(mask_nodes[-1], 0,
                                              output_name_to_node)
        if div_or_concat.op_type == "Div":
            div_mask = div_or_concat
            if div_qk != div_mask:
                logger.debug(
                    "fuse_attention: skip since div_qk != div_mask")
                return
        elif div_or_concat.op_type == "Concat":
            concat_k_to_match = div_or_concat
        else:
            # NOTE(review): this branch only logs and does not return, so
            # fusion continues despite the message - confirm intent.
            logger.debug("fuse_attention: failed to match mask path")

    # Validate that the mask data is either lower triangular (unidirectional) or all ones
    # NOTE(review): assumes slice_mask.input[0] is an initializer; a None
    # result from get_initializer is passed straight to to_array.
    mask_data = numpy_helper.to_array(
        self.model.get_initializer(slice_mask.input[0]))
    if not (len(mask_data.shape) == 4 and mask_data.shape[:2] == (1, 1)
            and mask_data.shape[2] == mask_data.shape[3]):
        logger.debug(
            "fuse_attention: skip since mask shape is not 1x1xWxW")
        return

    if np.allclose(mask_data, np.ones_like(mask_data)):
        is_unidirectional = False
    elif not np.allclose(mask_data, np.tril(np.ones_like(mask_data))):
        logger.debug(
            "fuse_attention: skip since mask is neither lower triangular nor ones"
        )
        return

    q_nodes = self.model.match_parent_path(
        matmul_qk, ['Transpose', 'Reshape', 'Split'], [0, 0, 0])
    if q_nodes is None:
        logger.debug("fuse_attention: failed to match q path")
        return
    (transpose_q, reshape_q, split_q) = q_nodes
    # Q, K and V must all originate from the same Split node.
    if split_fc != split_q:
        logger.debug("fuse_attention: skip since split_fc != split_q")
        return

    k_nodes = self.model.match_parent_path(
        matmul_qk, ['Concat', 'Transpose', 'Reshape', 'Split'],
        [1, 1, 0, 0])
    if k_nodes is None:
        # This pattern is from pytorch 1.7.1 and transformers 4.6.1
        k_nodes = self.model.match_parent_path(
            matmul_qk,
            ['Transpose', 'Concat', 'Transpose', 'Reshape', 'Split'],
            [1, 0, 1, 0, 0])
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return
        else:
            (_, concat_k, transpose_k, reshape_k, split_k) = k_nodes
    else:
        (concat_k, transpose_k, reshape_k, split_k) = k_nodes
    if split_fc != split_k:
        logger.debug("fuse_attention: skip since split_fc != split_k")
        return

    if concat_k_to_match and concat_k != concat_k_to_match:
        logger.debug(
            "fuse_attention: skip since concat_k != concat_k_to_match")
        return

    attention_mask_input_name = ''
    if input_mask_nodes is not None:
        input_name = input_mask_nodes[-1].input[0]
        attention_mask_input_name = self.cast_attention_mask(input_name)

    # Match past and present paths
    past = self.match_past_pattern_1(concat_k, concat_v, output_name_to_node) or \
           self.match_past_pattern_2(concat_k, concat_v, output_name_to_node)
    if past is None:
        logger.info("fuse_attention: failed to match past path")
        return
    if not self.model.find_graph_input(past):
        # Deliberately not a failure: only logged.
        logger.debug("past is not graph input.")
        # For GPT2LMHeadModel_BeamSearchStep, there is an extra Gather node to select beam index so it is not graph input.

    present = self.match_present(concat_v, input_name_to_nodes)
    if present is None:
        logger.info("fuse_attention: failed to match present path")
        return
    if not self.model.find_graph_output(present):
        logger.info("expect present to be graph output")
        return

    self.create_attention_node(fc_weight, fc_bias, gemm_qkv, past, present,
                               layernorm_before_attention.output[0],
                               reshape_qkv.output[0],
                               attention_mask_input_name, is_unidirectional)

    # we rely on prune_graph() to clean old subgraph nodes:
    # qk_nodes + q_nodes + k_nodes + v_nodes + mask_nodes + [reshape_qkv, transpose_qkv, matmul_qkv]
    self.prune_graph = True
def process_embedding(self):
    """
    Automatically detect word, segment and position embeddings.

    For each LayerNormalization node whose parents match Add <- Reshape <- Slice,
    the Slice's input 0 is looked up as an initializer and accepted as the
    position embedding when it is 2-D. The word and segment embeddings are then
    taken from the 2-D initializers found under the Add node's first parent
    (which must itself be an Add). On success an embedding subgraph is created,
    the graph is pruned and the search stops.

    Returns nothing. Gives up (returns) when the position embedding is not 2-D,
    when the number of candidate embeddings is not exactly two, or when word and
    segment embeddings cannot both be identified.
    """
    logger.info("start processing embedding layer...")
    output_name_to_node = self.output_name_to_node()

    layer_norm_nodes = self.get_nodes_by_op_type("LayerNormalization")
    for layer_norm_node in layer_norm_nodes:
        pos_embed_path = self.match_parent_path(
            layer_norm_node, ['Add', 'Reshape', 'Slice'], [0, 1, 0],
            output_name_to_node)
        if pos_embed_path is None:
            continue

        add_node, reshape_node, slice_node = pos_embed_path
        initializer = self.get_initializer(slice_node.input[0])
        if initializer is None:
            continue

        temp = numpy_helper.to_array(initializer)
        if len(temp.shape) == 2:
            logger.info(
                "Found position embedding. name:{}, shape:{}".format(
                    initializer.name, temp.shape))
            position_embedding = initializer.name
        else:
            logger.info(
                "Failed to find position embedding. name:{}, shape:{}".
                format(initializer.name, temp.shape))
            return

        first_parent = self.get_parent(add_node, 0, output_name_to_node)
        if first_parent is not None and first_parent.op_type == "Add":
            embeddings = self.get_2d_initializers_from_parent_subgraphs(
                first_parent)
            if len(embeddings) != 2:
                logger.warning(
                    "Failed to find two embeddings (word and segment) from Add node. Found {}"
                    .format(embeddings))
                return

            word_embedding = None
            segment_embedding = None
            for name, shape in embeddings.items():
                # An initializer whose first dimension is 2 is taken as the
                # segment embedding; anything else as the word embedding.
                if shape[0] == 2:
                    segment_embedding = name
                    logger.info(
                        "Found segment embedding. name:{}, shape:{}".
                        format(name, shape))
                else:
                    word_embedding = name
                    logger.info(
                        "Found words embedding. name:{}, shape:{}".format(
                            name, shape))

            if word_embedding is None or segment_embedding is None:
                logger.info(
                    "Failed to find both word and segment embedding")
                return

            logger.info("Create Embedding node")
            self.create_embedding_subgraph(layer_norm_node, word_embedding,
                                           segment_embedding,
                                           position_embedding)
            # Prune graph to remove those original embedding nodes.
            self.prune_graph()
            break
def read_pb(filename):
    """Load a serialized ``TensorProto`` file and return it as a numpy array."""
    with open(filename, 'rb') as pb_file:
        serialized = pb_file.read()
    proto = onnx.TensorProto()
    proto.ParseFromString(serialized)
    return numpy_helper.to_array(proto)