def test_symbolic_shape_infer(self):
    cwd = os.getcwd()
    test_model_dir = os.path.join(cwd, '..', 'models')
    for filename in Path(test_model_dir).rglob('*.onnx'):
        if filename.name.startswith('.'):
            continue  # skip some bad model files
        print("Running symbolic shape inference on : " + str(filename))
        SymbolicShapeInference.infer_shapes(in_mp=onnx.load(str(filename)),
                                            auto_merge=True,
                                            int_max=100000,
                                            guess_output_rank=True)
def _export_model(self, *inputs, **kwargs):
    # 1. Set the self._device from the user module
    # 2. Verify input schema matches schema used on previous model export
    # 3. Export the user model under self._export_training_flag mode
    # Return True if the model needed to be exported, False if no export was required.

    schema = _io._extract_schema({'args': copy.copy(inputs), 'kwargs': copy.copy(kwargs)})
    if self._onnx_model and schema == self._input_info.schema:
        # All required models have already been exported previously
        return False

    self._set_device_from_module()
    self._onnx_model = self._get_exported_model(*inputs, **kwargs)
    if self._save_onnx:
        onnx.save(self._onnx_model, self._save_onnx_prefix + '_torch_exporter.onnx')

    if self._run_symbolic_shape_infer:
        self._onnx_model = SymbolicShapeInference.infer_shapes(self._onnx_model,
                                                               auto_merge=True,
                                                               guess_output_rank=True)

    return True
def _export_model(self, *inputs, **kwargs):
    # 1. Set the self._device from the user module
    # 2. Verify input schema matches schema used on previous model export
    # 3. Export the user model under self._export_training_flag mode
    # Return True if the model needed to be exported, False if no export was required.

    # Note: Model is only exported when:
    #   1. Model has never been exported before.
    #   2. Model input schema has changed (changes in inputs requiring gradient, shape, boolean input values, etc.)
    # Model is not re-exported when the model parameters change. This can happen when the model is a stateful model,
    # or the user explicitly changed model parameters after the onnx export.

    schema = _io._extract_schema({'args': copy.copy(inputs), 'kwargs': copy.copy(kwargs)})
    if self._onnx_models.exported_model and schema == self._input_info.schema and not self._original_model_has_changed:
        # All required models have already been exported previously
        return False

    self._set_device_from_module(inputs, kwargs)
    self._onnx_models.exported_model = self._get_exported_model(schema, *inputs, **kwargs)
    load_aten_op_executor_cpp_extension_if_needed(self._onnx_models.exported_model)

    if self._debug_options.save_onnx_models.save:
        self._onnx_models.save_exported_model(self._debug_options.save_onnx_models.path,
                                              self._debug_options.save_onnx_models.name_prefix,
                                              self._export_mode)

    if self._run_symbolic_shape_infer:
        self._onnx_models.exported_model = SymbolicShapeInference.infer_shapes(self._onnx_models.exported_model,
                                                                               auto_merge=True,
                                                                               guess_output_rank=True)

    return True
def _init_session(self):
    if self.onnx_model_ is None:
        return

    self._verify_fully_optimized_model(self.onnx_model_)

    if self.run_symbolic_shape_infer:
        self.onnx_model_ = SymbolicShapeInference.infer_shapes(self.onnx_model_,
                                                               auto_merge=True,
                                                               guess_output_rank=True)

    # An old ORT session may already exist and occupy GPU memory when creating a new session; this may cause an OOM error.
    # For example, load_state_dict will be called before returning from this function, and it calls _init_session again.
    del self.session

    self.session, self.train_io_binding, self.eval_io_binding, self.output_name, _, self.output_types = \
        create_ort_training_session_with_optimizer(
            self.onnx_model_, self.device_,
            self.training_optimizer_name_, self.learning_rate_description_.name_, self.map_optimizer_attributes_,
            self.world_rank, self.world_size,
            self.gradient_accumulation_steps, bind_parameters=False,
            use_mixed_precision=self.use_mixed_precision,
            allreduce_post_accumulation=self.allreduce_post_accumulation_,
            deepspeed_zero_stage=self.deepspeed_zero_stage_,
            enable_grad_norm_clip=self.enable_grad_norm_clip_,
            frozen_weights=self.frozen_weights_, opset_version=self.opset_version_,
            use_deterministic_compute=self._use_deterministic_compute,
            use_invertible_layernorm_grad=self.use_invertible_layernorm_grad,
            enable_adasum=self.enable_adasum)

    self.loss_scale_input_name = self.session.loss_scale_input_name

    if self.use_mixed_precision:
        self.input_desc_with_lr_and_loss_scale = [
            *self.input_desc_with_lr,
            IODescription(self.loss_scale_input_name, [], torch.float32)]

    # ORT backend has modified model output dtype from float32 to float16.
    for o_desc in self.model_desc_.outputs_:
        if self.use_mixed_precision and o_desc.dtype_ == torch.float32 and not self.session.is_output_fp32_node(o_desc.name_):
            o_desc.eval_dtype_ = torch.float16
        else:
            o_desc.eval_dtype_ = o_desc.dtype_

    # gradient accumulation buffers are connected to a single node with a boolean, dimension 1 tensor output.
    # add a matching output to drive gradient accumulation.
    if self.gradient_accumulation_steps > 1:
        self.output_desc_with_group_accumulated_gradients = [
            *self.model_desc_.outputs_,
            IODescription(get_group_accumulated_gradients_output_node_arg_name(self.session), [1], torch.bool)]

    if self.use_mixed_precision:
        # when ready to use accumulated gradient with mixed precision, we need to fetch all_finite to determine
        # if the gradient is usable.
        self.output_desc_with_all_fp_16_or_fp32_gradients_finite = [
            *self.model_desc_.outputs_,
            IODescription(get_all_gradients_finite_arg_name(self.session), [1], torch.bool)]

    if self.state_dict_:
        self.load_state_dict(self.state_dict_, self.strict_)
        self.state_dict_ = None
def shape_inference(gpt2_onnx_path):
    # Run symbolic shape inference to work around an ORT shape inference issue for subgraphs.
    from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
    out = SymbolicShapeInference.infer_shapes(onnx.load(gpt2_onnx_path), auto_merge=True, guess_output_rank=False)
    if out:
        # TODO: Use external format if input has extra data.
        onnx.save(out, gpt2_onnx_path)
    else:
        print("Failed to run symbolic shape inference on the model.")
def shape_inference(decoder_onnx_path):
    if version.parse(onnx.__version__) >= version.parse('1.11.0'):
        logger.warning("SymbolicShapeInference might fail with onnx 1.11. Please install onnx 1.10.0 for now.")

    # Run symbolic shape inference to work around an ORT shape inference issue for subgraphs.
    from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
    out = SymbolicShapeInference.infer_shapes(onnx.load(decoder_onnx_path), auto_merge=True, guess_output_rank=False)
    if out:
        # TODO: Use external format if input has extra data.
        onnx.save(out, decoder_onnx_path)
    else:
        print("Failed to run symbolic shape inference on the model.")
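# Both shape_inference helpers above carry a TODO about switching to the external-data format
# when the model is large (e.g., past the 2 GB protobuf limit). A minimal sketch of how that
# save could look using onnx.save_model's external-data options; the output path, location
# name, and size threshold below are placeholder choices, not part of the original code:
import onnx

def save_with_external_data(model, output_path):
    onnx.save_model(model,
                    output_path,
                    save_as_external_data=True,      # write large tensors outside the .onnx file
                    all_tensors_to_one_file=True,
                    location="model_data.bin",       # placeholder file name for the tensor data
                    size_threshold=1024)             # illustrative threshold, in bytes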
def _export_model(self, *inputs, **kwargs):
    # 1. Set the self._device from the user module
    # 2. Verify input schema matches schema used on previous model export
    # 3. Export the user model under self._export_training_flag mode
    # Return True if the model needed to be exported, False if no export was required.

    # Note: Model is only exported when:
    #   1. Model has never been exported before.
    #   2. Model input schema has changed (changes in inputs requiring gradient, shape, boolean input values, etc.)
    # Model is not re-exported when the model parameters change. This can happen when the model is a stateful model,
    # or the user explicitly changed model parameters after the onnx export.

    # Record random states here and restore later in case any of them gets changed during the export,
    # e.g., some sympy functions in symbolic_shape_infer will change Python's random state.
    random_states = _utils.get_random_states()

    schema = _io._extract_schema({"args": copy.copy(inputs), "kwargs": copy.copy(kwargs)})
    if (self._onnx_models.exported_model
            and schema == self._input_info.schema
            and not self._original_model_has_changed):
        # All required models have already been exported previously
        return False

    self._set_device_from_module(inputs, kwargs)
    self._onnx_models.exported_model = self._get_exported_model(schema, *inputs, **kwargs)

    if self._debug_options.save_onnx_models.save:
        self._onnx_models.save_exported_model(
            self._debug_options.save_onnx_models.path,
            self._debug_options.save_onnx_models.name_prefix,
            self._export_mode,
        )

    if self._run_symbolic_shape_infer:
        self._onnx_models.exported_model = SymbolicShapeInference.infer_shapes(
            self._onnx_models.exported_model, auto_merge=True, guess_output_rank=True)

    # Restore the recorded random states
    _utils.set_random_states(random_states)

    return True
def _run(self, inputs_np):
    inputs_np_dict = {k: v for k, v in inputs_np if k != ""}
    model = onnx.ModelProto()
    model.CopyFrom(omm.model)
    sess_options = onnxruntime.SessionOptions()
    session = onnxruntime.InferenceSession(model.SerializeToString(), sess_options)
    ort_outputs = session.run(None, inputs_np_dict)

    # Replace symbolic input dims with the concrete shapes of the provided inputs.
    model.graph.ClearField("value_info")
    initializers = {i.name: i for i in model.graph.initializer}
    for i in model.graph.input:
        if i.name in initializers:
            continue
        for idx, d in enumerate(i.type.tensor_type.shape.dim):
            if d.dim_param != "":
                d.ClearField("dim_param")
            d.dim_value = inputs_np_dict[i.name].shape[idx]

    try:
        model = SymbolicShapeInference.infer_shapes(model, 2**31 - 1, True, True, 1)
    except Exception:
        logging.warning("Shape infer by onnxruntime failed.")

    with TemporaryDirectory() as tmpdir:
        clear_op_code_generator()
        model_code_generator = code_gen.get_model_code_generator(
            model,
            output_dir=tmpdir,
            tensor_inplace=True,
            simplify_names=True,
            shape_infer=False)
        model_code_generator.run()
        spec = importlib.util.spec_from_file_location("model", os.path.join(tmpdir, "model.py"))
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        pt_outputs = mod.test_run_model(
            [torch.from_numpy(v) for k, v in inputs_np if k != ""])
        if type(pt_outputs) == torch.Tensor:
            pt_outputs = [pt_outputs.detach().numpy()]
        elif type(pt_outputs) in (list, tuple):
            pt_outputs = [o.detach().numpy() for o in pt_outputs]
        for l, r in zip(ort_outputs, pt_outputs):
            assert np.allclose(l, r, atol=1e-4, rtol=1e-4, equal_nan=True)
def _run(self, inputs_np, onnx_model, gen_kwargs=None, tol=None):
    inputs_np_dict = {k: v for k, v in inputs_np}
    model = onnx.ModelProto()
    model.CopyFrom(onnx_model)
    sess_options = onnxruntime.SessionOptions()
    session = onnxruntime.InferenceSession(model.SerializeToString(), sess_options)
    ort_outputs = session.run(None, inputs_np_dict)

    # Replace symbolic input dims with the concrete shapes of the provided inputs.
    model.graph.ClearField("value_info")
    initializers = {i.name: i for i in model.graph.initializer}
    for i in model.graph.input:
        if i.name in initializers:
            continue
        for idx, d in enumerate(i.type.tensor_type.shape.dim):
            if d.dim_param != "":
                d.ClearField("dim_param")
            d.dim_value = inputs_np_dict[i.name].shape[idx]

    try:
        model = SymbolicShapeInference.infer_shapes(model, 2**31 - 1, True, True, 1)
    except Exception:
        logging.warning("Shape infer by onnxruntime failed.")

    with TemporaryDirectory() as tmpdir:
        if gen_kwargs is None:
            gen_kwargs = {}
        code_gen.gen(model,
                     output_dir=tmpdir,
                     tensor_inplace=False,
                     simplify_names=False,
                     shape_infer=False,
                     **gen_kwargs)
        spec = importlib.util.spec_from_file_location("model", os.path.join(tmpdir, "model.py"))
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        pt_outputs = mod.test_run_model([torch.from_numpy(v) for _, v in inputs_np])
        if tol is None:
            tol = {"atol": 1e-5, "rtol": 1e-5}
        for l, r in zip(ort_outputs, [o.detach().numpy() for o in pt_outputs]):
            assert np.allclose(l, r, equal_nan=True, **tol)
def test_unsqueeze_opset_11(self):
    graph = helper.make_graph([
        helper.make_node("Unsqueeze", ["input"], ["temp"], axes=[0]),
        helper.make_node("Identity", ["temp"], ["output"]),
    ], "Unsqueeze_Test", [
        helper.make_tensor_value_info('input', TensorProto.FLOAT, ['b', 's']),
    ], [
        helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 'b', 's']),
    ])
    model = helper.make_model(graph, producer_name='Unsqueeze_Test_Model')
    model.opset_import[0].version = 11

    inferred = SymbolicShapeInference.infer_shapes(model, auto_merge=True)
    expected_shapes = [
        helper.make_tensor_value_info('temp', TensorProto.FLOAT, [1, 'b', 's']),
        helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 'b', 's'])
    ]
    self._check_shapes(graph, inferred.graph, expected_shapes)
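# The test above calls a _check_shapes helper that is not included in this snippet. A minimal
# sketch of what such a comparison might look like, assuming it matches the inferred
# value_info entries against the expected ones by name; this is an assumption about the test
# harness, not the actual helper:
def _check_shapes(self, graph, inferred_graph, vis):
    # keep the original graph's value_info entries that the expectations do not override
    names_in_vis = {x.name for x in vis}
    expected = sorted([x for x in graph.value_info if x.name not in names_in_vis] + vis,
                      key=lambda x: x.name)
    inferred = sorted(inferred_graph.value_info, key=lambda x: x.name)
    self.assertEqual(expected, list(inferred))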
def preprocess_onnx_model(self):
    for n in self.onnx_model.graph.node:
        inputs, outputs = [], []
        for ls, f in ((inputs, n.input), (outputs, n.output)):
            for i in f:
                new_i = re.sub("[:/.]", "_", i)
                ls.append(new_i)
                if i != ls[-1] and not self.rename_helper.simplify_names:
                    logging.info(f"Tensor name {i} is changed to {ls[-1]}.")
                self.rename_helper.tensor_name_counter[ls[-1]] += 1

        n.ClearField("input")
        n.input.extend(inputs)
        n.ClearField("output")
        n.output.extend(outputs)

        old_name = n.name
        n.name = re.sub("[:/.]", "_", n.name)
        if old_name != n.name and not self.rename_helper.simplify_names:
            logging.info(f"Node name {old_name} is changed to {n.name}.")
        self.rename_helper.node_name_counter[n.name] += 1

    for f in (self.onnx_model.graph.input, self.onnx_model.graph.output,
              self.onnx_model.graph.initializer):
        for i in f:
            old_name = i.name
            i.name = re.sub("[:/.]", "_", i.name)
            if old_name != i.name and not self.rename_helper.simplify_names:
                logging.info(f"Tensor name {old_name} is changed to {i.name}.")
            self.rename_helper.tensor_name_counter[i.name] += 1

    model = self.onnx_model
    for f in (model.graph.input, model.graph.output):
        for i in f:
            for d in i.type.tensor_type.shape.dim:
                if d.dim_param != "":
                    d.dim_param = ""
                    d.dim_value = -1
                elif d.dim_value == 0:
                    d.dim_value = -1

    # TODO how to deal with custom op?
    if self.shape_infer:
        try:
            model.graph.ClearField("value_info")
            model = SymbolicShapeInference.infer_shapes(model, 2**31 - 1, True, True, 1)
        except Exception:
            logging.warning("Shape infer by onnxruntime failed.")
    else:
        for f in (self.onnx_model.graph.value_info,):
            for i in f:
                old_name = i.name
                i.name = re.sub("[:/.]", "_", i.name)
                if old_name != i.name and not self.rename_helper.simplify_names:
                    logging.info(f"Tensor name {old_name} is changed to {i.name}.")
                self.rename_helper.tensor_name_counter[i.name] += 1

    onnx.save(model, os.path.join(self.output_dir, "tmp_processed.onnx"))
    self.onnx_model = model
def run_shape_inference(input_model, output_model):
    in_mp = onnx.load(input_model)
    in_mp = SymbolicShapeInference.infer_shapes(in_mp, auto_merge=True)
    onnx.save(in_mp, output_model)
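# A self-contained variant of run_shape_inference, combining the infer_shapes parameters that
# appear across the snippets in this collection (int_max, auto_merge, guess_output_rank). The
# file names are placeholders and the parameter values are just one reasonable choice, not a
# recommendation from the original code:
import onnx
from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference

def run_shape_inference_with_options(input_model, output_model):
    model = onnx.load(input_model)
    inferred = SymbolicShapeInference.infer_shapes(model,
                                                   int_max=2**31 - 1,
                                                   auto_merge=True,
                                                   guess_output_rank=True)
    onnx.save(inferred, output_model)

# Example usage (paths are placeholders):
# run_shape_inference_with_options("model.onnx", "model_with_shapes.onnx")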
def _init_session(self):
    if self._onnx_model is None:
        return

    if self.options.utils.run_symbolic_shape_infer:
        self._onnx_model = SymbolicShapeInference.infer_shapes(
            self._onnx_model, auto_merge=True, guess_output_rank=True)

    # Create training session used by train_step
    self._create_ort_training_session()

    # Update model description to update dtype when mixed precision is enabled
    # C++ backend modifies model's output dtype from float32 to float16 for mixed precision
    # Note that for training we must use float32 and for evaluation we must use float16
    for idx, o_desc in enumerate(self.model_desc.outputs):
        if (self.options.mixed_precision.enabled
                and o_desc.dtype == torch.float32
                and not self._training_session.is_output_fp32_node(o_desc.name)):
            self.model_desc.add_type_to_output_description(idx, o_desc.dtype, torch.float16)

    # Update model description
    self._model_desc_inputs_with_lr = [*self.model_desc.inputs, self.model_desc.learning_rate]

    # Update Mixed Precision, if applicable
    if self.options.mixed_precision.enabled:
        self.model_desc.loss_scale_input = self._training_session.loss_scale_input_name
        self._model_desc_inputs_with_lr_and_loss_scale = [
            *self._model_desc_inputs_with_lr, self.model_desc.loss_scale_input
        ]
        self.model_desc.all_finite = _utils.get_all_gradients_finite_name_from_session(self._training_session)
        self._model_desc_outputs_with_all_finite = [*self.model_desc.outputs, self.model_desc.all_finite]
    elif self.options.mixed_precision.loss_scaler:
        raise ValueError("Loss Scaler cannot be specified when Mixed Precision is not enabled")

    # Update Loss Scaler Input Name, if applicable
    if self.options.mixed_precision.enabled and self.options.mixed_precision.loss_scaler:
        self.options.mixed_precision.loss_scaler.input_name = self.model_desc.loss_scale_input.name
    elif not self.options.mixed_precision.enabled and self.options.mixed_precision.loss_scaler:
        raise ValueError("Loss Scaler cannot be specified when Mixed Precision is not enabled")

    # Update Gradient Accumulation, if applicable
    if self.options.batch.gradient_accumulation_steps > 1:
        self.model_desc.gradient_accumulation = _utils.get_gradient_accumulation_name_from_session(self._training_session)
        self._model_desc_outputs_with_gradient_accumulation = [
            *self.model_desc.outputs, self.model_desc.gradient_accumulation
        ]

    # TODO: Subject to change after checkpoint redesign
    if self._state_dict:
        checkpoint.experimental_load_state_dict(self, self._state_dict, self._load_state_dict_strict)
        self._state_dict_debug = self._state_dict
        self._state_dict = {}
def check_slice_of_concat(self, input_dims, start, end, step, expected_output_dim):
    _dimstrmap = {dim: f"dim{i}" for i, dim in enumerate(input_dims)}

    def dimstrmap(dim):
        return _dimstrmap.get(dim, dim)

    def get_initializer(name):
        valuemap = {"zero": 0, "one": 1, "two": 2, "ten": 10, "intmax": 2**32}
        value = -valuemap[name[4:]] if name.startswith("neg_") else valuemap[name]
        return onnx.helper.make_tensor(name, TensorProto.INT64, [1], [value])

    initializers = [
        get_initializer(name) for name in
        ["zero", "one", "two", "ten", "intmax", "neg_intmax", "neg_one", "neg_ten"]
    ]
    inputs = []
    nodes = []
    for i, dim in enumerate(input_dims):
        inputs.append(onnx.helper.make_tensor_value_info(f"t{i}", TensorProto.FLOAT, ["B", dim]))
        nodes.extend([
            onnx.helper.make_node("Shape", [f"t{i}"], [f"shape{i}"]),
            onnx.helper.make_node("Slice", [f"shape{i}", "one", "two", "zero", "one"], [f"dim{i}"]),
            onnx.helper.make_node("Neg", [f"dim{i}"], [f"neg_dim{i}"])
        ])

    def make_concat_dims(concat_name, dims):
        dims = [
            f"neg_{dimstrmap(dim[1:])}" if dim.startswith("-") else dimstrmap(dim)
            for dim in dims
        ]
        return onnx.helper.make_node("Concat", dims, [concat_name], axis=0)

    nodes.extend([
        onnx.helper.make_node("Concat", [inp.name for inp in inputs], ["concat"], axis=1),
        make_concat_dims("starts", ["zero", start]),
        make_concat_dims("ends", ["intmax", end]),
        make_concat_dims("axes", ["zero", "one"]),
        make_concat_dims("steps", ["one", step]),
        onnx.helper.make_node("Slice", ["concat", "starts", "ends", "axes", "steps"], ["output"])
    ])
    output = onnx.helper.make_tensor_value_info("output", TensorProto.FLOAT, ["d1", "d2"])
    graph_def = onnx.helper.make_graph(nodes, "graph", inputs, [output], initializer=initializers)

    model = SymbolicShapeInference.infer_shapes(onnx.helper.make_model(graph_def))
    output = unique_element(model.graph.output)
    shape = [d.dim_param if d.dim_param else d.dim_value for d in output.type.tensor_type.shape.dim]
    self.assertEqual(shape, ["B", expected_output_dim])
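# check_slice_of_concat uses a unique_element helper that is not shown here. A minimal sketch
# under the assumption that it simply asserts the container holds exactly one element and
# returns it (the actual helper may differ):
def unique_element(container):
    assert len(container) == 1
    return container[0]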