def convert_to_mlmodel(model_spec, tensor_inputs, backend=("neuralnetwork", "fp32"),
                       converter_input_type=None, use_cpu_for_conversion=False):
    def _convert_to_inputtype(inputs):
        if isinstance(inputs, list):
            return [_convert_to_inputtype(x) for x in inputs]
        elif isinstance(inputs, tuple):
            return tuple([_convert_to_inputtype(x) for x in inputs])
        elif isinstance(inputs, TensorType):
            return inputs
        elif isinstance(inputs, torch.Tensor):
            return TensorType(shape=inputs.shape,
                              dtype=torch_to_mil_types[inputs.dtype])
        else:
            raise ValueError("Unable to parse type {} into InputType.".format(
                type(inputs)))

    if converter_input_type is None:
        inputs = list(_convert_to_inputtype(tensor_inputs))
    else:
        inputs = converter_input_type

    return ct_convert(model_spec, inputs=inputs, convert_to=backend,
                      source="pytorch", useCPUOnly=use_cpu_for_conversion)

def tf_graph_to_mlmodel(
    graph, feed_dict, output_nodes, frontend="tensorflow",
    backend=("neuralnetwork", "fp32"), use_cpu_for_conversion=False,
):
    """
    Parameters
    ----------
    graph: tf.Graph
        TensorFlow 1.x model in tf.Graph format.
    feed_dict: dict of {tf.placeholder -> np.array or python primitive}
        Dict of placeholder and value pairs representing inputs.
    output_nodes: tf.node or list[tf.node]
        List of names representing outputs.
    frontend: str
        Frontend to convert from.
    backend: str
        Backend to convert to.
    use_cpu_for_conversion: bool
        Argument which is passed as is to the unified converter API, i.e.
        "ct.convert(...., useCPUOnly=use_cpu_for_conversion)".
        It forces the model to be loaded on the CPU context, post conversion.

    Returns
    -------
    MLModel, Input Values, Output Names
    """
    if isinstance(output_nodes, tuple):
        output_nodes = list(output_nodes)
    if not isinstance(output_nodes, list):
        output_nodes = [output_nodes]

    # Convert TF graph.
    input_names = get_tf_node_names(list(feed_dict.keys()), mode="inputs")
    output_names = get_tf_node_names(output_nodes, mode="outputs")
    input_values = {name: val for name, val in zip(input_names, feed_dict.values())}

    mlmodel = ct_convert(
        graph,
        inputs=None,
        outputs=output_names,
        source=frontend,
        convert_to=backend,
        useCPUOnly=use_cpu_for_conversion,
    )

    return mlmodel, input_values, output_names, output_nodes

def convert_to_mlmodel(model_spec, tensor_inputs, backend=("neuralnetwork", "fp32"),
                       converter_input_type=None, use_cpu_for_conversion=False,
                       minimum_deployment_target=None):
    def _convert_to_inputtype(inputs):
        if isinstance(inputs, list):
            return [_convert_to_inputtype(x) for x in inputs]
        elif isinstance(inputs, tuple):
            return tuple([_convert_to_inputtype(x) for x in inputs])
        elif isinstance(inputs, TensorType):
            return inputs
        elif isinstance(inputs, torch.Tensor):
            return TensorType(shape=inputs.shape,
                              dtype=torch_to_mil_types[inputs.dtype])
        else:
            raise ValueError("Unable to parse type {} into InputType.".format(
                type(inputs)))

    if converter_input_type is None:
        inputs = list(_convert_to_inputtype(tensor_inputs))
    else:
        inputs = converter_input_type

    if use_cpu_for_conversion:
        compute_unit = ComputeUnit.CPU_ONLY
    else:
        compute_unit = ComputeUnit.ALL

    return ct_convert(model_spec, inputs=inputs, convert_to=backend,
                      source="pytorch", compute_units=compute_unit,
                      minimum_deployment_target=minimum_deployment_target)

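# Hypothetical usage sketch (not part of the original test utilities): trace a
# tiny torch module and convert it with the helper above. Assumes torch and
# coremltools are importable here; the module, shapes, and deployment target
# are made-up illustration values.
def _example_convert_traced_relu():
    import torch
    import coremltools as ct

    example_input = torch.rand(1, 3, 8, 8)
    traced = torch.jit.trace(torch.nn.ReLU(), example_input)
    # Input types are inferred from the tensors; force CPU-only compute units.
    return convert_to_mlmodel(
        traced,
        (example_input,),
        backend=("mlprogram", "fp16"),
        use_cpu_for_conversion=True,
        minimum_deployment_target=ct.target.iOS15,
    )
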
def run_compare_tf2(
    model,
    input_dict,
    output_names,
    use_cpu_only=False,
    use_cpu_for_conversion=False,
    frontend_only=False,
    frontend="tensorflow",
    backend=("neuralnetwork", "fp32"),
    debug=False,
    atol=1e-04,
    rtol=1e-05,
):
    """
    Parameters
    ----------
    model: list of tf.ConcreteFunction
        List of TensorFlow 2.x concrete functions.
    input_dict: dict of (str, np.array)
        Dict of name and value pairs representing inputs.
    output_names: list of str
        List of output node names.
    use_cpu_only: bool
        If true, use CPU only for prediction; otherwise, use GPU also.
    use_cpu_for_conversion: bool
        If true, the converter is invoked using
        "ct.convert(...., useCPUOnly=True)", which in turn forces the model to
        be loaded with the CPU context. This happens when the converter loads
        the ML model object from the proto spec using
        "ct.models.MLModel(proto_spec, useCPUOnly=True)".
        The other argument, "use_cpu_only", refers only to the compute engine
        used for prediction. A model loaded on a non-CPU context can still be
        forced to execute on the CPU at prediction time, so
        "use_cpu_for_conversion = False && use_cpu_only = True" is valid and
        corresponds to a model loaded for GPU but executed on the CPU.
        The scenario "use_cpu_for_conversion = True && use_cpu_only = False" is
        invalid though, since once a model is loaded on a CPU context its
        context cannot be changed to a non-CPU device at prediction time.
    frontend_only: bool
        If true, skip the prediction call, only validate conversion.
    frontend: str
        Frontend to convert from.
    backend: str
        Backend to convert to.
    debug: bool
        If true, print verbose information and plot intermediate graphs.
    atol: float
        The absolute tolerance parameter.
    rtol: float
        The relative tolerance parameter.
    """
    if use_cpu_for_conversion and not use_cpu_only:
        # use_cpu_for_conversion = True && use_cpu_only = False
        raise ValueError(
            "use_cpu_for_conversion = True && use_cpu_only = False is an invalid test case"
        )

    inputs = []
    cf_inputs = [t for t in model[0].inputs if t.dtype != dtypes.resource]
    for t in cf_inputs:
        name = get_tf_node_names(t.name)[0]
        shape = [RangeDim() if s is None or s == -1 else s
                 for s in list(t.get_shape())]
        inputs.append(
            TensorType(name=name, shape=shape, dtype=t.dtype.as_numpy_dtype))
    outputs = []
    for t in output_names:
        name = get_tf_node_names(t)[0]
        outputs.append(name)

    # Get TensorFlow 2.x output as reference and run comparison.
    tf_input_values = [tf.constant(t) for t in input_dict.values()]
    tf_outputs = model[0](*tf_input_values)
    if isinstance(tf_outputs, (tuple, list)):
        ref = [t.numpy() for t in tf_outputs]
    else:
        ref = [tf_outputs.numpy()]
    expected_outputs = {n: v for n, v in zip(outputs, ref)}

    mlmodel = ct_convert(
        model,
        source=frontend,
        inputs=inputs,
        outputs=outputs,
        convert_to=backend,
        debug=debug,
        useCPUOnly=use_cpu_for_conversion,
    )

    for k, v in input_dict.items():
        if isinstance(v, np.ndarray) and issubclass(v.dtype.type, np.integer):
            input_dict[k] = v.astype(float)  # Core ML only accepts floats

    if frontend_only or _macos_version() < (10, 13) \
            or (mlmodel.is_package and _macos_version() < (12, 0)):
        return mlmodel._spec, mlmodel, input_dict, None

    compare_backend(
        mlmodel,
        input_dict,
        expected_outputs,
        use_cpu_only,
        atol=atol,
        rtol=rtol,
        also_compare_shapes=True,
        dtype=backend[1],
    )

    pred = None
    if not coremltoolsutils._has_custom_layer(mlmodel.get_spec()):
        pred = run_core_ml_predict(mlmodel, input_dict, use_cpu_only)
    else:
        print('Skipping model prediction as it has a custom nn layer!')

    return mlmodel._spec, mlmodel, input_dict, pred

def run_compare_tf_keras(
    model,
    input_values,
    use_cpu_only=False,
    frontend_only=False,
    frontend="tensorflow",
    backend=("neuralnetwork", "fp32"),
    atol=1e-04,
    rtol=1e-05,
):
    """
    Parameters
    ----------
    model: TensorFlow 2.x model
        TensorFlow 2.x model annotated with @tf.function.
    input_values: list of np.array
        List of input values in the same order as the input signature.
    use_cpu_only: bool
        If true, use CPU only for prediction; otherwise, use GPU also.
    frontend_only: bool
        If true, skip the prediction call, only validate conversion.
    frontend: str
        Frontend to convert from.
    backend: str
        Backend to convert to.
    atol: float
        The absolute tolerance parameter.
    rtol: float
        The relative tolerance parameter.
    """
    mlmodel = ct_convert(model, source=frontend, convert_to=backend)

    # Assumes conversion preserves the i/o names.
    proto = mlmodel.get_spec()
    inputs = [i.name.split(":")[0].strip() for i in model.inputs]
    outputs = [str(o.name) for o in proto.description.output]

    # Get tf.keras model output as reference and run comparison.
    keras_outputs = model(input_values)
    if not isinstance(keras_outputs, list):
        keras_outputs = [keras_outputs]
    ref = [output.numpy() for output in keras_outputs]
    expected_outputs = {n: v for n, v in zip(outputs, ref)}
    input_key_values = {n: v for n, v in zip(inputs, input_values)}

    if frontend_only or _macos_version() < (10, 13) \
            or (mlmodel.is_package and _macos_version() < (12, 0)):
        return proto, mlmodel, input_key_values, None

    compare_backend(mlmodel, input_key_values, expected_outputs,
                    use_cpu_only, atol=atol, rtol=rtol,
                    also_compare_shapes=True, dtype=backend[1])

    pred = None
    if not coremltoolsutils._has_custom_layer(proto):
        pred = run_core_ml_predict(mlmodel, input_key_values, use_cpu_only)
    else:
        print('Skipping model prediction as it has a custom nn layer!')

    return proto, mlmodel, input_key_values, pred

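# Hypothetical usage sketch (illustration only): compare a one-layer tf.keras
# model against its Core ML conversion using the helper above. Assumes tf.keras
# is available; layer sizes and the input shape are arbitrary.
def _example_compare_tf_keras_dense():
    import numpy as np
    import tensorflow as tf

    keras_model = tf.keras.Sequential(
        [tf.keras.layers.Dense(4, activation="relu", input_shape=(8,))]
    )
    x = np.random.rand(1, 8).astype(np.float32)
    # input_values follows the order of the model's input signature.
    return run_compare_tf_keras(keras_model, [x],
                                backend=("neuralnetwork", "fp32"))
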
def run_compare_builder(
    build,
    input_placeholders,
    input_values,
    expected_output_types=None,
    expected_outputs=None,
    use_cpu_only=False,
    frontend_only=False,
    backend=("neuralnetwork", "fp32"),
    atol=1e-04,
    rtol=1e-05,
    inputs=None,
    also_compare_shapes=False,
    use_cpu_for_conversion=False,
):
    """
    Inputs:
        - build: python function taking input of Vars and returning Var or
          list[Var]. Each input argument in build must match a key in
          input_values / input_placeholders.

        - input_placeholders: str -> placeholder. It may not be an empty dict
          as MLModel doesn't support function with no input.

        - input_values: str -> np.array or PIL.Image. Keys must match those
          in input_placeholders.

        - expected_output_types: list[(shape, builtin_type)] or
          (shape, builtin_type). None skips type inference validation.

        - expected_outputs: list[np.array] or np.array. Required iff
          frontend_only == False.

        - frontend_only: True to test up to proto generation.

        - inputs: type of inputs (either None (defaults to tensor) or
          [ct.ImageType]).

        - use_cpu_for_conversion: bool
          Argument which is passed as is to the unified converter API, i.e.
          "ct.convert(...., useCPUOnly=use_cpu_for_conversion)".
          It forces the model to be loaded on the CPU context, post conversion.

    Returns:
        The converted mlmodel
    """
    if not isinstance(expected_output_types, list):
        expected_output_types = [expected_output_types]

    if expected_outputs is not None and not isinstance(expected_outputs, list):
        expected_outputs = [expected_outputs]

    prog = Program()
    with Function(input_placeholders) as ssa_func:
        output_vars = build(**ssa_func.inputs)
        if isinstance(output_vars, tuple):
            output_vars = list(output_vars)
        elif not isinstance(output_vars, list):
            output_vars = [output_vars]
        ssa_func.set_outputs(output_vars)
        prog.add_function("main", ssa_func)

    # Get output names for output_vars.
    output_names = [x.name for x in output_vars]

    # Validate type inference.
    msg = ("Provided expected outputs types {} should match number of output" +
           " variables {}")
    assert_msg = msg.format(len(expected_output_types), len(output_vars))
    assert len(output_vars) == len(expected_output_types), assert_msg

    for out_var, s in zip(output_vars, expected_output_types):
        if out_var.dtype != s[-1]:
            raise ValueError(
                "Output {} type: expect {}, got {}. Program:\n{}".format(
                    out_var.name, s[-1].__type_info__(),
                    out_var.dtype.__type_info__(), prog))
        if UNK_VARIADIC in s[:-1]:
            msg = "Skip type checking for UNK_VARIADIC. Output shape: {} vs expected shape: {}"
            logging.debug(msg.format(out_var.shape, s[:-1]))
            continue
        expected_shape = s[:-1]
        msg = "Output {} shape: expect {}, got {}. Program:\n{}".format(
            out_var.name, expected_shape, out_var.shape, prog)
        # No more variadic here.
        if len(out_var.shape) != len(expected_shape):
            raise ValueError(msg)
        # Replace UNK_SYM in out_var.shape.
        output_shape = [
            0 if es == UNK_SYM else os
            for os, es in zip(out_var.shape, expected_shape)
        ]
        expected_shape = [0 if es == UNK_SYM else es for es in expected_shape]
        # Convert float etc. to int.
        output_shape = [i if is_symbolic(i) else int(i) for i in output_shape]
        expected_shape = [
            i if is_symbolic(i) else int(i) for i in expected_shape
        ]
        if output_shape != expected_shape:
            raise ValueError(msg)

    mlmodel = ct_convert(prog, source="milinternal", convert_to=backend,
                         inputs=inputs, useCPUOnly=use_cpu_for_conversion)

    if frontend_only:
        return mlmodel

    if expected_outputs:
        assert len(output_vars) == len(expected_outputs), (
            "Provided expected_outputs {}"
            " should match number of output"
            " variables {}".format(len(expected_outputs), len(output_vars)))

        expected_outputs = {
            name: val for name, val in zip(output_names, expected_outputs)
        }

    compare_backend(mlmodel=mlmodel,
                    input_key_values=input_values,
                    expected_outputs=expected_outputs,
                    use_cpu_only=use_cpu_only,
                    atol=atol,
                    rtol=rtol,
                    also_compare_shapes=also_compare_shapes,
                    dtype=backend[1])

    return mlmodel

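# Hypothetical usage sketch (illustration only): drive run_compare_builder with
# a single elementwise MIL op. Assumes the MIL Builder ("mb") and the builtin
# types module are importable in the usual way; shapes and values are arbitrary.
def _example_compare_builder_add():
    import numpy as np
    from coremltools.converters.mil import Builder as mb
    from coremltools.converters.mil.mil import types

    input_placeholders = {"x": mb.placeholder(shape=(2, 3))}
    input_values = {"x": np.random.rand(2, 3).astype(np.float32)}

    def build(x):
        # Single op program: y = x + 1.0
        return mb.add(x=x, y=1.0)

    return run_compare_builder(
        build,
        input_placeholders,
        input_values,
        expected_output_types=(2, 3, types.fp32),
        expected_outputs=input_values["x"] + 1.0,
    )
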
def run_compare_tf2(
    model,
    input_dict,
    output_names,
    inputs_for_conversion=None,
    use_cpu_for_conversion=False,
    frontend_only=False,
    frontend="tensorflow",
    backend=("neuralnetwork", "fp32"),
    debug=False,
    atol=1e-04,
    rtol=1e-05,
    minimum_deployment_target=None,
):
    """
    Parameters
    ----------
    model: list of tf.ConcreteFunction
        List of TensorFlow 2.x concrete functions.
    input_dict: dict of (str, np.array)
        Dict of name and value pairs representing inputs.
    output_names: list of str
        List of output node names.
    inputs_for_conversion: list of coremltools.TensorType() or coremltools.ImageType() objects
        Defaults to None. It is passed as is to the "inputs" argument of the converter.
    use_cpu_for_conversion: bool
        If True, forces the model to be loaded with the CPU context.
    frontend_only: bool
        If True, skip the prediction call, only validate conversion.
    frontend: str
        Frontend to convert from.
    backend: str
        Backend to convert to.
    debug: bool
        If True, print verbose information and plot intermediate graphs.
    atol: float
        The absolute tolerance parameter.
    rtol: float
        The relative tolerance parameter.
    minimum_deployment_target: coremltools.target enumeration
        The spec version for the mlmodel.
    """
    inputs = []
    if inputs_for_conversion is None:
        cf_inputs = [t for t in model[0].inputs if t.dtype != dtypes.resource]
        for t in cf_inputs:
            name = get_tf_node_names(t.name)[0]
            shape = [RangeDim() if s is None or s == -1 else s
                     for s in list(t.get_shape())]
            inputs.append(
                TensorType(name=name, shape=shape, dtype=t.dtype.as_numpy_dtype))
    else:
        inputs = inputs_for_conversion

    outputs = []
    for t in output_names:
        name = get_tf_node_names(t)[0]
        outputs.append(name)

    # Get TensorFlow 2.x output as reference and run comparison.
    tf_input_values = [tf.constant(t) for t in input_dict.values()]
    tf_outputs = model[0](*tf_input_values)
    if isinstance(tf_outputs, (tuple, list)):
        ref = [t.numpy() for t in tf_outputs]
    else:
        ref = [tf_outputs.numpy()]
    expected_outputs = {n: v for n, v in zip(outputs, ref)}

    if use_cpu_for_conversion:
        compute_unit = ct.ComputeUnit.CPU_ONLY
    else:
        compute_unit = ct.ComputeUnit.ALL

    mlmodel = ct_convert(
        model,
        source=frontend,
        inputs=inputs,
        outputs=outputs,
        convert_to=backend,
        debug=debug,
        compute_units=compute_unit,
        minimum_deployment_target=minimum_deployment_target,
    )

    for k, v in input_dict.items():
        if isinstance(v, np.ndarray) and issubclass(v.dtype.type, np.integer):
            input_dict[k] = v.astype(float)  # Core ML only accepts floats

    if frontend_only or _macos_version() < (10, 13) \
            or (mlmodel.is_package and _macos_version() < (12, 0)):
        return mlmodel._spec, mlmodel, input_dict, None

    pred = None
    if not coremltoolsutils._has_custom_layer(mlmodel._spec):
        pred = compare_backend(
            mlmodel,
            input_dict,
            expected_outputs,
            atol=atol,
            rtol=rtol,
            also_compare_shapes=True,
            dtype=backend[1],
        )
    else:
        print('Skipping model prediction as it has a custom nn layer!')

    return mlmodel._spec, mlmodel, input_dict, pred

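# Hypothetical usage sketch (illustration only): build a tf.function, grab its
# concrete function, and feed it to run_compare_tf2. Assumes TF 2.x; the input
# signature, names, and backend choice are arbitrary.
def _example_compare_tf2_relu():
    import numpy as np
    import tensorflow as tf

    @tf.function(input_signature=[tf.TensorSpec(shape=(1, 4), dtype=tf.float32)])
    def model_fn(x):
        return tf.nn.relu(x)

    cf = model_fn.get_concrete_function()
    # Key the input dict by the concrete function's input tensor name.
    input_name = cf.inputs[0].name.split(":")[0]
    input_dict = {input_name: np.random.rand(1, 4).astype(np.float32)}
    output_names = [t.name for t in cf.outputs]
    return run_compare_tf2([cf], input_dict, output_names,
                           backend=("mlprogram", "fp32"))
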
def tf_graph_to_mlmodel(
    graph, feed_dict, output_nodes, frontend="tensorflow",
    backend=("neuralnetwork", "fp32"), use_cpu_for_conversion=False,
    inputs_for_conversion=None, minimum_deployment_target=None,
):
    """
    Parameters
    ----------
    graph: tf.Graph
        TensorFlow 1.x model in tf.Graph format.
    feed_dict: dict of {tf.placeholder -> np.array or python primitive}
        Dict of placeholder and value pairs representing inputs.
    output_nodes: tf.node or list[tf.node]
        List of names representing outputs.
    frontend: str
        Frontend to convert from.
    backend: str
        Backend to convert to.
    use_cpu_for_conversion: bool
        Argument which is passed as is to the unified converter API.
        It forces the model to be loaded on the CPU context, post conversion.
    inputs_for_conversion: list of coremltools.TensorType() or coremltools.ImageType() objects
        Defaults to None. It is passed as is to the "inputs" argument of the converter.
    minimum_deployment_target: coremltools.target enumeration
        It sets the minimum_deployment_target argument in the coremltools.convert function.

    Returns
    -------
    MLModel, Input Values, Output Names
    """
    if isinstance(output_nodes, tuple):
        output_nodes = list(output_nodes)
    if not isinstance(output_nodes, list):
        output_nodes = [output_nodes]

    # Convert TF graph.
    input_names = get_tf_node_names(list(feed_dict.keys()), mode="inputs")
    output_names = get_tf_node_names(output_nodes, mode="outputs")
    input_values = {name: val for name, val in zip(input_names, feed_dict.values())}

    if use_cpu_for_conversion:
        compute_unit = ct.ComputeUnit.CPU_ONLY
    else:
        compute_unit = ct.ComputeUnit.ALL

    inputs = inputs_for_conversion if inputs_for_conversion is not None else None

    mlmodel = ct_convert(
        graph,
        inputs=inputs,
        outputs=output_names,
        source=frontend,
        convert_to=backend,
        compute_units=compute_unit,
        minimum_deployment_target=minimum_deployment_target,
    )

    return mlmodel, input_values, output_names, output_nodes

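# Hypothetical usage sketch (illustration only): build a tiny TF1-style graph
# through tf.compat.v1 and hand it to tf_graph_to_mlmodel. Assumes TF 2.x with
# the v1 compatibility layer; placeholder names and shapes are arbitrary.
def _example_tf_graph_to_mlmodel_relu():
    import numpy as np
    import tensorflow as tf

    with tf.Graph().as_default() as graph:
        x = tf.compat.v1.placeholder(tf.float32, shape=(1, 4), name="input")
        out = tf.nn.relu(x, name="output")

    feed_dict = {x: np.random.rand(1, 4).astype(np.float32)}
    return tf_graph_to_mlmodel(graph, feed_dict, out,
                               backend=("neuralnetwork", "fp32"))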