def build(x):
    x = mb.cast(x=x, dtype=src_dtype)
    x = mb.square(x=x)
    x = mb.cast(x=x, dtype=dst_dtype)
    x = mb.sqrt(x=x)
    x = mb.cast(x=x, dtype="fp32")
    return x
def prog(x):
    x = mb.relu(x=x)
    x = mb.relu(x=x)
    x = mb.cast(x=x, dtype="fp32")
    x = mb.cast(x=x, dtype="fp16")
    x = mb.cast(x=x, dtype="fp32")
    x = mb.cast(x=x, dtype="fp16", name="original_output_name")
    return x
def prog(x):
    x = mb.cast(x=x, dtype="int32")
    x1 = mb.cast(x=x, dtype="fp32")
    x2 = mb.cast(x=x, dtype="fp16")
    x3 = mb.square(x=x1)
    x4 = mb.relu(x=x2)
    x5 = mb.log(x=x)
    return x3, x4, x5
def prog(x):
    x = mb.cast(x=x, dtype="fp16", name="castop")
    x = mb.cast(x=x, dtype="fp16", name="castop")
    x = mb.cast(x=x, dtype="int32", name="castop_2")
    x = mb.cast(x=x, dtype="int64", name="castop")
    x = mb.cast(x=x, dtype="fp32", name="castop_2")
    x = mb.square(x=x, name="square")
    return x
def prog(x):
    x = mb.cast(x=x, dtype="fp16")
    x1 = mb.square(x=x)
    x2 = mb.cast(x=x1, dtype="fp32")
    x3 = mb.log(x=x)
    x4 = mb.cast(x=x3, dtype="fp32")
    x5 = mb.relu(x=x)
    x6 = mb.cast(x=x5, dtype="fp32")
    x7 = mb.relu(x=x6)
    return x2, x4, x7
def prog(x):
    x = mb.cast(x=x, dtype="fp16")
    x1 = mb.square(x=x)
    x3 = mb.log(x=x)
    x5 = mb.relu(x=x)
    x6 = mb.cast(x=x5, dtype="fp32")
    x7 = mb.relu(x=x6)
    x8 = mb.relu(x=x)
    x1_t = mb.transpose(x=x1, perm=[1, 0])
    x2 = mb.cast(x=x1_t, dtype="fp32")
    x3_t = mb.transpose(x=x3, perm=[1, 0])
    x4 = mb.cast(x=x3_t, dtype="fp32")
    return x2, x4, x7, x8
def _adjust_main_outputs(func):
    new_outputs = []
    for output_var in func.outputs:
        output_type = output_var.sym_type
        if (types.is_tensor(output_type) or types.is_scalar(output_type)) \
            and output_var.dtype != types.fp32 \
            and output_var.dtype != types.int32 \
            and (func.opset_version < target.iOS16 or output_var.dtype != types.fp16):
            # Since fp16 is a valid output type from the iOS16 spec onwards, no cast is needed there.
            output_dtype_str = types.builtin_to_string(output_var.dtype)
            supported_dtypes = "{int32, fp32, fp64}" if func.opset_version < target.iOS16 else \
                               "{int32, fp16, fp32, fp64}"
            msg = "\nOutput '{}' is of dtype {}. The " +\
                  "CoreML runtime does not support outputs with this dtype " +\
                  "(supported dtypes are: {}). This output will be assigned a dtype " +\
                  "of fp32. A cast will be inserted at the end of the program to convert " +\
                  "the original output dtype to the dtype supported by the CoreML runtime.\n"
            if output_var.dtype == types.fp16:
                msg += "fp16 dtype output is supported if function.opset_version is chosen to be at least " \
                       "iOS16/macOS13.\n"
            logging.warning(
                msg.format(
                    output_var.name,
                    output_dtype_str,
                    supported_dtypes,
                ))
            output_var_name = output_var.name
            output_var.set_name(output_var_name + "__pre__output__fp32__cast")
            # Convert the output to fp32, and add a cast.
            output_var = mb.cast(x=output_var, dtype="fp32")
            output_var.set_name(output_var_name)
        new_outputs.append(output_var)
    func.set_outputs(new_outputs)
def apply(self, prog):
    user_provided_output_types = prog.main_output_types
    main_func = prog.functions["main"]
    output_vars = main_func.outputs
    if user_provided_output_types is None or len(user_provided_output_types) == 0:
        return
    if len(output_vars) != len(user_provided_output_types):
        msg = "Number of outputs provided by the user, which is {}, " \
              "does not match the number of outputs generated by the model, which is {}."
        raise ValueError(msg.format(len(user_provided_output_types), len(output_vars)))

    new_outputs = []
    for i, output_type in enumerate(user_provided_output_types):
        required_output_dtype = output_type.dtype
        output_var = output_vars[i]
        if required_output_dtype is None or \
            not (types.is_tensor(output_var.sym_type) or types.is_scalar(output_var.sym_type)) or \
            required_output_dtype == output_var.dtype:
            # No need to update the output var's dtype in this case.
            new_outputs.append(output_var)
        else:
            output_var_name = output_var.name
            output_var.set_name(output_var_name + "_type_" + types.builtin_to_string(output_var.dtype))
            with main_func:
                output_var = mb.cast(x=output_var, dtype=types.builtin_to_string(required_output_dtype))
                output_var.set_name(output_var_name)
            new_outputs.append(output_var)
    main_func.set_outputs(new_outputs)
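For context, a minimal usage sketch of how prog.main_output_types gets populated through the public API, which is what apply() above consumes. The model, names, and shapes here are illustrative, not from the source above; it assumes coremltools 6+ and a traced PyTorch model.

import numpy as np
import torch
import coremltools as ct

torch_model = torch.nn.ReLU().eval()
traced = torch.jit.trace(torch_model, torch.rand(1, 3, 8, 8))

mlmodel = ct.convert(
    traced,
    inputs=[ct.TensorType(name="x", shape=(1, 3, 8, 8))],
    outputs=[ct.TensorType(dtype=np.float16)],  # recorded as main_output_types; forces a cast at the outputs
    minimum_deployment_target=ct.target.iOS16,  # fp16 I/O requires iOS16+
)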
def _try_to_transform(root_op, cached_vars):
    block = root_op.enclosing_block

    # Scenario: redundant cast, where the source and destination dtypes are the same.
    if root_op.op_type == "cast" and root_op.x.is_tensor_or_scalar_of(dtype=root_op.dtype.val):
        block.replace_uses_of_var_after_op(
            anchor_op=root_op,
            old_var=root_op.outputs[0],
            new_var=root_op.x,
        )
        block.remove_ops([root_op])
        return True

    # Scenario: consecutive casts.
    list_of_ops_in_pattern = _match_linear_pattern(
        root_op,
        [
            Node("cast"),
            Node("cast"),
        ],
    )
    if not list_of_ops_in_pattern:
        return False

    cast_1, cast_2 = list_of_ops_in_pattern
    fused_output_var_name = cast_1.x.name + "_to_{}".format(cast_2.dtype.val)

    if cast_1.x.is_tensor_or_scalar_of(dtype=cast_2.dtype.val):
        # The consecutive casts cancel each other out.
        # See test_linear_consecutive_cast_ops_cancellation in test_cast_optimization.py
        new_output_var = cast_1.x
    elif fused_output_var_name in cached_vars:
        # The output of one cast goes into multiple casts of the same configuration.
        # See test_consecutive_fusable_casts_on_all_branches in test_cast_optimization.py
        new_output_var = cached_vars[fused_output_var_name]
    else:
        new_output_var = mb.cast(
            x=cast_1.x,
            dtype=cast_2.dtype,
            name=fused_output_var_name,
            before_op=cast_2,
        )
        cached_vars[fused_output_var_name] = new_output_var

    # It's important to use `cast_2.enclosing_block` over `block`, since `cast_2` might be present in
    # a block nested under `block`.
    cast_2.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=cast_2,
        old_var=cast_2.outputs[0],
        new_var=new_output_var,
    )

    # Remove only the last cast op and let DCE eliminate the rest of the ops if needed;
    # the first cast op could be feeding into other non-cast ops.
    cast_2.enclosing_block.remove_ops([cast_2])
    return True
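A minimal sketch of the consecutive-cast pattern that _try_to_transform above removes. It assumes the standard registered pass name common::cast_optimization; the program itself is illustrative.

from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY

@mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))])
def prog(x):
    x = mb.cast(x=x, dtype="fp16")  # fp32 -> fp16
    x = mb.cast(x=x, dtype="fp32")  # fp16 -> fp32: the pair cancels out
    return mb.square(x=x)

# Applying the pass should leave only the square op behind.
PASS_REGISTRY["common::cast_optimization"](prog)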
def prog(x):
    x = mb.cast(x=x, dtype="fp16")
    x1 = mb.square(x=x)
    x1_t = mb.transpose(x=x1, perm=[1, 0])

    def true_fn():
        return mb.add(x=x1_t, y=1, name="x2")

    def false_fn():
        return mb.add(x=x1_t, y=2, name="x2")

    is_one = mb.equal(x=mb.squeeze(x=x), y=1)
    pred = mb.squeeze(x=is_one)
    x3 = mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn)
    x4 = mb.add(x=x1_t, y=x3)
    x5 = mb.cast(x=x4, dtype="fp32")
    return x5
def __init__(self, **kwargs):
    # TODO(rdar://79925291): Allow int32 input to floor_div
    from coremltools.converters.mil.mil import Builder as mb
    from coremltools.converters.mil.mil import types

    accepted_types = [types.fp32, types.fp16]
    for input_name in ["x", "y"]:
        if kwargs[input_name].dtype not in accepted_types:
            kwargs[input_name] = mb.cast(x=kwargs[input_name], dtype="fp32")
    super(real_div, self).__init__(**kwargs)
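An illustrative sketch (names and shapes are hypothetical) of the promotion performed by the __init__ above: an integer-typed operand to real_div is cast to fp32 before the op is built.

from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.mil import types

@mb.program(input_specs=[mb.TensorSpec(shape=(2,), dtype=types.int32)])
def prog(x):
    # x is int32, which is not in accepted_types, so real_div's __init__
    # casts both operands to fp32 before constructing the division.
    return mb.real_div(x=x, y=x)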
def _promoted_var(op, var, promoted_dtype):
    if var.val is None:
        x = mb.cast(
            x=var, dtype=builtin_to_string(promoted_dtype), name=var.name + "_promoted", before_op=op
        )
    else:
        const_value_after_cast = cast_op_class.get_cast_value(var, builtin_to_string(promoted_dtype))
        x = mb.const(val=const_value_after_cast, name=var.name + "_promoted", before_op=op)
    return x
def _adjust_ops(block):
    len_block = len(block.operations)
    i = 0
    while i < len_block:
        op = block.operations[i]

        # Classifier is a special exception to this rule: it can output 64-bit integer labels.
        # The classify op should therefore be inserted after running this pass.
        if op.op_type == "classify":
            raise ValueError("ML Program backend pass adjust_to_supported_types does not support programs" +\
                             " that have already added a classify op.")

        for subblock in op.blocks:
            _adjust_block_inputs(subblock)
            _adjust_ops(subblock)

        for var in op.outputs:
            _adjust_var(var)

        # Cast ops have a param (dtype) that should match the output dtype.
        # If the output dtype or input dtype was previously adjusted,
        # the cast op must change or be removed in kind.
        if op.op_type == "cast":
            output_type_str = _types.builtin_to_string(op.outputs[0].dtype)
            if op.outputs[0].dtype == op.x.dtype:
                # The type of the input or output of this cast op was changed per the rules
                # defined in the top level comment for adjust_io_to_supported_types.
                #
                # That changed output type is the same type as the input to the cast
                # op. Therefore, regardless of whether the user created this cast or
                # not, it is now redundant (a no-op) and should be removed.
                #
                # The removal isn't covered by the main cast
                # optimization pass since that pass runs before this pass.
                block.replace_uses_of_var_after_op(anchor_op=op, old_var=op.outputs[0], new_var=op.x)
                block.remove_ops([op])
                len_block = len(block.operations)
                i -= 1
            elif output_type_str != op.dtype.val:
                # The type of the output of this cast op was changed per the rules
                # defined in the top level comment for adjust_io_to_supported_types.
                #
                # This cast is meaningful, and the "dtype" param now differs from the output
                # type. Replace the cast with a new cast op whose dtype param matches.
                with block:
                    new_cast_out = _mb.cast(x=op.x, dtype=output_type_str, before_op=op)
                    block.replace_uses_of_var_after_op(anchor_op=op, old_var=op.outputs[0], new_var=new_cast_out)
                    block.remove_ops([op])
                len_block = len(block.operations)
        i = i + 1
    return block
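A hypothetical before/after sketch of the redundant-cast case this loop cleans up. It assumes the registered backend pass name mil_backend::adjust_io_to_supported_types, and that fp64 program inputs build cleanly, which can vary by coremltools version.

from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.mil import types

@mb.program(input_specs=[mb.TensorSpec(shape=(2, 3), dtype=types.fp64)])
def prog(x):
    # The IO pass rewrites the fp64 main input to fp32 in place, so this user
    # cast becomes a no-op (input dtype == output dtype) and the loop in
    # _adjust_ops above removes it.
    x = mb.cast(x=x, dtype="fp32")
    return mb.square(x=x)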
def test_builder_cast_eval(self):
    val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32)
    expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.int32)
    v = mb.cast(x=val, dtype="int32")
    np.testing.assert_allclose(expected_outputs, v.val, atol=1e-04, rtol=1e-05)
def prog(x):
    def true_fn():
        topk, _ = mb.topk(x=x, k=1, axis=-1, ascending=True)
        return mb.add(x=topk, y=1)

    def false_fn():
        topk, _ = mb.topk(x=x, k=1, axis=-1, ascending=True)
        return mb.add(x=topk, y=2)

    shape = mb.shape(x=x)
    rank = mb.shape(x=shape)
    pred = mb.squeeze(x=rank)
    return mb.cond(pred=mb.cast(x=pred, dtype="bool"), _true_fn=true_fn, _false_fn=false_fn)
def prog(x):
    x = mb.cast(x=x, dtype="fp16")
    x = mb.cast(x=x, dtype="fp16")
    x = mb.cast(x=x, dtype="int32")
    x = mb.cast(x=x, dtype="int64")
    x = mb.cast(x=x, dtype="fp32")
    x = mb.cast(x=x, dtype="fp16")
    x = mb.square(x=x)
    return x
def _adjust_main_inputs(func):
    first_op = func.operations[0] if len(func.operations) > 0 else None
    for input_name, input_var in func.inputs.items():
        if (types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type)) \
            and input_var.dtype != types.fp32 \
            and input_var.dtype != types.int32:
            input_dtype_str = types.builtin_to_string(input_var.dtype)
            if types.is_int(input_var.dtype):
                # Replace non-int32 input type with int32.
                logging.warning("Input '" + input_var.name + "' is of dtype " + input_dtype_str +\
                                ". Only integer variables of bit width 32 are supported by the CoreML runtime. " +\
                                "This input will be assigned a dtype of int32. " +\
                                "No cast will be inserted; the previous dtype will be replaced.")
                _adjust_var_dtype_helper(input_var, types.int32)
            elif input_var.dtype == types.fp64:
                # Replace fp64 input type with fp32.
                logging.warning("Input '" + input_var.name + "' is of dtype fp64. 64 bit float inputs are " +\
                                "not supported by ML program models. This input will be assigned a dtype " +\
                                "of fp32. No cast will be inserted; the previous dtype will be replaced.")
                _adjust_var_dtype_helper(input_var, types.fp32)
            elif input_var.dtype == types.fp16 \
                and func.opset_version >= target.iOS16:
                pass  # Do nothing, since fp16 is a valid input type for CoreML from iOS16 onwards.
            else:
                # This is some other dtype. Change the type to fp32 and add a cast.
                # This is only a limitation of main--other functions do not represent CoreML model inputs
                # and do not have the same limitation on input types.
                supported_dtypes = "{int32, fp32, fp64}" if func.opset_version < target.iOS16 else \
                                   "{int32, fp16, fp32, fp64}"
                msg = "\nInput '{}' is of dtype {}. The " +\
                      "CoreML runtime does not support inputs with this dtype " +\
                      "(supported dtypes are: {}). This input will be assigned a dtype of " +\
                      "fp32. A cast will be inserted at the beginning of the program to " +\
                      "convert the input to the originally defined dtype.\n"
                if input_var.dtype == types.fp16:
                    msg += "fp16 dtype input is supported if the function.opset_version is chosen to be at least " \
                           "iOS16/macOS13.\n"
                logging.warning(msg.format(input_var.name, input_dtype_str, supported_dtypes))
                casted_input_var = mb.cast(x=input_var, dtype=input_dtype_str, before_op=first_op)
                func.replace_uses_of_var_after_op(
                    anchor_op=casted_input_var.op, old_var=input_var, new_var=casted_input_var)
                _adjust_var_dtype_helper(input_var, types.fp32)
def prog(x):
    x = mb.cast(x=x, dtype="int32")
    x1 = mb.cast(x=x, dtype="fp32")
    x2 = mb.cast(x=x, dtype="fp16")
    x3 = mb.cast(x=x, dtype="fp16")
    x4 = mb.cast(x=x, dtype="fp16")
    x5 = mb.cast(x=x, dtype="fp32")
    x6 = mb.square(x=x1)
    x7 = mb.square(x=x2)
    x8 = mb.relu(x=x3)
    x9 = mb.log(x=x4)
    x10 = mb.log(x=x5)
    return x6, x7, x8, x9, x10
def _adjust_main_outputs(func):
    new_outputs = []
    for output_var in func.outputs:
        output_type = output_var.sym_type
        if (_types.is_tensor(output_type) or _types.is_scalar(output_type)) \
            and output_var.dtype != _types.fp32 \
            and output_var.dtype != _types.int32:
            output_dtype_str = _types.builtin_to_string(output_var.dtype)
            _warnings.warn("Output '" + output_var.name + "' is of dtype " + output_dtype_str + ". The " +\
                           "CoreML runtime does not support outputs with this dtype (only int32 and " +\
                           "fp32 are supported for outputs). This output will be assigned a dtype " +\
                           "of fp32. A cast will be inserted at the end of the program to convert " +\
                           "the original output dtype to the dtype supported by the CoreML runtime.")
            output_var_name = output_var.name
            output_var.set_name(output_var_name + "__pre__output__fp32__cast")
            # Convert the output to fp32, and add a cast.
            with func:
                output_var = _mb.cast(x=output_var, dtype="fp32")
                output_var.set_name(output_var_name)
        new_outputs.append(output_var)
    func.set_outputs(new_outputs)
def _adjust_main_inputs(func):
    first_op = func.operations[0] if len(func.operations) > 0 else None
    for input_name, input_var in func.inputs.items():
        if (_types.is_tensor(input_var.sym_type) or _types.is_scalar(input_var.sym_type)) \
            and input_var.dtype != _types.fp32 \
            and input_var.dtype != _types.int32:
            input_dtype_str = _types.builtin_to_string(input_var.dtype)
            if _types.is_int(input_var.dtype):
                # Replace non-int32 input type with int32.
                _warnings.warn("Input '" + input_var.name + "' is of dtype " + input_dtype_str +\
                               ". Only integer variables of bit width 32 are supported by the CoreML runtime. " +\
                               "This input will be assigned a dtype of int32. " +\
                               "No cast will be inserted; the previous dtype will be replaced.")
                _adjust_var_dtype_helper(input_var, _types.int32)
            elif input_var.dtype == _types.fp64:
                # Replace fp64 input type with fp32.
                _warnings.warn("Input '" + input_var.name + "' is of dtype fp64. 64 bit float inputs are " +\
                               "not supported by ML program models. This input will be assigned a dtype " +\
                               "of fp32. No cast will be inserted; the previous dtype will be replaced.")
                _adjust_var_dtype_helper(input_var, _types.fp32)
            else:
                # This is some other dtype. Change the type to fp32 and add a cast.
                # This is only a limitation of main--other functions do not represent CoreML model inputs
                # and do not have the same limitation on input types.
                _warnings.warn("Input '" + input_var.name + "' is of dtype " + input_dtype_str + ". The " +\
                               "CoreML runtime does not support inputs with this dtype (only fp32 and " +\
                               "int32 inputs are supported). This input will be assigned a dtype of " +\
                               "fp32. A cast will be inserted at the beginning of the program to " +\
                               "convert the input to the originally defined dtype.")
                with func:
                    casted_input_var = _mb.cast(x=input_var, dtype=input_dtype_str, before_op=first_op)
                    func.replace_uses_of_var_after_op(
                        anchor_op=casted_input_var.op, old_var=input_var, new_var=casted_input_var)
                    _adjust_var_dtype_helper(input_var, _types.fp32)
def build(cond, a, b):
    if not types.is_bool(cond.dtype):
        cond = mb.cast(x=cond, dtype="bool")
    return [mb.select(cond=cond, a=a, b=b)]
def prog(x):
    x = mb.cast(x=x, dtype="fp16", name="castop")
    x = mb.cast(x=x, dtype="fp32", name="castop")
    x = mb.square(x=x, name="square_last")
    return x
def build(x):
    shape = mb.shape(x=x)
    return mb.cast(x=shape, dtype="int32")
def test_builder_to_backend_smoke(self, use_cpu_for_conversion, backend, mode):
    if mode == "abs":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        build = lambda x: mb.abs(x=x)
    elif mode == "acos":
        val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32)
        expected_outputs = np.array(
            [[3.14159265, 2.0943951, 1.57079633], [1.15927948, 1.04719755, 0.64350111]],
            dtype=np.float32,
        )
        build = lambda x: mb.acos(x=x)
    elif mode == "asin":
        val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32)
        expected_outputs = np.array(
            [[-1.57079633, -0.52359878, 0.0], [0.41151685, 0.52359878, 0.92729522]],
            dtype=np.float32,
        )
        build = lambda x: mb.asin(x=x)
    elif mode == "atan":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-0.78539816, 1.10714872, -1.24904577], [1.32581766, -1.37340077, 1.40564765]],
            dtype=np.float32,
        )
        build = lambda x: mb.atan(x=x)
    elif mode == "atanh":
        val = np.array([[-0.8, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32)
        expected_outputs = np.array(
            [[-1.09861229, -0.54930614, 0.0], [0.42364893, 0.54930614, 1.09861229]],
            dtype=np.float32,
        )
        build = lambda x: mb.atanh(x=x)
    elif mode == "cast":
        val = np.array([[-1.2, 2, -3.6], [4.5, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.int32)
        build = lambda x: mb.cast(x=x, dtype="int32")
    elif mode == "ceil":
        val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32)
        build = lambda x: mb.ceil(x=x)
    elif mode == "clip":
        val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[0, 2, 0], [4.5, 0, 5]], dtype=np.float32)
        build = lambda x: mb.clip(x=x, alpha=0.0, beta=5.0)
    elif mode == "cos":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[0.54030231, -0.41614684, -0.9899925], [-0.65364362, 0.28366219, 0.96017029]],
            dtype=np.float32,
        )
        build = lambda x: mb.cos(x=x)
    elif mode == "cosh":
        val = np.array([[-1, -2, -3], [1, 2, 3]], dtype=np.float32)
        expected_outputs = np.array(
            [[1.54308063, 3.76219569, 10.067662], [1.54308063, 3.76219569, 10.067662]],
            dtype=np.float32,
        )
        build = lambda x: mb.cosh(x=x)
    elif mode == "erf":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [
                [-0.8427007929497148, 0.9953222650189527, -0.9999779095030014],
                [0.9999999845827421, -0.9999999999984626, 1.0],
            ],
            dtype=np.float32,
        )
        build = lambda x: mb.erf(x=x)
    elif mode == "exp":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[0.36787944, 7.3890561, 0.04978707], [54.5981500, 0.0067379, 403.428793]],
            dtype=np.float32,
        )
        build = lambda x: mb.exp(x=x)
    elif mode == "exp2":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array([[0.5, 4.0, 0.125], [16, 0.03125, 64]], dtype=np.float32)
        build = lambda x: mb.exp2(x=x)
    elif mode == "floor":
        val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[-2, 2, -4], [4, -5, 6]], dtype=np.float32)
        build = lambda x: mb.floor(x=x)
    elif mode == "inverse":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-1.0, 0.5, -0.33333334], [0.25, -0.2, 0.16666667]], dtype=np.float32
        )
        build = lambda x: mb.inverse(x=x)
    elif mode == "log":
        val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[0.0, 0.69314718, 1.09861229], [1.38629436, 1.60943791, 1.79175947]],
            dtype=np.float32,
        )
        build = lambda x: mb.log(x=x)
    elif mode == "round":
        val = np.array([[-1.2, 2, -3.4], [4.6, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32)
        build = lambda x: mb.round(x=x)
    elif mode == "rsqrt":
        val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[1.0, 0.70710678, 0.57735027], [0.5, 0.4472136, 0.40824829]],
            dtype=np.float32,
        )
        build = lambda x: mb.rsqrt(x=x)
    elif mode == "sign":
        val = np.array([[-1, 2, 0], [0, -5, 6]], dtype=np.float32)
        expected_outputs = np.array([[-1, 1, 0], [0, -1, 1]], dtype=np.float32)
        build = lambda x: mb.sign(x=x)
    elif mode == "sin":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-0.84147098, 0.90929743, -0.14112001], [-0.7568025, 0.95892427, -0.2794155]],
            dtype=np.float32,
        )
        build = lambda x: mb.sin(x=x)
    elif mode == "sinh":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-1.1752, 3.62686, -10.017874], [27.289917, -74.20321, 201.71315]],
            dtype=np.float32,
        )
        build = lambda x: mb.sinh(x=x)
    elif mode == "sqrt":
        val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[1.0, 1.41421356, 1.73205081], [2.0, 2.23606798, 2.44948974]],
            dtype=np.float32,
        )
        build = lambda x: mb.sqrt(x=x)
    elif mode == "square":
        val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        expected_outputs = np.array([[1.0, 4.0, 9.0], [16.0, 25.0, 36.0]], dtype=np.float32)
        build = lambda x: mb.square(x=x)
    elif mode == "tan":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-1.5574, -2.185, 0.1425], [1.15782, 3.3805, -0.291]], dtype=np.float32
        )
        build = lambda x: mb.tan(x=x)
    elif mode == "tanh":
        val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
        expected_outputs = np.array(
            [[-0.7615942, 0.9640276, -0.9950548], [0.9993293, -0.9999092, 0.9999877]],
            dtype=np.float32,
        )
        build = lambda x: mb.tanh(x=x)
    elif mode == "threshold":
        val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32)
        expected_outputs = np.array([[1.0, 2, 1.0], [4.5, 1.0, 6.7]], dtype=np.float32)
        build = lambda x: mb.threshold(x=x, alpha=1.0)

    input_placeholders = {"x": mb.placeholder(shape=val.shape)}
    input_values = {"x": val}
    expected_output_types = (2, 3, types.int32) if mode == "cast" else (2, 3, types.fp32)
    run_compare_builder(
        build,
        input_placeholders,
        input_values,
        expected_output_types,
        expected_outputs,
        use_cpu_only=use_cpu_for_conversion,
        frontend_only=False,
        backend=backend,
    )
def prog(x):
    x1 = mb.cast(x=x, dtype="fp16")
    x2 = mb.cast(x=x1, dtype="fp32")
    x3 = mb.transpose(x=x1, perm=[1, 0])
    x4 = mb.transpose(x=x3, perm=[1, 0])
    return x2, x4
def convert(self):
    _logging.info("Converting graph.")

    # This will hold the converted model.
    prog = self._prog

    # Construct placeholders for the inputs to the SSA function.
    # This is where input renaming occurs.
    ssa_func_inputs = OrderedDict()
    for index, (name, spec) in enumerate(self.graph.inputs.items()):
        placeholder = self._create_placeholder(spec)
        # Set the SSA function input name to the user-defined name, if provided.
        if spec.name is not None:
            name = spec.name
        self.inputs[index].name = name
        ssa_func_inputs[name] = placeholder
    prog.set_main_input_types(tuple(self.inputs))

    # Initialize the SSA for conversion.
    with Function(ssa_func_inputs, opset_version=self.opset_version) as ssa_func:

        # Map internal @self.graph.inputs to user specified @ssa_func_inputs.
        # If @self.graph.inputs == @ssa_func_inputs this just adds the inputs
        # to the context.
        for internal_name, users_name in zip(self.graph.inputs.keys(), ssa_func_inputs.keys()):
            input_var = ssa_func.inputs[users_name]
            if (types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type)) \
                and (input_var.dtype == types.fp16 or input_var.dtype == types.fp64):
                # Cast the input var to float32.
                # We need to do this because the type inference is very buggy when started from
                # float16/float64 typed inputs. Until that is fixed in the radar below,
                # we cast all inputs of type float16/float64 to float32 as the first step.
                # These casts will later get removed if compute_precision=Float16 is
                # provided, which will cause the FP16ComputePrecision pass to run.
                # TODO: remove this when this radar is fixed: rdar://93731970
                input_var = mb.cast(x=input_var, dtype="fp32")
            self.context.add(input_var, torch_name=internal_name)

        self.convert_const()

        # Add the rest of the operations.
        convert_nodes(self.context, self.graph)

        graph_outputs = [self.context[name] for name in self.graph.outputs]

        # An output can be None when it's a None constant, which happens
        # in Fairseq MT.
        for g in graph_outputs:
            if g is None:
                msg = "Dropping output {} which is None"
                _logging.warning(msg.format(g))
        graph_outputs = [g for g in graph_outputs if g is not None]

        # Output renaming occurs here.
        if self.outputs is not None:
            if len(self.outputs) != len(graph_outputs):
                msg = "Number of outputs provided, {}, does not match the number of outputs detected in the model, {}."
                raise ValueError(
                    msg.format(
                        len(self.outputs),
                        len(graph_outputs),
                    ))
        if self.output_names:
            for index, var in enumerate(graph_outputs):
                if self.output_names[index] is not None:
                    output_rename = self.output_names[index]
                    var.name = output_rename

        ssa_func.set_outputs(graph_outputs)
        prog.add_function("main", ssa_func)
        if self.outputs is not None:
            prog.set_main_output_types(self.outputs)
    self.torch_passes(prog)
    return prog
def transform_op(self, op):
    block = op.enclosing_block
    casted_inputs = {}
    inputs_modified = False

    # First loop: iterate over all the input parameters of the op.
    for param, inputs in op.inputs.items():
        if not self.is_valid_parameter(op, param):
            continue

        is_list_input = isinstance(inputs, (list, tuple))
        if not is_list_input:
            inputs = [inputs]
        casted_inputs[param] = list(inputs[:])

        # Second loop: iterate over all the vars of a python list corresponding to an input parameter.
        for i, var in enumerate(inputs):
            if not var.is_tensor_or_scalar_of(dtype="fp32"):
                continue
            inputs_modified = True
            with block:
                casted_var_name = var.name + "_to_fp16"
                if len(var._child_ops) > 1 and casted_var_name in self.cache_vars and (
                        self.cache_vars[casted_var_name] in block._visible_vars_in_block()[1]):
                    casted_inputs[param][i] = self.cache_vars[casted_var_name]
                else:
                    x = mb.cast(x=var, dtype="fp16", name=casted_var_name, before_op=op)
                    self._check_underflow_to_zero(x, var)
                    casted_inputs[param][i] = x
                    if len(var._child_ops) > 1:
                        self.cache_vars[casted_var_name] = casted_inputs[param][i]

        if not is_list_input:
            casted_inputs[param] = casted_inputs[param][0]

    if inputs_modified:
        casted_inputs.update({k: v for k, v in op.inputs.items() if k not in casted_inputs})
        casted_inputs["name"] = op.name + "_cast"
        casted_inputs["before_op"] = op
        with block:
            quant_output = getattr(mb, op.op_type)(**casted_inputs)
        if not isinstance(quant_output, (list, tuple)):
            quant_output = [quant_output]
        for old_output_var, new_output_var in zip(op.outputs, quant_output):
            if old_output_var.is_tensor_or_scalar_of(dtype="fp32") and (
                    not new_output_var.is_tensor_or_scalar_of(dtype="fp32")):
                with block:
                    x = mb.cast(
                        x=new_output_var,
                        dtype="fp32",
                        name=new_output_var.name + "_to_fp32",
                        before_op=op,
                    )
                    op.enclosing_block.replace_uses_of_var_after_op(
                        anchor_op=op, old_var=old_output_var, new_var=x)
            else:
                op.enclosing_block.replace_uses_of_var_after_op(
                    anchor_op=op, old_var=old_output_var, new_var=new_output_var)
        block.remove_ops([op])
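For reference, a minimal, illustrative sketch of how this transform_op (the FP16ComputePrecision pass) is typically triggered through the public conversion API; the model and names below are hypothetical.

import torch
import coremltools as ct

model = torch.nn.Linear(4, 2).eval()
traced = torch.jit.trace(model, torch.rand(1, 4))

# compute_precision=ct.precision.FLOAT16 causes the FP16ComputePrecision pass
# to wrap fp32 ops with fp16 casts where it is considered safe to do so.
mlmodel = ct.convert(
    traced,
    inputs=[ct.TensorType(name="x", shape=(1, 4))],
    convert_to="mlprogram",
    compute_precision=ct.precision.FLOAT16,
)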
def prog(x):
    y = mb.cast(x=x, dtype="int64")
    return y
def build(x):
    y = mb.const(val=constant)
    x = mb.cast(x=x, dtype="int32")
    z = mb.add(x=x, y=y)
    return mb.cast(x=z, dtype="fp32")
def prog(x):
    x = mb.cast(x=x, dtype="fp32")
    x = mb.square(x=x)
    x = mb.cast(x=x, dtype="fp32")
    return x