def conv_transpose_scale_div(x):
    conv = mb.conv_transpose(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")
    real_div = mb.real_div(x=conv, y=arbitrary_scalar, name="scale")
    return real_div
def instancenorm_2(x):
    """
    Identify the pattern:

    y = (x - mean) / pow(variance + epsilon) * gamma + beta

    This pattern corresponds to, and should be fused as, instance_norm.
    All of the following must be satisfied:

    1) Input is a rank-4 tensor.
    2) Reduce operates on spatial dimensions axes=[-2, -1], or axes=[-3, -2]
       (a channel-first to channel-last transpose would be inserted in such a case).
    3) Gamma and beta are both of shape (C,) after squeeze, where C is the number of channels.

    |----> sub0 ----------|                        const (0.5)
    |       ^             |                            |
    |       |             V                            V
    x ---> main_reduce  square --> mean1 --> add_eps ---> pow     const_gamma   const_beta
    |       |                                              |           |            |
    |       V                                              V           V            V
    |----> sub1 --------------------------------------> real_div --> mul_gamma --> add_beta --> ...
    """
    main_reduce = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")
    sub0 = mb.sub(x=x, y=main_reduce, name="sub0")
    sub1 = mb.sub(x=x, y=main_reduce, name="sub1")
    square = mb.square(x=sub0, name="square")
    mean1 = mb.reduce_mean(x=square, axes=[2, 3], keep_dims=True, name="mean1")
    add_epsilon = mb.add(x=mean1, y=1e-5, name="add_epsilon")
    pow = mb.pow(x=add_epsilon, y=0.5, name="pow")
    real_div = mb.real_div(x=sub1, y=pow, name="real_div")
    mul_gamma = mb.mul(x=np.random.rand(1, 5, 1, 1), y=real_div, name="mul_gamma")
    add_beta = mb.add(x=np.random.rand(1, 5, 1, 1), y=mul_gamma, name="add_beta")
    return add_beta
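# A minimal NumPy sketch (illustration only, not part of the pass; all names
# below are local to this sketch): the instancenorm_2 graph above computes
# plain instance norm with per-channel statistics over the spatial axes.
import numpy as np

def instancenorm_2_reference(x, gamma, beta, eps=1e-5):
    mean = x.mean(axis=(2, 3), keepdims=True)                 # main_reduce
    var = ((x - mean) ** 2).mean(axis=(2, 3), keepdims=True)  # sub0 -> square -> mean1
    return (x - mean) / (var + eps) ** 0.5 * gamma + beta     # add_epsilon -> pow -> real_div -> mul_gamma -> add_beta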
def instancenorm_3(x):
    """
    Detect the InstanceNorm pattern in TensorFlow-Addons.

    This pattern corresponds to, and should be fused as, instance_norm.
    All of the following must be satisfied:

    1) Input is a rank-4 tensor.
    2) Reduce operates on spatial dimensions axes=[-2, -1], or axes=[-3, -2]
       (a channel-first to channel-last transpose would be inserted in such a case).
    3) Gamma and beta are absent. Default values for gamma and beta would be used.

         |--------------------------------------------------------|
         |                                                        |
         |                                                        V
    x --> main_reduce    square --> mean1 --> add_eps --> rsqrt --> mul2 --> mul_sub
    |      |               ^                                |                    |
    |      V               |                                |                    |
    |---> sub -------------|                                V                    V
    |----------------------------------------------------> mul1 --------------> add --> ...
    """
    main_reduce = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")
    sub = mb.sub(x=x, y=main_reduce, name="sub")
    square = mb.square(x=sub, name="square")
    mean1 = mb.reduce_mean(x=square, axes=[2, 3], keep_dims=True, name="mean1")
    add_epsilon = mb.add(x=mean1, y=1e-5, name="add_epsilon")  # epsilon
    rsqrt = mb.rsqrt(x=add_epsilon, name="rsqrt")
    mul1 = mb.mul(x=rsqrt, y=x, name="mul1")
    mul2 = mb.mul(x=main_reduce, y=rsqrt, name="mul2")
    mul_sub = mb.mul(x=mul2, y=-1, name="mul_sub")
    add = mb.add(x=mul1, y=mul_sub, name="add")
    return add
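# Sketch (a NumPy restatement for illustration, not source code): the
# TF-Addons decomposition above is the same normalization rearranged as
# x * rsqrt(var + eps) - mean * rsqrt(var + eps) == (x - mean) / sqrt(var + eps).
import numpy as np

def instancenorm_3_reference(x, eps=1e-5):
    mean = x.mean(axis=(2, 3), keepdims=True)                 # main_reduce
    var = ((x - mean) ** 2).mean(axis=(2, 3), keepdims=True)  # sub -> square -> mean1
    rsqrt = 1.0 / np.sqrt(var + eps)                          # add_epsilon -> rsqrt
    return x * rsqrt + (mean * rsqrt) * -1.0                  # mul1 + mul_sub (mul2 * -1)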
def transform_transpose_pattern(pattern):
    is_deconv = pattern.conv.op_type == "conv_transpose"

    # get the bias
    bias = pattern.add_or_sub.x.val if pattern.add_or_sub.x.val is not None else pattern.add_or_sub.y.val
    is_first_input = pattern.add_or_sub.y.val is not None
    is_sub = pattern.add_or_sub.op_type == "sub"

    # get the conv bias/weight
    conv_shape = pattern.conv.outputs[0].shape
    Cout = conv_shape[1]
    conv_weight = pattern.conv.weight.val
    conv_weight_type = conv_weight.dtype
    conv_bias = np.zeros(Cout).astype(conv_weight_type) if pattern.conv.bias is None else pattern.conv.bias.val

    bias = _bias_mod_and_validity(bias, Cout, pattern)

    # compute the new bias
    if is_sub:
        if is_first_input:
            bias = -bias
        else:
            conv_bias = -conv_bias
    new_bias = conv_bias + bias

    # compute the new weight
    if is_sub and not is_first_input:
        new_weight = -conv_weight
    else:
        new_weight = conv_weight

    # create a new conv op with the new weight, bias value, copying rest of the attributes
    conv_kargs = {"weight": new_weight, "bias": new_bias, "before_op": pattern.conv}
    for k, v in pattern.conv.inputs.items():
        if k in ["weight", "bias"]:
            continue
        conv_kargs[k] = v

    if is_deconv:
        x = mb.conv_transpose(**conv_kargs)
    else:
        x = mb.conv(**conv_kargs)

    # create a new transpose op
    out_name = pattern.add_or_sub.outputs[0].name
    transpose_kargs = {"x": x, "name": out_name, "before_op": pattern.transpose}
    for k, v in pattern.transpose.inputs.items():
        if k == "x":
            continue
        transpose_kargs[k] = v
    x = mb.transpose(**transpose_kargs)

    pattern.add_or_sub.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=pattern.add_or_sub, old_var=pattern.add_or_sub.outputs[0], new_var=x
    )

    # Remove all the ops at once
    pattern.block.remove_ops(pattern.op_list())
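# Quick check of the folding algebra above (NumPy sketch; a matmul stands in
# for the conv/deconv since both are linear maps, and all names are local):
# the "sub with conv as second input" case rewrites c - conv(x) as a conv with
# negated weight and bias (c - conv_bias); the other three cases are analogous.
import numpy as np

W = np.random.rand(3, 4)
b = np.random.rand(3)
c = np.random.rand(3)
x = np.random.rand(4)
np.testing.assert_allclose(c - (W @ x + b), (-W) @ x + (c - b))  # new_weight = -W, new_bias = c - b
np.testing.assert_allclose((W @ x + b) - c, W @ x + (b - c))     # sub with conv as first input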
def conv_transpose_scale_mul(x):
    conv = mb.conv_transpose(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")
    mul = mb.mul(x=conv, y=arbitrary_scalar, name="scale")
    return mul
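# Identity these two scale patterns rely on (NumPy sketch; matmul as a
# stand-in for the linear conv_transpose op, local names only): a
# per-output-channel scale folds into the weights, and a division folds the
# same way with 1/s.
import numpy as np

W = np.random.rand(3, 4)
x = np.random.rand(4)
s = np.random.rand(3)
np.testing.assert_allclose((W @ x) * s, (W * s[:, None]) @ x)          # conv_transpose_scale_mul
np.testing.assert_allclose((W @ x) / s, (W * (1.0 / s)[:, None]) @ x)  # conv_transpose_scale_div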
def _prelu_pattern(x):
    # MIL operation takes named inputs (instead of positional inputs).
    # Here `name` argument is MANDATORY.
    neg = mb.mul(x=x, y=-1, name="neg")
    relu1 = mb.relu(x=neg, name="relu1")
    # use any constant here to match, rank and shape will be verified in "is_var_constraint_satisifed" method
    mul = mb.mul(x=relu1, y=np.random.rand(2, 2, 2, 2), name="alpha_mul")
    relu2 = mb.relu(x=x, name="relu2")
    out = mb.add(x=relu2, y=mul, name="out_op")
    return out
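# Identity behind the pattern (NumPy sketch, local names only):
# a * relu(-x) + relu(x) equals prelu(x) with slope alpha = -a, because for
# x < 0 the output is a * (-x) = (-a) * x, and for x >= 0 it is just x.
import numpy as np

relu = lambda t: np.maximum(t, 0.0)
prelu = lambda t, alpha: np.where(t >= 0.0, t, alpha * t)

x = np.random.randn(2, 2, 2, 2)
a = np.random.rand(2, 2, 2, 2)
np.testing.assert_allclose(a * relu(-x) + relu(x), prelu(x, -a))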
def conv_transpose_batchnorm(x):
    conv = mb.conv_transpose(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")
    batch_norm = mb.batch_norm(x=conv, mean=arbitrary_mean, variance=arbitrary_variance, name="batchnorm")
    return batch_norm
def transform_pattern(pattern):
    """
    Insert instance_norm / layer_norm and delete all ops.

    :param pattern: A pattern object that contains all relevant information.
    """
    out_name = pattern.final_op.outputs[0].name
    axes = pattern.main_reduce.axes.val

    if pattern.requires_rank4_transpose:
        x = mb.transpose(
            x=pattern.main_reduce.x,
            perm=[0, 3, 1, 2],
            name=out_name + "_transpose_nhwc_nchw",
            before_op=pattern.final_op,
        )
    if pattern.is_instancenorm:
        x = mb.instance_norm(
            x=x if pattern.requires_rank4_transpose else pattern.main_reduce.x,
            gamma=np.squeeze(pattern.gamma_var.val),
            beta=np.squeeze(pattern.beta_var.val),
            epsilon=pattern.epsilon_var,
            name=out_name + "_instancenorm" if pattern.requires_rank4_transpose else out_name,
            before_op=pattern.final_op,
        )
    else:  # is_layernorm
        x = mb.layer_norm(
            x=x if pattern.requires_rank4_transpose else pattern.main_reduce.x,
            axes=axes,
            gamma=pattern.gamma_var,
            beta=pattern.beta_var,
            epsilon=pattern.epsilon_var,
            name=out_name + "_layernorm" if pattern.requires_rank4_transpose else out_name,
            before_op=pattern.final_op,
        )
    if pattern.requires_rank4_transpose:
        x = mb.transpose(
            x=x,
            perm=[0, 2, 3, 1],
            name=out_name + "_transpose_nchw_nhwc",
            before_op=pattern.final_op,
        )

    pattern.final_op.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=pattern.final_op, old_var=pattern.final_op.outputs[0], new_var=x
    )

    # Remove all the ops at once
    pattern.block.remove_ops(pattern.op_list())
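# Why the transpose sandwich (hedged NumPy restatement, not source code):
# MIL's instance_norm normalizes a channel-first (NCHW) tensor, so when the
# matched reduce worked on NHWC input, the pass wraps the fused op between
# the two transposes created above.
import numpy as np

def nhwc_wrapped(x_nhwc, norm_nchw):
    x_nchw = np.transpose(x_nhwc, (0, 3, 1, 2))  # "_transpose_nhwc_nchw"
    y_nchw = norm_nchw(x_nchw)                   # instance_norm / layer_norm
    return np.transpose(y_nchw, (0, 2, 3, 1))    # "_transpose_nchw_nhwc"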
def test_tutorial():
    import numpy as np

    import coremltools as ct
    from coremltools.converters.mil import Builder as mb

    @mb.program(input_specs=[mb.TensorSpec(shape=(1, 100, 100, 3))])
    def prog(x):
        x = mb.relu(x=x, name="relu")
        x = mb.transpose(x=x, perm=[0, 3, 1, 2], name="transpose")
        x = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=False, name="reduce")
        x = mb.log(x=x, name="log")
        y = mb.add(x=1, y=2)
        return x

    print("prog:\n", prog)

    # Convert and verify
    from coremltools import models
    from coremltools.converters.mil.converter import _convert

    proto = _convert(prog, convert_from="mil")
    model = models.MLModel(proto)

    # running predict() is only supported on macOS
    if ct.utils._is_macos():
        prediction = model.predict(
            {"x": np.random.rand(1, 100, 100, 3).astype(np.float32)}
        )
        assert len(prediction) == 1
def pattern_to_detect(conv_transpose, transpose, sub):
    """
    Wrapper to create 8 patterns to detect for conciseness.
    """

    @mb.program(input_specs=[mb.TensorSpec(shape=arbitrary_input)])
    def conv_bias_pattern(x):
        if not conv_transpose:
            conv = mb.conv(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")
        else:
            conv = mb.conv_transpose(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")

        if transpose:
            transpose_layer = mb.transpose(x=conv, perm=arbitrary_perm, name="transpose")

        if sub:
            add_or_sub = mb.sub(x=transpose_layer if transpose else conv, y=arbitrary_scalar, name="add_or_sub")
        else:
            add_or_sub = mb.add(x=transpose_layer if transpose else conv, y=arbitrary_scalar, name="add_or_sub")
        return add_or_sub

    return conv_bias_pattern
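# Example usage (sketch; assumes the arbitrary_* placeholders above are bound):
# toggling the three booleans enumerates all 2 * 2 * 2 = 8 conv/bias
# arrangements the fusion pass should recognize.
from itertools import product

all_patterns = [
    pattern_to_detect(conv_transpose, transpose, sub)
    for conv_transpose, transpose, sub in product([False, True], repeat=3)
]
assert len(all_patterns) == 8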
def get_gelu_pattern1():
    """
    y = x * (0.5 * (tanh(((.0447)x^3 + x) * sqrt(2/pi)) + 1))

    [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...]
      |                                            ^                                                                          ^
      |                                            |                                                                          |
      |------------------------------------------------------------------------------------------------------------------------
    """

    @mb.program(input_specs=[mb.TensorSpec(shape=[get_new_symbol(), get_new_symbol(), get_new_symbol()])])
    def gelu_to_detect_1(x):
        # MIL operation takes named inputs (instead of positional inputs).
        # Here `name` argument is MANDATORY.
        pow = mb.pow(x=x, y=3.0, name="pow")
        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
        add = mb.add(x=x, y=mul_1, name="add")
        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
        tanh = mb.tanh(x=mul_2, name="tanh")
        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
        mul = mb.mul(x=0.5, y=add_1, name="mul")
        mul_3 = mb.mul(x=mul, y=x, name="mul_3")
        return mul_3

    return gelu_to_detect_1
def get_gelu_pattern2():
    """
    y = (0.5 * x) * (tanh(((.0447)x^3 + x) * sqrt(2/pi)) + 1)

                  ---------------------------------------------------------------------------------------------------------
                  ^                                                                                                         |
                  |                                                                                                         V
    [...] -----> mul(0.5)    pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) -----> mul ---> [...]
      |                        ^                               ^
      |                        |                               |
      |------------------------------------------------------------
    """

    @mb.program(input_specs=[mb.TensorSpec(shape=[get_new_symbol(), get_new_symbol(), get_new_symbol()])])
    def gelu_to_detect_2(x):
        pow = mb.pow(x=x, y=3.0, name="pow")
        mul_1 = mb.mul(x=0.044714998453855515, y=pow, name="mul_1")
        add = mb.add(x=x, y=mul_1, name="add")
        mul_2 = mb.mul(x=0.7978845834732056, y=add, name="mul_2")
        tanh = mb.tanh(x=mul_2, name="tanh")
        add_1 = mb.add(x=1.0, y=tanh, name="add_1")
        mul = mb.mul(x=0.5, y=x, name="mul")
        mul_3 = mb.mul(x=mul, y=add_1, name="mul_3")
        return mul_3

    return gelu_to_detect_2
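# Where the magic constants come from (standalone NumPy sketch): 0.044715 and
# sqrt(2/pi) ~= 0.7978845608 are the standard GELU tanh approximation encoded
# by both patterns; it tracks the exact erf-based GELU closely. Assumes SciPy
# is available for the exact form.
import numpy as np
from scipy.special import erf

x = np.linspace(-3.0, 3.0, 101)
exact = 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))
approx = 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))
assert np.max(np.abs(exact - approx)) < 1e-3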
def test_rename_feature_mlprogram(self):
    @mb.program(input_specs=[mb.TensorSpec(shape=(3,))])
    def linear_prog(input):
        W = np.ones((10, 3), dtype=np.float32)
        out = mb.linear(x=input, weight=W, name="output")
        return out

    model = coremltools.convert(linear_prog, convert_to="mlprogram")

    spec = model.get_spec()
    input_name = spec.description.input[0].name
    output_name = spec.description.output[0].name

    # rename input
    rename_feature(spec, input_name, "new_input_name")
    self.assertEqual(spec.description.input[0].name, "new_input_name")
    model = coremltools.models.MLModel(spec, weights_dir=model.weights_dir)
    out = model.predict({"new_input_name": np.array([1.0, 2.0, 3.0])})[output_name]
    self.assertEqual(out.shape, (10,))
    self.assertEqual(out[0], 6.0)

    # rename output
    rename_feature(spec, output_name, "new_output_name")
    self.assertEqual(spec.description.output[0].name, "new_output_name")
    model = coremltools.models.MLModel(spec, weights_dir=model.weights_dir)
    out = model.predict({"new_input_name": np.array([1.0, 2.0, 3.0])})["new_output_name"]
    self.assertEqual(out.shape, (10,))
    self.assertEqual(out[1], 6.0)
def transform_pattern(pattern):
    # remove all the ops, and replace with a prelu op + transpose op
    perm = pattern.transpose.perm.val
    out_var = pattern.out_op.outputs[0]

    if pattern.alpha_mul.x.val is not None:
        alpha = pattern.alpha_mul.x.val
    else:
        alpha = pattern.alpha_mul.y.val

    alpha_vector = -1 * alpha.flatten()
    x = mb.prelu(x=pattern.root_var, alpha=alpha_vector, before_op=pattern.out_op)
    x = mb.transpose(x=x, perm=perm, name=out_var.name, before_op=pattern.out_op)

    pattern.out_op.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=pattern.out_op, old_var=out_var, new_var=x
    )

    # Remove all the ops at once
    pattern.block.remove_ops(pattern.op_list())
def pattern_add(x):
    """
    Original:
        %4 = linear(x=%1, weight=%2, bias=%3)  # %2 is a rank-2 const tensor (weight)
                                               # %3 is a rank-1 const tensor (bias)
        ...
        %6 = add(x=%4, y=%5)  # %5 is a const tensor with the same shape as %3

    Result:
        %8 = linear(x=%1, weight=%2, bias=%7)  # where %7 is a new const tensor with value
                                               # %7 = %3 + %5
    """
    linear = mb.linear(x=x, weight=arbitrary_weight, bias=arbitrary_bias, name="linear")
    add_or_sub = mb.add(x=linear, y=arbitrary_bias, name="add_or_sub")
    return add_or_sub
def setup_class(self):
    # define an mlprogram, which has weights
    @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5000))])
    def linear_prog(input):
        W = mb.const(val=np.random.rand(100, 5000), name="const_W")
        out = mb.linear(x=input, weight=W, name="output")
        return out

    # define another mlprogram, which does not have weights
    @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5, 2))])
    def relu_prog(input):
        out = mb.relu(x=input, name="output")
        return out

    # convert and save model on disk
    self.mlmodel = coremltools.convert(linear_prog, convert_to="mlprogram")
    self.mlpackage_path = tempfile.mkdtemp(suffix=utils._MLPACKAGE_EXTENSION)
    self.mlmodel.save(self.mlpackage_path)
    self.mlmodel_no_weights = coremltools.convert(relu_prog, convert_to="mlprogram")
def pattern_sub(x):
    """
    Original:
        %4 = linear(x=%1, weight=%2, bias=%3)  # %2 is a rank-2 const tensor (weight)
                                               # %3 is a rank-1 const tensor (bias)
        ...
        %6 = sub(x=%5, y=%4)  # %5 is a const tensor with a shape broadcastable with %3,
                              # i.e. if %3 has shape (Dout,), %5 could be (1, Dout)

    Result:
        %9 = linear(x=%1, weight=%7, bias=%8)  # where %7 is a new const tensor with value %7 = -%2
                                               # %8 = %5 - %3
    """
    linear = mb.linear(x=x, weight=arbitrary_weight, bias=arbitrary_bias, name="linear")
    add_or_sub = mb.sub(x=linear, y=arbitrary_bias, name="add_or_sub")
    return add_or_sub
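# Algebra behind pattern_add / pattern_sub (NumPy sketch, local names only):
# a const add or sub after a linear op folds into the linear bias, and the
# "const minus linear" direction additionally negates the weight.
import numpy as np

W = np.random.rand(10, 3)
b = np.random.rand(10)
c = np.random.rand(10)
x = np.random.rand(3)
np.testing.assert_allclose((W @ x + b) + c, W @ x + (b + c))     # pattern_add
np.testing.assert_allclose(c - (W @ x + b), (-W) @ x + (c - b))  # pattern_sub with linear as y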
def instancenorm_3_constraints(pattern):
    epsilon_var = _get_var(pattern.add_epsilon, pattern.mean1)
    gamma_var = mb.const(
        val=np.ones(shape=(1, pattern.root_var.shape[1], 1, 1)), name="gamma_var"
    )
    beta_var = mb.const(
        val=np.zeros(shape=(1, pattern.root_var.shape[1], 1, 1)),
        name="_fuse_layernorm_or_instancenorm_beta",
    )

    passed = True
    passed = passed and _check_reduce_op(pattern.main_reduce)
    passed = passed and pattern.sub.x == pattern.root_var and pattern.sub.y == pattern.main_reduce.outputs[0]
    passed = passed and _check_reduce_op(pattern.mean1)
    passed = passed and pattern.mul_sub.y.val is not None and pattern.mul_sub.y.val == -1
    passed = passed and _general_constraints(pattern, epsilon_var, gamma_var, beta_var)
    passed = passed and _instancenorm_constraints(pattern)

    return passed
def transform_pattern(pattern):
    # remove all the ops, and replace with a gelu op
    out_name = pattern.mul_3.outputs[0].name
    x = mb.gelu(x=pattern.root_var, mode="TANH_APPROXIMATION", name=out_name, before_op=pattern.mul)

    pattern.mul_3.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=pattern.mul_3, old_var=pattern.mul_3.outputs[0], new_var=x
    )

    # Remove all the ops at once
    pattern.block.remove_ops(pattern.op_list())
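# How the pieces plug together (hedged sketch following the coremltools
# generic-pass tutorial; the import path is experimental and may move between
# versions, and `my_var_constraints` is a hypothetical stand-in for the real
# constraint checker):
from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import (
    register_generic_pass,
)

def my_var_constraints(pattern):  # hypothetical; real passes verify consts/ranks here
    return True

register_generic_pass(
    ops_arrangement=get_gelu_pattern1(),
    var_constraints=my_var_constraints,
    transform_pattern=transform_pattern,
    pass_name="fuse_gelu_tanh_approximation",
    namespace="common",
)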
def get_prelu_pattern():
    """
    y = a * relu(-1 * x) + relu(x)

    When x is rank 4 and "a" is of shape (1, C, 1, 1) or (C, 1, 1),
    this is equivalent to prelu with alpha = -a.flatten().
    """

    @mb.program(input_specs=[mb.TensorSpec(shape=[get_new_symbol(), get_new_symbol(), get_new_symbol(), get_new_symbol()])])
    def prelu_pattern(x):
        return _prelu_pattern(x)

    return prelu_pattern
def get_prelu_pattern():
    """
    x1 = transpose(perm=(0,2,3,1))(x)
    y = a * relu(-1 * x1) + relu(x1)

    When x is rank 4 and "a" is of shape (C,), (1, C), (1, 1, C), or (1, 1, 1, C),
    this is equivalent to prelu with alpha = -a.flatten(),
    followed by a transpose with perm (0, 2, 3, 1).
    """

    @mb.program(input_specs=[mb.TensorSpec(shape=[get_new_symbol(), get_new_symbol(), get_new_symbol(), get_new_symbol()])])
    def prelu_pattern(x):
        # perm value can be anything, it will be checked in "is_var_constraint_satisifed" method
        x = mb.transpose(x=x, perm=[0, 1, 2, 3], name="transpose")
        return _prelu_pattern(x)

    return prelu_pattern
def transform_pattern(pattern):
    is_sub, is_first_input = _get_is_sub_and_is_first_input(pattern)
    linear_bias, bias, Dout = _get_linear_bias_bias_Dout(pattern, is_first_input)
    bias = np.reshape(bias, (Dout,))

    if is_sub and is_first_input:
        bias = -bias
    if is_sub and not is_first_input:
        linear_bias = -linear_bias

    new_bias = linear_bias + bias

    # compute the new weight
    if is_sub and not is_first_input:
        new_weight = -pattern.linear.weight.val
    else:
        new_weight = pattern.linear.weight.val

    # create a new linear op with the new weight, bias value, copying rest of the attributes
    out_name = pattern.add_or_sub.outputs[0].name
    linear_kargs = {"weight": new_weight, "bias": new_bias, "name": out_name, "before_op": pattern.linear}
    linear_kargs.update({k: v for k, v in pattern.linear.inputs.items() if k not in ["weight", "bias"]})

    x = mb.linear(**linear_kargs)

    pattern.add_or_sub.enclosing_block.replace_uses_of_var_after_op(
        anchor_op=pattern.add_or_sub, old_var=pattern.add_or_sub.outputs[0], new_var=x
    )

    # Remove all the ops at once
    pattern.block.remove_ops(pattern.op_list())
def silu(context, node):
    inputs = _get_inputs(context, node, expected=1)
    x = inputs[0]
    y = mb.sigmoid(x=x)
    z = mb.mul(x=x, y=y, name=node.name)
    context.add(z)
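# Reference semantics (NumPy sketch, local name): SiLU (a.k.a. Swish with
# beta = 1) is x * sigmoid(x), which is exactly the two-op decomposition the
# translation above emits.
import numpy as np

def silu_reference(x):
    return x * (1.0 / (1.0 + np.exp(-x)))  # x * sigmoid(x)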