def instancenorm_4(x):
    """
    Build the MIL pattern for the expanded instance-norm formulation:

        y = x * [gamma * rsqrt(variance + eps)]
            + (beta - mean * [gamma * rsqrt(variance + eps)])

    Here the mean and the mean of squares are each expressed as a
    ``reduce_sum`` followed by a multiplication with a constant.

    A match of this pattern should be fused as instance_norm. All of the
    following must be satisfied:

    1) Input is a rank 4 tensor.
    2) Reduce operates on spatial dimensions axes=[-2, -1], or axes=[-3, -2]
       (a channel first to channel last transpose would be inserted in such
       case).
    3) Gamma and beta are both shape (C,) after squeeze, where C is the
       number of channels.

    Dataflow, using the op names registered below:

        x * x                   -> mul_square1 -> sum1 -> mul_mean1
        x                       -> main_reduce -> mul_mean
        mul_mean * mul_mean     -> mul_square
        mul_mean1 - mul_square  -> sub_variance -> add_epsilon -> rsqrt
        rsqrt * gamma           -> mul_gamma
        mul_gamma * x           -> mul1
        mul_mean * mul_gamma    -> mul2
        beta - mul2             -> sub_beta
        mul1 + sub_beta         -> add -> [...]
    """
    x_squared = mb.mul(x=x, y=x, name="mul_square1")

    spatial_sum = mb.reduce_sum(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")
    mean = mb.mul(x=spatial_sum, y=3.3333334e-05, name="mul_mean")  # dummy value here
    mean_squared = mb.mul(x=mean, y=mean, name="mul_square")

    squares_sum = mb.reduce_sum(x=x_squared, axes=[2, 3], keep_dims=True, name="sum1")
    mean_of_squares = mb.mul(x=squares_sum, y=8.333333e-06, name="mul_mean1")  # dummy value here

    variance = mb.sub(x=mean_of_squares, y=mean_squared, name="sub_variance")
    variance_plus_eps = mb.add(x=variance, y=1e-5, name="add_epsilon")  # epsilon
    inv_std = mb.rsqrt(x=variance_plus_eps, name="rsqrt")

    # Random placeholder constant standing in for gamma.
    gamma = np.random.rand(1, 5, 1, 1)
    scale = mb.mul(x=inv_std, y=gamma, name="mul_gamma")

    scaled_input = mb.mul(x=scale, y=x, name="mul1")
    scaled_mean = mb.mul(x=mean, y=scale, name="mul2")

    # Random placeholder constant standing in for beta.
    beta = np.random.rand(1, 5, 1, 1)
    shift = mb.sub(x=beta, y=scaled_mean, name="sub_beta")

    return mb.add(x=scaled_input, y=shift, name="add")
def instancenorm_2(x):
    """
    Build the MIL pattern:

        y = (x - mean) / pow(variance + epsilon, 0.5) * gamma + beta

    A match of this pattern should be fused as instance_norm. All of the
    following must be satisfied:

    1) Input is a rank 4 tensor.
    2) Reduce operates on spatial dimensions axes=[-2, -1], or axes=[-3, -2]
       (a channel first to channel last transpose would be inserted in such
       case).
    3) Gamma and beta are both shape (C,) after squeeze, where C is the
       number of channels.

    Dataflow, using the op names registered below:

        x                     -> main_reduce
        x - main_reduce       -> sub0 -> square -> mean1 -> add_epsilon
        add_epsilon ** 0.5    -> pow
        x - main_reduce       -> sub1
        sub1 / pow            -> real_div
        gamma * real_div      -> mul_gamma
        beta + mul_gamma      -> add_beta -> ...

    Note that ``sub0`` and ``sub1`` are two separate ops computing the same
    difference: one feeds the variance branch, the other the division.
    """
    mean = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")

    centered_for_variance = mb.sub(x=x, y=mean, name="sub0")
    centered_for_output = mb.sub(x=x, y=mean, name="sub1")

    squared = mb.square(x=centered_for_variance, name="square")
    variance = mb.reduce_mean(x=squared, axes=[2, 3], keep_dims=True, name="mean1")
    variance_plus_eps = mb.add(x=variance, y=1e-5, name="add_epsilon")
    std = mb.pow(x=variance_plus_eps, y=0.5, name="pow")

    normalized = mb.real_div(x=centered_for_output, y=std, name="real_div")

    # Random placeholder constants standing in for gamma and beta.
    gamma = np.random.rand(1, 5, 1, 1)
    scaled = mb.mul(x=gamma, y=normalized, name="mul_gamma")
    beta = np.random.rand(1, 5, 1, 1)
    return mb.add(x=beta, y=scaled, name="add_beta")
def instancenorm_3(x):
    """
    Build the MIL pattern for the InstanceNorm form found in TensorFlow-Addons.

    A match of this pattern should be fused as instance_norm. All of the
    following must be satisfied:

    1) Input is a rank 4 tensor.
    2) Reduce operates on spatial dimensions axes=[-2, -1], or axes=[-3, -2]
       (a channel first to channel last transpose would be inserted in such
       case).
    3) Gamma and beta are absent. Default values for gamma and beta would be
       used.

    Dataflow, using the op names registered below:

        x                    -> main_reduce
        x - main_reduce      -> sub -> square -> mean1 -> add_epsilon -> rsqrt
        rsqrt * x            -> mul1
        main_reduce * rsqrt  -> mul2
        mul2 * -1            -> mul_sub
        mul1 + mul_sub       -> add -> ...
    """
    mean = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")
    centered = mb.sub(x=x, y=mean, name="sub")
    squared = mb.square(x=centered, name="square")
    variance = mb.reduce_mean(x=squared, axes=[2, 3], keep_dims=True, name="mean1")
    variance_plus_eps = mb.add(x=variance, y=1e-5, name="add_epsilon")  # epsilon
    inv_std = mb.rsqrt(x=variance_plus_eps, name="rsqrt")

    scaled_input = mb.mul(x=inv_std, y=x, name="mul1")
    scaled_mean = mb.mul(x=mean, y=inv_std, name="mul2")
    # The mean term is negated with a multiplication by -1 and then added.
    negated_scaled_mean = mb.mul(x=scaled_mean, y=-1, name="mul_sub")

    return mb.add(x=scaled_input, y=negated_scaled_mean, name="add")
def instancenorm_or_layernorm(x):
    """
    Build the MIL pattern:

        y = gamma * (x - mean) / sqrt(variance + epsilon) + beta

    expressed in its expanded form:

        y = x * [gamma * rsqrt(variance + eps)]
            + (beta - mean * [gamma * rsqrt(variance + eps)])

    Dataflow, using the op names registered below:

        x                        -> main_reduce
        x - main_reduce          -> sub -> square -> reduce_mean_2
        reduce_mean_2 + epsilon  -> add_epsilon -> rsqrt
        rsqrt * gamma            -> mul_gamma
        x * mul_gamma            -> mul_2
        main_reduce * mul_gamma  -> mul_3
        beta - mul_3             -> sub_beta
        sub_beta + mul_2         -> add_2 -> [...]

    This pattern corresponds to either layer_norm or instance_norm.

    It is instance_norm if all of the following are true:
        - input is rank 4
        - axes of reduce_mean is [-2, -1] or [-3, -2]
          (when [-3, -2], a channel first to channel last transpose would be
          inserted)
        - gamma and beta are rank 1, after squeeze

    It is layer_norm if all of the following are true:
        - axes is either [-1] or [-1, -2] or [-1, -2, -3] and so on
        - rank of gamma and beta is equal to the length of the axes
    """
    mean = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True, name="main_reduce")
    centered = mb.sub(x=x, y=mean, name="sub")
    squared = mb.square(x=centered, name="square")
    variance = mb.reduce_mean(x=squared, axes=[2, 3], keep_dims=True, name="reduce_mean_2")
    variance_plus_eps = mb.add(x=variance, y=1e-5, name="add_epsilon")
    inv_std = mb.rsqrt(x=variance_plus_eps, epsilon=1e-12, name="rsqrt")

    # Random placeholder constant standing in for gamma.
    gamma = np.random.rand(1, 5, 1, 1)
    scale = mb.mul(x=inv_std, y=gamma, name="mul_gamma")

    scaled_input = mb.mul(x=x, y=scale, name="mul_2")
    scaled_mean = mb.mul(x=mean, y=scale, name="mul_3")

    # Random placeholder constant standing in for beta.
    beta = np.random.rand(1, 5, 1, 1)
    shift = mb.sub(x=beta, y=scaled_mean, name="sub_beta")

    return mb.add(x=shift, y=scaled_input, name="add_2")
def gelu_to_detect_2(x):
    """
    Build the MIL pattern for a tanh-based GELU expression:

        (0.5 * x) * (1 + tanh(0.7978845834732056 * (x + 0.044714998453855515 * x^3)))

    In this variant the 0.5 factor is applied to ``x`` first (op ``mul``) and
    the result is then multiplied by ``1 + tanh(...)`` (op ``mul_3``).
    """
    cubed = mb.pow(x=x, y=3.0, name="pow")
    cubic_term = mb.mul(x=0.044714998453855515, y=cubed, name="mul_1")
    inner_sum = mb.add(x=x, y=cubic_term, name="add")
    tanh_arg = mb.mul(x=0.7978845834732056, y=inner_sum, name="mul_2")
    tanh_out = mb.tanh(x=tanh_arg, name="tanh")
    one_plus_tanh = mb.add(x=1.0, y=tanh_out, name="add_1")
    half_x = mb.mul(x=0.5, y=x, name="mul")
    return mb.mul(x=half_x, y=one_plus_tanh, name="mul_3")
def gelu_to_detect_1(x):
    """
    Build the MIL pattern for a tanh-based GELU expression:

        (0.5 * (1 + tanh(0.7978845834732056 * (x + 0.044714998453855515 * x^3)))) * x

    In this variant the 0.5 factor is applied to ``1 + tanh(...)`` first
    (op ``mul``) and the result is then multiplied by ``x`` (op ``mul_3``).

    MIL operations take named inputs (instead of positional inputs), and the
    `name` argument is MANDATORY here.
    """
    cubed = mb.pow(x=x, y=3.0, name="pow")
    cubic_term = mb.mul(x=0.044714998453855515, y=cubed, name="mul_1")
    inner_sum = mb.add(x=x, y=cubic_term, name="add")
    tanh_arg = mb.mul(x=0.7978845834732056, y=inner_sum, name="mul_2")
    tanh_out = mb.tanh(x=tanh_arg, name="tanh")
    one_plus_tanh = mb.add(x=1.0, y=tanh_out, name="add_1")
    half_gate = mb.mul(x=0.5, y=one_plus_tanh, name="mul")
    return mb.mul(x=half_gate, y=x, name="mul_3")
def prog(x):
    """
    Build a small op chain on ``x``: relu -> transpose -> reduce_mean -> log.

    The transpose uses perm=[0, 3, 1, 2] and the reduction runs over
    axes [2, 3] with keep_dims=False. The output of the ``log`` op is
    returned.
    """
    activated = mb.relu(x=x, name="relu")
    permuted = mb.transpose(x=activated, perm=[0, 3, 1, 2], name="transpose")
    reduced = mb.reduce_mean(x=permuted, axes=[2, 3], keep_dims=False, name="reduce")
    logged = mb.log(x=reduced, name="log")

    # This add of two literals is unnamed and its result is neither used nor
    # returned.
    # NOTE(review): presumably kept on purpose as an extra op in the built
    # program - confirm before removing the call.
    _unused_sum = mb.add(x=1, y=2)

    return logged
def conv_bias_pattern(x):
    """
    Build the MIL pattern: conv (or conv_transpose) -> optional transpose ->
    add (or sub) with a constant.

    The shape of the pattern is controlled by names that are not defined in
    this block and are read from the surrounding scope:

    - ``conv_transpose``: when truthy, ``mb.conv_transpose`` is used instead
      of ``mb.conv``.
    - ``transpose``: when truthy, a transpose op with ``arbitrary_perm`` is
      inserted after the convolution.
    - ``sub``: when truthy, the final op is ``mb.sub`` instead of ``mb.add``.
    - ``arbitrary_weight`` / ``arbitrary_scalar``: the convolution weight and
      the second operand of the final op.

    Returns the output of the final op, which is always named "add_or_sub".
    """
    build_conv = mb.conv_transpose if conv_transpose else mb.conv
    conv = build_conv(x=x, weight=arbitrary_weight, pad_type="valid", name="conv")

    # Input to the bias op is the transposed conv output when a transpose is
    # requested, otherwise the conv output itself.
    if transpose:
        bias_input = mb.transpose(x=conv, perm=arbitrary_perm, name="transpose")
    else:
        bias_input = conv

    build_bias_op = mb.sub if sub else mb.add
    return build_bias_op(x=bias_input, y=arbitrary_scalar, name="add_or_sub")
def _prelu_pattern(x):
    """
    Build the MIL pattern:

        out = relu(x) + alpha * relu(-1 * x)

    Op names registered below: neg, relu1, alpha_mul, relu2, out_op.

    MIL operations take named inputs (instead of positional inputs), and the
    `name` argument is MANDATORY here.
    """
    negated = mb.mul(x=x, y=-1, name="neg")
    negative_part = mb.relu(x=negated, name="relu1")

    # use any constant here to match, rank and shape will be verified in
    # "is_var_constraint_satisifed" method
    alpha = np.random.rand(2, 2, 2, 2)
    scaled_negative_part = mb.mul(x=negative_part, y=alpha, name="alpha_mul")

    positive_part = mb.relu(x=x, name="relu2")
    return mb.add(x=positive_part, y=scaled_negative_part, name="out_op")
def pattern_add(x):
    """
    Build the MIL pattern: linear -> add with a constant.

    Original:
        %4 = linear(x=%1, weight=%2, bias=%3)  # %2 is a rank-2 const tensor (weight)
        #                                      # %3 is a rank-1 const tensor (bias)
        ...
        %6 = add(x=%4, y=%5)  # %5 is a const tensor with same shape as %3

    Result:
        %8 = linear(x=%1, weight=%2, bias=%7)  # where %7 is a new const tensor with value
        #                                      # %7 = %3 + %5

    This function only builds the "Original" pattern; the rewrite described
    under "Result" is not performed here.

    ``arbitrary_weight`` and ``arbitrary_bias`` are not defined in this block
    and are read from the surrounding scope. ``arbitrary_bias`` is used both
    as the bias of the linear op and as the second operand of the add.

    Returns the output of the add op, which is named "add_or_sub".
    """
    linear = mb.linear(x=x, weight=arbitrary_weight, bias=arbitrary_bias, name="linear")
    return mb.add(x=linear, y=arbitrary_bias, name="add_or_sub")