def compute_expect(data_sum, in_bn, head_active, in_active, layout):
    """Build the expected gamma/beta gradient pair from the NumPy reference helpers.

    Args:
        data_sum: Array whose ``dtype`` is used as the working dtype; it is also
            forwarded unchanged to ``bn_gamma_grad_np``.
        in_bn: Array cast to ``data_sum.dtype`` before being handed to
            ``bn_gamma_grad_np``.
        head_active: First argument forwarded to ``relu_grad_np``.
        in_active: Second argument forwarded to ``relu_grad_np``.
        layout: Forwarded as-is to both ``bn_beta_grad_np`` and
            ``bn_gamma_grad_np``.

    Returns:
        A two-element list ``[gamma_grad, beta_grad]`` (gamma first).
    """
    target_dtype = data_sum.dtype

    # The relu-grad result feeds both batch-norm gradient helpers.
    activation_grad = relu_grad_np(head_active, in_active).astype(target_dtype)

    beta_grad = bn_beta_grad_np(activation_grad, layout)
    gamma_grad = bn_gamma_grad_np(
        activation_grad,
        in_bn.astype(target_dtype),
        data_sum,
        layout,
    )
    return [gamma_grad, beta_grad]
def compute_expect(inshp_data, outshp_data):
    """Build the two expected outputs using NumPy reference arithmetic.

    Intermediate math is carried out after casting to ``inshp_data.dtype``;
    each returned array is cast to ``outshp_data.dtype``. Both outputs are
    produced by the same expression, evaluated independently so that two
    distinct arrays are returned.

    Args:
        inshp_data: Array providing the working dtype and the input operand.
        outshp_data: Array providing the output dtype; the product of its
            first three shape entries is used as the scale factor, so it must
            have at least three dimensions.

    Returns:
        A two-element list of arrays with dtype ``outshp_data.dtype``.
    """
    work_dtype = inshp_data.dtype
    result_dtype = outshp_data.dtype
    dims = outshp_data.shape
    scale = dims[0] * dims[1] * dims[2]

    # Term shared by both outputs: relu_grad_np(2 * out, out) * scale - in.
    doubled = np.add(outshp_data, outshp_data)
    relu_term = relu_grad_np(doubled, outshp_data).astype(work_dtype)
    shared = np.subtract(np.multiply(relu_term, scale), inshp_data)

    def _single_output():
        # (in * in) / scale
        mean_of_square = np.divide(np.multiply(inshp_data, inshp_data), scale)
        # out - in / scale, evaluated in the working dtype
        centered = np.subtract(
            outshp_data.astype(work_dtype),
            np.divide(inshp_data, scale),
        )
        # The multiply-then-divide by inshp_data is kept verbatim; it is not
        # simplified away because it affects rounding and zero handling.
        ratio = np.divide(np.multiply(centered, inshp_data), inshp_data)
        combined = np.multiply(mean_of_square, np.subtract(shared, ratio))
        return combined.astype(result_dtype)

    first = _single_output()
    second = _single_output()
    return [first, second]