def testBatchNormalizeLayerFusedFp16(self):
    """Run a fused batch-normalization layer on zero float16 input on the IPU.

    Builds ``layers_norm.batch_normalization(..., fused=True)`` over a
    float16 placeholder, feeds it zeros, and asserts that:

    * the output is all zeros, and
    * ``tu.check_compute_sets_not_in_blacklist`` returns a truthy value for
      the compute sets taken from the event trace against the
      ``'*convert*/Cast*'`` pattern.
    """
    input_shape = [4, 64, 64, 4]
    zero_input = np.zeros(input_shape)

    with ops.device("/device:IPU:0"), variable_scope.variable_scope(
            "", use_resource=True):
        x = array_ops.placeholder(np.float16, input_shape, name="a")
        normed = layers_norm.batch_normalization(x, fused=True)

    with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # The result of this first trace read is discarded -- presumably it
        # flushes events recorded before the runs below (TODO confirm).
        sess.run(report)

        sess.run(variables.global_variables_initializer())
        result = sess.run(normed, {x: zero_input})
        self.assertAllClose(result, zero_input)

        # Inspect the compute sets reported for the runs above.
        trace = sess.run(report)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        blacklist = ['*convert*/Cast*']
        self.assertTrue(
            tu.check_compute_sets_not_in_blacklist(compute_sets, blacklist))
def testBatchNormalizeFused(self):
    """Run ``nn.fused_batch_norm`` in inference mode on zero float32 input.

    The mean and variance come from ``nn.moments`` over axes ``[0, 1, 2]``
    of the input; the scale and offset variables are initialised to 1.0 and
    0.0 respectively. The test asserts that:

    * the first of the three fetched outputs is all zeros, and
    * ``tu.check_compute_sets_not_in_blacklist`` returns a truthy value for
      the compute sets taken from the event trace against the
      ``'*convert*/Cast*'`` pattern.
    """
    input_shape = [4, 64, 64, 4]
    zero_input = np.zeros(input_shape)

    # The placeholder is created before entering the IPU device scope.
    x = array_ops.placeholder(np.float32, input_shape, name="a")

    with ops.device("/device:IPU:0"), variable_scope.variable_scope(
            "", use_resource=True):
        offset = variable_scope.get_variable(
            "x",
            dtype=np.float32,
            shape=[4],
            initializer=init_ops.constant_initializer(0.0))
        scale = variable_scope.get_variable(
            "y",
            dtype=np.float32,
            shape=[4],
            initializer=init_ops.constant_initializer(1.0))

        batch_mean, batch_variance = nn.moments(x, [0, 1, 2], name='moments')
        normed = nn.fused_batch_norm(
            x, scale, offset, batch_mean, batch_variance, is_training=False)

    with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # The result of this first trace read is discarded -- presumably it
        # flushes events recorded before the runs below (TODO confirm).
        sess.run(report)

        sess.run(variables.global_variables_initializer())

        # Three values are fetched; only the first is checked.
        result, _, _ = sess.run(normed, {x: zero_input})
        self.assertAllClose(result, zero_input)

        # Inspect the compute sets reported for the runs above.
        trace = sess.run(report)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        blacklist = ['*convert*/Cast*']
        self.assertTrue(
            tu.check_compute_sets_not_in_blacklist(compute_sets, blacklist))