def testWhenSideEffect(self):
  with self.session() as sess:

    def f_1(x):
      rand_num = 10 * random_ops.random_uniform(
          shape=[2, 2], minval=1, maxval=9, dtype=dtypes.int32, name="namef1")
      return rand_num * x

    def f_cond(x1, z):
      cond_1 = control_flow_ops.cond(math_ops.less(z[0], z[1]),
                                     lambda: f_1(x1), lambda: f_1(x1))
      return cond_1

    with ops.device('cpu'):
      x1 = array_ops.placeholder(dtypes.int32, [2, 2])
      z = array_ops.placeholder(dtypes.int32, [2])

    with ipu.scopes.ipu_scope("/device:IPU:0"):
      r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1, z])

    i_x1 = np.full((2, 2), 10)
    i_z = np.full((2), 8)

    report = ReportJSON(self, sess)

    sess.run(r1, {x1: i_x1, z: i_z})

    report.parse_log()
    report.assert_compute_sets_matches(
        '*namef1*', 2,
        "f_1 should appear in the list twice as it must not be cached "
        "due to its side effect.")
def testGRULayerInference(self):
  ReportJSON(self)
  np.random.seed(0)

  # Run with all-0 weights
  weight0 = 0.
  for init_state_value in [0., 1.]:
    self._RunInferenceComparison('ones',
                                 input_value=0.,
                                 weights_value=weight0,
                                 init_state_value=init_state_value)

  # Run with all-1 weights
  weight1 = 1.
  for init_state_value in [0., 1.]:
    self._RunInferenceComparison('ones',
                                 input_value=0.,
                                 weights_value=weight1,
                                 init_state_value=init_state_value)

  # Run with random weights
  for weight in np.random.rand(3):
    for init_state_value in [0., 1.]:
      self._RunInferenceComparison('rand',
                                   input_value=0.,
                                   weights_value=weight,
                                   init_state_value=init_state_value)
def testSimpleCaching(self):
  with self.session() as sess:

    def f_1(x):
      return math_ops.square(x, name="namef1")

    def f_cond(x1, z):
      cond_1 = control_flow_ops.cond(math_ops.less(z[0], z[1]),
                                     lambda: f_1(x1), lambda: f_1(x1))
      return cond_1

    with ops.device('cpu'):
      x1 = array_ops.placeholder(dtypes.int32, [2, 2])
      z = array_ops.placeholder(dtypes.int32, [2])

    with ipu.scopes.ipu_scope("/device:IPU:0"):
      r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1, z])

    i_x1 = np.full((2, 2), 10)
    i_z = np.full((2), 8)

    report = ReportJSON(self, sess)

    sess.run(r1, {x1: i_x1, z: i_z})

    report.parse_log()
    report.assert_compute_sets_matches(
        '*namef1*', 1, "There should be only one f_1 due to caching.")
def testMultipleReduces(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float16, [3])
      pb = array_ops.placeholder(np.float16, [3])
      a = math_ops.cast(pa, np.float32)
      a = math_ops.reduce_sum(a)
      a = math_ops.cast(a, np.float16)
      b = math_ops.cast(pb, np.float32)
      b = math_ops.reduce_sum(b)
      b = math_ops.cast(b, np.float16)
      c = a + b

    report = ReportJSON(self, sess)
    report.reset()

    fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 1.0, 2.0]}
    result = sess.run(c, fd)
    self.assertAllClose(result, 7.5)

    report.parse_log()

    ok = [
        '__seed*', 'host-exchange-local-copy-', 'Sum/reduce*/Reduce',
        'Sum_1/reduce*/Reduce', 'add/add*/Add'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testGRUNotCached(self):
  with self.session() as sess:
    # Note here the second GRU is larger.
    pinputs1 = array_ops.placeholder(dataType,
                                     [seq_len, batch_size, input_size],
                                     name="inputs1")
    pinputs2 = array_ops.placeholder(dataType,
                                     [seq_len * 2, batch_size, input_size],
                                     name="inputs2")
    plabels = array_ops.placeholder(np.int32, [batch_size], name="labels")

    with ops.device("/device:IPU:0"):

      def gru_layer(inputs, name):
        initial_state = _get_variable(
            "initial_state",
            shape=[batch_size, num_channels],
            initializer=init_ops.constant_initializer(0.1, dataType))
        return self._GRULayer(inputs=inputs,
                              weights_value=1.,
                              initial_state=initial_state,
                              training=True,
                              name=name)

      with variable_scope.variable_scope("gru_layer1", use_resource=True):
        logits1 = gru_layer(pinputs1, "layer1")
      with variable_scope.variable_scope("gru_layer2", use_resource=True):
        logits2 = gru_layer(pinputs2, "layer2")

      logits = (math_ops.reduce_mean(logits1, axis=0) +
                math_ops.reduce_mean(logits2, axis=0))
      softmax = nn.sparse_softmax_cross_entropy_with_logits_v2(
          logits=logits, labels=array_ops.stop_gradient(plabels))
      loss = math_ops.reduce_mean(softmax)
      train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run(
        [loss, train], {
            pinputs1: _createGRUInput(0.5, batch_size, seq_len, input_size),
            pinputs2: _createGRUInput(1.5, batch_size, seq_len * 2,
                                      input_size),
            plabels: np.ones(shape=[batch_size], dtype=np.int32),
        })

    report.parse_log()
    report.assert_compute_sets_matches(
        '*BasicGruCell/ProcessUnits/Weight/Conv*/Convolve', 4,
        "There should be four fwd GRUs")
    report.assert_compute_sets_matches('*/MulOGate/Op/Multiply', 2,
                                       "There should be two bwd GRUs")
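# `_createGRUInput` (like `_get_variable` and `_GRULayer`) is a helper defined
# elsewhere in this test file and is not shown here. Below is a minimal,
# hypothetical sketch of what it presumably does: the placeholders it feeds
# have shape [seq_len, batch_size, input_size], so it most likely broadcasts a
# constant to that shape. The np.float32 dtype stands in for the file's
# `dataType` constant and is an assumption.
def _createGRUInput(value, batch_size, seq_len, input_size):
  # Broadcast a single value to the full input shape expected by the GRU.
  return np.full([seq_len, batch_size, input_size], value, dtype=np.float32)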
def testGRULayerTraining(self):
  ReportJSON(self)
  np.random.seed(42)

  # Run with random weights
  for weight in np.random.rand(3):
    for init_state_value in [0., 1.]:
      self._RunTrainingComparison('rand',
                                  input_value=0.,
                                  weights_value=weight,
                                  init_state_value=init_state_value,
                                  training_steps=3)
def testNoCastsF32ToF16ToF32(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [3])
      b = math_ops.cast(pa, np.float16)
      c = math_ops.cast(b, np.float32)

    report = ReportJSON(self, sess)
    report.reset()

    fd = {pa: [2.0, 0.5, 1.0]}
    result = sess.run(c, fd)
    self.assertAllClose(result, [2.0, 0.5, 1.0])

    report.parse_log(assert_len=0)
    report.assert_no_compute_set()
def testArgMaxHalf(self, dtype):
  def model(a):
    return math_ops.argmax(a, output_type=dtypes.int32)

  with self.session() as sess:
    ReportJSON(self, sess)

    with ops.device('cpu'):
      pa = array_ops.placeholder(dtype, [3, 5, 2])

    with ops.device("/device:IPU:0"):
      out = model(pa)

    input = _get_random_input(dtype, (3, 5, 2))

    fd = {pa: input}
    result = sess.run(out, fd)
    self.assertAllClose(result, np.argmax(input, axis=0))
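# `_get_random_input` is a helper defined elsewhere in this test file. A
# hypothetical sketch of one way it could be implemented is given below; the
# property the argmax tests rely on is that the values are distinct and
# exactly representable in the requested dtype (including float16), so
# np.argmax and the device argmax select the same index.
def _get_random_input(dtype, shape):
  size = int(np.prod(shape))
  # A random permutation of 0..size-1 gives distinct, exactly representable
  # values for any reasonable test shape.
  return np.random.permutation(size).reshape(shape).astype(dtype)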
def testBatchNormalizeFused(self):
  with self.session() as sess:
    a = array_ops.placeholder(np.float32, [4, 64, 64, 4], name="input_a")

    def my_graph(a):
      with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
          beta = variable_scope.get_variable(
              "x",
              dtype=np.float32,
              shape=[4],
              initializer=init_ops.constant_initializer(0.0))
          gamma = variable_scope.get_variable(
              "y",
              dtype=np.float32,
              shape=[4],
              initializer=init_ops.constant_initializer(1.0))

          b_mean, b_var = nn.moments(a, [0, 1, 2], name='moments')

          normed = nn.fused_batch_norm(a,
                                       gamma,
                                       beta,
                                       b_mean,
                                       b_var,
                                       is_training=False)
          return normed

    report = ReportJSON(self, sess)
    out = ipu.ipu_compiler.compile(my_graph, [a])

    sess.run(variables.global_variables_initializer())
    report.reset()

    result, _, _ = sess.run(out, {a: np.zeros([4, 64, 64, 4])})
    self.assertAllClose(result, np.zeros([4, 64, 64, 4]))

    report.parse_log()

    bl = ['*convert*/Cast*']
    report.assert_compute_sets_not_in_blacklist(bl)
    report.assert_tensor_input_names("input_a", "x", "y")
def testBatchNormalizeLayerFusedTrainingFp16(self):
  with self.session() as sess:
    # This test checks for the correct behaviour in batch norm grad when
    # performing training, but the batch norm attribute `training` is False.
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("", use_resource=True):
        a = array_ops.placeholder(np.float16, [4, 64, 64, 4], name="input_a")
        normed = layers_norm.batch_normalization(a,
                                                 fused=True,
                                                 training=False)
      loss = math_ops.reduce_sum(normed)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())

    result = sess.run([normed, train], {a: np.zeros([4, 64, 64, 4])})
    self.assertAllClose(result[0], np.zeros([4, 64, 64, 4]))
def testArgMaxMultiDimensional(self, dtype):
  def model(a, axis):
    return math_ops.argmax(a, axis=axis, output_type=dtypes.int32)

  for axis in range(6):
    with self.session() as sess:
      ReportJSON(self, sess)

      with ops.device('cpu'):
        pa = array_ops.placeholder(dtype, [1, 2, 3, 4, 5, 6])
        p_axis = array_ops.placeholder(np.int32, shape=())

      with ops.device("/device:IPU:0"):
        out = model(pa, p_axis)

      input = _get_random_input(dtype, (1, 2, 3, 4, 5, 6))

      fd = {pa: input, p_axis: axis}
      result = sess.run(out, fd)
      self.assertAllClose(result, np.argmax(input, axis=axis))
def testBatchNormalizeLayerFusedFp16(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("", use_resource=True):
        a = array_ops.placeholder(np.float16, [4, 64, 64, 4], name="input_a")
        normed = layers_norm.batch_normalization(a, fused=True)

    report = ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    result = sess.run(normed, {a: np.zeros([4, 64, 64, 4])})
    self.assertAllClose(result, np.zeros([4, 64, 64, 4]))

    report.parse_log()

    bl = ['*convert*/Cast*']
    report.assert_compute_sets_not_in_blacklist(bl)
    report.assert_tensor_input_names("input_a")
def testReduceMean(self):
  with self.session() as sess:
    shape = [2, 10000]
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float16, shape)
      output = math_ops.reduce_mean(pa, axis=[1])

    report = ReportJSON(self, sess)
    report.reset()

    val = np.finfo(np.float16).max / 2
    result = sess.run(output, {pa: np.full(shape, val)})
    self.assertAllClose(result, [val, val])

    report.parse_log(assert_len=4)

    ok = [
        '__seed*', 'host-exchange-local-copy-', 'Mean/fusion/Reduce',
        'Mean/fusion*/Op/Multiply', 'Mean/convert*/Cast'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testArgMaxVector(self, dtype):
  def model(a):
    return math_ops.argmax(a, axis=0, output_type=dtypes.int32)

  with self.session() as sess:
    report = ReportJSON(self, sess)
    report.reset()

    with ops.device('cpu'):
      pa = array_ops.placeholder(dtype, [3])

    with ops.device("/device:IPU:0"):
      out = model(pa)

    input = _get_random_input(dtype, (3))

    fd = {pa: input}
    result = sess.run(out, fd)
    self.assertAllClose(result, np.argmax(input))

    report.parse_log(assert_len=4)
def testNoCastsF16ReduceWithReshape(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float16, [3, 4])
      a = gen_array_ops.reshape(pa, [4, 3])
      a = math_ops.reduce_sum(a, axis=(1))

    report = ReportJSON(self, sess)
    report.reset()

    fd = {pa: np.ones([3, 4])}
    result = sess.run(a, fd)
    self.assertAllClose(result, [3.0, 3.0, 3.0, 3.0])

    report.parse_log()

    ok = [
        '__seed*',
        'Sum/reduce*/Reduce',
    ]
    report.assert_all_compute_sets_and_list(ok)
def testBatchNormalizeLayerWithStableStatistics(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("", use_resource=True):
        a = array_ops.placeholder(np.float32, [4, 64, 64, 4], name="input_a")
        normed = layers_norm.batch_normalization(a, training=True)

    ReportJSON(self, sess, use_stable_norm_statistics=True)
    sess.run(variables.global_variables_initializer())

    # Use a tensor with a large mean to test the stability. This blows up with
    # the non-stable implementation (NaN output). Use a power of two that can
    # be represented exactly in float32 to make sure we work with an exact
    # mean internally.
    input_mean = 2.0**64
    inputs = input_mean * np.ones([4, 64, 64, 4])

    # y = gamma * (x - mean) / sqrt(variance + epsilon) + beta
    # Both (x - mean) and beta_initializer are zero, so this should be zero.
    result = sess.run(normed, {a: inputs})
    self.assertAllEqual(result, np.zeros([4, 64, 64, 4]))
def testSameFunctions(self):
  # f_1 and f_2 are identical functions.
  with self.session() as sess:

    def f_1(x):
      return math_ops.square(x, name="namef1")

    def f_2(x):
      return math_ops.square(x, name="namef2")

    def f_cond(x1):
      cond_1 = control_flow_ops.cond(math_ops.less(1, 0), lambda: f_1(x1),
                                     lambda: f_1(x1))
      cond_2 = control_flow_ops.cond(math_ops.less(1, 0), lambda: f_2(x1),
                                     lambda: f_2(x1))
      return cond_1 + cond_2

    with ops.device('cpu'):
      x1 = array_ops.placeholder(dtypes.int32, [2, 2])

    with ipu.scopes.ipu_scope("/device:IPU:0"):
      r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1])

    i_x1 = np.full((2, 2), 10)

    report = ReportJSON(self, sess)

    sess.run(r1, {x1: i_x1})

    report.parse_log()
    report.assert_compute_sets_matches(
        '*namef1*', 1, "There should be only one f_1 due to caching.")
    report.assert_compute_sets_matches(
        '*namef2*', 0,
        "There should be no f_2, as it is identical to f_1 and therefore "
        "cached.")
def testDontRemoveCastsIfUsed(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float16, [3])
      b = math_ops.cast(pa, np.float32)
      const = array_ops.constant(1.0, np.float32)
      b = b + const
      c = math_ops.cast(b, np.float16)

    report = ReportJSON(self, sess)
    report.reset()

    fd = {pa: [2.0, 0.5, 1.0]}
    result = sess.run(c, fd)
    self.assertAllClose(result, [3.0, 1.5, 2.0])

    report.parse_log(assert_len=4)

    ok = [
        '__seed*', 'host-exchange-local-copy-', 'Cast/convert.*/Cast',
        'add/fusion*/Add', 'Cast_1/convert.*/Cast'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testReductionSumVectorF16NoConverts(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float16, [4096], name="a")
      output = math_ops.reduce_sum(pa, axis=[0])

    report = ReportJSON(self, sess)
    report.reset()

    fd = {pa: np.ones([4096])}
    result = sess.run(output, fd)
    self.assertAllClose(result, 4096)

    report.parse_log()

    # Check that there are no casts to float at the beginning.
    ok = [
        '__seed*', 'host-exchange-local-copy-',
        'Sum/reduce*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
        'Sum/reduce*/ReduceFinalStage/IntermediateToOutput/Reduce'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testConvolutionsDontMatchDifferentDevices(self):
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        with ipu.scopes.ipu_shard(0):
          y = layers.Conv2D(
              2,
              1,
              use_bias=False,
              kernel_initializer=init_ops.ones_initializer())(x)
        with ipu.scopes.ipu_shard(1):
          y = layers.Conv2D(
              2,
              1,
              use_bias=False,
              kernel_initializer=init_ops.ones_initializer())(y)

    report = ReportJSON(self, sess, sharded=True)

    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run(y, {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # Note how there are two convolutions.
    ok = [
        '__seed*', '*OnTileCopy*', 'vs/conv2d/Conv2D/convolution.*',
        'Copy_vs/conv2d/Conv2D/convolution.*',
        'vs/conv2d_1/Conv2D/convolution.*'
    ]
    report.assert_all_compute_sets_and_list(ok)