def testSimpleXlaCompileTrainingInLoopV1WithEarlySharding(self):
  """Shard a conv training step inside a v1 repeat loop and inspect the graph.

  `autoshard.automatic_sharding` is called from within the loop body, and the
  loop is built with `use_while_v1=True`. After compilation, every op in the
  default graph that is placed on '/device:IPU:0' and whose type is not in
  `allowed_op_types` must carry an `_XlaSharding` attribute. The forward,
  backward and weight-update op types must also be among the ops seen.
  """
  dataset = tu.create_dual_increasing_dataset(3)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed3")

  def my_net():
    def my_model(loss, x, y):
      with ops.device("/device:IPU:0"):
        # Keep a handle on the raw input: it is the starting point handed to
        # the automatic sharding call below.
        inp = x

        x = layers.Conv2D(
            8, 3, padding='same', name="conv1", use_bias=False)(x)
        x = layers.Conv2D(
            8, 3, padding='same', name="conv2", use_bias=False)(x)
        x = layers.Conv2D(
            8, 3, padding='same', name="conv3", use_bias=False)(x)
        x = math_ops.reduce_max(x, axis=[1, 2])

        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)

        optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
        # The optimizer is handed `cross_entropy`, not the reduced `loss`.
        train = optim.minimize(cross_entropy)

        autoshard.automatic_sharding(2, inp, loss)

        return [loss, train]

    loss = 0.0
    return loops.repeat(
        10, my_model, [loss], infeed_queue, use_while_v1=True)

  ipu_compiler.compile(my_net, inputs=[])

  # Collect the types of all checked IPU ops while verifying each is sharded.
  op_types = set()
  for o in ops.get_default_graph().get_operations():
    if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
      op_types.add(o.type)
      self.assertIsNotNone(o.get_attr('_XlaSharding'))

  self.assertGreater(len(op_types), 10)
  self.assertIn('Conv2D', op_types)
  self.assertIn('Conv2DBackpropInput', op_types)
  self.assertIn('Conv2DBackpropFilter', op_types)
  self.assertIn('ResourceApplyGradientDescent', op_types)
def testReadResourceVar(self):
  """Read a variable through one branch of a compiled `cond`.

  The true branch returns the value of variable "x" (initialised to 1) and
  the false branch returns the constant 0. The predicate is fed as True and
  then False, and the matching result is checked each time.
  """
  def my_model(pcond):
    va = variable_scope.get_variable(
        "x",
        shape=[],
        dtype=np.float32,
        initializer=init_ops.constant_initializer(1))

    o = control_flow_ops.cond(
        pcond,
        true_fn=lambda: va.read_value(),
        false_fn=lambda: constant_op.constant(0.))
    return [o]

  with ops.device("cpu"):
    # `np.bool` was only a deprecated alias of the builtin `bool` and is
    # missing from NumPy 1.24+; `np.bool_` is the NumPy boolean scalar type
    # and is available in every NumPy release.
    pcond = array_ops.placeholder(np.bool_, [], name="pred")

  with ops.device("/device:IPU:0"):
    r = ipu_compiler.compile(my_model, inputs=[pcond])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())

    # True selects the variable read, False selects the constant.
    for pred_value, expected in ((True, 1.), (False, 0.)):
      result = sess.run(r[0], {pcond: pred_value})
      self.assertAllClose(result, expected)
def testSimpleXlaCompileTraining(self):
  """Shard a three-convolution training step and check the loss dependencies.

  Every op returned by `sharding.dependencies` for the compiled loss that is
  placed on '/device:IPU:0' and whose type is not in `allowed_op_types` must
  carry an `_XlaSharding` attribute.
  """
  def my_model(inp, lab):
    x = inp
    y = lab

    x = layers.Conv2D(8, 3, padding='same', name="conv1", use_bias=False)(x)
    x = layers.Conv2D(8, 3, padding='same', name="conv2", use_bias=False)(x)
    x = layers.Conv2D(8, 3, padding='same', name="conv3", use_bias=False)(x)
    x = math_ops.reduce_max(x, axis=[1, 2])

    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
        logits=x, labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(cross_entropy)

    optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
    # The optimizer is handed `cross_entropy`, not the reduced `loss`.
    train = optim.minimize(cross_entropy)

    autoshard.automatic_sharding(2, inp, loss)

    return [loss, train]

  with ops.device("cpu"):
    inp = array_ops.placeholder(np.float32, [1, 12, 12, 4], name="data")
    lab = array_ops.placeholder(np.float32, [1, 8], name="labl")

  with ops.device("/device:IPU:0"):
    out = ipu_compiler.compile(my_model, inputs=[inp, lab])

  # Only the ops the loss output depends on are inspected.
  op_set = sharding.dependencies([out[0]])

  for o in op_set:
    if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
      self.assertIsNotNone(o.get_attr('_XlaSharding'))
def testPopnnLstmXlaCompileTrainingInLoop(self):
  """Shard a PopnnLSTM training step inside a repeat loop and inspect it.

  The loop is built with `use_while_v1=False`, so the ops are looked up in
  the body returned by `get_single_while_op_body`. Every op there that is
  placed on '/device:IPU:0' and whose type is not in `allowed_op_types` must
  carry an `_XlaSharding` attribute. The LSTM, loss and weight-update op
  types must also be among the ops seen.
  """
  dataset = tu.create_dual_increasing_dataset(
      3, data_shape=[16, 2, 8], label_shape=[16, 2, 256])
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed1")

  def my_net():
    def my_model(loss, x, y):
      with ops.device("/device:IPU:0"):
        # Keep a handle on the raw input: it is the starting point handed to
        # the automatic sharding call below.
        inp = x

        lstm_cell = popnn_rnn.PopnnLSTM(256, dtype=dtypes.float32)
        x, _ = lstm_cell(x, training=True)

        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)

        optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
        # The optimizer is handed `cross_entropy`, not the reduced `loss`.
        train = optim.minimize(cross_entropy)

        autoshard.automatic_sharding(2, inp, loss)

        return [loss, train]

    loss = 0.0
    return loops.repeat(
        10, my_model, [loss], infeed_queue, use_while_v1=False)

  ipu_compiler.compile(my_net, inputs=[])

  body = get_single_while_op_body(ops.get_default_graph())

  # Collect the types of all checked IPU ops while verifying each is sharded.
  op_types = set()
  for o in body.get_operations():
    if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
      op_types.add(o.type)
      self.assertIsNotNone(o.get_attr('_XlaSharding'))

  self.assertGreater(len(op_types), 10)
  self.assertIn('PopnnLstmLayer', op_types)
  self.assertIn('PopnnLstmLayerBackprop', op_types)
  self.assertIn('LogSoftmax', op_types)
  self.assertIn('SoftmaxCrossEntropyWithLogits', op_types)
  self.assertIn('ResourceApplyGradientDescent', op_types)
def testFusionsInWhileLoops(self):
  """Run a conv training step inside a `while_loop` and check the counter.

  The loop condition is `i < 3` (with `maximum_iterations=10`), so the
  returned counter is expected to be 3.
  """
  def my_net():
    def keep_going(i, x):
      return i < 3

    def step(i, loss):
      i = i + 1

      normal_init = init_ops.random_normal_initializer(
          0.0, 1.0, seed=1, dtype=np.float32)
      x = variable_scope.get_variable(
          "v2", dtype=np.float32, shape=[1, 4, 4, 2], initializer=normal_init)

      with variable_scope.variable_scope("vs", use_resource=True):
        # Three chained 1x1 convolutions with biases, kernels set to ones.
        y = x
        for conv_name in ('conv1', 'conv2', 'conv3'):
          y = layers.Conv2D(
              2,
              1,
              use_bias=True,
              kernel_initializer=init_ops.ones_initializer(),
              name=conv_name)(y)

      loss = math_ops.reduce_sum(y)
      train = gradient_descent.GradientDescentOptimizer(0.1).minimize(loss)

      # Route the loop outputs through identities that depend on the
      # training op so the update is part of every iteration.
      with ops.control_dependencies([train]):
        i = array_ops.identity(i)
        loss = array_ops.identity(loss)
      return (i, loss)

    initial_state = (0, 0.0)
    return control_flow_ops.while_loop(
        keep_going, step, initial_state, maximum_iterations=10)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    r = ipu_compiler.compile(my_net, inputs=[])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    # Only the iteration counter is checked; the loss value is ignored.
    count, _ = sess.run(r, {})
    self.assertEqual(count, 3)
def testSimpleXlaCompileInference(self):
  """Shard a compiled element-wise square and check the graph annotations.

  After `autoshard.automatic_sharding` runs on the compiled output, every op
  in the default graph that is placed on '/device:IPU:0' and is not a 'NoOp'
  must carry an `_XlaSharding` attribute.
  """
  def my_model(inp):
    output = inp * inp
    return [output]

  with ops.device("cpu"):
    inp = array_ops.placeholder(np.float32, [], name="a")

  with ops.device("/device:IPU:0"):
    out = ipu_compiler.compile(my_model, inputs=[inp])

  # Unlike the training tests, sharding is applied after compilation here.
  autoshard.automatic_sharding(2, inp, out[0])

  op_list = ops.get_default_graph().get_operations()
  for o in op_list:
    if o.device == '/device:IPU:0' and o.type != 'NoOp':
      self.assertIsNotNone(o.get_attr('_XlaSharding'))
def testGather(self):
  """Gather rows of a [2, 4] int32 tensor in swapped order on the IPU."""
  def my_net(p, i):
    # Forward pass
    return [array_ops.gather(p, i, axis=0)]

  with ops.device('cpu'):
    params = array_ops.placeholder(dtypes.int32, [2, 4])
    indices = array_ops.placeholder(dtypes.int32, [2])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[params, indices])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    feed = {
        params: [[1, 3, 5, 7], [0, 2, 4, 6]],
        indices: [1, 0],
    }
    result = sess.run(compiled, feed)
    # Indices [1, 0] select the second row first, then the first row.
    self.assertAllClose(result[0], [[0, 2, 4, 6], [1, 3, 5, 7]])
def testSimpleXlaCompileTrainingInLoopWithParam(self):
  """Run a sharded training loop whose learning rate is a fed placeholder.

  The learning rate is threaded through the repeat loop as a loop-carried
  value alongside the loss. The test only checks that the compiled loop runs
  on a two-IPU configuration; no output value is asserted.
  """
  dataset = tu.create_dual_increasing_dataset(3)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed")

  def my_net(lr):
    def my_model(lr, loss, x, y):
      with ipu.ops.ipu_scope("/device:IPU:0"):
        # Remember the raw input; it is passed to the sharding call below.
        inp = x

        conv = layers.Conv2D(
            8, 3, padding='same', name="conv1", use_bias=False)
        x = math_ops.reduce_max(conv(x), axis=[1, 2])

        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)

        optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(lr))
        train = optim.minimize(cross_entropy)

        autoshard.automatic_sharding(2, inp, loss)

        return [lr, loss, train]

    initial_loss = 0.0
    return loops.repeat(2, my_model, [lr, initial_loss], infeed_queue)

  learning_rate = array_ops.placeholder(dtypes.float32, [])

  out = ipu_compiler.compile(my_net, inputs=[learning_rate])

  ipu_config = ipu.utils.create_ipu_config(profiling=False)
  ipu_config = ipu.utils.set_ipu_model_options(
      ipu_config, compile_ipu_code=False)
  ipu_config = ipu.utils.auto_select_ipus(ipu_config, 2)
  ipu.utils.configure_ipu_system(ipu_config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    sess.run(variables.global_variables_initializer())
    sess.run(out[0], {learning_rate: 0.1})
def testWhileLoopTupleOfTuples(self):
  """Train one step of a GRU built with `nn.dynamic_rnn` and check the loss.

  This test makes sure that we can handle tuple of tuples for while loops.
  """
  random_seed.set_random_seed(1)
  data_type = dtypes.float32
  num_input = 14
  timesteps = 2
  num_units = 128

  def build_rnn(x):
    # Define a GRU cell with tensorflow
    cell = nn.rnn_cell.GRUCell(num_units, name="GRU")
    # Get gru cell output; the final state is not used.
    outputs, _ = nn.dynamic_rnn(cell, x, dtype=data_type)
    return outputs[-1]

  def my_net(features, labels):
    # Forward pass
    logits = build_rnn(features)
    # Loss
    per_example = nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=array_ops.stop_gradient(labels))
    cross_entropy = math_ops.reduce_mean(per_example)
    # Training
    sgd = gradient_descent.GradientDescentOptimizer(0.01)
    train = sgd.minimize(cross_entropy)
    return [cross_entropy, train]

  with ops.device('cpu'):
    X = array_ops.placeholder(data_type, [1, timesteps, num_input])
    Y = array_ops.placeholder(data_type, [1, timesteps, num_units])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    r = ipu_compiler.compile(my_net, inputs=[X, Y])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    feed = {X: np.ones(X.shape), Y: np.ones(Y.shape)}
    result = sess.run(r, feed)
    # Compare the value against the expected value obtained by running on
    # XLA_CPU.
    # NOTE(review): the original comment says the loss should be "within 1"
    # of the expected value, but `rtol=1` is a relative tolerance (100% of
    # the expected value). `atol=1` would match that wording - confirm which
    # is intended before tightening.
    self.assertAllClose(result[0], 621.9, rtol=1)
def testNestedWhileLoopsSimplified(self):
  """Double a tensor inside a while loop nested in another while loop.

  The outer loop runs while `i < 3` and the inner loop while `j < 2`, so the
  input is doubled 3 * 2 = 6 times: an input of 2 becomes 128.
  """
  def my_net(x):
    def outer_cond(i, x):
      return i < 3

    def inner_cond(j, x):
      return j < 2

    def inner_body(j, x):
      return (j + 1, x * 2)

    def outer_body(i, x):
      i = i + 1
      # The inner counter restarts from zero on every outer iteration.
      _, x = control_flow_ops.while_loop(
          inner_cond, inner_body, (0, x), maximum_iterations=10)
      return (i, x)

    count, value = control_flow_ops.while_loop(
        outer_cond, outer_body, (0, x), maximum_iterations=10)
    return (count, value)

  with ops.device('cpu'):
    x = array_ops.placeholder(dtypes.int32, [4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    r = ipu_compiler.compile(my_net, inputs=[x])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    feed = {x: np.full([4], 2, dtype=np.int32)}
    c, val = sess.run(r, feed)
    self.assertEqual(c, 3)
    self.assertAllClose(val, np.full([4], 128))
def testDifferentArgs(self):
  """Compile a `cond` whose two branches use different argument subsets.

  The true branch computes `pa + pb` and the false branch `pb - pc`. With
  a=1, b=2, c=3 the expected results are 3 and -1 respectively.
  """
  def my_model(pcond, pa, pb, pc):
    output = control_flow_ops.cond(
        pcond, true_fn=lambda: pa + pb, false_fn=lambda: pb - pc)
    return [output]

  with ops.device("cpu"):
    # `np.bool` was only a deprecated alias of the builtin `bool` and is
    # missing from NumPy 1.24+; `np.bool_` is the NumPy boolean scalar type
    # and is available in every NumPy release.
    pcond = array_ops.placeholder(np.bool_, [], name="pred")
    pa = array_ops.placeholder(np.float32, [], name="a")
    pb = array_ops.placeholder(np.float32, [], name="b")
    pc = array_ops.placeholder(np.float32, [], name="c")

  with ops.device("/device:IPU:0"):
    r = ipu_compiler.compile(my_model, inputs=[pcond, pa, pb, pc])

  with session_lib.Session() as sess:
    # Same operand values for both runs; only the predicate changes.
    for pred_value, expected in ((True, 3.), (False, -1.)):
      fd = {pcond: pred_value, pa: 1., pb: 2., pc: 3.}
      result = sess.run(r[0], fd)
      self.assertAllClose(result, expected)
def testInplaceOpsInRepeats(self):
  """Apply `relu(x * x)` three times inside a while loop.

  Starting from 2 the value goes 2 -> 4 -> 16 -> 256 over the three
  iterations allowed by the `i < 3` condition.
  """
  def my_net(x):
    def keep_going(i, x):
      return i < 3

    def step(i, x):
      squared = x * x
      return (i + 1, nn.relu(squared))

    return control_flow_ops.while_loop(keep_going, step, (0, x))

  with ops.device('cpu'):
    x = array_ops.placeholder(dtypes.float32, [4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    r = ipu_compiler.compile(my_net, inputs=[x])

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    feed = {x: np.full([4], 2)}
    count, value = sess.run(r, feed)
    self.assertEqual(count, 3)
    self.assertAllClose(value, np.full([4], 256))
def testTfLstmInWhileV1(self):
  """Run a TensorFlow LSTM training step inside a v1 repeat loop.

  The test only checks that the compiled loop executes on a single-IPU
  configuration; no output value is asserted.
  """
  dataset = tu.create_dual_increasing_dataset(
      3, data_shape=[4, 1, 8], label_shape=[4, 1, 128])
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed")

  def my_net():
    def my_model(loss, x, y):
      with ipu.ops.ipu_scope("/device:IPU:0"):
        cell = rnn_cell.LSTMCell(128)
        x, _ = rnn.dynamic_rnn(
            cell=cell, inputs=x, dtype=dtypes.float32, time_major=True)

        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)

        sgd = gradient_descent.GradientDescentOptimizer(0.01)
        train = sgd.minimize(cross_entropy)

        return [loss, train]

    initial_loss = 0.0
    return loops.repeat(
        10, my_model, [initial_loss], infeed_queue, use_while_v1=True)

  out = ipu_compiler.compile(my_net, inputs=[])

  ipu_config = ipu.utils.create_ipu_config(profiling=True)
  ipu_config = ipu.utils.set_ipu_model_options(
      ipu_config, compile_ipu_code=False)
  ipu_config = ipu.utils.auto_select_ipus(ipu_config, 1)
  ipu.utils.configure_ipu_system(ipu_config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    sess.run(variables.global_variables_initializer())
    sess.run(out[0], {})

def testRepeatLoopGradient(self):
  """Take gradients through `ipu.loops.repeat` and read back the variable.

  The fetched value of variable "a" is expected to equal its initial
  value, 1.
  """
  def model(features):
    a = variable_scope.get_variable("a", initializer=1.0)

    def scale(x):
      return a * x

    logits = ipu.loops.repeat(5, scale, [features])
    loss = math_ops.reduce_sum(logits)

    optimizer = momentum.MomentumOptimizer(learning_rate=.001, momentum=0.9)
    train_op = optimizer.apply_gradients(optimizer.compute_gradients(loss))
    return a, loss, train_op

  with ops.device('cpu'):
    features = array_ops.placeholder(dtypes.float32, shape=[10])

  with ipu.ops.ipu_scope('/device:IPU:0'):
    ret = ipu.ipu_compiler.compile(model, [features])

  options = ipu.utils.auto_select_ipus(ipu.utils.create_ipu_config(), 1)
  ipu.utils.configure_ipu_system(options)

  with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    # Two values are fetched; only the first one is asserted on.
    a_value, _ = sess.run(ret, feed_dict={features: np.ones([10])})
    self.assertEqual(a_value, 1)