def _do_all_reduce_pack_test(self, tt):
  """Test that all-reduce results are the same with or without packing."""
  with ops.Graph().as_default():
    tower_grads, consts, _, _ = self._init_tensors(
        tt.num_devices, tt.in_shapes)
    device_prefixes = ['/job:localhost']
    num_workers = 1
    algorithm = 'xring'
    num_shards = 1
    gpu_indices = range(0, tt.num_devices)
    assert len(gpu_indices) == len(tower_grads)
    # Reference result: all-reduce applied directly to the raw gradients,
    # with in-reduce aggregation disabled.
    no_pack_all_reduce = allreduce.sum_gradients_all_reduce(
        device_prefixes, tower_grads, num_workers, algorithm, num_shards,
        gpu_indices,
        agg_small_grads_max_bytes=0, agg_small_grads_max_group=1)
    # Same all-reduce, but with small tensors packed beforehand and
    # unpacked afterwards.
    packed_tg, packing = allreduce.pack_small_tensors(tower_grads, 100, 100)
    packed_all_reduce = allreduce.sum_gradients_all_reduce(
        device_prefixes, packed_tg, num_workers, algorithm, num_shards,
        gpu_indices,
        agg_small_grads_max_bytes=0, agg_small_grads_max_group=1)
    unpacked_tg = allreduce.unpack_small_tensors(packed_all_reduce, packing)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      no_pack_values = sess.run(no_pack_all_reduce)
      pack_unpack_values = sess.run(unpacked_tg)
      for dev in range(1, tt.num_devices):
        for idx in range(0, len(tt.in_shapes)):
          # After the sum-all-reduce every device should hold
          # num_devices * the device-0 constant for each tensor.
          self.assertTrue(np.allclose(
              no_pack_values[dev][idx][0],
              tt.num_devices * consts[0][idx]))
          # Packed and unpacked paths must agree exactly.
          self.assertTrue(np.array_equal(
              no_pack_values[dev][idx][0],
              pack_unpack_values[dev][idx][0]))
def _do_pack_unpack_test(self, tt):
  """Do a single pack-unpack test.

  Args:
    tt: A _test_tuple defining the parameters of the test to do.

  This test executes a graph that performs a pack of tower_grads
  followed by an unpack and verifies that the shapes and values
  of gradient tensors are unchanged, along with paired variables.
  """
  with ops.Graph().as_default():
    tower_grads, consts, _, vrbls = self._init_tensors(
        tt.num_devices, tt.in_shapes)
    packed_tg, packing = allreduce.pack_small_tensors(
        tower_grads, max_bytes=40, max_group=10)
    unpacked_tg = allreduce.unpack_small_tensors(packed_tg, packing)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      packed = sess.run(packed_tg)
      for d in range(0, tt.num_devices):
        for t in range(0, len(tt.out_shapes)):
          # Element count of the expected packed tensor.  np.prod of an
          # empty shape is 1, so a scalar shape is handled correctly,
          # unlike the previous running product seeded with `(0 or 1)`,
          # which produced 0 elements for a scalar.
          num_elts = int(np.prod(tt.out_shapes[t], dtype=np.int64))
          # The pack op fills each output tensor with a consecutive
          # run of values starting at tt.out_i[t].
          expected = np.arange(
              tt.out_i[t], tt.out_i[t] + num_elts,
              dtype=np.float32).reshape(tt.out_shapes[t])
          self.assertTrue(np.array_equal(expected, packed[d][t][0]))
      unpacked = sess.run(unpacked_tg)
      for d in range(0, tt.num_devices):
        for t in range(0, len(tt.in_shapes)):
          # Values round-trip unchanged ...
          self.assertTrue(np.array_equal(consts[d][t], unpacked[d][t][0]))
          # ... and each gradient stays paired with its variable.
          self.assertEqual(vrbls[d][t], unpacked_tg[d][t][1])
def testPackSmallTensors(self):
  """Exercise pack_small_tensors at three different size thresholds."""
  t0 = tf.constant([0, 1, 2, 3], dtype=tf.float32)
  t1 = tf.constant([4, 5, 6, 7], dtype=tf.float32)
  t2 = tf.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=tf.float32)
  t3 = tf.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=tf.float32)
  grads = [t0, t1, t2, t3]
  # 3 towers, each pairing every gradient with a variable name 'v_<d>_<i>'.
  tower_grads = [[(g, 'v_%d_%d' % (d, i)) for i, g in enumerate(grads)]
                 for d in range(3)]

  def expected_packing(count, shapes):
    # Packing metadata for the `count` leading tensors concatenated on
    # each of the 3 towers.
    return {
        '%d:0' % d: allreduce.GradPackTuple(
            indices=range(count),
            vars=['v_%d_%d' % (d, i) for i in range(count)],
            shapes=shapes)
        for d in range(3)
    }

  # 1) Set the size limit so small that nothing gets concatenated.
  new_tower_grads, packing = allreduce.pack_small_tensors(
      tower_grads, max_bytes=12, max_group=10)
  self.assertEqual(tower_grads, new_tower_grads)
  self.assertTrue(packing is None)

  # 2) Set the size limit so only the first two tensors get concatenated.
  new_tower_grads, packing = allreduce.pack_small_tensors(
      tower_grads,
      max_bytes=16,  # 16 bytes == 4 elements
      max_group=10)
  self.assertEqual(3, len(new_tower_grads))
  self.assertEqual(4, len(tower_grads[0]))
  first_tower = new_tower_grads[0]
  self.assertEqual(3, len(first_tower))
  self.assertEqual(1, first_tower[0][0].shape.ndims)
  self.assertEqual(8, first_tower[0][0].shape.dims[0])
  self.assertEqual(
      packing,
      expected_packing(2, [tf.TensorShape([4]), tf.TensorShape([4])]))

  # 3) Set the size limit so all tensors get concatenated.
  new_tower_grads, packing = allreduce.pack_small_tensors(
      tower_grads,
      max_bytes=256,  # 256 bytes == 64 elements
      max_group=10)
  self.assertEqual(3, len(new_tower_grads))
  self.assertEqual(4, len(tower_grads[0]))
  self.assertEqual(1, len(new_tower_grads[0]))
  first_tower = new_tower_grads[0]
  self.assertEqual(1, first_tower[0][0].shape.ndims)
  self.assertEqual(26, first_tower[0][0].shape.dims[0])
  self.assertEqual(
      packing,
      expected_packing(4, [
          tf.TensorShape([4]),
          tf.TensorShape([4]),
          tf.TensorShape([3, 3]),
          tf.TensorShape([3, 3])
      ]))