def test_mpi_allgather_variable_size(self):
    """Check allgather on tensors whose first dimension differs per rank.

    Every rank contributes a 1D, 2D or 3D tensor filled with its own rank
    number, once as int32 and once as float32. The first dimension comes
    from a fixed per-rank size table and every remaining dimension is 17.
    The gathered result must have a first dimension equal to the sum of the
    per-rank sizes, and the slice belonging to rank ``i`` must contain only
    the value ``i``.
    """
    with mpi.Session() as session:
        size = session.run(mpi.size())
        rank = session.run(mpi.rank())

        # The size table below has 7 * 5 = 35 entries, so larger MPI sizes
        # are not covered. The guard depends on neither dtype nor dim, so it
        # is evaluated once here rather than on every loop iteration.
        if size > 35:
            return

        tensor_sizes = ([17, 32, 81, 12, 15, 23, 22] * 5)[:size]
        expected_size = sum(tensor_sizes)

        dtypes = tf.int32, tf.float32
        dims = 1, 2, 3
        for dtype, dim in itertools.product(dtypes, dims):
            trailing = [17] * (dim - 1)
            tensor = tf.ones([tensor_sizes[rank]] + trailing,
                             dtype=dtype) * rank
            gathered = mpi.allgather(tensor)

            gathered_tensor = session.run(gathered)
            self.assertEqual(list(gathered_tensor.shape),
                             [expected_size] + trailing)

            # Running start row of the current rank's slice; replaces a
            # per-rank sum(tensor_sizes[:i]).
            offset = 0
            for i, first_dim in enumerate(tensor_sizes):
                rank_size = [first_dim] + trailing
                # NOTE(review): this slices the symbolic `gathered` op, not
                # the fetched `gathered_tensor`, so each session.run below
                # presumably re-executes the allgather - confirm intended.
                rank_tensor = tf.slice(gathered,
                                       [offset] + [0] * (dim - 1),
                                       rank_size)
                self.assertEqual(list(rank_tensor.shape), rank_size)
                self.assertTrue(
                    session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                    "mpi.allgather produces incorrect gathered tensor")
                offset += first_dim
def test_mpi_allgather_variable_size(self):
    """Check allgather on tensors whose first dimension differs per rank.

    Every rank contributes a 1D, 2D or 3D tensor filled with its own rank
    number, once as int32 and once as float32. The first dimension comes
    from a fixed per-rank size table and every remaining dimension is 17.
    The gathered result must have a first dimension equal to the sum of the
    per-rank sizes, and the slice belonging to rank ``i`` must contain only
    the value ``i``.
    """
    with self.test_session() as session:
        size = session.run(mpi.size())
        rank = session.run(mpi.rank())

        # The size table below has 7 * 5 = 35 entries, so larger MPI sizes
        # are not covered. The guard depends on neither dtype nor dim, so it
        # is evaluated once here rather than on every loop iteration.
        if size > 35:
            return

        tensor_sizes = ([17, 32, 81, 12, 15, 23, 22] * 5)[:size]
        expected_size = sum(tensor_sizes)

        dtypes = tf.int32, tf.float32
        dims = 1, 2, 3
        for dtype, dim in itertools.product(dtypes, dims):
            trailing = [17] * (dim - 1)
            tensor = tf.ones([tensor_sizes[rank]] + trailing,
                             dtype=dtype) * rank
            gathered = mpi.allgather(tensor)

            gathered_tensor = session.run(gathered)
            self.assertEqual(list(gathered_tensor.shape),
                             [expected_size] + trailing)

            # Running start row of the current rank's slice; replaces a
            # per-rank sum(tensor_sizes[:i]).
            offset = 0
            for i, first_dim in enumerate(tensor_sizes):
                rank_size = [first_dim] + trailing
                # NOTE(review): this slices the symbolic `gathered` op, not
                # the fetched `gathered_tensor`, so each session.run below
                # presumably re-executes the allgather - confirm intended.
                rank_tensor = tf.slice(gathered,
                                       [offset] + [0] * (dim - 1),
                                       rank_size)
                self.assertEqual(list(rank_tensor.shape), rank_size)
                self.assertTrue(
                    session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                    "mpi.allgather produces incorrect gathered tensor")
                offset += first_dim
def test_mpi_allgather_type_error(self):
    """Expect allgather to fail when ranks contribute different dtypes.

    Even ranks build an int32 tensor and odd ranks a float32 tensor, each of
    shape 17 x 17 x 17. Running the allgather must raise
    ``tf.errors.FailedPreconditionError``.
    """
    with mpi.Session() as session:
        rank = session.run(mpi.rank())
        size = session.run(mpi.size())

        # With a single worker there is no second dtype to conflict with.
        if size == 1:
            return

        if rank % 2 == 0:
            dtype = tf.int32
        else:
            dtype = tf.float32
        mismatched = tf.ones([17] * 3, dtype=dtype) * rank

        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(mpi.allgather(mismatched))
def test_mpi_allgather_error(self):
    """Expect allgather to fail when a non-leading dimension differs.

    Each rank builds a float32 tensor of shape ``[17, 10 * (rank + 1), 17]``,
    so the second dimension disagrees between ranks. Running the allgather
    must raise ``tf.errors.FailedPreconditionError``.
    """
    with mpi.Session() as session:
        rank = session.run(mpi.rank())
        size = session.run(mpi.size())

        # With a single worker there is no other shape to disagree with.
        if size == 1:
            return

        # Only the middle dimension depends on the rank.
        shape = [17, 10 * (rank + 1), 17]
        mismatched = tf.ones(shape, dtype=tf.float32) * rank

        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(mpi.allgather(mismatched))
def test_mpi_allgather_type_error(self):
    """Expect allgather to fail when ranks contribute different dtypes.

    Even ranks build an int32 tensor and odd ranks a float32 tensor, each of
    shape 17 x 17 x 17. Running the allgather must raise
    ``tf.errors.FailedPreconditionError``.
    """
    with self.test_session() as session:
        rank = session.run(mpi.rank())
        size = session.run(mpi.size())

        # With a single worker there is no second dtype to conflict with.
        if size == 1:
            return

        if rank % 2 == 0:
            dtype = tf.int32
        else:
            dtype = tf.float32
        mismatched = tf.ones([17] * 3, dtype=dtype) * rank

        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(mpi.allgather(mismatched))
def test_mpi_allgather_error(self):
    """Expect allgather to fail when a non-leading dimension differs.

    Each rank builds a float32 tensor of shape ``[17, 10 * (rank + 1), 17]``,
    so the second dimension disagrees between ranks. Running the allgather
    must raise ``tf.errors.FailedPreconditionError``.
    """
    with self.test_session() as session:
        rank = session.run(mpi.rank())
        size = session.run(mpi.size())

        # With a single worker there is no other shape to disagree with.
        if size == 1:
            return

        # Only the middle dimension depends on the rank.
        shape = [17, 10 * (rank + 1), 17]
        mismatched = tf.ones(shape, dtype=tf.float32) * rank

        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(mpi.allgather(mismatched))
def test_mpi_allgather(self):
    """Verify allgather on equally sized 1D, 2D and 3D tensors.

    Each rank supplies a tensor with every dimension equal to 17, filled with
    its rank number, once as int32 and once as float32. The result must be
    ``17 * size`` long along the first dimension, and the ``i``-th block of
    17 entries along that dimension must hold only the value ``i``.
    """
    with self.test_session() as session:
        size = session.run(mpi.size())
        rank = session.run(mpi.rank())

        combos = itertools.product((tf.int32, tf.float32), (1, 2, 3))
        for dtype, dim in combos:
            local = tf.ones([17] * dim, dtype=dtype) * rank
            gather_op = mpi.allgather(local)
            gathered_tensor = session.run(gather_op)

            want_shape = [17 * size] + [17] * (dim - 1)
            self.assertEqual(list(gathered_tensor.shape), want_shape)

            for i in range(size):
                # Take 17 entries along dim 0 and everything (-1) elsewhere.
                begin = [i * 17] + [0] * (dim - 1)
                extent = [17] + [-1] * (dim - 1)
                rank_tensor = tf.slice(gathered_tensor, begin, extent)
                self.assertEqual(list(rank_tensor.shape), [17] * dim)

                all_match = session.run(
                    tf.reduce_all(tf.equal(rank_tensor, i)))
                self.assertTrue(
                    all_match,
                    "mpi.allgather produces incorrect gathered tensor")
def test_mpi_allgather(self):
    """Verify allgather on equally sized 1D, 2D and 3D tensors.

    Each rank supplies a tensor with every dimension equal to 17, filled with
    its rank number, once as int32 and once as float32. The result must be
    ``17 * size`` long along the first dimension, and the ``i``-th block of
    17 entries along that dimension must hold only the value ``i``.
    """
    with mpi.Session() as session:
        size = session.run(mpi.size())
        rank = session.run(mpi.rank())

        combos = itertools.product((tf.int32, tf.float32), (1, 2, 3))
        for dtype, dim in combos:
            local = tf.ones([17] * dim, dtype=dtype) * rank
            gather_op = mpi.allgather(local)
            gathered_tensor = session.run(gather_op)

            want_shape = [17 * size] + [17] * (dim - 1)
            self.assertEqual(list(gathered_tensor.shape), want_shape)

            for i in range(size):
                # Take 17 entries along dim 0 and everything (-1) elsewhere.
                begin = [i * 17] + [0] * (dim - 1)
                extent = [17] + [-1] * (dim - 1)
                rank_tensor = tf.slice(gathered_tensor, begin, extent)
                self.assertEqual(list(rank_tensor.shape), [17] * dim)

                all_match = session.run(
                    tf.reduce_all(tf.equal(rank_tensor, i)))
                self.assertTrue(
                    all_match,
                    "mpi.allgather produces incorrect gathered tensor")