Example #1
    def test_mpi_allgather_variable_size(self):
        """Test that the allgather correctly gathers 1D, 2D, 3D tensors,
    even if those tensors have different sizes along the first dim."""
        with self.test_session() as session:
            size = session.run(mpi.size())
            rank = session.run(mpi.rank())

            dtypes = tf.int32, tf.float32
            dims = 1, 2, 3
            for dtype, dim in itertools.product(dtypes, dims):
                # Support tests up to an MPI size of 35
                if size > 35:
                    break

                tensor_sizes = [17, 32, 81, 12, 15, 23, 22] * 5
                tensor_sizes = tensor_sizes[:size]

                tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1),
                                 dtype=dtype) * rank
                gathered = mpi.allgather(tensor)

                gathered_tensor = session.run(gathered)
                expected_size = sum(tensor_sizes)
                self.assertEqual(list(gathered_tensor.shape),
                                 [expected_size] + [17] * (dim - 1))

                for i in range(size):
                    rank_size = [tensor_sizes[i]] + [17] * (dim - 1)
                    rank_tensor = tf.slice(gathered, [sum(tensor_sizes[:i])] +
                                           [0] * (dim - 1), rank_size)
                    self.assertEqual(list(rank_tensor.shape), rank_size)
                    self.assertTrue(
                        session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                        "mpi.allgather produces incorrect gathered tensor")
Example #2
    def test_mpi_allreduce_error(self):
        """Test that the allreduce raises an error if different ranks try to
    send tensors of different rank or dimension."""
        with self.test_session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            # Same tensor rank, different dimensions
            tf.set_random_seed(1234)
            dims = [17 + rank] * 3
            tensor = tf.random_uniform(dims, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))

            # Same number of elements, different tensor rank
            tf.set_random_seed(1234)
            if rank == 0:
                dims = [17, 23 * 57]
            else:
                dims = [17, 23, 57]
            tensor = tf.random_uniform(dims, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))
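For contrast with the failure cases above, here is a hedged sketch of a well-formed call, written in the same test style and assuming the same mpi module: allreduce should succeed when every rank contributes a tensor of identical shape and dtype. Whether the result holds the sum or the average across ranks depends on the implementation's default, so only the shape is checked. The method name is hypothetical and not part of the original suite.

    def test_mpi_allreduce_matching_shapes_sketch(self):
        """Hedged sketch: allreduce with identical shapes and dtypes on
        every rank is expected not to raise."""
        with self.test_session() as session:
            tensor = tf.ones([17, 17], dtype=tf.float32)  # same on all ranks
            reduced = session.run(mpi.allreduce(tensor))
            # Assert only the shape; sum vs. average is implementation-defined.
            self.assertEqual(list(reduced.shape), [17, 17])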
Example #3
    def test_mpi_allgather_type_error(self):
        """Test that the allgather returns an error if the types being gathered
    differ among the processes"""
        with self.test_session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allgather(tensor))
Example #4
    def test_mpi_allgather_error(self):
        """Test that the allgather returns an error if any dimension besides
    the first is different among the tensors being gathered."""
        with self.test_session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allgather(tensor))
Example #5
    def test_mpi_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
    send tensors of different type."""
        with self.test_session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            # Same dimensions, different dtype
            dims = [17] * 3
            tensor = tf.ones(dims,
                             dtype=tf.int32 if rank % 2 == 0 else tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))
Example #6
    def test_mpi_allgather(self):
        """Test that the allgather correctly gathers 1D, 2D, 3D tensors."""
        with self.test_session() as session:
            size = session.run(mpi.size())
            rank = session.run(mpi.rank())

            dtypes = tf.int32, tf.float32
            dims = 1, 2, 3
            for dtype, dim in itertools.product(dtypes, dims):
                tensor = tf.ones([17] * dim, dtype=dtype) * rank
                gathered = mpi.allgather(tensor)

                gathered_tensor = session.run(gathered)
                self.assertEqual(list(gathered_tensor.shape),
                                 [17 * size] + [17] * (dim - 1))

                for i in range(size):
                    rank_tensor = tf.slice(gathered_tensor,
                                           [i * 17] + [0] * (dim - 1),
                                           [17] + [-1] * (dim - 1))
                    self.assertEqual(list(rank_tensor.shape), [17] * dim)
                    self.assertTrue(
                        session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                        "mpi.allgather produces incorrect gathered tensor")
Example #7
    def test_mpi_rank(self):
        """Test that the rank returned by mpi.rank() is correct."""
        true_rank, _ = mpi_env_rank_and_size()
        with self.test_session() as session:
            rank = session.run(mpi.rank())
            self.assertEqual(true_rank, rank)
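Example #7 relies on mpi_env_rank_and_size(), a helper defined elsewhere in the test file but not shown in these excerpts. A minimal sketch of what such a helper typically looks like, assuming the environment variables set by common MPI launchers (OMPI_COMM_WORLD_* for Open MPI, PMI_* for MPICH/Hydra); the variable names are assumptions, not taken from the excerpt:

import os

def mpi_env_rank_and_size():
    """Return (rank, size) read from common MPI launcher environment
    variables, falling back to (0, 1) for a single-process run."""
    # Assumed launcher variables: Open MPI sets OMPI_COMM_WORLD_*,
    # MPICH/Hydra sets PMI_*; neither name comes from the excerpts above.
    candidates = [('OMPI_COMM_WORLD_RANK', 'OMPI_COMM_WORLD_SIZE'),
                  ('PMI_RANK', 'PMI_SIZE')]
    for rank_var, size_var in candidates:
        rank, size = os.environ.get(rank_var), os.environ.get(size_var)
        if rank is not None and size is not None:
            return int(rank), int(size)
    return 0, 1

Tests like these only exercise the multi-process paths when launched under an MPI launcher, e.g. mpirun -np 2 python <test_file>.py; run as a plain Python script, size() is 1 and several of the error tests return early.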