Example #1
    def test_mpi_allreduce_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different rank or dimension."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            # Same rank, different dimension
            tf.set_random_seed(1234)
            dims = [17 + rank] * 3
            tensor = tf.random_uniform(dims, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))

            # Same number of elements, different rank
            tf.set_random_seed(1234)
            if rank == 0:
                dims = [17, 23 * 57]
            else:
                dims = [17, 23, 57]
            tensor = tf.random_uniform(dims, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))
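For contrast with the failure cases above, a minimal sketch of the matching success path could look like the following. It assumes, as the tests on this page do, that mpi.allreduce performs an elementwise sum across ranks and that every process builds the same graph with the same seed; the test name and tolerance are illustrative, not part of the original suite.

    def test_mpi_allreduce_sum_sketch(self):
        """Sketch of the success case: every rank contributes an identically
        shaped tensor, so no FailedPreconditionError is expected."""
        with mpi.Session() as session:
            size = session.run(mpi.size())

            # All ranks use the same seed and graph, so the random tensor
            # holds identical values on every process.
            tf.set_random_seed(1234)
            tensor = tf.random_uniform([17] * 3, -1.0, 1.0)
            summed = mpi.allreduce(tensor)

            # Assumes allreduce is an elementwise sum across ranks; both ops
            # read the same random tensor within a single run.
            max_difference = tf.reduce_max(tf.abs(summed - tensor * float(size)))
            self.assertLess(session.run(max_difference), 1e-4)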
Example #2
    def test_mpi_allgather_variable_size(self):
        """Test that the allgather correctly gathers 1D, 2D, 3D tensors,
        even if those tensors have different sizes along the first dim."""
        with mpi.Session() as session:
            size = session.run(mpi.size())
            rank = session.run(mpi.rank())

            dtypes = tf.int32, tf.float32
            dims = 1, 2, 3
            for dtype, dim in itertools.product(dtypes, dims):
                # Support tests up to MPI Size of 35
                if size > 35:
                    break

                tensor_sizes = [17, 32, 81, 12, 15, 23, 22] * 5
                tensor_sizes = tensor_sizes[:size]

                tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1),
                                 dtype=dtype) * rank
                gathered = mpi.allgather(tensor)

                gathered_tensor = session.run(gathered)
                expected_size = sum(tensor_sizes)
                self.assertEqual(list(gathered_tensor.shape),
                                 [expected_size] + [17] * (dim - 1))

                for i in range(size):
                    rank_size = [tensor_sizes[i]] + [17] * (dim - 1)
                    rank_tensor = tf.slice(
                        gathered, [sum(tensor_sizes[:i])] + [0] * (dim - 1),
                        rank_size)
                    self.assertEqual(list(rank_tensor.shape), rank_size)
                    self.assertTrue(
                        session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                        "mpi.allgather produces incorrect gathered tensor")
Example #3
  def test_mpi_allgather_variable_size(self):
    """Test that the allgather correctly gathers 1D, 2D, 3D tensors,
    even if those tensors have different sizes along the first dim."""
    with self.test_session() as session:
      size = session.run(mpi.size())
      rank = session.run(mpi.rank())

      dtypes = tf.int32, tf.float32
      dims = 1, 2, 3
      for dtype, dim in itertools.product(dtypes, dims):
        # Support tests up to MPI Size of 35
        if size > 35:
          break

        tensor_sizes = [17, 32, 81, 12, 15, 23, 22] * 5
        tensor_sizes = tensor_sizes[:size]

        tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1),
                         dtype=dtype) * rank
        gathered = mpi.allgather(tensor)

        gathered_tensor = session.run(gathered)
        expected_size = sum(tensor_sizes)
        self.assertEqual(list(gathered_tensor.shape),
                         [expected_size] + [17] * (dim - 1))

        for i in range(size):
          rank_size = [tensor_sizes[i]] + [17] * (dim - 1)
          rank_tensor = tf.slice(gathered,
                                 [sum(tensor_sizes[:i])] + [0] * (dim - 1),
                                 rank_size)
          self.assertEqual(list(rank_tensor.shape), rank_size)
          self.assertTrue(session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                          "mpi.allgather produces incorrect gathered tensor")
Example #4
  def test_mpi_allreduce_error(self):
    """Test that the allreduce raises an error if different ranks try to
    send tensors of different rank or dimension."""
    with self.test_session() as session:
      rank = session.run(mpi.rank())
      size = session.run(mpi.size())

      # This test does not apply if there is only one worker.
      if size == 1:
        return

      # Same rank, different dimension
      tf.set_random_seed(1234)
      dims = [17 + rank] * 3
      tensor = tf.random_uniform(dims, -1.0, 1.0)
      with self.assertRaises(tf.errors.FailedPreconditionError):
        session.run(mpi.allreduce(tensor))

      # Same number of elements, different rank
      tf.set_random_seed(1234)
      if rank == 0:
        dims = [17, 23 * 57]
      else:
        dims = [17, 23, 57]
      tensor = tf.random_uniform(dims, -1.0, 1.0)
      with self.assertRaises(tf.errors.FailedPreconditionError):
        session.run(mpi.allreduce(tensor))
Example #5
    def test_mpi_allgather_type_error(self):
        """Test that the allgather returns an error if the types being gathered
        differ among the processes."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allgather(tensor))
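A straightforward way to avoid this error, sketched below under the same assumptions as the tests on this page (the test name is illustrative), is to cast every rank's tensor to a shared dtype before gathering.

    def test_mpi_allgather_cast_sketch(self):
        """Hypothetical variant: casting to a common dtype up front avoids
        the type-mismatch error exercised above."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())

            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones([17] * 3, dtype=dtype) * rank
            # Every rank now contributes float32, so the dtypes agree.
            gathered = mpi.allgather(tf.cast(tensor, tf.float32))
            session.run(gathered)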
Example #6
    def test_mpi_allgather_error(self):
        """Test that the allgather returns an error if any dimension besides
        the first is different among the tensors being gathered."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allgather(tensor))
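Since only the first dimension is allowed to vary (see Example #2), one hypothetical workaround, sketched here with an illustrative test name, is to flatten each rank's tensor to 1-D before gathering, moving all size differences into the first dimension.

    def test_mpi_allgather_flatten_sketch(self):
        """Hypothetical variant: flattening to 1-D moves every size
        difference into the first dimension, which allgather permits."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())

            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            # Reshape to a vector so only the (variable) first dim differs.
            gathered = mpi.allgather(tf.reshape(tensor, [-1]))
            session.run(gathered)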
Example #7
  def test_mpi_allgather_type_error(self):
    """Test that the allgather returns an error if the types being gathered
    differ among the processes."""
    with self.test_session() as session:
      rank = session.run(mpi.rank())
      size = session.run(mpi.size())

      # This test does not apply if there is only one worker.
      if size == 1:
        return

      tensor_size = [17] * 3
      dtype = tf.int32 if rank % 2 == 0 else tf.float32
      tensor = tf.ones(tensor_size, dtype=dtype) * rank
      with self.assertRaises(tf.errors.FailedPreconditionError):
        session.run(mpi.allgather(tensor))
Example #8
  def test_mpi_allgather_error(self):
    """Test that the allgather returns an error if any dimension besides
    the first is different among the tensors being gathered."""
    with self.test_session() as session:
      rank = session.run(mpi.rank())
      size = session.run(mpi.size())

      # This test does not apply if there is only one worker.
      if size == 1:
        return

      tensor_size = [17] * 3
      tensor_size[1] = 10 * (rank + 1)
      tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
      with self.assertRaises(tf.errors.FailedPreconditionError):
        session.run(mpi.allgather(tensor))
Example #9
  def test_mpi_allreduce_type_error(self):
    """Test that the allreduce raises an error if different ranks try to
    send tensors of different type."""
    with self.test_session() as session:
      rank = session.run(mpi.rank())
      size = session.run(mpi.size())

      # This test does not apply if there is only one worker.
      if size == 1:
        return

      # Same dimensions, different type
      dims = [17] * 3
      tensor = tf.ones(dims, dtype=tf.int32 if rank % 2 == 0 else tf.float32)
      with self.assertRaises(tf.errors.FailedPreconditionError):
        session.run(mpi.allreduce(tensor))
Example #10
    def test_mpi_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different type."""
        with mpi.Session() as session:
            rank = session.run(mpi.rank())
            size = session.run(mpi.size())

            # This test does not apply if there is only one worker.
            if size == 1:
                return

            # Same dimensions, different type
            dims = [17] * 3
            tensor = tf.ones(dims,
                             dtype=tf.int32 if rank % 2 == 0 else tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(mpi.allreduce(tensor))
Example #11
  def test_mpi_allgather(self):
    """Test that the allgather correctly gathers 1D, 2D, 3D tensors."""
    with self.test_session() as session:
      size = session.run(mpi.size())
      rank = session.run(mpi.rank())

      dtypes = tf.int32, tf.float32
      dims = 1, 2, 3
      for dtype, dim in itertools.product(dtypes, dims):
        tensor = tf.ones([17] * dim, dtype=dtype) * rank
        gathered = mpi.allgather(tensor)

        gathered_tensor = session.run(gathered)
        self.assertEqual(list(gathered_tensor.shape),
                         [17 * size] + [17] * (dim - 1))

        for i in range(size):
          rank_tensor = tf.slice(gathered_tensor, [i * 17] + [0] * (dim - 1),
                                 [17] + [-1] * (dim - 1))
          self.assertEqual(list(rank_tensor.shape), [17] * dim)
          self.assertTrue(session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                          "mpi.allgather produces incorrect gathered tensor")
Example #12
    def test_mpi_allgather(self):
        """Test that the allgather correctly gathers 1D, 2D, 3D tensors."""
        with mpi.Session() as session:
            size = session.run(mpi.size())
            rank = session.run(mpi.rank())

            dtypes = tf.int32, tf.float32
            dims = 1, 2, 3
            for dtype, dim in itertools.product(dtypes, dims):
                tensor = tf.ones([17] * dim, dtype=dtype) * rank
                gathered = mpi.allgather(tensor)

                gathered_tensor = session.run(gathered)
                self.assertEqual(list(gathered_tensor.shape),
                                 [17 * size] + [17] * (dim - 1))

                for i in range(size):
                    rank_tensor = tf.slice(gathered_tensor,
                                           [i * 17] + [0] * (dim - 1),
                                           [17] + [-1] * (dim - 1))
                    self.assertEqual(list(rank_tensor.shape), [17] * dim)
                    self.assertTrue(
                        session.run(tf.reduce_all(tf.equal(rank_tensor, i))),
                        "mpi.allgather produces incorrect gathered tensor")
Example #13
    def test_mpi_rank(self):
        """Test that the rank returned by mpi.rank() is correct."""
        true_rank, _ = mpi_env_rank_and_size()
        with mpi.Session() as session:
            rank = session.run(mpi.rank())
            self.assertEqual(true_rank, rank)
Example #14
  def test_mpi_rank(self):
   """Test that the rank returned by mpi.rank() is correct."""
   true_rank, _ = mpi_env_rank_and_size()
   with self.test_session() as session:
     rank = session.run(mpi.rank())
     self.assertEqual(true_rank, rank)
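Both rank tests call mpi_env_rank_and_size(), which is not shown on this page. A plausible stand-in (an assumption, not the project's actual helper) derives the rank and size from the environment variables that common MPI launchers export:

import os

def mpi_env_rank_and_size():
    """Hypothetical helper: read the MPI rank and size from launcher-set
    environment variables, falling back to a single-process default."""
    for rank_var, size_var in [('OMPI_COMM_WORLD_RANK', 'OMPI_COMM_WORLD_SIZE'),  # Open MPI
                               ('PMI_RANK', 'PMI_SIZE')]:                          # MPICH / Intel MPI
        rank, size = os.environ.get(rank_var), os.environ.get(size_var)
        if rank is not None and size is not None:
            return int(rank), int(size)
    return 0, 1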