Example #1
0
    def testSingleOutfeed(self):
        """Enqueue a single addition result on the IPU and dequeue it host-side.

        One enqueue is performed, so the dequeued feed contains exactly one
        entry, equal to ones + ones.
        """
        feed_id = next_feed_id()
        data_shape = [10, 10]

        with ops.device("/device:IPU:0"):
            lhs = array_ops.placeholder(np.float32, data_shape)
            rhs = array_ops.placeholder(np.float32, data_shape)
            total = math_ops.add(lhs, rhs)
            enqueue = gen_pop_datastream_ops.pop_datastream_outfeed_enqueue(
                [total],
                feed_id=feed_id,
                replication_factor=1,
                io_batch_size=1,
                output_shapes=[data_shape])

        # The dequeue side of the feed lives on the host.
        with ops.device('cpu'):
            dequeue = gen_pop_datastream_ops.pop_datastream_outfeed_dequeue(
                feed_id=feed_id,
                replication_factor=1,
                output_types=[np.float32],
                output_shapes=[data_shape])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            ones = np.ones(data_shape, np.float32)
            sess.run(enqueue, feed_dict={lhs: ones, rhs: ones})
            dequeued = sess.run(dequeue)
            # A single enqueue yields exactly one entry; 1 + 1 == 2 elementwise.
            self.assertEqual(len(dequeued[0]), 1)
            self.assertAllClose(dequeued[0][0], 2 * ones)
    def testTupleOutfeedGetLast(self):
        """Outfeed a tuple (add, sub) in 'get_last' mode.

        Two enqueues are performed; in 'get_last' mode the dequeue must
        return only the values from the second enqueue: 2 + 1 == 3 for the
        add output and 2 - 1 == 1 for the subtract output.
        """
        feed_name = next_feed_id()
        shape_1 = [10, 10]
        shape_2 = [4, 4]

        with ops.device("/device:IPU:0"):
            a = array_ops.placeholder(np.float32, shape_1)
            b = array_ops.placeholder(np.float32, shape_1)
            c = array_ops.placeholder(np.float32, shape_2)
            d = array_ops.placeholder(np.float32, shape_2)
            add = math_ops.add(a, b)
            # BUG FIX: math_ops has no `sub` in TF >= 1.0; the op was
            # renamed `subtract` in the 1.0 API rename.
            sub = math_ops.subtract(c, d)
            outfeed_op = gen_pop_datastream_ops.pop_datastream_outfeed_enqueue(
                [add, sub],
                feed_id=feed_name,
                replication_factor=1,
                outfeed_mode='get_last')

        with ops.device('cpu'):
            outfeed = gen_pop_datastream_ops.pop_datastream_outfeed_dequeue(
                feed_id=feed_name,
                replication_factor=1,
                output_types=[np.float32, np.float32],
                output_shapes=[shape_1, shape_2])

        # NOTE: the original defined an unused `get_result` helper and a
        # `result` list that was never read; both removed as dead code.
        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(outfeed_op,
                     feed_dict={
                         a: np.ones(shape_1, np.float32),
                         b: np.ones(shape_1, np.float32),
                         c: np.ones(shape_2, np.float32),
                         d: np.ones(shape_2, np.float32)
                     })
            sess.run(outfeed_op,
                     feed_dict={
                         a: 2 * np.ones(shape_1, np.float32),
                         b: np.ones(shape_1, np.float32),
                         c: 2 * np.ones(shape_2, np.float32),
                         d: np.ones(shape_2, np.float32)
                     })
            outfed = sess.run(outfeed)
            # 'get_last' returns one value per enqueued tensor, with no
            # iteration dimension.
            self.assertEqual(len(outfed), 2)
            self.assertEqual(outfed[0].shape, (10, 10))
            self.assertEqual(outfed[1].shape, (4, 4))
            self.assertAllClose(outfed[0], np.broadcast_to(3, [10, 10]))
            self.assertAllClose(outfed[1], np.broadcast_to(1, [4, 4]))
Example #3
0
    def testOutfeedGetLast(self):
        """Outfeed a single tensor in 'get_last' mode.

        After two enqueues, dequeuing must return only the result of the
        second enqueue: 3.1 + 2 elementwise.
        """
        feed_id = next_feed_id()
        data_shape = [2, 2]

        with ops.device("/device:IPU:0"):
            a = array_ops.placeholder(np.float32, data_shape)
            b = array_ops.placeholder(np.float32, data_shape)
            add = math_ops.add(a, b)
            enqueue = gen_pop_datastream_ops.pop_datastream_outfeed_enqueue(
                [add],
                feed_id=feed_id,
                replication_factor=1,
                io_batch_size=1,
                outfeed_mode='get_last',
                output_shapes=[data_shape])

        # Host-side dequeue, configured with the same 'get_last' mode.
        with ops.device('cpu'):
            outfeed_last = gen_pop_datastream_ops.pop_datastream_outfeed_dequeue(
                feed_id=feed_id,
                replication_factor=1,
                outfeed_mode='get_last',
                output_types=[np.float32],
                output_shapes=[data_shape])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            ones = np.ones(data_shape, np.float32)
            sess.run(enqueue, feed_dict={a: ones, b: ones})
            sess.run(enqueue, feed_dict={a: 3.1 * ones, b: 2 * ones})

            dequeued = sess.run(outfeed_last)
            # Only the most recent enqueue is returned, without an
            # iteration dimension.
            self.assertTrue(len(dequeued) == 1)
            self.assertEqual(dequeued[0].shape, (2, 2))
            self.assertAllClose(dequeued[0], (3.1 + 2) * ones)
Example #4
0
  def dequeue(self):
    """Create the host-side op that dequeues values from this outfeed.

    Running the returned op before anything has been enqueued would block,
    so this method refuses to build the op unless `enqueue` has already
    been called on this queue.

    The structure of the result mirrors what was enqueued (a single
    tensor, a tuple of tensors, or a dictionary of tensors), with extra
    leading dimensions depending on the queue's configuration:

    * If the `outfeed_mode` is `IPUOutfeedMode.ALL`, each tensor gains a
      leading dimension equal to the number of times it was enqueued. For
      example, a [4, 4] tensor enqueued inside a loop repeated 20 times
      dequeues with a leading dimension of 20.
    * If the `outfeed_mode` is `IPUOutfeedMode.LAST`, only the values from
      the final enqueue are returned, with no such iteration dimension.
    * If `replication_factor` is greater than 1, a replication dimension of
      that size is also present (after the iteration dimension, if any), so
      the individual values from each replicated graph are visible. With
      the default `replication_factor` of 1 no replication dimension is
      added.

    Combining the above: a [4, 4] tensor enqueued 20 times with
    replication_factor 2 dequeues as [20, 2, 4, 4] in ALL mode and
    [2, 4, 4] in LAST mode; a dictionary {"x": [4, 4], "y": [1]} enqueued
    40 times with replication_factor 8 dequeues as
    {"x": [40, 8, 4, 4], "y": [40, 8, 1]} in ALL mode and
    {"x": [8, 4, 4], "y": [8, 1]} in LAST mode.

    Returns:
      The dequeued values, reassembled into the enqueued structure.

    Raises:
      ValueError: if nothing has been enqueued to this outfeed yet.
    """
    # Guard clause: building the dequeue op for a never-enqueued feed
    # would produce an op that blocks forever when run.
    if not self.enqueued:
      raise ValueError(
          "Trying to dequeue an outfeed which has not been enqueued.")

    # The dequeue op always lives on the host.
    with ops.device('cpu'):
      flat_tensors = gen_pop_datastream_ops.pop_datastream_outfeed_dequeue(
          output_types=self._structure.flat_types,
          output_shapes=self._structure.flat_shapes,
          outfeed_mode=self._outfeed_mode.value,
          feed_id=self._feed_name,
          device_ordinal=self._device_ordinal,
          replication_factor=self._replication_factor)

    # Reassemble the flat list of dequeued tensors into the structure
    # (tensor / tuple / dict) that was originally enqueued.
    return self._structure.from_tensor_list(flat_tensors)