Example 1
  def testNoLookup(self):
    shape = [100000, 200]
    lookup_count = 4096

    host_embedding = embedding_ops.create_host_embedding(
        "my_host_embedding",
        shape,
        np.float32,
        optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(0.5))

    def my_net(i):
      # No embedding lookup is performed; the indices pass through unchanged.
      # The real check is that registering the host embedding does not time out.
      return i

    with ops.device('cpu'):
      i = array_ops.placeholder(np.int32, [lookup_count])

    with ipu.scopes.ipu_scope("/device:IPU:0"):
      r = ipu.ipu_compiler.compile(my_net, inputs=[i])

    cfg = ipu.utils.create_ipu_config(profiling=True,
                                      always_rearrange_copies_on_the_host=True)
    cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
    ipu.utils.configure_ipu_system(cfg)
    with sl.Session() as sess:
      i_h = np.arange(0, lookup_count).reshape([lookup_count])

      report = tu.ReportJSON(self, sess, configure_device=False)
      sess.run(variables.global_variables_initializer())
      report.reset()

      with host_embedding.register(sess):
        result = sess.run([r], {i: i_h})

      # Check the indices are correct, but the real test is no timeout.
      self.assertAllClose(result[0][0], i_h)
Example 2
def build_embeddings(opts, name, shape, is_training, seed_b):
    data_type = 'float32'
    if is_training:
        optimizer_spec = embedding_ops.HostEmbeddingOptimizerSpec(opts["learning_rate"])
    else:
        optimizer_spec = None
    return embedding_ops.create_host_embedding(
        name,
        shape=shape,
        dtype=getattr(np, data_type),
        optimizer_spec=optimizer_spec,
        initializer=tf.keras.initializers.glorot_uniform(seed=seed_b))
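A minimal call sketch; the learning rate, name, shape and seed below are illustrative values, not taken from the original code:

opts = {"learning_rate": 0.001}
word_embedding = build_embeddings(opts,
                                  name="word_embedding",
                                  shape=[50000, 128],
                                  is_training=True,
                                  seed_b=42)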
Example 3
 def _build_embedding(vocab_size,
                      embedding_size,
                      host_embeddings,
                      name="embedding"):
     if host_embeddings:
         embedding = embedding_ops.create_host_embedding(
             name,
             shape=[vocab_size, embedding_size],
             dtype=DTYPE,
             optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(0.03),
             initializer=tf.initializers.random_uniform(maxval=1.0,
                                                        dtype=DTYPE),
         )
     else:
         with tf.variable_scope("embedding", dtype=DTYPE,
                                use_resource=True) as scope:
             # Random embedding
             embedding = tf.get_variable(
                 name,
                 [vocab_size, embedding_size],
                 scope.dtype,
                 initializer=tf.initializers.random_uniform(
                     maxval=1.0, dtype=scope.dtype),
                 trainable=True,
             )
     return embedding
Example 4
    def testModel(self):
        shape = [1000, 256]
        lookup_count = 128
        lr = 1 / 2
        acc_factor = 2
        num_iterations = 6

        host_embedding = embedding_ops.create_host_embedding(
            "my_host_embedding",
            shape,
            np.float32,
            optimizer_spec=embedding_ops.HostEmbeddingSGDGAOptimizerSpec(
                lr, acc_factor))
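        # The device-side optimizer below reuses the same learning rate and
        # gradient accumulation factor as the host embedding's SGD+GA spec.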

        optimizer = ga.GradientAccumulationOptimizerV2(
            gd.GradientDescentOptimizer(lr), acc_factor)

        # A dummy model that has an embedding lookup and a matmul
        def model(i, w):
            a = host_embedding.lookup(i)
            return math_ops.matmul(a * a, w)

        def training(loss, i, w):
            # Reduce the model output to a scalar loss; this value is carried
            # through the loop and checked against zero at the end.
            loss = math_ops.reduce_mean(model(i, w))
            abs_mean_loss = math_ops.abs(loss)
            train = optimizer.minimize(abs_mean_loss)
            return loss, i, w, train

        def my_net(i, w):
            loss = array_ops.constant(0.0, shape=[])
            r = loops.repeat(num_iterations, training, [loss, i, w])
            return r

        with ops.device('cpu'):
            i = array_ops.placeholder(np.int32, [lookup_count])
            w = array_ops.placeholder(np.float32, [256, 128])

        with ipu.scopes.ipu_scope("/device:IPU:0"):
            r = ipu.ipu_compiler.compile(my_net, inputs=[i, w])

        cfg = ipu.utils.create_ipu_config(
            profiling=True, always_rearrange_copies_on_the_host=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)
        with sl.Session() as sess:
            i_h = np.arange(0, lookup_count).reshape([lookup_count])
            w_h = np.random.rand(256, 128).astype(np.float32)

            report = tu.ReportJSON(self, sess, configure_device=False)
            sess.run(variables.global_variables_initializer())
            report.reset()

            with host_embedding.register(sess):
                result = sess.run([r], {i: i_h, w: w_h})

            # Given the dumb model, and that the LR is the inverse of the
            # accumulation factor, we expect the "mean loss" to be zero.
            self.assertAllClose(result[0][0], 0.0)
Example 5
    def testTrainNoExec(self):
        shape = [100000, 200]
        lookup_count = 4096

        host_embedding = embedding_ops.create_host_embedding(
            "my_host_embedding",
            shape,
            np.float32,
            optimizer_spec=embedding_ops.HostEmbeddingSGDGAOptimizerSpec(
                0.5, 2))

        def my_net(i):
            out = host_embedding.lookup(i)

            return out

        with ops.device('cpu'):
            i = array_ops.placeholder(np.int32, [lookup_count])

        with ipu.scopes.ipu_scope("/device:IPU:0"):
            r = ipu.ipu_compiler.compile(my_net, inputs=[i])

        cfg = ipu.utils.create_ipu_config(
            profiling=True, always_rearrange_copies_on_the_host=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)
        with sl.Session() as sess:
            i_h = np.arange(0, lookup_count).reshape([lookup_count])

            report = tu.ReportJSON(self, sess, configure_device=False)
            sess.run(variables.global_variables_initializer())
            report.reset()

            with host_embedding.register(sess):
                # training=False should ignore the number of expected updates.
                result = sess.run([r], {i: i_h})

            v = sess.run(host_embedding.get_embedding_tensor())
            # Check the lookup result, but we are really interested that it doesn't hang.
            self.assertAllClose(result[0][0], np.take(v, i_h, axis=0))
# The host side queues
infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
    ds, feed_name="infeed", replication_factor=replication_factor)

# Set the learning rate
lr = 0.0001

# Create a momentum optimiser for replication
optimizer = cross_replica_optimizer.CrossReplicaOptimizer(
    tf.train.MomentumOptimizer(lr, 0.99))

# Create a host embedding object
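# partition_strategy="TOKEN" partitions the table by token (row) rather than
# along the encoding dimension when it is split across replicas.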
embedding = embedding_ops.create_host_embedding(
    "char_embedding",
    shape=[256, 256],
    dtype=tf.float32,
    partition_strategy="TOKEN",
    optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(lr))


# PopnnGRU is time-major
def gru(partials):
    gru_ = rnn_ops.PopnnGRU(256)
    partial_t = tf.transpose(partials, [1, 0, 2])
    gru_outputs_t, _ = gru_(partial_t)
    return tf.transpose(gru_outputs_t, [1, 0, 2])


# The main model
def model(sequence):
    # Perform a lookup on the embedding