Esempio n. 1
0
def make_graph(fc_weights):
    """Assemble the graph that runs the sparse-layer model under the IPU scope.

    ``opts``, ``dtype``, ``weights``, ``model``, ``create_sparse_layers`` and
    ``build_update_op`` are not defined here; they are resolved from the
    enclosing scope.

    Args:
        fc_weights: Forwarded unchanged to ``create_sparse_layers``.

    Returns:
        The tuple ``(graph, outfeed_queue, fc, x_fc, test_op, upload_sparse,
        dequeue)``.
    """
    ipu_graph = tf.Graph()

    with ipu_graph.as_default():
        results_queue = ipu_outfeed_queue.IPUOutfeedQueue()
        # The second result is stored in the shared ``weights`` mapping.
        sparse_fc, weights["init_weights"] = create_sparse_layers(
            opts, fc_weights)

        # Pre-bind every model argument except the input tensor.
        bound_model = partial(model,
                              fc=sparse_fc,
                              opts=opts,
                              outfeed_queue=results_queue,
                              dtype=dtype)

        with tf.device("cpu"):
            input_shape = [opts.batchsize, opts.input_size]
            x_fc = tf.placeholder(dtype, shape=input_shape)

        with ipu_scope('/device:IPU:0'):
            compiled_model = ipu_compiler.compile(bound_model, inputs=[x_fc])

        with tf.device("cpu"):
            sparse_fc.create_placeholders()

        with ipu_scope('/device:IPU:0'):
            sparse_upload = build_update_op(sparse_fc)

        # Collected here but not part of the returned tuple.
        sparse_feed = dict(sparse_fc.feed_dict())

        dequeue_op = results_queue.dequeue()
        ipu.utils.move_variable_initialization_to_cpu()

    return (ipu_graph, results_queue, sparse_fc, x_fc, compiled_model,
            sparse_upload, dequeue_op)
Esempio n. 2
0
    def testSendScalar(self, dtype):
        """Send one scalar through the IPU send op and check what is received."""
        # The send and the receive op are described by the same attributes.
        channel = dict(tensor_name="test_tensor",
                       send_device="/device:IPU:0",
                       send_device_incarnation=0,
                       recv_device="/device:CPU:0")

        with self.session() as sess:

            def device_fn(x):
                return gen_sendrecv_ops.ipu_send_to_host(x, **channel)

            scalar_in = array_ops.placeholder(dtype=dtype, shape=())

            with ipu_scope("/device:IPU:0"):
                send_op = ipu_compiler.compile(device_fn, inputs=[scalar_in])

            with ops.device("/device:CPU:0"):
                recv_op = gen_sendrecv_ops.ipu_recv_at_host(T=dtype,
                                                            **channel)

            utils.configure_ipu_system(utils.create_ipu_config())

            fetched = sess.run([send_op, recv_op], feed_dict={scalar_in: 1})
            sent, received = fetched

            # The send op has no output.
            self.assertIsNone(sent)
            # The received value is a scalar of the requested dtype equal to 1.
            self.assertEqual(dtype, received.dtype)
            self.assertEqual(0, len(received.shape))
            self.assertEqual(1, received)
Esempio n. 3
0
def training_graph(opts, training_data):
    """Build the training graph together with its session and summary writer.

    Args:
        opts: Options object; ``batches_per_step`` and ``logs_path`` are read
            here and the whole object is forwarded to the helpers.
        training_data: Object whose ``get_dataset(opts, is_training=True)``
            yields ``(dataset, iterator, placeholders)``.

    Returns:
        A ``GraphOps`` built from the graph, the session, the variable
        initializer, the fetch list ``[loss, train_summary, rmse]``, the
        placeholders, the infeed queue, the saver and the summary writer.
    """
    graph = tf.Graph()

    with graph.as_default():
        dataset, _train_iterator, placeholders = training_data.get_dataset(
            opts, is_training=True)
        infeed = ipu_infeed_queue.IPUInfeedQueue(dataset)

        def step(loss_so_far, rmse_so_far, *args, **kwargs):
            # The first infeed element holds the observed ratings.
            ratings = args[0]
            batch_loss, batch_rmse, apply_grads = graph_builder(
                opts,
                observed_ratings=ratings,
                learning_rate=placeholders["learning_rate"],
                type='TRAIN')
            # Tie the running sums to the weight update.
            with tf.control_dependencies([apply_grads]):
                new_loss = loss_so_far + batch_loss
                new_rmse = rmse_so_far + batch_rmse
                return new_loss, new_rmse

        def comp_fn():
            initial_sums = [tf.constant(0, tf.float32),
                            tf.constant(0, tf.float32)]
            return loops.repeat(opts.batches_per_step, step, initial_sums,
                                infeed)

        with ipu_scope('/device:IPU:0'):
            total_loss, sum_rmse_metric = ipu_compiler.compile(comp_fn, [])

        # Average the accumulated values over the batches of one step.
        rmse = sum_rmse_metric / opts.batches_per_step
        loss = total_loss / opts.batches_per_step

        tf.summary.scalar("loss", loss)
        tf.summary.scalar("learning_rate", placeholders["learning_rate"])
        tf.summary.scalar("RMSE/train", rmse)

        summaries = tf.summary.merge_all()
        saver = tf.train.Saver()

        ipu_utils.move_variable_initialization_to_cpu()
        init_op = tf.global_variables_initializer()

    writer = tf.summary.FileWriter(opts.logs_path + '/train',
                                   graph=graph,
                                   flush_secs=30)

    ipu_options = util.get_config(opts)
    ipu_options.configure_ipu_system()
    session = tf.Session(graph=graph)

    fetches = [loss, summaries, rmse]
    return GraphOps(graph, session, init_op, fetches, placeholders, infeed,
                    saver, writer)
Esempio n. 4
0
 def body(img):
     """Import the optimized network for ``img`` and enqueue its output.

     When ``mode`` is ``'sharded'`` the import happens inside the autoshard
     scope, automatic sharding is applied, and the enqueue op is given the
     same ``_XlaSharding`` attribute as the op producing the output.

     Returns:
         The outfeed enqueue op.
     """
     def import_network():
         # Splice the frozen graph into the current graph with ``img`` as input.
         imported = tf.import_graph_def(
             network.optimized_graph,
             input_map={network.graph_input: img},
             name="optimized",
             return_elements=[network.graph_output])
         return imported[0]

     with scopes.ipu_scope('/device:IPU:0'):
         if mode != 'sharded':
             # Note that enqueue happens on the IPU.
             return outfeed_queue.enqueue(import_network())

         with autoshard.ipu_autoshard():
             probs = import_network()
         autoshard.automatic_sharding(num_shards=num_ipus,
                                      input_ts=img,
                                      loss_ts=probs,
                                      frozen_inference=True)
         enqueue_op = outfeed_queue.enqueue(probs)
         # Copy the sharding attribute of the output's producer op.
         sharding_attr = attr_value_pb2.AttrValue(
             s=probs.op.get_attr('_XlaSharding'))
         enqueue_op._set_attr(sharding._XLA_SHARDING, sharding_attr)
         # Note that enqueue happens on the IPU.
         return enqueue_op
Esempio n. 5
0
 def device_fn(x):
     """Return ``x * x + 2.0 + 3.0``; the constants are created inside
     the outside-compilation scope."""
     with ipu_scope("/device:IPU:0"):
         squared = x * x
         with outside_compilation_scope():
             two = constant_op.constant(2.0, dtype=dtypes.float32)
             three = constant_op.constant(3.0, dtype=dtypes.float32)
         partial_sum = squared + two
         return partial_sum + three
Esempio n. 6
0
def validation_graph(model, opts):
    """Build the validation graph, configure the IPU system and open a session.

    Args:
        model: Forwarded to ``validation_graph_builder``.
        opts: Options mapping; see the keys read below.

    Returns:
        A ``train.GraphOps`` holding the graph, session, initializer, the
        fetch list ``[accuracy]``, the infeed queue and the saver (remaining
        slots are ``None``).
    """
    replication = opts['replicas'] * opts['shards']

    graph = tf.Graph()
    with graph.as_default():
        # datasets must be defined outside the ipu device scope
        valid_iterator = ipu_infeed_queue.IPUInfeedQueue(
            dataset.data(opts, is_training=False),
            feed_name='validation_feed',
            replication_factor=replication)

        def body(total_accuracy, image, label):
            batch_accuracy = validation_graph_builder(model, image, label,
                                                      opts)
            # Add this batch's share of the per-step mean.
            contribution = (tf.cast(batch_accuracy, tf.float32) /
                            opts["validation_batches_per_step"])
            return total_accuracy + contribution

        def comp_fn():
            looped = loops.repeat(int(opts["validation_batches_per_step"]),
                                  body, [tf.constant(0, tf.float32)],
                                  valid_iterator)
            if opts['replicas'] > 1:
                # Average the result over all replicas.
                summed = cross_replica_ops.cross_replica_sum(looped)
                looped = summed / (opts['replicas'] * opts['shards'])
            return looped

        with ipu_scope('/device:IPU:0'):
            (accuracy, ) = xla.compile(comp_fn, [])

        # Express the accuracy as a percentage.
        accuracy = 100 * accuracy

        saver = tf.train.Saver()

        ipu.utils.move_variable_initialization_to_cpu()
        init_op = tf.global_variables_initializer()

    # A global setting is used only when exactly one proportion is given.
    proportions = opts["available_memory_proportion"]
    if proportions and len(proportions) == 1:
        global_amp = proportions[0]
    else:
        global_amp = None

    ipu_options = get_config(
        ipu_id=opts["select_ipu"],
        prng=not opts["no_stochastic_rounding"],
        shards=1,
        number_of_replicas=opts['replicas'] * opts['shards'],
        max_cross_replica_buffer_size=opts["max_cross_replica_buffer_size"],
        fp_exceptions=opts["fp_exceptions"],
        xla_recompute=opts["xla_recompute"],
        seed=opts["seed"],
        profile=opts['profile'],
        availableMemoryProportion=global_amp,
        stable_norm=opts["stable_norm"])
    ipu.utils.configure_ipu_system(ipu_options)

    session = tf.Session(graph=graph, config=tf.ConfigProto())

    return train.GraphOps(graph, session, init_op, [accuracy], None,
                          valid_iterator, None, saver, None)
Esempio n. 7
0
 def device_fn(x):
     """Square ``x`` and print the result to stdout from within the
     outside-compilation scope; returns the squared tensor."""
     with ipu_scope("/device:IPU:0"):
         squared = x * x
         with outside_compilation_scope():
             logging_ops.print_v2(squared, output_stream=sys.stdout, end="")
         return squared
Esempio n. 8
0
 def device_fn(x):
     """Return ``(x*x + 1.0) + (x*x + 1.0) + 1.0``; the two inner sums and
     their addition are built inside the outside-compilation scope."""
     with ipu_scope("/device:IPU:0"):
         squared = x * x
         with outside_compilation_scope():
             first = squared + 1.0
             second = squared + 1.0
             combined = first + second
         return combined + 1.0
Esempio n. 9
0
def validation_graph(opts, valid_data):
    """Build the validation graph together with its session and writer.

    Side effect: sets ``opts.apply_dropout`` to ``False``.

    Args:
        opts: Options object; ``multiprocessing``,
            ``validation_batches_per_step`` and ``logs_path`` are read here.
        valid_data: Object whose ``get_dataset(opts, is_training=False)``
            yields a 3-tuple whose first element is the dataset.

    Returns:
        A ``GraphOps`` built from the graph, the session, the variable
        initializer, the fetch list ``[rmse, valid_summary]``, ``None``, the
        infeed queue, the saver and the summary writer.
    """
    # Do not apply dropout during validation
    opts.apply_dropout = False

    graph = tf.Graph()
    # Device ordinal 0 when multiprocessing is enabled, 1 otherwise.
    if opts.multiprocessing:
        device_ordinal = 0
    else:
        device_ordinal = 1

    with graph.as_default():
        dataset, _unused_iterator, _unused_placeholders = \
            valid_data.get_dataset(opts, is_training=False)
        infeed = ipu_infeed_queue.IPUInfeedQueue(
            dataset, device_ordinal=device_ordinal)

        def step(rmse_so_far, *args, **kwargs):
            # The first infeed element is split in two along axis 1.
            observed, truth = tf.split(args[0],
                                       num_or_size_splits=2,
                                       axis=1)
            batch_rmse = graph_builder(opts,
                                       observed_ratings=observed,
                                       ground_truth=truth,
                                       type='VALID')
            return rmse_so_far + batch_rmse

        def comp_fn():
            return loops.repeat(opts.validation_batches_per_step, step,
                                [tf.constant(0, tf.float32)], infeed)

        with ipu_scope('/device:IPU:{}'.format(device_ordinal)):
            (sum_rmse_metric,) = ipu_compiler.compile(comp_fn, [])

        # Mean RMSE over the batches of one validation step.
        rmse = sum_rmse_metric / opts.validation_batches_per_step

        summary = tf.summary.scalar("RMSE/validation", rmse)
        saver = tf.train.Saver()

        ipu_utils.move_variable_initialization_to_cpu()
        init_op = tf.global_variables_initializer()

    writer = tf.summary.FileWriter(opts.logs_path + '/valid',
                                   graph=graph,
                                   flush_secs=30)

    ipu_options = util.get_config(opts)
    # The IPU system is configured here only in multiprocessing mode.
    if opts.multiprocessing:
        ipu_options.configure_ipu_system()
    session = tf.Session(graph=graph)

    fetches = [rmse, summary]
    return GraphOps(graph, session, init_op, fetches, None, infeed, saver,
                    writer)
Esempio n. 10
0
 def device_fn(x):
     """Square ``x``, double it in float64 inside the outside-compilation
     scope, then add 2 in the enclosing ``dtype``."""
     with ipu_scope("/device:IPU:0"):
         squared = math_ops.cast(x * x, dtype=dtype)
         with outside_compilation_scope():
             # Use float64 which is not supported on IPU
             wide = math_ops.cast(squared, dtype=dtypes.float64)
             wide = wide * constant_op.constant(2.0, dtype=dtypes.float64)
             doubled = math_ops.cast(wide, dtype=dtype)
         result = doubled + constant_op.constant(2, dtype=dtype)
     return result
Esempio n. 11
0
 def device_fn(x):
     """Double ``x`` five times; the second and fourth doubling are built
     inside an outside-compilation scope."""
     with ipu_scope("/device:IPU:0"):
         step_1 = x * 2.0
         with outside_compilation_scope():
             step_2 = step_1 * 2.0
         step_3 = step_2 * 2.0
         with outside_compilation_scope():
             step_4 = step_3 * 2.0
         step_5 = step_4 * 2.0
     return step_5
Esempio n. 12
0
 def device_fn(x1, x2):
     """Return ``((x1*x1) + 1.0) * 1.0`` and ``((x2*x2) + 2.0) * 2.0``; the
     additions are built inside the outside-compilation scope."""
     with ipu_scope("/device:IPU:0"):
         sq_1 = x1 * x1
         sq_2 = x2 * x2
         with outside_compilation_scope():
             shifted_1 = sq_1 + 1.0
             shifted_2 = sq_2 + 2.0
         out_1 = shifted_1 * 1.0
         out_2 = shifted_2 * 2.0
         return out_1, out_2
Esempio n. 13
0
 def body(img):
     """Import the optimized network with ``img`` as its input and return
     the op that enqueues the network output on the outfeed queue."""
     with scopes.ipu_scope('/device:IPU:0'):
         imported = tf.import_graph_def(
             network.optimized_graph,
             input_map={network.graph_input: img},
             name="optimized",
             return_elements=[network.graph_output])
         network_output = imported[0]
         # Note that enqueue happens on the IPU.
         return outfeed_queue.enqueue(network_output)
Esempio n. 14
0
    def test_log_twice_not_supported(self):
        """A second ``log`` call on the same hook must raise ``RuntimeError``."""
        logging_hook = IPULoggingTensorHook(at_end=True)
        expected_error = (
            "Cannot use this hook object's log function more than once")

        with ipu_scope("/device:IPU:0"):
            tensor = constant_op.constant(0.0)
            # The first call is allowed.
            logging_hook.log(tensor)
            # The second call on the same hook is expected to fail.
            with self.assertRaisesRegex(RuntimeError, expected_error):
                logging_hook.log(tensor)
Esempio n. 15
0
 def without_outside_scope(x1, x2):
     """Scale the inputs, add a constant to each, then add that sum back to
     the scaled input — all directly under the IPU scope."""
     with ipu_scope("/device:IPU:0"):
         scaled_1 = x1 * 1.0
         scaled_2 = x2 * 2.0
         one = constant_op.constant(1.0, dtype=dtypes.float32)
         offset_1 = one + scaled_1
         two = constant_op.constant(2.0, dtype=dtypes.float32)
         offset_2 = two + scaled_2
         out_1 = scaled_1 + offset_1
         out_2 = scaled_2 + offset_2
         return out_1, out_2
Esempio n. 16
0
    def testResetSeed(self):
        """Check that every dropout output row is distinct.

        Two dropouts are applied per loop iteration and their results are
        sent through the outfeed. Across ``REPLICAS`` replicas, ``REPEATS``
        iterations and ``EXECS`` executions the test expects
        ``2 * REPLICAS * REPEATS * EXECS`` rows, all different from each other.
        """
        # The dataset for feeding the graphs
        ds = dataset_ops.Dataset.from_tensors(
            array_ops.constant(1.0, shape=[SIZE]))
        ds = ds.map(lambda x: [x, x])
        ds = ds.repeat()

        # The host side queues
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            ds, feed_name="infeed", replication_factor=REPLICAS)
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name="outfeed", replication_factor=REPLICAS)

        # The device side
        def body(x1, x2):
            d1 = rand_ops.dropout(x1)
            d2 = rand_ops.dropout(x2)
            outfeed = outfeed_queue.enqueue({'d1': d1, 'd2': d2})
            return outfeed

        def my_net():
            r = loops.repeat(REPEATS, body, [], infeed_queue)
            return r

        with scopes.ipu_scope('/device:IPU:0'):
            res = ipu_compiler.compile(my_net, inputs=[])

        # The outfeed dequeue has to happen after the outfeed enqueue
        dequeue_outfeed = outfeed_queue.dequeue()

        # Configure the hardware
        config = utils.create_ipu_config(profiling=True)
        config = utils.auto_select_ipus(config, REPLICAS)
        config = utils.set_floating_point_behaviour_options(config)
        utils.configure_ipu_system(config)

        with session.Session() as sess:
            # Byte images of every row seen; duplicates collapse in the set.
            res_all = set()
            total = 0

            sess.run(infeed_queue.initializer)

            for _ in range(EXECS):
                sess.run(res)
                outfed_result = sess.run(dequeue_outfeed)
                rows = np.array(list(outfed_result.values())).reshape(
                    [-1, SIZE])
                for r in rows:
                    total += 1
                    # ``tobytes`` replaces the deprecated ``tostring`` alias
                    # (removed in NumPy 2.0); the returned bytes are the same.
                    res_all.add(r.tobytes())

            # 2 dropouts per replica * REPLICAS * REPEATS * EXECS
            expected = 2 * REPLICAS * REPEATS * EXECS
            self.assertEqual(total, expected)
            self.assertEqual(len(res_all), expected)
Esempio n. 17
0
def generic_train_graph(opts, is_training):
    """Build the training graph, configure the IPU system and open a session.

    ``seed`` is not defined in this function; it is resolved from the
    enclosing scope.

    Args:
        opts: Options mapping; ``use_synthetic_data``, ``replicas``,
            ``batches_per_step`` and ``use_ipu_model`` are read here.
        is_training: Forwarded to ``id_embedding``.

    Returns:
        ``(GraphOps, uid_embedding, mid_embedding, cat_embedding)`` where the
        ``GraphOps`` holds the session, initializer, the averaged
        ``[loss, aux_loss, accuracy]`` ops, placeholders, infeed queue,
        outfeed (``None``) and saver.
    """
    data_type = 'float32'
    graph = tf.Graph()
    with graph.as_default():
        placeholders = {
            "learning_rate": tf.compat.v1.placeholder(data_type, shape=[]),
        }
        uid_embedding, mid_embedding, cat_embedding = id_embedding(
            opts, is_training, seed)

        if opts['use_synthetic_data']:
            dataset_train = get_synthetic_dataset(opts)
        else:
            dataset_train = get_dataset_embed(opts, is_training=True)

        infeed_train = ipu_infeed_queue.IPUInfeedQueue(
            dataset_train,
            feed_name='DIN_dataset_infeed_train',
            replication_factor=(opts['replicas']))

        def body(total_loss, total_aux_loss, total_accuracy, uids, mids, cats,
                 mid_his, cat_his, mid_mask, target, seqlen):
            _prob, loss, aux_loss, accuracy, grad_op = graph_builder(
                opts, uid_embedding, mid_embedding, cat_embedding,
                placeholders['learning_rate'], uids, mids, cats, mid_his,
                cat_his, mid_mask, target, seqlen, use_negsampling=False)

            # Tie the running sums to the gradient update.
            with tf.control_dependencies([grad_op]):
                new_loss = total_loss + loss
                new_aux_loss = total_aux_loss + aux_loss
                new_accuracy = total_accuracy + accuracy
                return new_loss, new_aux_loss, new_accuracy

        def comp_fn():
            # One zero constant is shared by all three accumulators.
            zero = tf.constant(0, np.float32)
            return loops.repeat(opts['batches_per_step'], body,
                                [zero, zero, zero], infeed_train)

        with ipu_scope('/device:IPU:0'):
            outputs_train = ipu_compiler.compile(comp_fn, [])
            # Average each accumulated value over the batches of one step.
            avg_loss, avg_aux_loss, avg_accuracy = [
                summed / opts['batches_per_step'] for summed in outputs_train
            ]
            outfeed = None

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"
    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(
        ipu_options, combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(ipu_options,
                                                  allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    if seed is not None:
        utils.reset_ipu_seed(seed)

    ops_train = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.compat.v1.Session(graph=graph)

    graph_ops = GraphOps(sess, init, ops_train, placeholders, infeed_train,
                         outfeed, saver)
    return graph_ops, uid_embedding, mid_embedding, cat_embedding
Esempio n. 18
0
def training_graph(model, opts, iterations_per_step=1):
    """Build the training graph, configure the IPU system and open a session.

    Args:
        model: Forwarded to ``training_step_with_infeeds_and_outfeeds``.
        opts: Options mapping; see the keys read below.
        iterations_per_step: Forwarded to
            ``training_step_with_infeeds_and_outfeeds``.

    Returns:
        A ``GraphOps`` built from the graph, the session, the variable
        initializer, the fetch list ``[train]``, the placeholders, the infeed
        queue, the dequeued outfeed and the saver.
    """
    graph = tf.Graph()
    with graph.as_default():
        precision_parts = opts["precision"].split('.')
        # NOTE(review): this compares the list returned by ``split`` with the
        # string '16', which is never equal, so float32 is always selected.
        # Confirm which component of the precision string was intended.
        if precision_parts == '16':
            datatype = tf.float16
        else:
            datatype = tf.float32

        learning_rate = tf.placeholder(datatype, shape=[])
        placeholders = {'learning_rate': learning_rate}

        # datasets must be defined outside the ipu device scope
        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            dataset.data(opts, is_training=True),
            feed_name='training_feed',
            replication_factor=opts['replicas'])
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name="outfeed", replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):
            train_op = training_step_with_infeeds_and_outfeeds(
                train_iterator, outfeed_queue, model, opts, learning_rate,
                iterations_per_step)

        outfeed = outfeed_queue.dequeue()

        logging.print_trainable_variables(opts)

        saver = tf.train.Saver(max_to_keep=999999)

        ipu.utils.move_variable_initialization_to_cpu()
        init_op = tf.global_variables_initializer()

    # A global setting is used only when exactly one proportion is given.
    proportions = opts["available_memory_proportion"]
    if proportions and len(proportions) == 1:
        global_amp = proportions[0]
    else:
        global_amp = None

    ipu_options = get_config(
        ipu_id=opts["select_ipu"],
        prng=not opts["no_stochastic_rounding"],
        shards=opts["shards"],
        number_of_replicas=opts['replicas'],
        max_cross_replica_buffer_size=opts["max_cross_replica_buffer_size"],
        fp_exceptions=opts["fp_exceptions"],
        xla_recompute=opts["xla_recompute"],
        seed=opts["seed"],
        availableMemoryProportion=global_amp)

    ipu.utils.configure_ipu_system(ipu_options)
    session = tf.Session(graph=graph, config=tf.ConfigProto())

    return GraphOps(graph, session, init_op, [train_op], placeholders,
                    train_iterator, outfeed, saver)
Esempio n. 19
0
            def device_fn(x):
                """Return ``(x*x) * ((x*x) + 1.0)``; the ``+ 1.0`` is built
                inside the outside-compilation scope."""
                with ipu_scope("/device:IPU:0"):
                    squared = x * x  # 4

                    with outside_compilation_scope():
                        received = squared + 1.0  # 5

                    # Use the squared value after receiving the other operand
                    # and make sure it still holds the correct value (i.e. it
                    # is not overwritten by the receive, in which case we
                    # would get 25).
                    return squared * received  # 20
Esempio n. 20
0
 def device_fn(x):
     """Double ``x``, add a float64 constant of shape ``(2,)`` inside the
     outside-compilation scope, cast back to float32 and add 2.0."""
     with ipu_scope("/device:IPU:0"):
         doubled = x + x
         with outside_compilation_scope():
             # Use float64 which is not supported on IPU
             wide = math_ops.cast(doubled, dtype=dtypes.float64)
             offset = constant_op.constant(2.0,
                                           dtype=dtypes.float64,
                                           shape=(2, ))
             wide = wide + offset
             narrowed = math_ops.cast(wide, dtype=dtypes.float32)
         result = narrowed + 2.0
     return result
Esempio n. 21
0
    def testSendFromTwoEngines(self):
        """Send from two separately compiled functions over distinct tensor
        names and check that each receive op gets the matching value."""
        with self.session() as sess:

            def make_device_fn(i):
                def device_fn(x):
                    return gen_sendrecv_ops.ipu_send_to_host(
                        x,
                        tensor_name="tensor_{}".format(i),
                        send_device="/device:IPU:0",
                        send_device_incarnation=0,
                        recv_device="/device:CPU:0")

                return device_fn

            def make_recv(i):
                # Receive op paired with the send op of the same index.
                return gen_sendrecv_ops.ipu_recv_at_host(
                    T=dtypes.float32,
                    tensor_name="tensor_{}".format(i),
                    send_device="/device:IPU:0",
                    send_device_incarnation=0,
                    recv_device="/device:CPU:0")

            input_1 = array_ops.placeholder(dtype=dtypes.float32, shape=())
            input_2 = array_ops.placeholder(dtype=dtypes.float32, shape=())

            with ipu_scope("/device:IPU:0"):
                send_1 = ipu_compiler.compile(make_device_fn(1),
                                              inputs=[input_1])
                send_2 = ipu_compiler.compile(make_device_fn(2),
                                              inputs=[input_2])

            with ops.device("/device:CPU:0"):
                recv_1 = make_recv(1)
                recv_2 = make_recv(2)

            utils.configure_ipu_system(utils.create_ipu_config())

            fetches = [send_1, send_2, recv_1, recv_2]
            # Test it a couple of times to verify the communication channel is reusable.
            for i in range(2):
                feeds = {input_1: i, input_2: i + 1}
                _, _, result_1, result_2 = sess.run(fetches, feed_dict=feeds)
                self.assertEqual(i, result_1)
                self.assertEqual(i + 1, result_2)
Esempio n. 22
0
def build_train_op(previous_loss, *infeed_data):
    """Construct the loss and optimizer and return the accumulated loss.

    Args:
        previous_loss: Running loss that this step's loss is added to.
        *infeed_data: Forwarded to ``create_policy``; the second-to-last
            element also weights the policy output in the loss.

    Returns:
        ``previous_loss`` plus this step's loss, where the loss is read only
        after the training op has run.
    """
    with ipu_scope("/device:IPU:0"):
        action_prob = create_policy(*infeed_data)
        weighted = action_prob * infeed_data[-2]
        step_loss = tf.reduce_sum(weighted)

        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        if args.accumulate_grad:
            optimizer = (
                gradient_accumulation_optimizer.GradientAccumulationOptimizer(
                    optimizer, num_mini_batches=args.num_mini_batches))
        optimizer = cross_replica_optimizer.CrossReplicaOptimizer(optimizer)

        train_op = optimizer.minimize(step_loss)
        # Make the returned loss depend on the weight update.
        with tf.control_dependencies([train_op]):
            synced_loss = tf.identity(step_loss)
        return previous_loss + synced_loss
Esempio n. 23
0
def train():
    """Build a small infeed/outfeed graph, run one step and print the result.

    ``get_data_set``, ``model2`` and ``SEED`` are resolved from the enclosing
    module. The loop body runs ``model2`` on the ``time_steps_ph`` placeholder
    and enqueues its output under the key ``'z3'``; a single step is executed
    with the placeholder fed the value 3 and the dequeued result is printed.
    """
    graph = tf.Graph()
    with graph.as_default():
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(get_data_set(),
                                                       feed_name="infeed")
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name='outfeed')
        time_steps_ph = tf.placeholder(tf.int32, shape=[])

        def compile_fn():
            def body(x, y):
                # The infeed elements are accepted but not used by the model.
                z3 = model2(time_steps_ph)
                return outfeed_queue.enqueue({'z3': z3})

            return loops.repeat(1, body, [], infeed_queue)

        # Build the compiled computation under the IPU device scope, as the
        # other graph builders do.
        with ipu_scope('/device:IPU:0'):
            outputs = ipu_compiler.compile(compile_fn, [])

        dequeue_outfeed = outfeed_queue.dequeue()

        # Create the initializer only after the model has been built so that
        # any variables it creates are covered.
        utils.move_variable_initialization_to_cpu()
        init = tf.global_variables_initializer()

    ipu_options = utils.create_ipu_config(
        profiling=False,
        profile_execution=False,
        max_cross_replica_sum_buffer_size=10000000,
        max_inter_ipu_copies_buffer_size=10000000)
    ipu_options = utils.auto_select_ipus(ipu_options, 1)
    utils.configure_ipu_system(ipu_options)
    utils.reset_ipu_seed(SEED)

    # The context manager closes the session when the step is done.
    with tf.Session(graph=graph) as sess:
        sess.run(init)
        sess.run(infeed_queue.initializer)

        # Execute a single step and show what came out of the outfeed.
        sess.run(outputs, feed_dict={time_steps_ph: 3})
        result = sess.run(dequeue_outfeed)
        print(result)
Esempio n. 24
0
    def test_print_tensor(self):
        """Log a named constant through the hook and check the logged text."""
        logging_hook = IPULoggingTensorHook(at_end=True)

        def model():
            value = constant_op.constant(42.0, name="foo")
            return logging_hook.log(value)

        with ipu_scope("/device:IPU:0"):
            compiled_model = ipu_compiler.compile(model)

        # Capture ``tf_logging.info`` calls while the session runs the model.
        with test.mock.patch.object(tf_logging, "info", self.mock_log), \
                MonitoredTrainingSession(hooks=[logging_hook]) as mon_sess:
            mon_sess.run(compiled_model)

        logged_text = str(self.logged_message)
        self.assertRegex(logged_text, "foo:0 = 42.0")
Esempio n. 25
0
def main():
    """Compile and run ``my_graph`` with a selectable IPU connection type.

    The ``--connection_type`` option (``ALWAYS``, ``ON_DEMAND`` or ``NEVER``,
    default ``ALWAYS``) is mapped through ``device_connection_type`` and
    applied to the IPU configuration. The result of the run is printed; with
    ``NEVER`` the "configured for compilation only" error is expected and
    reported as ``Compiled``. Other errors are printed with an ``ERROR:``
    prefix.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--connection_type",
                        choices=['ALWAYS', 'ON_DEMAND', 'NEVER'],
                        help="Specify connection type")
    parser.set_defaults(connection_type='ALWAYS')
    opts = parser.parse_args()

    with tf.device("cpu"):
        pa = tf.compat.v1.placeholder(np.float32, [2], name="a")
        pb = tf.compat.v1.placeholder(np.float32, [2], name="b")
        pc = tf.compat.v1.placeholder(np.float32, [2], name="c")

    # Create the IPU section of the graph.
    with scopes.ipu_scope("/device:IPU:0"):
        out = ipu_compiler.compile(my_graph, [pa, pb, pc])

    # Define the feed_dict input data.
    fd = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}

    # Connection type from options.
    connection_type = device_connection_type(opts.connection_type)

    cfg = utils.create_ipu_config()
    cfg = utils.auto_select_ipus(cfg, 1)
    cfg = utils.set_ipu_connection_type(cfg,
                                        connection_type,
                                        1)
    utils.configure_ipu_system(cfg)

    # Run the session.
    # If running with DeviceConnectionType.NEVER then anticipate the
    # specific exception with message "configured for compilation only".
    with tf.compat.v1.Session() as sess:
        try:
            result = sess.run(out, fd)
            print(result)
        except tf.errors.InvalidArgumentError as invalid_arg_exception:
            compile_only = (
                connection_type == utils.DeviceConnectionType.NEVER and
                "configured for compilation only" in
                invalid_arg_exception.message)
            if compile_only:
                print("Compiled")
            else:
                print("ERROR: {}".format(invalid_arg_exception.message))
        except Exception as general_exception:
            # Catch ``Exception`` rather than everything so that
            # KeyboardInterrupt and SystemExit still propagate. The exception
            # type is printed.
            print("ERROR: {}".format(type(general_exception)))
Esempio n. 26
0
  def testSyntheticDataWithOutfeeds(self):
    """Run an infeed/outfeed loop with the synthetic-data Poplar flags set
    and check that the dequeued ``'d1'`` entry has length 0."""
    extra_flags = [
        " --use_ipu_model",
        " --use_synthetic_data",
        " --synthetic_data_initializer=random",
    ]
    # Append the extra flags to whatever is already set in the environment.
    poplar_flags = "".join([os.environ.get("TF_POPLAR_FLAGS", "")] +
                           extra_flags)

    with test.mock.patch.dict("os.environ", {"TF_POPLAR_FLAGS": poplar_flags}):

      with ops.device('cpu'):
        # The dataset for feeding the graphs
        ds = tf.data.Dataset.from_tensors(tf.constant(1.0, shape=[10]))
        ds = ds.map(lambda x: [x, x])
        ds = ds.repeat()

        # The host side queues
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(ds, feed_name="infeed2")
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="outfeed2")

      # The device side main
      def body(x1, x2):
        total = x1 + x2
        difference = x1 - x2
        return outfeed_queue.enqueue({'d1': total, 'd2': difference})

      def my_net():
        return loops.repeat(5, body, [], infeed_queue)

      with scopes.ipu_scope('/device:IPU:0'):
        run_loop = ipu_compiler.compile(my_net, inputs=[])

      # The outfeed dequeue has to happen after the outfeed enqueue
      dequeue_outfeed = outfeed_queue.dequeue()

      # Configure the hardware
      config = utils.auto_select_ipus(utils.create_ipu_config(), 1)
      utils.configure_ipu_system(config)

      with tf.Session() as sess:
        sess.run(infeed_queue.initializer)
        sess.run(run_loop)
        dequeued = sess.run(dequeue_outfeed)
        self.assertAllEqual(len(dequeued['d1']), 0)
Esempio n. 27
0
def testInput():
    """Run the frozen graph stored in ``model.pb`` and print its throughput.

    The IPU system is configured with profiling and the Poplar text report
    enabled and one automatically selected IPU.  The graph definition is read
    from ``model.pb`` (relative to the working directory), the tensor
    ``InceptionV3/Predictions/Softmax:0`` is evaluated with the images
    returned by ``getExamples()`` fed to ``input:0``, and the probabilities,
    elapsed wall-clock time, example count and images-per-second are printed.
    """
    import time

    # Build ONE configuration: enable profiling first, then select the IPU on
    # that same object, so neither setting is thrown away before it is applied.
    config = utils.create_ipu_config(profiling=True,
                                     use_poplar_text_report=True)
    config = utils.auto_select_ipus(config, 1)
    utils.configure_ipu_system(config)

    # config = utils.set_convolution_options(config, {"partialsType": str('half')})
    # config = utils.set_matmul_options(config, {"partialsType": str('half')})

    gdv = tf.Graph()
    with gdv.as_default():
        g1 = tf.GraphDef()
        # Load model with pywrap instead? https://github.com/graphcore/examples/blob/master/applications/tensorflow/cnns/training/weight_avg.py#L33
        with tf.gfile.GFile('model.pb', 'rb') as fid:
            serialized_graph = fid.read()
            g1.ParseFromString(serialized_graph)
            tf.import_graph_def(g1, name='')

    with tf.Session(graph=gdv) as sess:
        inp_tensor = gdv.get_tensor_by_name('input:0')
        out_tensor = gdv.get_tensor_by_name(
            'InceptionV3/Predictions/Softmax:0')
        image_np = getExamples()
        #image_np = getSyntheticExamples()
        # Print arrays in full rather than in numpy's truncated summary form.
        np.set_printoptions(threshold=np.inf)

        tic = time.time()

        # This is new and doesn't crash
        # But doesn't seem to do anything either
        # NOTE(review): the graph was imported outside of this scope, so the
        # scope presumably has no influence on where the ops run -- confirm.
        with ipu_scope("/device:IPU:0"):
            proba = sess.run(out_tensor, {inp_tensor: image_np})
            print(proba)

        toc = time.time()
        duration = toc - tic
        num_images = len(image_np)

        print("Total time taken: {0} seconds".format(duration))
        print("Number of examples: {0}".format(num_images))
        print("Throughput: {0} im/s".format(num_images / duration))
Esempio n. 28
0
    def testSendMatrices(self, dtype):
        """Checks that intermediate matrices sent from the IPU reach the host.

        The device function squares its input matrix three times; after each
        squaring except the last, the intermediate value is sent to the host
        under the tensor name ``x_<step>``.  Feeding a 2x2 matrix of ones, the
        host must receive 2*ones and 8*ones, and the device output must be
        128*ones.
        """
        with self.session() as sess:
            num_steps = 3
            side = 2

            def device_fn(x):
                last_step = num_steps - 1
                for step in range(num_steps):
                    x = math_ops.matmul(x, x)
                    if step == last_step:
                        # The final value is returned, not sent.
                        continue
                    gen_sendrecv_ops.ipu_send_to_host(
                        x,
                        tensor_name="x_{}".format(step),
                        send_device="/device:IPU:0",
                        send_device_incarnation=0,
                        recv_device="/device:CPU:0")
                return x

            inputs = array_ops.placeholder(dtype=dtype, shape=(side, side))

            with ipu_scope("/device:IPU:0"):
                [device_out] = ipu_compiler.compile(device_fn, inputs=[inputs])

            # One host-side receive op per intermediate value that was sent.
            with ops.device("/device:CPU:0"):
                received = [
                    gen_sendrecv_ops.ipu_recv_at_host(
                        T=dtype,
                        tensor_name="x_{}".format(step),
                        send_device="/device:IPU:0",
                        send_device_incarnation=0,
                        recv_device="/device:CPU:0")
                    for step in range(num_steps - 1)
                ]

            utils.configure_ipu_system(utils.create_ipu_config())

            ones = np.ones((side, side))
            received_values, device_value = sess.run(
                [received, device_out], feed_dict={inputs: ones})

            self.assertAllClose(2 * ones, received_values[0])
            self.assertAllClose(8 * ones, received_values[1])
            self.assertAllClose(128 * ones, device_value)
Esempio n. 29
0
  def testReportInfoDirCreated0(self):
    """Checks that running a graph produces a report dir with framework.json.

    The IPU system is configured with profiling enabled and a fresh temporary
    directory as the report directory.  After one session run the directory
    must contain exactly one entry, that entry must contain a file named
    ``framework.json``, and the loaded content must be accepted by
    ``is_json``.
    """
    with self.session() as sess:
      report_dir = tempfile.mkdtemp()
      cfg = ipu.utils.create_ipu_config(profiling=True,
                                        use_poplar_text_report=True,
                                        profile_execution=True,
                                        report_directory=report_dir)
      cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
      cfg = ipu.utils.auto_select_ipus(cfg, 1)
      ipu.utils.configure_ipu_system(cfg)

      with ops.device("cpu"):
        pa, pb, pc = [
            array_ops.placeholder(np.float32, [2], name=label)
            for label in ("a", "b", "c")
        ]

      # Computes (pa + pb) + (pa + pc).
      def basic_graph(pa, pb, pc):
        return (pa + pb) + (pa + pc)

      with ipu_scope("/device:IPU:0"):
        graph_out = basic_graph(pa, pb, pc)

        feeds = {
            pa: [1., 1.],
            pb: [0., 1.],
            pc: [1., 5.],
        }
        # Only the side effect (report generation) matters; the value is unused.
        sess.run(graph_out, feed_dict=feeds)

        entries = os.listdir(report_dir)
        self.assertEqual(1, len(entries))

        run_dir = "{}/{}".format(report_dir, entries[0])
        run_files = os.listdir(run_dir)
        self.assertTrue("framework.json" in run_files)

        with open("{}/framework.json".format(run_dir)) as f:
          parsed = json.load(f)
          is_j, _ = is_json(parsed)
          self.assertTrue(is_j)
Esempio n. 30
0
def run_without_ipu_functions(opts, input_values):
    """Build, initialise, checkpoint and run ``model`` on the IPU.

    Args:
        opts: options object; forwarded to ``model`` and read for
            ``checkpoint_path``.
        input_values: array fed to the ``input_x`` placeholder; its ``shape``
            defines the placeholder shape.

    Returns:
        The result of running the compiled model on ``input_values``.
    """
    graph = tf.Graph()
    with graph.as_default():
        with tf.device("cpu"):
            input_x = tf.placeholder(np.float32, input_values.shape, name="input_x")

        with ipu_scope("/device:IPU:0"):
            # NOTE(review): the literal False presumably disables the
            # IPU-function code path in `model` -- confirm against `model`.
            compiled = ipu.ipu_compiler.compile(partial(model, opts, False), [input_x])

        with tf.device("cpu"):
            # Save every global variable whose name contains "metainfo"
            # exactly once, followed by all trainable variables.
            to_save = []
            for var in tf.global_variables():
                if var.name.count("metainfo") == 1:
                    to_save.append(var)
            to_save.extend(tf.trainable_variables())
            saver = tf.train.Saver(to_save)
            init_op = tf.global_variables_initializer()

    graph.finalize()
    with tf.Session(graph=graph) as sess:
        sess.run(init_op)
        # Checkpoint the freshly initialised variables for use in a second run.
        saver.save(sess, opts.checkpoint_path)
        return sess.run(compiled, feed_dict={input_x: input_values})