def test_sok_dense_demo(args, init_tensors, *random_samples):
    """Run the dense SOK demo under tf.distribute.MultiWorkerMirroredStrategy.

    Performs one pass over the generated dataset, collecting the embedding
    vector produced at every step, then optionally dumps the embedding
    parameters to disk.

    Args:
        args: parsed CLI namespace; fields read here are ips, worker_num,
            task_id, global_batch_size, max_vocabulary_size_per_gpu,
            embedding_vec_size, slot_num, nnz_per_slot, use_hashtable,
            optimizer, mixed_precision, restore_params and save_params.
        init_tensors: initial values loaded into the embedding variable when
            not restoring from file.
        *random_samples: tensors forwarded to utils.tf_dataset.

    Returns:
        Tuple of (list of per-step embedding vectors, embedding variable name).
    """
    port = 12345
    # Every worker must publish the same cluster spec; only the task index
    # differs per process.
    os.environ["TF_CONFIG"] = json.dumps({
        "cluster": {"worker": [args.ips[i] + ":" + str(port + i) for i in range(args.worker_num)]},
        "task": {"type": "worker", "index": args.task_id}
    })
    strategy = tf.distribute.MultiWorkerMirroredStrategy()
    with strategy.scope():
        sok.Init(global_batch_size=args.global_batch_size)

        sok_dense_demo = SOKDenseDemo(max_vocabulary_size_per_gpu=args.max_vocabulary_size_per_gpu,
                                      embedding_vec_size=args.embedding_vec_size,
                                      slot_num=args.slot_num,
                                      nnz_per_slot=args.nnz_per_slot,
                                      use_hashtable=args.use_hashtable)

        emb_opt = utils.get_embedding_optimizer(args.optimizer)(learning_rate=0.1)
        dense_opt = utils.get_dense_optimizer(args.optimizer)(learning_rate=0.1)
        if args.mixed_precision:
            # Only the embedding optimizer is wrapped; its loss scale is
            # reused below to (un)scale the dense gradients as well.
            emb_opt = tf.keras.mixed_precision.LossScaleOptimizer(emb_opt, initial_scale=1024)

    sok_saver = sok.Saver()
    if 1 == args.restore_params:
        filepath = r"./embedding_variables"
        sok_saver.restore_from_file(sok_dense_demo.embedding_layer.embedding_variable, filepath)
    else:
        sok_saver.load_embedding_values(sok_dense_demo.embedding_layer.embedding_variable, init_tensors)

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

    def _replica_loss(labels, logits):
        """Per-replica loss averaged over the global batch size."""
        loss = loss_fn(labels, logits)
        _dtype = loss.dtype
        loss = tf.cast(loss, tf.float32)
        loss = tf.nn.compute_average_loss(loss, global_batch_size=args.global_batch_size)
        return tf.cast(loss, _dtype)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, embedding_vector = sok_dense_demo(inputs, training=True)
            loss = _replica_loss(labels, logit)
            if args.mixed_precision:
                _loss = emb_opt.get_scaled_loss(loss)
            else:
                _loss = loss

        embedding_variables, other_variable = sok.split_embedding_variable_from_others(sok_dense_demo.trainable_variables)
        grads, emb_grads = tape.gradient(_loss, [other_variable, embedding_variables])
        if args.mixed_precision:
            grads = emb_opt.get_unscaled_gradients(grads)
            emb_grads = emb_opt.get_unscaled_gradients(emb_grads)

        # Fix: serialize the optimizer updates behind the embedding gradients,
        # in case NCCL runs concurrently via SOK and TF (same guard as the
        # sibling test_sok_demo); otherwise the two all-reduces may interleave.
        with tf.control_dependencies([*emb_grads]):
            if "plugin" not in args.optimizer:
                # Non-plugin optimizers need SOK's scope so hashtable-backed
                # slot variables are updated correctly.
                with sok.OptimizerScope(embedding_variables):
                    emb_opt.apply_gradients(zip(emb_grads, embedding_variables),
                                            experimental_aggregate_gradients=False)
            else:
                emb_opt.apply_gradients(zip(emb_grads, embedding_variables),
                                        experimental_aggregate_gradients=False)
            dense_opt.apply_gradients(zip(grads, other_variable))
            return loss, embedding_vector

    sok_results = list()

    def _dataset_fn(input_context):
        replica_batch_size = input_context.get_per_replica_batch_size(args.global_batch_size)
        dataset = utils.tf_dataset(*random_samples, batchsize=replica_batch_size,
                                   to_sparse_tensor=False, repeat=1)
        return dataset

    dataset = strategy.distribute_datasets_from_function(_dataset_fn)

    for i, (input_tensors, replica_labels) in enumerate(dataset):
        print("-"*30, "step ", str(i), "-"*30)
        loss, embedding_vector = strategy.run(_train_step, args=(input_tensors, replica_labels))
        loss = strategy.reduce("sum", loss, axis=None)
        print("[INFO]: iteration {}, loss {}".format(i, loss))
        sok_results.append(embedding_vector)

    # save params to file.
    if 1 == args.save_params:
        filepath = r"./embedding_variables"
        # Only the chief (task 0) creates the directory to avoid a race.
        utils.try_make_dirs(filepath, chief=(True if args.task_id == 0 else False))

        sok_saver.dump_to_file(sok_dense_demo.embedding_layer.embedding_variable, filepath)

    return sok_results, sok_dense_demo.embedding_layer.embedding_variable.values[0].m_var_name
def get_sok_results(args, init_tensors, *random_samples):
    """Build and run the SOK demo in TF1 graph mode, returning its results.

    Supports the "onedevice" and "horovod" distributed tools through
    strategy_wrapper. Runs args.iter_num training steps inside a tf.Session
    and collects the embedding vector produced at every step.

    Args:
        args: parsed CLI namespace; fields read here include distributed_tool,
            global_batch_size, use_tf_initializer, max_vocabulary_size_per_gpu,
            embedding_vec_size, slot_num, nnz_per_slot, use_hashtable,
            dynamic_input, key_dtype, optimizer, mixed_precision,
            restore_params, save_params, gpu_num and iter_num.
        init_tensors: per-layer initial embedding values; init_tensors[i] is
            loaded into embedding layer i unless restoring from file or using
            a TF initializer.
        *random_samples: tensors forwarded to utils.tf_dataset.

    Returns:
        Tuple of (list of per-step embedding vectors, list of embedding
        variable names).

    Raises:
        ValueError: if args.distributed_tool is neither "onedevice" nor
            "horovod".
    """
    if args.distributed_tool == "onedevice":
        strategy = strategy_wrapper.OneDeviceStrategy()
    elif args.distributed_tool == "horovod":
        import horovod.tensorflow as hvd
        hvd.init()
        strategy = strategy_wrapper.HorovodStrategy()
    else:
        raise ValueError(f"{args.distributed_tool} is not supported.")

    with strategy.scope():
        # Graph mode: sok.Init returns an op that must be run in the session
        # (see sess.run(sok_init_op) below) before any embedding op executes.
        sok_init_op = sok.Init(global_batch_size=args.global_batch_size)

        embedding_initializer = tf.keras.initializers.Ones(
        ) if args.use_tf_initializer else None

        sok_dense_demo = SOKDemo(
            max_vocabulary_size_per_gpu=args.max_vocabulary_size_per_gpu,
            embedding_vec_size=args.embedding_vec_size,
            slot_num=args.slot_num,
            nnz_per_slot=args.nnz_per_slot,
            use_hashtable=args.use_hashtable,
            dynamic_input=args.dynamic_input,
            num_of_dense_layers=0,
            key_dtype=args.key_dtype,
            embedding_initializer=embedding_initializer)

        emb_opt = utils.get_embedding_optimizer(
            args.optimizer)(learning_rate=0.1)
        dense_opt = utils.get_dense_optimizer(
            args.optimizer)(learning_rate=0.1)
        if args.mixed_precision:
            emb_opt = sok.tf.keras.mixed_precision.LossScaleOptimizer(
                emb_opt, 1024)

    sok_saver = sok.Saver()
    restore_op = list()
    # Chain the per-layer restore/load ops with control dependencies so each
    # layer's op runs strictly after the previous layer's op.
    for i, embedding_layer in enumerate(sok_dense_demo.embedding_layers):
        control_inputs = [restore_op[-1]] if restore_op else None
        with tf.control_dependencies(control_inputs):
            if args.restore_params:
                filepath = r"./embedding_variables"
                op = sok_saver.restore_from_file(
                    embedding_layer.embedding_variable, filepath)
            else:
                if not args.use_tf_initializer:
                    op = sok_saver.load_embedding_values(
                        embedding_layer.embedding_variable, init_tensors[i])
                else:
                    # TF initializer handles the values; use a no-op
                    # placeholder so the dependency chain stays intact.
                    op = tf.constant(1.0)
            restore_op.append(op)

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                                 reduction='none')

    def _replica_loss(labels, logits):
        """Per-replica loss averaged over the global batch size."""
        loss = loss_fn(labels, logits)
        _dtype = loss.dtype
        loss = tf.cast(loss, tf.float32)
        loss = tf.nn.compute_average_loss(
            loss, global_batch_size=args.global_batch_size)
        return tf.cast(loss, _dtype)

    def _train_step(inputs, labels, training):
        def _step_fn(inputs, labels):
            logit, embedding_vector = sok_dense_demo(inputs, training=training)
            loss = _replica_loss(labels, logit)
            if args.mixed_precision:
                _loss = emb_opt.get_scaled_loss(loss)
            else:
                _loss = loss
            emb_var, other_var = sok.split_embedding_variable_from_others(
                sok_dense_demo.trainable_variables)
            # tf.gradients (graph mode) over both variable groups at once;
            # the flat result is split back by position below.
            grads = tf.gradients(
                _loss,
                emb_var + other_var,
                colocate_gradients_with_ops=True,
                unconnected_gradients=tf.UnconnectedGradients.NONE)
            emb_grads, other_grads = grads[:len(emb_var)], grads[len(emb_var):]
            if args.mixed_precision:
                other_grads = emb_opt.get_unscaled_gradients(other_grads)
                emb_grads = emb_opt.get_unscaled_gradients(emb_grads)

            if "plugin" in args.optimizer:
                emb_train_op = emb_opt.apply_gradients(zip(emb_grads, emb_var))
            else:
                # Non-plugin optimizers need SOK's scope so hashtable-backed
                # slot variables are updated correctly.
                with sok.OptimizerScope(emb_var):
                    emb_train_op = emb_opt.apply_gradients(
                        zip(emb_grads, emb_var))
            with tf.control_dependencies([*emb_grads]):
                # in case NCCL runs concurrently via SOK and horovod
                other_grads = strategy.reduce("sum", other_grads)
            other_train_op = dense_opt.apply_gradients(
                zip(other_grads, other_var))

            # Force both apply ops to run before the loss is read back.
            with tf.control_dependencies([emb_train_op, other_train_op]):
                total_loss = strategy.reduce("sum", loss)
                total_loss = tf.identity(total_loss)
                return total_loss, embedding_vector

        return strategy.run(_step_fn, inputs, labels)

    # Each worker feeds itself a per-replica slice of the global batch.
    replica_batch_size = args.global_batch_size // args.gpu_num
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=replica_batch_size,
                               to_sparse_tensor=False,
                               repeat=1,
                               args=args)
    train_iterator = dataset.make_initializable_iterator()
    iterator_init = train_iterator.initializer

    inputs, labels = train_iterator.get_next()
    graph_results = _train_step(inputs, labels, training=True)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    if "plugin" in args.optimizer:
        # Plugin optimizers carry their own initializer op.
        init_op = tf.group(init_op, emb_opt.initializer)

    # Build the (chained) dump ops up front; whether they actually save is
    # decided by args.save_params at graph-construction time.
    save_op = list()
    for i, embedding_layer in enumerate(sok_dense_demo.embedding_layers):
        control_inputs = [save_op[-1]] if save_op else None
        with tf.control_dependencies(control_inputs):
            if args.save_params:
                filepath = r"./embedding_variables/"
                utils.try_make_dirs(filepath)
                op = sok_saver.dump_to_file(embedding_layer.embedding_variable,
                                            filepath)
            else:
                op = tf.constant(1.0)
        save_op.append(op)

    sok_results = list()

    config = tf.ConfigProto()
    config.log_device_placement = False
    with tf.Session(config=config) as sess:
        # Ordering matters: SOK init, then variable/iterator init, then the
        # restore chain, before freezing the graph.
        sess.run(sok_init_op)
        sess.run([init_op, iterator_init])
        sess.run(restore_op)
        sess.graph.finalize()

        for step in range(args.iter_num):
            loss_v, emb_vector_v = sess.run([*graph_results])
            print("*" * 80)
            print(f"Step: {step}, loss: {loss_v}"
                  )  #", embedding_vector:\n{emb_vector_v}")
            sok_results.append(emb_vector_v)

        sess.run(save_op)

    name = list()
    for embedding_layer in sok_dense_demo.embedding_layers:
        name.append(embedding_layer.embedding_variable.m_var_name)

    return sok_results, name
# Example 3
def test_sok_demo(args, init_tensors, *random_samples):
    """Train the sparse SOKDemo model under tf.distribute.MirroredStrategy.

    Runs a single pass over the generated dataset, recording the embedding
    vector produced at every step, then optionally dumps the embedding
    parameters to disk.

    Returns:
        Tuple of (list of per-step embedding vectors, embedding variable name).
    """
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        init_result = sok.Init(global_batch_size=args.global_batch_size)

        if args.use_tf_initializer:
            embedding_initializer = tf.keras.initializers.Ones()
        else:
            embedding_initializer = None

        demo_model = SOKDemo(
            combiner=args.combiner,
            max_vocabulary_size_per_gpu=args.max_vocabulary_size_per_gpu,
            slot_num=args.slot_num,
            max_nnz=args.max_nnz,
            embedding_vec_size=args.embedding_vec_size,
            use_hashtable=args.use_hashtable,
            key_dtype=args.key_dtype,
            embedding_initializer=embedding_initializer)

        emb_opt = utils.get_embedding_optimizer(args.optimizer)(
            learning_rate=0.1)
        dense_opt = utils.get_dense_optimizer(args.optimizer)(
            learning_rate=0.1)
        if args.mixed_precision:
            emb_opt = tf.keras.mixed_precision.LossScaleOptimizer(
                emb_opt, initial_scale=1024)

    saver = sok.Saver()

    if args.restore_params == 1:
        # Restore previously trained embedding parameters from disk.
        saver.restore_from_file(
            demo_model.embedding_layer.embedding_variable,
            r"./embedding_variables")
    elif not args.use_tf_initializer and init_tensors:
        # Otherwise seed the embedding table with the provided values.
        status = saver.load_embedding_values(
            demo_model.embedding_layer.embedding_variable, init_tensors)

    bce = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

    def _replica_loss(labels, logits):
        """Per-replica loss averaged over the global batch size."""
        per_example = bce(labels, logits)
        original_dtype = per_example.dtype
        averaged = tf.nn.compute_average_loss(
            tf.cast(per_example, tf.float32),
            global_batch_size=args.global_batch_size)
        return tf.cast(averaged, original_dtype)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, embedding_vector = demo_model(inputs, training=True)
            loss = _replica_loss(labels, logit)
            scaled = emb_opt.get_scaled_loss(loss) if args.mixed_precision else loss
        emb_vars, dense_vars = sok.split_embedding_variable_from_others(
            demo_model.trainable_variables)
        dense_grads, emb_grads = tape.gradient(scaled, [dense_vars, emb_vars])
        if args.mixed_precision:
            dense_grads = emb_opt.get_unscaled_gradients(dense_grads)
            emb_grads = emb_opt.get_unscaled_gradients(emb_grads)

        # Serialize optimizer updates behind the embedding gradients, in
        # case NCCL runs concurrently via SOK and TF.
        with tf.control_dependencies([*emb_grads]):
            if 'plugin' in args.optimizer:
                emb_opt.apply_gradients(zip(emb_grads, emb_vars),
                                        experimental_aggregate_gradients=False)
            else:
                # Non-plugin optimizers require SOK's scope for hashtable-backed slots.
                with sok.OptimizerScope(emb_vars):
                    emb_opt.apply_gradients(
                        zip(emb_grads, emb_vars),
                        experimental_aggregate_gradients=False)
            dense_opt.apply_gradients(zip(dense_grads, dense_vars))
            return loss, embedding_vector

    sok_results = list()

    def _dataset_fn(input_context):
        per_replica_bs = input_context.get_per_replica_batch_size(
            args.global_batch_size)
        ds = utils.tf_dataset(*random_samples,
                              batchsize=per_replica_bs,
                              to_sparse_tensor=True,
                              repeat=1,
                              args=args)
        # Each input pipeline reads only its own shard of the data.
        return ds.shard(input_context.num_input_pipelines,
                        input_context.input_pipeline_id)

    dataset = strategy.distribute_datasets_from_function(_dataset_fn)

    for step, (sparse_tensors, replica_labels) in enumerate(dataset):
        print("-" * 30, "step ", str(step), "-" * 30)
        loss, embedding_vector = strategy.run(
            _train_step, args=(sparse_tensors, replica_labels))
        loss = strategy.reduce("sum", loss, axis=None)
        print("[INFO]: iteration {}, loss {}".format(step, loss))
        sok_results.append(embedding_vector)

    if args.save_params == 1:
        dump_dir = r"./embedding_variables/"
        utils.try_make_dirs(dump_dir)
        saver.dump_to_file(demo_model.embedding_layer.embedding_variable,
                           dump_dir)

    return (sok_results,
            demo_model.embedding_layer.embedding_variable.values[0].m_var_name)
def get_tf_results(args, init_tensors, *random_samples):
    """Build and run a pure-TensorFlow reference model (TF1 graph mode).

    Mirrors the SOK demo so its per-step embedding vectors can be compared
    against the SOK implementation. Runs args.iter_num training steps and
    optionally saves the final embedding values to disk.

    Args:
        args: parsed CLI namespace; fields read here are
            max_vocabulary_size_per_gpu, gpu_num, slot_num, nnz_per_slot,
            embedding_vec_size, optimizer, mixed_precision, global_batch_size,
            iter_num and save_params.
        init_tensors: per-layer initial embedding values; init_tensors[i] is
            concatenated and assigned to embedding layer i.
        *random_samples: tensors forwarded to utils.tf_dataset.

    Returns:
        Tuple of (list of per-step embedding vectors, list of embedding
        variable names).
    """
    # Defined here (not inside a loop branch) so the save path below does not
    # depend on loop-scope leakage.
    filepath = r"./embedding_variables/"
    graph = tf.Graph()
    with graph.as_default():
        tf_dense_demo = TFDemo(
            vocabulary_size=args.max_vocabulary_size_per_gpu * args.gpu_num,
            slot_num=args.slot_num,
            nnz_per_slot=args.nnz_per_slot,
            embedding_vec_size=args.embedding_vec_size,
            num_of_dense_layers=0,
            use_hashtable=False,
            dynamic_input=False)

        optimizer = utils.get_dense_optimizer(
            args.optimizer)(learning_rate=0.1)
        if args.mixed_precision:
            optimizer = sok.tf.keras.mixed_precision.LossScaleOptimizer(
                optimizer, 1024)

        loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

        def _train_step(inputs, labels, training):
            """One optimization step; returns (loss, embedding_vector)."""
            logit, embedding_vector = tf_dense_demo(inputs, training=training)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
            grads = tf.gradients(
                _loss,
                tf_dense_demo.trainable_variables,
                colocate_gradients_with_ops=True,
                unconnected_gradients=tf.UnconnectedGradients.NONE)
            if args.mixed_precision:
                grads = optimizer.get_unscaled_gradients(grads)
            train_op = optimizer.apply_gradients(
                zip(grads, tf_dense_demo.trainable_variables))
            # Ensure the update has run before the loss value is read back.
            with tf.control_dependencies([train_op]):
                loss = tf.identity(loss)
                return loss, embedding_vector

        dataset = utils.tf_dataset(*random_samples,
                                   batchsize=args.global_batch_size,
                                   to_sparse_tensor=False,
                                   repeat=1)
        train_iterator = dataset.make_initializable_iterator()
        iterator_init = train_iterator.initializer

        inputs, labels = train_iterator.get_next()
        graph_results = _train_step(inputs, labels, training=True)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restore_op = list()
        for i, embedding_layer in enumerate(tf_dense_demo.embedding_layers):
            restore_op.append(
                embedding_layer.embeddings.assign(
                    tf.concat(init_tensors[i], axis=0)))

        # Fix: the original rebound `emb_values` from a list to a scalar
        # tensor inside this loop, discarding read ops appended for earlier
        # layers, and recreated the dump directory once per layer. Create
        # the directory once and keep `emb_values` a list in both branches
        # (placeholder constants mirror the save_op pattern used elsewhere).
        if args.save_params:
            utils.try_make_dirs(filepath)
        emb_values = list()
        for embedding_layer in tf_dense_demo.embedding_layers:
            if args.save_params:
                emb_values.append(embedding_layer.embeddings.read_value())
            else:
                emb_values.append(tf.constant(1.0))

    tf_results = list()
    with tf.Session(graph=graph) as sess:
        sess.run([init_op, iterator_init])
        sess.run(restore_op)
        sess.graph.finalize()

        for step in range(args.iter_num):
            loss_v, embedding_vector_v = sess.run([*graph_results])
            print("*" * 80)
            print(f"step: {step}, loss: {loss_v}"
                  )  #", embedding_vector:\n{embedding_vector_v}")
            tf_results.append(embedding_vector_v)

        emb_values_v = sess.run(emb_values)
        if args.save_params:
            for i, value in enumerate(emb_values_v):
                utils.save_to_file(
                    os.path.join(filepath,
                                 r"tf_variable_" + str(i) + r".file"), value)
    name = list()
    for embedding_layer in tf_dense_demo.embedding_layers:
        name.append(embedding_layer.embeddings.name)

    return tf_results, name