Example no. 1
def compare_sok_and_tf(args):
    sok_results = test_sok_multi_dense_emb(args)
    utils.save_to_file("./sok_results_" + str(args.task_id) + ".file",
                       sok_results)

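    # the dummy allreduce acts as a barrier: every worker must finish writing its
    # sok_results file before any rank reads the files back for the comparison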
    barrier = hvd.allreduce(tf.zeros([1]))

    # if args.task_id != 0:
    #    return

    tf_results = test_tf_multi_dense_emb(args)

    all_sok_results_list = list()
    for i in range(args.worker_num):
        sok_results = utils.restore_from_file("./sok_results_" + str(i) +
                                              ".file")
        sok_results = tf.concat(sok_results,
                                axis=0)  # [iter-num, replica-bs, vectors]
        all_sok_results_list.append(sok_results)
    all_sok_results_list = tf.concat(all_sok_results_list, axis=1)
    all_sok_results_list = tf.split(all_sok_results_list,
                                    num_or_size_splits=len(tf_results),
                                    axis=0)
    all_sok_results_list = [tf.squeeze(item) for item in all_sok_results_list]

    if len(all_sok_results_list) != len(tf_results):
        raise ValueError(
            "The length of sok results is not equal to that of tensorflow.")

    if args.dynamic_input == 1:
        atol = 1e0
        rtol = 1e-2
    elif args.mixed_precision:
        atol = 1e-2
        rtol = 1e-2
    else:
        atol = 1e-4
        rtol = 1e-4
    for i, sok_vector in enumerate(all_sok_results_list):
        tf.debugging.assert_near(
            tf.reshape(sok_vector, shape=[-1, tf.shape(sok_vector)[-1]]),
            tf_results[i],
            atol=atol,
            rtol=rtol,
            message=("the values are not consistent at iteration: %d" % i))

    print("\n[INFO]: For multiple dense embedding layers with Horovod, the embedding"
          " vectors obtained from SOK and TF are consistent for %d iterations,"
          " with mixed_precision = %s."
          % (len(sok_results), args.mixed_precision))
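
To see what the concat/split aggregation above does to the shapes, here is a minimal, self-contained sketch (not part of the test) with made-up sizes: 2 workers, 3 iterations, a replica batch of 4, and 8-dim embedding vectors.

import tensorflow as tf

worker_num, iter_num, replica_bs, vec_size = 2, 3, 4, 8
# each worker contributes a [iter_num, replica_bs, vec_size] tensor of embedding vectors
per_worker = [tf.random.normal([iter_num, replica_bs, vec_size]) for _ in range(worker_num)]
# concatenate along the batch axis -> [iter_num, worker_num * replica_bs, vec_size]
merged = tf.concat(per_worker, axis=1)
# split back into one slice per iteration, then drop the leading length-1 axis
per_iter = [tf.squeeze(t, axis=0) for t in tf.split(merged, num_or_size_splits=iter_num, axis=0)]
assert len(per_iter) == iter_num
assert per_iter[0].shape == (worker_num * replica_bs, vec_size)
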
Example no. 2
def check_saved_embedding_variables(args,
                                    embedding_variable_name,
                                    use_hashtable=True,
                                    gpu_num=None,
                                    atol=1e-4,
                                    rtol=1e-4):
    filepath = r"./embedding_variables"

    sok_keys_filename = os.path.join(filepath,
                                     embedding_variable_name + r"_keys.file")
    element_type = "long long"
    if hasattr(args, "key_dtype"):
        element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
    sok_keys = utils.read_binary_file(sok_keys_filename,
                                      element_type=element_type)
    sok_values_filename = os.path.join(
        filepath, embedding_variable_name + r"_values.file")
    sok_values = utils.read_binary_file(sok_values_filename,
                                        element_type="float")

    sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
        sok_keys,
        sok_values,
        embedding_vec_size=args.embedding_vec_size,
        use_hashtable=use_hashtable,
        gpu_num=gpu_num)

    tf_values_filename = os.path.join(filepath, r"tf_variable.file")
    tf_values = utils.restore_from_file(tf_values_filename)
    valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys, tf_values[0])

    import numpy as np
    sorted_sok_values = np.reshape(sorted_sok_values,
                                   newshape=(sorted_sok_keys.size,
                                             args.embedding_vec_size))
    allclose = np.allclose(sorted_sok_values,
                           valid_tf_values,
                           atol=atol,
                           rtol=rtol)
    if not allclose:
        raise ValueError(
            f"The Variable from SOK: \n{sorted_sok_values}, \nis not close to that from TF: \n{valid_tf_values}"
            f" \nat atol: {atol}, rtol: {rtol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
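
The two utils helpers above are project-specific; as an assumption about what they roughly do, the comparison boils down to sorting the dumped SOK keys, reshaping the dumped values into a [num_keys, embedding_vec_size] matrix, and gathering the matching rows of the TF variable. A minimal numpy sketch of that idea, on hypothetical data rather than the real helpers:

import numpy as np

vec_size = 4
tf_variable = np.random.rand(10, vec_size).astype(np.float32)   # full TF embedding table
keys = np.array([7, 2, 5], dtype=np.int64)                       # keys dumped by SOK
# pretend SOK dumped exactly the rows it trained, flattened to 1-D like the binary file
sok_values = tf_variable[keys].reshape(-1)

order = np.argsort(keys)
sorted_keys = keys[order]
sorted_values = sok_values.reshape(keys.size, vec_size)[order]
valid_tf_values = tf_variable[sorted_keys]   # only the rows SOK actually touched
print(np.allclose(sorted_values, valid_tf_values, atol=1e-4, rtol=1e-4))   # True
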
Example no. 3
def check_saved_embedding_variables(args,
                                    embedding_variable_names,
                                    use_hashtable=True,
                                    gpu_num=None,
                                    atol=1e-4,
                                    rtol=1e-4):
    filepath = r"./embedding_variables"
    for i, embedding_variable_name in enumerate(embedding_variable_names):
        sok_keys_filename = os.path.join(
            filepath, embedding_variable_name + r"_keys.file")
        element_type = "long long"
        if hasattr(args, "key_dtype"):
            element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
        sok_keys = utils.read_binary_file(sok_keys_filename,
                                          element_type=element_type)
        sok_values_filename = os.path.join(
            filepath, embedding_variable_name + r"_values.file")
        sok_values = utils.read_binary_file(sok_values_filename,
                                            element_type="float")

        sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
            sok_keys,
            sok_values,
            embedding_vec_size=args.embedding_vec_size[i],
            use_hashtable=use_hashtable,
            gpu_num=gpu_num)

        tf_values_filename = os.path.join(filepath,
                                          r"tf_variable_" + str(i) + r".file")
        tf_values = utils.restore_from_file(tf_values_filename)
        valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys,
                                                    tf_values[0])

        vec_size = args.embedding_vec_size[i]
        newshape = (sorted_sok_keys.size, vec_size)
        sorted_sok_values = np.reshape(sorted_sok_values, newshape=newshape)
        allclose = np.allclose(sorted_sok_values,
                               valid_tf_values,
                               atol=atol,
                               rtol=rtol)
        if not allclose:
            raise ValueError(
                f"\n{sorted_sok_values} \nis not close to \n{valid_tf_values} "
                f"\nat rtol={rtol}, atol={atol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
def compare_dense_emb_sok_with_tf(args):
    if (args.global_batch_size % args.local_gpu_num != 0):
        raise ValueError("global_batch_size: %d is not divisible by local_gpu_num: %d"
                        %(args.global_batch_size, args.local_gpu_num))
    if (args.global_batch_size % args.worker_num != 0):
        raise ValueError("global_batch_size: %d is not divisible by worker_num: %d"
                        %(args.global_batch_size, args.worker_num))

    if args.mixed_precision:
        policy = tf.keras.mixed_precision.Policy("mixed_float16")
        tf.keras.mixed_precision.set_global_policy(policy)

    # each worker generates a different dataset
    if args.generate_new_datas:
        if args.use_hashtable:
            vocabulary_size = args.local_gpu_num * args.max_vocabulary_size_per_gpu * args.worker_num
        else:
            vocabulary_size = args.max_vocabulary_size_per_gpu

        worker_batch_size = args.global_batch_size // args.worker_num
        random_samples_local = utils.generate_random_samples(num_of_samples=worker_batch_size * args.iter_num,
                                                             vocabulary_size=vocabulary_size,
                                                             slot_num=args.slot_num,
                                                             max_nnz=args.nnz_per_slot,
                                                             use_sparse_mask=False)
        utils.save_to_file(r"./random_samples_" + str(args.task_id) + r".file", *random_samples_local)
    else:
        random_samples_local = utils.restore_from_file(r"./random_samples_" + str(args.task_id) + r".file")

    if 0 == args.restore_params:
        # each worker generates the same init tensors, because each worker will do the filtering by itself
        init_tensors = utils.get_ones_tensor(max_vocab_size_per_gpu=args.max_vocabulary_size_per_gpu,
                                            embedding_vec_size=args.embedding_vec_size,
                                            num=args.local_gpu_num * args.worker_num)
    else:
        filepath = r"./embedding_variables"
        tf_values_filename = os.path.join(filepath, r"tf_variable.file")
        init_tensors = utils.restore_from_file(tf_values_filename)

    sok_results_local, embedding_variable_name = test_sok_dense_demo(args, init_tensors, *random_samples_local)
    # save the forward embedding vectors from each worker to file
    utils.save_to_file(r"./sok_embedding_vectors_" + str(args.task_id) + r".file", *sok_results_local)

    # only 1 process needs to do the TF computation
    if args.task_id != 0:
        return

    # aggregate the dataset from all workers
    dataset_filenames = [r"./random_samples_" + str(task_id) + r".file"
                         for task_id in range(args.worker_num)]
    random_samples_total = [list() for _ in range(args.iter_num)]
    random_labels_total = [list() for _ in range(args.iter_num)]
    local_batch_size = args.global_batch_size // args.worker_num
    for worker_id in range(args.worker_num):
        samples, labels = utils.restore_from_file(dataset_filenames[worker_id])
        for i in range(args.iter_num):
            random_samples_total[i].extend(samples[i * local_batch_size : (i + 1) * local_batch_size])
            random_labels_total[i].extend(labels[i * local_batch_size : (i + 1) * local_batch_size])
    random_samples_total = np.concatenate(random_samples_total, axis=0)
    random_labels_total = np.concatenate(random_labels_total, axis=0)

    tf_results = test_tf_dense_model(args, init_tensors, random_samples_total, random_labels_total)

    # aggregate the forward embedding vectors from all workers
    sok_results_filenames = [r"./sok_embedding_vectors_" + str(task_id) + r".file"
                             for task_id in range(args.worker_num)]
    sok_results_total = list()
    for file_name in sok_results_filenames:
        sok_results_local = utils.restore_from_file(file_name)
        sok_results_total.append(sok_results_local)
    
    if (len(sok_results_total[0]) != len(tf_results)):
        raise ValueError("The length of results obtained from sok: %d is not equal to that obtained from TF: %d"
                         %(len(sok_results_total[0]), len(tf_results)))
    if (len(tf_results) != args.iter_num):
        raise ValueError("The length of embedding vectors: %d is not equal to iteration number: %d."
                         %(len(tf_results), args.iter_num))
    
    if 1 == args.restore_params or args.mixed_precision:
        tolerance = 1e-2
    else:
        tolerance = 1e-4

    for i in range(args.iter_num):
        if args.local_gpu_num != 1:
            sok_vector = tf.concat([tf.concat(sok_results_total[task_id][i].values, axis=0)
                                    for task_id in range(args.worker_num)], axis=0)
        else:
            sok_vector = tf.concat([sok_results_total[task_id][i]
                                    for task_id in range(args.worker_num)],
                                    axis=0)
        tf.debugging.assert_near(tf.reshape(sok_vector,
                                            shape=[-1, tf.shape(sok_vector)[-1]]),
                                tf_results[i],
                                atol=tolerance,
                                rtol=tolerance)

    print("\n[INFO]: For the Dense Embedding Layer with MultiWorkerMirroredStrategy, the embedding vectors "
          "obtained from sparse operation kit and TensorFlow are consistent for %d iterations"
          ", with mixed_precision = %s"
          %(args.iter_num, args.mixed_precision))

    if 1 == args.save_params:
        check_saved_embedding_variables(args, embedding_variable_name, 
                                        use_hashtable=args.use_hashtable, 
                                        gpu_num=args.worker_num * args.local_gpu_num,
                                        atol=tolerance, rtol=tolerance)
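
A small illustration of how the per-worker sample files are stitched back into global batches above: for every iteration, each worker contributes its local_batch_size slice, and the slices are concatenated in worker order. A minimal numpy sketch with toy numbers, not part of the test:

import numpy as np

worker_num, iter_num, local_batch_size, slot_dim = 2, 3, 4, 5
# each worker holds iter_num * local_batch_size samples, laid out iteration after iteration
per_worker_samples = [np.random.rand(iter_num * local_batch_size, slot_dim)
                      for _ in range(worker_num)]

samples_total = [list() for _ in range(iter_num)]
for worker_id in range(worker_num):
    samples = per_worker_samples[worker_id]
    for i in range(iter_num):
        samples_total[i].extend(samples[i * local_batch_size:(i + 1) * local_batch_size])
samples_total = np.concatenate(samples_total, axis=0)
# global layout: iteration 0 of all workers, then iteration 1 of all workers, and so on
assert samples_total.shape == (iter_num * worker_num * local_batch_size, slot_dim)
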
Example no. 5
def test_tf_multi_dense_emb(args):
    dataset_filenames = [
        args.file_prefix + str(task_id) + ".file"
        for task_id in range(args.worker_num)
    ]

    samples_total = [list() for _ in range(args.dataset_iter_num)]
    labels_total = [list() for _ in range(args.dataset_iter_num)]
    replica_batch_size = args.global_batch_size // args.worker_num
    for worker_id in range(args.worker_num):
        samples, labels = utils.restore_from_file(dataset_filenames[worker_id])
        for i in range(args.dataset_iter_num):
            samples_total[i].extend(samples[i * replica_batch_size:(i + 1) *
                                            replica_batch_size])
            labels_total[i].extend(labels[i * replica_batch_size:(i + 1) *
                                          replica_batch_size])
    samples_total = np.concatenate(samples_total, axis=0)
    labels_total = np.concatenate(labels_total, axis=0)

    dataset = utils.tf_dataset(samples_total,
                               labels_total,
                               batchsize=args.global_batch_size,
                               to_sparse_tensor=False,
                               repeat=1)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    model = TFDenseModel(
        vocabulary_size=args.max_vocabulary_size_per_gpu * args.worker_num,
        embedding_vec_size_list=args.embedding_vec_size_list,
        slot_num_list=args.slot_num_list,
        nnz_per_slot_list=[
            args.nnz_per_slot for _ in range(len(args.slot_num_list))
        ],
        num_dense_layers=args.num_dense_layers)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
    if args.mixed_precision:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
            optimizer, initial_scale=1024)

    # set initial value to embedding variables
    for i, param in enumerate(model.embedding_params):
        init_tensors = utils.get_ones_tensor(
            max_vocab_size_per_gpu=args.max_vocabulary_size_per_gpu *
            args.worker_num,
            embedding_vec_size=args.embedding_vec_size_list[i],
            num=1)
        param.assign(init_tensors[0])

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, all_vectors = model(inputs, training=True)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
        grads = tape.gradient(_loss, model.trainable_variables)
        if args.mixed_precision:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss, all_vectors

    # save the forward embedding vectors of every iteration
    tf_results = list()
    for i, (inputs, labels) in enumerate(dataset):
        if args.stop_iter >= 0 and i >= args.stop_iter:
            break

        loss, all_vectors = _train_step(inputs, labels)
        print("[INFO]: Iteration: {}, loss={}".format(i, loss))

        with tf.device("CPU:0"):
            tf_results.append(all_vectors)
    return tf_results
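
The dataset construction above relies on utils.tf_dataset from the project's test utilities; as a rough, assumed equivalent for readers without that module, a toy tf.data pipeline might look like the sketch below (the real helper also supports to_sparse_tensor, which is not reproduced here).

import numpy as np
import tensorflow as tf

def toy_tf_dataset(samples, labels, batchsize, repeat=1):
    # pair samples with labels, batch to the requested size, iterate `repeat` epochs
    ds = tf.data.Dataset.from_tensor_slices((samples, labels))
    ds = ds.batch(batchsize, drop_remainder=True)
    return ds.repeat(repeat)

samples = np.random.randint(0, 100, size=(8, 3, 4))              # [num_samples, slot_num, nnz_per_slot]
labels = np.random.randint(0, 2, size=(8, 1)).astype(np.float32)
for x, y in toy_tf_dataset(samples, labels, batchsize=4):
    print(x.shape, y.shape)                                       # (4, 3, 4) (4, 1)
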
Example no. 6
def compare_sok_with_tf(args):
    if (args.global_batch_size % args.gpu_num != 0):
        raise ValueError(
            "global_batch_size: %d is not divisible by gpu_num: %d" %
            (args.global_batch_size, args.gpu_num))

    if args.use_hashtable:
        vocabulary_size = args.max_vocabulary_size_per_gpu * args.gpu_num
    else:
        vocabulary_size = args.max_vocabulary_size_per_gpu

    if args.generate_new_datas:
        random_samples = utils.generate_random_samples(
            num_of_samples=args.global_batch_size * args.iter_num,
            vocabulary_size=vocabulary_size,
            slot_num=args.slot_num,
            max_nnz=args.max_nnz)
        utils.save_to_file(r"./random_samples.file", *random_samples)
    else:
        random_samples = utils.restore_from_file(r"./random_samples.file")

    if (1 == args.restore_params):  # initialize using trained params
        filepath = r"./embedding_variables"

        # because the Variable consistency was already checked when saving,
        # we can directly use the TensorFlow Variable file to initialize
        # TF's variable here.
        # FIXME: what if not all TensorFlow embedding vectors are used??
        tf_values_filename = os.path.join(filepath, r"tf_variable.file")
        init_tensors = utils.restore_from_file(tf_values_filename)

    else:  # initialize using constant (ones) initial values
        init_tensors = utils.get_ones_tensor(
            max_vocab_size_per_gpu=args.max_vocabulary_size_per_gpu,
            embedding_vec_size=args.embedding_vec_size,
            num=args.gpu_num)

    sok_results, embedding_variable_name = test_sok_demo(
        args, init_tensors, *random_samples)
    tf_results = test_tf_demo(args, init_tensors, *random_samples)

    if (len(sok_results) != len(tf_results)):
        raise ValueError(
            "The length of plugin results is not equal to that of tensorflow.")
    if (len(tf_results) != args.iter_num):
        raise ValueError(
            "The length of embedding vectors: %d is not equal to iteration number: %d."
            % (len(tf_results), args.iter_num))

    tolerance = 1e-4
    if args.mixed_precision:
        tolerance = 1e-3

    for i, sok_vector in enumerate(sok_results):
        if args.gpu_num != 1:
            sok_vector = tf.stack(sok_vector.values, axis=0)
        tf.debugging.assert_near(tf.reshape(
            sok_vector, shape=[-1, tf.shape(sok_vector)[-1]]),
                                 tf_results[i],
                                 atol=tolerance,
                                 rtol=tolerance)
    print("\n[INFO]: With MirroredStrategy, the embedding vectors obtained from "
          "sparse operation kit and TensorFlow are consistent for %d iterations,"
          " with mixed_precision = %s, key_dtype = %s, use_tf_initializer = %s"
          %(args.iter_num, args.mixed_precision, args.key_dtype, args.use_tf_initializer))

    if (1 == args.save_params):
        check_saved_embedding_variables(args,
                                        embedding_variable_name,
                                        use_hashtable=args.use_hashtable,
                                        gpu_num=args.gpu_num,
                                        atol=tolerance,
                                        rtol=tolerance)
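
The checks above rely on tf.debugging.assert_near, which passes when |x - y| <= atol + rtol * |y| elementwise. A tiny, standalone illustration of how the tolerance choice decides whether a small SOK/TF discrepancy is accepted:

import tensorflow as tf

x = tf.constant([1.0, 2.0])
y = tf.constant([1.00005, 2.00005])                      # off by about 5e-5
tf.debugging.assert_near(x, y, atol=1e-4, rtol=1e-4)     # within tolerance, no error
try:
    tf.debugging.assert_near(x, y, atol=1e-6, rtol=1e-6)
except tf.errors.InvalidArgumentError:
    print("mismatch detected at the tighter tolerance")
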
Example no. 7
def compare_dense_emb_sok_with_tf(args):
    if args.global_batch_size % args.gpu_num != 0:
        raise ValueError(
            f"global_batch_size: {args.global_batch_size} is not divisible by"
            f" gpu_num: {args.gpu_num}")

    if args.use_hashtable:
        vocabulary_size = args.max_vocabulary_size_per_gpu * args.gpu_num
    else:
        vocabulary_size = args.max_vocabulary_size_per_gpu

    if args.generate_new_datas:
        replica_batch_size = args.global_batch_size // args.gpu_num
        random_samples = utils.generate_random_samples(
            num_of_samples=replica_batch_size * args.iter_num,
            vocabulary_size=vocabulary_size,
            slot_num=sum(args.slot_num),
            max_nnz=args.nnz_per_slot,
            use_sparse_mask=False)
        utils.save_to_file(
            r"./random_samples_" + str(args.rank_idx) + r".file",
            *random_samples)
    else:
        random_samples = utils.restore_from_file(r"./random_samples_" +
                                                 str(args.rank_idx) + r".file")

    if args.restore_params:
        filepath = r"./embedding_variables"
        # because the Variable consistency was already checked when saving,
        # we can directly use the TensorFlow Variable file to initialize
        # TF's Variable
        init_tensors = list()
        for i in range(len(args.slot_num)):
            tf_values_filename = os.path.join(
                filepath, r"tf_variable_" + str(i) + r".file")
            init_tensors.append(utils.restore_from_file(tf_values_filename))
    else:
        init_tensors = list()
        for i in range(len(args.slot_num)):
            init_tensors.append(
                utils.get_ones_tensor(
                    max_vocab_size_per_gpu=args.max_vocabulary_size_per_gpu,
                    embedding_vec_size=args.embedding_vec_size[i],
                    num=args.gpu_num))

    sok_results, embedding_variable_name = get_sok_results(
        args, init_tensors, *random_samples)
    utils.save_to_file(
        r"./sok_embedding_vectors_" + str(args.rank_idx) + r".file",
        *sok_results)

    if args.rank_idx != 0:
        return

    # aggregate the dataset from all workers
    dataset_filenames = [
        r"./random_samples_" + str(rank_idx) + r".file"
        for rank_idx in range(args.rank_size)
    ]
    random_samples_total = [list() for _ in range(args.iter_num)]
    random_labels_total = [list() for _ in range(args.iter_num)]
    local_batch_size = args.global_batch_size // args.gpu_num
    for rank_idx in range(args.rank_size):
        samples, labels = utils.restore_from_file(dataset_filenames[rank_idx])
        for i in range(args.iter_num):
            random_samples_total[i].extend(
                samples[i * local_batch_size:(i + 1) * local_batch_size])
            random_labels_total[i].extend(labels[i * local_batch_size:(i + 1) *
                                                 local_batch_size])
    random_samples_total = np.concatenate(random_samples_total, axis=0)
    random_labels_total = np.concatenate(random_labels_total, axis=0)

    tf_results, _ = get_tf_results(args, init_tensors, random_samples_total,
                                   random_labels_total)

    # aggregate the sok forward results from all workers
    sok_results_filenames = [
        r"./sok_embedding_vectors_" + str(rank_idx) + r".file"
        for rank_idx in range(args.rank_size)
    ]
    sok_results_total = list()
    for filename in sok_results_filenames:
        sok_results = utils.restore_from_file(filename)
        sok_results_total.append(sok_results)

    if len(sok_results_total[0]) != len(tf_results):
        raise ValueError(
            "The length of sok results is not equal to that of tensorflow.")
    if len(sok_results) != args.iter_num:
        raise ValueError(
            "The length of embedding vectors: %d is not equal to iteration number: %d."
            % (len(sok_results), args.iter_num))

    rtol = 1e-4
    atol = 1e-4
    if args.restore_params:
        rtol, atol = 1e-3, 1e-3
    elif args.distributed_tool == "horovod":
        rtol, atol = rtol * 10, atol * 10
    elif args.mixed_precision:
        rtol, atol = 1e-2, 1e-2

    for i in range(args.iter_num):
        sok_vector = np.concatenate([
            sok_results_total[rank_idx][i]
            for rank_idx in range(args.rank_size)
        ],
                                    axis=0)
        allclose = np.allclose(sok_vector, tf_results[i], rtol=rtol, atol=atol)
        if not allclose:
            raise ValueError(
                f"\n{sok_vector} \nis not near to \n{tf_results[i]} \nat rtol={rtol}, atol={atol}"
            )

        # TODO: add a verbose option
        if False:
            print("--------------- step: {}---------------------".format(i))
            print("sok_embedding_vector:\n{}".format(sok_vector))
            print("tf_embedding_vector:\n{}".format(tf_results[i]))

    print(
        f"\n[INFO]: For {len(args.slot_num)} Dense Embedding layers, using {args.gpu_num} GPUs + {args.optimizer} optimizer, "
        f"using hashtable? {args.use_hashtable}, dynamic_input? {args.dynamic_input}, "
        "the embedding vectors"
        f" obtained from sok and tf are consistent for {args.iter_num} iterations,"
        f" with mixed_precision = {args.mixed_precision}, key_dtype = {args.key_dtype},"
        f" use_tf_initializer = {args.use_tf_initializer}")

    if args.save_params:
        check_saved_embedding_variables(args,
                                        embedding_variable_name,
                                        use_hashtable=args.use_hashtable,
                                        gpu_num=args.gpu_num,
                                        atol=atol,
                                        rtol=rtol)