Example #1
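Builds the CSR inputs on the GPU with hugectr.distribute_keys_v4, then runs hugectr.fprop_v2 once in training mode and once in inference mode; the gradient taken w.r.t. bp_trigger is what drives the plugin's backward pass. (visiable_gpus is spelled here as in the source.)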
import numpy as np
import tensorflow as tf
import hugectr  # HugeCTR TF embedding-plugin ops module (import path may differ per install)

def test_forward_distribute_keys_v4(embedding_type):
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):

            vocabulary_size = 8
            slot_num = 3
            embedding_vec_size = 4

            init_value = np.float32([
                i for i in range(1, vocabulary_size * embedding_vec_size + 1)
            ]).reshape(vocabulary_size, embedding_vec_size)

            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='int64',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)
            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[1.0, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=1737710,
                slot_num=slot_num,
                embedding_vec_size=embedding_vec_size,
                max_feature_num=4,
                embedding_type=embedding_type,
                max_nnz=2)

            # shape [4, 3, 2] = [batch_size, slot_num, max_nnz]; -1 marks an empty position
            keys = np.array(
                [[[0, -1], [1, -1], [2, 6]], [[0, -1], [1, -1], [-1, -1]],
                 [[0, -1], [1, -1], [6, -1]], [[0, -1], [1, -1], [2, -1]]],
                dtype=np.int64)

            # note: fprop_v2 below consumes the CSR outputs of distribute_keys_v4,
            # so these two tensors are never used in this example
            sparse_indices = tf.where(keys != -1)  # [N, ndims]
            values = tf.gather_nd(keys, sparse_indices)  # [N]

            row_offsets, value_tensors, nnz_array = hugectr.distribute_keys_v4(
                all_keys=keys,
                gpu_count=4,
                embedding_type=embedding_type,
                max_nnz=2,
                batch_size=4,
                slot_num=3)
            print("row_offsets = ", row_offsets, "\n")
            print("value_tensors = ", value_tensors, "\n")
            print("nnz_array = ", nnz_array, "\n")

            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')  # must be trainable

            forward_result = hugectr.fprop_v2(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=True,
                bp_trigger=bp_trigger,
                output_shape=[4, slot_num, embedding_vec_size])
            print("first step: \n", forward_result)

            # differentiating w.r.t. the trainable bp_trigger is what fires the
            # plugin's backward/update pass; the returned grads are not used here
            grads = tape.gradient(forward_result, bp_trigger)

            forward_result = hugectr.fprop_v2(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=False,
                bp_trigger=bp_trigger,
                output_shape=[4, slot_num, embedding_vec_size])
            print("second step: \n", forward_result)
Example #2
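Same setup as Example #1, but the CSR triple is produced by _distribute_kyes (sic), a tf.function helper defined elsewhere in the test file, and the forward pass goes through hugectr.fprop_v3. The helper is called twice with identical inputs; see the note after the example.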
# imports as in Example #1 (numpy, tensorflow, hugectr)

def tf_distribute_keys_fprop_v3(embedding_type):
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):

            vocabulary_size = 8
            slot_num = 3
            embedding_vec_size = 4

            init_value = np.float32([
                i for i in range(1, vocabulary_size * embedding_vec_size + 1)
            ]).reshape(vocabulary_size, embedding_vec_size)

            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='int64',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)
            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[1.0, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=1737710,
                slot_num=slot_num,
                embedding_vec_size=embedding_vec_size,
                max_feature_num=4,
                embedding_type=embedding_type,
                max_nnz=2)

            # shape [4, 3, 2] = [batch_size, slot_num, max_nnz]; -1 marks an empty position
            keys = np.array(
                [[[0, -1], [1, -1], [2, 6]], [[0, -1], [1, -1], [-1, -1]],
                 [[0, -1], [1, -1], [6, -1]], [[0, -1], [1, -1], [2, -1]]],
                dtype=np.int64)

            # _distribute_kyes (sic, name as in the source) is a tf.function helper
            # defined elsewhere in this test file; it builds the same kind of CSR
            # triple as hugectr.distribute_keys_v4, on the TF side
            row_offsets, value_tensors, nnz_array = _distribute_kyes(
                tf.convert_to_tensor(keys),
                gpu_count=4,
                embedding_type=embedding_type)
            print("row_ptrs", row_offsets)
            print("\nvalues", value_tensors)
            print("\n", nnz_array)

            row_offsets, value_tensors, nnz_array = _distribute_kyes(
                tf.convert_to_tensor(keys),
                gpu_count=4,
                embedding_type=embedding_type)
            print("\nrow_ptrs", row_offsets)
            print("\nvalues", value_tensors)
            print("\n", nnz_array)
            # print("\n", _distribute_kyes.pretty_printed_concrete_signatures(), "\n")

            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')  # must be trainable

            forward_result = hugectr.fprop_v3(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=True,
                bp_trigger=bp_trigger,
                output_shape=[4, slot_num, embedding_vec_size])
            print("first step: \n", forward_result)

            # differentiating w.r.t. the trainable bp_trigger is what fires the
            # plugin's backward/update pass; the returned grads are not used here
            grads = tape.gradient(forward_result, bp_trigger)

            forward_result = hugectr.fprop_v3(
                embedding_name=embedding_name,
                row_offsets=row_offsets,
                nnz_array=nnz_array,
                value_tensors=value_tensors,
                is_training=False,
                bp_trigger=bp_trigger,
                output_shape=[4, slot_num, embedding_vec_size])
            print("second step: \n", forward_result)
Example #3
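Skips the distribute-keys step entirely: feeds the (sparse_indices, values, dense_shape) triple straight to the original hugectr.fprop, using a 'localized' embedding with uint32 keys and a small per-GPU vocabulary.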
# imports as in Example #1 (numpy, tensorflow, hugectr)

def test():
    with tf.GradientTape() as tape:
        with tf.device("/gpu:0"):

            vocabulary_size = 8
            slot_num = 3
            embedding_vec_size = 4

            init_value = np.float32([
                i for i in range(1, vocabulary_size * embedding_vec_size + 1)
            ]).reshape(vocabulary_size, embedding_vec_size)

            hugectr.init(visiable_gpus=[0, 1, 3, 4],
                         seed=123,
                         key_type='uint32',
                         value_type='float',
                         batch_size=4,
                         batch_size_eval=4)
            embedding_name = hugectr.create_embedding(
                init_value=init_value,
                opt_hparams=[0.1, 0.9, 0.99, 1e-3],
                name_='test_embedding',
                max_vocabulary_size_per_gpu=5,
                slot_num=slot_num,
                embedding_vec_size=embedding_vec_size,
                max_feature_num=4,
                embedding_type='localized',
                max_nnz=2)

            # shape [4, 3, 4] (batch, slot, keys-per-slot); -1 marks an empty position
            keys = np.array(
                [[[0, -1, -1, -1], [1, -1, -1, -1], [2, 6, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [-1, -1, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [6, -1, -1, -1]],
                 [[0, -1, -1, -1], [1, -1, -1, -1], [2, -1, -1, -1]]],
                dtype=np.int64)

            sparse_indices = tf.where(keys != -1)  # [N, ndims] coordinates of valid keys
            values = tf.gather_nd(keys, sparse_indices)  # [N] the valid keys themselves

            bp_trigger = tf.Variable(
                initial_value=[1.0, 2.0],
                trainable=True,
                dtype=tf.float32,
                name='embedding_plugin_bprop_trigger')  # must be trainable

            forward_result = hugectr.fprop(embedding_name=embedding_name,
                                           sparse_indices=sparse_indices,
                                           values=values,
                                           dense_shape=keys.shape,
                                           output_type=tf.float32,
                                           is_training=True,
                                           bp_trigger=bp_trigger)
            print("first step: \n", forward_result)

            # differentiating w.r.t. the trainable bp_trigger is what fires the
            # plugin's backward/update pass; the returned grads are not used here
            grads = tape.gradient(forward_result, bp_trigger)

            forward_result = hugectr.fprop(embedding_name=embedding_name,
                                           sparse_indices=sparse_indices,
                                           values=values,
                                           dense_shape=keys.shape,
                                           output_type=tf.float32,
                                           is_training=False,
                                           bp_trigger=bp_trigger)
            print("second step: \n", forward_result)