def __init__(self,
                batch_size,
                gpus,
                init_value, 
                name_,
                embedding_type,
                optimizer_type,
                max_vocabulary_size_per_gpu,
                opt_hparams,
                update_type,
                atomic_update,
                scaler,
                slot_num,
                max_nnz,
                max_feature_num,
                embedding_vec_size,
                combiner,
                num_dense_layers,
                input_buffer_reset=False):
        super(PluginSparseModel, self).__init__()

        self.num_dense_layers = num_dense_layers
        self.input_buffer_reset = input_buffer_reset

        self.batch_size = batch_size
        self.slot_num = slot_num
        self.embedding_vec_size = embedding_vec_size
        self.gpus = gpus

        # init() must be called exactly once per process; it creates the resource
        # manager for the embedding plugin.
        hugectr_tf_ops_v2.init(visible_gpus=gpus, seed=0, key_type='int64', value_type='float',
                               batch_size=batch_size, batch_size_eval=len(gpus))

        # Create one embedding layer; its embedding_name stays unique even when the
        # model holds more than one embedding layer.
        self.embedding_name = hugectr_tf_ops_v2.create_embedding(init_value=init_value,
                                name_=name_, embedding_type=embedding_type, optimizer_type=optimizer_type,
                                max_vocabulary_size_per_gpu=max_vocabulary_size_per_gpu, opt_hparams=opt_hparams,
                                update_type=update_type, atomic_update=atomic_update, scaler=scaler, slot_num=slot_num,
                                max_nnz=max_nnz, max_feature_num=max_feature_num, embedding_vec_size=embedding_vec_size,
                                combiner=combiner)

        # define the dense part of this DNN model
        self.dense_layers = []
        for _ in range(self.num_dense_layers - 1):
            self.dense_layers.append(tf.keras.layers.Dense(units=1024, activation='relu'))

        self.out_layer = tf.keras.layers.Dense(units=1, activation='sigmoid', use_bias=True,
                                                kernel_initializer='glorot_normal', 
                                                bias_initializer='glorot_normal')
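
    # A minimal sketch (an assumption, not part of the original snippet) of a forward
    # pass that consumes this embedding via fprop_experimental, mirroring the op
    # signature used in Example #2 below; it presumes a `bp_trigger` dummy weight is
    # created in build(), as TestModel there does.
    @tf.function
    def call(self, row_offset, values, nnz, training=True):
        replica_ctx = tf.distribute.get_replica_context()
        embedding = hugectr_tf_ops_v2.fprop_experimental(
            self.embedding_name,
            replica_ctx.replica_id_in_sync_group,
            row_offset, values, nnz,
            self.bp_trigger,
            input_buffer_reset=self.input_buffer_reset)
        # flatten [batch_per_replica, slot_num, vec_size] for the dense stack
        hidden = tf.reshape(embedding, [-1, self.slot_num * self.embedding_vec_size])
        for layer in self.dense_layers:
            hidden = layer(hidden)
        return self.out_layer(hidden)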
Example #2
    def _v2_fprop_v1_test():
        print("[INFO]: Testing plugin_v2 fprop_experimental vs tf..")
        if vocabulary_size < slot_num:
            raise ValueError("vocabulary_size must be >= slot_num.")

        # generate initial values
        init_value, input_keys = generate_embedding_init_value_and_inputs()

        # -------------------------------- hugectr ops ------------------------------------ #
        class TestModel(tf.keras.models.Model):
            def __init__(self, init_value, name_, embedding_type,
                         optimizer_type, max_vocabulary_size_per_gpu,
                         opt_hparams, update_type, atomic_update, scaler,
                         slot_num, max_nnz, max_feature_num,
                         embedding_vec_size, combiner):
                super(TestModel, self).__init__()

                self.input_buffer_reset = (embedding_type == "distributed")

                self.embedding_name = hugectr_tf_ops_v2.create_embedding(
                    init_value=init_value,
                    name_=name_,
                    embedding_type=embedding_type,
                    optimizer_type=optimizer_type,
                    max_vocabulary_size_per_gpu=max_vocabulary_size_per_gpu,
                    opt_hparams=opt_hparams,
                    update_type=update_type,
                    atomic_update=atomic_update,
                    scaler=scaler,
                    slot_num=slot_num,
                    max_nnz=max_nnz,
                    max_feature_num=max_feature_num,
                    embedding_vec_size=embedding_vec_size,
                    combiner=combiner)

            def build(self, _):
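                # dummy trainable variable: routing a gradient into it is what
                # triggers the plugin's backward/update pass, since the embedding
                # parameters themselves live outside TensorFlow's variables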
                self.bp_trigger = self.add_weight(name="bp_trigger",
                                                  shape=(1, ),
                                                  dtype=tf.float32,
                                                  trainable=True)

            @tf.function
            def call(self, row_offset, values, nnz, training=True):
                replica_ctx = tf.distribute.get_replica_context()
                result = hugectr_tf_ops_v2.fprop_experimental(
                    self.embedding_name,
                    replica_ctx.replica_id_in_sync_group,
                    row_offset,
                    values,
                    nnz,
                    self.bp_trigger,
                    input_buffer_reset=self.input_buffer_reset)
                return result

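        # init() creates the plugin's resource manager; it must be called exactly once
        # per process before any embedding is created.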
        hugectr_tf_ops_v2.init(visible_gpus=gpus,
                               seed=0,
                               key_type='int64',
                               value_type='float',
                               batch_size=batch_size,
                               batch_size_eval=len(gpus))

        strategy = tf.distribute.MirroredStrategy(
            devices=['/GPU:' + str(i) for i in gpus])
        with strategy.scope():
            hugectr_model = TestModel(
                init_value=init_value,
                name_='test_embedding',
                embedding_type=embedding_type,
                optimizer_type='Adam',
                max_vocabulary_size_per_gpu=(vocabulary_size // len(gpus)) * 2 + 1,
                opt_hparams=[0.1, 0.9, 0.99, 1e-5],
                update_type='Global',
                atomic_update=True,
                scaler=1.0,
                slot_num=slot_num,
                max_nnz=max_nnz,
                max_feature_num=slot_num * max_nnz,
                embedding_vec_size=embedding_vec_size,
                combiner='sum')
            opt = tf.keras.optimizers.Adam(learning_rate=0.1,
                                           beta_1=0.9,
                                           beta_2=0.99,
                                           epsilon=1e-5)
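            # NOTE: opt_hparams above ([lr, beta1, beta2, epsilon]) match this Keras
            # Adam exactly, so the plugin-side and TF-side updates stay comparable.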

        # preprocess inputs
        dataset_utils = CreateDataset(dataset_names=None,
                                      feature_desc=None,
                                      batch_size=batch_size,
                                      n_epochs=None,
                                      slot_num=slot_num,
                                      max_nnz=max_nnz,
                                      convert_to_csr=None,
                                      gpu_count=len(gpus),
                                      embedding_type=embedding_type,
                                      get_row_indices=None)
        if "distributed" == embedding_type:
            row_offsets, value_tensors, nnz_array = dataset_utils._distribute_keys_for_distributed(
                input_keys)
        elif "localized" == embedding_type:
            row_offsets, value_tensors, nnz_array = dataset_utils._distribute_keys_for_localized(
                input_keys)
        else:
            raise ValueError("Unsupported embedding_type %s." % embedding_type)

        # forward function
        @tf.function
        def hugectr_train_step(row_offset, values, nnz):
            with tf.GradientTape() as tape:
                forward_result = hugectr_model(row_offset, values, nnz)

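            # no explicit loss is computed: tape.gradient on the non-scalar forward
            # output implicitly sums over its elements, and tf_train_step below does
            # the same, so the two sides receive comparable gradients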
            grads = tape.gradient(forward_result,
                                  hugectr_model.trainable_weights)
            opt.apply_gradients(zip(grads, hugectr_model.trainable_weights))
            return forward_result

        # -------------------------------- tf ops ------------------------------------------- #
        reshape_input_keys = np.reshape(input_keys, [-1, max_nnz])
        tf_indices = tf.where(reshape_input_keys != -1)
        tf_values = tf.gather_nd(reshape_input_keys, tf_indices)
        sparse_tensor = tf.sparse.SparseTensor(tf_indices, tf_values,
                                               reshape_input_keys.shape)
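        # keys equal to -1 act as padding; the tf.where / gather_nd pair above keeps
        # only the valid positions, so this SparseTensor mirrors the CSR-style inputs
        # fed to the plugin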

        tf_embedding_layer = OriginalEmbedding(
            vocabulary_size=vocabulary_size,
            embedding_vec_size=embedding_vec_size,
            initializer=init_value,
            combiner='sum',
            gpus=gpus)

        tf_opt = tf.keras.optimizers.Adam(learning_rate=0.1,
                                          beta_1=0.9,
                                          beta_2=0.99,
                                          epsilon=1e-5)

        @tf.function
        def tf_train_step(sparse_tensor):
            with tf.GradientTape() as tape:
                tf_forward = tf_embedding_layer(
                    sparse_tensor,
                    output_shape=[batch_size, slot_num, embedding_vec_size])

            grads = tape.gradient(tf_forward,
                                  tf_embedding_layer.trainable_weights)
            tf_opt.apply_gradients(
                zip(grads, tf_embedding_layer.trainable_weights))
            return tf_forward

        # ------------------ comparison ---------------------------------------------------- #
        for iteration in range(2):
            replica_row_offsets = PerReplica(row_offsets)
            replica_values = PerReplica(value_tensors)
            replica_nnz = PerReplica(nnz_array)
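            # wrap the per-GPU shards as PerReplica values so that strategy.run
            # dispatches one shard to each replica of hugectr_train_step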

            hugectr_forward = strategy.run(hugectr_train_step,
                                           args=(replica_row_offsets,
                                                 replica_values, replica_nnz))
            if len(gpus) > 1:
                hugectr_forward = tf.concat(hugectr_forward.values, axis=0)

            tf_forward = tf_train_step(sparse_tensor)

            try:
                tf.debugging.assert_near(hugectr_forward,
                                         tf_forward,
                                         rtol=1e-4,
                                         atol=1e-5)
            except tf.errors.InvalidArgumentError:
                raise
            else:
                print(
                    "[INFO]: The results from HugeCTR and tf in iteration %d are the same"
                    % (iteration + 1))

        # --------------------- release resources -------------------------------------- #
        hugectr_tf_ops_v2.reset()
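
    # A hypothetical driver (an assumption, not part of the original snippet): the
    # test reads module-level globals, so values along these lines must be defined
    # before calling it, e.g.
    #
    #     gpus = [0, 1]
    #     batch_size = 1024
    #     slot_num = 26
    #     max_nnz = 3
    #     embedding_vec_size = 16
    #     vocabulary_size = 40960
    #     embedding_type = "distributed"
    #     _v2_fprop_v1_test()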