Example #1
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='LR',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels
        self.mode = mode

        with tf.variable_scope(scope, default_name='embeddings'):
            linear_dense_value_list, linear_sparse_embedding_list = input_from_feature_columns(
                self.inputs,
                self.config.linear_feature_columns,
                target='linear')

        linear_logits = Linear()(linear_dense_value_list,
                                 linear_sparse_embedding_list)
        self.logits = linear_logits

        if self.mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            if len(labels.shape) == 1:
                labels = tf.expand_dims(labels, axis=-1)
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels) +
                tf.losses.get_regularization_loss())
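
The snippet above only exposes self.logits and self.loss; how they are consumed is not shown. Below is a minimal sketch of wiring such a class into a tf.estimator model_fn, assuming the class is named LR and that the model config is passed through params (both names are assumptions, not part of the snippet).

def model_fn(features, labels, mode, params):
    # `LR` and `params['model_config']` are assumed names; only the
    # constructor body appears in the example above.
    model = LR(params['model_config'], features, labels, mode=mode)
    predictions = {'probabilities': tf.sigmoid(model.logits)}
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
        model.loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode,
                                      loss=model.loss,
                                      train_op=train_op,
                                      predictions=predictions)
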
Example #2
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='FFM',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels

        with tf.variable_scope(scope, default_name='embeddings'):
            _, ffm_sparse_embedding_dict = input_from_feature_columns(
                self.inputs,
                self.config.linear_feature_columns,
                target='dnn',
                field=True)
            linear_dense_value_list, linear_sparse_embedding_list = input_from_feature_columns(
                self.inputs,
                self.config.linear_feature_columns,
                target='linear',
                field=False)

        linear_logits = Linear()(linear_dense_value_list,
                                 linear_sparse_embedding_list)
        self.logits = linear_logits

        sparse_feature_columns = [
            fc for fc in self.config.linear_feature_columns
            if isinstance(fc, SparseFeature)
        ]
        if len(sparse_feature_columns) > 1:
            ffm_logits = self.ffm(sparse_feature_columns,
                                  ffm_sparse_embedding_dict)
            self.logits += ffm_logits

        if mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            if len(labels.shape) == 1:
                labels = tf.expand_dims(labels, axis=-1)
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels))

            regularity_loss = tf.losses.get_regularization_loss()
            # tf.losses.get_regularization_losses returns a list; tf.losses.get_regularization_loss returns the already-summed total.
            self.loss = self.loss + regularity_loss
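
The self.ffm helper called above is not part of the snippet. A minimal sketch of the field-aware pairwise interaction follows, assuming ffm_sparse_embedding_dict[feature][field] is a (batch_size, embedding_dim) tensor and that each feature column exposes a name attribute (both assumptions).

    def ffm(self, sparse_feature_columns, ffm_sparse_embedding_dict):
        # Field-aware interactions: <v_{i,f_j}, v_{j,f_i}> summed over all
        # feature pairs. The dict layout and the `name` attribute are
        # assumptions; they are not shown in the snippet above.
        pair_terms = []
        for i in range(len(sparse_feature_columns)):
            for j in range(i + 1, len(sparse_feature_columns)):
                fi = sparse_feature_columns[i].name
                fj = sparse_feature_columns[j].name
                vi = ffm_sparse_embedding_dict[fi][fj]  # feature i in field of j
                vj = ffm_sparse_embedding_dict[fj][fi]  # feature j in field of i
                pair_terms.append(
                    tf.reduce_sum(vi * vj, axis=-1, keepdims=True))
        return tf.add_n(pair_terms)  # (batch_size, 1), added to the linear logit
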
Example #3
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='PNN',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels
        self.mode = mode
        self.use_bn = model_config.use_bn
        self.method = model_config.method

        with tf.variable_scope(scope, default_name='embeddings'):
            dnn_dense_value_list, dnn_sparse_embedding_list = input_from_feature_columns(
                self.inputs, self.config.dnn_feature_columns, target='dnn')
            for i in range(len(dnn_dense_value_list)):
                if len(dnn_dense_value_list[i].shape) == 1:
                    dnn_dense_value_list[i] = tf.expand_dims(
                        dnn_dense_value_list[i], axis=-1)

        linear_signal = tf.concat(dnn_dense_value_list +
                                  dnn_sparse_embedding_list,
                                  axis=-1)
        dnn_input = linear_signal
        if len(dnn_sparse_embedding_list) > 0:
            inner_product_signal = self.inner_product(
                dnn_sparse_embedding_list)
            # Append the pairwise inner-product signal to the linear signal.
            dnn_input = tf.concat([dnn_input, inner_product_signal], axis=-1)

        dnn_out = DNN(units=self.config.units,
                      activation=self.config.activation,
                      dropout_rate=self.config.dropout_rate,
                      use_bn=self.config.use_bn,
                      training=mode == tf.estimator.ModeKeys.TRAIN,
                      toonedim=False)([], [dnn_input])
        # hidden = dnn_input
        # for u in self.config.units:
        #     hidden =tf.layers.dense(hidden, u, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))
        #     if self.config.use_bn:
        #         hidden = tf.layers.batch_normalization(hidden, training=mode==tf.estimator.ModeKeys.TRAIN)
        #     hidden = tf.layers.dropout(hidden, rate=0.5, training=mode==tf.estimator.ModeKeys.TRAIN)

        self.logits = tf.layers.dense(
            dnn_out,
            1,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))
        self.logits = tf.reduce_sum(self.logits, axis=-1)

        if self.mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels) +
                tf.losses.get_regularization_loss())
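
The self.inner_product helper is not shown in the snippet. A minimal sketch, assuming each sparse embedding has shape (batch_size, embedding_dim):

    def inner_product(self, sparse_embedding_list):
        # Pairwise inner products of the sparse embeddings; the result has
        # shape (batch_size, n * (n - 1) / 2) for n embeddings.
        products = []
        n = len(sparse_embedding_list)
        for i in range(n):
            for j in range(i + 1, n):
                products.append(
                    tf.reduce_sum(sparse_embedding_list[i] *
                                  sparse_embedding_list[j],
                                  axis=-1,
                                  keepdims=True))
        return tf.concat(products, axis=-1)
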
Example #4
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='WDL',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels
        self.mode = mode
        self.use_bn = model_config.use_bn

        with tf.variable_scope(scope, default_name='embeddings'):
            linear_dense_value_list, linear_sparse_embedding_list = input_from_feature_columns(
                self.inputs,
                self.config.linear_feature_columns,
                target='linear')
            dnn_dense_value_list, dnn_sparse_embedding_list = input_from_feature_columns(
                self.inputs, self.config.dnn_feature_columns, target='dnn')

        linear_logits = Linear()(linear_dense_value_list,
                                 linear_sparse_embedding_list)
        dnn_logits = DNN(units=self.config.units,
                         activation=self.config.activation,
                         dropout_rate=self.config.dropout_rate,
                         use_bn=True,
                         training=tf.estimator.ModeKeys.TRAIN == mode,
                         toonedim=True)(dnn_dense_value_list,
                                        dnn_sparse_embedding_list)

        self.logits = linear_logits + dnn_logits

        if self.mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            if len(labels.shape) == 1:
                labels = tf.expand_dims(labels, axis=-1)
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels) +
                tf.losses.get_regularization_loss())
Example #5
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='DCN',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels
        self.mode = mode
        self.use_bn = model_config.use_bn
        self.num_crosses = model_config.num_crosses

        with tf.variable_scope(scope, default_name='embeddings'):
            dnn_dense_value_list, dnn_sparse_embedding_list = input_from_feature_columns(
                self.inputs, self.config.dnn_feature_columns, target='dnn')

        if len(dnn_sparse_embedding_list) > 0:
            cross_logits = self.cross(dnn_sparse_embedding_list)
            deep_logits = DNN(units=self.config.units,
                              activation=self.config.activation,
                              dropout_rate=self.config.dropout_rate,
                              use_bn=self.config.use_bn,
                              training=mode == tf.estimator.ModeKeys.TRAIN,
                              toonedim=False)(dnn_dense_value_list,
                                              dnn_sparse_embedding_list)

            logits = tf.concat([cross_logits, deep_logits], axis=-1)
        else:
            deep_logits = DNN(units=self.config.units,
                              activation=self.config.activation,
                              dropout_rate=self.config.dropout_rate,
                              use_bn=self.config.use_bn,
                              training=mode == tf.estimator.ModeKeys.TRAIN,
                              toonedim=False)(dnn_dense_value_list,
                                              dnn_sparse_embedding_list)
            logits = deep_logits

        self.logits = tf.layers.dense(
            logits,
            1,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))
        self.logits = tf.reduce_sum(self.logits, axis=-1)

        if self.mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels) +
                tf.losses.get_regularization_loss())
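
The self.cross helper is not part of the snippet. A minimal sketch of the Deep & Cross cross network follows, assuming the sparse embeddings are concatenated into a flat x_0 and self.num_crosses cross layers are stacked.

    def cross(self, sparse_embedding_list):
        # Cross network: x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l.
        x0 = tf.concat(sparse_embedding_list, axis=-1)  # (batch_size, d)
        xl = x0
        dim = int(x0.shape[-1])
        for l in range(self.num_crosses):
            w = tf.get_variable('cross_weight_%d' % l, shape=[dim, 1])
            b = tf.get_variable('cross_bias_%d' % l, shape=[dim])
            xl_w = tf.matmul(xl, w)          # (batch_size, 1)
            xl = x0 * xl_w + b + xl          # broadcast over the feature dim
        return xl                            # (batch_size, d)
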
Example #6
    def __init__(self,
                 model_config,
                 inputs,
                 labels,
                 scope='MLR',
                 mode=tf.estimator.ModeKeys.TRAIN):
        self.config = copy.deepcopy(model_config)
        self.inputs = inputs
        self.labels = labels
        self.mode = mode
        self.num_lr = model_config.num_lr

        with tf.variable_scope(scope, default_name='embeddings'):
            linear_dense_value_list, linear_sparse_embedding_list = input_from_feature_columns(
                self.inputs,
                self.config.linear_feature_columns,
                target='linear')

        lr_logits_list = []
        for i in range(self.num_lr):
            with tf.variable_scope('lr%d' % i, default_name='linear'):
                lr_logits = Linear()(linear_dense_value_list,
                                     linear_sparse_embedding_list)
                lr_logits_list.append(lr_logits)

        logits = tf.concat(lr_logits_list, axis=-1)
        self.logits = tf.layers.dense(logits, 1, name='logits_weight')
        self.logits = tf.reduce_mean(self.logits, axis=-1)

        if self.mode == tf.estimator.ModeKeys.PREDICT:
            self.loss = None
        else:
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                        labels=labels) +
                tf.losses.get_regularization_loss())
Example #7
def DeepFM(linear_feature_columns,
           dnn_feature_columns,
           embedding_size=8,
           use_fm=True,
           dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001,
           l2_reg_dnn=0,
           init_std=0.0001,
           seed=1024,
           dnn_dropout=0,
           dnn_activation='relu',
           dnn_use_bn=False,
           task='binary',
           att=False,
           seq_len=None,
           cate_feats=[],
           cate2nunique={}):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding size
    :param use_fm: bool, use the FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the units of each layer of the DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, to use as random seed
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate
    :param dnn_activation: activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param att: bool, whether to apply an attention block over the LSTM outputs
    :param seq_len: length of the sequences fed to the LSTM input
    :param cate_feats: list of categorical feature names carried in the sequence input
    :param cate2nunique: dict mapping each categorical sequence feature to its number of unique values (used as embedding input_dim when no shared embedding exists)
    :return: A Keras model instance.
    """

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list, embedding_dict = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    input_lstm = Input(shape=(seq_len, 1 + len(cate_feats)), name='lstm_input')
    input_lstm_gap = Lambda(lambda x: x[:, :, 0:1])(input_lstm)
    concate_list = [input_lstm_gap]
    for i, cate in enumerate(cate_feats):
        # Bind the slice index as a default argument so each Lambda keeps its
        # own index (avoids the late-binding closure pitfall on reload).
        input_cate = Lambda(lambda x, idx=i + 1: x[:, :, idx])(input_lstm)
        emb = embedding_dict.get(cate)
        if emb is None:
            emb = Embedding(output_dim=8, input_dim=cate2nunique[cate])
        concate_list.append(emb(input_cate))
    input_lstm_concat = Concatenate(axis=-1)(concate_list)
    if att:
        lstm_out = LSTM(units=128, return_sequences=True)(input_lstm_concat)
        attention_mul = attention_3d_block(lstm_out, seq_len)
        lstm_out = Lambda(lambda x: K.sum(x, axis=1))(attention_mul)
    else:
        lstm_out = LSTM(units=128, return_sequences=False)(input_lstm_concat)

    dnn_input = concat_fun([dnn_input, lstm_out])
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    if len(dnn_hidden_units) == 0 and not use_fm:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and use_fm:  # linear + FM
        final_logit = tf.keras.layers.add([linear_logit, fm_logit])
    elif len(dnn_hidden_units) > 0 and not use_fm:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, dnn_logit])
    elif len(dnn_hidden_units) > 0 and use_fm:  # linear + FM + Deep
        final_logit = tf.keras.layers.add([linear_logit, fm_logit, dnn_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list + [input_lstm],
                                  outputs=output)
    return model
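
A minimal usage sketch follows. The SparseFeat/DenseFeat constructors, the column names, and the vocabulary sizes are assumptions in DeepCTR style; only the keyword arguments of DeepFM itself come from the snippet above.

# Hypothetical feature definitions (SparseFeat/DenseFeat and all names below
# are assumptions; they are not part of the snippet above).
sparse_columns = [SparseFeat('user_id', 10000), SparseFeat('item_id', 50000)]
dense_columns = [DenseFeat('price', 1)]

model = DeepFM(linear_feature_columns=sparse_columns + dense_columns,
               dnn_feature_columns=sparse_columns + dense_columns,
               embedding_size=8,
               att=True,
               seq_len=30,
               cate_feats=['item_cate'],
               cate2nunique={'item_cate': 500})
model.compile(optimizer='adam', loss='binary_crossentropy')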