Example #1
from collections import OrderedDict

from tensorflow.python.keras.layers import Input


# create_singlefeat_inputdict is a library helper (import path varies by version).
def get_input(feature_dim_dict, seq_feature_list, seq_max_len):
    sparse_input, dense_input = create_singlefeat_inputdict(feature_dim_dict)
    # One fixed-length Input per behavior-sequence feature, named 'seq_<i>-<feat>'.
    user_behavior_input = OrderedDict()
    for i, feat in enumerate(seq_feature_list):
        user_behavior_input[feat] = Input(shape=(seq_max_len,),
                                          name='seq_' + str(i) + '-' + feat)

    return sparse_input, dense_input, user_behavior_input
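
A minimal usage sketch (the feature configuration and feature names below are
illustrative, not taken from the library):

feature_dim_dict = {'sparse': {'item': 4, 'item_gender': 2}, 'dense': []}
sparse_input, dense_input, user_behavior_input = get_input(
    feature_dim_dict, ['item', 'item_gender'], seq_max_len=10)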
Example #2
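# create_singlefeat_inputdict, create_varlenfeat_inputdict, get_inputs_embedding
# and get_linear_logit are helpers from the surrounding library (DeepCTR-style);
# import paths vary by version.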
def preprocess_input_embedding(
    feature_dim_dict,
    embedding_size,
    l2_reg_embedding,
    l2_reg_linear,
    init_std,
    seed,
    return_linear_logit=True,
):
    sparse_input_dict, dense_input_dict = create_singlefeat_inputdict(
        feature_dim_dict)
    (sequence_input_dict, sequence_input_len_dict,
     sequence_max_len_dict) = create_varlenfeat_inputdict(feature_dim_dict)
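    # First pass: model inputs plus the deep and (optional) linear embedding lists.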
    inputs_list, deep_emb_list, linear_emb_list = get_inputs_embedding(
        feature_dim_dict,
        embedding_size,
        l2_reg_embedding,
        l2_reg_linear,
        init_std,
        seed,
        sparse_input_dict,
        dense_input_dict,
        sequence_input_dict,
        sequence_input_len_dict,
        sequence_max_len_dict,
        return_linear_logit,
        prefix='')
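    # Second pass with prefix 'fg': an independent embedding group for the same
    # features; no linear embeddings are needed here, hence the positional False
    # for return_linear_logit.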
    _, fg_deep_emb_list, _ = get_inputs_embedding(feature_dim_dict,
                                                  embedding_size,
                                                  l2_reg_embedding,
                                                  l2_reg_linear,
                                                  init_std,
                                                  seed,
                                                  sparse_input_dict,
                                                  dense_input_dict,
                                                  sequence_input_dict,
                                                  sequence_input_len_dict,
                                                  sequence_max_len_dict,
                                                  False,
                                                  prefix='fg')
    if return_linear_logit:
        linear_logit = get_linear_logit(linear_emb_list, dense_input_dict,
                                        l2_reg_linear)
    else:
        linear_logit = None
    return deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list
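
A minimal call sketch (feature_dim_dict as in the other examples; the
hyperparameter values are illustrative):

deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list = \
    preprocess_input_embedding(feature_dim_dict, embedding_size=8,
                               l2_reg_embedding=1e-5, l2_reg_linear=1e-5,
                               init_std=1e-4, seed=1024)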
Example #3
from tensorflow.python.keras.layers import Concatenate, Dense, Flatten, add
from tensorflow.python.keras.models import Model

# create_singlefeat_inputdict, create_embedding_dict, get_embedding_vec_list,
# get_inputs_list, DNN and PredictionLayer are helpers/layers from the
# surrounding library (DeepCTR-style); import paths vary by version.


def WDL(
    deep_feature_dim_dict,
    wide_feature_dim_dict,
    embedding_size=8,
    dnn_hidden_units=(128, 128),
    l2_reg_linear=1e-5,
    l2_reg_embedding=1e-5,
    l2_reg_dnn=0,
    init_std=0.0001,
    seed=1024,
    dnn_dropout=0,
    dnn_activation='relu',
    task='binary',
):
    """Instantiates the Wide&Deep Learning architecture.

    :param deep_feature_dim_dict: dict,to indicate sparse field and dense field in deep part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param wide_feature_dim_dict: dict,to indicate sparse field and dense field in wide part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    if (not isinstance(deep_feature_dim_dict, dict)
            or "sparse" not in deep_feature_dim_dict
            or "dense" not in deep_feature_dim_dict):
        raise ValueError(
            "deep_feature_dim_dict must be a dict like "
            "{'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )

    # Input placeholders for the deep part and (prefixed 'bias') the wide part.
    sparse_input, dense_input = create_singlefeat_inputdict(
        deep_feature_dim_dict)
    bias_sparse_input, bias_dense_input = create_singlefeat_inputdict(
        wide_feature_dim_dict, 'bias')
    # Embedding tables: embedding_size-dimensional vectors for the deep part,
    # 1-dimensional ('linear') weights for the wide part.
    sparse_embedding = create_embedding_dict(deep_feature_dim_dict,
                                             embedding_size, init_std, seed,
                                             l2_reg_embedding)
    wide_linear_embedding = create_embedding_dict(wide_feature_dim_dict, 1,
                                                  init_std, seed,
                                                  l2_reg_linear, 'linear')

    embed_list = get_embedding_vec_list(sparse_embedding, sparse_input,
                                        deep_feature_dim_dict['sparse'])

    # Deep part: concatenate the sparse embeddings, flatten, then append the
    # raw dense inputs.
    deep_input = Concatenate()(
        embed_list) if len(embed_list) > 1 else embed_list[0]
    deep_input = Flatten()(deep_input)
    if len(dense_input) > 0:
        deep_input = Concatenate()([deep_input] + list(dense_input.values()))

    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   False, seed)(deep_input)
    deep_logit = Dense(1, use_bias=False, activation=None)(deep_out)
    final_logit = deep_logit
    # Wide part: add a linear term over the wide sparse embeddings and a
    # linear projection of the wide dense inputs onto the deep logit.
    if len(wide_feature_dim_dict['dense']) + len(
            wide_feature_dim_dict['sparse']) > 0:
        if len(wide_feature_dim_dict['sparse']) > 0:
            bias_embed_list = get_embedding_vec_list(
                wide_linear_embedding, bias_sparse_input,
                wide_feature_dim_dict['sparse'])
            linear_term = add(bias_embed_list) if len(
                bias_embed_list) > 1 else bias_embed_list[0]
            final_logit = add([final_logit, linear_term])
        if len(wide_feature_dim_dict['dense']) > 0:
            dense_inputs = list(bias_dense_input.values())
            wide_dense_input = (Concatenate()(dense_inputs)
                                if len(dense_inputs) > 1 else dense_inputs[0])
            wide_dense_term = Dense(1, use_bias=False,
                                    activation=None)(wide_dense_input)
            final_logit = add([final_logit, wide_dense_term])

    output = PredictionLayer(task)(final_logit)

    inputs_list = get_inputs_list(
        [sparse_input, dense_input, bias_sparse_input, bias_dense_input])
    model = Model(inputs=inputs_list, outputs=output)
    return model
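
A minimal usage sketch, using the dict format documented in the docstring
(field names and cardinalities are illustrative):

model = WDL(
    deep_feature_dim_dict={'sparse': {'field_1': 4, 'field_2': 3},
                           'dense': ['field_4']},
    wide_feature_dim_dict={'sparse': {'field_3': 2}, 'dense': ['field_5']},
    embedding_size=8)
model.compile('adam', 'binary_crossentropy')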
Example #4
import itertools

import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Dense, Lambda, add, multiply
from tensorflow.python.keras.models import Model

# check_feature_config_dict, create_singlefeat_inputdict, create_embedding_dict,
# get_embedding_vec_list, get_linear_logit, get_inputs_list, concat_fun, Hash,
# DNN and PredictionLayer are helpers/layers from the surrounding library
# (DeepCTR-style); import paths vary by version.


def NFFM(
    feature_dim_dict,
    embedding_size=4,
    dnn_hidden_units=(128, 128),
    l2_reg_embedding=1e-5,
    l2_reg_linear=1e-5,
    l2_reg_dnn=0,
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
    include_linear=True,
    use_bn=True,
    reduce_sum=False,
    task='binary',
):
    """Instantiates the Field-aware Neural Factorization Machine architecture.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param include_linear: bool,whether include linear term or not
    :param use_bn: bool,whether use bn after ffm out or not
    :param reduce_sum: bool,whether apply reduce_sum on cross vector
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    check_feature_config_dict(feature_dim_dict)
    if 'sequence' in feature_dim_dict and len(
            feature_dim_dict['sequence']) > 0:
        raise ValueError("now sequence input is not supported in NFFM"
                         )  #TODO:support sequence input

    sparse_input_dict, dense_input_dict = create_singlefeat_inputdict(
        feature_dim_dict)

    sparse_embedding, dense_embedding, linear_embedding = create_embedding_dict(
        feature_dim_dict,
        embedding_size,
        init_std,
        seed,
        l2_reg_embedding,
        l2_reg_linear,
    )

    # Field-aware pairwise interactions between sparse fields. Each entry of
    # feature_dim_dict['sparse'] is a feature description exposing
    # .name, .dimension and .hash_flag.
    embed_list = []
    for i, j in itertools.combinations(feature_dim_dict['sparse'], 2):
        i_input = sparse_input_dict[i.name]
        if i.hash_flag:
            i_input = Hash(i.dimension)(i_input)
        j_input = sparse_input_dict[j.name]
        if j.hash_flag:
            j_input = Hash(j.dimension)(j_input)

        # Field-aware product: feature i's embedding for field j times
        # feature j's embedding for field i.
        element_wise_prod = multiply([
            sparse_embedding[i.name][j.name](i_input),
            sparse_embedding[j.name][i.name](j_input)
        ])
        if reduce_sum:
            element_wise_prod = Lambda(
                lambda x: K.sum(x, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)
    for i, j in itertools.combinations(feature_dim_dict['dense'], 2):
        element_wise_prod = multiply([
            dense_embedding[i.name][j.name](dense_input_dict[i.name]),
            dense_embedding[j.name][i.name](dense_input_dict[j.name])
        ])
        if reduce_sum:
            element_wise_prod = Lambda(
                lambda x: K.sum(x, axis=-1))(element_wise_prod)
        # Dense-dense crosses lack the length-1 field axis that embedding
        # lookups produce, so one is added before the axis-1 concat below.
        embed_list.append(
            Lambda(lambda x: K.expand_dims(x, axis=1))(element_wise_prod))

    for i in feature_dim_dict['sparse']:
        i_input = sparse_input_dict[i.name]
        if i.hash_flag:
            i_input = Hash(i.dimension)(i_input)
        for j in feature_dim_dict['dense']:
            element_wise_prod = multiply([
                sparse_embedding[i.name][j.name](i_input),
                dense_embedding[j.name][i.name](dense_input_dict[j.name])
            ])

            if reduce_sum:
                element_wise_prod = Lambda(
                    lambda x: K.sum(x, axis=-1))(element_wise_prod)
            embed_list.append(element_wise_prod)

    # Concatenate all interaction vectors along the field axis (concat_fun is
    # a library helper) and flatten into the FFM output.
    ffm_out = tf.keras.layers.Flatten()(concat_fun(embed_list, axis=1))
    if use_bn:
        ffm_out = tf.keras.layers.BatchNormalization()(ffm_out)
    ffm_out = DNN(dnn_hidden_units,
                  l2_reg=l2_reg_dnn,
                  dropout_rate=dnn_dropout)(ffm_out)
    final_logit = Dense(1, use_bias=False)(ffm_out)

    # Optional linear (wide) term built from 1-dimensional embeddings of the
    # sparse features plus the dense inputs.
    linear_emb_list = get_embedding_vec_list(linear_embedding,
                                             sparse_input_dict,
                                             feature_dim_dict['sparse'])

    linear_logit = get_linear_logit(linear_emb_list, dense_input_dict,
                                    l2_reg_linear)

    if include_linear:
        final_logit = add([final_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)

    inputs_list = get_inputs_list([sparse_input_dict, dense_input_dict])
    model = Model(inputs=inputs_list, outputs=output)
    return model
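
A minimal usage sketch. Because the interaction loops read .name, .dimension
and .hash_flag from each feature entry, a namedtuple stands in for the
library's feature class here (a hypothetical stand-in, not the real class):

from collections import namedtuple

Feat = namedtuple('Feat', ['name', 'dimension', 'hash_flag'])  # hypothetical
feature_dim_dict = {'sparse': [Feat('field_1', 4, False),
                               Feat('field_2', 3, False)],
                    'dense': []}
model = NFFM(feature_dim_dict, embedding_size=4)
model.compile('adam', 'binary_crossentropy')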