Python DIN.DINの例、deepctr.models.DIN.DIN Pythonの例

コード例 #1

0

ファイルを表示

ファイル: DIN_test.py プロジェクト: nwf5d/DeepCTR

def test_DIN_sum():
    """Fit DIN with ``use_din=False`` and round-trip it through HDF5 files.

    The toy inputs come from ``get_xy_fd``. After fitting with half of the
    data held out for validation, the weights are saved and reloaded, and
    then the whole model is saved and reloaded. A progress line is printed
    after each stage.
    """
    model_name = "DIN_sum"
    weights_file = model_name + '_weights.h5'
    model_file = model_name + '.h5'

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = {
        'hist_len_max': 4,
        'embedding_size': 8,
        'use_din': False,
        'hidden_size': [4, 4, 4],
        'keep_prob': 0.6,
        'activation': "sigmoid",
    }
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)

    # Stage 1: compile and fit.
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Stage 2: weights-only save/load.
    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(model_name + " test save load weight pass!")

    # Stage 3: full-model save/load (needs the project's custom_objects).
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")

コード例 #2

0

ファイルを表示

def test_DIN_att():
    """Fit DIN with ``use_din=True`` and round-trip its weights through HDF5.

    The toy inputs come from ``get_xy_fd``. Only the weights are saved and
    reloaded here; a progress line is printed after each stage.
    """
    model_name = "DIN_att"
    weights_file = model_name + '_weights.h5'

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = {
        'hist_len_max': 4,
        'embedding_size': 8,
        'use_din': True,
        'hidden_size': [4, 4, 4],
        'keep_prob': 0.6,
    }
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)

    # Stage 1: compile and fit.
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Stage 2: weights-only save/load.
    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(model_name + " test save load weight pass!")

    # NOTE: the full-model save_model/load_model round trip is intentionally
    # not run in this test; the original author disabled it and recorded
    # "There is a bug when save model use Dice".

    print(model_name + " test pass!")

コード例 #3

0

ファイルを表示

def test_DIN_sum():
    """Build DIN with ``use_din=False`` and hand it to ``check_model``.

    The toy inputs come from ``get_xy_fd``; all checking is delegated to the
    shared ``check_model`` helper.
    """
    model_name = "DIN_sum"
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = {
        'hist_len_max': 4,
        'embedding_size': 8,
        'use_din': False,
        'hidden_size': [4, 4, 4],
        'keep_prob': 0.6,
        'activation': "sigmoid",
    }
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)

    check_model(model, model_name, x, y)

コード例 #4

0

ファイルを表示

ファイル: DIN_test.py プロジェクト: hunterhawk/DeepCTR

def test_DIN_model_io():
    """Save a compiled (unfitted) DIN model to HDF5 and load it back.

    The model is built with ``use_din=True`` on the toy inputs from
    ``get_xy_fd``. Fitting is deliberately skipped (the fit call was
    commented out by the original author), so only the save/load path runs.
    """
    model_name = "DIN_att"
    model_file = model_name + '.h5'

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = {
        'hist_len_max': 4,
        'embedding_size': 8,
        'use_din': True,
        'hidden_size': [4, 4, 4],
        'keep_prob': 0.6,
    }
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )

    # Full-model round trip (needs the project's custom_objects).
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")

コード例 #5

0

ファイルを表示

def example_din():
    """Train DIN on data loaded by ``get_xy_from_txt`` and print the metrics.

    Workflow notes recorded by the original author:

    1. Generate the training data as txt with comma-separated fields.
    2. Convert it to tfrecord.
    3. Read the data, distinguishing dense, sparse, VarLenSparse and
       user-behaviour-sequence features.
    4. Feed each of them into the model and see what happens.

    :return: None
    """
    # behavior_feature_list names the columns of the current item that should
    # attend over the historical behaviour. The data is read the same way as
    # before; only the names of the columns that need attention are collected
    # into that list.
    # (An earlier variant used get_xy_fd() here instead of get_xy_from_txt().)
    x, y, feature_columns, behavior_feature_list = get_xy_from_txt()

    model = DIN(feature_columns, behavior_feature_list)
    model.compile(
        'adam',
        keras.losses.binary_crossentropy,
        # binary_accuracy matches the binary cross-entropy objective.
        # categorical_accuracy compares argmax over the last axis, which is
        # uninformative for a single-probability output.
        metrics=[keras.metrics.AUC(), keras.metrics.binary_accuracy])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
    # Alternatives the author tried: building a tf.data.Dataset with
    # tf.data.Dataset.from_tensor_slices((x.values, y.values)) and passing it
    # to model.fit with validation_data=(x, y) or validation_split=0.5.

    # Print the recorded per-epoch metrics rather than the History object's
    # default repr, which carries no training information.
    print("history: ", history.history)

コード例 #6

0

ファイルを表示

def get_xy_fd():
    """Build a three-sample toy dataset for DIN.

    Returns:
        tuple: ``(x, y, feature_dim_dict, behavior_feature_list)``.
        ``x`` is a list of arrays ordered as the sparse features, then the
        dense features, then the ``hist_``-prefixed sequence of every
        behaviour feature. ``y`` is the list of labels.
    """
    sparse_feats = [
        SingleFeat('user', 3),
        SingleFeat('gender', 2),
        SingleFeat('item', 3 + 1),
        SingleFeat('item_gender', 2 + 1),
    ]
    dense_feats = [SingleFeat('score', 0)]
    feature_dim_dict = {"sparse": sparse_feats, "dense": dense_feats}
    behavior_feature_list = ["item", "item_gender"]

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_item_gender': np.array(
            [[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
    }

    # Model inputs: sparse columns, dense columns, then behaviour histories.
    x = []
    for feat in sparse_feats:
        x.append(feature_dict[feat.name])
    for feat in dense_feats:
        x.append(feature_dict[feat.name])
    for feat_name in behavior_feature_list:
        x.append(feature_dict['hist_' + feat_name])

    y = [1, 0, 1]
    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    # Manual run: build the toy data, then compile and fit DIN for 10 epochs
    # with half of the samples held out for validation.
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    history = model.fit(
        x,
        y,
        verbose=1,
        epochs=10,
        validation_split=0.5,
    )

コード例 #7

0

ファイルを表示

    hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
    seq_length = np.array([3, 3,
                           2])  # the actual length of the behavior sequence

    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item_id': iid,
        'cate_id': cate_id,
        'hist_item_id': hist_iid,
        'hist_cate_id': hist_cate_id,
        'pay_score': pay_score,
        'seq_length': seq_length
    }
    x = {
        name: feature_dict[name]
        for name in get_feature_names(feature_columns)
    }
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list


if __name__ == "__main__":
    # Manual run: build the toy data, then compile and fit DIN for 10 epochs
    # with half of the samples held out for validation.
    x, y, feature_columns, behavior_feature_list = get_xy_fd()

    model = DIN(feature_columns, behavior_feature_list)
    # Alternative model kept by the author for comparison:
    # model = BST(feature_columns, behavior_feature_list,att_head_num=4)
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    history = model.fit(
        x,
        y,
        verbose=1,
        epochs=10,
        validation_split=0.5,
    )

コード例 #8

0

ファイルを表示

        elif column == 'action_type':
            feature_columns += [SparseFeat(column, 4 + 1, embedding_dim=dim)]
        else:
            feature_columns += [DenseFeat(column, 1)]

# maxlen is the length of the history sequence; vocabulary_size is the size of
# the one-hot encoding.
# NOTE(review): 'action_type' is declared above with 4 + 1 values, while
# 'hist_action_type' (which shares its embedding via embedding_name) uses
# vocabulary_size=4 and embedding_dim=4 — confirm these are meant to match.
feature_columns += [
    VarLenSparseFeat(sparsefeat=SparseFeat('hist_merchant_id', vocabulary_size=1993, embedding_dim=8,
                                           embedding_name='merchant_id'), maxlen=M),
    VarLenSparseFeat(sparsefeat=SparseFeat('hist_action_type', vocabulary_size=4, embedding_dim=4,
                                           embedding_name='action_type'), maxlen=M)]
history_features = ['merchant_id', 'action_type']
print(len(feature_columns))

# Use the DIN model
model = DIN(feature_columns, history_features)
# Use the Adam optimizer with binary cross-entropy loss
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
# model.compile(loss=[binary_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"])

# Assemble train_model_input: get the feature names and convert train_X into
# a dict keyed by feature name
feature_names = list(train_X.columns)
train_model_input = {name: train_X[name].values for name in get_feature_names(feature_columns)}
print("########################################")

# The history inputs must be two-dimensional arrays
from tqdm import tqdm

for fea in ['hist_merchant_id', 'hist_action_type']:
    list = []
    for i in tqdm(train_model_input[fea]):

コード例 #9

0

ファイルを表示

    sess_len_max = SESS_MAX_LEN
    BATCH_SIZE = 1024
    sess_feature = ['item_id']
    # def auc(y_true,y_pred):
    #   return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)

    EMBEDDING_SIZE = int(ebs)
    if EMBEDDING_SIZE == 0:
        EMBEDDING_SIZE = "auto"

    model = DIN(fd,
                sess_feature,
                embedding_size=EMBEDDING_SIZE,
                dnn_dropout=float(dnn_dropout),
                att_activation='dice',
                att_weight_normalization=False,
                hist_len_max=sess_len_max,
                dnn_hidden_units=(200, 80),
                att_hidden_size=(
                    64,
                    16,
                ))
    model.compile('adagrad',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model_dir = "../model_dir_" + str(EMBEDDING_SIZE)
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    if os.path.exists(model_dir + '/ckpt.h5'):
        model.load_weights(model_dir + '/ckpt.h5')
    """
    test_input_pos = pd.read_pickle(

コード例 #10

0

ファイルを表示

import numpy as np
from deepctr.models import DIN
from deepctr.inputs import SparseFeat,VarLenSparseFeat,DenseFeat,get_fixlen_feature_names,get_varlen_feature_names

# Fixed-length feature columns: four sparse ids and one dense score.
feature_columns = [
    SparseFeat('user', 3),
    SparseFeat('gender', 2),
    SparseFeat('item', 3 + 1),
    SparseFeat('item_gender', 2 + 1),
    DenseFeat('score', 1),
]
# Behaviour-history columns; each names the embedding of its base feature.
# NOTE(review): 'hist_item_gender' is declared with 3 + 1 while 'item_gender'
# uses 2 + 1 — confirm whether the two sizes are meant to differ.
feature_columns += [
    VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
    VarLenSparseFeat('hist_item_gender', 3 + 1, maxlen=4,
                     embedding_name='item_gender'),
]
behavior_feature_list = ["item", "item_gender"]

# Three toy samples.
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3])  # 0 is mask value
igender = np.array([1, 2, 1])  # 0 is mask value
score = np.array([0.1, 0.2, 0.3])

hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

feature_dict = {
    'user': uid,
    'gender': ugender,
    'item': iid,
    'item_gender': igender,
    'hist_item': hist_iid,
    'hist_item_gender': hist_igender,
    'score': score,
}

# Model inputs: fixed-length features first, then variable-length ones.
fixlen_feature_names = get_fixlen_feature_names(feature_columns)
varlen_feature_names = get_varlen_feature_names(feature_columns)
x = [feature_dict[name] for name in fixlen_feature_names]
x += [feature_dict[name] for name in varlen_feature_names]

y = [1, 0, 1]

model = DIN(feature_columns, behavior_feature_list, hist_len_max=4)
model.compile(
    'adam',
    'binary_crossentropy',
    metrics=['binary_crossentropy'],
)
history = model.fit(
    x,
    y,
    verbose=1,
    epochs=10,
    validation_split=0.5,
)

コード例 #11

0

ファイルを表示

elif sys.argv[1] == 'PNN_UDG':
    model = PNN_UDG(dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, 
                    udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'PNN':
    model = PNN(dnn_feature_columns, untrainable_features_columns, (200, 80))
elif sys.argv[1] == 'WDL':
    model = WDL(linear_feature_columns, dnn_feature_columns, [], (200, 80))
elif sys.argv[1] == 'WDL_UDG':
    model = WDL_UDG(linear_feature_columns, dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'DIEN':
    model = DIEN(fixlen_feature_columns, behavior_feature_list,
             dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True)
elif sys.argv[1] == 'DIEN_UDG':
    model = DIEN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'DIN':
    model = DIN(fixlen_feature_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0)
elif sys.argv[1] == 'DIN_UDG':
    model = DIN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
    
if sys.argv[4] == 'focal':
    model.compile("adam", loss=focal_loss, metrics=['binary_crossentropy'], )
else:
    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
init_lr = float(tf.keras.backend.get_value(model.optimizer.learning_rate))
lr = [init_lr, init_lr/2, init_lr/4]
history_all = {}
max_auc, min_log, min_rmse, max_rig = 0, 0, 0, 0
for x in range(epoch):
    tf.keras.backend.set_value(model.optimizer.lr, lr[x])
    history = CustomCallback()
    model.fit(train_model_input, train[target].values, batch_size=256, epochs=1, verbose=1,

コード例 #12

0

ファイルを表示

    hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
    hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])

    # 特征名->data输入
    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item_id': iid,
        'cate_id': cate_id,
        'hist_item_id': hist_iid,
        'hist_cate_id': hist_cate_id,
        'pay_score': pay_score
    }
    x = {
        name: feature_dict[name]
        for name in get_feature_names(feature_columns)
    }
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list


if __name__ == "__main__":
    # feature_columns: all features used by the DNN part of the model,
    # including the variable-length features.
    x, y, feature_columns, behavior_feature_list = get_xy_fd()

    # The feature columns are the abstract description of the model inputs.
    model = DIN(feature_columns, behavior_feature_list)
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    history = model.fit(
        x,
        y,
        verbose=1,
        epochs=10,
        validation_split=0.5,
    )

コード例 #13

0

ファイルを表示

ファイル: train_din.py プロジェクト: zwcdp/DSIN-1

    test_label = label[test_idx]

    sess_len_max = SESS_MAX_LEN
    BATCH_SIZE = 4096

    sess_feature = ['cate_id', 'brand']
    TEST_BATCH_SIZE = 2**17
    REG = 1e-6

    model = DIN(fd,
                sess_feature,
                embedding_size=4,
                att_activation='dice',
                att_weight_normalization=False,
                hist_len_max=sess_len_max,
                dnn_hidden_units=(200, 80),
                att_hidden_size=(
                    64,
                    16,
                ),
                l2_reg_embedding=REG,
                seed=2019)

    model.compile('adagrad',
                  'binary_crossentropy',
                  metrics=[
                      'binary_crossentropy',
                  ])

    hist_ = model.fit(
        train_input[:],

コード例 #14

0

ファイルを表示

 def buildModel(self):
     """Create a DIN model from the encoder's feature columns and compile it.

     The feature columns come from ``self.encoder.getFeatureColumns()`` and
     the behaviour feature list from ``self.encoder.behavior_list``. The
     model is stored on ``self.model`` and compiled with the 'adam'
     optimizer, using 'binary_crossentropy' as both loss and metric.
     """
     feature_columns = self.encoder.getFeatureColumns()
     self.model = DIN(feature_columns, self.encoder.behavior_list)
     self.model.compile('adam',
                        'binary_crossentropy',
                        metrics=['binary_crossentropy'])