def test_DIN_att():
    model_name = "DIN_att"

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test save load weight pass!")

    # Saving the full model is disabled for now: load_model fails when the Dice
    # activation is used.
    # try:
    #     save_model(model, model_name + '.h5')
    #     model = load_model(model_name + '.h5', custom_objects)
    #     print(model_name + " test save load model pass!")
    # except:
    #     print("[Error] There is a bug when saving a model that uses Dice")

    print(model_name + " test pass!")
def test_DIN_sum():
    model_name = "DIN_sum"

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,
                embedding_size=8, use_din=False, hidden_size=[4, 4, 4],
                keep_prob=0.6, activation="sigmoid")
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test save load weight pass!")

    save_model(model, model_name + '.h5')
    model = load_model(model_name + '.h5', custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
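# --- Hedged note for the save/load calls above: load_model needs the mapping of
# --- DeepCTR's custom layers. The module paths below are an assumption about the
# --- installed DeepCTR/TensorFlow versions, not part of the original test file.
from tensorflow.python.keras.models import save_model, load_model
from deepctr.layers import custom_objects  # layer-name -> class map (Dice, attention units, ...)

# save_model(model, "DIN_sum.h5")
# model = load_model("DIN_sum.h5", custom_objects)  # rebuilds the custom layers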
def example_din():
    """
    1. Generate the training data as comma-separated txt files.
    2. Convert them to TFRecord.
    3. Read the data back, distinguishing dense, sparse, VarLenSparse and
       user-behavior-sequence features.
    4. Feed each group into the model and see how it behaves.
    :return:
    """
    # behavior_feature_list names the columns of the current item that should
    # attend over the historical behaviors; the data can still be read as before,
    # only the columns that need attention have to be collected into this list.
    # x, y, feature_columns, behavior_feature_list = get_xy_fd()
    x, y, feature_columns, behavior_feature_list = get_xy_from_txt()

    # dataset = tf.data.Dataset.from_tensor_slices((x.values, y.values))
    model = DIN(feature_columns, behavior_feature_list)
    model.compile('adam', keras.losses.binary_crossentropy,
                  metrics=[keras.metrics.AUC(), keras.metrics.categorical_accuracy])

    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
    # history = model.fit(dataset, verbose=1, epochs=10, validation_data=(x, y))
    # history = model.fit(dataset, verbose=1, epochs=10, validation_split=0.5)
    print("history: ", history)
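# --- Hedged sketch of the txt -> TFRecord -> tf.data round trip described in the
# --- docstring above. The feature names ('item', 'score', 'hist_item'), the fixed
# --- history length of 4 and both helpers are illustrative assumptions, not part
# --- of the original pipeline.
import tensorflow as tf

def write_example(writer, item, score, hist_item):
    # Serialize one row as a tf.train.Example with int64/float features.
    example = tf.train.Example(features=tf.train.Features(feature={
        'item': tf.train.Feature(int64_list=tf.train.Int64List(value=[item])),
        'score': tf.train.Feature(float_list=tf.train.FloatList(value=[score])),
        'hist_item': tf.train.Feature(int64_list=tf.train.Int64List(value=hist_item)),
    }))
    writer.write(example.SerializeToString())

def parse_fn(serialized):
    # A fixed-length history keeps the behavior sequence a dense 2-D batch tensor.
    spec = {
        'item': tf.io.FixedLenFeature([], tf.int64),
        'score': tf.io.FixedLenFeature([], tf.float32),
        'hist_item': tf.io.FixedLenFeature([4], tf.int64),
    }
    return tf.io.parse_single_example(serialized, spec)

# with tf.io.TFRecordWriter('train.tfrecord') as writer:
#     write_example(writer, item=1, score=0.1, hist_item=[1, 2, 0, 0])
# dataset = tf.data.TFRecordDataset('train.tfrecord').map(parse_fn).batch(256)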
def get_xy_fd():
    feature_dim_dict = {
        "sparse": [SingleFeat('user', 3), SingleFeat('gender', 2),
                   SingleFeat('item', 3 + 1), SingleFeat('item_gender', 2 + 1)],
        "dense": [SingleFeat('score', 0)]}
    behavior_feature_list = ["item", "item_gender"]

    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid,
                    'item_gender': igender, 'hist_item': hist_iid,
                    'hist_item_gender': hist_igender, 'score': score}

    x = [feature_dict[feat.name] for feat in feature_dim_dict["sparse"]] + \
        [feature_dict[feat.name] for feat in feature_dim_dict["dense"]] + \
        [feature_dict['hist_' + feat] for feat in behavior_feature_list]
    y = [1, 0, 1]
    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
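# --- Hedged bridge between the legacy SingleFeat dict above and the newer
# --- feature-column style used by get_feature_names() elsewhere in this section.
# --- This assumes a DeepCTR release (>= 0.8) that exposes deepctr.feature_column;
# --- vocabulary sizes and embedding_dim reuse the toy values above, and the exact
# --- API is version-dependent.
from deepctr.feature_column import SparseFeat, DenseFeat, VarLenSparseFeat, get_feature_names

feature_columns = [SparseFeat('user', 3, embedding_dim=8),
                   SparseFeat('gender', 2, embedding_dim=8),
                   SparseFeat('item', 3 + 1, embedding_dim=8),
                   SparseFeat('item_gender', 2 + 1, embedding_dim=8),
                   DenseFeat('score', 1)]
# The behavior sequences share embeddings with their base features via embedding_name.
feature_columns += [
    VarLenSparseFeat(SparseFeat('hist_item', 3 + 1, embedding_dim=8,
                                embedding_name='item'), maxlen=4),
    VarLenSparseFeat(SparseFeat('hist_item_gender', 2 + 1, embedding_dim=8,
                                embedding_name='item_gender'), maxlen=4)]
behavior_feature_list = ["item", "item_gender"]
# The model input then becomes a dict:
# x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}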
# Assemble train_model_input: collect the feature names and convert train_X
# into a dict of numpy arrays keyed by feature name.
feature_names = list(train_X.columns)
train_model_input = {name: train_X[name].values
                     for name in get_feature_names(feature_columns)}
print("########################################")

# The history inputs must be 2-D arrays, so stack the per-row sequences.
from tqdm import tqdm

for fea in ['hist_merchant_id', 'hist_action_type']:
    rows = []
    for i in tqdm(train_model_input[fea]):
        rows.append(i)
    train_model_input[fea] = np.array(rows)

history = model.fit(train_model_input, train_y.values, verbose=True,
                    epochs=10, validation_split=0.2, batch_size=512)

# Build test_model_input the same way.
test_data['action_type'] = 3
test_model_input = {name: test_data[name].values for name in feature_names}
for fea in ['hist_merchant_id', 'hist_action_type']:
    rows = []
    for i in tqdm(test_model_input[fea]):
        rows.append(i)
    test_model_input[fea] = np.array(rows)

# Predict and fill the submission.
prob = model.predict(test_model_input)
submission['prob'] = prob
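# --- Hedged aside on the 2-D history requirement above: if the per-row histories
# --- are ragged Python lists, Keras' pad_sequences turns them into a fixed-width
# --- 2-D array. The example values and maxlen=4 are illustrative assumptions,
# --- not taken from the original data.
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

ragged_hist = [[3, 7], [5], [2, 9, 4]]   # ragged per-user histories
padded = pad_sequences(ragged_hist, maxlen=4, padding='post', value=0)
print(padded.shape)                      # (3, 4): a 2-D array the model can consume
# e.g. train_model_input['hist_merchant_id'] = pad_sequences(raw_hist, maxlen=4, padding='post')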
del train_input_pos
del train_input_neg
del train_label_pos
del train_label_neg

n = 10
length_train = len(train_input[0])
length_train = int(length_train / n + 1)
for i in range(n):
    train_input_slice = [
        column[i * length_train:(i + 1) * length_train]
        for column in train_input
    ]
    hist_ = model.fit(
        train_input_slice,
        train_label[i * length_train:(i + 1) * length_train],
        batch_size=BATCH_SIZE,
        epochs=1,
        initial_epoch=0,
        verbose=1,
    )

# pred_ans = model.predict(test_input, BATCH_SIZE)
# AUC = round(roc_auc_score(test_label, pred_ans), 4)
# if AUC > best_AUC:
#     best_AUC = AUC
#     model.save_weights(model_dir + '/ckpt.h5')
# print("epoch : ", e, "----date : ", date, "----test AUC : ", AUC)
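# --- Hedged alternative to the manual slicing above: numpy.array_split produces
# --- n nearly equal chunks without the length arithmetic. Variable names
# --- (train_input, train_label, BATCH_SIZE) reuse the surrounding code; this is
# --- a sketch, not the original training loop.
import numpy as np

def iter_slices(train_input, train_label, n):
    # Split the label vector and every input column into n aligned chunks.
    label_chunks = np.array_split(np.asarray(train_label), n)
    column_chunks = [np.array_split(np.asarray(col), n) for col in train_input]
    for i in range(n):
        yield [chunks[i] for chunks in column_chunks], label_chunks[i]

# for x_slice, y_slice in iter_slices(train_input, train_label, n=10):
#     model.fit(x_slice, y_slice, batch_size=BATCH_SIZE, epochs=1, verbose=1)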
    user_age = np.array([1, 2, 3])
    user_gender = np.array([0, 1, 0])
    item_id = np.array([0, 1, 2])
    item_gender = np.array([0, 1, 0])

    # multi-value feature input
    hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]])
    hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]])
    # valid length of the behavior sequence of every sample
    hist_length = np.array([4, 4, 3])

    feature_dict = {'user_age': user_age, 'user_gender': user_gender,
                    'item_id': item_id, 'item_gender': item_gender,
                    'hist_item_id': hist_item_id,
                    'hist_item_gender': hist_item_gender}

    # Notice the concatenation order: single features + multi-value features + length.
    # Since the historical sequences of the different features in DIN all have the
    # same length (they are all expanded from item_id), one length vector is enough.
    x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] + \
        [feature_dict['hist_' + feat] for feat in behavior_feature_list] + \
        [hist_length]
    y = [1, 0, 1]
    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, validation_split=0.5)
            att_activation='dice',
            att_weight_normalization=False,
            hist_len_max=sess_len_max,
            dnn_hidden_units=(200, 80),
            att_hidden_size=(64, 16),
            l2_reg_embedding=REG,
            seed=2019)

model.compile('adagrad', 'binary_crossentropy',
              metrics=['binary_crossentropy'])

hist_ = model.fit(
    train_input[:],
    train_label,
    batch_size=BATCH_SIZE,
    epochs=1,
    initial_epoch=0,
    verbose=1,
)

pred_ans = model.predict(test_input, TEST_BATCH_SIZE)
print()
print("test LogLoss", round(log_loss(test_label, pred_ans), 4),
      "test AUC", round(roc_auc_score(test_label, pred_ans), 4))
class Trainer:
    def __init__(self):
        self.data = None
        self.encoder = None
        self.model = None
        # number of positive samples
        self.num_pos = None
        self.recipeDomain = None

    def loadData(self, url: str):
        # Load the click log and the recipe catalogue, then fit the encoder.
        self.data = pd.read_csv(url)
        self.recipeDomain = pd.read_csv(url)
        self.num_pos = self.data.shape[0]
        self.encoder = ModelEncoder()
        self.encoder.train()

    def preProcessData(self):
        # Encode both frames, label the observed rows as positives, add negatives.
        self.data = self.encoder.encode(self.data)
        self.data['result'] = [1] * self.num_pos
        self.recipeDomain = self.encoder.encode(self.recipeDomain)
        self.build_negative_data()

    def buildModel(self):
        feature_columns = self.encoder.getFeatureColumns()
        self.model = DIN(feature_columns, self.encoder.behavior_list)
        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['binary_crossentropy'])

    def train(self):
        model_input = {}
        for feat, _ in self.encoder.fixed_sparse_dict + self.encoder.var_sparse_dict:
            model_input[feat] = self.data[feat]
        history = self.model.fit(model_input, self.data['result'].values,
                                 batch_size=256, epochs=10, verbose=2,
                                 validation_split=0.2, shuffle=True)

    def dump_model(self, path: str):
        save_model(self.model, path)

    def dump_encoder(self, path: str):
        # Persist the fitted encoder so inference can reuse the same mappings.
        with open(path, 'wb') as f:
            pickle.dump(self.encoder, f, protocol=4)

    def update(self):
        url = ''
        self.recipeDomain = pd.read_csv(url)
        self.recipeDomain = self.encoder.encode(self.recipeDomain)

    def build_negative_data(self):
        # For every positive sample, build negatives from recipes the user never clicked.
        negatives = []
        for i in range(self.num_pos):
            record = self.data.iloc[i]
            clicked_set = list(record['hist_recipe']) + [record['recipe']]
            for j in range(self.recipeDomain.shape[0]):
                if self.recipeDomain.iloc[j]['recipe'] not in clicked_set:
                    # valid unclicked combination: overwrite the item features
                    negative = record.copy()
                    for feat in negative.index:
                        if feat in self.recipeDomain.columns:
                            negative[feat] = self.recipeDomain.iloc[j][feat]
                    negative['result'] = 0
                    negatives.append(negative)
        self.data = pd.concat([self.data, pd.DataFrame(negatives)], ignore_index=True)
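# --- Hedged usage sketch for the Trainer above. The csv path and output file
# --- names are placeholders, not taken from the original code.
if __name__ == "__main__":
    trainer = Trainer()
    trainer.loadData("recipes.csv")       # placeholder input path
    trainer.preProcessData()              # encode + label + negative sampling
    trainer.buildModel()
    trainer.train()
    trainer.dump_model("din_recipe.h5")   # placeholder output paths
    trainer.dump_encoder("encoder.pkl")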