Example #1
def test_DIN_att():
    model_name = "DIN_att"

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8,
                use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)

    print(model_name+" test train valid pass!")
    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name+" test save load weight pass!")

    # try:
    #     save_model(model,  name + '.h5')
    #     model = load_model(name + '.h5', custom_objects)
    #     print(name + " test save load model pass!")
    # except:
    #     print("【Error】There is a bug when save model use Dice---------------------------------------------------")

    print(model_name + " test pass!")
Example #2
def test_DIN_att():
    model_name = "DIN_att"

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )

    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)

    print(model_name + " test train valid pass!")
    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test save load weight pass!")

    # try:
    #     save_model(model,  name + '.h5')
    #     model = load_model(name + '.h5', custom_objects)
    #     print(name + " test save load model pass!")
    # except:
    #     print("【Error】There is a bug when save model use Dice---------------------------------------------------")

    print(model_name + " test pass!")
Example #3
def test_DIN_sum():

    model_name = "DIN_sum"
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(feature_dim_dict,
                behavior_feature_list,
                hist_len_max=4,
                embedding_size=8,
                use_din=False,
                hidden_size=[4, 4, 4],
                keep_prob=0.6,
                activation="sigmoid")

    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)

    print(model_name + " test train valid pass!")
    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test save load weight pass!")

    save_model(model, model_name + '.h5')
    model = load_model(model_name + '.h5', custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
Example #4
def test_DIN_model_io():

    model_name = "DIN_att"
    _, _, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8, att_activation=Dice,
                use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    # model.fit(x, y, verbose=1, validation_split=0.5)
    save_model(model, model_name + '.h5')
    model = load_model(model_name + '.h5', custom_objects)
    print(model_name + " test save load model pass!")
Example #5
def test_DIN_model_io():
    name = "DIN_att"
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )

    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    #model.fit(x, y, verbose=1, validation_split=0.5)
    save_model(model, name + '.h5')
    model = load_model(name + '.h5', custom_objects)
    print(name + " test save load model pass!")
Example #6
def example_din():
    """
    1. 生成训练数据为txt格式的,逗号分割字段
    2. 转换成tfrecord
    3. 读取数据,区分dense, sparse, VarLenSparse, 用户行为序列特征
    4. 分别喂到模型中,看看会怎么样
    :return:
    """
    # x, y, feature_columns, behavior_feature_list = get_xy_fd() #说一下哪几列是当前的item需要和历史的行为进行attention的。所以之后就可以尝试,还是像之前一样读数据,然后只是把需要attention的列名单拿出来,放到list中就可以了
    x, y, feature_columns, behavior_feature_list = get_xy_from_txt(
    )  #说一下哪几列是当前的item需要和历史的行为进行attention的。所以之后就可以尝试,还是像之前一样读数据,然后只是把需要attention的列名单拿出来,放到list中就可以了
    # dataset = tf.data.Dataset.from_tensor_slices((x.values, y.values))

    model = DIN(feature_columns, behavior_feature_list)
    model.compile(
        'adam',
        keras.losses.binary_crossentropy,
        metrics=[keras.metrics.AUC(), keras.metrics.categorical_accuracy])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
    # history = model.fit(dataset, verbose=1, epochs=10, validation_data=(x,y))
    # history = model.fit(dataset, verbose=1, epochs=10, validation_split=0.5)
    print("history: ", history)
Example #7
def get_xy_fd():
    feature_dim_dict = {"sparse": [SingleFeat('user', 3), SingleFeat(
        'gender', 2), SingleFeat('item', 3+1), SingleFeat('item_gender', 2+1)], "dense": [SingleFeat('score', 0)]}
    behavior_feature_list = ["item", "item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'hist_item': hist_iid, 'hist_item_gender': hist_igender, 'score': score}

    x = [feature_dict[feat.name] for feat in feature_dim_dict["sparse"]] + [feature_dict[feat.name]
                                                                            for feat in feature_dim_dict["dense"]] + [feature_dict['hist_'+feat] for feat in behavior_feature_list]

    y = [1, 0, 1]
    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
Example #8
def get_xy_fd():
    # Reconstructed header (assumed): vocabulary sizes are inferred from the toy arrays below.
    feature_dim_dict = {"sparse": {'user_age': 3 + 1, 'user_gender': 2,
                                   'item_id': 3 + 1, 'item_gender': 2}, "dense": []}
    behavior_feature_list = ["item_id", "item_gender"]
    user_age = np.array([1, 2, 3])
    user_gender = np.array([0, 1, 0])
    item_id = np.array([0, 1, 2])
    item_gender = np.array([0, 1, 0])

    # multi-value feature input
    hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]])
    hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]])
    # valid length of behavior sequence of every sample
    hist_length = np.array([4, 4, 3])

    feature_dict = {'user_age': user_age, 'user_gender': user_gender, 'item_id': item_id, 'item_gender': item_gender,
                    'hist_item_id': hist_item_id, 'hist_item_gender': hist_item_gender, }

    x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] + \
        [feature_dict['hist_'+feat]
            for feat in behavior_feature_list] + [hist_length]
    # Notice the concatenation order: single feature + multi-value feature + length
    # Since the length of the historical sequences of different features in DIN are the same(they are all extended from item_id),only one length vector is enough.
    y = [1, 0, 1]

    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, validation_split=0.5)
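
To make the input layout above concrete, here is a standalone NumPy check (no DeepCTR call) that mirrors the ordering built in get_xy_fd; the arrays are copied from the toy data above and the assertions only restate the convention described in the comments.

import numpy as np

# toy data copied from the example above
user_age = np.array([1, 2, 3])
user_gender = np.array([0, 1, 0])
item_id = np.array([0, 1, 2])
item_gender = np.array([0, 1, 0])
hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]])
hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]])
hist_length = np.array([4, 4, 3])

# ordering: single sparse features, then the multi-value (hist_) features, then the length
x = [user_age, user_gender, item_id, item_gender, hist_item_id, hist_item_gender, hist_length]

assert all(arr.shape[0] == 3 for arr in x)            # one entry per sample
assert hist_item_id.shape == hist_item_gender.shape   # all histories share one padded length
assert (hist_length <= hist_item_id.shape[1]).all()   # valid lengths never exceed the padding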
Example #9
                sess_feature,
                embedding_size=4,
                att_activation='dice',
                att_weight_normalization=False,
                hist_len_max=sess_len_max,
                dnn_hidden_units=(200, 80),
                att_hidden_size=(64, 16),
                l2_reg_embedding=REG,
                seed=2019)

    model.compile('adagrad',
                  'binary_crossentropy',
                  metrics=[
                      'binary_crossentropy',
                  ])

    hist_ = model.fit(
        train_input[:],
        train_label,
        batch_size=BATCH_SIZE,
        epochs=1,
        initial_epoch=0,
        verbose=1,
    )
    pred_ans = model.predict(test_input, TEST_BATCH_SIZE)

    print()
    print("test LogLoss", round(log_loss(test_label, pred_ans), 4), "test AUC",
Example #10
class Trainer:
    def __init__(self):
        self.data = None
        self.encoder = None
        self.model = None
        # number of positive samples
        self.num_pos = None
        self.recipeDomain = None

    def loadData(self, url: str):
        self.data = pd.read_csv(url)
        self.recipeDomain = pd.read_csv(url)
        self.num_pos = self.data.shape[0]
        self.encoder = ModelEncoder()
        self.encoder.train()

    def preProcessData(self):
        self.data = self.encoder.encode(self.data)
        self.data['result'] = [1] * self.num_pos

        self.recipeDomain = self.encoder.encode(self.recipeDomain)

        self.build_negative_data()

    def buildModel(self):
        feature_columns = self.encoder.getFeatureColumns()
        self.model = DIN(feature_columns, self.encoder.behavior_list)
        self.model.compile('adam',
                           'binary_crossentropy',
                           metrics=['binary_crossentropy'])

    def train(self):
        model_input = {}
        for feat, _ in self.encoder.fixed_sparse_dict + self.encoder.var_sparse_dict:
            model_input[feat] = self.data[feat]
        history = self.model.fit(model_input,
                                 self.data['result'].values,
                                 batch_size=256,
                                 epochs=10,
                                 verbose=2,
                                 validation_split=0.2,
                                 shuffle=True)

    def dump_model(self, path: str):
        save_model(self.model, path)

    def dump_encoder(self, path: str):
        # serialize the fitted encoder to the given path
        with open(path, 'wb') as f:
            pickle.dump(self.encoder, f, protocol=4)

    def update(self):
        url = ''
        self.recipeDomain = pd.read_csv(url)
        self.encoder.encode(self.recipeDomain)

    def build_negative_data(self):
        for i in range(self.num_pos):
            record = self.data.iloc[i].copy()
            # recipes this user has already interacted with: history plus the current positive
            # (hist_recipe is assumed to be a list of recipe ids)
            clicked_set = list(record['hist_recipe']) + [record['recipe']]
            for j in range(self.recipeDomain.shape[0]):
                if self.recipeDomain.iloc[j]['recipe'] not in clicked_set:
                    # valid unclicked combination -> turn the positive record into a negative sample
                    for feat in record.index:
                        if feat in self.recipeDomain.columns:
                            record[feat] = self.recipeDomain.iloc[j][feat]
                    record['result'] = 0
                    self.data = self.data.append(record, ignore_index=True)
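
A minimal driver for the Trainer above, calling the methods in the order the class expects; the CSV path and the output file names are placeholders, not part of the original code.

# Hypothetical usage; 'recipes.csv' and the output paths are assumptions.
trainer = Trainer()
trainer.loadData('recipes.csv')    # positive interactions and the recipe domain
trainer.preProcessData()           # encode features, label positives, build negatives
trainer.buildModel()               # DIN over the encoder's feature columns
trainer.train()                    # fit with a 20% validation split
trainer.dump_model('din_recipe.h5')
trainer.dump_encoder('encoder.pkl')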