def test_DIN_att():
    """Fit a tiny attention-enabled DIN and round-trip its weights via HDF5."""
    model_name = "DIN_att"
    weights_file = model_name + '_weights.h5'

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = {
        'hist_len_max': 4,
        'embedding_size': 8,
        'use_din': True,
        'hidden_size': [4, 4, 4],
        'keep_prob': 0.6,
    }
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])

    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(model_name + " test save load weight pass!")

    # NOTE: the full save_model()/load_model() round trip is deliberately not
    # exercised here. It was disabled by the original author with the remark
    # that saving a model which uses Dice was buggy.
    print(model_name + " test pass!")
def test_DIN_att():
    """Train DIN with attention on the toy data, then save and reload its weights."""
    model_name = "DIN_att"
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Weights-only persistence check: write the file, then read it straight back.
    checkpoint = model_name + '_weights.h5'
    model.save_weights(checkpoint)
    model.load_weights(checkpoint)
    print(model_name + " test save load weight pass!")

    # The whole-model save_model()/load_model() check is intentionally absent:
    # the original author disabled it, noting a bug when saving a model that
    # uses Dice.
    print(model_name + " test pass!")
def test_DIN_sum():
    """Train DIN with attention switched off, then check weight and model I/O."""
    model_name = "DIN_sum"
    weights_file = model_name + '_weights.h5'
    model_file = model_name + '.h5'

    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        use_din=False,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
        activation="sigmoid",
    )
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, verbose=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    # Round trip 1: weights only.
    model.save_weights(weights_file)
    model.load_weights(weights_file)
    print(model_name + " test save load weight pass!")

    # Round trip 2: the whole model, reloaded with the project's custom objects.
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
def test_DIN_model_io():
    """Save and reload a compiled (untrained) DIN that uses Dice attention activation."""
    model_name = "DIN_att"
    model_file = model_name + '.h5'

    # Only the feature metadata is needed; the model is never fitted here.
    _, _, feature_dim_dict, behavior_feature_list = get_xy_fd()

    model = DIN(
        feature_dim_dict,
        behavior_feature_list,
        hist_len_max=4,
        embedding_size=8,
        att_activation=Dice,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])

    # Training is skipped on purpose (the fit call was commented out originally).
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")
def test_DIN_model_io():
    """Save and reload a compiled (untrained) attention DIN as a whole model."""
    model_name = "DIN_att"
    model_file = model_name + '.h5'

    # The sample inputs and labels are not used because the model is not fitted.
    _, _, feature_dim_dict, behavior_feature_list = get_xy_fd()

    din_options = dict(
        hist_len_max=4,
        embedding_size=8,
        use_din=True,
        hidden_size=[4, 4, 4],
        keep_prob=0.6,
    )
    model = DIN(feature_dim_dict, behavior_feature_list, **din_options)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])

    # Training is skipped on purpose (the fit call was commented out originally).
    save_model(model, model_file)
    model = load_model(model_file, custom_objects)
    print(model_name + " test save load model pass!")
def example_din():
    """Train a DIN model on the data returned by ``get_xy_from_txt``.

    Intended workflow (author's notes):

    1. Generate the training data as txt with comma-separated fields.
    2. Convert it to tfrecord.
    3. Read the data, distinguishing dense, sparse, VarLenSparse and
       user-behaviour-sequence features.
    4. Feed each of them to the model and see what happens.

    :return: None. The object returned by ``model.fit`` is printed.
    """
    # ``behavior_feature_list`` names the columns of the current item that are
    # attended against the historical behaviour. The data is read as before;
    # only the names of the columns needing attention are collected in a list.
    # (Alternative source kept from the original: get_xy_fd().)
    x, y, feature_columns, behavior_feature_list = get_xy_from_txt()

    model = DIN(feature_columns, behavior_feature_list)
    model.compile(
        'adam',
        keras.losses.binary_crossentropy,
        # The model is trained with a binary cross-entropy loss, so accuracy is
        # measured with binary_accuracy. The previous categorical_accuracy is
        # meant for one-hot labels.
        # NOTE(review): assumes DIN emits a single sigmoid output; confirm.
        metrics=[keras.metrics.AUC(), keras.metrics.binary_accuracy],
    )

    # A tf.data.Dataset built from (x.values, y.values) was tried as an
    # alternative input in the original and left disabled.
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
    print("history: ", history)
def get_xy_fd():
    """Build a three-sample toy dataset and feature description for DIN.

    Returns:
        ``(x, y, feature_dim_dict, behavior_feature_list)`` where ``x`` is a
        list of arrays ordered as: sparse features, dense features, then the
        ``hist_``-prefixed sequence of every behavior feature; ``y`` is the
        list of labels.
    """
    behavior_feature_list = ["item", "item_gender"]

    sparse_feats = [
        SingleFeat('user', 3),
        SingleFeat('gender', 2),
        SingleFeat('item', 3 + 1),
        SingleFeat('item_gender', 2 + 1),
    ]
    dense_feats = [SingleFeat('score', 0)]
    feature_dim_dict = {"sparse": sparse_feats, "dense": dense_feats}

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_item_gender': np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
    }

    # Input order: single-value sparse, then dense, then behavior sequences.
    x = []
    for feat in sparse_feats:
        x.append(feature_dict[feat.name])
    for feat in dense_feats:
        x.append(feature_dict[feat.name])
    for feat_name in behavior_feature_list:
        x.append(feature_dict['hist_' + feat_name])

    y = [1, 0, 1]
    return x, y, feature_dim_dict, behavior_feature_list


if __name__ == "__main__":
    # Script entry point: fit a default DIN on the toy data for ten epochs.
    x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
    model.compile(
        'adam',
        'binary_crossentropy',
        metrics=['binary_crossentropy'],
    )
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
user_age = np.array([1, 2, 3]) user_gender = np.array([0, 1, 0]) item_id = np.array([0, 1, 2]) item_gender = np.array([0, 1, 0]) # multi-value feature input hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]]) hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]]) # valid length of behavior sequence of every sample hist_length = np.array([4, 4, 3]) feature_dict = {'user_age': user_age, 'user_gender': user_gender, 'item_id': item_id, 'item_gender': item_gender, 'hist_item_id': hist_item_id, 'hist_item_gender': hist_item_gender, } x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] + \ [feature_dict['hist_'+feat] for feat in behavior_feature_list] + [hist_length] # Notice the concatenation order: single feature + multi-value feature + length # Since the length of the historical sequences of different features in DIN are the same(they are all extended from item_id),only one length vector is enough. y = [1, 0, 1] return x, y, feature_dim_dict, behavior_feature_list if __name__ == "__main__": x, y, feature_dim_dict, behavior_feature_list = get_xy_fd() model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,) model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy']) history = model.fit(x, y, verbose=1, validation_split=0.5)
sess_feature, embedding_size=4, att_activation='dice', att_weight_normalization=False, hist_len_max=sess_len_max, dnn_hidden_units=(200, 80), att_hidden_size=( 64, 16, ), l2_reg_embedding=REG, seed=2019) model.compile('adagrad', 'binary_crossentropy', metrics=[ 'binary_crossentropy', ]) hist_ = model.fit( train_input[:], train_label, batch_size=BATCH_SIZE, epochs=1, initial_epoch=0, verbose=1, ) pred_ans = model.predict(test_input, TEST_BATCH_SIZE) print() print("test LogLoss", round(log_loss(test_label, pred_ans), 4), "test AUC",
class Trainer:
    """Load click data, generate negative samples and fit a DIN model.

    The attributes are filled in step by step: ``loadData`` sets ``data``,
    ``recipeDomain``, ``num_pos`` and ``encoder``; ``preProcessData`` encodes
    the frames and adds the ``'result'`` label column; ``buildModel`` sets
    ``model``; ``train`` fits it.
    """

    def __init__(self):
        self.data = None
        self.encoder = None
        self.model = None
        # number of positive samples
        self.num_pos = None
        self.recipeDomain = None

    def loadData(self, url: str):
        """Read the CSV at *url* and create and train the encoder.

        Every row that is read counts as a positive sample.
        """
        self.data = pd.read_csv(url)
        # The candidate pool starts from the same table. Copying the frame
        # avoids a second read while keeping the two frames independent.
        self.recipeDomain = self.data.copy()
        self.num_pos = self.data.shape[0]
        self.encoder = ModelEncoder()
        self.encoder.train()

    def preProcessData(self):
        """Encode both frames, label the loaded rows 1 and add negatives."""
        self.data = self.encoder.encode(self.data)
        self.data['result'] = [1] * self.num_pos
        self.recipeDomain = self.encoder.encode(self.recipeDomain)
        self.build_negative_data()

    def buildModel(self):
        """Create and compile the DIN model from the encoder's feature columns."""
        feature_columns = self.encoder.getFeatureColumns()
        self.model = DIN(feature_columns, self.encoder.behavior_list)
        self.model.compile('adam', 'binary_crossentropy',
                           metrics=['binary_crossentropy'])

    def train(self):
        """Fit the model on ``self.data`` and return what ``model.fit`` returns.

        The label is the ``'result'`` column, which is the one written by
        ``preProcessData`` and ``build_negative_data``.
        """
        # NOTE(review): fixed_sparse_dict / var_sparse_dict are concatenated
        # with ``+`` and unpacked as pairs, so they are presumably sequences of
        # (feature_name, spec) tuples; confirm against ModelEncoder.
        feature_specs = (self.encoder.fixed_sparse_dict
                         + self.encoder.var_sparse_dict)
        model_input = {feat: self.data[feat] for feat, _ in feature_specs}
        return self.model.fit(
            model_input,
            self.data['result'].values,
            batch_size=256,
            epochs=10,
            verbose=2,
            validation_split=0.2,
            shuffle=True,
        )

    def dump_model(self, path: str):
        """Save the model to *path* with ``save_model``."""
        save_model(self.model, path)

    def dump_encoder(self, path: str):
        """Pickle the encoder to the file at *path* (pickle protocol 4)."""
        with open(path, 'wb') as fh:
            pickle.dump(self.encoder, fh, protocol=4)

    def update(self, url: str = ''):
        """Reload the candidate pool from the CSV at *url* and encode it.

        The default keeps the previous placeholder value (an empty string).
        """
        self.recipeDomain = self.encoder.encode(pd.read_csv(url))

    def build_negative_data(self):
        """Append one negative row per (positive row, unclicked recipe) pair.

        For each of the first ``num_pos`` rows of ``self.data``, the clicked
        set is the row's ``'hist_recipe'`` entries plus its ``'recipe'``. Every
        row of ``self.recipeDomain`` whose ``'recipe'`` is outside that set
        yields a copy of the positive row in which the columns shared with
        ``recipeDomain`` are overwritten and ``'result'`` is set to 0.
        """
        positives = self.data.iloc[:self.num_pos]
        shared_columns = [col for col in positives.columns
                          if col in self.recipeDomain.columns]

        negatives = []
        for _, record in positives.iterrows():
            history = record['hist_recipe']
            # The history may be a sequence or a single id; strings are
            # treated as one id rather than as a sequence of characters.
            if pd.api.types.is_list_like(history):
                clicked = set(history)
            else:
                clicked = {history}
            clicked.add(record['recipe'])

            for j in range(self.recipeDomain.shape[0]):
                candidate = self.recipeDomain.iloc[j]
                if candidate['recipe'] in clicked:
                    continue
                # valid unclicked combination: start from a copy so the
                # positive row itself is never modified
                negative = record.to_dict()
                for col in shared_columns:
                    negative[col] = candidate[col]
                negative['result'] = 0
                negatives.append(negative)

        if negatives:
            negative_frame = pd.DataFrame(negatives, columns=self.data.columns)
            self.data = pd.concat([self.data, negative_frame],
                                  ignore_index=True)