def get_xy_fd(hash_flag=False):
    """Build a three-sample toy dataset with item-history sequence features.

    Args:
        hash_flag: Kept for signature compatibility. This variant does not
            forward it to any feature column.

    Returns:
        tuple: ``(x, y, feature_columns, behavior_feature_list)``. ``x`` is a
        list of arrays ordered by the names from ``get_fixlen_feature_names``
        followed by those from ``get_varlen_feature_names``; ``y`` is the
        list of labels.
    """
    feature_columns = [
        SparseFeat('user', 3),
        SparseFeat('gender', 2),
        SparseFeat('item', 3 + 1),
        SparseFeat('item_gender', 2 + 1),
        # `score` carries one value per sample, so the dense width is 1
        # (a width of 0 would declare an empty input).
        DenseFeat('score', 1),
    ]
    # History sequences reuse the embeddings of the matching item features.
    # NOTE(review): 'hist_item_gender' declares 3 + 1 ids while 'item_gender'
    # declares 2 + 1 although both name the same embedding - confirm intent.
    feature_columns += [
        VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender', 3 + 1, maxlen=4,
                         embedding_name='item_gender'),
    ]

    behavior_feature_list = ["item", "item_gender"]

    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    # Histories are right-padded with the mask value 0 up to length 4.
    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item': iid,
        'item_gender': igender,
        'hist_item': hist_iid,
        'hist_item_gender': hist_igender,
        'score': score,
    }

    feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    # Fixed-length inputs first, then the variable-length ones.
    x = ([feature_dict[name] for name in feature_names]
         + [feature_dict[name] for name in varlen_feature_names])

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
def get_xy_fd(use_neg=False, hash_flag=False):
    """Build a three-sample toy dataset with behaviour histories and lengths.

    Args:
        use_neg: When true, also add the 'neg_hist_item' and
            'neg_hist_item_gender' sequence columns and their arrays.
        hash_flag: Forwarded as the third positional argument of the four
            ``SparseFeat`` columns.

    Returns:
        tuple: ``(x, y, feature_columns, behavior_feature_list)``. ``x`` lists
        the arrays for the names from ``get_fixlen_feature_names``, then those
        from ``get_varlen_feature_names``, and ends with the per-sample
        behaviour-length array; ``y`` is the list of labels.
    """
    feature_columns = [
        SparseFeat('user', 3, hash_flag),
        SparseFeat('gender', 2, hash_flag),
        SparseFeat('item', 3 + 1, hash_flag),
        SparseFeat('item_gender', 2 + 1, hash_flag),
        DenseFeat('score', 1),
    ]
    # History sequences reuse the embeddings of the matching item features.
    feature_columns += [
        VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender', 3 + 1, maxlen=4,
                         embedding_name='item_gender'),
    ]

    behavior_feature_list = ["item", "item_gender"]

    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    # Histories are right-padded with the mask value 0 up to length 4.
    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    # Count of non-padding entries in each history row above.
    behavior_length = np.array([3, 3, 2])

    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item': iid,
        'item_gender': igender,
        'hist_item': hist_iid,
        'hist_item_gender': hist_igender,
        'score': score,
    }

    if use_neg:
        feature_dict['neg_hist_item'] = np.array(
            [[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
        feature_dict['neg_hist_item_gender'] = np.array(
            [[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
        feature_columns += [
            VarLenSparseFeat('neg_hist_item', 3 + 1, maxlen=4,
                             embedding_name='item'),
            VarLenSparseFeat('neg_hist_item_gender', 3 + 1, maxlen=4,
                             embedding_name='item_gender'),
        ]

    feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    # Fixed-length inputs first, then variable-length ones, then the lengths.
    x = ([feature_dict[name] for name in feature_names]
         + [feature_dict[name] for name in varlen_feature_names])
    x += [behavior_length]

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
def get_xy_fd():
    """Return a three-sample toy dataset with item-history sequences.

    Returns:
        tuple: ``(x, y, feature_columns, behavior_feature_list)``. ``x`` holds
        the input arrays for the names from ``get_fixlen_feature_names``
        followed by those from ``get_varlen_feature_names``; ``y`` is the
        list of labels.
    """
    # Raw sample data; id 0 is reserved as the mask value for items.
    samples = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),
        'item_gender': np.array([1, 2, 1]),
        # Histories are right-padded with 0 up to length 4.
        'hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_item_gender': np.array(
            [[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
    }

    fixed_columns = [
        SparseFeat('user', 3),
        SparseFeat('gender', 2),
        SparseFeat('item', 3 + 1),
        SparseFeat('item_gender', 2 + 1),
        DenseFeat('score', 1),
    ]
    # History sequences reuse the embeddings of the matching item features.
    sequence_columns = [
        VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender', 3 + 1, maxlen=4,
                         embedding_name='item_gender'),
    ]
    feature_columns = fixed_columns + sequence_columns

    behavior_feature_list = ["item", "item_gender"]

    fixed_names = get_fixlen_feature_names(feature_columns)
    sequence_names = get_varlen_feature_names(feature_columns)

    # Fixed-length inputs first, then the variable-length ones.
    x = [samples[key]
         for group in (fixed_names, sequence_names)
         for key in group]
    y = [1, 0, 1]

    return x, y, feature_columns, behavior_feature_list
def get_train_instances(self, train):
    """Assemble model inputs and binary labels from ``self.trainSeq``.

    For every step of a user's sequence one positive row (label 1, the venue
    at that step) and one randomly drawn negative row (label 0) are emitted.

    Args:
        train: Container supporting ``(user, venue) in train``; used to
            reject sampled negatives.

    Returns:
        tuple: ``(x, labels)`` where ``x`` lists the arrays for the names from
        ``get_fixlen_feature_names`` and ``get_varlen_feature_names`` of
        ``self.feature_columns``, followed by an all-ones array with one
        entry per label, and ``labels`` is the label array.
    """
    user_ids, session_rows, candidates, targets = [], [], [], []

    for u in self.trainSeq:
        history = self.trainSeq[u]

        # One padded row per growing prefix of the sequence (the last venue
        # is never part of a prefix).
        prefix = []
        for venue in history[:-1]:
            prefix.append(venue)
            session_rows.extend(
                sequence.pad_sequences([list(prefix)], maxlen=self.maxVenue))

        # Candidates start from the second venue in the check-in sequence.
        # NOTE(review): as reconstructed, each step adds two rows to the
        # user/item/label lists but only one session row - confirm alignment.
        upcoming = history[1:]
        for step, positive in enumerate(upcoming):
            candidates.append(positive)
            user_ids.append(u)
            targets.append(1)

            # Redraw while the sample is a known training pair or appears
            # earlier in this user's remaining sequence.
            negative = np.random.randint(self.uNum)
            while (u, negative) in train or negative in upcoming[:step]:
                negative = np.random.randint(self.uNum)
            candidates.append(negative)
            user_ids.append(u)
            targets.append(0)

    sess_number = np.ones(len(targets))

    label_array = np.array(targets)
    feature_dict = {
        'user': np.array(user_ids),
        'item': np.array(candidates),
        'score': label_array,
        'sess_0_item': np.array(session_rows),
    }

    fixed_names = get_fixlen_feature_names(self.feature_columns)
    sequence_names = get_varlen_feature_names(self.feature_columns)

    # Fixed-length inputs, then variable-length inputs, then session counts.
    x = [feature_dict[key]
         for group in (fixed_names, sequence_names)
         for key in group]
    x.append(sess_number)

    return x, label_array
def get_xy_fd(hash_flag=False):
    """Return a three-sample toy dataset with two session sequences.

    Args:
        hash_flag: Forwarded to the sparse columns (third positional
            argument) and to the session columns (``use_hash``).

    Returns:
        tuple: ``(x, y, feature_columns, behavior_feature_list)``. ``x`` lists
        the arrays for the names from ``get_fixlen_feature_names``, then those
        from ``get_varlen_feature_names``, and ends with the per-sample
        session-count array; ``y`` is the list of labels.
    """
    base_columns = [
        SparseFeat('user', 3, hash_flag),
        SparseFeat('gender', 2, hash_flag),
        SparseFeat('item', 3 + 1, hash_flag),
        SparseFeat('item_gender', 2 + 1, hash_flag),
        DenseFeat('score', 1),
    ]
    # Each session contributes an item and an item-gender sequence that reuse
    # the embeddings of the matching item features.
    first_session_columns = [
        VarLenSparseFeat('sess_0_item', 3 + 1, 4, use_hash=hash_flag,
                         embedding_name='item'),
        VarLenSparseFeat('sess_0_item_gender', 2 + 1, 4, use_hash=hash_flag,
                         embedding_name='item_gender'),
    ]
    second_session_columns = [
        VarLenSparseFeat('sess_1_item', 3 + 1, 4, use_hash=hash_flag,
                         embedding_name='item'),
        VarLenSparseFeat('sess_1_item_gender', 2 + 1, 4, use_hash=hash_flag,
                         embedding_name='item_gender'),
    ]
    feature_columns = (base_columns + first_session_columns
                       + second_session_columns)

    behavior_feature_list = ["item", "item_gender"]

    # Sessions are right-padded with the mask value 0; an all-zero row means
    # the sample has no such session.
    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'sess_0_item': np.array(
            [[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]]),
        'sess_0_item_gender': np.array(
            [[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
        'sess_1_item': np.array(
            [[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
        'sess_1_item_gender': np.array(
            [[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
    }
    session_counts = np.array([2, 1, 0])

    fixed_names = get_fixlen_feature_names(feature_columns)
    sequence_names = get_varlen_feature_names(feature_columns)

    # Fixed-length inputs, then variable-length inputs, then session counts.
    x = [feature_dict[key]
         for group in (fixed_names, sequence_names)
         for key in group]
    x.append(session_counts)

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
import numpy as np

from deepctr.models import DIN
from deepctr.inputs import (
    SparseFeat,
    VarLenSparseFeat,
    DenseFeat,
    get_fixlen_feature_names,
    get_varlen_feature_names,
)

# Fixed-length feature columns for the toy DIN example.
feature_columns = [
    SparseFeat('user', 3),
    SparseFeat('gender', 2),
    SparseFeat('item', 3 + 1),
    SparseFeat('item_gender', 2 + 1),
    DenseFeat('score', 1),
]
# History sequences reuse the embeddings of the matching item features.
feature_columns += [
    VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
    VarLenSparseFeat('hist_item_gender', 3 + 1, maxlen=4,
                     embedding_name='item_gender'),
]

behavior_feature_list = ["item", "item_gender"]

# Three samples of raw input data.
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3])  # 0 is mask value
igender = np.array([1, 2, 1])  # 0 is mask value
score = np.array([0.1, 0.2, 0.3])

# Histories are right-padded with the mask value 0 up to length 4.
hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

feature_dict = {
    'user': uid,
    'gender': ugender,
    'item': iid,
    'item_gender': igender,
    'hist_item': hist_iid,
    'hist_item_gender': hist_igender,
    'score': score,
}

fixlen_feature_names = get_fixlen_feature_names(feature_columns)
varlen_feature_names = get_varlen_feature_names(feature_columns)

# Model inputs: fixed-length features first, then variable-length ones.
x = [
    feature_dict[name]
    for names in (fixlen_feature_names, varlen_feature_names)
    for name in names
]
y = [1, 0, 1]

# Build, compile and fit the DIN model on the toy data.
model = DIN(feature_columns, behavior_feature_list, hist_len_max=4)
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
# Split every 'genres' value into a list and pad all lists to a common length.
genres_list = [split(raw) for raw in data['genres'].values]
genres_length = np.array([len(genres) for genres in genres_list])
max_len = max(genres_length)
# Notice: padding='post', so the padding goes after the real entries.
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post')

# 2. Count the unique values of each sparse field and build the feature
#    config, including the sequence feature.
fixlen_feature_columns = [
    SparseFeat(feat, data[feat].nunique()) for feat in sparse_features
]
# Notice: value 0 is reserved for padding in the sequence input feature,
# hence the "+ 1" on the vocabulary size.
varlen_feature_columns = [
    VarLenSparseFeat('genres', len(key2index) + 1, max_len, 'mean')
]

linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
fixlen_feature_names = get_fixlen_feature_names(
    linear_feature_columns + dnn_feature_columns)
varlen_feature_names = get_varlen_feature_names(
    linear_feature_columns + dnn_feature_columns)

# 3. Generate the input data for the model.
fixlen_input = [data[name].values for name in fixlen_feature_names]
# The padded genres array is the only variable-length input.
varlen_input = [genres_list]
# Make sure the order is right: fixed-length inputs come first.
model_input = fixlen_input + varlen_input

# 4. Define the model, compile and train.
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile("adam", "mse", metrics=['mse'])
history = model.fit(
    model_input,
    data[target].values,
    batch_size=256,
    epochs=10,
    verbose=2,
    validation_split=0.2,
)