Esempio n. 1
0
def get_xy_fd(hash_flag=False):
    # feature_dim_dict = {"sparse": [SingleFeat('user', 3, hash_flag), SingleFeat(
    #     'gender', 2, hash_flag), SingleFeat('item', 3 + 1, hash_flag), SingleFeat('item_gender', 2 + 1, hash_flag)],
    #                     "dense": [SingleFeat('score', 0)]}

    feature_columns = [
        SparseFeat('user', 3),
        SparseFeat('gender', 2),
        SparseFeat('item', 3 + 1),
        SparseFeat('item_gender', 2 + 1),
        DenseFeat('score', 0)
    ]
    feature_columns += [
        VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender',
                         3 + 1,
                         maxlen=4,
                         embedding_name='item_gender')
    ]

    behavior_feature_list = ["item", "item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item': iid,
        'item_gender': igender,
        'hist_item': hist_iid,
        'hist_item_gender': hist_igender,
        'score': score
    }

    feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    x = [feature_dict[name] for name in feature_names
         ] + [feature_dict[name] for name in varlen_feature_names]

    # x = [feature_dict[feat.name] for feat in feature_dim_dict["sparse"]] + [feature_dict[feat.name] for feat in
    #                                                                         feature_dim_dict["dense"]] + [
    #         feature_dict['hist_' + feat] for feat in behavior_feature_list]

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
Esempio n. 2
0
def get_xy_fd(use_neg=False, hash_flag=False):

    feature_columns = [SparseFeat('user', 3,hash_flag),
                       SparseFeat('gender', 2,hash_flag),
                       SparseFeat('item', 3+1,hash_flag),
                       SparseFeat('item_gender', 2+1,hash_flag),
                       DenseFeat('score', 1)]

    feature_columns += [VarLenSparseFeat('hist_item',3+1, maxlen=4, embedding_name='item'),
                        VarLenSparseFeat('hist_item_gender',3+1, maxlen=4, embedding_name='item_gender')]

    behavior_feature_list = ["item","item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])#0 is mask value
    igender = np.array([1, 2, 1])# 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
    hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])

    behavior_length = np.array([3,3,2])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'hist_item': hist_iid, 'hist_item_gender': hist_igender,
                    'score': score}

    #x = [feature_dict[feat.name] for feat in feature_dim_dict["sparse"]] + [feature_dict[feat.name] for feat in
    #                                                                        feature_dim_dict["dense"]] + [
    #        feature_dict['hist_' + feat] for feat in behavior_feature_list]


    if use_neg:
        feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
        feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
        feature_columns += [VarLenSparseFeat('neg_hist_item',3+1, maxlen=4, embedding_name='item'),
                        VarLenSparseFeat('neg_hist_item_gender',3+1, maxlen=4, embedding_name='item_gender')]
        #x += [feature_dict['neg_hist_'+feat] for feat in behavior_feature_list]


    feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    print(varlen_feature_names)
    x = [feature_dict[name] for name in feature_names] + [feature_dict[name] for name in varlen_feature_names]

    x += [behavior_length]
    y = [1, 0, 1]
    print(len(x))
    return x, y, feature_columns, behavior_feature_list
Esempio n. 3
0
def get_xy_fd():

    feature_columns = [
        SparseFeat('user', 3),
        SparseFeat('gender', 2),
        SparseFeat('item', 3 + 1),
        SparseFeat('item_gender', 2 + 1),
        DenseFeat('score', 1)
    ]
    feature_columns += [
        VarLenSparseFeat('hist_item', 3 + 1, maxlen=4, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender',
                         3 + 1,
                         maxlen=4,
                         embedding_name='item_gender')
    ]

    behavior_feature_list = ["item", "item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {
        'user': uid,
        'gender': ugender,
        'item': iid,
        'item_gender': igender,
        'hist_item': hist_iid,
        'hist_item_gender': hist_igender,
        'score': score
    }

    fixlen_feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    x = [feature_dict[name] for name in fixlen_feature_names
         ] + [feature_dict[name] for name in varlen_feature_names]

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
    def get_train_instances(self, train):
        users, checkins, cand_venues, labels = [], [], [], []

        for u in self.trainSeq:
            visited = self.trainSeq[u]
            checkin_ = []
            for v in visited[:-1]:
                checkin_.append(v)
                checkins.extend(sequence.pad_sequences([checkin_[:]], maxlen=self.maxVenue))

            # start from the second venue in user's checkin sequence.
            visited = visited[1:]
            for i in range(len(visited)):
                cand_venues.append(visited[i])
                users.append(u)
                labels.append(1)
                j = np.random.randint(self.uNum)
                # check if j is in training dataset or in user's sequence at state i or not
                while (u, j) in train or j in visited[:i]:
                    j = np.random.randint(self.uNum)

                cand_venues.append(j)
                users.append(u)
                labels.append(0)

        sess_number = np.ones(len(labels))

        users = np.array(users)
        items = np.array(cand_venues)
        sess_item = np.array(checkins)
        labels = np.array(labels)

        feature_dict = {'user': users, 'item': items, 'score': labels, 'sess_0_item': sess_item}

        fixlen_feature_names = get_fixlen_feature_names(self.feature_columns)
        varlen_feature_names = get_varlen_feature_names(self.feature_columns)
        x = [feature_dict[name] for name in fixlen_feature_names] + [feature_dict[name] for name in
                                                                     varlen_feature_names]
        x += [sess_number]

        return x, labels
Esempio n. 5
0
def get_xy_fd(hash_flag=False):

    feature_columns = [SparseFeat('user', 3, hash_flag),
                       SparseFeat('gender', 2, hash_flag),
                       SparseFeat('item', 3 + 1, hash_flag),
                       SparseFeat('item_gender', 2 + 1, hash_flag),
                       DenseFeat('score', 1)]
    feature_columns += [VarLenSparseFeat('sess_0_item',3+1,4,use_hash=hash_flag,embedding_name='item'),VarLenSparseFeat('sess_0_item_gender',2+1,4,use_hash=hash_flag,embedding_name='item_gender')]
    feature_columns += [VarLenSparseFeat('sess_1_item', 3 + 1, 4, use_hash=hash_flag, embedding_name='item'),VarLenSparseFeat('sess_1_item_gender', 2 + 1, 4, use_hash=hash_flag,embedding_name='item_gender')]

    behavior_feature_list = ["item", "item_gender"]
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
    sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])

    sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
    sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])

    sess_number = np.array([2, 1, 0])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'sess_0_item': sess1_iid, 'sess_0_item_gender': sess1_igender, 'score': score,
                    'sess_1_item': sess2_iid, 'sess_1_item_gender': sess2_igender, }

    fixlen_feature_names = get_fixlen_feature_names(feature_columns)
    varlen_feature_names = get_varlen_feature_names(feature_columns)
    x = [feature_dict[name] for name in fixlen_feature_names] + [feature_dict[name] for name in varlen_feature_names]


    x += [sess_number]

    y = [1, 0, 1]
    return x, y, feature_columns, behavior_feature_list
Esempio n. 6
0
import numpy as np
from deepctr.models import DIN
from deepctr.inputs import SparseFeat,VarLenSparseFeat,DenseFeat,get_fixlen_feature_names,get_varlen_feature_names

feature_columns = [SparseFeat('user',3),SparseFeat(
    'gender', 2), SparseFeat('item', 3 + 1), SparseFeat('item_gender', 2 + 1),DenseFeat('score', 1)]
feature_columns += [VarLenSparseFeat('hist_item',3+1, maxlen=4, embedding_name='item'),
                    VarLenSparseFeat('hist_item_gender',3+1, maxlen=4, embedding_name='item_gender')]
behavior_feature_list = ["item", "item_gender"]
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3])  # 0 is mask value
igender = np.array([1, 2, 1])  # 0 is mask value
score = np.array([0.1, 0.2, 0.3])

hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                'hist_item': hist_iid, 'hist_item_gender': hist_igender, 'score': score}

fixlen_feature_names = get_fixlen_feature_names(feature_columns)
varlen_feature_names = get_varlen_feature_names(feature_columns)
x = [feature_dict[name] for name in fixlen_feature_names] + [feature_dict[name] for name in varlen_feature_names]

y = [1, 0, 1]

model = DIN(feature_columns, behavior_feature_list, hist_len_max=4, )
model.compile('adam', 'binary_crossentropy',
              metrics=['binary_crossentropy'])
history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
Esempio n. 7
0
genres_list = list(map(split, data['genres'].values))
genres_length = np.array(list(map(len, genres_list)))
max_len = max(genres_length)
# Notice : padding=`post`
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', )

# 2.count #unique features for each sparse field and generate feature config for sequence feature

fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
                    for feat in sparse_features]
varlen_feature_columns = [VarLenSparseFeat('genres', len(
    key2index) + 1, max_len, 'mean')]  # Notice : value 0 is for padding for sequence input feature

linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
fixlen_feature_names = get_fixlen_feature_names(linear_feature_columns + dnn_feature_columns)
varlen_feature_names = get_varlen_feature_names(linear_feature_columns+dnn_feature_columns)


# 3.generate input data for model
fixlen_input = [data[name].values for name in fixlen_feature_names]
varlen_input = [genres_list]#varlen_feature_names[0]
model_input = fixlen_input + varlen_input # make sure the order is right

# 4.Define Model,compile and train
model = DeepFM(linear_feature_columns,dnn_feature_columns,task='regression')

model.compile("adam", "mse", metrics=['mse'], )
history = model.fit(model_input, data[target].values,
                    batch_size=256, epochs=10, verbose=2, validation_split=0.2, )