Esempio n. 1
0
    def __init__(self, list_file, category_emb, field_size):
        """Load a Criteo-format file and keep its contents as torch tensors.

        :param list_file: first argument forwarded to
            ``data_preprocess.read_criteo_data``
        :param category_emb: second argument forwarded to
            ``data_preprocess.read_criteo_data``
        :param field_size: size of the middle axis when the index array is
            reshaped to ``(-1, field_size, 1)``
        """
        parsed = data_preprocess.read_criteo_data(list_file, category_emb)

        self.feature_size = parsed['feature_sizes']
        raw_index, raw_value, raw_label = (
            parsed['index'], parsed['value'], parsed['label'])
        # The sample count is taken from the labels before any conversion.
        self.length = len(raw_label)

        # Convert everything to numpy first; only the index array is reshaped.
        index_array = np.array(raw_index).reshape((-1, field_size, 1))
        value_array = np.array(raw_value)
        label_array = np.array(raw_label)

        # Indices are stored as integer tensors, values and labels as floats.
        self.Xi = torch.LongTensor(index_array)
        self.Xv = torch.FloatTensor(value_array)
        self.Y = torch.FloatTensor(label_array)
        print('dataset size: ', self.Xi.shape, self.Xv.shape, self.Y.shape)
Esempio n. 2
0
# -*- coding:utf-8 -*-

from utils import data_preprocess
from model import DeepFM
import torch

# Parse the tiny train/test splits; both share one category-embedding file.
category_emb_path = './data/category_emb.csv'
result_dict = data_preprocess.read_criteo_data(
    './data/tiny_train_input.csv', category_emb_path)
test_dict = data_preprocess.read_criteo_data(
    './data/tiny_test_input.csv', category_emb_path)

# Build the model and fit it while CUDA device 2 is the selected device.
with torch.cuda.device(2):
    deepfm = DeepFM.DeepFM(
        39,
        result_dict['feature_sizes'],
        verbose=True,
        use_cuda=True,
        weight_decay=0.0001,
        use_fm=True,
        use_ffm=False,
        use_deep=True,
    ).cuda()
    # Training data comes first, followed by the test split.
    # NOTE: `ealry_stopping` is spelled as the fit() API expects it.
    deepfm.fit(
        result_dict['index'],
        result_dict['value'],
        result_dict['label'],
        test_dict['index'],
        test_dict['value'],
        test_dict['label'],
        ealry_stopping=True,
        refit=True,
    )
from models.models_online_deep.deepfm_adam import DeepFMAdam
from models.models_online_deep.deepfm_onn import DeepFMOnn
from models.models_online_deep.nfm_adam import NFMAdam
from models.models_online_deep.nfm_onn import NFMOnn
from models.models_online_deep.fm_adam import FMAdam

########################################################################################################################
# save path
########################################################################################################################
# Output directories, resolved against the current working directory.
save_log = os.getcwd() + '/performance/save_log/'
save_model = os.getcwd() + '/performance/save_model/'

# ----------------------------------------------------------------------------
# dataset setup
# ----------------------------------------------------------------------------
train_csv_path = 'dataset/criteo/tiny_train_input.csv'
category_emb_path = 'dataset/criteo/category_emb.csv'

train_dict = data_preprocess.read_criteo_data(train_csv_path, category_emb_path)
train_dict_size = train_dict['size']

num_batchdata = 2500
num_batch = 10
# Either the string "Iteration" or an integer (e.g. 3) selects how the
# batch lists below are built.
data_config = "Iteration"

if data_config == "Iteration":
    (batch_train_Xi_list,
     batch_train_Xv_list,
     batch_train_Y_list,
     ratio_list) = data_preprocess.create_ten_iter(
        train_csv_path, category_emb_path, num_batch, num_batchdata)

elif isinstance(data_config, int):
    (batch_train_Xi_list,
     batch_train_Xv_list,
     batch_train_Y_list,
     ratio_list) = data_preprocess.create_dataset(
        train_csv_path,
        category_emb_path,
        int(num_batch / data_config),
        num_batch,
        num_batchdata)
Esempio n. 4
0
        :param y: tensor of labels
        :return: metric of the evaluation
        """
        y_pred = self.inner_predict_proba(Xi, Xv)
        return self.eval_metric(y.cpu().data.numpy(), y_pred)


"""
    test part
"""
if __name__ == '__main__':
    import sys
    sys.path.append('../')
    from utils import data_preprocess

    result_dict = data_preprocess.read_criteo_data('../data/train.csv',
                                                   '../data/category_emb.csv')
    test_dict = data_preprocess.read_criteo_data('../data/test.csv',
                                                 '../data/category_emb.csv')
    with torch.cuda.device(0):
        dcn = DCN(39,
                  result_dict['feature_sizes'],
                  batch_size=128 * 32,
                  verbose=True,
                  use_cuda=True,
                  weight_decay=0.00002,
                  use_inner_product=True).cuda()
        dcn.fit(result_dict['index'],
                result_dict['value'],
                result_dict['label'],
                test_dict['index'],
                test_dict['value'],
Esempio n. 5
0
        """
        :param Xi: tensor of feature index
        :param Xv: tensor of feature value
        :param y: tensor of labels
        :return: metric of the evaluation
        """
        y_pred = self.inner_predict_proba(Xi, Xv)
        return self.eval_metric(y.cpu().data.numpy(), y_pred)

"""
    test part
"""
import sys
sys.path.append('../')
from utils import data_preprocess

# Parse the train/test CSVs; both share one category-embedding file.
category_emb_path = '../data/category_emb.csv'
result_dict = data_preprocess.read_criteo_data('../data/train.csv', category_emb_path)
test_dict = data_preprocess.read_criteo_data('../data/test.csv', category_emb_path)

# The same checkpoint file is loaded before fitting and passed as save_path.
checkpoint_path = '../data/model/din.pkl'

with torch.cuda.device(0):
    din = DIN(
        39,
        result_dict['feature_sizes'],
        batch_size=128 * 64,
        is_shallow_dropout=False,
        verbose=True,
        use_cuda=True,
        weight_decay=0.0000002,
        use_fm=True,
        use_ffm=False,
        use_high_interaction=True,
        interation_type=False,
    ).cuda()

    # Disabled earlier run, kept for reference (pre_train=True, 32 epochs):
    # din.fit(result_dict['index'], result_dict['value'], result_dict['label'],
    #         test_dict['index'], test_dict['value'], test_dict['label'],
    #         ealry_stopping=True, pre_train=True, n_epochs=32, refit=False,
    #         save_path='../data/model/din.pkl')

    # Restore the saved weights, then fit with pre_train disabled.
    din.load_state_dict(torch.load(checkpoint_path))
    din.fit(
        result_dict['index'],
        result_dict['value'],
        result_dict['label'],
        test_dict['index'],
        test_dict['value'],
        test_dict['label'],
        ealry_stopping=True,
        pre_train=False,
        n_epochs=64,
        refit=False,
        save_path=checkpoint_path,
    )
Esempio n. 6
0
import os
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
from time import time

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from utils import data_preprocess

# NOTE(review): absolute, machine-specific data directory.
rootDir = 'E:/conf_test/dnn_ctr/data'

train_csv_path = rootDir + '/tiny_train_input.csv'
category_emb_path = rootDir + '/category_emb.csv'
result_dict = data_preprocess.read_criteo_data(train_csv_path, category_emb_path)
# The test split is currently not loaded:
# test_dict = data_preprocess.read_criteo_data(rootDir + '/tiny_test_input.csv', rootDir + '/category_emb.csv')

# Show the first entry of the parsed values ...
first_value = result_dict['value'][0]
print(first_value)

# ... and the length of the first entry of the parsed indices.
index_list = result_dict['index']
first_index = index_list[0]
print(len(first_index))
Esempio n. 7
0
# -*- coding:utf-8 -*-

from utils import data_preprocess
from model import DeepFM
import torch

# Parse the tiny train/test splits; both share one category-embedding file.
category_emb_path = 'data/category_emb.csv'
result_dict = data_preprocess.read_criteo_data('data/tiny_train_input.csv', category_emb_path)
test_dict = data_preprocess.read_criteo_data('data/tiny_test_input.csv', category_emb_path)

# Keyword options handed to the DeepFM constructor.
model_options = dict(
    verbose=True,
    use_cuda=True,
    weight_decay=0.0001,
    use_fm=True,
    use_ffm=False,
    use_deep=True,
)

# Build the model and fit it while CUDA device 0 is the selected device.
with torch.cuda.device(0):
    deepfm = DeepFM.DeepFM(39, result_dict['feature_sizes'], **model_options).cuda()
    # Training data comes first, followed by the test split.
    # NOTE: `ealry_stopping` is spelled as the fit() API expects it.
    deepfm.fit(
        result_dict['index'], result_dict['value'], result_dict['label'],
        test_dict['index'], test_dict['value'], test_dict['label'],
        ealry_stopping=True, refit=True)

# no gpu
# deepfm = DeepFM.DeepFM(