Example 1
def main():
    hidden_unit = 64
    batch_size = 32
    learning_rate = 0.001
    epochs = 50
    with open('../Din/dataset/dataset.pkl', 'rb') as f:
        train_set = np.array(pickle.load(f))
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        user_count, item_count, cate_count, max_sl = pickle.load(f)
    train_user, train_item, train_hist, train_sl, train_y = input_data(
        train_set, max_sl)
    # Tensorboard
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = 'logs/' + current_time
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                 histogram_freq=1,
                                                 write_graph=True,
                                                 write_images=True,
                                                 embeddings_freq=0,
                                                 update_freq=500)
    # model checkpoint
    check_path = 'save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path,
                                                    save_weights_only=True,
                                                    verbose=1,
                                                    save_freq='epoch')

    model = WideDeep(user_count, item_count, cate_count, cate_list,
                     hidden_unit)
    model.summary()
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss=tf.keras.losses.binary_crossentropy,
                  optimizer=optimizer,
                  metrics=[tf.keras.metrics.AUC()])
    model.fit([train_user, train_item, train_hist, train_sl],
              train_y,
              epochs=epochs,
              batch_size=batch_size,
              validation_split=0.1,
              callbacks=[tensorboard, checkpoint])
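    # --- Illustrative follow-up (not part of the original script): restore the
    # latest checkpoint from 'save/' and evaluate on the held-out test_set.
    # Assumes input_data() can be applied to test_set the same way as to
    # train_set. ---
    test_user, test_item, test_hist, test_sl, test_y = input_data(
        np.array(test_set), max_sl)
    latest = tf.train.latest_checkpoint('save')
    if latest is not None:
        model.load_weights(latest)
    model.evaluate([test_user, test_item, test_hist, test_sl],
                   test_y,
                   batch_size=batch_size)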
Example 2
# ----------------- Create dataset -----------------
feature_columns, train, test, val = create_criteo_dataset(
    file=file,
    read_part=read_part,
    sample_num=sample_num,
    embed_dim=embed_dim,
    test_size=test_size)

train_X, train_y = train
test_X, test_y = test
val_X, val_y = val

# ----------------- Build model -----------------
model = WideDeep(feature_columns,
                 hidden_units=hidden_units,
                 dnn_dropout=dnn_dropout,
                 residual=True)
model.summary()

# ----------------- Model checkpoint -----------------
check_path = './save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path,
                                                save_weights_only=True,
                                                verbose=1,
                                                period=5)

# ----------------- Model evaluation -----------------
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
Example 3
    learning_rate = 0.001
    batch_size = 256
    epochs = 30

    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = WideDeep(feature_columns,
                     hidden_units=hidden_units,
                     dnn_dropout=dnn_dropout)
    model.summary()
    # ============================model checkpoint======================
    # check_path = '../save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # ============================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ==============================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
Example 4
import tensorflow as tf
from tensorflow.keras import optimizers, losses
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file_path = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt'
    feature_columns, (X_train,
                      y_train), (X_test,
                                 y_test) = create_criteo_dataset(file_path,
                                                                 test_size=0.2)

    hidden_units = [256, 128, 64]
    output_dim = 1
    activation = 'relu'

    model = WideDeep(feature_columns, hidden_units, output_dim, activation)
    optimizer = optimizers.SGD(0.01)

    # train_dataset = tf.data.Dataset.from_tensor_slices(((X_train[:, :13], X_train[:, 13:]), y_train))
    # train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
    #
    # model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(train_dataset, epochs=1000)
    # logloss, auc = model.evaluate((X_test[:, :13], X_test[:, 13:]), y_test)
    # print('logloss {}\nAUC {}'.format(round(logloss,2), round(auc,2)))
    # model.summary()

    summary_writer = tf.summary.create_file_writer(
        'E:\\PycharmProjects\\tensorboard')
    for i in range(100):
        with tf.GradientTape() as tape:
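            # --- Illustrative continuation (not part of the original snippet).
            # Assumes, as in the commented-out Dataset code above, that the model
            # takes the 13 dense and 26 sparse Criteo columns as separate inputs
            # and outputs click probabilities (full-batch here for simplicity). ---
            y_pred = tf.reshape(model((X_train[:, :13], X_train[:, 13:])), [-1])
            loss = tf.reduce_mean(
                losses.binary_crossentropy(tf.cast(y_train, tf.float32), y_pred))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # log the training loss to the TensorBoard writer created above
        with summary_writer.as_default():
            tf.summary.scalar('loss', loss, step=i)
        if i % 10 == 0:
            print('step {}: loss {:.4f}'.format(i, float(loss)))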
Example 5
@author: Administrator
'''
from model import WideDeep
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.callbacks import EarlyStopping
from tensorflow.python.framework.dtypes import int32
from keras import optimizers, losses
import numpy as np
import pickle
from sklearn.metrics import log_loss, roc_auc_score
import tensorflow as tf
from builtins import int
# from loss import auc

model = WideDeep()


def auc(y_true, y_pred):
    # Keras-compatible AUC metric that wraps sklearn's roc_auc_score via
    # tf.py_func (the TF1 API; under TF2 eager execution this would be
    # tf.py_function).
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)


#model.compile(optimizer='rmsprop', loss=losses.mse, metrics=["mse"],)
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=[auc, 'accuracy'])
#
# wide_features=pd.read_csv('data/path_matrix.txt', sep='  ', header=None)
# deep_features=pd.read_csv('data/sns_dense.csv', sep=',', header=0)
#
# for index, row in wide_features.iterrows():
Example 6
    testloader = DataLoader(test_data,
                            batch_size=args.batch_size,
                            shuffle=False)

    deep_model_params = {
        'deep_columns_idx': deep_columns_idx,
        'embedding_columns_dict': embedding_columns_dict,
        'hidden_size_list': args.hidden_size_list,
        'dropouts': args.dropouts,
        'deep_output_dim': args.deep_out_dim
    }
    wide_model_params = {
        'wide_input_dim': data_wide.shape[1],
        'wide_output_dim': args.wide_out_dim
    }
    widedeep = WideDeep(wide_model_params, deep_model_params)
    model = widedeep.to(device)
    optimizer = torch.optim.Adam(widedeep.parameters(), lr=args.lr)

    for epoch in range(args.epochs):
        model.train()
        for idx, (data_wide, data_deep, target) in enumerate(trainloader):
            data_wide, data_deep, target = data_wide.to(device), data_deep.to(
                device), target.to(device)
            x = (data_wide, data_deep)
            optimizer.zero_grad()
            out = model(x)
            loss = F.binary_cross_entropy(out, target.float())
            print('epoch:{}, step:{}, loss:{:.10f}'.format(epoch, idx, loss))
            loss.backward()
            optimizer.step()
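        # --- Illustrative per-epoch evaluation (not part of the original
        # snippet): reuses the testloader built above and assumes it yields
        # (data_wide, data_deep, target) batches like trainloader, with the
        # model producing probabilities in [0, 1]. ---
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for data_wide, data_deep, target in testloader:
                data_wide, data_deep = data_wide.to(device), data_deep.to(device)
                pred = (model((data_wide, data_deep)) > 0.5).long().view(-1).cpu()
                correct += (pred == target.long().view(-1)).sum().item()
                total += target.size(0)
        print('epoch:{}, test accuracy:{:.4f}'.format(epoch, correct / total))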
Example 7
        mean = data[feat].mean()
        std = data[feat].std()
        data[feat] = (data[feat] - mean) / (std + 1e-12)
    # print(data.shape)
    # print(data.head())

    train, valid = train_test_split(data, test_size=0.1, random_state=42)
    # print(train.shape)   # (540000, 40)
    # print(valid.shape)   # (60000, 40)
    train_dataset = TensorDataset(
        torch.LongTensor(train[sparse_features].values),
        torch.FloatTensor(train[dense_features].values),
        torch.FloatTensor(train['label'].values))
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.train_batch_size,
                              shuffle=True)

    valid_dataset = TensorDataset(
        torch.LongTensor(valid[sparse_features].values),
        torch.FloatTensor(valid[dense_features].values),
        torch.FloatTensor(valid['label'].values))
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=args.eval_batch_size,
                              shuffle=False)

    cat_fea_unique = [data[f].nunique() for f in sparse_features]

    model = WideDeep(cat_fea_unique, num_fea_size=len(dense_features))

    train_model(model)
Example 8
from data_process import read_data, feature_engine
from config import set_args

args = set_args()
path = './data/adult.data'
data = read_data(path)
train_data, test_data, deep_columns_idx, embedding_columns_dict = feature_engine(data)
data_wide = train_data[0]

# Input format for prediction; here a single record is scored
t = (torch.from_numpy(train_data[0].values[0].reshape(-1, train_data[0].values.shape[1])),
     torch.from_numpy(train_data[1].values[0].reshape(-1, train_data[1].values.shape[1])))

# parameters setting
deep_model_params = {
    'deep_columns_idx': deep_columns_idx,
    'embedding_columns_dict': embedding_columns_dict,
    'hidden_size_list': args.hidden_size_list,
    'dropouts': args.dropouts,
    'deep_output_dim': args.deep_out_dim}
wide_model_params = {
    'wide_input_dim': data_wide.shape[1],
    'wide_output_dim': args.wide_out_dim
}
model = WideDeep(wide_model_params, deep_model_params)
# path is the file that stores the trained model weights
path = 'wide_deep_model_0.pkl'

model.load_state_dict(torch.load(path))
model.eval()  # disable dropout for inference
with torch.no_grad():
    print('Predicted result:', int(model(t) > 0.5))
Example 9
def main():
    data = pd.read_csv(RATING_FILE_PATH_TRAIN)
    batch_size = 128
    max_seq_len = 50
    sparse_features = ['user_id', 'movie_id', 'gender', 'occupation', 'zip']
    dense_features = ['age']
    print(data.head(10))

    feature_max_id = {}
    for feature in sparse_features:
        lbe = LabelEncoder()
        data[feature] = lbe.fit_transform(data[feature]) + 1
        feature_max_id[feature] = data[feature].max() + 1

    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # define features
    user_sparse_features = ["user_id", "gender", "occupation", "zip"]
    user_dense_features = ["age"]

    item_sparse_features = ["movie_id"]

    user_profile = data[user_sparse_features +
                        user_dense_features].drop_duplicates('user_id')
    item_profile = data[item_sparse_features].drop_duplicates('movie_id')
    user_profile.set_index("user_id", drop=False, inplace=True)

    print("Generate train and test dataset...")
    train_set, test_set = generate_train_test_dataset(data)

    print("Generate train and test features...")
    train_dataloader = generate_feature(train_set, user_profile, item_profile,
                                        batch_size, max_seq_len)
    test_dataloader = generate_feature(test_set, user_profile, item_profile,
                                       batch_size, max_seq_len)

    print("Generate feature columns...")
    embedding_dim = 8
    user_feature_columns = [SparseFeat(feat, feature_max_id[feat], embedding_dim) for i, feat in enumerate(user_sparse_features)] \
        + [DenseFeat(feat, 1) for i, feat in enumerate(user_dense_features)] \
        + [SeqSparseFeat(SparseFeat('user_hist', feature_max_id['movie_id'], embedding_dim, embedding_name='movie_id'),
                         maxlen=max_seq_len, combiner='mean', length_name=None)]

    item_feature_columns = [
        SparseFeat(feat, feature_max_id[feat], embedding_dim)
        for i, feat in enumerate(item_sparse_features)
    ]

    # define model
    model = WideDeep(feature_columns=user_feature_columns +
                     item_feature_columns)

    loss_func = nn.BCELoss()
    optimizer = torch.optim.Adagrad(params=model.parameters(), lr=0.01)
    metric_func = auc
    metric_name = 'auc'
    epochs = 3
    log_step_freq = 1000

    print('start_training.........')
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('========' * 8 + '%s' % nowtime)

    for epoch in range(1, epochs + 1):
        model.train()
        loss_sum = 0.0
        metric_sum = 0.0
        step = 1

        for step, (features, labels) in enumerate(train_dataloader, 1):
            optimizer.zero_grad()

            predictions = model(features)
            loss = loss_func(predictions, labels)
            metric = metric_func(predictions, labels)

            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            metric_sum += metric.item()
            if step % log_step_freq == 0:
                print(("[step=%d] loss: %.3f, " + metric_name + ": %.3f") %
                      (step, loss_sum / step, metric_sum / step))

        model.eval()
        val_loss_sum = 0.0
        val_metric_sum = 0.0

        for val_step, (features, labels) in enumerate(test_dataloader, 1):
            with torch.no_grad():
                predictions = model(features)
                val_loss = loss_func(predictions, labels)
                val_metric = metric_func(predictions, labels)
            val_loss_sum += val_loss.item()
            val_metric_sum += val_metric.item()

        info = (epoch, val_loss_sum / val_step, val_metric_sum / val_step)
        print("\nEPOCH=%d, val_loss=%.3f, val_auc=%.3f" % info)
        nowtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('\n' + '==========' * 8 + '%s' % nowtime)
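    # --- Illustrative follow-up (not part of the original snippet): persist the
    # trained weights; the file name is hypothetical. ---
    torch.save(model.state_dict(), 'wide_deep_movielens.pth')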
Example 10
    hidden_units = [256, 128, 64]

    learning_rate = 0.001
    batch_size = 512
    epochs = 5

    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = WideDeep(feature_columns, hidden_units=hidden_units, dnn_dropout=dnn_dropout)
    model.summary()
    # ============================model checkpoint======================
    # check_path = '../save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # ============================Compile============================
    model.compile(loss=binary_crossentropy, optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ==============================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[checkpoint],
        batch_size=batch_size,