def main():
    hidden_unit = 64
    batch_size = 32
    learning_rate = 0.001
    epochs = 50

    with open('../Din/dataset/dataset.pkl', 'rb') as f:
        train_set = np.array(pickle.load(f))
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        user_count, item_count, cate_count, max_sl = pickle.load(f)

    train_user, train_item, train_hist, train_sl, train_y = input_data(
        train_set, max_sl)

    # Tensorboard
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = 'logs/' + current_time
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                 histogram_freq=1,
                                                 write_graph=True,
                                                 write_grads=False,
                                                 write_images=True,
                                                 embeddings_freq=0,
                                                 embeddings_layer_names=None,
                                                 embeddings_metadata=None,
                                                 embeddings_data=None,
                                                 update_freq=500)

    # model checkpoint
    check_path = 'save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path,
                                                    save_weights_only=True,
                                                    verbose=1,
                                                    period=1)

    model = WideDeep(user_count, item_count, cate_count, cate_list, hidden_unit)
    model.summary()

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss=tf.keras.losses.binary_crossentropy,
                  optimizer=optimizer,
                  metrics=[tf.keras.metrics.AUC()])

    model.fit([train_user, train_item, train_hist, train_sl], train_y,
              epochs=epochs,
              batch_size=batch_size,
              validation_split=0.1,
              callbacks=[tensorboard, checkpoint])
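# --- Hedged note, not part of the original main(): since ModelCheckpoint above
# --- writes weights-only TF checkpoints matching check_path, the most recently
# --- saved weights can later be restored for evaluation, e.g.:
# latest = tf.train.latest_checkpoint('save')
# model.load_weights(latest)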
# ----------------- create dataset ---------------
feature_columns, train, test, val = create_criteo_dataset(
    file=file,
    read_part=read_part,
    sample_num=sample_num,
    embed_dim=embed_dim,
    test_size=test_size)
train_X, train_y = train
test_X, test_y = test
val_X, val_y = val

# ----------------- build model ---------------
model = WideDeep(feature_columns,
                 hidden_units=hidden_units,
                 dnn_dropout=dnn_dropout,
                 residual=True)
# model.summary()

# ----------------- model checkpoint ---------------
check_path = './save/deepfm_weight.epoch_{epoch:4d}.val_loss_{val_loss:.4f}.ckpt'
checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path,
                                                save_weights_only=True,
                                                verbose=1,
                                                period=5)

# ----------------- model evaluate ---------------
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
learning_rate = 0.001
batch_size = 256
epochs = 30

# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test

# ============================ Build Model ==========================
model = WideDeep(feature_columns, hidden_units=hidden_units, dnn_dropout=dnn_dropout)
model.summary()

# ============================ model checkpoint ======================
# check_path = '../save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)

# ============================ Compile ============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])

# ============================== Fit ==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
import tensorflow as tf
from tensorflow.keras import optimizers, losses
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file_path = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt'
    feature_columns, (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file_path, test_size=0.2)

    hidden_units = [256, 128, 64]
    output_dim = 1
    activation = 'relu'

    model = WideDeep(feature_columns, hidden_units, output_dim, activation)
    optimizer = optimizers.SGD(0.01)

    # train_dataset = tf.data.Dataset.from_tensor_slices(((X_train[:, :13], X_train[:, 13:]), y_train))
    # train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
    #
    # model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(train_dataset, epochs=1000)
    # logloss, auc = model.evaluate((X_test[:, :13], X_test[:, 13:]), y_test)
    # print('logloss {}\nAUC {}'.format(round(logloss, 2), round(auc, 2)))
    # model.summary()

    summary_writer = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard')

    for i in range(100):
        with tf.GradientTape() as tape:
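            # --- Hedged sketch: the original loop body is cut off above. A typical
            # --- GradientTape step for this setup; the binary cross-entropy loss,
            # --- the dense/sparse column split at index 13 (taken from the
            # --- commented-out code), and per-step scalar logging are assumptions.
            y_pred = tf.reshape(model((X_train[:, :13], X_train[:, 13:])), [-1])
            loss = tf.reduce_mean(losses.binary_crossentropy(y_train, y_pred))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        with summary_writer.as_default():
            tf.summary.scalar('loss', loss, step=i)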
@author: Administrator
'''
from model import WideDeep
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.callbacks import EarlyStopping
from tensorflow.python.framework.dtypes import int32
from keras import optimizers, losses
import numpy as np
import pickle
from sklearn.metrics import log_loss, roc_auc_score
import tensorflow as tf
from builtins import int
# from loss import auc

model = WideDeep()


def auc(y_true, y_pred):
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)


# model.compile(optimizer='rmsprop', loss=losses.mse, metrics=["mse"],)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=[auc, 'accuracy'])

#
# wide_features = pd.read_csv('data/path_matrix.txt', sep=' ', header=None)
# deep_features = pd.read_csv('data/sns_dense.csv', sep=',', header=0)
#
# for index, row in wide_features.iterrows():
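# --- Hedged note, not part of the original file: tf.py_func only exists in
# --- TensorFlow 1.x. If this script were run under TensorFlow 2.x, an
# --- equivalent metric wrapper could use tf.py_function instead, e.g.:
# def auc(y_true, y_pred):
#     return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)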
testloader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False)

deep_model_params = {
    'deep_columns_idx': deep_columns_idx,
    'embedding_columns_dict': embedding_columns_dict,
    'hidden_size_list': args.hidden_size_list,
    'dropouts': args.dropouts,
    'deep_output_dim': args.deep_out_dim
}
wide_model_params = {
    'wide_input_dim': data_wide.shape[1],
    'wide_output_dim': args.wide_out_dim
}

widedeep = WideDeep(wide_model_params, deep_model_params)
model = widedeep.to(device)
optimizer = torch.optim.Adam(widedeep.parameters(), lr=args.lr)

for epoch in range(args.epochs):
    model.train()
    for idx, (data_wide, data_deep, target) in enumerate(trainloader):
        data_wide, data_deep, target = data_wide.to(device), data_deep.to(
            device), target.to(device)
        x = (data_wide, data_deep)
        optimizer.zero_grad()
        out = model(x)
        loss = F.binary_cross_entropy(out, target.float())
        print('epoch:{}, step:{}, loss:{:.10f}'.format(epoch, idx, loss))
        loss.backward()
        optimizer.step()
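# --- Hedged addition, not in the original script: a matching evaluation pass
# --- over the testloader built above. The use of sklearn's roc_auc_score is an
# --- assumption about how this repo would score the held-out data.
from sklearn.metrics import roc_auc_score

model.eval()
y_true, y_score = [], []
with torch.no_grad():
    for data_wide, data_deep, target in testloader:
        data_wide, data_deep = data_wide.to(device), data_deep.to(device)
        out = model((data_wide, data_deep))
        y_score.extend(out.cpu().numpy().ravel().tolist())
        y_true.extend(target.numpy().ravel().tolist())
print('test AUC: {:.4f}'.format(roc_auc_score(y_true, y_score)))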
    mean = data[feat].mean()
    std = data[feat].std()
    data[feat] = (data[feat] - mean) / (std + 1e-12)

# print(data.shape)
# print(data.head())

train, valid = train_test_split(data, test_size=0.1, random_state=42)
# print(train.shape)   # (540000, 40)
# print(valid.shape)   # (60000, 40)

train_dataset = TensorDataset(
    torch.LongTensor(train[sparse_features].values),
    torch.FloatTensor(train[dense_features].values),
    torch.FloatTensor(train['label'].values))
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=args.train_batch_size,
                          shuffle=True)

valid_dataset = TensorDataset(
    torch.LongTensor(valid[sparse_features].values),
    torch.FloatTensor(valid[dense_features].values),
    torch.FloatTensor(valid['label'].values))
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=args.eval_batch_size,
                          shuffle=False)

cat_fea_unique = [data[f].nunique() for f in sparse_features]

model = WideDeep(cat_fea_unique, num_fea_size=len(dense_features))
train_model(model)
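# --- Hedged sketch, not part of the original file: train_model is called above
# --- but defined elsewhere in the repo. A minimal version consistent with the
# --- loaders built here, assuming WideDeep.forward(sparse, dense) returns
# --- probabilities and that binary cross-entropy with Adam is the intended setup.
def train_model(model, epochs=5, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.BCELoss()
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for cat_fea, num_fea, label in train_loader:
            optimizer.zero_grad()
            pred = model(cat_fea, num_fea).squeeze(-1)  # assumed forward signature
            loss = loss_fn(pred, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print('epoch {}: train loss {:.4f}'.format(epoch, total_loss / len(train_loader)))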
from data_process import read_data, feature_engine
from config import set_args

args = set_args()

path = './data/adult.data'
data = read_data(path)
train_data, test_data, deep_columns_idx, embedding_columns_dict = feature_engine(data)
data_wide = train_data[0]

# Input format for prediction; a single sample is predicted here.
t = (torch.from_numpy(train_data[0].values[0].reshape(-1, train_data[0].values.shape[1])),
     torch.from_numpy(train_data[1].values[0].reshape(-1, train_data[1].values.shape[1])))

# parameters setting
deep_model_params = {
    'deep_columns_idx': deep_columns_idx,
    'embedding_columns_dict': embedding_columns_dict,
    'hidden_size_list': args.hidden_size_list,
    'dropouts': args.dropouts,
    'deep_output_dim': args.deep_out_dim}
wide_model_params = {
    'wide_input_dim': data_wide.shape[1],
    'wide_output_dim': args.wide_out_dim
}

model = WideDeep(wide_model_params, deep_model_params)

# path points to the stored model parameters
path = 'wide_deep_model_0.pkl'
model.load_state_dict(torch.load(path))
print('Predicted result:', int(model(t) > 0.5))
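# --- Hedged note, not part of the original script: a state_dict checkpoint such
# --- as 'wide_deep_model_0.pkl' would normally be written on the training side
# --- with torch.save, and eval mode is usually set before inference, e.g.:
# torch.save(model.state_dict(), 'wide_deep_model_0.pkl')
# model.eval()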
def main():
    data = pd.read_csv(RATING_FILE_PATH_TRAIN)

    batch_size = 128
    max_seq_len = 50
    sparse_features = ['user_id', 'movie_id', 'gender', 'occupation', 'zip']
    dense_features = ['age']
    print(data.head(10))

    feature_max_id = {}
    for feature in sparse_features:
        lbe = LabelEncoder()
        data[feature] = lbe.fit_transform(data[feature]) + 1
        feature_max_id[feature] = data[feature].max() + 1

    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # define features
    user_sparse_features = ["user_id", "gender", "occupation", "zip"]
    user_dense_features = ["age"]
    item_sparse_features = ["movie_id"]

    user_profile = data[user_sparse_features + user_dense_features].drop_duplicates('user_id')
    item_profile = data[item_sparse_features].drop_duplicates('movie_id')
    user_profile.set_index("user_id", drop=False, inplace=True)

    print("Generate train and test dataset...")
    train_set, test_set = generate_train_test_dataset(data)

    print("Generate train and test features...")
    train_dataloader = generate_feature(train_set, user_profile, item_profile, batch_size, max_seq_len)
    test_dataloader = generate_feature(test_set, user_profile, item_profile, batch_size, max_seq_len)

    print("Generate feature columns...")
    embedding_dim = 8
    user_feature_columns = [SparseFeat(feat, feature_max_id[feat], embedding_dim)
                            for i, feat in enumerate(user_sparse_features)] \
        + [DenseFeat(feat, 1) for i, feat in enumerate(user_dense_features)] \
        + [SeqSparseFeat(SparseFeat('user_hist', feature_max_id['movie_id'], embedding_dim,
                                    embedding_name='movie_id'),
                         maxlen=max_seq_len, combiner='mean', length_name=None)]
    item_feature_columns = [SparseFeat(feat, feature_max_id[feat], embedding_dim)
                            for i, feat in enumerate(item_sparse_features)]

    # define model
    model = WideDeep(feature_columns=user_feature_columns + item_feature_columns)

    loss_func = nn.BCELoss()
    optimizer = torch.optim.Adagrad(params=model.parameters(), lr=0.01)
    metric_func = auc
    metric_name = 'auc'

    epochs = 3
    log_step_freq = 1000

    print('start_training.........')
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('========' * 8 + '%s' % nowtime)

    for epoch in range(1, epochs + 1):
        model.train()
        loss_sum = 0.0
        metric_sum = 0.0
        step = 1

        for step, (features, labels) in enumerate(train_dataloader, 1):
            optimizer.zero_grad()
            predictions = model(features)
            loss = loss_func(predictions, labels)
            metric = metric_func(predictions, labels)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            metric_sum += metric.item()
            if step % log_step_freq == 0:
                print(("[step=%d] loss: %.3f, " + metric_name + ": %.3f")
                      % (step, loss_sum / step, metric_sum / step))

        model.eval()
        val_loss_sum = 0.0
        val_metric_sum = 0.0
        for val_step, (features, labels) in enumerate(test_dataloader, 1):
            with torch.no_grad():
                predictions = model(features)
                val_loss = loss_func(predictions, labels)
                val_metric = metric_func(predictions, labels)
            val_loss_sum += val_loss.item()
            val_metric_sum += val_metric.item()

        # average validation loss and metric for this epoch
        info = (epoch, val_loss_sum / val_step, val_metric_sum / val_step)
        print(("\nEPOCH=%d, val_loss=%.3f, " + "val_auc" + " = %.3f") % info)
        nowtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('\n' + '==========' * 8 + '%s' % nowtime)
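# --- Hedged sketch, not shown in the snippet above: the `auc` bound to
# --- metric_func is defined elsewhere in the repo. One plausible helper that
# --- matches its usage (called as auc(predictions, labels), result exposes
# --- .item()), built on sklearn's roc_auc_score; an assumption, not the original.
import torch
from sklearn.metrics import roc_auc_score

def auc(y_pred, y_true):
    # Compute batch ROC-AUC on CPU/NumPy and wrap it as a tensor so that
    # .item() works in the training loop above.
    return torch.tensor(roc_auc_score(y_true.detach().cpu().numpy(),
                                      y_pred.detach().cpu().numpy()))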
hidden_units = [256, 128, 64]
learning_rate = 0.001
batch_size = 512
epochs = 5

# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test

# ============================ Build Model ==========================
model = WideDeep(feature_columns, hidden_units=hidden_units, dnn_dropout=dnn_dropout)
model.summary()

# ============================ model checkpoint ======================
# check_path = '../save/wide_deep_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)

# ============================ Compile ============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])

# ============================== Fit ==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
    # callbacks=[checkpoint],
    batch_size=batch_size,