def test_WDL():
    """Smoke-test WDL: one training epoch, then weight and full-model round-trips.

    Builds a small random dataset (three sparse and three dense features,
    64 samples), fits a ``WDL`` model for a single epoch with half of the
    data held out for validation, and then checks that both
    ``save_weights``/``load_weights`` and ``save_model``/``load_model`` run
    without raising.

    Side effects: writes ``WDL_weights.h5`` and ``WDL.h5`` using relative
    paths and prints a progress message after each stage.
    """
    name = "WDL"
    sample_size = 64
    feature_dim_dict = {
        'sparse': {'sparse_1': 2, 'sparse_2': 5, 'sparse_3': 10},
        'dense': ['dense_1', 'dense_2', 'dense_3'],
    }

    # One random integer column per sparse feature, bounded by its vocabulary size.
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    # One random float column per dense feature.  The feature name itself is
    # not needed, so the loop variable is `_`; reusing `name` here would
    # shadow the model name above (and overwrite it on Python 2).
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = WDL(feature_dim_dict, feature_dim_dict,
                hidden_size=[32, 32], keep_prob=0.5)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    # The inputs are passed twice; presumably one copy per feature dict
    # handed to WDL above -- confirm against the WDL signature.
    model.fit(x + x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")

    weights_path = name + '_weights.h5'
    model.save_weights(weights_path)
    model.load_weights(weights_path)
    print(name + " test save load weight pass!")

    model_path = name + '.h5'
    save_model(model, model_path)
    # `custom_objects` comes from the enclosing module.
    model = load_model(model_path, custom_objects)
    print(name + " test save load model pass!")

    print(name + " test pass!")
def find_l2_reg_embedding(linear_feature_columns, dnn_feature_columns,
                          train_model_input, train, test_model_input, test):
    """Train one WDL model per candidate ``l2_reg_embedding`` and tabulate test metrics.

    The candidate values are read from ``config.param_rand['l2_reg_embedding']``.
    For each one, a ``WDL`` model is built, compiled with the Adam optimizer
    and MSE loss, fitted on the training data (20% held out for validation),
    and evaluated on the test data.

    Args:
        linear_feature_columns: feature columns passed to ``WDL`` as its
            first positional argument.
        dnn_feature_columns: feature columns passed to ``WDL`` as its second
            positional argument.
        train_model_input: model input used for ``model.fit``.
        train: frame-like object indexed with the module-level ``target`` to
            obtain training labels.
        test_model_input: model input used for ``model.predict``.
        test: frame-like object indexed with the module-level ``target`` to
            obtain test labels.

    Returns:
        A ``pandas.DataFrame`` with one row per candidate and the columns
        ``l2_reg_embedding``, ``RMSE``, ``MAE``, ``MSE`` and ``AUC`` (metrics
        rounded to three decimals).
    """
    cols = ['l2_reg_embedding', 'RMSE', 'MAE', 'MSE', 'AUC']
    candidates = config.param_rand['l2_reg_embedding']
    df_result = pd.DataFrame(columns=cols, index=range(len(candidates)))

    # Labels do not change between iterations, so extract them once.
    y_train = train[target].values
    y_test = test[target].values

    for i, l2_value in enumerate(candidates):
        model = WDL(
            linear_feature_columns,
            dnn_feature_columns,
            # TODO: make dnn_hidden_units a tuned parameter as well.
            dnn_hidden_units=(2, 2),
            l2_reg_linear=0.1,
            l2_reg_embedding=l2_value,
            l2_reg_dnn=0,
            init_std=0.0001,
            seed=1024,
            task='binary')
        model.compile("adam", "mse", metrics=['mse'])
        model.fit(
            train_model_input,
            y_train,
            batch_size=256,
            epochs=config.model_epoch['epoch'],
            verbose=2,
            validation_split=0.2,
        )
        pred_ans = model.predict(test_model_input, batch_size=256)

        # Compute MSE once and derive RMSE from the same value.
        mse = mean_squared_error(y_test, pred_ans)
        row = {
            'l2_reg_embedding': l2_value,
            'RMSE': np.round(math.sqrt(mse), 3),
            'MAE': np.round(mean_absolute_error(y_test, pred_ans), 3),
            'MSE': np.round(mse, 3),
            'AUC': np.round(roc_auc_score(y_test, pred_ans), 3),
        }
        # Write straight into the frame.  The previous chained form
        # (`df_result.loc[i].RMSE = ...`) assigns to a temporary row object
        # and is silently lost when pandas returns a copy (e.g. under
        # Copy-on-Write), leaving the result table full of NaN.
        for col, value in row.items():
            df_result.at[i, col] = value

    return df_result
def widendeep_model(linear_feature_columns, dnn_feature_columns,
                    train_model_input, train, test_model_input, test):
    """Train a single Wide & Deep model, save it, and report test metrics.

    Hyper-parameters are read from ``config.widendeep_att`` and the epoch
    count from ``config.model_epoch['epoch']``.  The model is compiled with
    the Adam optimizer and MSE loss, fitted with 20% of the training data
    held out for validation, used to predict on the test input, and then
    written to ``saved_widendeep.h5`` (relative path).

    Args:
        linear_feature_columns: feature columns passed to ``WDL`` as its
            first positional argument.
        dnn_feature_columns: feature columns passed to ``WDL`` as its second
            positional argument.
        train_model_input: model input used for ``model.fit``.
        train: frame-like object indexed with the module-level ``target`` to
            obtain training labels.
        test_model_input: model input used for ``model.predict``.
        test: frame-like object indexed with the module-level ``target`` to
            obtain test labels.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``model``, ``RMSE``,
        ``MAE``, ``MSE``, ``AUC`` and ``score``.  Metrics are rounded to three
        decimals; ``score`` is not populated here and stays NaN.
    """
    cols = ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score']
    df_result = pd.DataFrame(columns=cols, index=range(1))

    model = WDL(
        linear_feature_columns,
        dnn_feature_columns,
        dnn_hidden_units=config.widendeep_att["dnn_hidden_units"],
        # The following settings are intentionally left disabled, so WDL
        # falls back to its own defaults for them:
        # l2_reg_linear=config.widendeep_att["l2_reg_linear"],
        # l2_reg_embedding=config.widendeep_att["l2_reg_embedding"],
        # l2_reg_dnn=config.widendeep_att["l2_reg_dnn"],
        # init_std=config.widendeep_att["init_std"],
        dnn_dropout=config.widendeep_att['dnn_dropout'],
        dnn_activation=config.widendeep_att['dnn_activation'],
        seed=config.widendeep_att["seed"],
        task=config.widendeep_att["task"])
    model.compile("adam", "mse", metrics=['mse'])
    model.fit(train_model_input,
              train[target].values,
              batch_size=256,
              epochs=config.model_epoch['epoch'],
              verbose=2,
              validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    save_model(model, 'saved_widendeep.h5')

    y_test = test[target].values
    # Compute MSE once and derive RMSE from the same value.
    mse = mean_squared_error(y_test, pred_ans)
    row = {
        'model': "Wide and Deep",
        'RMSE': np.round(math.sqrt(mse), 3),
        'MAE': np.round(mean_absolute_error(y_test, pred_ans), 3),
        'MSE': np.round(mse, 3),
        'AUC': np.round(roc_auc_score(y_test, pred_ans), 3),
    }
    # Write straight into the frame.  The previous chained form
    # (`df_result.loc[0].RMSE = ...`) assigns to a temporary row object and
    # is silently lost when pandas returns a copy (e.g. under Copy-on-Write).
    for col, value in row.items():
        df_result.at[0, col] = value

    return df_result
# Label column(s) the model is trained to predict.
target = ['rating']

# Label-encode each sparse feature column in place, with a fresh encoder
# per column.
for feature in sparse_features:
    lbe = LabelEncoder()
    data[feature] = lbe.fit_transform(data[feature])

# Describe every sparse feature by its name and its number of distinct values.
fixlen_feature_columns = [
    SparseFeat(col, data[col].nunique()) for col in sparse_features
]
# The wide (linear) and deep (DNN) parts share the same feature columns.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(
    linear_feature_columns + dnn_feature_columns
)

# Split the dataset into training and test sets (80% / 20%).
train, test = train_test_split(data, test_size=0.2)
train_model_input = dict(
    (col, train[col].values) for col in feature_names
)
test_model_input = dict(
    (col, test[col].values) for col in feature_names
)

# Train a WDL regression model.
model = WDL(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile("adam", "mse", metrics=['mse'])
history = model.fit(
    train_model_input,
    train[target].values,
    batch_size=256,
    epochs=100,
    verbose=True,
    validation_split=0.2,
)

# Plot the training loss against the epoch index.
plt.figure()
x = range(len(history.history['loss']))
plt.plot(x, history.history['loss'])
plt.title('loss')

# Predict on the test set with the trained model.
pred_ans = model.predict(test_model_input, batch_size=256)

# Report the RMSE, derived from the MSE rounded to four decimals.
mse = round(mean_squared_error(test[target].values, pred_ans), 4)
rmse = mse ** 0.5
print("test RMSE", rmse)
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy']) filepath = 'model_save/wdl_sample-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5' checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min') history = model.fit( model_input, data[target].values, callbacks=[checkpoint], batch_size=batch_size, epochs=50, verbose=1, validation_split=0.2, ) elif mode == 'test': model = WDL({ "sparse": sparse_feature_dim, "dense": [] }, final_activation='sigmoid') model.load_weights( 'model_save/wdl_sample-ep001-loss0.184-val_loss0.172.h5') # model = load_model('model_save/deep_fm_sample-ep001-loss0.192-val_loss0.176.h5')