def Xtest_train_ffm(train_data):
    """Train an FFM using default settings overridden by ``train_data.conf``.

    The defaults below are merged with ``train_data.conf`` (entries from
    ``train_data.conf`` win on key clashes).  The merged mapping is used both
    as keyword arguments for ``FFM`` and as the configuration handed to
    ``train_model`` together with ``train_data.train_iter`` and
    ``train_data.test``.

    NOTE(review): the ``X`` name prefix presumably keeps a test runner from
    collecting this function -- confirm before renaming.
    """
    conf = dict(
        dim=10,
        use_unary=True,
        num_iter=5,
        opt_cls=optim.Adam,
        opt_kwargs=dict(lr=1e-3),
    )
    # Data-set specific settings take precedence over the defaults above.
    conf.update(train_data.conf)

    train_model(FFM(**conf), train_data.train_iter, train_data.test, conf)
if __name__ == '__main__':
    # Positional arguments later passed to FFM as (m, n, k, ...).
    n = 5
    m = 2
    k = 2

    # Input / output files.
    train_file = "train.txt"
    valid_file = "valid.txt"
    model_file = "ffm.npy"

    # Hyper-parameters.
    eta = 0.01
    lambd = 1e-2
    max_echo = 30
    max_r2 = 0.9

    # Train the model and save its parameters.
    sample_generator = Sample(train_file)
    ffm = FFM(m, n, k, eta, lambd)
    ffm.train(sample_generator, max_echo, max_r2)
    ffm.save_model(model_file)

    # Load the model and measure how well it fits the validation set.
    ffm.load_model(model_file)
    valid_generator = Sample(valid_file)

    y_sum = 0.0
    y_square_sum = 0.0
    err_square_sum = 0.0  # sum of squared errors
    population = 0  # total number of samples

    for node_list, y in valid_generator:
        # The true y takes values in {-1, 1} while the predicted y lies in
        # (0, 1); map -1 to 0.0 so both use the same scale when the fit is
        # evaluated.
        if y == -1:
            y = 0.0
        y_hat = ffm.predict(node_list)
        y_sum += y
        y_square_sum += y ** 2
# coding: utf-8
"""Fit an FFM on libffm-formatted files and report accuracy and F1."""
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

from ffm import FFM, FFMData, load_libffm

# Directory holding the libffm-formatted train / test files.
data_dir = './'

# Load features and labels for each split, then wrap them for the model.
train_X, train_y = load_libffm(data_dir + 'bigdata.tr.txt')
test_X, test_y = load_libffm(data_dir + 'bigdata.te.txt')
train_data = FFMData(train_X, train_y)
test_data = FFMData(test_X, test_y)

# Fit with the test split passed as validation data.
# NOTE(review): `maximum=False` presumably marks logloss as lower-is-better
# for early stopping -- confirm against FFM.fit.
model = FFM(eta=0.1, lam=0.0002, k=4)
model.fit(
    train_data,
    num_iter=50,
    val_data=test_data,
    metric='logloss',
    early_stopping=5,
    maximum=False,
)

# Report both scores on the test split.
acc = model.score(test_data, scoring='acc')
print("Accuracy Score: ", acc)

f1 = model.score(test_data, scoring='f1')
print("F1 Score: ", f1)
# Column roles used when converting the frame to FFM format.
categorical = ['int1', 'int2', 's1', 's2']
numerical = ['float1']
target = 'clicked'

# Hold out 20% of `train` for validation.
train_data, val_data = train_test_split(train, test_size=0.2)

# The formatter is fitted on the complete `train` frame and then applied to
# each split separately.
ffm_train = FFMFormatPandas()
ffm_train.fit(
    train,
    target=target,
    categorical=categorical,
    numerical=numerical,
)
train_data = ffm_train.transform_convert(train_data)
val_data = ffm_train.transform(val_data)

# Round-trip the validation split through disk, then wrap it as FFMData.
save_data(val_data, 'val_data.pkl')
X, y = load_data('val_data.pkl')
val_data = FFMData(X, y)

# Build the model and train it against the validation split.
# NOTE(review): `maximum=True` presumably marks the Gini metric as
# higher-is-better for early stopping -- confirm against FFM.fit.
model = FFM(eta=0.1, lam=0.0001, k=4)
model.fit(
    train_data,
    num_iter=32,
    val_data=val_data,
    metric=Gini,
    early_stopping=5,
    maximum=True,
)

# Predicted probabilities for the validation split.
val_proba = model.predict_proba(val_data)
# --- Disabled code (tail of a commented-out test-set loader whose start is
# --- outside this view; the nesting below is a best-effort reconstruction).
#
#         for pair in line.split()[1:]:
#             field, feature, value = [int(x) for x in pair.split(':')]
#
#             features.append(feature)
#             values.append(value)
#
#             feature2field[feature] = field
#
#         data_set_test.append((features, values, label))
#
#         counter += 1
#         if counter == 1000:
#             break

# Split each training record into its three positional components:
# index 0 -> feature ids, index 1 -> feature values, index 2 -> label.
X_feature_train = np.array([record[0] for record in data_set_train])
X_value_train = np.array([record[1] for record in data_set_train])
Y_train = np.array([record[2] for record in data_set_train])

# Disabled: the matching arrays for the test set.
#X_feature_test = np.array([x[0] for x in data_set_test])
#X_value_test = np.array([x[1] for x in data_set_test])
#Y_test = np.array([x[2] for x in data_set_test])

clf = FFM(
    latent_dim=4,
    reg_parm=0.00002,
    batch_size=1024,
    learning_rate=0.2,
    n_iter=10,
)
# The model receives the inputs as a (values, features) pair, plus the
# feature -> field mapping.
clf.fit((X_value_train, X_feature_train), Y_train, feature2field)

# Disabled: prediction on the test set.
#Y_pred = clf.predict((X_value_test, X_feature_test), feature2field)
# Target column.
y = data['click']

# Disabled: convert labels 0 / 1 --> -1 / 1.
#for i in range(len(y)):
#    if y[i] == 0:
#        y[i] = -1

# Everything except the target is used as features.
# NOTE(review): `axis=1` looks redundant when `columns=` is given -- confirm.
X = data.drop(columns="click", axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Turn the label vectors into column vectors of shape (n_samples, 1).
# NOTE(review): if these are pandas Series, `.reshape` is not available in
# current pandas releases -- confirm the type / pandas version in use.
y_train = y_train.reshape([len(y_train), 1])
y_test = y_test.reshape([len(y_test), 1])

# ---------------model-------------------------
field_name = [
    'C1',
    'banner_pos',
    'site_category',
    'app_domain',
    'app_category',
    'device_id',
    'device_type',
    'device_conn_type',
    'C15',
    'C16',
    'C18',
]

# Start from a clean TensorFlow graph before building the model.
tf.reset_default_graph()

clf = FFM(
    X_train,
    y_train,
    field_name=field_name,
    epoch=500,
    learning_rate=2e-3,
    lbd=1e-5,
)
clf.fit()

y_p = clf.predict(X_test)
print("Acc on testing data: ", clf.score(X_test, y_test))