Exemple #1
0
def Xtest_train_ffm(train_data):
    """Build an FFM model from default settings and train it on ``train_data``.

    The defaults defined here (``optim.Adam`` as the optimiser class, five
    iterations, dimension 10, ...) are overridden by whatever
    ``train_data.conf`` provides.  The merged settings are passed to the
    ``FFM`` constructor as keyword arguments and, unchanged, to
    ``train_model`` together with ``train_data.train_iter`` and
    ``train_data.test``.
    """
    settings = dict(
        dim=10,
        use_unary=True,
        num_iter=5,
        opt_cls=optim.Adam,
        opt_kwargs=dict(lr=1e-3),
    )
    # Dataset-supplied entries take precedence over the defaults above.
    settings.update(train_data.conf)
    ffm_model = FFM(**settings)
    train_model(ffm_model, train_data.train_iter, train_data.test, settings)
Exemple #2
0
if __name__ == '__main__':
    # Script entry point: train an FFM on `train_file`, save it to
    # `model_file`, reload it and start accumulating fit statistics over
    # `valid_file`.
    # n, m and k are handed to FFM(m, n, k, ...); their exact meaning is
    # defined by that constructor -- TODO confirm against the FFM class.
    n = 5
    m = 2
    k = 2
    train_file = "train.txt"
    valid_file = "valid.txt"
    model_file = "ffm.npy"
    # Hyperparameters
    eta = 0.01
    lambd = 1e-2
    max_echo = 30
    max_r2 = 0.9

    # Train the model and save its parameters
    sample_generator = Sample(train_file)
    ffm = FFM(m, n, k, eta, lambd)
    ffm.train(sample_generator, max_echo, max_r2)
    ffm.save_model(model_file)

    # Load the model and measure its goodness of fit on the validation set
    ffm.load_model(model_file)
    valid_generator = Sample(valid_file)
    y_sum = 0.0
    y_square_sum = 0.0
    err_square_sum = 0.0  # sum of squared errors
    population = 0  # total number of samples
    for node_list, y in valid_generator:
        y = 0.0 if y == -1 else y  # true y takes values in {-1, 1} while the predicted y lies in (0, 1); align them before measuring the fit
        y_hat = ffm.predict(node_list)
        y_sum += y
        y_square_sum += y**2
        # NOTE(review): the visible snippet stops here; `y_hat`,
        # `err_square_sum` and `population` are not used/updated in the
        # lines shown.
# coding: utf-8

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from ffm import FFM, FFMData, load_libffm

# Directory that holds the libffm-formatted input files.
data_dir = './'

# Load the training and test splits (features first, labels second).
train_path = data_dir + 'bigdata.tr.txt'
test_path = data_dir + 'bigdata.te.txt'
train_X, train_y = load_libffm(train_path)
test_X, test_y = load_libffm(test_path)

# Wrap both splits in the container type the model consumes.
train_data, test_data = FFMData(train_X, train_y), FFMData(test_X, test_y)

# Fit for up to 50 iterations, validating on the test split with logloss
# and an early-stopping setting of 5.
model = FFM(eta=0.1, lam=0.0002, k=4)
fit_options = {
    'num_iter': 50,
    'val_data': test_data,
    'metric': 'logloss',
    'early_stopping': 5,
    'maximum': False,
}
model.fit(train_data, **fit_options)

# Report accuracy and F1 on the test split.
acc = model.score(test_data, scoring='acc')
print("Accuracy Score: ", acc)
f1 = model.score(test_data, scoring='f1')
print("F1 Score: ", f1)
Exemple #4
0
# Column roles used when encoding the frame for FFM.
target = 'clicked'
numerical = ['float1']
categorical = ['int1', 'int2', 's1', 's2']

# Hold out 20% of the rows for validation.
train_data, val_data = train_test_split(train, test_size=0.2)

# Fit the encoder on the complete frame, then encode each split.
ffm_train = FFMFormatPandas()
column_roles = dict(target=target, categorical=categorical, numerical=numerical)
ffm_train.fit(train, **column_roles)
train_data = ffm_train.transform_convert(train_data)
val_data = ffm_train.transform(val_data)

# Round-trip the validation split through disk and rebuild it as FFMData.
val_path = 'val_data.pkl'
save_data(val_data, val_path)
X, y = load_data(val_path)
val_data = FFMData(X, y)

# Train the model, validating with the Gini metric (maximum=True).
model = FFM(eta=0.1, lam=0.0001, k=4)
model.fit(
    train_data,
    num_iter=32,
    val_data=val_data,
    metric=Gini,
    early_stopping=5,
    maximum=True,
)

# Predicted probabilities for the validation split.
val_proba = model.predict_proba(val_data)
Exemple #5
0
#
#    for pair in line.split()[1:]:
#        field, feature, value = [int(x) for x in pair.split(':')]
#
#        features.append(feature)
#        values.append(value)
#
#        feature2field[feature] = field
#
#    data_set_test.append((features, values, label))
#
#    counter += 1
#    if counter == 1000:
#        break

# Split the training samples into three parallel arrays, one per position
# of each sample: index 0 -> X_feature_train, 1 -> X_value_train,
# 2 -> Y_train.
X_feature_train, X_value_train, Y_train = (
    np.array([sample[column] for sample in data_set_train])
    for column in range(3)
)

#X_feature_test = np.array([x[0] for x in data_set_test])
#X_value_test = np.array([x[1] for x in data_set_test])
#Y_test = np.array([x[2] for x in data_set_test])

# Configure the classifier and fit it; note the (values, features) order
# of the input pair.
ffm_params = dict(
    latent_dim=4,
    reg_parm=2e-5,
    batch_size=1024,
    learning_rate=0.2,
    n_iter=10,
)
clf = FFM(**ffm_params)
clf.fit((X_value_train, X_feature_train), Y_train, feature2field)
#Y_pred = clf.predict((X_value_test, X_feature_test), feature2field)
Exemple #6
0
# Target column.  Labels are used as-is; a 0 -> -1 remapping of `y` that
# used to follow this line is disabled.
y = data['click']
# `columns=` already selects the axis, so no separate `axis` argument is
# needed.
X = data.drop(columns="click")
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)
# `Series.reshape` was deprecated and later removed from pandas, so take
# the underlying ndarray before reshaping the labels into column vectors
# of shape (n_samples, 1).
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
# ---------------model-------------------------
# Names of the fields handed to the model.
field_name = [
    'C1', 'banner_pos', 'site_category', 'app_domain', 'app_category',
    'device_id', 'device_type', 'device_conn_type', 'C15', 'C16', 'C18'
]

# Start from an empty default TensorFlow graph before building the model.
tf.reset_default_graph()
clf = FFM(X_train,
          y_train,
          field_name=field_name,
          epoch=500,
          learning_rate=2e-3,
          lbd=1e-5)
clf.fit()

# Predictions for the held-out split (kept for inspection).
y_p = clf.predict(X_test)

print("Acc on testing data: ", clf.score(X_test, y_test))