Beispiel #1
0
def change2liffm(df_data, usefield=False, field_map=None):  # convert to a DMatrix
    """
    Build an ``xl.DMatrix`` from the two values returned by ``split_x_y``.

    :param df_data:   DataFrame-like data handed to ``split_x_y``
    :param usefield:  when truthy, ``field_map`` is passed to ``xl.DMatrix``
                      as the third positional argument; otherwise it is
                      ignored
    :param field_map: DataFrame/Series, array or list (a list is recommended)
    :return: the constructed ``xl.DMatrix``
    """
    data_part, label_part = split_x_y(df_data)
    # Only forward the field map when the caller explicitly asks for it.
    if usefield:
        dmatrix_args = (data_part, label_part, field_map)
    else:
        dmatrix_args = (data_part, label_part)
    return xl.DMatrix(*dmatrix_args)
Beispiel #2
0
def predict_behavior_type(variable):
    """Score newly fetched user data with the saved XGB + FFM models and store the result.

    The rows come from ``get_new_user_data`` followed by ``end_processing``.
    The XGB model's ``apply`` output is wrapped in an ``xlearn.DMatrix`` and
    handed to an FFM model that predicts from the saved model file. Rows whose
    ``predict_result`` equals 1 are kept and their ``predict_result`` column is
    replaced by the output of ``connect_item_name``.

    :param variable: forwarded to ``get_new_user_data``. When it equals the
        string ``'1'`` the first two result rows are sent to
        ``predict_result_to_Bmob``; otherwise the first three rows replace the
        MySQL table ``predict_result_gbdt_java``.
    :return: None
    """
    # Fetch the data
    test = get_new_user_data(variable=variable)
    # Final processing before prediction
    test = end_processing(test)
    # Load the XGB model
    XGB = joblib.load("C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\XGB_FFM.model")
    # Get the leaf-node data
    new_test = XGB.apply(test.values)
    # Convert the data to the DMatrix format FFM needs
    new_test = xlearn.DMatrix(new_test)
    # Run the FFM model
    ffm_model = xlearn.create_ffm()
    ffm_model.setSign()
    ffm_model.setQuiet()
    ffm_model.setOnDisk()
    ffm_model.setTest(new_test)
    # Named `predictions` so the local no longer shadows this function's name.
    predictions = ffm_model.predict(
        "C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\model_dm.out")

    data_result = pd.DataFrame()
    data_result['user_id'] = test.user_id
    data_result['category_id'] = test.category_id
    data_result['item_id'] = test.item_id
    data_result['predict_result'] = predictions
    # NOTE(review): the next line overwrites every model prediction with a
    # random 0/1, so the stored result does not depend on the FFM output.
    # Kept as-is because downstream behavior relies on it; confirm whether
    # this is a leftover demo hack that should be removed.
    data_result['predict_result'] = data_result['predict_result'].apply(lambda x: random.randint(0, 1))
    # Take an explicit copy so the column assignment below writes to an
    # independent frame instead of triggering SettingWithCopyWarning.
    data_result = data_result.loc[data_result.predict_result == 1].copy()
    data_result['predict_result'] = connect_item_name(list(data_result['item_id']))
    if variable == '1':
        predict_result_to_Bmob(data_result[:2])
    else:
        # NOTE(review): database credentials are hard-coded in the URL below.
        engine = create_engine("mysql+pymysql://root:123456@localhost:3306/mysql?charset=utf8")
        data_result = data_result[:3]
        data_result.to_sql(name='predict_result_gbdt_java', con=engine, if_exists='replace',
                           index=False, index_label=False, chunksize=5000,
                           dtype={
                               'user_id': VARCHAR(length=20),
                               'category_id': VARCHAR(length=20),
                               'item_id': VARCHAR(length=20),
                               'predict_result': VARCHAR(length=20),
                           })
Beispiel #3
0
# In the future train, valida and test will all be fetched from the database.
def get_datas(model, train, valida, test):
    """Run ``model.apply`` on the ``.values`` of each of the three data sets.

    :param model: object exposing an ``apply`` method
    :param train: object whose ``.values`` is passed to ``model.apply`` first
    :param valida: object whose ``.values`` is passed to ``model.apply`` second
    :param test: object whose ``.values`` is passed to ``model.apply`` last
    :return: tuple ``(new_train, new_valida, new_test)`` of the three
        ``model.apply`` results, in that order
    """
    # The generator is consumed left to right, so apply() is still called in
    # train -> valida -> test order.
    transformed_train, transformed_valida, transformed_test = (
        model.apply(dataset.values) for dataset in (train, valida, test)
    )
    return transformed_train, transformed_valida, transformed_test

# Pass the three data sets through XGB.apply (via get_datas); the results feed
# the FFM stage below.
new_train, new_valida, new_test = get_datas(XGB, train, valida, test)
# Training set, validation set, test set


from sklearn.metrics import classification_report,roc_auc_score
import xlearn as xl

# Wrap each transformed data set together with its labels in an xlearn DMatrix.
ffm_train = xl.DMatrix(new_train, train_label)
ffm_valida = xl.DMatrix(new_valida, valida_label)
ffm_test = xl.DMatrix(new_test, test_label)

# The printed message reads "Start training the FFM model".
print("开始训练FFm模型")
ffm_model = xl.create_ffm()

ffm_model.setTrain(ffm_train)
ffm_model.setValidate(ffm_valida)
# setValidate: the validation set helps training converge early
ffm_model.setSign()
ffm_model.setNoBin()
ffm_model.setQuiet()

param = {
    'task':'binary',
Beispiel #4
0
import pandas as pd

# Read the train and test sets from header-less, comma-separated CSV files
higgs_train = pd.read_csv("higgs-train.csv", header=None, sep=",")
higgs_test = pd.read_csv("higgs-test.csv", header=None, sep=",")

# get train X, y: column 0 is y, every remaining column is X
X_train = higgs_train[higgs_train.columns[1:]]
y_train = higgs_train[0]

# get test X, y: same column layout as the training set
X_test = higgs_test[higgs_test.columns[1:]]
y_test = higgs_test[0]

# Convert both sets to xlearn DMatrix objects
xdm_train = xl.DMatrix(X_train, y_train)
xdm_test = xl.DMatrix(X_test, y_test)

# Training task
linear_model = xl.create_linear()  # Use linear model
# The same API is used as for training from a file;
# that is, an xl.DMatrix can also be passed to this API now
linear_model.setTrain(xdm_train)    # Training data
linear_model.setValidate(xdm_test)  # Validation data (the test set is reused here)

# param:
#  0. binary classification task
#  1. learning rate: 0.2
#  2. regularization lambda: 0.002
#  3. evaluation metric: acc
param = {'task':'binary', 'lr':0.2, 
Beispiel #5
0
import pandas as pd

# Directory containing the house-price demo files (machine-specific absolute path).
data_path = r"F:\for learn\Python\Repo_sources\xlearn\demo\regression\house_price"
# Input tables read with pd.read_csv in the __main__ section.
train_file = os.path.join(data_path, "house_price_train.txt")
test_file = os.path.join(data_path, "house_price_test.txt")
# Paths handed to fm_model.fit() / fm_model.predict() for the model file and
# the prediction output.
output_model = os.path.join(data_path, "temp.model")
output_file = os.path.join(data_path, "output.txt")
# Parameters passed to fm_model.fit(): regression task, learning rate 0.2,
# lambda 0.002, MAE metric.
param = {"task": "reg", "lr": 0.2, "lambda": 0.002, "metric": "mae"}

if __name__ == '__main__':
    # Both input tables are tab-separated and have no header row.
    read_options = {"sep": "\t", "header": None}
    train_data = pd.read_csv(train_file, **read_options)
    test_data = pd.read_csv(test_file, **read_options)

    # Column 0 is passed as the label; the remaining training columns select
    # the features for both the training and the test table.
    columns = train_data.columns
    feature_columns = columns[1:]
    X_train, y_train = train_data[feature_columns], train_data[0]
    X_test, y_test = test_data[feature_columns], test_data[0]

    train_matrix = xl.DMatrix(X_train, y_train)
    test_matrix = xl.DMatrix(X_test, y_test)

    # Fit an FM model, using the test matrix as the validation set.
    fm_model = xl.create_fm()
    fm_model.setTrain(train_matrix)
    fm_model.setValidate(test_matrix)
    fm_model.fit(param, output_model)

    # Predict on the test matrix with the model file written by fit().
    fm_model.setTest(test_matrix)
    fm_model.predict(output_model, output_file)
Beispiel #6
0
    X = data[data.columns[1:]]
    y = data[0].values

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=12)
    print(X_train.shape)

    # 预处理
    scaler = StandardScaler()
    scaler.fit(X)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    train_data = xl.DMatrix(data=X_train, label=y_train)
    test_data = xl.DMatrix(data=X_test, label=y_test)

    fm = xl.create_fm()
    fm.disableNorm()
    fm.setTrain(train_data)
    fm.setTest(test_data)

    param = {
        'task': 'reg',
        'lr': 0.1,
        'lambda': 0.02,
        'k': 100,
        'epoch': 100,
        'metric': 'rmse'
    }