def change2liffm(df_data, usefield=False, field_map=None):
    """Convert a DataFrame into an xLearn DMatrix.

    :param df_data: input DataFrame
    :param usefield: whether to pass a field map (needed for FFM)
    :param field_map: DataFrame/Series, array or list; a list is recommended
    :return: xl.DMatrix
    """
    x_, y_ = split_x_y(df_data)
    if usefield:
        xdm_data = xl.DMatrix(x_, y_, field_map)
    else:
        xdm_data = xl.DMatrix(x_, y_)
    return xdm_data
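# Usage sketch (not from the original): assumes split_x_y returns (features, labels)
# and that df_user is a hypothetical DataFrame; the field_map list assigns an FFM
# field index to each feature column.
import xlearn as xl

field_map = [0, 0, 1, 2, 2]  # hypothetical mapping of five feature columns to three fields
ffm_matrix = change2liffm(df_user, usefield=True, field_map=field_map)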
import random

import joblib
import pandas as pd
import xlearn
from sqlalchemy import create_engine
from sqlalchemy.types import VARCHAR


def predict_behavior_type(variable):
    # Fetch the new-user data
    test = get_new_user_data(variable=variable)
    # Final preprocessing
    test = end_processing(test)
    # Load the trained XGBoost model
    XGB = joblib.load("C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\XGB_FFM.model")
    # Get the leaf-node indices from the trees
    new_test = XGB.apply(test.values)
    # Convert the leaf encodings into the DMatrix format that the FFM expects
    new_test = xlearn.DMatrix(new_test)
    # Load the FFM model and run prediction
    ffm_model = xlearn.create_ffm()
    ffm_model.setSign()
    ffm_model.setQuiet()
    ffm_model.setOnDisk()
    ffm_model.setTest(new_test)
    predict_behavior_type = ffm_model.predict(
        "C:\\Users\\dell--pc\\Desktop\\RecommenderSystem\\Model\\GbdtFFmFit\\model_dm.out")
    data_result = pd.DataFrame()
    data_result['user_id'] = test.user_id
    data_result['category_id'] = test.category_id
    data_result['item_id'] = test.item_id
    data_result['predict_result'] = predict_behavior_type
    # randomly flag each row 0/1 and keep only the rows flagged 1
    data_result['predict_result'] = data_result['predict_result'].apply(lambda x: random.randint(0, 1))
    data_result = data_result.loc[data_result.predict_result == 1]
    data_result['predict_result'] = connect_item_name(list(data_result['item_id']))
    if variable == '1':
        predict_result_to_Bmob(data_result[:2])
    else:
        engine = create_engine("mysql+pymysql://root:123456@localhost:3306/mysql?charset=utf8")
        data_result = data_result[:3]
        data_result.to_sql(name='predict_result_gbdt_java', con=engine, if_exists='replace',
                           index=False, index_label=False, chunksize=5000,
                           dtype={
                               'user_id': VARCHAR(length=20),
                               'category_id': VARCHAR(length=20),
                               'item_id': VARCHAR(length=20),
                               'predict_result': VARCHAR(length=20)
                           })
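# Example invocation (sketch): per the branch above, variable == '1' pushes the top
# results to Bmob, while any other value writes them to the local MySQL table.
predict_behavior_type('1')
predict_behavior_type('0')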
# train, valida and test will eventually all be fetched from the database
def get_datas(model, train, valida, test):
    new_train = model.apply(train.values)
    new_valida = model.apply(valida.values)
    new_test = model.apply(test.values)
    return new_train, new_valida, new_test

# leaf-node encodings for the training, validation and test sets
new_train, new_valida, new_test = get_datas(XGB, train, valida, test)

from sklearn.metrics import classification_report, roc_auc_score
import xlearn as xl

ffm_train = xl.DMatrix(new_train, train_label)
ffm_valida = xl.DMatrix(new_valida, valida_label)
ffm_test = xl.DMatrix(new_test, test_label)

print("Start training the FFM model")
ffm_model = xl.create_ffm()
ffm_model.setTrain(ffm_train)
ffm_model.setValidate(ffm_valida)  # setValidate enables early stopping
ffm_model.setSign()
ffm_model.setNoBin()
ffm_model.setQuiet()
param = {
    'task': 'binary',
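# The param dict above is cut off in the source. A hedged sketch of how the FFM
# training step typically continues with xLearn; everything except 'task' (the
# hyper-parameter values and the model path) is an assumption, not taken from the original.
param = {
    'task': 'binary',
    'lr': 0.2,        # assumed learning rate
    'lambda': 0.002,  # assumed L2 regularization
    'epoch': 10,      # assumed number of epochs
    'metric': 'auc',  # assumed evaluation metric
}
ffm_model.fit(param, "./xgb_ffm_model.out")         # train and save the model
ffm_model.setTest(ffm_test)
y_pred = ffm_model.predict("./xgb_ffm_model.out")   # returns predictions when the test set is a DMatrix
print(classification_report(test_label, y_pred))    # setSign() makes the outputs 0/1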
import pandas as pd
import xlearn as xl

# read the data from csv files
higgs_train = pd.read_csv("higgs-train.csv", header=None, sep=",")
higgs_test = pd.read_csv("higgs-test.csv", header=None, sep=",")

# get train X, y
X_train = higgs_train[higgs_train.columns[1:]]
y_train = higgs_train[0]

# get test X, y
X_test = higgs_test[higgs_test.columns[1:]]
y_test = higgs_test[0]

# DMatrix transition
xdm_train = xl.DMatrix(X_train, y_train)
xdm_test = xl.DMatrix(X_test, y_test)

# Training task
linear_model = xl.create_linear()     # Use linear model
# we use the same API as training from file;
# that is, you can also pass xl.DMatrix to this API now
linear_model.setTrain(xdm_train)      # Training data
linear_model.setValidate(xdm_test)    # Validation data

# param:
#  0. binary classification task
#  1. learning rate: 0.2
#  2. regular lambda: 0.002
#  3. evaluation metric: acc
param = {'task': 'binary', 'lr': 0.2,
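# The param dict above is also truncated; based on the comments in the snippet
# (lambda 0.002, metric acc), a typical completion looks like this. The model path
# is illustrative, not from the original.
param = {'task': 'binary', 'lr': 0.2, 'lambda': 0.002, 'metric': 'acc'}
linear_model.fit(param, "./higgs_linear.out")      # train and save the model
linear_model.setTest(xdm_test)
res = linear_model.predict("./higgs_linear.out")   # in-memory predictions for the DMatrix test set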
import os

import pandas as pd
import xlearn as xl

data_path = r"F:\for learn\Python\Repo_sources\xlearn\demo\regression\house_price"
train_file = os.path.join(data_path, "house_price_train.txt")
test_file = os.path.join(data_path, "house_price_test.txt")
output_model = os.path.join(data_path, "temp.model")
output_file = os.path.join(data_path, "output.txt")

param = {"task": "reg", "lr": 0.2, "lambda": 0.002, "metric": "mae"}

if __name__ == '__main__':
    # read the tab-separated demo data; column 0 is the target, the rest are features
    train_data = pd.read_csv(train_file, sep="\t", header=None)
    test_data = pd.read_csv(test_file, sep="\t", header=None)
    columns = train_data.columns
    X_train = train_data[columns[1:]]
    y_train = train_data[0]
    X_test = test_data[columns[1:]]
    y_test = test_data[0]
    # convert to xLearn DMatrix
    train_matrix = xl.DMatrix(X_train, y_train)
    test_matrix = xl.DMatrix(X_test, y_test)
    # train an FM model and write predictions for the test set to output_file
    fm_model = xl.create_fm()
    fm_model.setTrain(train_matrix)
    fm_model.setValidate(test_matrix)
    fm_model.fit(param, output_model)
    fm_model.setTest(test_matrix)
    fm_model.predict(output_model, output_file)
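    # Optional check (a sketch, not part of the original demo): xLearn writes one
    # prediction per line to output_file, so the scores can be read back and
    # compared against y_test.
    import numpy as np
    preds = np.loadtxt(output_file)
    print("MAE:", np.mean(np.abs(preds - y_test.values)))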
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xlearn as xl

X = data[data.columns[1:]]
y = data[0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=12)
print(X_train.shape)

# preprocessing: standardize the features
scaler = StandardScaler()
scaler.fit(X_train)  # fit the scaler on the training split only, to avoid leaking test statistics
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

train_data = xl.DMatrix(data=X_train, label=y_train)
test_data = xl.DMatrix(data=X_test, label=y_test)

fm = xl.create_fm()
fm.disableNorm()
fm.setTrain(train_data)
fm.setTest(test_data)
param = {
    'task': 'reg',
    'lr': 0.1,
    'lambda': 0.02,
    'k': 100,
    'epoch': 100,
    'metric': 'rmse'
}
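# A hedged sketch of what usually follows: fit the FM with the param dict above and
# score the held-out split (the model path is illustrative, not from the original).
import numpy as np

fm.fit(param, "./fm_model.out")
preds = fm.predict("./fm_model.out")  # returns predictions because the test set is a DMatrix
print("test RMSE:", np.sqrt(np.mean((preds - y_test) ** 2)))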