def getData(p=0.1):
    """Load the data, run feature engineering and return the train/test splits.

    Args:
        p: Forwarded to ``loadData`` as its ``p`` keyword. Defaults to 0.1,
            the value that used to be hard-coded in this function.
            (Presumably a sampling fraction -- confirm against ``loadData``.)

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``.
    """
    data = featureEngineer(loadData(p=p))
    # Preprocess the training data. ``preprocessing`` hands the pieces back as
    # (x_train, x_test, y_train, y_test); they are regrouped on return so each
    # x sits next to its matching y.
    x_train, x_test, y_train, y_test = preprocessing(data)
    return x_train, y_train, x_test, y_test
def predict_clf(model):
    """Run ``model`` over the janestreet test iterator and submit each action.

    For every ``(test_df, sample_prediction_df)`` pair yielded by the
    environment, an action is written to ``sample_prediction_df.action`` and
    handed back through ``env.predict``.

    Args:
        model: Object exposing ``predict(X)``; the first element of its
            result is used as the action.
    """
    env = janestreet.make_env()
    for test_df, sample_prediction_df in env.iter_test():
        # Rows whose weight is not positive are never scored: action stays 0.
        action = 0
        # ``.item()`` requires the weight column to hold exactly one value.
        if test_df['weight'].item() > 0:
            engineered = featureEngineer(test_df)
            # Keep only the columns whose name contains 'feature'.
            feature_mask = engineered.columns.str.contains('feature')
            X_test = engineered.loc[:, feature_mask].fillna(0.0)
            action = model.predict(X_test)[0]
        sample_prediction_df.action = action
        env.predict(sample_prediction_df)
print("%f (%f) with: %r" % (mean, stdev, param)) y.append(mean) plt.plot(y) plt.savefig("./output/" + param_name + ".png") if __name__ == "__main__": newpath = "/home/code" os.chdir(newpath) # data_explore() # 真正开始干活 p = 0.0001 train = loadData(p=p) train = featureEngineer(train) # print(train.head()) # 计算模型评分 # score = Score(model, train) # print("模型评分:%.2f" % score) test = loadData(p=p) test = featureEngineer(test) #训练数据预处理 X_train, y_train = preprocessing(train) # xgboost print("XGBoost") max_depth = [3, 4, 5] subsample = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
iter_test = env.iter_test() for (test_df, sample_prediction_df) in iter_test: if test_df['weight'].item() > 0: X_test = test_df.loc[:, test_df.columns.str.contains('feature')] X_test = X_test.fillna(-999) y_preds = model.predict(X_test)[0] else: y_preds = 0 # print(y_preds) sample_prediction_df.action = y_preds env.predict(sample_prediction_df) if __name__ == "__main__": newpath = "/home/code" os.chdir(newpath) # data_explore() # 真正开始干活 train = pd.read_csv("./train.csv", nrows=10000) feature = pd.read_csv("./features.csv") train = featureEngineer(train) model = modeling(train) # 计算模型评分 # score = Score(model, train) # print("模型评分:%.2f" % score) # 进行预测 predict_clf(model)