def test_multiple_runs():
    """Run two models through a single, the default, and a list of tournaments.

    Each call to ``nx.production``, ``nx.backtest`` and ``nx.run`` is checked
    for the expected number of prediction columns and, when a list of
    tournaments is passed, for the tournament names reported back.
    """
    count_msg = 'wrong number of tournaments'
    names_msg = 'wrong tournaments'

    data = testing.play_data()
    model_list = [nx.logistic(), nx.fifty()]

    with testing.HiddenPrints():

        # A single tournament, given by name or by number: 2 columns expected.
        pred = nx.production(model_list, data, 'bernie')
        ok_(pred.shape[1] == 2, count_msg)
        pred = nx.backtest(model_list, data, 2)
        ok_(pred.shape[1] == 2, count_msg)
        pred = nx.run(model_list, nx.ValidationSplitter(data), 'ken')
        ok_(pred.shape[1] == 2, count_msg)

        # No tournament argument: 10 columns expected.
        pred = nx.production(model_list, data)
        ok_(pred.shape[1] == 10, count_msg)
        pred = nx.backtest(model_list, data)
        ok_(pred.shape[1] == 10, count_msg)
        pred = nx.run(model_list, nx.ValidationSplitter(data))
        ok_(pred.shape[1] == 10, count_msg)

        # A list of tournaments (numbers or names); the names are checked too.
        pred = nx.production(model_list, data, [1, 5])
        ok_(pred.shape[1] == 4, count_msg)
        ok_(pred.tournaments() == ['bernie', 'charles'], names_msg)
        pred = nx.backtest(model_list, data, ['charles', 'bernie'])
        ok_(pred.shape[1] == 4, count_msg)
        ok_(pred.tournaments() == ['bernie', 'charles'], names_msg)
        pred = nx.run(model_list, nx.ValidationSplitter(data), ['ken'])
        ok_(pred.shape[1] == 2, count_msg)
        ok_(pred.tournaments() == ['ken'], names_msg)
def test_multiple_runs():
    """Run two models through the 'kazutsugi' tournament in several ways.

    The tournament is passed by name, by number (8), omitted, and as a
    one-element list; every result is expected to have two columns.
    """

    def expect(pred, names=None):
        # Every run in this test should yield exactly two columns.
        ok_(pred.shape[1] == 2, 'wrong number of tournaments')
        if names is not None:
            ok_(pred.tournaments() == names, 'wrong tournaments')

    data = testing.play_data()
    model_list = [nx.linear(), nx.fifty()]

    with testing.HiddenPrints():

        # Tournament given as a scalar (name or number).
        expect(nx.production(model_list, data, 'kazutsugi'))
        expect(nx.backtest(model_list, data, 8))
        expect(nx.run(model_list, nx.ValidationSplitter(data), 'kazutsugi'))

        # Tournament argument omitted.
        expect(nx.production(model_list, data))
        expect(nx.backtest(model_list, data))
        expect(nx.run(model_list, nx.ValidationSplitter(data)))

        # Tournament given as a list; the reported names are checked as well.
        expect(nx.production(model_list, data, [8]), ['kazutsugi'])
        expect(nx.backtest(model_list, data, ['kazutsugi']), ['kazutsugi'])
        expect(nx.run(model_list, nx.ValidationSplitter(data), ['kazutsugi']),
               ['kazutsugi'])
def test_backtest_production():
    """Smoke test: backtest and production run at each verbosity level."""
    data = testing.micro_data()
    fifty_model = fifty()
    with testing.HiddenPrints():
        for level in (0, 1, 2, 3):
            nx.backtest(fifty_model, data, kfold=2, verbosity=level)
            nx.production(fifty_model, data, verbosity=level)
            if level != 3:
                continue
            # Only at the highest verbosity: also run with an explicit name.
            nx.production(fifty_model, data, name='test', verbosity=level)
def test_prediction_setitem():
    """Check that Prediction.__setitem__ builds the same frame as merge."""
    data = nx.play_data()

    # (key, prediction) pairs, built in the order they are assigned below.
    # The backtest result reuses the 'model1' key of the first production run.
    pieces = [
        ('model1',
         nx.production(nx.logistic(), data, 'model1', verbosity=0)),
        ('model2',
         nx.production(nx.logistic(1e-5), data, 'model2', verbosity=0)),
        ('model3',
         nx.production(nx.logistic(1e-6), data, 'model3', verbosity=0)),
        ('model1',
         nx.backtest(nx.logistic(), data, 'model1', verbosity=0)),
    ]

    via_setitem = nx.Prediction()
    for key, piece in pieces:
        via_setitem[key] = piece

    via_merge = nx.Prediction()
    for _, piece in pieces:
        via_merge = via_merge.merge(piece)

    pd.testing.assert_frame_equal(via_setitem.df, via_merge.df)

    # Assigning the first prediction, or the container itself, under
    # 'model1' again is expected to raise ValueError.
    first_piece = pieces[0][1]
    assert_raises(ValueError, via_setitem.__setitem__, 'model1', first_piece)
    assert_raises(ValueError, via_setitem.__setitem__, 'model1', via_setitem)
def test_prediction_setitem():
    """Check that Prediction.__setitem__ builds the same frame as merge."""
    data = nx.play_data()

    # Three production runs and one backtest, all with the linear model.
    prod_by_name = nx.production(nx.linear(), data, 'kazutsugi', verbosity=0)
    prod_by_number = nx.production(nx.linear(), data, 8, verbosity=0)
    prod_repeat = nx.production(nx.linear(), data, 8, verbosity=0)
    backtested = nx.backtest(nx.linear(), data, 8, verbosity=0)
    pieces = (prod_by_name, prod_by_number, prod_repeat, backtested)

    # Build one container by item assignment under ('linear', 1..4) keys ...
    via_setitem = nx.Prediction()
    via_setitem[('linear', 1)] = prod_by_name
    via_setitem[('linear', 2)] = prod_by_number
    via_setitem[('linear', 3)] = prod_repeat
    via_setitem[('linear', 4)] = backtested

    # ... and a second one by merging the same pieces in the same order.
    via_merge = nx.Prediction()
    for piece in pieces:
        via_merge = via_merge.merge(piece)

    pd.testing.assert_frame_equal(via_setitem.df, via_merge.df)

    # Assigning under the already used ('linear', 1) key is expected to
    # raise ValueError, for a single prediction and for the container itself.
    for value in (prod_by_name, via_setitem):
        assert_raises(ValueError, via_setitem.__setitem__, ('linear', 1),
                      value)
def test_prediction_setitem():
    """Check that Prediction.__setitem__ builds the same frame as merge."""
    data = nx.play_data()

    # Three production runs and one backtest, built in assignment order.
    pieces = [
        nx.production(nx.logistic(), data, 'bernie', verbosity=0),
        nx.production(nx.logistic(1e-5), data, 2, verbosity=0),
        nx.production(nx.logistic(1e-6), data, 3, verbosity=0),
        nx.backtest(nx.logistic(), data, 4, verbosity=0),
    ]

    # Item assignment under the keys ('logistic', 1) .. ('logistic', 4).
    via_setitem = nx.Prediction()
    for number, piece in enumerate(pieces, start=1):
        via_setitem[('logistic', number)] = piece

    # The same pieces combined through merge.
    via_merge = nx.Prediction()
    for piece in pieces:
        via_merge = via_merge.merge(piece)

    pd.testing.assert_frame_equal(via_setitem.df, via_merge.df)

    # Assigning under the already used ('logistic', 1) key is expected to
    # raise ValueError, for a single prediction and for the container itself.
    used_key = ('logistic', 1)
    assert_raises(ValueError, via_setitem.__setitem__, used_key, pieces[0])
    assert_raises(ValueError, via_setitem.__setitem__, used_key, via_setitem)
def test_backtest_production():
    """Smoke test backtest and production across tournaments and verbosity.

    With no tournament argument both functions are expected to return five
    columns whose tournaments match ``nx.tournament_all()``.
    """
    count_msg = 'wrong number of tournaments'
    names_msg = 'wrong tournaments'

    data = testing.micro_data()
    fifty_model = nx.fifty()

    with testing.HiddenPrints():

        # Default tournament selection.
        pred = nx.production(fifty_model, data)
        ok_(pred.shape[1] == 5, count_msg)
        ok_(pred.tournaments() == nx.tournament_all(), names_msg)
        pred = nx.backtest(fifty_model, data, kfold=2)
        ok_(pred.shape[1] == 5, count_msg)
        ok_(pred.tournaments() == nx.tournament_all(), names_msg)

        # Explicit tournaments (number, name, None) at each verbosity level;
        # these calls only need to complete without raising.
        for level in (0, 1, 2, 3):
            nx.backtest(fifty_model, data, tournament=3, kfold=2,
                        verbosity=level)
            nx.production(fifty_model, data, tournament='ken',
                          verbosity=level)
            nx.production(fifty_model, data, tournament=4, verbosity=level)
            nx.production(fifty_model, data, tournament=None,
                          verbosity=level)
            if level != 3:
                continue
            # NOTE(review): both calls below are taken to belong to the
            # ``verbosity == 3`` branch -- confirm against the original layout.
            nx.production(fifty_model, data, tournament=5, verbosity=level)
            nx.production(fifty_model, data, tournament='charles',
                          verbosity=level)
def backtest_example(data):
    """Cross validate a logistic regression model on the training data.

    The result of ``nx.backtest`` is bound to a local name for illustration
    only; nothing is returned.
    """
    prediction = nx.backtest(nx.logistic(), data)  # noqa
# y_train remains the same y_train = y[train_index] print(">> running split #", counter) print(">> finding best params") xgreg = model_selection.GridSearchCV(xgb.XGBRegressor(), parameters, scoring="neg_mean_squared_error", cv=kfold_split) xgreg.fit(X_train, y_train) best_params = xgreg.best_params_ print(">> best params: ", best_params) # create a new logistic regression model for the tournament model = xgboost(best_params) print(">> training info:") train = nx.backtest(model, data, verbosity=2) # print (">> validation info:") #validation = nx.production(model, data) print(">> saving validation info: ") validation.to_csv(MODEL_NAME + "-" + tournament + "-" + str(counter) + ".csv") print(">> done saving validation info") print("\n") counter = counter + 1
def backtest(data, tournament='kazutsugi'):
    """Cross validate a linear regression model on the training data.

    ``tournament`` is forwarded to ``nx.backtest`` unchanged. The result is
    bound to a local name for illustration only; nothing is returned.
    """
    prediction = nx.backtest(nx.linear(), data, tournament)  # noqa
def backtest(data, tournament='bernie'):
    """Cross validate a logistic regression model on the training data.

    ``tournament`` is forwarded to ``nx.backtest`` unchanged. The result is
    bound to a local name for illustration only; nothing is returned.
    """
    prediction = nx.backtest(nx.logistic(), data, tournament)  # noqa
X_train = X[train_index][:,3:] # y_train remains the same y_train = y[train_index] print ">> running split #", counter print ">> finding best params" clf = model_selection.GridSearchCV(linear_model.LogisticRegression(), parameters, scoring="neg_log_loss", cv=kfold_split, n_jobs=-1) clf.fit(X_train, y_train) best_params = clf.best_params_ print ">> best params: ", best_params # create a new logistic regression model for the tournament model = logistic(best_params) print ">> training info:" train = nx.backtest(model, data, tournament, verbosity=1) print ">> validation info:" validation = nx.production(model, data, tournament, verbosity=1) print ">> saving validation info: " validation.to_csv(MODEL_NAME + "-" + tournament + "-" + str(counter) + ".csv") print ">> done saving validation info" print "\n" counter=counter+1
def backtest_example():
    """Backtest a logistic model on the data returned by ``nx.play_data()``.

    The result of ``nx.backtest`` is bound to a local name for illustration
    only; nothing is returned.
    """
    play = nx.play_data()
    prediction = nx.backtest(nx.logistic(), play)  # noqa
m = [] m.append(log_loss(y, yhat)) m.append(roc_auc_score(y, yhat)) yh = np.zeros(yhat.size) yh[yhat >= 0.5] = 1 m.append(accuracy_score(y, yh)) m.append(yhat.std()) return m if __name__ == '__main__': # test prediction.performance() import numerox as nx data = nx.load_data('/data/nx/numerai_dataset_20171024.hdf') model = nx.model.logistic() prediction1 = nx.backtest(model, data, verbosity=1) prediction2 = nx.production(model, data) """ prediction = prediction1 + prediction2 print prediction prediction.performance(data) prediction.save('/data/nx/pred/logistic_1e-5.pred') """ """ for c in (1e-1, 1e-2, 1e-3, 1e-4, 1e-5): print c model = nx.model.logistic(c) prediction1 = nx.backtest(model, data, verbosity=1) prediction2 = nx.production(model, data) prediction = prediction1 + prediction2