Example #1
0
def test_multiple_runs():
    """test running multiple models through multiple tournaments"""

    data = testing.play_data()
    model_list = [nx.logistic(), nx.fifty()]

    def check(pred, ncol, names=None):
        # The column count is always verified; the tournament names are
        # compared only when an expected list is supplied.
        ok_(pred.shape[1] == ncol, 'wrong number of tournaments')
        if names is not None:
            ok_(pred.tournaments() == names, 'wrong tournaments')

    with testing.HiddenPrints():

        # scalar third argument (a str or an int): two columns expected
        check(nx.production(model_list, data, 'bernie'), 2)
        check(nx.backtest(model_list, data, 2), 2)
        check(nx.run(model_list, nx.ValidationSplitter(data), 'ken'), 2)

        # third argument omitted: ten columns expected
        check(nx.production(model_list, data), 10)
        check(nx.backtest(model_list, data), 10)
        check(nx.run(model_list, nx.ValidationSplitter(data)), 10)

        # list as third argument: the reported tournaments are checked too
        check(nx.production(model_list, data, [1, 5]),
              4, ['bernie', 'charles'])
        check(nx.backtest(model_list, data, ['charles', 'bernie']),
              4, ['bernie', 'charles'])
        check(nx.run(model_list, nx.ValidationSplitter(data), ['ken']),
              2, ['ken'])
Example #2
0
def test_multiple_runs():
    """test running multiple models through multiple tournaments"""

    data = testing.play_data()
    model_list = [nx.linear(), nx.fifty()]

    # Every run in this test is expected to yield this many columns.
    expected_cols = 2
    name = 'kazutsugi'

    def check(pred, names=None):
        # The column count is always verified; the tournament names are
        # compared only when an expected list is supplied.
        ok_(pred.shape[1] == expected_cols, 'wrong number of tournaments')
        if names is not None:
            ok_(pred.tournaments() == names, 'wrong tournaments')

    with testing.HiddenPrints():

        # scalar third argument (a str or an int)
        check(nx.production(model_list, data, name))
        check(nx.backtest(model_list, data, 8))
        check(nx.run(model_list, nx.ValidationSplitter(data), name))

        # third argument omitted
        check(nx.production(model_list, data))
        check(nx.backtest(model_list, data))
        check(nx.run(model_list, nx.ValidationSplitter(data)))

        # list as third argument: the reported tournaments are checked too
        # ([8] is expected to be reported as ['kazutsugi'])
        check(nx.production(model_list, data, [8]), [name])
        check(nx.backtest(model_list, data, [name]), [name])
        check(nx.run(model_list, nx.ValidationSplitter(data), [name]), [name])
Example #3
0
def test_prediction_copies():
    """prediction ids/yhat should share memory with it; copy() should not"""

    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)

    # Sanity check of the helper: an object must share memory with itself.
    ok_(shares_memory(pred, pred), "looks like shares_memory failed")

    # Both properties are asserted to be views onto the prediction.
    for attr in ("ids", "yhat"):
        is_shared = shares_memory(pred, getattr(pred, attr))
        ok_(is_shared, "p.%s should be a view" % attr)

    # An explicit copy must not share memory with the original.
    duplicate = pred.copy()
    ok_(not shares_memory(pred, duplicate), "should be a copy")
Example #4
0
def test_data_properties():
    """prediction properties should not be corrupted"""

    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)

    # ids must agree with the prediction's own index and with the index of
    # the data the prediction was built from
    ids_match_pred = (pred.ids == pred.df.index).all()
    ok_(ids_match_pred, "ids is corrupted")
    ids_match_data = (pred.ids == data.df.index).all()
    ok_(ids_match_data, "ids is corrupted")

    # compare yhat only where the stored yhat is not NaN
    valid = ~np.isnan(pred.df.yhat)
    yhat_match_pred = (pred.yhat[valid] == pred.df.yhat[valid]).all()
    ok_(yhat_match_pred, "yhat is corrupted")
    yhat_match_data = (pred.yhat[valid] == data.df.y[valid]).all()
    ok_(yhat_match_data, "yhat is corrupted")
Example #5
0
def test_run():
    """Make sure run runs"""
    data = testing.play_data()
    model_list = [nx.logistic(), fifty()]
    splitter_list = [
        nx.TournamentSplitter(data),
        nx.ValidationSplitter(data),
        nx.CheatSplitter(data),
        nx.CVSplitter(data, kfold=2),
        nx.SplitSplitter(data, fit_fraction=0.5),
    ]

    # Every model is run against every splitter, models in the outer loop;
    # the same splitter objects are reused for each model.
    combos = [(mdl, spl) for mdl in model_list for spl in splitter_list]
    for mdl, spl in combos:
        nx.run(mdl, spl, verbosity=0)
Example #6
0
def test_run():
    """Make sure run runs"""
    data = play_data()
    model_list = [logistic(), extratrees(nfeatures=2)]
    splitter_list = [tournament_splitter(data),
                     validation_splitter(data),
                     cheat_splitter(data),
                     cv_splitter(data, kfold=2),
                     split_splitter(data, fit_fraction=0.5)]

    # Every model is run against every splitter, models in the outer loop;
    # the same splitter objects are reused for each model.
    combos = [(mdl, spl) for mdl in model_list for spl in splitter_list]
    for mdl, spl in combos:
        run(mdl, spl, verbosity=0)
Example #7
0
def test_run():
    """Make sure run runs"""
    data = testing.play_data()

    model_list = [nx.linear(), nx.fifty()]
    splitter_list = [
        nx.TournamentSplitter(data),
        nx.ValidationSplitter(data),
        nx.CheatSplitter(data),
        nx.CVSplitter(data, kfold=2),
        nx.SplitSplitter(data, fit_fraction=0.5),
    ]

    # Each single model is run with each splitter and tournament=None; the
    # result must have one column and report the tournaments returned by
    # nx.tournament_all().
    combos = [(mdl, spl) for mdl in model_list for spl in splitter_list]
    for mdl, spl in combos:
        pred = nx.run(mdl, spl, tournament=None, verbosity=0)
        ok_(pred.shape[1] == 1, 'wrong number of tournaments')
        ok_(pred.tournaments() == nx.tournament_all(), 'wrong tournaments')

    # invalid inputs: None as the model, and {} as the third positional
    # argument, must both raise ValueError
    assert_raises(ValueError, nx.run, None, nx.TournamentSplitter(data))
    assert_raises(ValueError, nx.run, nx.fifty(),
                  nx.TournamentSplitter(data), {})
Example #8
0
def test_prediction_concordance():
    """make sure prediction.concordance runs"""
    data = testing.play_data()
    model = nx.logistic()
    pred = nx.production(model, data, 'model1', verbosity=0)

    # Only the type of the result is checked, not its contents.
    result = pred.concordance(data)
    is_frame = isinstance(result, pd.DataFrame)
    ok_(is_frame, 'expecting a dataframe')
Example #9
0
def test_backtest_production():
    """Make sure backtest and production run"""
    data = play_data()
    mdl = logistic()

    # Smoke test only: both calls must complete without raising; the
    # returned values are not inspected. The same model object is used
    # for both calls, backtest first.
    backtest(mdl, data, kfold=2, verbosity=0)
    production(mdl, data, verbosity=0)