Beispiel #1
0
def test_multiple_runs():
    """Run several models through one, all, and a list of tournaments.

    Exercises nx.production, nx.backtest and nx.run (with a
    ValidationSplitter) and checks the second dimension of each returned
    prediction; for list-valued tournament arguments it also checks the
    value returned by ``tournaments()``.
    """

    data = testing.play_data()
    model_list = [nx.logistic(), nx.fifty()]

    def check(prediction, ncols, names=None):
        # Shared assertions; `names` is only compared when it is supplied.
        ok_(prediction.shape[1] == ncols, 'wrong number of tournaments')
        if names is not None:
            ok_(prediction.tournaments() == names, 'wrong tournaments')

    with testing.HiddenPrints():

        # a single tournament, given by name or by number
        check(nx.production(model_list, data, 'bernie'), 2)
        check(nx.backtest(model_list, data, 2), 2)
        check(nx.run(model_list, nx.ValidationSplitter(data), 'ken'), 2)

        # no tournament argument at all
        check(nx.production(model_list, data), 10)
        check(nx.backtest(model_list, data), 10)
        check(nx.run(model_list, nx.ValidationSplitter(data)), 10)

        # a list of tournaments; numbers and names, in any order
        check(nx.production(model_list, data, [1, 5]),
              4, ['bernie', 'charles'])
        check(nx.backtest(model_list, data, ['charles', 'bernie']),
              4, ['bernie', 'charles'])
        check(nx.run(model_list, nx.ValidationSplitter(data), ['ken']),
              2, ['ken'])
Beispiel #2
0
def cv_warning(model, data, tournament='bernie', kfold=5, nsamples=100):
    """Hold out a sample of eras not rows when doing cross validation.

    For each of `nsamples` repetitions the model is run through two
    splitters built with the same `kfold` and with the repetition index
    as seed: ``nx.CVSplitter`` ("cv across eras") and
    ``nx.IgnoreEraCVSplitter`` ("cv ignoring eras but y balanced"). After
    every repetition the column means of all performance tables collected
    so far are printed side by side under the names 'cve' and 'cv'.

    Parameters
    ----------
    model : object passed unchanged to ``nx.run``.
    data : object indexed with ``'train'``; only that part is used.
    tournament : tournament passed to the splitter, ``nx.run`` and
        ``prediction.performance``.
    kfold : number of folds handed to both splitters.
    nsamples : number of repetitions.

    Returns
    -------
    None; results are printed.
    """

    data = data['train']

    # Per-run performance frames are accumulated in plain lists and joined
    # with pd.concat: DataFrame.append was removed in pandas 2.0.
    cve_frames = []
    cv_frames = []

    for i in range(nsamples):

        # cv across eras
        cve = nx.CVSplitter(data, kfold=kfold, seed=i)
        prediction = nx.run(model, cve, tournament, verbosity=0)
        cve_frames.append(prediction.performance(data, tournament))

        # cv ignoring eras but y balanced
        cv = nx.IgnoreEraCVSplitter(data,
                                    tournament=tournament,
                                    kfold=kfold,
                                    seed=i)
        prediction = nx.run(model, cv, tournament, verbosity=0)
        cv_frames.append(prediction.performance(data, tournament))

        # display running means over all repetitions so far
        rcve = pd.concat(cve_frames, ignore_index=True).mean(axis=0)
        rcv = pd.concat(cv_frames, ignore_index=True).mean(axis=0)
        rcve.name = 'cve'
        rcv.name = 'cv'
        r = pd.concat([rcve, rcv], axis=1)
        print("\n{} runs".format(i + 1))
        print(r)
Beispiel #3
0
def test_multiple_runs():
    """Run two models through production, backtest and run.

    The tournament is given as a name, as a number, omitted, or as a
    one-element list. Every prediction must have a second dimension of 2;
    for the list-valued calls ``tournaments()`` must equal
    ``['kazutsugi']``.
    """

    data = testing.play_data()
    model_list = [nx.linear(), nx.fifty()]

    # (deferred call, expected tournaments() value or None to skip the check)
    # The calls are deferred so they execute one at a time, in this order,
    # inside the HiddenPrints context below.
    scenarios = (
        (lambda: nx.production(model_list, data, 'kazutsugi'), None),
        (lambda: nx.backtest(model_list, data, 8), None),
        (lambda: nx.run(model_list, nx.ValidationSplitter(data),
                        'kazutsugi'), None),
        (lambda: nx.production(model_list, data), None),
        (lambda: nx.backtest(model_list, data), None),
        (lambda: nx.run(model_list, nx.ValidationSplitter(data)), None),
        (lambda: nx.production(model_list, data, [8]), ['kazutsugi']),
        (lambda: nx.backtest(model_list, data, ['kazutsugi']),
         ['kazutsugi']),
        (lambda: nx.run(model_list, nx.ValidationSplitter(data),
                        ['kazutsugi']), ['kazutsugi']),
    )

    with testing.HiddenPrints():
        for make_prediction, expected_names in scenarios:
            prediction = make_prediction()
            ok_(prediction.shape[1] == 2, 'wrong number of tournaments')
            if expected_names is not None:
                ok_(prediction.tournaments() == expected_names,
                    'wrong tournaments')
Beispiel #4
0
def test_run():
    """Smoke test: nx.run completes for every model/splitter pairing."""
    data = testing.play_data()
    candidates = [nx.logistic(), fifty()]
    splits = [
        nx.TournamentSplitter(data),
        nx.ValidationSplitter(data),
        nx.CheatSplitter(data),
        nx.CVSplitter(data, kfold=2),
        nx.SplitSplitter(data, fit_fraction=0.5),
    ]
    # Model-major order: each model is run through all splitters in turn.
    pairings = [(m, s) for m in candidates for s in splits]
    for current_model, current_split in pairings:
        nx.run(current_model, current_split, verbosity=0)
Beispiel #5
0
def cv_warning(data, nsamples=100):
    """Compare cross validation across eras with cross validation ignoring eras.

    For each of `nsamples` repetitions a logistic model is run through
    ``nx.CVSplitter`` and ``nx.IgnoreEraCVSplitter``, both seeded with the
    repetition index. The two predictions are added to a fresh
    ``nx.Report`` under the names 'cve' and 'cv', and the report's
    performance table is collected. After every repetition the column
    means of all tables collected so far are printed, one column per
    cross-validation type.

    Parameters
    ----------
    data : object passed to the splitters and to ``report.performance_df``.
    nsamples : number of repetitions.

    Returns
    -------
    None; results are printed.
    """

    model = nx.logistic()

    # Per-run performance frames are accumulated in a list and joined with
    # pd.concat: DataFrame.append was removed in pandas 2.0.
    frames = []

    for i in range(nsamples):

        report = nx.Report()

        # cv across eras
        cve = nx.CVSplitter(data, seed=i)
        prediction = nx.run(model, cve, verbosity=0)
        report.append_prediction(prediction, 'cve')

        # cv ignoring eras but y balanced
        cv = nx.IgnoreEraCVSplitter(data, seed=i)
        prediction = nx.run(model, cv, verbosity=0)
        report.append_prediction(prediction, 'cv')

        # save performance results; the last column is renamed so the rows
        # can be selected by 'cve' / 'cv' below
        # NOTE(review): presumably the last column holds the names given to
        # append_prediction -- confirm against Report.performance_df
        df = report.performance_df(data)
        cols = df.columns.tolist()
        cols[-1] = 'cv_type'
        df.columns = cols
        frames.append(df)
        results = pd.concat(frames, ignore_index=True)

        # display results
        rcve = results[results.cv_type == 'cve'].mean(axis=0)
        rcv = results[results.cv_type == 'cv'].mean(axis=0)
        rcve.name = 'cve'
        rcv.name = 'cv'
        r = pd.concat([rcve, rcv], axis=1)
        print("\n{} runs".format(i+1))
        print(r)
Beispiel #6
0
def test_run():
    """Check nx.run output for every model/splitter pairing and bad input.

    With ``tournament=None`` each prediction must have a second dimension
    of 1 and ``tournaments()`` equal to ``nx.tournament_all()``. Passing
    ``None`` as the model, or ``{}`` as the third positional argument,
    must raise ValueError.
    """
    data = testing.play_data()

    candidates = [nx.linear(), nx.fifty()]
    splits = [
        nx.TournamentSplitter(data),
        nx.ValidationSplitter(data),
        nx.CheatSplitter(data),
        nx.CVSplitter(data, kfold=2),
        nx.SplitSplitter(data, fit_fraction=0.5),
    ]

    # Model-major order: each model is run through all splitters in turn.
    pairings = [(m, s) for m in candidates for s in splits]
    for current_model, current_split in pairings:
        prediction = nx.run(current_model, current_split,
                            tournament=None, verbosity=0)
        ok_(prediction.shape[1] == 1, 'wrong number of tournaments')
        ok_(prediction.tournaments() == nx.tournament_all(),
            'wrong tournaments')

    # invalid model
    splitter = nx.TournamentSplitter(data)
    assert_raises(ValueError, nx.run, None, splitter)

    # invalid third positional argument
    valid_model = nx.fifty()
    splitter = nx.TournamentSplitter(data)
    assert_raises(ValueError, nx.run, valid_model, splitter, {})