Example 1
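The examples on this page omit their imports. A plausible shared preamble, assuming the snippets come from the numerox test suite (the exact source modules are a guess):

import numpy as np
import pandas as pd
from nose.tools import ok_, assert_raises

import numerox as nx
from numerox import testing
from numerox.testing import shares_memory  # assumed home of this helper
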
def test_prediction_setitem():
    "compare prediction._setitem__ with merge"

    data = nx.play_data()
    p1 = nx.production(nx.logistic(), data, 'bernie', verbosity=0)
    p2 = nx.production(nx.logistic(1e-5), data, 2, verbosity=0)
    p3 = nx.production(nx.logistic(1e-6), data, 3, verbosity=0)
    p4 = nx.backtest(nx.logistic(), data, 4, verbosity=0)

    p = nx.Prediction()
    p[('logistic', 1)] = p1
    p[('logistic', 2)] = p2
    p[('logistic', 3)] = p3
    p[('logistic', 4)] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, ('logistic', 1), p1)
    assert_raises(ValueError, p.__setitem__, ('logistic', 1), p)
Example 2
def test_prediction_setitem():
    """compare prediction._setitem__ with merge"""

    data = nx.play_data()

    p1 = nx.production(nx.linear(), data, 'kazutsugi', verbosity=0)
    p2 = nx.production(nx.linear(), data, 8, verbosity=0)
    p3 = nx.production(nx.linear(), data, 8, verbosity=0)
    p4 = nx.backtest(nx.linear(), data, 8, verbosity=0)

    p = nx.Prediction()
    p[('linear', 1)] = p1
    p[('linear', 2)] = p2
    p[('linear', 3)] = p3
    p[('linear', 4)] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, ('linear', 1), p1)
    assert_raises(ValueError, p.__setitem__, ('linear', 1), p)
Example 3
def test_prediction_check():
    "make sure prediction.check runs"
    d = nx.play_data()
    p = nx.production(nx.logistic(), d, verbosity=0)
    p += nx.production(nx.logisticPCA(), d, verbosity=0)
    df = p.check(['logistic'], d)
    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
Example 4
def test_prediction_setitem():
    "compare prediction._setitem__ with merge"

    data = nx.play_data()
    p1 = nx.production(nx.logistic(), data, 'model1', verbosity=0)
    p2 = nx.production(nx.logistic(1e-5), data, 'model2', verbosity=0)
    p3 = nx.production(nx.logistic(1e-6), data, 'model3', verbosity=0)
    p4 = nx.backtest(nx.logistic(), data, 'model1', verbosity=0)

    p = nx.Prediction()
    p['model1'] = p1
    p['model2'] = p2
    p['model3'] = p3
    p['model1'] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, 'model1', p1)
    assert_raises(ValueError, p.__setitem__, 'model1', p)
Example 5
def test_model_run():
    "Make sure models run"
    d = nx.play_data()
    dfit = d['train']
    dpre = d['tournament']
    for model in get_models():
        model.fit_predict(dfit, dpre, tournament=5)
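test_model_run calls a get_models() helper that this page does not show. A minimal hypothetical stand-in, reusing only models that appear elsewhere in these examples:

def get_models():
    # Hypothetical stand-in for the unshown helper: return a small
    # list of numerox model instances for the smoke test above.
    return [nx.logistic(), nx.logistic(1e-5), nx.extratrees()]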
Example 6
def test_data_subsample():
    "test data.subsample"
    d = nx.play_data()
    d2 = d.subsample(0.5, balance=True)
    for era, idx in d2.era_iter():
        m = d2.y[idx].mean()
        if np.isfinite(m):
            ok_(m == 0.5, 'data is not balanced')
Example 7
def test_cvsplitter_kfold():
    """make sure cvsplitter runs k folds"""
    d = nx.play_data()
    for k in (2, 3):
        splitter = nx.CVSplitter(d, kfold=k)
        count = 0
        for dfit, dpredict in splitter:
            count += 1
        ok_(count == k, "CVSplitter iterated through wrong number of folds")
Example 8
def test_prediction_check():
    """make sure prediction.check runs"""
    d = nx.play_data()
    p1 = nx.production(nx.linear(), d, 'kazutsugi', verbosity=0)
    p2 = p1.copy()
    p2 = p2.rename('example_predictions')
    p = p1 + p2
    with nx.testing.HiddenPrints():
        df = p.check(d)
    ok_(isinstance(df, dict), 'expecting a dictionary')
Example 9
def test_rollsplitter():
    """make sure rollsplitter has no overlaps"""
    d = nx.play_data()
    splitter = nx.RollSplitter(d, fit_window=15, predict_window=10, step=15)
    for dfit, dpre in splitter:
        fera = dfit.unique_era()
        pera = dpre.unique_era()
        tera = np.unique(np.concatenate((fera, pera)))
        nfit = fera.size
        npre = pera.size
        ntot = tera.size
        ok_(nfit + npre == ntot, "RollSplitter has era overlap")
Example 10
def test_prediction_regression():
    "regression test of prediction performance evaluation"
    d = nx.play_data()
    p = nx.production(nx.logistic(), d, tournament=None, verbosity=0)
    for number, name in nx.tournament_iter():
        p2 = nx.production(nx.logistic(), d, tournament=name, verbosity=0)
        df = p.performance_mean(d['validation'], mean_of='tournament')
        logloss1 = df.loc[name]['logloss']
        logloss2 = p2.summary(d['validation']).loc['mean']['logloss']
        diff = np.abs(logloss1 - logloss2)
        msg = 'failed on {}'.format(name)
        ok_(diff < 1e-6, msg)
Example 11
def test_prediction_regression():
    """regression test of prediction performance evaluation"""
    d = nx.play_data()
    p = nx.production(nx.linear(), d, tournament=None, verbosity=0)
    for number, name in nx.tournament_iter():
        p2 = nx.production(nx.linear(), d, tournament=name, verbosity=0)
        df = p.performance_mean(d['validation'], mean_of='tournament')
        mse1 = float('%.3f' % (df.loc[name]['mse']))
        mse2 = float('%.3f' %
                     (p2.summary(d['validation']).loc['mean']['mse']))
        diff = np.abs(mse1 - mse2)
        msg = f"failed on {name}"
        ok_(diff < 1e-6, msg)
Example 12
def runner_example():

    data = nx.play_data()
    splitter = nx.CVSplitter(data)

    # let's run 3 models
    m1 = {'model': nx.logistic(), 'prediction_file': None, 'csv_file': None}
    m2 = {'model': nx.logistic(1e-4)}
    m3 = {'model': nx.extratrees()}
    run_list = [m1, m2, m3]

    # we won't save anything, just display the results
    runner = nx.Runner(run_list, splitter, verbosity=1)
    runner.run()
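runner_example sets the output keys to None so nothing is written to disk. To save each model's results, the same dictionary keys presumably take file paths instead; the names below are hypothetical:

m1 = {'model': nx.logistic(),
      'prediction_file': 'logistic.pred',  # hypothetical output path
      'csv_file': 'logistic.csv'}          # hypothetical output path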
Example 13
def test_data_pca():
    "test data.pca"
    d = nx.play_data()
    nfactors = (None, 3, 0.5)
    for nfactor in nfactors:
        d2 = d.pca(nfactor=nfactor)
        msg = "data.pca should return a copy"
        ok_(not shares_memory(d, d2), msg)
        if nfactor is None:
            ok_(d.shape == d2.shape, "shape should not change")
        corr = np.corrcoef(d2.x.T)
        corr.flat[::corr.shape[0] + 1] = 0
        corr = np.abs(corr).max()
        ok_(corr < 1e-5, "features are not orthogonal")
Example 14
def test_prediction_dominance():
    "make sure prediction.dominance runs"

    d = nx.play_data()
    d = d['validation']

    p = nx.Prediction()
    p = p.merge_arrays(d.ids, d.y, 'model1')
    p = p.merge_arrays(d.ids, d.y, 'model2')
    p = p.merge_arrays(d.ids, d.y, 'model3')

    df = p.dominance(d)

    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
    assert_raises(ValueError, p['model1'].dominance, d)
Example 15
def test_loocvsplitter():
    """test loocvsplitter"""
    d = nx.play_data()['train']
    splitter = nx.LoocvSplitter(d)
    count = 0
    for dfit, dpre in splitter:
        count += 1
        eras = dfit.unique_era().tolist()
        era = dpre.unique_era().tolist()
        ok_(isinstance(eras, list), "expecting a list")
        ok_(isinstance(era, list), "expecting a list")
        ok_(len(era) == 1, "expecting a single era")
        ok_(era[0] not in eras, "did not hold out era")
    k = d.unique_era().size
    ok_(count == k, "LoocvSplitter iterated through wrong number of folds")
Example 16
def test_splitter_overlap():
    "prediction data should not overlap"
    d = nx.play_data()
    splitters = [
        nx.TournamentSplitter(d),
        nx.ValidationSplitter(d),
        nx.CheatSplitter(d),
        nx.CVSplitter(d),
        nx.IgnoreEraCVSplitter(d),
        nx.SplitSplitter(d, fit_fraction=0.5)
    ]
    for splitter in splitters:
        predict_ids = []
        for dfit, dpredict in splitter:
            predict_ids.extend(dpredict.ids.tolist())
        ok_(len(predict_ids) == len(set(predict_ids)), "ids overlap")
Example 17
def test_prediction_dominance():
    "make sure prediction.dominance runs"

    d = nx.play_data()
    d = d['validation']

    p = nx.Prediction()
    p = p.merge_arrays(d.ids, d.y['bernie'], 'model1', 1)
    p = p.merge_arrays(d.ids, d.y['elizabeth'], 'model2', 2)
    p = p.merge_arrays(d.ids, d.y['jordan'], 'model3', 3)

    df = p.dominance(d, 3)
    df = p.dominance(d, 'jordan')

    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
    assert_raises(ValueError, p[('model1', 1)].dominance, d, 1)
Example 18
def test_prediction_dominance():
    """make sure prediction.dominance runs"""

    d = nx.play_data()
    d = d['validation']

    p = nx.Prediction()
    p = p.merge_arrays(d.ids, d.y['kazutsugi'], 'model1', 8)
    p = p.merge_arrays(d.ids, d.y['kazutsugi'], 'model2', 8)
    p = p.merge_arrays(d.ids, d.y['kazutsugi'], 'model3', 8)

    df = p.dominance(d, 8)
    df = p.dominance(d, 'kazutsugi')

    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
    assert_raises(ValueError, p[('model1', 8)].dominance, d, 8)
Example 19
def test_splitter_reset():
    "splitter reset should not change results"
    d = nx.play_data()
    splitters = [
        nx.TournamentSplitter(d),
        nx.ValidationSplitter(d),
        nx.CheatSplitter(d),
        nx.CVSplitter(d),
        nx.IgnoreEraCVSplitter(d),
        nx.SplitSplitter(d, fit_fraction=0.5)
    ]
    for splitter in splitters:
        ftups = [[], []]
        ptups = [[], []]
        for i in range(2):
            for dfit, dpredict in splitter:
                ftups[i].append(dfit)
                ptups[i].append(dpredict)
            splitter.reset()
        ok_(ftups[0] == ftups[1], "splitter reset changed fit split")
        ok_(ptups[0] == ptups[1], "splitter reset changed predict split")
Example 20
def backtest_example():
    data = nx.play_data()
    model = nx.logistic()
    prediction = nx.backtest(model, data)  # noqa
Example 21
def test_data_subsample():
    "test data.subsample"
    d = nx.play_data()
    d2 = d.subsample(0.5)
    ok_(isinstance(d2, nx.Data), 'expecting a Data object')
    ok_(d2.shape[0] < 0.51 * d.shape[0], 'expecting smaller subsample')
Example 22
def test_examples():
    data = nx.play_data()
    with testing.HiddenPrints():
        nx.examples.run_all_examples(data)
Example 23
def test_data_hash():
    "test data.hash"
    d = nx.play_data()
    ok_(d.hash() == d.hash(), "data.hash not reproducible")
    d2 = nx.Data(d.df[::2])
    ok_(d2.hash() == d2.hash(), "data.hash not reproducible")
Example 24
def concordance_example():
    data = nx.play_data()
    model = nx.logistic()
    prediction = nx.production(model, data)
    concord = nx.concordance(data, prediction)
    print("concordance {:.4f} (less than 0.12 is passing)".format(concord))