Esempio n. 1
0
def test_prediction_setitem():
    """prediction.__setitem__ should give the same result as merge"""

    play = nx.play_data()

    # three production runs followed by one backtest, all with linear models
    parts = [
        nx.production(nx.linear(), play, 'kazutsugi', verbosity=0),
        nx.production(nx.linear(), play, 8, verbosity=0),
        nx.production(nx.linear(), play, 8, verbosity=0),
        nx.backtest(nx.linear(), play, 8, verbosity=0),
    ]

    # build one prediction object by item assignment ...
    assigned = nx.Prediction()
    for number, part in enumerate(parts, 1):
        assigned[('linear', number)] = part

    # ... and a second one by merging the same parts in the same order
    merged = nx.Prediction()
    for part in parts:
        merged = merged.merge(part)

    pd.testing.assert_frame_equal(assigned.df, merged.df)

    # both assignments below are expected to raise ValueError
    key = ('linear', 1)
    for value in (parts[0], assigned):
        assert_raises(ValueError, assigned.__setitem__, key, value)
Esempio n. 2
0
def test_prediction_setitem():
    "prediction.__setitem__ should give the same result as merge"

    play = nx.play_data()
    first = nx.production(nx.logistic(), play, 'bernie', verbosity=0)
    second = nx.production(nx.logistic(1e-5), play, 2, verbosity=0)
    third = nx.production(nx.logistic(1e-6), play, 3, verbosity=0)
    fourth = nx.backtest(nx.logistic(), play, 4, verbosity=0)
    parts = (first, second, third, fourth)

    # fill one prediction object through item assignment ...
    assigned = nx.Prediction()
    for number, part in enumerate(parts, 1):
        assigned[('logistic', number)] = part

    # ... and another one through successive merges
    merged = nx.Prediction()
    for part in parts:
        merged = merged.merge(part)

    pd.testing.assert_frame_equal(assigned.df, merged.df)

    # both assignments below are expected to raise ValueError
    key = ('logistic', 1)
    assert_raises(ValueError, assigned.__setitem__, key, first)
    assert_raises(ValueError, assigned.__setitem__, key, assigned)
Esempio n. 3
0
def test_prediction_setitem():
    "prediction.__setitem__ should give the same result as merge"

    play = nx.play_data()
    prod1 = nx.production(nx.logistic(), play, 'model1', verbosity=0)
    prod2 = nx.production(nx.logistic(1e-5), play, 'model2', verbosity=0)
    prod3 = nx.production(nx.logistic(1e-6), play, 'model3', verbosity=0)
    back1 = nx.backtest(nx.logistic(), play, 'model1', verbosity=0)

    # (key, prediction) in assignment order; 'model1' is used twice, once
    # for the production run and once for the backtest
    keyed = [
        ('model1', prod1),
        ('model2', prod2),
        ('model3', prod3),
        ('model1', back1),
    ]

    assigned = nx.Prediction()
    for key, part in keyed:
        assigned[key] = part

    merged = nx.Prediction()
    for _, part in keyed:
        merged = merged.merge(part)

    pd.testing.assert_frame_equal(assigned.df, merged.df)

    # both assignments below are expected to raise ValueError
    for value in (prod1, assigned):
        assert_raises(ValueError, assigned.__setitem__, 'model1', value)
Esempio n. 4
0
def run_one(model, splitter, tournament, verbosity=2):
    """
    Run a single model through a data splitter for a single tournament.

    For every fit/predict pair yielded by `splitter` the model is asked to
    fit and predict, and the result is merged into one prediction object
    which is returned at the end.

    `verbosity` controls printing: above 0 the model is printed; at exactly
    1 a summary is printed once after the last split; above 1 a summary is
    printed after every split and the elapsed time at the end; above 2 the
    splitter is printed too.
    """
    started = time.time()
    model_name = model.name
    if verbosity > 2:
        print(splitter)
    wants_summary = verbosity > 0
    if wants_summary:
        pprint.pprint(model)
    seen = None  # predict data gathered so far; only kept for summaries
    result = nx.Prediction()
    for fit_part, predict_part in splitter:
        if wants_summary:
            seen = predict_part.copy() if seen is None else seen + predict_part
        # blank out the targets so the model cannot accidentally cheat by
        # looking at the y it is supposed to predict
        blinded = predict_part.y_to_nan()
        ids, yhat = model.fit_predict(fit_part, blinded, tournament)
        result = result.merge_arrays(ids, yhat, model_name, tournament)
        if verbosity > 1:
            scored = seen.region_isnotin(['test', 'live'])
            print(result.summary(scored, tournament))
    if verbosity == 1:
        scored = seen.region_isnotin(['test', 'live'])
        print(result.summary(scored, tournament))
    if verbosity > 1:
        elapsed = (time.time() - started) / 60
        print('Done in {:.2f} minutes'.format(elapsed))
    return result
Esempio n. 5
0
    def predict(self, dpre: nx.data.Data, tournament: str) -> nx.Prediction:
        """
        Alternative to fit_predict(): predict with self.model only.

        Parameters
        ----------
        dpre : nx.data.Data
            Data to predict on; must be data['tournament']. It is passed
            through y_to_nan() before its x is handed to the model.
        tournament : int or str
            Tournament the predictions are stored under.

        Returns
        -------
        nx.Prediction
            A new prediction holding the model output for the ids of
            `dpre`, stored under self.name and `tournament`.

        Raises
        ------
        Exception
            Whatever the model or the merge raises; the failure is logged
            and the same exception is re-raised.
        """
        prediction = nx.Prediction()
        data_predict = dpre.y_to_nan()
        try:
            LOGGER.info('Inference started...')
            yhat = self.model.predict(data_predict.x)
            LOGGER.info(
                'Inference complete...now preparing predictions for submission'
            )
        except Exception as e:
            LOGGER.error(f'Failure to make predictions with {e}')
            # bare raise re-raises the active exception without adding an
            # extra traceback entry for this line
            raise

        try:
            return prediction.merge_arrays(data_predict.ids, yhat,
                                           self.name, tournament)
        except Exception as e:
            LOGGER.error(f'Failure to prepare predictions with {e}')
            raise
Esempio n. 6
0
def test_merge_predictions():
    "test merge_predictions"

    full = testing.micro_prediction()
    msg = 'corruption of merge predictions'

    # merging a prediction with itself is expected to raise
    assert_raises(ValueError, nx.merge_predictions, [full, full])

    # merging with an empty prediction should leave the original unchanged
    with_empty = nx.merge_predictions([full, nx.Prediction()])
    ade(with_empty, full, msg)

    # row-wise splits, in order and shuffled, should reassemble to `full`
    row_splits = [
        ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
        ([0, 1, 2, 3], [4, 5, 6], [7, 8, 9]),
        ([9, 4, 3, 2], [1, 8, 7], [6, 5, 0]),
    ]
    for split in row_splits:
        pieces = [testing.micro_prediction(rows) for rows in split]
        ade(nx.merge_predictions(pieces), full, msg)

    # split by rows and by model at the same time
    top = testing.micro_prediction([0, 1, 2, 3, 4])
    top_two = top[['model0', 'model1']]
    top_last = top['model2']
    bottom = testing.micro_prediction([5, 6, 7, 8, 9])
    bottom_first = bottom['model0']
    bottom_two = bottom[['model1', 'model2']]
    pieces = [top_two, bottom_first, bottom_two, top_last]
    ade(nx.merge_predictions(pieces), full, msg)
Esempio n. 7
0
def load_example_predictions(data_zip):
    """
    Load example predictions from Numerai zip archive.

    Parameters
    ----------
    data_zip
        The zip archive; passed as-is to zipfile.ZipFile.

    Returns
    -------
    nx.Prediction
        Prediction built from the csv member named by EXAMPLE_PREDICTIONS.
        The first csv column is used as the index and the column labels
        are replaced by the single name 'example_predictions'.
    """
    # the context managers close both the archive and the member, even
    # when reading the csv fails
    with zipfile.ZipFile(data_zip) as zf:
        with zf.open(EXAMPLE_PREDICTIONS) as csv_file:
            df = pd.read_csv(csv_file, header=0, index_col=0)
    df.columns = ['example_predictions']
    return nx.Prediction(df)
Esempio n. 8
0
def micro_prediction(index=None):
    "Returns a tiny prediction object with seeded random yhat for testing"
    data = micro_data(index)
    count = len(data)
    rng = np.random.RandomState(0)  # fixed seed keeps yhat reproducible
    uniform = rng.rand(count)
    # rescale the draws from [0, 1) to [0.4, 0.6)
    yhat = 0.2 * (uniform - 0.5) + 0.5
    tiny = nx.Prediction()
    tiny.append(data.ids, yhat)
    return tiny
Esempio n. 9
0
def load_example_predictions(data_zip, tournament):
    """
    Load example predictions from Numerai zip archive.

    Parameters
    ----------
    data_zip
        The zip archive; passed as-is to zipfile.ZipFile.
    tournament
        Tournament to load; converted to its name with nx.tournament_str.

    Returns
    -------
    nx.Prediction
        Prediction built from the csv member whose name is obtained by
        formatting EXAMPLE_PREDICTIONS with the tournament name. The first
        csv column is used as the index and the column labels are replaced
        by the single name 'example_predictions_<tournament name>'.
    """
    # the context managers close both the archive and the member, even
    # when looking up the tournament or reading the csv fails
    with zipfile.ZipFile(data_zip) as zf:
        tourn_name = nx.tournament_str(tournament)
        filename = EXAMPLE_PREDICTIONS.format(tourn_name)
        with zf.open(filename) as csv_file:
            df = pd.read_csv(csv_file, header=0, index_col=0)
    df.columns = ['example_predictions_{}'.format(tourn_name)]
    return nx.Prediction(df)
Esempio n. 10
0
def test_prediction_add():
    "add two predictions together"

    data = testing.micro_data()
    left = nx.Prediction()
    right = nx.Prediction()
    train = data['train']
    tourn = data['tournament']

    # one seeded generator feeds both draws, first train then tournament
    rng = np.random.RandomState(0)
    y_train = 0.2 * (rng.rand(len(train)) - 0.5) + 0.5
    y_tourn = 0.2 * (rng.rand(len(tourn)) - 0.5) + 0.5
    left = left.merge_arrays(train.ids, y_train, 'model1')
    right = right.merge_arrays(tourn.ids, y_tourn, 'model1')

    total = left + right  # just make sure that it runs

    # adding `left` again, to the sum or to itself, is expected to raise
    for target in (total, left):
        assert_raises(ValueError, target.__add__, left)
Esempio n. 11
0
def test_prediction_properties():
    "prediction properties should not be corrupted"

    data = nx.testing.micro_data()

    # (target column, model name, tournament number) in merge order
    runs = [('bernie', 'model1', 1), ('elizabeth', 'model2', 2)]
    pred = nx.Prediction()
    for target, model_name, number in runs:
        pred = pred.merge_arrays(data.ids, data.y[target], model_name, number)

    ok_((pred.ids == pred.df.index).all(), "ids is corrupted")
    ok_((pred.ids == data.df.index).all(), "ids is corrupted")
    # column i of pred.y should hold the target merged in i-th place
    for column, (target, _, _) in enumerate(runs):
        expected = getattr(data.df, target)
        ok_((pred.y[:, column] == expected).all(), "y is corrupted")
Esempio n. 12
0
def test_data_properties():
    "prediction properties should not be corrupted"

    data = testing.micro_data()
    pred = nx.Prediction()
    # the same targets are merged under two different model names
    for model_name in ('model1', 'model2'):
        pred = pred.merge_arrays(data.ids, data.y, model_name)

    for index in (pred.df.index, data.df.index):
        ok_((pred.ids == index).all(), "ids is corrupted")
    for column in (0, 1):
        ok_((pred.y[:, column] == data.df.y).all(), "y is corrupted")
Esempio n. 13
0
def test_prediction_ynew():
    "test prediction.ynew"
    pred = nx.testing.micro_prediction()
    current = pred.y.copy()
    fresh = np.random.rand(*current.shape)

    # a same-shaped array should be taken over as the new y
    replaced = pred.ynew(fresh)
    np.testing.assert_array_equal(replaced.y, fresh, 'prediction.ynew failed')

    # arrays with fewer rows, fewer columns or flattened are expected to
    # be rejected
    for wrong in (fresh[:3], fresh[:, :2], fresh.reshape(-1)):
        assert_raises(ValueError, pred.ynew, wrong)

    # so is any array given to an empty prediction
    empty = nx.Prediction()
    assert_raises(ValueError, empty.ynew, fresh)
Esempio n. 14
0
def test_prediction_properties():
    """prediction properties should not be corrupted"""

    data = nx.testing.micro_data()

    # the kazutsugi targets are merged twice, under two model/tournament pairs
    pred = nx.Prediction()
    for model_name, number in (('model1', 1), ('model2', 2)):
        pred = pred.merge_arrays(data.ids, data.y['kazutsugi'], model_name,
                                 number)

    for index in (pred.df.index, data.df.index):
        ok_((pred.ids == index).all(), "ids is corrupted")
    for column in (0, 1):
        ok_((pred.y[:, column] == data.df.kazutsugi).all(), "y is corrupted")
Esempio n. 15
0
def test_prediction_dominance():
    "make sure prediction.dominance runs"

    validation = nx.play_data()['validation']

    # three models that all "predict" the true targets
    pred = nx.Prediction()
    for model_name in ('model1', 'model2', 'model3'):
        pred = pred.merge_arrays(validation.ids, validation.y, model_name)

    table = pred.dominance(validation)
    ok_(isinstance(table, pd.DataFrame), 'expecting a dataframe')

    # dominance of a single model is expected to raise
    single = pred['model1']
    assert_raises(ValueError, single.dominance, validation)
Esempio n. 16
0
def test_empty_prediction():
    "Test handling of empty predictions"
    empty = nx.Prediction()
    ok_(empty.names == [], "wrong name")

    # renaming or dropping, by string or by list, is expected to raise
    for method in (empty.rename, empty.drop):
        for names in ('name', ['name']):
            assert_raises(ValueError, method, names)
    assert_raises(ValueError, empty.save, 'not_used')

    no_ids = np.array([], dtype=str)
    ok_((empty.ids == no_ids).all(), 'empty ids')
    ok_(empty.copy() == empty, 'empty copy')
    ok_(empty.size == 0, 'empty size')
    ok_(empty.shape == (0, 0), 'empty shape')
    ok_(len(empty) == 0, 'empty length')
    empty.__repr__()  # only checks that it runs
Esempio n. 17
0
def test_prediction_dominance():
    "make sure prediction.dominance runs"

    validation = nx.play_data()['validation']

    # (target column, model name, tournament number) in merge order
    runs = [
        ('bernie', 'model1', 1),
        ('elizabeth', 'model2', 2),
        ('jordan', 'model3', 3),
    ]
    pred = nx.Prediction()
    for target, model_name, number in runs:
        pred = pred.merge_arrays(validation.ids, validation.y[target],
                                 model_name, number)

    # tournament given by number (result unused), then by name
    pred.dominance(validation, 3)
    table = pred.dominance(validation, 'jordan')
    ok_(isinstance(table, pd.DataFrame), 'expecting a dataframe')

    # dominance of a single model/tournament pair is expected to raise
    single = pred[('model1', 1)]
    assert_raises(ValueError, single.dominance, validation, 1)
Esempio n. 18
0
def test_prediction_dominance():
    """make sure prediction.dominance runs"""

    validation = nx.play_data()['validation']

    # three models, all merged with the kazutsugi targets for tournament 8
    pred = nx.Prediction()
    for model_name in ('model1', 'model2', 'model3'):
        pred = pred.merge_arrays(validation.ids, validation.y['kazutsugi'],
                                 model_name, 8)

    # tournament given by number (result unused), then by name
    pred.dominance(validation, 8)
    table = pred.dominance(validation, 'kazutsugi')
    ok_(isinstance(table, pd.DataFrame), 'expecting a dataframe')

    # NOTE(review): every model above was merged under tournament 8, yet
    # the lookup below uses the pair ('model1', 1) -- confirm this is intended
    single = pred[('model1', 1)]
    assert_raises(ValueError, single.dominance, validation, 1)
Esempio n. 19
0
def micro_prediction(index=None):
    "Returns a tiny prediction object for use in unit testing"
    # one row per id ('index0' .. 'index9'), one value per model
    rows = [
        [0.00, 0.01, 0.02],
        [0.10, 0.11, 0.12],
        [0.20, 0.21, 0.22],
        [0.30, 0.31, 0.32],
        [0.40, 0.41, 0.42],
        [0.50, 0.51, 0.52],
        [0.60, 0.61, 0.62],
        [0.70, 0.71, 0.72],
        [0.80, 0.81, 0.82],
        [0.90, 0.91, 0.92],
    ]
    frame = pd.DataFrame(columns=['model0', 'model1', 'model2'])
    for position, values in enumerate(rows):
        frame.loc['index{}'.format(position)] = values
    # optionally keep only the requested row positions
    if index is not None:
        frame = frame.iloc[index]
    frame = frame.copy()  # assure contiguous memory
    return nx.Prediction(frame)
Esempio n. 20
0
def micro_prediction(index=None):
    """Returns a tiny prediction object for use in unit testing"""
    # columns are (model name, tournament number) pairs
    pairs = [('model0', 2), ('model1', 1), ('model2', 3), ('model0', 5)]
    # one row per id ('index0' .. 'index9'), one value per pair
    rows = [
        [0.002, 0.011, 0.023, 0.005],
        [0.102, 0.111, 0.123, 0.105],
        [0.202, 0.211, 0.223, 0.205],
        [0.302, 0.311, 0.323, 0.305],
        [0.402, 0.411, 0.423, 0.405],
        [0.502, 0.511, 0.523, 0.505],
        [0.602, 0.611, 0.623, 0.605],
        [0.702, 0.711, 0.723, 0.705],
        [0.802, 0.811, 0.823, 0.805],
        [0.902, 0.911, 0.923, 0.905],
    ]
    frame = pd.DataFrame(columns=pairs)
    for position, values in enumerate(rows):
        frame.loc['index{}'.format(position)] = values
    # optionally keep only the requested row positions
    if index is not None:
        frame = frame.iloc[index]
    frame = frame.copy()  # assure contiguous memory
    return nx.Prediction(frame)
Esempio n. 21
0
def run(model, splitter, tournament=None, verbosity=2):
    """
    Run one or more model/tournament pairs through a data splitter.

    Parameters
    ----------
    model : nx.Model, list, tuple
        The prediction model to run, or a list or tuple of models. Model
        names must be unique.
    splitter : nx.Splitter
        An iterator of fit/predict data pairs. It is reset after every
        model/tournament pair and once more before returning.
    tournament : {None, int, str, list, tuple}, optional
        The tournament(s) to run the model(s) through. With the default
        (None) the tournaments returned by nx.tournament_all() are used. A
        list or tuple of tournaments must not contain duplicates.
    verbosity : int, optional
        An integer that determines verbosity. Zero is silent.

    Returns
    -------
    p : nx.Prediction
        A prediction object containing the predictions of the specified
        model/tournament pairs.

    Raises
    ------
    ValueError
        If `model` or `tournament` has an unsupported type, or if model
        names or tournaments are duplicated.

    """

    # normalize `model` to a sequence of models
    if isinstance(model, nx.Model):
        models = [model]
    elif isinstance(model, (list, tuple)):
        models = model
    else:
        raise ValueError('`model` must be a model, list, or tuple of models')
    model_names = [each.name for each in models]
    if len(set(model_names)) != len(model_names):
        raise ValueError('`model` cannot contain duplicate names')

    # normalize `tournament` to a list of tournament names
    if tournament is None:
        requested = nx.tournament_all()
    elif nx.isint(tournament) or nx.isstring(tournament):
        requested = [tournament]
    elif isinstance(tournament, (list, tuple)):
        requested = tournament
    else:
        msg = '`tournament` must be an integer, string, list, tuple, or None.'
        raise ValueError(msg)
    tournament_names = [nx.tournament_str(each) for each in requested]
    if len(set(tournament_names)) != len(tournament_names):
        raise ValueError('`tournament` cannot contain duplicates')

    # run every model/tournament pair and accumulate the predictions
    p = nx.Prediction()
    for each_model in models:
        for each_name in tournament_names:
            p += run_one(each_model, splitter, each_name, verbosity=verbosity)
            splitter.reset()
    splitter.reset()

    return p
Esempio n. 22
0
def test_emtpy_y_raises():
    "access y of an empty prediction"
    # NOTE(review): the name suggests an exception is expected, but nothing
    # visible here asserts it -- presumably a decorator outside this
    # snippet does; confirm
    empty = nx.Prediction()
    empty.y
Esempio n. 23
0
def test_data_hash():
    "test prediction.hash"
    full = nx.testing.micro_prediction()
    ok_(full.hash() == full.hash(), "prediction.hash not reproduceable")

    # same check on a prediction built from every second row
    thinned = nx.Prediction(full.df[::2])
    first, second = thinned.hash(), thinned.hash()
    ok_(first == second, "prediction.hash not reproduceable")