Code example #1
0
def run(model, splitter, name=None, verbosity=2):
    "Run a single model through a data splitter"
    start = time.time()
    # Default the prediction name to the model's class name; an explicit
    # name is echoed only at the most verbose level.
    if name is None:
        name = model.__class__.__name__
    elif verbosity > 2:
        print(name)
    if verbosity > 2:
        print(splitter)
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    prediction = Prediction()
    for data_fit, data_predict in splitter:
        # Accumulate the predicted portions so summaries can be reported.
        if verbosity > 0:
            seen = data_predict.copy() if seen is None else seen + data_predict
        # the following line of code hides from your model the y
        # that you are trying to predict to prevent accidental cheating
        ids, yhat = model.fit_predict(data_fit, data_predict.y_to_nan())
        prediction = prediction.merge_arrays(ids, yhat, name)
        if verbosity > 1:
            print(prediction.summary(seen.region_isnotin(['test', 'live'])))
    if verbosity == 1:
        print(prediction.summary(seen.region_isnotin(['test', 'live'])))
    if verbosity > 1:
        print('Done in {:.2f} minutes'.format((time.time() - start) / 60))
    return prediction
Code example #2
0
File: test_prediction.py  Project: sovaa/numerox
def test_prediction_copies():
    "prediction properties should be copies"

    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)

    # Sanity check that shares_memory itself behaves as expected.
    ok_(shares_memory(pred, pred), "looks like shares_memory failed")
    # ids and yhat should be views onto the prediction's storage...
    for prop, msg in ((pred.ids, "p.ids should be a view"),
                      (pred.yhat, "p.yhat should be a view")):
        ok_(shares_memory(pred, prop), msg)
    # ...while copy() must produce independent storage.
    ok_(not shares_memory(pred, pred.copy()), "should be a copy")
Code example #3
0
File: test_prediction.py  Project: mangstad/numerox
def test_data_properties():
    "prediction properties should not be corrupted"

    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)

    # ids must agree with both the prediction's and the source data's index.
    ok_((pred.ids == pred.df.index).all(), "ids is corrupted")
    ok_((pred.ids == data.df.index).all(), "ids is corrupted")
    # Compare yhat only where it is not NaN (NaN never equals NaN).
    mask = ~np.isnan(pred.df.yhat)
    ok_((pred.yhat[mask] == pred.df.yhat[mask]).all(), "yhat is corrupted")
    ok_((pred.yhat[mask] == data.df.y[mask]).all(), "yhat is corrupted")
Code example #4
0
def run(model, splitter, verbosity=2):
    "Run a single model through a data splitter"
    start = time.time()
    if verbosity > 2:
        print(splitter)
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    prediction = Prediction()
    for data_fit, data_predict in splitter:
        # Accumulate the predicted portions so performance can be reported.
        if verbosity > 0:
            seen = data_predict.copy() if seen is None else seen + data_predict
        # the following line of code hides from your model the y
        # that you are trying to predict to prevent accidental cheating
        # NOTE(review): this rebinds data_predict.df on the object yielded
        # by the splitter -- confirm the splitter yields fresh objects.
        data_predict.df = data_predict.df.assign(y=np.nan)
        ids, yhat = model.fit_predict(data_fit, data_predict)
        prediction.append(ids, yhat)
        if verbosity > 1:
            prediction.performance(seen.region_isnotin(['test', 'live']))
    if verbosity == 1:
        prediction.performance(seen.region_isnotin(['test', 'live']))
    if verbosity > 2:
        print('Done in {:.2f} minutes'.format((time.time() - start) / 60))
    return prediction
Code example #5
0
File: test_report.py  Project: sovaa/numerox
def test_report_performance_df():
    "make sure report.performance_df runs"

    data = micro_data()
    data = data['train'] + data['validation']

    pred = Prediction()
    pred.append(data.ids, data.y)

    report = Report()
    # The same prediction stands in for three separate models.
    for model_name in ('model1', 'model2', 'model3'):
        report.append_prediction(pred, model_name)

    frame = report.performance_df(data)

    ok_(isinstance(frame, pd.DataFrame), 'expecting a dataframe')
Code example #6
0
File: engines.py  Project: aponte411/model_pipelines
 def evaluate_predictions(self, predictions: nx.Prediction, trainer: Any,
                          tournament: str) -> None:
     """Evaluate the validation set predictions"""
     # NOTE(review): `trainer` is unused here -- kept for interface
     # compatibility with callers.
     validation = self.data['validation']
     # Log the overall summaries, then the per-era metrics.
     LOGGER.info(predictions.summaries(validation, tournament=tournament))
     LOGGER.info(predictions[:, tournament].metric_per_era(
         data=validation, tournament=tournament))
Code example #7
0
def run(model, splitter, verbosity=2):
    """Fit a model on each split and collect its predictions."""
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    prediction = Prediction()
    for data_fit, data_predict in splitter:
        # NOTE(review): unlike other variants of run, the y column of
        # data_predict is not hidden (e.g. set to NaN) before fit_predict --
        # confirm the model cannot see the target it is asked to predict.
        ids, yhat = model.fit_predict(data_fit, data_predict)
        prediction.append(ids, yhat)
        # Accumulate the predicted portions so performance can be reported.
        seen = data_predict.copy() if seen is None else seen + data_predict
        if verbosity > 1:
            prediction.performance(seen.region_isnotin(['test', 'live']))
    if verbosity == 1:
        prediction.performance(seen.region_isnotin(['test', 'live']))
    return prediction
Code example #8
0
File: test_prediction.py  Project: mangstad/numerox
def test_prediction_add():
    "add two predictions together"

    data = micro_data()
    pred1 = Prediction()
    pred2 = Prediction()
    train = data['train']
    tourn = data['tournament']
    rng = np.random.RandomState(0)
    # Random yhat values centered at 0.5 with spread +/- 0.1.
    pred1.append(train.ids, 0.2 * (rng.rand(len(train)) - 0.5) + 0.5)
    pred2.append(tourn.ids, 0.2 * (rng.rand(len(tourn)) - 0.5) + 0.5)

    p = pred1 + pred2  # just make sure that it runs

    # Adding predictions with overlapping ids must raise.
    assert_raises(IndexError, p.__add__, pred1)
    assert_raises(IndexError, pred1.__add__, pred1)