def run(model, splitter, name=None, verbosity=2):
    "Run a single model through a data splitter"
    start = time.time()
    if name is None:
        name = model.__class__.__name__
    elif verbosity > 2:
        print(name)
    if verbosity > 2:
        print(splitter)
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    pred = Prediction()
    for data_fit, data_predict in splitter:
        # accumulate the predict-side data so summaries can be printed below
        if verbosity > 0:
            seen = data_predict.copy() if seen is None else seen + data_predict
        # hide from your model the y that you are trying to predict,
        # to prevent accidental cheating
        data_predict = data_predict.y_to_nan()
        ids, yhat = model.fit_predict(data_fit, data_predict)
        pred = pred.merge_arrays(ids, yhat, name)
        if verbosity > 1:
            print(pred.summary(seen.region_isnotin(['test', 'live'])))
    if verbosity == 1:
        print(pred.summary(seen.region_isnotin(['test', 'live'])))
    if verbosity > 1:
        elapsed = (time.time() - start) / 60
        print('Done in {:.2f} minutes'.format(elapsed))
    return pred
def test_prediction_copies():
    "prediction ids/yhat are views; copy() must not share memory"
    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)
    # sanity check that the memory-sharing detector itself works
    ok_(shares_memory(pred, pred), "looks like shares_memory failed")
    ok_(shares_memory(pred, pred.ids), "p.ids should be a view")
    ok_(shares_memory(pred, pred.yhat), "p.yhat should be a view")
    ok_(not shares_memory(pred, pred.copy()), "should be a copy")
def test_data_properties():
    "prediction properties should not be corrupted"
    data = play_data()
    pred = Prediction()
    pred.append(data.ids, data.y)
    ok_((pred.ids == pred.df.index).all(), "ids is corrupted")
    ok_((pred.ids == data.df.index).all(), "ids is corrupted")
    # only compare rows where yhat is present (not NaN)
    mask = ~np.isnan(pred.df.yhat)
    ok_((pred.yhat[mask] == pred.df.yhat[mask]).all(), "yhat is corrupted")
    ok_((pred.yhat[mask] == data.df.y[mask]).all(), "yhat is corrupted")
def run(model, splitter, verbosity=2):
    "Run a single model through a data splitter"
    start = time.time()
    if verbosity > 2:
        print(splitter)
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    pred = Prediction()
    for fit_data, predict_data in splitter:
        # accumulate the predict-side data so performance can be reported
        if verbosity > 0:
            seen = predict_data.copy() if seen is None else seen + predict_data
        # hide from your model the y that you are trying to predict,
        # to prevent accidental cheating.
        # NOTE(review): this rebinds predict_data.df in place on the object
        # the splitter yielded -- confirm the splitter does not reuse it.
        predict_data.df = predict_data.df.assign(y=np.nan)
        ids, yhat = model.fit_predict(fit_data, predict_data)
        pred.append(ids, yhat)
        if verbosity > 1:
            pred.performance(seen.region_isnotin(['test', 'live']))
    if verbosity == 1:
        pred.performance(seen.region_isnotin(['test', 'live']))
    if verbosity > 2:
        minutes = (time.time() - start) / 60
        print('Done in {:.2f} minutes'.format(minutes))
    return pred
def test_report_performance_df():
    "make sure report.performance_df runs"
    data = micro_data()
    data = data['train'] + data['validation']
    pred = Prediction()
    pred.append(data.ids, data.y)
    report = Report()
    # register the same prediction under three different model names
    for model_name in ('model1', 'model2', 'model3'):
        report.append_prediction(pred, model_name)
    df = report.performance_df(data)
    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
def evaluate_predictions(self, predictions: nx.Prediction, trainer: Any, tournament: str) -> None:
    """Evaluate the validation set predictions"""
    validation = self.data['validation']
    # overall summary of the predictions on the validation region
    summary = predictions.summaries(validation, tournament=tournament)
    LOGGER.info(summary)
    # per-era metric breakdown for the selected tournament column
    per_era = predictions[:, tournament].metric_per_era(
        data=validation, tournament=tournament)
    LOGGER.info(per_era)
def run(model, splitter, verbosity=2):
    """Run a single model through a data splitter and return its Prediction."""
    if verbosity > 0:
        pprint.pprint(model)
    seen = None
    pred = Prediction()
    for fit_data, predict_data in splitter:
        # NOTE(review): unlike the other run() variants in this codebase, the
        # y column of predict_data is NOT blanked out before fit_predict --
        # confirm the model cannot accidentally peek at the target here.
        ids, yhat = model.fit_predict(fit_data, predict_data)
        pred.append(ids, yhat)
        # accumulate the predict-side data for the performance report
        seen = predict_data.copy() if seen is None else seen + predict_data
    if verbosity > 1:
        pred.performance(seen.region_isnotin(['test', 'live']))
    if verbosity == 1:
        pred.performance(seen.region_isnotin(['test', 'live']))
    return pred
def test_prediction_add():
    "add two predictions together"
    data = micro_data()
    train = data['train']
    tourn = data['tournament']
    rng = np.random.RandomState(0)
    # same draw order as before: train first, then tournament
    yhat_train = 0.2 * (rng.rand(len(train)) - 0.5) + 0.5
    yhat_tourn = 0.2 * (rng.rand(len(tourn)) - 0.5) + 0.5
    pred1 = Prediction()
    pred1.append(train.ids, yhat_train)
    pred2 = Prediction()
    pred2.append(tourn.ids, yhat_tourn)
    combined = pred1 + pred2  # just make sure that it runs
    # adding predictions with overlapping ids must raise
    assert_raises(IndexError, combined.__add__, pred1)
    assert_raises(IndexError, pred1.__add__, pred1)