def evaluate_fit(df, fit_fn, predict_fn, repeat=10):
    """Evaluate a model instance (fitter) with cross-validation.

    Arguments:
        df: A `pandas` dataframe containing the cases to test the model on.
            These cases are shuffled between cross-validation runs. Must
            contain a `survived` column holding the true outcomes.
        fit_fn: A one-argument function that takes in training data as a
            `pandas` dataframe and returns a fitted model (see
            `evaluation.cross_validate`).
        predict_fn: A two-argument function that takes in the fitted model
            instance and a test case and returns a prediction.
        repeat: The number of times to repeat cross-validation.

    Returns:
        score: The mean Brier score for the category between cross-validation
            runs.
        errors: A list of all signed error values measured (across cross-
            validation runs).
    """
    subscores, errors = [], []
    for _ in range(repeat):
        # Reshuffle the rows so each cross-validation run uses different folds.
        df = df.sample(n=len(df))
        forecast = cross_validate(df, fit_fn, predict_fn)
        # `.loc` replaces the `.ix` indexer, which was removed in pandas 1.0.
        predictions = [forecast.loc[index].prediction for index in df.index]
        # `.to_numpy()` replaces `.as_matrix()`, which was removed in pandas 1.0.
        outcomes = df.survived.to_numpy().astype(int)
        subscores.append(compute_brier_score(predictions, outcomes))
        # Signed per-case errors, accumulated across all runs.
        errors += list(forecast.prediction - outcomes)
    return np.mean(subscores), errors
def test_brier_score(self):
    """Verify compute_brier_score against hand-computed expected values."""
    cases = [
        (([1, 0], [0.5, 0.5]), 0.25),
        (([1, 0, 0], [0.75, 0.25, 0.25]), 0.0625),
    ]
    for args, expected in cases:
        self.assertAlmostEqual(compute_brier_score(*args), expected)