def test_is_correct(self):
    with self.subTest("when higher-scoring team is predicted winner"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.home_team,
            predicted_margin=50,
        )

        self.assertTrue(prediction.is_correct)

    with self.subTest("when lower-scoring team is predicted winner"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=50,
        )

        self.assertFalse(prediction.is_correct)

    with self.subTest("when match is a draw"):
        self.match.teammatch_set.update(score=100)

        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=50,
        )

        self.assertTrue(prediction.is_correct)
def _make_predictions(self) -> None:
    predictions = self.data_importer.fetch_match_predictions(
        self._year_range, ml_models=[self.ml_model], train_models=True
    )

    for pred in predictions:
        Prediction.update_or_create_from_raw_data(pred)

    if self.verbose == 1:
        print("\nPredictions saved!")
def __make_predictions(self, year: int, round_number: int) -> None:
    predictions = self.data_importer.fetch_prediction_data(
        (year, year + 1), round_number=round_number, ml_models=self.ml_models
    )

    home_away_df = pivot_team_matches_to_matches(predictions)

    for pred in home_away_df.to_dict("records"):
        Prediction.update_or_create_from_data(pred)

    if self.verbose == 1:
        print("Predictions saved!\n")
def test_validation(self):
    with self.subTest("when predicted margin is negative"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=-50,
        )

        with self.assertRaises(ValidationError):
            prediction.full_clean()
def main():
    """One-off script to import Footy Tipper predictions to the DB."""
    with open(
        os.path.join(PROJECT_PATH, "data/footy_tipper_predictions_2018.json"),
        "r",
        encoding="utf-8",
    ) as file:
        predictions = json.load(file)

    for pred in predictions:
        Prediction.update_or_create_from_raw_data(pred, future_only=False)
def __make_predictions(self, year_range: Tuple[int, int]) -> None:
    predictions = self.data_importer.fetch_prediction_data(
        year_range, verbose=self.verbose
    )

    home_away_df = pivot_team_matches_to_matches(predictions)

    for pred in home_away_df.to_dict("records"):
        Prediction.update_or_create_from_data(pred)

    if self.verbose == 1:
        print("\nPredictions saved!")
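# Minimal, self-contained sketch (not the project's implementation) of the
# reshaping that pivot_team_matches_to_matches appears to perform: team-level
# prediction rows are pivoted into one row per match, with home_/away_-prefixed
# columns. The "match_id" and "at_home" column names are assumptions made
# purely for illustration.
import pandas as pd


def pivot_team_matches_to_matches_sketch(team_matches: pd.DataFrame) -> pd.DataFrame:
    # Split team-level rows by home/away status, prefix their columns,
    # then join them back together on the shared match identifier.
    home = (
        team_matches[team_matches["at_home"] == 1]
        .set_index("match_id")
        .add_prefix("home_")
    )
    away = (
        team_matches[team_matches["at_home"] == 0]
        .set_index("match_id")
        .add_prefix("away_")
    )

    return home.join(away).reset_index()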
def update_future_match_predictions(predictions: List[CleanPredictionData]) -> None:
    """Update or create prediction records for upcoming matches."""
    future_match_count = Match.objects.filter(
        start_date_time__gt=timezone.now()
    ).count()

    assert future_match_count > 0, (
        "No future matches exist in the DB. Try updating fixture data, "
        "then updating predictions again."
    )

    for pred in predictions:
        Prediction.update_or_create_from_raw_data(pred, future_only=True)
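# Side note, sketched here rather than taken from the project: a bare `assert`
# is stripped when Python runs with the -O flag, so a caller that must always
# enforce the "future matches exist" precondition could raise explicitly
# instead. The helper name below is hypothetical.
def _validate_future_matches_exist() -> None:
    if not Match.objects.filter(start_date_time__gt=timezone.now()).exists():
        raise ValueError(
            "No future matches exist in the DB. Try updating fixture data, "
            "then updating predictions again."
        )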
def test_clean(self):
    with self.subTest("when predicted margin and win probability are None"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=None,
            predicted_win_probability=None,
        )

        with self.assertRaisesMessage(
            ValidationError,
            "Prediction must have a predicted_margin or predicted_win_probability.",
        ):
            prediction.clean()

    with self.subTest("when predicted margin and win probability are both numbers"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=23,
            predicted_win_probability=0.23,
        )

        with self.assertRaisesMessage(
            ValidationError,
            "Prediction cannot have both a predicted_margin and "
            "predicted_win_probability.",
        ):
            prediction.clean()
def __update_predictions_correctness(match: Match) -> None:
    for prediction in match.prediction_set.all():
        prediction.is_correct = Prediction.calculate_whether_correct(
            match, prediction.predicted_winner
        )
        prediction.clean()
        prediction.save()
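# A rough alternative sketch, not how the project implements it: the same
# correctness update could be batched with Django's bulk_update to avoid one
# save() per prediction. Note that bulk_update bypasses clean() and save()
# signals, which is a deliberate trade-off of this sketch.
def _bulk_update_predictions_correctness(match: Match) -> None:
    predictions = list(match.prediction_set.all())

    for prediction in predictions:
        prediction.is_correct = Prediction.calculate_whether_correct(
            match, prediction.predicted_winner
        )

    # Persist all updated is_correct values in a single query.
    Prediction.objects.bulk_update(predictions, ["is_correct"])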
def main():
    """Recalculate and save is_correct for all existing predictions."""
    for prediction in Prediction.objects.select_related(
        "match", "predicted_winner"
    ).all():
        prediction.is_correct = Prediction.calculate_whether_correct(
            prediction.match, prediction.predicted_winner
        )
        prediction.save()
def __build_match_prediction(
    ml_model_record: MLModel, prediction_data: pd.DataFrame, match: Match
) -> Optional[Prediction]:
    home_team = match.teammatch_set.get(at_home=True).team
    away_team = match.teammatch_set.get(at_home=False).team

    predicted_home_margin = prediction_data.xs(home_team.name, level=0)[
        "predicted_margin"
    ].iloc[0]
    predicted_away_margin = prediction_data.xs(away_team.name, level=0)[
        "predicted_margin"
    ].iloc[0]

    # predicted_margin is always positive, because it's always associated
    # with predicted_winner
    predicted_margin = np.mean(np.abs([predicted_home_margin, predicted_away_margin]))

    if predicted_home_margin > predicted_away_margin:
        predicted_winner = home_team
    elif predicted_away_margin > predicted_home_margin:
        predicted_winner = away_team
    else:
        raise ValueError(
            "Predicted home and away margins are equal, which is basically "
            "impossible, so figure out what's going on:\n"
            f"home_team = {home_team.name}\n"
            f"away_team = {away_team.name}\n"
            f"data = {prediction_data}"
        )

    prediction_attributes = {"match": match, "ml_model": ml_model_record}

    try:
        prediction = Prediction.objects.get(**prediction_attributes)
        prediction.predicted_margin = predicted_margin
        prediction.predicted_winner = predicted_winner
        prediction.clean_fields()
        prediction.clean()
        prediction.save()

        return None
    except Prediction.DoesNotExist:
        prediction = Prediction(
            predicted_margin=predicted_margin,
            predicted_winner=predicted_winner,
            **prediction_attributes,
        )

        return prediction
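# Minimal, self-contained illustration of the prediction_data shape assumed
# above: a DataFrame whose first index level is the team name, so each team's
# predicted margin can be selected with .xs(team_name, level=0). The index
# levels and numbers below are invented purely for illustration.
import numpy as np
import pandas as pd

index = pd.MultiIndex.from_tuples(
    [("Richmond", 2018, 5), ("Melbourne", 2018, 5)],
    names=["team", "year", "round_number"],
)
prediction_data = pd.DataFrame({"predicted_margin": [12.3, -12.3]}, index=index)

home_margin = prediction_data.xs("Richmond", level=0)["predicted_margin"].iloc[0]
away_margin = prediction_data.xs("Melbourne", level=0)["predicted_margin"].iloc[0]

# The margin is stored as a positive number attached to the predicted winner.
predicted_margin = np.mean(np.abs([home_margin, away_margin]))  # 12.3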
def setUp(self):
    self.maxDiff = None
    self.client = Client(schema)

    home_team = Team(name="Richmond")
    home_team.save()
    away_team = Team(name="Melbourne")
    away_team.save()

    match_datetime = timezone.make_aware(datetime(2018, 5, 5))
    new_match = Match(start_date_time=match_datetime, round_number=5)
    new_match.save()

    match_datetime = timezone.make_aware(datetime(2014, 5, 5))
    old_match = Match(start_date_time=match_datetime, round_number=7)
    old_match.save()

    TeamMatch(team=home_team, match=new_match, at_home=True, score=150).save()
    TeamMatch(team=away_team, match=new_match, at_home=False, score=100).save()
    TeamMatch(team=home_team, match=old_match, at_home=True, score=150).save()
    TeamMatch(team=away_team, match=old_match, at_home=False, score=100).save()

    ml_model = MLModel(name="test_model")
    ml_model.save()

    new_prediction = Prediction(
        match=new_match,
        ml_model=ml_model,
        predicted_winner=home_team,
        predicted_margin=50,
    )
    new_prediction.save()

    old_prediction = Prediction(
        match=old_match,
        ml_model=ml_model,
        predicted_winner=away_team,
        predicted_margin=50,
    )
    old_prediction.save()
def test_clean(self): with self.subTest("when predicted margin rounds to 0"): prediction = Prediction( match=self.match, ml_model=self.ml_model, predicted_winner=self.away_team, predicted_margin=0.2, ) prediction.clean() self.assertEqual(1, prediction.predicted_margin) with self.subTest("when predicted margin is a float"): prediction = Prediction( match=self.match, ml_model=self.ml_model, predicted_winner=self.away_team, predicted_margin=65.7, ) prediction.clean() self.assertEqual(66, prediction.predicted_margin)
def __build_match_prediction(
    ml_model_record: MLModel, prediction_data: pd.DataFrame, match: Match
) -> Prediction:
    home_team = match.teammatch_set.get(at_home=True).team
    away_team = match.teammatch_set.get(at_home=False).team

    match_prediction = prediction_data.loc[
        ([home_team.name, away_team.name], match.year, match.round_number),
        "predicted_margin",
    ]

    predicted_home_margin = match_prediction.loc[home_team.name].iloc[0]
    predicted_away_margin = match_prediction.loc[away_team.name].iloc[0]

    # predicted_margin is always positive, because it's always associated
    # with predicted_winner
    predicted_margin = match_prediction.abs().mean()

    if predicted_home_margin > predicted_away_margin:
        predicted_winner = home_team
    elif predicted_away_margin > predicted_home_margin:
        predicted_winner = away_team
    else:
        raise ValueError(
            "Predicted home and away margins are equal, which is basically "
            "impossible, so figure out what's going on:\n"
            f"home_team = {home_team.name}\n"
            f"away_team = {away_team.name}\n"
            f"data = {match_prediction}"
        )

    prediction = Prediction(
        match=match,
        ml_model=ml_model_record,
        predicted_margin=predicted_margin,
        predicted_winner=predicted_winner,
    )

    prediction.clean_fields()
    prediction.clean()

    return prediction
def test_convert_data_to_record(self):
    data = fake_prediction_data(self.match, ml_model_name=self.ml_model.name)
    home_away_df = pivot_team_matches_to_matches(pd.DataFrame(data))

    self.assertEqual(Prediction.objects.count(), 0)
    Prediction.update_or_create_from_data(home_away_df.to_dict("records")[0])
    self.assertEqual(Prediction.objects.count(), 1)

    with self.subTest("when prediction record already exists"):
        predicted_margin = 100
        home_away_df.loc[:, "home_predicted_margin"] = predicted_margin
        home_away_df.loc[:, "away_predicted_margin"] = -predicted_margin

        Prediction.update_or_create_from_data(home_away_df.to_dict("records")[0])

        self.assertEqual(Prediction.objects.count(), 1)

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, predicted_margin)

    # Regression tests for a bug that caused update_or_create_from_data
    # to select the wrong team as predicted_winner when predicted_margin
    # was greater than the away team's predicted winning margin
    with self.subTest(
        "when predicted margins are skewed with large home losing margin"
    ):
        predicted_winning_margin = 100
        predicted_losing_margin = -200
        home_away_df.loc[:, "home_predicted_margin"] = predicted_losing_margin
        home_away_df.loc[:, "away_predicted_margin"] = predicted_winning_margin

        Prediction.update_or_create_from_data(home_away_df.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 150)
        self.assertEqual(
            home_away_df["away_team"].iloc[0], prediction.predicted_winner.name
        )

    with self.subTest(
        "when predicted margins are skewed with large away losing margin"
    ):
        predicted_winning_margin = 100
        predicted_losing_margin = -200
        home_away_df.loc[:, "home_predicted_margin"] = predicted_winning_margin
        home_away_df.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_data(home_away_df.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 150)
        self.assertEqual(
            home_away_df["home_team"].iloc[0], prediction.predicted_winner.name
        )

    with self.subTest("when predicted margins are less than 0.5"):
        predicted_winning_margin = 0.4
        predicted_losing_margin = -0.4
        home_away_df.loc[:, "home_predicted_margin"] = predicted_winning_margin
        home_away_df.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_data(home_away_df.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 1)
        self.assertEqual(
            home_away_df["home_team"].iloc[0], prediction.predicted_winner.name
        )
def test_update_or_create_from_raw_data(self):
    data = data_factories.fake_prediction_data(
        self.match, ml_model_name=self.ml_model.name
    )

    with self.subTest("when future_only is True"):
        with self.subTest("and the match has already been played"):
            self.assertLess(self.match.start_date_time, timezone.now())
            self.assertEqual(Prediction.objects.count(), 0)

            Prediction.update_or_create_from_raw_data(
                data.to_dict("records")[0], future_only=True
            )

            # It doesn't create a prediction
            self.assertEqual(Prediction.objects.count(), 0)

        with self.subTest("and the match hasn't been played yet"):
            future_match = Match.objects.create(
                start_date_time=(timezone.now() + timedelta(days=1)),
                round_number=5,
                venue="Corporate Stadium",
            )
            future_home_team = Team.objects.create(name="Collingwood")
            future_away_team = Team.objects.create(name="GWS")
            future_match.teammatch_set.create(
                team=future_home_team, at_home=True, score=0
            )
            future_match.teammatch_set.create(
                team=future_away_team, at_home=False, score=0
            )

            future_data = data_factories.fake_prediction_data(
                future_match, ml_model_name=self.ml_model.name
            )

            Prediction.update_or_create_from_raw_data(
                future_data.to_dict("records")[0], future_only=True
            )

            # It creates a prediction
            self.assertEqual(Prediction.objects.count(), 1)

            Prediction.objects.all().delete()

    self.assertEqual(Prediction.objects.count(), 0)

    Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

    self.assertEqual(Prediction.objects.count(), 1)

    prediction = Prediction.objects.first()
    self.assertIsInstance(prediction.predicted_margin, float)
    self.assertIsNone(prediction.predicted_win_probability)

    with self.subTest("when prediction record already exists"):
        predicted_margin = 100
        data.loc[:, "home_predicted_margin"] = predicted_margin
        data.loc[:, "away_predicted_margin"] = -predicted_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        self.assertEqual(Prediction.objects.count(), 1)

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, predicted_margin)

    # Regression tests for a bug that caused update_or_create_from_raw_data
    # to select the wrong team as predicted_winner when predicted_margin
    # was greater than the away team's predicted winning margin
    with self.subTest(
        "when predicted margins are skewed with large home losing margin"
    ):
        predicted_winning_margin = 100
        predicted_losing_margin = -200
        data.loc[:, "home_predicted_margin"] = predicted_losing_margin
        data.loc[:, "away_predicted_margin"] = predicted_winning_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 150)
        self.assertEqual(data["away_team"].iloc[0], prediction.predicted_winner.name)

    with self.subTest(
        "when predicted margins are skewed with large away losing margin"
    ):
        predicted_winning_margin = 100
        predicted_losing_margin = -200
        data.loc[:, "home_predicted_margin"] = predicted_winning_margin
        data.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 150)
        self.assertEqual(data["home_team"].iloc[0], prediction.predicted_winner.name)

    with self.subTest("when predicted margins are less than 0.5"):
        predicted_winning_margin = 0.4
        predicted_losing_margin = -0.4
        data.loc[:, "home_predicted_margin"] = predicted_winning_margin
        data.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 0.4)
        self.assertEqual(data["home_team"].iloc[0], prediction.predicted_winner.name)

    with self.subTest("when predicted margins are both positive"):
        predicted_winning_margin = 20.6
        predicted_losing_margin = 10.6
        data.loc[:, "home_predicted_margin"] = predicted_winning_margin
        data.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 10)
        self.assertEqual(data["home_team"].iloc[0], prediction.predicted_winner.name)

    with self.subTest("when predicted margins are both negative"):
        predicted_winning_margin = -10.6
        predicted_losing_margin = -20.6
        data.loc[:, "home_predicted_margin"] = predicted_winning_margin
        data.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 10)
        self.assertEqual(data["home_team"].iloc[0], prediction.predicted_winner.name)

    with self.subTest("when the calculated predicted_margin rounds up"):
        predicted_winning_margin = 5.8
        predicted_losing_margin = -5.7
        data.loc[:, "home_predicted_margin"] = predicted_winning_margin
        data.loc[:, "away_predicted_margin"] = predicted_losing_margin

        Prediction.update_or_create_from_raw_data(data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertEqual(prediction.predicted_margin, 5.75)
        self.assertEqual(data["home_team"].iloc[0], prediction.predicted_winner.name)

    proba_data = data_factories.fake_prediction_data(
        self.match, ml_model_name=self.ml_model.name, predict_margin=False
    )

    with self.subTest("when predicting win probability"):
        Prediction.update_or_create_from_raw_data(proba_data.to_dict("records")[0])

        prediction = Prediction.objects.first()
        self.assertIsInstance(prediction.predicted_win_probability, float)
        self.assertIsNone(prediction.predicted_margin)

    with self.subTest("when '*_predicted_win_probability' is missing"):
        Prediction.update_or_create_from_raw_data(
            data.drop(
                [
                    "home_predicted_win_probability",
                    "away_predicted_win_probability",
                ],
                axis=1,
            ).to_dict("records")[0]
        )

        self.assertIsNotNone(Prediction.objects.first())

    with self.subTest("when '*_predicted_margin' is missing"):
        Prediction.update_or_create_from_raw_data(
            proba_data.drop(
                ["home_predicted_margin", "away_predicted_margin"], axis=1
            ).to_dict("records")[0]
        )

        self.assertIsNotNone(Prediction.objects.first())
def test_update_correctness(self):
    with self.subTest("when higher-scoring team is predicted winner"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.home_team,
            predicted_margin=50,
        )
        prediction.update_correctness()

        self.assertTrue(prediction.is_correct)

    with self.subTest("when lower-scoring team is predicted winner"):
        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=50,
        )
        prediction.update_correctness()

        self.assertFalse(prediction.is_correct)

    with self.subTest("when match is a draw"):
        self.match.teammatch_set.update(score=100)

        prediction = Prediction(
            match=self.match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=50,
        )
        prediction.update_correctness()

        self.assertTrue(prediction.is_correct)

    with self.subTest("when match hasn't been played yet"):
        match_datetime = timezone.make_aware(datetime.today() + timedelta(days=5))
        unplayed_match = Match.objects.create(
            start_date_time=match_datetime,
            round_number=5,
            venue="Corporate Stadium",
        )

        prediction = Prediction(
            match=unplayed_match,
            ml_model=self.ml_model,
            predicted_winner=self.away_team,
            predicted_margin=50,
        )
        prediction.update_correctness()

        self.assertIsNone(prediction.is_correct)