def test_multiple_leagues_multiple_seasons(self):
    # Loads three CSVs (E0-14, B1-14, E0-15), merges them into one frame and
    # checks the normalized goal-difference feature at rows 0 and 169.
    # Every file is loaded under league_name "e0_tests", including B1-14.
    feature = "LeagueGoalsDifferenceNormalizedGenerator"

    def load(season_code):
        # The CSV file name and the season label share the same code.
        return FootballDataReader.game_list_by_url(
            url=BaseTestCase.base_url() + "/" + season_code + ".csv",
            league_name="e0_tests",
            season=season_code,
        )

    game_list = load('E0-14')
    other_league = load('B1-14')
    second_season = load('E0-15')

    # Row order after the merge: E0-14, then E0-15, then B1-14.
    frames = [game_list.games_df, second_season.games_df, other_league.games_df]
    game_list.games_df = pandas.concat(frames, ignore_index=True)

    result = LeagueGoalsDifferenceNormalizedGenerator().calculate_feature(
        game_list, ignore_cache=True)

    # (row label, rounding digits, expected rounded value)
    for row, digits, expected in ((0, 2, 0.38), (169, 3, 0.377)):
        self.assertEqual(round(result.games_df.loc[row, feature], digits), expected)
def all_game_lists(csv_file_names, base_csv_folder_url):
    """Load every CSV listed in the given index files into one GameList.

    For each name in ``csv_file_names`` the file at
    ``base_csv_folder_url + name`` is opened as text. Every non-blank line
    of that file is passed as ``url`` to
    ``FootballDataReader.game_list_by_url`` with the file name as
    ``league_name``.

    Args:
        csv_file_names: Iterable of index-file names; each name doubles as
            the league name.
        base_csv_folder_url: Prefix joined to each name by plain string
            concatenation, so it must already end with a path separator
            if one is needed.

    Returns:
        A GameList named "All Games" whose ``games_df`` holds the rows of
        all loaded game lists. Newly loaded frames are placed before the
        ones loaded earlier.
    """
    all_games = GameList("All Games", pd.DataFrame())
    for current_league_name in csv_file_names:
        print("- Loading CSVs for league: " + current_league_name)
        # The context manager closes the index file even if a load fails.
        with open(base_csv_folder_url + current_league_name, 'r') as index_file:
            for line in index_file:
                print(line, end='')
                # Drop the trailing newline so it does not end up in the URL,
                # and ignore blank lines instead of trying to load them.
                url = line.strip()
                if not url:
                    continue
                temp_game_list = FootballDataReader.game_list_by_url(
                    url=url, league_name=current_league_name)
                if all_games.games_df.empty:
                    all_games.games_df = temp_game_list.games_df
                else:
                    all_games.games_df = pd.concat(
                        [temp_game_list.games_df, all_games.games_df],
                        ignore_index=True)
        print("\n")
    return all_games
def test_calculate_mamy_games_with_period(self):
    # Runs DrawsPercentageGenerator(2) on draw_percentage.csv and checks the
    # "DrawsPercentageGeneratorPeriod" column for the first five rows.
    # NOTE(review): the constructor argument 2 is presumably the period - confirm.
    BaseTestCase.clean_cache_files(self, ".*draw_perce.*dat")

    # Arrange
    source_url = BaseTestCase.base_url() + "/draw_percentage.csv"
    game_list = FootballDataReader.game_list_by_url(url=source_url, league_name="tests")

    # Act
    generator = DrawsPercentageGenerator(2)
    draw_percentage = generator.calculate_feature(game_list, ignore_cache=True)

    # Assert: expected value per row label, rows 0 to 4 in order.
    column = "DrawsPercentageGeneratorPeriod"
    for row, expected in enumerate((-1, 0, 0, 0.5, 1)):
        self.assertEqual(draw_percentage.games_df.loc[row, column], expected)
def test_calculate_feature__many_games_one_season(self):
    # Checks the "LowScoringTeamsGenerator" column at rows 0 and 5 after
    # running LeaguePointsGenerator followed by LowScoringTeamsGenerator.
    # The generators' return values are discarded and the assertions read
    # game_list.games_df, so the test relies on game_list being updated in place.

    # Arrange
    game_list = FootballDataReader.game_list_by_url(
        url=BaseTestCase.base_url() + "/many_games_low_scoring.csv",
        league_name="many_games")
    LeaguePointsGenerator().calculate_feature(game_list=game_list, ignore_cache=True)

    # Act
    LowScoringTeamsGenerator().calculate_feature(game_list, ignore_cache=True)

    # Assert
    # The expected values come from float arithmetic, so compare with a
    # tolerance (7 decimal places by default) instead of exact equality,
    # which would depend on the order the operations were evaluated in.
    column = "LowScoringTeamsGenerator"
    first_expected = 1 + 1 / 3
    sixth_expected = (
        ((1 + 1 / 3) * 1 + 3.5) / 3 + ((1 + 1 / 3) * 1 + 0.5) / 3
    ) / 2
    self.assertAlmostEqual(game_list.games_df.loc[0, column], first_expected)
    self.assertAlmostEqual(game_list.games_df.loc[5, column], sixth_expected)
def test_calculate_feature__single_game(self):
    # Runs LeaguePointsGenerator on single_game.csv and checks that every
    # season counter on row 0 is zero, MaxGamesPerSeason is 1 and the
    # resulting frame has exactly one row.

    # Arrange
    source_url = BaseTestCase.base_url() + "/single_game.csv"
    game_list = FootballDataReader.game_list_by_url(
        url=source_url, league_name="tests")

    # Act
    game_list_with_points = LeaguePointsGenerator().calculate_feature(
        game_list=game_list, ignore_cache=True)

    # Assert
    self.assertTrue(bool(game_list_with_points))
    expected_first_row = (
        ("AwayTeamLeaguePoints", 0),
        ("HomeTeamLeaguePoints", 0),
        ("HomeTeamGamesPlayedInSeason", 0),
        ("AwayTeamGamesPlayedInSeason", 0),
        ("HomeTeamGoalsScoredInSeason", 0),
        ("AwayTeamGoalsScoredInSeason", 0),
        ("MaxGamesPerSeason", 1),
    )
    for column, expected in expected_first_row:
        self.assertEqual(game_list_with_points.games_df.loc[0, column], expected)
    self.assertEqual(len(game_list_with_points.games_df.index), 1)
def test_single_league_single_season(self):
    # Computes the normalized goal-difference feature for E0-14.csv alone
    # and checks the rounded value at rows 0 and 169.
    feature = "LeagueGoalsDifferenceNormalizedGenerator"
    source_url = BaseTestCase.base_url() + "/E0-14.csv"
    game_list = FootballDataReader.game_list_by_url(
        url=source_url, league_name="e0_tests")

    generator = LeagueGoalsDifferenceNormalizedGenerator()
    result = generator.calculate_feature(game_list, ignore_cache=True)

    # (row label, rounding digits, expected rounded value)
    checks = ((0, 2, 0.38), (169, 3, 0.377))
    for row, digits, expected in checks:
        self.assertEqual(round(result.games_df.loc[row, feature], digits), expected)
def test_empty_df(self):
    # Smoke test: feeds the generator a frame that keeps the E0-14 columns
    # but has no rows. There are no assertions; the test passes as long as
    # calculate_feature does not raise.
    # NOTE(review): ignore_cache is not passed here, unlike the other calls
    # to this generator - confirm that using the default is intended.
    source_url = BaseTestCase.base_url() + "/E0-14.csv"
    no_games = FootballDataReader.game_list_by_url(
        url=source_url, league_name="e0_tests")

    # Keep the column layout, drop every row.
    column_names = no_games.games_df.columns
    no_games.games_df = pandas.DataFrame(columns=column_names)

    generator = LeagueGoalsDifferenceNormalizedGenerator()
    generator.calculate_feature(no_games)
def test_calculate_feature__many_games(self):
    # Runs LeaguePointsGenerator, PointsDifferenceGenerator and
    # DistanceFromTopGenerator in sequence on many_games.csv, then checks
    # league points, per-season counters, the "Draw" flags and the two
    # derived columns on the resulting four-row frame.
    BaseTestCase.clean_cache_files(self, ".*many_games.*dat")

    # Arrange
    source_url = BaseTestCase.base_url() + "/many_games.csv"
    game_list = FootballDataReader.game_list_by_url(
        url=source_url, league_name="tests")

    # Act: each generator receives the output of the previous one.
    game_list_with_points = game_list
    pipeline = (
        LeaguePointsGenerator,
        PointsDifferenceGenerator,
        DistanceFromTopGenerator,
    )
    for generator_cls in pipeline:
        game_list_with_points = generator_cls().calculate_feature(
            game_list=game_list_with_points, ignore_cache=True)

    # Assert
    self.assertTrue(bool(game_list_with_points))

    # (row label, column, expected value)
    expected_cells = (
        (0, "AwayTeamLeaguePoints", 0),
        (0, "HomeTeamLeaguePoints", 0),
        (1, "AwayTeamLeaguePoints", 0),
        (1, "HomeTeamLeaguePoints", 3),
        (2, "AwayTeamLeaguePoints", 0),
        (2, "HomeTeamLeaguePoints", 0),
        (3, "AwayTeamLeaguePoints", 6),
        (3, "HomeTeamLeaguePoints", 1),
        (0, "HomeTeamGamesPlayedInSeason", 0),
        (0, "AwayTeamGamesPlayedInSeason", 0),
        (0, "HomeTeamGoalsScoredInSeason", 0),
        (0, "AwayTeamGoalsScoredInSeason", 0),
        (0, "MaxGamesPerSeason", 3),
        (1, "HomeTeamGamesPlayedInSeason", 1),
        (1, "AwayTeamGamesPlayedInSeason", 0),
        (1, "HomeTeamGoalsScoredInSeason", 2),
        (1, "AwayTeamGoalsScoredInSeason", 0),
        (1, "MaxGamesPerSeason", 3),
        (3, "HomeTeamGamesPlayedInSeason", 2),
        (3, "AwayTeamGamesPlayedInSeason", 2),
        (3, "HomeTeamGoalsScoredInSeason", 6),
        (3, "AwayTeamGoalsScoredInSeason", 4),
        (3, "MaxGamesPerSeason", 3),
    )
    for row, column, expected in expected_cells:
        self.assertEqual(
            game_list_with_points.games_df.loc[row, column], expected)

    self.assertEqual(len(game_list_with_points.games_df.index), 4)

    # Expected "Draw" flag per row, in row order.
    for index, is_draw in enumerate([False, False, True, True]):
        self.assertEqual(is_draw, game_list_with_points.games_df["Draw"][index])

    # todo move to a different test class
    for column, expected in (("LeaguePointsDiff", 5), ("DistanceFromTop", 1.25)):
        self.assertEqual(
            game_list_with_points.games_df.loc[3, column], expected)
def test_calculate_feature__many_games_different_seasons(self):
    # Loads many_games.csv twice, under seasons "0910" and "1011", stacks
    # the two frames and runs LeaguePointsGenerator. The assertions expect
    # the last row of the second copy (row 7) to carry the same values as
    # the last row of the first copy (row 3).
    BaseTestCase.clean_cache_files(self, ".*many_games.*dat")

    # Arrange
    def load(season_label):
        return FootballDataReader.game_list_by_url(
            url=BaseTestCase.base_url() + "/many_games.csv",
            league_name="tests",
            season=season_label)

    game_list = load("0910")
    game_list1 = load("1011")
    stacked_frames = [game_list.games_df, game_list1.games_df]
    game_list.games_df = pd.concat(stacked_frames, ignore_index=True)

    # Act
    game_list_with_points = LeaguePointsGenerator().calculate_feature(
        game_list=game_list, ignore_cache=True)

    # Assert
    self.assertTrue(bool(game_list_with_points))

    # (row label, column, expected value)
    expected_points = (
        (0, "AwayTeamLeaguePoints", 0),
        (0, "HomeTeamLeaguePoints", 0),
        (1, "AwayTeamLeaguePoints", 0),
        (1, "HomeTeamLeaguePoints", 3),
        (2, "AwayTeamLeaguePoints", 0),
        (2, "HomeTeamLeaguePoints", 0),
        (3, "AwayTeamLeaguePoints", 6),
        (3, "HomeTeamLeaguePoints", 1),
        (7, "AwayTeamLeaguePoints", 6),
        (7, "HomeTeamLeaguePoints", 1),
    )
    for row, column, expected in expected_points:
        self.assertEqual(
            game_list_with_points.games_df.loc[row, column], expected)

    self.assertEqual(len(game_list_with_points.games_df.index), 8)

    expected_counters = (
        (0, "HomeTeamGamesPlayedInSeason", 0),
        (0, "AwayTeamGamesPlayedInSeason", 0),
        (0, "HomeTeamGoalsScoredInSeason", 0),
        (0, "AwayTeamGoalsScoredInSeason", 0),
        (0, "MaxGamesPerSeason", 3),
        (3, "HomeTeamGamesPlayedInSeason", 2),
        (3, "AwayTeamGamesPlayedInSeason", 2),
        (3, "HomeTeamGoalsScoredInSeason", 6),
        (3, "AwayTeamGoalsScoredInSeason", 4),
        (3, "MaxGamesPerSeason", 3),
        (7, "HomeTeamGamesPlayedInSeason", 2),
        (7, "AwayTeamGamesPlayedInSeason", 2),
        (7, "HomeTeamGoalsScoredInSeason", 6),
        (7, "AwayTeamGoalsScoredInSeason", 4),
        (7, "MaxGamesPerSeason", 3),
    )
    for row, column, expected in expected_counters:
        self.assertEqual(
            game_list_with_points.games_df.loc[row, column], expected)