def setup_method(self, *args, **kwargs):
    """Build the shared fixture: expected game values, stubs, and a schedule.

    Stores the expected attribute values of one game in ``self.results``,
    stubs the date lookup and the boxscore parsing, and then creates the
    'KANSAS' schedule in ``self.schedule``. Extra positional and keyword
    arguments are accepted and ignored.
    """
    # Expected attributes of the game the tests look up at index 1.
    expected_game = dict(
        game=2,
        boxscore_index='2017-11-14-21-kansas',
        date='Tue, Nov 14, 2017',
        time='9:30 pm/est',
        datetime=datetime(2017, 11, 14, 21, 30),
        type=REGULAR_SEASON,
        location=NEUTRAL,
        opponent_abbr='kentucky',
        opponent_name='Kentucky',
        opponent_rank=7,
        opponent_conference='SEC',
        result=WIN,
        points_for=65,
        points_against=61,
        overtimes=0,
        season_wins=2,
        season_losses=0,
        streak='W 2',
        arena='United Center',
    )
    self.results = expected_game

    # Freeze "today" to the mocked year and month.
    (flexmock(utils)
        .should_receive('_todays_date')
        .and_return(MockDateTime(YEAR, MONTH)))

    # Boxscore parsing is stubbed out; its dataframe is a one-row placeholder.
    boxscore_parse = flexmock(Boxscore).should_receive('_parse_game_data')
    boxscore_parse.and_return(None)
    boxscore_frame = flexmock(Boxscore).should_receive('dataframe')
    boxscore_frame.and_return(pd.DataFrame([{'key': 'value'}]))

    self.schedule = Schedule('KANSAS')
def _pull_team_stats(self):
    """Append every team's extended schedule for seasons 2015-2020 to ``self.df``.

    For each year in ``range(2015, 2021)`` and each team in ``self.teams``
    the extended schedule dataframe is fetched and collected. A team/year
    whose request raises ``HTTPError`` is reported with a print and skipped.
    The collected frames are concatenated onto ``self.df`` in a single call.
    """
    # Local import keeps the block self-contained; ``DataFrame.append`` was
    # removed in pandas 2.0, so ``pd.concat`` is used instead.
    import pandas as pd

    frames = [self.df]
    try:
        for year in range(2015, 2021):
            for team in self.teams:
                try:
                    frame = Schedule(team, year).dataframe_extended
                except HTTPError:
                    print("HTTP Error")
                    continue
                # Nothing to add when no extended dataframe is available.
                if frame is not None:
                    frames.append(frame)
    finally:
        # Concatenate once instead of re-copying the growing frame on every
        # iteration. Doing it in ``finally`` keeps whatever was fetched so far
        # if an unexpected error or interrupt aborts the loop.
        self.df = pd.concat(frames)
def check_if_game_today():
    """Report whether the 'san-diego-state' schedule has a game dated today.

    Prints the current date, then scans the schedule. For the first game whose
    date equals today's local date it prints the game details and stops.

    Returns:
        bool: True if a game with today's date was found, otherwise False.
    """
    today = datetime.datetime.now().strftime("%Y-%m-%d")
    print("Current date:")
    print(today)
    print()

    for game in Schedule('san-diego-state'):
        game_date = game.datetime.strftime("%Y-%m-%d")
        if game_date != today:
            continue

        # Shift the listed hour back three hours (presumably Eastern to
        # Pacific; confirm against the schedule's time zone). The modulo keeps
        # the result in 0-23 instead of going negative for games before 03:00.
        game_hour_pst = (game.datetime.hour - 3) % 24

        print("Hey there's a game today!")
        print(game.datetime)
        print(game_date)
        print(game_hour_pst)
        print(game.opponent_name)
        return True

    return False
def test_empty_page_return_no_games(self):
    """A schedule built when the stats table lookup returns None has no games."""
    stubbed_utils = flexmock(utils)
    # The "no data" hook is expected to be called exactly once.
    stubbed_utils.should_receive('_no_data_found').once()
    # Simulate a page without a stats table.
    stubbed_utils.should_receive('_get_stats_table').and_return(None)

    empty_schedule = Schedule('KANSAS')

    assert len(empty_schedule) == 0
def test_no_dataframes_extended_returns_none(self):
    """``dataframe_extended`` is None when no game provides an extended frame.

    ``_pull_schedule`` is stubbed so nothing is fetched, and the schedule's
    ``__iter__`` is replaced so that iteration is driven by a fake game whose
    ``dataframe_extended`` is None.
    """
    flexmock(Schedule) \
        .should_receive('_pull_schedule') \
        .and_return(None)
    schedule = Schedule('PURDUE')
    fake_game = flexmock(dataframe_extended=None)
    fake_games = PropertyMock(return_value=fake_game)

    # The PropertyMock is assigned on the class, not the instance, so it would
    # stay in place for every later test. Remember the real ``__iter__`` and
    # put it back once the assertion has run.
    schedule_cls = type(schedule)
    original_iter = schedule_cls.__iter__
    schedule_cls.__iter__ = fake_games
    try:
        assert schedule.dataframe_extended is None
    finally:
        schedule_cls.__iter__ = original_iter
def isNeutral(team, date):
    """Return True if the team's game on ``date`` is listed at a neutral site.

    The team name is first passed through ``nameConversion``. The team's
    schedule is then scanned for a game whose date matches ``date`` according
    to ``dateTransform``.

    Args:
        team: Team name, converted with ``nameConversion`` before the lookup.
        date: Date value in the format ``dateTransform`` accepts as its
            second argument.

    Returns:
        bool: True when a game on that date has location "Neutral". False when
        it has another location, when no game matches, or when the lookup
        fails (a message is printed in that case).
    """
    team = nameConversion(team)
    try:
        for game in Schedule(team):
            if dateTransform(game.date, date):
                print("same date!")
                # The first game on the requested date decides the answer.
                return game.location == "Neutral"
    except Exception:
        # Best-effort lookup: report and fall through to False. Catching
        # ``Exception`` rather than using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit propagate.
        print("some error happened, possibly 404 not found")
    return False
def test_invalid_default_year_reverts_to_previous_year(self, *args, **kwargs):
    """With the season year stubbed to 2018, game index 1 has the expected values.

    Extra positional and keyword arguments are accepted and ignored.
    """
    expected_game = dict(
        game=2,
        boxscore_index='2017-11-14-21-kansas',
        date='Tue, Nov 14, 2017',
        time='9:30 pm/est',
        datetime=datetime(2017, 11, 14, 21, 30),
        type=REGULAR_SEASON,
        location=NEUTRAL,
        opponent_abbr='kentucky',
        opponent_name='Kentucky',
        opponent_rank=7,
        opponent_conference='SEC',
        result=WIN,
        points_for=65,
        points_against=61,
        overtimes=0,
        season_wins=2,
        season_losses=0,
        streak='W 2',
        arena='United Center',
    )

    # The season lookup is forced to 2018.
    (flexmock(utils)
        .should_receive('_find_year_for_season')
        .and_return(2018))
    # Boxscore parsing is stubbed out; its dataframe is a one-row placeholder.
    (flexmock(Boxscore)
        .should_receive('_parse_game_data')
        .and_return(None))
    (flexmock(Boxscore)
        .should_receive('dataframe')
        .and_return(pd.DataFrame([{'key': 'value'}])))

    schedule = Schedule('KANSAS')

    for field_name, expected_value in expected_game.items():
        assert getattr(schedule[1], field_name) == expected_value
def get_remaining_schedule(conference):
    """Collect the conference games that have not been played yet.

    For every team returned by ``teams_list(conference)`` the current number
    of conference wins is recorded and the team's schedule is scanned for
    games against another conference team that have no ``points_for`` yet.

    Args:
        conference: Conference description accepted by ``teams_list``.

    Returns:
        tuple: ``(schedule, current_records)`` where ``schedule`` is a sorted
        list of unique ``[home, away]`` pairs and ``current_records`` maps
        each team to the value of ``get_conference_wins(team)``.
    """
    # The member list does not change while scanning, so fetch it once instead
    # of once per game, and keep a set for constant-time membership tests.
    conference_teams = list(teams_list(conference))
    conference_members = set(conference_teams)

    # Each entry is [home, away].
    remaining_schedule = []
    current_records = {}

    for team in conference_teams:
        schedule = Schedule(team)
        current_records[team] = get_conference_wins(team)
        for game in schedule:
            # Only conference matchups that the team hasn't played yet.
            if game.points_for or game.opponent_abbr not in conference_members:
                continue
            # Compared case-insensitively so that both 'AWAY' and 'Away' are
            # recognised. NOTE(review): confirm which spelling the installed
            # Schedule uses for its location value.
            if str(game.location).upper() == 'AWAY':
                remaining_schedule.append([game.opponent_abbr, team])
            else:
                remaining_schedule.append([team, game.opponent_abbr])

    # Both teams list the same matchup, so sort and collapse equal neighbours.
    remaining_schedule.sort()
    unique_matchups = [pair for pair, _ in itertools.groupby(remaining_schedule)]
    return unique_matchups, current_records
class TestNCAABSchedule:
    """Tests for the NCAAB ``Schedule`` built for 'KANSAS' from mocked pages."""

    @mock.patch('requests.get', side_effect=mock_pyquery)
    def setup_method(self, *args, **kwargs):
        """Build the shared fixture: expected game values, stubs, and a schedule."""
        # Expected attributes of the game the tests look up at index 1.
        self.results = dict(
            game=2,
            boxscore_index='2017-11-14-21-kansas',
            date='Tue, Nov 14, 2017',
            time='9:30 pm/est',
            datetime=datetime(2017, 11, 14, 21, 30),
            type=REGULAR_SEASON,
            location=NEUTRAL,
            opponent_abbr='kentucky',
            opponent_name='Kentucky',
            opponent_rank=7,
            opponent_conference='SEC',
            result=WIN,
            points_for=65,
            points_against=61,
            overtimes=0,
            season_wins=2,
            season_losses=0,
            streak='W 2',
            arena='United Center',
        )

        # Freeze "today" to the mocked year and month.
        (flexmock(utils)
            .should_receive('_todays_date')
            .and_return(MockDateTime(YEAR, MONTH)))
        # Boxscore parsing is stubbed out; its dataframe is a placeholder row.
        (flexmock(Boxscore)
            .should_receive('_parse_game_data')
            .and_return(None))
        (flexmock(Boxscore)
            .should_receive('dataframe')
            .and_return(pd.DataFrame([{'key': 'value'}])))

        self.schedule = Schedule('KANSAS')

    def test_ncaab_schedule_returns_correct_number_of_games(self):
        """The schedule length equals NUM_GAMES_IN_SCHEDULE."""
        assert len(self.schedule) == NUM_GAMES_IN_SCHEDULE

    def test_ncaab_schedule_returns_requested_match_from_index(self):
        """Indexing the schedule with 1 yields the expected game."""
        second_game = self.schedule[1]

        for field_name, expected_value in self.results.items():
            assert getattr(second_game, field_name) == expected_value

    def test_ncaab_schedule_returns_requested_match_from_date(self):
        """Calling the schedule with the game's date yields the expected game."""
        second_game = self.schedule(datetime(2017, 11, 14))

        for field_name, expected_value in self.results.items():
            assert getattr(second_game, field_name) == expected_value

    def test_ncaab_schedule_dataframe_returns_dataframe(self):
        """A game's dataframe equals a one-row frame of the expected values."""
        expected_frame = pd.DataFrame([self.results], index=['KANSAS'])
        second_game = self.schedule[1]

        # The two frames are compared indirectly: stack the expected frame
        # and the generated one, then drop every row that occurs more than
        # once. Whatever is left exists in only one of the frames, so an
        # empty remainder means the two frames hold the same rows.
        stacked = pd.concat([expected_frame, second_game.dataframe])
        leftovers = stacked.drop_duplicates(keep=False)

        assert leftovers.empty

    def test_ncaab_schedule_dataframe_extended_returns_dataframe(self):
        """A game's extended dataframe equals the stubbed Boxscore frame."""
        expected_frame = pd.DataFrame([{'key': 'value'}])
        extended = self.schedule[1].dataframe_extended

        stacked = pd.concat([expected_frame, extended])
        leftovers = stacked.drop_duplicates(keep=False)

        assert leftovers.empty

    def test_ncaab_schedule_all_dataframe_returns_dataframe(self):
        """The schedule dataframe has one unique row per game and the expected columns."""
        unique_rows = self.schedule.dataframe.drop_duplicates(keep=False)

        assert len(unique_rows) == NUM_GAMES_IN_SCHEDULE
        assert set(unique_rows.columns.values) == set(self.results.keys())

    def test_ncaab_schedule_all_dataframe_extended_returns_dataframe(self):
        """The schedule's extended dataframe has one row per game."""
        extended = self.schedule.dataframe_extended

        assert len(extended) == NUM_GAMES_IN_SCHEDULE

    def test_no_games_for_date_raises_value_error(self):
        """Requesting the current date raises ValueError."""
        with pytest.raises(ValueError):
            self.schedule(datetime.now())

    def test_empty_page_return_no_games(self):
        """A schedule built when the stats table lookup returns None has no games."""
        # The "no data" hook is expected to be called exactly once.
        (flexmock(utils)
            .should_receive('_no_data_found')
            .once())
        # Simulate a page without a stats table.
        (flexmock(utils)
            .should_receive('_get_stats_table')
            .and_return(None))

        empty_schedule = Schedule('KANSAS')

        assert len(empty_schedule) == 0

    def test_game_string_representation(self):
        """A game's repr is '<date> - <opponent abbreviation>'."""
        first_game = self.schedule[0]

        assert first_game.__repr__() == 'Fri, Nov 10, 2017 - tennessee-state'

    def test_schedule_string_representation(self):
        """The schedule's repr lists every game in order."""
        # One game per line. NOTE(review): the separator is assumed to be a
        # newline; confirm against Schedule.__repr__.
        expected = "\n".join((
            'Fri, Nov 10, 2017 - tennessee-state',
            'Tue, Nov 14, 2017 - kentucky',
            'Fri, Nov 17, 2017 - south-dakota-state',
            'Tue, Nov 21, 2017 - texas-southern',
            'Fri, Nov 24, 2017 - oakland',
            'Tue, Nov 28, 2017 - toledo',
            'Sat, Dec 2, 2017 - syracuse',
            'Wed, Dec 6, 2017 - washington',
            'Sun, Dec 10, 2017 - arizona-state',
            'Sat, Dec 16, 2017 - nebraska',
            'Mon, Dec 18, 2017 - nebraska-omaha',
            'Thu, Dec 21, 2017 - stanford',
            'Fri, Dec 29, 2017 - texas',
            'Tue, Jan 2, 2018 - texas-tech',
            'Sat, Jan 6, 2018 - texas-christian',
            'Tue, Jan 9, 2018 - iowa-state',
            'Sat, Jan 13, 2018 - kansas-state',
            'Mon, Jan 15, 2018 - west-virginia',
            'Sat, Jan 20, 2018 - baylor',
            'Tue, Jan 23, 2018 - oklahoma',
            'Sat, Jan 27, 2018 - texas-am',
            'Mon, Jan 29, 2018 - kansas-state',
            'Sat, Feb 3, 2018 - oklahoma-state',
            'Tue, Feb 6, 2018 - texas-christian',
            'Sat, Feb 10, 2018 - baylor',
            'Tue, Feb 13, 2018 - iowa-state',
            'Sat, Feb 17, 2018 - west-virginia',
            'Mon, Feb 19, 2018 - oklahoma',
            'Sat, Feb 24, 2018 - texas-tech',
            'Mon, Feb 26, 2018 - texas',
            'Sat, Mar 3, 2018 - oklahoma-state',
            'Thu, Mar 8, 2018 - oklahoma-state',
            'Fri, Mar 9, 2018 - kansas-state',
            'Sat, Mar 10, 2018 - west-virginia',
            'Thu, Mar 15, 2018 - pennsylvania',
            'Sat, Mar 17, 2018 - seton-hall',
            'Fri, Mar 23, 2018 - clemson',
            'Sun, Mar 25, 2018 - duke',
            'Sat, Mar 31, 2018 - villanova',
        ))

        assert self.schedule.__repr__() == expected
def dateTransform(date1, date2): datetime_object1 = datetime.strptime(date1, "%a, %b %d, %Y") # print(datetime_object1.date()) date2 = date2[0:8] datetime_object2 = datetime.strptime(date2, "%Y%m%d") # print(datetime_object2.date()) return datetime_object1.date() == datetime_object2.date() # print(dateTransform("Sat, Mar 28, 2021", "20210328 12:30:00")) # schedule testing testschedule = Schedule("Gonzaga") for game in testschedule: print("location : " + game.location) print("date : " + game.date) # need to check typeof game.date/game.location, figure out how to transform date, get date (from comparison?) def isNeutral(team, date): # team = teamNameConversion(team) team = nameConversion(team) # print(team) try: schedule = Schedule(team) neutral1 = "" for game in schedule: gameDate = game.date
import os import pandas as pd from sportsreference.ncaab.teams import Teams from sportsreference.ncaab.schedule import Schedule errors = [] for year in range(2010, 2020): year = str(year) print(year) for team in Teams(year=year): try: df = pd.DataFrame() index = 0 schedule = Schedule(team.abbreviation, year=year) for game in schedule: temp_df = pd.DataFrame( { 'year': year, 'arena': game.arena, 'location': game.location, 'team': team.abbreviation, 'opponent': game.opponent_abbr, 'team_points': game.points_for, 'opponent_points': game.points_against }, index=[index]) df = pd.concat([df, temp_df], ignore_index=True) index += 1 output_path = os.path.join('data', 'sportsreference_data',
def random_forest_regressor(self, year):
    """Predict the winner between ``self.team1`` and ``self.team2``.

    Two random-forest regressors are trained on the extended schedule data of
    both teams for the given season. Model 1 is trained on team1's "home" rows
    plus team2's "away" rows, model 2 on the opposite split. Each model
    predicts a ``(home_points, away_points)`` pair for the matchup; the
    predictions are combined into one score per team and the team with the
    higher score is returned.

    Args:
        year (int): Season year passed to ``Schedule``.

    Returns:
        team: ``self.team1`` or ``self.team2``, whichever has the higher
        combined score; a random choice between the two on a tie.
    """
    # Fields brought in by the sports reference API that are not features.
    FIELDS_TO_DROP = ['away_points', 'home_points', 'date', 'location',
                      'losing_abbr', 'losing_name', 'winner', 'winning_abbr',
                      'winning_name', 'home_ranking', 'away_ranking',
                      'away_defensive_rebounds', 'home_defensive_rebounds',
                      'away_two_point_field_goal_attempts',
                      'away_two_point_field_goal_percentage',
                      'away_two_point_field_goals',
                      'home_two_point_field_goal_attempts',
                      'home_two_point_field_goal_percentage',
                      'home_two_point_field_goals',
                      'pace', 'away_defensive_rating',
                      'away_defensive_rebound_percentage',
                      'home_defensive_rating',
                      'home_defensive_rebound_percentage']
    TARGET_FIELDS = ['home_points', 'away_points']

    def schedule_name(team):
        # Name used to request the team's schedule.
        name = (team.get_team_name()
                .replace(" NCAA", "")
                .replace(" ", "-")
                .replace("(", "")
                .replace(")", "")
                .replace("'", ""))
        if name == "UC-Irvine":
            name = "CALIFORNIA-IRVINE"
        return name

    def split_home_away(frame, team):
        # Rows whose index contains the team's slug are treated as that team's
        # home games, all other rows as away games.
        # NOTE(review): the slug is built without the parenthesis/apostrophe
        # stripping and the UC-Irvine remap used for the schedule name, and
        # ``str.contains`` interprets it as a regular expression. Confirm it
        # matches the frame index for such teams.
        slug = team.get_team_name().replace(" NCAA", "").replace(" ", "-").lower()
        is_home = frame.index.str.contains(slug)
        return frame[is_home], frame[~is_home]

    def training_set(dataset):
        # Build features and targets from the SAME rows. The filter is applied
        # to both with one positional mask, so row i of the targets always
        # belongs to row i of the features (trimming the targets from the end
        # would pair features with the wrong game's score).
        features = dataset.drop(columns=FIELDS_TO_DROP)
        targets = dataset[TARGET_FIELDS]
        # Keep rows with complete features and known scores.
        usable = (features.notna().all(axis=1).to_numpy()
                  & targets.notna().all(axis=1).to_numpy())
        features, targets = features[usable], targets[usable]
        # Drop repeated feature rows, keeping the first occurrence.
        first_seen = ~features.duplicated().to_numpy()
        return features[first_seen], targets[first_seen]

    # Pull in all games played in the season for both teams.
    team1_schedule = Schedule(schedule_name(self.team1), year)
    team2_schedule = Schedule(schedule_name(self.team2), year)
    print("got schedules")

    team1_df_home, team1_df_away = split_home_away(
        team1_schedule.dataframe_extended, self.team1)
    team2_df_home, team2_df_away = split_home_away(
        team2_schedule.dataframe_extended, self.team2)
    print("seperated home and away")

    # Compile into one dataset per model.
    dataset_1 = pd.concat([team1_df_home, team2_df_away])
    dataset_2 = pd.concat([team2_df_home, team1_df_away])
    print('concated proper dataframes')

    X_train_1, Y_train_1 = training_set(dataset_1)
    X_train_2, Y_train_2 = training_set(dataset_2)
    print('created training sets')

    # Feature rows describing the matchup from each team's perspective.
    X_test_1 = self.get_regeressor_info(self.team1, self.team2)
    X_test_2 = self.get_regeressor_info(self.team2, self.team1)
    print('got test sets')

    # Parameters for the model (could use tweaking to improve accuracy).
    parameters = {
        'bootstrap': True,
        'max_depth': 6,
        'max_features': None,
        'min_samples_leaf': 50,
        'min_samples_split': 12,
        'n_estimators': 100,
    }

    model_1 = RandomForestRegressor(**parameters)
    model_2 = RandomForestRegressor(**parameters)
    print('finished join')
    model_1.fit(X_train_1, Y_train_1)
    model_2.fit(X_train_2, Y_train_2)

    # Predict the outcome of the game; the first row of each prediction holds
    # the (home_points, away_points) pair, truncated to integers.
    spread_1 = model_1.predict(X_test_1).astype(int)
    spread_2 = model_2.predict(X_test_2).astype(int)
    print('predicted spreads')

    # Read the values straight from the arrays. Parsing ``str(array)`` breaks
    # when numpy pads columns of different widths (e.g. '[100  68]').
    home_1, away_1 = int(spread_1[0][0]), int(spread_1[0][1])
    home_2, away_2 = int(spread_2[0][0]), int(spread_2[0][1])

    team1_score = home_1 + away_2
    team2_score = away_1 + home_2
    print("Team 1 score", team1_score, self.team1.get_team_name())
    print("Team 2 score", team2_score, self.team2.get_team_name())

    if team1_score > team2_score:
        return self.team1
    if team1_score < team2_score:
        return self.team2
    return random.choice([self.team1, self.team2])