def _collect_officials(box_score_summary, year, game_id):
    """
    Register every official of a game in the database and return their ids.

    :param box_score_summary: the box score summary response for the game
    :param year: the season the game belongs to
    :type year: str
    :param game_id: the id of the game being loaded
    :return: the ids of the officials, in the order they were listed
    :rtype: list
    """
    officials_df = clean_boxscore_df(
        box_score_summary.officials.get_data_frame(),
        index='OFFICIAL_ID',
        str_keys=['OFFICIAL_ID'])

    official_ids = []
    for official_id, official in officials_df.iterrows():
        official_name = '{} {}'.format(official['FIRST_NAME'],
                                       official['LAST_NAME'])
        # Create the Official if needed
        add_entry_to_db(document_type=Official,
                        unique_id=official_id,
                        name=official_name,
                        year=year,
                        game_id=game_id)
        official_ids.append(official_id)
    return official_ids


def _build_player_games(players_traditional, players_advanced, players_usage,
                        home_team_id, road_team_id, year, game_id):
    """
    Create one PlayerGame per row of the traditional player box score.

    Each player is also added to the database for this year through
    ``add_entry_to_db``.

    :param players_traditional: cleaned traditional player stats, indexed by
        player id
    :param players_advanced: cleaned advanced player stats, indexed by
        player id
    :param players_usage: cleaned usage player stats, indexed by player id
    :param home_team_id: id of the home team
    :type home_team_id: str
    :param road_team_id: id of the visiting team
    :type road_team_id: str
    :param year: the season the game belongs to
    :type year: str
    :param game_id: the id of the game being loaded
    :return: mapping of player id to its PlayerGame
    :rtype: dict
    """
    player_games = {}
    for player_id, player in players_traditional.iterrows():
        # Gather player info and add to db for this year if not already stored
        player_name = player['PLAYER_NAME']
        print("Player: {} (id: {})".format(player_name, player_id))
        add_entry_to_db(document_type=Player,
                        unique_id=player_id,
                        name=player_name,
                        year=year,
                        game_id=game_id)

        # Create PlayerGame entry to add to this game
        player_game = PlayerGame(player_id=player_id)

        # Store basic data about PlayerGame
        # NOTE(review): home_team_id is a str, so this comparison assumes
        # clean_boxscore_df also yields TEAM_ID as a str - confirm.
        if player['TEAM_ID'] == home_team_id:
            player_game.home = True
            player_game.team_id = home_team_id
            player_game.opposing_team_id = road_team_id
        else:
            player_game.home = False
            player_game.team_id = road_team_id
            player_game.opposing_team_id = home_team_id

        # Create traditional stats entry for this player
        traditional_player_entry = GameTraditionalStats()
        player_game.traditional_stats = traditional_player_entry
        assign_all_values(traditional_player_entry, player)

        # Create advanced stats entry for this player
        # (assumes every player of the traditional box score is also present
        # in the advanced and usage box scores)
        advanced_player_entry = GameAdvancedStats()
        player_game.advanced_stats = advanced_player_entry
        assign_all_values(advanced_player_entry,
                          players_advanced.loc[player_id])

        # Create usage stats entry for this player
        usage_player_entry = GameUsageStats()
        player_game.usage_stats = usage_player_entry
        assign_all_values(usage_player_entry, players_usage.loc[player_id])

        player_games[player_id] = player_game
    return player_games


def _build_team_games(teams_traditional, teams_advanced, home_team_id,
                      road_team_id, date, year, game_id):
    """
    Create one TeamGame per row of the traditional team box score.

    Each team is also added to the database for this year through
    ``add_entry_to_db``.

    :param teams_traditional: cleaned traditional team stats, indexed by
        team id
    :param teams_advanced: cleaned advanced team stats, indexed by team id
    :param home_team_id: id of the home team
    :type home_team_id: str
    :param road_team_id: id of the visiting team
    :type road_team_id: str
    :param date: ISO formatted date of the game
    :type date: str
    :param year: the season the game belongs to
    :type year: str
    :param game_id: the id of the game being loaded
    :return: mapping of team id to its TeamGame
    :rtype: dict
    """
    team_games = {}
    for team_id, team in teams_traditional.iterrows():
        # Gather team info and add to db for this year if not already stored
        team_name = '{} {}'.format(team['TEAM_CITY'], team['TEAM_NAME'])
        print("Team: {} (id: {})".format(team_name, team_id))
        add_entry_to_db(document_type=Team,
                        unique_id=team_id,
                        name=team_name,
                        year=year,
                        game_id=game_id)

        # Create TeamGame entry to add to this game
        team_game = TeamGame(team_id=team_id)

        # Store basic data about TeamGame
        team_game.date = date
        # NOTE(review): assumes the TEAM_ID index holds str ids like
        # home_team_id does - confirm against clean_boxscore_df.
        if team_id == home_team_id:
            team_game.home = True
            team_game.opposing_team_id = road_team_id
        else:
            team_game.home = False
            team_game.opposing_team_id = home_team_id

        # Create traditional stats entry for this team
        traditional_team_entry = GameTraditionalStats()
        team_game.traditional_stats = traditional_team_entry
        assign_all_values(traditional_team_entry, team)

        # Create advanced stats entry for this team
        advanced_team_entry = GameAdvancedStats()
        team_game.advanced_stats = advanced_team_entry
        assign_all_values(advanced_team_entry, teams_advanced.loc[team_id])

        team_games[team_id] = team_game
    return team_games


def get_games(years):
    """
    Load the info and statistics for the game played in the specified years.
    Must have already loaded the gamedates for these years in order to fetch
    the games themselves.

    For every day between the stored first and last date of a season, each
    game listed on that day's GameDate that is not yet stored is fetched
    (summary, traditional, advanced and usage box scores) and saved as a
    Game. A year without a stored Season is skipped. A day without a stored
    GameDate stops the processing of that year.

    :param years: the years to load info for
    :type years: list[str]
    :raises Exception: if the box score summary of a game cannot be decoded
        and the year is not '2019-20'
    """
    # For each specified year, look at the dates and games played on them
    for year in years:
        # Load season
        season = Season.objects(year=year)
        if not season:
            print('Season and GameDates not yet loaded for {}'.format(year))
            continue
        season = season[0]

        # Parse the season boundaries once instead of on every use
        first_day = datetime.date.fromisoformat(season.first_date)
        last_day = datetime.date.fromisoformat(season.last_date)
        days = (last_day - first_day).days + 1

        # For each day in the season
        for offset in range(days):
            date = (first_day + timedelta(offset)).isoformat()
            print('\n{} Loading date: {} {}\n'.format(
                '#' * 20, date, '#' * 20))

            # Fetch GameDate, if it doesn't exist then the season didn't
            # properly load and should re-load it
            game_date = GameDate.objects(date=date)
            if not game_date:
                print('GameDate not loaded for {}, you should re-load this '
                      'season {} in full to get the full season before '
                      'proceeding.'.format(date, year))
                break
            game_date = game_date[0]

            # For each game on this day
            invalid_game_ids = []
            for game_id in game_date.games:
                # Fetch Game, if it exists already, skip it
                if Game.objects(game_id=game_id):
                    continue

                game = Game(game_id=game_id)
                game.date = date
                game.year = year

                # Fetch Box Score Summary
                try:
                    box_score_summary = query_nba_api(
                        boxscoresummaryv2.BoxScoreSummaryV2, game_id=game_id)
                except JSONDecodeError as err:
                    invalid_game_ids.append(game_id)
                    # The purpose of this except block is because in 2019-20,
                    # covid led to games being cancelled.
                    if year == '2019-20':
                        print('F**k COVID. This game was cancelled.')
                        continue
                    # Name the offending game and keep the original cause
                    raise Exception(
                        "Game {} wasn't found.".format(game_id)) from err

                # Store inactive players
                inactive_ids = (box_score_summary.inactive_players
                                .get_data_frame()['PLAYER_ID'].to_list())
                game.inactives = [
                    str(inactive_player) for inactive_player in inactive_ids
                ]

                # Store officials for this game (create Official if needed)
                game.officials = _collect_officials(
                    box_score_summary, year, game_id)

                # Store home team id and road team id
                game_summary = box_score_summary.game_summary.get_data_frame()
                home_team_id = str(game_summary['HOME_TEAM_ID'][0])
                road_team_id = str(game_summary['VISITOR_TEAM_ID'][0])

                # Fetch various relevant box scores to use

                # Traditional box score
                box_score_traditional = query_nba_api(
                    boxscoretraditionalv2.BoxScoreTraditionalV2,
                    game_id=game_id)
                players_traditional = clean_boxscore_df(
                    box_score_traditional.player_stats.get_data_frame(),
                    index='PLAYER_ID')
                teams_traditional = clean_boxscore_df(
                    box_score_traditional.team_stats.get_data_frame(),
                    index='TEAM_ID')

                # Advanced box score
                box_score_advanced = query_nba_api(
                    boxscoreadvancedv2.BoxScoreAdvancedV2, game_id=game_id)
                players_advanced = clean_boxscore_df(
                    box_score_advanced.player_stats.get_data_frame(),
                    index='PLAYER_ID')
                teams_advanced = clean_boxscore_df(
                    box_score_advanced.team_stats.get_data_frame(),
                    index='TEAM_ID')

                # Usage box score
                box_score_usage = query_nba_api(
                    boxscoreusagev2.BoxScoreUsageV2, game_id=game_id)
                players_usage = clean_boxscore_df(
                    box_score_usage.sql_players_usage.get_data_frame(),
                    index='PLAYER_ID')

                # Log the current game
                team_names = [
                    '{} {}'.format(team['TEAM_CITY'], team['TEAM_NAME'])
                    for _, team in teams_traditional.iterrows()
                ]
                print('\n{} Loading game: {} vs. {} {}'.format(
                    '#' * 10, team_names[0], team_names[1], '#' * 10))

                # Create each PlayerGame and map them to player_id
                game.player_games = _build_player_games(
                    players_traditional, players_advanced, players_usage,
                    home_team_id, road_team_id, year, game_id)

                # Create each TeamGame and map them to team_id
                game.team_games = _build_team_games(
                    teams_traditional, teams_advanced,
                    home_team_id, road_team_id, date, year, game_id)

                # Save game
                game.save()
                print("")

            # Remove game_id of games that were cancelled (covid) from game
            # dates for future iterations
            game_date.games = [
                game_id for game_id in game_date.games
                if game_id not in invalid_game_ids
            ]
            game_date.save()


def get_gamedates(years):
    """
    Store a GameDate for every day of the regular season of each given year,
    together with the Season entry holding its first and last date.

    The first regular-season day is searched day by day starting on October
    1st of the year's first four characters. From there every following day
    is stored until a day whose first game is a playoff game is reached; the
    day before it becomes the season's last date.

    NOTE(review): the walk over the days only stops on a playoff day, so a
    season without any playoff game yet never terminates - confirm this is
    only called for finished seasons.

    :param years: the years to load info for
    :type years: list[str]
    """
    def iso_days_from(start):
        # Lazily yield ISO date strings: start, start + 1 day, ...
        current = start
        while True:
            yield current
            current = (datetime.date.fromisoformat(current)
                       + timedelta(1)).isoformat()

    for year in years:
        season_entry = Season()
        season_entry.year = year

        # Search for the first regular-season day, beginning with the first
        # day of October as the earliest possible candidate
        for first_date in iso_days_from('{}-10-01'.format(year[:4])):
            print("Looking at {} for first day of season".format(first_date))
            scoreboard = query_nba_api(scoreboardv2.ScoreboardV2,
                                       game_date=first_date)
            candidate_ids = scoreboard.available.get_data_frame()['GAME_ID']

            # Games were played this day and the first one is regular season
            if len(candidate_ids) > 0 and candidate_ids[0][2] == '2':
                season_entry.first_date = first_date
                break

        # Load the game dates into mongo, one day at a time
        for date in iso_days_from(first_date):
            # Nothing to do for a day that is stored already
            if GameDate.objects(date=date):
                print('{} is already in the database'.format(date))
                continue

            scoreboard = query_nba_api(scoreboardv2.ScoreboardV2,
                                       game_date=date)
            game_ids = scoreboard.available.get_data_frame()['GAME_ID']
            game_ids = game_ids.to_list()

            # Third character of the day's first game id, None without games
            season_type = game_ids[0][2] if len(game_ids) > 0 else None

            if season_type == '3':
                # All star game: keep the day but without any game
                game_ids = []
            elif season_type == '4':
                # Playoff game: the previous day closes the season
                day_before = datetime.date.fromisoformat(date) - timedelta(1)
                season_entry.last_date = day_before.isoformat()
                if not Season.objects(year=year):
                    season_entry.save()
                break

            # Create the entry for this day
            new_gamedate = GameDate()
            new_gamedate.date = date
            new_gamedate.year = year
            # Remove not played game
            if '0021201214' in game_ids:
                game_ids.remove('0021201214')
            new_gamedate.games = game_ids
            new_gamedate.save()
            print('Adding {} to database with {} games played on '
                  'this day'.format(date, len(game_ids)))