def _json_game_player_stats(game, data):
    """
    Collects the whole-game statistics of every player listed under the
    'home' and 'away' entries of data.

    For each team, every category in nflgame.statmap.categories that is
    present in data[team]['stats'] is read. Each raw field other than
    'name' is stored under the key '<category>_<field>'.

    Returns an OrderedDict mapping player id to an instance of
    nflgame.player.GamePlayerStats, in the order the players are first
    encountered.
    """
    by_player = OrderedDict()
    for side in ('home', 'away'):
        is_home = side == 'home'
        for category in nflgame.statmap.categories:
            side_stats = data[side]['stats']
            if category not in side_stats:
                continue
            for pid, raw in side_stats[category].iteritems():
                # Prefix every field with its category; 'name' is player
                # metadata rather than a statistic.
                prefixed = {
                    '%s_%s' % (category, field): value
                    for field, value in raw.iteritems()
                    if field != 'name'
                }
                entry = by_player.get(pid)
                if entry is None:
                    team_name = game.home if is_home else game.away
                    entry = nflgame.player.GamePlayerStats(
                        pid, raw['name'], is_home, team_name)
                    by_player[pid] = entry
                entry._add_stats(prefixed)
    return by_player
def _json_play_players(play, data):
    """
    Converts one JSON play entry (data) into an OrderedDict mapping
    player id to nflgame.player.PlayPlayerStats.

    play is the Play instance the entry belongs to; its game
    (play.drive.game) is consulted to decide whether a player's club is
    the home team and which team name to record.

    Entries keyed by the player id '0' are skipped, as are stat records
    whose 'statId' is not in nflgame.statmap.idmap.
    """
    found = OrderedDict()
    for playerid, statcats in data.iteritems():
        if playerid == '0':
            continue
        for info in statcats:
            stat_id = info['statId']
            if stat_id not in nflgame.statmap.idmap:
                continue
            if playerid not in found:
                # The first usable record for a player fixes the name and
                # team recorded for that player.
                game = play.drive.game
                home = game.is_home(info['clubcode'])
                team_name = game.home if home else game.away
                found[playerid] = nflgame.player.PlayPlayerStats(
                    playerid, info['playerName'], home, team_name)
            found[playerid]._add_stats(
                nflgame.statmap.values(stat_id, info['yards']))
    return found
def new_schedule():
    """
    Creates a fresh schedule by calling update_week once for every
    (year, season type, week) triple produced by year_phase_week().

    Returns the populated OrderedDict.
    """
    schedule = OrderedDict()
    for triple in year_phase_week():
        year, stype, week = triple
        update_week(schedule, year, stype, week)
    return schedule
def __init__(self, playerid, name, home): """ Create a new Player instance with the player id (from NFL.com's GameCenter), the player's name (e.g., "T.Brady") and whether the player is playing in a home game or not. """ self.playerid = playerid self.name = name self.home = home self._stats = OrderedDict()
def __add__(self, other):
    """
    Concatenates this sequence of players with other, merging players
    that share a player id.

    The first object seen for an id is stored; every later object with
    the same id is folded in with `+=`. The result is a new
    GenPlayerStats wrapping an OrderedDict in first-seen order.
    """
    merged = OrderedDict()
    for player in itertools.chain(self, other):
        pid = player.playerid
        if pid in merged:
            merged[pid] += player
        else:
            merged[pid] = player
    return GenPlayerStats(merged)
def players(self):
    """
    Aggregates the player statistics of every play in this sequence.

    Players are keyed by player id: the first object seen for an id is
    stored and each later one is folded in with `+=`. Returns a
    GenPlayerStats wrapping an OrderedDict in first-seen order.
    """
    totals = OrderedDict()
    # Walk the players of each play in play order, one flat stream.
    every_player = (p for play in self for p in play.players)
    for p in every_player:
        pid = p.playerid
        if pid in totals:
            totals[pid] += p
        else:
            totals[pid] = p
    return GenPlayerStats(totals)
def max_player_stats(self):
    """
    Returns a GenPlayerStats sequence of player statistics that combines
    game statistics and play statistics by taking the max value of each
    corresponding statistic.

    This is useful when accuracy is desirable. Namely, using only
    play-by-play data or using only game statistics can be unreliable.
    That is, both are inconsistently correct.

    Taking the max values of each statistic reduces the chance of being
    wrong (particularly for stats that are in both play-by-play data and
    game statistics), but does not eliminate them.

    Only players that appear in the play-by-play data are included in the
    result; a player present solely in the game level statistics is not
    added.
    """
    game_players = list(self.players)
    play_players = list(self.drives.plays().players())

    # Index the game level players by id so each lookup below is a dict
    # access instead of a scan over the whole list. setdefault keeps the
    # first entry for an id, which is the one a linear scan would find.
    game_by_id = {}
    for pgame in game_players:
        game_by_id.setdefault(pgame.playerid, pgame)

    # So this is a little tricky. It's possible for a player to have
    # only statistics at the play level, and therefore not be represented
    # in the game level statistics. Therefore, we initialize our
    # max_players with play-by-play stats first. Then go back through
    # and combine them with available game statistics.
    max_players = OrderedDict()
    for pplay in play_players:
        newp = nflgame.player.GamePlayerStats(pplay.playerid,
                                              pplay.name, pplay.home,
                                              pplay.team)
        newp._overwrite_stats(dict(pplay._stats.iteritems()))
        max_players[pplay.playerid] = newp

    for newp in max_players.itervalues():
        pgame = game_by_id.get(newp.playerid)
        if pgame is None:
            continue

        maxstats = {}
        for stat, val in pgame._stats.iteritems():
            # A stat missing at the play level must never win the max.
            maxstats[stat] = max([val, newp._stats.get(stat, -_MAX_INT)])
        newp._overwrite_stats(maxstats)
    return nflgame.seq.GenPlayerStats(max_players)
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff built from two values: plays, a list of the
    plays found in after but not in before, and players, a
    GenPlayerStats holding per-player differences (after minus before).
    A player absent from before is carried over unchanged. This is
    useful for sending alerts where you're guaranteed to see each play
    statistic only once (assuming NFL.com behaves itself).

    XXX: There is an assertion that requires after's game clock be the
    same or later than before's game clock. This may need to be removed
    if NFL.com allows its game clock to be rolled back due to corrections
    from refs.
    """
    assert after.time >= before.time, \
        'When diffing two games, "after" (%s) must be later or the ' \
        'same time as "before" (%s).' % (after.time, before.time)
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    new_plays = [play for play in after_plays if play not in before_plays]

    # New plays alone are not enough: a play can show up a second time
    # when its description is updated (late call? play review? etc.), so
    # the player statistics are diffed as well.
    changed = OrderedDict()
    after_players = list(after.drives.players())
    before_players = list(before.drives.players())
    for aplayer in after_players:
        seen_before = False
        for bplayer in before_players:
            if aplayer.playerid == bplayer.playerid:
                seen_before = True
                delta = aplayer - bplayer
                if delta is not None:
                    changed[aplayer.playerid] = delta
        if not seen_before:
            changed[aplayer.playerid] = aplayer

    return GameDiff(plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
def build_old(nfl_schedules_path):
    """
    Builds a schedule from the ".xml" files that get_filenames reports
    for nfl_schedules_path.

    Each file name is split as "<year>-<week>-<season type>.xml". Files
    are processed in the reverse of sort_nicely's order, and only those
    whose year is earlier than DETAILED_STATS_START_YEAR are passed to
    update_week. Progress is printed along the way.

    Returns the populated OrderedDict.
    """
    sched = OrderedDict()
    xml_filenames = get_filenames(nfl_schedules_path, "", ".xml")
    sort_nicely(xml_filenames)
    xml_filenames.reverse()

    cur_year = DETAILED_STATS_START_YEAR
    for xml_file in xml_filenames:
        stem = xml_file.split(".xml")[0]
        year_text, week_text, stype = stem.split("-")
        year, week = int(year_text), int(week_text)

        # Announce each year once, the first time an earlier one shows up.
        if year < cur_year:
            print(str(year))
            cur_year = year

        if year >= DETAILED_STATS_START_YEAR:
            continue
        print('Building (%d, %s, %d)...' % (year, stype, week))
        update_week(sched, year, stype, week, nfl_schedules_path)
    return sched
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff built from four values: the before and
    after games themselves, plays (a list of the plays found in after
    but not in before) and players (a GenPlayerStats holding per-player
    differences, after minus before, computed from each game's
    max_player_stats()). A player absent from before is carried over
    unchanged.

    This is useful for sending alerts where you're guaranteed to see
    each play statistic only once (assuming NFL.com behaves itself).
    """
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    new_plays = [play for play in after_plays if play not in before_plays]

    # New plays alone are not enough: a play can show up a second time
    # when its description is updated (late call? play review? etc.), so
    # the player statistics are diffed as well.
    changed = OrderedDict()
    after_players = list(after.max_player_stats())
    before_players = list(before.max_player_stats())
    for aplayer in after_players:
        seen_before = False
        for bplayer in before_players:
            if aplayer.playerid == bplayer.playerid:
                seen_before = True
                delta = aplayer - bplayer
                if delta is not None:
                    changed[aplayer.playerid] = delta
        if not seen_before:
            changed[aplayer.playerid] = aplayer

    return GameDiff(before=before, after=after, plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
def _xml_plays(data, coach=True):
    """
    Parses the XML raw string `data` given into an ordered dictionary of
    `Play` objects corresponding to coach play timings. If `coach` is set
    to `False`, then play timings for the broadcast are retrieved.

    The dictionary is keyed by play id. `None` is returned when `data` is
    `None`.

    The ending time of the footage (the `endtime` attribute of the
    `dataset` element, when there is one) is not returned separately; it
    is handed to every `Play` that is created, and is `None` when the
    element or the attribute is missing.
    """
    if data is None:
        return None

    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed. The lowercase tag and attribute lookups below suggest an
    # HTML-style parser is expected - confirm before pinning one.
    soup = bs4.BeautifulSoup(data)

    # A document without a <dataset> element simply has no known end
    # time; it must not abort the parse.
    dataset = soup.find('dataset')
    game_end_time = None
    if dataset is not None:
        game_end_time = dataset.get('endtime', None)
    if game_end_time is not None:
        game_end_time = PlayTime(game_end_time.strip())

    # Load everything into a list first, since we need to look ahead to see
    # the next play's start time to compute the current play's duration.
    rows = []
    for row in soup.find_all('row'):
        # The play id is either a child <id> element or a `playid`
        # attribute; rows with neither are dropped.
        playid = row.find('id')
        if not playid:
            playid = row.get('playid', None)
            if not playid:
                continue
            playid = playid.strip()
        else:
            playid = playid.get_text().strip()

        start = row.find('catin' if coach else 'archivetcin')
        if not start:
            continue
        start = PlayTime(start.get_text().strip())
        rows.append((playid, start, row))

    # A predicate for determining whether to ignore a row or not in our final
    # result set. For example, timeouts take a lot of time but aren't needed
    # for play-by-play footage.
    def ignore(row):
        if 'playdescription' in row.attrs:
            description = row['playdescription'].lower()
            if description.startswith(('timeout', 'two-minute')):
                return True

        # Did we miss anything?
        if 'preplaybyplay' in row.attrs:
            if row['preplaybyplay'].lower().startswith('timeout'):
                return True
        return False

    d = OrderedDict()
    for i, (playid, start, row) in enumerate(rows):
        if ignore(row):
            continue
        # A play ends where the next row starts, even if that next row is
        # itself ignored; the last row has no known end.
        end = rows[i + 1][1] if i < len(rows) - 1 else None
        d[playid] = Play(start, end, playid, game_end_time)
    return d