Ejemplo n.º 1
0
def _json_game_player_stats(game, data):
    """
    Collects the per-player game totals found in the 'home' and 'away'
    sections of the game JSON (data).

    Each statistic is stored under the key '<category>_<field>'. The
    'name' field is left out of the statistics because it is passed to
    the nflgame.player.GamePlayerStats constructor instead.

    Returns an OrderedDict mapping player id to GamePlayerStats. A
    player listed under several categories gets a single entry, which
    is handed the statistics of every category through _add_stats.
    """
    totals = OrderedDict()
    for side in ('home', 'away'):
        is_home = side == 'home'
        for cat in nflgame.statmap.categories:
            side_stats = data[side]['stats']
            if cat not in side_stats:
                continue
            for player_id, fields in side_stats[cat].iteritems():
                prefixed = dict(
                    ('%s_%s' % (cat, field), value)
                    for field, value in fields.iteritems()
                    if field != 'name')
                if player_id not in totals:
                    # First sighting of this player: create the record,
                    # tagged with the team of the side being scanned.
                    team_name = game.home if is_home else game.away
                    totals[player_id] = nflgame.player.GamePlayerStats(
                        player_id, fields['name'], is_home, team_name)
                totals[player_id]._add_stats(prefixed)
    return totals
Ejemplo n.º 2
0
def _json_play_players(play, data):
    """
    Converts the JSON entry of a single play (data) into an OrderedDict
    mapping player id to nflgame.player.PlayPlayerStats.

    play is the Play instance the data belongs to. Its game
    (play.drive.game) is asked whether a stat entry's club code is the
    home team, and supplies the team name stored on the player.

    Entries under the player id '0' are skipped, as are stat entries
    whose 'statId' is not a key of nflgame.statmap.idmap.
    """
    by_player = OrderedDict()
    for pid, entries in data.iteritems():
        if pid == '0':
            continue
        for entry in entries:
            stat_id = entry['statId']
            if stat_id not in nflgame.statmap.idmap:
                continue
            if pid not in by_player:
                # The game is only looked up once a player record
                # actually has to be created.
                game = play.drive.game
                is_home = game.is_home(entry['clubcode'])
                team_name = game.home if is_home else game.away
                by_player[pid] = nflgame.player.PlayPlayerStats(
                    pid, entry['playerName'], is_home, team_name)
            by_player[pid]._add_stats(
                nflgame.statmap.values(stat_id, entry['yards']))
    return by_player
Ejemplo n.º 3
0
def new_schedule():
    """
    Creates an empty schedule and calls update_week on it for every
    (year, season type, week) triple produced by year_phase_week.

    Returns the OrderedDict that was handed to update_week.
    """
    schedule = OrderedDict()
    for season_year, season_type, week_number in year_phase_week():
        update_week(schedule, season_year, season_type, week_number)
    return schedule
Ejemplo n.º 4
0
 def __init__(self, playerid, name, home):
     """
     Sets up a player record that starts with no statistics.

     playerid is the player id from NFL.com's GameCenter, name is the
     player's name (e.g., "T.Brady") and home tells whether the player
     is playing in a home game or not. Statistics are kept in an
     initially empty OrderedDict.
     """
     self.playerid, self.name, self.home = playerid, name, home
     self._stats = OrderedDict()
Ejemplo n.º 5
0
 def __add__(self, other):
     """
     Merges two sequences of players into a new GenPlayerStats.

     Players are visited in order (self first, then other). The first
     object seen for a player id is stored; every later object with the
     same id is folded into it with `+=`.
     """
     merged = OrderedDict()
     for player in itertools.chain(self, other):
         pid = player.playerid
         if pid in merged:
             merged[pid] += player
         else:
             merged[pid] = player
     return GenPlayerStats(merged)
Ejemplo n.º 6
0
 def players(self):
     """
     Combines the player stats of every play in the sequence into a
     single GenPlayerStats.

     The first object seen for a player id is stored; each later
     appearance of that id is folded into it with `+=`.
     """
     combined = OrderedDict()
     for play in self:
         for participant in play.players:
             pid = participant.playerid
             if pid in combined:
                 combined[pid] += participant
             else:
                 combined[pid] = participant
     return GenPlayerStats(combined)
Ejemplo n.º 7
0
    def max_player_stats(self):
        """
        Returns a GenPlayerStats sequence that merges game level and play
        level statistics by keeping, for every statistic, the larger of
        the two values.

        Neither source is reliable on its own: play-by-play data and
        game statistics are both inconsistently correct. Taking the max
        of each statistic lowers the chance of reporting a wrong value
        (especially for stats present in both sources) without removing
        it entirely.

        Only players that appear in the play-by-play data are part of
        the result.
        """
        game_players = list(self.players)
        play_players = list(self.drives.plays().players())
        max_players = OrderedDict()

        # A player may have statistics only at the play level and thus be
        # missing from the game level statistics. So every play level
        # player is seeded first as a fresh GamePlayerStats carrying a
        # copy of the play level numbers.
        for from_plays in play_players:
            seeded = nflgame.player.GamePlayerStats(
                from_plays.playerid, from_plays.name, from_plays.home,
                from_plays.team)
            seeded._overwrite_stats(dict(from_plays._stats.iteritems()))
            max_players[from_plays.playerid] = seeded

        # Second pass: merge in the game level numbers of the first game
        # level record that carries the same player id.
        for merged in max_players.itervalues():
            for from_game in game_players:
                if from_game.playerid == merged.playerid:
                    # A stat absent at the play level compares against
                    # -_MAX_INT, so the game level value is kept.
                    merged._overwrite_stats(dict(
                        (stat, max(val, merged._stats.get(stat, -_MAX_INT)))
                        for stat, val in from_game._stats.iteritems()))
                    break
        return nflgame.seq.GenPlayerStats(max_players)
Ejemplo n.º 8
0
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff namedtuple with two attributes, plays and
    players. Both hold *only* the data present in the after snapshot but
    not in the before snapshot, which makes the result suitable for
    alerts that must report each play statistic exactly once (assuming
    NFL.com behaves itself).

    Both snapshots must have the same eid.

    XXX: An assertion requires that after's game clock is the same as or
    later than before's game clock. It may have to go if NFL.com lets the
    game clock be rolled back because of corrections from refs.
    """
    assert after.time >= before.time, (
        'When diffing two games, "after" (%s) must be later or the '
        'same time as "before" (%s).' % (after.time, before.time))
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    new_plays = [p for p in after_plays if p not in before_plays]

    # New plays alone are not enough: a play can show up a second time
    # when its description is updated (late call? play review? etc.), so
    # scanning plays for statistics could count something twice. The
    # player statistics are therefore diffed separately.
    changed = OrderedDict()
    after_players = list(after.drives.players())
    before_players = list(before.drives.players())
    for current in after_players:
        pid = current.playerid
        earlier = [b for b in before_players if pid == b.playerid]
        if not earlier:
            # Player is new in the after snapshot: report everything.
            changed[pid] = current
            continue
        for previous in earlier:
            delta = current - previous
            if delta is not None:
                changed[pid] = delta

    return GameDiff(plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
Ejemplo n.º 9
0
def build_old(nfl_schedules_path):
    """
    Builds a schedule for the seasons before DETAILED_STATS_START_YEAR
    from the ".xml" files listed by get_filenames for nfl_schedules_path.

    File names are split as "<year>-<week>-<stype>.xml". The files are
    processed in the reverse of the order produced by sort_nicely. Each
    time a lower year than any seen so far is reached it is printed, and
    a "Building ..." line is printed before every update_week call.

    Returns the OrderedDict that was handed to update_week.
    """
    schedule = OrderedDict()
    xml_filenames = get_filenames(nfl_schedules_path, "", ".xml")
    sort_nicely(xml_filenames)
    xml_filenames.reverse()

    lowest_year = DETAILED_STATS_START_YEAR
    for filename in xml_filenames:
        stem = filename.split(".xml")[0]
        year_text, week_text, stype = stem.split("-")
        year, week = int(year_text), int(week_text)

        if year < lowest_year:
            print(str(year))
            lowest_year = year

        # Weeks from DETAILED_STATS_START_YEAR onwards are not built here.
        if not year < DETAILED_STATS_START_YEAR:
            continue
        print('Building (%d, %s, %d)...' % (year, stype, week))
        update_week(schedule, year, stype, week, nfl_schedules_path)
    return schedule
Ejemplo n.º 10
0
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff namedtuple built from the two snapshots
    (before, after) plus plays and players. plays and players hold *only*
    the data present in the after snapshot but not in the before
    snapshot, which makes the result suitable for alerts that must report
    each play statistic exactly once (assuming NFL.com behaves itself).

    Both snapshots must have the same eid.
    """
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    new_plays = [p for p in after_plays if p not in before_plays]

    # New plays alone are not enough: a play can show up a second time
    # when its description is updated (late call? play review? etc.), so
    # scanning plays for statistics could count something twice. The
    # player statistics are therefore diffed separately.
    changed = OrderedDict()
    after_players = list(after.max_player_stats())
    before_players = list(before.max_player_stats())
    for current in after_players:
        pid = current.playerid
        earlier = [b for b in before_players if pid == b.playerid]
        if not earlier:
            # Player is new in the after snapshot: report everything.
            changed[pid] = current
            continue
        for previous in earlier:
            delta = current - previous
            if delta is not None:
                changed[pid] = delta

    return GameDiff(before=before, after=after, plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
Ejemplo n.º 11
0
def _xml_plays(data, coach=True):
    """
    Parses the XML raw string `data` given into an ordered dictionary
    of `Play` objects corresponding to coach play timings. If `coach`
    is set to `False`, then play timings for the broadcast are
    retrieved instead.

    The dictionary is keyed by play id. `None` is returned when `data`
    is `None`.

    The ending time of the footage is read from the `endtime` attribute
    of the `dataset` element and passed to every `Play`; it is `None`
    when the element or the attribute is missing. It is not returned as
    a separate value.
    """
    if data is None:
        return None
    soup = bs4.BeautifulSoup(data)

    # `find` returns None when the document has no dataset element, so
    # guard the attribute lookup instead of failing with AttributeError.
    game_end_time = None
    dataset = soup.find('dataset')
    if dataset is not None:
        game_end_time = dataset.get('endtime', None)
    if game_end_time is not None:
        game_end_time = PlayTime(game_end_time.strip())

    # Coach footage and broadcast footage keep their start times in
    # different child elements.
    start_tag = 'catin' if coach else 'archivetcin'

    # Load everything into a list first, since we need to look ahead to see
    # the next play's start time to compute the current play's duration.
    rows = []
    for row in soup.find_all('row'):
        # The play id is either an <id> child element or, failing that,
        # a `playid` attribute. Rows with neither are dropped.
        playid = row.find('id')
        if not playid:
            playid = row.get('playid', None)
            if not playid:
                continue
            playid = playid.strip()
        else:
            playid = playid.get_text().strip()

        start = row.find(start_tag)
        if not start:
            continue
        rows.append((playid, PlayTime(start.get_text().strip()), row))

    # A predicate for determining whether to ignore a row or not in our final
    # result set. For example, timeouts take a lot of time but aren't needed
    # for play-by-play footage.
    def ignore(row):
        attrs = row.attrs
        if 'playdescription' in attrs:
            description = row['playdescription'].lower()
            if description.startswith(('timeout', 'two-minute')):
                return True

        # Did we miss anything?
        if 'preplaybyplay' in attrs:
            if row['preplaybyplay'].lower().startswith('timeout'):
                return True
        return False

    plays = OrderedDict()
    last_index = len(rows) - 1
    for i, (playid, start, row) in enumerate(rows):
        if ignore(row):
            continue
        # A play ends where the next row starts, even if that next row is
        # itself ignored. The last row has no known end.
        end = rows[i + 1][1] if i < last_index else None
        plays[playid] = Play(start, end, playid, game_end_time)
    return plays