def getYear(year, future=False):
    """Collect one full year of games, grouped by week.

    Faster than fetching week by week because this loads a single
    schedule page instead of 21 separate ones.

    year   -- season to fetch
    future -- when True, rows are parsed with getFuture instead of getGame
    Returns a list of per-week game lists, ordered by week number.
    """
    url = "https://www.pro-football-reference.com/years/{}/games.htm".format(
        str(year))
    soup = Base.openWebsite(url)
    byWeek = {}
    for tableRow in soup.find_all("tr"):
        weekKey = tableRow.find('th').get('csk')
        ###Week 29 doesn't have anything; rows without a csk are skipped too
        if weekKey == '29' or not weekKey:
            continue
        parsed = getFuture(tableRow) if future else getGame(tableRow)
        if not parsed:
            continue
        if weekKey in byWeek:
            byWeek[weekKey].append(parsed)
        else:
            byWeek[weekKey] = [parsed]
    orderedWeeks = sorted(byWeek.keys(), key=lambda wk: int(wk))
    return [byWeek[wk] for wk in orderedWeeks]
def findScores(gameCode):
    """Finds all scoring plays from a game.

    gameCode -- the PFR game path (appended to the site root)
    Returns (scores, soup): scores is a list of
    [minutes_left, seconds, away_score, home_score] entries, and soup is
    the parsed page so callers can reuse it without refetching.
    """
    soup = Base.openWebsite("https://www.pro-football-reference.com"+gameCode)
    ###awayScore and homeScore are for the actual scores
    ###(the [1:] drops the header row of each column)
    awayScore = soup.find_all(attrs={"data-stat":"vis_team_score"})[1:]
    homeScore = soup.find_all(attrs={"data-stat":"home_team_score"})[1:]
    ###allScores is for the time, quarter, etc.
    ###NOTE(review): the [4:4+len(awayScore)] slice assumes the scoring
    ###rows start at the fifth <tr> on the page and align one-to-one with
    ###awayScore/homeScore by index — verify against the page layout.
    allScores = soup.find_all("tr")[4:4+len(awayScore)]
    scores = []
    ###If the quarter is blank, the score happened in the same quarter as
    ###the previous scoring play
    lastQuarter = 1
    for i in range(len(allScores)):
        score = allScores[i]
        ###The Quarter is in the <th> tag, but we want the time to the
        ###end of the game
        quarter = score.find("th").text.strip()
        if(quarter == ""):
            quarter = lastQuarter
        if(quarter == "OT"):
            quarter = '5'
        lastQuarter = quarter
        # Minutes of regulation remaining after this quarter ends.
        # NOTE(review): for OT this gives (4-5)*15 = -15, so overtime
        # scores get negative minutes-left — presumably handled by
        # callers; confirm.
        quarter = (4-int(quarter))*15
        ###Find the time of the score, and then make that and quarter
        ###the time left
        time = score.find("td").text.strip().split(":")
        time[0] = quarter+int(time[0])
        time[1] = int(time[1])
        ###Take the next value in awayScore and homeScore to find the score
        ###at that time
        aScore = awayScore[i].text.strip()
        hScore = homeScore[i].text.strip()
        scores.append([time[0], time[1], int(aScore), int(hScore)])
    return scores, soup
def getComment(url):
    """Fetch a page and parse the play-by-play hidden inside a comment.

    PFR ships the play-by-play markup as an HTML comment under the
    <div id='all_pbp'> element, so it must be extracted and re-parsed.
    Returns (page soup, play-by-play soup).
    """
    soup = Base.openWebsite(url)
    isComment = lambda text: isinstance(text, Comment)
    pbpDiv = soup.find('div', attrs={'id': 'all_pbp'})
    rawComment = str(pbpDiv.find(string=isComment))
    commSoup = BeautifulSoup(rawComment, "html.parser")
    return soup, commSoup
def getDriveBase(gameCode=None, soup=None):
    """Gets the base HTML that has the drives, from pro football reference.

    Accepts either an already-parsed page (soup) or a game path
    (gameCode); the page is only fetched when soup is not supplied.
    Returns (away drives div, home drives div).
    """
    if not soup:
        soup = Base.openWebsite(
            "https://www.pro-football-reference.com" + gameCode)
    ###The drives are found in a comment under <div id="all_vis_drives">
    ###and <div id="all_home_drives"> tags
    away = soup.find(attrs={"id": "all_vis_drives"})
    home = soup.find(attrs={"id": "all_home_drives"})
    return away, home
def getRoster(team, year):
    """Returns a list of (Position, Name) tuples for the entire roster."""
    page = Base.openWebsite(getRosterUrl(Base.TEAM_ABBRS[team], year))
    roster = []
    for playerRow in page.find_all("tr", class_="full_table"):
        ###Keep only the last two characters of the position so that
        ###e.g. RLB collapses to LB — the site uses both spellings
        position = playerRow.find("th").text.strip()[-2:]
        playerName = playerRow.find(
            "td", attrs={"data-stat": "player"}).text.strip()
        roster.append((position, playerName))
    return roster
def getFuture(game, year):
    """Returns the injury difference for a future game in year
    (home team - away team).

    game supplies the away name, home name, and game path at indices
    0, 1 and 2. Future games list every injury including backups, so
    getFutureTeam is responsible for separating those out.
    """
    pageUrl = "https://www.pro-football-reference.com" + game[2]
    soup = Base.openWebsite(pageUrl)
    awayAbbr = Base.TEAM_ABBRS[game[0]]
    homeAbbr = Base.TEAM_ABBRS[game[1]]
    awayInj = soup.find(
        "div", attrs={"id": "all_{}_injury_report".format(awayAbbr)})
    homeInj = soup.find(
        "div", attrs={"id": "all_{}_injury_report".format(homeAbbr)})
    homeScore = getFutureTeam(homeInj, game[1], year)
    awayScore = getFutureTeam(awayInj, game[0], year)
    return subtract(homeScore, awayScore)
def getSnapTags(url):
    """Returns a list of tags in the comment with the games starters.

    url -- PFR game path appended to the site root
    Returns (awayPlayers, homePlayers) on success. When the page has no
    snap-count tables, returns (soup, error-message string) instead —
    callers must check for that shape.
    """
    soup = Base.openWebsite("https://www.pro-football-reference.com" + url)
    ###The comment is right above the actual tags for the starters
    awayStarters = soup.find("div", attrs={"id": "all_vis_snap_counts"})
    homeStarters = soup.find("div", attrs={"id": "all_home_snap_counts"})
    # Fix: use an identity check rather than `== None` — bs4 tags
    # override __eq__ (content comparison), so `is None` is the correct
    # and idiomatic test for a missing element.
    if awayStarters is None:
        return (soup, "This game does not have a snap count list")
    findComment = lambda text: isinstance(text, Comment)
    ###Gets everything in the div tag that is a comment, then splits it by tag
    awayRoster = awayStarters.find(string=findComment).split(">")
    homeRoster = homeStarters.find(string=findComment).split(">")
    awayPlayers = removeHeaders(awayRoster)
    homePlayers = removeHeaders(homeRoster)
    return awayPlayers, homePlayers
def getWeek(year, week, future=False):
    """Returns the games of one week in that year.

    The tuples are (winner, loser, home, points).

    year   -- season to fetch
    week   -- week number, matched against each row's 'csk' attribute
    future -- when True, rows are parsed as future games (no points)
    """
    url = "https://www.pro-football-reference.com/years/" + str(
        year) + "/games.htm"
    soup = Base.openWebsite(url)
    games = []
    ###Each row (tr tag) is a different game
    row = soup.find_all("tr")
    for i in row:
        if (i.find("th").get('csk') == str(week)):
            if (future):
                ###If future is true, it will add a game in the future form, without points
                # NOTE(review): this file's getFuture takes (game, year);
                # this one-argument call only works if a different
                # getFuture is in scope — confirm which is intended.
                games.append(getFuture(i))
            else:
                games.append(getGame(i))
    return games
def getGameStarters(url, home):
    """Gets the 22 starters for the game. Only for future game injuries,
    where the team doesn't have a roster.

    url  -- PFR game path appended to the site root
    home -- True for the home team's starters, False for the visitors'
    Returns a list of (position, name) tuples.
    """
    soup = Base.openWebsite("https://www.pro-football-reference.com" + url)
    # Bug fix: the branches were inverted — home=True selected
    # "all_vis_starters" (the visiting team). Every other scraper in
    # this file uses vis = away, home = home; match that convention.
    if (home):
        tableName = "all_home_starters"
    else:
        tableName = "all_vis_starters"
    starterDiv = soup.find("div", attrs={"id": tableName})
    ###The starter table is hidden inside an HTML comment; re-parse it
    findComment = lambda text: isinstance(text, Comment)
    starterSoup = BeautifulSoup(
        str(starterDiv.find(string=findComment)), "html.parser")
    players = []
    for i in starterSoup.find('table').find_all('tr'):
        ###Skip header/spacer rows that lack either cell
        if (i.find('th') and i.find('td')):
            name = i.find('th').text
            pos = i.find('td').text
            players.append((pos, name))
    return players
def getSpread(gameCode):
    """Returns the Vegas line for a game as (favored team, points).

    gameCode -- PFR game path appended to the site root
    Returns (team name, spread); spread is 0.0 for a "Pick" (no favorite).
    """
    soup = Base.openWebsite('https://www.pro-football-reference.com' + gameCode)
    infoTable = soup.find('div', attrs={'id': 'all_game_info'})
    # Bug fix: the original passed an undefined name `comment` to find(),
    # which raises NameError at runtime. Use the isinstance-Comment
    # predicate exactly as the other scrapers in this file do.
    findComment = lambda text: isinstance(text, Comment)
    infoComment = infoTable.find(string=findComment).split('>')
    oddLabelIndex = [
        i for i in range(len(infoComment))
        if 'vegas' in infoComment[i].lower()
    ]
    ###The line value sits two tag-fragments after the "Vegas Line" label
    odd = infoComment[oddLabelIndex[0] + 2]
    odd = odd[:odd.index('<')]
    odd = odd.split(' ')
    points = odd[-1]
    if ('pick' in points.lower()):
        ###"Pick" / "Pick'em" means no favorite
        points = 0.0
    else:
        points = float(points)
    team = ' '.join(odd[:-1])
    return team, points