def list_team_names(my_year):
    """Print the full name and abbreviation of every team for ``my_year``.

    If constructing ``Teams`` for the year raises, a "No data" message is
    printed instead and the function returns ``None`` without listing
    anything.
    """
    try:
        all_teams = Teams(year=my_year)
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still propagate to the caller.
        print("No data for Year %s" % my_year)
        return

    for team in all_teams:
        # Name left-aligned in 22 columns, abbreviation right-aligned in 3.
        print("%-22s %3s" % (team.name, team.abbreviation))
def test_invalid_default_year_reverts_to_previous_year(self, *args, **kwargs):
    """With the season lookup stubbed to 2018, default teams must report '2017'."""
    # Stub utils._find_year_for_season so Teams() sees a fixed season value.
    season_lookup = flexmock(utils).should_receive('_find_year_for_season')
    season_lookup.and_return(2018)

    for club in Teams():
        assert club._year == '2017'
def setup_method(self, *args, **kwargs):
    """Prepare the fixtures for each test: expected stats, abbreviations, teams."""
    # Freeze "today" before Teams() is built, so it sees MockDateTime(YEAR, MONTH).
    (
        flexmock(utils)
        .should_receive('_todays_date')
        .and_return(MockDateTime(YEAR, MONTH))
    )

    # Expected attribute values for the Kansas City Chiefs entry.
    self.results = {
        'rank': 6,
        'abbreviation': 'KAN',
        'name': 'Kansas City Chiefs',
        'wins': 10,
        'losses': 6,
        'win_percentage': .625,
        'post_season_result': LOST_WILD_CARD,
        'games_played': 16,
        'points_for': 415,
        'points_against': 339,
        'points_difference': 76,
        'margin_of_victory': 4.8,
        'strength_of_schedule': -1.3,
        'simple_rating_system': 3.4,
        'offensive_simple_rating_system': 3.8,
        'defensive_simple_rating_system': -0.3,
        'yards': 6007,
        'plays': 985,
        'yards_per_play': 6.1,
        'turnovers': 11,
        'fumbles': 3,
        'first_downs': 322,
        'pass_completions': 363,
        'pass_attempts': 543,
        'pass_yards': 4104,
        'pass_touchdowns': 26,
        'interceptions': 8,
        'pass_net_yards_per_attempt': 7.1,
        'pass_first_downs': 198,
        'rush_attempts': 405,
        'rush_yards': 1903,
        'rush_touchdowns': 12,
        'rush_yards_per_attempt': 4.7,
        'rush_first_downs': 95,
        'penalties': 118,
        'yards_from_penalties': 1044,
        'first_downs_from_penalties': 29,
        'percent_drives_with_points': 44.9,
        'percent_drives_with_turnovers': 6.3,
        'points_contributed_by_offense': 115.88,
    }

    # The 32 team abbreviations the tests expect.
    self.abbreviations = [
        'RAM', 'NWE', 'PHI', 'NOR',
        'JAX', 'KAN', 'DET', 'PIT',
        'RAV', 'MIN', 'SEA', 'CAR',
        'SDG', 'DAL', 'ATL', 'WAS',
        'HTX', 'TAM', 'OTI', 'SFO',
        'GNB', 'BUF', 'RAI', 'NYJ',
        'CRD', 'CIN', 'DEN', 'MIA',
        'CHI', 'CLT', 'NYG', 'CLE',
    ]

    self.teams = Teams()
def test_nfl_empty_page_returns_no_teams(self):
    """When no stats table is available, Teams() must contain zero teams."""
    # _no_data_found is expected to be invoked exactly once.
    (
        flexmock(utils)
        .should_receive('_no_data_found')
        .once()
    )
    # Stub the stats-table lookup to return None.
    (
        flexmock(utils)
        .should_receive('_get_stats_table')
        .and_return(None)
    )

    empty_teams = Teams()

    assert len(empty_teams) == 0
def addTeams(conn, startYear, endYear):
    """Insert every distinct team seen in a span of seasons, plus per-season stats.

    Seasons come from ``range(startYear, endYear)``, so ``endYear`` itself is
    not visited. A team is inserted (via the ``addTeamData`` SQL template) the
    first time its abbreviation appears; ``sqhelp.addTeamSeasonStats`` is
    called for every team in every season.

    Returns:
        dict: the abbreviations that were inserted, each mapped to ``""``.
    """
    inserted = {}
    for season in range(startYear, endYear):
        for club in Teams(year=season):
            abbr = club.abbreviation
            if abbr not in inserted:
                # NOTE(review): the statement is assembled with str.format;
                # confirm team names cannot carry quote characters, or move
                # to bound parameters if sqhelp.transaction supports them.
                insert_sql = addTeamData.format(SCHEMANAME, abbr, club.name)
                sqhelp.transaction(conn, insert_sql)
                inserted[abbr] = ""
            sqhelp.addTeamSeasonStats(conn, club, season)
    return inserted
def list_team_names(my_year):
    """Print the full name and abbreviation of every team for ``my_year``.

    If constructing ``Teams`` for the year raises, a "No data" message is
    printed and control returns to the caller without listing anything.
    """
    try:
        all_teams = Teams(year=my_year)
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not mistaken for "no data".
        print("No data for Year %s" % my_year)
        return

    # Walk through each team and print just the long name and abbreviation,
    # both left-aligned (22 and 3 columns).
    for team in all_teams:
        print("%-22s %-3s" % (team.name, team.abbreviation))
def test_teams_string_representation(self, *args, **kwargs):
    """Check that ``Teams().__repr__()`` equals the expected team listing.

    Each entry in the expected text has the form ``<name> (<abbreviation>)``.
    """
    # NOTE(review): the comparison is exact, so the separators between the
    # entries of this literal are significant; confirm they match what
    # __repr__ emits before reformatting or wrapping the string.
    expected = """Los Angeles Rams (RAM) New England Patriots (NWE) Philadelphia Eagles (PHI) New Orleans Saints (NOR) Jacksonville Jaguars (JAX) Kansas City Chiefs (KAN) Detroit Lions (DET) Pittsburgh Steelers (PIT) Baltimore Ravens (RAV) Minnesota Vikings (MIN) Seattle Seahawks (SEA) Carolina Panthers (CAR) Los Angeles Chargers (SDG) Dallas Cowboys (DAL) Atlanta Falcons (ATL) Washington Redskins (WAS) Houston Texans (HTX) Tampa Bay Buccaneers (TAM) Tennessee Titans (OTI) San Francisco 49ers (SFO) Green Bay Packers (GNB) Buffalo Bills (BUF) Oakland Raiders (RAI) New York Jets (NYJ) Arizona Cardinals (CRD) Cincinnati Bengals (CIN) Denver Broncos (DEN) Miami Dolphins (MIA) Chicago Bears (CHI) Indianapolis Colts (CLT) New York Giants (NYG) Cleveland Browns (CLE)"""

    teams = Teams()

    assert teams.__repr__() == expected
def pull_data(start_year=2002, end_year=2020):
    """Load per-team season stats and split them into offense/defense/misc records.

    Args:
        start_year: First season to load (inclusive). Defaults to 2002.
        end_year: Season to stop before (exclusive, as with ``range``).
            Defaults to 2020.

    Returns:
        tuple: ``(offense, defense, misc)``. Each element is a list of row
        dicts (``DataFrame.to_dict('records')``) restricted to that group's
        columns; every row carries ``years`` and ``abbreviation``. Three
        empty lists are returned when no team data was loaded at all.
    """
    offense_columns = [
        'years', 'abbreviation', 'yards', 'margin_of_victory',
        'offensive_simple_rating_system', 'pass_attempts',
        'pass_completions', 'pass_first_downs',
        'pass_net_yards_per_attempt', 'pass_touchdowns', 'pass_yards',
        'percent_drives_with_turnovers', 'percent_drives_with_points',
        'plays', 'points_contributed_by_offense', 'rank', 'rush_attempts',
        'rush_yards', 'rush_first_downs', 'rush_yards_per_attempt',
        'turnovers', 'win_percentage', 'yards_per_play', 'first_downs',
    ]
    defense_columns = [
        'years', 'abbreviation', 'defensive_simple_rating_system',
        'interceptions', 'points_against',
    ]
    misc_columns = [
        'years', 'abbreviation', 'fumbles', 'games_played', 'losses', 'name',
        'first_downs_from_penalties', 'penalties', 'points_difference',
        'points_for', 'post_season_result', 'simple_rating_system',
        'strength_of_schedule', 'win_percentage', 'wins',
        'yards_from_penalties',
    ]

    frames = []
    for year in range(start_year, end_year):
        for team in Teams(year):
            # Tag each team's rows with the season here, so the year column
            # cannot drift out of step with the rows it describes.
            frames.append(team.dataframe.assign(years=year))

    if not frames:
        return [], [], []

    # One concat at the end: DataFrame.append was removed in pandas 2.0 and
    # re-copied the whole accumulated frame on every call.
    df = pd.concat(frames)

    # 'records' is the full orient name; the abbreviation 'record' is
    # rejected by current pandas.
    offense = df[offense_columns].to_dict('records')
    defense = df[defense_columns].to_dict('records')
    misc = df[misc_columns].to_dict('records')
    return offense, defense, misc
def _teams_by_abbreviation(year):
    """Return ``{abbreviation: team}`` for every team ``Teams(year)`` yields."""
    return {team.abbreviation: team for team in Teams(year)}


def generate_teams(year1, year2, include_interval=True):
    """Build a ``{year: {abbreviation: team}}`` mapping.

    Args:
        year1: First season.
        year2: Last season.
        include_interval: When true, every season from ``year1`` through
            ``year2`` (inclusive) is loaded; when false, only ``year1`` and
            ``year2`` themselves are loaded.

    Returns:
        dict: one entry per loaded season, each mapping team abbreviation to
        the team object for that season.
    """
    if include_interval:
        years = range(year1, year2 + 1)
    else:
        years = (year1, year2)

    all_teams_dict = {}
    for year in years:
        all_teams_dict[year] = _teams_by_abbreviation(year)
    return all_teams_dict
def update_years():
    """Prompt for a year range and export each team's extended schedule.

    For every year from the entered beginning year through the ending year,
    and for every team ``Teams`` returns for that year, this:

    1. writes ``team.schedule.dataframe_extended`` to
       ``./data/year/<year>/<abbr>/<year>_<abbr>.csv`` (whole path lower-cased),
    2. renames the first header cell of that CSV to ``game_id``,
    3. calls ``utils.csv_to_json`` to produce the sibling ``.json`` file,
    4. prints a ``<n>/<total> complete`` progress line.

    Raises:
        ValueError: if either entered year cannot be converted by ``int``.
    """
    min_year = input("Beginning year: ")
    max_year = input("Ending year: ")

    while int(min_year) <= int(max_year):
        teams = Teams(min_year)
        total_teams = len(teams)
        print('Starting data collection ... \n')

        for count, team in enumerate(teams, start=1):
            team_dir = './data/year/{0}/{1}'.format(
                min_year, team.abbreviation).lower()
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists test.
            os.makedirs(team_dir, exist_ok=True)

            file_csv = './data/year/{0}/{1}/{0}_{1}.csv'.format(
                min_year, team.abbreviation).lower()
            file_json = './data/year/{0}/{1}/{0}_{1}.json'.format(
                min_year, team.abbreviation).lower()

            team.schedule.dataframe_extended.to_csv(file_csv)

            # Rewrite the file so its first header cell reads 'game_id'.
            # newline='' is what the csv module requires of file objects;
            # without it, writes on Windows gain an extra '\r' per row.
            with open(file_csv, 'r', newline='') as f:
                lines = list(csv.reader(f))
            if lines and lines[0]:
                lines[0][0] = 'game_id'
            with open(file_csv, 'w', newline='') as f:
                csv.writer(f).writerows(lines)

            utils.csv_to_json(file_csv, file_json)
            print('{0}/{1} complete'.format(count, total_teams))

        # The helper supplies the next loop value; presumably the following
        # year in a form int() and Teams() both accept -- verify in utils.
        min_year = utils.year_plus_one(min_year)
def display_teams(my_year):
    """Print one summary line per team for ``my_year``, fewest wins first.

    Each line shows name, abbreviation, wins, losses, points for, points
    against and point difference. If constructing ``Teams`` for the year
    raises, a "No data" message is printed and the function returns ``None``.
    """
    try:
        all_teams = Teams(year=my_year)
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still propagate.
        print("No data for Year %s" % my_year)
        return

    # Ascending by wins; sorted() is stable, so teams with equal wins keep
    # the order Teams yielded them in.
    sorted_teams = sorted(all_teams, key=lambda team: team.wins)

    for team in sorted_teams:
        print(
            "%-20s %-11s Won: %2d Lost: %2d pts+: %-3d pts-: %-3d ptdiff: %-4d"
            % (team.name, team.abbreviation, team.wins, team.losses,
               team.points_for, team.points_against, team.points_difference))
def nfl_build_season_passing_data(year):
    """Assemble the passing dataset and matching labels for one NFL season.

    Every game in every team's schedule that has a raw result contributes
    one entry from ``nfl_build_game_passing_data`` and one from
    ``nfl_build_game_labels``; games without a result are skipped.

    Returns:
        tuple: ``(inputs, labels)``, two lists of equal length.
    """
    print(f"Gathering passing data for {year} NFL season:")
    season_teams = Teams(str(year))

    # One progress tick per team.
    progress = ChargingBar('Analyzing teams', max=len(season_teams))

    inputs = []
    labels = []
    for club in season_teams:
        for game in club.schedule:
            # A falsy raw result means there is nothing to extract yet.
            if game._result:
                inputs.append(nfl_build_game_passing_data(game))
                labels.append(nfl_build_game_labels(game))
        progress.next()

    # Sanity check: both lists are filled in lockstep.
    assert len(inputs) == len(
        labels), "Error: input & label array lengths mismatch!"

    progress.finish()
    return inputs, labels
from sportsreference.nfl.teams import Teams

year_start = 1999  # starting year
year_end = 2018  # ending year
sport = 'NFL'

# Print one CSV-style line per team and season:
#   sport,name,wins,losses,year,zscore,|zscore|
for season in range(year_start, year_end + 1):
    for team in Teams(season):
        # Only wins and losses count as games here; ties are not included.
        games = team.wins + team.losses
        if games:
            # (wins - games/2) / sqrt(games/4): distance of the win count
            # from an even split, in units of sqrt(games * 0.5 * 0.5).
            zscore = (team.wins - (0.5 * games)) / (games * 0.5 * 0.5)**(0.5)
        else:
            # No decided games: the ratio is undefined, so report NaN
            # instead of raising ZeroDivisionError.
            zscore = float('nan')
        zscoreabs = abs(zscore)
        print(
            f'{sport},{team.name},{team.wins},{team.losses},{season},{zscore},{zscoreabs}'
        )
from sportsreference.nfl.teams import Teams
import pandas as pd

# Collect each team's schedule frame, tagged with the team's abbreviation.
dfs = []
for club in Teams():
    club_games = club.schedule.dataframe
    club_games['Team'] = club.abbreviation
    dfs.append(club_games)

# Stack all schedules into one frame ordered by week.
nfl_sched_df = pd.concat(dfs).sort_values(by='week')

# Abbreviations as used by the source data -> alternative abbreviations.
sched_name_map = {
    'CLT': 'IND',
    'CRD': 'ARI',
    'GNB': 'GB',
    'HTX': 'HOU',
    'JAX': 'JAC',
    'KAN': 'KC',
    'NOR': 'NO',
    'NWE': 'NE',
    'OTI': 'TEN',
    'RAI': 'LV',
    'RAM': 'LAR',
    'RAV': 'BAL',
    'SDG': 'LAC',
    'SFO': 'SF',
    'TAM': 'TB',
}
from sportsreference.nfl.teams import Teams from sportsreference.nfl.roster import Roster from sportsreference.nfl.roster import Player import numpy as np import pandas as pd # In[3]: team_array = [] teams = Teams() for team in teams: team_array.append(team.abbreviation) print(team_array) # In[4]: player_array = [] tm_n = len(team_array) for yr in range (2008, 2019): for i in range(tm_n): try: roster = Roster(team_array[i], yr)
import time

from sportsreference.nfl.teams import Teams
import pandas as pd

# Boxscore column templates; the placeholder takes the 'home'/'away' prefix.
stat_templates = (
    '{}_fumbles_lost',
    '{}_interceptions',
    '{}_pass_yards',
    '{}_rush_yards',
    '{}_times_sacked',
    '{}_points',
)

defense_list = []
for year in ['2010', '2011', '2012']:
    teams = Teams(year)
    for team in teams:
        for i, game in enumerate(team.schedule, start=1):
            t0 = time.time()

            # Read the other side's columns: 'away_*' when this team's
            # location is 'Home', 'home_*' otherwise.
            prefix = 'away' if game.location == 'Home' else 'home'

            game_df = game.boxscore.dataframe
            row = [team.abbreviation, game.datetime]
            row.extend(
                game_df[template.format(prefix)].values[0]
                for template in stat_templates
            )
            defense_list.append(row)

            print('{}, {} game {} finished in {:.2f}s'.format(
                year, team.abbreviation, i, time.time() - t0))
def display_results(my_year):
    """Interactively show game results and the win/loss/tie record per team.

    Repeatedly prompts for a team abbreviation (input is upper-cased):

    * ``""``, ``Q``, ``QUIT`` or ``STOP`` ends the loop;
    * ``L`` or ``LIST`` prints the team list via ``list_team_names``;
    * anything else is passed to ``Schedule``; if that raises, a message is
      printed and the prompt repeats.

    For a loaded schedule, one line is printed per game up to the first game
    whose ``points_allowed`` is ``None``, followed by the tallied record.
    """
    # Abbreviation -> full team name for the season.
    abbrevName2Name = {
        team.abbreviation: team.name for team in Teams(year=my_year)
    }

    while True:
        # Upper-case the input, since abbreviations are stored in upper case.
        name = input("Team Name (l/list/q/quit/stop): ").upper()

        if name in ("", "Q", "QUIT", "STOP"):
            break

        if name in ("L", "LIST"):
            list_team_names(my_year)
            continue

        try:
            allgames = Schedule(name, year=my_year)
        except Exception:
            # Exception rather than a bare ``except`` so Ctrl-C still
            # interrupts the prompt loop.
            print("This is an unknown team or an unavaliable year")
            continue

        # Fall back to the raw abbreviation rather than raising KeyError
        # when a schedule loads for a team missing from this season's list.
        teamName = abbrevName2Name.get(name, name)

        won = 0
        lost = 0
        tie = 0

        for game in allgames:
            # A missing points_allowed marks the end of the valid games.
            if game.points_allowed is None:
                break

            oppAbbr = game.opponent_abbr.upper()
            # Same fallback for opponents, so one unknown abbreviation does
            # not abort the whole listing.
            oppName = abbrevName2Name.get(oppAbbr, oppAbbr)

            result = "Not Played" if game.result is None else game.result

            if game.points_scored > game.points_allowed:
                won += 1
            elif game.points_scored < game.points_allowed:
                lost += 1
            elif game.points_scored == game.points_allowed:
                tie += 1

            print("%s %4s vs %24s %2d to %2d (%s)" %
                  (teamName, result, oppName, game.points_scored,
                   game.points_allowed, game.type))

        print("Record: Wins: %d Loss: %d Ties: %d" % (won, lost, tie))
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot

# what years should we evaluate?
start_year = 1998
end_year = 2018

# query for the data
# Frames are gathered in a list and concatenated once; concatenating inside
# the loop would re-copy every previously loaded row for each team.
schedule_frames = []
for y in range(start_year, end_year + 1):
    teams = Teams(year=y)
    print("---- " + str(y) + " ----")
    for t in teams:
        print(t.name)
        df = t.schedule.dataframe
        df["team"] = t.name
        schedule_frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame.
if schedule_frames:
    nfl_ = pd.concat(schedule_frames, axis=0)
else:
    nfl_ = pd.DataFrame()

# remove missing values
nfl_ = nfl_.dropna()

# convert time_of_possession into minutes
# A "00:" prefix is added before parsing; the minute and second components
# are then combined into fractional minutes.
nfl_["time_of_possession"] = "00:" + nfl_["time_of_possession"]
nfl_["time_of_possession"] = pd.to_datetime(nfl_["time_of_possession"])
minutes = nfl_["time_of_possession"].dt.minute
minutes = minutes + (nfl_["time_of_possession"].dt.second / 60)