# Command-line flags: each one is matched case-insensitively against every
# entry of sys.argv.
write_bool = "write" in map(str.lower, sys.argv)
new_stats_bool = "new" in map(str.lower, sys.argv)

# Choose the player-stats source from the "new" flag:
#   * flag absent  -> use the previously combined file named in the config
#                     (the R-code-based one that merges the stats per year);
#   * flag present -> read the newly scraped per-season files and stack them.
if not new_stats_bool:
    playerstatfile = Path("data_raw") / "nfl" / cfg["playerstatfile"]
    playerstats = readplayergamestats(playerstatfile)
else:
    # This must be run first to give the pickle files access to what they need.
    import nfl_convertPlayerGameStats

    # Build one playerstats frame by reading every season and concatenating
    # the per-season frames vertically.
    # FIXME: Keeping this set to 2018 for now
    seasons = range(1999, 2018 + 1)
    season_frames = []
    for season in seasons:
        playerstatfile = (
            Path("data_raw") / "nfl" / f"nfl-player-game-statistics{season}.csv"
        )
        season_frames.append(readplayergamestats(playerstatfile))
    playerstats = pd.concat(season_frames)

# Quarterback section: runs when "qb" is passed on the command line or when
# all_bool (defined elsewhere in this script) is truthy.
qb_requested = "qb" in map(str.lower, sys.argv)
if qb_requested or all_bool:
    # Subset of dataframe: rows whose position is QB, copied so the derived
    # columns below do not touch playerstats.
    is_qb = playerstats["Pos"].isin(["QB"])
    meet_cond = playerstats[is_qb].copy()

    # Calculated variables
    # meet_cond["calc_YFS"] = meet_cond["Rush Yard"] + meet_cond["Rec Yards"]
    pass_comp = meet_cond["Pass Comp"]
    pass_att = meet_cond["Pass Att"]
    # Completion percentage (0-100 scale).
    meet_cond["calc_pass_comp"] = (pass_comp / pass_att) * 100
    pass_yard = meet_cond["Pass Yard"]
    # Yards per attempt and yards per completion.
    meet_cond["calc_yard_per_att"] = pass_yard / pass_att
    meet_cond["calc_yard_per_comp"] = pass_yard / pass_comp
from tqdm import tqdm

# Per-season processing loop for ComputeElo.
# NOTE(review): `seasons`, `playerstatroot`, `teamstatroot`, `extension`, `eu`,
# `Path` and the process* flags are all defined outside this excerpt.

# Emit the start-up message through tqdm's own writer rather than print().
tqdm.write("Beginning ComputeElo")
# Rebind `seasons` to a tqdm wrapper so iterating it drives a progress bar.
seasons = tqdm(seasons)
for season in seasons:
    # Show the season currently being processed in the progress-bar label.
    seasons.set_description(f"s: {season}")

    # Read in files for the current season
    playerstatfile = Path("data_raw", "cfb", f"{playerstatroot}{season}{extension}")
    teamstatfile = Path("data_raw", "cfb", f"{teamstatroot}{season}{extension}")
    teamstats = eu.readteamgamedata(teamstatfile)

    # Read in and merge player stats with demographics/position
    # (any merging happens inside the helper; not visible from here).
    playerstats = eu.readplayergamestats(playerstatfile)
    # Distinct "gamedate" values, sorted in their raw form and then cast to int.
    dates = [int(x) for x in sorted(list(playerstats["gamedate"].unique()))]

    # Filter based on stats
    # FUTURE: change here to process based on player's position.
    # FUTURE: [player["position"].rsplit("/")] -- returns a list of positions the player has.
    if processRBs:
        # RBs - Filter to only players with at least 1 Rush Attempt
        rbstats = playerstats[playerstats["Rush Att"] > 0].copy()
    if processWRs:
        # WRs/TEs - Filter to only players with at least 1 Catch
        wrstats = playerstats[playerstats["Rec"] > 0].copy()
    # The QB branch body lies beyond the end of this excerpt.
    if processQBs:
from eloUtilities import readplayergamestats

# Pull the college "findStats" settings out of the JSON config file.
with Path("elo_config.json").open() as config_file:
    cfg = json.load(config_file)["findStats"]["college"]

extension = ".csv"

# Seasons to run (2005 through 2019 inclusive).
seasons = range(2005, 2019 + 1)

# Read one player-game-stats file per season from ../data_raw/cfb.
cfb_dir = Path("..", "data_raw", "cfb")
all_seasons = [
    readplayergamestats(cfb_dir / f"{cfg['playerstatfile']}{season}{extension}")
    for season in seasons
]

# Stack every season into a single frame (fresh 0..n-1 index) so medians can
# be computed across all of them.
playerstats = pd.concat(all_seasons, ignore_index=True, axis=0)

# Subset of dataframe: rows with more than 10 rush attempts, copied so the
# derived column below does not touch playerstats.
enough_rushes = playerstats["Rush Att"] > 10
meet_cond = playerstats[enough_rushes].copy()

# Calculated variables: yards per carry.
rush_yard = meet_cond["Rush Yard"]
rush_att = meet_cond["Rush Att"]
meet_cond["calc_YPC"] = rush_yard / rush_att

# Return median for selected variables
# print("recs:", meet_cond["Rec"].median())