def insert_teams_stats(df):
    """Store the per-team box-score totals of every game row in *df*.

    For each row two ``TeamsStatsModel`` objects are added to the session:
    the home team first (``is_home_team=True``), then the visiting team
    (``is_home_team=False``).  The session is flushed after each row and
    committed once all rows have been processed.

    Args:
        df: DataFrame providing ``GAME_ID``, ``HOME_TEAM_ID``,
            ``VISITOR_TEAM_ID`` and the ``PTS``/``FG_PCT``/``FT_PCT``/
            ``FG3_PCT``/``AST``/``REB`` columns suffixed ``_home``/``_away``.
    """
    logging.warning("PROCESSING GAMES TEAMS STATS...")

    # One entry per side of the game, home side first:
    # (is_home_team, team id, points, FG%, FT%, 3P%, assists, rebounds).
    sides = (
        (True, "HOME_TEAM_ID", "PTS_home", "FG_PCT_home", "FT_PCT_home",
         "FG3_PCT_home", "AST_home", "REB_home"),
        (False, "VISITOR_TEAM_ID", "PTS_away", "FG_PCT_away", "FT_PCT_away",
         "FG3_PCT_away", "AST_away", "REB_away"),
    )

    for _, series in tqdm(df.iterrows(), total=df.shape[0]):
        record = dict(series)
        # Build both objects before touching the session.
        team_stats = [
            TeamsStatsModel(
                game_id=try_parse_int(record["GAME_ID"]),
                team_id=try_parse_int(record[team_col]),
                pts=try_parse_int(record[pts_col]),
                fg_pct=try_parse_float(record[fg_col]),
                ft_pct=try_parse_float(record[ft_col]),
                fg3_pct=try_parse_float(record[fg3_col]),
                ast=try_parse_int(record[ast_col]),
                reb=try_parse_int(record[reb_col]),
                is_home_team=is_home,
            )
            for is_home, team_col, pts_col, fg_col, ft_col, fg3_col,
            ast_col, reb_col in sides
        ]
        for stats in team_stats:
            db.session.add(stats)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES TEAMS STATS...")
def insert_players_teams(season_id, grouped):
    """Record which player belonged to which team during a season.

    Args:
        season_id: Value stored as ``season_id`` on every created row.
        grouped: Iterable of ``((team_id, player_id), data)`` pairs; only
            the key pair is used, the grouped data itself is ignored.

    One ``PlayerSeasonTeamModel`` is added per pair and the session is
    committed once after the last pair.
    """
    logging.warning("PROCESSING PLAYERS TEAMS SEASON...")

    for (team_id, player_id), _ in tqdm(grouped):
        membership = PlayerSeasonTeamModel(
            season_id=season_id,
            team_id=try_parse_int(team_id),
            player_id=try_parse_int(player_id),
        )
        db.session.add(membership)

    db.session.commit()
    logging.warning("END PROCESS PLAYERS TEAMS SEASON...")
def process_players_dataset(season):
    """Insert or update players from ``data/{season}/players.csv``.

    Every CSV row is matched to a ``PlayerModel`` by ``PERSON_ID``.  An
    existing player has its name, ``from_year``, ``to_year`` and
    ``still_playing`` overwritten; otherwise a new player is created with
    those values and added to the session.  ``still_playing`` is true only
    when ``ROSTERSTATUS`` parses to ``1``.  The session is flushed after
    each row and committed once at the end.

    Args:
        season: Name of the sub-directory of ``data/`` to read from.
    """
    logging.warning("PROCESSING PLAYER DATASET")
    players_df = pd.read_csv(f"data/{season}/players.csv", delimiter=",")

    for _, series in tqdm(players_df.iterrows(), total=players_df.shape[0]):
        record = dict(series)
        player_id = try_parse_int(record["PERSON_ID"])
        first_name, last_name = split_player_name(
            record["DISPLAY_LAST_COMMA_FIRST"]
        )
        player = PlayerModel.query.filter_by(id=player_id).first()

        # Values written identically by the update and the insert path.
        fields = {
            "first_name": first_name,
            "last_name": last_name,
            "from_year": try_parse_int(record["FROM_YEAR"]),
            "to_year": try_parse_int(record["TO_YEAR"]),
            "still_playing": bool(try_parse_int(record["ROSTERSTATUS"]) == 1),
        }

        if not player:
            player = PlayerModel(id=player_id, **fields)
            db.session.add(player)
        else:
            for attribute, value in fields.items():
                setattr(player, attribute, value)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS PLAYER DATASET")
def process_teams_dataset(season):
    """Insert or update teams from ``data/{season}/teams.csv``.

    Every CSV row is matched to a ``TeamModel`` by ``TEAM_ID``.  An existing
    team has ``end_year``, ``team_code``, ``nick_name``, ``city`` and
    ``arena`` overwritten (``start_year`` and ``year_founded`` are left as
    stored); otherwise a new team is created with all fields and added to
    the session.  The session is flushed after each row and committed once
    at the end.

    Args:
        season: Name of the sub-directory of ``data/`` to read from.
    """
    logging.warning("PROCESSING TEAM DATASET...")
    teams_df = pd.read_csv(f"data/{season}/teams.csv", delimiter=",")

    for _, series in tqdm(teams_df.iterrows(), total=teams_df.shape[0]):
        record = dict(series)
        team_id = try_parse_int(record["TEAM_ID"])
        team = TeamModel.query.filter_by(id=team_id).first()

        # Fields refreshed on update and also supplied on insert.
        refreshed = {
            "end_year": try_parse_int(record["MAX_YEAR"]),
            "team_code": str(record["ABBREVIATION"]),
            "nick_name": str(record["NICKNAME"]),
            "city": str(record["CITY"]),
            "arena": str(record["ARENA"]),
        }

        if not team:
            team = TeamModel(
                id=try_parse_int(record["TEAM_ID"]),
                start_year=try_parse_int(record["MIN_YEAR"]),
                year_founded=try_parse_int(record["YEARFOUNDED"]),
                **refreshed,
            )
            db.session.add(team)
        else:
            for attribute, value in refreshed.items():
                setattr(team, attribute, value)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS TEAM DATASET...")
def insert_games(season, df):
    """Create one ``GameModel`` per row of *df* and commit them.

    Playoff information comes from ``data/{season}/playoff_series.csv``: a
    game whose ``GAME_ID`` appears there is stored with ``is_playoff=True``
    and the matching ``SERIES_ID`` (as a string of its integer value);
    every other game gets ``is_playoff=False`` and no series id.

    Team names are stored as ``"<city> <nick_name>"``, looked up from the
    ``TeamModel`` rows currently in the database.

    Args:
        season: Name of the sub-directory of ``data/`` holding the playoff
            series file.
        df: DataFrame providing ``GAME_ID``, ``GAME_DATE_EST``
            (``YYYY-MM-DD``), ``GAME_STATUS_TEXT``, ``HOME_TEAM_ID``,
            ``VISITOR_TEAM_ID``, ``PTS_home``, ``PTS_away``,
            ``HOME_TEAM_WINS`` and ``SEASON``.

    Raises:
        KeyError: If a game references a team id that has no ``TeamModel``.
    """
    logging.warning("PROCESSING GAMES DATASET...")
    df_playoffs = pd.read_csv(
        f"data/{season}/playoff_series.csv", delimiter=","
    )

    # Index the playoff file once instead of re-filtering the DataFrame for
    # every game.  The first row wins if a game id is listed more than once.
    # Raw values are kept so conversion only happens for games that are
    # actually inserted.
    series_by_game = {}
    for playoff_game_id, series_id in zip(
        df_playoffs["GAME_ID"], df_playoffs["SERIES_ID"]
    ):
        series_by_game.setdefault(playoff_game_id, series_id)

    # Each value is already the complete "city nickname" label.
    teams = {
        team.id: f"{team.city} {team.nick_name}"
        for team in TeamModel.query.all()
    }

    for _, series in tqdm(df.iterrows(), total=df.shape[0]):
        record = dict(series)
        game_id = record["GAME_ID"]
        is_playoff = game_id in series_by_game
        playoff_series_id = (
            str(int(series_by_game[game_id])) if is_playoff else None
        )

        game = GameModel(
            id=game_id,
            game_date_est=datetime.strptime(
                record["GAME_DATE_EST"], "%Y-%m-%d"
            ),
            game_status_text=str(record["GAME_STATUS_TEXT"]),
            # Use the label once; it was previously interpolated twice,
            # which stored e.g. "Boston Celtics Boston Celtics".
            home_team=teams[record["HOME_TEAM_ID"]],
            home_score=try_parse_int(record["PTS_home"]),
            away_team=teams[record["VISITOR_TEAM_ID"]],
            away_score=try_parse_int(record["PTS_away"]),
            home_team_wins=bool(record["HOME_TEAM_WINS"]),
            season_id=try_parse_int(record["SEASON"]),
            is_playoff=is_playoff,
            # Keyword spelling matches the model's existing attribute name.
            playoff_seried_id=playoff_series_id,
        )
        db.session.add(game)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES DATASET...")
def insert_players_stats(df):
    """Store one ``PlayersStatsModel`` per player box-score row in *df*.

    ``MIN`` is parsed with ``try_parse_string_to_time``; the stored
    ``minutes`` value is that result's ``minute`` plus ``second / 60``, or
    ``0.0`` when the parsed value is falsy.  Shooting columns are parsed as
    floats and the remaining counting stats as ints; each is stored under
    the lower-cased column name.  The session is flushed after each row and
    committed once at the end.

    Args:
        df: DataFrame providing ``GAME_ID``, ``PLAYER_ID``,
            ``START_POSITION``, ``COMMENT``, ``MIN`` and the stat columns
            listed in the function body.
    """
    logging.warning("PROCESSING GAMES PLAYERS STATS...")

    float_columns = (
        "FGM", "FGA", "FG_PCT",
        "FG3M", "FG3A", "FG3_PCT",
        "FTM", "FTA", "FT_PCT",
    )
    int_columns = (
        "OREB", "DREB", "AST", "STL", "BLK", "TO", "PF", "PTS", "PLUS_MINUS",
    )

    for _, series in tqdm(df.iterrows(), total=df.shape[0]):
        record = dict(series)

        playing_time = try_parse_string_to_time(record["MIN"])
        if playing_time:
            minutes = playing_time.minute + playing_time.second / 60
        else:
            minutes = 0.0

        # Model attribute names are the lower-cased CSV column names.
        stat_values = {
            column.lower(): try_parse_float(record[column])
            for column in float_columns
        }
        stat_values.update(
            (column.lower(), try_parse_int(record[column]))
            for column in int_columns
        )

        player_stats = PlayersStatsModel(
            game_id=try_parse_int(record["GAME_ID"]),
            player_id=try_parse_int(record["PLAYER_ID"]),
            start_position=str(record["START_POSITION"]),
            comment=str(record["COMMENT"]),
            minutes=minutes,
            **stat_values,
        )
        db.session.add(player_stats)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES PLAYERS STATS...")