def insert_teams_stats(df):
    """Store the home and visitor team totals for every game in ``df``.

    Each row of ``df`` produces two ``TeamsStatsModel`` objects, the home
    side first and the visitor side second. Both are added to the session,
    which is flushed after every game and committed once at the end.

    Args:
        df: DataFrame with one row per game. Columns read: ``GAME_ID``,
            ``HOME_TEAM_ID``, ``VISITOR_TEAM_ID`` and the ``*_home`` /
            ``*_away`` variants of ``PTS``, ``FG_PCT``, ``FT_PCT``,
            ``FG3_PCT``, ``AST`` and ``REB``.
    """
    logging.warning("PROCESSING GAMES TEAMS STATS...")

    # (model field, parser, source column) for each side, listed in the
    # order the fields are handed to the model.
    home_columns = (
        ("team_id", try_parse_int, "HOME_TEAM_ID"),
        ("pts", try_parse_int, "PTS_home"),
        ("fg_pct", try_parse_float, "FG_PCT_home"),
        ("ft_pct", try_parse_float, "FT_PCT_home"),
        ("fg3_pct", try_parse_float, "FG3_PCT_home"),
        ("ast", try_parse_int, "AST_home"),
        ("reb", try_parse_int, "REB_home"),
    )
    visitor_columns = (
        ("team_id", try_parse_int, "VISITOR_TEAM_ID"),
        ("pts", try_parse_int, "PTS_away"),
        ("fg_pct", try_parse_float, "FG_PCT_away"),
        ("ft_pct", try_parse_float, "FT_PCT_away"),
        ("fg3_pct", try_parse_float, "FG3_PCT_away"),
        ("ast", try_parse_int, "AST_away"),
        ("reb", try_parse_int, "REB_away"),
    )
    sides = ((home_columns, True), (visitor_columns, False))

    for _, game_row in tqdm(df.iterrows(), total=df.shape[0]):
        game = dict(game_row)

        # Build both models before touching the session, so nothing is
        # added for a game whose row cannot be fully read.
        side_models = []
        for columns, plays_at_home in sides:
            values = {"game_id": try_parse_int(game["GAME_ID"])}
            for field, parse, column in columns:
                values[field] = parse(game[column])
            side_models.append(
                TeamsStatsModel(**values, is_home_team=plays_at_home)
            )

        for side_model in side_models:
            db.session.add(side_model)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES TEAMS STATS...")
def insert_players_teams(season_id, grouped):
    """Record which player belonged to which team for one season.

    Adds one ``PlayerSeasonTeamModel`` per group and commits once after
    the last group.

    Args:
        season_id: Value stored as ``season_id`` on every created row.
        grouped: Iterable yielding ``((team_id, player_id), data)`` pairs;
            only the key is used. Presumably a pandas groupby over team
            and player columns -- verify against the caller.
    """
    logging.warning("PROCESSING PLAYERS TEAMS SEASON...")
    for (team_id, player_id), _unused_rows in tqdm(grouped):
        membership = PlayerSeasonTeamModel(
            season_id=season_id,
            team_id=try_parse_int(team_id),
            player_id=try_parse_int(player_id),
        )
        db.session.add(membership)
    db.session.commit()
    logging.warning("END PROCESS PLAYERS TEAMS SEASON...")
Exemple #3
0
def process_players_dataset(season):
    """Create or update a ``PlayerModel`` for every row of the players file.

    Reads ``data/{season}/players.csv``. For each row the player is looked
    up by ``PERSON_ID``: an existing player has its name, year range and
    ``still_playing`` flag overwritten, otherwise a new player is created
    with those values. The session is flushed after each row and committed
    once at the end.

    Args:
        season: Name of the season directory under ``data/``.
    """
    logging.warning("PROCESSING PLAYER DATASET")
    players_df = pd.read_csv(f"data/{season}/players.csv", delimiter=",")

    for _, player_row in tqdm(players_df.iterrows(), total=players_df.shape[0]):
        raw = dict(player_row)

        player_id = try_parse_int(raw["PERSON_ID"])
        first_name, last_name = split_player_name(raw["DISPLAY_LAST_COMMA_FIRST"])
        player = PlayerModel.query.filter_by(id=player_id).first()

        # The same values are written whether the player exists or not.
        attributes = {
            "first_name": first_name,
            "last_name": last_name,
            "from_year": try_parse_int(raw["FROM_YEAR"]),
            "to_year": try_parse_int(raw["TO_YEAR"]),
            # A ROSTERSTATUS of 1 marks the player as still playing.
            "still_playing": bool(try_parse_int(raw["ROSTERSTATUS"]) == 1),
        }

        if not player:
            player = PlayerModel(id=player_id, **attributes)
        else:
            for attribute, value in attributes.items():
                setattr(player, attribute, value)

        db.session.add(player)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS PLAYER DATASET")
Exemple #4
0
def process_teams_dataset(season):
    """Create or update a ``TeamModel`` for every row of the teams file.

    Reads ``data/{season}/teams.csv``. For each row the team is looked up
    by ``TEAM_ID``. An existing team gets ``end_year``, ``team_code``,
    ``nick_name``, ``city`` and ``arena`` overwritten (``start_year`` and
    ``year_founded`` are left as stored); a missing team is created with
    all fields. The session is flushed after each row and committed once
    at the end.

    Args:
        season: Name of the season directory under ``data/``.
    """
    logging.warning("PROCESSING TEAM DATASET...")
    teams_df = pd.read_csv(f"data/{season}/teams.csv", delimiter=",")

    for _, team_row in tqdm(teams_df.iterrows(), total=teams_df.shape[0]):
        raw = dict(team_row)

        team_id = try_parse_int(raw["TEAM_ID"])
        team = TeamModel.query.filter_by(id=team_id).first()

        # Fields written on both the update and the insert path.
        refreshed = {
            "end_year": try_parse_int(raw["MAX_YEAR"]),
            "team_code": str(raw["ABBREVIATION"]),
            "nick_name": str(raw["NICKNAME"]),
            "city": str(raw["CITY"]),
            "arena": str(raw["ARENA"]),
        }

        if not team:
            team = TeamModel(
                id=team_id,
                start_year=try_parse_int(raw["MIN_YEAR"]),
                year_founded=try_parse_int(raw["YEARFOUNDED"]),
                **refreshed,
            )
        else:
            for attribute, value in refreshed.items():
                setattr(team, attribute, value)

        db.session.add(team)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS TEAM DATASET...")
def insert_games(season, df):
    """Insert one ``GameModel`` per row of ``df`` and commit.

    A game is flagged as a playoff game when its ``GAME_ID`` appears in
    ``data/{season}/playoff_series.csv``; in that case the matching
    ``SERIES_ID`` is stored as a string. Team display names
    ("<city> <nick_name>") are resolved from the teams already stored in
    ``TeamModel``. The session is flushed after each game and committed
    once at the end.

    Args:
        season: Name of the season directory under ``data/``.
        df: DataFrame with one row per game. Columns read: ``GAME_ID``,
            ``GAME_DATE_EST`` (``YYYY-MM-DD``), ``GAME_STATUS_TEXT``,
            ``HOME_TEAM_ID``, ``VISITOR_TEAM_ID``, ``PTS_home``,
            ``PTS_away``, ``HOME_TEAM_WINS`` and ``SEASON``.

    Raises:
        KeyError: If a game references a team id that is not in
            ``TeamModel``.
    """
    logging.warning("PROCESSING GAMES DATASET...")
    df_playoffs = pd.read_csv(f"data/{season}/playoff_series.csv", delimiter=",")

    # Build the GAME_ID -> SERIES_ID lookup once instead of filtering the
    # playoff frame for every game. The first occurrence wins when a game
    # id is listed more than once.
    series_by_game = {}
    for playoff_game_id, series_id in zip(
        df_playoffs["GAME_ID"], df_playoffs["SERIES_ID"]
    ):
        series_by_game.setdefault(playoff_game_id, series_id)

    teams = {team.id: f"{team.city} {team.nick_name}" for team in TeamModel.query.all()}

    for _, series in tqdm(df.iterrows(), total=df.shape[0]):
        record = dict(series)
        is_playoff = record["GAME_ID"] in series_by_game
        playoff_series_id = (
            str(int(series_by_game[record["GAME_ID"]])) if is_playoff else None
        )
        game = GameModel(
            id=record["GAME_ID"],
            game_date_est=datetime.strptime(record["GAME_DATE_EST"], "%Y-%m-%d"),
            game_status_text=str(record["GAME_STATUS_TEXT"]),
            # ``teams`` already holds the full "<city> <nick_name>" label,
            # so it is used as-is rather than being repeated twice.
            home_team=teams[record["HOME_TEAM_ID"]],
            home_score=try_parse_int(record["PTS_home"]),
            away_team=teams[record["VISITOR_TEAM_ID"]],
            away_score=try_parse_int(record["PTS_away"]),
            home_team_wins=bool(record["HOME_TEAM_WINS"]),
            season_id=try_parse_int(record["SEASON"]),
            is_playoff=is_playoff,
            playoff_seried_id=playoff_series_id,
        )
        db.session.add(game)
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES DATASET...")
def insert_players_stats(df):
    """Store one ``PlayersStatsModel`` per player box-score line in ``df``.

    Playing time is read from the ``MIN`` column via
    ``try_parse_string_to_time`` and stored as fractional minutes
    (``minute + second / 60``; the hour component is not used), or ``0.0``
    when the parsed value is falsy. The session is flushed after each row
    and committed once at the end.

    Args:
        df: DataFrame with one row per player and game. Columns read:
            ``GAME_ID``, ``PLAYER_ID``, ``START_POSITION``, ``COMMENT``,
            ``MIN`` and the stat columns listed below.
    """
    logging.warning("PROCESSING GAMES PLAYERS STATS...")

    # Stat columns grouped by parser; each model keyword is the
    # lower-cased column name.
    float_columns = (
        "FGM", "FGA", "FG_PCT",
        "FG3M", "FG3A", "FG3_PCT",
        "FTM", "FTA", "FT_PCT",
    )
    int_columns = (
        "OREB", "DREB", "AST", "STL", "BLK", "TO", "PF", "PTS", "PLUS_MINUS",
    )

    for _, stat_line in tqdm(df.iterrows(), total=df.shape[0]):
        box = dict(stat_line)

        clock = try_parse_string_to_time(box["MIN"])
        if clock:
            played = clock.minute + clock.second / 60
        else:
            played = 0.0

        values = {
            "game_id": try_parse_int(box["GAME_ID"]),
            "player_id": try_parse_int(box["PLAYER_ID"]),
            "start_position": str(box["START_POSITION"]),
            "comment": str(box["COMMENT"]),
            "minutes": played,
        }
        for column in float_columns:
            values[column.lower()] = try_parse_float(box[column])
        for column in int_columns:
            values[column.lower()] = try_parse_int(box[column])

        db.session.add(PlayersStatsModel(**values))
        db.session.flush()

    db.session.commit()
    logging.warning("END PROCESS GAMES PLAYERS STATS...")