Ejemplo n.º 1
0
def setup_directories():
    """Create the raw-data "Matches" and "Players" directories if missing.

    Both paths are resolved through ``data_utilities.get_raw_data_filepath``
    relative to ``CURRENT_SUBDIR``. Missing parent directories are created
    as well.
    """
    directories = [
        data_utilities.get_raw_data_filepath([CURRENT_SUBDIR, "Matches"]),
        data_utilities.get_raw_data_filepath([CURRENT_SUBDIR, "Players"]),
    ]
    for directory in directories:
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test and keeps the call idempotent.
        os.makedirs(directory, exist_ok=True)
Ejemplo n.º 2
0
def get_player_ids():
    """Return the ``id`` of every entry in Players.json.

    The file is read from the raw-data location that
    ``data_utilities.get_raw_data_filepath`` resolves for
    ``[CURRENT_SUBDIR, "Players.json"]``.

    Returns:
        list: One ``id`` value per player entry, in file order.
    """
    players_path = data_utilities.get_raw_data_filepath(
        [CURRENT_SUBDIR, "Players.json"])
    # Context manager guarantees the handle is closed after parsing.
    with open(players_path, "r") as players_file:
        player_data = json.load(players_file)
    return [player_entry["id"] for player_entry in player_data]
Ejemplo n.º 3
0
def download_match_data(match_id, overwrite=False):
    """Fetch the stats JSON for one match into the raw "Matches" directory.

    Args:
        match_id: Identifier used both in the request URL and in the
            ``Match<match_id>.json`` output filename.
        overwrite: When false, an already existing output file is kept and
            no download is attempted.

    Returns:
        tuple: ``(status, match_id)``. ``status`` is 200 when the existing
        file was kept, otherwise whatever ``download_data`` returns.
    """
    source_url = BASE_URL + f"stats/{match_id}.json"
    destination = data_utilities.get_raw_data_filepath(
        [CURRENT_SUBDIR, "Matches", f"Match{match_id}.json"])
    if overwrite or not os.path.exists(destination):
        status = download_data(source_url, None, destination)
        return (status, match_id)
    # File is already on disk and the caller did not ask to refresh it.
    return (200, match_id)
Ejemplo n.º 4
0
def download_general_json():
    """Download the Squads, Players, Rounds and Venues JSON pages.

    Each page is requested from ``BASE_URL`` using the lower-cased
    ``<page>.json`` name and stored as ``<page>.json`` (original casing)
    at the path resolved for ``CURRENT_SUBDIR``.
    """
    for page_name in ("Squads", "Players", "Rounds", "Venues"):
        filename = f"{page_name}.json"
        source_url = BASE_URL + filename.lower()
        destination = data_utilities.get_raw_data_filepath(
            [CURRENT_SUBDIR, filename])
        download_data(source_url, None, destination)
Ejemplo n.º 5
0
def get_match_ids():
    """Return the ids of all matches in rounds that are complete or active.

    Rounds are read from Rounds.json at the path resolved for
    ``CURRENT_SUBDIR``. Rounds with any other ``status`` are skipped.

    Returns:
        list: Match ``id`` values in the order they appear in the file.
    """
    rounds_path = data_utilities.get_raw_data_filepath(
        [CURRENT_SUBDIR, "Rounds.json"])
    # Context manager guarantees the handle is closed after parsing.
    with open(rounds_path, "r") as rounds_file:
        round_data = json.load(rounds_file)

    match_ids = []
    for match_round in round_data:
        if match_round["status"] in ("complete", "active"):
            match_ids.extend(match["id"] for match in match_round["matches"])
    return match_ids
Ejemplo n.º 6
0
def download_player_data(player_id):
    """Fetch the stats JSON for one player into the raw "Players" directory.

    Args:
        player_id: Identifier used both in the request URL and in the
            ``Player<player_id>.json`` output filename.

    Returns:
        tuple: ``(status, player_id)`` where ``status`` is the value
        returned by ``download_data``.
    """
    source_url = BASE_URL + f"stats/players/{player_id}.json"
    destination = data_utilities.get_raw_data_filepath(
        [CURRENT_SUBDIR, "Players", f"Player{player_id}.json"])
    status = download_data(source_url, None, destination)
    return (status, player_id)
Ejemplo n.º 7
0
def _insert_2018_season_data(conn, season):
    """Append 2018-format raw data for ``season`` to the ``player_stats`` table.

    Reads Squads.json, Players.json, Rounds.json and the per-match files from
    the season's raw-data directory. Only rounds that are not already in
    ``get_saved_rounds(conn, season)`` are processed:

    * Rounds whose status is "complete" produce one historical row for each
      player who logged more than zero minutes in a match.
    * The first remaining unsaved round is treated as the current round and
      produces one fixture row per team, which is then joined to the player
      list on ``team``.

    Args:
        conn: Connection handed to ``DataFrame.to_sql``; it must also expose
            ``conn.connection.commit()``.
        season: Key into ``data_utilities.SEASON_DIRS``; also written to the
            ``season`` column of every row.
    """
    season_subdir = data_utilities.SEASON_DIRS[season]
    saved_rounds = get_saved_rounds(conn, season)

    def load_raw_json(*path_parts):
        # Use a context manager so each handle is closed promptly; this
        # matters most for the per-match files opened inside the loops.
        path = data_utilities.get_raw_data_filepath(
            [season_subdir, *path_parts])
        with open(path, "r") as raw_file:
            return json.load(raw_file)

    historical_rows_list = []
    opponent_rows_list = []
    player_rows_list = []

    player_position_dict = {}
    player_team_dict = {}
    team_id_dict = {}
    player_name_dict = {}

    hit_current_round = False

    # Squad id -> short team name.
    for team_entry in load_raw_json("Squads.json"):
        team_id_dict[team_entry["id"]] = team_entry["short_name"]
    team_id_dict[399] = "RBNY"  # fix New York Red Bulls short name change

    # Player master list plus id -> position / team / name lookups.
    for player_entry in load_raw_json("Players.json"):
        player_id = player_entry["id"]
        player_name = player_entry["known_name"]
        if player_name is None:
            # remove weird characters and extra space if player has one name
            player_name = unidecode.unidecode(
                (player_entry["first_name"] + " " +
                 player_entry["last_name"]).strip())
        position_id = player_entry["positions"][0]
        current_cost = player_entry["cost"] / 1000000
        squad_name = team_id_dict[player_entry["squad_id"]]
        player_rows_list.append({
            "player_id": player_id,
            "player_name": player_name,
            "position_id": position_id,
            "cost": current_cost,
            "team": squad_name,
        })
        player_position_dict[player_id] = position_id
        player_team_dict[player_id] = squad_name
        player_name_dict[player_id] = player_name

    for match_round in load_raw_json("Rounds.json"):
        round_id = match_round["id"]
        if (round_id not in saved_rounds
                and match_round["status"] == "complete"):
            for match in match_round["matches"]:
                match_id = match["id"]
                date = dateutil.parser.parse(match["date"])
                home_squad_short_name = team_id_dict[match["home_squad_id"]]
                away_squad_short_name = team_id_dict[match["away_squad_id"]]
                match_data = load_raw_json("Matches", f"Match{match_id}.json")
                for match_player in match_data:
                    player_id = match_player["player_id"]
                    stats = match_player["stats"]
                    if stats["MIN"] <= 0:
                        continue

                    # Only the player's *current* team is known, so the
                    # home/away side is a best guess at this point.
                    team = player_team_dict.get(player_id)
                    if team == home_squad_short_name:
                        home = 1
                        opponent = away_squad_short_name
                    elif team == away_squad_short_name:
                        home = 0
                        opponent = home_squad_short_name
                    else:
                        # Current team matches neither side: mark with 2
                        # and provisionally assign the away team.
                        # NOTE(review): presumably corrected by
                        # fix_player_home below - confirm.
                        home = 2
                        team = away_squad_short_name
                        opponent = home_squad_short_name

                    player_dict = {
                        "mins": stats["MIN"],
                        "gls": stats["GL"],
                        "ass": stats["ASS"],
                        "cs": stats["CS"],
                        "sv": stats["SV"],
                        "pe": stats["PE"],
                        "ps": stats["PS"],
                        "pm": stats["PM"],
                        "gc": stats["GC"],
                        "yc": stats["YC"],
                        "rc": stats["RC"],
                        "og": stats["OG"],
                        "oga": stats["OGA"],
                        "sh": stats["SH"],
                        "wf": stats["WF"],
                        "pss": stats["PSS"],
                        "aps": stats["APS"],
                        # Guard against division by zero when APS is 0.
                        "pcp": (stats["PSS"] / stats["APS"]
                                if stats["APS"] > 0 else 0),
                        "crs": stats["CRS"],
                        "kp": stats["KP"],
                        "bc": stats["BC"],
                        "cl": stats["CL"],
                        "blk": stats["BLK"],
                        "intc": stats["INT"],
                        "tck": stats["TCK"],
                        "br": stats["BR"],
                        "elg": stats["ELG"],
                        "position_id": player_position_dict.get(player_id),
                        "player_id": player_id,
                        "player_name": player_name_dict.get(player_id),
                        "team": team,
                        "round": round_id,
                        "event_id": match_id,
                        "opponent": opponent,
                        "home": home,
                        "season": season,
                        "date": date,
                    }
                    player_dict = data_utilities.calculate_fantasy_scores(
                        player_dict)
                    historical_rows_list.append(player_dict)
        elif round_id not in saved_rounds and not hit_current_round:
            # First unsaved round that is not complete: record its fixtures
            # (one row per side) so players can be attached afterwards.
            hit_current_round = True
            for match in match_round["matches"]:
                home_team = team_id_dict[match["home_squad_id"]]
                away_team = team_id_dict[match["away_squad_id"]]
                match_date = dateutil.parser.parse(match["date"])
                opponent_rows_list.append({
                    "round": round_id,
                    "event_id": match["id"],
                    "opponent": away_team,
                    "team": home_team,
                    "home": 1,
                    "season": season,
                    "date": match_date,
                })
                opponent_rows_list.append({
                    "round": round_id,
                    "event_id": match["id"],
                    "opponent": home_team,
                    "team": away_team,
                    "home": 0,
                    "season": season,
                    "date": match_date,
                })

    if historical_rows_list:
        df_historical = pd.DataFrame(historical_rows_list)
        # some home/away team and opponent info is messed up due to Fanhub
        # only storing players' current team, so immediately fix
        df_historical = df_historical.pipe(fix_player_home, player_team_dict)
        df_historical["date"] = pd.to_datetime(df_historical["date"], utc=True)
        df_historical.to_sql("player_stats",
                             conn,
                             if_exists="append",
                             index=False)
        conn.connection.commit()
    if opponent_rows_list:
        # add players to current round's dataframe by matching on current team
        df_player = pd.DataFrame(player_rows_list)
        df_opponent = pd.DataFrame(opponent_rows_list)
        df_current = pd.merge(df_player, df_opponent, how="right", on=["team"])
        df_current["date"] = pd.to_datetime(df_current["date"], utc=True)
        df_current.to_sql("player_stats",
                          conn,
                          if_exists="append",
                          index=False)
        conn.connection.commit()
Ejemplo n.º 8
0
def _insert_2017_season_data(conn, season):
    """Append 2017-format raw data for ``season`` to the ``player_stats`` table.

    Reads Fixtures.json, Key.json and one ``Player<id>.json`` per element in
    Key.json from the season's raw-data directory. Each history entry whose
    round is not in ``get_saved_rounds(conn, season)`` and whose minutes are
    greater than zero becomes one row.

    Args:
        conn: Connection handed to ``DataFrame.to_sql``; it must also expose
            ``conn.connection.commit()``.
        season: Key into ``data_utilities.SEASON_DIRS``; also written to the
            ``season`` column of every row.

    Raises:
        KeyError: If a history entry references a fixture that is neither in
            Fixtures.json nor in the hard-coded table below.
    """
    season_subdir = data_utilities.SEASON_DIRS[season]
    saved_rounds = get_saved_rounds(conn, season)

    def load_raw_json(*path_parts):
        # Use a context manager so each handle is closed promptly; one file
        # is opened per player, so leaked handles would add up quickly.
        path = data_utilities.get_raw_data_filepath(
            [season_subdir, *path_parts])
        with open(path, "r") as raw_file:
            return json.load(raw_file)

    rows_list = []
    # Numeric team id -> short team name.
    short_name_dict = {
        1: "CHI",
        2: "COL",
        3: "CLB",
        4: "DC",
        5: "DAL",
        6: "HOU",
        7: "MTL",
        8: "LA",
        9: "NE",
        10: "NYC",
        11: "RBNY",
        12: "ORL",
        13: "PHI",
        14: "POR",
        15: "RSL",
        16: "SJ",
        17: "SEA",
        18: "SKC",
        19: "TOR",
        20: "VAN",
        21: "ATL",
        22: "MIN",
    }
    # "<fixture id>,<1 if home else 0>" -> short team name.
    # home_dict is hard coded for the last week due to a scraping error
    home_dict = {
        "364,1": "TOR",
        "364,0": "ATL",
        "365,1": "DC",
        "365,0": "RBNY",
        "366,1": "DAL",
        "366,0": "LA",
        "367,1": "HOU",
        "367,0": "CHI",
        "368,1": "MTL",
        "368,0": "NE",
        "369,1": "NYC",
        "369,0": "CLB",
        "370,1": "PHI",
        "370,0": "ORL",
        "371,1": "POR",
        "371,0": "VAN",
        "372,1": "RSL",
        "372,0": "SKC",
        "373,1": "SJ",
        "373,0": "MIN",
        "374,1": "SEA",
        "374,0": "COL",
    }

    # Fill in (or override) the table with the fixtures that were scraped.
    for entry in load_raw_json("Fixtures.json"):
        event_id = entry["id"]
        home_dict[f"{event_id},1"] = short_name_dict[entry["team_h"]]
        home_dict[f"{event_id},0"] = short_name_dict[entry["team_a"]]

    key_data = load_raw_json("Key.json")
    for entry in key_data["elements"]:
        player_id = entry["id"]
        # remove weird characters and extra space if player has one name
        player_name = unidecode.unidecode(
            (entry["first_name"] + " " + entry["second_name"]).strip())
        position_id = entry["element_type"]

        player_data = load_raw_json(f"Player{player_id}.json")
        for player_entry in player_data["history"]:
            if (player_entry["round"] in saved_rounds
                    or player_entry["minutes"] <= 0):
                continue

            home = int(player_entry["was_home"])
            # Look the team up by this row's own fixture; reusing the
            # fixture-loop variable would assign the last fixture's team
            # to every row.
            fixture_id = player_entry["fixture"]
            attempted_passes = player_entry["attempted_passes"]
            completed_passes = player_entry["completed_passes"]
            player_dict = {
                "mins": player_entry["minutes"],
                "gls": player_entry["goals_scored"],
                "ass": player_entry["assists"],
                "cs": player_entry["clean_sheets"],
                "sv": player_entry["saves"],
                "pe": player_entry["penalties_earned"],
                "ps": player_entry["penalties_saved"],
                "pm": player_entry["penalties_missed"],
                "gc": player_entry["goals_conceded"],
                "yc": player_entry["yellow_cards"],
                "rc": player_entry["red_cards"],
                "og": player_entry["own_goals"],
                "oga": player_entry["own_goal_earned"],
                "sh": player_entry["shots"],
                "wf": player_entry["was_fouled"],
                "pss": attempted_passes,
                "aps": completed_passes,
                # Pass completion ratio: completed / attempted, with 0 when
                # no pass was attempted.
                # NOTE(review): confirm this orientation matches what
                # calculate_fantasy_scores expects for "pcp".
                "pcp": (completed_passes / attempted_passes
                        if attempted_passes > 0 else 0),
                "crs": player_entry["crosses"],
                "kp": player_entry["key_passes"],
                "bc": player_entry["big_chances_created"],
                "cl": player_entry["clearances"],
                "blk": player_entry["blocks"],
                "intc": player_entry["interceptions"],
                "tck": player_entry["tackles"],
                "br": player_entry["recoveries"],
                "elg": player_entry["errors_leading_to_goal"],
                # "cost": (player_entry["value"] / 10),
                "position_id": position_id,
                "player_id": player_id,
                "player_name": player_name,
                "team": home_dict[f"{fixture_id},{home}"],
                "round": player_entry["round"],
                "event_id": fixture_id,
                "opponent": short_name_dict[player_entry["opponent_team"]],
                "home": home,
                "season": season,
                "date": dateutil.parser.parse(player_entry["kickoff_time"]),
            }
            player_dict = data_utilities.calculate_fantasy_scores(player_dict)
            rows_list.append(player_dict)

    if rows_list:
        df_player_stats = pd.DataFrame(rows_list)
        df_player_stats["date"] = pd.to_datetime(df_player_stats["date"],
                                                 utc=True)
        df_player_stats.to_sql("player_stats",
                               conn,
                               if_exists="append",
                               index=False)
        conn.connection.commit()