def shot_extractor(player_name, match_id, filepath, start_id=0):
    """
    Given a player and match name, this function extracts all shots from that match

    Events are read from the file ``<filepath><match_id>.json``. An event is kept
    when its player name equals ``player_name`` and its type name is "Shot".
    A shot for which any required field is missing or malformed is skipped as
    a whole, so every returned record carries the complete set of keys.

    Args:
        - player_name: str, the name of the player as it appears in the data
        - match_id: int, the match id which is usually the name of the events json file
        - filepath: str, prefix that is concatenated directly with the match id
        - start_id: int, key of the first extracted shot; later shots use
          consecutive integers

    Returns:
        - shot_dict: dict, a shot dictionary mapping shot id -> dict of shot fields
    """
    # define file path
    path = filepath + str(match_id) + ".json"

    # read events json into a dataframe
    df_events = pd.read_json(path)

    # initiate an empty dictionary to store shot values
    shot_dict = defaultdict(dict)
    shot_id = start_id

    # Reference points handed to is_inside together with the shot location.
    # NOTE(review): presumably the two goalposts in pitch coordinates - confirm.
    goal_x1y1 = [120, 36]
    goal_x2y2 = [120, 44]

    # iterate through the events and extract shot information specific to the player
    for i in range(len(df_events)):
        try:
            if df_events.player[i]["name"] != player_name:
                continue
            if df_events.type[i]["name"] != "Shot":
                continue

            shot = df_events.iloc[i]["shot"]
            location = df_events.location[i]

            # The record is assembled locally and only stored once complete,
            # so a failure half-way through never leaves a partial entry.
            record = {
                # NOTE(review): index[i + 1] raises IndexError on the final
                # row, so a shot on the very last event is skipped.
                "index": df_events.index[i + 1],
                # NOTE(review): this passes index[i] while the stored "index"
                # is index[i + 1]; confirm which one game_state_extractor expects.
                "gamestate": game_state_extractor(
                    df_events.index[i],
                    match_id=match_id,
                    player_name=player_name,
                    filepath=filepath,
                ),
                "match_id": match_id,
                "start_X": location[0],
                "start_Y": location[1],
                "end_X": shot["end_location"][0],
                "end_Y": shot["end_location"][1],
                "end_Z": shot["end_location"][2],
                "minute": df_events.minute[i],
                "statsbomb_xg": shot["statsbomb_xg"],
                "body_part": shot["body_part"]["name"],
                "outcome": shot["outcome"]["name"],
                "type": shot["type"]["name"],
            }

            # boolean flags are resolved by the shared exception handler
            for bool_scenario in [
                "aerial_won",
                "follows_dribble",
                "first_time",
                "open_goal",
                "deflected",
            ]:
                record[bool_scenario] = shot_exception_handler(
                    df_events, i, bool_scenario, is_boolean=True
                )

            # calculate the pack density for the shot: one is_inside result
            # per freeze-frame entry, summed up
            freeze_frame_list = []
            for frame in shot["freeze_frame"]:
                try:
                    freeze_frame_list.append(
                        is_inside(
                            location,
                            goal_x1y1,
                            goal_x2y2,
                            frame["location"],
                        )
                    )
                except KeyError:
                    # an entry without a usable location counts as 1
                    freeze_frame_list.append(1)
            record["pack_density"] = sum(freeze_frame_list)
        except (LookupError, TypeError, ValueError, AttributeError):
            # Best effort: events without a player, or shots lacking a
            # required field, are skipped. Only data-shape errors are
            # tolerated; anything else propagates to the caller.
            continue

        shot_dict[shot_id] = record
        shot_id += 1

    return shot_dict
def pass_extractor(player_name, match_id, filepath, start_id=0):
    """
    Given a player and match name, this function extracts all passes from that match

    Events are read from the file ``<filepath><match_id>.json``. An event is kept
    when its player name equals ``player_name`` and its type name is "Pass".
    A pass for which any required field is missing or malformed (for example
    one without a recipient or body part) is skipped as a whole, so every
    returned record carries the complete set of keys.

    Args:
        - player_name: str, the name of the player as it appears in the data
        - match_id: int, the match id which is usually the name of the events json file
        - filepath: str, prefix that is concatenated directly with the match id
        - start_id: int, key of the first extracted pass; later passes use
          consecutive integers

    Returns:
        - pass_dict: dict, a pass dictionary mapping pass id -> dict of pass fields
    """
    # define file path
    path = filepath + str(match_id) + ".json"

    # read events json into a dataframe
    df_events = pd.read_json(path)

    # initiate an empty dictionary to store pass values
    pass_dict = defaultdict(dict)
    pass_id = start_id

    # iterate through the events and extract pass information specific to the player
    for i in range(len(df_events)):
        try:
            if df_events.player[i]["name"] != player_name:
                continue
            if df_events.type[i]["name"] != "Pass":
                continue

            # Address the pass payload by column name instead of a fixed
            # column position, so the lookup does not depend on the order
            # in which keys happen to appear in the events file.
            pass_info = df_events.iloc[i]["pass"]
            location = df_events.location[i]

            # The record is assembled locally and only stored once complete,
            # so a failure half-way through never leaves a partial entry.
            record = {
                # NOTE(review): index[i + 1] raises IndexError on the final
                # row, so a pass on the very last event is skipped.
                "index": df_events.index[i + 1],
                # NOTE(review): this passes index[i] while the stored "index"
                # is index[i + 1]; confirm which one game_state_extractor expects.
                "gamestate": game_state_extractor(
                    df_events.index[i],
                    match_id=match_id,
                    player_name=player_name,
                    filepath=filepath,
                ),
                "match_id": match_id,
                # start location
                "start_X": location[0],
                "start_Y": location[1],
                # end location
                "end_X": pass_info["end_location"][0],
                "end_Y": pass_info["end_location"][1],
                # pass time
                "minute": df_events.minute[i],
                # recipient information
                "recipient": pass_info["recipient"]["name"],
                "length": pass_info["length"],
                "angle": pass_info["angle"],
                "height_type": pass_info["height"]["name"],
            }

            # boolean scenarios (in StatsBomb data, if the result is False,
            # it is omitted) are resolved by the shared exception handler
            for bool_scenario in [
                "backheel",
                "deflected",
                "miscommunication",
                "cross",
                "cut_back",
                "switch",
                "shot_assist",
                "goal_assist",
            ]:
                record[bool_scenario] = pass_exception_handler(
                    df_events, i, bool_scenario, is_boolean=True
                )

            # extract body part information
            record["body_part"] = pass_info["body_part"]["name"]

            # conditional, non-boolean information with explicit fallbacks
            record["type"] = pass_exception_handler(
                df_events,
                i,
                "type",
                is_boolean=False,
                default_return="Regular Play",
            )
            record["outcome"] = pass_exception_handler(
                df_events,
                i,
                "outcome",
                is_boolean=False,
                default_return="Completed",
            )
            record["technique"] = pass_exception_handler(
                df_events,
                i,
                "technique",
                is_boolean=False,
                default_return="N/A",
            )
        except (LookupError, TypeError, ValueError, AttributeError):
            # Best effort: events without a player, or passes lacking a
            # required field, are skipped. Only data-shape errors are
            # tolerated; anything else propagates to the caller.
            continue

        pass_dict[pass_id] = record
        # update pass_id for next pass
        pass_id += 1

    return pass_dict
def intercept_extractor(player_name, match_id, filepath, start_id=0):
    """
    Given a player and match name, this function extracts all interceptions from that match

    Events are read from the file ``<filepath><match_id>.json``. An event is kept
    when its player name equals ``player_name`` and its type name is
    "Interception". An interception for which any required field is missing
    or malformed is skipped as a whole, so every returned record carries the
    complete set of keys.

    Args:
        - player_name: str, the name of the player as it appears in the data
        - match_id: int, the match id which is usually the name of the events json file
        - filepath: str, location of files (prefix concatenated directly with the match id)
        - start_id: int, key of the first extracted interception; later ones
          use consecutive integers

    Returns:
        - intercept_dict: dict, an interception dictionary mapping id -> dict of fields
    """
    # define file path
    path = filepath + str(match_id) + ".json"

    # read events json into a dataframe
    df_events = pd.read_json(path)

    # initiate an empty dictionary to store intercept values
    intercept_dict = defaultdict(dict)
    intercept_id = start_id

    # iterate through the events and extract intercept information specific to the player
    for i in range(len(df_events)):
        try:
            if df_events.player[i]["name"] != player_name:
                continue
            if df_events.type[i]["name"] != "Interception":
                continue

            location = df_events.location[i]

            # The record is assembled locally and only stored once complete,
            # so a failure half-way through never leaves a partial entry.
            # NOTE(review): index[i + 1] raises IndexError on the final row,
            # so an interception on the very last event is skipped.
            event_index = df_events.index[i + 1]
            record = {
                "index": event_index,
                # get the game state at the event index
                "gamestate": game_state_extractor(
                    event_index,
                    match_id=match_id,
                    player_name=player_name,
                    filepath=filepath,
                ),
                "match_id": match_id,
                "start_X": location[0],
                "start_Y": location[1],
                "outcome": df_events.iloc[i]["interception"]["outcome"]["name"],
            }
        except (LookupError, TypeError, ValueError, AttributeError):
            # Best effort: events without a player, or interceptions lacking
            # a required field, are skipped. Only data-shape errors are
            # tolerated; anything else propagates to the caller.
            continue

        intercept_dict[intercept_id] = record
        intercept_id += 1

    return intercept_dict
def pressure_extractor(player_name, match_id, filepath, start_id=0):
    """
    Given a player and match name, this function extracts all pressures from that match

    Events are read from the file ``<filepath><match_id>.json``. An event is kept
    when its player name equals ``player_name`` and its type name is
    "Pressure". A pressure for which any required field is missing or
    malformed is skipped as a whole, so every returned record carries the
    complete set of keys.

    Args:
        - player_name: str, the name of the player as it appears in the data
        - match_id: int, the match id which is usually the name of the events json file
        - filepath: str, location of files (prefix concatenated directly with the match id)
        - start_id: int, key of the first extracted pressure; later ones use
          consecutive integers

    Returns:
        - pressure_dict: dict, a pressure dictionary mapping id -> dict of fields
    """
    # define file path
    path = filepath + str(match_id) + ".json"

    # read events json into a dataframe
    df_events = pd.read_json(path)

    # initiate an empty dictionary to store pressure values
    pressure_dict = defaultdict(dict)
    pressure_id = start_id

    # iterate through the events and extract pressure information specific to the player
    for i in range(len(df_events)):
        try:
            if df_events.player[i]["name"] != player_name:
                continue
            if df_events.type[i]["name"] != "Pressure":
                continue

            location = df_events.location[i]

            # The record is assembled locally and only stored once complete,
            # so a failure half-way through never leaves a partial entry.
            # NOTE(review): index[i + 1] raises IndexError on the final row,
            # so a pressure on the very last event is skipped.
            event_index = df_events.index[i + 1]
            record = {
                "index": event_index,
                # get the game state at the event index
                "gamestate": game_state_extractor(
                    event_index,
                    match_id=match_id,
                    player_name=player_name,
                    filepath=filepath,
                ),
                "match_id": match_id,
                "start_X": location[0],
                "start_Y": location[1],
                "duration": df_events.duration[i],
                # boolean flag resolved by the shared exception handler
                "counterpress": pressure_exception_handler(
                    df_events, i, "counterpress", is_boolean=True
                ),
            }
        except (LookupError, TypeError, ValueError, AttributeError):
            # Best effort: events without a player, or pressures lacking a
            # required field, are skipped. Only data-shape errors are
            # tolerated; anything else propagates to the caller.
            continue

        pressure_dict[pressure_id] = record
        pressure_id += 1

    return pressure_dict
def carry_extractor(player_name, match_id, filepath, start_id=0):
    """
    Given a player and match name, this function extracts all carries from that match

    Events are read from the file ``<filepath><match_id>.json``. An event is kept
    when its player name equals ``player_name`` and its type name is "Carry".
    A carry for which any required field is missing or malformed is skipped
    as a whole, so every returned record carries the complete set of keys.

    Args:
        - player_name: str, the name of the player as it appears in the data
        - match_id: int, the match id which is usually the name of the events json file
        - filepath: str, location of files (prefix concatenated directly with the match id)
        - start_id: int, key of the first extracted carry; later ones use
          consecutive integers

    Returns:
        - carry_dict: dict, a dictionary of carries mapping id -> dict of fields
    """
    # define file path
    path = filepath + str(match_id) + ".json"

    # read events json into a dataframe
    df_events = pd.read_json(path)

    # initiate an empty dictionary to store carry values
    carry_dict = defaultdict(dict)
    carry_id = start_id

    # iterate through the events and extract carry information specific to the player
    for i in range(len(df_events)):
        try:
            if df_events.player[i]["name"] != player_name:
                continue
            if df_events.type[i]["name"] != "Carry":
                continue

            location = df_events.location[i]
            end_location = df_events.iloc[i]["carry"]["end_location"]

            # The record is assembled locally and only stored once complete,
            # so a failure half-way through never leaves a partial entry.
            # NOTE(review): index[i + 1] raises IndexError on the final row,
            # so a carry on the very last event is skipped.
            event_index = df_events.index[i + 1]
            record = {
                "index": event_index,
                # get the game state at the event index
                "gamestate": game_state_extractor(
                    event_index,
                    match_id=match_id,
                    player_name=player_name,
                    filepath=filepath,
                ),
                "match_id": match_id,
                "start_X": location[0],
                "start_Y": location[1],
                "end_X": end_location[0],
                "end_Y": end_location[1],
                # boolean flag resolved by the shared exception handler
                "under_pressure": carry_exception_handler(
                    df_events, i, "under_pressure", is_boolean=True
                ),
            }

            # derived geometry, computed from the coordinates stored above
            record["angle"] = angle_calculator(
                startX=record["start_X"],
                startY=record["start_Y"],
                endX=record["end_X"],
                endY=record["end_Y"],
            )
            record["length"] = length_calculator(
                startX=record["start_X"],
                startY=record["start_Y"],
                endX=record["end_X"],
                endY=record["end_Y"],
            )
        except (LookupError, TypeError, ValueError, AttributeError):
            # Best effort: events without a player, or carries lacking a
            # required field, are skipped. Only data-shape errors are
            # tolerated; anything else propagates to the caller.
            continue

        carry_dict[carry_id] = record
        carry_id += 1

    return carry_dict