def test_convert_to_actions(self):
    """Check that the stored events convert to a non-empty, schema-valid action table.

    All resulting actions must belong to game 7584 and to team 782 or 778.
    """
    actions = sb.convert_to_actions(self.events_japbel, 782)

    assert len(actions) > 0
    SPADLSchema.validate(actions)

    assert (actions.game_id == 7584).all()
    # Every action must be attributed to one of the two expected teams.
    assert actions.team_id.isin([782, 778]).all()
def convert_statsbomb_data():
    """Download StatsBomb open data and store it as SPADL HDF5 files.

    Competitions are fetched with a remote ``StatsBombLoader`` and restricted
    to the competition names in ``leagues`` and the season ids in ``seasons``.
    For each selected competition/season one file named
    ``spadl-<competition>-<season>.h5`` is written below
    ``<_data_dir>/statsbomb`` with the keys ``actiontypes``, ``results``,
    ``bodyparts``, ``actions/game_<game_id>`` (one per game), ``games``,
    ``teams`` and ``players``.
    """
    seasons = {
        3: '2018',
    }
    leagues = {
        'FIFA World Cup': 'WorldCup',
    }

    spadl_datafolder = os.path.join(_data_dir, 'statsbomb')
    # HDFStore does not create missing parent directories.
    os.makedirs(spadl_datafolder, exist_ok=True)

    free_open_data_remote = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/'

    loader = statsbomb.StatsBombLoader(root=free_open_data_remote, getter='remote')

    # View all available competitions
    df_competitions = loader.competitions()
    # Filter on the season as well as the competition name: the lookups in
    # ``seasons`` below would otherwise raise KeyError for any other season
    # of a selected competition.
    df_selected_competitions = df_competitions[
        df_competitions.competition_name.isin(list(leagues))
        & df_competitions.season_id.isin(list(seasons))
    ]

    for competition in df_selected_competitions.itertuples():
        # Get games from all selected competition
        games = loader.games(competition.competition_id, competition.season_id)
        # Materialise the rows so tqdm knows the total number of games.
        games_verbose = tqdm(list(games.itertuples()), desc='Loading match data')

        teams, players = [], []

        competition_id = leagues[competition.competition_name]
        season_id = seasons[competition.season_id]
        spadl_h5 = os.path.join(
            spadl_datafolder, f'spadl-{competition_id}-{season_id}.h5'
        )

        with pd.HDFStore(spadl_h5) as spadlstore:
            spadlstore.put('actiontypes', spadl.actiontypes_df(), format='table')
            spadlstore.put('results', spadl.results_df(), format='table')
            spadlstore.put('bodyparts', spadl.bodyparts_df(), format='table')

            for game in games_verbose:
                # load data
                teams.append(loader.teams(game.game_id))
                players.append(loader.players(game.game_id))
                events = loader.events(game.game_id)

                # convert data
                spadlstore.put(
                    f'actions/game_{game.game_id}',
                    statsbomb.convert_to_actions(events, game.home_team_id),
                    format='table',
                )

            # Item assignment always writes a column; attribute assignment
            # would only set a Python attribute if the column were missing.
            games['season_id'] = season_id
            games['competition_id'] = competition_id
            spadlstore.put('games', games)
            spadlstore.put(
                'teams',
                pd.concat(teams).drop_duplicates('team_id').reset_index(drop=True),
            )
            spadlstore.put(
                'players',
                pd.concat(players).drop_duplicates('player_id').reset_index(drop=True),
            )
def test_convert_own_goal(self):
    """Check how the two own-goal events of game 7577 are converted.

    The "own goal for" event must produce no action, while the "own goal
    against" event must produce exactly one ``bad_touch`` action with an
    ``owngoal`` result and ``foot`` body part.
    """
    events = self.SBL.events(7577)

    # "Own goal for" event: expected to be dropped by the conversion.
    is_own_goal_for = events.event_id == '8981bc58-6041-4b78-95c5-ebe9677ca379'
    for_actions = sb.convert_to_actions(events[is_own_goal_for], 797)
    assert len(for_actions) == 0

    # "Own goal against" event: expected to become a single action.
    is_own_goal_against = events.event_id == 'cef0fcb6-28d0-49d7-8f93-4a0aef28001a'
    against_actions = sb.convert_to_actions(events[is_own_goal_against], 797)
    assert len(against_actions) == 1

    action = against_actions.iloc[0]
    assert action['type_id'] == spadl.actiontypes.index('bad_touch')
    assert action['result_id'] == spadl.results.index('owngoal')
    assert action['bodypart_id'] == spadl.bodyparts.index('foot')
def test_convert_pass(self):
    """Check the team, player, type, result and body part of a converted pass."""
    is_pass = self.events_japbel.event_id == 'a1b55211-a292-4294-887b-5385cc3c5705'
    converted = sb.convert_to_actions(self.events_japbel[is_pass], self.id_bel)
    action = converted.iloc[0]

    expected = {
        'team_id': 782,
        'player_id': 3289,
        'type_id': spadl.actiontypes.index('pass'),
        'result_id': spadl.results.index('success'),
        'bodypart_id': spadl.bodyparts.index('foot'),
    }
    for column, value in expected.items():
        assert action[column] == value
def test_convert_time(self, period, timestamp, minute, second):
    """Check that the action time is expressed in seconds since the period start.

    A stored pass event is copied and its time fields are overwritten with
    the given values before conversion.
    """
    is_pass = self.events_japbel.event_id == 'a1b55211-a292-4294-887b-5385cc3c5705'
    event = self.events_japbel[is_pass].copy()
    overrides = {
        'period_id': period,
        'timestamp': timestamp,
        'minute': minute,
        'second': second,
    }
    for column, value in overrides.items():
        event[column] = value

    action = sb.convert_to_actions(event, self.id_bel).iloc[0]

    # Subtract 45 minutes for each of periods 1 and 2 already played and
    # 15 minutes for each of periods 3 and 4 already played.
    expected_seconds = (
        60 * minute
        - ((period > 1) * 45 * 60)
        - ((period > 2) * 45 * 60)
        - ((period > 3) * 15 * 60)
        - ((period > 4) * 15 * 60)
        + second
    )

    assert action['period_id'] == period
    assert action['time_seconds'] == expected_seconds
matches = pd.concat(matches, sort=True).reset_index(drop=True) # Load and convert match data matches_verbose = tqdm.tqdm(list(matches.itertuples()), desc="Loading match data") teams, players, player_games = [], [], [] actions = {} for match in matches_verbose: teams.append(SBL.teams(match.match_id)) players.append(SBL.players(match.match_id)) events = SBL.events(match.match_id) player_games.append(statsbomb.extract_player_games(events)) actions[match.match_id] = statsbomb.convert_to_actions( events, match.home_team_id) # Store converted spadl data in a h5-file games = matches.rename(columns={"match_id": "game_id"}) teams = pd.concat(teams, sort=True).drop_duplicates("team_id").reset_index(drop=True) players = pd.concat( players, sort=True).drop_duplicates("player_id").reset_index(drop=True) player_games = pd.concat(player_games, sort=True).reset_index(drop=True) spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5") with pd.HDFStore(spadl_h5) as spadlstore: spadlstore["competitions"] = selected_competitions spadlstore["games"] = games spadlstore["teams"] = teams spadlstore["players"] = players
def test_convert_end_location(self):
    """Check the end coordinates of a converted pass."""
    is_pass = self.events_japbel.event_id == 'a1b55211-a292-4294-887b-5385cc3c5705'
    action = sb.convert_to_actions(self.events_japbel[is_pass], self.id_bel).iloc[0]

    # 49.0 and 43.0 are presumably the raw end location of the event; the
    # y value is subtracted from 68 in the expected result.
    expected_end_x = ((49.0 - 1) / 119) * spadl.field_length
    expected_end_y = 68 - ((43.0 - 1) / 79) * spadl.field_width

    assert action['end_x'] == expected_end_x
    assert action['end_y'] == expected_end_y
def test_convert_start_location(self):
    """Check the start coordinates of a pass built from an inline event record.

    The pass payload carries no ``end_location``; the test expects the end
    coordinates of the action to equal its start coordinates in that case.
    """
    pass_extra = {
        'recipient': {'id': 5642, 'name': 'Axel Witsel'},
        'length': 12.369317,
        'angle': 2.896614,
        'height': {'id': 1, 'name': 'Ground Pass'},
        'type': {'id': 65, 'name': 'Kick Off'},
        'body_part': {'id': 40, 'name': 'Right Foot'},
    }
    record = {
        'event_id': 'a1b55211-a292-4294-887b-5385cc3c5705',
        'index': 5,
        'period_id': 1,
        'timestamp': '00:00:00.920',
        'minute': 0,
        'second': 0,
        'type_id': 30,
        'type_name': 'Pass',
        'possession': 2,
        'possession_team_id': 782,
        'possession_team_name': 'Belgium',
        'play_pattern_id': 9,
        'play_pattern_name': 'From Kick Off',
        'team_id': 782,
        'team_name': 'Belgium',
        'duration': 0.973,
        'extra': {'pass': pass_extra},
        'related_events': ['051449c5-e183-46f9-965d-1d8f00f017cb'],
        'player_id': 3289.0,
        'player_name': 'Romelu Lukaku Menama',
        'position_id': 23.0,
        'position_name': 'Center Forward',
        'location': [61.0, 40.0],
        'under_pressure': None,
        'counterpress': None,
        'game_id': 7584,
    }
    event = pd.DataFrame([record])

    action = sb.convert_to_actions(event, 782).iloc[0]

    # Expected values are derived from the record's location [61.0, 40.0].
    expected_start_x = ((61.0 - 1) / 119) * spadl.field_length
    expected_start_y = 68 - ((40.0 - 1) / 79) * spadl.field_width

    assert action['start_x'] == expected_start_x
    assert action['start_y'] == expected_start_y
    assert action['end_x'] == action['start_x']
    assert action['end_y'] == action['start_y']
def test_convert_time(self, period, timestamp, minute, second):
    """Check that the action time is expressed in seconds since the period start.

    The pass event is built inline with the given period, timestamp, minute
    and second.
    """
    pass_extra = {
        'recipient': {'id': 5642, 'name': 'Axel Witsel'},
        'length': 12.369317,
        'angle': 2.896614,
        'height': {'id': 1, 'name': 'Ground Pass'},
        'end_location': [49.0, 43.0],
        'type': {'id': 65, 'name': 'Kick Off'},
        'body_part': {'id': 40, 'name': 'Right Foot'},
    }
    record = {
        'event_id': 'a1b55211-a292-4294-887b-5385cc3c5705',
        'index': 5,
        'period_id': period,
        'timestamp': timestamp,
        'minute': minute,
        'second': second,
        'type_id': 30,
        'type_name': 'Pass',
        'possession': 2,
        'possession_team_id': 782,
        'possession_team_name': 'Belgium',
        'play_pattern_id': 9,
        'play_pattern_name': 'From Kick Off',
        'team_id': 782,
        'team_name': 'Belgium',
        'duration': 0.973,
        'extra': {'pass': pass_extra},
        'related_events': ['051449c5-e183-46f9-965d-1d8f00f017cb'],
        'player_id': 3289.0,
        'player_name': 'Romelu Lukaku Menama',
        'position_id': 23.0,
        'position_name': 'Center Forward',
        'location': [61.0, 40.0],
        'under_pressure': None,
        'counterpress': None,
        'game_id': 7584,
    }
    event = pd.DataFrame([record])

    action = sb.convert_to_actions(event, 782).iloc[0]

    # Subtract 45 minutes for each of periods 1 and 2 already played and
    # 15 minutes for each of periods 3 and 4 already played.
    expected_seconds = (
        60 * minute
        - ((period > 1) * 45 * 60)
        - ((period > 2) * 45 * 60)
        - ((period > 3) * 15 * 60)
        - ((period > 4) * 15 * 60)
        + second
    )

    assert action['period_id'] == period
    assert action['time_seconds'] == expected_seconds