def setup_method(self):
    """Build a local StatsBomb loader and preload the events of game 7584.

    The loader reads from ``../data/statsbomb/raw`` relative to this file.
    """
    here = os.path.dirname(__file__)
    raw_dir = os.path.join(here, os.pardir, 'data', 'statsbomb', 'raw')
    self.SBL = sb.StatsBombLoader(root=raw_dir, getter='local')

    # Event data for this game is published at
    # https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/7584.json
    # NOTE(review): 782 is presumably Belgium's team id in that game -- confirm.
    self.id_bel = 782
    self.events_japbel = self.SBL.events(7584)
def convert_statsbomb_data():
    """Download StatsBomb open data and store it as SPADL tables in HDF5.

    For every competition/season pair listed in ``leagues`` and ``seasons``,
    one file ``spadl-<competition>-<season>.h5`` is written to the
    ``statsbomb`` sub-folder of ``_data_dir``. Each file holds the SPADL
    vocabulary tables (``actiontypes``, ``results``, ``bodyparts``), one
    ``actions/game_<id>`` table per game, and de-duplicated ``games``,
    ``teams`` and ``players`` tables.
    """
    # StatsBomb season id -> label used in the output file name.
    seasons = {
        3: '2018',
    }
    # StatsBomb competition name -> label used in the output file name.
    leagues = {
        'FIFA World Cup': 'WorldCup',
    }

    spadl_datafolder = os.path.join(_data_dir, 'statsbomb')

    free_open_data_remote = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/'
    SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter='remote')

    # View all available competitions
    df_competitions = SBL.competitions()

    # Select on BOTH name and season: a competition can be listed for several
    # seasons, and only the seasons in ``seasons`` can be mapped to a label
    # below. Filtering on the name alone makes ``seasons[...]`` raise KeyError
    # for every other season of a selected competition.
    is_selected = df_competitions.competition_name.isin(
        list(leagues)
    ) & df_competitions.season_id.isin(list(seasons))
    df_selected_competitions = df_competitions[is_selected]

    for competition in df_selected_competitions.itertuples():
        # Get games from all selected competition
        games = SBL.games(competition.competition_id, competition.season_id)
        # Materialise the rows so the progress bar knows the total.
        games_verbose = tqdm(list(games.itertuples()), desc='Loading match data')
        teams, players = [], []

        competition_id = leagues[competition.competition_name]
        season_id = seasons[competition.season_id]
        spadl_h5 = os.path.join(spadl_datafolder, f'spadl-{competition_id}-{season_id}.h5')

        with pd.HDFStore(spadl_h5) as spadlstore:
            spadlstore.put('actiontypes', spadl.actiontypes_df(), format='table')
            spadlstore.put('results', spadl.results_df(), format='table')
            spadlstore.put('bodyparts', spadl.bodyparts_df(), format='table')

            for game in games_verbose:
                # load data
                teams.append(SBL.teams(game.game_id))
                players.append(SBL.players(game.game_id))
                events = SBL.events(game.game_id)

                # convert data
                spadlstore.put(
                    f'actions/game_{game.game_id}',
                    statsbomb.convert_to_actions(events, game.home_team_id),
                    format='table',
                )

            # Replace the ids with the file-name labels. Bracket assignment is
            # used because attribute assignment only updates a column that
            # already exists and never creates one.
            games['season_id'] = season_id
            games['competition_id'] = competition_id
            spadlstore.put('games', games)
            spadlstore.put(
                'teams',
                pd.concat(teams).drop_duplicates('team_id').reset_index(drop=True),
            )
            spadlstore.put(
                'players',
                pd.concat(players).drop_duplicates('player_id').reset_index(drop=True),
            )
import os
import warnings

import pandas as pd
import tqdm
import xgboost

import socceraction.classification.features as fs
import socceraction.classification.labels as lab
import socceraction.spadl as spadl
import socceraction.spadl.statsbomb as statsbomb

# Suppress pandas PerformanceWarning for the rest of the script.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Root folder the local StatsBomb loader reads from.
datafolder = "data/eventing"

### NOTEBOOK 1: LOAD AND CONVERT STATSBOMB DATA

SBL = statsbomb.StatsBombLoader(root=datafolder, getter="local")
# No filtering is applied: every competition the loader reports is used.
selected_competitions = SBL.competitions()

# Get matches from all selected competitions: one frame per
# (competition, season) row, stacked into a single re-indexed frame.
matches = pd.concat(
    [
        SBL.matches(row.competition_id, row.season_id)
        for row in selected_competitions.itertuples()
    ],
    sort=True,
).reset_index(drop=True)

# Load and convert match data
# The rows are materialised into a list so the progress bar knows the total.
matches_verbose = tqdm.tqdm(list(matches.itertuples()), desc="Loading match data")

# Accumulators; nothing in this part of the script fills them yet.
teams = []
players = []
player_games = []
actions = {}
def always_ltr(actions):
    """Mirror the coordinates of every action not flagged ``left_to_right``.

    For rows where ``left_to_right`` is False, x coordinates are replaced by
    ``spadlcfg.field_length - x`` and y coordinates by
    ``spadlcfg.field_width - y``, for both the start and end location.

    The frame is modified in place and the same object is returned.

    NOTE(review): assumes ``left_to_right`` is a boolean column (it is
    negated with ``~``) -- confirm against the caller.
    """
    flip = ~actions.left_to_right
    mirrored_columns = (
        ("start_x", spadlcfg.field_length),
        ("start_y", spadlcfg.field_width),
        ("end_x", spadlcfg.field_length),
        ("end_y", spadlcfg.field_width),
    )
    for column, extent in mirrored_columns:
        actions.loc[flip, column] = extent - actions.loc[flip, column].values
    return actions


if __name__ == "__main__":
    # Set up the statsbombloader
    free_open_data_remote = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
    SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter="remote")

    # Select competitions to load and convert
    competitions = SBL.competitions()
    is_world_cup = competitions.competition_name == "FIFA World Cup"
    selected_competitions = competitions[is_world_cup]

    # Get matches from all selected competitions: one frame per
    # (competition, season) row, stacked into a single re-indexed frame.
    matches = pd.concat(
        [
            SBL.matches(row.competition_id, row.season_id)
            for row in selected_competitions.itertuples()
        ],
        sort=True,
    ).reset_index(drop=True)
    # Bare column selection whose result is discarded (looks like a leftover
    # notebook preview); kept as-is.
    matches[["home_team_name", "away_team_name", "match_date", "home_score", "away_score"]]

    # Load and convert match data
    # The rows are materialised into a list so the progress bar knows the total.
    matches_verbose = tqdm.tqdm(list(matches.itertuples()), desc="Loading match data")
def test_default_remote(self):
    """A loader created without arguments uses the class's free open-data root."""
    default_loader = sb.StatsBombLoader()
    expected_root = sb.StatsBombLoader._free_open_data
    assert default_loader.root == expected_root
def setup_method(self):
    """Build a StatsBomb loader that reads from the local raw-data folder.

    The folder is ``../data/statsbomb/raw`` relative to this file.
    """
    here = os.path.dirname(__file__)
    raw_dir = os.path.join(here, os.pardir, 'data', 'statsbomb', 'raw')
    self.SBL = sb.StatsBombLoader(root=raw_dir, getter='local')