예제 #1
0
 def setup_method(self):
     """Prepare a local StatsBomb loader and preload the events of game 7584.

     The loader reads from ``../data/statsbomb/raw`` relative to this file.
     """
     here = os.path.dirname(__file__)
     raw_statsbomb_dir = os.path.join(here, os.pardir, 'data', 'statsbomb', 'raw')
     self.SBL = sb.StatsBombLoader(getter='local', root=raw_statsbomb_dir)
     # NOTE(review): presumably the StatsBomb team id of Belgium -- confirm.
     self.id_bel = 782
     # Upstream copy of the event file loaded below:
     # https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/7584.json
     self.events_japbel = self.SBL.events(7584)
예제 #2
0
def convert_statsbomb_data():
    """Fetch the selected StatsBomb open-data competitions and store them as SPADL.

    For every competition whose name is a key of ``leagues`` and whose season
    id is a key of ``seasons``, one HDF5 file named
    ``spadl-<league>-<season>.h5`` is written to ``<_data_dir>/statsbomb``.
    Each file holds the SPADL lookup tables (``actiontypes``, ``results``,
    ``bodyparts``), one ``actions/game_<id>`` table per game, and the
    ``games``, ``teams`` and ``players`` tables.
    """
    # Maps StatsBomb season ids / competition names to the labels used in
    # the output file names and in the stored ``games`` table.
    seasons = {
        3: '2018',
    }
    leagues = {
        'FIFA World Cup': 'WorldCup',
    }
    spadl_datafolder = os.path.join(_data_dir, 'statsbomb')
    # Opening an HDFStore does not create missing parent directories.
    os.makedirs(spadl_datafolder, exist_ok=True)

    free_open_data_remote = 'https://raw.githubusercontent.com/statsbomb/open-data/master/data/'

    SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter='remote')

    # View all available competitions
    df_competitions = SBL.competitions()
    # Filter on the season as well as the league: a selected league may be
    # published for seasons that have no entry in ``seasons``, and those rows
    # would otherwise raise a KeyError in the lookup below.
    df_selected_competitions = df_competitions[
        df_competitions.competition_name.isin(leagues.keys())
        & df_competitions.season_id.isin(seasons.keys())
    ]

    for competition in df_selected_competitions.itertuples():
        # Get games from all selected competition
        games = SBL.games(competition.competition_id, competition.season_id)

        games_verbose = tqdm(list(games.itertuples()), desc='Loading match data')
        # Collected per competition so each file only lists its own teams/players.
        teams, players = [], []

        competition_id = leagues[competition.competition_name]
        season_id = seasons[competition.season_id]
        spadl_h5 = os.path.join(spadl_datafolder, f'spadl-{competition_id}-{season_id}.h5')
        with pd.HDFStore(spadl_h5) as spadlstore:

            spadlstore.put('actiontypes', spadl.actiontypes_df(), format='table')
            spadlstore.put('results', spadl.results_df(), format='table')
            spadlstore.put('bodyparts', spadl.bodyparts_df(), format='table')

            for game in games_verbose:
                # load data
                teams.append(SBL.teams(game.game_id))
                players.append(SBL.players(game.game_id))
                events = SBL.events(game.game_id)

                # convert data
                spadlstore.put(
                    f'actions/game_{game.game_id}',
                    statsbomb.convert_to_actions(events, game.home_team_id),
                    format='table',
                )

            # Replace the StatsBomb ids by the labels defined above. Item
            # assignment always writes a column; attribute assignment only
            # does so when the column already exists.
            games['season_id'] = season_id
            games['competition_id'] = competition_id
            spadlstore.put('games', games)
            # A team/player appears once per game; keep a single row for each.
            spadlstore.put(
                'teams',
                pd.concat(teams).drop_duplicates('team_id').reset_index(drop=True),
            )
            spadlstore.put(
                'players',
                pd.concat(players).drop_duplicates('player_id').reset_index(drop=True),
            )
예제 #3
0
import socceraction.classification.features as fs
import socceraction.classification.labels as lab
import socceraction.spadl.statsbomb as statsbomb
import socceraction.spadl as spadl

import pandas as pd
import warnings
import xgboost
import tqdm
import os

# Silence pandas' PerformanceWarning from here on.
warnings.simplefilter(category=pd.errors.PerformanceWarning, action='ignore')
datafolder = "data/eventing"

### NOTEBOOK 1: LOAD AND CONVERT STATSBOMB DATA
SBL = statsbomb.StatsBombLoader(getter="local", root=datafolder)
# No filtering: every competition returned by the loader is selected.
selected_competitions = SBL.competitions()

# Fetch the matches of each selected (competition, season) pair and stack
# them into a single frame with a fresh 0..n-1 index.
matches = pd.concat(
    [
        SBL.matches(competition.competition_id, competition.season_id)
        for competition in selected_competitions.itertuples()
    ],
    sort=True,
).reset_index(drop=True)

# Progress-bar wrapper around the match rows, plus the empty containers
# that the conversion step fills.
matches_verbose = tqdm.tqdm(
    list(matches.itertuples()),
    desc="Loading match data",
)
teams = []
players = []
player_games = []
actions = dict()
예제 #4
0

def always_ltr(actions):
    """Mirror the coordinates of every action that is not played left to right.

    Rows where ``left_to_right`` is False get their start/end x coordinates
    replaced by ``spadlcfg.field_length - x`` and their y coordinates by
    ``spadlcfg.field_width - y``. The frame is modified in place and also
    returned.
    """
    needs_flip = ~actions.left_to_right
    # (column, pitch dimension the coordinate is mirrored against)
    mirrored_columns = (
        ("start_x", spadlcfg.field_length),
        ("start_y", spadlcfg.field_width),
        ("end_x", spadlcfg.field_length),
        ("end_y", spadlcfg.field_width),
    )
    for column, extent in mirrored_columns:
        # ``.values`` drops the index so the assignment is purely positional.
        actions.loc[needs_flip, column] = extent - actions.loc[needs_flip, column].values
    return actions


if __name__ == "__main__":
    # Script entry point: collect the matches of the FIFA World Cup from the
    # StatsBomb open-data repository and prepare a progress-bar iterator
    # over them.

    # Set up the statsbombloader
    free_open_data_remote = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
    SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter="remote")

    # Select competitions to load and convert
    competitions = SBL.competitions()
    selected_competitions = competitions[competitions.competition_name == "FIFA World Cup"]

    # Get matches from all selected competitions and stack them into one
    # frame with a fresh 0..n-1 index.
    matches = pd.concat(
        [
            SBL.matches(row.competition_id, row.season_id)
            for row in selected_competitions.itertuples()
        ],
        sort=True,
    ).reset_index(drop=True)

    # Load and convert match data
    matches_verbose = tqdm.tqdm(list(matches.itertuples()), desc="Loading match data")
예제 #5
0
 def test_default_remote(self):
     """A loader built without arguments uses the class's ``_free_open_data`` root."""
     default_loader = sb.StatsBombLoader()
     expected_root = sb.StatsBombLoader._free_open_data
     assert default_loader.root == expected_root
예제 #6
0
 def setup_method(self):
     """Create a StatsBomb loader that reads from a local data directory."""
     # Resolves to ``<directory of this file>/../data/statsbomb/raw``.
     data_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'data',
                             'statsbomb', 'raw')
     # getter='local' together with a directory root -- presumably reads
     # from disk instead of downloading; confirm against StatsBombLoader.
     self.SBL = sb.StatsBombLoader(root=data_dir, getter='local')