예제 #1
0
 def test_convert_to_actions(self):
     """Convert the fixture events and sanity-check the resulting actions.

     The converted frame must be non-empty, pass ``SPADLSchema``
     validation, belong to game 2058007 and only reference the two
     expected team ids.
     """
     home_team_id = 5629
     other_team_id = 12913

     actions = wy.convert_to_actions(self.events, home_team_id)

     assert len(actions) > 0
     SPADLSchema.validate(actions)
     assert (actions.game_id == 2058007).all()

     # Every action has to be attributed to one of the two teams.
     by_home_team = actions.team_id == home_team_id
     by_other_team = actions.team_id == other_team_id
     assert (by_home_team | by_other_team).all()
예제 #2
0
 def test_convert_own_goal(self):
     """Check that a single own-goal event becomes one own-goal action.

     Event 258696133 of game 2057961 is expected to convert into exactly
     one SPADL action: a ``bad_touch`` with result ``owngoal`` and body
     part ``foot``.
     """
     match_events = self.WSL.events(2057961)
     is_own_goal_event = match_events.event_id == 258696133
     actions = wy.convert_to_actions(match_events[is_own_goal_event], 16216)

     assert len(actions) == 1

     # Only one row is expected, so inspect it directly.
     action = actions.iloc[0]
     assert action['type_id'] == spadl.actiontypes.index('bad_touch')
     assert action['result_id'] == spadl.results.index('owngoal')
     assert action['bodypart_id'] == spadl.bodyparts.index('foot')
예제 #3
0
 def test_insert_interception_passes(self):
     """Check that one tagged head pass is split into two actions.

     The single pass event below is expected to convert into an
     ``interception`` with result ``success`` followed by a ``bad_touch``
     with result ``owngoal``.
     """
     # The original fixture annotates this tag set as "own goal".
     tag_ids = (102, 1401, 1801)
     head_pass = {
         'type_id': 8,
         'subtype_name': 'Head pass',
         'tags': [{'id': tag_id} for tag_id in tag_ids],
         'player_id': 38093,
         'positions': [{'y': 56, 'x': 5}, {'y': 100, 'x': 100}],
         'game_id': 2499737,
         'type_name': 'Pass',
         'team_id': 1610,
         'period_id': 2,
         'milliseconds': 2184.793924,
         'subtype_id': 82,
         'event_id': 180427412,
     }

     actions = wy.convert_to_actions(pd.DataFrame([head_pass]), 1610)

     assert len(actions) == 2

     # Row labels of the two expected actions.
     first, second = 0, 1
     assert actions.at[first, 'type_id'] == spadl.actiontypes.index('interception')
     assert actions.at[second, 'type_id'] == spadl.actiontypes.index('bad_touch')
     assert actions.at[first, 'result_id'] == spadl.results.index('success')
     assert actions.at[second, 'result_id'] == spadl.results.index('owngoal')
예제 #4
0
 def test_convert_own_goal_touches(self):
     """Own goals resulting from bad touch events in the Wyscout event
     streams should be included in the SPADL representation.
     """
     # An own goal from the game between Leicester and Stoke on 24 Feb 2018.
     # Stoke's goalkeeper Jack Butland allows a low cross to bounce off his
     # gloves and into the net. The sequence consists of three raw events:
     # the cross, the touch and the save attempt.
     cross = {
         'type_id': 8,
         'subtype_name': 'Cross',
         'tags': [{'id': 402}, {'id': 801}, {'id': 1802}],
         'player_id': 8013,
         'positions': [{'y': 89, 'x': 97}, {'y': 0, 'x': 0}],
         'game_id': 2499994,
         'type_name': 'Pass',
         'team_id': 1631,
         'period_id': 2,
         'milliseconds': 1496.7290489999993,
         'subtype_id': 80,
         'event_id': 230320305,
     }
     touch = {
         'type_id': 7,
         'subtype_name': 'Touch',
         'tags': [{'id': 102}],
         'player_id': 8094,
         'positions': [{'y': 50, 'x': 1}, {'y': 100, 'x': 100}],
         'game_id': 2499994,
         'type_name': 'Others on the ball',
         'team_id': 1639,
         'period_id': 2,
         'milliseconds': 1497.6330749999993,
         'subtype_id': 72,
         'event_id': 230320132,
     }
     save_attempt = {
         'type_id': 9,
         'subtype_name': 'Reflexes',
         'tags': [{'id': 101}, {'id': 1802}],
         'player_id': 8094,
         'positions': [{'y': 100, 'x': 100}, {'y': 50, 'x': 1}],
         'game_id': 2499994,
         'type_name': 'Save attempt',
         'team_id': 1639,
         'period_id': 2,
         'milliseconds': 1499.980547,
         'subtype_id': 90,
         'event_id': 230320135,
     }
     raw_events = pd.DataFrame([cross, touch, save_attempt])

     actions = wy.convert_to_actions(raw_events, 1639)

     # FIXME: It adds a dribble between the bad touch of the goalkeeper and
     # his attempt to save the ball before crossing the line. Not sure
     # whether that is ideal.
     assert len(actions) == 4

     # The second action (row label 1) must be the own-goal bad touch.
     own_goal_row = 1
     assert actions.at[own_goal_row, 'type_id'] == spadl.actiontypes.index('bad_touch')
     assert actions.at[own_goal_row, 'result_id'] == spadl.results.index('owngoal')
예제 #5
0
def convert_wyscout_data():
    """Convert the raw Wyscout public data set into SPADL HDF5 stores.

    For every selected competition, the raw JSON files below
    ``<_data_dir>/wyscout_public/raw`` are read and one
    ``spadl-<competition>-<season>.h5`` file is written to
    ``<_data_dir>/wyscout_public``. Each store receives the SPADL lookup
    tables (``actiontypes``, ``results``, ``bodyparts``), the ``games``,
    ``players`` and ``teams`` tables, and one ``actions/game_<game_id>``
    table per game.
    """
    # Wyscout ids mapped to the labels used in the output file names.
    season_labels = {10078: '2018'}
    competition_labels = {28: 'WorldCup'}

    spadl_datafolder = os.path.join(_data_dir, 'wyscout_public')
    raw_datafolder = os.path.join(_data_dir, 'wyscout_public', 'raw')

    def _file_label(row):
        # Rename competitions to the names used in the file names: the
        # area name when it is non-empty, the competition name otherwise.
        area_name = row.area['name']
        return area_name if area_name != '' else row['name']

    # Select the competitions to convert.
    competitions = pd.read_json(read_json_file(f'{raw_datafolder}/competitions.json'))
    competitions['name'] = competitions.apply(_file_label, axis=1)
    selected = competitions[competitions.wyId.isin(competition_labels.keys())]

    teams = wyscout.convert_teams(pd.read_json(read_json_file(f'{raw_datafolder}/teams.json')))
    players = wyscout.convert_players(
        pd.read_json(read_json_file(f'{raw_datafolder}/players.json'))
    )

    for competition in selected.itertuples():
        # The matches and events files share the same name suffix.
        file_label = competition.name.replace(' ', '_')

        raw_games = pd.read_json(read_json_file(f'{raw_datafolder}/matches_{file_label}.json'))
        competition_id = competition_labels[competition.wyId]
        # The season is taken from the first season id found in the games file.
        season_id = season_labels[raw_games.seasonId.unique()[0]]
        games = wyscout.convert_games(raw_games)
        games['competition_id'] = competition_id
        games['season_id'] = season_id

        raw_events = pd.read_json(read_json_file(f'{raw_datafolder}/events_{file_label}.json'))
        events_by_match = raw_events.groupby('matchId', as_index=False)

        store_path = os.path.join(spadl_datafolder, f'spadl-{competition_id}-{season_id}.h5')

        # Store all spadl data in h5-file
        print(f'Converting {competition_id} {season_id}')
        with pd.HDFStore(store_path) as store:

            store['actiontypes'] = spadl.actiontypes_df()
            store['results'] = spadl.results_df()
            store['bodyparts'] = spadl.bodyparts_df()
            store['games'] = games

            # Materialise the rows so the progress bar knows its length.
            game_rows = list(games.itertuples())
            for game in tqdm(game_rows):
                match_events = wyscout.convert_events(events_by_match.get_group(game.game_id))

                # convert events to SPADL actions
                actions = wyscout.convert_to_actions(match_events, game.home_team_id)
                actions['action_id'] = range(len(actions))
                store[f'actions/game_{game.game_id}'] = actions

            store['players'] = players

            # Keep only the teams that appear in this competition's games.
            plays_home = teams.team_id.isin(games.home_team_id)
            plays_away = teams.team_id.isin(games.away_team_id)
            store['teams'] = teams[plays_home | plays_away]