def test_events_data_frame(self):
    """Smoke-test the EventsFrame filter helpers on the Metrica JSON sample."""
    base_dir = os.path.dirname(__file__)
    dataset = load_metrica_json_event_data(
        metadata_filename=f"{base_dir}/files/metadata.xml",
        raw_data_filename=f"{base_dir}/files/events.json",
    )
    frame = EventsFrame(to_pandas(dataset))
    frame.data_type = DataType.EVENT
    frame.metadata = dataset.metadata
    frame.records = dataset.records

    # Filters by event attribute.
    assert frame.type("PASS").shape[0] == 26
    assert frame.result("COMPLETE").shape[0] == 45

    # Filters by pitch location.
    assert frame.into(Zones.OPPONENT_BOX).shape[0] == 1
    assert frame.starts_inside(Zones.OPPONENT_BOX).shape[0] == 2
    assert frame.ends_inside(Zones.OPPONENT_BOX).shape[0] == 2
    assert frame.ends_outside(Zones.OPPONENT_BOX).shape[0] == 43

    # Test different ways to input Zones and areas: predefined zones and
    # custom Area instances may be mixed freely within one call.
    box = Area((0.25, 0.2), (0.75, 0.8))
    assert frame.ends_outside(Zones.OPPONENT_BOX, Zones.OWN_BOX).shape[0] == 45
    assert frame.ends_inside(Zones.OPPONENT_BOX, box).shape[0] == 14
    assert frame.ends_inside(box, box).shape[0] == 12
def main():
    """
    This example shows the use of Metrica datasets, and how we can pass
    argument to the dataset loader.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # The metrica dataset loader loads by default the 'game1' dataset.
    options = {"sample_rate": 1.0 / 12, "limit": 10}
    dataset = datasets.load("metrica_tracking", options=options)
    print(len(dataset.frames))

    # Additional keyword arguments are forwarded to the loader itself,
    # here selecting a different game.
    dataset = datasets.load(
        "metrica_tracking", options={"limit": 1000}, game="game2"
    )
    print(to_pandas(dataset))
def test_to_pandas_additional_columns(self):
    """additional_columns accepts both constants and per-record callables."""
    dataset = self._get_tracking_dataset()

    data_frame = to_pandas(
        dataset,
        additional_columns={
            # A plain value is repeated on every row.
            "match": "test",
            # A callable is evaluated per frame.
            "bonus_column": lambda frame: frame.frame_id + 10,
        },
    )

    expected = DataFrame.from_dict(
        {
            "period_id": [1, 1],
            "timestamp": [0.1, 0.2],
            "ball_state": [None, None],
            "ball_owning_team": [None, None],
            "ball_x": [100, 0],
            "ball_y": [-50, 50],
            "match": ["test", "test"],
            "bonus_column": [11, 12],
            "player_home_1_x": [None, 15],
            "player_home_1_y": [None, 35],
            "player_away_1_x": [None, 10],
            "player_away_1_y": [None, 20],
        }
    )
    assert_frame_equal(data_frame, expected)
def main():
    """
    This example shows the use of Statsbomb datasets, and how we can pass
    argument to the dataset loader.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    logger = logging.getLogger(__name__)

    dataset = datasets.load(
        "statsbomb", {"event_types": ["pass", "take_on", "carry", "shot"]}
    )

    with performance_logging("transform", logger=logger):
        # convert to TRACAB coordinates
        dataset = transform(
            dataset,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=[(-5500, 5500), (-3300, 3300)],
        )

    with performance_logging("to pandas", logger=logger):
        df = to_pandas(dataset)

    print(df[:50].to_string())
def main():
    """
    Search a StatsBomb event stream for the "PPS" pattern and print the
    surrounding events as a pandas DataFrame.

    Fix: the original ended with a bare ``return`` followed by a
    per-match loop that could therefore never execute (dead code); the
    unreachable block and the then-unused ``matches`` binding are removed.
    Observable behavior is unchanged.
    """
    import pandas as pd

    # Widen pandas' display limits so the printed frame is not truncated.
    pd.set_option("display.max_colwidth", None)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 2000)

    matcher = Matcher(encoder)
    dataset = datasets.load("statsbomb", options={"event_types": ["shot", "pass"]})

    with performance_logging("search"):
        # Result is intentionally discarded; the call demonstrates search
        # timing via performance_logging.
        matcher.search(dataset, r"PPS")

    df = to_pandas(
        dataset,
        additional_columns={
            "player_name": lambda event: event.player.full_name,
            "team_name": lambda event: str(event.team),
        },
    )
    print(df[["timestamp", "team_name", "player_name", "event_type", "result"]][:100])
def to_pandas(self, *args, **kwargs):
    """
    See [to_pandas][kloppy.helpers.to_pandas]
    """
    # Resolved at call time; all positional and keyword arguments are
    # forwarded unchanged, with this dataset as the first argument.
    import kloppy

    return kloppy.to_pandas(self, *args, **kwargs)
def __init__(
    self,
    tracking_metadata_file=None,
    tracking_data_file=None,
    events_metadata_file=None,
    events_data_file=None,
):
    """Load the optional tracking and event files and enrich the result."""
    # Keep the raw file references around for later inspection.
    self.files = {
        "tracking_metadata_file": tracking_metadata_file,
        "tracking_data_file": tracking_data_file,
        "events_metadata_file": events_metadata_file,
        "events_data_file": events_data_file,
    }

    # Tracking data is optional; self.tracking stays None when absent.
    self.tracking = None
    if tracking_data_file is not None:
        tracking_dataset = load_epts_tracking_data(
            metadata_filename=tracking_metadata_file,
            raw_data_filename=tracking_data_file,
        )
        frame = TrackingFrame(to_pandas(tracking_dataset))
        frame.data_type = DataType.TRACKING
        frame.metadata = tracking_dataset.metadata
        frame.records = tracking_dataset.records
        self.tracking = frame

    # Event data is equally optional.
    self.events = None
    if events_data_file is not None:
        events_dataset = load_metrica_json_event_data(
            metadata_filename=events_metadata_file,
            raw_data_filename=events_data_file,
        )
        frame = EventsFrame(to_pandas(events_dataset))
        frame.data_type = DataType.EVENT
        frame.metadata = events_dataset.metadata
        frame.records = events_dataset.records
        self.events = frame

    self._enrich_data()
def test_to_pandas(self):
    """A tracking dataset round-trips into the expected flat DataFrame."""
    dataset = self._get_tracking_dataset()

    data_frame = to_pandas(dataset)

    # Two frames; index keys 0 and 1 are the row labels.
    expected_columns = {
        "period_id": {0: 1, 1: 1},
        "timestamp": {0: 0.1, 1: 0.2},
        "ball_state": {0: None, 1: None},
        "ball_owning_team": {0: None, 1: None},
        "ball_x": {0: 100, 1: 0},
        "ball_y": {0: -50, 1: 50},
        "player_home_1_x": {0: None, 1: 15.0},
        "player_home_1_y": {0: None, 1: 35.0},
        "player_away_1_x": {0: None, 1: 10.0},
        "player_away_1_y": {0: None, 1: 20.0},
    }
    assert_frame_equal(data_frame, DataFrame.from_dict(expected_columns))
def test_to_pandas_generic_events(self):
    """Events without a dedicated kloppy type appear with a GENERIC: prefix."""
    base_dir = os.path.dirname(__file__)
    serializer = OptaSerializer()

    f24_path = f"{base_dir}/files/opta_f24.xml"
    f7_path = f"{base_dir}/files/opta_f7.xml"
    with open(f24_path, "rb") as f24_data, open(f7_path, "rb") as f7_data:
        dataset = serializer.deserialize(
            inputs={"f24_data": f24_data, "f7_data": f7_data}
        )

    dataframe = to_pandas(dataset)
    generic_out = dataframe[dataframe.event_type == "GENERIC:out"]
    assert generic_out.shape[0] == 2
def main():
    """
    This example shows the use of Metrica datasets, and how we can pass
    argument to the dataset loader.
    """
    # The metrica dataset loader loads by default the 'game1' dataset
    dataset = datasets.load(
        "metrica_tracking",
        options={"sample_rate": 1.0 / 12, "limit": 10},
    )
    print(len(dataset.frames))

    # Extra keyword arguments reach the loader; 'game' picks another dataset.
    dataset = datasets.load(
        "metrica_tracking", options={"limit": 1000}, game="game2"
    )
    print(to_pandas(dataset))
def main():
    """Load StatsBomb events, transform their coordinate frame, print them."""
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    logger = logging.getLogger(__name__)

    dataset = datasets.load(
        "statsbomb", {"event_types": ["pass", "take_on", "carry", "shot"]}
    )

    with performance_logging("transform", logger=logger):
        dataset = transform(
            dataset,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=[(-5500, 5500), (-3300, 3300)],
        )

    with performance_logging("to pandas", logger=logger):
        df = to_pandas(dataset)

    print(df[:50].to_string())
def test_tracking_data_frame(self):
    """Exercise the TrackingFrame accessors on the EPTS sample data."""
    base_dir = os.path.dirname(__file__)
    dataset = load_epts_tracking_data(
        metadata_filename=f"{base_dir}/files/metadata.xml",
        raw_data_filename=f"{base_dir}/files/tracking.txt",
    )
    frame = TrackingFrame(to_pandas(dataset))
    frame.data_type = DataType.TRACKING
    frame.metadata = dataset.metadata
    frame.records = dataset.records

    # Metadata lookups.
    assert frame.get_team_by_id("FIFATMA").team_id == "FIFATMA"
    assert frame.get_period_by_id(1).id == 1
    assert frame.get_other_team_id("FIFATMA") == "FIFATMB"

    # Column-selection helpers.
    assert frame.team("FIFATMA").shape[1] == 22
    assert frame.dimension("x").shape[1] == 23
    assert frame.players().shape[1] == 44
    assert frame.players("field").shape[1] == 40

    # Derived quantities.
    assert sum(frame.phase(defending_team_id="FIFATMA")) == 0
    assert sum(frame.team("FIFATMA").stretched(90)) == 863
# Download the Wyscout events only if a cached copy does not already exist.
if not os.path.exists(WYSCOUT_JSON_PATH):
    with open(WYSCOUT_JSON_PATH, 'w') as f:
        response = requests.get(url=WYSCOUT_URL)
        # to make sure the encoding for é etc. is correct
        response.encoding = 'unicode-escape'
        json.dump(response.json(), f)

# load the wyscout events as a dataframe using Kloppy
serializer = WyscoutSerializer()
with open(WYSCOUT_JSON_PATH) as event_data:
    wyscout_dataset = serializer.deserialize(inputs={'event_data': event_data})

df_wyscout = to_pandas(
    wyscout_dataset,
    additional_columns={
        'player_name': lambda event: str(event.player),
        'team_name': lambda event: str(event.player.team),
    },
)

##############################################################################
# Standardize the Wyscout data to StatsBomb coordinates
# -----------------------------------------------------
# You can use any of the supported pitches in the pitch_from/ pitch_to here.
# They are currently: ``statsbomb``, ``tracab``, ``opta``, ``wyscout``, ``uefa``,
# ``metricasports``, ``custom``, ``skillcorner``, and ``secondspectrum``.
#
# If the pitch size varies (``tracab``, ``metricasports``, ``custom``,
# ``skillcorner``, ``secondspectrum``) then you also need to supply the
# relevant length_from/ length_to or width_from/ width_to in meters.
def to_pandas(self, *args, **kwargs):
    """
    Convert this dataset to a pandas DataFrame.

    All positional and keyword arguments are forwarded unchanged to
    [to_pandas][kloppy.helpers.to_pandas], with this dataset as the
    first argument.
    """
    # Local import so the helper is resolved at call time.
    from kloppy import to_pandas
    return to_pandas(self, *args, **kwargs)