def test_correct_serialization(self):
    base_dir = os.path.dirname(__file__)
    dataset = sportscode.load(f"{base_dir}/files/code_xml.xml")
    del dataset.codes[2:]

    # Make sure that data in Period 2 get the timestamp corrected
    dataset.metadata.periods = [
        Period(id=1, start_timestamp=0, end_timestamp=45 * 60),
        Period(id=2, start_timestamp=45 * 60 + 10, end_timestamp=90 * 60),
    ]
    dataset.codes[1].period = dataset.metadata.periods[1]

    serializer = SportsCodeSerializer()
    output = serializer.serialize(dataset)
    expected_output = """<?xml version='1.0' encoding='utf-8'?>
<file>
  <ALL_INSTANCES>
    <instance>
      <ID>P1</ID>
      <start>3.6</start>
      <end>9.7</end>
      <code>PASS</code>
      <label>
        <group>Team</group>
        <text>Henkie</text>
      </label>
      <label>
        <group>Packing.Value</group>
        <text>1</text>
      </label>
      <label>
        <group>Receiver</group>
        <text>Klaas Nøme</text>
      </label>
    </instance>
    <instance>
      <ID>P2</ID>
      <start>2768.3</start>
      <end>2774.5</end>
      <code>PASS</code>
      <label>
        <group>Team</group>
        <text>Henkie</text>
      </label>
      <label>
        <group>Packing.Value</group>
        <text>3</text>
      </label>
      <label>
        <group>Receiver</group>
        <text>Piet</text>
      </label>
    </instance>
  </ALL_INSTANCES>
</file>
"""
    expected_output = bytes(expected_output, "utf-8")

    assert output == expected_output
def test_correct_deserialization(self): """ This test uses data from the StatsBomb open data project. """ base_dir = os.path.dirname(__file__) serializer = StatsBombSerializer() with open(f"{base_dir}/files/statsbomb_lineup.json", "rb") as lineup_data, open( f"{base_dir}/files/statsbomb_event.json", "rb") as event_data: dataset = serializer.deserialize(inputs={ "lineup_data": lineup_data, "event_data": event_data }) assert len(dataset.events) == 4002 assert len(dataset.periods) == 2 assert dataset.orientation == Orientation.ACTION_EXECUTING_TEAM assert dataset.periods[0] == Period( id=1, start_timestamp=0.0, end_timestamp=2705.267, attacking_direction=AttackingDirection.NOT_SET, ) assert dataset.periods[1] == Period( id=2, start_timestamp=2705.268, end_timestamp=5557.321, attacking_direction=AttackingDirection.NOT_SET, )
def test_correct_deserialization(self):
    base_dir = os.path.dirname(__file__)
    serializer = OptaSerializer()

    with open(f"{base_dir}/files/opta_f24.xml", "rb") as f24_data, open(
        f"{base_dir}/files/opta_f7.xml", "rb"
    ) as f7_data:
        dataset = serializer.deserialize(
            inputs={"f24_data": f24_data, "f7_data": f7_data}
        )

    assert len(dataset.events) == 17
    assert len(dataset.periods) == 2
    assert dataset.orientation == Orientation.ACTION_EXECUTING_TEAM
    assert dataset.periods[0] == Period(
        id=1,
        start_timestamp=1537707733.608,
        end_timestamp=1537710501.222,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert dataset.periods[1] == Period(
        id=2,
        start_timestamp=1537711528.873,
        end_timestamp=1537714537.788,
        attacking_direction=AttackingDirection.NOT_SET,
    )
def test_correct_deserialization(self):
    base_dir = os.path.dirname(__file__)
    serializer = MetricaTrackingSerializer()

    with open(
        f"{base_dir}/files/metrica_home.csv", "rb"
    ) as raw_data_home, open(
        f"{base_dir}/files/metrica_away.csv", "rb"
    ) as raw_data_away:
        dataset = serializer.deserialize(
            inputs={
                "raw_data_home": raw_data_home,
                "raw_data_away": raw_data_away,
            }
        )

    assert dataset.metadata.provider == Provider.METRICA
    assert dataset.dataset_type == DatasetType.TRACKING
    assert len(dataset.records) == 6
    assert len(dataset.metadata.periods) == 2
    assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY
    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=0.04,
        end_timestamp=0.12,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=5800.16,
        end_timestamp=5800.24,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    # make sure data is loaded correctly (including flipped y-axis)
    home_player = dataset.metadata.teams[0].players[0]
    assert dataset.records[0].players_coordinates[home_player] == Point(
        x=0.00082, y=1 - 0.48238
    )

    away_player = dataset.metadata.teams[1].players[0]
    assert dataset.records[0].players_coordinates[away_player] == Point(
        x=0.90509, y=1 - 0.47462
    )

    assert dataset.records[0].ball_coordinates == Point(
        x=0.45472, y=1 - 0.38709
    )

    # make sure player data is only in the frame when the player is on the pitch
    assert "home_14" not in [
        player.player_id
        for player in dataset.records[0].players_coordinates.keys()
    ]
    assert "home_14" in [
        player.player_id
        for player in dataset.records[3].players_coordinates.keys()
    ]
def _get_tracking_dataset(self):
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    periods = [
        Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=10.0,
            attacking_direction=AttackingDirection.HOME_AWAY,
        ),
        Period(
            id=2,
            start_timestamp=15.0,
            end_timestamp=25.0,
            attacking_direction=AttackingDirection.AWAY_HOME,
        ),
    ]
    metadata = Metadata(
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        periods=periods,
        teams=teams,
        score=None,
        provider=None,
    )

    tracking_data = TrackingDataset(
        metadata=metadata,
        records=[
            Frame(
                frame_id=1,
                timestamp=0.1,
                ball_owning_team=None,
                ball_state=None,
                period=periods[0],
                players_coordinates={},
                ball_coordinates=Point(x=100, y=-50),
            ),
            Frame(
                frame_id=2,
                timestamp=0.2,
                ball_owning_team=None,
                ball_state=None,
                period=periods[0],
                players_coordinates={
                    Player(
                        team=home_team, player_id="home_1", jersey_no=1
                    ): Point(x=15, y=35)
                },
                ball_coordinates=Point(x=0, y=50),
            ),
        ],
    )
    return tracking_data
def __create_iterator(
    self, data: Readable, sample_rate: float, frame_rate: int
) -> Iterator:
    """
    Notes:
        1. the y-axis is flipped because Metrica uses (y, -y) instead of (-y, y)
    """
    team = None
    frame_idx = 0
    frame_sample = 1 / sample_rate
    player_jersey_numbers = []
    period = None

    for i, line in enumerate(data):
        line = line.strip().decode("ascii")
        columns = line.split(",")
        if i == 0:
            team = columns[3]
        elif i == 1:
            player_jersey_numbers = columns[3:-2:2]
        elif i == 2:
            # consider doing some validation on the columns
            pass
        else:
            period_id = int(columns[0])
            frame_id = int(columns[1])

            if period is None or period.id != period_id:
                period = Period(
                    id=period_id,
                    start_timestamp=frame_id / frame_rate,
                    end_timestamp=frame_id / frame_rate,
                )
            else:
                # consider not updating this every frame for performance reasons
                period.end_timestamp = frame_id / frame_rate

            if frame_idx % frame_sample == 0:
                yield self.__PartialFrame(
                    team=team,
                    period=period,
                    frame_id=frame_id,
                    player_positions={
                        player_no: Point(
                            x=float(columns[3 + i * 2]),
                            y=1 - float(columns[3 + i * 2 + 1]),
                        )
                        for i, player_no in enumerate(player_jersey_numbers)
                        if columns[3 + i * 2] != "NaN"
                    },
                    ball_position=Point(
                        x=float(columns[-2]), y=1 - float(columns[-1])
                    )
                    if columns[-2] != "NaN"
                    else None,
                )
            frame_idx += 1
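# Sketch of the CSV layout assumed by ``__create_iterator`` above, inferred
# purely from the column indices it reads (the concrete values are made up,
# not taken from a real Metrica file):
#
#     row 0:   team name in column 3
#     row 1:   jersey numbers in columns 3, 5, 7, ... (every other column)
#     row 2:   column header row (skipped)
#     row 3+:  period_id, frame_id, ..., x1, y1, x2, y2, ..., ball_x, ball_y
#
# Player columns holding "NaN" are dropped from ``player_positions``, and the
# frames are subsampled via ``frame_idx % (1 / sample_rate)``.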
def test_correct_deserialization(self): """ This test uses data from the StatsBomb open data project. """ dataset = self._load_dataset() assert dataset.metadata.provider == Provider.STATSBOMB assert dataset.dataset_type == DatasetType.EVENT assert len(dataset.events) == 4022 assert len(dataset.metadata.periods) == 2 assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM ) assert dataset.metadata.teams[0].name == "Barcelona" assert dataset.metadata.teams[1].name == "Deportivo Alavés" player = dataset.metadata.teams[0].get_player_by_id("5503") assert player.player_id == "5503" assert player.jersey_no == 10 assert str(player) == "Lionel Andrés Messi Cuccittini" assert player.position is None # not set assert player.starting sub_player = dataset.metadata.teams[0].get_player_by_id("3501") assert str(sub_player) == "Philippe Coutinho Correia" assert not sub_player.starting assert dataset.metadata.periods[0] == Period( id=1, start_timestamp=0.0, end_timestamp=2705.267, attacking_direction=AttackingDirection.NOT_SET, ) assert dataset.metadata.periods[1] == Period( id=2, start_timestamp=2705.268, end_timestamp=5557.321, attacking_direction=AttackingDirection.NOT_SET, ) assert ( dataset.events[791].get_qualifier_value(BodyPartQualifier) == BodyPart.HEAD ) assert ( dataset.events[2231].get_qualifier_value(BodyPartQualifier) == BodyPart.RIGHT_FOOT ) assert ( dataset.events[195].get_qualifier_value(BodyPartQualifier) is None )
def test_correct_deserialization(self):
    base_dir = os.path.dirname(__file__)
    serializer = TRACABSerializer()

    with open(f"{base_dir}/files/tracab_meta.xml", "rb") as meta_data, open(
        f"{base_dir}/files/tracab_raw.dat", "rb"
    ) as raw_data:
        dataset = serializer.deserialize(
            inputs={"meta_data": meta_data, "raw_data": raw_data},
            options={"only_alive": False},
        )

    assert len(dataset.records) == 6
    assert len(dataset.periods) == 2
    assert dataset.orientation == Orientation.FIXED_HOME_AWAY
    assert dataset.periods[0] == Period(
        id=1,
        start_timestamp=4.0,
        end_timestamp=4.08,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.periods[1] == Period(
        id=2,
        start_timestamp=8.0,
        end_timestamp=8.08,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    assert dataset.records[0].home_team_player_positions["19"] == Point(
        x=-1234.0, y=-294.0
    )
    assert dataset.records[0].away_team_player_positions["19"] == Point(
        x=8889, y=-666
    )
    assert dataset.records[0].ball_position == Point(x=-27, y=25)
    assert dataset.records[0].ball_state == BallState.ALIVE
    assert dataset.records[0].ball_owning_team == Team.HOME

    assert dataset.records[1].ball_owning_team == Team.AWAY
    assert dataset.records[2].ball_state == BallState.DEAD

    # make sure player data is only in the frame when the player is on the pitch
    assert "1337" not in dataset.records[0].away_team_player_positions
    assert "1337" in dataset.records[3].away_team_player_positions
def _load_periods(global_config_elm, frame_rate: int) -> List[Period]:
    provider_params = _load_provider_parameters(
        global_config_elm.find("ProviderGlobalParameters"), value_mapper=int
    )

    period_names = [
        "first_half",
        "second_half",
        "first_extra_half",
        "second_extra_half",
    ]

    periods = []

    for idx, period_name in enumerate(period_names):
        start_key = f"{period_name}_start"
        end_key = f"{period_name}_end"
        if start_key in provider_params:
            periods.append(
                Period(
                    id=idx + 1,
                    start_timestamp=float(provider_params[start_key])
                    / frame_rate,
                    end_timestamp=float(provider_params[end_key])
                    / frame_rate,
                )
            )
        else:
            # done
            break

    return periods
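# Illustrative only: ``_load_provider_parameters`` is assumed here to return a
# flat name -> value mapping; the keys below follow the ``period_names`` loop
# above and the frame counts are made up. With frame_rate=25 this would yield
# Period(id=1, 0.0, 2700.0) and Period(id=2, 2760.0, 5460.0); the loop stops at
# the first missing "<period>_start" key, so no extra-time periods are created.
#
#     provider_params = {
#         "first_half_start": 0,
#         "first_half_end": 67500,
#         "second_half_start": 69000,
#         "second_half_end": 136500,
#     }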
def _get_tracking_dataset(self):
    periods = [
        Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=10.0,
            attacking_direction=AttackingDirection.HOME_AWAY,
        ),
        Period(
            id=2,
            start_timestamp=15.0,
            end_timestamp=25.0,
            attacking_direction=AttackingDirection.AWAY_HOME,
        ),
    ]
    tracking_data = TrackingDataset(
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        records=[
            Frame(
                frame_id=1,
                timestamp=0.1,
                ball_owning_team=None,
                ball_state=None,
                period=periods[0],
                away_team_player_positions={},
                home_team_player_positions={},
                ball_position=Point(x=100, y=-50),
            ),
            Frame(
                frame_id=2,
                timestamp=0.2,
                ball_owning_team=None,
                ball_state=None,
                period=periods[0],
                away_team_player_positions={"1": Point(x=10, y=20)},
                home_team_player_positions={"1": Point(x=15, y=35)},
                ball_position=Point(x=0, y=50),
            ),
        ],
        periods=periods,
    )
    return tracking_data
def test_correct_deserialization(self): base_dir = os.path.dirname(__file__) serializer = OptaSerializer() with open(f"{base_dir}/files/opta_f24.xml", "rb") as f24_data, open( f"{base_dir}/files/opta_f7.xml", "rb" ) as f7_data: dataset = serializer.deserialize( inputs={"f24_data": f24_data, "f7_data": f7_data} ) assert dataset.metadata.provider == Provider.OPTA assert dataset.dataset_type == DatasetType.EVENT assert len(dataset.events) == 17 assert len(dataset.metadata.periods) == 2 assert dataset.events[10].ball_owning_team == dataset.metadata.teams[1] assert dataset.events[15].ball_owning_team == dataset.metadata.teams[0] assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM ) assert dataset.metadata.teams[0].name == "FC København" assert dataset.metadata.teams[0].ground == Ground.HOME assert dataset.metadata.teams[1].name == "FC Nordsjælland" assert dataset.metadata.teams[1].ground == Ground.AWAY player = dataset.metadata.teams[0].players[0] assert player.player_id == "111319" assert player.jersey_no == 21 assert str(player) == "Jesse Joronen" assert player.position.position_id == "1" assert player.position.name == "Goalkeeper" assert dataset.metadata.periods[0] == Period( id=1, start_timestamp=1537714933.608, end_timestamp=1537717701.222, attacking_direction=AttackingDirection.NOT_SET, ) assert dataset.metadata.periods[1] == Period( id=2, start_timestamp=1537718728.873, end_timestamp=1537721737.788, attacking_direction=AttackingDirection.NOT_SET, )
def test_correct_deserialization(self): """ This test uses data from the StatsBomb open data project. """ base_dir = os.path.dirname(__file__) serializer = StatsBombSerializer() with open( f"{base_dir}/files/statsbomb_lineup.json", "rb" ) as lineup_data, open( f"{base_dir}/files/statsbomb_event.json", "rb" ) as event_data: dataset = serializer.deserialize( inputs={"lineup_data": lineup_data, "event_data": event_data} ) assert dataset.metadata.provider == Provider.STATSBOMB assert len(dataset.events) == 4002 assert len(dataset.metadata.periods) == 2 assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM ) assert dataset.metadata.teams[0].name == "Barcelona" assert dataset.metadata.teams[1].name == "Deportivo Alavés" player = dataset.metadata.teams[0].players[0] assert player.player_id == "3109" assert player.jersey_no == 14 assert str(player) == "Malcom Filipe Silva de Oliveira" assert player.position is None # not set assert dataset.metadata.periods[0] == Period( id=1, start_timestamp=0.0, end_timestamp=2705.267, attacking_direction=AttackingDirection.NOT_SET, ) assert dataset.metadata.periods[1] == Period( id=2, start_timestamp=2705.268, end_timestamp=5557.321, attacking_direction=AttackingDirection.NOT_SET, )
def test_correct_deserialization(self):
    base_dir = os.path.dirname(__file__)
    serializer = MetricaEventsJsonSerializer()

    with open(
        f"{base_dir}/files/metrica_metadata.xml", "rb"
    ) as metadata, open(
        f"{base_dir}/files/metrica_events.json", "rb"
    ) as event_data:
        dataset = serializer.deserialize(
            inputs={"metadata": metadata, "event_data": event_data}
        )

    assert dataset.metadata.provider == Provider.METRICA
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.events) == 3684
    assert len(dataset.metadata.periods) == 2
    assert dataset.metadata.orientation is None
    assert dataset.metadata.teams[0].name == "Team A"
    assert dataset.metadata.teams[1].name == "Team B"

    player = dataset.metadata.teams[0].players[0]
    assert player.player_id == "P3578"
    assert player.jersey_no == 11
    assert str(player) == "Player 11"
    assert player.position.name == "Goalkeeper"

    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=14.44,
        end_timestamp=2783.76,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=2803.6,
        end_timestamp=5742.12,
        attacking_direction=AttackingDirection.NOT_SET,
    )

    # Make sure we are using the improved event types.
    assert dataset.records[1].qualifiers[0].value == SetPieceType.KICK_OFF
def test_correct_deserialization(self, home_data: str, away_data: str):
    dataset = metrica.load_tracking_csv(
        home_data=home_data, away_data=away_data
    )

    assert dataset.metadata.provider == Provider.METRICA
    assert dataset.dataset_type == DatasetType.TRACKING
    assert len(dataset.records) == 6
    assert len(dataset.metadata.periods) == 2
    assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY
    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=0.04,
        end_timestamp=0.12,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=5800.16,
        end_timestamp=5800.24,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    # make sure data is loaded correctly (including flipped y-axis)
    home_player = dataset.metadata.teams[0].players[0]
    assert dataset.records[0].players_data[
        home_player
    ].coordinates == Point(x=0.00082, y=1 - 0.48238)

    away_player = dataset.metadata.teams[1].players[0]
    assert dataset.records[0].players_data[
        away_player
    ].coordinates == Point(x=0.90509, y=1 - 0.47462)

    assert dataset.records[0].ball_coordinates == Point(
        x=0.45472, y=1 - 0.38709
    )

    # make sure player data is only in the frame when the player is on the pitch
    assert "home_14" not in [
        player.player_id
        for player in dataset.records[0].players_data.keys()
    ]
    assert "home_14" in [
        player.player_id
        for player in dataset.records[3].players_data.keys()
    ]
def test_correct_deserialization(self, event_data: str):
    dataset = datafactory.load(
        event_data=event_data, coordinates="datafactory"
    )

    assert dataset.metadata.provider == Provider.DATAFACTORY
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.events) == 1027
    assert len(dataset.metadata.periods) == 2
    assert dataset.events[10].ball_owning_team == dataset.metadata.teams[1]
    assert dataset.events[23].ball_owning_team == dataset.metadata.teams[0]
    assert dataset.metadata.orientation == Orientation.HOME_TEAM
    assert dataset.metadata.teams[0].name == "Team A"
    assert dataset.metadata.teams[0].ground == Ground.HOME
    assert dataset.metadata.teams[1].name == "Team B"
    assert dataset.metadata.teams[1].ground == Ground.AWAY

    player = dataset.metadata.teams[0].players[0]
    assert player.player_id == "38804"
    assert player.jersey_no == 1
    assert str(player) == "Daniel Bold"
    assert player.position is None  # not set
    assert player.starting

    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=0,
        end_timestamp=2912,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=2700,
        end_timestamp=5710,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    assert dataset.events[0].coordinates == Point(0.01, 0.01)

    # Check the qualifiers
    assert dataset.events[0].qualifiers[0].value == SetPieceType.KICK_OFF
    assert dataset.events[412].qualifiers[0].value == SetPieceType.THROW_IN
def test_correct_deserialization(self): base_dir = os.path.dirname(__file__) serializer = MetricaTrackingSerializer() with open(f"{base_dir}/files/metrica_home.csv", "rb") as raw_data_home, open( f"{base_dir}/files/metrica_away.csv", "rb") as raw_data_away: dataset = serializer.deserialize(inputs={ "raw_data_home": raw_data_home, "raw_data_away": raw_data_away, }) assert len(dataset.records) == 6 assert len(dataset.periods) == 2 assert dataset.orientation == Orientation.FIXED_HOME_AWAY assert dataset.periods[0] == Period( id=1, start_timestamp=0.04, end_timestamp=0.12, attacking_direction=AttackingDirection.HOME_AWAY, ) assert dataset.periods[1] == Period( id=2, start_timestamp=5800.16, end_timestamp=5800.24, attacking_direction=AttackingDirection.AWAY_HOME, ) # make sure data is loaded correctly (including flip y-axis) assert dataset.records[0].home_team_player_positions["11"] == Point( x=0.00082, y=1 - 0.48238) assert dataset.records[0].away_team_player_positions["25"] == Point( x=0.90509, y=1 - 0.47462) assert dataset.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709) # make sure player data is only in the frame when the player is at the pitch assert "14" not in dataset.records[0].home_team_player_positions assert "14" in dataset.records[3].home_team_player_positions
def test_correct_deserialization(self): base_dir = os.path.dirname(__file__) serializer = MetricaEventsJsonSerializer() with open(f"{base_dir}/files/metrica_metadata.xml", "rb") as metadata, open( f"{base_dir}/files/metrica_events.json", "rb") as raw_data: dataset = serializer.deserialize(inputs={ "metadata": metadata, "raw_data": raw_data }) assert dataset.metadata.provider == Provider.METRICA assert len(dataset.events) == 3620 assert len(dataset.metadata.periods) == 2 assert dataset.metadata.orientation is None assert dataset.metadata.teams[0].name == "Team A" assert dataset.metadata.teams[1].name == "Team B" player = dataset.metadata.teams[0].players[0] assert player.player_id == "P3578" assert player.jersey_no == 11 assert str(player) == "Player 11" assert player.position.name == "Goalkeeper" assert dataset.metadata.periods[0] == Period( id=1, start_timestamp=14.44, end_timestamp=2783.76, attacking_direction=AttackingDirection.NOT_SET, ) assert dataset.metadata.periods[1] == Period( id=2, start_timestamp=2803.6, end_timestamp=5742.12, attacking_direction=AttackingDirection.NOT_SET, )
def test_correct_deserialization(self, event_data: str, meta_data: str):
    dataset = sportec.load(
        event_data=event_data, meta_data=meta_data, coordinates="sportec"
    )

    assert dataset.metadata.provider == Provider.SPORTEC
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.metadata.periods) == 2

    # raw_event must be a flattened dict
    assert isinstance(dataset.events[0].raw_event, dict)

    assert len(dataset.events) == 28
    assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY
    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=1591381800.21,
        end_timestamp=1591384584.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=1591385607.01,
        end_timestamp=1591388598.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    player = dataset.metadata.teams[0].players[0]
    assert player.player_id == "DFL-OBJ-00001D"
    assert player.jersey_no == 1
    assert str(player) == "A. Schwolow"
    assert player.position.position_id is None
    assert player.position.name == "TW"

    # Check the qualifiers
    assert dataset.events[25].qualifiers[0].value == SetPieceType.KICK_OFF
    assert dataset.events[16].qualifiers[0].value == BodyPart.RIGHT_FOOT
    assert dataset.events[24].qualifiers[0].value == BodyPart.LEFT_FOOT
    assert dataset.events[26].qualifiers[0].value == BodyPart.HEAD

    assert dataset.events[0].coordinates == Point(56.41, 68.0)
def deserialize(self, inputs: SportsCodeInputs) -> CodeDataset:
    all_instances = objectify.fromstring(inputs.data.read())

    codes = []
    period = Period(id=1, start_timestamp=0, end_timestamp=0)

    for instance in all_instances.ALL_INSTANCES.iterchildren():
        end_timestamp = float(instance.end)

        code = Code(
            period=period,
            code_id=str(instance.ID),
            code=str(instance.code),
            timestamp=float(instance.start),
            end_timestamp=end_timestamp,
            labels={
                str(label.find("group")): parse_value(
                    str(label.find("text"))
                )
                for label in instance.iterchildren("label")
            },
            ball_state=None,
            ball_owning_team=None,
        )
        period.end_timestamp = end_timestamp
        codes.append(code)

    return CodeDataset(
        metadata=Metadata(
            teams=[],
            periods=[period],
            pitch_dimensions=None,
            score=Score(0, 0),
            frame_rate=0.0,
            orientation=Orientation.NOT_SET,
            flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
            provider=Provider.OTHER,
            coordinate_system=None,
        ),
        records=codes,
    )
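# Usage sketch for the deserializer above. The surrounding class is not shown
# here, so ``SportsCodeDeserializer`` and the ``SportsCodeInputs(data=...)``
# constructor are assumptions inferred from the ``inputs.data.read()`` call;
# the tests in this collection go through the ``sportscode.load(...)``
# convenience wrapper instead.
#
#     with open("code_xml.xml", "rb") as fp:
#         dataset = SportsCodeDeserializer().deserialize(
#             inputs=SportsCodeInputs(data=fp)
#         )
#         for code in dataset.codes:
#             print(code.code_id, code.code, code.labels)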
def test_correct_deserialization(self):
    base_dir = os.path.dirname(__file__)

    dataset = load_sportec_event_data(
        f"{base_dir}/files/sportec_events.xml",
        f"{base_dir}/files/sportec_meta.xml",
    )

    assert dataset.metadata.provider == Provider.SPORTEC
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.metadata.periods) == 2

    # raw_event must be a flattened dict
    assert isinstance(dataset.events[0].raw_event, dict)

    assert len(dataset.events) == 28
    assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY
    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=1591381800.21,
        end_timestamp=1591384584.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=1591385607.01,
        end_timestamp=1591388598.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    player = dataset.metadata.teams[0].players[0]
    assert player.player_id == "DFL-OBJ-00001D"
    assert player.jersey_no == 1
    assert str(player) == "A. Schwolow"
    assert player.position.position_id is None
    assert player.position.name == "TW"
def test_correct_deserialization(self, event_data: str, meta_data: str):
    dataset = metrica.load_event(event_data=event_data, meta_data=meta_data)

    assert dataset.metadata.provider == Provider.METRICA
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.events) == 3684
    assert len(dataset.metadata.periods) == 2
    assert dataset.metadata.orientation is None
    assert dataset.metadata.teams[0].name == "Team A"
    assert dataset.metadata.teams[1].name == "Team B"

    player = dataset.metadata.teams[0].players[10]
    assert player.player_id == "Track_11"
    assert player.jersey_no == 11
    assert str(player) == "Track_11"
    assert player.position.name == "Goalkeeper"

    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=14.44,
        end_timestamp=2783.76,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=2803.6,
        end_timestamp=5742.12,
        attacking_direction=AttackingDirection.NOT_SET,
    )

    assert dataset.events[1].coordinates.x == 0.50125

    # Check the qualifiers
    assert dataset.records[1].qualifiers[0].value == SetPieceType.KICK_OFF
    assert dataset.records[100].qualifiers[0].value == BodyPart.HEAD
def __get_periods(cls, tracking):
    """gets the Periods contained in the tracking data"""
    periods = {}

    _periods = np.array([f["period"] for f in tracking])
    unique_periods = set(_periods)
    unique_periods = [
        period for period in unique_periods if period is not None
    ]

    for period in unique_periods:
        _frames = [
            frame
            for frame in tracking
            if frame["period"] == period and frame["time"] is not None
        ]
        periods[period] = Period(
            id=period,
            start_timestamp=cls._timestamp_from_timestring(
                _frames[0]["time"]
            ),
            end_timestamp=cls._timestamp_from_timestring(
                _frames[-1]["time"]
            ),
        )
    return periods
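# Shape of the ``tracking`` argument assumed by ``__get_periods`` above,
# inferred only from the keys it reads (the values are illustrative, not taken
# from a real file):
#
#     tracking = [
#         {"period": 1, "time": "00:00.000", ...},
#         {"period": 1, "time": "45:02.840", ...},
#         {"period": 2, "time": "45:00.000", ...},
#         {"period": None, "time": None, ...},  # frames outside play are skipped
#     ]
#
# __get_periods(tracking) then returns {1: Period(...), 2: Period(...)}, where
# each Period spans the first to the last frame time seen for that period id.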
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset: """ Deserialize Opta event data into a `EventDataset`. Parameters ---------- inputs : dict input `f24_data` should point to a `Readable` object containing the 'xml' formatted event data. input `f7_data` should point to a `Readable` object containing the 'xml' formatted f7 data. options : dict Options for deserialization of the Opta file. Possible options are `event_types` (list of event types) to specify the event types that should be returned. Valid types: "shot", "pass", "carry", "take_on" and "generic". Generic is everything other than the first 4. Those events are barely parsed. This type of event can be used to do the parsing yourself. Every event has a 'raw_event' attribute which contains the original dictionary. Returns ------- dataset : EventDataset Raises ------ See Also -------- Examples -------- >>> serializer = OptaSerializer() >>> with open("123_f24.xml", "rb") as f24_data, \ >>> open("123_f7.xml", "rb") as f7_data: >>> >>> dataset = serializer.deserialize( >>> inputs={ >>> 'f24_data': f24_data, >>> 'f7_data': f7_data >>> }, >>> options={ >>> 'event_types': ["pass", "take_on", "carry", "shot"] >>> } >>> ) """ self.__validate_inputs(inputs) if not options: options = {} with performance_logging("load data", logger=logger): f7_root = objectify.fromstring(inputs["f7_data"].read()) f24_root = objectify.fromstring(inputs["f24_data"].read()) wanted_event_types = [ EventType[event_type.upper()] for event_type in options.get("event_types", []) ] with performance_logging("parse data", logger=logger): matchdata_path = objectify.ObjectPath( "SoccerFeed.SoccerDocument.MatchData") team_elms = list( matchdata_path.find(f7_root).iterchildren("TeamData")) away_player_map = {} home_player_map = {} home_team_id = None away_team_id = None for team_elm in team_elms: player_map = { player_elm.attrib["PlayerRef"].lstrip("p"): player_elm.attrib["ShirtNumber"] for player_elm in team_elm.find( "PlayerLineUp").iterchildren("MatchPlayer") } team_id = team_elm.attrib["TeamRef"].lstrip("t") if team_elm.attrib["Side"] == "Home": home_player_map = player_map home_team_id = team_id elif team_elm.attrib["Side"] == "Away": away_player_map = player_map away_team_id = team_id else: raise Exception(f"Unknown side: {team_elm.attrib['Side']}") if not away_player_map or not home_player_map: raise Exception("LineUp incomplete") game_elm = f24_root.find("Game") periods = [ Period( id=1, start_timestamp=None, end_timestamp=None, ), Period( id=2, start_timestamp=None, end_timestamp=None, ), ] events = [] for event_elm in game_elm.iterchildren("Event"): event_id = event_elm.attrib["id"] type_id = int(event_elm.attrib["type_id"]) timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"]) period_id = int(event_elm.attrib["period_id"]) for period in periods: if period.id == period_id: break else: logger.debug( f"Skipping event {event_id} because period doesn't match {period_id}" ) continue if type_id == EVENT_TYPE_START_PERIOD: logger.debug( f"Set start of period {period.id} to {timestamp}") period.start_timestamp = timestamp elif type_id == EVENT_TYPE_END_PERIOD: logger.debug( f"Set end of period {period.id} to {timestamp}") period.end_timestamp = timestamp else: if not period.start_timestamp: # not started yet continue if event_elm.attrib["team_id"] == home_team_id: team = Team.HOME current_team_map = home_player_map elif event_elm.attrib["team_id"] == away_team_id: team = Team.AWAY current_team_map = away_player_map else: raise 
Exception( f"Unknown team_id {event_elm.attrib['team_id']}") x = float(event_elm.attrib["x"]) y = float(event_elm.attrib["y"]) outcome = int(event_elm.attrib["outcome"]) qualifiers = { int(qualifier_elm.attrib["qualifier_id"]): qualifier_elm.attrib.get("value") for qualifier_elm in event_elm.iterchildren("Q") } player_jersey_no = None if "player_id" in event_elm.attrib: player_jersey_no = current_team_map[ event_elm.attrib["player_id"]] generic_event_kwargs = dict( # from DataRecord period=period, timestamp=timestamp - period.start_timestamp, ball_owning_team=None, ball_state=BallState.ALIVE, # from Event event_id=event_id, team=team, player_jersey_no=player_jersey_no, position=Point(x=x, y=y), raw_event=event_elm, ) if type_id == EVENT_TYPE_PASS: pass_event_kwargs = _parse_pass(qualifiers, outcome) event = PassEvent( **pass_event_kwargs, **generic_event_kwargs, ) elif type_id == EVENT_TYPE_OFFSIDE_PASS: pass_event_kwargs = _parse_offside_pass() event = PassEvent( **pass_event_kwargs, **generic_event_kwargs, ) elif type_id == EVENT_TYPE_TAKE_ON: take_on_event_kwargs = _parse_take_on(outcome) event = TakeOnEvent( **take_on_event_kwargs, **generic_event_kwargs, ) elif type_id in ( EVENT_TYPE_SHOT_MISS, EVENT_TYPE_SHOT_POST, EVENT_TYPE_SHOT_SAVED, EVENT_TYPE_SHOT_GOAL, ): shot_event_kwargs = _parse_shot( qualifiers, type_id, position=generic_event_kwargs["position"], ) kwargs = {} kwargs.update(generic_event_kwargs) kwargs.update(shot_event_kwargs) event = ShotEvent(**kwargs) else: event = GenericEvent(**generic_event_kwargs, result=None) if (not wanted_event_types or event.event_type in wanted_event_types): events.append(event) return EventDataset( flags=DatasetFlag.BALL_OWNING_TEAM, orientation=Orientation.ACTION_EXECUTING_TEAM, pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)), periods=periods, records=events, )
def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: transformer = self.get_transformer(length=2, width=2) with performance_logging("load data", logger=logger): data = json.load(inputs.event_data) match = data["match"] score_data = data["scoreStatus"] incidences = data["incidences"] players_data = data["players"] teams_data = data["teams"] with performance_logging("parse data", logger=logger): teams = [] scores = [] team_ids = ( (Ground.HOME, str(match["homeTeamId"])), (Ground.AWAY, str(match["awayTeamId"])), ) for ground, team_id in team_ids: team = Team( team_id=team_id, name=teams_data[team_id]["name"], ground=ground, ) team.players = [ Player( player_id=player_id, team=team, first_name=player["name"]["first"], last_name=player["name"]["last"], name=player["name"]["shortName"] or player["name"]["nick"], jersey_no=player["squadNo"], starting=not player["substitute"], ) for player_id, player in players_data.items() if str(player["teamId"]) == team_id ] teams.append(team) scores.append(score_data.get(team_id, {}).get("score")) score = Score(home=scores[0], away=scores[1]) # setup periods status = incidences.pop(DF_EVENT_CLASS_STATUS) # start timestamps are fixed start_ts = {1: 0, 2: 45 * 60, 3: 90 * 60, 4: 105 * 60, 5: 120 * 60} # check for end status updates to setup periods end_event_types = { DF_EVENT_TYPE_STATUS_MATCH_END, DF_EVENT_TYPE_STATUS_FIRST_HALF_END, DF_EVENT_TYPE_STATUS_SECOND_HALF_END, DF_EVENT_TYPE_STATUS_FIRST_EXTRA_END, DF_EVENT_TYPE_STATUS_SECOND_EXTRA_END, } periods = {} for status_update in status.values(): if status_update["type"] not in end_event_types: continue half = status_update["t"]["half"] end_ts = parse_str_ts(status_update) periods[half] = Period( id=half, start_timestamp=start_ts[half], end_timestamp=end_ts, attacking_direction=AttackingDirection.HOME_AWAY if half % 2 == 1 else AttackingDirection.AWAY_HOME, ) # exclude goals, already listed as shots too incidences.pop(DF_EVENT_CLASS_GOALS) raw_events = [(k, e_id, e) for k in incidences for e_id, e in incidences[k].items()] # sort events by timestamp, event_id raw_events.sort(key=lambda e: ( e[2]["t"]["half"], e[2]["t"]["m"], e[2]["t"]["s"] or 0, e[1], )) home_team, away_team = teams events = [] previous_event = next_event = None for i, (e_class, e_id, raw_event) in enumerate(raw_events): period = periods.get(raw_event["t"]["half"]) if period is None: # skip invalid event continue timestamp = parse_str_ts(raw_event) if (previous_event is not None and previous_event["t"]["half"] != raw_event["t"]["half"]): previous_event = None next_event = (raw_events[i + 1][2] if i + 1 < len(raw_events) else None) team, player = _get_team_and_player(raw_event, home_team, away_team) event_base_kwargs = dict( # from DataRecord period=period, timestamp=timestamp, ball_owning_team=team, ball_state=BallState.ALIVE, # from Event event_id=e_id, team=team, player=player, coordinates=(_parse_coordinates(raw_event["coord"]["1"]) if "coord" in raw_event else None), raw_event=raw_event, result=None, qualifiers=None, ) if e_class in DF_EVENT_CLASS_PASSES: pass_event_kwargs = _parse_pass( raw_event=raw_event, team=team, previous_event=previous_event, next_event=next_event, ) event_base_kwargs.update(pass_event_kwargs) event = PassEvent.create(**event_base_kwargs) elif e_class == DF_EVENT_CLASS_SHOTS: shot_event_kwargs = _parse_shot( raw_event=raw_event, previous_event=previous_event, ) event_base_kwargs.update(shot_event_kwargs) event = ShotEvent.create(**event_base_kwargs) elif e_class == DF_EVENT_CLASS_STEALINGS: event = 
RecoveryEvent.create(**event_base_kwargs) elif e_class == DF_EVENT_CLASS_FOULS: # NOTE: could use qualifiers? (hand, foul, penalty?) # switch possession team event_base_kwargs["ball_owning_team"] = ( home_team if team == away_team else away_team) event = FoulCommittedEvent.create(**event_base_kwargs) elif e_class in DF_EVENT_CLASS_CARDS: card_kwargs = _parse_card(raw_event=raw_event, ) event_base_kwargs.update(card_kwargs) event = CardEvent.create(**event_base_kwargs) elif e_class == DF_EVENT_CLASS_SUBSTITUTIONS: substitution_event_kwargs = _parse_substitution( raw_event=raw_event, team=team) event_base_kwargs.update(substitution_event_kwargs) event = SubstitutionEvent.create(**event_base_kwargs) else: # otherwise, a generic event event = GenericEvent.create( event_name=e_class, **event_base_kwargs, ) # check if the event implies ball was out of the field and add a synthetic out event if raw_event["type"] in BALL_OUT_EVENTS: ball_out_event = BallOutEvent.create( # from DataRecord period=period, timestamp=timestamp, ball_owning_team=team, ball_state=BallState.DEAD, # from Event event_id=e_id, team=team, player=player, coordinates=event.coordinates, raw_event=raw_event, result=None, qualifiers=None, ) if self.should_include_event(event): events.append( transformer.transform_event(ball_out_event)) if self.should_include_event(event): events.append(transformer.transform_event(event)) # only consider as a previous_event a ball-in-play event if e_class not in ( DF_EVENT_CLASS_YELLOW_CARDS, DF_EVENT_CLASS_RED_CARDS, DF_EVENT_CLASS_SUBSTITUTIONS, DF_EVENT_CLASS_PENALTY_SHOOTOUT, ): previous_event = raw_event metadata = Metadata( teams=teams, periods=sorted(periods.values(), key=lambda p: p.id), pitch_dimensions=transformer.get_to_coordinate_system(). pitch_dimensions, frame_rate=None, orientation=Orientation.HOME_TEAM, flags=DatasetFlag.BALL_OWNING_TEAM, score=score, provider=Provider.DATAFACTORY, coordinate_system=transformer.get_to_coordinate_system(), ) return EventDataset( metadata=metadata, records=events, )
def deserialize( self, inputs: Dict[str, Readable], options: Dict = None ) -> EventDataset: """ Deserialize StatsBomb event data into a `EventDataset`. Parameters ---------- inputs : dict input `event_data` should point to a `Readable` object containing the 'json' formatted event data. input `lineup_data` should point to a `Readable` object containing the 'json' formatted lineup data. options : dict Options for deserialization of the StatsBomb file. Possible options are `event_types` (list of event types) to specify the event types that should be returned. Valid types: "shot", "pass", "carry", "take_on" and "generic". Generic is everything other than the first 4. Those events are barely parsed. This type of event can be used to do the parsing yourself. Every event has a 'raw_event' attribute which contains the original dictionary. Returns ------- dataset : EventDataset Raises ------ See Also -------- Examples -------- >>> serializer = StatsBombSerializer() >>> with open("events/12312312.json", "rb") as event_data, \ >>> open("lineups/123123123.json", "rb") as lineup_data: >>> >>> dataset = serializer.deserialize( >>> inputs={ >>> 'event_data': event_data, >>> 'lineup_data': lineup_data >>> }, >>> options={ >>> 'event_types': ["pass", "take_on", "carry", "shot"] >>> } >>> ) """ self.__validate_inputs(inputs) if not options: options = {} with performance_logging("load data", logger=logger): raw_events = json.load(inputs["event_data"]) home_lineup, away_lineup = json.load(inputs["lineup_data"]) ( shot_fidelity_version, xy_fidelity_version, ) = _determine_xy_fidelity_versions(raw_events) logger.info( f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}" ) with performance_logging("parse data", logger=logger): home_team = Team( team_id=str(home_lineup["team_id"]), name=home_lineup["team_name"], ground=Ground.HOME, ) home_team.players = [ Player( player_id=str(player["player_id"]), team=home_team, name=player["player_name"], jersey_no=int(player["jersey_number"]), ) for player in home_lineup["lineup"] ] away_team = Team( team_id=str(away_lineup["team_id"]), name=away_lineup["team_name"], ground=Ground.AWAY, ) away_team.players = [ Player( player_id=str(player["player_id"]), team=away_team, name=player["player_name"], jersey_no=int(player["jersey_number"]), ) for player in away_lineup["lineup"] ] teams = [home_team, away_team] wanted_event_types = [ EventType[event_type.upper()] for event_type in options.get("event_types", []) ] periods = [] period = None events = [] for raw_event in raw_events: if raw_event["team"]["id"] == home_lineup["team_id"]: team = teams[0] elif raw_event["team"]["id"] == away_lineup["team_id"]: team = teams[1] else: raise Exception( f"Unknown team_id {raw_event['team']['id']}" ) if ( raw_event["possession_team"]["id"] == home_lineup["team_id"] ): possession_team = teams[0] elif ( raw_event["possession_team"]["id"] == away_lineup["team_id"] ): possession_team = teams[1] else: raise Exception( f"Unknown possession_team_id: {raw_event['possession_team']}" ) timestamp = parse_str_ts(raw_event["timestamp"]) period_id = int(raw_event["period"]) if not period or period.id != period_id: period = Period( id=period_id, start_timestamp=( timestamp if not period # period = [start, end], add millisecond to prevent overlapping else timestamp + period.end_timestamp + 0.001 ), end_timestamp=None, ) periods.append(period) else: period.end_timestamp = period.start_timestamp + timestamp player = None if "player" in raw_event: player = 
team.get_player_by_id(raw_event["player"]["id"]) event_type = raw_event["type"]["id"] if event_type == SB_EVENT_TYPE_SHOT: fidelity_version = shot_fidelity_version elif event_type in ( SB_EVENT_TYPE_CARRY, SB_EVENT_TYPE_DRIBBLE, SB_EVENT_TYPE_PASS, ): fidelity_version = xy_fidelity_version else: # TODO: Uh ohhhh.. don't know which one to pick fidelity_version = xy_fidelity_version generic_event_kwargs = dict( # from DataRecord period=period, timestamp=timestamp, ball_owning_team=possession_team, ball_state=BallState.ALIVE, # from Event event_id=raw_event["id"], team=team, player=player, coordinates=( _parse_coordinates( raw_event.get("location"), fidelity_version ) if "location" in raw_event else None ), raw_event=raw_event, ) if event_type == SB_EVENT_TYPE_PASS: pass_event_kwargs = _parse_pass( pass_dict=raw_event["pass"], team=team, fidelity_version=fidelity_version, ) event = PassEvent( # TODO: Consider moving this to _parse_pass receive_timestamp=timestamp + raw_event["duration"], **pass_event_kwargs, **generic_event_kwargs, ) elif event_type == SB_EVENT_TYPE_SHOT: shot_event_kwargs = _parse_shot( shot_dict=raw_event["shot"] ) event = ShotEvent( **shot_event_kwargs, **generic_event_kwargs ) # For dribble and carry the definitions # are flipped between Statsbomb and kloppy elif event_type == SB_EVENT_TYPE_DRIBBLE: take_on_event_kwargs = _parse_take_on( take_on_dict=raw_event["dribble"] ) event = TakeOnEvent( **take_on_event_kwargs, **generic_event_kwargs ) elif event_type == SB_EVENT_TYPE_CARRY: carry_event_kwargs = _parse_carry( carry_dict=raw_event["carry"], fidelity_version=fidelity_version, ) event = CarryEvent( # TODO: Consider moving this to _parse_carry end_timestamp=timestamp + raw_event["duration"], **carry_event_kwargs, **generic_event_kwargs, ) else: event = GenericEvent( result=None, event_name=raw_event["type"]["name"], **generic_event_kwargs, ) if ( not wanted_event_types or event.event_type in wanted_event_types ): events.append(event) metadata = Metadata( teams=teams, periods=periods, pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 120), y_dim=Dimension(0, 80) ), frame_rate=None, orientation=Orientation.ACTION_EXECUTING_TEAM, flags=DatasetFlag.BALL_OWNING_TEAM, score=None, ) return EventDataset(metadata=metadata, records=events,)
def deserialize( self, inputs: Dict[str, Readable], options: Dict = None ) -> EventDataset: WyscoutSerializer.__validate_inputs(inputs) if not options: options = {} wanted_event_types = [ EventType[event_type.upper()] for event_type in options.get("event_types", []) ] with performance_logging("load data", logger=logger): raw_events = json.load(inputs["event_data"]) periods = [] with performance_logging("parse data", logger=logger): home_team_id, away_team_id = raw_events["teams"].keys() home_team = _parse_team(raw_events, home_team_id, Ground.HOME) away_team = _parse_team(raw_events, away_team_id, Ground.AWAY) teams = {home_team_id: home_team, away_team_id: away_team} players = dict( [ (wyId, _players_to_dict(team.players)) for wyId, team in teams.items() ] ) events = [] for idx, raw_event in enumerate(raw_events["events"]): next_event = None if (idx + 1) < len(raw_events["events"]): next_event = raw_events["events"][idx + 1] team_id = str(raw_event["teamId"]) player_id = str(raw_event["playerId"]) if ( len(periods) == 0 or periods[-1].id != raw_event["matchPeriod"] ): periods.append( Period( id=raw_event["matchPeriod"], start_timestamp=0, end_timestamp=0, ) ) generic_event_args = { "event_id": raw_event["id"], "raw_event": raw_event, "coordinates": Point( x=float(raw_event["positions"][0]["x"]), y=float(raw_event["positions"][0]["y"]), ), "team": teams[team_id], "player": players[team_id][player_id] if player_id != INVALID_PLAYER else None, "ball_owning_team": None, "ball_state": None, "period": periods[-1], "timestamp": raw_event["eventSec"], } event = None if raw_event["eventName"] == wyscout_events.SHOT.EVENT: shot_event_args = _parse_shot(raw_event, next_event) event = ShotEvent.create( **shot_event_args, **generic_event_args ) elif raw_event["eventName"] == wyscout_events.PASS.EVENT: pass_event_args = _parse_pass(raw_event, next_event) event = PassEvent.create( **pass_event_args, **generic_event_args ) elif raw_event["eventName"] == wyscout_events.FOUL.EVENT: foul_event_args = _parse_foul(raw_event) event = FoulCommittedEvent.create( **foul_event_args, **generic_event_args ) if any( (_has_tag(raw_event, tag) for tag in wyscout_tags.CARD) ): card_event_args = _parse_card(raw_event) event = CardEvent.create( **card_event_args, **generic_event_args ) elif ( raw_event["eventName"] == wyscout_events.INTERRUPTION.EVENT ): ball_out_event_args = _parse_ball_out(raw_event) event = BallOutEvent.create( **ball_out_event_args, **generic_event_args ) elif raw_event["eventName"] == wyscout_events.FREE_KICK.EVENT: set_piece_event_args = _parse_set_piece( raw_event, next_event ) if ( raw_event["subEventName"] in wyscout_events.FREE_KICK.PASS_TYPES ): event = PassEvent.create( **set_piece_event_args, **generic_event_args ) elif ( raw_event["subEventName"] in wyscout_events.FREE_KICK.SHOT_TYPES ): event = ShotEvent.create( **set_piece_event_args, **generic_event_args ) elif ( raw_event["eventName"] == wyscout_events.OTHERS_ON_BALL.EVENT ): recovery_event_args = _parse_recovery(raw_event) event = RecoveryEvent.create( **recovery_event_args, **generic_event_args ) elif raw_event["eventName"] == wyscout_events.DUEL.EVENT: takeon_event_args = _parse_takeon(raw_event) event = TakeOnEvent.create( **takeon_event_args, **generic_event_args ) elif raw_event["eventName"] not in [ wyscout_events.SAVE.EVENT, wyscout_events.OFFSIDE.EVENT, ]: # The events SAVE and OFFSIDE are already merged with PASS and SHOT events qualifiers = _generic_qualifiers(raw_event) event = GenericEvent.create( result=None, 
qualifiers=qualifiers, **generic_event_args ) if event and _include_event(event, wanted_event_types): events.append(event) metadata = Metadata( teams=[home_team, away_team], periods=periods, pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 100), y_dim=Dimension(0, 100) ), score=None, frame_rate=None, orientation=Orientation.BALL_OWNING_TEAM, flags=None, provider=Provider.WYSCOUT, ) return EventDataset(metadata=metadata, records=events)
def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: # TODO: also used in Metrica, extract to a method home_team = Team(team_id="home", name="home", ground=Ground.HOME) away_team = Team(team_id="away", name="away", ground=Ground.AWAY) teams = [home_team, away_team] with performance_logging("Loading metadata", logger=logger): match = objectify.fromstring(inputs.meta_data.read()).match frame_rate = int(match.attrib["iFrameRateFps"]) pitch_size_width = float(match.attrib["fPitchXSizeMeters"]) pitch_size_height = float(match.attrib["fPitchYSizeMeters"]) periods = [] for period in match.iterchildren(tag="period"): start_frame_id = int(period.attrib["iStartFrame"]) end_frame_id = int(period.attrib["iEndFrame"]) if start_frame_id != 0 or end_frame_id != 0: periods.append( Period( id=int(period.attrib["iId"]), start_timestamp=start_frame_id / frame_rate, end_timestamp=end_frame_id / frame_rate, ) ) with performance_logging("Loading data", logger=logger): transformer = self.get_transformer( length=pitch_size_width, width=pitch_size_height ) def _iter(): n = 0 sample = 1.0 / self.sample_rate for line_ in inputs.raw_data.readlines(): line_ = line_.strip().decode("ascii") if not line_: continue frame_id = int(line_[:10].split(":", 1)[0]) if self.only_alive and not line_.endswith("Alive;:"): continue for period_ in periods: if period_.contains(frame_id / frame_rate): if n % sample == 0: yield period_, line_ n += 1 frames = [] for n, (period, line) in enumerate(_iter()): frame = self._frame_from_line(teams, period, line, frame_rate) frame = transformer.transform_frame(frame) frames.append(frame) if not period.attacking_direction_set: period.set_attacking_direction( attacking_direction=attacking_direction_from_frame( frame ) ) if self.limit and n >= self.limit: break orientation = ( Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME ) metadata = Metadata( teams=teams, periods=periods, pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, score=None, frame_rate=frame_rate, orientation=orientation, provider=Provider.TRACAB, flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, coordinate_system=transformer.get_to_coordinate_system(), ) return TrackingDataset( records=frames, metadata=metadata, )
def deserialize( self, inputs: Dict[str, Readable], options: Dict = None ) -> TrackingDataset: """ Deserialize TRACAB tracking data into a `TrackingDataset`. Parameters ---------- inputs : dict input `raw_data` should point to a `Readable` object containing the 'csv' formatted raw data. input `metadata` should point to the xml metadata data. options : dict Options for deserialization of the TRACAB file. Possible options are `only_alive` (boolean) to specify that only frames with alive ball state should be loaded, or `sample_rate` (float between 0 and 1) to specify the amount of frames that should be loaded, `limit` to specify the max number of frames that will be returned. Returns ------- dataset : TrackingDataset Raises ------ - See Also -------- Examples -------- >>> serializer = TRACABSerializer() >>> with open("metadata.xml", "rb") as meta, \ >>> open("raw.dat", "rb") as raw: >>> dataset = serializer.deserialize( >>> inputs={ >>> 'metadata': meta, >>> 'raw_data': raw >>> }, >>> options={ >>> 'only_alive': True, >>> 'sample_rate': 1/12 >>> } >>> ) """ self.__validate_inputs(inputs) if not options: options = {} sample_rate = float(options.get("sample_rate", 1.0)) limit = int(options.get("limit", 0)) only_alive = bool(options.get("only_alive", True)) # TODO: also used in Metrica, extract to a method home_team = Team(team_id="home", name="home", ground=Ground.HOME) away_team = Team(team_id="away", name="away", ground=Ground.AWAY) teams = [home_team, away_team] with performance_logging("Loading metadata", logger=logger): match = objectify.fromstring(inputs["metadata"].read()).match frame_rate = int(match.attrib["iFrameRateFps"]) pitch_size_width = float(match.attrib["fPitchXSizeMeters"]) pitch_size_height = float(match.attrib["fPitchYSizeMeters"]) periods = [] for period in match.iterchildren(tag="period"): start_frame_id = int(period.attrib["iStartFrame"]) end_frame_id = int(period.attrib["iEndFrame"]) if start_frame_id != 0 or end_frame_id != 0: periods.append( Period( id=int(period.attrib["iId"]), start_timestamp=start_frame_id / frame_rate, end_timestamp=end_frame_id / frame_rate, ) ) with performance_logging("Loading data", logger=logger): def _iter(): n = 0 sample = 1.0 / sample_rate for line_ in inputs["raw_data"].readlines(): line_ = line_.strip().decode("ascii") if not line_: continue frame_id = int(line_[:10].split(":", 1)[0]) if only_alive and not line_.endswith("Alive;:"): continue for period_ in periods: if period_.contains(frame_id / frame_rate): if n % sample == 0: yield period_, line_ n += 1 frames = [] for n, (period, line) in enumerate(_iter()): frame = self._frame_from_line(teams, period, line, frame_rate) frames.append(frame) if not period.attacking_direction_set: period.set_attacking_direction( attacking_direction=attacking_direction_from_frame( frame ) ) if limit and n >= limit: break orientation = ( Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME ) metadata = Metadata( teams=teams, periods=periods, pitch_dimensions=PitchDimensions( x_dim=Dimension( -1 * pitch_size_width / 2, pitch_size_width / 2 ), y_dim=Dimension( -1 * pitch_size_height / 2, pitch_size_height / 2 ), x_per_meter=100, y_per_meter=100, ), score=None, frame_rate=frame_rate, orientation=orientation, provider=Provider.TRACAB, flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, ) return TrackingDataset( records=frames, metadata=metadata, )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset: """ Deserialize StatsBomb event data into a `EventDataset`. Parameters ---------- inputs : dict input `event_data` should point to a `Readable` object containing the 'json' formatted event data. input `lineup_data` should point to a `Readable` object containing the 'json' formatted lineup data. options : dict Options for deserialization of the StatsBomb file. Possible options are `event_types` (list of event types) to specify the event types that should be returned. Valid types: "shot", "pass", "carry", "take_on" and "generic". Generic is everything other than the first 4. Those events are barely parsed. This type of event can be used to do the parsing yourself. Every event has a 'raw_event' attribute which contains the original dictionary. Returns ------- dataset : EventDataset Raises ------ See Also -------- Examples -------- >>> serializer = StatsBombSerializer() >>> with open("events/12312312.json", "rb") as event_data, \ >>> open("lineups/123123123.json", "rb") as lineup_data: >>> >>> dataset = serializer.deserialize( >>> inputs={ >>> 'event_data': event_data, >>> 'lineup_data': lineup_data >>> }, >>> options={ >>> 'event_types': ["pass", "take_on", "carry", "shot"] >>> } >>> ) """ self.__validate_inputs(inputs) if not options: options = {} with performance_logging("load data", logger=logger): raw_events = json.load(inputs['event_data']) home_lineup, away_lineup = json.load(inputs['lineup_data']) shot_fidelity_version, xy_fidelity_version = _determine_xy_fidelity_versions( raw_events) logger.info( f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}" ) with performance_logging("parse data", logger=logger): home_player_map = { player['player_id']: str(player['jersey_number']) for player in home_lineup['lineup'] } away_player_map = { player['player_id']: str(player['jersey_number']) for player in away_lineup['lineup'] } wanted_event_types = [ EventType[event_type.upper()] for event_type in options.get('event_types', []) ] periods = [] period = None events = [] for raw_event in raw_events: if raw_event['team']['id'] == home_lineup['team_id']: team = Team.HOME current_team_map = home_player_map elif raw_event['team']['id'] == away_lineup['team_id']: team = Team.AWAY current_team_map = away_player_map else: raise Exception( f"Unknown team_id {raw_event['team']['id']}") if raw_event['possession_team']['id'] == home_lineup[ 'team_id']: possession_team = Team.HOME elif raw_event['possession_team']['id'] == away_lineup[ 'team_id']: possession_team = Team.AWAY else: raise Exception( f"Unknown possession_team_id: {raw_event['possession_team']}" ) timestamp = parse_str_ts(raw_event['timestamp']) period_id = int(raw_event['period']) if not period or period.id != period_id: period = Period(id=period_id, start_timestamp=timestamp if not period else timestamp + period.end_timestamp, end_timestamp=None) periods.append(period) else: period.end_timestamp = period.start_timestamp + timestamp player_jersey_no = None if 'player' in raw_event: player_jersey_no = current_team_map[raw_event['player'] ['id']] event_type = raw_event['type']['id'] if event_type == SB_EVENT_TYPE_SHOT: fidelity_version = shot_fidelity_version elif event_type in (SB_EVENT_TYPE_CARRY, SB_EVENT_TYPE_DRIBBLE, SB_EVENT_TYPE_PASS): fidelity_version = xy_fidelity_version else: # TODO: Uh ohhhh.. 
don't know which one to pick fidelity_version = xy_fidelity_version generic_event_kwargs = dict( # from DataRecord period=period, timestamp=timestamp, ball_owning_team=possession_team, ball_state=BallState.ALIVE, # from Event event_id=raw_event['id'], team=team, player_jersey_no=player_jersey_no, position=(_parse_position(raw_event.get('location'), fidelity_version) if 'location' in raw_event else None), raw_event=raw_event) if event_type == SB_EVENT_TYPE_PASS: pass_event_kwargs = _parse_pass( pass_dict=raw_event['pass'], current_team_map=current_team_map, fidelity_version=fidelity_version) event = PassEvent( # TODO: Consider moving this to _parse_pass receive_timestamp=timestamp + raw_event['duration'], **pass_event_kwargs, **generic_event_kwargs) elif event_type == SB_EVENT_TYPE_SHOT: shot_event_kwargs = _parse_shot( shot_dict=raw_event['shot']) event = ShotEvent(**shot_event_kwargs, **generic_event_kwargs) # For dribble and carry the definitions # are flipped between Statsbomb and kloppy elif event_type == SB_EVENT_TYPE_DRIBBLE: take_on_event_kwargs = _parse_take_on( take_on_dict=raw_event['dribble']) event = TakeOnEvent(**take_on_event_kwargs, **generic_event_kwargs) elif event_type == SB_EVENT_TYPE_CARRY: carry_event_kwargs = _parse_carry( carry_dict=raw_event['carry'], fidelity_version=fidelity_version) event = CarryEvent( # TODO: Consider moving this to _parse_carry end_timestamp=timestamp + raw_event['duration'], **carry_event_kwargs, **generic_event_kwargs) else: event = GenericEvent(result=None, **generic_event_kwargs) if not wanted_event_types or event.event_type in wanted_event_types: events.append(event) return EventDataset(flags=DatasetFlag.BALL_OWNING_TEAM, orientation=Orientation.ACTION_EXECUTING_TEAM, pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)), periods=periods, records=events)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet: """ Deserialize TRACAB tracking data into a `TrackingDataSet`. Parameters ---------- inputs : dict input `raw_data` should point to a `Readable` object containing the 'csv' formatted raw data. input `meta_data` should point to the xml metadata data. options : dict Options for deserialization of the TRACAB file. Possible options are `only_alive` (boolean) to specify that only frames with alive ball state should be loaded, or `sample_rate` (float between 0 and 1) to specify the amount of frames that should be loaded. Returns ------- data_set : TrackingDataSet Raises ------ - See Also -------- Examples -------- >>> serializer = TRACABSerializer() >>> with open("metadata.xml", "rb") as meta, \ >>> open("raw.dat", "rb") as raw: >>> data_set = serializer.deserialize( >>> inputs={ >>> 'meta_data': meta, >>> 'raw_data': raw >>> }, >>> options={ >>> 'only_alive': True, >>> 'sample_rate': 1/12 >>> } >>> ) """ self.__validate_inputs(inputs) if not options: options = {} sample_rate = float(options.get('sample_rate', 1.0)) only_alive = bool(options.get('only_alive', True)) with performance_logging("Loading metadata"): match = objectify.fromstring(inputs['meta_data'].read()).match frame_rate = int(match.attrib['iFrameRateFps']) pitch_size_width = float(match.attrib['fPitchXSizeMeters']) pitch_size_height = float(match.attrib['fPitchYSizeMeters']) periods = [] for period in match.iterchildren(tag='period'): start_frame_id = int(period.attrib['iStartFrame']) end_frame_id = int(period.attrib['iEndFrame']) if start_frame_id != 0 or end_frame_id != 0: periods.append( Period(id=int(period.attrib['iId']), start_timestamp=start_frame_id / frame_rate, end_timestamp=end_frame_id / frame_rate)) with performance_logging("Loading data"): def _iter(): n = 0 sample = 1. / sample_rate for line in inputs['raw_data'].readlines(): line = line.strip().decode("ascii") if not line: continue frame_id = int(line[:10].split(":", 1)[0]) if only_alive and not line.endswith("Alive;:"): continue for period in periods: if period.contains(frame_id / frame_rate): if n % sample == 0: yield period, line n += 1 frames = [] for period, line in _iter(): frame = self._frame_from_line(period, line, frame_rate) frames.append(frame) if not period.attacking_direction_set: period.set_attacking_direction( attacking_direction=attacking_direction_from_frame( frame)) orientation = ( Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME) return TrackingDataSet(flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE, frame_rate=frame_rate, orientation=orientation, pitch_dimensions=PitchDimensions( x_dim=Dimension(-1 * pitch_size_width / 2, pitch_size_width / 2), y_dim=Dimension(-1 * pitch_size_height / 2, pitch_size_height / 2), x_per_meter=100, y_per_meter=100), periods=periods, records=frames)