def _get_tracking_dataset(self):
    """Build a minimal in-memory ``TrackingDataset`` fixture.

    The dataset has a home and an away team, two periods and two frames
    (both in the first period). The first frame carries only a ball
    position; the second one additionally carries data for a single home
    player plus ``other_data`` on both frame and player level.
    """
    home = Team(team_id="home", name="home", ground=Ground.HOME)
    away = Team(team_id="away", name="away", ground=Ground.AWAY)

    first_period = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_period = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    all_periods = [first_period, second_period]

    dataset_metadata = Metadata(
        # every flag except ball-owning-team and ball-state
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        periods=all_periods,
        teams=[home, away],
        score=None,
        provider=None,
        coordinate_system=None,
    )

    ball_only_frame = Frame(
        frame_id=1,
        timestamp=0.1,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_data={},
        other_data=None,
        ball_coordinates=Point3D(x=100, y=-50, z=0),
    )

    tracked_player = Player(team=home, player_id="home_1", jersey_no=1)
    tracked_player_data = PlayerData(
        coordinates=Point(x=15, y=35),
        distance=0.03,
        speed=10.5,
        other_data={"extra_data": 1},
    )
    single_player_frame = Frame(
        frame_id=2,
        timestamp=0.2,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_data={tracked_player: tracked_player_data},
        other_data={"extra_data": 1},
        ball_coordinates=Point3D(x=0, y=50, z=1),
    )

    return TrackingDataset(
        metadata=dataset_metadata,
        records=[ball_only_frame, single_player_frame],
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> EventDataset:
    """Deserialize a Wyscout JSON event feed into an ``EventDataset``.

    Args:
        inputs: Mapping of readables; ``inputs["event_data"]`` must hold
            the JSON feed with top-level ``"events"`` and ``"teams"`` keys.
        options: Optional settings. ``options["event_types"]`` is a list
            of ``EventType`` member names (upper-cased before lookup) that
            is handed to ``_include_event`` to filter the output.

    Returns:
        An ``EventDataset`` whose records are the parsed events and whose
        metadata describes teams, periods and a 0-100 x 0-100 pitch.
    """
    WyscoutSerializer.__validate_inputs(inputs)
    if not options:
        options = {}

    wanted_event_types = [
        EventType[event_type.upper()]
        for event_type in options.get("event_types", [])
    ]

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs["event_data"])
        # Some feeds only carry names; fall back to those so the rest of
        # the parser can always rely on eventId / subEventId.
        for event in raw_events["events"]:
            if "eventId" not in event:
                event["eventId"] = event["eventName"]
            if "subEventId" not in event:
                event["subEventId"] = event.get("subEventName")

    periods = []

    with performance_logging("parse data", logger=logger):
        home_team_id, away_team_id = raw_events["teams"].keys()
        home_team = _parse_team(raw_events, home_team_id, Ground.HOME)
        away_team = _parse_team(raw_events, away_team_id, Ground.AWAY)
        teams = {home_team_id: home_team, away_team_id: away_team}
        players = {
            wy_id: _players_to_dict(team.players)
            for wy_id, team in teams.items()
        }

        events = []
        raw_event_list = raw_events["events"]
        for idx, raw_event in enumerate(raw_event_list):
            # Some parsers need the following raw event as context.
            next_idx = idx + 1
            next_event = (
                raw_event_list[next_idx]
                if next_idx < len(raw_event_list)
                else None
            )

            team_id = str(raw_event["teamId"])
            player_id = str(raw_event["playerId"])
            period_id = int(raw_event["matchPeriod"].replace("H", ""))

            # Open a new period whenever the period id changes.
            if not periods or periods[-1].id != period_id:
                periods.append(
                    Period(
                        id=period_id,
                        start_timestamp=0,
                        end_timestamp=0,
                    )
                )

            generic_event_args = {
                "event_id": raw_event["id"],
                "raw_event": raw_event,
                "coordinates": Point(
                    x=float(raw_event["positions"][0]["x"]),
                    y=float(raw_event["positions"][0]["y"]),
                ),
                "team": teams[team_id],
                "player": players[team_id][player_id]
                if player_id != INVALID_PLAYER
                else None,
                "ball_owning_team": None,
                "ball_state": None,
                "period": periods[-1],
                "timestamp": raw_event["eventSec"],
            }

            event_type_id = raw_event["eventId"]
            event = None
            if event_type_id == wyscout_events.SHOT.EVENT:
                shot_event_args = _parse_shot(raw_event, next_event)
                event = ShotEvent.create(
                    **shot_event_args, **generic_event_args
                )
            elif event_type_id == wyscout_events.PASS.EVENT:
                pass_event_args = _parse_pass(raw_event, next_event)
                event = PassEvent.create(
                    **pass_event_args, **generic_event_args
                )
            elif event_type_id == wyscout_events.FOUL.EVENT:
                foul_event_args = _parse_foul(raw_event)
                event = FoulCommittedEvent.create(
                    **foul_event_args, **generic_event_args
                )
                # NOTE(review): when the foul carries a card tag the
                # CardEvent replaces the FoulCommittedEvent built above,
                # so only the card is emitted for this raw event --
                # confirm this is intended.
                if any(
                    _has_tag(raw_event, tag) for tag in wyscout_tags.CARD
                ):
                    card_event_args = _parse_card(raw_event)
                    event = CardEvent.create(
                        **card_event_args, **generic_event_args
                    )
            elif event_type_id == wyscout_events.INTERRUPTION.EVENT:
                ball_out_event_args = _parse_ball_out(raw_event)
                event = BallOutEvent.create(
                    **ball_out_event_args, **generic_event_args
                )
            elif event_type_id == wyscout_events.FREE_KICK.EVENT:
                set_piece_event_args = _parse_set_piece(
                    raw_event, next_event
                )
                sub_event_id = raw_event["subEventId"]
                # Free kicks of any other sub type produce no event.
                if sub_event_id in wyscout_events.FREE_KICK.PASS_TYPES:
                    event = PassEvent.create(
                        **set_piece_event_args, **generic_event_args
                    )
                elif sub_event_id in wyscout_events.FREE_KICK.SHOT_TYPES:
                    event = ShotEvent.create(
                        **set_piece_event_args, **generic_event_args
                    )
            elif event_type_id == wyscout_events.OTHERS_ON_BALL.EVENT:
                recovery_event_args = _parse_recovery(raw_event)
                event = RecoveryEvent.create(
                    **recovery_event_args, **generic_event_args
                )
            elif event_type_id == wyscout_events.DUEL.EVENT:
                takeon_event_args = _parse_takeon(raw_event)
                event = TakeOnEvent.create(
                    **takeon_event_args, **generic_event_args
                )
            elif event_type_id not in [
                wyscout_events.SAVE.EVENT,
                wyscout_events.OFFSIDE.EVENT,
            ]:
                # The events SAVE and OFFSIDE are already merged with PASS and SHOT events
                qualifiers = _generic_qualifiers(raw_event)
                event = GenericEvent.create(
                    result=None,
                    qualifiers=qualifiers,
                    **generic_event_args
                )

            if event and _include_event(event, wanted_event_types):
                events.append(event)

    metadata = Metadata(
        teams=[home_team, away_team],
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)
        ),
        score=None,
        frame_rate=None,
        orientation=Orientation.BALL_OWNING_TEAM,
        flags=None,
        provider=Provider.WYSCOUT,
    )

    return EventDataset(metadata=metadata, records=events)
def test_correct_deserialization(self, meta_data: str, raw_data: str):
    """Load the TRACAB fixture (dead-ball frames included) and verify
    metadata, periods, player/ball coordinates, ball state and ownership."""
    dataset = tracab.load(
        meta_data=meta_data,
        raw_data=raw_data,
        coordinates="tracab",
        only_alive=False,
    )
    metadata = dataset.metadata

    assert metadata.provider == Provider.TRACAB
    assert dataset.dataset_type == DatasetType.TRACKING

    frames = dataset.records
    assert len(frames) == 6
    assert len(metadata.periods) == 2
    assert metadata.orientation == Orientation.FIXED_HOME_AWAY

    expected_first_period = Period(
        id=1,
        start_timestamp=4.0,
        end_timestamp=4.08,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert metadata.periods[0] == expected_first_period

    expected_second_period = Period(
        id=2,
        start_timestamp=8.0,
        end_timestamp=8.08,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    assert metadata.periods[1] == expected_second_period

    first_frame = frames[0]

    player_home_19 = metadata.teams[0].get_player_by_jersey_number(19)
    home_19_data = first_frame.players_data[player_home_19]
    assert home_19_data.coordinates == Point(x=-1234.0, y=-294.0)

    player_away_19 = metadata.teams[1].get_player_by_jersey_number(19)
    away_19_data = first_frame.players_data[player_away_19]
    assert away_19_data.coordinates == Point(x=8889, y=-666)

    assert first_frame.ball_coordinates == Point3D(x=-27, y=25, z=0)
    assert first_frame.ball_state == BallState.ALIVE
    assert first_frame.ball_owning_team == Team(
        team_id="home", name="home", ground=Ground.HOME
    )
    assert frames[1].ball_owning_team == Team(
        team_id="away", name="away", ground=Ground.AWAY
    )
    assert frames[2].ball_state == BallState.DEAD

    # make sure player data is only in the frame when the player is at the pitch
    ids_in_first_frame = {
        player.player_id for player in first_frame.players_data
    }
    assert "away_1337" not in ids_in_first_frame

    ids_in_fourth_frame = {
        player.player_id for player in frames[3].players_data
    }
    assert "away_1337" in ids_in_fourth_frame
def test_correct_deserialization(self, f7_data: str, f24_data: str):
    """Load the Opta F7/F24 fixtures and verify metadata, line-ups,
    periods, possession, qualifiers and a few event-level details."""
    dataset = opta.load(f24_data=f24_data, f7_data=f7_data, coordinates="opta")
    metadata = dataset.metadata

    assert metadata.provider == Provider.OPTA
    assert dataset.dataset_type == DatasetType.EVENT

    events = dataset.events
    assert len(events) == 20
    assert len(metadata.periods) == 2

    home_team = metadata.teams[0]
    away_team = metadata.teams[1]
    assert events[10].ball_owning_team == away_team  # 1594254267
    assert events[15].ball_owning_team == home_team  # 2087733359
    assert metadata.orientation == Orientation.ACTION_EXECUTING_TEAM

    assert home_team.name == "FC København"
    assert home_team.ground == Ground.HOME
    assert home_team.starting_formation == FormationType("4-4-2")
    assert away_team.name == "FC Nordsjælland"
    assert away_team.ground == Ground.AWAY
    assert away_team.starting_formation == FormationType("4-3-3")

    player = home_team.players[0]
    assert player.player_id == "111319"
    assert player.jersey_no == 21
    assert str(player) == "Jesse Joronen"
    assert player.position.position_id == "1"
    assert player.position.name == "Goalkeeper"

    expected_first_period = Period(
        id=1,
        start_timestamp=1537714933.608,
        end_timestamp=1537717701.222,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert metadata.periods[0] == expected_first_period

    expected_second_period = Period(
        id=2,
        start_timestamp=1537718728.873,
        end_timestamp=1537721737.788,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert metadata.periods[1] == expected_second_period

    assert events[0].coordinates == Point(50.1, 49.4)

    # Check the qualifiers
    expected_first_qualifiers = (
        (0, SetPieceType.KICK_OFF),  # 1510681159
        (6, BodyPart.HEAD),  # 1101592119
        (5, PassType.CHIPPED_PASS),  # 1444075194
        (19, CardType.RED),  # 2318695229
    )
    for event_index, expected_value in expected_first_qualifiers:
        assert events[event_index].qualifiers[0].value == expected_value

    # Check receiver coordinates for incomplete passes
    incomplete_pass = events[6]
    assert incomplete_pass.receiver_coordinates.x == 45.5
    assert incomplete_pass.receiver_coordinates.y == 68.2

    # Check timestamp from qualifier in case of goal
    assert events[17].timestamp == 139.65200018882751  # 2318695229
    # assert dataset.events[17].coordinates_y == 12

    # Check Own goal
    assert events[18].result.value == "OWN_GOAL"  # 2318697001
def deserialize(self, inputs: OptaInputs) -> EventDataset:
    """Deserialize Opta F7 (line-up) and F24 (event) XML into an ``EventDataset``.

    Args:
        inputs: Object exposing readable ``f7_data`` and ``f24_data``
            attributes containing the XML documents.

    Returns:
        An ``EventDataset`` containing every event accepted by
        ``self.should_include_event``, transformed by the serializer's
        coordinate transformer, plus metadata (teams, periods, score).

    Raises:
        DeserializationError: if a ``TeamData`` element has an unknown
            side, if the Home or Away ``TeamData`` is missing, if a
            line-up is empty, or if an event references an unknown team.
    """
    transformer = self.get_transformer(length=100, width=100)

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs.f7_data.read())
        f24_root = objectify.fromstring(inputs.f24_data.read())

    with performance_logging("parse data", logger=logger):
        matchdata_path = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData"
        )
        team_elms = list(
            matchdata_path.find(f7_root).iterchildren("TeamData")
        )

        home_team = None
        away_team = None
        home_score = None
        away_score = None
        for team_elm in team_elms:
            side = team_elm.attrib["Side"]
            if side == "Home":
                home_score = team_elm.attrib["Score"]
                home_team = _team_from_xml_elm(team_elm, f7_root)
            elif side == "Away":
                away_score = team_elm.attrib["Score"]
                away_team = _team_from_xml_elm(team_elm, f7_root)
            else:
                raise DeserializationError(f"Unknown side: {side}")

        # Without this guard a feed lacking one side would surface as an
        # UnboundLocalError further down instead of a parse error.
        if home_team is None or away_team is None:
            raise DeserializationError(
                "Missing Home or Away TeamData in F7 feed"
            )

        score = Score(home=home_score, away=away_score)
        teams = [home_team, away_team]

        if not home_team.players or not away_team.players:
            raise DeserializationError("LineUp incomplete")

        game_elm = f24_root.find("Game")

        # Only the two regular halves are tracked; events belonging to
        # any other period id are skipped below.
        periods = [
            Period(
                id=1,
                start_timestamp=None,
                end_timestamp=None,
            ),
            Period(
                id=2,
                start_timestamp=None,
                end_timestamp=None,
            ),
        ]
        possession_team = None
        events = []
        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])

            period = next(
                (
                    candidate
                    for candidate in periods
                    if candidate.id == period_id
                ),
                None,
            )
            if period is None:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(
                    f"Set start of period {period.id} to {timestamp}"
                )
                period.start_timestamp = timestamp
                continue

            if type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(
                    f"Set end of period {period.id} to {timestamp}"
                )
                period.end_timestamp = timestamp
                continue

            if period.start_timestamp is None:
                # not started yet
                continue

            if event_elm.attrib["team_id"] == home_team.team_id:
                team = teams[0]
            elif event_elm.attrib["team_id"] == away_team.team_id:
                team = teams[1]
            else:
                raise DeserializationError(
                    f"Unknown team_id {event_elm.attrib['team_id']}"
                )

            x = float(event_elm.attrib["x"])
            y = float(event_elm.attrib["y"])
            outcome = int(event_elm.attrib["outcome"])
            raw_qualifiers = {
                int(
                    qualifier_elm.attrib["qualifier_id"]
                ): qualifier_elm.attrib.get("value")
                for qualifier_elm in event_elm.iterchildren("Q")
            }
            player = None
            if "player_id" in event_elm.attrib:
                player = team.get_player_by_id(
                    event_elm.attrib["player_id"]
                )

            # Possession sticks with the last team that performed a
            # ball-owning event.
            if type_id in BALL_OWNING_EVENTS:
                possession_team = team

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=possession_team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_id,
                team=team,
                player=player,
                coordinates=Point(x=x, y=y),
                raw_event=event_elm,
            )

            if type_id == EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(raw_qualifiers, outcome)
                event = PassEvent.create(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                pass_event_kwargs = _parse_offside_pass(raw_qualifiers)
                event = PassEvent.create(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_TAKE_ON:
                take_on_event_kwargs = _parse_take_on(outcome)
                event = TakeOnEvent.create(
                    qualifiers=None,
                    **take_on_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id in (
                EVENT_TYPE_SHOT_MISS,
                EVENT_TYPE_SHOT_POST,
                EVENT_TYPE_SHOT_SAVED,
                EVENT_TYPE_SHOT_GOAL,
            ):
                # For goals, qualifier 374 (when present) overrides the
                # event timestamp.
                if type_id == EVENT_TYPE_SHOT_GOAL and 374 in raw_qualifiers:
                    generic_event_kwargs["timestamp"] = (
                        _parse_f24_datetime(
                            raw_qualifiers[374].replace(" ", "T")
                        )
                        - period.start_timestamp
                    )
                shot_event_kwargs = _parse_shot(
                    raw_qualifiers,
                    type_id,
                    coordinates=generic_event_kwargs["coordinates"],
                )
                # Merge rather than double-splat so shot-specific values
                # may override generic ones with the same key.
                kwargs = {**generic_event_kwargs, **shot_event_kwargs}
                event = ShotEvent.create(**kwargs)
            elif type_id == EVENT_TYPE_RECOVERY:
                event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id in BALL_OUT_EVENTS:
                generic_event_kwargs["ball_state"] = BallState.DEAD
                event = BallOutEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_FORMATION_CHANGE:
                formation_change_event_kwargs = _parse_formation_change(
                    raw_qualifiers
                )
                event = FormationChangeEvent.create(
                    result=None,
                    qualifiers=None,
                    **formation_change_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_CARD:
                generic_event_kwargs["ball_state"] = BallState.DEAD
                card_event_kwargs = _parse_card(raw_qualifiers)
                event = CardEvent.create(
                    **card_event_kwargs,
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent.create(
                    **generic_event_kwargs,
                    result=None,
                    qualifiers=None,
                    event_name=_get_event_type_name(type_id),
                )

            if self.should_include_event(event):
                events.append(transformer.transform_event(event))

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=score,
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        provider=Provider.OPTA,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def test_correct_deserialization(self, lineup_data: str, event_data: str):
    """
    Load the StatsBomb fixtures and verify metadata, line-ups, periods
    and a selection of body-part / pass-type qualifiers.

    This test uses data from the StatsBomb open data project.
    """
    dataset = statsbomb.load(
        lineup_data=lineup_data,
        event_data=event_data,
        coordinates="statsbomb",
    )
    metadata = dataset.metadata

    assert metadata.provider == Provider.STATSBOMB
    assert dataset.dataset_type == DatasetType.EVENT

    events = dataset.events
    assert len(events) == 4023
    assert len(metadata.periods) == 2
    assert metadata.orientation == Orientation.ACTION_EXECUTING_TEAM

    home_team = metadata.teams[0]
    away_team = metadata.teams[1]
    assert home_team.name == "Barcelona"
    assert away_team.name == "Deportivo Alavés"
    assert home_team.starting_formation == FormationType("4-4-2")
    assert away_team.starting_formation == FormationType("4-1-4-1")

    player = home_team.get_player_by_id("5503")
    assert player.player_id == "5503"
    assert player.jersey_no == 10
    assert str(player) == "Lionel Andrés Messi Cuccittini"
    assert player.position is None  # not set
    assert player.starting

    sub_player = home_team.get_player_by_id("3501")
    assert str(sub_player) == "Philippe Coutinho Correia"
    assert not sub_player.starting

    expected_first_period = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=2705.267,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert metadata.periods[0] == expected_first_period

    expected_second_period = Period(
        id=2,
        start_timestamp=2705.268,
        end_timestamp=5557.321,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert metadata.periods[1] == expected_second_period

    assert events[10].coordinates == Point(34.5, 20.5)

    # (event index, expected body part)
    expected_body_parts = (
        (792, BodyPart.HEAD),
        (2232, BodyPart.RIGHT_FOOT),
    )
    for event_index, body_part in expected_body_parts:
        found = events[event_index].get_qualifier_value(BodyPartQualifier)
        assert found == body_part
    assert events[195].get_qualifier_value(BodyPartQualifier) is None

    # (event index, expected pass type)
    expected_pass_types = (
        (1433, PassType.CROSS),
        (1552, PassType.THROUGH_BALL),
        (443, PassType.SWITCH_OF_PLAY),
        (3438, PassType.LONG_BALL),
        (2266, PassType.HIGH_PASS),
        (653, PassType.HEAD_PASS),
        (3134, PassType.HAND_PASS),
        (3611, PassType.ASSIST),
    )
    for event_index, pass_type in expected_pass_types:
        found = events[event_index].get_qualifier_value(PassQualifier)
        assert found == pass_type
    assert events[3392].get_qualifier_value(PassQualifier) is None
def test_correct_deserialization(self):
    """Deserialize the TRACAB fixture files through ``TRACABSerializer``
    (dead-ball frames included) and verify periods, coordinates, ball
    state and ball ownership."""
    base_dir = os.path.dirname(__file__)
    serializer = TRACABSerializer()

    meta_path = f"{base_dir}/files/tracab_meta.xml"
    raw_path = f"{base_dir}/files/tracab_raw.dat"
    with open(meta_path, "rb") as metadata, open(raw_path, "rb") as raw_data:
        dataset = serializer.deserialize(
            inputs={"metadata": metadata, "raw_data": raw_data},
            options={"only_alive": False},
        )

    dataset_metadata = dataset.metadata
    assert dataset_metadata.provider == Provider.TRACAB

    frames = dataset.records
    assert len(frames) == 6
    assert len(dataset_metadata.periods) == 2
    assert dataset_metadata.orientation == Orientation.FIXED_HOME_AWAY

    expected_first_period = Period(
        id=1,
        start_timestamp=4.0,
        end_timestamp=4.08,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset_metadata.periods[0] == expected_first_period

    expected_second_period = Period(
        id=2,
        start_timestamp=8.0,
        end_timestamp=8.08,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    assert dataset_metadata.periods[1] == expected_second_period

    first_frame = frames[0]

    player_home_19 = dataset_metadata.teams[0].get_player_by_jersey_number(
        "19"
    )
    home_19_point = first_frame.players_coordinates[player_home_19]
    assert home_19_point == Point(x=-1234.0, y=-294.0)

    player_away_19 = dataset_metadata.teams[1].get_player_by_jersey_number(
        "19"
    )
    away_19_point = first_frame.players_coordinates[player_away_19]
    assert away_19_point == Point(x=8889, y=-666)

    assert first_frame.ball_coordinates == Point(x=-27, y=25)
    assert first_frame.ball_state == BallState.ALIVE
    assert first_frame.ball_owning_team == Team(
        team_id="home", name="home", ground=Ground.HOME
    )
    assert frames[1].ball_owning_team == Team(
        team_id="away", name="away", ground=Ground.AWAY
    )
    assert frames[2].ball_state == BallState.DEAD

    # make sure player data is only in the frame when the player is at the pitch
    ids_in_first_frame = {
        player.player_id for player in first_frame.players_coordinates
    }
    assert "away_1337" not in ids_in_first_frame

    ids_in_fourth_frame = {
        player.player_id for player in frames[3].players_coordinates
    }
    assert "away_1337" in ids_in_fourth_frame
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet:
    """Deserialize a Metrica event CSV into an ``EventDataSet``.

    Args:
        inputs: Mapping of readables; ``inputs['raw_data']`` must yield
            UTF-8 encoded CSV lines, the first being the header row.
        options: Accepted for interface compatibility; not used here.

    Returns:
        An ``EventDataSet`` with one event per CSV row. Every event carries
        the ball state / ball owning team computed *after* applying that
        event to the running game state.

    Raises:
        ValueError: if a row's ``Team`` column is neither ``Home`` nor
            ``Away``.
        NotImplementedError: if a row maps to an unsupported event type.
        IndexError: if the CSV holds no data rows (no period to derive
            the orientation from).
    """
    self.__validate_inputs(inputs)

    periods = []
    period = None

    events = []

    game_state = self.__GameState(ball_state=BallState.DEAD,
                                  ball_owning_team=None)

    reader = csv.DictReader(
        line.decode('utf-8') for line in inputs['raw_data']
    )
    for event_id, record in enumerate(reader):
        event_type = event_type_map[record['Type']]
        subtypes = record['Subtype'].split('-')

        start_timestamp = float(record['Start Time [s]'])
        end_timestamp = float(record['End Time [s]'])

        # Open a new period on the first row and whenever the period id
        # changes; otherwise keep stretching the current period's end.
        period_id = int(record['Period'])
        if not period or period.id != period_id:
            period = Period(id=period_id,
                            start_timestamp=start_timestamp,
                            end_timestamp=end_timestamp)
            periods.append(period)
        else:
            period.end_timestamp = end_timestamp

        if record['Team'] == 'Home':
            team = Team.HOME
        elif record['Team'] == 'Away':
            team = Team.AWAY
        else:
            # The CSV column is 'Team'; reading 'team' here used to turn
            # this error path into a KeyError.
            raise ValueError(f'Unknown team: {record["Team"]}')

        event_kwargs = dict(
            # From DataRecord:
            timestamp=start_timestamp,
            ball_owning_team=None,  # todo
            ball_state=None,  # todo
            period=period,

            # From Event:
            event_id=event_id,
            team=team,
            end_timestamp=end_timestamp,
            # 'From' looks like 'Player<N>'; strip the 6-char prefix.
            player_jersey_no=record['From'][6:],
            # y is flipped (1 - y) relative to the raw feed.
            position=Point(x=float(record['Start X']),
                           y=1 - float(record['Start Y']))
            if record['Start X'] != 'NaN' else None,
        )

        secondary_position = None
        if record['End X'] != 'NaN':
            secondary_position = Point(x=float(record['End X']),
                                       y=1 - float(record['End Y']))

        secondary_jersey_no = None
        if record['To']:
            secondary_jersey_no = record['To'][6:]

        # The tuple unpackings below are kept even where the values are
        # unused: they require build_subtypes to return exactly one item
        # per requested subtype class.
        event = None
        if event_type == EventType.SET_PIECE:
            set_piece, fk_attempt, retaken = \
                build_subtypes(subtypes, [SetPiece, FKAttempt, Retaken])

            event = SetPieceEvent(**event_kwargs)
        elif event_type == EventType.RECOVERY:
            interference1, interference2 = \
                build_subtypes(subtypes, [Interference1, Interference2])

            event = RecoveryEvent(**event_kwargs)
        elif event_type == EventType.PASS:
            body_part, attempt, deflection, offside = \
                build_subtypes(subtypes,
                               [BodyPart, Attempt, Deflection, Offside])

            event = PassEvent(
                receiver_position=secondary_position,
                receiver_player_jersey_no=secondary_jersey_no,
                **event_kwargs)
        elif event_type == EventType.BALL_LOST:
            body_part, attempt, interference1, intervention, deflection, offside = \
                build_subtypes(subtypes, [
                    BodyPart, Attempt, Interference1, Intervention,
                    Deflection, Offside
                ])

            event = BallLossEvent(**event_kwargs)
        elif event_type == EventType.BALL_OUT:
            body_part, attempt, intervention, deflection, offside, own_goal = \
                build_subtypes(subtypes, [
                    BodyPart, Attempt, Intervention, Deflection, Offside,
                    OwnGoal
                ])

            event = BallOutEvent(**event_kwargs)
        elif event_type == EventType.SHOT:
            body_part, deflection, shot_direction, shot_result, offside = \
                build_subtypes(subtypes, [
                    BodyPart, Deflection, ShotDirection, ShotResult,
                    Offside
                ])

            event = ShotEvent(shot_result=shot_result, **event_kwargs)
        elif event_type == EventType.FAULT_RECEIVED:
            event = FaultReceivedEvent(**event_kwargs)
        elif event_type == EventType.CHALLENGE:
            challenge, fault, challenge_result = \
                build_subtypes(subtypes,
                               [Challenge, Fault, ChallengeResult])

            event = ChallengeEvent(**event_kwargs)
        elif event_type == EventType.CARD:
            card, = build_subtypes(subtypes, [Card])

            event = CardEvent(**event_kwargs)
        else:
            raise NotImplementedError(
                f"EventType {event_type} not implemented")

        # We want to attach the game_state after the event to the event
        game_state = self.__reduce_game_state(event=event,
                                              game_state=game_state)

        event.ball_state = game_state.ball_state
        event.ball_owning_team = game_state.ball_owning_team

        events.append(event)

    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME)

    return EventDataSet(
        flags=DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM,
        orientation=orientation,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                         y_dim=Dimension(0, 1)),
        periods=periods,
        records=events)
def __create_iterator(
    self,
    data: Readable,
    sample_rate: float,
    frame_rate: int,
    ground: Ground,
) -> Iterator:
    """
    Lazily parse one team's Metrica tracking CSV into partial frames.

    The first three lines are headers: line 0 supplies the team name,
    line 1 the jersey numbers and line 2 the column titles (ignored).
    Every later line is one frame; a partial frame is yielded for each
    frame whose running index is a multiple of ``1 / sample_rate``.

    Notes:
        1. the y-axis is flipped because Metrica use (y, -y) instead of (-y, y)
    """

    team = None
    frame_idx = 0
    frame_sample = 1 / sample_rate
    period = None

    for line_no, raw_line in enumerate(data):
        columns = raw_line.strip().decode("ascii").split(",")

        if line_no == 0:
            team = Team(team_id=str(ground), name=columns[3], ground=ground)
            continue

        if line_no == 1:
            # jersey numbers sit in every second column, ball columns excluded
            player_jersey_numbers = columns[3:-2:2]
            players = [
                Player(
                    player_id=f"{team.ground}_{jersey_number}",
                    jersey_no=int(jersey_number),
                    team=team,
                )
                for jersey_number in player_jersey_numbers
            ]
            team.players = players
            continue

        if line_no == 2:
            # consider doing some validation on the columns
            continue

        period_id = int(columns[0])
        frame_id = int(columns[1])
        frame_timestamp = frame_id / frame_rate

        if period is None or period.id != period_id:
            period = Period(
                id=period_id,
                start_timestamp=frame_timestamp,
                end_timestamp=frame_timestamp,
            )
        else:
            # consider not update this every frame for performance reasons
            period.end_timestamp = frame_timestamp

        if frame_idx % frame_sample == 0:
            players_coordinates = {}
            for player_idx, player in enumerate(players):
                x_column = 3 + player_idx * 2
                if columns[x_column] != "NaN":
                    players_coordinates[player] = Point(
                        x=float(columns[x_column]),
                        y=1 - float(columns[x_column + 1]),
                    )

            if columns[-2] != "NaN":
                ball_coordinates = Point(
                    x=float(columns[-2]), y=1 - float(columns[-1])
                )
            else:
                ball_coordinates = None

            yield self.__PartialFrame(
                team=team,
                period=period,
                frame_id=frame_id,
                players_coordinates=players_coordinates,
                ball_coordinates=ball_coordinates,
            )
        frame_idx += 1
def test_correct_deserialization(self):
    """Deserialize two in-memory Metrica tracking CSVs (home and away).

    Each CSV has three header rows followed by three frames of period 1
    and three frames of period 2. The test checks the number of records
    and periods, the orientation, the flipped y-axis and that players
    with NaN coordinates are left out of a frame.
    """
    # Row layout: team names / jersey numbers / column titles / frames.
    raw_data_home = BytesIO(b""",,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,,
,,,11,,1,,2,,3,,4,,5,,6,,7,,8,,9,,10,,12,,13,,14,,,
Period,Frame,Time [s],Player11,,Player1,,Player2,,Player3,,Player4,,Player5,,Player6,,Player7,,Player8,,Player9,,Player10,,Player12,,Player13,,Player14,,Ball,
1,1,0.04,0.00082,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.00096,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.00114,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.90492,0.45355,NaN,NaN,0.34089,0.64569,0.31214,0.67501,0.11428,0.92765,0.25757,0.60019,NaN,NaN,0.37398,0.62446,0.17401,0.83396,0.1667,0.76677,NaN,NaN,0.30044,0.68311,0.33637,0.65366,0.34089,0.64569,NaN,NaN
2,145005,5800.2,0.90456,0.45356,NaN,NaN,0.34056,0.64552,0.31171,0.67468,0.11428,0.92765,0.25721,0.60089,NaN,NaN,0.37398,0.62446,0.17358,0.8343,0.16638,0.76665,NaN,NaN,0.30044,0.68311,0.33615,0.65317,0.34056,0.64552,NaN,NaN
2,145006,5800.24,0.90456,0.45356,NaN,NaN,0.33996,0.64544,0.31122,0.67532,0.11428,0.92765,0.25659,0.60072,NaN,NaN,0.37398,0.62446,0.17327,0.8346,0.1659,0.76555,NaN,NaN,0.30044,0.68311,0.33563,0.65166,0.33996,0.64544,NaN,NaN""")

    raw_data_away = BytesIO(b""",,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,
,,,25,,15,,16,,17,,18,,19,,20,,21,,22,,23,,24,,26,,27,,28,,,
Period,Frame,Time [s],Player25,,Player15,,Player16,,Player17,,Player18,,Player19,,Player20,,Player21,,Player22,,Player23,,Player24,,Player26,,Player27,,Player28,,Ball,
1,1,0.04,0.90509,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.90494,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.90434,0.47463,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.12564,0.55386,0.17792,0.56682,0.25757,0.60019,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14926,0.56204,0.10285,0.81944,NaN,NaN,0.29331,0.488,NaN,NaN,0.35561,0.55254,0.19805,0.452,0.21798,0.81079,NaN,NaN
2,145005,5800.2,0.12564,0.55386,0.1773,0.56621,0.25721,0.60089,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14857,0.56068,0.10231,0.81944,NaN,NaN,0.29272,0.48789,NaN,NaN,0.35532,0.55243,0.19766,0.45237,0.21798,0.81079,NaN,NaN
2,145006,5800.24,0.12564,0.55386,0.17693,0.56675,0.25659,0.60072,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14846,0.56017,0.10187,0.8198,NaN,NaN,0.29267,0.48903,NaN,NaN,0.35495,0.55364,0.19754,0.45364,0.21798,0.81079,NaN,NaN""")

    serializer = MetricaTrackingSerializer()

    data_set = serializer.deserialize(
        inputs={
            'raw_data_home': raw_data_home,
            'raw_data_away': raw_data_away
        }
    )

    assert len(data_set.records) == 6
    assert len(data_set.periods) == 2
    assert data_set.orientation == Orientation.FIXED_HOME_AWAY
    assert data_set.periods[0] == Period(id=1, start_timestamp=0.04, end_timestamp=0.12,
                                         attacking_direction=AttackingDirection.HOME_AWAY)
    assert data_set.periods[1] == Period(id=2, start_timestamp=5800.16, end_timestamp=5800.24,
                                         attacking_direction=AttackingDirection.AWAY_HOME)

    # make sure data is loaded correctly (including flip y-axis)
    assert data_set.records[0].home_team_player_positions['11'] == Point(x=0.00082, y=1 - 0.48238)
    assert data_set.records[0].away_team_player_positions['25'] == Point(x=0.90509, y=1 - 0.47462)
    assert data_set.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709)

    # make sure player data is only in the frame when the player is at the pitch
    # (Player14 is NaN in the period-1 rows and has coordinates in period 2)
    assert '14' not in data_set.records[0].home_team_player_positions
    assert '14' in data_set.records[3].home_team_player_positions

# Disabled event-data test, kept for reference:
#
# class TestMetricaEvent:
#     def test_correct_deserialization(self):
#         raw_data = BytesIO(b"""Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
# Away,SET PIECE,KICK OFF,1,1,0.04,0,0,Player19,,NaN,NaN,NaN,NaN
# Away,PASS,,1,1,0.04,3,0.12,Player19,Player21,0.45,0.39,0.55,0.43
# Away,PASS,,1,3,0.12,17,0.68,Player21,Player15,0.55,0.43,0.58,0.21
# Away,PASS,,1,45,1.8,61,2.44,Player15,Player19,0.55,0.19,0.45,0.31
# Away,PASS,,1,77,3.08,96,3.84,Player19,Player21,0.45,0.32,0.49,0.47
# Away,PASS,,1,191,7.64,217,8.68,Player21,Player22,0.4,0.73,0.32,0.98
# Away,PASS,,1,279,11.16,303,12.12,Player22,Player17,0.39,0.96,0.49,0.98
# Away,BALL LOST,INTERCEPTION,1,346,13.84,380,15.2,Player17,,0.51,0.97,0.27,0.75
# Home,RECOVERY,INTERCEPTION,1,378,15.12,378,15.12,Player2,,0.27,0.78,NaN,NaN
# Home,BALL LOST,INTERCEPTION,1,378,15.12,452,18.08,Player2,,0.27,0.78,0.59,0.64
# Away,RECOVERY,INTERCEPTION,1,453,18.12,453,18.12,Player16,,0.57,0.67,NaN,NaN
# Away,BALL LOST,HEAD-INTERCEPTION,1,453,18.12,497,19.88,Player16,,0.57,0.67,0.33,0.65
# Away,CHALLENGE,AERIAL-LOST,1,497,19.88,497,19.88,Player18,,0.38,0.67,NaN,NaN
# Home,CHALLENGE,AERIAL-WON,1,498,19.92,498,19.92,Player2,,0.36,0.67,NaN,NaN
# Home,RECOVERY,INTERCEPTION,1,498,19.92,498,19.92,Player2,,0.36,0.67,NaN,NaN
# Home,PASS,HEAD,1,498,19.92,536,21.44,Player2,Player9,0.36,0.67,0.53,0.59
# Home,PASS,,1,536,21.44,556,22.24,Player9,Player10,0.53,0.59,0.5,0.65
# Home,BALL LOST,INTERCEPTION,1,572,22.88,616,24.64,Player10,,0.5,0.65,0.67,0.44
# Away,RECOVERY,INTERCEPTION,1,618,24.72,618,24.72,Player16,,0.64,0.46,NaN,NaN
# Away,PASS,,1,763,30.52,784,31.36,Player16,Player19,0.58,0.27,0.51,0.33
# Away,PASS,,1,784,31.36,804,32.16,Player19,Player20,0.51,0.33,0.57,0.47
# Away,PASS,,1,834,33.36,881,35.24,Player20,Player22,0.53,0.53,0.44,0.92
# Away,PASS,,1,976,39.04,1010,40.4,Player22,Player17,0.36,0.96,0.48,0.86
# Away,BALL LOST,INTERCEPTION,1,1110,44.4,1134,45.36,Player17,,0.42,0.79,0.31,0.84
# Home,RECOVERY,INTERCEPTION,1,1134,45.36,1134,45.36,Player5,,0.32,0.89,NaN,NaN
# Home,PASS,HEAD,1,1134,45.36,1154,46.16,Player5,Player6,0.32,0.89,0.31,0.78
# Home,PASS,,1,1154,46.16,1177,47.08,Player6,Player10,0.31,0.78,0.41,0.74
# Home,PASS,,1,1226,49.04,1266,50.64,Player10,Player8,0.46,0.68,0.56,0.34
# Home,BALL LOST,INTERCEPTION,1,1370,54.8,1375,55,Player8,,0.86,0.26,0.88,0.28
# Away,RECOVERY,INTERCEPTION,1,1374,54.96,1374,54.96,Player15,,0.87,0.29,NaN,NaN
# Away,BALL OUT,,1,1374,54.96,1425,57,Player15,,0.87,0.29,1.05,0.17
# Home,SET PIECE,CORNER KICK,1,2143,85.72,2143,85.72,Player6,,NaN,NaN,NaN,NaN
# Home,PASS,,1,2143,85.72,2184,87.36,Player6,Player10,1,0.01,0.9,0.09
# Home,PASS,CROSS,1,2263,90.52,2289,91.56,Player10,Player9,0.89,0.14,0.92,0.47
# Home,SHOT,HEAD-ON TARGET-GOAL,1,2289,91.56,2309,92.36,Player9,,0.92,0.47,1.01,0.55
# Away,SET PIECE,KICK OFF,1,3675,147,3675,147,Player19,,NaN,NaN,NaN,NaN
# Away,PASS,,1,3675,147,3703,148.12,Player19,Player21,0.49,0.5,0.58,0.52""")
#
#         serializer = MetricaEventSerializer()
#         serializer.deserialize(
#             inputs={
#                 'raw_data': raw_data
#             }
#         )
def _get_frame_data(
    cls,
    teams,
    teamdict,
    players,
    player_id_to_team_dict,
    periods,
    player_dict,
    anon_players,
    ball_id,
    referee_dict,
    frame,
):
    """Convert one raw frame dictionary into a ``Frame``.

    Parameters
    ----------
    teams
        Indexed as ``teams[0]`` when the possession group is "home team"
        and ``teams[1]`` when it is "away team".
    teamdict
        Mapping queried with the value found in ``player_id_to_team_dict``;
        the results compared against are "home_team" and "away_team".
    players
        ``players["HOME"]`` / ``players["AWAY"]`` map a trackable object id
        to its player.
    player_id_to_team_dict
        Mapping from trackable object id to the key used in ``teamdict``.
    periods
        Indexed with ``frame["period"]``.
    player_dict
        Not used by this method.
    anon_players
        ``anon_players["HOME"]`` / ``anon_players["AWAY"]`` cache anonymous
        players under ``anon_home_<track_id>`` / ``anon_away_<track_id>``.
        The cache is mutated in place.
    ball_id
        Trackable object id that identifies the ball.
    referee_dict
        Container of referee trackable object ids; those records are skipped.
    frame
        Raw frame; the keys "period", "frame", "time", "possession" and
        "data" are read.

    Returns
    -------
    Frame
        Frame with ``ball_state=None`` and the resolved ball owning team.
    """
    frame_period = frame["period"]
    frame_id = frame["frame"]
    frame_time = cls._timestamp_from_timestring(frame["time"])

    ball_coordinates = None
    players_coordinates = {}

    # ball_carrier = frame["possession"].get("trackable_object")
    ball_owning_team = frame["possession"].get("group")
    if ball_owning_team == "home team":
        ball_owning_team = teams[0]
    elif ball_owning_team == "away team":
        ball_owning_team = teams[1]
    else:
        ball_owning_team = None

    for frame_record in frame["data"]:
        # containing x, y, trackable_object, track_id, group_name
        x = frame_record.get("x")
        y = frame_record.get("y")

        trackable_object = frame_record.get("trackable_object", None)
        track_id = frame_record.get("track_id", None)
        group_name = frame_record.get("group_name", None)

        if trackable_object == ball_id:
            ball_coordinates = Point(x=float(x), y=float(y))
            continue
        elif trackable_object in referee_dict:
            continue  # Skip Referee Coords

        if group_name is None:
            group_name = teamdict.get(
                player_id_to_team_dict.get(trackable_object)
            )

            if group_name == "home_team":
                player = players["HOME"][trackable_object]
            elif group_name == "away_team":
                player = players["AWAY"][trackable_object]

        if trackable_object is None:
            player_id = str(track_id)
            if group_name == "home team":
                ground, anon_key = "HOME", f"anon_home_{player_id}"
            elif group_name == "away team":
                ground, anon_key = "AWAY", f"anon_away_{player_id}"
            else:
                ground, anon_key = None, None

            if anon_key is not None:
                # Look the player up under the same key it is stored with.
                # The previous check used "anon_<id>", which never matched
                # the stored "anon_home_<id>" / "anon_away_<id>" entries, so
                # a new anonymous player was created for every record.
                if anon_key not in anon_players[ground]:
                    anon_players[ground][anon_key] = cls.__create_anon_player(
                        cls, teams, frame_record
                    )
                player = anon_players[ground][anon_key]

        # NOTE(review): when no branch above resolved a player, `player`
        # still holds the value from the previous record (or is unbound on
        # the first one) - confirm whether such records can occur.
        point = Point(x, y)
        players_coordinates[player] = point

    return Frame(
        frame_id=frame_id,
        timestamp=frame_time,
        ball_coordinates=ball_coordinates,
        players_coordinates=players_coordinates,
        period=periods[frame_period],
        ball_state=None,
        ball_owning_team=ball_owning_team,
    )
def test_correct_deserialization(self):
    """Deserialize the Metrica home/away fixtures and check the result."""
    fixtures_root = os.path.dirname(__file__)
    tracking_serializer = MetricaTrackingSerializer()

    with open(f'{fixtures_root}/files/metrica_home.csv', 'rb') as raw_data_home:
        with open(f'{fixtures_root}/files/metrica_away.csv', 'rb') as raw_data_away:
            serializer_inputs = {
                'raw_data_home': raw_data_home,
                'raw_data_away': raw_data_away,
            }
            dataset = tracking_serializer.deserialize(inputs=serializer_inputs)

    assert len(dataset.records) == 6
    assert len(dataset.periods) == 2
    assert dataset.orientation == Orientation.FIXED_HOME_AWAY

    expected_first_period = Period(
        id=1,
        start_timestamp=0.04,
        end_timestamp=0.12,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.periods[0] == expected_first_period

    expected_second_period = Period(
        id=2,
        start_timestamp=5800.16,
        end_timestamp=5800.24,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    assert dataset.periods[1] == expected_second_period

    # The expected y values are written as 1 - <raw value> because the
    # loader is expected to flip the y-axis.
    opening_frame = dataset.records[0]
    assert opening_frame.home_team_player_positions['11'] == Point(
        x=0.00082, y=1 - 0.48238
    )
    assert opening_frame.away_team_player_positions['25'] == Point(
        x=0.90509, y=1 - 0.47462
    )
    assert opening_frame.ball_position == Point(x=0.45472, y=1 - 0.38709)

    # A player only shows up in frames recorded while they are on the pitch.
    assert '14' not in dataset.records[0].home_team_player_positions
    assert '14' in dataset.records[3].home_team_player_positions


# Disabled event-serializer test, kept commented out together with its
# inline CSV fixture.
#
# class TestMetricaEvent:
#     def test_correct_deserialization(self):
#         raw_data = BytesIO(b"""Team,Type,Subtype,Period,Start Frame,Start Time [s],End Frame,End Time [s],From,To,Start X,Start Y,End X,End Y
# Away,SET PIECE,KICK OFF,1,1,0.04,0,0,Player19,,NaN,NaN,NaN,NaN
# Away,PASS,,1,1,0.04,3,0.12,Player19,Player21,0.45,0.39,0.55,0.43
# Away,PASS,,1,3,0.12,17,0.68,Player21,Player15,0.55,0.43,0.58,0.21
# Away,PASS,,1,45,1.8,61,2.44,Player15,Player19,0.55,0.19,0.45,0.31
# Away,PASS,,1,77,3.08,96,3.84,Player19,Player21,0.45,0.32,0.49,0.47
# Away,PASS,,1,191,7.64,217,8.68,Player21,Player22,0.4,0.73,0.32,0.98
# Away,PASS,,1,279,11.16,303,12.12,Player22,Player17,0.39,0.96,0.49,0.98
# Away,BALL LOST,INTERCEPTION,1,346,13.84,380,15.2,Player17,,0.51,0.97,0.27,0.75
# Home,RECOVERY,INTERCEPTION,1,378,15.12,378,15.12,Player2,,0.27,0.78,NaN,NaN
# Home,BALL LOST,INTERCEPTION,1,378,15.12,452,18.08,Player2,,0.27,0.78,0.59,0.64
# Away,RECOVERY,INTERCEPTION,1,453,18.12,453,18.12,Player16,,0.57,0.67,NaN,NaN
# Away,BALL LOST,HEAD-INTERCEPTION,1,453,18.12,497,19.88,Player16,,0.57,0.67,0.33,0.65
# Away,CHALLENGE,AERIAL-LOST,1,497,19.88,497,19.88,Player18,,0.38,0.67,NaN,NaN
# Home,CHALLENGE,AERIAL-WON,1,498,19.92,498,19.92,Player2,,0.36,0.67,NaN,NaN
# Home,RECOVERY,INTERCEPTION,1,498,19.92,498,19.92,Player2,,0.36,0.67,NaN,NaN
# Home,PASS,HEAD,1,498,19.92,536,21.44,Player2,Player9,0.36,0.67,0.53,0.59
# Home,PASS,,1,536,21.44,556,22.24,Player9,Player10,0.53,0.59,0.5,0.65
# Home,BALL LOST,INTERCEPTION,1,572,22.88,616,24.64,Player10,,0.5,0.65,0.67,0.44
# Away,RECOVERY,INTERCEPTION,1,618,24.72,618,24.72,Player16,,0.64,0.46,NaN,NaN
# Away,PASS,,1,763,30.52,784,31.36,Player16,Player19,0.58,0.27,0.51,0.33
# Away,PASS,,1,784,31.36,804,32.16,Player19,Player20,0.51,0.33,0.57,0.47
# Away,PASS,,1,834,33.36,881,35.24,Player20,Player22,0.53,0.53,0.44,0.92
# Away,PASS,,1,976,39.04,1010,40.4,Player22,Player17,0.36,0.96,0.48,0.86
# Away,BALL LOST,INTERCEPTION,1,1110,44.4,1134,45.36,Player17,,0.42,0.79,0.31,0.84
# Home,RECOVERY,INTERCEPTION,1,1134,45.36,1134,45.36,Player5,,0.32,0.89,NaN,NaN
# Home,PASS,HEAD,1,1134,45.36,1154,46.16,Player5,Player6,0.32,0.89,0.31,0.78
# Home,PASS,,1,1154,46.16,1177,47.08,Player6,Player10,0.31,0.78,0.41,0.74
# Home,PASS,,1,1226,49.04,1266,50.64,Player10,Player8,0.46,0.68,0.56,0.34
# Home,BALL LOST,INTERCEPTION,1,1370,54.8,1375,55,Player8,,0.86,0.26,0.88,0.28
# Away,RECOVERY,INTERCEPTION,1,1374,54.96,1374,54.96,Player15,,0.87,0.29,NaN,NaN
# Away,BALL OUT,,1,1374,54.96,1425,57,Player15,,0.87,0.29,1.05,0.17
# Home,SET PIECE,CORNER KICK,1,2143,85.72,2143,85.72,Player6,,NaN,NaN,NaN,NaN
# Home,PASS,,1,2143,85.72,2184,87.36,Player6,Player10,1,0.01,0.9,0.09
# Home,PASS,CROSS,1,2263,90.52,2289,91.56,Player10,Player9,0.89,0.14,0.92,0.47
# Home,SHOT,HEAD-ON TARGET-GOAL,1,2289,91.56,2309,92.36,Player9,,0.92,0.47,1.01,0.55
# Away,SET PIECE,KICK OFF,1,3675,147,3675,147,Player19,,NaN,NaN,NaN,NaN
# Away,PASS,,1,3675,147,3703,148.12,Player19,Player21,0.49,0.5,0.58,0.52""")
#
#         serializer = MetricaEventSerializer()
#         serializer.deserialize(
#             inputs={
#                 'raw_data': raw_data
#             }
#         )
def test_correct_deserialization(self, raw_data: str, meta_data: str):
    """Load the SkillCorner fixture and check metadata, frames and pitch."""
    dataset = skillcorner.load(
        meta_data=meta_data, raw_data=raw_data, coordinates="skillcorner"
    )
    metadata = dataset.metadata

    assert metadata.provider == Provider.SKILLCORNER
    assert dataset.dataset_type == DatasetType.TRACKING
    assert len(dataset.records) == 34783
    assert len(metadata.periods) == 2
    assert metadata.orientation == Orientation.AWAY_TEAM

    expected_first_half = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=2753.3,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    assert metadata.periods[1] == expected_first_half

    expected_second_half = Period(
        id=2,
        start_timestamp=2700.0,
        end_timestamp=5509.7,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert metadata.periods[2] == expected_second_half

    # are frames with wrong camera views and pregame skipped?
    first_frame = dataset.records[0]
    assert first_frame.timestamp == 11.2

    # make sure data is loaded correctly
    home_player = metadata.teams[0].players[2]
    home_coordinates = first_frame.players_data[home_player].coordinates
    assert home_coordinates == Point(x=33.8697315398, y=-9.55742259253)

    away_player = metadata.teams[1].players[9]
    away_coordinates = first_frame.players_data[away_player].coordinates
    assert away_coordinates == Point(x=25.9863082795, y=27.3013598578)

    assert dataset.records[1].ball_coordinates == Point3D(
        x=30.5914728131, y=35.3622277834, z=2.24371228757
    )

    # check that missing ball-z_coordinate is identified as None
    assert dataset.records[38].ball_coordinates == Point3D(
        x=11.6568802848, y=24.7214038909, z=None
    )

    # check that 'ball_z' column is included in to_pandas dataframe
    # frame = _frame_to_pandas_row_converter(dataset.records[38])
    # assert "ball_z" in frame.keys()

    # make sure player data is only in the frame when the player is in view
    visible_player_ids = [
        tracked.player_id for tracked in first_frame.players_data.keys()
    ]
    assert "home_1" not in visible_player_ids
    assert "away_1" not in visible_player_ids

    # are anonymous players loaded correctly?
    anon_in_frame_87 = [
        tracked
        for tracked in dataset.records[87].players_data
        if tracked.player_id == "home_anon_75"
    ]
    anon_in_frame_88 = [
        tracked
        for tracked in dataset.records[88].players_data
        if tracked.player_id == "home_anon_75"
    ]
    assert anon_in_frame_87 == anon_in_frame_88

    # is pitch dimension set correctly?
    pitch = metadata.pitch_dimensions
    assert pitch.x_dim.min == -52.5
    assert pitch.x_dim.max == 52.5
    assert pitch.y_dim.min == -34
    assert pitch.y_dim.max == 34
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize Opta event data into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `f24_data` should point to a `Readable` object containing
        the 'xml' formatted event data. input `f7_data` should point
        to a `Readable` object containing the 'xml' formatted f7 data.
    options : dict
        Options for deserialization of the Opta file. Possible options are
        `event_types` (list of event types) to specify the event types that
        should be returned. Valid types: "shot", "pass", "carry", "take_on" and
        "generic". Generic is everything other than the first 4. Those events
        are barely parsed. This type of event can be used to do the parsing
        yourself.
        Every event has a 'raw_event' attribute which holds the original
        f24 `Event` XML element.

    Returns
    -------
    dataset : EventDataset
        Events are timestamped relative to the start of their period and
        coordinates are reported on a 0-100 by 0-100 pitch.

    Raises
    ------
    Exception
        When a `TeamData` element has a side other than "Home"/"Away", when
        either team has no players ("LineUp incomplete"), or when an event
        references a team id that is neither the home nor the away team.

    Examples
    --------
    >>> serializer = OptaSerializer()
    >>> with open("123_f24.xml", "rb") as f24_data:
    ...     with open("123_f7.xml", "rb") as f7_data:
    ...         dataset = serializer.deserialize(
    ...             inputs={
    ...                 'f24_data': f24_data,
    ...                 'f7_data': f7_data
    ...             },
    ...             options={
    ...                 'event_types': ["pass", "take_on", "carry", "shot"]
    ...             }
    ...         )
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs["f7_data"].read())
        f24_root = objectify.fromstring(inputs["f24_data"].read())

        # An empty list means "no filtering": every event is kept.
        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

    with performance_logging("parse data", logger=logger):
        # Teams and scores come from the f7 document.
        matchdata_path = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData")
        team_elms = list(
            matchdata_path.find(f7_root).iterchildren("TeamData"))

        home_score = None
        away_score = None
        for team_elm in team_elms:
            if team_elm.attrib["Side"] == "Home":
                home_score = team_elm.attrib["Score"]
                home_team = _team_from_xml_elm(team_elm, f7_root)
            elif team_elm.attrib["Side"] == "Away":
                away_score = team_elm.attrib["Score"]
                away_team = _team_from_xml_elm(team_elm, f7_root)
            else:
                raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

        # NOTE(review): the scores are passed on exactly as read from the
        # XML attributes (no int() conversion here) - confirm `Score`
        # accepts that.
        score = Score(home=home_score, away=away_score)
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise Exception("LineUp incomplete")

        game_elm = f24_root.find("Game")
        # Only periods 1 and 2 are tracked; events of any other period are
        # skipped below. The timestamps are filled in from the start/end
        # period events.
        periods = [
            Period(
                id=1,
                start_timestamp=None,
                end_timestamp=None,
            ),
            Period(
                id=2,
                start_timestamp=None,
                end_timestamp=None,
            ),
        ]
        # Carried across iterations: the team of the most recent event whose
        # type is in BALL_OWNING_EVENTS.
        possession_team = None
        events = []
        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])
            # for/else: after the loop `period` is the matching Period; the
            # else branch only runs when no period matched.
            for period in periods:
                if period.id == period_id:
                    break
            else:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(
                    f"Set start of period {period.id} to {timestamp}")
                period.start_timestamp = timestamp
            elif type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(
                    f"Set end of period {period.id} to {timestamp}")
                period.end_timestamp = timestamp
            else:
                if not period.start_timestamp:
                    # not started yet
                    continue

                if event_elm.attrib["team_id"] == home_team.team_id:
                    team = teams[0]
                elif event_elm.attrib["team_id"] == away_team.team_id:
                    team = teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {event_elm.attrib['team_id']}")

                x = float(event_elm.attrib["x"])
                y = float(event_elm.attrib["y"])
                outcome = int(event_elm.attrib["outcome"])
                # qualifier_id -> value (None when the Q element has no
                # "value" attribute).
                raw_qualifiers = {
                    int(qualifier_elm.attrib["qualifier_id"]):
                    qualifier_elm.attrib.get("value")
                    for qualifier_elm in event_elm.iterchildren("Q")
                }
                player = None
                if "player_id" in event_elm.attrib:
                    player = team.get_player_by_id(
                        event_elm.attrib["player_id"])

                if type_id in BALL_OWNING_EVENTS:
                    possession_team = team

                # Arguments shared by every event type created below.
                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp - period.start_timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=event_id,
                    team=team,
                    player=player,
                    coordinates=Point(x=x, y=y),
                    raw_event=event_elm,
                )

                if type_id == EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        raw_qualifiers, outcome)
                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                    pass_event_kwargs = _parse_offside_pass(raw_qualifiers)
                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id == EVENT_TYPE_TAKE_ON:
                    take_on_event_kwargs = _parse_take_on(outcome)
                    event = TakeOnEvent.create(
                        qualifiers=None,
                        **take_on_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id in (
                        EVENT_TYPE_SHOT_MISS,
                        EVENT_TYPE_SHOT_POST,
                        EVENT_TYPE_SHOT_SAVED,
                        EVENT_TYPE_SHOT_GOAL,
                ):
                    shot_event_kwargs = _parse_shot(
                        raw_qualifiers,
                        type_id,
                        coordinates=generic_event_kwargs["coordinates"],
                    )
                    # Merged with update() rather than ** unpacking, so keys
                    # returned by _parse_shot override the generic ones
                    # instead of raising a duplicate-keyword error.
                    kwargs = {}
                    kwargs.update(generic_event_kwargs)
                    kwargs.update(shot_event_kwargs)
                    event = ShotEvent.create(**kwargs)

                elif type_id == EVENT_TYPE_RECOVERY:
                    event = RecoveryEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                    event = FoulCommittedEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                elif type_id in BALL_OUT_EVENTS:
                    # The only event type here that marks the ball as dead.
                    generic_event_kwargs["ball_state"] = BallState.DEAD
                    event = BallOutEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                else:
                    event = GenericEvent.create(
                        **generic_event_kwargs,
                        result=None,
                        qualifiers=None,
                        event_name=_get_event_type_name(type_id),
                    )

                if (not wanted_event_types
                        or event.event_type in wanted_event_types):
                    events.append(event)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                             y_dim=Dimension(0, 100)),
            score=score,
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            provider=Provider.OPTA,
        )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def _frame_from_line(cls, teams, period, line, frame_rate): line = str(line) frame_id, players, ball = line.strip().split(":")[:3] players_coordinates = {} for player_data in players.split(";")[:-1]: team_id, target_id, jersey_no, x, y, speed = player_data.split(",") team_id = int(team_id) if team_id == 1: team = teams[0] elif team_id == 0: team = teams[1] else: raise Exception(f"Unknown team {team_id}") player = team.get_player_by_jersey_number(jersey_no) if not player: player = Player( player_id=f"{team.ground}_{jersey_no}", team=team, jersey_no=int(jersey_no), ) team.players.append(player) players_coordinates[player] = Point(float(x), float(y)) ( ball_x, ball_y, ball_z, ball_speed, ball_owning_team, ball_state, ) = ball.rstrip(";").split(",")[:6] frame_id = int(frame_id) if ball_owning_team == "H": ball_owning_team = teams[0] elif ball_owning_team == "A": ball_owning_team = teams[1] else: raise Exception(f"Unknown ball owning team: {ball_owning_team}") if ball_state == "Alive": ball_state = BallState.ALIVE elif ball_state == "Dead": ball_state = BallState.DEAD else: raise Exception(f"Unknown ball state: {ball_state}") return Frame( frame_id=frame_id, timestamp=frame_id / frame_rate - period.start_timestamp, ball_coordinates=Point(float(ball_x), float(ball_y)), ball_state=ball_state, ball_owning_team=ball_owning_team, players_coordinates=players_coordinates, period=period, )
def test_correct_deserialization(self):
    """Deserialize the SkillCorner fixture files and check the dataset."""
    fixtures_root = os.path.dirname(__file__)
    tracking_serializer = SkillCornerTrackingSerializer()

    with open(
        f"{fixtures_root}/files/skillcorner_structured_data.json", "rb"
    ) as raw_data:
        with open(
            f"{fixtures_root}/files/skillcorner_match_data.json", "rb"
        ) as metadata:
            serializer_inputs = {
                "raw_data": raw_data,
                "metadata": metadata,
            }
            dataset = tracking_serializer.deserialize(inputs=serializer_inputs)

    assert dataset.metadata.provider == Provider.SKILLCORNER
    assert dataset.dataset_type == DatasetType.TRACKING
    assert len(dataset.records) == 34783
    assert len(dataset.metadata.periods) == 2
    assert dataset.metadata.orientation == Orientation.AWAY_TEAM

    expected_first_half = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=2753.3,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    assert dataset.metadata.periods[1] == expected_first_half

    expected_second_half = Period(
        id=2,
        start_timestamp=2700.0,
        end_timestamp=5509.7,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    assert dataset.metadata.periods[2] == expected_second_half

    # are frames with wrong camera views and pregame skipped?
    first_frame = dataset.records[0]
    assert first_frame.timestamp == 11.2

    # make sure data is loaded correctly
    home_player = dataset.metadata.teams[0].players[2]
    assert first_frame.players_coordinates[home_player] == Point(
        x=33.8697315398, y=-9.55742259253
    )
    away_player = dataset.metadata.teams[1].players[9]
    assert first_frame.players_coordinates[away_player] == Point(
        x=25.9863082795, y=27.3013598578
    )

    assert dataset.records[1].ball_coordinates == Point(
        x=30.5914728131, y=35.3622277834
    )

    # make sure player data is only in the frame when the player is in view
    visible_player_ids = [
        tracked.player_id
        for tracked in first_frame.players_coordinates.keys()
    ]
    assert "home_1" not in visible_player_ids
    assert "away_1" not in visible_player_ids

    # are anonymous players loaded correctly?
    anon_in_frame_87 = [
        tracked
        for tracked in dataset.records[87].players_coordinates
        if tracked.player_id == "home_anon_75"
    ]
    anon_in_frame_88 = [
        tracked
        for tracked in dataset.records[88].players_coordinates
        if tracked.player_id == "home_anon_75"
    ]
    assert anon_in_frame_87 == anon_in_frame_88

    # is pitch dimension set correctly?
    pitch = dataset.metadata.pitch_dimensions
    assert pitch.x_dim.min == -52.5
    assert pitch.x_dim.max == 52.5
    assert pitch.y_dim.min == -34
    assert pitch.y_dim.max == 34
def test_correct_normalized_deserialization(self, event_data: str):
    """Check the first event's coordinates after a default datafactory load."""
    first_event = datafactory.load(event_data=event_data).events[0]
    assert first_event.coordinates == Point(0.505, 0.505)
def test_correct_normalized_deserialization(self, event_data: str, meta_data: str):
    """Check the first event's coordinates after a default sportec load."""
    first_event = sportec.load(
        event_data=event_data, meta_data=meta_data
    ).events[0]
    assert first_event.coordinates == Point(0.5640999999999999, 1)
def deserialize(self, inputs: SportecInputs) -> EventDataset:
    """Deserialize Sportec match and event XML into an `EventDataset`.

    Parameters
    ----------
    inputs : SportecInputs
        `inputs.meta_data` and `inputs.event_data` are read in full and
        parsed as XML.

    Returns
    -------
    EventDataset
        Events are passed through the transformer obtained from
        `self.get_transformer` and filtered with
        `self.should_include_event`. Timestamps are relative to the start
        of the period the event belongs to.

    Raises
    ------
    DeserializationError
        When a team has a role other than "home"/"guest" or when either
        team has no players.
    ValueError
        When an event carries a "Player" attribute but no "Team".
    """
    with performance_logging("load data", logger=logger):
        match_root = objectify.fromstring(inputs.meta_data.read())
        event_root = objectify.fromstring(inputs.event_data.read())

    with performance_logging("parse data", logger=logger):
        # Pitch size from the match information drives the transformer.
        x_max = float(
            match_root.MatchInformation.Environment.attrib["PitchX"]
        )
        y_max = float(
            match_root.MatchInformation.Environment.attrib["PitchY"]
        )

        transformer = self.get_transformer(length=x_max, width=y_max)

        team_path = objectify.ObjectPath(
            "PutDataRequest.MatchInformation.Teams"
        )
        team_elms = list(team_path.find(match_root).iterchildren("Team"))

        for team_elm in team_elms:
            if team_elm.attrib["Role"] == "home":
                home_team = _team_from_xml_elm(team_elm)
            elif team_elm.attrib["Role"] == "guest":
                away_team = _team_from_xml_elm(team_elm)
            else:
                raise DeserializationError(
                    f"Unknown side: {team_elm.attrib['Role']}"
                )

        # "Result" is split on ":" into the home and away score.
        (
            home_score,
            away_score,
        ) = match_root.MatchInformation.General.attrib["Result"].split(":")
        score = Score(home=int(home_score), away=int(away_score))
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise DeserializationError("LineUp incomplete")

        periods = []
        period_id = 0
        events = []

        for event_elm in event_root.iterchildren("Event"):
            event_chain = _event_chain_from_xml_elm(event_elm)
            timestamp = _parse_datetime(event_chain["Event"]["EventTime"])
            # A kickoff that carries a "GameSection" opens a new period.
            # There is no `continue` here, so the kickoff is also processed
            # as a regular event below.
            if (
                SPORTEC_EVENT_NAME_KICKOFF in event_chain
                and "GameSection"
                in event_chain[SPORTEC_EVENT_NAME_KICKOFF]
            ):
                period_id += 1
                period = Period(
                    id=period_id,
                    start_timestamp=timestamp,
                    end_timestamp=None,
                )
                if period_id == 1:
                    team_left = event_chain[SPORTEC_EVENT_NAME_KICKOFF][
                        "TeamLeft"
                    ]
                    if team_left == home_team.team_id:
                        # goal of home team is on the left side.
                        # this means they attack from left to right
                        orientation = Orientation.FIXED_HOME_AWAY
                        period.set_attacking_direction(
                            AttackingDirection.HOME_AWAY
                        )
                    else:
                        orientation = Orientation.FIXED_AWAY_HOME
                        period.set_attacking_direction(
                            AttackingDirection.AWAY_HOME
                        )
                else:
                    # Later periods take the opposite direction of the
                    # previous one.
                    last_period = periods[-1]
                    period.set_attacking_direction(
                        AttackingDirection.AWAY_HOME
                        if last_period.attacking_direction
                        == AttackingDirection.HOME_AWAY
                        else AttackingDirection.HOME_AWAY
                    )

                periods.append(period)
            elif SPORTEC_EVENT_NAME_FINAL_WHISTLE in event_chain:
                # The final whistle only closes the current period; no
                # event is emitted for it.
                period.end_timestamp = timestamp
                continue

            team = None
            player = None
            flatten_attributes = dict()
            # reverse because top levels are more important
            for event_attributes in reversed(event_chain.values()):
                flatten_attributes.update(event_attributes)

            if "Team" in flatten_attributes:
                team = (
                    home_team
                    if flatten_attributes["Team"] == home_team.team_id
                    else away_team
                )
            if "Player" in flatten_attributes:
                if not team:
                    raise ValueError("Player set while team is not set")
                player = team.get_player_by_id(
                    flatten_attributes["Player"]
                )

            # Arguments shared by every event type created below.
            # NOTE(review): `period` is only bound once a kickoff has been
            # seen - presumably the feed always starts with one; confirm.
            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=None,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_chain["Event"]["EventId"],
                coordinates=_parse_coordinates(event_chain["Event"]),
                raw_event=flatten_attributes,
                team=team,
                player=player,
            )

            # popitem() removes the entry from event_chain, so the helpers
            # and lookups below see the chain without it.
            # NOTE(review): presumably event_chain is insertion-ordered and
            # the popped entry is the most specific one - confirm against
            # _event_chain_from_xml_elm.
            event_name, event_attributes = event_chain.popitem()
            if event_name in SPORTEC_SHOT_EVENT_NAMES:
                shot_event_kwargs = _parse_shot(
                    event_name=event_name, event_chain=event_chain
                )
                event = ShotEvent.create(
                    **shot_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name in SPORTEC_PASS_EVENT_NAMES:
                pass_event_kwargs = _parse_pass(
                    event_chain=event_chain, team=team
                )
                event = PassEvent.create(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                    receive_timestamp=None,
                    receiver_coordinates=None,
                )
            elif event_name == SPORTEC_EVENT_NAME_BALL_CLAIMING:
                event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_SUBSTITUTION:
                substitution_event_kwargs = _parse_substitution(
                    event_attributes=event_attributes, team=team
                )
                # The "player" from the substitution data replaces the
                # generic one; it is deleted from the specific kwargs so it
                # is not passed twice.
                generic_event_kwargs["player"] = substitution_event_kwargs[
                    "player"
                ]
                del substitution_event_kwargs["player"]
                event = SubstitutionEvent.create(
                    result=None,
                    qualifiers=None,
                    **substitution_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_CAUTION:
                card_kwargs = _parse_caution(event_attributes)
                event = CardEvent.create(
                    result=None,
                    qualifiers=None,
                    **card_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_FOUL:
                foul_kwargs = _parse_foul(event_attributes, teams=teams)
                # update() lets the foul data override generic entries.
                generic_event_kwargs.update(foul_kwargs)
                event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent.create(
                    result=None,
                    qualifiers=None,
                    event_name=event_name,
                    **generic_event_kwargs,
                )

            if events:
                previous_event = events[-1]

                # A completed pass gets its receiver position from the
                # source position of the event that follows it.
                if (
                    previous_event.event_type == EventType.PASS
                    and previous_event.result == PassResult.COMPLETE
                ):
                    if "X-Source-Position" in event_chain["Event"]:
                        # NOTE(review): previous_event already went through
                        # the transformer, while this Point is built from
                        # the raw attributes - confirm that is intended.
                        previous_event.receiver_coordinates = Point(
                            x=float(
                                event_chain["Event"]["X-Source-Position"]
                            ),
                            y=float(
                                event_chain["Event"]["Y-Source-Position"]
                            ),
                        )

                if (
                    event.event_type == EventType.PASS
                    and event.get_qualifier_value(SetPieceQualifier)
                    in (
                        SetPieceType.THROW_IN,
                        SetPieceType.GOAL_KICK,
                        SetPieceType.CORNER_KICK,
                    )
                ):
                    # 1. update previous pass
                    if events[-1].event_type == EventType.PASS:
                        events[-1].result = PassResult.OUT

                    # 2. add synthetic out event
                    # The timestamp comes from the second key that is left
                    # in event_chain after the popitem() above.
                    decision_timestamp = _parse_datetime(
                        event_chain[list(event_chain.keys())[1]][
                            "DecisionTimestamp"
                        ]
                    )
                    out_event = BallOutEvent.create(
                        period=period,
                        timestamp=decision_timestamp
                        - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.DEAD,
                        # from Event
                        event_id=event_chain["Event"]["EventId"]
                        + "-ball-out",
                        team=events[-1].team,
                        player=events[-1].player,
                        coordinates=None,
                        raw_event={},
                        result=None,
                        qualifiers=None,
                    )
                    events.append(transformer.transform_event(out_event))

            events.append(transformer.transform_event(event))

        # Filtering runs after the loop because the loop itself relies on
        # the unfiltered previous event.
        events = list(
            filter(
                self.should_include_event,
                events,
            )
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            score=score,
            frame_rate=None,
            orientation=orientation,
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            provider=Provider.SPORTEC,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

    return EventDataset(
        metadata=metadata,
        records=events,
    )