def _team_from_xml_elm(team_elm, f7_root) -> Team:
    """Build a ``Team`` and its line-up from a team XML element.

    Args:
        team_elm: XML element exposing the ``TeamRef`` and ``Side`` attributes
            and a ``PlayerLineUp`` child holding ``MatchPlayer`` elements.
        f7_root: root element handed to ``_parse_team_players`` to resolve the
            team name and the per-player name details.

    Returns:
        Team: the team, with ``players`` populated from the line-up.
    """
    team_ref = team_elm.attrib["TeamRef"]
    # This should not happen here
    team_name, team_players = _parse_team_players(f7_root, team_ref)

    # The team id is the reference with its leading "t" characters removed.
    team_id = team_ref.lstrip("t")
    if team_elm.attrib["Side"] == "Home":
        ground = Ground.HOME
    else:
        ground = Ground.AWAY
    team = Team(team_id=str(team_id), name=team_name, ground=ground)

    lineup = []
    for player_elm in team_elm.find("PlayerLineUp").iterchildren(
        "MatchPlayer"
    ):
        attrs = player_elm.attrib
        player_ref = attrs["PlayerRef"]
        jersey_no = int(attrs["ShirtNumber"])
        # Names are looked up by the full (unstripped) player reference.
        first_name = team_players[player_ref]["first_name"]
        last_name = team_players[player_ref]["last_name"]
        position = Position(
            position_id=attrs["Formation_Place"],
            name=attrs["Position"],
            coordinates=None,
        )
        lineup.append(
            Player(
                player_id=player_ref.lstrip("p"),
                team=team,
                jersey_no=jersey_no,
                first_name=first_name,
                last_name=last_name,
                position=position,
            )
        )

    team.players = lineup
    return team
def __create_anon_player(cls, teams, frame_record):
    """
    Build a placeholder Player for a tracked object whose team is known
    but whose identity is not.

    Args:
        teams: sequence whose first item is used for "home team" records
            and whose second item is used for "away team" records.
        frame_record (dict): dictionary containing 'x', 'y', 'track_id' and 'group_name'

    Returns:
        kloppy.domain.models.common.Player

    Raises:
        ValueError: if 'group_name' is neither "home team" nor "away team".
    """
    track_id = frame_record.get("track_id", None)
    group_name = frame_record.get("group_name", None)

    # Guard first: without a recognised group there is no team to attach to.
    if group_name not in ("home team", "away team"):
        raise ValueError(
            f"anonymous player with track_id `{track_id}` does not have a specified group_name."
        )

    team = teams[0] if group_name == "home team" else teams[1]

    anon_id = f"{team.ground}_anon_{track_id}"
    anon_name = f"Anon_{track_id}"
    return Player(
        player_id=anon_id,
        team=team,
        jersey_no=None,
        name=anon_name,
        first_name="Anon",
        last_name=track_id,
        starting=None,
        position=None,
        attributes={},
    )
def _team_from_xml_elm(team_elm) -> Team:
    """Build a ``Team`` and its players from a team XML element.

    Args:
        team_elm: element exposing ``TeamId``, ``TeamName`` and ``Role``
            attributes and a ``Players`` child containing ``Player`` elements.

    Returns:
        Team: the team, with ``players`` populated. A player only receives a
        ``Position`` when its element has a ``PlayingPosition`` attribute.
    """
    team_attrs = team_elm.attrib
    team_id = team_attrs["TeamId"]
    team_name = team_attrs["TeamName"]
    if team_attrs["Role"] == "home":
        ground = Ground.HOME
    else:
        ground = Ground.AWAY
    team = Team(team_id=team_id, name=team_name, ground=ground)

    squad = []
    for player_elm in team_elm.Players.iterchildren("Player"):
        attrs = player_elm.attrib
        person_id = attrs["PersonId"]
        jersey_no = int(attrs["ShirtNumber"])
        short_name = attrs["Shortname"]
        first_name = attrs["FirstName"]
        last_name = attrs["LastName"]

        position = None
        if "PlayingPosition" in attrs:
            position = Position(
                position_id=None,
                name=attrs["PlayingPosition"],
                coordinates=None,
            )

        # "Starting" is a textual flag; only the literal "true" counts.
        is_starting = attrs["Starting"] == "true"

        squad.append(
            Player(
                player_id=person_id,
                team=team,
                jersey_no=jersey_no,
                name=short_name,
                first_name=first_name,
                last_name=last_name,
                position=position,
                starting=is_starting,
            )
        )

    team.players = squad
    return team
def _get_tracking_dataset(self):
    """Assemble a small, fixed ``TrackingDataset`` with two periods and two frames.

    The first frame has no player coordinates; the second has one home
    player. Both frames belong to the first period.
    """
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)

    first_period = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_period = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    periods = [first_period, second_period]

    # Every flag except ball-owning-team and ball-state is set.
    dataset_flags = ~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE)
    pitch = PitchDimensions(
        x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
    )
    metadata = Metadata(
        flags=dataset_flags,
        pitch_dimensions=pitch,
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        periods=periods,
        teams=[home_team, away_team],
        score=None,
        provider=None,
    )

    empty_frame = Frame(
        frame_id=1,
        timestamp=0.1,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_coordinates={},
        ball_coordinates=Point(x=100, y=-50),
    )

    home_player = Player(team=home_team, player_id="home_1", jersey_no=1)
    single_player_frame = Frame(
        frame_id=2,
        timestamp=0.2,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_coordinates={home_player: Point(x=15, y=35)},
        ball_coordinates=Point(x=0, y=50),
    )

    return TrackingDataset(
        metadata=metadata,
        records=[empty_frame, single_player_frame],
    )
def _frame_from_framedata(cls, teams, period, frame_data):
    """Convert one decoded frame dictionary into a ``Frame``.

    Args:
        teams: two-item sequence; the first item is paired with
            ``"homePlayers"`` and the second with ``"awayPlayers"``.
        period: period object stored on the resulting frame.
        frame_data (dict): decoded frame with the keys ``frameIdx``,
            ``gameClock``, ``ball``, ``live``, ``lastTouch``, ``homePlayers``
            and ``awayPlayers``.

    Returns:
        Frame: the frame. Players not yet known to their team (looked up by
        jersey number) are created and appended to ``team.players``.
    """
    frame_id = frame_data["frameIdx"]
    frame_timestamp = frame_data["gameClock"]

    # A falsy "xyz" means there is no ball position for this frame.
    ball_coordinates = None
    if frame_data["ball"]["xyz"]:
        ball_x, ball_y, ball_z = frame_data["ball"]["xyz"]
        ball_coordinates = Point3D(
            float(ball_x), float(ball_y), float(ball_z)
        )

    if frame_data["live"]:
        ball_state = BallState.ALIVE
    else:
        ball_state = BallState.DEAD

    # Anything other than "home" is attributed to the second team.
    if frame_data["lastTouch"] == "home":
        ball_owning_team = teams[0]
    else:
        ball_owning_team = teams[1]

    players_data = {}
    for team, players_key in zip(teams, ["homePlayers", "awayPlayers"]):
        for entry in frame_data[players_key]:
            jersey_no = entry["number"]
            x, y, _ = entry["xyz"]

            player = team.get_player_by_jersey_number(jersey_no)
            if not player:
                player = Player(
                    player_id=entry["playerId"],
                    team=team,
                    jersey_no=int(jersey_no),
                )
                team.players.append(player)

            position = Point(float(x), float(y))
            players_data[player] = PlayerData(coordinates=position)

    return Frame(
        frame_id=frame_id,
        timestamp=frame_timestamp,
        ball_coordinates=ball_coordinates,
        ball_state=ball_state,
        ball_owning_team=ball_owning_team,
        players_data=players_data,
        period=period,
        other_data={},
    )
def _parse_team(raw_events, wyId: str, ground: Ground) -> Team:
    """Build a ``Team`` and its players from the raw event payload.

    Args:
        raw_events: mapping with a ``"teams"`` and a ``"players"`` entry, both
            keyed by team id.
        wyId: id of the team to extract.
        ground: ground assigned to the resulting team.

    Returns:
        Team: the team with ``players`` populated; jersey numbers are left
        as ``None``.
    """
    official_name = raw_events["teams"][wyId]["officialName"]
    team = Team(team_id=wyId, name=official_name, ground=ground)

    squad = []
    for entry in raw_events["players"][wyId]:
        player_id = str(entry["playerId"])
        first_name = entry["player"]["firstName"]
        last_name = entry["player"]["lastName"]
        squad.append(
            Player(
                player_id=player_id,
                team=team,
                jersey_no=None,
                first_name=first_name,
                last_name=last_name,
            )
        )

    team.players = squad
    return team
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> EventDataset:
    """
    Deserialize StatsBomb event data into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `event_data` should point to a `Readable` object containing
        the 'json' formatted event data. input `lineup_data` should point
        to a `Readable` object containing the 'json' formatted lineup data.
    options : dict
        Options for deserialization of the StatsBomb file. Possible options are
        `event_types` (list of event types) to specify the event types that
        should be returned. Valid types: "shot", "pass", "carry", "take_on" and
        "generic". Generic is everything other than the first 4. Those events
        are barely parsed. This type of event can be used to do the parsing
        yourself.
        Every event has a 'raw_event' attribute which contains the original
        dictionary.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    Exception
        When an event's `team` or `possession_team` id matches neither the
        home nor the away lineup.

    See Also
    --------

    Examples
    --------
    >>> serializer = StatsBombSerializer()
    >>> with open("events/12312312.json", "rb") as event_data, \
    >>>      open("lineups/123123123.json", "rb") as lineup_data:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'event_data': event_data,
    >>>             'lineup_data': lineup_data
    >>>         },
    >>>         options={
    >>>             'event_types': ["pass", "take_on", "carry", "shot"]
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs["event_data"])
        # The lineup file is unpacked as exactly two entries: home, then away.
        home_lineup, away_lineup = json.load(inputs["lineup_data"])
        (
            shot_fidelity_version,
            xy_fidelity_version,
        ) = _determine_xy_fidelity_versions(raw_events)
        logger.info(
            f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
        )

    with performance_logging("parse data", logger=logger):
        home_team = Team(
            team_id=str(home_lineup["team_id"]),
            name=home_lineup["team_name"],
            ground=Ground.HOME,
        )
        home_team.players = [
            Player(
                player_id=str(player["player_id"]),
                team=home_team,
                name=player["player_name"],
                jersey_no=int(player["jersey_number"]),
            )
            for player in home_lineup["lineup"]
        ]

        away_team = Team(
            team_id=str(away_lineup["team_id"]),
            name=away_lineup["team_name"],
            ground=Ground.AWAY,
        )
        away_team.players = [
            Player(
                player_id=str(player["player_id"]),
                team=away_team,
                name=player["player_name"],
                jersey_no=int(player["jersey_number"]),
            )
            for player in away_lineup["lineup"]
        ]

        teams = [home_team, away_team]

        # An empty list means "no filter": every event is kept (see the
        # check at the end of the loop).
        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

        periods = []
        period = None
        events = []
        for raw_event in raw_events:
            # Resolve the acting team by comparing raw (unstringified) ids.
            if raw_event["team"]["id"] == home_lineup["team_id"]:
                team = teams[0]
            elif raw_event["team"]["id"] == away_lineup["team_id"]:
                team = teams[1]
            else:
                raise Exception(
                    f"Unknown team_id {raw_event['team']['id']}"
                )

            if (
                raw_event["possession_team"]["id"]
                == home_lineup["team_id"]
            ):
                possession_team = teams[0]
            elif (
                raw_event["possession_team"]["id"]
                == away_lineup["team_id"]
            ):
                possession_team = teams[1]
            else:
                raise Exception(
                    f"Unknown possession_team_id: {raw_event['possession_team']}"
                )

            timestamp = parse_str_ts(raw_event["timestamp"])
            period_id = int(raw_event["period"])
            # A change of period id opens a new Period; otherwise the running
            # period's end is pushed forward to the current event.
            if not period or period.id != period_id:
                period = Period(
                    id=period_id,
                    start_timestamp=(
                        timestamp
                        if not period
                        # period = [start, end], add millisecond to prevent overlapping
                        else timestamp + period.end_timestamp + 0.001
                    ),
                    end_timestamp=None,
                )
                periods.append(period)
            else:
                period.end_timestamp = period.start_timestamp + timestamp

            # Not every raw event carries a "player" entry.
            player = None
            if "player" in raw_event:
                player = team.get_player_by_id(raw_event["player"]["id"])

            event_type = raw_event["type"]["id"]
            if event_type == SB_EVENT_TYPE_SHOT:
                fidelity_version = shot_fidelity_version
            elif event_type in (
                SB_EVENT_TYPE_CARRY,
                SB_EVENT_TYPE_DRIBBLE,
                SB_EVENT_TYPE_PASS,
            ):
                fidelity_version = xy_fidelity_version
            else:
                # TODO: Uh ohhhh.. don't know which one to pick
                fidelity_version = xy_fidelity_version

            # Keyword arguments shared by every event type built below.
            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp,
                ball_owning_team=possession_team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=raw_event["id"],
                team=team,
                player=player,
                coordinates=(
                    _parse_coordinates(
                        raw_event.get("location"), fidelity_version
                    )
                    if "location" in raw_event
                    else None
                ),
                raw_event=raw_event,
            )

            if event_type == SB_EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(
                    pass_dict=raw_event["pass"],
                    team=team,
                    fidelity_version=fidelity_version,
                )

                event = PassEvent(
                    # TODO: Consider moving this to _parse_pass
                    receive_timestamp=timestamp + raw_event["duration"],
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_type == SB_EVENT_TYPE_SHOT:
                shot_event_kwargs = _parse_shot(
                    shot_dict=raw_event["shot"]
                )
                event = ShotEvent(
                    **shot_event_kwargs, **generic_event_kwargs
                )

            # For dribble and carry the definitions
            # are flipped between Statsbomb and kloppy
            elif event_type == SB_EVENT_TYPE_DRIBBLE:
                take_on_event_kwargs = _parse_take_on(
                    take_on_dict=raw_event["dribble"]
                )
                event = TakeOnEvent(
                    **take_on_event_kwargs, **generic_event_kwargs
                )
            elif event_type == SB_EVENT_TYPE_CARRY:
                carry_event_kwargs = _parse_carry(
                    carry_dict=raw_event["carry"],
                    fidelity_version=fidelity_version,
                )
                event = CarryEvent(
                    # TODO: Consider moving this to _parse_carry
                    end_timestamp=timestamp + raw_event["duration"],
                    **carry_event_kwargs,
                    **generic_event_kwargs,
                )
            else:
                # Everything else is kept as a barely-parsed generic event.
                event = GenericEvent(
                    result=None,
                    event_name=raw_event["type"]["name"],
                    **generic_event_kwargs,
                )

            # Periods above are updated for every raw event; only the
            # resulting event list is subject to the type filter.
            if (
                not wanted_event_types
                or event.event_type in wanted_event_types
            ):
                events.append(event)

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)
        ),
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        score=None,
    )

    return EventDataset(metadata=metadata, records=events,)
def _frame_from_line(cls, teams, period, line, frame_rate):
    """Parse one raw tracking line into a ``Frame``.

    The line is split on ":" into a frame id, a ";"-separated player section
    and a ball section. Each player entry holds six comma-separated fields.

    Args:
        teams: two-item sequence; team id 1 maps to the first item and team
            id 0 to the second. Entries with any other team id are skipped.
        period: period of the frame; its ``start_timestamp`` is subtracted
            when computing the frame timestamp.
        line: raw line; it is passed through ``str`` before parsing.
        frame_rate: frames per second, used to turn the frame id into seconds.

    Returns:
        Frame: the parsed frame. Players not yet known to their team are
        created and appended to ``team.players``.

    Raises:
        DeserializationError: on an unknown ball owning team or ball state.
    """
    text = str(line)
    frame_id, players, ball = text.strip().split(":")[:3]

    players_data = {}
    # The player section ends with ";", so the last split item is dropped.
    for entry in players.split(";")[:-1]:
        team_id, target_id, jersey_no, x, y, speed = entry.split(",")
        team_id = int(team_id)

        if team_id not in (0, 1):
            # it's probably -1, but make sure it doesn't crash
            continue
        team = teams[0] if team_id == 1 else teams[1]

        player = team.get_player_by_jersey_number(jersey_no)
        if not player:
            player = Player(
                player_id=f"{team.ground}_{jersey_no}",
                team=team,
                jersey_no=int(jersey_no),
            )
            team.players.append(player)

        position = Point(float(x), float(y))
        players_data[player] = PlayerData(
            coordinates=position, speed=float(speed)
        )

    ball_fields = ball.rstrip(";").split(",")[:6]
    ball_x, ball_y, ball_z, ball_speed, owner_code, state_code = ball_fields

    frame_id = int(frame_id)

    if owner_code not in ("H", "A"):
        raise DeserializationError(
            f"Unknown ball owning team: {owner_code}"
        )
    ball_owning_team = teams[0] if owner_code == "H" else teams[1]

    if state_code not in ("Alive", "Dead"):
        raise DeserializationError(f"Unknown ball state: {state_code}")
    ball_state = BallState.ALIVE if state_code == "Alive" else BallState.DEAD

    ball_coordinates = Point3D(float(ball_x), float(ball_y), float(ball_z))
    return Frame(
        frame_id=frame_id,
        timestamp=frame_id / frame_rate - period.start_timestamp,
        ball_coordinates=ball_coordinates,
        ball_state=ball_state,
        ball_owning_team=ball_owning_team,
        players_data=players_data,
        period=period,
        other_data={},
    )
def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
    """
    Deserialize DataFactory event data into an `EventDataset`.

    Parameters
    ----------
    inputs : DatafactoryInputs
        `inputs.event_data` is read with `json.load`; the decoded document
        must provide the keys "match", "scoreStatus", "incidences",
        "players" and "teams".

    Returns
    -------
    dataset : EventDataset
        Events transformed by the serializer's transformer, together with
        metadata holding teams, periods and score.

    Notes
    -----
    - Periods are only created for halves that have an "end" status update;
      events belonging to a half without a period are skipped.
    - For raw events whose type is in `BALL_OUT_EVENTS`, a synthetic
      `BallOutEvent` is emitted before the event itself. The synthetic event
      is filtered on its own merits through `should_include_event`.
    """
    transformer = self.get_transformer(length=2, width=2)

    with performance_logging("load data", logger=logger):
        data = json.load(inputs.event_data)
        match = data["match"]
        score_data = data["scoreStatus"]
        incidences = data["incidences"]
        players_data = data["players"]
        teams_data = data["teams"]

    with performance_logging("parse data", logger=logger):
        teams = []
        scores = []
        team_ids = (
            (Ground.HOME, str(match["homeTeamId"])),
            (Ground.AWAY, str(match["awayTeamId"])),
        )
        for ground, team_id in team_ids:
            team = Team(
                team_id=team_id,
                name=teams_data[team_id]["name"],
                ground=ground,
            )
            team.players = [
                Player(
                    player_id=player_id,
                    team=team,
                    first_name=player["name"]["first"],
                    last_name=player["name"]["last"],
                    name=player["name"]["shortName"]
                    or player["name"]["nick"],
                    jersey_no=player["squadNo"],
                    starting=not player["substitute"],
                )
                for player_id, player in players_data.items()
                if str(player["teamId"]) == team_id
            ]
            teams.append(team)
            # A missing score entry yields None rather than an error.
            scores.append(score_data.get(team_id, {}).get("score"))
        score = Score(home=scores[0], away=scores[1])

        # setup periods
        status = incidences.pop(DF_EVENT_CLASS_STATUS)
        # start timestamps are fixed
        start_ts = {
            1: 0,
            2: 45 * 60,
            3: 90 * 60,
            4: 105 * 60,
            5: 120 * 60,
        }
        # check for end status updates to setup periods
        end_event_types = {
            DF_EVENT_TYPE_STATUS_MATCH_END,
            DF_EVENT_TYPE_STATUS_FIRST_HALF_END,
            DF_EVENT_TYPE_STATUS_SECOND_HALF_END,
            DF_EVENT_TYPE_STATUS_FIRST_EXTRA_END,
            DF_EVENT_TYPE_STATUS_SECOND_EXTRA_END,
        }
        periods = {}
        for status_update in status.values():
            if status_update["type"] not in end_event_types:
                continue
            half = status_update["t"]["half"]
            end_ts = parse_str_ts(status_update)
            periods[half] = Period(
                id=half,
                start_timestamp=start_ts[half],
                end_timestamp=end_ts,
                # Odd halves are HOME_AWAY, even halves AWAY_HOME.
                attacking_direction=AttackingDirection.HOME_AWAY
                if half % 2 == 1
                else AttackingDirection.AWAY_HOME,
            )

        # exclude goals, already listed as shots too
        incidences.pop(DF_EVENT_CLASS_GOALS)
        raw_events = [
            (k, e_id, e)
            for k in incidences
            for e_id, e in incidences[k].items()
        ]
        # sort events by timestamp, event_id
        raw_events.sort(
            key=lambda e: (
                e[2]["t"]["half"],
                e[2]["t"]["m"],
                e[2]["t"]["s"] or 0,
                e[1],
            )
        )

        home_team, away_team = teams
        events = []
        previous_event = next_event = None
        for i, (e_class, e_id, raw_event) in enumerate(raw_events):
            period = periods.get(raw_event["t"]["half"])
            if period is None:
                # skip invalid event
                continue

            timestamp = parse_str_ts(raw_event)
            # Never carry a "previous event" across a change of half.
            if (
                previous_event is not None
                and previous_event["t"]["half"] != raw_event["t"]["half"]
            ):
                previous_event = None
            next_event = (
                raw_events[i + 1][2] if i + 1 < len(raw_events) else None
            )

            team, player = _get_team_and_player(
                raw_event, home_team, away_team
            )
            event_base_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp,
                ball_owning_team=team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=e_id,
                team=team,
                player=player,
                coordinates=(
                    _parse_coordinates(raw_event["coord"]["1"])
                    if "coord" in raw_event
                    else None
                ),
                raw_event=raw_event,
                result=None,
                qualifiers=None,
            )

            if e_class in DF_EVENT_CLASS_PASSES:
                pass_event_kwargs = _parse_pass(
                    raw_event=raw_event,
                    team=team,
                    previous_event=previous_event,
                    next_event=next_event,
                )
                event_base_kwargs.update(pass_event_kwargs)
                event = PassEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_SHOTS:
                shot_event_kwargs = _parse_shot(
                    raw_event=raw_event,
                    previous_event=previous_event,
                )
                event_base_kwargs.update(shot_event_kwargs)
                event = ShotEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_STEALINGS:
                event = RecoveryEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_FOULS:
                # NOTE: could use qualifiers? (hand, foul, penalty?)
                # switch possession team
                event_base_kwargs["ball_owning_team"] = (
                    home_team if team == away_team else away_team
                )
                event = FoulCommittedEvent.create(**event_base_kwargs)

            elif e_class in DF_EVENT_CLASS_CARDS:
                card_kwargs = _parse_card(raw_event=raw_event)
                event_base_kwargs.update(card_kwargs)
                event = CardEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_SUBSTITUTIONS:
                substitution_event_kwargs = _parse_substitution(
                    raw_event=raw_event, team=team
                )
                event_base_kwargs.update(substitution_event_kwargs)
                event = SubstitutionEvent.create(**event_base_kwargs)

            else:
                # otherwise, a generic event
                event = GenericEvent.create(
                    event_name=e_class,
                    **event_base_kwargs,
                )

            # check if the event implies ball was out of the field and add a synthetic out event
            if raw_event["type"] in BALL_OUT_EVENTS:
                ball_out_event = BallOutEvent.create(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=team,
                    ball_state=BallState.DEAD,
                    # from Event
                    event_id=e_id,
                    team=team,
                    player=player,
                    coordinates=event.coordinates,
                    raw_event=raw_event,
                    result=None,
                    qualifiers=None,
                )
                # Filter the synthetic event itself, not the event that
                # triggered it: otherwise the ball-out record would be kept
                # or dropped according to the wrong event.
                if self.should_include_event(ball_out_event):
                    events.append(
                        transformer.transform_event(ball_out_event)
                    )

            if self.should_include_event(event):
                events.append(transformer.transform_event(event))

            # only consider as a previous_event a ball-in-play event
            if e_class not in (
                DF_EVENT_CLASS_YELLOW_CARDS,
                DF_EVENT_CLASS_RED_CARDS,
                DF_EVENT_CLASS_SUBSTITUTIONS,
                DF_EVENT_CLASS_PENALTY_SHOOTOUT,
            ):
                previous_event = raw_event

    metadata = Metadata(
        teams=teams,
        periods=sorted(periods.values(), key=lambda p: p.id),
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        frame_rate=None,
        orientation=Orientation.HOME_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        score=score,
        provider=Provider.DATAFACTORY,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : SkillCornerInputs
        `inputs.meta_data` and `inputs.raw_data` are both loaded through
        `self.__load_json`.

    Returns
    -------
    dataset : TrackingDataset

    Notes
    -----
    The instance attributes `sample_rate`, `include_empty_frames` and
    `limit` control which raw frames end up in the dataset. The frame rate
    stored in the metadata is fixed at 10.
    """
    metadata = self.__load_json(inputs.meta_data)
    raw_data = self.__load_json(inputs.raw_data)

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_data)

        # Lookup tables keyed by team id / trackable object id.
        teamdict = {
            metadata["home_team"].get("id"): "home_team",
            metadata["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {
            player["trackable_object"]: player["team_id"]
            for player in metadata["players"]
        }

        player_dict = {
            player["trackable_object"]: player
            for player in metadata["players"]
        }

        referee_dict = {
            ref["trackable_object"]: "referee"
            for ref in metadata["referees"]
        }

        ball_id = metadata["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = metadata["pitch_width"]
        pitch_size_length = metadata["pitch_length"]

        transformer = self.get_transformer(
            length=pitch_size_length, width=pitch_size_width
        )

        home_team_id = metadata["home_team"]["id"]
        away_team_id = metadata["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=metadata["home_team"]["name"],
            ground=Ground.HOME,
        )
        away_team = Team(
            team_id=away_team_id,
            name=metadata["away_team"]["name"],
            ground=Ground.AWAY,
        )

        teams = [home_team, away_team]

        for player_id in player_dict.keys():
            player = player_dict.get(player_id)
            team_id = player["team_id"]

            # NOTE(review): there is no `else` branch. A player whose
            # team_id matches neither team reuses the `team`/`team_string`
            # left over from the previous iteration (or fails on the first
            # one) - confirm whether that can occur in real metadata.
            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            # The dict key is the trackable object id; the Player's own id
            # is built from the team ground and the jersey number.
            players[team_string][player_id] = Player(
                player_id=f"{team.ground}_{player['number']}",
                team=team,
                jersey_no=player["number"],
                name=f"{player['first_name']} {player['last_name']}",
                first_name=player["first_name"],
                last_name=player["last_name"],
                starting=player["start_time"] == "00:00:00",
                position=Position(
                    position_id=player["player_role"].get("id"),
                    name=player["player_role"].get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

    # Passed to `_get_frame_data` for every frame, alongside `players`.
    anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Yields the sampled raw frames; frames without a period are
            # ignored and do not advance the sampling counter.
            n = 0
            sample = 1.0 / self.sample_rate

            for frame in raw_data:
                frame_period = frame["period"]

                if frame_period is not None:
                    if n % sample == 0:
                        yield frame
                    n += 1

        frames = []

        n_frames = 0
        for _frame in _iter():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if self.include_empty_frames or len(_frame["data"]) > 0:
                frame = self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    _frame,
                )

                frame = transformer.transform_frame(frame)

                frames.append(frame)
                n_frames += 1

                # A falsy limit (0 / None) means "no limit".
                if self.limit and n_frames >= self.limit:
                    break

    self._set_skillcorner_attacking_directions(frames, periods)

    frame_rate = 10

    # Orientation is derived from the attacking direction stored under
    # key/index 1 of `periods`.
    orientation = (
        Orientation.HOME_TEAM
        if periods[1].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.AWAY_TEAM
    )

    # From here on `metadata` is rebound from the raw dict to the Metadata
    # object; the score is read from the raw dict while building it.
    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=Score(
            home=metadata["home_team_score"],
            away=metadata["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def __create_iterator(
    self,
    data: Readable,
    sample_rate: float,
    frame_rate: int,
    ground: Ground,
) -> Iterator:
    """
    Lazily turn the raw lines of one team's tracking file into partial frames.

    Line 0 provides the team name, line 1 the jersey numbers, line 2 is
    skipped and every following line is one frame. The periods are created
    and extended while iterating.

    Notes:
        1. the y-axis is flipped because Metrica use (y, -y) instead of (-y, y)
    """
    team = None
    frame_idx = 0
    frame_sample = 1 / sample_rate
    player_jersey_numbers = []
    period = None

    for line_no, raw_line in enumerate(data):
        decoded = raw_line.strip().decode("ascii")
        columns = decoded.split(",")

        if line_no == 0:
            team = Team(team_id=str(ground), name=columns[3], ground=ground)
            continue

        if line_no == 1:
            # Every second column from index 3 on, excluding the last two
            # columns, holds a jersey number.
            player_jersey_numbers = columns[3:-2:2]
            players = []
            for jersey_number in player_jersey_numbers:
                players.append(
                    Player(
                        player_id=f"{team.ground}_{jersey_number}",
                        jersey_no=int(jersey_number),
                        team=team,
                    )
                )
            team.players = players
            continue

        if line_no == 2:
            # consider doing some validation on the columns
            continue

        period_id = int(columns[0])
        frame_id = int(columns[1])
        frame_time = frame_id / frame_rate

        if period is None or period.id != period_id:
            period = Period(
                id=period_id,
                start_timestamp=frame_time,
                end_timestamp=frame_time,
            )
        else:
            # consider not update this every frame for performance reasons
            period.end_timestamp = frame_time

        if frame_idx % frame_sample == 0:
            players_coordinates = {}
            for idx, player in enumerate(players):
                x_raw = columns[3 + idx * 2]
                if x_raw != "NaN":
                    players_coordinates[player] = Point(
                        x=float(x_raw),
                        y=1 - float(columns[3 + idx * 2 + 1]),
                    )

            if columns[-2] != "NaN":
                ball_coordinates = Point(
                    x=float(columns[-2]), y=1 - float(columns[-1])
                )
            else:
                ball_coordinates = None

            yield self.__PartialFrame(
                team=team,
                period=period,
                frame_id=frame_id,
                players_coordinates=players_coordinates,
                ball_coordinates=ball_coordinates,
            )

        frame_idx += 1
def deserialize(self, inputs: SecondSpectrumInputs) -> TrackingDataset:
    """
    Deserialize Second Spectrum tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : SecondSpectrumInputs
        `inputs.meta_data` holds the match metadata, either as JSON (first
        byte is "{") or as XML. `inputs.additional_meta_data` is an optional
        JSON document with team and player details; when the main metadata is
        JSON it is used for those details as well. `inputs.raw_data` holds
        one JSON document per line, one line per frame.

    Returns
    -------
    dataset : TrackingDataset

    Notes
    -----
    Team and player metadata is best-effort: when it cannot be parsed a
    warning (including the traceback) is logged on the module logger and
    loading continues with whatever team information was set up to that
    point. Only `Exception` subclasses are handled that way, so
    `KeyboardInterrupt` and `SystemExit` still propagate.
    """
    metadata = None

    # Handles the XML metadata that contains the pitch dimensions and frame info
    with performance_logging("Loading XML metadata", logger=logger):
        # The meta data can also be in JSON format. In that case
        # it also contains the 'additional metadata'.
        # First do a 'peek' to determine the char
        first_byte = inputs.meta_data.read(1)
        if first_byte == b"{":
            metadata = json.loads(first_byte + inputs.meta_data.read())

            frame_rate = int(metadata["fps"])
            pitch_size_height = float(metadata["pitchLength"])
            pitch_size_width = float(metadata["pitchWidth"])

            periods = []
            for period in metadata["periods"]:
                start_frame_id = int(period["startFrameIdx"])
                end_frame_id = int(period["endFrameIdx"])
                # Periods whose start and end are both 0 are left out.
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period["number"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )
        else:
            match = objectify.fromstring(
                first_byte + inputs.meta_data.read()
            ).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )

    # Default team initialisation
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    if inputs.additional_meta_data or metadata:
        with performance_logging("Loading JSON metadata", logger=logger):
            try:
                if inputs.additional_meta_data:
                    metadata = json.loads(
                        inputs.additional_meta_data.read()
                    )

                home_team_id = metadata["homeOptaId"]
                away_team_id = metadata["awayOptaId"]

                # Tries to parse (short) team names from the description string
                try:
                    home_name = (
                        metadata["description"].split("-")[0].strip()
                    )
                    away_name = (
                        metadata["description"]
                        .split("-")[1]
                        .split(":")[0]
                        .strip()
                    )
                except Exception:
                    # Any ordinary parsing problem falls back to the default
                    # names; interpreter-exit signals are not swallowed.
                    home_name, away_name = "home", "away"

                teams[0].team_id = home_team_id
                teams[0].name = home_name
                teams[1].team_id = away_team_id
                teams[1].name = away_name

                for team, team_str in zip(
                    teams, ["homePlayers", "awayPlayers"]
                ):
                    for player_data in metadata[team_str]:
                        # We use the attributes field of Player to store the extra IDs provided by the
                        # metadata. We designate the player_id to be the 'optaId' field as this is what's
                        # used as 'player_id' in the raw frame data file
                        player_attributes = {
                            k: v
                            for k, v in player_data.items()
                            if k in ["ssiId", "optaUuid"]
                        }

                        player = Player(
                            player_id=player_data["optaId"],
                            name=player_data["name"],
                            starting=player_data["position"] != "SUB",
                            position=player_data["position"],
                            team=team,
                            jersey_no=int(player_data["number"]),
                            attributes=player_attributes,
                        )
                        team.players.append(player)

            except Exception:  # TODO: More specific exception
                # Deliberately best-effort, but keep the cause visible and
                # use the module logger like the rest of this method.
                logger.warning(
                    "Optional JSON Metadata is malformed. Continuing without",
                    exc_info=True,
                )

    # Handles the tracking frame data
    with performance_logging("Loading data", logger=logger):
        # NOTE(review): "width" is passed as length and "height" as width;
        # in the JSON branch these come from "pitchWidth"/"pitchLength" -
        # confirm the mapping is intended.
        transformer = self.get_transformer(
            length=pitch_size_width, width=pitch_size_height
        )

        def _iter():
            # Yields the decoded frames selected by `only_alive` and the
            # sample rate; blank lines are ignored.
            n = 0
            sample = 1 / self.sample_rate

            for line_ in inputs.raw_data.readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # Each line is just json so we just parse it
                frame_data = json.loads(line_)

                if self.only_alive and not frame_data["live"]:
                    continue

                if n % sample == 0:
                    yield frame_data

                n += 1

        frames = []
        for n, frame_data in enumerate(_iter()):
            # The frame's 1-based period number indexes the periods list.
            period = periods[frame_data["period"] - 1]

            frame = self._frame_from_framedata(teams, period, frame_data)
            frame = transformer.transform_frame(frame)
            frames.append(frame)

            # The first frame seen for a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            if self.limit and n + 1 >= self.limit:
                break

    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SECONDSPECTRUM,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: StatsbombInputs) -> EventDataset:
    """
    Deserialize StatsBomb event data into an `EventDataset`.

    Parameters
    ----------
    inputs : StatsbombInputs
        `inputs.event_data` and `inputs.lineup_data` are both read with
        `json.load`; the lineup document is unpacked as (home, away).

    Returns
    -------
    dataset : EventDataset
        Events that pass `self.should_include_event`, each run through the
        serializer's transformer.

    Raises
    ------
    DeserializationError
        When an event's `team` or `possession_team` id matches neither
        lineup.
    """
    transformer = self.get_transformer(length=120, width=80)

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs.event_data)
        home_lineup, away_lineup = json.load(inputs.lineup_data)
        (
            shot_fidelity_version,
            xy_fidelity_version,
        ) = _determine_xy_fidelity_versions(raw_events)
        logger.info(
            f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
        )

    with performance_logging("parse data", logger=logger):
        # Ids (as strings) of every player listed in a "starting XI" event.
        starting_player_ids = {
            str(player["player"]["id"])
            for raw_event in raw_events
            if raw_event["type"]["id"] == SB_EVENT_TYPE_STARTING_XI
            for player in raw_event["tactics"]["lineup"]
        }
        # Formation per raw team id; the formation value is split into its
        # characters and re-joined with "-" before the enum lookup.
        starting_formations = {
            raw_event["team"]["id"]: FormationType(
                "-".join(list(str(raw_event["tactics"]["formation"])))
            )
            for raw_event in raw_events
            if raw_event["type"]["id"] == SB_EVENT_TYPE_STARTING_XI
        }

        home_team = Team(
            team_id=str(home_lineup["team_id"]),
            name=home_lineup["team_name"],
            ground=Ground.HOME,
            starting_formation=starting_formations[home_lineup["team_id"]],
        )
        home_team.players = [
            Player(
                player_id=str(player["player_id"]),
                team=home_team,
                name=player["player_name"],
                jersey_no=int(player["jersey_number"]),
                starting=str(player["player_id"]) in starting_player_ids,
            )
            for player in home_lineup["lineup"]
        ]

        away_team = Team(
            team_id=str(away_lineup["team_id"]),
            name=away_lineup["team_name"],
            ground=Ground.AWAY,
            starting_formation=starting_formations[away_lineup["team_id"]],
        )
        away_team.players = [
            Player(
                player_id=str(player["player_id"]),
                team=away_team,
                name=player["player_name"],
                jersey_no=int(player["jersey_number"]),
                starting=str(player["player_id"]) in starting_player_ids,
            )
            for player in away_lineup["lineup"]
        ]

        teams = [home_team, away_team]

        periods = []
        period = None
        events = []
        for raw_event in raw_events:
            # Resolve the acting and the possessing team by raw id.
            if raw_event["team"]["id"] == home_lineup["team_id"]:
                team = home_team
            elif raw_event["team"]["id"] == away_lineup["team_id"]:
                team = away_team
            else:
                raise DeserializationError(
                    f"Unknown team_id {raw_event['team']['id']}"
                )

            if (
                raw_event["possession_team"]["id"]
                == home_lineup["team_id"]
            ):
                possession_team = home_team
            elif (
                raw_event["possession_team"]["id"]
                == away_lineup["team_id"]
            ):
                possession_team = away_team
            else:
                raise DeserializationError(
                    f"Unknown possession_team_id: {raw_event['possession_team']}"
                )

            timestamp = parse_str_ts(raw_event["timestamp"])
            period_id = int(raw_event["period"])
            # A change of period id opens a new Period; otherwise the running
            # period's end is pushed forward to the current event.
            if not period or period.id != period_id:
                period = Period(
                    id=period_id,
                    start_timestamp=(
                        timestamp
                        if not period
                        # period = [start, end], add millisecond to prevent overlapping
                        else timestamp + period.end_timestamp + 0.001
                    ),
                    end_timestamp=None,
                )
                periods.append(period)
            else:
                period.end_timestamp = period.start_timestamp + timestamp

            # Not every raw event carries a "player" entry.
            player = None
            if "player" in raw_event:
                player = team.get_player_by_id(raw_event["player"]["id"])

            event_type = raw_event["type"]["id"]
            if event_type == SB_EVENT_TYPE_SHOT:
                fidelity_version = shot_fidelity_version
            elif event_type in (
                SB_EVENT_TYPE_CARRY,
                SB_EVENT_TYPE_DRIBBLE,
                SB_EVENT_TYPE_PASS,
            ):
                fidelity_version = xy_fidelity_version
            else:
                # TODO: Uh ohhhh.. don't know which one to pick
                fidelity_version = xy_fidelity_version

            # Keyword arguments shared by every event built for this raw
            # event (a single raw event can produce several events).
            generic_event_kwargs = {
                # from DataRecord
                "period": period,
                "timestamp": timestamp,
                "ball_owning_team": possession_team,
                "ball_state": BallState.ALIVE,
                # from Event
                "event_id": raw_event["id"],
                "team": team,
                "player": player,
                "coordinates": (
                    _parse_coordinates(
                        raw_event.get("location"),
                        fidelity_version,
                    )
                    if "location" in raw_event
                    else None
                ),
                "related_event_ids": raw_event.get("related_events", []),
                "raw_event": raw_event,
            }

            new_events = []
            if event_type == SB_EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(
                    pass_dict=raw_event["pass"],
                    team=team,
                    fidelity_version=fidelity_version,
                )
                pass_event = PassEvent.create(
                    # TODO: Consider moving this to _parse_pass
                    receive_timestamp=timestamp + raw_event["duration"],
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(pass_event)
            elif event_type == SB_EVENT_TYPE_SHOT:
                shot_event_kwargs = _parse_shot(
                    shot_dict=raw_event["shot"],
                )
                shot_event = ShotEvent.create(
                    **shot_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(shot_event)

            # For dribble and carry the definitions
            # are flipped between Statsbomb and kloppy
            elif event_type == SB_EVENT_TYPE_DRIBBLE:
                take_on_event_kwargs = _parse_take_on(
                    take_on_dict=raw_event["dribble"],
                )
                take_on_event = TakeOnEvent.create(
                    qualifiers=None,
                    **take_on_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(take_on_event)
            elif event_type == SB_EVENT_TYPE_CARRY:
                carry_event_kwargs = _parse_carry(
                    carry_dict=raw_event["carry"],
                    fidelity_version=fidelity_version,
                )
                carry_event = CarryEvent.create(
                    qualifiers=None,
                    # TODO: Consider moving this to _parse_carry
                    end_timestamp=timestamp + raw_event.get("duration", 0),
                    **carry_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(carry_event)

            # lineup affecting events
            elif event_type == SB_EVENT_TYPE_SUBSTITUTION:
                substitution_event_kwargs = _parse_substitution(
                    substitution_dict=raw_event["substitution"],
                    team=team,
                )
                substitution_event = SubstitutionEvent.create(
                    result=None,
                    qualifiers=None,
                    **substitution_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(substitution_event)
            elif event_type == SB_EVENT_TYPE_BAD_BEHAVIOUR:
                bad_behaviour_kwargs = _parse_bad_behaviour(
                    bad_behaviour_dict=raw_event.get("bad_behaviour", {}),
                )
                # Bad behaviour only produces an event when a card is shown.
                if "card" in bad_behaviour_kwargs:
                    card_kwargs = bad_behaviour_kwargs["card"]
                    card_event = CardEvent.create(
                        result=None,
                        qualifiers=None,
                        card_type=card_kwargs["card_type"],
                        **generic_event_kwargs,
                    )
                    new_events.append(card_event)
            elif event_type == SB_EVENT_TYPE_FOUL_COMMITTED:
                foul_committed_kwargs = _parse_foul_committed(
                    foul_committed_dict=raw_event.get(
                        "foul_committed", {}
                    ),
                )
                foul_committed_event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
                new_events.append(foul_committed_event)
                # A carded foul yields a second event after the foul itself.
                if "card" in foul_committed_kwargs:
                    card_kwargs = foul_committed_kwargs["card"]
                    card_event = CardEvent.create(
                        result=None,
                        qualifiers=None,
                        card_type=card_kwargs["card_type"],
                        **generic_event_kwargs,
                    )
                    new_events.append(card_event)
            elif event_type == SB_EVENT_TYPE_PLAYER_ON:
                player_on_event = PlayerOnEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
                new_events.append(player_on_event)
            elif event_type == SB_EVENT_TYPE_PLAYER_OFF:
                player_off_event = PlayerOffEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
                new_events.append(player_off_event)

            elif event_type == SB_EVENT_TYPE_RECOVERY:
                recovery_event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
                new_events.append(recovery_event)

            elif event_type == SB_EVENT_TYPE_FORMATION_CHANGE:
                formation_change_event_kwargs = _parse_formation_change(
                    raw_event["tactics"]["formation"]
                )
                formation_change_event = FormationChangeEvent.create(
                    result=None,
                    qualifiers=None,
                    **formation_change_event_kwargs,
                    **generic_event_kwargs,
                )
                new_events.append(formation_change_event)
            # rest: generic
            else:
                generic_event = GenericEvent.create(
                    result=None,
                    qualifiers=None,
                    event_name=raw_event["type"]["name"],
                    **generic_event_kwargs,
                )
                new_events.append(generic_event)

            for event in new_events:
                if self.should_include_event(event):
                    transformed_event = transformer.transform_event(event)
                    events.append(transformed_event)

                # Checks if the event ended out of the field and adds a synthetic out event
                if event.result in OUT_EVENT_RESULTS:
                    # The shared kwargs dict is mutated in place here, so the
                    # changes stay visible for later events of this raw event.
                    generic_event_kwargs["ball_state"] = BallState.DEAD
                    # NOTE(review): `receiver_coordinates` is read on every
                    # event whose result is in OUT_EVENT_RESULTS - confirm
                    # all such event types define that attribute.
                    if event.receiver_coordinates:
                        generic_event_kwargs[
                            "coordinates"
                        ] = event.receiver_coordinates

                    ball_out_event = BallOutEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                    if self.should_include_event(ball_out_event):
                        transformed_ball_out_event = (
                            transformer.transform_event(ball_out_event)
                        )
                        events.append(transformed_ball_out_event)

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        score=None,
        provider=Provider.STATSBOMB,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def _frame_from_line(cls, teams, period, line, frame_rate):
    """
    Parse one raw tracking line into a `Frame`.

    The line is split on ":" into a frame id, a player section and a ball
    section. The player section is a ";"-separated list of
    ``team_id,target_id,jersey_no,x,y,speed`` chunks; the ball section
    starts with ``x,y,z,speed,owning_team,state``.

    Args:
        teams: sequence of two teams. ``teams[0]`` is used for player
            team id ``1`` and ball owner ``"H"``, ``teams[1]`` for player
            team id ``0`` and ball owner ``"A"`` (presumably home and away).
        period: period the frame belongs to; its ``start_timestamp`` is
            subtracted from the frame time.
        line (str or bytes): the raw line. Bytes are decoded before parsing.
        frame_rate: number of frames per second, used to turn the frame id
            into a timestamp.

    Returns:
        Frame

    Raises:
        Exception: if a player's team id, the ball owning team or the ball
            state has an unknown value.

    Note:
        Players that are not yet part of their team are created on the fly
        and appended to ``team.players``.
    """
    # ``str()`` on bytes returns the repr ("b'...'") instead of the text,
    # which would corrupt the frame id, so byte input is decoded explicitly.
    if isinstance(line, (bytes, bytearray)):
        line = line.decode("utf-8")
    else:
        line = str(line)

    frame_id, players, ball = line.strip().split(":")[:3]

    players_coordinates = {}

    # The final chunk is dropped; presumably it is the empty string that
    # follows the trailing ";" of the player section.
    for player_data in players.split(";")[:-1]:
        team_id, _target_id, jersey_no, x, y, _speed = player_data.split(",")
        team_id = int(team_id)

        if team_id == 1:
            team = teams[0]
        elif team_id == 0:
            team = teams[1]
        else:
            raise Exception(f"Unknown team {team_id}")

        # Players store their jersey number as an int, so the lookup uses an
        # int as well; the raw text is still used to build the player id.
        jersey_number = int(jersey_no)
        player = team.get_player_by_jersey_number(jersey_number)

        if not player:
            player = Player(
                player_id=f"{team.ground}_{jersey_no}",
                team=team,
                jersey_no=jersey_number,
            )
            team.players.append(player)

        players_coordinates[player] = Point(float(x), float(y))

    # Only the x/y position, the owning team and the state of the ball are
    # used; the z coordinate and the speed are ignored.
    (
        ball_x,
        ball_y,
        _ball_z,
        _ball_speed,
        ball_owning_team,
        ball_state,
    ) = ball.rstrip(";").split(",")[:6]

    frame_id = int(frame_id)

    if ball_owning_team == "H":
        ball_owning_team = teams[0]
    elif ball_owning_team == "A":
        ball_owning_team = teams[1]
    else:
        raise Exception(f"Unknown ball owning team: {ball_owning_team}")

    if ball_state == "Alive":
        ball_state = BallState.ALIVE
    elif ball_state == "Dead":
        ball_state = BallState.DEAD
    else:
        raise Exception(f"Unknown ball state: {ball_state}")

    return Frame(
        frame_id=frame_id,
        # Timestamp is relative to the start of the period.
        timestamp=frame_id / frame_rate - period.start_timestamp,
        ball_coordinates=Point(float(ball_x), float(ball_y)),
        ball_state=ball_state,
        ball_owning_team=ball_owning_team,
        players_coordinates=players_coordinates,
        period=period,
    )
def deserialize(self, inputs: Dict[str, Readable],
                options: Dict = None) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'json' formatted raw data. input `metadata` should point to
        the json metadata data.
    options : dict
        Options for deserialization of the SkillCorner data. Possible
        options are:
        `include_empty_frames` (boolean): default = False to specify
        whether frames without any players_coordinates or the
        ball_coordinates should be loaded,
        `sample_rate` (float between 0 and 1) to specify the amount of
        frames that should be loaded and
        `limit` (int) to specify the max number of frames that will be
        returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    ValueError
        If a player in the metadata has a `team_id` that matches neither
        the home team nor the away team.

    Notes
    -----
    The home and away `Team` objects are also stored on the serializer as
    `self.home_team` and `self.away_team`. The frame rate of the resulting
    dataset is fixed at 10.

    Examples
    --------
    >>> serializer = SkillCornerSerializer()
    >>> with open("match_data.json", "rb") as meta:
    ...     with open("structured_data.json", "rb") as raw:
    ...         dataset = serializer.deserialize(
    ...             inputs={"metadata": meta, "raw_data": raw},
    ...             options={},
    ...         )
    """
    self.__validate_inputs(inputs)

    metadata = self.__load_json(inputs["metadata"])
    raw_data = self.__load_json(inputs["raw_data"])

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    include_empty_frames = bool(options.get("include_empty_frames", False))

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_data)

        teamdict = {
            metadata["home_team"].get("id"): "home_team",
            metadata["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {
            player["trackable_object"]: player["team_id"]
            for player in metadata["players"]
        }

        player_dict = {
            player["trackable_object"]: player
            for player in metadata["players"]
        }

        referee_dict = {
            ref["trackable_object"]: "referee"
            for ref in metadata["referees"]
        }

        ball_id = metadata["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = metadata["pitch_width"]
        pitch_size_length = metadata["pitch_length"]

        home_team_id = metadata["home_team"]["id"]
        away_team_id = metadata["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=metadata["home_team"]["name"],
            ground=Ground.HOME,
        )
        self.home_team = home_team

        away_team = Team(
            team_id=away_team_id,
            name=metadata["away_team"]["name"],
            ground=Ground.AWAY,
        )
        self.away_team = away_team

        teams = [home_team, away_team]

        for player_id, player in player_dict.items():
            team_id = player["team_id"]

            if team_id == home_team_id:
                team_string, team = "HOME", home_team
            elif team_id == away_team_id:
                team_string, team = "AWAY", away_team
            else:
                # Without this branch the player would either trigger an
                # UnboundLocalError or silently inherit the team of the
                # previously processed player.
                raise ValueError(
                    f"player with trackable_object `{player_id}` has "
                    f"team_id `{team_id}`, which matches neither the home "
                    f"team nor the away team."
                )

            players[team_string][player_id] = Player(
                player_id=f"{team.ground}_{player['number']}",
                team=team,
                jersey_no=player["number"],
                name=f"{player['first_name']} {player['last_name']}",
                first_name=player["first_name"],
                last_name=player["last_name"],
                starting=player["start_time"] == "00:00:00",
                position=Position(
                    position_id=player["player_role"].get("id"),
                    name=player["player_role"].get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

        # Filled by `_get_frame_data`; presumably with players that are
        # tracked but could not be identified - TODO confirm.
        anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Frames without a period are dropped and do not count towards
            # the sampling position.
            n = 0
            sample = 1.0 / sample_rate

            for frame in raw_data:
                frame_period = frame["period"]

                if frame_period is not None:
                    if n % sample == 0:
                        yield frame
                    n += 1

        frames = []

        n_frames = 0
        for _frame in _iter():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if include_empty_frames or len(_frame["data"]) > 0:
                frame = self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    _frame,
                )

                frames.append(frame)
                n_frames += 1

                if limit and n_frames >= limit:
                    break

    self._set_skillcorner_attacking_directions(frames, periods)

    frame_rate = 10

    # The orientation of the dataset follows the attacking direction of
    # the period stored under key/index 1.
    if periods[1].attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.HOME_TEAM
    else:
        orientation = Orientation.AWAY_TEAM

    # The pitch is centred on the origin and expressed in meters.
    half_length = pitch_size_length / 2
    half_width = pitch_size_width / 2

    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-half_length, half_length),
            y_dim=Dimension(-half_width, half_width),
            x_per_meter=1,
            y_per_meter=1,
        ),
        score=Score(
            home=metadata["home_team_score"],
            away=metadata["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
    )

    return TrackingDataset(
        records=frames,
        metadata=dataset_metadata,
    )