def deserialize(self, inputs: SportsCodeInputs) -> CodeDataset:
    """
    Build a `CodeDataset` from a SportsCode XML document.

    Every child of the `ALL_INSTANCES` element becomes one `Code` record.
    All records share a single period (id 1, starting at 0); its end
    timestamp is overwritten with the end of each instance as it is read,
    so it finishes at the end of the last instance in document order.

    Parameters
    ----------
    inputs : SportsCodeInputs
        `inputs.data` must expose `read()` returning the XML document.

    Returns
    -------
    dataset : CodeDataset
    """
    root = objectify.fromstring(inputs.data.read())

    # One shared period for the whole file; its end is updated while reading.
    shared_period = Period(id=1, start_timestamp=0, end_timestamp=0)

    records = []
    for instance_elm in root.ALL_INSTANCES.iterchildren():
        instance_end = float(instance_elm.end)
        instance_id = str(instance_elm.ID)
        instance_code = str(instance_elm.code)
        instance_start = float(instance_elm.start)
        # group -> parsed text, one entry per <label> child
        instance_labels = {
            str(label_elm.find("group")): parse_value(
                str(label_elm.find("text"))
            )
            for label_elm in instance_elm.iterchildren("label")
        }

        records.append(
            Code(
                period=shared_period,
                code_id=instance_id,
                code=instance_code,
                timestamp=instance_start,
                end_timestamp=instance_end,
                labels=instance_labels,
                ball_state=None,
                ball_owning_team=None,
            )
        )
        shared_period.end_timestamp = instance_end

    dataset_metadata = Metadata(
        teams=[],
        periods=[shared_period],
        pitch_dimensions=None,
        score=Score(0, 0),
        frame_rate=0.0,
        orientation=Orientation.NOT_SET,
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        provider=Provider.OTHER,
        coordinate_system=None,
    )
    return CodeDataset(metadata=dataset_metadata, records=records)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize Opta event data into an `EventDataset`.

    Parameters
    ----------
    inputs : dict
        `f24_data`: `Readable` holding the xml formatted event feed.
        `f7_data`: `Readable` holding the xml formatted f7 feed
        (teams, line-ups and score).
    options : dict
        Optional settings. `event_types` (list of str) restricts the
        returned events; each name is upper-cased and looked up in
        `EventType`. Valid types: "shot", "pass", "carry", "take_on" and
        "generic". Generic covers everything that is not one of the first
        four; those events are barely parsed, but every event carries a
        `raw_event` attribute with the original element so callers can
        parse it themselves.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    Exception
        When a team element has an unknown side, a line-up is empty, or an
        event references an unknown team id.

    Examples
    --------
    >>> serializer = OptaSerializer()
    >>> with open("123_f24.xml", "rb") as f24_data:
    >>>     with open("123_f7.xml", "rb") as f7_data:
    >>>         dataset = serializer.deserialize(
    >>>             inputs={'f24_data': f24_data, 'f7_data': f7_data},
    >>>             options={'event_types': ["pass", "take_on", "carry", "shot"]}
    >>>         )
    """
    self.__validate_inputs(inputs)
    options = options or {}

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs["f7_data"].read())
        f24_root = objectify.fromstring(inputs["f24_data"].read())

        wanted_event_types = [
            EventType[type_name.upper()]
            for type_name in options.get("event_types", [])
        ]

    with performance_logging("parse data", logger=logger):
        match_data_elm = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData"
        ).find(f7_root)
        team_elms = list(match_data_elm.iterchildren("TeamData"))

        home_score = away_score = None
        for team_elm in team_elms:
            side = team_elm.attrib["Side"]
            if side == "Home":
                home_score = team_elm.attrib["Score"]
                home_team = _team_from_xml_elm(team_elm, f7_root)
            elif side == "Away":
                away_score = team_elm.attrib["Score"]
                away_team = _team_from_xml_elm(team_elm, f7_root)
            else:
                raise Exception(f"Unknown side: {side}")

        score = Score(home=home_score, away=away_score)
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise Exception("LineUp incomplete")

        game_elm = f24_root.find("Game")

        # Only the two regular halves are tracked; events from any other
        # period id are skipped below.
        periods = [
            Period(id=half, start_timestamp=None, end_timestamp=None)
            for half in (1, 2)
        ]
        possession_team = None
        events = []

        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])

            period = next(
                (
                    candidate
                    for candidate in periods
                    if candidate.id == period_id
                ),
                None,
            )
            if period is None:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(f"Set start of period {period.id} to {timestamp}")
                period.start_timestamp = timestamp
                continue

            if type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(f"Set end of period {period.id} to {timestamp}")
                period.end_timestamp = timestamp
                continue

            if not period.start_timestamp:
                # the period has not started yet
                continue

            team_id = event_elm.attrib["team_id"]
            if team_id == home_team.team_id:
                team = home_team
            elif team_id == away_team.team_id:
                team = away_team
            else:
                raise Exception(f"Unknown team_id {team_id}")

            x = float(event_elm.attrib["x"])
            y = float(event_elm.attrib["y"])
            outcome = int(event_elm.attrib["outcome"])
            qualifiers = {
                int(q_elm.attrib["qualifier_id"]): q_elm.attrib.get("value")
                for q_elm in event_elm.iterchildren("Q")
            }

            player = None
            if "player_id" in event_elm.attrib:
                player = team.get_player_by_id(event_elm.attrib["player_id"])

            # possession sticks with the last team that produced a
            # ball-owning event
            if type_id in BALL_OWNING_EVENTS:
                possession_team = team

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=possession_team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_id,
                team=team,
                player=player,
                coordinates=Point(x=x, y=y),
                raw_event=event_elm,
            )

            if type_id == EVENT_TYPE_PASS:
                event = PassEvent(
                    **_parse_pass(qualifiers, outcome),
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                event = PassEvent(
                    **_parse_offside_pass(),
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_TAKE_ON:
                event = TakeOnEvent(
                    **_parse_take_on(outcome),
                    **generic_event_kwargs,
                )
            elif type_id in (
                EVENT_TYPE_SHOT_MISS,
                EVENT_TYPE_SHOT_POST,
                EVENT_TYPE_SHOT_SAVED,
                EVENT_TYPE_SHOT_GOAL,
            ):
                shot_event_kwargs = _parse_shot(
                    qualifiers,
                    type_id,
                    coordinates=generic_event_kwargs["coordinates"],
                )
                # shot-specific values win over the generic ones
                event = ShotEvent(
                    **{**generic_event_kwargs, **shot_event_kwargs}
                )
            else:
                event = GenericEvent(
                    **generic_event_kwargs,
                    result=None,
                    event_name=_get_event_type_name(type_id),
                )

            if not wanted_event_types or event.event_type in wanted_event_types:
                events.append(event)

    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)
        ),
        score=score,
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        provider=Provider.OPTA,
    )

    return EventDataset(metadata=dataset_metadata, records=events)
def load_metadata(metadata_file: Readable, provider: Provider = None) -> EPTSMetadata:
    """
    Parse an EPTS metadata XML document into an `EPTSMetadata` object.

    Parameters
    ----------
    metadata_file : Readable
        Object whose `read()` returns the XML metadata document.
    provider : Provider, optional
        Passed through unchanged to the resulting `EPTSMetadata`.

    Returns
    -------
    metadata : EPTSMetadata
        Teams (with players), periods, pitch dimensions, data format
        specifications, sensors, player channels, frame rate, score and
        orientation. The orientation is `FIXED_HOME_AWAY` /
        `FIXED_AWAY_HOME` when the first period has a known attacking
        direction, and `None` when there are no periods or the direction
        is not set.
    """
    root = objectify.fromstring(metadata_file.read())
    metadata = root.find("Metadata")

    score_path = objectify.ObjectPath(
        "Metadata.Sessions.Session[0].MatchParameters.Score"
    )
    score_elm = score_path.find(metadata)
    score = Score(
        home=score_elm.LocalTeamScore, away=score_elm.VisitingTeamScore
    )

    _team_map = {
        Ground.HOME: score_elm.attrib["idLocalTeam"],
        Ground.AWAY: score_elm.attrib["idVisitingTeam"],
    }

    _team_name_map = {
        team_elm.attrib["id"]: str(team_elm.find("Name"))
        for team_elm in metadata.find("Teams").iterchildren(tag="Team")
    }

    teams_metadata = {}
    for ground, team_id in _team_map.items():
        team = Team(
            team_id=team_id, name=_team_name_map[team_id], ground=ground
        )
        team.players = _load_players(metadata.find("Players"), team)
        teams_metadata[ground] = team

    data_format_specifications = _load_data_format_specifications(
        root.find("DataFormatSpecifications")
    )

    device_path = objectify.ObjectPath("Metadata.Devices.Device[0].Sensors")
    sensors = _load_sensors(device_path.find(metadata))

    _channel_map = {
        channel.channel_id: channel
        for sensor in sensors
        for channel in sensor.channels
    }

    _all_players = [
        player for team in teams_metadata.values() for player in team.players
    ]
    _player_map = {player.player_id: player for player in _all_players}

    player_channels = [
        PlayerChannel(
            player_channel_id=player_channel_elm.attrib["id"],
            player=_player_map[player_channel_elm.attrib["playerId"]],
            channel=_channel_map[player_channel_elm.attrib["channelId"]],
        )
        for player_channel_elm in metadata.find(
            "PlayerChannels"
        ).iterchildren(tag="PlayerChannel")
    ]

    frame_rate = int(metadata.find("GlobalConfig").find("FrameRate"))
    pitch_dimensions = _load_pitch_dimensions(metadata, sensors)
    periods = _load_periods(metadata.find("GlobalConfig"), frame_rate)

    # Without periods the attacking direction is unknown; treat it the same
    # as NOT_SET instead of letting it fall through to FIXED_AWAY_HOME.
    if periods:
        start_attacking_direction = periods[0].attacking_direction
    else:
        start_attacking_direction = AttackingDirection.NOT_SET

    if start_attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.FIXED_HOME_AWAY
    elif start_attacking_direction == AttackingDirection.AWAY_HOME:
        orientation = Orientation.FIXED_AWAY_HOME
    else:
        orientation = None

    # The orientation is handed to the result object. Previously it was
    # assigned onto the parsed XML element and then dropped in favour of a
    # hard-coded None.
    return EPTSMetadata(
        teams=list(teams_metadata.values()),
        periods=periods,
        pitch_dimensions=pitch_dimensions,
        data_format_specifications=data_format_specifications,
        player_channels=player_channels,
        frame_rate=frame_rate,
        sensors=sensors,
        score=score,
        orientation=orientation,
        provider=provider,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
    )
def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
    """
    Deserialize a Datafactory JSON event feed into an `EventDataset`.

    Parameters
    ----------
    inputs : DatafactoryInputs
        `inputs.event_data` must be a file-like object containing the JSON
        document (read with `json.load`).

    Returns
    -------
    dataset : EventDataset
        Events sorted by (half, minute, second, event id). Events whose
        half has no matching end-of-period status update are skipped. For
        raw events whose type is in `BALL_OUT_EVENTS` a synthetic
        `BallOutEvent` is inserted right before the event itself. Both the
        synthetic and the regular event are passed through
        `self.should_include_event` individually.
    """
    transformer = self.get_transformer(length=2, width=2)

    with performance_logging("load data", logger=logger):
        data = json.load(inputs.event_data)
        match = data["match"]
        score_data = data["scoreStatus"]
        incidences = data["incidences"]
        players_data = data["players"]
        teams_data = data["teams"]

    with performance_logging("parse data", logger=logger):
        teams = []
        scores = []
        team_ids = (
            (Ground.HOME, str(match["homeTeamId"])),
            (Ground.AWAY, str(match["awayTeamId"])),
        )
        for ground, team_id in team_ids:
            team = Team(
                team_id=team_id,
                name=teams_data[team_id]["name"],
                ground=ground,
            )
            team.players = [
                Player(
                    player_id=player_id,
                    team=team,
                    first_name=player["name"]["first"],
                    last_name=player["name"]["last"],
                    name=player["name"]["shortName"]
                    or player["name"]["nick"],
                    jersey_no=player["squadNo"],
                    starting=not player["substitute"],
                )
                for player_id, player in players_data.items()
                if str(player["teamId"]) == team_id
            ]
            teams.append(team)
            scores.append(score_data.get(team_id, {}).get("score"))
        score = Score(home=scores[0], away=scores[1])

        # setup periods
        status = incidences.pop(DF_EVENT_CLASS_STATUS)
        # start timestamps are fixed
        start_ts = {1: 0, 2: 45 * 60, 3: 90 * 60, 4: 105 * 60, 5: 120 * 60}
        # check for end status updates to setup periods
        end_event_types = {
            DF_EVENT_TYPE_STATUS_MATCH_END,
            DF_EVENT_TYPE_STATUS_FIRST_HALF_END,
            DF_EVENT_TYPE_STATUS_SECOND_HALF_END,
            DF_EVENT_TYPE_STATUS_FIRST_EXTRA_END,
            DF_EVENT_TYPE_STATUS_SECOND_EXTRA_END,
        }
        periods = {}
        for status_update in status.values():
            if status_update["type"] not in end_event_types:
                continue
            half = status_update["t"]["half"]
            end_ts = parse_str_ts(status_update)
            periods[half] = Period(
                id=half,
                start_timestamp=start_ts[half],
                end_timestamp=end_ts,
                # odd halves: home attacks first direction, even: mirrored
                attacking_direction=AttackingDirection.HOME_AWAY
                if half % 2 == 1
                else AttackingDirection.AWAY_HOME,
            )

        # exclude goals, already listed as shots too; a feed without a
        # goals class is tolerated
        incidences.pop(DF_EVENT_CLASS_GOALS, None)

        raw_events = [
            (e_class, e_id, e)
            for e_class in incidences
            for e_id, e in incidences[e_class].items()
        ]
        # sort events by timestamp, event_id
        raw_events.sort(
            key=lambda e: (
                e[2]["t"]["half"],
                e[2]["t"]["m"],
                e[2]["t"]["s"] or 0,
                e[1],
            )
        )

        home_team, away_team = teams
        events = []
        previous_event = next_event = None
        for i, (e_class, e_id, raw_event) in enumerate(raw_events):
            period = periods.get(raw_event["t"]["half"])
            if period is None:
                # skip invalid event
                continue

            timestamp = parse_str_ts(raw_event)

            # never link events across a period boundary
            if (
                previous_event is not None
                and previous_event["t"]["half"] != raw_event["t"]["half"]
            ):
                previous_event = None
            next_event = (
                raw_events[i + 1][2] if i + 1 < len(raw_events) else None
            )

            team, player = _get_team_and_player(
                raw_event, home_team, away_team
            )

            event_base_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp,
                ball_owning_team=team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=e_id,
                team=team,
                player=player,
                coordinates=(
                    _parse_coordinates(raw_event["coord"]["1"])
                    if "coord" in raw_event
                    else None
                ),
                raw_event=raw_event,
                result=None,
                qualifiers=None,
            )

            if e_class in DF_EVENT_CLASS_PASSES:
                pass_event_kwargs = _parse_pass(
                    raw_event=raw_event,
                    team=team,
                    previous_event=previous_event,
                    next_event=next_event,
                )
                event_base_kwargs.update(pass_event_kwargs)
                event = PassEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_SHOTS:
                shot_event_kwargs = _parse_shot(
                    raw_event=raw_event,
                    previous_event=previous_event,
                )
                event_base_kwargs.update(shot_event_kwargs)
                event = ShotEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_STEALINGS:
                event = RecoveryEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_FOULS:
                # NOTE: could use qualifiers? (hand, foul, penalty?)
                # switch possession team
                event_base_kwargs["ball_owning_team"] = (
                    home_team if team == away_team else away_team
                )
                event = FoulCommittedEvent.create(**event_base_kwargs)

            elif e_class in DF_EVENT_CLASS_CARDS:
                card_kwargs = _parse_card(raw_event=raw_event)
                event_base_kwargs.update(card_kwargs)
                event = CardEvent.create(**event_base_kwargs)

            elif e_class == DF_EVENT_CLASS_SUBSTITUTIONS:
                substitution_event_kwargs = _parse_substitution(
                    raw_event=raw_event, team=team
                )
                event_base_kwargs.update(substitution_event_kwargs)
                event = SubstitutionEvent.create(**event_base_kwargs)

            else:
                # otherwise, a generic event
                event = GenericEvent.create(
                    event_name=e_class,
                    **event_base_kwargs,
                )

            # check if the event implies ball was out of the field and add
            # a synthetic out event
            if raw_event["type"] in BALL_OUT_EVENTS:
                ball_out_event = BallOutEvent.create(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=team,
                    ball_state=BallState.DEAD,
                    # from Event
                    event_id=e_id,
                    team=team,
                    player=player,
                    coordinates=event.coordinates,
                    raw_event=raw_event,
                    result=None,
                    qualifiers=None,
                )
                # Filter the synthetic event on its own merits. Checking
                # the triggering event instead would leak ball-out events
                # into filtered results and hide them when explicitly
                # requested.
                if self.should_include_event(ball_out_event):
                    events.append(
                        transformer.transform_event(ball_out_event)
                    )

            if self.should_include_event(event):
                events.append(transformer.transform_event(event))

            # only consider as a previous_event a ball-in-play event
            if e_class not in (
                DF_EVENT_CLASS_YELLOW_CARDS,
                DF_EVENT_CLASS_RED_CARDS,
                DF_EVENT_CLASS_SUBSTITUTIONS,
                DF_EVENT_CLASS_PENALTY_SHOOTOUT,
            ):
                previous_event = raw_event

    metadata = Metadata(
        teams=teams,
        periods=sorted(periods.values(), key=lambda p: p.id),
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        frame_rate=None,
        orientation=Orientation.HOME_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        score=score,
        provider=Provider.DATAFACTORY,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Reads the match metadata from `inputs.meta_data` and the raw frames
    from `inputs.raw_data`. Frames without a period are dropped, the
    remainder is sampled according to `self.sample_rate`, empty frames are
    kept only when `self.include_empty_frames` is set, and at most
    `self.limit` frames are returned when a limit is configured. Every
    frame is passed through the transformer obtained from
    `self.get_transformer`.

    Parameters
    ----------
    inputs : SkillCornerInputs

    Returns
    -------
    dataset : TrackingDataset
    """
    match_info = self.__load_json(inputs.meta_data)
    raw_frames = self.__load_json(inputs.raw_data)

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_frames)

        teamdict = {
            match_info["home_team"].get("id"): "home_team",
            match_info["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {}
        player_dict = {}
        for player_info in match_info["players"]:
            trackable_id = player_info["trackable_object"]
            player_id_to_team_dict[trackable_id] = player_info["team_id"]
            player_dict[trackable_id] = player_info

        referee_dict = {
            referee["trackable_object"]: "referee"
            for referee in match_info["referees"]
        }

        ball_id = match_info["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = match_info["pitch_width"]
        pitch_size_length = match_info["pitch_length"]

        transformer = self.get_transformer(
            length=pitch_size_length, width=pitch_size_width
        )

        home_team_id = match_info["home_team"]["id"]
        away_team_id = match_info["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=match_info["home_team"]["name"],
            ground=Ground.HOME,
        )
        away_team = Team(
            team_id=away_team_id,
            name=match_info["away_team"]["name"],
            ground=Ground.AWAY,
        )
        teams = [home_team, away_team]

        for trackable_id, player_info in player_dict.items():
            team_id = player_info["team_id"]

            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            role = player_info["player_role"]
            players[team_string][trackable_id] = Player(
                player_id=f"{team.ground}_{player_info['number']}",
                team=team,
                jersey_no=player_info["number"],
                name=f"{player_info['first_name']} {player_info['last_name']}",
                first_name=player_info["first_name"],
                last_name=player_info["last_name"],
                starting=player_info["start_time"] == "00:00:00",
                position=Position(
                    position_id=role.get("id"),
                    name=role.get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

    anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _sampled_frames():
            """Yield the in-period frames selected by the sample rate."""
            step = 1.0 / self.sample_rate
            position = 0
            for candidate in raw_frames:
                if candidate["period"] is None:
                    continue
                if position % step == 0:
                    yield candidate
                position += 1

        frames = []
        for raw_frame in _sampled_frames():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if not (self.include_empty_frames or len(raw_frame["data"]) > 0):
                continue

            frame = self._get_frame_data(
                teams,
                teamdict,
                players,
                player_id_to_team_dict,
                periods,
                player_dict,
                anon_players,
                ball_id,
                referee_dict,
                raw_frame,
            )
            frames.append(transformer.transform_frame(frame))

            if self.limit and len(frames) >= self.limit:
                break

    self._set_skillcorner_attacking_directions(frames, periods)

    frame_rate = 10

    if periods[1].attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.HOME_TEAM
    else:
        orientation = Orientation.AWAY_TEAM

    target_coordinate_system = transformer.get_to_coordinate_system()
    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=target_coordinate_system.pitch_dimensions,
        score=Score(
            home=match_info["home_team_score"],
            away=match_info["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=dataset_metadata,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        `raw_data`: `Readable` object containing the json formatted
        raw tracking data.
        `metadata`: `Readable` object containing the json match metadata.
    options : dict
        Optional settings:
        `include_empty_frames` (boolean, default False): whether frames
        without any player or ball data should be loaded.
        `sample_rate` (float between 0 and 1, default 1.0): the share of
        frames that should be loaded.
        `limit` (int, default 0): the maximum number of frames returned;
        0 means no limit.

    Returns
    -------
    dataset : TrackingDataset

    Examples
    --------
    >>> serializer = SkillCornerSerializer()
    >>> with open("match_data.json", "rb") as meta:
    >>>     with open("structured_data.json", "rb") as raw:
    >>>         dataset = serializer.deserialize(
    >>>             inputs={'metadata': meta, 'raw_data': raw},
    >>>             options={}
    >>>         )
    """
    self.__validate_inputs(inputs)

    match_info = self.__load_json(inputs["metadata"])
    raw_frames = self.__load_json(inputs["raw_data"])

    options = options or {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    include_empty_frames = bool(options.get("include_empty_frames", False))

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_frames)

        teamdict = {
            match_info["home_team"].get("id"): "home_team",
            match_info["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {}
        player_dict = {}
        for player_info in match_info["players"]:
            trackable_id = player_info["trackable_object"]
            player_id_to_team_dict[trackable_id] = player_info["team_id"]
            player_dict[trackable_id] = player_info

        referee_dict = {
            referee["trackable_object"]: "referee"
            for referee in match_info["referees"]
        }

        ball_id = match_info["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = match_info["pitch_width"]
        pitch_size_length = match_info["pitch_length"]

        home_team_id = match_info["home_team"]["id"]
        away_team_id = match_info["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=match_info["home_team"]["name"],
            ground=Ground.HOME,
        )
        self.home_team = home_team

        away_team = Team(
            team_id=away_team_id,
            name=match_info["away_team"]["name"],
            ground=Ground.AWAY,
        )
        self.away_team = away_team

        teams = [home_team, away_team]

        for trackable_id, player_info in player_dict.items():
            team_id = player_info["team_id"]

            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            role = player_info["player_role"]
            players[team_string][trackable_id] = Player(
                player_id=f"{team.ground}_{player_info['number']}",
                team=team,
                jersey_no=player_info["number"],
                name=f"{player_info['first_name']} {player_info['last_name']}",
                first_name=player_info["first_name"],
                last_name=player_info["last_name"],
                starting=player_info["start_time"] == "00:00:00",
                position=Position(
                    position_id=role.get("id"),
                    name=role.get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

    anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _sampled_frames():
            """Yield the in-period frames selected by the sample rate."""
            step = 1.0 / sample_rate
            position = 0
            for candidate in raw_frames:
                if candidate["period"] is None:
                    continue
                if position % step == 0:
                    yield candidate
                position += 1

        frames = []
        for raw_frame in _sampled_frames():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if not (include_empty_frames or len(raw_frame["data"]) > 0):
                continue

            frames.append(
                self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    raw_frame,
                )
            )

            if limit and len(frames) >= limit:
                break

    self._set_skillcorner_attacking_directions(frames, periods)

    frame_rate = 10

    if periods[1].attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.HOME_TEAM
    else:
        orientation = Orientation.AWAY_TEAM

    # pitch is centred on the origin, one unit per meter
    half_length = pitch_size_length / 2
    half_width = pitch_size_width / 2
    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-half_length, half_length),
            y_dim=Dimension(-half_width, half_width),
            x_per_meter=1,
            y_per_meter=1,
        ),
        score=Score(
            home=match_info["home_team_score"],
            away=match_info["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
    )

    return TrackingDataset(
        records=frames,
        metadata=dataset_metadata,
    )
def deserialize(self, inputs: OptaInputs) -> EventDataset:
    """
    Deserialize Opta F24/F7 feeds into an `EventDataset`.

    Teams, line-ups and the score are read from `inputs.f7_data`; the
    events come from `inputs.f24_data`. Only events belonging to period 1
    or 2 are considered, and events that occur before their period's start
    event are ignored. Each event is filtered with
    `self.should_include_event` and passed through the transformer
    obtained from `self.get_transformer`.

    Parameters
    ----------
    inputs : OptaInputs

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    DeserializationError
        When a team element has an unknown side, a line-up is empty, or an
        event references an unknown team id.
    """
    transformer = self.get_transformer(length=100, width=100)

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs.f7_data.read())
        f24_root = objectify.fromstring(inputs.f24_data.read())

    with performance_logging("parse data", logger=logger):
        match_data_elm = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData"
        ).find(f7_root)
        team_elms = list(match_data_elm.iterchildren("TeamData"))

        home_score = away_score = None
        for team_elm in team_elms:
            side = team_elm.attrib["Side"]
            if side == "Home":
                home_score = team_elm.attrib["Score"]
                home_team = _team_from_xml_elm(team_elm, f7_root)
            elif side == "Away":
                away_score = team_elm.attrib["Score"]
                away_team = _team_from_xml_elm(team_elm, f7_root)
            else:
                raise DeserializationError(f"Unknown side: {side}")

        score = Score(home=home_score, away=away_score)
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise DeserializationError("LineUp incomplete")

        game_elm = f24_root.find("Game")

        # Only the two regular halves are tracked; events from any other
        # period id are skipped below.
        periods = [
            Period(id=half, start_timestamp=None, end_timestamp=None)
            for half in (1, 2)
        ]
        possession_team = None
        events = []

        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])

            period = next(
                (
                    candidate
                    for candidate in periods
                    if candidate.id == period_id
                ),
                None,
            )
            if period is None:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(f"Set start of period {period.id} to {timestamp}")
                period.start_timestamp = timestamp
                continue

            if type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(f"Set end of period {period.id} to {timestamp}")
                period.end_timestamp = timestamp
                continue

            if not period.start_timestamp:
                # the period has not started yet
                continue

            team_id = event_elm.attrib["team_id"]
            if team_id == home_team.team_id:
                team = home_team
            elif team_id == away_team.team_id:
                team = away_team
            else:
                raise DeserializationError(f"Unknown team_id {team_id}")

            x = float(event_elm.attrib["x"])
            y = float(event_elm.attrib["y"])
            outcome = int(event_elm.attrib["outcome"])
            raw_qualifiers = {
                int(q_elm.attrib["qualifier_id"]): q_elm.attrib.get("value")
                for q_elm in event_elm.iterchildren("Q")
            }

            player = None
            if "player_id" in event_elm.attrib:
                player = team.get_player_by_id(event_elm.attrib["player_id"])

            # possession sticks with the last team that produced a
            # ball-owning event
            if type_id in BALL_OWNING_EVENTS:
                possession_team = team

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=possession_team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_id,
                team=team,
                player=player,
                coordinates=Point(x=x, y=y),
                raw_event=event_elm,
            )

            if type_id == EVENT_TYPE_PASS:
                event = PassEvent.create(
                    **_parse_pass(raw_qualifiers, outcome),
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                event = PassEvent.create(
                    **_parse_offside_pass(raw_qualifiers),
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_TAKE_ON:
                event = TakeOnEvent.create(
                    qualifiers=None,
                    **_parse_take_on(outcome),
                    **generic_event_kwargs,
                )
            elif type_id in (
                EVENT_TYPE_SHOT_MISS,
                EVENT_TYPE_SHOT_POST,
                EVENT_TYPE_SHOT_SAVED,
                EVENT_TYPE_SHOT_GOAL,
            ):
                # For goals, qualifier 374 (when present) carries a
                # date-time string that replaces the event timestamp.
                if type_id == EVENT_TYPE_SHOT_GOAL and 374 in raw_qualifiers:
                    goal_time = _parse_f24_datetime(
                        raw_qualifiers[374].replace(" ", "T")
                    )
                    generic_event_kwargs["timestamp"] = (
                        goal_time - period.start_timestamp
                    )
                shot_event_kwargs = _parse_shot(
                    raw_qualifiers,
                    type_id,
                    coordinates=generic_event_kwargs["coordinates"],
                )
                # shot-specific values win over the generic ones
                event = ShotEvent.create(
                    **{**generic_event_kwargs, **shot_event_kwargs}
                )
            elif type_id == EVENT_TYPE_RECOVERY:
                event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id in BALL_OUT_EVENTS:
                generic_event_kwargs["ball_state"] = BallState.DEAD
                event = BallOutEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_FORMATION_CHANGE:
                event = FormationChangeEvent.create(
                    result=None,
                    qualifiers=None,
                    **_parse_formation_change(raw_qualifiers),
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_CARD:
                generic_event_kwargs["ball_state"] = BallState.DEAD
                event = CardEvent.create(
                    **_parse_card(raw_qualifiers),
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent.create(
                    **generic_event_kwargs,
                    result=None,
                    qualifiers=None,
                    event_name=_get_event_type_name(type_id),
                )

            if self.should_include_event(event):
                events.append(transformer.transform_event(event))

    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=score,
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        provider=Provider.OPTA,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(metadata=dataset_metadata, records=events)
def deserialize(self, inputs: SportecInputs) -> EventDataset:
    """
    Deserialize Sportec match information and event XML into an
    `EventDataset`.

    Pitch size, teams and the final result are read from
    `inputs.meta_data`; events come from `inputs.event_data`. Periods are
    opened by kick-off events carrying a `GameSection` and closed by final
    whistle events. The first period's attacking direction follows the
    `TeamLeft` attribute of its kick-off, and each later period gets the
    opposite direction of the one before it. Before a throw-in, goal kick
    or corner kick pass a synthetic `BallOutEvent` is inserted. The
    complete event list is filtered with `self.should_include_event` only
    at the end.

    Parameters
    ----------
    inputs : SportecInputs

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    DeserializationError
        When a team has an unknown role or a line-up is empty.
    ValueError
        When an event names a player without naming a team.
    """
    with performance_logging("load data", logger=logger):
        match_root = objectify.fromstring(inputs.meta_data.read())
        event_root = objectify.fromstring(inputs.event_data.read())

    with performance_logging("parse data", logger=logger):
        x_max = float(match_root.MatchInformation.Environment.attrib["PitchX"])
        y_max = float(match_root.MatchInformation.Environment.attrib["PitchY"])

        transformer = self.get_transformer(length=x_max, width=y_max)

        teams_elm = objectify.ObjectPath(
            "PutDataRequest.MatchInformation.Teams"
        ).find(match_root)
        team_elms = list(teams_elm.iterchildren("Team"))

        for team_elm in team_elms:
            role = team_elm.attrib["Role"]
            if role == "home":
                home_team = _team_from_xml_elm(team_elm)
            elif role == "guest":
                away_team = _team_from_xml_elm(team_elm)
            else:
                raise DeserializationError(f"Unknown side: {role}")

        home_score, away_score = match_root.MatchInformation.General.attrib[
            "Result"
        ].split(":")
        score = Score(home=int(home_score), away=int(away_score))
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise DeserializationError("LineUp incomplete")

        periods = []
        period_id = 0
        events = []

        for event_elm in event_root.iterchildren("Event"):
            event_chain = _event_chain_from_xml_elm(event_elm)
            timestamp = _parse_datetime(event_chain["Event"]["EventTime"])

            starts_period = (
                SPORTEC_EVENT_NAME_KICKOFF in event_chain
                and "GameSection" in event_chain[SPORTEC_EVENT_NAME_KICKOFF]
            )
            if starts_period:
                period_id += 1
                period = Period(
                    id=period_id,
                    start_timestamp=timestamp,
                    end_timestamp=None,
                )
                if period_id == 1:
                    team_left = event_chain[SPORTEC_EVENT_NAME_KICKOFF][
                        "TeamLeft"
                    ]
                    if team_left == home_team.team_id:
                        # goal of home team is on the left side.
                        # this means they attack from left to right
                        orientation = Orientation.FIXED_HOME_AWAY
                        first_direction = AttackingDirection.HOME_AWAY
                    else:
                        orientation = Orientation.FIXED_AWAY_HOME
                        first_direction = AttackingDirection.AWAY_HOME
                    period.set_attacking_direction(first_direction)
                else:
                    # later periods mirror the period before them
                    last_period = periods[-1]
                    if (
                        last_period.attacking_direction
                        == AttackingDirection.HOME_AWAY
                    ):
                        flipped_direction = AttackingDirection.AWAY_HOME
                    else:
                        flipped_direction = AttackingDirection.HOME_AWAY
                    period.set_attacking_direction(flipped_direction)

                periods.append(period)
            elif SPORTEC_EVENT_NAME_FINAL_WHISTLE in event_chain:
                period.end_timestamp = timestamp
                continue

            team = None
            player = None
            flatten_attributes = dict()
            # reverse because top levels are more important
            for chain_attributes in reversed(event_chain.values()):
                flatten_attributes.update(chain_attributes)

            if "Team" in flatten_attributes:
                if flatten_attributes["Team"] == home_team.team_id:
                    team = home_team
                else:
                    team = away_team
            if "Player" in flatten_attributes:
                if not team:
                    raise ValueError("Player set while team is not set")
                player = team.get_player_by_id(flatten_attributes["Player"])

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=None,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_chain["Event"]["EventId"],
                coordinates=_parse_coordinates(event_chain["Event"]),
                raw_event=flatten_attributes,
                team=team,
                player=player,
            )

            # The last entry of the chain names the event; it is removed
            # from the chain before the chain is handed to the parsers.
            event_name, event_attributes = event_chain.popitem()

            if event_name in SPORTEC_SHOT_EVENT_NAMES:
                event = ShotEvent.create(
                    **_parse_shot(
                        event_name=event_name, event_chain=event_chain
                    ),
                    **generic_event_kwargs,
                )
            elif event_name in SPORTEC_PASS_EVENT_NAMES:
                event = PassEvent.create(
                    **_parse_pass(event_chain=event_chain, team=team),
                    **generic_event_kwargs,
                    receive_timestamp=None,
                    receiver_coordinates=None,
                )
            elif event_name == SPORTEC_EVENT_NAME_BALL_CLAIMING:
                event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_SUBSTITUTION:
                substitution_event_kwargs = _parse_substitution(
                    event_attributes=event_attributes, team=team
                )
                # the parsed substitution supplies the event's player
                generic_event_kwargs["player"] = substitution_event_kwargs[
                    "player"
                ]
                del substitution_event_kwargs["player"]
                event = SubstitutionEvent.create(
                    result=None,
                    qualifiers=None,
                    **substitution_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_CAUTION:
                event = CardEvent.create(
                    result=None,
                    qualifiers=None,
                    **_parse_caution(event_attributes),
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_FOUL:
                generic_event_kwargs.update(
                    _parse_foul(event_attributes, teams=teams)
                )
                event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent.create(
                    result=None,
                    qualifiers=None,
                    event_name=event_name,
                    **generic_event_kwargs,
                )

            if events:
                previous_event = events[-1]
                previous_is_complete_pass = (
                    previous_event.event_type == EventType.PASS
                    and previous_event.result == PassResult.COMPLETE
                )
                if (
                    previous_is_complete_pass
                    and "X-Source-Position" in event_chain["Event"]
                ):
                    # a completed pass ends where this event starts
                    previous_event.receiver_coordinates = Point(
                        x=float(event_chain["Event"]["X-Source-Position"]),
                        y=float(event_chain["Event"]["Y-Source-Position"]),
                    )

            restart_set_pieces = (
                SetPieceType.THROW_IN,
                SetPieceType.GOAL_KICK,
                SetPieceType.CORNER_KICK,
            )
            if (
                event.event_type == EventType.PASS
                and event.get_qualifier_value(SetPieceQualifier)
                in restart_set_pieces
            ):
                # 1. update previous pass
                if events[-1].event_type == EventType.PASS:
                    events[-1].result = PassResult.OUT

                # 2. add synthetic out event
                second_chain_key = list(event_chain.keys())[1]
                decision_timestamp = _parse_datetime(
                    event_chain[second_chain_key]["DecisionTimestamp"]
                )
                out_event = BallOutEvent.create(
                    period=period,
                    timestamp=decision_timestamp - period.start_timestamp,
                    ball_owning_team=None,
                    ball_state=BallState.DEAD,
                    # from Event
                    event_id=event_chain["Event"]["EventId"] + "-ball-out",
                    team=events[-1].team,
                    player=events[-1].player,
                    coordinates=None,
                    raw_event={},
                    result=None,
                    qualifiers=None,
                )
                events.append(transformer.transform_event(out_event))

            events.append(transformer.transform_event(event))

    # Filtering happens last so the previous-event logic above always sees
    # the complete event sequence.
    events = [
        candidate
        for candidate in events
        if self.should_include_event(candidate)
    ]

    dataset_metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=score,
        frame_rate=None,
        orientation=orientation,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        provider=Provider.SPORTEC,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return EventDataset(
        metadata=dataset_metadata,
        records=events,
    )