def _get_tracking_dataset(self):
    """Build a minimal two-frame tracking dataset used as a test fixture."""
    home = Team(team_id="home", name="home", ground=Ground.HOME)
    away = Team(team_id="away", name="away", ground=Ground.AWAY)

    first_half = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_half = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    periods = [first_half, second_half]

    metadata = Metadata(
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        periods=periods,
        teams=[home, away],
        score=None,
        provider=None,
    )

    # Frame 1: empty pitch, ball in a corner; frame 2: one home player.
    frames = [
        Frame(
            frame_id=1,
            timestamp=0.1,
            ball_owning_team=None,
            ball_state=None,
            period=first_half,
            players_coordinates={},
            ball_coordinates=Point(x=100, y=-50),
        ),
        Frame(
            frame_id=2,
            timestamp=0.2,
            ball_owning_team=None,
            ball_state=None,
            period=first_half,
            players_coordinates={
                Player(
                    team=home, player_id="home_1", jersey_no=1
                ): Point(x=15, y=35)
            },
            ball_coordinates=Point(x=0, y=50),
        ),
    ]

    return TrackingDataset(metadata=metadata, records=frames)
def transform_dataset(
    cls,
    dataset: DatasetT,
    to_pitch_dimensions: PitchDimensions = None,
    to_orientation: Orientation = None,
) -> DatasetT:
    """Transform ``dataset`` to other pitch dimensions and/or orientation.

    Missing targets default to the dataset's current values; when neither
    target is given the dataset is returned untouched.
    """
    if not to_pitch_dimensions and not to_orientation:
        return dataset

    # Default whichever target was not supplied.
    if not to_orientation:
        to_orientation = dataset.metadata.orientation
    if not to_pitch_dimensions:
        to_pitch_dimensions = dataset.metadata.pitch_dimensions

    # BALL_OWNING_TEAM orientation needs per-record owning-team info.
    if (
        to_orientation == Orientation.BALL_OWNING_TEAM
        and not dataset.metadata.flags & DatasetFlag.BALL_OWNING_TEAM
    ):
        raise ValueError(
            "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
            "ball owning team data"
        )

    transformer = cls(
        from_pitch_dimensions=dataset.metadata.pitch_dimensions,
        from_orientation=dataset.metadata.orientation,
        to_pitch_dimensions=to_pitch_dimensions,
        to_orientation=to_orientation,
    )
    metadata = replace(
        dataset.metadata,
        pitch_dimensions=to_pitch_dimensions,
        orientation=to_orientation,
    )

    if isinstance(dataset, TrackingDataset):
        return TrackingDataset(
            metadata=metadata,
            records=list(
                map(transformer.transform_frame, dataset.records)
            ),
        )
    if isinstance(dataset, EventDataset):
        return EventDataset(
            metadata=metadata,
            records=[
                transformer.transform_event(record)
                for record in dataset.records
            ],
        )
    raise Exception("Unknown Dataset type")
def _get_tracking_dataset(self):
    """Build a minimal two-frame tracking dataset used as a test fixture."""
    first_half = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_half = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )

    # Frame 1 has no players tracked; frame 2 has one player per team.
    first_frame = Frame(
        frame_id=1,
        timestamp=0.1,
        ball_owning_team=None,
        ball_state=None,
        period=first_half,
        away_team_player_positions={},
        home_team_player_positions={},
        ball_position=Point(x=100, y=-50),
    )
    second_frame = Frame(
        frame_id=2,
        timestamp=0.2,
        ball_owning_team=None,
        ball_state=None,
        period=first_half,
        away_team_player_positions={"1": Point(x=10, y=20)},
        home_team_player_positions={"1": Point(x=15, y=35)},
        ball_position=Point(x=0, y=50),
    )

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        records=[first_frame, second_frame],
        periods=[first_half, second_half],
    )
def deserialize(self, inputs: MetricaEPTSTrackingDataInputs) -> TrackingDataset:
    """Deserialize Metrica EPTS tracking data into a ``TrackingDataset``.

    Parameters
    ----------
    inputs : MetricaEPTSTrackingDataInputs
        Provides ``meta_data`` (EPTS XML metadata) and ``raw_data``
        (the raw sensor stream) readables.

    Returns
    -------
    TrackingDataset
    """
    with performance_logging("Loading metadata", logger=logger):
        metadata = load_metadata(inputs.meta_data)

    # Only build a coordinate transformer when the metadata specifies both
    # a provider and pitch dimensions; otherwise frames are kept as-is.
    # NOTE(review): the guard checks `metadata.provider` but the provider
    # passed below comes from `metadata.coordinate_system` — confirm these
    # are always consistent.
    if metadata.provider and metadata.pitch_dimensions:
        transformer = self.get_transformer(
            length=metadata.pitch_dimensions.length,
            width=metadata.pitch_dimensions.width,
            provider=metadata.coordinate_system.provider,
        )
    else:
        transformer = None

    with performance_logging("Loading data", logger=logger):
        # assume they are sorted
        frames = [
            self._frame_from_row(row, metadata, transformer)
            for row in read_raw_data(
                raw_data=inputs.raw_data,
                metadata=metadata,
                # Read every sensor declared in the metadata.
                sensor_ids=[
                    sensor.sensor_id for sensor in metadata.sensors
                ],
                sample_rate=self.sample_rate,
                limit=self.limit,
            )
        ]

    if transformer:
        # Frames were converted, so advertise the target coordinate
        # system (and its pitch dimensions) in the returned metadata.
        metadata = replace(
            metadata,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

    return TrackingDataset(records=frames, metadata=metadata)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize Metrica tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data_home` should point to a `Readable` object
        containing the 'csv' formatted raw data for the home team.
        input `raw_data_away` should point to a `Readable` object
        containing the 'csv' formatted raw data for the away team.
    options : dict
        Options for deserialization of the Metrica file. Possible options
        are `sample_rate` (float between 0 and 1) to specify the amount
        of frames that should be loaded, `limit` to specify the max
        number of frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    ValueError when both input files don't seem to belong to each other

    See Also
    --------

    Examples
    --------
    >>> serializer = MetricaTrackingSerializer()
    >>> with open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home, \
    >>>      open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'raw_data_home': raw_home,
    >>>             'raw_data_away': raw_away
    >>>         },
    >>>         options={
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get('sample_rate', 1.0))
    limit = int(options.get('limit', 0))

    # consider reading this from data
    frame_rate = 25

    with performance_logging("prepare", logger=logger):
        # Both files are iterated in lockstep; each side yields partial
        # frames that are merged into one Frame per tick below.
        home_iterator = self.__create_iterator(inputs['raw_data_home'], sample_rate, frame_rate)
        away_iterator = self.__create_iterator(inputs['raw_data_away'], sample_rate, frame_rate)

        partial_frames = zip(home_iterator, away_iterator)

    with performance_logging("loading", logger=logger):
        frames = []
        periods = []

        partial_frame_type = self.__PartialFrame
        home_partial_frame: partial_frame_type
        away_partial_frame: partial_frame_type
        for n, (home_partial_frame, away_partial_frame) in enumerate(partial_frames):
            # Raises when the two files disagree (frame id / period),
            # i.e. the inputs don't belong to the same match.
            self.__validate_partials(home_partial_frame, away_partial_frame)

            period: Period = home_partial_frame.period
            frame_id: int = home_partial_frame.frame_id

            # Timestamp is relative to the period start.
            frame = Frame(frame_id=frame_id,
                          timestamp=frame_id / frame_rate - period.start_timestamp,
                          ball_position=home_partial_frame.ball_position,
                          home_team_player_positions=home_partial_frame.player_positions,
                          away_team_player_positions=away_partial_frame.player_positions,
                          period=period,
                          ball_state=None,
                          ball_owning_team=None)

            frames.append(frame)

            # Collect each period once, in order of appearance.
            if not periods or period.id != periods[-1].id:
                periods.append(period)

            # Derive the attacking direction from the first frame seen
            # in the period.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame))

            # n counts frames appended so far; stop at exactly `limit`.
            n += 1
            if limit and n >= limit:
                break

    # Orientation is fixed for the whole match and derived from which
    # direction the home team attacks in the first period.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME)

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        frame_rate=frame_rate,
        orientation=orientation,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                         y_dim=Dimension(0, 1)),
        periods=periods,
        records=frames)
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> TrackingDataset:
    """
    Deserialize EPTS tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        `metadata` must be a `Readable` holding the XML metadata;
        `raw_data` must be a `Readable` holding the 'csv' formatted
        raw data.
    options : dict
        Optional settings: `sample_rate` (float between 0 and 1) limits
        the fraction of frames loaded, `limit` caps the number of frames
        returned.

    Returns
    -------
    dataset : TrackingDataset

    Examples
    --------
    >>> serializer = EPTSSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={'metadata': meta, 'raw_data': raw},
    >>>         options={'sample_rate': 1/12}
    >>>     )
    """
    self.__validate_inputs(inputs)

    options = options if options else {}
    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))

    with performance_logging("Loading metadata", logger=logger):
        metadata = load_metadata(inputs["metadata"])

    with performance_logging("Loading data", logger=logger):
        # Rows are assumed to arrive pre-sorted by frame.
        rows = read_raw_data(
            raw_data=inputs["raw_data"],
            metadata=metadata,
            # Only the position sensor matters for tracking frames.
            sensor_ids=["position"],
            sample_rate=sample_rate,
            limit=limit,
        )
        frames = [self._frame_from_row(row, metadata) for row in rows]

    return TrackingDataset(records=frames, metadata=metadata)
def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
    """Deserialize TRACAB tracking data into a ``TrackingDataset``.

    Parameters
    ----------
    inputs : TRACABInputs
        Provides ``meta_data`` (XML match information) and ``raw_data``
        (the per-frame ASCII stream).

    Returns
    -------
    TrackingDataset
    """
    # TODO: also used in Metrica, extract to a method
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    with performance_logging("Loading metadata", logger=logger):
        match = objectify.fromstring(inputs.meta_data.read()).match
        frame_rate = int(match.attrib["iFrameRateFps"])
        pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
        pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

        periods = []
        for period in match.iterchildren(tag="period"):
            start_frame_id = int(period.attrib["iStartFrame"])
            end_frame_id = int(period.attrib["iEndFrame"])
            # Zero start and end frames mark an unplayed period.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(
                        id=int(period.attrib["iId"]),
                        start_timestamp=start_frame_id / frame_rate,
                        end_timestamp=end_frame_id / frame_rate,
                    )
                )

    with performance_logging("Loading data", logger=logger):
        transformer = self.get_transformer(
            length=pitch_size_width, width=pitch_size_height
        )

        def _iter():
            # Yield every `sample`-th in-period (and, optionally, alive)
            # line together with the period it belongs to.
            n = 0
            sample = 1.0 / self.sample_rate

            for line_ in inputs.raw_data.readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                frame_id = int(line_[:10].split(":", 1)[0])
                if self.only_alive and not line_.endswith("Alive;:"):
                    continue

                for period_ in periods:
                    if period_.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period_, line_
                        n += 1

        frames = []
        for n, (period, line) in enumerate(_iter()):
            frame = self._frame_from_line(teams, period, line, frame_rate)

            frame = transformer.transform_frame(frame)
            frames.append(frame)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            # BUGFIX: the previous `n >= self.limit` check returned one
            # frame too many; `n + 1` counts the frame just appended
            # (consistent with the SecondSpectrum deserializer and the
            # documented meaning of `limit`).
            if self.limit and n + 1 >= self.limit:
                break

    # Attacking direction is constant per half, so the match orientation
    # is fixed and derived from the first period.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.TRACAB,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize EPTS tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `meta_data` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the EPTS file. Possible options
        are `sample_rate` (float between 0 and 1) to specify the amount
        of frames that should be loaded, `limit` to specify the max
        number of frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = EPTSSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'meta_data': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))

    with performance_logging("Loading metadata", logger=logger):
        meta_data = load_meta_data(inputs["meta_data"])

    periods = meta_data.periods

    with performance_logging("Loading data", logger=logger):
        # assume they are sorted
        frames = [
            self._frame_from_row(row, meta_data)
            for row in read_raw_data(
                raw_data=inputs["raw_data"],
                meta_data=meta_data,
                sensor_ids=["position"],  # we don't care about other sensors
                sample_rate=sample_rate,
                limit=limit,
            )
        ]

    # Determine the attacking direction at kickoff: prefer the metadata's
    # first period, otherwise derive it from the first loaded frame.
    if periods:
        start_attacking_direction = periods[0].attacking_direction
    elif frames:
        start_attacking_direction = attacking_direction_from_frame(
            frames[0])
    else:
        start_attacking_direction = None

    # NOTE(review): when start_attacking_direction is None (no periods
    # and no frames) the expression below still yields FIXED_AWAY_HOME
    # rather than None, because None != NOT_SET — confirm this is
    # intended.
    orientation = (
        (Orientation.FIXED_HOME_AWAY
         if start_attacking_direction == AttackingDirection.HOME_AWAY
         else Orientation.FIXED_AWAY_HOME)
        if start_attacking_direction != AttackingDirection.NOT_SET
        else None)

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        frame_rate=meta_data.frame_rate,
        orientation=orientation,
        pitch_dimensions=meta_data.pitch_dimensions,
        periods=periods,
        records=frames,
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> TrackingDataset:
    """
    Deserialize TRACAB tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `metadata` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the TRACAB file. Possible options
        are `only_alive` (boolean) to specify that only frames with
        alive ball state should be loaded, or `sample_rate` (float
        between 0 and 1) to specify the amount of frames that should be
        loaded, `limit` to specify the max number of frames that will
        be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = TRACABSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'metadata': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'only_alive': True,
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    only_alive = bool(options.get("only_alive", True))

    # TODO: also used in Metrica, extract to a method
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    with performance_logging("Loading metadata", logger=logger):
        match = objectify.fromstring(inputs["metadata"].read()).match
        frame_rate = int(match.attrib["iFrameRateFps"])
        pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
        pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

        periods = []
        for period in match.iterchildren(tag="period"):
            start_frame_id = int(period.attrib["iStartFrame"])
            end_frame_id = int(period.attrib["iEndFrame"])
            # Zero start and end frames mark an unplayed period.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(
                        id=int(period.attrib["iId"]),
                        start_timestamp=start_frame_id / frame_rate,
                        end_timestamp=end_frame_id / frame_rate,
                    )
                )

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Yield every `sample`-th in-period (and, optionally, alive)
            # line together with the period it belongs to.
            n = 0
            sample = 1.0 / sample_rate

            for line_ in inputs["raw_data"].readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                frame_id = int(line_[:10].split(":", 1)[0])
                if only_alive and not line_.endswith("Alive;:"):
                    continue

                for period_ in periods:
                    if period_.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period_, line_
                        n += 1

        frames = []
        for n, (period, line) in enumerate(_iter()):
            frame = self._frame_from_line(teams, period, line, frame_rate)

            frames.append(frame)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            # BUGFIX: the previous `n >= limit` check returned one frame
            # too many; `n + 1` counts the frame just appended, honoring
            # the documented meaning of the `limit` option.
            if limit and n + 1 >= limit:
                break

    # Attacking direction is constant per half, so the match orientation
    # is fixed and derived from the first period.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(
                -1 * pitch_size_width / 2, pitch_size_width / 2
            ),
            y_dim=Dimension(
                -1 * pitch_size_height / 2, pitch_size_height / 2
            ),
            x_per_meter=100,
            y_per_meter=100,
        ),
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.TRACAB,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:
    """Deserialize SkillCorner tracking data into a ``TrackingDataset``.

    Parameters
    ----------
    inputs : SkillCornerInputs
        Provides ``meta_data`` (match JSON) and ``raw_data``
        (structured frame JSON) readables.

    Returns
    -------
    TrackingDataset
    """
    metadata = self.__load_json(inputs.meta_data)
    raw_data = self.__load_json(inputs.raw_data)

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_data)

        # Maps a team id to the key used for it in the raw frame data.
        teamdict = {
            metadata["home_team"].get("id"): "home_team",
            metadata["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {
            player["trackable_object"]: player["team_id"]
            for player in metadata["players"]
        }

        player_dict = {
            player["trackable_object"]: player
            for player in metadata["players"]
        }

        referee_dict = {
            ref["trackable_object"]: "referee"
            for ref in metadata["referees"]
        }
        ball_id = metadata["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = metadata["pitch_width"]
        pitch_size_length = metadata["pitch_length"]

        transformer = self.get_transformer(
            length=pitch_size_length, width=pitch_size_width)

        home_team_id = metadata["home_team"]["id"]
        away_team_id = metadata["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=metadata["home_team"]["name"],
            ground=Ground.HOME,
        )
        away_team = Team(
            team_id=away_team_id,
            name=metadata["away_team"]["name"],
            ground=Ground.AWAY,
        )
        teams = [home_team, away_team]

        for player_id in player_dict.keys():
            player = player_dict.get(player_id)
            team_id = player["team_id"]
            # NOTE(review): if a player's team_id matches neither team,
            # `team_string`/`team` stay unbound and the Player() call
            # below raises NameError — confirm the metadata guarantees
            # membership.
            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            players[team_string][player_id] = Player(
                player_id=f"{team.ground}_{player['number']}",
                team=team,
                jersey_no=player["number"],
                name=f"{player['first_name']} {player['last_name']}",
                first_name=player["first_name"],
                last_name=player["last_name"],
                starting=player["start_time"] == "00:00:00",
                position=Position(
                    position_id=player["player_role"].get("id"),
                    name=player["player_role"].get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

    # Players that show up in frames but not in the metadata are
    # collected here by _get_frame_data.
    anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Yield every `sample`-th frame that belongs to a period.
            n = 0
            sample = 1.0 / self.sample_rate

            for frame in raw_data:
                frame_period = frame["period"]

                if frame_period is not None:
                    if n % sample == 0:
                        yield frame
                    n += 1

        frames = []
        n_frames = 0
        for _frame in _iter():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if self.include_empty_frames or len(_frame["data"]) > 0:
                frame = self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    _frame,
                )
                frame = transformer.transform_frame(frame)
                frames.append(frame)
                n_frames += 1

                # n_frames counts appended frames: stops at exactly limit.
                if self.limit and n_frames >= self.limit:
                    break

    self._set_skillcorner_attacking_directions(frames, periods)

    # SkillCorner broadcast tracking data is delivered at 10 fps.
    frame_rate = 10

    # NOTE(review): `periods[1]` — periods appears to be keyed/indexed by
    # period number so this is the first half; confirm against
    # __get_periods.
    orientation = (Orientation.HOME_TEAM
                   if periods[1].attacking_direction
                   == AttackingDirection.HOME_AWAY
                   else Orientation.AWAY_TEAM)

    # `metadata` (the raw JSON dict) is re-bound to a kloppy Metadata here.
    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=Score(
            home=metadata["home_team_score"],
            away=metadata["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: SecondSpectrumInputs) -> TrackingDataset:
    """Deserialize Second Spectrum tracking data into a ``TrackingDataset``.

    Parameters
    ----------
    inputs : SecondSpectrumInputs
        Provides ``meta_data`` (XML or JSON match information), optional
        ``additional_meta_data`` (JSON) and ``raw_data`` (JSON-lines
        frames) readables.

    Returns
    -------
    TrackingDataset
    """
    metadata = None

    # Handles the XML metadata that contains the pitch dimensions and frame info
    with performance_logging("Loading XML metadata", logger=logger):
        # The meta data can also be in JSON format. In that case
        # it also contains the 'additional metadata'.
        # First do a 'peek' to determine the char
        first_byte = inputs.meta_data.read(1)
        if first_byte == b"{":
            metadata = json.loads(first_byte + inputs.meta_data.read())

            frame_rate = int(metadata["fps"])
            # NOTE(review): this maps pitchLength -> pitch_size_height
            # and pitchWidth -> pitch_size_width, the opposite of the XML
            # branch below (X size -> width, Y size -> height). One of
            # the two branches likely swaps length/width when the
            # transformer is built later — verify with real files.
            pitch_size_height = float(metadata["pitchLength"])
            pitch_size_width = float(metadata["pitchWidth"])

            periods = []
            for period in metadata["periods"]:
                start_frame_id = int(period["startFrameIdx"])
                end_frame_id = int(period["endFrameIdx"])
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period["number"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )
        else:
            match = objectify.fromstring(
                first_byte + inputs.meta_data.read()
            ).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )

    # Default team initialisation
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    # `metadata` is truthy only when the meta data file was JSON.
    if inputs.additional_meta_data or metadata:
        with performance_logging("Loading JSON metadata", logger=logger):
            try:
                if inputs.additional_meta_data:
                    metadata = json.loads(
                        inputs.additional_meta_data.read()
                    )
                home_team_id = metadata["homeOptaId"]
                away_team_id = metadata["awayOptaId"]

                # Tries to parse (short) team names from the description string
                # NOTE(review): the bare `except:` here (and below) also
                # swallows SystemExit/KeyboardInterrupt — consider
                # `except Exception:`.
                try:
                    home_name = (
                        metadata["description"].split("-")[0].strip()
                    )
                    away_name = (
                        metadata["description"]
                        .split("-")[1]
                        .split(":")[0]
                        .strip()
                    )
                except:
                    home_name, away_name = "home", "away"

                teams[0].team_id = home_team_id
                teams[0].name = home_name
                teams[1].team_id = away_team_id
                teams[1].name = away_name

                for team, team_str in zip(
                    teams, ["homePlayers", "awayPlayers"]
                ):
                    for player_data in metadata[team_str]:

                        # We use the attributes field of Player to store the extra IDs provided by the
                        # metadata. We designate the player_id to be the 'optaId' field as this is what's
                        # used as 'player_id' in the raw frame data file
                        player_attributes = {
                            k: v
                            for k, v in player_data.items()
                            if k in ["ssiId", "optaUuid"]
                        }

                        player = Player(
                            player_id=player_data["optaId"],
                            name=player_data["name"],
                            starting=player_data["position"] != "SUB",
                            position=player_data["position"],
                            team=team,
                            jersey_no=int(player_data["number"]),
                            attributes=player_attributes,
                        )
                        team.players.append(player)

            except:  # TODO: More specific exception
                logging.warning(
                    "Optional JSON Metadata is malformed. Continuing without"
                )

    # Handles the tracking frame data
    with performance_logging("Loading data", logger=logger):
        transformer = self.get_transformer(
            length=pitch_size_width, width=pitch_size_height
        )

        def _iter():
            # Yield every `sample`-th (live, when only_alive) frame dict.
            n = 0
            sample = 1 / self.sample_rate

            for line_ in inputs.raw_data.readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # Each line is just json so we just parse it
                frame_data = json.loads(line_)

                if self.only_alive and not frame_data["live"]:
                    continue

                if n % sample == 0:
                    yield frame_data
                n += 1

        frames = []
        for n, frame_data in enumerate(_iter()):
            period = periods[frame_data["period"] - 1]

            frame = self._frame_from_framedata(teams, period, frame_data)
            frame = transformer.transform_frame(frame)
            frames.append(frame)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )
            # `n + 1` is the number of frames appended so far.
            if self.limit and n + 1 >= self.limit:
                break

    # Attacking direction is constant per half: orientation is fixed and
    # derived from the first period.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SECONDSPECTRUM,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'json' formatted raw data. input `metadata` should point to
        the json metadata data.
    options : dict
        Options for deserialization of the TRACAB file.
        Possible options are:
        `include_empty_frames` (boolean): default = False
            to specify whether frames without any players_coordinates or
            the ball_coordinates should be loaded
        `sample_rate` (float between 0 and 1)
            to specify the amount of frames that should be loaded
        and `limit` (int)
            to specify the max number of frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = SkillCornerSerializer()
    >>> with open("match_data.json", "rb") as meta, \
    >>>      open("structured_data.json", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'metadata': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    metadata = self.__load_json(inputs["metadata"])
    raw_data = self.__load_json(inputs["raw_data"])

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    include_empty_frames = bool(options.get("include_empty_frames", False))

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_data)

        # Maps a team id to the key used for it in the raw frame data.
        teamdict = {
            metadata["home_team"].get("id"): "home_team",
            metadata["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {
            player["trackable_object"]: player["team_id"]
            for player in metadata["players"]
        }

        player_dict = {
            player["trackable_object"]: player
            for player in metadata["players"]
        }

        referee_dict = {
            ref["trackable_object"]: "referee"
            for ref in metadata["referees"]
        }
        ball_id = metadata["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = metadata["pitch_width"]
        pitch_size_length = metadata["pitch_length"]

        home_team_id = metadata["home_team"]["id"]
        away_team_id = metadata["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=metadata["home_team"]["name"],
            ground=Ground.HOME,
        )
        # NOTE(review): the teams are also stored on self as a side
        # effect — presumably for use by frame-parsing helpers; verify.
        self.home_team = home_team
        away_team = Team(
            team_id=away_team_id,
            name=metadata["away_team"]["name"],
            ground=Ground.AWAY,
        )
        self.away_team = away_team
        teams = [home_team, away_team]

        for player_id in player_dict.keys():
            player = player_dict.get(player_id)
            team_id = player["team_id"]
            # NOTE(review): if a player's team_id matches neither team,
            # `team_string`/`team` stay unbound and the Player() call
            # below raises NameError — confirm the metadata guarantees
            # membership.
            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            players[team_string][player_id] = Player(
                player_id=f"{team.ground}_{player['number']}",
                team=team,
                jersey_no=player["number"],
                name=f"{player['first_name']} {player['last_name']}",
                first_name=player["first_name"],
                last_name=player["last_name"],
                starting=player["start_time"] == "00:00:00",
                position=Position(
                    position_id=player["player_role"].get("id"),
                    name=player["player_role"].get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

    # Players that show up in frames but not in the metadata are
    # collected here by _get_frame_data.
    anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Yield every `sample`-th frame that belongs to a period.
            n = 0
            sample = 1.0 / sample_rate

            for frame in raw_data:
                frame_period = frame["period"]

                if frame_period is not None:
                    if n % sample == 0:
                        yield frame
                    n += 1

        frames = []
        n_frames = 0
        for _frame in _iter():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if include_empty_frames or len(_frame["data"]) > 0:
                frame = self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    _frame,
                )
                frames.append(frame)
                n_frames += 1

                # n_frames counts appended frames: stops at exactly limit.
                if limit and n_frames >= limit:
                    break

    self._set_skillcorner_attacking_directions(frames, periods)

    # SkillCorner broadcast tracking data is delivered at 10 fps.
    frame_rate = 10

    # NOTE(review): `periods[1]` — periods appears to be keyed/indexed by
    # period number so this is the first half; confirm against
    # __get_periods.
    orientation = (Orientation.HOME_TEAM
                   if periods[1].attacking_direction
                   == AttackingDirection.HOME_AWAY
                   else Orientation.AWAY_TEAM)

    # `metadata` (the raw JSON dict) is re-bound to a kloppy Metadata here.
    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-(pitch_size_length / 2),
                            (pitch_size_length / 2)),
            y_dim=Dimension(-(pitch_size_width / 2),
                            (pitch_size_width / 2)),
            x_per_meter=1,
            y_per_meter=1,
        ),
        score=Score(
            home=metadata["home_team_score"],
            away=metadata["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def transform_dataset(
    cls,
    dataset: Dataset,
    to_pitch_dimensions: PitchDimensions = None,
    to_orientation: Orientation = None,
    to_coordinate_system: CoordinateSystem = None,
) -> Dataset:
    """Transform ``dataset`` to other pitch dimensions, another coordinate
    system and/or another orientation.

    Parameters
    ----------
    dataset : Dataset
        Tracking or event dataset to transform.
    to_pitch_dimensions : PitchDimensions, optional
        Target pitch dimensions; mutually exclusive with
        ``to_coordinate_system``.
    to_orientation : Orientation, optional
        Target orientation; defaults to the dataset's current orientation.
    to_coordinate_system : CoordinateSystem, optional
        Target coordinate system; mutually exclusive with
        ``to_pitch_dimensions``.

    Returns
    -------
    Dataset
        A new dataset of the same concrete type; the input dataset is
        returned unchanged when no target is specified.

    Raises
    ------
    ValueError
        When both targets are given, or when BALL_OWNING_TEAM orientation
        is requested but the dataset carries no ball-owning-team data.
    KloppyError
        When the dataset is neither a TrackingDataset nor an EventDataset.
    """
    if to_pitch_dimensions and to_coordinate_system:
        # BUGFIX: corrected "CoordinateSysetm" typo in the error message.
        raise ValueError(
            "You can't do both a PitchDimension and CoordinateSystem on the same dataset transformation"
        )

    if (
        not to_pitch_dimensions
        and not to_orientation
        and not to_coordinate_system
    ):
        # Nothing requested: return the dataset untouched.
        return dataset
    elif not to_orientation:
        to_orientation = dataset.metadata.orientation

    # BALL_OWNING_TEAM orientation needs per-record owning-team info.
    if to_orientation == Orientation.BALL_OWNING_TEAM:
        if not dataset.metadata.flags & DatasetFlag.BALL_OWNING_TEAM:
            raise ValueError(
                "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
                "ball owning team data"
            )

    if to_pitch_dimensions:
        # Plain pitch-dimension transform: coordinate system untouched.
        transformer = cls(
            from_pitch_dimensions=dataset.metadata.pitch_dimensions,
            from_orientation=dataset.metadata.orientation,
            to_pitch_dimensions=to_pitch_dimensions,
            to_orientation=to_orientation,
        )
        metadata = replace(
            dataset.metadata,
            pitch_dimensions=to_pitch_dimensions,
            orientation=to_orientation,
        )
    elif to_coordinate_system:
        # Coordinate-system transform also updates the pitch dimensions.
        transformer = cls(
            from_coordinate_system=dataset.metadata.coordinate_system,
            from_orientation=dataset.metadata.orientation,
            to_coordinate_system=to_coordinate_system,
            to_orientation=to_orientation,
        )
        metadata = replace(
            dataset.metadata,
            coordinate_system=to_coordinate_system,
            pitch_dimensions=to_coordinate_system.pitch_dimensions,
            orientation=to_orientation,
        )
    else:
        # Orientation-only change: keep the current coordinate system.
        transformer = cls(
            from_coordinate_system=dataset.metadata.coordinate_system,
            from_orientation=dataset.metadata.orientation,
            to_coordinate_system=dataset.metadata.coordinate_system,
            to_orientation=to_orientation,
        )
        metadata = replace(
            dataset.metadata,
            orientation=to_orientation,
        )

    if isinstance(dataset, TrackingDataset):
        frames = [
            transformer.transform_frame(record)
            for record in dataset.records
        ]
        return TrackingDataset(
            metadata=metadata,
            records=frames,
        )
    elif isinstance(dataset, EventDataset):
        events = [
            transformer.transform_event(event)
            for event in dataset.records
        ]
        return EventDataset(
            metadata=metadata,
            records=events,
        )
    else:
        raise KloppyError("Unknown Dataset type")
def transform_dataset(cls,
                      dataset: DatasetType,
                      to_pitch_dimensions: PitchDimensions = None,
                      to_orientation: Orientation = None) -> DatasetType:
    """Transform a dataset to other pitch dimensions and/or orientation.

    Returns the dataset untouched when no target is given; otherwise a
    new TrackingDataset with converted frames is returned.
    """
    if not to_pitch_dimensions and not to_orientation:
        # No target specified: nothing to transform.
        return dataset

    # Fill in whichever target was omitted from the dataset itself.
    if not to_orientation:
        to_orientation = dataset.orientation
    if not to_pitch_dimensions:
        to_pitch_dimensions = dataset.pitch_dimensions

    # BALL_OWNING_TEAM orientation needs per-record owning-team info.
    if to_orientation == Orientation.BALL_OWNING_TEAM and not (
            dataset.flags & DatasetFlag.BALL_OWNING_TEAM):
        raise ValueError(
            "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
            "ball owning team data")

    transformer = cls(from_pitch_dimensions=dataset.pitch_dimensions,
                      from_orientation=dataset.orientation,
                      to_pitch_dimensions=to_pitch_dimensions,
                      to_orientation=to_orientation)

    if isinstance(dataset, TrackingDataset):
        transformed_frames = [
            transformer.transform_frame(frame)
            for frame in dataset.records
        ]
        return TrackingDataset(flags=dataset.flags,
                               frame_rate=dataset.frame_rate,
                               periods=dataset.periods,
                               pitch_dimensions=to_pitch_dimensions,
                               orientation=to_orientation,
                               records=transformed_frames)

    # EventDataset transformation is not implemented yet.
    raise Exception("Unknown Dataset type")