Example #1
def main():
    """
        This example shows the use of StatsBomb datasets and how we can pass
        arguments to the dataset loader.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger = logging.getLogger(__name__)

    dataset = datasets.load(
        "statsbomb", {"event_types": ["pass", "take_on", "carry", "shot"]})

    with performance_logging("transform", logger=logger):
        # convert to TRACAB coordinates
        dataset = transform(dataset,
                            to_orientation="FIXED_HOME_AWAY",
                            to_pitch_dimensions=[(-5500, 5500), (-3300, 3300)])

    with performance_logging("to pandas", logger=logger):
        dataframe = to_pandas(dataset)

    print(dataframe[:50].to_string())
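
For reference, a minimal sketch of the imports and entry point this script relies on; the exact module paths are assumptions and may differ between kloppy versions.

# Sketch only: import paths are assumptions, not confirmed from the kloppy source.
import logging
import sys

from kloppy import datasets, to_pandas, transform  # assumed top-level helpers
from kloppy.utils import performance_logging        # assumed module path

if __name__ == "__main__":
    main()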
Example #2
    def test_read(self):
        base_dir = os.path.dirname(__file__)
        with open(f"{base_dir}/files/epts_meta.xml", "rb") as meta_data_fp:
            meta_data = load_meta_data(meta_data_fp)

        with open(f"{base_dir}/files/epts_raw.txt", "rb") as raw_data:
            iterator = read_raw_data(raw_data, meta_data)

            with performance_logging("load"):
                assert list(iterator)
Example #3
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize Opta event data into an `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `f24_data` should point to a `Readable` object containing
                    the 'xml' formatted event data. input `f7_data` should point
                    to a `Readable` object containing the 'xml' formatted f7 data.
                options : dict
                    Options for deserialization of the Opta file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on"
                    and "generic". Generic covers everything other than the first four;
                    such events are only minimally parsed, so use them as a starting
                    point if you need to do the parsing yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = OptaSerializer()
                >>> with open("123_f24.xml", "rb") as f24_data, \
                >>>      open("123_f7.xml", "rb") as f7_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'f24_data': f24_data,
                >>>             'f7_data': f7_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            f7_root = objectify.fromstring(inputs["f7_data"].read())
            f24_root = objectify.fromstring(inputs["f24_data"].read())

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

        with performance_logging("parse data", logger=logger):
            matchdata_path = objectify.ObjectPath(
                "SoccerFeed.SoccerDocument.MatchData")
            team_elms = list(
                matchdata_path.find(f7_root).iterchildren("TeamData"))

            away_player_map = {}
            home_player_map = {}
            home_team_id = None
            away_team_id = None
            for team_elm in team_elms:
                player_map = {
                    player_elm.attrib["PlayerRef"].lstrip("p"):
                    player_elm.attrib["ShirtNumber"]
                    for player_elm in team_elm.find(
                        "PlayerLineUp").iterchildren("MatchPlayer")
                }
                team_id = team_elm.attrib["TeamRef"].lstrip("t")

                if team_elm.attrib["Side"] == "Home":
                    home_player_map = player_map
                    home_team_id = team_id
                elif team_elm.attrib["Side"] == "Away":
                    away_player_map = player_map
                    away_team_id = team_id
                else:
                    raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

            if not away_player_map or not home_player_map:
                raise Exception("LineUp incomplete")

            game_elm = f24_root.find("Game")
            periods = [
                Period(
                    id=1,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
                Period(
                    id=2,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
            ]
            events = []
            for event_elm in game_elm.iterchildren("Event"):
                event_id = event_elm.attrib["id"]
                type_id = int(event_elm.attrib["type_id"])
                timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
                period_id = int(event_elm.attrib["period_id"])
                for period in periods:
                    if period.id == period_id:
                        break
                else:
                    logger.debug(
                        f"Skipping event {event_id} because period doesn't match {period_id}"
                    )
                    continue

                if type_id == EVENT_TYPE_START_PERIOD:
                    logger.debug(
                        f"Set start of period {period.id} to {timestamp}")
                    period.start_timestamp = timestamp
                elif type_id == EVENT_TYPE_END_PERIOD:
                    logger.debug(
                        f"Set end of period {period.id} to {timestamp}")
                    period.end_timestamp = timestamp
                else:
                    if not period.start_timestamp:
                        # not started yet
                        continue

                    if event_elm.attrib["team_id"] == home_team_id:
                        team = Team.HOME
                        current_team_map = home_player_map
                    elif event_elm.attrib["team_id"] == away_team_id:
                        team = Team.AWAY
                        current_team_map = away_player_map
                    else:
                        raise Exception(
                            f"Unknown team_id {event_elm.attrib['team_id']}")

                    x = float(event_elm.attrib["x"])
                    y = float(event_elm.attrib["y"])
                    outcome = int(event_elm.attrib["outcome"])
                    qualifiers = {
                        int(qualifier_elm.attrib["qualifier_id"]):
                        qualifier_elm.attrib.get("value")
                        for qualifier_elm in event_elm.iterchildren("Q")
                    }
                    player_jersey_no = None
                    if "player_id" in event_elm.attrib:
                        player_jersey_no = current_team_map[
                            event_elm.attrib["player_id"]]

                    generic_event_kwargs = dict(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.ALIVE,
                        # from Event
                        event_id=event_id,
                        team=team,
                        player_jersey_no=player_jersey_no,
                        position=Point(x=x, y=y),
                        raw_event=event_elm,
                    )

                    if type_id == EVENT_TYPE_PASS:
                        pass_event_kwargs = _parse_pass(qualifiers, outcome)
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                        pass_event_kwargs = _parse_offside_pass()
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_TAKE_ON:
                        take_on_event_kwargs = _parse_take_on(outcome)
                        event = TakeOnEvent(
                            **take_on_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id in (
                            EVENT_TYPE_SHOT_MISS,
                            EVENT_TYPE_SHOT_POST,
                            EVENT_TYPE_SHOT_SAVED,
                            EVENT_TYPE_SHOT_GOAL,
                    ):
                        shot_event_kwargs = _parse_shot(
                            qualifiers,
                            type_id,
                            position=generic_event_kwargs["position"],
                        )
                        kwargs = {}
                        kwargs.update(generic_event_kwargs)
                        kwargs.update(shot_event_kwargs)
                        event = ShotEvent(**kwargs)
                    else:
                        event = GenericEvent(**generic_event_kwargs,
                                             result=None)

                    if (not wanted_event_types
                            or event.event_type in wanted_event_types):
                        events.append(event)

        return EventDataset(
            flags=DatasetFlag.BALL_OWNING_TEAM,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                             y_dim=Dimension(0, 100)),
            periods=periods,
            records=events,
        )
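
Following the docstring above, a hedged usage sketch that deserializes one Opta match and tallies the parsed events by class; the file names are placeholders and the import location of OptaSerializer is an assumption.

from collections import Counter

serializer = OptaSerializer()
with open("123_f24.xml", "rb") as f24_data, \
        open("123_f7.xml", "rb") as f7_data:
    dataset = serializer.deserialize(
        inputs={"f24_data": f24_data, "f7_data": f7_data},
        options={"event_types": ["pass", "take_on", "carry", "shot"]},
    )

# `records` is the event list passed to EventDataset() at the end of
# deserialize(), so a quick per-type tally is one Counter away.
print(Counter(type(event).__name__ for event in dataset.records))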
Example #4
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> EventDataset:
        """
                Deserialize StatsBomb event data into an `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `event_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `lineup_data` should point
                    to a `Readable` object containing the 'json' formatted lineup data.
                options : dict
                    Options for deserialization of the StatsBomb file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on"
                    and "generic". Generic covers everything other than the first four;
                    such events are only minimally parsed, so use them as a starting
                    point if you need to do the parsing yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = StatsBombSerializer()
                >>> with open("events/12312312.json", "rb") as event_data, \
                >>>      open("lineups/123123123.json", "rb") as lineup_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'event_data': event_data,
                >>>             'lineup_data': lineup_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["event_data"])
            home_lineup, away_lineup = json.load(inputs["lineup_data"])
            (
                shot_fidelity_version,
                xy_fidelity_version,
            ) = _determine_xy_fidelity_versions(raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):

            home_team = Team(
                team_id=str(home_lineup["team_id"]),
                name=home_lineup["team_name"],
                ground=Ground.HOME,
            )
            home_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=home_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in home_lineup["lineup"]
            ]

            away_team = Team(
                team_id=str(away_lineup["team_id"]),
                name=away_lineup["team_name"],
                ground=Ground.AWAY,
            )
            away_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=away_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in away_lineup["lineup"]
            ]

            teams = [home_team, away_team]

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                if raw_event["team"]["id"] == home_lineup["team_id"]:
                    team = teams[0]
                elif raw_event["team"]["id"] == away_lineup["team_id"]:
                    team = teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}"
                    )

                if (
                    raw_event["possession_team"]["id"]
                    == home_lineup["team_id"]
                ):
                    possession_team = teams[0]
                elif (
                    raw_event["possession_team"]["id"]
                    == away_lineup["team_id"]
                ):
                    possession_team = teams[1]
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event["timestamp"])
                period_id = int(raw_event["period"])
                if not period or period.id != period_id:
                    period = Period(
                        id=period_id,
                        start_timestamp=(
                            timestamp
                            if not period
                            # period = [start, end], add millisecond to prevent overlapping
                            else timestamp + period.end_timestamp + 0.001
                        ),
                        end_timestamp=None,
                    )
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                player = None
                if "player" in raw_event:
                    player = team.get_player_by_id(raw_event["player"]["id"])

                event_type = raw_event["type"]["id"]
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (
                    SB_EVENT_TYPE_CARRY,
                    SB_EVENT_TYPE_DRIBBLE,
                    SB_EVENT_TYPE_PASS,
                ):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=raw_event["id"],
                    team=team,
                    player=player,
                    coordinates=(
                        _parse_coordinates(
                            raw_event.get("location"), fidelity_version
                        )
                        if "location" in raw_event
                        else None
                    ),
                    raw_event=raw_event,
                )

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event["pass"],
                        team=team,
                        fidelity_version=fidelity_version,
                    )

                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event["duration"],
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event["shot"]
                    )
                    event = ShotEvent(
                        **shot_event_kwargs, **generic_event_kwargs
                    )

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event["dribble"]
                    )
                    event = TakeOnEvent(
                        **take_on_event_kwargs, **generic_event_kwargs
                    )
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event["carry"],
                        fidelity_version=fidelity_version,
                    )
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event["duration"],
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if (
                    not wanted_event_types
                    or event.event_type in wanted_event_types
                ):
                    events.append(event)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)
            ),
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=None,
        )

        return EventDataset(metadata=metadata, records=events,)
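
A hedged usage sketch for the StatsBomb deserializer above; the file names are placeholders and the import location of StatsBombSerializer is an assumption.

serializer = StatsBombSerializer()
with open("events/12312312.json", "rb") as event_data, \
        open("lineups/123123123.json", "rb") as lineup_data:
    dataset = serializer.deserialize(
        inputs={"event_data": event_data, "lineup_data": lineup_data},
        options={"event_types": ["pass", "take_on", "carry", "shot"]},
    )

# The Metadata built above carries both squads, so the lineups can be printed
# from the Player fields populated in the parser (jersey_no and name).
for team in dataset.metadata.teams:
    print(team.name)
    for player in team.players:
        print(f"  #{player.jersey_no} {player.name}")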
Example #5
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataset:
        """
        Deserialize Metrica tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data_home` should point to a `Readable` object containing
            the 'csv' formatted raw data for the home team. input `raw_data_away` should point
            to a `Readable` object containing the 'csv' formatted raw data for the away team.
        options : dict
            Options for deserialization of the Metrica file. Possible options are
            `sample_rate` (float between 0 and 1) to specify the fraction of
            frames that should be loaded, and `limit` to specify the maximum
            number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        ValueError when the two input files do not appear to belong to the same match

        See Also
        --------

        Examples
        --------
        >>> serializer = MetricaTrackingSerializer()
        >>> with open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home, \
        >>>      open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away:
        >>>
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'raw_data_home': raw_home,
        >>>             'raw_data_away': raw_away
        >>>         },
        >>>         options={
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        sample_rate = float(options.get('sample_rate', 1.0))
        limit = int(options.get('limit', 0))

        # consider reading this from data
        frame_rate = 25

        with performance_logging("prepare", logger=logger):
            home_iterator = self.__create_iterator(inputs['raw_data_home'],
                                                   sample_rate, frame_rate)
            away_iterator = self.__create_iterator(inputs['raw_data_away'],
                                                   sample_rate, frame_rate)

            partial_frames = zip(home_iterator, away_iterator)

        with performance_logging("loading", logger=logger):
            frames = []
            periods = []

            partial_frame_type = self.__PartialFrame
            home_partial_frame: partial_frame_type
            away_partial_frame: partial_frame_type
            for n, (home_partial_frame,
                    away_partial_frame) in enumerate(partial_frames):
                self.__validate_partials(home_partial_frame,
                                         away_partial_frame)

                period: Period = home_partial_frame.period
                frame_id: int = home_partial_frame.frame_id

                frame = Frame(
                    frame_id=frame_id,
                    timestamp=frame_id / frame_rate - period.start_timestamp,
                    ball_position=home_partial_frame.ball_position,
                    home_team_player_positions=home_partial_frame.player_positions,
                    away_team_player_positions=away_partial_frame.player_positions,
                    period=period,
                    ball_state=None,
                    ball_owning_team=None)

                frames.append(frame)

                if not periods or period.id != periods[-1].id:
                    periods.append(period)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame))

                n += 1
                if limit and n >= limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction
            == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME)

        return TrackingDataset(
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            frame_rate=frame_rate,
            orientation=orientation,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                             y_dim=Dimension(0, 1)),
            periods=periods,
            records=frames)
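
A hedged usage sketch for the Metrica tracking deserializer above; the file names mirror the docstring and the import location of MetricaTrackingSerializer is an assumption.

serializer = MetricaTrackingSerializer()
with open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home, \
        open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away:
    dataset = serializer.deserialize(
        inputs={"raw_data_home": raw_home, "raw_data_away": raw_away},
        options={"sample_rate": 1 / 12},
    )

# Each record is a Frame whose timestamp is relative to its period start, so
# the frame count and the last timestamp give a quick sanity check on the
# chosen sample_rate.
print(len(dataset.records), "frames at", dataset.frame_rate, "fps source rate")
print("last frame timestamp:", dataset.records[-1].timestamp)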
Example #6
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> TrackingDataset:
        """
        Deserialize EPTS tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'csv' formatted raw data. input `metadata` should point to
            the XML metadata.
        options : dict
            Options for deserialization of the EPTS file. Possible options are
            `sample_rate` (float between 0 and 1) to specify the fraction of
            frames that should be loaded, and `limit` to specify the maximum
            number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = EPTSSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'metadata': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))

        with performance_logging("Loading metadata", logger=logger):
            metadata = load_metadata(inputs["metadata"])

        with performance_logging("Loading data", logger=logger):
            # assume they are sorted
            frames = [
                self._frame_from_row(row, metadata)
                for row in read_raw_data(
                    raw_data=inputs["raw_data"],
                    metadata=metadata,
                    sensor_ids=[
                        "position"
                    ],  # we don't care about other sensors
                    sample_rate=sample_rate,
                    limit=limit,
                )
            ]

        return TrackingDataset(records=frames, metadata=metadata)
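
A hedged usage sketch for this EPTS deserializer; the file names mirror the docstring, the EPTSSerializer import location is an assumption, and the metadata fields are assumed to match the older variant shown in Example #9.

serializer = EPTSSerializer()
with open("metadata.xml", "rb") as meta, open("raw.dat", "rb") as raw:
    dataset = serializer.deserialize(
        inputs={"metadata": meta, "raw_data": raw},
        options={"sample_rate": 1 / 12, "limit": 100},
    )

# The EPTS metadata object is attached unchanged, so frame rate and periods
# come straight from the XML metadata.
print(dataset.metadata.frame_rate, len(dataset.records))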
Example #7
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize StatsBomb event data into an `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `event_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `lineup_data` should point
                    to a `Readable` object containing the 'json' formatted lineup data.
                options : dict
                    Options for deserialization of the StatsBomb file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on"
                    and "generic". Generic covers everything other than the first four;
                    such events are only minimally parsed, so use them as a starting
                    point if you need to do the parsing yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = StatsBombSerializer()
                >>> with open("events/12312312.json", "rb") as event_data, \
                >>>      open("lineups/123123123.json", "rb") as lineup_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'event_data': event_data,
                >>>             'lineup_data': lineup_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs['event_data'])
            home_lineup, away_lineup = json.load(inputs['lineup_data'])
            shot_fidelity_version, xy_fidelity_version = _determine_xy_fidelity_versions(
                raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):
            home_player_map = {
                player['player_id']: str(player['jersey_number'])
                for player in home_lineup['lineup']
            }
            away_player_map = {
                player['player_id']: str(player['jersey_number'])
                for player in away_lineup['lineup']
            }

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get('event_types', [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                if raw_event['team']['id'] == home_lineup['team_id']:
                    team = Team.HOME
                    current_team_map = home_player_map
                elif raw_event['team']['id'] == away_lineup['team_id']:
                    team = Team.AWAY
                    current_team_map = away_player_map
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                if raw_event['possession_team']['id'] == home_lineup[
                        'team_id']:
                    possession_team = Team.HOME
                elif raw_event['possession_team']['id'] == away_lineup[
                        'team_id']:
                    possession_team = Team.AWAY
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event['timestamp'])
                period_id = int(raw_event['period'])
                if not period or period.id != period_id:
                    period = Period(id=period_id,
                                    start_timestamp=timestamp if not period
                                    else timestamp + period.end_timestamp,
                                    end_timestamp=None)
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                player_jersey_no = None
                if 'player' in raw_event:
                    player_jersey_no = current_team_map[
                        raw_event['player']['id']]

                event_type = raw_event['type']['id']
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (SB_EVENT_TYPE_CARRY, SB_EVENT_TYPE_DRIBBLE,
                                    SB_EVENT_TYPE_PASS):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=raw_event['id'],
                    team=team,
                    player_jersey_no=player_jersey_no,
                    position=(_parse_position(raw_event.get('location'),
                                              fidelity_version)
                              if 'location' in raw_event else None),
                    raw_event=raw_event)

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event['pass'],
                        current_team_map=current_team_map,
                        fidelity_version=fidelity_version)

                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event['duration'],
                        **pass_event_kwargs,
                        **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event['shot'])
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event['dribble'])
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event['carry'],
                        fidelity_version=fidelity_version)
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event['duration'],
                        **carry_event_kwargs,
                        **generic_event_kwargs)
                else:
                    event = GenericEvent(result=None, **generic_event_kwargs)

                if not wanted_event_types or event.event_type in wanted_event_types:
                    events.append(event)

        return EventDataset(flags=DatasetFlag.BALL_OWNING_TEAM,
                            orientation=Orientation.ACTION_EXECUTING_TEAM,
                            pitch_dimensions=PitchDimensions(
                                x_dim=Dimension(0, 120),
                                y_dim=Dimension(0, 80)),
                            periods=periods,
                            records=events)
Example #8
File: tracab.py Project: MLonSoccer/kloppy
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataSet:
        """
        Deserialize TRACAB tracking data into a `TrackingDataSet`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'csv' formatted raw data. input `meta_data` should point to
            the XML metadata.
        options : dict
            Options for deserialization of the TRACAB file. Possible options are
            `only_alive` (boolean) to specify that only frames with an alive ball
            state should be loaded, or `sample_rate` (float between 0 and 1) to
            specify the fraction of frames that should be loaded.
        Returns
        -------
        data_set : TrackingDataSet
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     data_set = serializer.deserialize(
        >>>         inputs={
        >>>             'meta_data': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'only_alive': True,
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get('sample_rate', 1.0))
        only_alive = bool(options.get('only_alive', True))

        with performance_logging("Loading metadata"):
            match = objectify.fromstring(inputs['meta_data'].read()).match
            frame_rate = int(match.attrib['iFrameRateFps'])
            pitch_size_width = float(match.attrib['fPitchXSizeMeters'])
            pitch_size_height = float(match.attrib['fPitchYSizeMeters'])

            periods = []
            for period in match.iterchildren(tag='period'):
                start_frame_id = int(period.attrib['iStartFrame'])
                end_frame_id = int(period.attrib['iEndFrame'])
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(id=int(period.attrib['iId']),
                               start_timestamp=start_frame_id / frame_rate,
                               end_timestamp=end_frame_id / frame_rate))

        with performance_logging("Loading data"):

            def _iter():
                n = 0
                sample = 1. / sample_rate

                for line in inputs['raw_data'].readlines():
                    line = line.strip().decode("ascii")
                    if not line:
                        continue

                    frame_id = int(line[:10].split(":", 1)[0])
                    if only_alive and not line.endswith("Alive;:"):
                        continue

                    for period in periods:
                        if period.contains(frame_id / frame_rate):
                            if n % sample == 0:
                                yield period, line
                            n += 1

            frames = []
            for period, line in _iter():
                frame = self._frame_from_line(period, line, frame_rate)

                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame))

        orientation = (
            Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction
            == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME)

        return TrackingDataSet(flags=DataSetFlag.BALL_OWNING_TEAM
                               | DataSetFlag.BALL_STATE,
                               frame_rate=frame_rate,
                               orientation=orientation,
                               pitch_dimensions=PitchDimensions(
                                   x_dim=Dimension(-1 * pitch_size_width / 2,
                                                   pitch_size_width / 2),
                                   y_dim=Dimension(-1 * pitch_size_height / 2,
                                                   pitch_size_height / 2),
                                   x_per_meter=100,
                                   y_per_meter=100),
                               periods=periods,
                               records=frames)
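
A small standalone illustration of the frame-sampling rule used in _iter() above: sample is the inverse of sample_rate, and only every sample-th eligible frame (where n % sample == 0) is yielded.

# Not the TRACAB parser itself, just the sampling arithmetic it applies.
sample_rate = 1 / 12
sample = 1.0 / sample_rate
kept = [n for n in range(100) if n % sample == 0]
print(kept)  # [0, 12, 24, 36, 48, 60, 72, 84, 96]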
Example #9
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataset:
        """
        Deserialize EPTS tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'csv' formatted raw data. input `meta_data` should point to
            the XML metadata.
        options : dict
            Options for deserialization of the EPTS file. Possible options are
            `sample_rate` (float between 0 and 1) to specify the fraction of
            frames that should be loaded, and `limit` to specify the maximum
            number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = EPTSSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'meta_data': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))

        with performance_logging("Loading metadata", logger=logger):
            meta_data = load_meta_data(inputs["meta_data"])

        periods = meta_data.periods

        with performance_logging("Loading data", logger=logger):
            # assume they are sorted
            frames = [
                self._frame_from_row(row, meta_data)
                for row in read_raw_data(
                    raw_data=inputs["raw_data"],
                    meta_data=meta_data,
                    sensor_ids=["position"],  # we don't care about other sensors
                    sample_rate=sample_rate,
                    limit=limit,
                )
            ]

        if periods:
            start_attacking_direction = periods[0].attacking_direction
        elif frames:
            start_attacking_direction = attacking_direction_from_frame(
                frames[0])
        else:
            start_attacking_direction = None

        orientation = (
            (Orientation.FIXED_HOME_AWAY if start_attacking_direction ==
             AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME) if
            start_attacking_direction != AttackingDirection.NOT_SET else None)

        return TrackingDataset(
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            frame_rate=meta_data.frame_rate,
            orientation=orientation,
            pitch_dimensions=meta_data.pitch_dimensions,
            periods=periods,
            records=frames,
        )
Example #10
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize Metrica Sports JSON event data into an `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `raw_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `metadata` should point
                    to a `Readable` object containing the XML metadata.
                options : dict
                    Options for deserialization of the Metrica Sports event JSON file.
                    Possible options are `event_types` (list of event types) to specify
                    the event types that should be returned. Valid types: "shot", "pass",
                    "carry", "take_on" and "generic". Generic covers everything other
                    than the first four; such events are only minimally parsed, so use
                    them as a starting point if you need to do the parsing yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = MetricaEventsJsonSerializer()
                >>> with open("events.json", "rb") as raw_data, \
                >>>      open("metadata.xml", "rb") as metadata:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'raw_data': raw_data,
                >>>             'metadata': metadata
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["raw_data"])
            metadata = load_metadata(inputs["metadata"],
                                     provider=Provider.METRICA)

        with performance_logging("parse data", logger=logger):

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            events = []
            for raw_event in raw_events["data"]:
                if raw_event["team"]["id"] == metadata.teams[0].team_id:
                    team = metadata.teams[0]
                elif raw_event["team"]["id"] == metadata.teams[1].team_id:
                    team = metadata.teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                player = team.get_player_by_id(raw_event["from"]["id"])
                event_type = raw_event["type"]["id"]
                subtypes = _parse_subtypes(raw_event)
                period = [
                    period for period in metadata.periods
                    if period.id == raw_event["period"]
                ][0]

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=raw_event["start"]["time"],
                    ball_owning_team=_parse_ball_owning_team(event_type, team),
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=None,
                    team=team,
                    player=player,
                    coordinates=(_parse_coordinates(raw_event["start"])),
                    raw_event=raw_event,
                )

                if event_type in MS_PASS_TYPES:
                    pass_event_kwargs = _parse_pass(
                        event=raw_event,
                        subtypes=subtypes,
                        team=team,
                    )
                    event = PassEvent(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(event=raw_event,
                                                    subtypes=subtypes)
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)

                elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes:
                    take_on_event_kwargs = _parse_take_on(subtypes=subtypes)
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == MS_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(event=raw_event)
                    event = CarryEvent(
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if (not wanted_event_types
                        or event.event_type in wanted_event_types):
                    events.append(event)

        return EventDataset(
            metadata=metadata,
            records=events,
        )
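
A hedged usage sketch for the Metrica events deserializer above; the file names mirror the docstring, the serializer import location is an assumption, and to_pandas (used in Example #1) is assumed to accept event datasets as well.

serializer = MetricaEventsJsonSerializer()
with open("events.json", "rb") as raw_data, \
        open("metadata.xml", "rb") as metadata:
    dataset = serializer.deserialize(
        inputs={"raw_data": raw_data, "metadata": metadata},
        options={"event_types": ["pass", "take_on", "carry", "shot"]},
    )

# Flatten to a dataframe for quick inspection.
dataframe = to_pandas(dataset)
print(dataframe.head())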
Example #11
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataset:
        """
        Deserialize TRACAB tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'csv' formatted raw data. input `metadata` should point to
            the XML metadata.
        options : dict
            Options for deserialization of the TRACAB file. Possible options are
            `only_alive` (boolean) to specify that only frames with an alive ball
            state should be loaded, `sample_rate` (float between 0 and 1) to
            specify the fraction of frames that should be loaded, and `limit` to
            specify the maximum number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'metadata': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'only_alive': True,
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))
        only_alive = bool(options.get("only_alive", True))

        # TODO: also used in Metrica, extract to a method
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        with performance_logging("Loading metadata", logger=logger):
            match = objectify.fromstring(inputs["metadata"].read()).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id / frame_rate,
                            end_timestamp=end_frame_id / frame_rate,
                        ))

        with performance_logging("Loading data", logger=logger):

            def _iter():
                n = 0
                sample = 1.0 / sample_rate

                for line_ in inputs["raw_data"].readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    frame_id = int(line_[:10].split(":", 1)[0])
                    if only_alive and not line_.endswith("Alive;:"):
                        continue

                    for period_ in periods:
                        if period_.contains(frame_id / frame_rate):
                            if n % sample == 0:
                                yield period_, line_
                            n += 1

            frames = []
            for n, (period, line) in enumerate(_iter()):
                frame = self._frame_from_line(teams, period, line, frame_rate)

                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame))

                if limit and n >= limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction
            == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(-1 * pitch_size_width / 2,
                                pitch_size_width / 2),
                y_dim=Dimension(-1 * pitch_size_height / 2,
                                pitch_size_height / 2),
                x_per_meter=100,
                y_per_meter=100,
            ),
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.TRACAB,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
Example #12
def run_query(argv=sys.argv[1:]):
    parser = argparse.ArgumentParser(description="Run query on event data")
    parser.add_argument('--input-statsbomb', help="StatsBomb event input files (events.json,lineup.json)")
    parser.add_argument('--output-xml', help="Output file")
    parser.add_argument('--with-success', default=True, help="Include existence of 'success' capture in output")
    parser.add_argument('--prepend-time', type=float, default=7, help="Seconds to prepend to match")
    parser.add_argument('--append-time', type=float, default=5, help="Seconds to append to match")
    parser.add_argument('--query-file', help="File containing the query", required=True)
    parser.add_argument('--stats', default="none", help="Show matches stats", choices=["text", "json", "none"])
    parser.add_argument('--show-events', default=False, help="Show events for each match", action="store_true")

    logger = logging.getLogger("run_query")
    logging.basicConfig(stream=sys.stderr, level=logging.INFO,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    opts = parser.parse_args(argv)

    query = load_query(opts.query_file)

    dataset = None
    if opts.input_statsbomb:
        with performance_logging("load dataset", logger=logger):
            events_filename, lineup_filename = opts.input_statsbomb.split(",")
            dataset = load_statsbomb_event_data(
                events_filename.strip(),
                lineup_filename.strip(),
                options={
                    "event_types": query.event_types
                }
            )

    if not dataset:
        raise Exception("You have to specify a dataset.")

    with performance_logging("searching", logger=logger):
        matches = pm.search(dataset, query.pattern)

    video_fragments = []
    counter = Counter()
    for i, match in enumerate(matches):
        team = match.events[0].team
        success = 'success' in match.captures

        counter.update({
            f"{team}_total": 1,
            f"{team}_success": 1 if success else 0
        })

        if opts.show_events:
            print_match(i, match, success, str(team))

        if opts.output_xml:
            label = str(team)
            if opts.with_success and success:
                label += " success"

            start_timestamp = (
                match.events[0].timestamp +
                match.events[0].period.start_timestamp -
                opts.prepend_time
            )
            end_timestamp = (
                match.events[-1].timestamp +
                match.events[-1].period.start_timestamp +
                opts.append_time
            )

            video_fragments.append(
                VideoFragment(
                    id_=str(i),
                    start=start_timestamp,
                    end=end_timestamp,
                    label=label
                )
            )

    if opts.output_xml:
        write_to_xml(video_fragments, opts.output_xml)
        logger.info(f"Wrote {len(video_fragments)} video fragments to file")

    if opts.stats == "text":
        print("Home:")
        print(f"  total count: {counter['home_total']}")
        print(
            f"    success: {counter['home_success']} ({counter['home_success'] / counter['home_total'] * 100:.0f}%)")
        print(
            f"    no success: {counter['home_total'] - counter['home_success']} ({(counter['home_total'] - counter['home_success']) / counter['home_total'] * 100:.0f}%)")
        print("")
        print("Away:")
        print(f"  total count: {counter['away_total']}")
        print(
            f"    success: {counter['away_success']} ({counter['away_success'] / counter['away_total'] * 100:.0f}%)")
        print(
            f"    no success: {counter['away_total'] - counter['away_success']} ({(counter['away_total'] - counter['away_success']) / counter['away_total'] * 100:.0f}%)")
    elif opts.stats == "json":
        import json
        print(json.dumps(counter, indent=4))
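
A hedged example of driving run_query() programmatically via its argv parameter; the file paths are placeholders, and the query module is assumed to define the event_types and pattern attributes that load_query() exposes above.

run_query([
    "--input-statsbomb", "events/12312312.json,lineups/123123123.json",
    "--query-file", "my_query.py",
    "--stats", "text",
    "--show-events",
])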