Ejemplo n.º 1
0
 def test_find_all(self, dataset: EventDataset):
     """
     Test find_all allows simple 'css selector' (<event_type>.<result>)
     """
     goals = dataset.find_all("shot.goal")
     assert len(goals) == 3
     assert goals[0].prev("shot.goal") is None
     assert goals[0].next("shot.goal") == goals[1]
     assert goals[0].next("shot.goal") == goals[2].prev("shot.goal")
     assert goals[2].next("shot.goal") is None
Ejemplo n.º 2
0
    def test_filter(self, dataset: EventDataset):
        """
        Test filtering allows simple 'css selector' (<event_type>.<result>)
        """
        goals_dataset = dataset.filter("shot.goal")

        df = goals_dataset.to_pandas()
        assert df["event_id"].to_list() == [
            "4c7c4ab1-6b9f-4504-a237-249c2e0c549f",
            "683c6752-13bc-4892-94ed-22e1c938f1f7",
            "55d71847-9511-4417-aea9-6f415e279011",
        ]
Ejemplo n.º 3
0
    def test_navigation(self, dataset: EventDataset):
        """
        Test navigating (next/prev) through events
        """
        passes = dataset.find_all("pass")
        assert passes[0].next("pass") == passes[1]
        assert passes[1].prev("pass") == passes[0]
        assert passes[0].next() == dataset.get_event_by_id(
            "61da36dc-d862-416c-8ee3-1a0cd24dc086"
        )

        goals = dataset.find_all("shot.goal")
        assert len(goals) == 3
        assert goals[0].prev("shot.goal") is None
        assert goals[0].next("shot.goal") == goals[1]
        assert goals[0].next("shot.goal") == goals[2].prev("shot.goal")
        assert goals[2].next("shot.goal") is None

        first_goal = dataset.find("shot.goal")
        assert first_goal == goals[0]
        assert first_goal.next(".goal") == goals[1]
        assert first_goal.next(".goal").next(".goal") == goals[2]
        assert first_goal.next(".goal").next(".goal").next(".goal") is None
Ejemplo n.º 4
0
    def transform_dataset(
        cls,
        dataset: DatasetT,
        to_pitch_dimensions: PitchDimensions = None,
        to_orientation: Orientation = None,
    ) -> DatasetT:
        if not to_pitch_dimensions and not to_orientation:
            return dataset
        elif not to_orientation:
            to_orientation = dataset.metadata.orientation
        elif not to_pitch_dimensions:
            to_pitch_dimensions = dataset.metadata.pitch_dimensions

        if to_orientation == Orientation.BALL_OWNING_TEAM:
            if not dataset.metadata.flags & DatasetFlag.BALL_OWNING_TEAM:
                raise ValueError(
                    "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
                    "ball owning team data"
                )

        transformer = cls(
            from_pitch_dimensions=dataset.metadata.pitch_dimensions,
            from_orientation=dataset.metadata.orientation,
            to_pitch_dimensions=to_pitch_dimensions,
            to_orientation=to_orientation,
        )
        metadata = replace(
            dataset.metadata,
            pitch_dimensions=to_pitch_dimensions,
            orientation=to_orientation,
        )
        if isinstance(dataset, TrackingDataset):
            frames = [
                transformer.transform_frame(record)
                for record in dataset.records
            ]

            return TrackingDataset(
                metadata=metadata,
                records=frames,
            )
        elif isinstance(dataset, EventDataset):
            events = list(map(transformer.transform_event, dataset.records))

            return EventDataset(
                metadata=metadata,
                records=events,
            )
        else:
            raise Exception("Unknown Dataset type")
Ejemplo n.º 5
0
    def test_string_columns(self, dataset: EventDataset):
        """
        When string columns passed to to_records the data must be searched from:
        1. the output of Default()
        2. attributes of the event
        """

        records = dataset.filter("pass").to_records("timestamp",
                                                    "coordinates_x",
                                                    "coordinates")
        assert records[0] == {
            "timestamp": 0.098,
            "coordinates_x": 60.5,
            "coordinates": Point(x=60.5, y=40.5),
        }
Ejemplo n.º 6
0
    def test_string_wildcard_columns(self, dataset: EventDataset):
        """
        Make sure it's possible to specify wildcard pattern to match attributes.
        """

        records = dataset.filter("pass").to_records(
            "timestamp",
            "player_id",
            "coordinates_*",
            DistanceToGoalTransformer(),
        )
        assert records[0] == {
            "timestamp": 0.098,
            "player_id": "6581",
            "coordinates_x": 60.5,
            "coordinates_y": 40.5,
            "distance_to_goal": 59.50210080324896,
        }
Ejemplo n.º 7
0
 def test_default_columns(self, dataset: EventDataset):
     records = dataset.to_records()
     assert len(records) == 4023
     assert list(records[0].keys()) == [
         "event_id",
         "event_type",
         "result",
         "success",
         "period_id",
         "timestamp",
         "end_timestamp",
         "ball_state",
         "ball_owning_team",
         "team_id",
         "player_id",
         "coordinates_x",
         "coordinates_y",
     ]
Ejemplo n.º 8
0
    def transform_dataset(cls,
                          dataset: DatasetType,
                          to_pitch_dimensions: PitchDimensions = None,
                          to_orientation: Orientation = None) -> DatasetType:
        if not to_pitch_dimensions and not to_orientation:
            return dataset
        elif not to_orientation:
            to_orientation = dataset.orientation
        elif not to_pitch_dimensions:
            to_pitch_dimensions = dataset.pitch_dimensions

        if to_orientation == Orientation.BALL_OWNING_TEAM:
            if not dataset.flags & DatasetFlag.BALL_OWNING_TEAM:
                raise ValueError(
                    "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
                    "ball owning team data")

        transformer = cls(from_pitch_dimensions=dataset.pitch_dimensions,
                          from_orientation=dataset.orientation,
                          to_pitch_dimensions=to_pitch_dimensions,
                          to_orientation=to_orientation)
        if isinstance(dataset, TrackingDataset):
            frames = list(map(transformer.transform_frame, dataset.records))

            return TrackingDataset(flags=dataset.flags,
                                   frame_rate=dataset.frame_rate,
                                   periods=dataset.periods,
                                   pitch_dimensions=to_pitch_dimensions,
                                   orientation=to_orientation,
                                   records=frames)
        elif isinstance(dataset, EventDataset):
            events = list(map(transformer.transform_event, dataset.records))

            return EventDataset(flags=dataset.flags,
                                periods=dataset.periods,
                                pitch_dimensions=to_pitch_dimensions,
                                orientation=to_orientation,
                                records=events)
        else:
            raise Exception("Unknown Dataset type")
Ejemplo n.º 9
0
    def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset:
        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs.event_data)
            metadata = load_metadata(inputs.meta_data,
                                     provider=Provider.METRICA)

            transformer = self.get_transformer(
                length=metadata.pitch_dimensions.length,
                width=metadata.pitch_dimensions.width,
            )

        with performance_logging("parse data", logger=logger):
            events = []
            for i, raw_event in enumerate(raw_events["data"]):

                if raw_event["team"]["id"] == metadata.teams[0].team_id:
                    team = metadata.teams[0]
                elif raw_event["team"]["id"] == metadata.teams[1].team_id:
                    team = metadata.teams[1]
                else:
                    raise DeserializationError(
                        f"Unknown team_id {raw_event['team']['id']}")

                player = team.get_player_by_id(raw_event["from"]["id"])
                event_type = raw_event["type"]["id"]
                subtypes = _parse_subtypes(raw_event)
                period = [
                    period for period in metadata.periods
                    if period.id == raw_event["period"]
                ][0]
                previous_event = raw_events["data"][i - 1]

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=raw_event["start"]["time"],
                    ball_owning_team=_parse_ball_owning_team(event_type, team),
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=None,
                    team=team,
                    player=player,
                    coordinates=(_parse_coordinates(raw_event["start"])),
                    raw_event=raw_event,
                )

                iteration_events = []

                if event_type in MS_PASS_TYPES:
                    pass_event_kwargs = _parse_pass(
                        event=raw_event,
                        previous_event=previous_event,
                        subtypes=subtypes,
                        team=team,
                    )

                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        event=raw_event,
                        previous_event=previous_event,
                        subtypes=subtypes,
                    )
                    event = ShotEvent.create(
                        **shot_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes:
                    take_on_event_kwargs = _parse_take_on(subtypes=subtypes)
                    event = TakeOnEvent.create(
                        qualifiers=None,
                        **take_on_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(event=raw_event, )
                    event = CarryEvent.create(
                        qualifiers=None,
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_RECOVERY:
                    event = RecoveryEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_FOUL_COMMITTED:
                    event = FoulCommittedEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                else:
                    event = GenericEvent.create(
                        result=None,
                        qualifiers=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if self.should_include_event(event):
                    events.append(transformer.transform_event(event))

                # Checks if the event ended out of the field and adds a synthetic out event
                if event.result in OUT_EVENT_RESULTS:
                    generic_event_kwargs["ball_state"] = BallState.DEAD
                    if raw_event["end"]["x"]:
                        generic_event_kwargs[
                            "coordinates"] = _parse_coordinates(
                                raw_event["end"])
                        generic_event_kwargs["timestamp"] = raw_event["end"][
                            "time"]

                        event = BallOutEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                        if self.should_include_event(event):
                            events.append(transformer.transform_event(event))

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 10
0
    def deserialize(self, inputs: OptaInputs) -> EventDataset:
        transformer = self.get_transformer(length=100, width=100)

        with performance_logging("load data", logger=logger):
            f7_root = objectify.fromstring(inputs.f7_data.read())
            f24_root = objectify.fromstring(inputs.f24_data.read())

        with performance_logging("parse data", logger=logger):
            matchdata_path = objectify.ObjectPath(
                "SoccerFeed.SoccerDocument.MatchData")
            team_elms = list(
                matchdata_path.find(f7_root).iterchildren("TeamData"))

            home_score = None
            away_score = None
            for team_elm in team_elms:
                if team_elm.attrib["Side"] == "Home":
                    home_score = team_elm.attrib["Score"]
                    home_team = _team_from_xml_elm(team_elm, f7_root)
                elif team_elm.attrib["Side"] == "Away":
                    away_score = team_elm.attrib["Score"]
                    away_team = _team_from_xml_elm(team_elm, f7_root)
                else:
                    raise DeserializationError(
                        f"Unknown side: {team_elm.attrib['Side']}")
            score = Score(home=home_score, away=away_score)
            teams = [home_team, away_team]

            if len(home_team.players) == 0 or len(away_team.players) == 0:
                raise DeserializationError("LineUp incomplete")

            game_elm = f24_root.find("Game")
            periods = [
                Period(
                    id=1,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
                Period(
                    id=2,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
            ]
            possession_team = None
            events = []
            for event_elm in game_elm.iterchildren("Event"):
                event_id = event_elm.attrib["id"]
                type_id = int(event_elm.attrib["type_id"])
                timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
                period_id = int(event_elm.attrib["period_id"])
                for period in periods:
                    if period.id == period_id:
                        break
                else:
                    logger.debug(
                        f"Skipping event {event_id} because period doesn't match {period_id}"
                    )
                    continue

                if type_id == EVENT_TYPE_START_PERIOD:
                    logger.debug(
                        f"Set start of period {period.id} to {timestamp}")
                    period.start_timestamp = timestamp
                elif type_id == EVENT_TYPE_END_PERIOD:
                    logger.debug(
                        f"Set end of period {period.id} to {timestamp}")
                    period.end_timestamp = timestamp
                else:
                    if not period.start_timestamp:
                        # not started yet
                        continue

                    if event_elm.attrib["team_id"] == home_team.team_id:
                        team = teams[0]
                    elif event_elm.attrib["team_id"] == away_team.team_id:
                        team = teams[1]
                    else:
                        raise DeserializationError(
                            f"Unknown team_id {event_elm.attrib['team_id']}")

                    x = float(event_elm.attrib["x"])
                    y = float(event_elm.attrib["y"])
                    outcome = int(event_elm.attrib["outcome"])
                    raw_qualifiers = {
                        int(qualifier_elm.attrib["qualifier_id"]):
                        qualifier_elm.attrib.get("value")
                        for qualifier_elm in event_elm.iterchildren("Q")
                    }
                    player = None
                    if "player_id" in event_elm.attrib:
                        player = team.get_player_by_id(
                            event_elm.attrib["player_id"])

                    if type_id in BALL_OWNING_EVENTS:
                        possession_team = team

                    generic_event_kwargs = dict(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp - period.start_timestamp,
                        ball_owning_team=possession_team,
                        ball_state=BallState.ALIVE,
                        # from Event
                        event_id=event_id,
                        team=team,
                        player=player,
                        coordinates=Point(x=x, y=y),
                        raw_event=event_elm,
                    )

                    if type_id == EVENT_TYPE_PASS:
                        pass_event_kwargs = _parse_pass(
                            raw_qualifiers, outcome)
                        event = PassEvent.create(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                        pass_event_kwargs = _parse_offside_pass(raw_qualifiers)
                        event = PassEvent.create(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_TAKE_ON:
                        take_on_event_kwargs = _parse_take_on(outcome)
                        event = TakeOnEvent.create(
                            qualifiers=None,
                            **take_on_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id in (
                            EVENT_TYPE_SHOT_MISS,
                            EVENT_TYPE_SHOT_POST,
                            EVENT_TYPE_SHOT_SAVED,
                            EVENT_TYPE_SHOT_GOAL,
                    ):
                        if type_id == EVENT_TYPE_SHOT_GOAL:
                            if 374 in raw_qualifiers.keys():
                                generic_event_kwargs["timestamp"] = (
                                    _parse_f24_datetime(
                                        raw_qualifiers.get(374).replace(
                                            " ", "T")) -
                                    period.start_timestamp)
                        shot_event_kwargs = _parse_shot(
                            raw_qualifiers,
                            type_id,
                            coordinates=generic_event_kwargs["coordinates"],
                        )
                        kwargs = {}
                        kwargs.update(generic_event_kwargs)
                        kwargs.update(shot_event_kwargs)
                        event = ShotEvent.create(**kwargs)

                    elif type_id == EVENT_TYPE_RECOVERY:
                        event = RecoveryEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                        event = FoulCommittedEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    elif type_id in BALL_OUT_EVENTS:
                        generic_event_kwargs["ball_state"] = BallState.DEAD
                        event = BallOutEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    elif type_id == EVENT_TYPE_FORMATION_CHANGE:
                        formation_change_event_kwargs = (
                            _parse_formation_change(raw_qualifiers))
                        event = FormationChangeEvent.create(
                            result=None,
                            qualifiers=None,
                            **formation_change_event_kwargs,
                            **generic_event_kwargs,
                        )

                    elif type_id == EVENT_TYPE_CARD:
                        generic_event_kwargs["ball_state"] = BallState.DEAD
                        card_event_kwargs = _parse_card(raw_qualifiers)

                        event = CardEvent.create(
                            **card_event_kwargs,
                            **generic_event_kwargs,
                        )

                    else:
                        event = GenericEvent.create(
                            **generic_event_kwargs,
                            result=None,
                            qualifiers=None,
                            event_name=_get_event_type_name(type_id),
                        )

                    if self.should_include_event(event):
                        events.append(transformer.transform_event(event))

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().
            pitch_dimensions,
            score=score,
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            provider=Provider.OPTA,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 11
0
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> EventDataset:
        """
                Deserialize StatsBomb event data into a `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `event_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `lineup_data` should point
                    to a `Readable` object containing the 'json' formatted lineup data.
                options : dict
                    Options for deserialization of the StatsBomb file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on" and
                    "generic". Generic is everything other than the first 4. Those events
                    are barely parsed. This type of event can be used to do the parsing
                    yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = StatsBombSerializer()
                >>> with open("events/12312312.json", "rb") as event_data, \
                >>>      open("lineups/123123123.json", "rb") as lineup_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'event_data': event_data,
                >>>             'lineup_data': lineup_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["event_data"])
            home_lineup, away_lineup = json.load(inputs["lineup_data"])
            (
                shot_fidelity_version,
                xy_fidelity_version,
            ) = _determine_xy_fidelity_versions(raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):

            home_team = Team(
                team_id=str(home_lineup["team_id"]),
                name=home_lineup["team_name"],
                ground=Ground.HOME,
            )
            home_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=home_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in home_lineup["lineup"]
            ]

            away_team = Team(
                team_id=str(away_lineup["team_id"]),
                name=away_lineup["team_name"],
                ground=Ground.AWAY,
            )
            away_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=away_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in away_lineup["lineup"]
            ]

            teams = [home_team, away_team]

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                if raw_event["team"]["id"] == home_lineup["team_id"]:
                    team = teams[0]
                elif raw_event["team"]["id"] == away_lineup["team_id"]:
                    team = teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}"
                    )

                if (
                    raw_event["possession_team"]["id"]
                    == home_lineup["team_id"]
                ):
                    possession_team = teams[0]
                elif (
                    raw_event["possession_team"]["id"]
                    == away_lineup["team_id"]
                ):
                    possession_team = teams[1]
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event["timestamp"])
                period_id = int(raw_event["period"])
                if not period or period.id != period_id:
                    period = Period(
                        id=period_id,
                        start_timestamp=(
                            timestamp
                            if not period
                            # period = [start, end], add millisecond to prevent overlapping
                            else timestamp + period.end_timestamp + 0.001
                        ),
                        end_timestamp=None,
                    )
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                player = None
                if "player" in raw_event:
                    player = team.get_player_by_id(raw_event["player"]["id"])

                event_type = raw_event["type"]["id"]
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (
                    SB_EVENT_TYPE_CARRY,
                    SB_EVENT_TYPE_DRIBBLE,
                    SB_EVENT_TYPE_PASS,
                ):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=raw_event["id"],
                    team=team,
                    player=player,
                    coordinates=(
                        _parse_coordinates(
                            raw_event.get("location"), fidelity_version
                        )
                        if "location" in raw_event
                        else None
                    ),
                    raw_event=raw_event,
                )

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event["pass"],
                        team=team,
                        fidelity_version=fidelity_version,
                    )

                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event["duration"],
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event["shot"]
                    )
                    event = ShotEvent(
                        **shot_event_kwargs, **generic_event_kwargs
                    )

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event["dribble"]
                    )
                    event = TakeOnEvent(
                        **take_on_event_kwargs, **generic_event_kwargs
                    )
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event["carry"],
                        fidelity_version=fidelity_version,
                    )
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event["duration"],
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if (
                    not wanted_event_types
                    or event.event_type in wanted_event_types
                ):
                    events.append(event)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)
            ),
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=None,
        )

        return EventDataset(metadata=metadata, records=events,)
Ejemplo n.º 12
0
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> EventDataset:
        WyscoutSerializer.__validate_inputs(inputs)

        if not options:
            options = {}

        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["event_data"])

        periods = []

        with performance_logging("parse data", logger=logger):
            home_team_id, away_team_id = raw_events["teams"].keys()
            home_team = _parse_team(raw_events, home_team_id, Ground.HOME)
            away_team = _parse_team(raw_events, away_team_id, Ground.AWAY)
            teams = {home_team_id: home_team, away_team_id: away_team}
            players = dict(
                [
                    (wyId, _players_to_dict(team.players))
                    for wyId, team in teams.items()
                ]
            )

            events = []

            for idx, raw_event in enumerate(raw_events["events"]):
                next_event = None
                if (idx + 1) < len(raw_events["events"]):
                    next_event = raw_events["events"][idx + 1]

                team_id = str(raw_event["teamId"])
                player_id = str(raw_event["playerId"])

                if (
                    len(periods) == 0
                    or periods[-1].id != raw_event["matchPeriod"]
                ):
                    periods.append(
                        Period(
                            id=raw_event["matchPeriod"],
                            start_timestamp=0,
                            end_timestamp=0,
                        )
                    )

                generic_event_args = {
                    "event_id": raw_event["id"],
                    "raw_event": raw_event,
                    "coordinates": Point(
                        x=float(raw_event["positions"][0]["x"]),
                        y=float(raw_event["positions"][0]["y"]),
                    ),
                    "team": teams[team_id],
                    "player": players[team_id][player_id]
                    if player_id != INVALID_PLAYER
                    else None,
                    "ball_owning_team": None,
                    "ball_state": None,
                    "period": periods[-1],
                    "timestamp": raw_event["eventSec"],
                }

                event = None
                if raw_event["eventName"] == wyscout_events.SHOT.EVENT:
                    shot_event_args = _parse_shot(raw_event, next_event)
                    event = ShotEvent.create(
                        **shot_event_args, **generic_event_args
                    )
                elif raw_event["eventName"] == wyscout_events.PASS.EVENT:
                    pass_event_args = _parse_pass(raw_event, next_event)
                    event = PassEvent.create(
                        **pass_event_args, **generic_event_args
                    )
                elif raw_event["eventName"] == wyscout_events.FOUL.EVENT:
                    foul_event_args = _parse_foul(raw_event)
                    event = FoulCommittedEvent.create(
                        **foul_event_args, **generic_event_args
                    )
                    if any(
                        (_has_tag(raw_event, tag) for tag in wyscout_tags.CARD)
                    ):
                        card_event_args = _parse_card(raw_event)
                        event = CardEvent.create(
                            **card_event_args, **generic_event_args
                        )
                elif (
                    raw_event["eventName"] == wyscout_events.INTERRUPTION.EVENT
                ):
                    ball_out_event_args = _parse_ball_out(raw_event)
                    event = BallOutEvent.create(
                        **ball_out_event_args, **generic_event_args
                    )
                elif raw_event["eventName"] == wyscout_events.FREE_KICK.EVENT:
                    set_piece_event_args = _parse_set_piece(
                        raw_event, next_event
                    )
                    if (
                        raw_event["subEventName"]
                        in wyscout_events.FREE_KICK.PASS_TYPES
                    ):
                        event = PassEvent.create(
                            **set_piece_event_args, **generic_event_args
                        )
                    elif (
                        raw_event["subEventName"]
                        in wyscout_events.FREE_KICK.SHOT_TYPES
                    ):
                        event = ShotEvent.create(
                            **set_piece_event_args, **generic_event_args
                        )

                elif (
                    raw_event["eventName"]
                    == wyscout_events.OTHERS_ON_BALL.EVENT
                ):
                    recovery_event_args = _parse_recovery(raw_event)
                    event = RecoveryEvent.create(
                        **recovery_event_args, **generic_event_args
                    )
                elif raw_event["eventName"] == wyscout_events.DUEL.EVENT:
                    takeon_event_args = _parse_takeon(raw_event)
                    event = TakeOnEvent.create(
                        **takeon_event_args, **generic_event_args
                    )
                elif raw_event["eventName"] not in [
                    wyscout_events.SAVE.EVENT,
                    wyscout_events.OFFSIDE.EVENT,
                ]:
                    # The events SAVE and OFFSIDE are already merged with PASS and SHOT events
                    qualifiers = _generic_qualifiers(raw_event)
                    event = GenericEvent.create(
                        result=None,
                        qualifiers=qualifiers,
                        **generic_event_args
                    )

                if event and _include_event(event, wanted_event_types):
                    events.append(event)

        metadata = Metadata(
            teams=[home_team, away_team],
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)
            ),
            score=None,
            frame_rate=None,
            orientation=Orientation.BALL_OWNING_TEAM,
            flags=None,
            provider=Provider.WYSCOUT,
        )

        return EventDataset(metadata=metadata, records=events)
Ejemplo n.º 13
0
    def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
        self.__validate_inputs(inputs)

        periods = []
        period = None

        events = []

        game_state = self.__GameState(
            ball_state=BallState.DEAD,
            ball_owning_team=None
        )

        reader = csv.DictReader(map(lambda x: x.decode('utf-8'), inputs['raw_data']))
        for event_id, record in enumerate(reader):
            event_type = event_type_map[record['Type']]
            subtypes = record['Subtype'].split('-')

            start_timestamp = float(record['Start Time [s]'])
            end_timestamp = float(record['End Time [s]'])

            period_id = int(record['Period'])
            if not period or period.id != period_id:
                period = Period(
                    id=period_id,
                    start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp
                )
                periods.append(period)
            else:
                period.end_timestamp = end_timestamp

            if record['Team'] == 'Home':
                team = Team.HOME
            elif record['Team'] == 'Away':
                team = Team.AWAY
            else:
                raise ValueError(f'Unknown team: {record["team"]}')

            event_kwargs = dict(
                # From DataRecord:
                timestamp=start_timestamp,
                ball_owning_team=None,  ## todo
                ball_state=None,  # todo
                period=period,

                # From Event:
                event_id=event_id,
                team=team,
                end_timestamp=end_timestamp,
                player_jersey_no=record['From'][6:],
                position=Point(
                    x=float(record['Start X']),
                    y=1 - float(record['Start Y'])
                ) if record['Start X'] != 'NaN' else None,
            )

            secondary_position = None
            if record['End X'] != 'NaN':
                secondary_position = Point(
                    x=float(record['End X']),
                    y=1 - float(record['End Y'])
                )

            secondary_jersey_no = None
            if record['To']:
                secondary_jersey_no = record['To'][6:]

            event = None
            if event_type == EventType.SET_PIECE:
                set_piece, fk_attempt, retaken = \
                    build_subtypes(subtypes, [SetPiece, FKAttempt, Retaken])

                event = SetPieceEvent(
                    **event_kwargs
                )
            elif event_type == EventType.RECOVERY:
                interference1, interference2 = \
                    build_subtypes(subtypes, [Interference1, Interference2])

                event = RecoveryEvent(
                    **event_kwargs
                )
            elif event_type == EventType.PASS:
                body_part, attempt, deflection, offside = \
                    build_subtypes(subtypes, [BodyPart, Attempt, Deflection, Offside])

                event = PassEvent(
                    receiver_position=secondary_position,
                    receiver_player_jersey_no=secondary_jersey_no,
                    **event_kwargs
                )
            elif event_type == EventType.BALL_LOST:
                body_part, attempt, interference1, intervention, deflection, offside = \
                    build_subtypes(subtypes, [
                        BodyPart, Attempt, Interference1, Intervention,
                        Deflection, Offside
                    ])

                event = BallLossEvent(
                    **event_kwargs
                )
            elif event_type == EventType.BALL_OUT:
                body_part, attempt, intervention, deflection, offside, own_goal = \
                    build_subtypes(subtypes, [
                        BodyPart, Attempt, Intervention, Deflection,
                        Offside, OwnGoal
                    ])

                event = BallOutEvent(
                    **event_kwargs
                )
            elif event_type == EventType.SHOT:
                body_part, deflection, shot_direction, shot_result, offside = \
                    build_subtypes(subtypes, [
                        BodyPart, Deflection, ShotDirection,
                        ShotResult, Offside
                    ])

                event = ShotEvent(
                    shot_result=shot_result,
                    **event_kwargs
                )
            elif event_type == EventType.FAULT_RECEIVED:
                event = FaultReceivedEvent(
                    **event_kwargs
                )
            elif event_type == EventType.CHALLENGE:
                challenge, fault, challenge_result = \
                    build_subtypes(subtypes, [Challenge, Fault, ChallengeResult])

                event = ChallengeEvent(
                    **event_kwargs
                )
            elif event_type == EventType.CARD:
                card, = build_subtypes(subtypes, [Card])

                event = CardEvent(
                    **event_kwargs
                )
            else:
                raise NotImplementedError(f"EventType {event_type} not implemented")

            # We want to attach the game_state after the event to the event
            game_state = self.__reduce_game_state(
                event=event,
                game_state=game_state
            )

            event.ball_state = game_state.ball_state
            event.ball_owning_team = game_state.ball_owning_team

            events.append(event)

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY else
            Orientation.FIXED_AWAY_HOME
        )

        return EventDataset(
            flags=DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM,
            orientation=orientation,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 1),
                y_dim=Dimension(0, 1)
            ),
            periods=periods,
            records=events
        )
Ejemplo n.º 14
0
    def transform_dataset(
        cls,
        dataset: Dataset,
        to_pitch_dimensions: PitchDimensions = None,
        to_orientation: Orientation = None,
        to_coordinate_system: CoordinateSystem = None,
    ) -> Dataset:

        if to_pitch_dimensions and to_coordinate_system:
            raise ValueError(
                "You can't do both a PitchDimension and CoordinateSysetm on the same dataset transformation"
            )

        if (
            not to_pitch_dimensions
            and not to_orientation
            and not to_coordinate_system
        ):
            return dataset
        elif not to_orientation:
            to_orientation = dataset.metadata.orientation

        if to_orientation == Orientation.BALL_OWNING_TEAM:
            if not dataset.metadata.flags & DatasetFlag.BALL_OWNING_TEAM:
                raise ValueError(
                    "Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
                    "ball owning team data"
                )

        if to_pitch_dimensions:

            transformer = cls(
                from_pitch_dimensions=dataset.metadata.pitch_dimensions,
                from_orientation=dataset.metadata.orientation,
                to_pitch_dimensions=to_pitch_dimensions,
                to_orientation=to_orientation,
            )
            metadata = replace(
                dataset.metadata,
                pitch_dimensions=to_pitch_dimensions,
                orientation=to_orientation,
            )

        elif to_coordinate_system:

            transformer = cls(
                from_coordinate_system=dataset.metadata.coordinate_system,
                from_orientation=dataset.metadata.orientation,
                to_coordinate_system=to_coordinate_system,
                to_orientation=to_orientation,
            )
            metadata = replace(
                dataset.metadata,
                coordinate_system=to_coordinate_system,
                pitch_dimensions=to_coordinate_system.pitch_dimensions,
                orientation=to_orientation,
            )

        else:

            transformer = cls(
                from_coordinate_system=dataset.metadata.coordinate_system,
                from_orientation=dataset.metadata.orientation,
                to_coordinate_system=dataset.metadata.coordinate_system,
                to_orientation=to_orientation,
            )
            metadata = replace(
                dataset.metadata,
                orientation=to_orientation,
            )

        if isinstance(dataset, TrackingDataset):
            frames = [
                transformer.transform_frame(record)
                for record in dataset.records
            ]

            return TrackingDataset(
                metadata=metadata,
                records=frames,
            )
        elif isinstance(dataset, EventDataset):
            events = [
                transformer.transform_event(event) for event in dataset.records
            ]

            return EventDataset(
                metadata=metadata,
                records=events,
            )
        else:
            raise KloppyError("Unknown Dataset type")
Ejemplo n.º 15
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize Metrica Sports event data json format into a `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `raw_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `metadata` should point
                    to a `Readable` object containing the `xml` metadata file.
                options : dict
                    Options for deserialization of the Metrica Sports event json file. 
                    Possible options are `event_types` (list of event types) to specify 
                    the event types that should be returned. Valid types: "shot", "pass", 
                    "carry", "take_on" and "generic". Generic is everything other than 
                    the first 4. Those events are barely parsed. This type of event can 
                    be used to do the parsing yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = MetricaEventsJsonSerializer()
                >>> with open("events.json", "rb") as raw_data, \
                >>>      open("metadata.xml", "rb") as metadata:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'raw_data': raw_data,
                >>>             'metadata': metadata
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["raw_data"])
            metadata = load_metadata(inputs["metadata"],
                                     provider=Provider.METRICA)

        with performance_logging("parse data", logger=logger):

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            events = []
            for raw_event in raw_events["data"]:
                if raw_event["team"]["id"] == metadata.teams[0].team_id:
                    team = metadata.teams[0]
                elif raw_event["team"]["id"] == metadata.teams[1].team_id:
                    team = metadata.teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                player = team.get_player_by_id(raw_event["from"]["id"])
                event_type = raw_event["type"]["id"]
                subtypes = _parse_subtypes(raw_event)
                period = [
                    period for period in metadata.periods
                    if period.id == raw_event["period"]
                ][0]

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=raw_event["start"]["time"],
                    ball_owning_team=_parse_ball_owning_team(event_type, team),
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=None,
                    team=team,
                    player=player,
                    coordinates=(_parse_coordinates(raw_event["start"])),
                    raw_event=raw_event,
                )

                if event_type in MS_PASS_TYPES:
                    pass_event_kwargs = _parse_pass(
                        event=raw_event,
                        subtypes=subtypes,
                        team=team,
                    )
                    event = PassEvent(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(event=raw_event,
                                                    subtypes=subtypes)
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)

                elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes:
                    take_on_event_kwargs = _parse_take_on(subtypes=subtypes)
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == MS_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(event=raw_event, )
                    event = CarryEvent(
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if (not wanted_event_types
                        or event.event_type in wanted_event_types):
                    events.append(event)

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 16
0
    def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:

        transformer = self.get_transformer(length=2, width=2)

        with performance_logging("load data", logger=logger):
            data = json.load(inputs.event_data)
            match = data["match"]
            score_data = data["scoreStatus"]
            incidences = data["incidences"]
            players_data = data["players"]
            teams_data = data["teams"]

        with performance_logging("parse data", logger=logger):
            teams = []
            scores = []
            team_ids = (
                (Ground.HOME, str(match["homeTeamId"])),
                (Ground.AWAY, str(match["awayTeamId"])),
            )
            for ground, team_id in team_ids:
                team = Team(
                    team_id=team_id,
                    name=teams_data[team_id]["name"],
                    ground=ground,
                )
                team.players = [
                    Player(
                        player_id=player_id,
                        team=team,
                        first_name=player["name"]["first"],
                        last_name=player["name"]["last"],
                        name=player["name"]["shortName"]
                        or player["name"]["nick"],
                        jersey_no=player["squadNo"],
                        starting=not player["substitute"],
                    ) for player_id, player in players_data.items()
                    if str(player["teamId"]) == team_id
                ]
                teams.append(team)
                scores.append(score_data.get(team_id, {}).get("score"))
            score = Score(home=scores[0], away=scores[1])

            # setup periods
            status = incidences.pop(DF_EVENT_CLASS_STATUS)
            # start timestamps are fixed
            start_ts = {1: 0, 2: 45 * 60, 3: 90 * 60, 4: 105 * 60, 5: 120 * 60}
            # check for end status updates to setup periods
            end_event_types = {
                DF_EVENT_TYPE_STATUS_MATCH_END,
                DF_EVENT_TYPE_STATUS_FIRST_HALF_END,
                DF_EVENT_TYPE_STATUS_SECOND_HALF_END,
                DF_EVENT_TYPE_STATUS_FIRST_EXTRA_END,
                DF_EVENT_TYPE_STATUS_SECOND_EXTRA_END,
            }
            periods = {}
            for status_update in status.values():
                if status_update["type"] not in end_event_types:
                    continue
                half = status_update["t"]["half"]
                end_ts = parse_str_ts(status_update)
                periods[half] = Period(
                    id=half,
                    start_timestamp=start_ts[half],
                    end_timestamp=end_ts,
                    attacking_direction=AttackingDirection.HOME_AWAY if half %
                    2 == 1 else AttackingDirection.AWAY_HOME,
                )

            # exclude goals, already listed as shots too
            incidences.pop(DF_EVENT_CLASS_GOALS)
            raw_events = [(k, e_id, e) for k in incidences
                          for e_id, e in incidences[k].items()]
            # sort events by timestamp, event_id
            raw_events.sort(key=lambda e: (
                e[2]["t"]["half"],
                e[2]["t"]["m"],
                e[2]["t"]["s"] or 0,
                e[1],
            ))

            home_team, away_team = teams
            events = []
            previous_event = next_event = None
            for i, (e_class, e_id, raw_event) in enumerate(raw_events):
                period = periods.get(raw_event["t"]["half"])
                if period is None:
                    # skip invalid event
                    continue

                timestamp = parse_str_ts(raw_event)
                if (previous_event is not None and
                        previous_event["t"]["half"] != raw_event["t"]["half"]):
                    previous_event = None
                next_event = (raw_events[i + 1][2]
                              if i + 1 < len(raw_events) else None)

                team, player = _get_team_and_player(raw_event, home_team,
                                                    away_team)

                event_base_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=e_id,
                    team=team,
                    player=player,
                    coordinates=(_parse_coordinates(raw_event["coord"]["1"])
                                 if "coord" in raw_event else None),
                    raw_event=raw_event,
                    result=None,
                    qualifiers=None,
                )

                if e_class in DF_EVENT_CLASS_PASSES:
                    pass_event_kwargs = _parse_pass(
                        raw_event=raw_event,
                        team=team,
                        previous_event=previous_event,
                        next_event=next_event,
                    )
                    event_base_kwargs.update(pass_event_kwargs)
                    event = PassEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_SHOTS:
                    shot_event_kwargs = _parse_shot(
                        raw_event=raw_event,
                        previous_event=previous_event,
                    )
                    event_base_kwargs.update(shot_event_kwargs)
                    event = ShotEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_STEALINGS:
                    event = RecoveryEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_FOULS:
                    # NOTE: could use qualifiers? (hand, foul, penalty?)
                    # switch possession team
                    event_base_kwargs["ball_owning_team"] = (
                        home_team if team == away_team else away_team)
                    event = FoulCommittedEvent.create(**event_base_kwargs)

                elif e_class in DF_EVENT_CLASS_CARDS:
                    card_kwargs = _parse_card(raw_event=raw_event, )
                    event_base_kwargs.update(card_kwargs)
                    event = CardEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_SUBSTITUTIONS:
                    substitution_event_kwargs = _parse_substitution(
                        raw_event=raw_event, team=team)
                    event_base_kwargs.update(substitution_event_kwargs)
                    event = SubstitutionEvent.create(**event_base_kwargs)

                else:
                    # otherwise, a generic event
                    event = GenericEvent.create(
                        event_name=e_class,
                        **event_base_kwargs,
                    )

                # check if the event implies ball was out of the field and add a synthetic out event
                if raw_event["type"] in BALL_OUT_EVENTS:
                    ball_out_event = BallOutEvent.create(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp,
                        ball_owning_team=team,
                        ball_state=BallState.DEAD,
                        # from Event
                        event_id=e_id,
                        team=team,
                        player=player,
                        coordinates=event.coordinates,
                        raw_event=raw_event,
                        result=None,
                        qualifiers=None,
                    )
                    if self.should_include_event(event):
                        events.append(
                            transformer.transform_event(ball_out_event))

                if self.should_include_event(event):
                    events.append(transformer.transform_event(event))

                # only consider as a previous_event a ball-in-play event
                if e_class not in (
                        DF_EVENT_CLASS_YELLOW_CARDS,
                        DF_EVENT_CLASS_RED_CARDS,
                        DF_EVENT_CLASS_SUBSTITUTIONS,
                        DF_EVENT_CLASS_PENALTY_SHOOTOUT,
                ):
                    previous_event = raw_event

        metadata = Metadata(
            teams=teams,
            periods=sorted(periods.values(), key=lambda p: p.id),
            pitch_dimensions=transformer.get_to_coordinate_system().
            pitch_dimensions,
            frame_rate=None,
            orientation=Orientation.HOME_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=score,
            provider=Provider.DATAFACTORY,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 17
0
    def deserialize(self, inputs: StatsbombInputs) -> EventDataset:
        transformer = self.get_transformer(length=120, width=80)

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs.event_data)
            home_lineup, away_lineup = json.load(inputs.lineup_data)
            (
                shot_fidelity_version,
                xy_fidelity_version,
            ) = _determine_xy_fidelity_versions(raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):
            starting_player_ids = {
                str(player["player"]["id"])
                for raw_event in raw_events
                if raw_event["type"]["id"] == SB_EVENT_TYPE_STARTING_XI
                for player in raw_event["tactics"]["lineup"]
            }

            starting_formations = {
                raw_event["team"]["id"]: FormationType("-".join(
                    list(str(raw_event["tactics"]["formation"]))))
                for raw_event in raw_events
                if raw_event["type"]["id"] == SB_EVENT_TYPE_STARTING_XI
            }

            home_team = Team(
                team_id=str(home_lineup["team_id"]),
                name=home_lineup["team_name"],
                ground=Ground.HOME,
                starting_formation=starting_formations[home_lineup["team_id"]],
            )
            home_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=home_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                    starting=str(player["player_id"]) in starting_player_ids,
                ) for player in home_lineup["lineup"]
            ]

            away_team = Team(
                team_id=str(away_lineup["team_id"]),
                name=away_lineup["team_name"],
                ground=Ground.AWAY,
                starting_formation=starting_formations[away_lineup["team_id"]],
            )
            away_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=away_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                    starting=str(player["player_id"]) in starting_player_ids,
                ) for player in away_lineup["lineup"]
            ]

            teams = [home_team, away_team]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                if raw_event["team"]["id"] == home_lineup["team_id"]:
                    team = home_team
                elif raw_event["team"]["id"] == away_lineup["team_id"]:
                    team = away_team
                else:
                    raise DeserializationError(
                        f"Unknown team_id {raw_event['team']['id']}")

                if (raw_event["possession_team"]["id"] ==
                        home_lineup["team_id"]):
                    possession_team = home_team
                elif (raw_event["possession_team"]["id"] ==
                      away_lineup["team_id"]):
                    possession_team = away_team
                else:
                    raise DeserializationError(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event["timestamp"])
                period_id = int(raw_event["period"])
                if not period or period.id != period_id:
                    period = Period(
                        id=period_id,
                        start_timestamp=(
                            timestamp if not period
                            # period = [start, end], add millisecond to prevent overlapping
                            else timestamp + period.end_timestamp + 0.001),
                        end_timestamp=None,
                    )
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                player = None
                if "player" in raw_event:
                    player = team.get_player_by_id(raw_event["player"]["id"])

                event_type = raw_event["type"]["id"]
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (
                        SB_EVENT_TYPE_CARRY,
                        SB_EVENT_TYPE_DRIBBLE,
                        SB_EVENT_TYPE_PASS,
                ):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = {
                    # from DataRecord
                    "period":
                    period,
                    "timestamp":
                    timestamp,
                    "ball_owning_team":
                    possession_team,
                    "ball_state":
                    BallState.ALIVE,
                    # from Event
                    "event_id":
                    raw_event["id"],
                    "team":
                    team,
                    "player":
                    player,
                    "coordinates": (_parse_coordinates(
                        raw_event.get("location"),
                        fidelity_version,
                    ) if "location" in raw_event else None),
                    "related_event_ids":
                    raw_event.get("related_events", []),
                    "raw_event":
                    raw_event,
                }

                new_events = []
                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event["pass"],
                        team=team,
                        fidelity_version=fidelity_version,
                    )
                    pass_event = PassEvent.create(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event["duration"],
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(pass_event)
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event["shot"], )
                    shot_event = ShotEvent.create(
                        **shot_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(shot_event)

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event["dribble"], )
                    take_on_event = TakeOnEvent.create(
                        qualifiers=None,
                        **take_on_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(take_on_event)
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event["carry"],
                        fidelity_version=fidelity_version,
                    )
                    carry_event = CarryEvent.create(
                        qualifiers=None,
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event.get("duration", 0),
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(carry_event)

                # lineup affecting events
                elif event_type == SB_EVENT_TYPE_SUBSTITUTION:
                    substitution_event_kwargs = _parse_substitution(
                        substitution_dict=raw_event["substitution"],
                        team=team,
                    )
                    substitution_event = SubstitutionEvent.create(
                        result=None,
                        qualifiers=None,
                        **substitution_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(substitution_event)
                elif event_type == SB_EVENT_TYPE_BAD_BEHAVIOUR:
                    bad_behaviour_kwargs = _parse_bad_behaviour(
                        bad_behaviour_dict=raw_event.get("bad_behaviour",
                                                         {}), )
                    if "card" in bad_behaviour_kwargs:
                        card_kwargs = bad_behaviour_kwargs["card"]
                        card_event = CardEvent.create(
                            result=None,
                            qualifiers=None,
                            card_type=card_kwargs["card_type"],
                            **generic_event_kwargs,
                        )
                        new_events.append(card_event)
                elif event_type == SB_EVENT_TYPE_FOUL_COMMITTED:
                    foul_committed_kwargs = _parse_foul_committed(
                        foul_committed_dict=raw_event.get(
                            "foul_committed", {}), )
                    foul_committed_event = FoulCommittedEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                    new_events.append(foul_committed_event)
                    if "card" in foul_committed_kwargs:
                        card_kwargs = foul_committed_kwargs["card"]
                        card_event = CardEvent.create(
                            result=None,
                            qualifiers=None,
                            card_type=card_kwargs["card_type"],
                            **generic_event_kwargs,
                        )
                        new_events.append(card_event)
                elif event_type == SB_EVENT_TYPE_PLAYER_ON:
                    player_on_event = PlayerOnEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                    new_events.append(player_on_event)
                elif event_type == SB_EVENT_TYPE_PLAYER_OFF:
                    player_off_event = PlayerOffEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                    new_events.append(player_off_event)

                elif event_type == SB_EVENT_TYPE_RECOVERY:
                    recovery_event = RecoveryEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                    new_events.append(recovery_event)

                elif event_type == SB_EVENT_TYPE_FORMATION_CHANGE:
                    formation_change_event_kwargs = _parse_formation_change(
                        raw_event["tactics"]["formation"])
                    formation_change_event = FormationChangeEvent.create(
                        result=None,
                        qualifiers=None,
                        **formation_change_event_kwargs,
                        **generic_event_kwargs,
                    )
                    new_events.append(formation_change_event)
                # rest: generic
                else:
                    generic_event = GenericEvent.create(
                        result=None,
                        qualifiers=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )
                    new_events.append(generic_event)

                for event in new_events:
                    if self.should_include_event(event):
                        transformed_event = transformer.transform_event(event)
                        events.append(transformed_event)

                    # Checks if the event ended out of the field and adds a synthetic out event
                    if event.result in OUT_EVENT_RESULTS:
                        generic_event_kwargs["ball_state"] = BallState.DEAD
                        if event.receiver_coordinates:
                            generic_event_kwargs[
                                "coordinates"] = event.receiver_coordinates

                            ball_out_event = BallOutEvent.create(
                                result=None,
                                qualifiers=None,
                                **generic_event_kwargs,
                            )

                            if self.should_include_event(ball_out_event):
                                transformed_ball_out_event = (
                                    transformer.transform_event(ball_out_event)
                                )
                                events.append(transformed_ball_out_event)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().
            pitch_dimensions,
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=None,
            provider=Provider.STATSBOMB,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 18
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize StatsBomb event data into a `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `event_data` should point to a `Readable` object containing
                    the 'json' formatted event data. input `lineup_data` should point
                    to a `Readable` object containing the 'json' formatted lineup data.
                options : dict
                    Options for deserialization of the StatsBomb file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on" and
                    "generic". Generic is everything other than the first 4. Those events
                    are barely parsed. This type of event can be used to do the parsing
                    yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = StatsBombSerializer()
                >>> with open("events/12312312.json", "rb") as event_data, \
                >>>      open("lineups/123123123.json", "rb") as lineup_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'event_data': event_data,
                >>>             'lineup_data': lineup_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs['event_data'])
            home_lineup, away_lineup = json.load(inputs['lineup_data'])
            shot_fidelity_version, xy_fidelity_version = _determine_xy_fidelity_versions(
                raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):
            home_player_map = {
                player['player_id']: str(player['jersey_number'])
                for player in home_lineup['lineup']
            }
            away_player_map = {
                player['player_id']: str(player['jersey_number'])
                for player in away_lineup['lineup']
            }

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get('event_types', [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                if raw_event['team']['id'] == home_lineup['team_id']:
                    team = Team.HOME
                    current_team_map = home_player_map
                elif raw_event['team']['id'] == away_lineup['team_id']:
                    team = Team.AWAY
                    current_team_map = away_player_map
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                if raw_event['possession_team']['id'] == home_lineup[
                        'team_id']:
                    possession_team = Team.HOME
                elif raw_event['possession_team']['id'] == away_lineup[
                        'team_id']:
                    possession_team = Team.AWAY
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event['timestamp'])
                period_id = int(raw_event['period'])
                if not period or period.id != period_id:
                    period = Period(id=period_id,
                                    start_timestamp=timestamp if not period
                                    else timestamp + period.end_timestamp,
                                    end_timestamp=None)
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                player_jersey_no = None
                if 'player' in raw_event:
                    player_jersey_no = current_team_map[raw_event['player']
                                                        ['id']]

                event_type = raw_event['type']['id']
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (SB_EVENT_TYPE_CARRY, SB_EVENT_TYPE_DRIBBLE,
                                    SB_EVENT_TYPE_PASS):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=raw_event['id'],
                    team=team,
                    player_jersey_no=player_jersey_no,
                    position=(_parse_position(raw_event.get('location'),
                                              fidelity_version)
                              if 'location' in raw_event else None),
                    raw_event=raw_event)

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event['pass'],
                        current_team_map=current_team_map,
                        fidelity_version=fidelity_version)

                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event['duration'],
                        **pass_event_kwargs,
                        **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event['shot'])
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event['dribble'])
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event['carry'],
                        fidelity_version=fidelity_version)
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event['duration'],
                        **carry_event_kwargs,
                        **generic_event_kwargs)
                else:
                    event = GenericEvent(result=None, **generic_event_kwargs)

                if not wanted_event_types or event.event_type in wanted_event_types:
                    events.append(event)

        return EventDataset(flags=DatasetFlag.BALL_OWNING_TEAM,
                            orientation=Orientation.ACTION_EXECUTING_TEAM,
                            pitch_dimensions=PitchDimensions(
                                x_dim=Dimension(0, 120),
                                y_dim=Dimension(0, 80)),
                            periods=periods,
                            records=events)
Ejemplo n.º 19
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize Opta event data into a `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `f24_data` should point to a `Readable` object containing
                    the 'xml' formatted event data. input `f7_data` should point
                    to a `Readable` object containing the 'xml' formatted f7 data.
                options : dict
                    Options for deserialization of the Opta file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on" and
                    "generic". Generic is everything other than the first 4. Those events
                    are barely parsed. This type of event can be used to do the parsing
                    yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = OptaSerializer()
                >>> with open("123_f24.xml", "rb") as f24_data, \
                >>>      open("123_f7.xml", "rb") as f7_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'f24_data': f24_data,
                >>>             'f7_data': f7_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            f7_root = objectify.fromstring(inputs["f7_data"].read())
            f24_root = objectify.fromstring(inputs["f24_data"].read())

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

        with performance_logging("parse data", logger=logger):
            matchdata_path = objectify.ObjectPath(
                "SoccerFeed.SoccerDocument.MatchData")
            team_elms = list(
                matchdata_path.find(f7_root).iterchildren("TeamData"))

            home_score = None
            away_score = None
            for team_elm in team_elms:
                if team_elm.attrib["Side"] == "Home":
                    home_score = team_elm.attrib["Score"]
                    home_team = _team_from_xml_elm(team_elm, f7_root)
                elif team_elm.attrib["Side"] == "Away":
                    away_score = team_elm.attrib["Score"]
                    away_team = _team_from_xml_elm(team_elm, f7_root)
                else:
                    raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

            score = Score(home=home_score, away=away_score)
            teams = [home_team, away_team]

            if len(home_team.players) == 0 or len(away_team.players) == 0:
                raise Exception("LineUp incomplete")

            game_elm = f24_root.find("Game")
            periods = [
                Period(
                    id=1,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
                Period(
                    id=2,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
            ]
            possession_team = None
            events = []
            for event_elm in game_elm.iterchildren("Event"):
                event_id = event_elm.attrib["id"]
                type_id = int(event_elm.attrib["type_id"])
                timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
                period_id = int(event_elm.attrib["period_id"])
                for period in periods:
                    if period.id == period_id:
                        break
                else:
                    logger.debug(
                        f"Skipping event {event_id} because period doesn't match {period_id}"
                    )
                    continue

                if type_id == EVENT_TYPE_START_PERIOD:
                    logger.debug(
                        f"Set start of period {period.id} to {timestamp}")
                    period.start_timestamp = timestamp
                elif type_id == EVENT_TYPE_END_PERIOD:
                    logger.debug(
                        f"Set end of period {period.id} to {timestamp}")
                    period.end_timestamp = timestamp
                else:
                    if not period.start_timestamp:
                        # not started yet
                        continue

                    if event_elm.attrib["team_id"] == home_team.team_id:
                        team = teams[0]
                    elif event_elm.attrib["team_id"] == away_team.team_id:
                        team = teams[1]
                    else:
                        raise Exception(
                            f"Unknown team_id {event_elm.attrib['team_id']}")

                    x = float(event_elm.attrib["x"])
                    y = float(event_elm.attrib["y"])
                    outcome = int(event_elm.attrib["outcome"])
                    raw_qualifiers = {
                        int(qualifier_elm.attrib["qualifier_id"]):
                        qualifier_elm.attrib.get("value")
                        for qualifier_elm in event_elm.iterchildren("Q")
                    }
                    player = None
                    if "player_id" in event_elm.attrib:
                        player = team.get_player_by_id(
                            event_elm.attrib["player_id"])

                    if type_id in BALL_OWNING_EVENTS:
                        possession_team = team

                    generic_event_kwargs = dict(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp - period.start_timestamp,
                        ball_owning_team=possession_team,
                        ball_state=BallState.ALIVE,
                        # from Event
                        event_id=event_id,
                        team=team,
                        player=player,
                        coordinates=Point(x=x, y=y),
                        raw_event=event_elm,
                    )

                    if type_id == EVENT_TYPE_PASS:
                        pass_event_kwargs = _parse_pass(
                            raw_qualifiers, outcome)
                        event = PassEvent.create(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                        pass_event_kwargs = _parse_offside_pass(raw_qualifiers)
                        event = PassEvent.create(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_TAKE_ON:
                        take_on_event_kwargs = _parse_take_on(outcome)
                        event = TakeOnEvent.create(
                            qualifiers=None,
                            **take_on_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id in (
                            EVENT_TYPE_SHOT_MISS,
                            EVENT_TYPE_SHOT_POST,
                            EVENT_TYPE_SHOT_SAVED,
                            EVENT_TYPE_SHOT_GOAL,
                    ):
                        shot_event_kwargs = _parse_shot(
                            raw_qualifiers,
                            type_id,
                            coordinates=generic_event_kwargs["coordinates"],
                        )
                        kwargs = {}
                        kwargs.update(generic_event_kwargs)
                        kwargs.update(shot_event_kwargs)
                        event = ShotEvent.create(**kwargs)

                    elif type_id == EVENT_TYPE_RECOVERY:
                        event = RecoveryEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                        event = FoulCommittedEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    elif type_id in BALL_OUT_EVENTS:
                        generic_event_kwargs["ball_state"] = BallState.DEAD
                        event = BallOutEvent.create(
                            result=None,
                            qualifiers=None,
                            **generic_event_kwargs,
                        )

                    else:
                        event = GenericEvent.create(
                            **generic_event_kwargs,
                            result=None,
                            qualifiers=None,
                            event_name=_get_event_type_name(type_id),
                        )

                    if (not wanted_event_types
                            or event.event_type in wanted_event_types):
                        events.append(event)

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                             y_dim=Dimension(0, 100)),
            score=score,
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            provider=Provider.OPTA,
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )
Ejemplo n.º 20
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
                Deserialize Opta event data into a `EventDataset`.

                Parameters
                ----------
                inputs : dict
                    input `f24_data` should point to a `Readable` object containing
                    the 'xml' formatted event data. input `f7_data` should point
                    to a `Readable` object containing the 'xml' formatted f7 data.
                options : dict
                    Options for deserialization of the Opta file. Possible options are
                    `event_types` (list of event types) to specify the event types that
                    should be returned. Valid types: "shot", "pass", "carry", "take_on" and
                    "generic". Generic is everything other than the first 4. Those events
                    are barely parsed. This type of event can be used to do the parsing
                    yourself.
                    Every event has a 'raw_event' attribute which contains the original
                    dictionary.
                Returns
                -------
                dataset : EventDataset
                Raises
                ------

                See Also
                --------

                Examples
                --------
                >>> serializer = OptaSerializer()
                >>> with open("123_f24.xml", "rb") as f24_data, \
                >>>      open("123_f7.xml", "rb") as f7_data:
                >>>
                >>>     dataset = serializer.deserialize(
                >>>         inputs={
                >>>             'f24_data': f24_data,
                >>>             'f7_data': f7_data
                >>>         },
                >>>         options={
                >>>             'event_types': ["pass", "take_on", "carry", "shot"]
                >>>         }
                >>>     )
                """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            f7_root = objectify.fromstring(inputs["f7_data"].read())
            f24_root = objectify.fromstring(inputs["f24_data"].read())

            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

        with performance_logging("parse data", logger=logger):
            matchdata_path = objectify.ObjectPath(
                "SoccerFeed.SoccerDocument.MatchData")
            team_elms = list(
                matchdata_path.find(f7_root).iterchildren("TeamData"))

            away_player_map = {}
            home_player_map = {}
            home_team_id = None
            away_team_id = None
            for team_elm in team_elms:
                player_map = {
                    player_elm.attrib["PlayerRef"].lstrip("p"):
                    player_elm.attrib["ShirtNumber"]
                    for player_elm in team_elm.find(
                        "PlayerLineUp").iterchildren("MatchPlayer")
                }
                team_id = team_elm.attrib["TeamRef"].lstrip("t")

                if team_elm.attrib["Side"] == "Home":
                    home_player_map = player_map
                    home_team_id = team_id
                elif team_elm.attrib["Side"] == "Away":
                    away_player_map = player_map
                    away_team_id = team_id
                else:
                    raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

            if not away_player_map or not home_player_map:
                raise Exception("LineUp incomplete")

            game_elm = f24_root.find("Game")
            periods = [
                Period(
                    id=1,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
                Period(
                    id=2,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
            ]
            events = []
            for event_elm in game_elm.iterchildren("Event"):
                event_id = event_elm.attrib["id"]
                type_id = int(event_elm.attrib["type_id"])
                timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
                period_id = int(event_elm.attrib["period_id"])
                for period in periods:
                    if period.id == period_id:
                        break
                else:
                    logger.debug(
                        f"Skipping event {event_id} because period doesn't match {period_id}"
                    )
                    continue

                if type_id == EVENT_TYPE_START_PERIOD:
                    logger.debug(
                        f"Set start of period {period.id} to {timestamp}")
                    period.start_timestamp = timestamp
                elif type_id == EVENT_TYPE_END_PERIOD:
                    logger.debug(
                        f"Set end of period {period.id} to {timestamp}")
                    period.end_timestamp = timestamp
                else:
                    if not period.start_timestamp:
                        # not started yet
                        continue

                    if event_elm.attrib["team_id"] == home_team_id:
                        team = Team.HOME
                        current_team_map = home_player_map
                    elif event_elm.attrib["team_id"] == away_team_id:
                        team = Team.AWAY
                        current_team_map = away_player_map
                    else:
                        raise Exception(
                            f"Unknown team_id {event_elm.attrib['team_id']}")

                    x = float(event_elm.attrib["x"])
                    y = float(event_elm.attrib["y"])
                    outcome = int(event_elm.attrib["outcome"])
                    qualifiers = {
                        int(qualifier_elm.attrib["qualifier_id"]):
                        qualifier_elm.attrib.get("value")
                        for qualifier_elm in event_elm.iterchildren("Q")
                    }
                    player_jersey_no = None
                    if "player_id" in event_elm.attrib:
                        player_jersey_no = current_team_map[
                            event_elm.attrib["player_id"]]

                    generic_event_kwargs = dict(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.ALIVE,
                        # from Event
                        event_id=event_id,
                        team=team,
                        player_jersey_no=player_jersey_no,
                        position=Point(x=x, y=y),
                        raw_event=event_elm,
                    )

                    if type_id == EVENT_TYPE_PASS:
                        pass_event_kwargs = _parse_pass(qualifiers, outcome)
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                        pass_event_kwargs = _parse_offside_pass()
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_TAKE_ON:
                        take_on_event_kwargs = _parse_take_on(outcome)
                        event = TakeOnEvent(
                            **take_on_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id in (
                            EVENT_TYPE_SHOT_MISS,
                            EVENT_TYPE_SHOT_POST,
                            EVENT_TYPE_SHOT_SAVED,
                            EVENT_TYPE_SHOT_GOAL,
                    ):
                        shot_event_kwargs = _parse_shot(
                            qualifiers,
                            type_id,
                            position=generic_event_kwargs["position"],
                        )
                        kwargs = {}
                        kwargs.update(generic_event_kwargs)
                        kwargs.update(shot_event_kwargs)
                        event = ShotEvent(**kwargs)
                    else:
                        event = GenericEvent(**generic_event_kwargs,
                                             result=None)

                    if (not wanted_event_types
                            or event.event_type in wanted_event_types):
                        events.append(event)

        return EventDataset(
            flags=DatasetFlag.BALL_OWNING_TEAM,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                             y_dim=Dimension(0, 100)),
            periods=periods,
            records=events,
        )
Ejemplo n.º 21
0
    def deserialize(self, inputs: SportecInputs) -> EventDataset:
        with performance_logging("load data", logger=logger):
            match_root = objectify.fromstring(inputs.meta_data.read())
            event_root = objectify.fromstring(inputs.event_data.read())

        with performance_logging("parse data", logger=logger):
            x_max = float(
                match_root.MatchInformation.Environment.attrib["PitchX"]
            )
            y_max = float(
                match_root.MatchInformation.Environment.attrib["PitchY"]
            )

            transformer = self.get_transformer(length=x_max, width=y_max)

            team_path = objectify.ObjectPath(
                "PutDataRequest.MatchInformation.Teams"
            )
            team_elms = list(team_path.find(match_root).iterchildren("Team"))

            for team_elm in team_elms:
                if team_elm.attrib["Role"] == "home":
                    home_team = _team_from_xml_elm(team_elm)
                elif team_elm.attrib["Role"] == "guest":
                    away_team = _team_from_xml_elm(team_elm)
                else:
                    raise DeserializationError(
                        f"Unknown side: {team_elm.attrib['Role']}"
                    )

            (
                home_score,
                away_score,
            ) = match_root.MatchInformation.General.attrib["Result"].split(":")
            score = Score(home=int(home_score), away=int(away_score))
            teams = [home_team, away_team]

            if len(home_team.players) == 0 or len(away_team.players) == 0:
                raise DeserializationError("LineUp incomplete")

            periods = []
            period_id = 0
            events = []

            for event_elm in event_root.iterchildren("Event"):
                event_chain = _event_chain_from_xml_elm(event_elm)
                timestamp = _parse_datetime(event_chain["Event"]["EventTime"])
                if (
                    SPORTEC_EVENT_NAME_KICKOFF in event_chain
                    and "GameSection"
                    in event_chain[SPORTEC_EVENT_NAME_KICKOFF]
                ):
                    period_id += 1
                    period = Period(
                        id=period_id,
                        start_timestamp=timestamp,
                        end_timestamp=None,
                    )
                    if period_id == 1:
                        team_left = event_chain[SPORTEC_EVENT_NAME_KICKOFF][
                            "TeamLeft"
                        ]
                        if team_left == home_team.team_id:
                            # goal of home team is on the left side.
                            # this means they attack from left to right
                            orientation = Orientation.FIXED_HOME_AWAY
                            period.set_attacking_direction(
                                AttackingDirection.HOME_AWAY
                            )
                        else:
                            orientation = Orientation.FIXED_AWAY_HOME
                            period.set_attacking_direction(
                                AttackingDirection.AWAY_HOME
                            )
                    else:
                        last_period = periods[-1]
                        period.set_attacking_direction(
                            AttackingDirection.AWAY_HOME
                            if last_period.attacking_direction
                            == AttackingDirection.HOME_AWAY
                            else AttackingDirection.HOME_AWAY
                        )

                    periods.append(period)
                elif SPORTEC_EVENT_NAME_FINAL_WHISTLE in event_chain:
                    period.end_timestamp = timestamp
                    continue

                team = None
                player = None
                flatten_attributes = dict()
                # reverse because top levels are more important
                for event_attributes in reversed(event_chain.values()):
                    flatten_attributes.update(event_attributes)

                if "Team" in flatten_attributes:
                    team = (
                        home_team
                        if flatten_attributes["Team"] == home_team.team_id
                        else away_team
                    )
                if "Player" in flatten_attributes:
                    if not team:
                        raise ValueError("Player set while team is not set")
                    player = team.get_player_by_id(
                        flatten_attributes["Player"]
                    )

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp - period.start_timestamp,
                    ball_owning_team=None,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=event_chain["Event"]["EventId"],
                    coordinates=_parse_coordinates(event_chain["Event"]),
                    raw_event=flatten_attributes,
                    team=team,
                    player=player,
                )

                event_name, event_attributes = event_chain.popitem()
                if event_name in SPORTEC_SHOT_EVENT_NAMES:
                    shot_event_kwargs = _parse_shot(
                        event_name=event_name, event_chain=event_chain
                    )
                    event = ShotEvent.create(
                        **shot_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_name in SPORTEC_PASS_EVENT_NAMES:
                    pass_event_kwargs = _parse_pass(
                        event_chain=event_chain, team=team
                    )
                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                        receive_timestamp=None,
                        receiver_coordinates=None,
                    )
                elif event_name == SPORTEC_EVENT_NAME_BALL_CLAIMING:
                    event = RecoveryEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                elif event_name == SPORTEC_EVENT_NAME_SUBSTITUTION:
                    substitution_event_kwargs = _parse_substitution(
                        event_attributes=event_attributes, team=team
                    )
                    generic_event_kwargs["player"] = substitution_event_kwargs[
                        "player"
                    ]
                    del substitution_event_kwargs["player"]
                    event = SubstitutionEvent.create(
                        result=None,
                        qualifiers=None,
                        **substitution_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_name == SPORTEC_EVENT_NAME_CAUTION:
                    card_kwargs = _parse_caution(event_attributes)
                    event = CardEvent.create(
                        result=None,
                        qualifiers=None,
                        **card_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_name == SPORTEC_EVENT_NAME_FOUL:
                    foul_kwargs = _parse_foul(event_attributes, teams=teams)
                    generic_event_kwargs.update(foul_kwargs)
                    event = FoulCommittedEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent.create(
                        result=None,
                        qualifiers=None,
                        event_name=event_name,
                        **generic_event_kwargs,
                    )

                if events:
                    previous_event = events[-1]
                    if (
                        previous_event.event_type == EventType.PASS
                        and previous_event.result == PassResult.COMPLETE
                    ):
                        if "X-Source-Position" in event_chain["Event"]:
                            previous_event.receiver_coordinates = Point(
                                x=float(
                                    event_chain["Event"]["X-Source-Position"]
                                ),
                                y=float(
                                    event_chain["Event"]["Y-Source-Position"]
                                ),
                            )

                if (
                    event.event_type == EventType.PASS
                    and event.get_qualifier_value(SetPieceQualifier)
                    in (
                        SetPieceType.THROW_IN,
                        SetPieceType.GOAL_KICK,
                        SetPieceType.CORNER_KICK,
                    )
                ):
                    # 1. update previous pass
                    if events[-1].event_type == EventType.PASS:
                        events[-1].result = PassResult.OUT

                    # 2. add synthetic out event
                    decision_timestamp = _parse_datetime(
                        event_chain[list(event_chain.keys())[1]][
                            "DecisionTimestamp"
                        ]
                    )
                    out_event = BallOutEvent.create(
                        period=period,
                        timestamp=decision_timestamp - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.DEAD,
                        # from Event
                        event_id=event_chain["Event"]["EventId"] + "-ball-out",
                        team=events[-1].team,
                        player=events[-1].player,
                        coordinates=None,
                        raw_event={},
                        result=None,
                        qualifiers=None,
                    )
                    events.append(transformer.transform_event(out_event))

                events.append(transformer.transform_event(event))

        events = list(
            filter(
                self.should_include_event,
                events,
            )
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            score=score,
            frame_rate=None,
            orientation=orientation,
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            provider=Provider.SPORTEC,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )