Example #1
0
    def test_read(self):
        """Smoke test: reading the EPTS raw data file produces a non-empty result."""
        test_dir = os.path.dirname(__file__)

        with open(f"{test_dir}/files/epts_meta.xml", "rb") as meta_file:
            metadata = load_metadata(meta_file)

        with open(f"{test_dir}/files/epts_raw.txt", "rb") as raw_file:
            records = read_raw_data(raw_file, metadata)

            # Materialise the result inside the timed section, while the raw
            # file is still open (presumably the records are produced lazily).
            with performance_logging("load"):
                loaded = list(records)
                assert loaded
Example #2
0
    def test_skip_sensors(self):
        """Restricting `sensor_ids` to "heartbeat" keeps heartbeat columns only."""
        test_dir = os.path.dirname(__file__)
        meta_path = f"{test_dir}/files/epts_meta.xml"
        raw_path = f"{test_dir}/files/epts_raw.txt"

        with open(meta_path, "rb") as meta_file:
            with open(raw_path, "rb") as raw_file:
                metadata = load_metadata(meta_file)
                heartbeat_records = read_raw_data(
                    raw_file,
                    metadata,
                    sensor_ids=["heartbeat"],
                )
                # Build the frame before the files are closed: the records
                # are consumed here.
                frame = DataFrame.from_records(heartbeat_records)

        column_names = frame.columns
        assert "player_1_max_heartbeat" in column_names
        assert "player_1_x" not in column_names
Example #3
0
    def test_regex(self):
        """The regex built from the EPTS metadata matches a sample raw data line."""
        test_dir = os.path.dirname(__file__)
        with open(f"{test_dir}/files/epts_meta.xml", "rb") as meta_file:
            metadata = load_metadata(meta_file)

        pattern = re.compile(
            build_regex(
                metadata.data_format_specifications[0],
                metadata.player_channels,
                metadata.sensors,
            )
        )

        # NOTE: this sample line is the broken example of FIFA
        sample_line = "1779143:,-2.013,-500,100,9.63,9.80,4,5,177,182;-461,-615,-120,99,900,9.10,4,5,170,179;-2638,3478,120,110,1.15,5.20,3,4,170,175;:-2656,367,100:"

        match = pattern.search(sample_line)
        assert match is not None
Example #4
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
        Deserialize Metrica Sports event data json format into a `EventDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'json' formatted event data. input `metadata` should point
            to a `Readable` object containing the `xml` metadata file.
        options : dict, optional
            Options for deserialization of the Metrica Sports event json file.
            Possible options are `event_types` (list of event types) to specify
            the event types that should be returned. Valid types: "shot", "pass",
            "carry", "take_on" and "generic". Generic is everything other than
            the first 4. Those events are barely parsed. This type of event can
            be used to do the parsing yourself.
            Type names are upper-cased before being looked up, so they are
            case-insensitive. When `options` is omitted or `event_types` is
            empty, every event is returned.
            Every event has a 'raw_event' attribute which contains the original
            dictionary.

        Returns
        -------
        dataset : EventDataset

        Raises
        ------
        Exception
            If an event references a team id that matches neither of the
            first two teams in the metadata.
        IndexError
            If an event references a period id that is not present in the
            metadata.
        KeyError
            Presumably raised by the `EventType` lookup when `event_types`
            contains an unknown name (assumes `EventType` is an enum).

        Examples
        --------
        >>> serializer = MetricaEventsJsonSerializer()
        >>> with open("events.json", "rb") as raw_data:
        ...     with open("metadata.xml", "rb") as metadata:
        ...         dataset = serializer.deserialize(
        ...             inputs={
        ...                 'raw_data': raw_data,
        ...                 'metadata': metadata
        ...             },
        ...             options={
        ...                 'event_types': ["pass", "take_on", "carry", "shot"]
        ...             }
        ...         )
        """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["raw_data"])
            metadata = load_metadata(inputs["metadata"],
                                     provider=Provider.METRICA)

        with performance_logging("parse data", logger=logger):

            # An empty list means "no filtering": every event is kept.
            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            events = []
            for raw_event in raw_events["data"]:
                if raw_event["team"]["id"] == metadata.teams[0].team_id:
                    team = metadata.teams[0]
                elif raw_event["team"]["id"] == metadata.teams[1].team_id:
                    team = metadata.teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                player = team.get_player_by_id(raw_event["from"]["id"])
                event_type = raw_event["type"]["id"]
                subtypes = _parse_subtypes(raw_event)

                # Stop at the first matching period instead of building a
                # list of all matches for every event. A missing period still
                # raises IndexError, but with a message naming the period.
                period_id = raw_event["period"]
                period = next(
                    (candidate for candidate in metadata.periods
                     if candidate.id == period_id),
                    None,
                )
                if period is None:
                    raise IndexError(f"Unknown period {period_id}")

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=raw_event["start"]["time"],
                    ball_owning_team=_parse_ball_owning_team(event_type, team),
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=None,
                    team=team,
                    player=player,
                    coordinates=_parse_coordinates(raw_event["start"]),
                    raw_event=raw_event,
                )

                if event_type in MS_PASS_TYPES:
                    pass_event_kwargs = _parse_pass(
                        event=raw_event,
                        subtypes=subtypes,
                        team=team,
                    )
                    event = PassEvent(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )

                elif event_type == MS_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(event=raw_event,
                                                    subtypes=subtypes)
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)

                # A take-on is identified by its subtype, not by the event
                # type id, so this check must come before the carry check.
                elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes:
                    take_on_event_kwargs = _parse_take_on(subtypes=subtypes)
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == MS_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(event=raw_event)
                    event = CarryEvent(
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    # Everything else is kept as a barely-parsed generic
                    # event carrying the provider's own event name.
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                # Filtering happens after construction because the event type
                # is only known from the constructed event object.
                if (not wanted_event_types
                        or event.event_type in wanted_event_types):
                    events.append(event)

        return EventDataset(
            metadata=metadata,
            records=events,
        )