예제 #1
0
    def test_events_data_frame(self):
        """Exercise the EventsFrame filters on the Metrica JSON event fixture."""

        def row_count(frame):
            # Number of rows remaining in a (filtered) frame.
            return frame.shape[0]

        here = os.path.dirname(__file__)
        events_dataset = load_metrica_json_event_data(
            metadata_filename=f"{here}/files/metadata.xml",
            raw_data_filename=f"{here}/files/events.json",
        )

        # Wrap the pandas export and attach the dataset context to the frame.
        events = EventsFrame(to_pandas(events_dataset))
        events.data_type = DataType.EVENT
        events.metadata = events_dataset.metadata
        events.records = events_dataset.records

        opponent_box = Zones.OPPONENT_BOX

        # Single-criterion filters.
        assert row_count(events.type("PASS")) == 26
        assert row_count(events.result("COMPLETE")) == 45
        assert row_count(events.into(opponent_box)) == 1
        assert row_count(events.starts_inside(opponent_box)) == 2
        assert row_count(events.ends_inside(opponent_box)) == 2
        assert row_count(events.ends_outside(opponent_box)) == 43

        # Test different ways to input Zones and areas
        custom_area = Area((0.25, 0.2), (0.75, 0.8))

        outside_both_boxes = events.ends_outside(opponent_box, Zones.OWN_BOX)
        assert row_count(outside_both_boxes) == 45

        inside_box_or_area = events.ends_inside(opponent_box, custom_area)
        assert row_count(inside_box_or_area) == 14

        inside_area_twice = events.ends_inside(custom_area, custom_area)
        assert row_count(inside_area_twice) == 12
예제 #2
0
def main():
    """
    Show how to load Metrica datasets and how arguments are handed to the
    dataset loader.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)

    # The metrica dataset loader loads by default the 'game1' dataset
    first_game_options = {"sample_rate": 1.0 / 12, "limit": 10}
    first_game = datasets.load("metrica_tracking", options=first_game_options)
    print(len(first_game.frames))

    # Extra keyword arguments are forwarded to the loader and select a
    # different dataset.
    second_game = datasets.load(
        "metrica_tracking",
        options={"limit": 1000},
        game="game2",
    )

    print(to_pandas(second_game))
예제 #3
0
    def test_to_pandas_additional_columns(self):
        """Check that `additional_columns` adds constant and computed columns."""

        def bonus_from_frame(frame):
            # Computed column: derived per frame from its frame_id.
            return frame.frame_id + 10

        extra_columns = {
            "match": "test",
            "bonus_column": bonus_from_frame,
        }
        data_frame = to_pandas(
            self._get_tracking_dataset(),
            additional_columns=extra_columns,
        )

        # Column order matters for the comparison below.
        expected_columns = {
            "period_id": [1, 1],
            "timestamp": [0.1, 0.2],
            "ball_state": [None, None],
            "ball_owning_team": [None, None],
            "ball_x": [100, 0],
            "ball_y": [-50, 50],
            "match": ["test", "test"],
            "bonus_column": [11, 12],
            "player_home_1_x": [None, 15],
            "player_home_1_y": [None, 35],
            "player_away_1_x": [None, 10],
            "player_away_1_y": [None, 20],
        }
        expected_data_frame = DataFrame.from_dict(expected_columns)

        assert_frame_equal(data_frame, expected_data_frame)
예제 #4
0
def main():
    """
    Show how to load a Statsbomb dataset, how arguments are handed to the
    dataset loader, and how the result is transformed and exported to pandas.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)
    logger = logging.getLogger(__name__)

    loader_options = {"event_types": ["pass", "take_on", "carry", "shot"]}
    events = datasets.load("statsbomb", loader_options)

    # convert to TRACAB coordinates
    target_dimensions = [(-5500, 5500), (-3300, 3300)]
    with performance_logging("transform", logger=logger):
        events = transform(
            events,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=target_dimensions,
        )

    with performance_logging("to pandas", logger=logger):
        frame = to_pandas(events)

    # Only the first 50 rows are printed.
    first_rows = frame[:50]
    print(first_rows.to_string())
예제 #5
0
def main():
    """
    Run a pattern search on a Statsbomb dataset and print its events.

    Loads the shot and pass events, runs the ``PPS`` pattern through the
    matcher inside a ``performance_logging("search")`` block, and prints the
    first 100 events with player and team names attached.

    The previous version had an unconditional ``return`` followed by a loop
    over the matches; that loop could never run (and did not use the match
    it iterated over), so it has been removed.
    """
    import pandas as pd

    # Lift pandas' display limits so the printed frame is not truncated.
    pd.set_option("display.max_colwidth", None)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 2000)

    matcher = Matcher(encoder)

    dataset = datasets.load("statsbomb",
                            options={"event_types": ["shot", "pass"]})

    with performance_logging("search"):
        # The search result is not used below; the call is kept so the
        # search still runs inside the performance_logging block.
        matcher.search(dataset, r"PPS")

    df = to_pandas(
        dataset,
        additional_columns={
            "player_name": lambda event: event.player.full_name,
            "team_name": lambda event: str(event.team),
        },
    )
    print(df[["timestamp", "team_name", "player_name", "event_type",
              "result"]][:100])
예제 #6
0
파일: common.py 프로젝트: xor-lab/kloppy
    def to_pandas(self, *args, **kwargs):
        """
        Delegate to the package-level helper, passing this object first.

        All positional and keyword arguments are forwarded unchanged.

        See [to_pandas][kloppy.helpers.to_pandas]
        """
        # Imported at call time and aliased so the helper is not confused
        # with this method.
        # NOTE(review): the local import presumably avoids a circular import — confirm.
        from kloppy import to_pandas as _to_pandas

        return _to_pandas(self, *args, **kwargs)
예제 #7
0
    def __init__(
        self,
        tracking_metadata_file=None,
        tracking_data_file=None,
        events_metadata_file=None,
        events_data_file=None,
    ):
        """
        Load the optional tracking and event data and wrap them in frames.

        Args:
            tracking_metadata_file: passed as ``metadata_filename`` to the
                EPTS tracking loader.
            tracking_data_file: passed as ``raw_data_filename`` to the EPTS
                tracking loader; when ``None`` no tracking data is loaded
                and ``self.tracking`` is ``None``.
            events_metadata_file: passed as ``metadata_filename`` to the
                Metrica JSON event loader.
            events_data_file: passed as ``raw_data_filename`` to the Metrica
                JSON event loader; when ``None`` no event data is loaded and
                ``self.events`` is ``None``.
        """
        # Remember all inputs as given, including the ones left as None.
        self.files = dict(
            tracking_metadata_file=tracking_metadata_file,
            tracking_data_file=tracking_data_file,
            events_metadata_file=events_metadata_file,
            events_data_file=events_data_file,
        )

        if tracking_data_file is None:
            self.tracking = None
        else:
            tracking_dataset = load_epts_tracking_data(
                metadata_filename=tracking_metadata_file,
                raw_data_filename=tracking_data_file,
            )
            # Wrap the pandas export and attach the dataset context to it.
            tracking_frame = TrackingFrame(to_pandas(tracking_dataset))
            tracking_frame.data_type = DataType.TRACKING
            tracking_frame.metadata = tracking_dataset.metadata
            tracking_frame.records = tracking_dataset.records
            self.tracking = tracking_frame

        if events_data_file is None:
            self.events = None
        else:
            events_dataset = load_metrica_json_event_data(
                metadata_filename=events_metadata_file,
                raw_data_filename=events_data_file,
            )
            events_frame = EventsFrame(to_pandas(events_dataset))
            events_frame.data_type = DataType.EVENT
            events_frame.metadata = events_dataset.metadata
            events_frame.records = events_dataset.records
            self.events = events_frame

        self._enrich_data()
예제 #8
0
    def test_to_pandas(self):
        """Check the default pandas export of the two-frame tracking dataset."""

        def by_row(first, second):
            # Column values keyed by row index, as in the expected frame.
            return {0: first, 1: second}

        data_frame = to_pandas(self._get_tracking_dataset())

        # Column order matters for the comparison below.
        expected_columns = {
            "period_id": by_row(1, 1),
            "timestamp": by_row(0.1, 0.2),
            "ball_state": by_row(None, None),
            "ball_owning_team": by_row(None, None),
            "ball_x": by_row(100, 0),
            "ball_y": by_row(-50, 50),
            "player_home_1_x": by_row(None, 15.0),
            "player_home_1_y": by_row(None, 35.0),
            "player_away_1_x": by_row(None, 10.0),
            "player_away_1_y": by_row(None, 20.0),
        }
        expected_data_frame = DataFrame.from_dict(expected_columns)

        assert_frame_equal(data_frame, expected_data_frame)
예제 #9
0
    def test_to_pandas_generic_events(self):
        """Check that the Opta export contains two ``GENERIC:out`` rows."""
        files_dir = os.path.dirname(__file__)
        f24_path = f"{files_dir}/files/opta_f24.xml"
        f7_path = f"{files_dir}/files/opta_f7.xml"

        serializer = OptaSerializer()

        # Both feeds are read in binary mode and handed over together.
        with open(f24_path, "rb") as f24_data, open(f7_path, "rb") as f7_data:
            inputs = {"f24_data": f24_data, "f7_data": f7_data}
            dataset = serializer.deserialize(inputs=inputs)

        dataframe = to_pandas(dataset)
        is_generic_out = dataframe.event_type == "GENERIC:out"
        generic_out_rows = dataframe[is_generic_out]
        assert generic_out_rows.shape[0] == 2
예제 #10
0
def main():
    """
    Show how to load Metrica datasets and how arguments are handed to the
    dataset loader.
    """
    loader_name = "metrica_tracking"

    # The metrica dataset loader loads by default the 'game1' dataset
    subsampled_options = {'sample_rate': 1. / 12, 'limit': 10}
    subsampled = datasets.load(loader_name, options=subsampled_options)
    print(len(subsampled.frames))

    # Extra keyword arguments are forwarded to the loader and select a
    # different dataset.
    other_game = datasets.load(
        loader_name,
        options={'limit': 1000},
        game='game2',
    )

    print(to_pandas(other_game))
예제 #11
0
def main():
    """
    Load selected Statsbomb event types, transform them to a fixed
    orientation and new pitch dimensions, and print the first 50 rows of
    the pandas export. The transform and export steps are each wrapped in
    ``performance_logging``.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    log = logging.getLogger(__name__)

    wanted_events = {"event_types": ["pass", "take_on", "carry", "shot"]}
    loaded = datasets.load("statsbomb", wanted_events)

    with performance_logging("transform", logger=log):
        transformed = transform(
            loaded,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=[(-5500, 5500), (-3300, 3300)],
        )

    with performance_logging("to pandas", logger=log):
        table = to_pandas(transformed)

    head = table[:50]
    print(head.to_string())
예제 #12
0
    def test_tracking_data_frame(self):
        """Exercise the TrackingFrame accessors on the EPTS tracking fixture."""

        def column_count(frame):
            # Number of columns remaining in a (filtered) frame.
            return frame.shape[1]

        here = os.path.dirname(__file__)
        tracking_dataset = load_epts_tracking_data(
            metadata_filename=f"{here}/files/metadata.xml",
            raw_data_filename=f"{here}/files/tracking.txt",
        )

        # Wrap the pandas export and attach the dataset context to the frame.
        tracking = TrackingFrame(to_pandas(tracking_dataset))
        tracking.data_type = DataType.TRACKING
        tracking.metadata = tracking_dataset.metadata
        tracking.records = tracking_dataset.records

        team_a = "FIFATMA"

        # Metadata lookups.
        assert tracking.get_team_by_id(team_a).team_id == "FIFATMA"
        assert tracking.get_period_by_id(1).id == 1
        assert tracking.get_other_team_id(team_a) == "FIFATMB"

        # Column selections.
        assert column_count(tracking.team(team_a)) == 22
        assert column_count(tracking.dimension("x")) == 23
        assert column_count(tracking.players()) == 44
        assert column_count(tracking.players("field")) == 40

        # Per-row results summed over the whole frame.
        defending_rows = tracking.phase(defending_team_id=team_a)
        assert sum(defending_rows) == 0
        stretched_rows = tracking.team(team_a).stretched(90)
        assert sum(stretched_rows) == 863
예제 #13
0
# Only download the Wyscout JSON when no local copy exists yet.
if not os.path.exists(WYSCOUT_JSON_PATH):
    # Fetch and parse BEFORE opening the target file. Opening it first would
    # create an empty file even when the request or the JSON parsing fails,
    # and the next run would then skip the download and fail while loading.
    response = requests.get(url=WYSCOUT_URL)
    response.encoding = 'unicode-escape'  # to make sure the encoding for é etc. is correct
    wyscout_json = response.json()
    with open(WYSCOUT_JSON_PATH, 'w') as f:
        json.dump(wyscout_json, f)

# load the wyscout events as a dataframe using Kloppy
serializer = WyscoutSerializer()

with open(WYSCOUT_JSON_PATH) as event_data:
    wyscout_dataset = serializer.deserialize(inputs={'event_data': event_data})

# Add readable player and team names as extra columns next to the event data.
df_wyscout = to_pandas(wyscout_dataset,
                       additional_columns={
                           'player_name': lambda event: str(event.player),
                           'team_name': lambda event: str(event.player.team)
                       })

##############################################################################
# Standardize the Wyscout data to StatsBomb coordinates
# -----------------------------------------------------
# You can use any of the supported pitches in the pitch_from/ pitch_to here.
# They are currently: ``statsbomb``, ``tracab``, ``opta``, ``wyscout``, ``uefa``,
# ``metricasports``, ``custom``, ``skillcorner``, and ``secondspectrum``.
#
# If the pitch size varies (``tracab``, ``metricasports``, ``custom``,
# ``skillcorner``, ``secondspectrum``)
# then you also need to supply the relevant
# length_from/ length_to or width_from/ width_to in meters.
예제 #14
0
    def to_pandas(self, *args, **kwargs):
        """
        Delegate to the package-level ``kloppy.to_pandas`` helper.

        This object is passed as the first argument; every other positional
        and keyword argument is forwarded unchanged.
        """
        # Imported at call time and aliased so the helper is not confused
        # with this method.
        from kloppy import to_pandas as _package_to_pandas

        forwarded = (self, *args)
        return _package_to_pandas(*forwarded, **kwargs)