Exemple #1
0
def main():
    """
    Demonstrate loading Metrica tracking data and forwarding extra arguments
    to the dataset loader.

    Prints the frame count of a first, limited load, then prints a second
    load (requested with ``game="game2"``) after passing it to ``to_pandas``.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)

    # With no explicit game, the Metrica loader falls back to the 'game1' dataset
    first_options = {"sample_rate": 1.0 / 12, "limit": 10}
    first_load = datasets.load("metrica_tracking", options=first_options)
    frame_count = len(first_load.frames)
    print(frame_count)

    # Extra keyword arguments are handed to the loader to pick a different dataset
    second_load = datasets.load(
        "metrica_tracking",
        game="game2",
        options={"limit": 1000},
    )
    print(to_pandas(second_load))
Exemple #2
0
def main():
    """
    Demonstrate loading a Statsbomb dataset with loader options, transforming
    it, and printing the first 50 rows of the ``to_pandas`` result.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    logger = logging.getLogger(__name__)

    wanted_event_types = ["pass", "take_on", "carry", "shot"]
    events = datasets.load("statsbomb", {"event_types": wanted_event_types})

    # Target orientation and pitch dimensions (TRACAB coordinates)
    target_pitch = [(-5500, 5500), (-3300, 3300)]
    with performance_logging("transform", logger=logger):
        events = transform(
            events,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=target_pitch,
        )

    with performance_logging("to pandas", logger=logger):
        table = to_pandas(events)

    first_rows = table[:50]
    print(first_rows.to_string())
Exemple #3
0
def main():
    """
    Plot the playing time of every home-team player as a bar chart.

    Each frame in which a home player has coordinates adds one to that
    player's count; the counts are divided by 60 and shown as minutes.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)

    tracking = datasets.load(
        "metrica_tracking",
        options={"sample_rate": 1.0 / 25},
    )

    # Tally, per jersey number, the frames in which a home player appears
    seconds_by_jersey = Counter()
    for frame in tracking.frames:
        for player in frame.players_coordinates.keys():
            if player.team.ground == Ground.HOME:
                seconds_by_jersey[player.jersey_no] += 1

    # Sort by jersey number, then split into parallel label/value sequences
    ordered_pairs = sorted(seconds_by_jersey.items())
    jersey_numbers, playing_seconds = zip(*ordered_pairs)
    playing_minutes = [count / 60 for count in playing_seconds]
    bar_positions = range(len(seconds_by_jersey))

    plt.bar(bar_positions, playing_minutes, align="center", alpha=0.5)
    plt.xticks(bar_positions, jersey_numbers)
    plt.ylabel("Minutes")
    plt.title("Playing time per player")

    plt.show()
Exemple #4
0
def main():
    """
    Plot the playing time of every home-team player as a bar chart.

    Each frame adds one to the count of every jersey number present in the
    frame's home-team positions; counts are divided by 60 and shown as minutes.
    """
    tracking = datasets.load(
        "metrica_tracking",
        options={'sample_rate': 1. / 25},
    )

    # Tally, per (integer) jersey number, the frames in which it appears
    seconds_by_jersey = Counter()
    for frame in tracking.frames:
        seconds_by_jersey.update(
            int(jersey_no)
            for jersey_no in frame.home_team_player_positions.keys()
        )

    # Sort by jersey number, then split into parallel label/value sequences
    ordered_pairs = sorted(seconds_by_jersey.items())
    jersey_numbers, playing_seconds = zip(*ordered_pairs)
    playing_minutes = [count / 60 for count in playing_seconds]
    bar_positions = range(len(seconds_by_jersey))

    plt.bar(bar_positions, playing_minutes, align='center', alpha=0.5)
    plt.xticks(bar_positions, jersey_numbers)
    plt.ylabel('Minutes')
    plt.title('Playing time per player')

    plt.show()
Exemple #5
0
def main():
    """
    Demonstrate loading Metrica tracking data and forwarding extra arguments
    to the dataset loader.

    Prints the frame count of a first, limited load, then prints a second
    load (requested with ``game='game2'``) after passing it to ``to_pandas``.
    """
    # With no explicit game, the Metrica loader falls back to the 'game1' dataset
    first_options = {'sample_rate': 1. / 12, 'limit': 10}
    first_load = datasets.load("metrica_tracking", options=first_options)
    frame_count = len(first_load.frames)
    print(frame_count)

    # Extra keyword arguments are handed to the loader to pick a different dataset
    second_load = datasets.load(
        "metrica_tracking",
        game='game2',
        options={'limit': 1000},
    )
    print(to_pandas(second_load))
Exemple #6
0
def main():
    """
    Run a "PPS" pattern search over Statsbomb shot/pass events and print the
    first 100 events with player and team names.

    The pandas display options are widened first so the printed frame is not
    truncated. The function returns ``None``.
    """
    import pandas as pd

    # Lift pandas' display limits so the full frame is printed
    pd.set_option("display.max_colwidth", None)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 2000)

    matcher = Matcher(encoder)

    dataset = datasets.load("statsbomb",
                            options={"event_types": ["shot", "pass"]})

    with performance_logging("search"):
        # The search result used to feed a per-match loop that sat after an
        # unconditional `return` and could never run. That dead loop has been
        # removed; the call itself is kept so the search is still executed
        # and timed.
        matcher.search(dataset, r"PPS")

    df = to_pandas(
        dataset,
        additional_columns={
            "player_name": lambda event: event.player.full_name,
            "team_name": lambda event: str(event.team),
        },
    )
    print(df[["timestamp", "team_name", "player_name", "event_type",
              "result"]][:100])
Exemple #7
0
def main():
    """
    Load selected Statsbomb event types, transform the dataset to a fixed
    orientation and pitch dimensions, and print the first 50 rows of the
    ``to_pandas`` result. The transform and conversion steps are each wrapped
    in ``performance_logging``.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    logger = logging.getLogger(__name__)

    wanted_event_types = ["pass", "take_on", "carry", "shot"]
    events = datasets.load("statsbomb", {"event_types": wanted_event_types})

    target_pitch = [(-5500, 5500), (-3300, 3300)]
    with performance_logging("transform", logger=logger):
        events = transform(
            events,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=target_pitch,
        )

    with performance_logging("to pandas", logger=logger):
        table = to_pandas(events)

    first_rows = table[:50]
    print(first_rows.to_string())
Exemple #8
0
def _load_match(provider: str, match_id: str) -> List[pyspark.Row]:
    """Load one match and return its records as a list of ``pyspark.Row``.

    The dataset is loaded for ``provider`` with the given ``match_id``,
    converted with ``to_pandas(all_passes=True)``, passed through
    ``kh.fix_kloppy_dataframe``, and each resulting record becomes one row
    with an extra ``match`` field holding ``match_id``.
    """
    match_data = datasets.load(provider, match_id=match_id)
    raw_frame = match_data.to_pandas(all_passes=True)
    fixed_frame = kh.fix_kloppy_dataframe(raw_frame)

    rows = []
    for record in fixed_frame.to_dict(orient="records"):
        rows.append(pyspark.Row(match=match_id, **record))
    return rows