def main():
    """
    Demonstrate loading Metrica tracking data and forwarding extra
    arguments to the dataset loader.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)

    # With no `game` argument the metrica loader falls back to 'game1'.
    first_game_options = {"sample_rate": 1.0 / 12, "limit": 10}
    first_game = datasets.load("metrica_tracking", options=first_game_options)
    print(len(first_game.frames))

    # Any additional keyword argument is handed to the loader; here it
    # selects a different game.
    second_game = datasets.load(
        "metrica_tracking",
        options={"limit": 1000},
        game="game2",
    )
    print(to_pandas(second_game))
def main():
    """
    Demonstrate loading a Statsbomb dataset with loader options, then
    transforming it and printing the first rows as a pandas DataFrame.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    log = logging.getLogger(__name__)

    # Only these event types are requested from the loader.
    wanted_event_types = ["pass", "take_on", "carry", "shot"]
    events = datasets.load("statsbomb", {"event_types": wanted_event_types})

    # Target pitch dimensions: convert to TRACAB coordinates.
    tracab_dimensions = [(-5500, 5500), (-3300, 3300)]

    # Each step is wrapped so its duration is reported through `log`.
    with performance_logging("transform", logger=log):
        events = transform(
            events,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=tracab_dimensions,
        )

    with performance_logging("to pandas", logger=log):
        frame = to_pandas(events)

    first_rows = frame[:50]
    print(first_rows.to_string())
def main():
    """
    Plot, as a bar chart, the playing time of every home-team player.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )

    tracking = datasets.load(
        "metrica_tracking", options={"sample_rate": 1.0 / 25}
    )

    # Count, per jersey number, the frames in which a home player appears.
    # NOTE(review): treating one frame as one second presumably relies on
    # the 1/25 sample rate applied to 25 fps data -- confirm.
    seconds_by_jersey = Counter()
    for frame in tracking.frames:
        for player in frame.players_coordinates.keys():
            if player.team.ground == Ground.HOME:
                seconds_by_jersey[player.jersey_no] += 1

    # Split the (jersey, seconds) pairs, ordered by jersey number, into two
    # parallel tuples.
    ordered_pairs = sorted(seconds_by_jersey.items())
    jersey_numbers, playing_seconds = zip(*ordered_pairs)

    bar_positions = range(len(seconds_by_jersey))
    playing_minutes = [total / 60 for total in playing_seconds]

    plt.bar(bar_positions, playing_minutes, align="center", alpha=0.5)
    plt.xticks(bar_positions, jersey_numbers)
    plt.ylabel("Minutes")
    plt.title("Playing time per player")
    plt.show()
def main():
    """
    Plot, as a bar chart, the playing time of every home-team player.
    """
    tracking = datasets.load(
        "metrica_tracking", options={"sample_rate": 1.0 / 25}
    )

    # Count, per jersey number, the frames that contain a home-team position.
    # NOTE(review): treating one frame as one second presumably relies on
    # the 1/25 sample rate applied to 25 fps data -- confirm.
    seconds_by_jersey = Counter()
    for frame in tracking.frames:
        for jersey_no in frame.home_team_player_positions.keys():
            # Keys are converted to int so the bars sort numerically.
            seconds_by_jersey[int(jersey_no)] += 1

    # Split the (jersey, seconds) pairs, ordered by jersey number, into two
    # parallel tuples.
    ordered_pairs = sorted(seconds_by_jersey.items())
    jersey_numbers, playing_seconds = zip(*ordered_pairs)

    bar_positions = range(len(seconds_by_jersey))
    playing_minutes = [total / 60 for total in playing_seconds]

    plt.bar(bar_positions, playing_minutes, align="center", alpha=0.5)
    plt.xticks(bar_positions, jersey_numbers)
    plt.ylabel("Minutes")
    plt.title("Playing time per player")
    plt.show()
def main():
    """
    Demonstrate loading Metrica tracking data and forwarding extra
    arguments to the dataset loader.
    """
    # With no `game` argument the metrica loader falls back to 'game1'.
    first_game_options = {"sample_rate": 1.0 / 12, "limit": 10}
    first_game = datasets.load("metrica_tracking", options=first_game_options)
    print(len(first_game.frames))

    # Any additional keyword argument is handed to the loader; here it
    # selects a different game.
    second_game = datasets.load(
        "metrica_tracking",
        options={"limit": 1000},
        game="game2",
    )
    print(to_pandas(second_game))
def main():
    """
    Run a pattern search over a Statsbomb dataset and print the first
    100 events with player and team names attached.

    Only "shot" and "pass" events are loaded. The search for the pattern
    ``PPS`` is executed inside ``performance_logging`` so its duration is
    reported; its result is not used for the printed output.
    """
    import pandas as pd

    # Make pandas print every column and row without truncation.
    pd.set_option("display.max_colwidth", None)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 2000)

    matcher = Matcher(encoder)
    dataset = datasets.load(
        "statsbomb", options={"event_types": ["shot", "pass"]}
    )

    with performance_logging("search"):
        # The matches themselves are not consumed; the call is kept so the
        # search is still executed and timed.
        matcher.search(dataset, r"PPS")

    df = to_pandas(
        dataset,
        additional_columns={
            "player_name": lambda event: event.player.full_name,
            "team_name": lambda event: str(event.team),
        },
    )
    columns = ["timestamp", "team_name", "player_name", "event_type", "result"]
    print(df[columns][:100])

    # NOTE(review): a per-match loop used to follow an unconditional
    # `return` here. It could never run, and it ignored the match it
    # iterated over, so it was removed as dead code.
def main():
    """
    Load a Statsbomb dataset restricted to a few event types, transform
    it, and print the first 50 rows of the resulting pandas DataFrame.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    log = logging.getLogger(__name__)

    # Only these event types are requested from the loader.
    wanted_event_types = ["pass", "take_on", "carry", "shot"]
    events = datasets.load("statsbomb", {"event_types": wanted_event_types})

    # Pitch dimension ranges the dataset is transformed to.
    target_dimensions = [(-5500, 5500), (-3300, 3300)]

    # Each step is wrapped so its duration is reported through `log`.
    with performance_logging("transform", logger=log):
        events = transform(
            events,
            to_orientation="FIXED_HOME_AWAY",
            to_pitch_dimensions=target_dimensions,
        )

    with performance_logging("to pandas", logger=log):
        frame = to_pandas(events)

    first_rows = frame[:50]
    print(first_rows.to_string())
def _load_match(provider: str, match_id: str) -> List[pyspark.Row]:
    """
    Load a single match and return its records as a list of Spark rows.

    The dataset for `match_id` is loaded from `provider`, converted to a
    pandas DataFrame (with ``all_passes=True``) and passed through
    ``kh.fix_kloppy_dataframe``. Every resulting record becomes one
    ``pyspark.Row`` that also carries the match id in its ``match`` field.
    """
    dataset = datasets.load(provider, match_id=match_id)
    raw_frame = dataset.to_pandas(all_passes=True)
    cleaned_frame = kh.fix_kloppy_dataframe(raw_frame)

    rows = []
    for record in cleaned_frame.to_dict(orient="records"):
        # Record keys become Row fields next to the `match` identifier.
        rows.append(pyspark.Row(match=match_id, **record))
    return rows