def _load_dataset(self, base_filename="statsbomb"):
    """Load the bundled sample match from this test module's files directory."""
    files_dir = os.path.dirname(__file__)
    event_file = f"{files_dir}/files/{base_filename}_event.json"
    lineup_file = f"{files_dir}/files/{base_filename}_lineup.json"
    return statsbomb.load(event_data=event_file, lineup_data=lineup_file)
def test_substitution(self, lineup_data: str, event_data: str):
    """Substitution events expose both the leaving player and the replacement."""
    dataset = statsbomb.load(
        lineup_data=lineup_data,
        event_data=event_data,
        event_types=["substitution"],
    )
    assert len(dataset.events) == 6

    expected_subs = [
        (6374, 3501),
        (6839, 6935),
        (6581, 6566),
        (6613, 6624),
        (5477, 11392),
        (5203, 8206),
    ]
    for event, (player_id, replacement_id) in zip(dataset.events, expected_subs):
        team = event.team
        assert event.player == team.get_player_by_id(player_id)
        assert event.replacement_player == team.get_player_by_id(replacement_id)
def test_correct_normalized_deserialization(self, lineup_data: str, event_data: str):
    """Default (normalized) coordinates are produced for the StatsBomb open data match."""
    dataset = statsbomb.load(lineup_data=lineup_data, event_data=event_data)
    expected_point = Point(0.2875, 0.25625)
    assert dataset.events[10].coordinates == expected_point
def test_foul_committed(self, lineup_data: str, event_data: str):
    """Filtering on foul_committed yields exactly the match's foul events."""
    dataset = statsbomb.load(
        lineup_data=lineup_data,
        event_data=event_data,
        event_types=["foul_committed"],
    )
    foul_count = len(dataset.events)
    assert foul_count == 23
def test_to_pandas_incomplete_pass(self):
    """Incomplete passes keep their end coordinates in the pandas export."""
    files_dir = os.path.dirname(__file__)
    dataset = statsbomb.load(
        lineup_data=f"{files_dir}/files/statsbomb_lineup.json",
        event_data=f"{files_dir}/files/statsbomb_event.json",
    )
    df = dataset.to_pandas()

    mask = (df.event_type == "PASS") & (df.result == "INCOMPLETE")
    incomplete_passes = df[mask].reset_index()
    first_row = incomplete_passes.loc[0]
    assert first_row["end_coordinates_y"] == 0.90625
    assert first_row["end_coordinates_x"] == 0.7125
def test_related_events(self, lineup_data: str, event_data: str):
    """A carry's related events resolve to the surrounding receipt and pass."""
    dataset = statsbomb.load(lineup_data=lineup_data, event_data=event_data)

    carry = dataset.get_event_by_id("8e3dacc2-7a39-4301-9053-e78cfec1aa95")
    pass_after = dataset.get_event_by_id("d1cccb73-c7ef-4b02-8267-ebd7f149904b")
    receipt_before = dataset.get_event_by_id("61da36dc-d862-416c-8ee3-1a0cd24dc086")

    assert carry.get_related_events() == [receipt_before, pass_after]
    assert carry.related_pass() == pass_after
def test_card(self, lineup_data: str, event_data: str):
    """Card events are deserialized with the correct card type."""
    dataset = statsbomb.load(
        lineup_data=lineup_data,
        event_data=event_data,
        event_types=["card"],
    )
    assert len(dataset.events) == 2
    assert all(
        event.card_type == CardType.FIRST_YELLOW for event in dataset.events
    )
def test_transform_event_data(self):
    """Transforming ACTION_EXECUTING data to fixed_home_away flips only the
    coordinates of events executed by the away team."""
    base_dir = os.path.dirname(__file__)
    dataset = statsbomb.load(
        lineup_data=f"{base_dir}/files/statsbomb_lineup.json",
        event_data=f"{base_dir}/files/statsbomb_event.json",
    )
    home_team, away_team = dataset.metadata.teams

    # A pressure event by Deportivo (away) while Barcelona (home) has the ball.
    pressure = dataset.get_event_by_id("6399af5c-74b8-4efe-ae19-85f331d355e8")
    assert pressure.team == away_team
    assert pressure.ball_owning_team == home_team

    receipt = pressure.next()
    assert receipt.team == home_team
    assert receipt.ball_owning_team == home_team

    transformed = dataset.transform(to_orientation="fixed_home_away")
    transformed_pressure = transformed.get_event_by_id(pressure.event_id)
    transformed_receipt = transformed_pressure.next()

    # Executed by the away team: coordinates must be mirrored.
    assert pressure.coordinates.x == 1 - transformed_pressure.coordinates.x
    assert pressure.coordinates.y == 1 - transformed_pressure.coordinates.y

    # Executed by the home team: coordinates must be untouched.
    assert receipt.coordinates.x == transformed_receipt.coordinates.x
    assert receipt.coordinates.y == transformed_receipt.coordinates.y
def run_query(argv=None):
    """Run a pattern-matching query against an event dataset and report matches.

    Parses the command line, loads an event dataset from exactly one of the
    supported providers (StatsBomb, Opta, Datafactory, Wyscout), searches it
    with the query's pattern, and then — depending on the options — prints the
    matches, writes a SportsCode XML file, and/or prints summary statistics.

    Args:
        argv: Command-line arguments. Defaults to ``sys.argv[1:]`` evaluated
            at call time (a def-time default would freeze sys.argv as it was
            at import time).

    Raises:
        Exception: If no input dataset option was given.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Run query on event data")
    parser.add_argument(
        "--input-statsbomb",
        help="StatsBomb event input files (events.json,lineup.json)",
    )
    parser.add_argument("--input-opta", help="Opta event input files (f24.xml,f7.xml)")
    parser.add_argument("--input-datafactory", help="Datafactory event input file (.json)")
    parser.add_argument("--input-wyscout", help="Wyscout event input file")
    parser.add_argument("--output-xml", help="Output file")
    # NOTE(review): any CLI-supplied string for this flag is truthy; only the
    # default (True) behaves as a real boolean. Left unchanged to avoid
    # flipping the default behavior.
    parser.add_argument(
        "--with-success",
        default=True,
        help="Input existence of success capture in output",
    )
    # type=float: without it, CLI-supplied values arrive as strings and the
    # timestamp arithmetic below raises TypeError.
    parser.add_argument(
        "--prepend-time", default=7, type=float, help="Seconds to prepend to match"
    )
    parser.add_argument(
        "--append-time", default=5, type=float, help="Seconds to append to match"
    )
    parser.add_argument("--query-file", help="File containing the query", required=True)
    parser.add_argument(
        "--stats",
        default="none",
        help="Show matches stats",
        choices=["text", "json", "none"],
    )
    parser.add_argument(
        "--show-events",
        default=False,
        help="Show events for each match",
        action="store_true",
    )
    parser.add_argument(
        "--only-success",
        default=False,
        help="Only show/output success cases",
        action="store_true",
    )

    logger = logging.getLogger("run_query")
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    opts = parser.parse_args(argv)

    query = load_query(opts.query_file)

    dataset = None
    if opts.input_statsbomb:
        with performance_logging("load dataset", logger=logger):
            events_filename, lineup_filename = opts.input_statsbomb.split(",")
            dataset = statsbomb.load(
                event_data=events_filename.strip(),
                lineup_data=lineup_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_opta:
        with performance_logging("load dataset", logger=logger):
            f24_filename, f7_filename = opts.input_opta.split(",")
            dataset = opta.load(
                f24_data=f24_filename.strip(),
                f7_data=f7_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_datafactory:
        with performance_logging("load dataset", logger=logger):
            dataset = datafactory.load(
                event_data=opts.input_datafactory.strip(),
                event_types=query.event_types,
            )
    if opts.input_wyscout:
        with performance_logging("load dataset", logger=logger):
            dataset = wyscout.load(
                event_data=opts.input_wyscout,
                event_types=query.event_types,
            )

    if not dataset:
        raise Exception("You have to specify a dataset.")

    with performance_logging("searching", logger=logger):
        matches = pm.search(dataset, query.pattern)

    # Construct a new code dataset with the same properties (e.g. periods)
    # as the original event dataset; records are collected below.
    records = []
    counter = Counter()
    for i, match in enumerate(matches):
        team = match.events[0].team
        success = "success" in match.captures

        counter.update({
            f"{team.ground}_total": 1,
            f"{team.ground}_success": 1 if success else 0,
        })

        should_process = not opts.only_success or success
        if opts.show_events and should_process:
            print_match(i, match, success, str(team))

        if opts.output_xml and should_process:
            code_ = str(team)
            if opts.with_success and success:
                code_ += " success"

            code = Code(
                period=match.events[0].period,
                code_id=str(i),
                code=code_,
                timestamp=match.events[0].timestamp - opts.prepend_time,
                end_timestamp=match.events[-1].timestamp + opts.append_time,
                # TODO: refactor those two out
                ball_state=None,
                ball_owning_team=None,
            )
            records.append(code)

    code_dataset = CodeDataset(metadata=dataset.metadata, records=records)

    if opts.output_xml:
        sportscode.save(code_dataset, opts.output_xml)
        logger.info(f"Wrote {len(code_dataset.codes)} video fragments to file")

    if opts.stats == "text":
        def rate(part, total):
            # Guard: a team with zero matches would otherwise raise
            # ZeroDivisionError.
            return part / total * 100 if total else 0.0

        stats = {}
        for ground in ("home", "away"):
            total = counter[f"{ground}_total"]
            success_count = counter[f"{ground}_success"]
            stats[f"{ground}_total"] = total
            stats[f"{ground}_success"] = success_count
            stats[f"{ground}_success_rate"] = rate(success_count, total)
            stats[f"{ground}_failure"] = total - success_count
            stats[f"{ground}_failure_rate"] = rate(total - success_count, total)

        text_stats = """\
            Home:
              total count: {home_total}
                success: {home_success} ({home_success_rate:.0f}%)
                no success: {home_failure} ({home_failure_rate:.0f}%)

            Away:
              total count: {away_total}
                success: {away_success} ({away_success_rate:.0f}%)
                no success: {away_failure} ({away_failure_rate:.0f}%)
            """.format(**stats)
        print(textwrap.dedent(text_stats))
    elif opts.stats == "json":
        import json

        print(json.dumps(counter, indent=4))
def dataset(self, event_data: str, lineup_data: str) -> EventDataset:
    """Fixture: the sample match deserialized in native StatsBomb coordinates."""
    load_kwargs = {
        "lineup_data": lineup_data,
        "event_data": event_data,
        "coordinates": "statsbomb",
    }
    return statsbomb.load(**load_kwargs)
def test_correct_deserialization(self, lineup_data: str, event_data: str):
    """
    End-to-end deserialization check against the StatsBomb open data project match.
    """
    dataset = statsbomb.load(
        lineup_data=lineup_data,
        event_data=event_data,
        coordinates="statsbomb",
    )
    metadata = dataset.metadata

    # Dataset-level properties
    assert metadata.provider == Provider.STATSBOMB
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.events) == 4023
    assert len(metadata.periods) == 2
    assert metadata.orientation == Orientation.ACTION_EXECUTING_TEAM

    # Teams and formations
    home = metadata.teams[0]
    away = metadata.teams[1]
    assert home.name == "Barcelona"
    assert away.name == "Deportivo Alavés"
    assert home.starting_formation == FormationType("4-4-2")
    assert away.starting_formation == FormationType("4-1-4-1")

    # Starting player
    player = home.get_player_by_id("5503")
    assert player.player_id == "5503"
    assert player.jersey_no == 10
    assert str(player) == "Lionel Andrés Messi Cuccittini"
    assert player.position is None  # not set
    assert player.starting

    # Substitute player
    sub_player = home.get_player_by_id("3501")
    assert str(sub_player) == "Philippe Coutinho Correia"
    assert not sub_player.starting

    # Periods
    assert metadata.periods[0] == Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=2705.267,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert metadata.periods[1] == Period(
        id=2,
        start_timestamp=2705.268,
        end_timestamp=5557.321,
        attacking_direction=AttackingDirection.NOT_SET,
    )

    # Native coordinates
    assert dataset.events[10].coordinates == Point(34.5, 20.5)

    # Body part qualifiers
    body_part_by_index = {
        792: BodyPart.HEAD,
        2232: BodyPart.RIGHT_FOOT,
    }
    for index, body_part in body_part_by_index.items():
        assert dataset.events[index].get_qualifier_value(BodyPartQualifier) == body_part
    assert dataset.events[195].get_qualifier_value(BodyPartQualifier) is None

    # Pass type qualifiers
    pass_type_by_index = {
        1433: PassType.CROSS,
        1552: PassType.THROUGH_BALL,
        443: PassType.SWITCH_OF_PLAY,
        3438: PassType.LONG_BALL,
        2266: PassType.HIGH_PASS,
        653: PassType.HEAD_PASS,
        3134: PassType.HAND_PASS,
        3611: PassType.ASSIST,
    }
    for index, pass_type in pass_type_by_index.items():
        assert dataset.events[index].get_qualifier_value(PassQualifier) == pass_type
    assert dataset.events[3392].get_qualifier_value(PassQualifier) is None