def test_set_config(self, f24_data: str, f7_data: str):
    """
    Make sure the coordinate system can be set and is used when loading data.
    """
    dataset = opta.load(f24_data=f24_data, f7_data=f7_data)
    assert isinstance(
        dataset.metadata.coordinate_system, KloppyCoordinateSystem
    )

    set_config("coordinate_system", "opta")

    dataset = opta.load(f24_data=f24_data, f7_data=f7_data)
    assert isinstance(
        dataset.metadata.coordinate_system, OptaCoordinateSystem
    )
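# The tests here receive `f24_data` and `f7_data` as pytest fixtures that
# resolve to the paths of the sample Opta files. A minimal sketch of such
# fixtures, assuming the `files/opta_f24.xml` / `files/opta_f7.xml` layout
# that `test_to_pandas_generic_events` below uses:
import os

import pytest


@pytest.fixture
def f24_data() -> str:
    base_dir = os.path.dirname(__file__)
    return f"{base_dir}/files/opta_f24.xml"


@pytest.fixture
def f7_data() -> str:
    base_dir = os.path.dirname(__file__)
    return f"{base_dir}/files/opta_f7.xml"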
def kloppy_load_data(f7, f24):
    """
    Args:
        f7: filepath to the match details
        f24: filepath to the event details

    Returns:
        events: DataFrame consisting of all events
        home_team_id: id of the home team
        away_team_id: id of the away team
    """
    dataset = opta.load(f7_data=f7, f24_data=f24)

    events = dataset.transform(
        to_orientation=Orientation.FIXED_HOME_AWAY
    ).to_pandas(
        additional_columns={
            "event_name": lambda event: str(getattr(event, "event_name", "")),
            "player_name": lambda event: str(getattr(event, "player", "")),
            "ball_state": lambda event: str(getattr(event, "ball_state", "")),
            "team_name": lambda event: str(getattr(event, "team", "")),
        },
    )
    metadata = dataset.metadata
    home_team, away_team = metadata.teams
    return events, home_team.team_id, away_team.team_id
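# A minimal usage sketch for `kloppy_load_data`; the file paths are
# placeholders for your own Opta exports, not files shipped with this code:
events, home_team_id, away_team_id = kloppy_load_data(
    f7="f7.xml", f24="f24.xml"
)
print(events[["event_name", "player_name", "team_name"]].head())
print(f"home={home_team_id} away={away_team_id}")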
def test_correct_normalized_deserialization(self, f7_data: str, f24_data: str):
    dataset = opta.load(
        f24_data=f24_data,
        f7_data=f7_data,
    )
    assert dataset.events[0].coordinates == Point(0.501, 0.506)
def test_to_pandas_generic_events(self):
    base_dir = os.path.dirname(__file__)
    dataset = opta.load(
        f7_data=f"{base_dir}/files/opta_f7.xml",
        f24_data=f"{base_dir}/files/opta_f24.xml",
    )
    dataframe = dataset.to_pandas()
    dataframe = dataframe[dataframe.event_type == "BALL_OUT"]
    assert dataframe.shape[0] == 2
def test_config_context(self, f24_data: str, f7_data: str):
    assert get_config("coordinate_system") == "kloppy"

    with config_context("coordinate_system", "opta"):
        assert get_config("coordinate_system") == "opta"
        dataset = opta.load(f24_data=f24_data, f7_data=f7_data)

    assert get_config("coordinate_system") == "kloppy"
    assert isinstance(
        dataset.metadata.coordinate_system, OptaCoordinateSystem
    )
def test_custom_adapter(self, f24_data: str, f7_data: str):
    """
    Make sure a custom adapter can be registered and is used when
    loading data.
    """

    class CustomAdapter(Adapter):
        def supports(self, url: str) -> bool:
            return url.startswith("test123://")

        def read_to_stream(self, url: str, output: BinaryIO):
            if url == "test123://f24":
                fp = open(f24_data, "rb")
            elif url == "test123://f7":
                fp = open(f7_data, "rb")
            else:
                raise Exception(f"Unknown url {url}")
            output.write(fp.read())
            fp.close()

    with config_context("cache", None):
        with pytest.raises(InputNotFoundError):
            # When our adapter is not registered yet,
            # kloppy will fall back to regular `open`.
            opta.load(f24_data="test123://f24", f7_data="test123://f7")

        custom_adapter = CustomAdapter()
        adapters.append(custom_adapter)

        dataset = opta.load(f24_data="test123://f24", f7_data="test123://f7")

        # Asserts borrowed from `test_opta.py`
        assert dataset.metadata.provider == Provider.OPTA
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 20

        # cleanup
        adapters.remove(custom_adapter)
def run_query(argv=sys.argv[1:]):
    parser = argparse.ArgumentParser(description="Run query on event data")
    parser.add_argument(
        "--input-statsbomb",
        help="StatsBomb event input files (events.json,lineup.json)",
    )
    parser.add_argument(
        "--input-opta", help="Opta event input files (f24.xml,f7.xml)"
    )
    parser.add_argument(
        "--input-datafactory", help="Datafactory event input file (.json)"
    )
    parser.add_argument("--input-wyscout", help="Wyscout event input file")
    parser.add_argument("--output-xml", help="Output file")
    parser.add_argument(
        "--with-success",
        default=True,
        help="Include existence of success capture in output",
    )
    # Parse as numbers so values passed on the command line (which argparse
    # delivers as strings) work in the timestamp arithmetic below.
    parser.add_argument(
        "--prepend-time", default=7, type=float, help="Seconds to prepend to match"
    )
    parser.add_argument(
        "--append-time", default=5, type=float, help="Seconds to append to match"
    )
    parser.add_argument(
        "--query-file", help="File containing the query", required=True
    )
    parser.add_argument(
        "--stats",
        default="none",
        help="Show matches stats",
        choices=["text", "json", "none"],
    )
    parser.add_argument(
        "--show-events",
        default=False,
        help="Show events for each match",
        action="store_true",
    )
    parser.add_argument(
        "--only-success",
        default=False,
        help="Only show/output success cases",
        action="store_true",
    )

    logger = logging.getLogger("run_query")
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    opts = parser.parse_args(argv)

    query = load_query(opts.query_file)

    dataset = None
    if opts.input_statsbomb:
        with performance_logging("load dataset", logger=logger):
            events_filename, lineup_filename = opts.input_statsbomb.split(",")
            dataset = statsbomb.load(
                event_data=events_filename.strip(),
                lineup_data=lineup_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_opta:
        with performance_logging("load dataset", logger=logger):
            f24_filename, f7_filename = opts.input_opta.split(",")
            dataset = opta.load(
                f24_data=f24_filename.strip(),
                f7_data=f7_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_datafactory:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_datafactory
            dataset = datafactory.load(
                event_data=events_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_wyscout:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_wyscout
            dataset = wyscout.load(
                event_data=events_filename,
                event_types=query.event_types,
            )

    if not dataset:
        raise Exception("You have to specify a dataset.")

    with performance_logging("searching", logger=logger):
        matches = pm.search(dataset, query.pattern)

    # Construct a new code dataset with the same properties (e.g. periods)
    # as the original event dataset. Records will be added below.
    records = []

    counter = Counter()
    for i, match in enumerate(matches):
        team = match.events[0].team
        success = "success" in match.captures

        counter.update(
            {
                f"{team.ground}_total": 1,
                f"{team.ground}_success": 1 if success else 0,
            }
        )

        should_process = not opts.only_success or success
        if opts.show_events and should_process:
            print_match(i, match, success, str(team))

        if opts.output_xml and should_process:
            code_ = str(team)
            if opts.with_success and success:
                code_ += " success"

            code = Code(
                period=match.events[0].period,
                code_id=str(i),
                code=code_,
                timestamp=match.events[0].timestamp - opts.prepend_time,
                end_timestamp=match.events[-1].timestamp + opts.append_time,
                # TODO: refactor those two out
                ball_state=None,
                ball_owning_team=None,
            )
            records.append(code)

    code_dataset = CodeDataset(metadata=dataset.metadata, records=records)

    if opts.output_xml:
        sportscode.save(code_dataset, opts.output_xml)
        logger.info(f"Wrote {len(code_dataset.codes)} video fragments to file")

    if opts.stats == "text":
        text_stats = """\
        Home:
          total count: {home_total}
          success: {home_success} ({home_success_rate:.0f}%)
          no success: {home_failure} ({home_failure_rate:.0f}%)

        Away:
          total count: {away_total}
          success: {away_success} ({away_success_rate:.0f}%)
          no success: {away_failure} ({away_failure_rate:.0f}%)
        """.format(
            home_total=counter["home_total"],
            home_success=counter["home_success"],
            home_success_rate=(
                counter["home_success"] / counter["home_total"] * 100
            ),
            home_failure=counter["home_total"] - counter["home_success"],
            home_failure_rate=(
                (counter["home_total"] - counter["home_success"])
                / counter["home_total"]
                * 100
            ),
            away_total=counter["away_total"],
            away_success=counter["away_success"],
            away_success_rate=(
                counter["away_success"] / counter["away_total"] * 100
            ),
            away_failure=counter["away_total"] - counter["away_success"],
            away_failure_rate=(
                (counter["away_total"] - counter["away_success"])
                / counter["away_total"]
                * 100
            ),
        )
        print(textwrap.dedent(text_stats))
    elif opts.stats == "json":
        import json

        print(json.dumps(counter, indent=4))
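# A minimal, hypothetical invocation of `run_query`; the query file and the
# Opta inputs are placeholders, not files shipped with this code:
if __name__ == "__main__":
    run_query(
        [
            "--input-opta", "f24.xml,f7.xml",
            "--query-file", "query.py",
            "--stats", "text",
        ]
    )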
def test_correct_deserialization(self, f7_data: str, f24_data: str):
    dataset = opta.load(
        f24_data=f24_data, f7_data=f7_data, coordinates="opta"
    )
    assert dataset.metadata.provider == Provider.OPTA
    assert dataset.dataset_type == DatasetType.EVENT
    assert len(dataset.events) == 20
    assert len(dataset.metadata.periods) == 2
    assert (
        dataset.events[10].ball_owning_team == dataset.metadata.teams[1]
    )  # 1594254267
    assert (
        dataset.events[15].ball_owning_team == dataset.metadata.teams[0]
    )  # 2087733359
    assert dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM

    assert dataset.metadata.teams[0].name == "FC København"
    assert dataset.metadata.teams[0].ground == Ground.HOME
    assert dataset.metadata.teams[0].starting_formation == FormationType(
        "4-4-2"
    )
    assert dataset.metadata.teams[1].name == "FC Nordsjælland"
    assert dataset.metadata.teams[1].ground == Ground.AWAY
    assert dataset.metadata.teams[1].starting_formation == FormationType(
        "4-3-3"
    )

    player = dataset.metadata.teams[0].players[0]
    assert player.player_id == "111319"
    assert player.jersey_no == 21
    assert str(player) == "Jesse Joronen"
    assert player.position.position_id == "1"
    assert player.position.name == "Goalkeeper"

    assert dataset.metadata.periods[0] == Period(
        id=1,
        start_timestamp=1537714933.608,
        end_timestamp=1537717701.222,
        attacking_direction=AttackingDirection.NOT_SET,
    )
    assert dataset.metadata.periods[1] == Period(
        id=2,
        start_timestamp=1537718728.873,
        end_timestamp=1537721737.788,
        attacking_direction=AttackingDirection.NOT_SET,
    )

    assert dataset.events[0].coordinates == Point(50.1, 49.4)

    # Check the qualifiers
    assert (
        dataset.events[0].qualifiers[0].value == SetPieceType.KICK_OFF
    )  # 1510681159
    assert dataset.events[6].qualifiers[0].value == BodyPart.HEAD  # 1101592119
    assert (
        dataset.events[5].qualifiers[0].value == PassType.CHIPPED_PASS
    )  # 1444075194
    assert dataset.events[19].qualifiers[0].value == CardType.RED  # 2318695229

    # Check receiver coordinates for incomplete passes
    assert dataset.events[6].receiver_coordinates.x == 45.5
    assert dataset.events[6].receiver_coordinates.y == 68.2

    # Check timestamp from qualifier in case of goal
    assert dataset.events[17].timestamp == 139.65200018882751  # 2318695229
    # assert dataset.events[17].coordinates_y == 12

    # Check own goal
    assert dataset.events[18].result.value == "OWN_GOAL"  # 2318697001