Example #1
0
    def test_set_config(self, f24_data: str, f7_data: str):
        """
        Verify that the global ``coordinate_system`` config value is
        picked up by subsequent ``opta.load`` calls.
        """
        # Default config: datasets come back in the kloppy system.
        before = opta.load(f24_data=f24_data, f7_data=f7_data)
        assert isinstance(
            before.metadata.coordinate_system, KloppyCoordinateSystem
        )

        set_config("coordinate_system", "opta")

        # After the override, loading yields the provider's system.
        after = opta.load(f24_data=f24_data, f7_data=f7_data)
        assert isinstance(
            after.metadata.coordinate_system, OptaCoordinateSystem
        )
Example #2
0
def kloppy_load_data(f7, f24):
    """Load an Opta match with kloppy and flatten it to a DataFrame.

    Args:
        f7: filepath to the match details
        f24: filepath to the event details

    Returns:
        events: DataFrame consisting of all events
        home_team_id: id of the home team
        away_team_id: id of the away team
    """
    dataset = opta.load(f7_data=f7, f24_data=f24)

    def _as_str(attr):
        # Stringify a possibly-missing event attribute ("" when absent).
        return lambda event: str(getattr(event, attr, ""))

    events = dataset.transform(
        to_orientation=Orientation.FIXED_HOME_AWAY
    ).to_pandas(
        additional_columns={
            "event_name": _as_str("event_name"),
            "player_name": _as_str("player"),
            "ball_state": _as_str("ball_state"),
            "team_name": _as_str("team"),
        },
    )

    home_team, away_team = dataset.metadata.teams
    return events, home_team.team_id, away_team.team_id
Example #3
0
 def test_correct_normalized_deserialization(self, f7_data: str,
                                             f24_data: str):
     """Loading without options yields kloppy-normalized coordinates."""
     dataset = opta.load(f7_data=f7_data, f24_data=f24_data)
     first_event = dataset.events[0]
     assert first_event.coordinates == Point(0.501, 0.506)
Example #4
0
    def test_to_pandas_generic_events(self):
        """Generic BALL_OUT events survive the to_pandas conversion."""
        base_dir = os.path.dirname(__file__)
        dataset = opta.load(
            f7_data=f"{base_dir}/files/opta_f7.xml",
            f24_data=f"{base_dir}/files/opta_f24.xml",
        )

        frame = dataset.to_pandas()
        ball_out_rows = frame[frame.event_type == "BALL_OUT"]
        # Exactly two ball-out events in the fixture.
        assert len(ball_out_rows.index) == 2
Example #5
0
    def test_config_context(self, f24_data: str, f7_data: str):
        """``config_context`` overrides the coordinate system temporarily."""
        assert get_config("coordinate_system") == "kloppy"

        with config_context("coordinate_system", "opta"):
            assert get_config("coordinate_system") == "opta"
            dataset = opta.load(f24_data=f24_data, f7_data=f7_data)

        # The override is rolled back when the context exits...
        assert get_config("coordinate_system") == "kloppy"

        # ...but the dataset loaded inside it keeps the opta system.
        assert isinstance(
            dataset.metadata.coordinate_system, OptaCoordinateSystem
        )
Example #6
0
    def test_custom_adapter(self, f24_data: str, f7_data: str):
        """
        A registered custom Adapter resolves custom URL schemes for
        ``opta.load``; without it kloppy cannot open those URLs.
        """
        class CustomAdapter(Adapter):
            def supports(self, url: str) -> bool:
                return url.startswith("test123://")

            def read_to_stream(self, url: str, output: BinaryIO):
                # Map the fake scheme onto the fixture files on disk.
                if url == "test123://f24":
                    path = f24_data
                elif url == "test123://f7":
                    path = f7_data
                else:
                    raise Exception(f"Unknown url {url}")

                with open(path, "rb") as fp:
                    output.write(fp.read())

        with config_context("cache", None):
            with pytest.raises(InputNotFoundError):
                # When our adapter is not registered yet
                # kloppy will fall back to regular `open`.
                opta.load(f24_data="test123://f24", f7_data="test123://f7")

            custom_adapter = CustomAdapter()
            adapters.append(custom_adapter)

            dataset = opta.load(
                f24_data="test123://f24", f7_data="test123://f7"
            )

            # Asserts borrowed from `test_opta.py`
            assert dataset.metadata.provider == Provider.OPTA
            assert dataset.dataset_type == DatasetType.EVENT
            assert len(dataset.events) == 20

            # cleanup
            adapters.remove(custom_adapter)
Example #7
0
def run_query(argv=sys.argv[1:]):
    parser = argparse.ArgumentParser(description="Run query on event data")
    parser.add_argument(
        "--input-statsbomb",
        help="StatsBomb event input files (events.json,lineup.json)",
    )
    parser.add_argument("--input-opta",
                        help="Opta event input files (f24.xml,f7.xml)")
    parser.add_argument("--input-datafactory",
                        help="Datafactory event input file (.json)")
    parser.add_argument("--input-wyscout", help="Wyscout event input file")
    parser.add_argument("--output-xml", help="Output file")
    parser.add_argument(
        "--with-success",
        default=True,
        help="Input existence of success capture in output",
    )
    parser.add_argument("--prepend-time",
                        default=7,
                        help="Seconds to prepend to match")
    parser.add_argument("--append-time",
                        default=5,
                        help="Seconds to append to match")
    parser.add_argument("--query-file",
                        help="File containing the query",
                        required=True)
    parser.add_argument(
        "--stats",
        default="none",
        help="Show matches stats",
        choices=["text", "json", "none"],
    )
    parser.add_argument(
        "--show-events",
        default=False,
        help="Show events for each match",
        action="store_true",
    )
    parser.add_argument(
        "--only-success",
        default=False,
        help="Only show/output success cases",
        action="store_true",
    )

    logger = logging.getLogger("run_query")
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    opts = parser.parse_args(argv)

    query = load_query(opts.query_file)

    dataset = None
    if opts.input_statsbomb:
        with performance_logging("load dataset", logger=logger):
            events_filename, lineup_filename = opts.input_statsbomb.split(",")
            dataset = statsbomb.load(
                event_data=events_filename.strip(),
                lineup_data=lineup_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_opta:
        with performance_logging("load dataset", logger=logger):
            f24_filename, f7_filename = opts.input_opta.split(",")
            dataset = opta.load(
                f24_data=f24_filename.strip(),
                f7_data=f7_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_datafactory:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_datafactory
            dataset = datafactory.load(
                event_data=events_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_wyscout:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_wyscout
            dataset = wyscout.load(
                event_data=events_filename,
                event_types=query.event_types,
            )

    if not dataset:
        raise Exception("You have to specify a dataset.")

    with performance_logging("searching", logger=logger):
        matches = pm.search(dataset, query.pattern)

    # Construct new code dataset with same properties (eg periods)
    # as original event dataset.
    # Records will be added later below
    records = []
    counter = Counter()
    for i, match in enumerate(matches):
        team = match.events[0].team
        success = "success" in match.captures

        counter.update({
            f"{team.ground}_total": 1,
            f"{team.ground}_success": 1 if success else 0,
        })

        should_process = not opts.only_success or success
        if opts.show_events and should_process:
            print_match(i, match, success, str(team))

        if opts.output_xml and should_process:
            code_ = str(team)
            if opts.with_success and success:
                code_ += " success"

            code = Code(
                period=match.events[0].period,
                code_id=str(i),
                code=code_,
                timestamp=match.events[0].timestamp - opts.prepend_time,
                end_timestamp=match.events[-1].timestamp + opts.append_time,
                # TODO: refactor those two out
                ball_state=None,
                ball_owning_team=None,
            )
            records.append(code)

    code_dataset = CodeDataset(metadata=dataset.metadata, records=records)

    if opts.output_xml:
        sportscode.save(code_dataset, opts.output_xml)
        logger.info(f"Wrote {len(code_dataset.codes)} video fragments to file")

    if opts.stats == "text":
        text_stats = """\
        Home:
          total count: {home_total}
            success: {home_success} ({home_success_rate:.0f}%)
            no success: {home_failure} ({home_failure_rate:.0f}%)

        Away:
          total count: {away_total}
            success: {away_success} ({away_success_rate:.0f}%)
            no success: {away_failure} ({away_failure_rate:.0f}%)
        """.format(
            home_total=counter["home_total"],
            home_success=counter["home_success"],
            home_success_rate=(counter["home_success"] /
                               counter["home_total"] * 100),
            home_failure=counter["home_total"] - counter["home_success"],
            home_failure_rate=(
                (counter["home_total"] - counter["home_success"]) /
                counter["home_total"] * 100),
            away_total=counter["away_total"],
            away_success=counter["away_success"],
            away_success_rate=(counter["away_success"] /
                               counter["away_total"] * 100),
            away_failure=counter["away_total"] - counter["away_success"],
            away_failure_rate=(
                (counter["away_total"] - counter["away_success"]) /
                counter["away_total"] * 100),
        )
        print(textwrap.dedent(text_stats))
    elif opts.stats == "json":
        import json

        print(json.dumps(counter, indent=4))
Example #8
0
    def test_correct_deserialization(self, f7_data: str, f24_data: str):
        """Full sanity check of an Opta load using provider coordinates."""
        dataset = opta.load(
            f24_data=f24_data, f7_data=f7_data, coordinates="opta"
        )
        meta = dataset.metadata
        events = dataset.events
        home, away = meta.teams

        assert meta.provider == Provider.OPTA
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(events) == 20
        assert len(meta.periods) == 2
        assert events[10].ball_owning_team == away  # 1594254267
        assert events[15].ball_owning_team == home  # 2087733359
        assert meta.orientation == Orientation.ACTION_EXECUTING_TEAM

        assert home.name == "FC København"
        assert home.ground == Ground.HOME
        assert home.starting_formation == FormationType("4-4-2")
        assert away.name == "FC Nordsjælland"
        assert away.ground == Ground.AWAY
        assert away.starting_formation == FormationType("4-3-3")

        player = home.players[0]
        assert player.player_id == "111319"
        assert player.jersey_no == 21
        assert str(player) == "Jesse Joronen"
        assert player.position.position_id == "1"
        assert player.position.name == "Goalkeeper"

        assert meta.periods[0] == Period(
            id=1,
            start_timestamp=1537714933.608,
            end_timestamp=1537717701.222,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert meta.periods[1] == Period(
            id=2,
            start_timestamp=1537718728.873,
            end_timestamp=1537721737.788,
            attacking_direction=AttackingDirection.NOT_SET,
        )

        assert events[0].coordinates == Point(50.1, 49.4)

        # Check the qualifiers
        assert events[0].qualifiers[0].value == SetPieceType.KICK_OFF  # 1510681159
        assert events[6].qualifiers[0].value == BodyPart.HEAD  # 1101592119
        assert events[5].qualifiers[0].value == PassType.CHIPPED_PASS  # 1444075194
        assert events[19].qualifiers[0].value == CardType.RED  # 2318695229

        # Check receiver coordinates for incomplete passes
        assert events[6].receiver_coordinates.x == 45.5
        assert events[6].receiver_coordinates.y == 68.2

        # Check timestamp from qualifier in case of goal
        assert events[17].timestamp == 139.65200018882751  # 2318695229

        # Check Own goal
        assert events[18].result.value == "OWN_GOAL"  # 2318697001