Ejemplo n.º 1
0
    def _load_dataset(self, base_filename="statsbomb"):
        """Load a StatsBomb event/lineup pair from the ``files`` directory.

        The two paths are derived from ``base_filename`` by appending
        ``_event.json`` and ``_lineup.json``; the ``files`` directory is
        resolved relative to the directory of this module.
        """
        fixture_prefix = f"{os.path.dirname(__file__)}/files/{base_filename}"
        return statsbomb.load(
            event_data=f"{fixture_prefix}_event.json",
            lineup_data=f"{fixture_prefix}_lineup.json",
        )
Ejemplo n.º 2
0
    def test_substitution(self, lineup_data: str, event_data: str):
        """
        Check the number of substitution events and the players involved
        """
        # (player_id, replacement_player_id) pairs, in event order
        expected_swaps = [
            (6374, 3501),
            (6839, 6935),
            (6581, 6566),
            (6613, 6624),
            (5477, 11392),
            (5203, 8206),
        ]

        # Only substitution events are requested from the loader.
        dataset = statsbomb.load(
            event_types=["substitution"],
            event_data=event_data,
            lineup_data=lineup_data,
        )

        assert len(dataset.events) == 6

        for event, (player_id, replacement_player_id) in zip(
                dataset.events, expected_swaps):
            team = event.team
            assert event.player == team.get_player_by_id(player_id)
            assert event.replacement_player == team.get_player_by_id(
                replacement_player_id)
Ejemplo n.º 3
0
    def test_correct_normalized_deserialization(self, lineup_data: str,
                                                event_data: str):
        """
        Check an event's coordinates when no ``coordinates`` argument is
        passed to the loader.

        This test uses data from the StatsBomb open data project.
        """
        loaded = statsbomb.load(event_data=event_data,
                                lineup_data=lineup_data)

        actual_point = loaded.events[10].coordinates
        assert actual_point == Point(0.2875, 0.25625)
Ejemplo n.º 4
0
    def test_foul_committed(self, lineup_data: str, event_data: str):
        """
        Check the number of events loaded when only foul committed events
        are requested
        """
        fouls = statsbomb.load(
            event_types=["foul_committed"],
            event_data=event_data,
            lineup_data=lineup_data,
        ).events

        assert len(fouls) == 23
Ejemplo n.º 5
0
    def test_to_pandas_incomplete_pass(self):
        """Check the end coordinates of the first incomplete pass in the
        pandas export of the bundled StatsBomb files."""
        fixtures_dir = f"{os.path.dirname(__file__)}/files"
        frame = statsbomb.load(
            event_data=f"{fixtures_dir}/statsbomb_event.json",
            lineup_data=f"{fixtures_dir}/statsbomb_lineup.json",
        ).to_pandas()

        # Boolean mask selecting rows that are passes with an incomplete result.
        is_pass = frame.event_type == "PASS"
        is_incomplete = frame.result == "INCOMPLETE"
        incomplete_passes = frame[is_pass & is_incomplete].reset_index()

        assert incomplete_passes.loc[0, "end_coordinates_y"] == 0.90625
        assert incomplete_passes.loc[0, "end_coordinates_x"] == 0.7125
Ejemplo n.º 6
0
    def test_related_events(self, lineup_data: str, event_data: str):
        """Check that a carry event exposes its related receipt and pass
        events, and that ``related_pass`` returns the pass event."""
        carry_id = "8e3dacc2-7a39-4301-9053-e78cfec1aa95"
        pass_id = "d1cccb73-c7ef-4b02-8267-ebd7f149904b"
        receipt_id = "61da36dc-d862-416c-8ee3-1a0cd24dc086"

        dataset = statsbomb.load(event_data=event_data,
                                 lineup_data=lineup_data)
        carry_event = dataset.get_event_by_id(carry_id)
        pass_event = dataset.get_event_by_id(pass_id)
        receipt_event = dataset.get_event_by_id(receipt_id)

        expected_related = [receipt_event, pass_event]
        assert carry_event.get_related_events() == expected_related
        assert carry_event.related_pass() == pass_event
Ejemplo n.º 7
0
    def test_card(self, lineup_data: str, event_data: str):
        """
        Check that exactly two card events are loaded and that each one is
        a first yellow card
        """
        cards = statsbomb.load(
            event_types=["card"],
            event_data=event_data,
            lineup_data=lineup_data,
        ).events

        assert len(cards) == 2

        assert all(
            card.card_type == CardType.FIRST_YELLOW for card in cards)
Ejemplo n.º 8
0
    def test_transform_event_data(self):
        """Make sure event data that's in ACTION_EXECUTING orientation is
        transformed correctly: coordinates of an away-team event are mirrored
        while coordinates of a home-team event are left untouched."""
        fixtures_dir = f"{os.path.dirname(__file__)}/files"
        dataset = statsbomb.load(
            event_data=f"{fixtures_dir}/statsbomb_event.json",
            lineup_data=f"{fixtures_dir}/statsbomb_lineup.json",
        )

        home_team, away_team = dataset.metadata.teams

        # This is a pressure event by Deportivo while Barcelona is in possession
        pressure_event = dataset.get_event_by_id(
            "6399af5c-74b8-4efe-ae19-85f331d355e8")
        assert pressure_event.team == away_team
        assert pressure_event.ball_owning_team == home_team

        # The event that follows is executed by the team in possession.
        receipt_event = pressure_event.next()
        assert receipt_event.team == home_team
        assert receipt_event.ball_owning_team == home_team

        transformed_dataset = dataset.transform(
            to_orientation="fixed_home_away")
        transformed_pressure_event = transformed_dataset.get_event_by_id(
            pressure_event.event_id)
        transformed_receipt_event = transformed_pressure_event.next()

        # The pressure event is executed by the away team, so the
        # transformation should mirror both of its coordinates.
        original_pressure = pressure_event.coordinates
        flipped_pressure = transformed_pressure_event.coordinates
        assert original_pressure.x == 1 - flipped_pressure.x
        assert original_pressure.y == 1 - flipped_pressure.y

        # The receipt event is executed by the home team, so the
        # transformation shouldn't change its coordinates.
        original_receipt = receipt_event.coordinates
        kept_receipt = transformed_receipt_event.coordinates
        assert original_receipt.x == kept_receipt.x
        assert original_receipt.y == kept_receipt.y
Ejemplo n.º 9
0
def _parse_bool(value):
    """Convert a command-line token into a bool (used as an argparse type).

    Without this, any non-empty string (including "false" or "0") would be
    truthy when the option value is tested later.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value, got {value!r}")


def _percentage(part, total):
    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    if not total:
        return 0.0
    return part / total * 100


def run_query(argv=sys.argv[1:]):
    """Run a pattern query on event data and report or export the matches.

    The arguments select one event data source (StatsBomb, Opta, Datafactory
    or Wyscout), the file containing the query, and what to do with the
    matches: print the events, write them as codes to an XML file and/or
    print per-ground statistics as text or JSON.

    Args:
        argv: Command-line arguments, without the program name.

    Raises:
        Exception: If none of the ``--input-*`` options was given.
        SystemExit: Raised by argparse on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Run query on event data")
    parser.add_argument(
        "--input-statsbomb",
        help="StatsBomb event input files (events.json,lineup.json)",
    )
    parser.add_argument("--input-opta",
                        help="Opta event input files (f24.xml,f7.xml)")
    parser.add_argument("--input-datafactory",
                        help="Datafactory event input file (.json)")
    parser.add_argument("--input-wyscout", help="Wyscout event input file")
    parser.add_argument("--output-xml", help="Output file")
    parser.add_argument(
        "--with-success",
        default=True,
        # Parse the token so that e.g. "false" really disables the flag.
        type=_parse_bool,
        help="Input existence of success capture in output",
    )
    # The time paddings take part in timestamp arithmetic below, so they
    # must be numeric even when supplied on the command line.
    parser.add_argument("--prepend-time",
                        default=7,
                        type=float,
                        help="Seconds to prepend to match")
    parser.add_argument("--append-time",
                        default=5,
                        type=float,
                        help="Seconds to append to match")
    parser.add_argument("--query-file",
                        help="File containing the query",
                        required=True)
    parser.add_argument(
        "--stats",
        default="none",
        help="Show matches stats",
        choices=["text", "json", "none"],
    )
    parser.add_argument(
        "--show-events",
        default=False,
        help="Show events for each match",
        action="store_true",
    )
    parser.add_argument(
        "--only-success",
        default=False,
        help="Only show/output success cases",
        action="store_true",
    )

    logger = logging.getLogger("run_query")
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    opts = parser.parse_args(argv)

    query = load_query(opts.query_file)

    # When several inputs are given, the last one loaded below wins.
    dataset = None
    if opts.input_statsbomb:
        with performance_logging("load dataset", logger=logger):
            events_filename, lineup_filename = opts.input_statsbomb.split(",")
            dataset = statsbomb.load(
                event_data=events_filename.strip(),
                lineup_data=lineup_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_opta:
        with performance_logging("load dataset", logger=logger):
            f24_filename, f7_filename = opts.input_opta.split(",")
            dataset = opta.load(
                f24_data=f24_filename.strip(),
                f7_data=f7_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_datafactory:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_datafactory
            dataset = datafactory.load(
                event_data=events_filename.strip(),
                event_types=query.event_types,
            )
    if opts.input_wyscout:
        with performance_logging("load dataset", logger=logger):
            events_filename = opts.input_wyscout
            dataset = wyscout.load(
                event_data=events_filename,
                event_types=query.event_types,
            )

    # Compare against None explicitly: a loaded dataset must not be mistaken
    # for a missing one just because it evaluates as falsy.
    if dataset is None:
        raise Exception("You have to specify a dataset.")

    with performance_logging("searching", logger=logger):
        matches = pm.search(dataset, query.pattern)

    # Construct new code dataset with same properties (eg periods)
    # as original event dataset.
    # Records will be added later below
    records = []
    counter = Counter()
    for i, match in enumerate(matches):
        first_event = match.events[0]
        team = first_event.team
        success = "success" in match.captures

        # Counted for every match, also for ones filtered out below.
        counter.update({
            f"{team.ground}_total": 1,
            f"{team.ground}_success": 1 if success else 0,
        })

        should_process = not opts.only_success or success
        if not should_process:
            continue

        if opts.show_events:
            print_match(i, match, success, str(team))

        if opts.output_xml:
            code_ = str(team)
            if opts.with_success and success:
                code_ += " success"

            code = Code(
                period=first_event.period,
                code_id=str(i),
                code=code_,
                timestamp=first_event.timestamp - opts.prepend_time,
                end_timestamp=match.events[-1].timestamp + opts.append_time,
                # TODO: refactor those two out
                ball_state=None,
                ball_owning_team=None,
            )
            records.append(code)

    code_dataset = CodeDataset(metadata=dataset.metadata, records=records)

    if opts.output_xml:
        sportscode.save(code_dataset, opts.output_xml)
        logger.info("Wrote %s video fragments to file",
                    len(code_dataset.codes))

    if opts.stats == "text":
        home_total = counter["home_total"]
        home_success = counter["home_success"]
        home_failure = home_total - home_success
        away_total = counter["away_total"]
        away_success = counter["away_success"]
        away_failure = away_total - away_success

        # _percentage() guards against a side without any match, which would
        # otherwise raise ZeroDivisionError.
        text_stats = """\
        Home:
          total count: {home_total}
            success: {home_success} ({home_success_rate:.0f}%)
            no success: {home_failure} ({home_failure_rate:.0f}%)

        Away:
          total count: {away_total}
            success: {away_success} ({away_success_rate:.0f}%)
            no success: {away_failure} ({away_failure_rate:.0f}%)
        """.format(
            home_total=home_total,
            home_success=home_success,
            home_success_rate=_percentage(home_success, home_total),
            home_failure=home_failure,
            home_failure_rate=_percentage(home_failure, home_total),
            away_total=away_total,
            away_success=away_success,
            away_success_rate=_percentage(away_success, away_total),
            away_failure=away_failure,
            away_failure_rate=_percentage(away_failure, away_total),
        )
        print(textwrap.dedent(text_stats))
    elif opts.stats == "json":
        import json

        print(json.dumps(counter, indent=4))
Ejemplo n.º 10
0
 def dataset(self, event_data: str, lineup_data: str) -> EventDataset:
     """Load the given StatsBomb event and lineup data, requesting the
     "statsbomb" coordinates from the loader."""
     load_kwargs = {
         "lineup_data": lineup_data,
         "event_data": event_data,
         "coordinates": "statsbomb",
     }
     return statsbomb.load(**load_kwargs)
Ejemplo n.º 11
0
    def test_correct_deserialization(self, lineup_data: str, event_data: str):
        """
        Check metadata, players, periods, coordinates and qualifiers of a
        dataset loaded with "statsbomb" coordinates.

        This test uses data from the StatsBomb open data project.
        """
        dataset = statsbomb.load(
            coordinates="statsbomb",
            event_data=event_data,
            lineup_data=lineup_data,
        )
        metadata = dataset.metadata
        events = dataset.events

        # General dataset properties
        assert metadata.provider == Provider.STATSBOMB
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(events) == 4023
        assert len(metadata.periods) == 2
        assert metadata.orientation == Orientation.ACTION_EXECUTING_TEAM

        # Teams and their starting formations
        assert metadata.teams[0].name == "Barcelona"
        assert metadata.teams[1].name == "Deportivo Alavés"
        first_formation = FormationType("4-4-2")
        assert metadata.teams[0].starting_formation == first_formation
        second_formation = FormationType("4-1-4-1")
        assert metadata.teams[1].starting_formation == second_formation

        # A player that is marked as starting
        player = metadata.teams[0].get_player_by_id("5503")
        assert player.player_id == "5503"
        assert player.jersey_no == 10
        assert str(player) == "Lionel Andrés Messi Cuccittini"
        assert player.position is None  # not set
        assert player.starting

        # A player that is not marked as starting
        sub_player = metadata.teams[0].get_player_by_id("3501")
        assert str(sub_player) == "Philippe Coutinho Correia"
        assert not sub_player.starting

        # Periods, compared in order
        expected_periods = [
            Period(
                id=1,
                start_timestamp=0.0,
                end_timestamp=2705.267,
                attacking_direction=AttackingDirection.NOT_SET,
            ),
            Period(
                id=2,
                start_timestamp=2705.268,
                end_timestamp=5557.321,
                attacking_direction=AttackingDirection.NOT_SET,
            ),
        ]
        for period_idx, expected_period in enumerate(expected_periods):
            assert metadata.periods[period_idx] == expected_period

        assert events[10].coordinates == Point(34.5, 20.5)

        # (event index, qualifier class, expected value); None means the
        # qualifier value must be absent for that event.
        expected_qualifiers = [
            (792, BodyPartQualifier, BodyPart.HEAD),
            (2232, BodyPartQualifier, BodyPart.RIGHT_FOOT),
            (195, BodyPartQualifier, None),
            (1433, PassQualifier, PassType.CROSS),
            (1552, PassQualifier, PassType.THROUGH_BALL),
            (443, PassQualifier, PassType.SWITCH_OF_PLAY),
            (3438, PassQualifier, PassType.LONG_BALL),
            (2266, PassQualifier, PassType.HIGH_PASS),
            (653, PassQualifier, PassType.HEAD_PASS),
            (3134, PassQualifier, PassType.HAND_PASS),
            (3611, PassQualifier, PassType.ASSIST),
            (3392, PassQualifier, None),
        ]
        for event_idx, qualifier_type, expected_value in expected_qualifiers:
            value = events[event_idx].get_qualifier_value(qualifier_type)
            if expected_value is None:
                assert value is None
            else:
                assert value == expected_value