Exemple #1
0
 def setUp(self):
     """Create the ``PlayerDataParser`` instance used by each test.

     The two collaborating parsers are built first and then handed to
     ``PlayerDataParser`` so the wiring is easy to read.
     """
     # Extracts pieces of a resource location using the service-wide regex.
     location_parser = ResourceLocationParser(
         resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
     )
     # Translates league abbreviations through the shared lookup table.
     league_parser = LeagueAbbreviationParser(
         abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
     )
     self.parser = PlayerDataParser(
         search_result_location_parser=location_parser,
         league_abbreviation_parser=league_parser,
     )
def search(term):
    """Search the site at ``BASE_URL`` for players matching ``term``.

    The final response URL decides how the body is interpreted:

    * still under the search endpoint -> the body is a search results page;
      every page of NBA/ABA/BAA player results is fetched and parsed;
    * under ``{BASE_URL}/players`` -> the body is a single player page
      (presumably the site redirected straight to the only match), which is
      parsed into one result;
    * anything else -> no players are reported.

    :param term: the search text sent as the ``search`` query parameter.
    :return: ``{"players": [...]}`` with the parsed player results.
    :raises requests.HTTPError: via ``raise_for_status`` when any request
        returns an error status.
    """
    search_url = "{BASE_URL}/search/search.fcgi".format(BASE_URL=BASE_URL)

    response = requests.get(url=search_url, params={"search": term})
    response.raise_for_status()

    player_results = []
    final_url = response.url

    if final_url.startswith(search_url):
        page = SearchPage(html=html.fromstring(response.content))

        results_parser = SearchResultsParser(
            search_result_name_parser=SearchResultNameParser(),
            search_result_location_parser=ResourceLocationParser(
                resource_location_regex=SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
            ),
            league_abbreviation_parser=LeagueAbbreviationParser(
                abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
            ),
        )

        # Parse the current page, then keep following the pagination link
        # until a page no longer advertises one.
        while True:
            parsed_page = results_parser.parse(
                nba_aba_baa_players=page.nba_aba_baa_players,
            )
            player_results.extend(parsed_page["players"])

            next_page_url = page.nba_aba_baa_players_pagination_url
            if next_page_url is None:
                break

            response = requests.get(
                url="{BASE_URL}/search/{pagination_url}".format(
                    BASE_URL=BASE_URL,
                    pagination_url=next_page_url,
                ),
            )
            response.raise_for_status()
            page = SearchPage(html=html.fromstring(response.content))

    elif final_url.startswith("{BASE_URL}/players".format(BASE_URL=BASE_URL)):
        player_page = PlayerPage(html=html.fromstring(response.content))

        # One abbreviation per totals row, de-duplicated.
        league_abbreviations = {
            row.league_abbreviation for row in player_page.totals_table.rows
        }
        player_data = PlayerData(
            name=player_page.name,
            resource_location=response.url,
            league_abbreviations=league_abbreviations,
        )

        player_parser = PlayerDataParser(
            search_result_location_parser=ResourceLocationParser(
                resource_location_regex=SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
            ),
            league_abbreviation_parser=LeagueAbbreviationParser(
                abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
            ),
        )
        player_results.append(player_parser.parse(player=player_data))

    return {"players": player_results}
    def __init__(self):
        """Construct every parser this object exposes.

        Each parser is first bound to a local name and then published as an
        attribute; parsers that are composed from other parsers receive the
        already-built local instances, so shared collaborators are reused
        rather than re-created.
        """
        # ---- Stand-alone parsers -------------------------------------
        team_abbreviations = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM,
        )
        self.team_abbreviation_parser = team_abbreviations

        league_abbreviations = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
        )
        self.league_abbreviation_parser = league_abbreviations

        location_abbreviations = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION,
        )
        self.location_abbreviation_parser = location_abbreviations

        outcome_abbreviations = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME,
        )
        self.outcome_abbreviation_parser = outcome_abbreviations

        outcomes = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=outcome_abbreviations,
        )
        self.outcome_parser = outcomes

        period_details = PeriodDetailsParser(regulation_periods_count=4)
        self.period_details_parser = period_details

        period_timestamps = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT,
        )
        self.period_timestamp_parser = period_timestamps

        position_abbreviations = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION,
        )
        self.position_abbreviation_parser = position_abbreviations

        seconds_played = SecondsPlayedParser()
        self.seconds_played_parser = seconds_played

        scores = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX,
        )
        self.scores_parser = scores

        search_result_names = SearchResultNameParser()
        self.search_result_name_parser = search_result_names

        search_result_locations = ResourceLocationParser(
            resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
        )
        self.search_result_location_parser = search_result_locations

        team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
        self.team_name_parser = team_names

        # ---- Parsers composed from the ones above --------------------
        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=period_details,
            period_timestamp_parser=period_timestamps,
            scores_parser=scores,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=team_abbreviations,
            location_abbreviation_parser=location_abbreviations,
            outcome_abbreviation_parser=outcome_abbreviations,
            seconds_played_parser=seconds_played,
        )
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=search_result_locations,
            league_abbreviation_parser=league_abbreviations,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=team_abbreviations,
            location_abbreviation_parser=location_abbreviations,
            outcome_parser=outcomes,
            seconds_played_parser=seconds_played,
        )
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=position_abbreviations,
            team_abbreviation_parser=team_abbreviations,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=team_abbreviations,
            position_abbreviation_parser=position_abbreviations,
        )

        scheduled_start_times = ScheduledStartTimeParser()
        self.scheduled_start_time_parser = scheduled_start_times
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=scheduled_start_times,
            team_name_parser=team_names,
        )

        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=search_result_names,
            search_result_location_parser=search_result_locations,
            league_abbreviation_parser=league_abbreviations,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=team_abbreviations,
        )

        division_names = DivisionNameParser(divisions=Division)
        self.division_name_parser = division_names

        team_standings = TeamStandingsParser(teams=Team)
        self.team_standings_parser = team_standings

        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=division_names,
            team_standings_parser=team_standings,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )
class ParserService:
    """Facade that builds every parser once and exposes one method per parse task.

    All parsers are created in ``__init__`` and stored as attributes; the
    ``parse_*`` methods simply delegate to the matching parser.
    """

    # Format string handed to PeriodTimestampParser
    # (presumably consumed by ``strptime`` -- confirm in that parser).
    PLAY_BY_PLAY_TIMESTAMP_FORMAT = "%M:%S.%f"

    # Captures "<away>-<home>" scores as the named groups
    # ``away_team_score`` and ``home_team_score``.
    PLAY_BY_PLAY_SCORES_REGEX = r"(?P<away_team_score>[0-9]+)-(?P<home_team_score>[0-9]+)"

    # Captures ``resource_type`` and ``resource_identifier`` from a resource
    # location, with an optional absolute basketball-reference.com prefix.
    # Written as a raw string: the pattern contains ``\/`` and ``\.``, which
    # are invalid escape sequences in a normal string literal and trigger a
    # DeprecationWarning/SyntaxWarning on modern Python. The runtime value is
    # unchanged.
    # NOTE(review): the "." before "html" is unescaped and therefore matches
    # any character; left as-is to avoid changing what the pattern accepts.
    SEARCH_RESULT_RESOURCE_LOCATION_REGEX = r'(https?:\/\/www\.basketball-reference\.com\/)?(?P<resource_type>.+?(?=\/)).*\/(?P<resource_identifier>.+).html'

    def __init__(self):
        """Build the stand-alone parsers, then the parsers composed from them."""
        # Stand-alone parsers (several are shared by the composites below).
        self.team_abbreviation_parser = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM)
        self.league_abbreviation_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE)
        self.location_abbreviation_parser = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION)
        self.outcome_abbreviation_parser = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME)
        self.outcome_parser = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=self.outcome_abbreviation_parser)
        self.period_details_parser = PeriodDetailsParser(
            regulation_periods_count=4)
        self.period_timestamp_parser = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT)
        self.position_abbreviation_parser = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION)
        self.seconds_played_parser = SecondsPlayedParser()
        self.scores_parser = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX)
        self.search_result_name_parser = SearchResultNameParser()
        self.search_result_location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX)
        self.team_name_parser = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM)

        # Composite parsers, wired to the shared instances created above.
        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=self.period_details_parser,
            period_timestamp_parser=self.period_timestamp_parser,
            scores_parser=self.scores_parser,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
            seconds_played_parser=self.seconds_played_parser,
        )
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_parser=self.outcome_parser,
            seconds_played_parser=self.seconds_played_parser,
        )
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=self.position_abbreviation_parser,
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            position_abbreviation_parser=self.position_abbreviation_parser,
        )
        self.scheduled_start_time_parser = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=self.scheduled_start_time_parser,
            team_name_parser=self.team_name_parser,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=self.search_result_name_parser,
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser)
        self.division_name_parser = DivisionNameParser(divisions=Division)
        self.team_standings_parser = TeamStandingsParser(teams=Team)
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=self.division_name_parser,
            team_standings_parser=self.team_standings_parser,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )

    def parse_division_standings(self, standings):
        """Parse ``standings`` with the conference/division standings parser."""
        return self.conference_division_standings_parser.parse(
            division_standings=standings)

    def parse_play_by_plays(self, play_by_plays, away_team_name,
                            home_team_name):
        """Parse ``play_by_plays`` after resolving both team names.

        ``away_team_name`` and ``home_team_name`` are each passed through the
        team-name parser and the results are handed to the play-by-play
        parser as ``away_team`` and ``home_team``.
        """
        return self.play_by_plays_parser.parse(
            play_by_plays=play_by_plays,
            away_team=self.team_name_parser.parse_team_name(
                team_name=away_team_name),
            home_team=self.team_name_parser.parse_team_name(
                team_name=home_team_name),
        )

    def parse_player_box_scores(self, box_scores):
        """Parse ``box_scores`` with the player box scores parser."""
        return self.player_box_scores_parser.parse(box_scores=box_scores)

    def parse_player_season_box_scores(self, box_scores):
        """Parse ``box_scores`` with the player season box scores parser."""
        return self.player_season_box_scores_parser.parse(
            box_scores=box_scores)

    def parse_player_advanced_season_totals_parser(self, totals):
        """Parse ``totals`` with the player advanced season totals parser.

        The trailing ``_parser`` in the method name is kept so existing
        callers keep working.
        """
        return self.player_advanced_season_totals_parser.parse(totals=totals)

    def parse_player_season_totals(self, totals):
        """Parse ``totals`` with the player season totals parser."""
        return self.player_season_totals_parser.parse(totals=totals)

    def parse_scheduled_games(self, games):
        """Parse ``games`` with the scheduled games parser."""
        return self.scheduled_games_parser.parse_games(games)

    def parse_team_totals(self, first_team_totals, second_team_totals):
        """Parse the totals of both teams with the team totals parser."""
        return self.team_totals_parser.parse(
            first_team_totals=first_team_totals,
            second_team_totals=second_team_totals)

    def parse_player_search_results(self, nba_aba_baa_players):
        """Parse ``nba_aba_baa_players`` with the search results parser."""
        return self.search_results_parser.parse(
            nba_aba_baa_players=nba_aba_baa_players)

    def parse_player_data(self, player):
        """Parse ``player`` with the player data parser."""
        return self.player_data_parser.parse(player=player)
Exemple #5
0
class TestPlayerDataParser(TestCase):
    """Checks the ``name``, ``identifier`` and ``leagues`` keys produced by
    ``PlayerDataParser.parse`` for a fixed player name and resource location
    combined with various league abbreviation lists."""

    # Fixture values shared by every test.
    PLAYER_NAME = "jaebaebae"
    RESOURCE_LOCATION = '/players/b/bryanko01.html'

    def setUp(self):
        """Build a fresh parser, wired with real sub-parsers, for each test."""
        location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
        )
        league_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
        )
        self.parser = PlayerDataParser(
            search_result_location_parser=location_parser,
            league_abbreviation_parser=league_parser,
        )

    def _parse_with_leagues(self, league_abbreviations):
        """Parse the fixture player using the given league abbreviations."""
        player = PlayerData(
            name=self.PLAYER_NAME,
            resource_location=self.RESOURCE_LOCATION,
            league_abbreviations=league_abbreviations,
        )
        return self.parser.parse(player=player)

    def test_parse_name(self):
        result = self._parse_with_leagues([])
        self.assertEqual(result["name"], "jaebaebae")

    def test_parse_resource_location(self):
        result = self._parse_with_leagues([])
        self.assertEqual(result["identifier"], "bryanko01")

    def test_parse_league_abbreviations_for_single_nba_abbreviation(self):
        result = self._parse_with_leagues(["NBA"])
        expected = {League.NATIONAL_BASKETBALL_ASSOCIATION}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_single_aba_abbreviation(self):
        result = self._parse_with_leagues(["ABA"])
        expected = {League.AMERICAN_BASKETBALL_ASSOCIATION}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_single_baa_abbreviation(self):
        result = self._parse_with_leagues(["BAA"])
        expected = {League.BASKETBALL_ASSOCIATION_OF_AMERICA}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_multiple_nba_abbreviations(self):
        # Repeated abbreviations collapse to a single league.
        result = self._parse_with_leagues(["NBA", "NBA"])
        expected = {League.NATIONAL_BASKETBALL_ASSOCIATION}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_multiple_aba_abbreviations(self):
        result = self._parse_with_leagues(["ABA", "ABA"])
        expected = {League.AMERICAN_BASKETBALL_ASSOCIATION}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_multiple_baa_abbreviations(self):
        result = self._parse_with_leagues(["BAA", "BAA"])
        expected = {League.BASKETBALL_ASSOCIATION_OF_AMERICA}
        self.assertEqual(result["leagues"], expected)

    def test_parse_league_abbreviations_for_multiple_nba_and_aba_and_baa_abbreviations(
            self):
        result = self._parse_with_leagues(
            ["NBA", "ABA", "BAA", "NBA", "ABA", "BAA"])
        expected = {
            League.NATIONAL_BASKETBALL_ASSOCIATION,
            League.AMERICAN_BASKETBALL_ASSOCIATION,
            League.BASKETBALL_ASSOCIATION_OF_AMERICA,
        }
        self.assertEqual(result["leagues"], expected)