Beispiel #1
0
def play_by_play(home_team, day, month, year):
    add_0_if_needed = lambda s: "0" + s if len(s) == 1 else s

    # the hard-coded `0` in the url assumes we always take the first match of the given date and team.
    url = "{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html".format(
        BASE_URL=BASE_URL,
        year=year,
        month=add_0_if_needed(str(month)),
        day=add_0_if_needed(str(day)),
        team_abbr=TEAM_TO_TEAM_ABBREVIATION[home_team])
    response = requests.get(url=url)
    response.raise_for_status()
    page = PlayByPlayPage(html=html.fromstring(response.content))

    play_by_plays_parser = PlayByPlaysParser(
        period_details_parser=PeriodDetailsParser(regulation_periods_count=4),
        period_timestamp_parser=PeriodTimestampParser(
            timestamp_format=PLAY_BY_PLAY_TIMESTAMP_FORMAT),
        scores_parser=ScoresParser(scores_regex=PLAY_BY_PLAY_SCORES_REGEX))

    team_name_parser = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)

    return play_by_plays_parser.parse(
        play_by_plays=page.play_by_play_table.rows,
        away_team=team_name_parser.parse_team_name(
            team_name=page.away_team_name),
        home_team=team_name_parser.parse_team_name(
            team_name=page.home_team_name))
Beispiel #2
0
def schedule_for_month(url):
    response = requests.get(url=url)

    response.raise_for_status()

    page = SchedulePage(html=html.fromstring(html=response.content))
    parser = ScheduledGamesParser(
        start_time_parser=ScheduledStartTimeParser(),
        team_name_parser=TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM),
    )
    return parser.parse_games(games=page.rows)
 def setUp(self):
     self.october_2001_html = requests.get(
         'https://www.basketball-reference.com/leagues/NBA_2001_games.html'
     ).text
     self.october_2018_html = requests.get(
         'https://www.basketball-reference.com/leagues/NBA_2018_games-october.html'
     ).text
     self.schedule_with_future_games_html_file = open(
         os.path.join(os.path.dirname(__file__),
                      './NBA_2019_games-april.html'))
     self.schedule_with_future_games_html = self.schedule_with_future_games_html_file.read(
     )
     self.parser = ScheduledGamesParser(
         start_time_parser=ScheduledStartTimeParser(),
         team_name_parser=TeamNameParser(
             team_names_to_teams=TEAM_NAME_TO_TEAM),
     )
Beispiel #4
0
def season_schedule(season_end_year):
    url = '{BASE_URL}/leagues/NBA_{season_end_year}_games.html'.format(
        BASE_URL=BASE_URL, season_end_year=season_end_year)

    response = requests.get(url=url)

    response.raise_for_status()

    page = SchedulePage(html=html.fromstring(html=response.content))
    parser = ScheduledGamesParser(
        start_time_parser=ScheduledStartTimeParser(),
        team_name_parser=TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM),
    )
    season_schedule_values = parser.parse_games(games=page.rows)

    for month_url_path in page.other_months_schedule_urls:
        url = '{BASE_URL}{month_url_path}'.format(
            BASE_URL=BASE_URL, month_url_path=month_url_path)
        monthly_schedule = schedule_for_month(url=url)
        season_schedule_values.extend(monthly_schedule)

    return season_schedule_values
    def __init__(self):
        self.team_abbreviation_parser = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM)
        self.league_abbreviation_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE)
        self.location_abbreviation_parser = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION, )
        self.outcome_abbreviation_parser = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME, )
        self.outcome_parser = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=self.outcome_abbreviation_parser)
        self.period_details_parser = PeriodDetailsParser(
            regulation_periods_count=4)
        self.period_timestamp_parser = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT)
        self.position_abbreviation_parser = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION, )
        self.seconds_played_parser = SecondsPlayedParser()
        self.scores_parser = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX)
        self.search_result_name_parser = SearchResultNameParser()
        self.search_result_location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.
            SEARCH_RESULT_RESOURCE_LOCATION_REGEX)
        self.team_name_parser = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM)

        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=self.period_details_parser,
            period_timestamp_parser=self.period_timestamp_parser,
            scores_parser=self.scores_parser,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_parser=self.outcome_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=self.position_abbreviation_parser,
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            position_abbreviation_parser=self.position_abbreviation_parser,
        )
        self.scheduled_start_time_parser = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=self.scheduled_start_time_parser,
            team_name_parser=self.team_name_parser,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=self.search_result_name_parser,
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser)
        self.division_name_parser = DivisionNameParser(divisions=Division)
        self.team_standings_parser = TeamStandingsParser(teams=Team)
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=self.division_name_parser,
            team_standings_parser=self.team_standings_parser,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )
class ParserService:
    PLAY_BY_PLAY_TIMESTAMP_FORMAT = "%M:%S.%f"
    PLAY_BY_PLAY_SCORES_REGEX = "(?P<away_team_score>[0-9]+)-(?P<home_team_score>[0-9]+)"
    SEARCH_RESULT_RESOURCE_LOCATION_REGEX = '(https?:\/\/www\.basketball-reference\.com\/)?(?P<resource_type>.+?(?=\/)).*\/(?P<resource_identifier>.+).html'

    def __init__(self):
        self.team_abbreviation_parser = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM)
        self.league_abbreviation_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE)
        self.location_abbreviation_parser = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION, )
        self.outcome_abbreviation_parser = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME, )
        self.outcome_parser = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=self.outcome_abbreviation_parser)
        self.period_details_parser = PeriodDetailsParser(
            regulation_periods_count=4)
        self.period_timestamp_parser = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT)
        self.position_abbreviation_parser = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION, )
        self.seconds_played_parser = SecondsPlayedParser()
        self.scores_parser = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX)
        self.search_result_name_parser = SearchResultNameParser()
        self.search_result_location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.
            SEARCH_RESULT_RESOURCE_LOCATION_REGEX)
        self.team_name_parser = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM)

        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=self.period_details_parser,
            period_timestamp_parser=self.period_timestamp_parser,
            scores_parser=self.scores_parser,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_parser=self.outcome_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=self.position_abbreviation_parser,
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            position_abbreviation_parser=self.position_abbreviation_parser,
        )
        self.scheduled_start_time_parser = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=self.scheduled_start_time_parser,
            team_name_parser=self.team_name_parser,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=self.search_result_name_parser,
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser)
        self.division_name_parser = DivisionNameParser(divisions=Division)
        self.team_standings_parser = TeamStandingsParser(teams=Team)
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=self.division_name_parser,
            team_standings_parser=self.team_standings_parser,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )

    def parse_division_standings(self, standings):
        return self.conference_division_standings_parser.parse(
            division_standings=standings)

    def parse_play_by_plays(self, play_by_plays, away_team_name,
                            home_team_name):
        return self.play_by_plays_parser.parse(
            play_by_plays=play_by_plays,
            away_team=self.team_name_parser.parse_team_name(
                team_name=away_team_name),
            home_team=self.team_name_parser.parse_team_name(
                team_name=home_team_name),
        )

    def parse_player_box_scores(self, box_scores):
        return self.player_box_scores_parser.parse(box_scores=box_scores)

    def parse_player_season_box_scores(self, box_scores):
        return self.player_season_box_scores_parser.parse(
            box_scores=box_scores)

    def parse_player_advanced_season_totals_parser(self, totals):
        return self.player_advanced_season_totals_parser.parse(totals=totals)

    def parse_player_season_totals(self, totals):
        return self.player_season_totals_parser.parse(totals=totals)

    def parse_scheduled_games(self, games):
        return self.scheduled_games_parser.parse_games(games)

    def parse_team_totals(self, first_team_totals, second_team_totals):
        return self.team_totals_parser.parse(
            first_team_totals=first_team_totals,
            second_team_totals=second_team_totals)

    def parse_player_search_results(self, nba_aba_baa_players):
        return self.search_results_parser.parse(
            nba_aba_baa_players=nba_aba_baa_players)

    def parse_player_data(self, player):
        return self.player_data_parser.parse(player=player)