def scrape(self):
    """
    Download the boxscore of the selected game and print it.

    The game is taken from the list returned by ``_get_games`` at index
    ``int(self.box)``. Its ``boxscore.xml`` is fetched from gd2.mlb.com and
    the ``boxscore`` element is handed to ``_scrape_scoreboard`` and then
    to ``_scrape_boxscore``, together with both team names.
    """
    available_games = self._get_games()
    game = available_games[int(self.box)]

    # The game entry carries the server-side directory holding its data files
    data_dir = Utils.find_attr(game, 'game_data_directory')
    url = 'http://gd2.mlb.com' + data_dir + '/' + 'boxscore.xml'

    document = Utils.get_content(url, self.PARSER)
    root = Utils.find_tag(document, 'boxscore')

    # basic info: team names are read from the boxscore element itself
    away_name = Utils.find_attr(root, 'away_fname')
    home_name = Utils.find_attr(root, 'home_fname')

    self._scrape_scoreboard(root, away_name, home_name)
    self._scrape_boxscore(root, away_name, home_name)
def _scrape_scoreboard(self, boxscore, away_team_name, home_team_name):
    """
    Draw the line score table: one column per inning followed by R/H/E.

    :param boxscore: boxscore contents
    :param away_team_name: away team name, first cell of the away row
    :param home_team_name: home team name, first cell of the home row
    """
    linescore = Utils.find_tag(boxscore, 'linescore')

    # Totals as (away, home) pairs, in the R, H, E order of the table
    total_keys = (
        ('away_team_runs', 'home_team_runs'),
        ('away_team_hits', 'home_team_hits'),
        ('away_team_errors', 'home_team_errors'),
    )
    totals = [
        (Utils.find_attr(linescore, away_key),
         Utils.find_attr(linescore, home_key))
        for away_key, home_key in total_keys
    ]

    # One (label, away value, home value) triple per inning
    columns = [
        (Utils.find_attr(frame, 'inning'),
         Utils.find_attr(frame, 'away'),
         Utils.find_attr(frame, 'home'))
        for frame in Utils.find_all_tags(linescore, 'inning_line_score')
    ]

    header_row = ['TEAM'] + [col[0] for col in columns] + ['R', 'H', 'E']
    away_row = ([away_team_name]
                + [col[1] for col in columns]
                + [pair[0] for pair in totals])
    home_row = ([home_team_name]
                + [col[2] for col in columns]
                + [pair[1] for pair in totals])

    # One alignment per column: team name + innings + R/H/E
    alignments = ['c'] * (len(columns) + 4)

    Utils.draw_table([header_row, away_row, home_row], alignments, True)
def scrape(self):
    """
    Download the play-by-play of the selected game and print it as a table.

    The game is taken from the list returned by ``_get_games`` at index
    ``int(self.play)`` and its ``game_events.xml`` is fetched from
    gd2.mlb.com. What gets drawn depends on the instance options:

    * ``self.score`` is truthy: only plays whose description contains
      ``'scores'`` or ``'homers'`` (INNING, DESCRIPTION, SCORE columns);
    * ``self.grep`` differs from ``'no_grep'``: only plays whose
      description contains that text (same three columns);
    * otherwise: every play, with all four columns.

    The header row is never run through the filter, so a grep term that
    happens to be a substring of ``'DESCRIPTION'`` no longer produces a
    bogus play row.
    """
    games = self._get_games()
    selected_game = games[int(self.play)]
    game_info_path = Utils.find_attr(selected_game, 'game_data_directory')
    play_by_play_url = ('http://gd2.mlb.com' + game_info_path
                        + '/' + 'game_events.xml')

    header = ['INNING', 'BATTER_NO', 'DESCRIPTION', 'SCORE']
    aligns = ['c', 'c', 'l', 'c']

    html = Utils.get_content(play_by_play_url, self.PARSER)

    # Play rows only; the header is kept separate so filters never see it.
    plays = []
    for single_inning in Utils.find_all_tags(html, 'inning'):
        # _get_results fills a fresh per-inning list, top half first.
        inning = []
        inning_no = Utils.find_attr(single_inning, 'num')
        top = Utils.find_tag(single_inning, 'top')
        bottom = Utils.find_tag(single_inning, 'bottom')
        top_atbats = Utils.find_all_tags(top, 'atbat')
        bottom_atbats = Utils.find_all_tags(bottom, 'atbat')
        self._get_results(top_atbats, inning, inning_no)
        self._get_results(bottom_atbats, inning, inning_no)
        plays.extend(inning)

    # Pick the description filter; --score takes precedence over --grep.
    if self.score:
        def wanted(description):
            return 'scores' in description or 'homers' in description
    elif self.grep != 'no_grep':
        def wanted(description):
            return self.grep in description
    else:
        Utils.draw_table([header] + plays, aligns, False)
        return

    # Filtered output drops the BATTER_NO column (index 1).
    filtered = [['INNING', 'DESCRIPTION', 'SCORE']]
    filtered.extend(
        [row[0], row[2], row[3]] for row in plays if wanted(row[2])
    )
    Utils.draw_table(filtered, ['c', 'l', 'c'], False)