def _get_games(self):
    """
    Fetch the schedule document and return its ``game`` entries.

    The URL is built by joining ``BASE_URL``, ``YMD_FORMAT`` formatted with
    ``self.params``, and ``epg.xml`` using ``DELIMITER``.

    :return: result of ``Utils.find_all_tags`` for the ``game`` tag
    """
    date_path = self.YMD_FORMAT.format(**self.params)
    url_parts = [self.BASE_URL, date_path, 'epg.xml']
    schedule_url = self.DELIMITER.join(url_parts)
    document = Utils.get_content(schedule_url, self.PARSER)
    return Utils.find_all_tags(document, 'game')
def scrape(self):
    """
    Print the scoreboard and the box score of the selected game.

    ``self.box`` is converted with ``int`` and used as an index into the
    games returned by ``_get_games``.
    """
    chosen_game = self._get_games()[int(self.box)]
    directory = Utils.find_attr(chosen_game, 'game_data_directory')
    url = ''.join(['http://gd2.mlb.com', directory, '/', 'boxscore.xml'])

    document = Utils.get_content(url, self.PARSER)
    boxscore = Utils.find_tag(document, 'boxscore')

    # Full team names are shared by both printed sections.
    away_name = Utils.find_attr(boxscore, 'away_fname')
    home_name = Utils.find_attr(boxscore, 'home_fname')

    self._scrape_scoreboard(boxscore, away_name, home_name)
    self._scrape_boxscore(boxscore, away_name, home_name)
def _get_results(self, atbats, inning, inning_no):
    """
    Append one result row per at-bat to ``inning``.

    Each row is ``[inning_no, num, des, score]`` where ``score`` is the
    away and home run totals joined by ``-``.

    :param atbats: at-bat elements to read
    :param inning: list that receives the rows (modified in place)
    :param inning_no: inning number stored in the first cell of each row
    """
    for atbat in atbats:
        away_runs = Utils.find_attr(atbat, 'away_team_runs')
        home_runs = Utils.find_attr(atbat, 'home_team_runs')
        running_score = away_runs + '-' + home_runs
        inning.append([
            inning_no,
            Utils.find_attr(atbat, 'num'),
            Utils.find_attr(atbat, 'des'),
            running_score,
        ])
def scrape(self):
    """
    Print the play-by-play table of the selected game.

    ``self.play`` is converted with ``int`` and used as an index into the
    games returned by ``_get_games``. What is printed depends on two
    attributes:

    * ``self.score`` truthy: only plays whose description contains
      ``scores`` or ``homers``.
    * otherwise, ``self.grep`` different from ``'no_grep'``: only plays
      whose description contains ``self.grep``.
    * otherwise: every play, including the batter number column.

    Filtered output has the columns INNING, DESCRIPTION and SCORE.
    """
    games = self._get_games()
    selected_game = games[int(self.play)]
    game_info_path = Utils.find_attr(selected_game, 'game_data_directory')
    play_by_play_url = (
        'http://gd2.mlb.com' + game_info_path + '/' + 'game_events.xml'
    )

    html = Utils.get_content(play_by_play_url, self.PARSER)

    # Plays are collected separately from any header row so that the
    # filters below are only ever tested against real descriptions
    # (a grep such as 'DESC' must not match the header cell).
    plays = []
    for single_inning in Utils.find_all_tags(html, 'inning'):
        inning = []
        inning_no = Utils.find_attr(single_inning, 'num')
        top = Utils.find_tag(single_inning, 'top')
        bottom = Utils.find_tag(single_inning, 'bottom')
        top_atbats = Utils.find_all_tags(top, 'atbat')
        bottom_atbats = Utils.find_all_tags(bottom, 'atbat')
        self._get_results(top_atbats, inning, inning_no)
        self._get_results(bottom_atbats, inning, inning_no)
        plays.extend(inning)

    # Pick the substrings a description must contain; None means no filter.
    if self.score:
        needles = ('scores', 'homers')
    elif self.grep != 'no_grep':
        needles = (self.grep,)
    else:
        needles = None

    if needles is None:
        full_table = [['INNING', 'BATTER_NO', 'DESCRIPTION', 'SCORE']]
        full_table.extend(plays)
        Utils.draw_table(full_table, ['c', 'c', 'l', 'c'], False)
        return

    # Rows are [inning, batter_no, description, score]; the filtered view
    # drops the batter number.
    filtered_table = [['INNING', 'DESCRIPTION', 'SCORE']]
    for play in plays:
        description = play[2]
        if any(needle in description for needle in needles):
            filtered_table.append([play[0], description, play[3]])
    Utils.draw_table(filtered_table, ['c', 'l', 'c'], False)
def _scrape_boxscore(self, boxscore, away_team_name, home_team_name):
    """
    Print the batting and pitching tables of both teams.

    The first ``batting`` element is stored as the home team and any later
    one as the away team; the first ``pitching`` element is stored as the
    away team and any later one as the home team.
    NOTE(review): this depends on the element order in the feed -- confirm.

    :param boxscore: boxscore contents
    :param away_team_name: away team name
    :param home_team_name: home team name
    """
    batting_header = [
        'BATTING', 'NAME', 'POS', 'AB', 'R', 'H', 'RBI', 'BB', 'SO', 'LOB',
        'AVG'
    ]
    pitching_header = [
        'PITCHING', 'IP', 'H', 'R', 'ER', 'BB', 'SO', 'HR', 'ERA'
    ]
    batting_aligns = ['c'] * 11
    pitching_aligns = ['c'] * 9

    # Both teams' tables start with the same header list.
    away_batting, home_batting = [batting_header], [batting_header]
    away_pitching, home_pitching = [pitching_header], [pitching_header]

    teams_batting = Utils.find_all_tags(boxscore, 'batting')
    teams_pitching = Utils.find_all_tags(boxscore, 'pitching')

    batter_stats = (
        'name', 'pos', 'ab', 'r', 'h', 'rbi', 'bb', 'so', 'lob', 'avg'
    )
    for team_index, team_batting in enumerate(teams_batting):
        batting_table = home_batting if team_index == 0 else away_batting
        for batter in Utils.find_all_tags(team_batting, 'batter'):
            order_id = Utils.find_attr(batter, 'bo')
            if order_id == 'none':
                # No batting order available: use a dash placeholder.
                order_id = '---'
            row = [order_id[0:1] + '-' + order_id[1:3]]
            for stat in batter_stats:
                row.append(Utils.find_attr(batter, stat))
            batting_table.append(row)

    pitcher_stats = ('h', 'r', 'er', 'bb', 'so', 'hr', 'era')
    for team_index, team_pitching in enumerate(teams_pitching):
        pitching_table = away_pitching if team_index == 0 else home_pitching
        for pitcher in Utils.find_all_tags(team_pitching, 'pitcher'):
            # Outs recorded become "<full innings>.<remaining outs>".
            outs = int(Utils.find_attr(pitcher, 'out'))
            full_innings, extra_outs = divmod(outs, 3)
            innings_pitched = str(full_innings) + '.' + str(extra_outs)
            row = [Utils.find_attr(pitcher, 'name'), innings_pitched]
            for stat in pitcher_stats:
                row.append(Utils.find_attr(pitcher, stat))
            pitching_table.append(row)

    print('\t'.join(['***', away_team_name, '***']))
    Utils.draw_table(away_batting, batting_aligns, False)
    Utils.draw_table(away_pitching, pitching_aligns, False)
    print('')
    print('\t'.join(['***', home_team_name, '***']))
    Utils.draw_table(home_batting, batting_aligns, False)
    Utils.draw_table(home_pitching, pitching_aligns, False)
def _scrape_scoreboard(self, boxscore, away_team_name, home_team_name):
    """
    Print the inning-by-inning line score with R/H/E totals.

    The table has a header row, an away row and a home row; every column
    is centre-aligned.

    :param boxscore: boxscore contents
    :param away_team_name: away team name
    :param home_team_name: home team name
    """
    linescore = Utils.find_tag(boxscore, 'linescore')

    # Totals for runs, hits and errors, read as away/home pairs.
    away_totals = []
    home_totals = []
    total_keys = (
        ('away_team_runs', 'home_team_runs'),
        ('away_team_hits', 'home_team_hits'),
        ('away_team_errors', 'home_team_errors'),
    )
    for away_key, home_key in total_keys:
        away_totals.append(Utils.find_attr(linescore, away_key))
        home_totals.append(Utils.find_attr(linescore, home_key))

    header_row = ['TEAM']
    away_row = [away_team_name]
    home_row = [home_team_name]
    aligns = []
    for frame in Utils.find_all_tags(linescore, 'inning_line_score'):
        aligns.append('c')
        header_row.append(Utils.find_attr(frame, 'inning'))
        away_row.append(Utils.find_attr(frame, 'away'))
        home_row.append(Utils.find_attr(frame, 'home'))

    header_row += ['R', 'H', 'E']
    away_row += away_totals
    home_row += home_totals
    # One alignment for the team column plus one each for R, H and E.
    aligns += ['c', 'c', 'c', 'c']

    Utils.draw_table([header_row, away_row, home_row], aligns, True)
def scrape(self):
    """
    Print a numbered table of the games returned by ``_get_games``.

    Each row shows the venue, both teams with their win-loss record, and
    the score. A run total reported as ``none`` is shown as ``*``.
    """
    rows = [['NO', 'VENUE', 'AWAY_TEAM', 'HOME_TEAM', 'RESULT']]
    for index, game in enumerate(self._get_games()):
        venue = Utils.find_attr(game, 'venue')
        away_name = Utils.find_attr(game, 'away_team_name')
        home_name = Utils.find_attr(game, 'home_team_name')
        away_win = Utils.find_attr(game, 'away_win')
        away_loss = Utils.find_attr(game, 'away_loss')
        home_win = Utils.find_attr(game, 'home_win')
        home_loss = Utils.find_attr(game, 'home_loss')
        away_runs = Utils.find_attr(game, 'away_team_runs')
        home_runs = Utils.find_attr(game, 'home_team_runs')

        away_runs = '*' if away_runs == 'none' else away_runs
        home_runs = '*' if home_runs == 'none' else home_runs

        away_label = ''.join([away_name, '(', away_win, '-', away_loss, ')'])
        home_label = ''.join([home_name, '(', home_win, '-', home_loss, ')'])
        result = '-'.join([away_runs, home_runs])

        rows.append([str(index), venue, away_label, home_label, result])

    Utils.draw_table(rows, ['c'] * 5, True)
def scrape(self):
    """
    Print the rosters of the selected game, away table first.

    ``self.roster`` is converted with ``int`` and used as an index into
    the games returned by ``_get_games``. A team whose ``type`` attribute
    is ``away`` goes into the away table; any other team goes into the
    home table.
    """
    chosen_game = self._get_games()[int(self.roster)]
    directory = Utils.find_attr(chosen_game, 'game_data_directory')
    url = ''.join(['http://gd2.mlb.com', directory, '/', 'players.xml'])

    header = ['TEAM', 'NAME', 'POS']
    column_aligns = ['c', 'c', 'c']
    # Both tables start with the same header list.
    away_roster = [header]
    home_roster = [header]

    document = Utils.get_content(url, self.PARSER)
    for team in Utils.find_all_tags(document, 'team'):
        team_side = Utils.find_attr(team, 'type')
        team_name = Utils.find_attr(team, 'name')
        destination = away_roster if team_side == 'away' else home_roster
        for member in Utils.find_all_tags(team, 'player'):
            first_name = Utils.find_attr(member, 'first')
            last_name = Utils.find_attr(member, 'last')
            destination.append([
                team_name,
                ' '.join([first_name, last_name]),
                Utils.find_attr(member, 'position'),
            ])

    print('\t'.join(['***', 'away', '***']))
    Utils.draw_table(away_roster, column_aligns, False)
    print('')
    print('\t'.join(['***', 'home', '***']))
    Utils.draw_table(home_roster, column_aligns, False)