def _get_games(self):
    """Fetch the day's schedule page and return every <game> tag on it."""
    # URL shape: BASE_URL/<formatted yyyy/mm/dd segment>/epg.xml
    date_segment = self.YMD_FORMAT.format(**self.params)
    base_url = self.DELIMITER.join([self.BASE_URL, date_segment, 'epg.xml'])
    schedule_page = Utils.get_content(base_url, self.PARSER)
    return Utils.find_all_tags(schedule_page, 'game')
def scrape(self):
    """Render the scoreboard and boxscore tables for the chosen game.

    ``self.box`` is the user-selected index into the day's game list.
    """
    game = self._get_games()[int(self.box)]
    game_dir = Utils.find_attr(game, 'game_data_directory')
    boxscore_url = 'http://gd2.mlb.com' + game_dir + '/' + 'boxscore.xml'
    page = Utils.get_content(boxscore_url, self.PARSER)
    boxscore = Utils.find_tag(page, 'boxscore')

    # Full team names ('away_fname'/'home_fname') label both tables.
    away_name = Utils.find_attr(boxscore, 'away_fname')
    home_name = Utils.find_attr(boxscore, 'home_fname')
    self._scrape_scoreboard(boxscore, away_name, home_name)
    self._scrape_boxscore(boxscore, away_name, home_name)
def scrape(self):
    """Print the away and home rosters for the selected game.

    Fetches players.xml for the game chosen via ``self.roster`` and draws
    one table per side with TEAM / NAME / POS columns.
    """
    games = self._get_games()
    selected_game = games[int(self.roster)]
    game_info_path = Utils.find_attr(selected_game, 'game_data_directory')
    roster_url = 'http://gd2.mlb.com' + game_info_path + '/' + 'players.xml'

    header = ['TEAM', 'NAME', 'POS']
    aligns = ['c', 'c', 'c']
    away_roster = [header]
    home_roster = [header]

    html = Utils.get_content(roster_url, self.PARSER)
    for team in Utils.find_all_tags(html, 'team'):
        # Renamed from ``type`` so the builtin is not shadowed; the feed
        # marks each <team> as 'away' or 'home'.
        team_type = Utils.find_attr(team, 'type')
        team_nm = Utils.find_attr(team, 'name')
        roster = []
        for member in Utils.find_all_tags(team, 'player'):
            first_name = Utils.find_attr(member, 'first')
            last_name = Utils.find_attr(member, 'last')
            full_name = ' '.join([first_name, last_name])
            roster.append([team_nm, full_name,
                           Utils.find_attr(member, 'position')])
        # Any team not tagged 'away' is treated as the home side.
        if team_type == 'away':
            away_roster.extend(roster)
        else:
            home_roster.extend(roster)

    print('\t'.join(['***', 'away', '***']))
    Utils.draw_table(away_roster, aligns, False)
    print('')
    print('\t'.join(['***', 'home', '***']))
    Utils.draw_table(home_roster, aligns, False)
def scrape(self):
    """Print the play-by-play log for the selected game.

    Output modes, checked in order:
      * ``self.score`` truthy           -> only plays whose description
        contains 'scores' or 'homers'.
      * ``self.grep`` != 'no_grep'      -> only plays whose description
        contains the grep string.
      * otherwise                       -> the full play-by-play table.
    """
    games = self._get_games()
    selected_game = games[int(self.play)]
    game_info_path = Utils.find_attr(selected_game, 'game_data_directory')
    play_by_play_url = 'http://gd2.mlb.com' + game_info_path + '/' + 'game_events.xml'

    table_contents = [['INNING', 'BATTER_NO', 'DESCRIPTION', 'SCORE']]
    aligns = ['c', 'c', 'l', 'c']

    html = Utils.get_content(play_by_play_url, self.PARSER)
    for single_inning in Utils.find_all_tags(html, 'inning'):
        inning = []
        inning_no = Utils.find_attr(single_inning, 'num')
        top = Utils.find_tag(single_inning, 'top')
        bottom = Utils.find_tag(single_inning, 'bottom')
        # _get_results appends one row per at-bat into ``inning``.
        self._get_results(Utils.find_all_tags(top, 'atbat'), inning, inning_no)
        self._get_results(Utils.find_all_tags(bottom, 'atbat'), inning, inning_no)
        table_contents.extend(inning)

    def filtered(predicate):
        # Shared filter for the 'score' and 'grep' modes: keep rows whose
        # DESCRIPTION column matches, projected to INNING/DESCRIPTION/SCORE.
        # (Iterates every row, header included, matching prior behavior.)
        rows = [['INNING', 'DESCRIPTION', 'SCORE']]
        rows.extend([row[0], row[2], row[3]]
                    for row in table_contents if predicate(row[2]))
        return rows

    if self.score:
        score_table = filtered(lambda desc: 'scores' in desc or 'homers' in desc)
        Utils.draw_table(score_table, ['c', 'l', 'c'], False)
    elif self.grep != 'no_grep':
        grep_table = filtered(lambda desc: self.grep in desc)
        Utils.draw_table(grep_table, ['c', 'l', 'c'], False)
    else:
        Utils.draw_table(table_contents, aligns, False)