def process_players(self):
    """ Walk the player index one letter at a time and process the players found.

    For each letter the index page is fetched from www.baseball-reference.com
    and handed to self.player_list_parser. The ids the parser then exposes are
    passed to self.process_player: odd ids always, retired ids only when
    self.all_players is set, and active ids always (flagged as active).
    """
    for initial in "abcdefghijklmnopqrstuvwxyz":
        letter_url = "/players/{}/".format(initial)
        page = MLBUtilities.fetch_data("www.baseball-reference.com",
                                       letter_url, True)
        self.player_list_parser.parse(page, initial)

        # Odd ids are pairs: element 0 builds the URL, element 1 is the id.
        for odd_entry in self.player_list_parser.odd_player_ids:
            self.process_player(odd_entry[1],
                                "/players/{}/".format(odd_entry[0]))

        # Retired players are only wanted on a full run.
        if self.all_players:
            for retired_id in self.player_list_parser.retired_player_ids:
                self.process_player(retired_id, letter_url)

        for active_id in self.player_list_parser.active_player_ids:
            self.process_player(active_id, letter_url, True)
def scrape_ballpark_factors(self):
    """ Fetch the Rotogrinders.com ballpark factors page and parse it. """
    page = MLBUtilities.fetch_data(
        "rotogrinders.com", "/pages/Ballpark_Factors-49556", True)
    RotogrindersBallparkFactorsParser().parse(page)
def process(self):
    """ Scrape the starting line-ups in an endless loop.

    Each pass parses the daily line-up page with a RotoworldLineupScraper,
    reports how long the pass took and then sleeps for ten minutes. When
    self.source is "site" the page is fetched from www.rotowire.com; any
    other value is opened as a local file. This method never returns.
    """
    while True:
        start = time.time()
        rotoworld_scraper = RotoworldLineupScraper(
            sleep_time=self.sleep_time)

        if self.source == "site":
            data = MLBUtilities.fetch_data("www.rotowire.com",
                                           "/baseball/daily_lineups.htm",
                                           True)
            rotoworld_scraper.parse(data)
        else:
            # Close the file after every pass; this loop never exits, so an
            # unclosed handle would leak once per iteration.
            with open(self.source) as source_file:
                rotoworld_scraper.parse(source_file)

        end = time.time()
        # Single-argument print(...) is valid as a statement and as a call.
        print("Scraped starting line-ups in {} minutes".format(
            (end - start) / 60.0))
        print("All done. Sleeping for 10 minutes then re-evaluating...")
        time.sleep(60 * 10)
def scrape_yesterdays_lineups(self):
    """ Expedite data collection by updating stats for each team's last lineup.

    There's likely little that's going to change in terms of who is in the
    lineup (their exact spot could change) so this gives us a good head start.

    The teams come from the rotowire daily line-ups page. For every player in
    a team's last game (self.lineup_manager.find_team_last_game) the player is
    skipped when already processed, when no player record exists, or when the
    record says they are a pitcher. Otherwise their stats are refreshed
    through self.bbr_scraper and they are written to the lineup.
    """
    # Re-apply the CLI settings to the BBR scraper, in case they differ from
    # what it was created with.
    self.bbr_scraper.sleep_time = self.sleep_time
    self.bbr_scraper.scrape_bvp = self.scrape_bvp

    rotoworld_scraper = RotoworldLineupScraper(sleep_time=self.sleep_time)
    data = MLBUtilities.fetch_data("www.rotowire.com",
                                   "/baseball/daily_lineups.htm", True)
    teams, team_details = rotoworld_scraper.parse_teams_only(data)

    for team in teams:
        players = self.lineup_manager.find_team_last_game(team)
        for player in players:
            player_id = player[MLBConstants.PLAYER_ID]
            unescaped_player = player_id.replace("_", ".")

            # Skip player if they've already been processed.
            if self.lineup_manager.is_processed(player_id):
                logging.info(
                    "Skipping {}, already processed.".format(player_id))
                continue

            start = time.time()

            player_record = self.player_manager.players_collection.find_one(
                {"player_id": unescaped_player})
            # find_one yields None when nothing matches; skip instead of
            # failing on the subscript below.
            if player_record is None:
                logging.warning(
                    "Skipping {}, no player record found.".format(
                        unescaped_player))
                continue

            # Ignore pitchers
            position = player_record[MLBConstants.POSITION].lower()
            if position == MLBConstants.PITCHER_TYPE:
                logging.info(
                    "{} is a pitcher. Skipping...".format(player))
                continue

            # Found a player. Let's update their stuff.
            url = "/players/{}/".format(unescaped_player[0:1])
            self.bbr_scraper.process_player(unescaped_player, url,
                                            active=True)

            # Mark the player as processed (write to the lineup) once their
            # stats have been updated. Work on a copy so one player's
            # position cannot leak into the next player's entry.
            player_data = dict(team_details[team])
            if len(player[MLBConstants.POSITION]) == 0:
                # NOTE(review): the return value is discarded here; confirm
                # whether it was meant to fill player_data's position.
                self.lineup_manager.find_player_position_last_game(
                    player_id)
            else:
                player_data[MLBConstants.POSITION] = player[
                    MLBConstants.POSITION]
            self.lineup_manager.add_player_to_lineup(
                player[MLBConstants.PLAYER_ID], player_data)

            end = time.time()
            print("Processed {} in {} seconds".format(player, end - start))
def parse_standard_pitching(self, soup):
    """ Read the season rows of the Standard Pitching table into self.player_data.

    Rows are the elements whose id matches
    self.pitching_standard_season_regex. The first cell of a row names the
    season and starts a fresh dict at
    self.player_data[MLBConstants.STANDARD_PITCHING][season]; the other cells
    are stored in it by position. After each row a wOBA value from
    self.stat_calculator is added to that season's dict.
    """
    table_key = MLBConstants.STANDARD_PITCHING
    # Cell position -> (stat key, resolve_value type). A type of None means
    # the cell text is stored untouched.
    cell_layout = {
        1: (MLBConstants.AGE, "int"),
        2: (MLBConstants.TEAM, None),
        3: (MLBConstants.LEAGUE, None),
        4: (MLBConstants.WINS, "int"),
        5: (MLBConstants.LOSSES, "int"),
        6: (MLBConstants.WIN_LOSS_PCT, "float"),
        7: (MLBConstants.ERA, "float"),
        8: (MLBConstants.GAMES, "int"),
        9: (MLBConstants.GAMES_STARTED, "int"),
        10: (MLBConstants.GAMES_FINISHED, "int"),
        11: (MLBConstants.COMPLETE_GAMES, "int"),
        12: (MLBConstants.SHUT_OUTS, "int"),
        13: (MLBConstants.SAVES, "int"),
        14: (MLBConstants.INNINGS_PITCHED, "float"),
        15: (MLBConstants.HITS, "int"),
        16: (MLBConstants.RUNS, "int"),
        17: (MLBConstants.EARNED_RUNS, "int"),
        18: (MLBConstants.HOME_RUNS, "int"),
        19: (MLBConstants.WALKS, "int"),
        20: (MLBConstants.INTENTIONAL_WALKS, "int"),
        21: (MLBConstants.STRIKE_OUTS, "int"),
        22: (MLBConstants.HIT_BY_PITCH, "int"),
        23: (MLBConstants.BALKS, "int"),
        24: (MLBConstants.WILD_PITCHES, "int"),
        25: (MLBConstants.BATTERS_FACED, "int"),
        26: (MLBConstants.ERA_PLUS, "float"),
        27: (MLBConstants.FIP, "float"),
        28: (MLBConstants.WHIP, "float"),
        29: (MLBConstants.HITS_PER_9_INNINGS, "float"),
        30: (MLBConstants.HOME_RUNS_PER_9_INNINGS, "float"),
        31: (MLBConstants.WALKS_PER_9_INNINGS, "float"),
        32: (MLBConstants.STRIKE_OUTS_PER_9_INNINGS, "float"),
        33: (MLBConstants.STRIKE_OUT_TO_WALK_RATIO, "float"),
    }

    for row in soup.find_all(id=self.pitching_standard_season_regex):
        season = ""
        for position, cell in enumerate(row.find_all("td")):
            if position == 0:
                # The season cell opens a new, empty record for this row.
                season = cell.text
                if table_key not in self.player_data:
                    self.player_data[table_key] = {}
                season_stats = self.player_data[table_key][season] = {}
                continue
            if position not in cell_layout:
                continue

            stat_key, value_type = cell_layout[position]
            if value_type is None:
                season_stats[stat_key] = cell.text
            else:
                season_stats[stat_key] = MLBUtilities.resolve_value(
                    cell.text, value_type)

        # Derived stat, computed from the row that was just stored.
        stored_row = self.player_data[table_key][season]
        stored_row[MLBConstants.WOBA] = self.stat_calculator.calculate_woba(
            stored_row)
def parse_player_value_batting(self, soup):
    """ Read the season rows of the Player Value--Batting table into self.player_data.

    Rows are the elements whose id matches self.player_value_batting_regex.
    The first cell of a row names the season and starts a fresh dict at
    self.player_data[MLBConstants.PLAYER_VALUE_BATTING][season]; cells 6
    through 20 are stored in it, the cells in between are ignored.
    """
    table_key = MLBConstants.PLAYER_VALUE_BATTING
    # Cell position -> (stat key, resolve_value type).
    cell_layout = {
        6: (MLBConstants.RUNS_BATTING, "int"),
        7: (MLBConstants.RUNS_FROM_BASERUNNING, "int"),
        8: (MLBConstants.RUNS_GROUNDED_INTO_DOUBLE_PLAY, "int"),
        9: (MLBConstants.RUNS_FROM_FIELDING, "int"),
        10: (MLBConstants.RUNS_FROM_POSITION_SCARCITY, "int"),
        11: (MLBConstants.RAA, "int"),
        12: (MLBConstants.WAA, "float"),
        13: (MLBConstants.RUNS_FROM_REPLACEMENT_LEVEL, "int"),
        14: (MLBConstants.RAR, "float"),
        15: (MLBConstants.WAR, "float"),
        16: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM, "float"),
        17: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM_SEASON, "float"),
        18: (MLBConstants.OFF_WAR, "float"),
        19: (MLBConstants.DEF_WAR, "float"),
        20: (MLBConstants.OFF_RAR, "float"),
    }

    for row in soup.find_all(id=self.player_value_batting_regex):
        for position, cell in enumerate(row.find_all("td")):
            if position == 0:
                # The season cell opens a new, empty record for this row.
                season = cell.text
                if table_key not in self.player_data:
                    self.player_data[table_key] = {}
                season_values = self.player_data[table_key][season] = {}
                continue
            if position not in cell_layout:
                continue

            stat_key, value_type = cell_layout[position]
            season_values[stat_key] = MLBUtilities.resolve_value(
                cell.text, value_type)
def parse_standard_batting(self, soup):
    """ Read the season rows of the Standard Batting table into self.player_data.

    Rows are the elements whose id matches
    self.batting_standard_season_regex. The first cell of a row names the
    season and starts a fresh dict at
    self.player_data[MLBConstants.STANDARD_BATTING][season]; the other cells
    are stored in it by position. After each row a wOBA value from
    self.stat_calculator is added to that season's dict.
    """
    table_key = MLBConstants.STANDARD_BATTING
    # Cells whose value is the text of a nested link when there is one.
    link_cells = {2: MLBConstants.TEAM, 3: MLBConstants.LEAGUE}
    # Cell position -> (stat key, resolve_value type). A type of None means
    # the cell text is stored untouched.
    cell_layout = {
        1: (MLBConstants.AGE, "int"),
        4: (MLBConstants.GAMES_PLAYED, "int"),
        5: (MLBConstants.PLATE_APPEARANCES, "int"),
        6: (MLBConstants.AT_BATS, "int"),
        7: (MLBConstants.RUNS, "int"),
        8: (MLBConstants.HITS, "int"),
        9: (MLBConstants.DOUBLES, "int"),
        10: (MLBConstants.TRIPLES, "int"),
        11: (MLBConstants.HOME_RUNS, "int"),
        12: (MLBConstants.RBI, "int"),
        13: (MLBConstants.STOLEN_BASES, "int"),
        14: (MLBConstants.CAUGHT_STEALING, "int"),
        15: (MLBConstants.WALKS, "int"),
        16: (MLBConstants.STRIKE_OUTS, "int"),
        17: (MLBConstants.BATTING_AVERAGE, "float"),
        18: (MLBConstants.ON_BASE_PERCENTAGE, "float"),
        19: (MLBConstants.SLUGGING_PERCENTAGE, "float"),
        20: (MLBConstants.OPS, "float"),
        21: (MLBConstants.OPS_PLUS, "float"),
        22: (MLBConstants.TOTAL_BASES, "int"),
        23: (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
        24: (MLBConstants.HIT_BY_PITCH, "int"),
        25: (MLBConstants.SACRIFICE_HITS, "int"),
        26: (MLBConstants.SACRIFICE_FLIES, "int"),
        27: (MLBConstants.INTENTIONAL_WALKS, "int"),
        28: (MLBConstants.POSITION, None),
    }

    for row in soup.find_all(id=self.batting_standard_season_regex):
        season = ""
        for position, cell in enumerate(row.find_all("td")):
            if position == 0:
                # The season cell opens a new, empty record for this row.
                season = cell.text
                if table_key not in self.player_data:
                    self.player_data[table_key] = {}
                season_stats = self.player_data[table_key][season] = {}
            elif position in link_cells:
                # Fall back to the cell's own text when it holds no link.
                try:
                    label = cell.a.text
                except AttributeError:
                    label = cell.text
                season_stats[link_cells[position]] = label
            elif position in cell_layout:
                stat_key, value_type = cell_layout[position]
                if value_type is None:
                    season_stats[stat_key] = cell.text
                else:
                    season_stats[stat_key] = MLBUtilities.resolve_value(
                        cell.text, value_type)

        # Compute additional stats from the row that was just stored.
        stored_row = self.player_data[table_key][season]
        stored_row[MLBConstants.WOBA] = self.stat_calculator.calculate_woba(
            stored_row)
def parse_player_value_pitchers(self, soup):
    """ Read the season rows of the Player Value--Pitchers table into self.player_data.

    Rows are the elements whose id matches self.player_value_pitching_regex.
    The first cell of a row names the season and starts a fresh dict at
    self.player_data[MLBConstants.PLAYER_VALUE_PITCHING][season]; cells 8
    through 22 are stored in it, the cells in between are ignored.
    """
    table_key = MLBConstants.PLAYER_VALUE_PITCHING
    salary_position = 22
    # Cell position -> (stat key, resolve_value type).
    cell_layout = {
        8: (MLBConstants.RUNS_ALLOWED_PER_9_INNINGS, "float"),
        9: (MLBConstants.RUNS_ALLOWED_PER_9_INNINGS_OPP, "float"),
        10: (MLBConstants.RUNS_PER_9_INNINGS_IN_SUPPORT_FROM_DEFENSE,
             "float"),
        11: (MLBConstants.RUNS_PER_9_INNINGS_BY_ROLE, "float"),
        12: (MLBConstants.PARK_FACTORS, "float"),
        13: (MLBConstants.RUNS_PER_9_INNINGS_FOR_AVG_PITCHER, "float"),
        14: (MLBConstants.RUNS_BETTER_THAN_AVG, "int"),
        15: (MLBConstants.WINS_ABOVE_AVG, "float"),
        16: (MLBConstants.GAME_ENTERING_LEVERAGE_INDEX, "float"),
        17: (MLBConstants.WINS_ABOVE_AVG_ADJUSTMENT, "float"),
        18: (MLBConstants.WINS_ABOVE_REPLACEMENT, "float"),
        19: (MLBConstants.RUNS_BETTER_THAN_REPLACEMENT, "int"),
        20: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM, "float"),
        21: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM_SEASON, "float"),
    }

    for row in soup.find_all(id=self.player_value_pitching_regex):
        for position, cell in enumerate(row.find_all("td")):
            if position == 0:
                # The season cell opens a new, empty record for this row.
                season = cell.text
                if table_key not in self.player_data:
                    self.player_data[table_key] = {}
                season_values = self.player_data[table_key][season] = {}
            elif position == salary_position:
                # Drop the currency sign and thousands separators first.
                amount = cell.text.replace('$', '').replace(',', '')
                season_values[MLBConstants.SALARY] = \
                    MLBUtilities.resolve_value(amount, "int")
            elif position in cell_layout:
                stat_key, value_type = cell_layout[position]
                season_values[stat_key] = MLBUtilities.resolve_value(
                    cell.text, value_type)
def parse_pitching_stats(self, soup):
    """ Parse gamelog pitching stats.

    Rows are the elements whose id matches self.pitching_gamelog_regex. The
    text of a row's third cell is the game number; the remaining cells are
    stored by position in
    self.player_data[MLBConstants.PLAYER_GAMELOG_PITCHING][self.season][game_number].
    An entry that already exists for that game is updated in place.

    :param soup: parsed page exposing find_all(id=...).
    """
    gamelog_key = MLBConstants.PLAYER_GAMELOG_PITCHING
    # Cells stored as the text of their nested link.
    link_cells = {4: MLBConstants.TEAM, 6: MLBConstants.OPPONENT}
    # Cells stored as raw text.
    text_cells = {
        8: MLBConstants.INNINGS,
        9: MLBConstants.DECISION,
        30: MLBConstants.PLAYER_GAME_SCORE,
    }
    # Cell position -> (stat key, resolve_value type) for cells whose plain
    # text goes straight through MLBUtilities.resolve_value.
    numeric_cells = {
        10: (MLBConstants.DAYS_REST, "int"),
        12: (MLBConstants.HITS, "int"),
        13: (MLBConstants.RUNS, "int"),
        14: (MLBConstants.EARNED_RUNS, "int"),
        15: (MLBConstants.WALKS, "int"),
        16: (MLBConstants.STRIKE_OUTS, "int"),
        17: (MLBConstants.HOME_RUNS, "int"),
        18: (MLBConstants.HIT_BY_PITCH, "int"),
        19: (MLBConstants.ERA, "float"),
        20: (MLBConstants.BATTERS_FACED, "int"),
        22: (MLBConstants.STRIKES, "int"),
        23: (MLBConstants.STRIKES_LOOKING, "int"),
        24: (MLBConstants.STRIKES_SWINGING, "int"),
        25: (MLBConstants.GROUND_BALLS, "int"),
        26: (MLBConstants.FLY_BALLS, "int"),
        27: (MLBConstants.LINE_DRIVES, "int"),
        28: (MLBConstants.POP_UPS, "int"),
        29: (MLBConstants.UNKNOWN_BATTED_BALLS, "int"),
        31: (MLBConstants.INHERITED_RUNNERS, "int"),
        32: (MLBConstants.INHERITED_SCORE, "int"),
        33: (MLBConstants.STOLEN_BASES, "int"),
        34: (MLBConstants.CAUGHT_STEALING, "int"),
        35: (MLBConstants.PICK_OFFS, "int"),
        36: (MLBConstants.AT_BATS, "int"),
        37: (MLBConstants.DOUBLES, "int"),
        38: (MLBConstants.TRIPLES, "int"),
        39: (MLBConstants.INTENTIONAL_WALKS, "int"),
        40: (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
        41: (MLBConstants.SACRIFICE_FLIES, "int"),
        42: (MLBConstants.REACHED_ON_ERROR, "int"),
        43: (MLBConstants.AVERAGE_LEVERAGE_INDEX, "float"),
        44: (MLBConstants.WIN_PROBABILITY_ADDED_BY_PITCHER, "float"),
        45: (MLBConstants.BASE_OUT_RUNS_SAVED, "float"),
    }

    for row in soup.find_all(id=self.pitching_gamelog_regex):
        game_stats = None
        for position, cell in enumerate(row.find_all("td")):
            if position < 2:
                # The first two cells are not stored.
                continue

            if position == 2:
                # Game number: find or create the record for this game.
                game_number = cell.text
                if gamelog_key not in self.player_data:
                    self.player_data[gamelog_key] = {}
                if self.season not in self.player_data[gamelog_key]:
                    self.player_data[gamelog_key][self.season] = {}
                season_games = self.player_data[gamelog_key][self.season]
                if game_number not in season_games:
                    season_games[game_number] = {}
                game_stats = season_games[game_number]
            elif position == 3:
                # Dates are only stored for seasons from 1900 on
                # (presumably a mktime limitation - TODO confirm).
                if int(self.season) >= 1900:
                    date_text = "{} {}".format(
                        cell.a.text.replace(u'\xa0', u' '), self.season)
                    game_stats[MLBConstants.DATE] = datetime.fromtimestamp(
                        mktime(strptime(date_text, "%b %d %Y")))
            elif position == 5:
                # An "@" marks a game that was not played at home.
                game_stats[MLBConstants.HOME_GAME] = cell.text != "@"
            elif position == 7:
                # NOTE(review): if the result text does not match
                # self.result_regex, the group() calls below will fail.
                result = self.result_regex.match(cell.text)
                game_stats[MLBConstants.RESULT] = result.group(1)
                game_stats[MLBConstants.TEAM_SCORE] = int(result.group(2))
                game_stats[MLBConstants.OPPONENT_SCORE] = int(
                    result.group(3))
            elif position == 11:
                game_stats[MLBConstants.INNINGS_PITCHED] = \
                    MLBUtilities.resolve_value(cell.span.text, "float")
            elif position == 21:
                # Prefer the nested link's text, then the cell text, then
                # give up. Exception (not a bare except) keeps Ctrl-C and
                # interpreter exit from being swallowed.
                try:
                    game_stats[MLBConstants.NUM_PITCHES] = \
                        MLBUtilities.resolve_value(cell.a.text, "int")
                except Exception:
                    try:
                        game_stats[MLBConstants.NUM_PITCHES] = \
                            MLBUtilities.resolve_value(cell.text, "int")
                    except Exception:
                        logging.info("Giving up on NUM_PITCHES")
            elif position == 46:
                game_stats[MLBConstants.ENTRY_SITUATION] = \
                    cell.span.text.strip()
            elif position == 47:
                game_stats[MLBConstants.EXIT_SITUATION] = cell.text.strip()
            elif position in link_cells:
                game_stats[link_cells[position]] = cell.a.text
            elif position in text_cells:
                game_stats[text_cells[position]] = cell.text
            elif position in numeric_cells:
                stat_key, value_type = numeric_cells[position]
                game_stats[stat_key] = MLBUtilities.resolve_value(
                    cell.text, value_type)
def parse_batting_stats(self, soup):
    """
    Parse gamelog batting stats.

    Every element of ``soup`` whose id matches ``self.batting_gamelog_regex``
    is treated as one game row. Its ``td`` cells are read by position and
    stored under
    ``self.player_data[PLAYER_GAMELOG_BATTING][self.season][game_number]``,
    where ``game_number`` is the text of the cell at index 2. Cells at
    index 0 and 1, and any cell past index 35, are ignored.
    """
    gamelog_key = MLBConstants.PLAYER_GAMELOG_BATTING
    resolve = MLBUtilities.resolve_value

    # Cells 10..34 are all read from td.text and converted by resolve_value;
    # the tuple is ordered by cell index, starting at 10.
    converted_columns = dict(enumerate((
        (MLBConstants.AT_BATS, "int"),
        (MLBConstants.RUNS, "int"),
        (MLBConstants.HITS, "int"),
        (MLBConstants.DOUBLES, "int"),
        (MLBConstants.TRIPLES, "int"),
        (MLBConstants.HOME_RUNS, "int"),
        (MLBConstants.RBI, "int"),
        (MLBConstants.WALKS, "int"),
        (MLBConstants.INTENTIONAL_WALKS, "int"),
        (MLBConstants.STRIKE_OUTS, "int"),
        (MLBConstants.HIT_BY_PITCH, "int"),
        (MLBConstants.SACRIFICE_HITS, "int"),
        (MLBConstants.SACRIFICE_FLIES, "int"),
        (MLBConstants.REACHED_ON_ERROR, "int"),
        (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
        (MLBConstants.STOLEN_BASES, "int"),
        (MLBConstants.CAUGHT_STEALING, "int"),
        (MLBConstants.BATTING_AVERAGE, "float"),
        (MLBConstants.ON_BASE_PERCENTAGE, "float"),
        (MLBConstants.SLUGGING_PERCENTAGE, "float"),
        (MLBConstants.OPS, "float"),
        (MLBConstants.BATTING_ORDER_POSITION, "int"),
        (MLBConstants.AVERAGE_LEVERAGE_INDEX, "float"),
        (MLBConstants.WIN_PROBABILITY_ADDED, "float"),
        (MLBConstants.BASE_OUT_RUNS_ADDED, "float"),
    ), 10))

    for row in soup.find_all(id=self.batting_gamelog_regex):
        # Set when cell 2 is reached; every cell that writes a stat has a
        # higher index, so it is always bound before use.
        game = None
        for column, cell in enumerate(row.find_all("td")):
            if column == 2:
                # Cell 2 holds the game number; create the nested
                # gamelog -> season -> game dictionaries on demand.
                seasons = self.player_data.setdefault(gamelog_key, {})
                games = seasons.setdefault(self.season, {})
                game = games.setdefault(cell.text, {})
            elif column == 3:
                # The cell only carries month and day (non-breaking space
                # separated); append the season so strptime gets a year.
                date_text = "{} {}".format(
                    cell.a.text.replace(u'\xa0', u' '), self.season)
                game[MLBConstants.DATE] = datetime.fromtimestamp(
                    mktime(strptime(date_text, "%b %d %Y")))
            elif column == 4:
                game[MLBConstants.TEAM] = cell.a.text
            elif column == 5:
                # "@" marks an away game.
                game[MLBConstants.HOME_GAME] = cell.text != "@"
            elif column == 6:
                game[MLBConstants.OPPONENT] = cell.a.text
            elif column == 7:
                outcome = self.result_regex.match(cell.text)
                game[MLBConstants.RESULT] = outcome.group(1)
                game[MLBConstants.TEAM_SCORE] = int(outcome.group(2))
                game[MLBConstants.OPPONENT_SCORE] = int(outcome.group(3))
            elif column == 8:
                game[MLBConstants.INNINGS] = cell.text
            elif column == 9:
                # Plate appearances are read from the nested <span>.
                game[MLBConstants.PLATE_APPEARANCES] = resolve(
                    cell.span.text, "int")
            elif column == 35:
                game[MLBConstants.POSITION] = cell.text
            elif column in converted_columns:
                stat_key, value_type = converted_columns[column]
                game[stat_key] = resolve(cell.text, value_type)
def process(self):
    """
    Build the batter and pitcher projection CSV files.

    Reads the lineup document and the ballpark-factor document stored for
    ``self.game_date``, looks each listed player up in the players
    collection and assembles one CSV row per player. Batters and pitchers
    go to separate files under ``../projections/``, named with today's
    date. Every row is also printed.

    Lineup entries with fewer than 8 fields are skipped, as are pitchers
    without standard-pitching or split stats for ``self.season``. Any
    individual stat that is missing from a player document is written as
    "N/A".

    NOTE(review): assumes a lineup document, a ballpark document and a
    player document exist for everything looked up here; none of the
    query results are checked for None -- confirm against callers.
    """
    not_available = "N/A"
    season = self.season
    players_collection = self.player_manager.players_collection

    def _stat(document, *path):
        """ Return str() of the value at the nested key path, else "N/A". """
        node = document
        for key in path:
            if not isinstance(node, dict) or key not in node:
                return not_available
            node = node[key]
        return str(node)

    def _park_factors(lineup):
        """ Return (hits, home runs) factors for the home team's park. """
        if lineup["home"]:
            park_team = lineup[MLBConstants.TEAM]
        else:
            park_team = lineup[MLBConstants.OPPONENT]
        factors = ballpark_data[MLBConstants.BPF_ALL][
            MLBUtilities.map_rg_team_to_rotowire(park_team)]
        return factors[MLBConstants.HITS], factors[MLBConstants.HOME_RUNS]

    def _append_vegas(row, lineup):
        """ Append the Vegas line and over/under, or "N/A" for each. """
        row.append(lineup.get(MLBConstants.VEGAS_LINE, not_available))
        row.append(lineup.get(MLBConstants.OVER_UNDER, not_available))

    players = self.lineup_manager.lineups_collection.find_one(
        {"date": str(self.game_date)}, {"players": 1})

    batter_csv_contents = [[
        "Name", "Team", "Opponent", "Verified", "Position",
        "Batting Order Position", "wOBA", "wOBA vs Pitcher Type (LH/RH)",
        "OPS vs Pitcher Type (LH/RH)", "OPS", "Plate Appearances vs Pitcher",
        "Avg vs Pitcher", "Hits vs Pitcher", "HRs vs Pitcher", "Park Runs",
        "Park HRs", "Vegas Line", "O/U"
    ]]
    pitcher_csv_contents = [[
        "Name", "Team", "Opponent", "Verified", "LH/RH", "FIP", "wOBA",
        "wOBA vs RHB", "wOBA vs LHB", "BABIP vs RHB", "BABIP vs LHB", "K/9",
        "BB/9", "Park Runs", "Park HRs", "Vegas Line", "O/U"
    ]]

    ballpark_data = self.ballpark_collection.find_one(
        {"date": str(self.game_date)})

    for player in players["players"]:
        player_lineup_data = players["players"][player]
        if len(player_lineup_data) < 8:
            continue

        # Lineup keys use "_" where the stored player id has ".".
        escaped_player_id = player.replace("_", ".")
        player_data = players_collection.find_one(
            {MLBConstants.PLAYER_ID: escaped_player_id}, {
                MLBConstants.POSITION: 1,
                MLBConstants.NAME: 1
            })

        player_csv_data = [
            player_data[MLBConstants.NAME].encode('ascii', errors='ignore'),
            player_lineup_data[MLBConstants.TEAM],
            player_lineup_data[MLBConstants.OPPONENT],
            str(player_lineup_data[MLBConstants.VERIFIED]),
        ]

        is_batter = player_data[MLBConstants.POSITION] != "Pitcher"

        ######################
        # Player is a batter
        ######################
        if is_batter:
            # Retrieve opposing pitcher data
            opposing_pitcher_data = players_collection.find_one(
                {
                    MLBConstants.NAME:
                    player_lineup_data["opposing_pitcher"]
                }, {
                    MLBConstants.PLAYER_ID: 1,
                    MLBConstants.HANDEDNESS_THROWING: 1
                })

            batter_data = players_collection.find_one(
                {MLBConstants.PLAYER_ID: escaped_player_id}, {
                    MLBConstants.POSITION: 1,
                    MLBConstants.HANDEDNESS_BATTING: 1,
                    "{}.{}.{}".format(MLBConstants.STANDARD_BATTING,
                                      season, MLBConstants.WOBA): 1,
                    "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS,
                                         season,
                                         MLBConstants.SPLITS_VS_RHP,
                                         MLBConstants.WOBA): 1,
                    "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS,
                                         season,
                                         MLBConstants.SPLITS_VS_LHP,
                                         MLBConstants.WOBA): 1,
                    "{}.{}.{}".format(MLBConstants.STANDARD_BATTING,
                                      season, MLBConstants.OPS): 1,
                    "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS,
                                         season,
                                         MLBConstants.SPLITS_VS_RHP,
                                         MLBConstants.OPS): 1,
                    "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS,
                                         season,
                                         MLBConstants.SPLITS_VS_LHP,
                                         MLBConstants.OPS): 1,
                    MLBConstants.BATTER_VS_PITCHER: 1
                })

            ballpark_hits, ballpark_home_runs = _park_factors(
                player_lineup_data)

            player_csv_data.append(
                player_lineup_data[MLBConstants.POSITION].replace(",", "/"))
            player_csv_data.append(
                str(player_lineup_data[MLBConstants.BATTING_ORDER_POSITION]))

            # Season wOBA
            player_csv_data.append(
                _stat(batter_data, MLBConstants.STANDARD_BATTING, season,
                      MLBConstants.WOBA))

            # wOBA / OPS against the opposing pitcher's throwing hand.
            # Anything other than "Right" is read from the vs-LHP split.
            if opposing_pitcher_data is None \
                    or MLBConstants.HANDEDNESS_THROWING not in opposing_pitcher_data:
                player_csv_data.append(not_available)
                player_csv_data.append(not_available)
            else:
                if opposing_pitcher_data[
                        MLBConstants.HANDEDNESS_THROWING] == "Right":
                    split_key = MLBConstants.SPLITS_VS_RHP
                else:
                    split_key = MLBConstants.SPLITS_VS_LHP
                if split_key in batter_data.get(
                        MLBConstants.BATTER_SPLITS, {}).get(season, {}):
                    player_csv_data.append(
                        _stat(batter_data, MLBConstants.BATTER_SPLITS,
                              season, split_key, MLBConstants.WOBA))
                    player_csv_data.append(
                        _stat(batter_data, MLBConstants.BATTER_SPLITS,
                              season, split_key, MLBConstants.OPS))
                else:
                    player_csv_data.append(not_available)
                    player_csv_data.append(not_available)

            # Season OPS
            player_csv_data.append(
                _stat(batter_data, MLBConstants.STANDARD_BATTING, season,
                      MLBConstants.OPS))

            # BvP. Batter-vs-pitcher entries are keyed by the opponent id
            # with "." replaced by "_", so translate the stored player id
            # the same way before looking it up.
            bvp_key = None
            if opposing_pitcher_data is not None:
                bvp_key = opposing_pitcher_data[
                    MLBConstants.PLAYER_ID].replace(".", "_")
            bvp_data = batter_data.get(MLBConstants.BATTER_VS_PITCHER, {})
            if bvp_key is not None and bvp_key in bvp_data:
                for bvp_field in (MLBConstants.PLATE_APPEARANCES,
                                  MLBConstants.BATTING_AVERAGE,
                                  MLBConstants.HITS,
                                  MLBConstants.HOME_RUNS):
                    player_csv_data.append(
                        _stat(bvp_data, bvp_key, bvp_field))
            else:
                player_csv_data.extend([not_available] * 4)

            # Park factors
            player_csv_data.append(str(ballpark_hits))
            player_csv_data.append(str(ballpark_home_runs))

            # Vegas line and O/U
            _append_vegas(player_csv_data, player_lineup_data)

            batter_csv_contents.append(player_csv_data)

        #######################
        # Player is a pitcher
        #######################
        else:
            pitcher_data = players_collection.find_one(
                {MLBConstants.PLAYER_ID: escaped_player_id}, {
                    MLBConstants.POSITION: 1,
                    MLBConstants.HANDEDNESS_THROWING: 1,
                    "{}.{}.{}".format(MLBConstants.STANDARD_PITCHING,
                                      season, MLBConstants.FIP): 1,
                    "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.FIP): 1,
                    "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.FIP): 1,
                    "{}.{}.{} Totals.{}".format(
                        MLBConstants.PITCHER_SPLITS, season, season,
                        MLBConstants.WOBA): 1,
                    "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.WOBA): 1,
                    "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.WOBA): 1,
                    "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.BABIP): 1,
                    "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS,
                                             season, MLBConstants.BABIP): 1,
                    "{}.{}.{}".format(
                        MLBConstants.STANDARD_PITCHING, season,
                        MLBConstants.STRIKE_OUTS_PER_9_INNINGS): 1,
                    "{}.{}.{}".format(MLBConstants.STANDARD_PITCHING,
                                      season,
                                      MLBConstants.WALKS_PER_9_INNINGS): 1,
                    MLBConstants.BATTER_VS_PITCHER: 1
                })

            # Do a quick check to make sure the pitcher has stats for the
            # current season. If the current season isn't available then
            # bail on this pitcher.
            if season not in pitcher_data.get(
                    MLBConstants.STANDARD_PITCHING, {}) \
                    or season not in pitcher_data.get(
                        MLBConstants.PITCHER_SPLITS, {}):
                print(
                    "Could not find season {} for either Standard Pitching or Splits for {}. Not sure how that would happen".
                    format(season, escaped_player_id))
                continue

            ballpark_hits, ballpark_home_runs = _park_factors(
                player_lineup_data)

            player_csv_data.append(
                pitcher_data.get(MLBConstants.HANDEDNESS_THROWING,
                                 not_available))

            # FIP
            player_csv_data.append(
                _stat(pitcher_data, MLBConstants.STANDARD_PITCHING, season,
                      MLBConstants.FIP))

            # Season-total wOBA; the totals split may be absent.
            player_csv_data.append(
                _stat(pitcher_data, MLBConstants.PITCHER_SPLITS, season,
                      "{} Totals".format(season), MLBConstants.WOBA))

            # wOBA vs RHB / LHB, then BABIP vs RHB / LHB
            for split_stat in (MLBConstants.WOBA, MLBConstants.BABIP):
                for split_key in (MLBConstants.SPLITS_VS_RHB,
                                  MLBConstants.SPLITS_VS_LHB):
                    player_csv_data.append(
                        _stat(pitcher_data, MLBConstants.PITCHER_SPLITS,
                              season, split_key, split_stat))

            # K/9 and BB/9
            player_csv_data.append(
                _stat(pitcher_data, MLBConstants.STANDARD_PITCHING, season,
                      MLBConstants.STRIKE_OUTS_PER_9_INNINGS))
            player_csv_data.append(
                _stat(pitcher_data, MLBConstants.STANDARD_PITCHING, season,
                      MLBConstants.WALKS_PER_9_INNINGS))

            # Park factors
            player_csv_data.append(str(ballpark_hits))
            player_csv_data.append(str(ballpark_home_runs))

            # Vegas line and O/U
            _append_vegas(player_csv_data, player_lineup_data)

            pitcher_csv_contents.append(player_csv_data)

        print(player_csv_data)

    # Write results out to file. The context manager closes (and flushes)
    # both files even if a write fails part-way through.
    with open("../projections/pitchers_{}.csv".format(str(date.today())),
              "w") as pitcher_output, \
            open("../projections/batters_{}.csv".format(str(date.today())),
                 "w") as batter_output:
        for line in pitcher_csv_contents:
            pitcher_output.write(",".join(line) + "\n")

        for line in batter_csv_contents:
            batter_output.write(",".join(line) + "\n")
def parse_batter_splits(self, soup, season):
    """
    Parse the batter split tables found in ``soup`` for ``season``.

    Each ``div`` with class "stw" is scanned row by row. The first
    non-empty ``td`` of a row names the split (with "." replaced by "_");
    the following cells are stored by position under
    ``self.player_data[BATTER_SPLITS][season][split_type]``. After each
    row the split's wOBA is computed with
    ``self.stat_calculator.calculate_woba``.
    """
    splits_key = MLBConstants.BATTER_SPLITS
    resolve = MLBUtilities.resolve_value

    # Stat stored for each cell after the split name, in cell order:
    # entry 0 describes column 1, entry 1 describes column 2, and so on.
    stat_columns = (
        (MLBConstants.GAMES_PLAYED, "int"),
        (MLBConstants.GAMES_STARTED, "int"),
        (MLBConstants.PLATE_APPEARANCES, "int"),
        (MLBConstants.AT_BATS, "int"),
        (MLBConstants.RUNS, "int"),
        (MLBConstants.HITS, "int"),
        (MLBConstants.DOUBLES, "int"),
        (MLBConstants.TRIPLES, "int"),
        (MLBConstants.HOME_RUNS, "int"),
        (MLBConstants.RBI, "int"),
        (MLBConstants.STOLEN_BASES, "int"),
        (MLBConstants.CAUGHT_STEALING, "int"),
        (MLBConstants.WALKS, "int"),
        (MLBConstants.STRIKE_OUTS, "int"),
        (MLBConstants.BATTING_AVERAGE, "float"),
        (MLBConstants.ON_BASE_PERCENTAGE, "float"),
        (MLBConstants.SLUGGING_PERCENTAGE, "float"),
        (MLBConstants.OPS, "float"),
        (MLBConstants.TOTAL_BASES, "int"),
        (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
        (MLBConstants.HIT_BY_PITCH, "int"),
        (MLBConstants.SACRIFICE_HITS, "int"),
        (MLBConstants.SACRIFICE_FLIES, "int"),
        (MLBConstants.INTENTIONAL_WALKS, "int"),
        (MLBConstants.REACHED_ON_ERROR, "int"),
        (MLBConstants.BABIP, "float"),
        (MLBConstants.T_OPS_PLUS, "int"),
        (MLBConstants.S_OPS_PLUS, "int"),
    )

    for section in soup.find_all("div", attrs={"class": "stw"}):
        # The heading text is not used further in this method; the lookup
        # is kept so unexpected heading markup still raises here.
        heading = section.find("div", attrs={"class": "table_heading"})
        label = heading.a.h4.text

        for row in section.find_all("tr"):
            cells = row.find_all("td")
            if not cells:
                continue

            column = 0
            split_type = ""
            for cell in cells:
                if column == 0:
                    if cell.text == "":
                        # Leading blank cell: skip it WITHOUT advancing
                        # the column index, so the next cell is treated
                        # as the split name.
                        continue
                    season_splits = self.player_data.setdefault(
                        splits_key, {}).setdefault(season, {})
                    split_type = cell.text.replace(".", "_")
                    season_splits.setdefault(split_type, {})
                elif column <= len(stat_columns):
                    stat_key, value_type = stat_columns[column - 1]
                    self.player_data[splits_key][season][split_type][
                        stat_key] = resolve(cell.text, value_type)
                column += 1

            # wOBA is derived from the stats just collected for this row.
            self.player_data[splits_key][season][split_type][
                MLBConstants.WOBA] = self.stat_calculator.calculate_woba(
                    self.player_data[splits_key][season][split_type])
def parse_pitcher_splits(self, soup, season):
    """
    Parse the pitcher split tables found in ``soup`` for ``season``.

    There are (sometimes) two tables for each split type for pitchers -
    normal and extras. A table whose id contains "_extra" is an extras
    table: its split name is in cell 1 rather than cell 0 and it carries a
    different set of columns. Both kinds of table write into the same
    ``self.player_data[PITCHER_SPLITS][season][split_type]`` dictionary,
    where ``split_type`` is the split name with "." replaced by "_".

    After every row the split's FIP and wOBA are recomputed through
    ``self.stat_calculator``. Sections without a table or a ``tbody`` are
    skipped.
    """
    splits_key = MLBConstants.PITCHER_SPLITS
    resolve = MLBUtilities.resolve_value

    # Normal table: cell 0 is the split name, cells 1..27 are these stats.
    standard_columns = dict(enumerate((
        (MLBConstants.GAMES_PLAYED, "int"),
        (MLBConstants.PLATE_APPEARANCES, "int"),
        (MLBConstants.AT_BATS, "int"),
        (MLBConstants.RUNS, "int"),
        (MLBConstants.HITS, "int"),
        (MLBConstants.DOUBLES, "int"),
        (MLBConstants.TRIPLES, "int"),
        (MLBConstants.HOME_RUNS, "int"),
        (MLBConstants.STOLEN_BASES, "int"),
        (MLBConstants.CAUGHT_STEALING, "int"),
        (MLBConstants.WALKS, "int"),
        (MLBConstants.STRIKE_OUTS, "int"),
        (MLBConstants.STRIKE_OUT_TO_WALK_RATIO, "float"),
        (MLBConstants.BATTING_AVERAGE, "float"),
        (MLBConstants.ON_BASE_PERCENTAGE, "float"),
        (MLBConstants.SLUGGING_PERCENTAGE, "float"),
        (MLBConstants.OPS, "float"),
        (MLBConstants.TOTAL_BASES, "int"),
        (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
        (MLBConstants.HIT_BY_PITCH, "int"),
        (MLBConstants.SACRIFICE_HITS, "int"),
        (MLBConstants.SACRIFICE_FLIES, "int"),
        (MLBConstants.INTENTIONAL_WALKS, "int"),
        (MLBConstants.REACHED_ON_ERROR, "int"),
        (MLBConstants.BABIP, "float"),
        (MLBConstants.T_OPS_PLUS, "int"),
        (MLBConstants.S_OPS_PLUS, "int"),
    ), 1))

    # Extras table: cell 1 is the split name; only the cells listed here
    # are stored, every other cell is ignored.
    extras_columns = {
        2: (MLBConstants.WINS, "int"),
        3: (MLBConstants.LOSSES, "int"),
        4: (MLBConstants.WIN_LOSS_PCT, "float"),
        5: (MLBConstants.ERA, "float"),
        7: (MLBConstants.GAMES_STARTED, "int"),
        8: (MLBConstants.GAMES_FINISHED, "int"),
        9: (MLBConstants.COMPLETE_GAMES, "int"),
        10: (MLBConstants.SHUT_OUTS, "int"),
        11: (MLBConstants.SAVES, "int"),
        12: (MLBConstants.INNINGS_PITCHED, "float"),
        15: (MLBConstants.EARNED_RUNS, "int"),
        21: (MLBConstants.BALKS, "int"),
        22: (MLBConstants.WILD_PITCHES, "int"),
        24: (MLBConstants.WHIP, "float"),
        25: (MLBConstants.STRIKE_OUTS_PER_9_INNINGS, "float"),
    }

    for split_div in soup.find_all("div", attrs={"class": "stw"}):
        table = split_div.find("table")
        tbody = split_div.find("tbody")
        if table is None or tbody is None:
            # Nothing to parse in this section (previously this crashed on
            # the missing element).
            continue

        pitcher_extras = "_extra" in table.attrs.get("id", "")
        if pitcher_extras:
            label_column, stat_columns = 1, extras_columns
        else:
            label_column, stat_columns = 0, standard_columns

        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            if not tds:
                continue

            split_type = None
            for i, td in enumerate(tds):
                if i == label_column:
                    split_type = td.text.replace(".", "_")
                    season_splits = self.player_data.setdefault(
                        splits_key, {}).setdefault(season, {})
                    # Test the escaped name, not the raw cell text: a name
                    # containing "." never matches a stored key, which
                    # used to wipe the stats already parsed for the split.
                    season_splits.setdefault(split_type, {})
                elif i in stat_columns:
                    stat_key, value_type = stat_columns[i]
                    self.player_data[splits_key][season][split_type][
                        stat_key] = resolve(td.text, value_type)

            if split_type is None:
                # Row too short to contain a split name (extras row with a
                # single cell); there is nothing to compute for it.
                continue

            split = self.player_data[splits_key][season][split_type]
            split[MLBConstants.FIP] = self.stat_calculator.calculate_fip(
                split)
            split[MLBConstants.WOBA] = self.stat_calculator.calculate_woba(
                split)
def parse(self, data):
    """
    Parse data from the batter vs pitcher page.

    Looks up the table with id ``ajax_result_table`` and, for every row
    that has data cells, stores the opponent's name and stat line under
    ``self.player_data[MLBConstants.BATTER_VS_PITCHER][opponent_id]``.
    The opponent id is taken from the link in the first cell (dots are
    replaced with underscores) using the pitcher id regex when
    ``self.type`` is the batter type, and the batter id regex otherwise.

    If the table is missing, nothing is parsed; the (possibly empty)
    batter vs pitcher dict is still created on ``self.player_data``.
    Rows whose first cell carries no link matching the id regex are
    skipped instead of raising.

    data -- markup of the batter vs pitcher page, handed to BeautifulSoup.
    """
    # Local import keeps this method self-contained.
    import logging

    # Stat cells follow the opponent link in this fixed order; each entry
    # is (player_data key, type name passed to MLBUtilities.resolve_value).
    stat_columns = (
        (MLBConstants.PLATE_APPEARANCES, "int"),
        (MLBConstants.AT_BATS, "int"),
        (MLBConstants.HITS, "int"),
        (MLBConstants.DOUBLES, "int"),
        (MLBConstants.TRIPLES, "int"),
        (MLBConstants.HOME_RUNS, "int"),
        (MLBConstants.RBI, "int"),
        (MLBConstants.WALKS, "int"),
        (MLBConstants.STRIKE_OUTS, "int"),
        (MLBConstants.BATTING_AVERAGE, "float"),
        (MLBConstants.ON_BASE_PERCENTAGE, "float"),
        (MLBConstants.SLUGGING_PERCENTAGE, "float"),
        (MLBConstants.OPS, "float"),
        (MLBConstants.SACRIFICE_HITS, "int"),
        (MLBConstants.SACRIFICE_FLIES, "int"),
        (MLBConstants.INTENTIONAL_WALKS, "int"),
        (MLBConstants.HIT_BY_PITCH, "int"),
        (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
    )

    soup = BeautifulSoup(data)
    matchups = self.player_data.setdefault(
        MLBConstants.BATTER_VS_PITCHER, {})

    table = soup.find("table", attrs={"id": "ajax_result_table"})
    if table is None:
        # find() returns None when nothing matches; calling find_all on it
        # used to raise AttributeError.
        logging.warning(
            "No batter vs pitcher result table found; nothing to parse.")
        return

    # A batter's page links to pitchers and vice versa.
    if self.type == MLBConstants.BATTER_TYPE:
        opponent_regex = self.pitcher_id_regex
    else:
        opponent_regex = self.batter_id_regex

    for tr in table.find_all("tr"):
        tds = tr.find_all("td")
        if not tds:
            # No data cells in this row, so there is nothing to record.
            continue

        link = tds[0].a
        match = None
        if link is not None:
            match = opponent_regex.match(link.get("href", ""))
        if match is None:
            # Without a recognisable opponent link there is no id to file
            # the stats under.
            logging.debug(
                "Skipping batter vs pitcher row without an opponent link.")
            continue

        opponent_id = match.group(1).replace(".", "_")
        opponent = matchups.setdefault(opponent_id, {})
        opponent[MLBConstants.NAME] = link.text

        # zip stops at the shorter sequence: extra cells are ignored and
        # short rows only fill the stats they actually contain.
        for (stat_key, value_type), td in zip(stat_columns, tds[1:]):
            opponent[stat_key] = MLBUtilities.resolve_value(
                td.text, value_type)
def process_player(self, player_id, url, active=False):
    """
    Scrape one player's pages from baseball-reference.com and save them.

    Reads the stored record for ``player_id`` through
    ``self.player_manager`` (starting a fresh record when none exists),
    then fetches and parses, in order: the main player page, the detailed
    pitching or batting page, gamelogs, splits and, when
    ``self.scrape_bvp`` is set, the batter vs pitcher page. Gamelogs are
    only fetched for the "2014" season and splits only for "2014" and
    "Career". The record is saved once at the end.

    player_id -- id used to build the player's page URLs.
    url       -- URL path prefix that the player's pages live under.
    active    -- when False, a stored record that already holds batter vs
                 pitcher data is treated as fully scraped and skipped.
    """
    host = "www.baseball-reference.com"

    # A single parenthesised argument prints identically under the
    # print statement.
    started = time.time()
    player = self.player_manager.read({MLBConstants.PLAYER_ID: player_id})
    print("\t\tDEBUG: Read from player_manager in {} seconds".format(
        time.time() - started))

    if player is None:
        player = {MLBConstants.PLAYER_ID: player_id}
    elif not active and MLBConstants.BATTER_VS_PITCHER in player:
        # We can skip this player
        logging.info(
            "Looks like all info for {} has already been scraped. Moving on..."
            .format(player_id))
        return

    # Main player page.
    stats_parser = self.player_season_stats_parser
    main_page = MLBUtilities.fetch_data(
        host, "{}{}.shtml".format(url, player_id), True)
    stats_parser.player_data = player
    stats_parser.parse(main_page)

    # Should we get the pitcher or batter page?
    detail_template = (
        "{}{}-pitch.shtml"
        if stats_parser.player_data[MLBConstants.POSITION] == "Pitcher"
        else "{}{}-bat.shtml")
    detail_url = detail_template.format(url, player_id)

    # Fetch detailed season stats for the player.
    started = time.time()
    detail_page = MLBUtilities.fetch_data(host, detail_url, True)
    print("\t\tDEBUG: Fetched detailed season stats in {} seconds".format(
        time.time() - started))

    started = time.time()
    stats_parser.parse(detail_page)
    print("\t\tDEBUG: Parsed detailed season stats in {} seconds".format(
        time.time() - started))

    seasons = self.determine_active_seasons(player)
    if player[MLBConstants.POSITION] == "Pitcher":
        type_code = "p"
    else:
        type_code = "b"

    # Grab gamelogs
    gamelog_parser = self.player_gamelog_parser
    for season in seasons:
        if season in ("2014",):
            gamelog_url = "/players/gl.cgi?id={}&t={}&year={}".format(
                player_id, type_code, season)
            gamelog_page = MLBUtilities.fetch_data(host, gamelog_url, True)
            gamelog_parser.player_data = player
            if player[MLBConstants.POSITION] == "Pitcher":
                gamelog_parser.type = MLBConstants.PITCHER_TYPE
            else:
                gamelog_parser.type = MLBConstants.BATTER_TYPE
            gamelog_parser.season = season
            gamelog_parser.parse(gamelog_page)

    # Grab splits
    # The career entry is appended to the list returned by
    # determine_active_seasons.
    seasons.append("Career")
    splits_parser = self.player_splits_parser
    for season in seasons:
        if season in ("2014", "Career"):
            split_url = "/players/split.cgi?id={}&t={}&year={}".format(
                player_id, type_code, season)
            split_page = MLBUtilities.fetch_data(host, split_url, True)
            splits_parser.player_data = player
            splits_parser.season = season
            splits_parser.parse(split_page, season)

    # Grab BvP (or PvB)
    if self.scrape_bvp:
        bvp_parser = self.player_bvp_parser
        if stats_parser.player_data[MLBConstants.POSITION] == "Pitcher":
            bvp_parser.type = MLBConstants.PITCHER_TYPE
            bvp_template = "/play-index/batter_vs_pitcher.cgi?pitcher={}"
        else:
            bvp_parser.type = MLBConstants.BATTER_TYPE
            bvp_template = "/play-index/batter_vs_pitcher.cgi?batter={}"
        # NOTE(review): every other fetch here goes through
        # MLBUtilities.fetch_data with an explicit host; confirm that
        # self.fetch_data exists and targets the same site.
        bvp_page = self.fetch_data(bvp_template.format(player_id), True)
        bvp_parser.player_data = player
        bvp_parser.parse(bvp_page)

    started = time.time()
    self.player_manager.save(player)
    print("\t\tDEBUG: Saved player stats in {} seconds".format(
        time.time() - started))