コード例 #1
0
    def process_players(self):
        """
        Fetch and process the players listed on baseball-reference.com.

        Walks the per-letter index pages ("/players/a/" through
        "/players/z/"), hands each page to ``self.player_list_parser`` and
        then calls ``self.process_player`` for the ids the parser exposes:
        the "odd" ids first, the retired ids next (only when
        ``self.all_players`` is truthy) and the active ids last.
        """
        for letter in "abcdefghijklmnopqrstuvwxyz":
            index_url = "/players/{}/".format(letter)
            page = MLBUtilities.fetch_data("www.baseball-reference.com",
                                           index_url, True)

            self.player_list_parser.parse(page, letter)

            # "Odd" entries are indexable: element 0 selects the index page
            # to use and element 1 is the player id passed on.
            for odd_entry in self.player_list_parser.odd_player_ids:
                self.process_player(odd_entry[1],
                                    "/players/{}/".format(odd_entry[0]))

            # Retired players are only handled when explicitly requested.
            if self.all_players:
                for retired_id in self.player_list_parser.retired_player_ids:
                    self.process_player(retired_id, index_url)

            # Active players get the extra positional flag set to True.
            for active_id in self.player_list_parser.active_player_ids:
                self.process_player(active_id, index_url, True)
コード例 #2
0
    def scrape_ballpark_factors(self):
        """
        Download the Rotogrinders ballpark-factors page and parse it.

        The page is fetched through ``MLBUtilities.fetch_data`` and handed to
        a freshly created ``RotogrindersBallparkFactorsParser``.  Nothing is
        returned.
        """
        host = "rotogrinders.com"
        path = "/pages/Ballpark_Factors-49556"
        page = MLBUtilities.fetch_data(host, path, True)
        RotogrindersBallparkFactorsParser().parse(page)
コード例 #3
0
    def process(self):
        """
        Scrape the starting line-ups in an endless loop, pausing ten minutes
        between passes.

        On each pass the line-up page is read either from www.rotowire.com
        (when ``self.source`` equals ``"site"``) or from the local file named
        by ``self.source``, and handed to a new ``RotoworldLineupScraper``.
        The elapsed time of the pass is printed.  This method never returns.
        """
        while True:
            start = time.time()

            source_file = None
            if self.source == "site":
                data = MLBUtilities.fetch_data("www.rotowire.com",
                                               "/baseball/daily_lineups.htm",
                                               True)
            else:
                source_file = open(self.source)
                data = source_file

            try:
                rotoworld_scraper = RotoworldLineupScraper(
                    sleep_time=self.sleep_time)
                rotoworld_scraper.parse(data)
            finally:
                # The loop never ends, so a file left open here would leak
                # one handle per pass; close it even when parsing fails.
                if source_file is not None:
                    source_file.close()

            end = time.time()
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Scraped starting line-ups in {} minutes".format(
                (end - start) / 60.0))

            print("All done.  Sleeping for 10 minutes then re-evaluating...")
            time.sleep(60 * 10)
コード例 #4
0
    def scrape_yesterdays_lineups(self):
        """
        Pre-fetch stats for the players in each team's most recent lineup.

        There is likely little change in who is in the lineup from one game
        to the next (the exact spot may change), so refreshing the players
        from the last game gives a good head start.

        The teams come from ``parse_teams_only`` applied to the Rotowire
        daily-lineups page.  For each team the players returned by
        ``lineup_manager.find_team_last_game`` are walked.  Players that are
        already processed, have no stored player record, or are pitchers are
        skipped.  Everyone else is refreshed through
        ``bbr_scraper.process_player`` and then written to the lineup with
        ``lineup_manager.add_player_to_lineup``.
        """
        # Re-sync the BBR scraper with the current settings, in case what was
        # passed in from the CLI differs from what it was created with.
        self.bbr_scraper.sleep_time = self.sleep_time
        self.bbr_scraper.scrape_bvp = self.scrape_bvp

        rotoworld_scraper = RotoworldLineupScraper(sleep_time=self.sleep_time)
        data = MLBUtilities.fetch_data("www.rotowire.com",
                                       "/baseball/daily_lineups.htm", True)
        teams, team_details = rotoworld_scraper.parse_teams_only(data)

        for team in teams:
            players = self.lineup_manager.find_team_last_game(team)
            for player in players:
                player_id = player[MLBConstants.PLAYER_ID]
                # Lineup ids carry "_" where the player id has "."; undo that
                # for the player lookup and the BBR request.
                unescaped_player = player_id.replace("_", ".")

                # Skip player if they've already been processed.
                if self.lineup_manager.is_processed(player_id):
                    logging.info(
                        "Skipping {}, already processed.".format(player_id))
                    continue

                start = time.time()

                player_record = self.player_manager.players_collection.find_one(
                    {"player_id": unescaped_player})
                if player_record is None:
                    # Without a stored record the position check below would
                    # raise; skip this player instead of aborting the run.
                    logging.warning(
                        "Skipping {}, no player record found.".format(
                            player_id))
                    continue

                # Ignore pitchers
                position = player_record[MLBConstants.POSITION]
                if position.lower() == MLBConstants.PITCHER_TYPE:
                    logging.info(
                        "{} is a pitcher.  Skipping...".format(player_id))
                    continue

                # Found a player.  Let's update their stuff.
                url = "/players/{}/".format(unescaped_player[0:1])
                self.bbr_scraper.process_player(unescaped_player,
                                                url,
                                                active=True)

                # Mark the player as processed (write to the lineup) once
                # their stats have been updated.  Work on a copy so that one
                # player's position does not leak into the shared team
                # details used for the next player of the same team.
                player_data = dict(team_details[team])
                if not player[MLBConstants.POSITION]:
                    # NOTE(review): assumes find_player_position_last_game
                    # returns the position; the original discarded the
                    # result.  A None result leaves the position untouched.
                    last_position = (
                        self.lineup_manager.find_player_position_last_game(
                            player_id))
                    if last_position is not None:
                        player_data[MLBConstants.POSITION] = last_position
                else:
                    player_data[MLBConstants.POSITION] = player[
                        MLBConstants.POSITION]

                self.lineup_manager.add_player_to_lineup(player_id,
                                                         player_data)

                end = time.time()
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("Processed {} in {} seconds".format(
                    player_id, end - start))
コード例 #5
0
    def parse_standard_pitching(self, soup):
        """
        Parse the season rows of the Standard Pitching table.

        Every element of ``soup`` whose id matches
        ``self.pitching_standard_season_regex`` is treated as one season row.
        The first cell is the season label.  The following cells are stored,
        by position, in
        ``self.player_data[MLBConstants.STANDARD_PITCHING][season]``.  A wOBA
        value from ``self.stat_calculator.calculate_woba`` is added per row.
        Rows without any ``td`` cell are skipped.

        :param soup: parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against the caller).
        """
        # Columns 1..33 in table order: (stat key, resolve_value type).
        # A type of None means the raw cell text is stored unchanged.
        columns = (
            (MLBConstants.AGE, "int"),
            (MLBConstants.TEAM, None),
            (MLBConstants.LEAGUE, None),
            (MLBConstants.WINS, "int"),
            (MLBConstants.LOSSES, "int"),
            (MLBConstants.WIN_LOSS_PCT, "float"),
            (MLBConstants.ERA, "float"),
            (MLBConstants.GAMES, "int"),
            (MLBConstants.GAMES_STARTED, "int"),
            (MLBConstants.GAMES_FINISHED, "int"),
            (MLBConstants.COMPLETE_GAMES, "int"),
            (MLBConstants.SHUT_OUTS, "int"),
            (MLBConstants.SAVES, "int"),
            (MLBConstants.INNINGS_PITCHED, "float"),
            (MLBConstants.HITS, "int"),
            (MLBConstants.RUNS, "int"),
            (MLBConstants.EARNED_RUNS, "int"),
            (MLBConstants.HOME_RUNS, "int"),
            (MLBConstants.WALKS, "int"),
            (MLBConstants.INTENTIONAL_WALKS, "int"),
            (MLBConstants.STRIKE_OUTS, "int"),
            (MLBConstants.HIT_BY_PITCH, "int"),
            (MLBConstants.BALKS, "int"),
            (MLBConstants.WILD_PITCHES, "int"),
            (MLBConstants.BATTERS_FACED, "int"),
            (MLBConstants.ERA_PLUS, "float"),
            (MLBConstants.FIP, "float"),
            (MLBConstants.WHIP, "float"),
            (MLBConstants.HITS_PER_9_INNINGS, "float"),
            (MLBConstants.HOME_RUNS_PER_9_INNINGS, "float"),
            (MLBConstants.WALKS_PER_9_INNINGS, "float"),
            (MLBConstants.STRIKE_OUTS_PER_9_INNINGS, "float"),
            (MLBConstants.STRIKE_OUT_TO_WALK_RATIO, "float"),
        )

        pitching_standard_entries = soup.find_all(
            id=self.pitching_standard_season_regex)

        for entry in pitching_standard_entries:
            tds = entry.find_all("td")
            if not tds:
                # No season cell means no season bucket; the wOBA step below
                # would otherwise fail with a KeyError.
                continue

            season = tds[0].text
            if MLBConstants.STANDARD_PITCHING not in self.player_data:
                self.player_data[MLBConstants.STANDARD_PITCHING] = {}

            # A repeated season label starts over with a fresh bucket.
            season_stats = {}
            self.player_data[MLBConstants.STANDARD_PITCHING][season] = (
                season_stats)

            # zip stops at the shorter side: surplus cells are ignored and a
            # short row simply yields fewer stats.
            for (stat_key, value_type), td in zip(columns, tds[1:]):
                if value_type is None:
                    season_stats[stat_key] = td.text
                else:
                    season_stats[stat_key] = MLBUtilities.resolve_value(
                        td.text, value_type)

            # FIP is taken from the table above; only wOBA is computed here.
            season_stats[MLBConstants.WOBA] = (
                self.stat_calculator.calculate_woba(season_stats))
コード例 #6
0
    def parse_player_value_batting(self, soup):
        """
        Parse the season rows matched by ``self.player_value_batting_regex``.

        For each matching element the first ``td`` cell is the season label.
        Cells 6 through 20 are converted with ``MLBUtilities.resolve_value``
        and stored in
        ``self.player_data[MLBConstants.PLAYER_VALUE_BATTING][season]``.
        Cells 1 through 5 and anything past cell 20 are ignored.
        """
        # Cell index -> (stat key, resolve_value type).
        columns = {
            6: (MLBConstants.RUNS_BATTING, "int"),
            7: (MLBConstants.RUNS_FROM_BASERUNNING, "int"),
            8: (MLBConstants.RUNS_GROUNDED_INTO_DOUBLE_PLAY, "int"),
            9: (MLBConstants.RUNS_FROM_FIELDING, "int"),
            10: (MLBConstants.RUNS_FROM_POSITION_SCARCITY, "int"),
            11: (MLBConstants.RAA, "int"),
            12: (MLBConstants.WAA, "float"),
            13: (MLBConstants.RUNS_FROM_REPLACEMENT_LEVEL, "int"),
            14: (MLBConstants.RAR, "float"),
            15: (MLBConstants.WAR, "float"),
            16: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM, "float"),
            17: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM_SEASON, "float"),
            18: (MLBConstants.OFF_WAR, "float"),
            19: (MLBConstants.DEF_WAR, "float"),
            20: (MLBConstants.OFF_RAR, "float"),
        }

        for row in soup.find_all(id=self.player_value_batting_regex):
            season_stats = None
            for index, cell in enumerate(row.find_all("td")):
                if index == 0:
                    season = cell.text
                    if MLBConstants.PLAYER_VALUE_BATTING not in self.player_data:
                        self.player_data[
                            MLBConstants.PLAYER_VALUE_BATTING] = {}

                    # Each row starts with an empty bucket for its season.
                    season_stats = {}
                    self.player_data[
                        MLBConstants.PLAYER_VALUE_BATTING][season] = (
                            season_stats)
                elif index in columns:
                    stat_key, value_type = columns[index]
                    season_stats[stat_key] = MLBUtilities.resolve_value(
                        cell.text, value_type)
コード例 #7
0
    def parse_standard_batting(self, soup):
        """
        Parse the season rows of the Standard Batting table.

        Every element of ``soup`` whose id matches
        ``self.batting_standard_season_regex`` is treated as one season row.
        The first cell is the season label.  The following cells are stored,
        by position, in
        ``self.player_data[MLBConstants.STANDARD_BATTING][season]``.  A wOBA
        value from ``self.stat_calculator.calculate_woba`` is added per row.
        Rows without any ``td`` cell are skipped.

        :param soup: parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against the caller).
        """
        # Columns 1..28 in table order: (stat key, kind).
        #   "int" / "float": converted through MLBUtilities.resolve_value
        #   "link": text of the cell's <a> child, else the cell text
        #   "text": raw cell text
        columns = (
            (MLBConstants.AGE, "int"),
            (MLBConstants.TEAM, "link"),
            (MLBConstants.LEAGUE, "link"),
            (MLBConstants.GAMES_PLAYED, "int"),
            (MLBConstants.PLATE_APPEARANCES, "int"),
            (MLBConstants.AT_BATS, "int"),
            (MLBConstants.RUNS, "int"),
            (MLBConstants.HITS, "int"),
            (MLBConstants.DOUBLES, "int"),
            (MLBConstants.TRIPLES, "int"),
            (MLBConstants.HOME_RUNS, "int"),
            (MLBConstants.RBI, "int"),
            (MLBConstants.STOLEN_BASES, "int"),
            (MLBConstants.CAUGHT_STEALING, "int"),
            (MLBConstants.WALKS, "int"),
            (MLBConstants.STRIKE_OUTS, "int"),
            (MLBConstants.BATTING_AVERAGE, "float"),
            (MLBConstants.ON_BASE_PERCENTAGE, "float"),
            (MLBConstants.SLUGGING_PERCENTAGE, "float"),
            (MLBConstants.OPS, "float"),
            (MLBConstants.OPS_PLUS, "float"),
            (MLBConstants.TOTAL_BASES, "int"),
            (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
            (MLBConstants.HIT_BY_PITCH, "int"),
            (MLBConstants.SACRIFICE_HITS, "int"),
            (MLBConstants.SACRIFICE_FLIES, "int"),
            (MLBConstants.INTENTIONAL_WALKS, "int"),
            (MLBConstants.POSITION, "text"),
        )

        batting_standard_entries = soup.find_all(
            id=self.batting_standard_season_regex)

        for entry in batting_standard_entries:
            tds = entry.find_all("td")
            if not tds:
                # No season cell means no season bucket; the wOBA step below
                # would otherwise fail with a KeyError.
                continue

            season = tds[0].text
            if MLBConstants.STANDARD_BATTING not in self.player_data:
                self.player_data[MLBConstants.STANDARD_BATTING] = {}

            # A repeated season label starts over with a fresh bucket.
            season_stats = {}
            self.player_data[MLBConstants.STANDARD_BATTING][season] = (
                season_stats)

            # zip stops at the shorter side: surplus cells are ignored and a
            # short row simply yields fewer stats.
            for (stat_key, kind), td in zip(columns, tds[1:]):
                if kind == "text":
                    value = td.text
                elif kind == "link":
                    try:
                        value = td.a.text
                    except AttributeError:
                        # No <a> child in this cell; use the cell text.
                        value = td.text
                else:
                    value = MLBUtilities.resolve_value(td.text, kind)
                season_stats[stat_key] = value

            # Compute additional stats
            season_stats[MLBConstants.WOBA] = (
                self.stat_calculator.calculate_woba(season_stats))
コード例 #8
0
    def parse_player_value_pitchers(self, soup):
        """
        Parse the season rows of the Player Value--Pitchers table.

        For each element matched by ``self.player_value_pitching_regex`` the
        first ``td`` cell is the season label.  Cells 8 through 21 are
        converted with ``MLBUtilities.resolve_value`` and stored in
        ``self.player_data[MLBConstants.PLAYER_VALUE_PITCHING][season]``.
        Cell 22 is the salary, with "$" and "," removed before conversion.
        All other cells are ignored.
        """
        # Cell index -> (stat key, resolve_value type).
        columns = {
            8: (MLBConstants.RUNS_ALLOWED_PER_9_INNINGS, "float"),
            9: (MLBConstants.RUNS_ALLOWED_PER_9_INNINGS_OPP, "float"),
            10: (MLBConstants.RUNS_PER_9_INNINGS_IN_SUPPORT_FROM_DEFENSE,
                 "float"),
            11: (MLBConstants.RUNS_PER_9_INNINGS_BY_ROLE, "float"),
            12: (MLBConstants.PARK_FACTORS, "float"),
            13: (MLBConstants.RUNS_PER_9_INNINGS_FOR_AVG_PITCHER, "float"),
            14: (MLBConstants.RUNS_BETTER_THAN_AVG, "int"),
            15: (MLBConstants.WINS_ABOVE_AVG, "float"),
            16: (MLBConstants.GAME_ENTERING_LEVERAGE_INDEX, "float"),
            17: (MLBConstants.WINS_ABOVE_AVG_ADJUSTMENT, "float"),
            18: (MLBConstants.WINS_ABOVE_REPLACEMENT, "float"),
            19: (MLBConstants.RUNS_BETTER_THAN_REPLACEMENT, "int"),
            20: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM, "float"),
            21: (MLBConstants.WIN_LOSS_PCT_WITH_AVG_TEAM_SEASON, "float"),
        }
        salary_index = 22

        for row in soup.find_all(id=self.player_value_pitching_regex):
            season_stats = None
            for index, cell in enumerate(row.find_all("td")):
                if index == 0:
                    season = cell.text
                    if MLBConstants.PLAYER_VALUE_PITCHING not in self.player_data:
                        self.player_data[
                            MLBConstants.PLAYER_VALUE_PITCHING] = {}

                    # Each row starts with an empty bucket for its season.
                    season_stats = {}
                    self.player_data[
                        MLBConstants.PLAYER_VALUE_PITCHING][season] = (
                            season_stats)
                elif index in columns:
                    stat_key, value_type = columns[index]
                    season_stats[stat_key] = MLBUtilities.resolve_value(
                        cell.text, value_type)
                elif index == salary_index:
                    # Drop the currency symbol and thousands separators.
                    digits = cell.text.replace('$', '').replace(',', '')
                    season_stats[MLBConstants.SALARY] = (
                        MLBUtilities.resolve_value(digits, "int"))
コード例 #9
0
    def parse_pitching_stats(self, soup):
        """
        Parse gamelog pitching stats.

        Every element of ``soup`` whose ``id`` matches
        ``self.pitching_gamelog_regex`` is handled as one game. Its ``td``
        cells are read by position and stored in the dict at
        ``self.player_data[MLBConstants.PLAYER_GAMELOG_PITCHING]
        [self.season][game_number]``, where ``game_number`` is the text of
        the cell at index 2. Missing levels of that nesting are created on
        demand. Cells at indexes 0-1 and above 47 are ignored.

        The date (cell 3) is stored only when ``int(self.season) >= 1900``.
        A pitch count (cell 21) that cannot be resolved is logged and left
        unset; any other failure propagates to the caller.

        Args:
            soup: Parsed gamelog page. Must provide ``find_all(id=...)``
                (presumably a BeautifulSoup document -- confirm).

        Returns:
            None. Results are written into ``self.player_data``.
        """
        gamelog_key = MLBConstants.PLAYER_GAMELOG_PITCHING

        # Cells stored as their raw text: column index -> stat key.
        text_columns = {
            8: MLBConstants.INNINGS,
            9: MLBConstants.DECISION,
            30: MLBConstants.PLAYER_GAME_SCORE,
        }

        # Cells whose text goes through MLBUtilities.resolve_value:
        # column index -> (stat key, resolve_value type name).
        resolved_columns = {
            10: (MLBConstants.DAYS_REST, "int"),
            12: (MLBConstants.HITS, "int"),
            13: (MLBConstants.RUNS, "int"),
            14: (MLBConstants.EARNED_RUNS, "int"),
            15: (MLBConstants.WALKS, "int"),
            16: (MLBConstants.STRIKE_OUTS, "int"),
            17: (MLBConstants.HOME_RUNS, "int"),
            18: (MLBConstants.HIT_BY_PITCH, "int"),
            19: (MLBConstants.ERA, "float"),
            20: (MLBConstants.BATTERS_FACED, "int"),
            22: (MLBConstants.STRIKES, "int"),
            23: (MLBConstants.STRIKES_LOOKING, "int"),
            24: (MLBConstants.STRIKES_SWINGING, "int"),
            25: (MLBConstants.GROUND_BALLS, "int"),
            26: (MLBConstants.FLY_BALLS, "int"),
            27: (MLBConstants.LINE_DRIVES, "int"),
            28: (MLBConstants.POP_UPS, "int"),
            29: (MLBConstants.UNKNOWN_BATTED_BALLS, "int"),
            31: (MLBConstants.INHERITED_RUNNERS, "int"),
            32: (MLBConstants.INHERITED_SCORE, "int"),
            33: (MLBConstants.STOLEN_BASES, "int"),
            34: (MLBConstants.CAUGHT_STEALING, "int"),
            35: (MLBConstants.PICK_OFFS, "int"),
            36: (MLBConstants.AT_BATS, "int"),
            37: (MLBConstants.DOUBLES, "int"),
            38: (MLBConstants.TRIPLES, "int"),
            39: (MLBConstants.INTENTIONAL_WALKS, "int"),
            40: (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
            41: (MLBConstants.SACRIFICE_FLIES, "int"),
            42: (MLBConstants.REACHED_ON_ERROR, "int"),
            43: (MLBConstants.AVERAGE_LEVERAGE_INDEX, "float"),
            44: (MLBConstants.WIN_PROBABILITY_ADDED_BY_PITCHER, "float"),
            45: (MLBConstants.BASE_OUT_RUNS_SAVED, "float"),
        }

        for entry in soup.find_all(id=self.pitching_gamelog_regex):
            # Dict receiving this game's stats; bound at column 2, which
            # always precedes every column that writes into it.
            game = None

            for i, td in enumerate(entry.find_all("td")):
                if i < 2:
                    continue

                if i == 2:
                    # The cell text is the game number; create the
                    # gamelog -> season -> game nesting only where missing
                    # so earlier data for the same game is kept.
                    seasons = self.player_data.setdefault(gamelog_key, {})
                    games = seasons.setdefault(self.season, {})
                    game = games.setdefault(td.text, {})
                elif i in resolved_columns:
                    stat_key, value_type = resolved_columns[i]
                    game[stat_key] = MLBUtilities.resolve_value(
                        td.text, value_type)
                elif i in text_columns:
                    game[text_columns[i]] = td.text
                elif i == 3:
                    # The cell holds month and day only; the year comes
                    # from self.season. Seasons before 1900 are skipped
                    # (presumably because mktime cannot represent them on
                    # every platform -- confirm).
                    if int(self.season) >= 1900:
                        date_text = "{} {}".format(
                            td.a.text.replace(u'\xa0', u' '), self.season)
                        game[MLBConstants.DATE] = datetime.fromtimestamp(
                            mktime(strptime(date_text, "%b %d %Y")))
                elif i == 4:
                    game[MLBConstants.TEAM] = td.a.text
                elif i == 5:
                    # An "@" marks an away game.
                    game[MLBConstants.HOME_GAME] = td.text != "@"
                elif i == 6:
                    game[MLBConstants.OPPONENT] = td.a.text
                elif i == 7:
                    # NOTE(review): a cell that does not match result_regex
                    # raises AttributeError here, as it always has.
                    m = self.result_regex.match(td.text)
                    game[MLBConstants.RESULT] = m.group(1)
                    game[MLBConstants.TEAM_SCORE] = int(m.group(2))
                    game[MLBConstants.OPPONENT_SCORE] = int(m.group(3))
                elif i == 11:
                    game[MLBConstants.
                         INNINGS_PITCHED] = MLBUtilities.resolve_value(
                             td.span.text, "float")
                elif i == 21:
                    # Best effort: try the link text first, then the plain
                    # cell text. Catch Exception rather than using a bare
                    # except so KeyboardInterrupt/SystemExit still
                    # propagate.
                    try:
                        game[MLBConstants.
                             NUM_PITCHES] = MLBUtilities.resolve_value(
                                 td.a.text, "int")
                    except Exception:
                        try:
                            game[MLBConstants.
                                 NUM_PITCHES] = MLBUtilities.resolve_value(
                                     td.text, "int")
                        except Exception:
                            logging.info("Giving up on NUM_PITCHES")
                elif i == 46:
                    game[MLBConstants.ENTRY_SITUATION] = td.span.text.strip()
                elif i == 47:
                    game[MLBConstants.EXIT_SITUATION] = td.text.strip()
コード例 #10
0
    def parse_batting_stats(self, soup):
        """
        Parse gamelog batting stats.

        Every element of ``soup`` whose ``id`` matches
        ``self.batting_gamelog_regex`` is handled as one game. Its ``td``
        cells are read by position and stored in the dict at
        ``self.player_data[MLBConstants.PLAYER_GAMELOG_BATTING]
        [self.season][game_number]``, where ``game_number`` is the text of
        the cell at index 2. Missing levels of that nesting are created on
        demand. Cells at indexes 0-1 and above 35 are ignored.

        The date (cell 3) is stored only when ``int(self.season) >= 1900``,
        mirroring the guard in ``parse_pitching_stats``. Any parsing
        failure propagates to the caller.

        Args:
            soup: Parsed gamelog page. Must provide ``find_all(id=...)``
                (presumably a BeautifulSoup document -- confirm).

        Returns:
            None. Results are written into ``self.player_data``.
        """
        gamelog_key = MLBConstants.PLAYER_GAMELOG_BATTING

        # Cells stored as their raw text: column index -> stat key.
        text_columns = {
            8: MLBConstants.INNINGS,
            35: MLBConstants.POSITION,
        }

        # Cells whose text goes through MLBUtilities.resolve_value:
        # column index -> (stat key, resolve_value type name).
        resolved_columns = {
            10: (MLBConstants.AT_BATS, "int"),
            11: (MLBConstants.RUNS, "int"),
            12: (MLBConstants.HITS, "int"),
            13: (MLBConstants.DOUBLES, "int"),
            14: (MLBConstants.TRIPLES, "int"),
            15: (MLBConstants.HOME_RUNS, "int"),
            16: (MLBConstants.RBI, "int"),
            17: (MLBConstants.WALKS, "int"),
            18: (MLBConstants.INTENTIONAL_WALKS, "int"),
            19: (MLBConstants.STRIKE_OUTS, "int"),
            20: (MLBConstants.HIT_BY_PITCH, "int"),
            21: (MLBConstants.SACRIFICE_HITS, "int"),
            22: (MLBConstants.SACRIFICE_FLIES, "int"),
            23: (MLBConstants.REACHED_ON_ERROR, "int"),
            24: (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
            25: (MLBConstants.STOLEN_BASES, "int"),
            26: (MLBConstants.CAUGHT_STEALING, "int"),
            27: (MLBConstants.BATTING_AVERAGE, "float"),
            28: (MLBConstants.ON_BASE_PERCENTAGE, "float"),
            29: (MLBConstants.SLUGGING_PERCENTAGE, "float"),
            30: (MLBConstants.OPS, "float"),
            31: (MLBConstants.BATTING_ORDER_POSITION, "int"),
            32: (MLBConstants.AVERAGE_LEVERAGE_INDEX, "float"),
            33: (MLBConstants.WIN_PROBABILITY_ADDED, "float"),
            34: (MLBConstants.BASE_OUT_RUNS_ADDED, "float"),
        }

        for entry in soup.find_all(id=self.batting_gamelog_regex):
            # Dict receiving this game's stats; bound at column 2, which
            # always precedes every column that writes into it.
            game = None

            for i, td in enumerate(entry.find_all("td")):
                if i < 2:
                    continue

                if i == 2:
                    # The cell text is the game number; create the
                    # gamelog -> season -> game nesting only where missing
                    # so earlier data for the same game is kept.
                    seasons = self.player_data.setdefault(gamelog_key, {})
                    games = seasons.setdefault(self.season, {})
                    game = games.setdefault(td.text, {})
                elif i in resolved_columns:
                    stat_key, value_type = resolved_columns[i]
                    game[stat_key] = MLBUtilities.resolve_value(
                        td.text, value_type)
                elif i in text_columns:
                    game[text_columns[i]] = td.text
                elif i == 3:
                    # The cell holds month and day only; the year comes
                    # from self.season. Seasons before 1900 are skipped,
                    # as in parse_pitching_stats (presumably because
                    # mktime cannot represent them on every platform --
                    # confirm).
                    if int(self.season) >= 1900:
                        date_text = "{} {}".format(
                            td.a.text.replace(u'\xa0', u' '), self.season)
                        game[MLBConstants.DATE] = datetime.fromtimestamp(
                            mktime(strptime(date_text, "%b %d %Y")))
                elif i == 4:
                    game[MLBConstants.TEAM] = td.a.text
                elif i == 5:
                    # An "@" marks an away game.
                    game[MLBConstants.HOME_GAME] = td.text != "@"
                elif i == 6:
                    game[MLBConstants.OPPONENT] = td.a.text
                elif i == 7:
                    # NOTE(review): a cell that does not match result_regex
                    # raises AttributeError here, as it always has.
                    m = self.result_regex.match(td.text)
                    game[MLBConstants.RESULT] = m.group(1)
                    game[MLBConstants.TEAM_SCORE] = int(m.group(2))
                    game[MLBConstants.OPPONENT_SCORE] = int(m.group(3))
                elif i == 9:
                    game[MLBConstants.
                         PLATE_APPEARANCES] = MLBUtilities.resolve_value(
                             td.span.text, "int")
コード例 #11
0
    def process(self):
        players = self.lineup_manager.lineups_collection.find_one(
            {"date": str(self.game_date)}, {"players": 1})

        batter_csv_contents = [[
            "Name", "Team", "Opponent", "Verified", "Position",
            "Batting Order Position", "wOBA", "wOBA vs Pitcher Type (LH/RH)",
            "OPS vs Pitcher Type (LH/RH)", "OPS",
            "Plate Appearances vs Pitcher", "Avg vs Pitcher",
            "Hits vs Pitcher", "HRs vs Pitcher", "Park Runs", "Park HRs",
            "Vegas Line", "O/U"
        ]]
        pitcher_csv_contents = [[
            "Name", "Team", "Opponent", "Verified", "LH/RH", "FIP", "wOBA",
            "wOBA vs RHB", "wOBA vs LHB", "BABIP vs RHB", "BABIP vs LHB",
            "K/9", "BB/9", "Park Runs", "Park HRs", "Vegas Line", "O/U"
        ]]

        ballpark_data = self.ballpark_collection.find_one(
            {"date": str(self.game_date)})

        for player in players["players"]:
            player_lineup_data = players["players"][player]
            if len(player_lineup_data) < 8:
                continue

            player_csv_data = []

            escaped_player_id = player.replace("_", ".")

            player_data = self.player_manager.players_collection.find_one(
                {MLBConstants.PLAYER_ID: escaped_player_id}, {
                    MLBConstants.POSITION: 1,
                    MLBConstants.NAME: 1
                })
            player_csv_data.append(player_data[MLBConstants.NAME].encode(
                'ascii', errors='ignore'))
            player_csv_data.append(player_lineup_data[MLBConstants.TEAM])
            player_csv_data.append(player_lineup_data[MLBConstants.OPPONENT])
            player_csv_data.append(
                str(player_lineup_data[MLBConstants.VERIFIED]))

            is_batter = player_data[MLBConstants.POSITION] != "Pitcher"

            ######################
            # Player is a batter
            ######################
            if is_batter:
                # Retrieve opposing pitcher data
                opposing_pitcher_data = self.player_manager.players_collection.find_one(
                    {
                        MLBConstants.NAME:
                        player_lineup_data["opposing_pitcher"]
                    }, {
                        MLBConstants.PLAYER_ID: 1,
                        MLBConstants.HANDEDNESS_THROWING: 1
                    })
                batter_data = self.player_manager.players_collection.find_one(
                    {MLBConstants.PLAYER_ID: escaped_player_id}, {
                        MLBConstants.POSITION:
                        1,
                        MLBConstants.HANDEDNESS_BATTING:
                        1,
                        "{}.{}.{}".format(MLBConstants.STANDARD_BATTING, self.season, MLBConstants.WOBA):
                        1,
                        "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.SPLITS_VS_RHP, MLBConstants.WOBA):
                        1,
                        "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.SPLITS_VS_LHP, MLBConstants.WOBA):
                        1,
                        "{}.{}.{}".format(MLBConstants.STANDARD_BATTING, self.season, MLBConstants.OPS):
                        1,
                        "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.SPLITS_VS_RHP, MLBConstants.OPS):
                        1,
                        "{}.{}.{}.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.SPLITS_VS_LHP, MLBConstants.OPS):
                        1,
                        MLBConstants.BATTER_VS_PITCHER:
                        1
                    })

                if player_lineup_data["home"]:
                    ballpark_hits = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.TEAM])][
                                MLBConstants.HITS]
                    ballpark_home_runs = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.TEAM])][
                                MLBConstants.HOME_RUNS]
                else:
                    ballpark_hits = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.OPPONENT])][
                                MLBConstants.HITS]
                    ballpark_home_runs = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.OPPONENT])][
                                MLBConstants.HOME_RUNS]

                player_csv_data.append(
                    player_lineup_data[MLBConstants.POSITION].replace(
                        ",", "/"))
                player_csv_data.append(
                    str(player_lineup_data[
                        MLBConstants.BATTING_ORDER_POSITION]))

                if self.season in batter_data[MLBConstants.STANDARD_BATTING]:
                    player_csv_data.append(
                        str(batter_data[MLBConstants.STANDARD_BATTING][
                            self.season][MLBConstants.WOBA]))
                else:
                    player_csv_data.append("N/A")

                if self.season in batter_data[MLBConstants.BATTER_SPLITS]:
                    if opposing_pitcher_data is None or MLBConstants.HANDEDNESS_THROWING not in opposing_pitcher_data\
                      or (MLBConstants.SPLITS_VS_LHP not in batter_data[MLBConstants.BATTER_SPLITS][self.season] and opposing_pitcher_data[MLBConstants.HANDEDNESS_THROWING] == "Left")\
                      or (MLBConstants.SPLITS_VS_RHP not in batter_data[MLBConstants.BATTER_SPLITS][self.season] and opposing_pitcher_data[MLBConstants.HANDEDNESS_THROWING] == "Right"):
                        player_csv_data.append("N/A")
                        player_csv_data.append("N/A")
                    elif opposing_pitcher_data[
                            MLBConstants.HANDEDNESS_THROWING] == "Right":
                        player_csv_data.append(
                            str(batter_data[MLBConstants.BATTER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_RHP][
                                    MLBConstants.WOBA]))
                        player_csv_data.append(
                            str(batter_data[MLBConstants.BATTER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_RHP][
                                    MLBConstants.OPS]))
                    else:
                        player_csv_data.append(
                            str(batter_data[MLBConstants.BATTER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_LHP][
                                    MLBConstants.WOBA]))
                        player_csv_data.append(
                            str(batter_data[MLBConstants.BATTER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_LHP][
                                    MLBConstants.OPS]))
                else:
                    player_csv_data.append("N/A")
                    player_csv_data.append("N/A")

                if self.season in batter_data[MLBConstants.STANDARD_BATTING]:
                    player_csv_data.append(
                        str(batter_data[MLBConstants.STANDARD_BATTING][
                            self.season][MLBConstants.OPS]))
                else:
                    player_csv_data.append("N/A")

                # BvP
                if opposing_pitcher_data is not None and opposing_pitcher_data[
                        MLBConstants.PLAYER_ID] in batter_data[
                            MLBConstants.BATTER_VS_PITCHER]:
                    player_csv_data.append(
                        str(batter_data[MLBConstants.BATTER_VS_PITCHER][
                            opposing_pitcher_data[MLBConstants.PLAYER_ID]][
                                MLBConstants.PLATE_APPEARANCES]))
                    player_csv_data.append(
                        str(batter_data[MLBConstants.BATTER_VS_PITCHER][
                            opposing_pitcher_data[MLBConstants.PLAYER_ID]][
                                MLBConstants.BATTING_AVERAGE]))
                    player_csv_data.append(
                        str(batter_data[MLBConstants.BATTER_VS_PITCHER][
                            opposing_pitcher_data[MLBConstants.PLAYER_ID]][
                                MLBConstants.HITS]))
                    player_csv_data.append(
                        str(batter_data[MLBConstants.BATTER_VS_PITCHER][
                            opposing_pitcher_data[MLBConstants.PLAYER_ID]][
                                MLBConstants.HOME_RUNS]))
                else:
                    player_csv_data.append("N/A")
                    player_csv_data.append("N/A")
                    player_csv_data.append("N/A")
                    player_csv_data.append("N/A")

                # Park factors
                player_csv_data.append(str(ballpark_hits))
                player_csv_data.append(str(ballpark_home_runs))

                ######################
                # Vegas line and O/U
                ######################
                if MLBConstants.VEGAS_LINE in player_lineup_data:
                    player_csv_data.append(
                        player_lineup_data[MLBConstants.VEGAS_LINE])
                else:
                    player_csv_data.append("N/A")

                if MLBConstants.OVER_UNDER in player_lineup_data:
                    player_csv_data.append(
                        player_lineup_data[MLBConstants.OVER_UNDER])
                else:
                    player_csv_data.append("N/A")

                batter_csv_contents.append(player_csv_data)

            #######################
            # Player is a pitcher
            #######################
            else:
                pitcher_data = self.player_manager.players_collection.find_one(
                    {MLBConstants.PLAYER_ID: escaped_player_id}, {
                        MLBConstants.POSITION:
                        1,
                        MLBConstants.HANDEDNESS_THROWING:
                        1,
                        "{}.{}.{}".format(MLBConstants.STANDARD_PITCHING, self.season, MLBConstants.FIP):
                        1,
                        "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.FIP):
                        1,
                        "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.FIP):
                        1,
                        "{}.{}.{} Totals.{}".format(
                            MLBConstants.PITCHER_SPLITS, self.season, self.season, MLBConstants.WOBA):
                        1,
                        "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.WOBA):
                        1,
                        "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.WOBA):
                        1,
                        "{}.{}.vs RHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.BABIP):
                        1,
                        "{}.{}.vs LHB.{}".format(MLBConstants.PITCHER_SPLITS, self.season, MLBConstants.BABIP):
                        1,
                        "{}.{}.{}".format(
                            MLBConstants.STANDARD_PITCHING, self.season, MLBConstants.STRIKE_OUTS_PER_9_INNINGS):
                        1,
                        "{}.{}.{}".format(MLBConstants.STANDARD_PITCHING, self.season, MLBConstants.WALKS_PER_9_INNINGS):
                        1,
                        MLBConstants.BATTER_VS_PITCHER:
                        1
                    })

                # Do a quick check to make sure the pitcher has stats for the current season.
                # If the current season isn't available then bail on this pitcher.
                if self.season not in pitcher_data[
                        MLBConstants.
                        STANDARD_PITCHING] or self.season not in pitcher_data[
                            MLBConstants.PITCHER_SPLITS]:
                    print "Could not find season {} for either Standard Pitching or Splits for {}.  Not sure how that would happen".format(
                        self.season, escaped_player_id)
                    continue

                if player_lineup_data["home"]:
                    ballpark_hits = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.TEAM])][
                                MLBConstants.HITS]
                    ballpark_home_runs = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.TEAM])][
                                MLBConstants.HOME_RUNS]
                else:
                    ballpark_hits = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.OPPONENT])][
                                MLBConstants.HITS]
                    ballpark_home_runs = ballpark_data[MLBConstants.BPF_ALL][
                        MLBUtilities.map_rg_team_to_rotowire(
                            player_lineup_data[MLBConstants.OPPONENT])][
                                MLBConstants.HOME_RUNS]

                player_csv_data.append(
                    pitcher_data[MLBConstants.HANDEDNESS_THROWING])

                if self.season in pitcher_data[MLBConstants.STANDARD_PITCHING]:
                    player_csv_data.append(
                        str(pitcher_data[MLBConstants.STANDARD_PITCHING][
                            self.season][MLBConstants.FIP]))
                else:
                    player_csv_data.append("N/A")

                if self.season in pitcher_data[
                        MLBConstants.
                        PITCHER_SPLITS] and MLBConstants.WOBA in pitcher_data[
                            MLBConstants.PITCHER_SPLITS][self.season][
                                "{} Totals".format(self.season)]:
                    player_csv_data.append(
                        str(pitcher_data[MLBConstants.PITCHER_SPLITS][
                            self.season]["{} Totals".format(
                                self.season)][MLBConstants.WOBA]))
                else:
                    player_csv_data.append("N/A")

                if MLBConstants.PITCHER_SPLITS in pitcher_data:
                    if MLBConstants.SPLITS_VS_RHB in pitcher_data[
                            MLBConstants.PITCHER_SPLITS][self.season]:
                        player_csv_data.append(
                            str(pitcher_data[MLBConstants.PITCHER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_RHB][
                                    MLBConstants.WOBA]))
                    else:
                        player_csv_data.append("N/A")
                    if MLBConstants.SPLITS_VS_LHB in pitcher_data[
                            MLBConstants.PITCHER_SPLITS][self.season]:
                        player_csv_data.append(
                            str(pitcher_data[MLBConstants.PITCHER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_LHB][
                                    MLBConstants.WOBA]))
                    else:
                        player_csv_data.append("N/A")
                    if MLBConstants.SPLITS_VS_RHB in pitcher_data[
                            MLBConstants.PITCHER_SPLITS][self.season]:
                        player_csv_data.append(
                            str(pitcher_data[MLBConstants.PITCHER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_RHB][
                                    MLBConstants.BABIP]))
                    else:
                        player_csv_data.append("N/A")
                    if MLBConstants.SPLITS_VS_LHB in pitcher_data[
                            MLBConstants.PITCHER_SPLITS][self.season]:
                        player_csv_data.append(
                            str(pitcher_data[MLBConstants.PITCHER_SPLITS][
                                self.season][MLBConstants.SPLITS_VS_LHB][
                                    MLBConstants.BABIP]))
                    else:
                        player_csv_data.append("N/A")

                if self.season in pitcher_data[MLBConstants.STANDARD_PITCHING]:
                    player_csv_data.append(
                        str(pitcher_data[MLBConstants.STANDARD_PITCHING][
                            self.season][
                                MLBConstants.STRIKE_OUTS_PER_9_INNINGS]))
                    player_csv_data.append(
                        str(pitcher_data[MLBConstants.STANDARD_PITCHING][
                            self.season][MLBConstants.WALKS_PER_9_INNINGS]))
                else:
                    player_csv_data.append("N/A")
                    player_csv_data.append("N/A")

                player_csv_data.append(str(ballpark_hits))
                player_csv_data.append(str(ballpark_home_runs))

                ######################
                # Vegas line and O/U
                ######################
                if MLBConstants.VEGAS_LINE in player_lineup_data:
                    player_csv_data.append(
                        player_lineup_data[MLBConstants.VEGAS_LINE])
                else:
                    player_csv_data.append("N/A")

                if MLBConstants.OVER_UNDER in player_lineup_data:
                    player_csv_data.append(
                        player_lineup_data[MLBConstants.OVER_UNDER])
                else:
                    player_csv_data.append("N/A")

                # Look for opponents
                opponents = []

                # for p in players["players"]:
                # 	pld = players["players"][p]
                # 	if len(pld) == 0 or pld["opposing_pitcher"] != player_data[MLBConstants.NAME]:
                # 		continue
                #
                # 	opponent = self.player_manager.players_collection.find_one({MLBConstants.PLAYER_ID: p},
                #                                                               {MLBConstants.POSITION: 1,
                #                                                                "{}.{}.{}".format(MLBConstants.STANDARD_BATTING, self.season, MLBConstants.WOBA): 1,
                #                                                                "{}.{}.vs RH Starter.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.WOBA): 1,
                #                                                                "{}.{}.vs LH Starter.{}".format(MLBConstants.BATTER_SPLITS, self.season, MLBConstants.WOBA): 1,
                #                                                                MLBConstants.BATTER_VS_PITCHER: 1})
                # 	opponents.append(opponent)
                #
                # for o in opponents:
                # 	pass

                pitcher_csv_contents.append(player_csv_data)

            print player_csv_data

        # Write results out to file
        pitcher_output = open(
            "../projections/pitchers_{}.csv".format(str(date.today())), "w")
        batter_output = open(
            "../projections/batters_{}.csv".format(str(date.today())), "w")

        for line in pitcher_csv_contents:
            pitcher_output.write(",".join(line) + "\n")
        for line in batter_csv_contents:
            batter_output.write(",".join(line) + "\n")
コード例 #12
0
    def parse_batter_splits(self, soup, season):
        """
        Parse a batter's split tables for one season into ``self.player_data``.

        Every ``<div class="stw">`` found in ``soup`` is scanned row by row.
        The first non-empty cell of a row names the split (dots in the name
        are replaced with underscores); the cells that follow are stored, in
        column order, in
        ``self.player_data[MLBConstants.BATTER_SPLITS][season][split_name]``.
        After a row has been read, its wOBA is computed with
        ``self.stat_calculator.calculate_woba`` and stored in the same dict
        under ``MLBConstants.WOBA``.

        Rows without ``<td>`` cells and rows whose cells are all empty are
        skipped.  Cells beyond the last known column are ignored.

        :param soup: parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object -- confirm against the caller).
        :param season: key under which this season's splits are stored.
        """
        # Stats held by the cells that follow the split-name cell, in column
        # order, paired with the type name handed to
        # MLBUtilities.resolve_value.
        stat_columns = (
            (MLBConstants.GAMES_PLAYED, "int"),
            (MLBConstants.GAMES_STARTED, "int"),
            (MLBConstants.PLATE_APPEARANCES, "int"),
            (MLBConstants.AT_BATS, "int"),
            (MLBConstants.RUNS, "int"),
            (MLBConstants.HITS, "int"),
            (MLBConstants.DOUBLES, "int"),
            (MLBConstants.TRIPLES, "int"),
            (MLBConstants.HOME_RUNS, "int"),
            (MLBConstants.RBI, "int"),
            (MLBConstants.STOLEN_BASES, "int"),
            (MLBConstants.CAUGHT_STEALING, "int"),
            (MLBConstants.WALKS, "int"),
            (MLBConstants.STRIKE_OUTS, "int"),
            (MLBConstants.BATTING_AVERAGE, "float"),
            (MLBConstants.ON_BASE_PERCENTAGE, "float"),
            (MLBConstants.SLUGGING_PERCENTAGE, "float"),
            (MLBConstants.OPS, "float"),
            (MLBConstants.TOTAL_BASES, "int"),
            (MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
            (MLBConstants.HIT_BY_PITCH, "int"),
            (MLBConstants.SACRIFICE_HITS, "int"),
            (MLBConstants.SACRIFICE_FLIES, "int"),
            (MLBConstants.INTENTIONAL_WALKS, "int"),
            (MLBConstants.REACHED_ON_ERROR, "int"),
            (MLBConstants.BABIP, "float"),
            (MLBConstants.T_OPS_PLUS, "int"),
            (MLBConstants.S_OPS_PLUS, "int"),
        )

        # The table heading of each div is intentionally not read: it was
        # never used, and dereferencing it raised AttributeError on any div
        # without the expected heading markup.
        for split_div in soup.find_all("div", attrs={"class": "stw"}):
            for tr in split_div.find_all("tr"):
                tds = tr.find_all("td")
                if not tds:
                    # Rows without <td> cells carry no stats.
                    continue

                # Leading empty cells are skipped; the first non-empty cell
                # is the split name and the stat columns start right after it.
                cells = iter(tds)
                name_cell = next((td for td in cells if td.text != ""), None)
                if name_cell is None:
                    # Every cell is empty, so there is no split to record.
                    # Previously this fell through to a KeyError on the ""
                    # split name.
                    continue

                split_type = name_cell.text.replace(".", "_")
                season_splits = self.player_data.setdefault(
                    MLBConstants.BATTER_SPLITS, {}).setdefault(season, {})
                # Reuse an existing entry so that a split appearing in more
                # than one row or table is merged rather than reset.
                split_stats = season_splits.setdefault(split_type, {})

                # zip stops at the shorter side: short rows fill only the
                # columns they have, long rows have their extra cells ignored.
                for (stat_key, value_type), td in zip(stat_columns, cells):
                    split_stats[stat_key] = MLBUtilities.resolve_value(
                        td.text, value_type)

                split_stats[MLBConstants.WOBA] = (
                    self.stat_calculator.calculate_woba(split_stats))
コード例 #13
0
    def parse_pitcher_splits(self, soup, season):
        split_divs = soup.find_all("div", attrs={"class": "stw"})

        for split_div in split_divs:
            table = split_div.find("table")

            pitcher_extras = False
            if table.attrs["id"].find("_extra") > -1:
                pitcher_extras = True

            tbody = split_div.find("tbody")

            if (tbody is None):
                print ""

            trs = tbody.find_all("tr")

            for tr in trs:
                tds = tr.find_all("td")
                if len(tds) == 0:
                    continue

                i = 0
                split_type = ""
                for td in tds:
                    # There are (sometimes) two table for each split type for pitchers - normal and extras.  The id for
                    # each extras table ends in "_extras", so when that is detected, we set a flag indicating that we're
                    # processing an extras table.
                    if not pitcher_extras:
                        if i == 0:
                            if MLBConstants.PITCHER_SPLITS not in self.player_data:
                                self.player_data[
                                    MLBConstants.PITCHER_SPLITS] = {}

                            if season not in self.player_data[
                                    MLBConstants.PITCHER_SPLITS]:
                                self.player_data[
                                    MLBConstants.PITCHER_SPLITS][season] = {}

                            split_type = td.text.replace(".", "_")
                            if split_type not in self.player_data[
                                    MLBConstants.PITCHER_SPLITS][season]:
                                self.player_data[MLBConstants.PITCHER_SPLITS][
                                    season][split_type] = {}
                        elif i == 1 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    GAMES_PLAYED] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 2 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    PLATE_APPEARANCES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 3 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    AT_BATS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 4 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    RUNS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 5 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    HITS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 6 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    DOUBLES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 7 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    TRIPLES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 8 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    HOME_RUNS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 9 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    STOLEN_BASES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 10 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    CAUGHT_STEALING] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 11 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    WALKS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 12 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    STRIKE_OUTS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 13 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    STRIKE_OUT_TO_WALK_RATIO] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 14 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    BATTING_AVERAGE] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 15 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    ON_BASE_PERCENTAGE] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 16 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    SLUGGING_PERCENTAGE] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 17 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    OPS] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 18 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    TOTAL_BASES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 19 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    DOUBLE_PLAYS_GROUNDED_INTO] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 20 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    HIT_BY_PITCH] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 21 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    SACRIFICE_HITS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 22 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    SACRIFICE_FLIES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 23 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    INTENTIONAL_WALKS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 24 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    REACHED_ON_ERROR] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 25 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    BABIP] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 26 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    T_OPS_PLUS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 27 and not pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    S_OPS_PLUS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                    else:
                        if i == 1 and pitcher_extras:
                            if MLBConstants.PITCHER_SPLITS not in self.player_data:
                                self.player_data[
                                    MLBConstants.PITCHER_SPLITS] = {}

                            if season not in self.player_data[
                                    MLBConstants.PITCHER_SPLITS]:
                                self.player_data[
                                    MLBConstants.PITCHER_SPLITS][season] = {}

                            split_type = td.text.replace(".", "_")
                            if td.text not in self.player_data[
                                    MLBConstants.PITCHER_SPLITS][season]:
                                self.player_data[MLBConstants.PITCHER_SPLITS][
                                    season][split_type] = {}
                        elif i == 2 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    WINS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 3 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    LOSSES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 4 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    WIN_LOSS_PCT] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 5 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    ERA] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        # elif i == 6 and pitcher_extras:
                        # 	self.player_data[MLBConstants.PITCHER_SPLITS][season][split_type][
                        # 		MLBConstants.GAMES_PLAYED] = MLBUtilities.resolve_value(td.text, "int")
                        elif i == 7 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    GAMES_STARTED] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 8 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    GAMES_FINISHED] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 9 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    COMPLETE_GAMES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 10 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    SHUT_OUTS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 11 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    SAVES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 12 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    INNINGS_PITCHED] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        # elif i == 12 and pitcher_extras:
                        # 	self.player_data[MLBConstants.PITCHER_SPLITS][season][split_type][
                        # 		MLBConstants.HITS] = MLBUtilities.resolve_value(td.text, "int")
                        elif i == 15 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    EARNED_RUNS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 21 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    BALKS] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 22 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    WILD_PITCHES] = MLBUtilities.resolve_value(
                                        td.text, "int")
                        elif i == 24 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    WHIP] = MLBUtilities.resolve_value(
                                        td.text, "float")
                        elif i == 25 and pitcher_extras:
                            self.player_data[MLBConstants.PITCHER_SPLITS][
                                season][split_type][
                                    MLBConstants.
                                    STRIKE_OUTS_PER_9_INNINGS] = MLBUtilities.resolve_value(
                                        td.text, "float")

                    i += 1

                self.player_data[
                    MLBConstants.PITCHER_SPLITS][season][split_type][
                        MLBConstants.FIP] = self.stat_calculator.calculate_fip(
                            self.player_data[MLBConstants.PITCHER_SPLITS]
                            [season][split_type])
                self.player_data[
                    MLBConstants.PITCHER_SPLITS][season][split_type][
                        MLBConstants.
                        WOBA] = self.stat_calculator.calculate_woba(
                            self.player_data[MLBConstants.PITCHER_SPLITS]
                            [season][split_type])
コード例 #14
0
	def parse(self, data):
		"""
		Parse data from the batter vs pitcher page.

		Every data row of the "ajax_result_table" table is stored in
		self.player_data[MLBConstants.BATTER_VS_PITCHER], keyed by the opponent
		id taken from the link in the row's first cell ("." replaced by "_").
		Each entry holds the opponent's name plus the stat columns listed in
		stat_columns below.

		If the page has no result table, a warning is logged and
		self.player_data is left untouched.  Rows whose first cell carries no
		recognisable opponent link are skipped.

		:param data: Markup of the batter vs pitcher page.
		"""
		# Stats held in table columns 1..18, in column order, paired with the
		# value type handed to MLBUtilities.resolve_value.
		stat_columns = (
			(MLBConstants.PLATE_APPEARANCES, "int"),
			(MLBConstants.AT_BATS, "int"),
			(MLBConstants.HITS, "int"),
			(MLBConstants.DOUBLES, "int"),
			(MLBConstants.TRIPLES, "int"),
			(MLBConstants.HOME_RUNS, "int"),
			(MLBConstants.RBI, "int"),
			(MLBConstants.WALKS, "int"),
			(MLBConstants.STRIKE_OUTS, "int"),
			(MLBConstants.BATTING_AVERAGE, "float"),
			(MLBConstants.ON_BASE_PERCENTAGE, "float"),
			(MLBConstants.SLUGGING_PERCENTAGE, "float"),
			(MLBConstants.OPS, "float"),
			(MLBConstants.SACRIFICE_HITS, "int"),
			(MLBConstants.SACRIFICE_FLIES, "int"),
			(MLBConstants.INTENTIONAL_WALKS, "int"),
			(MLBConstants.HIT_BY_PITCH, "int"),
			(MLBConstants.DOUBLE_PLAYS_GROUNDED_INTO, "int"),
		)

		soup = BeautifulSoup(data)

		table = soup.find("table", attrs={"id": "ajax_result_table"})
		if table is None:
			# find() returns None when nothing matches; bail out instead of
			# raising AttributeError so the caller can still save the rest of
			# the player record.
			import logging
			logging.warning("No batter vs pitcher result table found.  Skipping...")
			return

		matchups = self.player_data.setdefault(MLBConstants.BATTER_VS_PITCHER, {})

		# A batter's page links to pitchers and vice versa.
		if self.type == MLBConstants.BATTER_TYPE:
			opponent_regex = self.pitcher_id_regex
		else:
			opponent_regex = self.batter_id_regex

		for tr in table.find_all("tr"):
			tds = tr.find_all("td")
			if not tds:
				# Rows without <td> cells carry no stats.
				continue

			link = tds[0].a
			if link is None:
				continue
			m = opponent_regex.match(link.attrs.get("href", ""))
			if m is None:
				continue

			opponent_id = m.group(1).replace(".", "_")
			matchup = matchups.setdefault(opponent_id, {})
			matchup[MLBConstants.NAME] = link.text

			# zip stops at the shorter sequence, so short rows and extra
			# trailing columns are both handled.
			for td, (stat_key, value_type) in zip(tds[1:], stat_columns):
				matchup[stat_key] = MLBUtilities.resolve_value(td.text, value_type)
コード例 #15
0
    def process_player(self, player_id, url, active=False):
        """
        Scrape the stats pages for one player and save the resulting record.

        Loads the stored record for ``player_id`` (or starts a new one), then
        fetches and parses, in order: the main player page, the detailed
        pitching or batting page, the 2014 gamelog, the 2014 and "Career"
        splits and, when ``self.scrape_bvp`` is set, the batter vs pitcher
        page.  Every parser writes into the same ``player`` dict, which is
        saved through ``self.player_manager`` at the end.

        :param player_id: Id used to look the player up and to build page URLs.
        :param url: Path prefix of the player's pages, e.g. "/players/a/".
        :param active: When False, a stored player that already has batter vs
            pitcher data is considered fully scraped and is skipped.
        """
        host = "www.baseball-reference.com"
        # Only these seasons are scraped for gamelogs / splits.
        gamelog_seasons = ("2014",)
        split_seasons = ("2014", "Career")

        start = time.time()
        player = self.player_manager.read({MLBConstants.PLAYER_ID: player_id})
        end = time.time()
        print("\t\tDEBUG: Read from player_manager in {} seconds".format(
            end - start))

        if player is None:
            player = {MLBConstants.PLAYER_ID: player_id}
        elif MLBConstants.BATTER_VS_PITCHER in player and not active:
            # We can skip this player
            logging.info(
                "Looks like all info for {} has already been scraped.  Moving on..."
                .format(player_id))
            return

        player_url = "{}{}.shtml".format(url, player_id)
        player_page_data = MLBUtilities.fetch_data(host, player_url, True)

        self.player_season_stats_parser.player_data = player
        self.player_season_stats_parser.parse(player_page_data)

        #############################################
        # Should we get the pitcher or batter page?
        #############################################
        if self.player_season_stats_parser.player_data[
                MLBConstants.POSITION] == "Pitcher":
            detail_suffix = "-pitch.shtml"
        else:
            detail_suffix = "-bat.shtml"
        player_season_stats_detail_url = "{}{}{}".format(
            url, player_id, detail_suffix)

        ###############################################
        # Fetch detailed season stats for the player.
        ###############################################
        start = time.time()
        player_season_stats_detail_data = MLBUtilities.fetch_data(
            host, player_season_stats_detail_url, True)
        end = time.time()
        print("\t\tDEBUG: Fetched detailed season stats in {} seconds".format(
            end - start))

        start = time.time()
        self.player_season_stats_parser.parse(player_season_stats_detail_data)
        end = time.time()
        print("\t\tDEBUG: Parsed detailed season stats in {} seconds".format(
            end - start))

        active_seasons = self.determine_active_seasons(player)

        # Evaluated once after the season stats pages have been parsed and
        # reused below for the gamelog, splits and BvP requests.
        is_pitcher = player[MLBConstants.POSITION] == "Pitcher"
        type_code = "p" if is_pitcher else "b"
        if is_pitcher:
            player_type = MLBConstants.PITCHER_TYPE
        else:
            player_type = MLBConstants.BATTER_TYPE

        #################
        # Grab gamelogs
        #################
        for season in active_seasons:
            if season not in gamelog_seasons:
                continue
            player_gamelog_url = "/players/gl.cgi?id={}&t={}&year={}".format(
                player_id, type_code, season)
            data = MLBUtilities.fetch_data(host, player_gamelog_url, True)
            self.player_gamelog_parser.player_data = player
            self.player_gamelog_parser.type = player_type
            self.player_gamelog_parser.season = season
            self.player_gamelog_parser.parse(data)

        ###############
        # Grab splits
        ###############
        active_seasons.append("Career")
        for season in active_seasons:
            if season not in split_seasons:
                continue
            player_split_url = "/players/split.cgi?id={}&t={}&year={}".format(
                player_id, type_code, season)
            data = MLBUtilities.fetch_data(host, player_split_url, True)
            self.player_splits_parser.player_data = player
            self.player_splits_parser.season = season
            self.player_splits_parser.parse(data, season)

        #####################
        # Grab BvP (or PvB)
        #####################
        if self.scrape_bvp:
            self.player_bvp_parser.type = player_type
            if is_pitcher:
                bvp_url = "/play-index/batter_vs_pitcher.cgi?pitcher={}".format(
                    player_id)
            else:
                bvp_url = "/play-index/batter_vs_pitcher.cgi?batter={}".format(
                    player_id)

            # NOTE(review): every other request in this method goes through
            # MLBUtilities.fetch_data(host, url, True); confirm that this
            # class really defines its own fetch_data(url, flag).
            data = self.fetch_data(bvp_url, True)
            self.player_bvp_parser.player_data = player
            self.player_bvp_parser.parse(data)

        start = time.time()
        self.player_manager.save(player)
        end = time.time()
        print("\t\tDEBUG: Saved player stats in {} seconds".format(end - start))