    def get_team_info(team_name, year_of_interest=None, team_soup=None):
        URL = "/about/parkadjust.shtml"
        try:
            team_abbreviation = BaseballReference.team_dict.inv[team_name]
        except KeyError:
            raise BaseballReference.InvalidTeamName(team_name)

        if year_of_interest is None:
            year_of_interest = date.today().year

        if team_soup is None:
            team_soup = BeautifulSoupHelper.get_soup_from_url(BaseballReference.BASE_URL + "/teams/" +
                                                          team_abbreviation + "/" + str(year_of_interest) + ".shtml")

        sub_nodes = team_soup.find("a", {"href": URL}).parent.parent.findAll("strong")
        for sub_node in sub_nodes:
            for content in sub_node.contents:
                if "multi-year:" in content:
                    factor_string = sub_node.next_sibling.split(",")

                    hitter_factor = int(factor_string[0].split("-")[1].strip().split(" ")[0])
                    pitcher_factor = int(factor_string[1].split("-")[1].strip().split(" ")[0])

                    return hitter_factor, pitcher_factor

        return None
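
A minimal usage sketch for the park-factor lookup above, assuming BaseballReference is importable from this project (the import path below is hypothetical) and that the team name matches a key of BaseballReference.team_dict; the method returns a (hitter_factor, pitcher_factor) tuple, or None when the multi-year factors are not on the team page.

# Hypothetical import path; adjust to wherever BaseballReference lives in this project.
from baseball_reference import BaseballReference

factors = BaseballReference.get_team_info("Boston Red Sox", year_of_interest=2015)
if factors is not None:
    hitter_factor, pitcher_factor = factors
    print("Park factors: hitters %d, pitchers %d" % (hitter_factor, pitcher_factor))
else:
    print("Multi-year park factors not found on the team page.")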
Example #2
    def get_pitcher_soup(year=None):
        if year is None:
            year = date.today().year

        pitcher_year_url = BaseballReference.BASE_URL + "/leagues/MLB/" + str(
            year) + "-standard-pitching.shtml"
        return BeautifulSoupHelper.get_soup_from_url(pitcher_year_url)
    def get_vs_pitcher_stats(batter_id, pitcher_id, soup=None):
        if soup is None:
            url = BaseballReference.BASE_URL + "/play-index/batter_vs_pitcher.cgi?batter=" + str(batter_id)
            print url
            soup = BeautifulSoupHelper.get_soup_from_url(url)

        return BaseballReference.get_vs_table_row_dict(soup, batter_id, pitcher_id)
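
A sketch of the head-to-head lookup above; the IDs are placeholder Baseball-Reference player IDs, and BaseballReference is assumed to be imported as in the other snippets.

batter_id = "examplb01"    # placeholder Baseball-Reference batter ID
pitcher_id = "examplp01"   # placeholder Baseball-Reference pitcher ID
matchup = BaseballReference.get_vs_pitcher_stats(batter_id, pitcher_id)
print(matchup)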
Example #5
def get_name_from_id(rotowire_id):
    """ Use the acquired RotoWire ID to resolve the name in case it is too long for the
    daily lineups page.
    :param rotowire_id: unique ID for a player in RotoWire
    :return: str representation of the name of the player
    """
    player_soup = BeautifulSoupHelper.get_soup_from_url(PLAYER_PAGE_BASE_URL + str(rotowire_id))
    return player_soup.find("div", {"class": PLAYER_PAGE_LABEL}).find("h1").text.strip()
Example #6
    def get_recent_pitcher_stats(baseball_reference_id, soup=None):
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(
                BaseballReference.BASE_URL + "/players/split.cgi?id=" +
                str(baseball_reference_id) + "&year=Career&t=p")

        return BaseballReference.get_table_row_dict(soup, "total_extra",
                                                    "Last 14 days", "Split")
Example #7
    def get_career_hitting_stats(baseball_reference_id, soup=None):
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(
                BaseballReference.BASE_URL + "/players/split.cgi?id=" +
                str(baseball_reference_id) + "&year=Career&t=b")

        return BaseballReference.get_table_row_dict(soup, "total",
                                                    "Career Totals", "Split")
    def get_yesterdays_hitting_game_log(baseball_reference_id, soup=None):
        yesterdays_date = date.today() - timedelta(days=1)
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(BaseballReference.BASE_URL + "/players/gl.cgi?id=" +
                                                         str(baseball_reference_id) + "&t=b&year=" + str(yesterdays_date.year))
        return BaseballReference.get_table_row_dict(soup, "batting_gamelogs",
                                                    BaseballReference.date_abbreviations[yesterdays_date.month] + " " + str(yesterdays_date.day),
                                                    "Date")
Example #9
    def get_season_pitcher_stats(baseball_reference_id, year=None, soup=None):
        if year is None:
            year = date.today().year
        if soup is None:
            url = BaseballReference.BASE_URL + "/players/split.cgi?id=" + str(baseball_reference_id) + "&year=" + \
                  str(year) + "&t=p"
            print url
            soup = BeautifulSoupHelper.get_soup_from_url(url)

        return BaseballReference.get_table_row_dict(soup, "total_extra", str(year) + " Totals", "Split")
    def get_yesterdays_hitting_game_log(baseball_reference_id, soup=None):
        yesterdays_date = date.today() - timedelta(days=1)
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(BaseballReference.BASE_URL + "/players/gl.cgi?id=" +
                                                         str(baseball_reference_id) + "&t=b&year=" + str(yesterdays_date.year))
        try:
            return BaseballReference.get_table_row_dict(soup, "batting_gamelogs",
                                                        BaseballReference.date_abbreviations[yesterdays_date.month] + " " + str(yesterdays_date.day), "Date")
        # TODO: just try again for now, explore BeautifulSoup built-in options for this
        except BaseballReference.TableNotFound as e:
            print e
            return BaseballReference.get_table_row_dict(soup, "batting_gamelogs",
                                                        BaseballReference.date_abbreviations[yesterdays_date.month] + " " + str(yesterdays_date.day), "Date")
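
A sketch of the game-log lookup above; it assumes yesterday's box score has already been posted to the player's game-log page, and a pre-fetched soup can be passed to skip the request.

hitter_id = "examplb01"  # placeholder Baseball-Reference ID
print(BaseballReference.get_yesterdays_hitting_game_log(hitter_id))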
    def get_vs_hand_hitting_stats(baseball_reference_id, hand_value, soup=None):
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(BaseballReference.BASE_URL + "/players/split.cgi?id=" +
                                                         str(baseball_reference_id) + "&year=Career&t=b")

        if hand_value is BaseballReference.HandEnum.LHP:
            hand = "vs LHP"
        elif hand_value is BaseballReference.HandEnum.RHP:
            hand = "vs RHP"
        else:
            print "Invalid hand enum."
            return None

        return BaseballReference.get_table_row_dict(soup, "plato", hand, "Split")
Example #15
    def get_pitching_game_log(baseball_reference_id, soup=None, game_date=None):
        if game_date is None:
            game_date = date.today() - timedelta(days=1)
        if soup is None:
            soup = BeautifulSoupHelper.get_soup_from_url(
                BaseballReference.BASE_URL + "/players/gl.cgi?id=" +
                str(baseball_reference_id) + "&t=p&year=" +
                str(game_date.year))
        return BaseballReference.get_table_row_dict(
            soup, "pitching_gamelogs",
            BaseballReference.date_abbreviations[game_date.month] + " " +
            str(game_date.day), "Date")
Example #19
    def get_hitter_page_career_soup(baseball_reference_id):
        return BeautifulSoupHelper.get_soup_from_url(
            BaseballReference.BASE_URL + "/players/split.cgi?id=" +
            str(baseball_reference_id) + "&year=Career&t=b")
    def get_pitcher_page_career_soup(baseball_reference_id):
        url = BaseballReference.BASE_URL + "/players/split.cgi?id=" + str(baseball_reference_id) + "&year=Career&t=p"
        print url
        return BeautifulSoupHelper.get_soup_from_url(url)
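
These two helpers fetch a player's career splits page so callers can hand the resulting soup to the parsers above instead of letting each one re-download the page; a sketch with a placeholder ID:

pitcher_id = "examplp01"  # placeholder Baseball-Reference ID
pitcher_soup = BaseballReference.get_pitcher_page_career_soup(pitcher_id)
# get_recent_pitcher_stats parses the same career splits page, so the soup is reused.
recent_form = BaseballReference.get_recent_pitcher_stats(pitcher_id, soup=pitcher_soup)
print(recent_form)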
Example #23
def get_game_lineups(database_session):
    """ Mine the RotoWire daily lineups page and get the players' name, team, and RotoWire ID
    Note: longer names are abbreviated by RotoWire and need to be resolved by another source
    :return: list of Game objects representing the lineups for the day
    """
    #TODO: add feature to look if it's going to rain
    lineup_soup = BeautifulSoupHelper.get_soup_from_url(DAILY_LINEUPS_URL)
    header_nodes = lineup_soup.findAll("div", {"class": TEAM_REGION_LABEL})
    games = list()
    for header_node in header_nodes:
        game_node = header_node.parent
        home_team_lineup = list()
        away_team_lineup = list()
        away_team_abbreviation = game_node.find("div", {"class": AWAY_TEAM_REGION_LABEL}).text.split()[0]
        home_team_abbreviation = game_node.find("div", {"class": HOME_TEAM_REGION_LABEL}).text.split()[0]
        game_main_soup = game_node.find("div", {"class": LINEUPS_CLASS_LABEL})

        for away_player in game_main_soup.findAll("div", {"class": AWAY_TEAM_PLAYER_LABEL}):
            away_team_lineup.append(get_hitter(away_player, away_team_abbreviation, database_session))
        for home_player in game_main_soup.findAll("div", {"class": HOME_TEAM_PLAYER_LABEL}):
            home_team_lineup.append(get_hitter(home_player, home_team_abbreviation, database_session))

        try:
            pitchers = game_node.find("div", PITCHERS_REGION_LABEL).findAll("div")
            away_team_pitcher = get_pitcher(pitchers[0], away_team_abbreviation, database_session)
            home_team_pitcher = get_pitcher(pitchers[1], home_team_abbreviation, database_session)
        # No pitchers present on page
        except AttributeError:
            print "Game between %s and %s is not valid." % (away_team_abbreviation, home_team_abbreviation)
            continue

        current_game = Game(away_team_lineup, away_team_pitcher, home_team_lineup, home_team_pitcher)

        # TODO: since they only release the ump data ~1 hour before the game, we'll have to make this robust later
        try:
            game_time = game_node.find("div", {"class": TIME_REGION_LABEL}).find("a").text.replace("ET", "").strip()
            game_time = datetime.strptime(game_time, '%I:%M %p').strftime("%H:%M")
            game_entry = GameEntry(date.today(), game_time, home_team_abbreviation, away_team_abbreviation)
            game_entry.wind_speed = get_wind_speed(game_node)
            game_entry.ump_ks_per_game = get_ump_ks_per_game(game_node)
            game_entry.ump_runs_per_game = get_ump_runs_per_game(game_node)
            game_entry.park_hitter_score, game_entry.park_pitcher_score = BaseballReference.get_team_info(team_dict[home_team_abbreviation])

            database_session.add(game_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print "Warning: attempt to duplicate game entry: %s %s %s %s" % (str(home_team_abbreviation),
                                                                             str(away_team_abbreviation),
                                                                             str(game_entry.game_date),
                                                                             str(game_entry.game_time))
        except Exception as e:
            print e
            pass

        if current_game.is_valid():
            games.append(current_game)
        else:
            print "Game between %s and %s is not valid." % (away_team_abbreviation, home_team_abbreviation)

    return games
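
A sketch of driving get_game_lineups end to end. The add/commit/rollback calls and the IntegrityError handling suggest database_session is a SQLAlchemy session, so the setup below assumes SQLAlchemy with a hypothetical connection string.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Hypothetical connection string; the real project configuration may differ.
engine = create_engine("sqlite:///mlb.db")
session = sessionmaker(bind=engine)()

games = get_game_lineups(session)
print("Mined %d valid games" % len(games))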