def get_table_row_dict(soup, table_name, table_row_label, table_column_label):
    """ Find the row of an HTML table whose value in one column matches a label
    :param soup: parsed page object; the table is looked up on it by its "id" attribute
    :param table_name: value of the "id" attribute of the table to search
    :param table_row_label: cell text that identifies the wanted row
    :param table_column_label: header text of the column compared against table_row_label
    :return: dict mapping each header text to the matching row's cell text
             (an empty cell is stored as the int 0 rather than "")
    :raise BaseballReference.TableNotFound: if the table is missing or no row matches
    :raise KeyError: if table_column_label is not one of the table's header texts
    """
    results_table = soup.find("table", {"id": table_name})
    if results_table is None:
        raise BaseballReference.TableNotFound(table_name)

    header_labels = [th.text for th in results_table.find("thead").findAll("th")]

    for stat_row in results_table.find("tbody").findAll("tr"):
        cells = stat_row.findAll("td")
        # Rows with fewer cells than headers (e.g. separator rows) cannot be
        # the row we want; skip them instead of indexing past the end.
        if len(cells) < len(header_labels):
            continue

        # Create a dictionary of the stat attributes
        stat_dict = dict()
        for label, cell in zip(header_labels, cells):
            stat_dict[label] = 0 if cell.text == "" else cell.text

        if stat_dict[table_column_label] == table_row_label:
            return stat_dict

    #TODO: add a TableRowNotFound exception
    raise BaseballReference.TableNotFound(table_name)
def update_pitcher_id(pitcher, database_session):
    """ Make sure the database has an up-to-date PitcherEntry for this pitcher
    If an entry keyed by the pitcher's RotoWire ID exists, only its team is
    refreshed (and committed) when it differs. Otherwise the pitcher's Baseball
    Reference ID is looked up and passed to create_new_pitcher_entry.
    :param pitcher: object providing name, team and rotowire_id attributes
    :param database_session: database session used for the lookup and any commit
    """
    db_query = database_session.query(PitcherEntry).get(pitcher.rotowire_id)

    # Found unique entry, check to make sure the team matches the database
    if db_query is not None:
        if db_query.team != pitcher.team:
            # Update the player's team in the database
            db_query.team = pitcher.team
            database_session.commit()
        return

    # Found no entries, create a bare bones entry with just the name and id.
    # The name parsing and the pitcher soup are only needed on this path, so
    # they are done here rather than for every pitcher already in the database.
    name = pitcher.name.split()
    first_name = name[0]
    # Joined without str() so non-ASCII name parts are not forced through ASCII.
    last_name = " ".join(name[1:])
    pitcher_soup = BaseballReference.get_pitcher_soup()
    try:
        baseball_reference_id = BaseballReference.get_pitcher_id(
            first_name + " " + last_name,
            BaseballReference.team_dict.inv[team_dict[pitcher.team]],
            date.today().year,
            pitcher_soup)
    except BaseballReference.NameNotFound:
        print("Skipping committing this pitcher '%s %s'." % (first_name, last_name))
        return

    create_new_pitcher_entry(pitcher, baseball_reference_id, database_session)
def update_pitcher_id(pitcher, database_session):
    """ Make sure the database has an up-to-date PitcherEntry for this pitcher
    If an entry keyed by the pitcher's RotoWire ID exists, only its team is
    refreshed (and committed) when it differs. Otherwise the pitcher's Baseball
    Reference ID is looked up and passed to create_new_pitcher_entry.
    :param pitcher: object providing name, team and rotowire_id attributes
    :param database_session: database session used for the lookup and any commit
    """
    db_query = database_session.query(PitcherEntry).get(pitcher.rotowire_id)

    # Found unique entry, check to make sure the team matches the database
    if db_query is not None:
        if db_query.team != pitcher.team:
            # Update the player's team in the database
            db_query.team = pitcher.team
            database_session.commit()
        return

    # Found no entries, create a bare bones entry with just the name and id.
    # The name parsing and the pitcher soup are only needed on this path, so
    # they are done here rather than for every pitcher already in the database.
    name = pitcher.name.split()
    first_name = name[0]
    # Joined without str() so non-ASCII name parts are not forced through ASCII.
    last_name = " ".join(name[1:])
    pitcher_soup = BaseballReference.get_pitcher_soup()
    try:
        baseball_reference_id = BaseballReference.get_pitcher_id(
            first_name + " " + last_name,
            BaseballReference.team_dict.inv[team_dict[pitcher.team]],
            date.today().year,
            pitcher_soup)
    except BaseballReference.NameNotFound:
        print("Skipping committing this pitcher '%s %s'." % (first_name, last_name))
        return

    create_new_pitcher_entry(pitcher, baseball_reference_id, database_session)
def get_game_lineups(database_session):
    """ Mine the RotoWire daily lineups page and get the players' name, team, and RotoWire ID
    Note: longer names are abbreviated by RotoWire and need to be resolved by another source
    A GameEntry per game is also added to the database on a best-effort basis;
    failures while building or storing it are printed and do not drop the game.
    :param database_session: database session passed to get_hitter/get_pitcher and used to store GameEntry rows
    :return: list of Game objects representing the lineups for the day
    """
    #TODO: add feature to look if it's going to rain
    lineup_soup = BeautifulSoupHelper.get_soup_from_url(DAILY_LINEUPS_URL)
    header_nodes = lineup_soup.findAll("div", {"class": TEAM_REGION_LABEL})
    games = list()
    for header_node in header_nodes:
        game_node = header_node.parent
        away_team_abbreviation = game_node.find(
            "div", {"class": AWAY_TEAM_REGION_LABEL}).text.split()[0]
        home_team_abbreviation = game_node.find(
            "div", {"class": HOME_TEAM_REGION_LABEL}).text.split()[0]
        game_main_soup = game_node.find("div", {"class": LINEUPS_CLASS_LABEL})

        away_team_lineup = [
            get_hitter(away_player, away_team_abbreviation, database_session)
            for away_player in game_main_soup.findAll("div", {"class": AWAY_TEAM_PLAYER_LABEL})
        ]
        home_team_lineup = [
            get_hitter(home_player, home_team_abbreviation, database_session)
            for home_player in game_main_soup.findAll("div", {"class": HOME_TEAM_PLAYER_LABEL})
        ]

        try:
            pitchers = game_node.find("div", PITCHERS_REGION_LABEL).findAll("div")
            away_team_pitcher = get_pitcher(pitchers[0], away_team_abbreviation, database_session)
            home_team_pitcher = get_pitcher(pitchers[1], home_team_abbreviation, database_session)
        # No pitchers present on page (region missing -> AttributeError,
        # fewer than two pitcher nodes -> IndexError): skip just this game
        except (AttributeError, IndexError):
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))
            continue

        current_game = Game(away_team_lineup, away_team_pitcher,
                            home_team_lineup, home_team_pitcher)

        # TODO: since they only release the ump data ~1 hour before the game, we'll have to make this robust later
        try:
            game_time = game_node.find(
                "div", {"class": TIME_REGION_LABEL}).find("a").text.replace("ET", "").strip()
            # Convert the 12-hour clock text to a 24-hour "HH:MM" string
            game_time = datetime.strptime(game_time, '%I:%M %p').strftime("%H:%M")
            game_entry = GameEntry(date.today(), game_time,
                                   home_team_abbreviation, away_team_abbreviation)
            game_entry.wind_speed = get_wind_speed(game_node)
            game_entry.ump_ks_per_game = get_ump_ks_per_game(game_node)
            game_entry.ump_runs_per_game = get_ump_runs_per_game(game_node)
            game_entry.park_hitter_score, game_entry.park_pitcher_score = \
                BaseballReference.get_team_info(team_dict[home_team_abbreviation])
            database_session.add(game_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Warning: attempt to duplicate game entry: %s %s %s %s" % (
                str(home_team_abbreviation), str(away_team_abbreviation),
                str(game_entry.game_date), str(game_entry.game_time)))
        except Exception as e:
            # Deliberate best effort: the game metadata is optional, so report
            # the problem and still consider the lineup below.
            print(e)

        if current_game.is_valid():
            games.append(current_game)
        else:
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))

    return games
def mine_yesterdays_results(database_session):
    """ Turn yesterday's pregame entries into postgame entries with actual results
    For every pregame hitter and pitcher entry dated yesterday, the player's
    game log row is fetched through BaseballReference. When that raises
    TableRowNotFound the pregame entry is deleted; otherwise a postgame entry
    with the game stats and actual DraftKings points is added and committed.
    A commit that raises IntegrityError is rolled back and reported.
    :param database_session: database session used for all queries, deletes and commits
    """
    # Computed once so the hitter and pitcher passes look at the same date
    # even when the run crosses midnight.
    yesterday = date.today() - timedelta(days=1)

    # Query the database for all hitter game entries from yesterday
    hitter_entries = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == yesterday)
    for pregame_hitter_entry in hitter_entries:
        hitter_entry = database_session.query(HitterEntry).get(pregame_hitter_entry.rotowire_id)
        try:
            stat_row_dict = BaseballReference.get_yesterdays_hitting_game_log(
                hitter_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                pregame_hitter_entry.game_date, pregame_hitter_entry.opposing_team))
            database_session.delete(pregame_hitter_entry)
            database_session.commit()
            continue

        postgame_hitter_entry = PostgameHitterGameEntry()
        postgame_hitter_entry.rotowire_id = hitter_entry.rotowire_id
        postgame_hitter_entry.game_date = pregame_hitter_entry.game_date
        postgame_hitter_entry.game_h = int(stat_row_dict["H"])
        postgame_hitter_entry.game_bb = int(stat_row_dict["BB"])
        postgame_hitter_entry.game_hbp = int(stat_row_dict["HBP"])
        postgame_hitter_entry.game_r = int(stat_row_dict["R"])
        postgame_hitter_entry.game_sb = int(stat_row_dict["SB"])
        postgame_hitter_entry.game_hr = int(stat_row_dict["HR"])
        postgame_hitter_entry.game_rbi = int(stat_row_dict["RBI"])
        postgame_hitter_entry.game_2b = int(stat_row_dict["2B"])
        postgame_hitter_entry.game_3b = int(stat_row_dict["3B"])
        # Singles are whatever hits remain after the extra-base hits
        postgame_hitter_entry.game_1b = postgame_hitter_entry.game_h - postgame_hitter_entry.game_2b - \
            postgame_hitter_entry.game_3b - postgame_hitter_entry.game_hr
        postgame_hitter_entry.actual_draftkings_points = Draftkings.get_hitter_points(
            postgame_hitter_entry)
        try:
            database_session.add(postgame_hitter_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate hitter postgame results: %s %s %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                hitter_entry.team, pregame_hitter_entry.game_date))

    # Query the database for all pitcher game entries from yesterday
    pitcher_entries = database_session.query(PregamePitcherGameEntry).filter(
        PregamePitcherGameEntry.game_date == yesterday)
    for pregame_pitcher_entry in pitcher_entries:
        pitcher_entry = database_session.query(PitcherEntry).get(pregame_pitcher_entry.rotowire_id)
        print("Mining yesterday for %s %s" % (pitcher_entry.first_name, pitcher_entry.last_name))
        try:
            stat_row_dict = BaseballReference.get_pitching_game_log(
                pitcher_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.game_date, pregame_pitcher_entry.opposing_team))
            database_session.delete(pregame_pitcher_entry)
            database_session.commit()
            continue

        postgame_pitcher_entry = PostgamePitcherGameEntry()
        postgame_pitcher_entry.rotowire_id = pitcher_entry.rotowire_id
        postgame_pitcher_entry.game_date = pregame_pitcher_entry.game_date
        postgame_pitcher_entry.game_ip = float(stat_row_dict["IP"])
        postgame_pitcher_entry.game_so = int(stat_row_dict["SO"])
        # startswith() instead of [0] so an empty decision cannot raise IndexError
        if str(stat_row_dict["Dec"]).startswith("W"):
            postgame_pitcher_entry.game_wins = 1
        postgame_pitcher_entry.game_er = int(stat_row_dict["ER"])
        postgame_pitcher_entry.game_h = int(stat_row_dict["H"])
        postgame_pitcher_entry.game_bb = int(stat_row_dict["BB"])
        postgame_pitcher_entry.game_hbp = int(stat_row_dict["HBP"])
        if stat_row_dict["Inngs"] == "CG":
            postgame_pitcher_entry.game_cg = 1
        if stat_row_dict["Inngs"] == "SHO":
            postgame_pitcher_entry.game_cgso = 1
        # NOTE(review): an "SHO" row does not set game_cg, so this flag can only
        # be set for "CG" rows - confirm that is the intended scoring.
        if postgame_pitcher_entry.game_cg == 1 and postgame_pitcher_entry.game_h == 0:
            postgame_pitcher_entry.game_no_hitter = 1
        postgame_pitcher_entry.actual_draftkings_points = Draftkings.get_pitcher_points(
            postgame_pitcher_entry)
        try:
            database_session.add(postgame_pitcher_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate pitcher postgame results: %s %s %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.opposing_team, postgame_pitcher_entry.game_date))
def get_pitcher_stats(pitcher_id, team, opposing_team, database_session, game_date=None):
    """ Build the pregame entry for a pitcher from career, versus-lineup, recent and season stats
    :param pitcher_id: key used to look up the PitcherEntry; stored as the entry's rotowire_id
    :param team: stored as the entry's team
    :param opposing_team: stored as the entry's opposing_team and used to select hitter entries
    :param database_session: database session used for the lookups
    :param game_date: date of the game; today's date is used when None
    :return: the populated PregamePitcherGameEntry (this function does not add it to the session)
    :raise PitcherNotFound: if no PitcherEntry exists for pitcher_id
    """
    if game_date is None:
        game_date = date.today()

    entry = PregamePitcherGameEntry()
    entry.rotowire_id = pitcher_id
    entry.team = team
    entry.opposing_team = opposing_team
    entry.game_date = game_date

    pitcher_entry = database_session.query(PitcherEntry).get(pitcher_id)
    if pitcher_entry is None:
        raise PitcherNotFound(pitcher_id)

    bref_id = pitcher_entry.baseball_reference_id
    career_soup = BaseballReference.get_pitcher_page_career_soup(bref_id)

    # (stat table column, attribute suffix on the entry, converter), in copy order
    stat_fields = (
        ("BF", "bf", int),
        ("IP", "ip", float),
        ("H", "h", int),
        ("HR", "hr", int),
        ("ER", "er", int),
        ("BB", "bb", int),
        ("SO", "so", int),
        ("W", "wins", int),
        ("L", "losses", int),
    )

    def load_split(prefix, fetch_stats):
        # Copy one stat split onto the entry as <prefix>_<suffix>; a missing
        # table or row is reported and leaves the remaining fields untouched.
        try:
            stats = fetch_stats()
            for column, suffix, convert in stat_fields:
                setattr(entry, "%s_%s" % (prefix, suffix), convert(stats[column]))
        except (BaseballReference.TableNotFound,
                BaseballReference.TableRowNotFound) as e:
            print("%s with %s %s" % (
                str(e), str(pitcher_entry.first_name), str(pitcher_entry.last_name)))

    # Career stats
    load_split("career",
               lambda: BaseballReference.get_career_pitching_stats(bref_id, career_soup))

    # Versus stats: summed from the pregame hitter entries for this date.
    # NOTE(review): the filter keeps hitter entries whose opposing_team equals
    # this pitcher's opposing_team - confirm that is the lineup he faces.
    opposing_lineup = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == game_date,
        PregameHitterGameEntry.opposing_team == opposing_team)
    for batter in opposing_lineup:
        entry.vs_h += batter.vs_h
        entry.vs_bb += batter.vs_bb
        entry.vs_so += batter.vs_so
        entry.vs_hr += batter.vs_hr
        entry.vs_bf += batter.vs_pa
        # Approximate earned runs by the RBIs of opposing hitters
        entry.vs_er += batter.vs_rbi

    # Recent stats
    load_split("recent",
               lambda: BaseballReference.get_recent_pitcher_stats(bref_id, career_soup))

    # Season stats
    load_split("season",
               lambda: BaseballReference.get_season_pitcher_stats(bref_id))

    return entry
def get_hitter_stats(batter_id, pitcher_id, team, pitcher_hand, database_session):
    """ Build the pregame entry for a hitter from career, vs-hand, recent, season and vs-pitcher stats
    All stats are fetched through the BaseballReference helpers. A split whose
    table or row cannot be found is reported and its fields are left untouched.
    :param batter_id: key used to look up the HitterEntry; stored as the entry's rotowire_id
    :param pitcher_id: key used to look up the opposing PitcherEntry; stored as the entry's pitcher_id
    :param team: stored as the entry's team
    :param pitcher_hand: a str representation of the hand the pitcher throws with ("L" or "R")
    :param database_session: database session used for the lookups
    :return: a PregameHitterGameEntry object without the predicted_draftkings_points field populated
    :raise HitterNotFound: if no HitterEntry exists for batter_id
    :raise AssertionError: if pitcher_hand is neither "L" nor "R"
    """
    pregame_hitter_entry = PregameHitterGameEntry()
    pregame_hitter_entry.rotowire_id = batter_id
    pregame_hitter_entry.pitcher_id = pitcher_id
    pregame_hitter_entry.team = team

    hitter_entry = database_session.query(HitterEntry).get(batter_id)
    if hitter_entry is None:
        raise HitterNotFound(batter_id)

    hitter_career_soup = BaseballReference.get_hitter_page_career_soup(
        hitter_entry.baseball_reference_id)

    # Stat table columns copied for each split; the entry attribute is
    # "<prefix>_<lower-cased column>".
    split_columns = ("PA", "AB", "R", "H", "HR", "RBI", "SB", "CS", "BB", "SO")
    vs_pitcher_columns = ("PA", "AB", "H", "HR", "RBI", "BB", "SO")

    def load_split(prefix, columns, fetch_stats):
        # Copy one stat split onto the entry, reporting a missing table/row.
        #TODO: add ColumnNotFound exception to BaseballReference
        try:
            stats = fetch_stats()
            for column in columns:
                setattr(pregame_hitter_entry,
                        "%s_%s" % (prefix, column.lower()),
                        int(stats[column]))
        except (BaseballReference.TableNotFound,
                BaseballReference.TableRowNotFound) as e:
            print("%s with %s %s" % (
                str(e), str(hitter_entry.first_name), str(hitter_entry.last_name)))

    # Career stats
    load_split("career", split_columns,
               lambda: BaseballReference.get_career_hitting_stats(
                   hitter_entry.baseball_reference_id, hitter_career_soup))

    # Vs hand of the opposing pitcher
    if pitcher_hand == "L":
        pitcher_hand_lr = BaseballReference.HandEnum.LHP
    elif pitcher_hand == "R":
        pitcher_hand_lr = BaseballReference.HandEnum.RHP
    else:
        # %s rather than %i: the hand is a str, and %i would raise TypeError
        # before the diagnostic could be shown. Raised explicitly (same type
        # as the former assert) so the check is not stripped under -O.
        message = "Invalid pitcher hand %s" % (pitcher_hand,)
        print(message)
        raise AssertionError(message)

    load_split("vs_hand", split_columns,
               lambda: BaseballReference.get_vs_hand_hitting_stats(
                   hitter_entry.baseball_reference_id, pitcher_hand_lr, hitter_career_soup))

    # Recent stats
    load_split("recent", split_columns,
               lambda: BaseballReference.get_recent_hitting_stats(
                   hitter_entry.baseball_reference_id, hitter_career_soup))

    # Season stats
    load_split("season", split_columns,
               lambda: BaseballReference.get_season_hitting_stats(
                   hitter_entry.baseball_reference_id))

    # Career versus this pitcher
    pitcher_entry = database_session.query(PitcherEntry).get(
        pregame_hitter_entry.pitcher_id)
    # Couldn't find the pitcher, just continue and use default values
    if pitcher_entry is None:
        return pregame_hitter_entry

    load_split("vs", vs_pitcher_columns,
               lambda: BaseballReference.get_vs_pitcher_stats(
                   hitter_entry.baseball_reference_id,
                   pitcher_entry.baseball_reference_id))

    return pregame_hitter_entry
def get_game_lineups(database_session):
    """ Mine the RotoWire daily lineups page and get the players' name, team, and RotoWire ID
    Note: longer names are abbreviated by RotoWire and need to be resolved by another source
    A GameEntry per game is also added to the database on a best-effort basis;
    failures while building or storing it are printed and do not drop the game.
    :param database_session: database session passed to get_hitter/get_pitcher and used to store GameEntry rows
    :return: list of Game objects representing the lineups for the day
    """
    #TODO: add feature to look if it's going to rain
    lineup_soup = BeautifulSoupHelper.get_soup_from_url(DAILY_LINEUPS_URL)
    header_nodes = lineup_soup.findAll("div", {"class": TEAM_REGION_LABEL})
    games = list()
    for header_node in header_nodes:
        game_node = header_node.parent
        away_team_abbreviation = game_node.find(
            "div", {"class": AWAY_TEAM_REGION_LABEL}).text.split()[0]
        home_team_abbreviation = game_node.find(
            "div", {"class": HOME_TEAM_REGION_LABEL}).text.split()[0]
        game_main_soup = game_node.find("div", {"class": LINEUPS_CLASS_LABEL})

        away_team_lineup = [
            get_hitter(away_player, away_team_abbreviation, database_session)
            for away_player in game_main_soup.findAll("div", {"class": AWAY_TEAM_PLAYER_LABEL})
        ]
        home_team_lineup = [
            get_hitter(home_player, home_team_abbreviation, database_session)
            for home_player in game_main_soup.findAll("div", {"class": HOME_TEAM_PLAYER_LABEL})
        ]

        try:
            pitchers = game_node.find("div", PITCHERS_REGION_LABEL).findAll("div")
            away_team_pitcher = get_pitcher(pitchers[0], away_team_abbreviation, database_session)
            home_team_pitcher = get_pitcher(pitchers[1], home_team_abbreviation, database_session)
        # No pitchers present on page (region missing -> AttributeError,
        # fewer than two pitcher nodes -> IndexError): skip just this game
        except (AttributeError, IndexError):
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))
            continue

        current_game = Game(away_team_lineup, away_team_pitcher,
                            home_team_lineup, home_team_pitcher)

        # TODO: since they only release the ump data ~1 hour before the game, we'll have to make this robust later
        try:
            game_time = game_node.find(
                "div", {"class": TIME_REGION_LABEL}).find("a").text.replace("ET", "").strip()
            # Convert the 12-hour clock text to a 24-hour "HH:MM" string
            game_time = datetime.strptime(game_time, '%I:%M %p').strftime("%H:%M")
            game_entry = GameEntry(date.today(), game_time,
                                   home_team_abbreviation, away_team_abbreviation)
            game_entry.wind_speed = get_wind_speed(game_node)
            game_entry.ump_ks_per_game = get_ump_ks_per_game(game_node)
            game_entry.ump_runs_per_game = get_ump_runs_per_game(game_node)
            game_entry.park_hitter_score, game_entry.park_pitcher_score = \
                BaseballReference.get_team_info(team_dict[home_team_abbreviation])
            database_session.add(game_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Warning: attempt to duplicate game entry: %s %s %s %s" % (
                str(home_team_abbreviation), str(away_team_abbreviation),
                str(game_entry.game_date), str(game_entry.game_time)))
        except Exception as e:
            # Deliberate best effort: the game metadata is optional, so report
            # the problem and still consider the lineup below.
            print(e)

        if current_game.is_valid():
            games.append(current_game)
        else:
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))

    return games
def mine_yesterdays_results(database_session):
    """ Turn yesterday's pregame entries into postgame entries with actual results
    For every pregame hitter and pitcher entry dated yesterday, the player's
    game log row is fetched through BaseballReference. When that raises
    TableRowNotFound the pregame entry is deleted; otherwise a postgame entry
    with the game stats and actual DraftKings points is added and committed.
    A commit that raises IntegrityError is rolled back and reported.
    :param database_session: database session used for all queries, deletes and commits
    """
    # Computed once so the hitter and pitcher passes look at the same date
    # even when the run crosses midnight.
    yesterday = date.today() - timedelta(days=1)

    # Query the database for all hitter game entries from yesterday
    hitter_entries = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == yesterday)
    for pregame_hitter_entry in hitter_entries:
        hitter_entry = database_session.query(HitterEntry).get(pregame_hitter_entry.rotowire_id)
        try:
            stat_row_dict = BaseballReference.get_yesterdays_hitting_game_log(
                hitter_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                pregame_hitter_entry.game_date, pregame_hitter_entry.opposing_team))
            database_session.delete(pregame_hitter_entry)
            database_session.commit()
            continue

        postgame_hitter_entry = PostgameHitterGameEntry()
        postgame_hitter_entry.rotowire_id = hitter_entry.rotowire_id
        postgame_hitter_entry.game_date = pregame_hitter_entry.game_date
        postgame_hitter_entry.game_h = int(stat_row_dict["H"])
        postgame_hitter_entry.game_bb = int(stat_row_dict["BB"])
        postgame_hitter_entry.game_hbp = int(stat_row_dict["HBP"])
        postgame_hitter_entry.game_r = int(stat_row_dict["R"])
        postgame_hitter_entry.game_sb = int(stat_row_dict["SB"])
        postgame_hitter_entry.game_hr = int(stat_row_dict["HR"])
        postgame_hitter_entry.game_rbi = int(stat_row_dict["RBI"])
        postgame_hitter_entry.game_2b = int(stat_row_dict["2B"])
        postgame_hitter_entry.game_3b = int(stat_row_dict["3B"])
        # Singles are whatever hits remain after the extra-base hits
        postgame_hitter_entry.game_1b = postgame_hitter_entry.game_h - postgame_hitter_entry.game_2b - \
            postgame_hitter_entry.game_3b - postgame_hitter_entry.game_hr
        postgame_hitter_entry.actual_draftkings_points = Draftkings.get_hitter_points(
            postgame_hitter_entry)
        try:
            database_session.add(postgame_hitter_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate hitter postgame results: %s %s %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                hitter_entry.team, pregame_hitter_entry.game_date))

    # Query the database for all pitcher game entries from yesterday
    pitcher_entries = database_session.query(PregamePitcherGameEntry).filter(
        PregamePitcherGameEntry.game_date == yesterday)
    for pregame_pitcher_entry in pitcher_entries:
        pitcher_entry = database_session.query(PitcherEntry).get(pregame_pitcher_entry.rotowire_id)
        print("Mining yesterday for %s %s" % (pitcher_entry.first_name, pitcher_entry.last_name))
        try:
            stat_row_dict = BaseballReference.get_pitching_game_log(
                pitcher_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.game_date, pregame_pitcher_entry.opposing_team))
            database_session.delete(pregame_pitcher_entry)
            database_session.commit()
            continue

        postgame_pitcher_entry = PostgamePitcherGameEntry()
        postgame_pitcher_entry.rotowire_id = pitcher_entry.rotowire_id
        postgame_pitcher_entry.game_date = pregame_pitcher_entry.game_date
        postgame_pitcher_entry.game_ip = float(stat_row_dict["IP"])
        postgame_pitcher_entry.game_so = int(stat_row_dict["SO"])
        # startswith() instead of [0] so an empty decision cannot raise IndexError
        if str(stat_row_dict["Dec"]).startswith("W"):
            postgame_pitcher_entry.game_wins = 1
        postgame_pitcher_entry.game_er = int(stat_row_dict["ER"])
        postgame_pitcher_entry.game_h = int(stat_row_dict["H"])
        postgame_pitcher_entry.game_bb = int(stat_row_dict["BB"])
        postgame_pitcher_entry.game_hbp = int(stat_row_dict["HBP"])
        if stat_row_dict["Inngs"] == "CG":
            postgame_pitcher_entry.game_cg = 1
        if stat_row_dict["Inngs"] == "SHO":
            postgame_pitcher_entry.game_cgso = 1
        # NOTE(review): an "SHO" row does not set game_cg, so this flag can only
        # be set for "CG" rows - confirm that is the intended scoring.
        if postgame_pitcher_entry.game_cg == 1 and postgame_pitcher_entry.game_h == 0:
            postgame_pitcher_entry.game_no_hitter = 1
        postgame_pitcher_entry.actual_draftkings_points = Draftkings.get_pitcher_points(
            postgame_pitcher_entry)
        try:
            database_session.add(postgame_pitcher_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate pitcher postgame results: %s %s %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.opposing_team, postgame_pitcher_entry.game_date))
def get_pitcher_stats(pitcher_id, team, opposing_team, database_session, game_date=None):
    """ Build the pregame entry for a pitcher from career, versus-lineup, recent and season stats
    :param pitcher_id: key used to look up the PitcherEntry; stored as the entry's rotowire_id
    :param team: stored as the entry's team
    :param opposing_team: stored as the entry's opposing_team and used to select hitter entries
    :param database_session: database session used for the lookups
    :param game_date: date of the game; today's date is used when None
    :return: the populated PregamePitcherGameEntry (this function does not add it to the session)
    :raise PitcherNotFound: if no PitcherEntry exists for pitcher_id
    """
    if game_date is None:
        game_date = date.today()

    entry = PregamePitcherGameEntry()
    entry.rotowire_id = pitcher_id
    entry.team = team
    entry.opposing_team = opposing_team
    entry.game_date = game_date

    pitcher_entry = database_session.query(PitcherEntry).get(pitcher_id)
    if pitcher_entry is None:
        raise PitcherNotFound(pitcher_id)

    bref_id = pitcher_entry.baseball_reference_id
    career_soup = BaseballReference.get_pitcher_page_career_soup(bref_id)

    # (stat table column, attribute suffix on the entry, converter), in copy order
    stat_fields = (
        ("BF", "bf", int),
        ("IP", "ip", float),
        ("H", "h", int),
        ("HR", "hr", int),
        ("ER", "er", int),
        ("BB", "bb", int),
        ("SO", "so", int),
        ("W", "wins", int),
        ("L", "losses", int),
    )

    def load_split(prefix, fetch_stats):
        # Copy one stat split onto the entry as <prefix>_<suffix>; a missing
        # table or row is reported and leaves the remaining fields untouched.
        try:
            stats = fetch_stats()
            for column, suffix, convert in stat_fields:
                setattr(entry, "%s_%s" % (prefix, suffix), convert(stats[column]))
        except (BaseballReference.TableNotFound,
                BaseballReference.TableRowNotFound) as e:
            print("%s with %s %s" % (
                str(e), str(pitcher_entry.first_name), str(pitcher_entry.last_name)))

    # Career stats
    load_split("career",
               lambda: BaseballReference.get_career_pitching_stats(bref_id, career_soup))

    # Versus stats: summed from the pregame hitter entries for this date.
    # NOTE(review): the filter keeps hitter entries whose opposing_team equals
    # this pitcher's opposing_team - confirm that is the lineup he faces.
    opposing_lineup = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == game_date,
        PregameHitterGameEntry.opposing_team == opposing_team)
    for batter in opposing_lineup:
        entry.vs_h += batter.vs_h
        entry.vs_bb += batter.vs_bb
        entry.vs_so += batter.vs_so
        entry.vs_hr += batter.vs_hr
        entry.vs_bf += batter.vs_pa
        # Approximate earned runs by the RBIs of opposing hitters
        entry.vs_er += batter.vs_rbi

    # Recent stats
    load_split("recent",
               lambda: BaseballReference.get_recent_pitcher_stats(bref_id, career_soup))

    # Season stats
    load_split("season",
               lambda: BaseballReference.get_season_pitcher_stats(bref_id))

    return entry
# Stat dictionary keys copied for the career, vs-hand, recent and season groups; each
# one lands on the attribute "<prefix>_<lowercased key>" of the PregameHitterGameEntry.
_HITTER_STAT_KEYS = ("PA", "AB", "R", "H", "HR", "RBI", "SB", "CS", "BB", "SO")
# The versus-pitcher group copies a smaller set of stats (no R, SB or CS).
_HITTER_VS_PITCHER_STAT_KEYS = ("PA", "AB", "H", "HR", "RBI", "BB", "SO")


def _set_hitter_stats(pregame_hitter_entry, prefix, stats, stat_keys=_HITTER_STAT_KEYS):
    """ Copy int(stats[key]) onto the entry's "<prefix>_<key lowercased>" attribute for every key in stat_keys
    """
    for stat_key in stat_keys:
        setattr(pregame_hitter_entry, "%s_%s" % (prefix, stat_key.lower()), int(stats[stat_key]))


def _report_missing_hitter_stats(error, hitter_entry):
    """ Print the lookup error together with the name of the hitter it belongs to
    """
    # A single parenthesised argument prints the same text whether this is parsed as a
    # Python 2 print statement or a Python 3 print() call.
    print("%s with %s %s" % (str(error), str(hitter_entry.first_name), str(hitter_entry.last_name)))


def get_hitter_stats(batter_id, pitcher_id, team, pitcher_hand, database_session):
    """ Build the pregame entry for a hitter from his career, vs-hand, recent, season and
    versus-pitcher stats on Baseball Reference. A stat group whose table or table row
    cannot be found is reported on stdout and skipped, so its attributes are simply never
    assigned here.
    :param batter_id: primary key of the HitterEntry to look up; also stored as rotowire_id on the result
    :param pitcher_id: primary key of the PitcherEntry of the opposing pitcher, stored on the result
    :param team: the team of the hitter, stored as-is on the result
    :param pitcher_hand: a str representation of the hand the pitcher throws with ("L" or "R")
    :param database_session: the database session used for the HitterEntry and PitcherEntry queries
    :return: a PregameHitterGameEntry object without the predicted_draftkings_points field populated
    :raise HitterNotFound: if no HitterEntry exists for batter_id
    :raise ValueError: if pitcher_hand is neither "L" nor "R"
    """
    pregame_hitter_entry = PregameHitterGameEntry()
    pregame_hitter_entry.rotowire_id = batter_id
    pregame_hitter_entry.pitcher_id = pitcher_id
    pregame_hitter_entry.team = team

    hitter_entry = database_session.query(HitterEntry).get(batter_id)
    if hitter_entry is None:
        raise HitterNotFound(batter_id)

    # The career page soup is fetched once and shared by the career, vs-hand and recent lookups
    hitter_career_soup = BaseballReference.get_hitter_page_career_soup(hitter_entry.baseball_reference_id)

    # Career stats
    try:
        career_stats = BaseballReference.get_career_hitting_stats(hitter_entry.baseball_reference_id,
                                                                  hitter_career_soup)
        _set_hitter_stats(pregame_hitter_entry, "career", career_stats)
    #TODO: add ColumnNotFound exception to BaseballReference
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_missing_hitter_stats(e, hitter_entry)

    # Vs hand of the opposing pitcher
    if pitcher_hand == "L":
        pitcher_hand_lr = BaseballReference.HandEnum.LHP
    elif pitcher_hand == "R":
        pitcher_hand_lr = BaseballReference.HandEnum.RHP
    else:
        # Raise explicitly: the hand is a str, so the old "%i" message itself failed with
        # a TypeError, and an assert would be stripped when running with -O.
        raise ValueError("Invalid pitcher hand %r" % (pitcher_hand,))
    try:
        vs_hand_stats = BaseballReference.get_vs_hand_hitting_stats(hitter_entry.baseball_reference_id,
                                                                    pitcher_hand_lr,
                                                                    hitter_career_soup)
        _set_hitter_stats(pregame_hitter_entry, "vs_hand", vs_hand_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_missing_hitter_stats(e, hitter_entry)

    # Recent stats
    try:
        recent_stats = BaseballReference.get_recent_hitting_stats(hitter_entry.baseball_reference_id,
                                                                  hitter_career_soup)
        _set_hitter_stats(pregame_hitter_entry, "recent", recent_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_missing_hitter_stats(e, hitter_entry)

    # Season stats
    try:
        season_stats = BaseballReference.get_season_hitting_stats(hitter_entry.baseball_reference_id)
        _set_hitter_stats(pregame_hitter_entry, "season", season_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_missing_hitter_stats(e, hitter_entry)

    # Career versus this pitcher
    pitcher_entry = database_session.query(PitcherEntry).get(pregame_hitter_entry.pitcher_id)
    # Couldn't find the pitcher, just continue without assigning the versus stats
    if pitcher_entry is not None:
        try:
            vs_pitcher_stats = BaseballReference.get_vs_pitcher_stats(hitter_entry.baseball_reference_id,
                                                                      pitcher_entry.baseball_reference_id)
            _set_hitter_stats(pregame_hitter_entry, "vs", vs_pitcher_stats, _HITTER_VS_PITCHER_STAT_KEYS)
        except (BaseballReference.TableNotFound,
                BaseballReference.TableRowNotFound,
                BaseballReference.DidNotFacePitcher) as e:
            _report_missing_hitter_stats(e, hitter_entry)

    return pregame_hitter_entry