예제 #1
0
def get_all_pitcher_fps():
    """Compute fantasy points for every pitcher on the CUR_SEASON league pages.

    Fetches four Baseball Reference pitching pages (standard, reliever,
    starter and base-situation), left-joins the reliever ``Hold``/``BSv``,
    starter ``QS`` and base-situation ``SB`` columns onto the standard table
    by ``Name`` and ``Tm``, and applies the scoring formula below. Stats
    missing after a join are treated as 0.

    Returns:
        The result of ``build_fp_table(players, fps, fps_g)`` where ``fps`` is
        the total fantasy points and ``fps_g`` is ``fps`` divided by games.
    """
    def _fetch(kind, text_cols):
        # One stanza per page: download, pick the 'stats_table' table(s) and
        # hand them to build_df with the same positional arguments each time.
        url = "https://www.baseball-reference.com/leagues/MLB/{}-{}-pitching.shtml".format(CUR_SEASON, kind)
        page = br.get_page(url)
        return br.build_df(br.get_table_by_class(page, 'stats_table'), 1, text_cols, ['Rk', 'Age'])

    pitchers = _fetch('standard', ['Name', 'Tm', 'Lg'])
    relievers = _fetch('reliever', ['Name', 'Tm'])
    qs_table = _fetch('starter', ['Name', 'Tm'])
    sb_table = _fetch('basesituation', ['Name', 'Tm'])

    keys = ['Name', 'Tm']
    merged = pitchers
    for extra, cols in ((relievers, ['Hold', 'BSv']), (qs_table, ['QS']), (sb_table, ['SB'])):
        merged = pd.merge(merged, extra[cols + keys], on=keys, how='left').fillna(0)

    def _stat(col):
        # Every stat is read from the SAME final frame so all series are
        # guaranteed to share one index and one row count.
        return merged[col].astype('float64')

    players = merged['Name'].astype('str')
    games = _stat('G')
    ip = _stat('IP')
    w = _stat('W')
    l = _stat('L')
    cg = _stat('CG')
    sv = _stat('SV')
    h = _stat('H')
    er = _stat('ER')
    walks = _stat('BB')
    hb = _stat('HBP')
    k = _stat('SO')
    hld = _stat('Hold')
    bsv = _stat('BSv')
    qs = _stat('QS')
    sb = _stat('SB')

    # Split IP into whole innings and the trailing digit of its decimal text.
    # NOTE(review): presumably that digit is the number of extra outs
    # (.1 / .2) and 0.33 approximates a third of an inning - confirm.
    innings = pd.Series([int(value) for value in ip], index=ip.index)
    rem = pd.Series([float(str(value)[-1]) for value in ip], index=ip.index)

    fps = (
        (innings * 1.0) + (rem * 0.33) + (w * 9) - (l * 6) + (cg * 7) + (sv * 8)
        - (h * 0.25) - er - (walks * 0.5) - (hb * 0.5) + k
        + (hld * 7.5) - (bsv * 3) + (qs * 5) - (sb * 0.25)
    )
    fps_g = fps / games

    return build_fp_table(players, fps, fps_g)
예제 #2
0
def get_finished_df(season, source):
    """Join Baseball Reference standings data onto a team-stat table.

    Takes the team stats for ``season`` from Fangraphs or Baseball Prospectus,
    adds each team's ``W-L%`` from the Baseball Reference standings page and,
    for seasons before CUR_SEASON, marks the World Series champion's row with
    ``FINISH = 'CHAMPION'``.

    Args:
        season: Season year. Compared numerically against CUR_SEASON, so it
            must be a number, not a string.
        source: ``'fg'`` for Fangraphs or ``'bp'`` for Baseball Prospectus.

    Returns:
        The source frame left-merged with ``W-L%`` on ``'Master Team'``.

    Raises:
        ValueError: If ``source`` is neither ``'fg'`` nor ``'bp'``.
    """
    if source == 'fg':
        war_df = get_fg_team_stats(str(season))
        pre_df = teamname_to_abbr(war_df)
        start_year = FG_START_YEAR
    elif source == 'bp':
        warp_df = get_bp_team_stats(str(season))
        pre_df = bp.abbr_to_master(warp_df)
        start_year = BP_START_YEAR
    else:
        # Fail before any network work instead of hitting an unbound
        # ``pre_df`` further down.
        raise ValueError("source must be 'fg' or 'bp', got {!r}".format(source))

    standings = bref.get_page(
        'https://www.baseball-reference.com/leagues/MLB/{}-standings.shtml'.
        format(str(season)))
    standings_table = bref.get_table_by_id(standings,
                                           'expanded_standings_overall')
    standings_df = bref.build_df(standings_table, 0, [
        'Tm', 'Lg', 'Strk', 'pythWL', 'vEast', 'vCent', 'vWest', 'Inter',
        'Home', 'Road', 'ExInn', '1Run', 'vRHP', 'vLHP', 'last10', 'last20',
        'last30', 'gte.500', 'lt.500'
    ], [])
    bref_df = bref.abbr_to_master(standings_df, season)
    df = pre_df.merge(bref_df[['Master Team', 'W-L%']],
                      on='Master Team',
                      how='left')

    # 1904 and 1994 are skipped explicitly.
    # NOTE(review): presumably because no World Series was played - confirm.
    if season < int(CUR_SEASON) and season not in (1904, 1994):
        champ = get_ws_champ(season, start_year)['Master Team'].values[0]
        champ_row = df['Master Team'] == champ
        # Only the first matching row is flagged.
        idx = df.index[champ_row].tolist()[0]
        df.loc[idx, 'FINISH'] = 'CHAMPION'
    return df
예제 #3
0
def load_bref_team_sp():
    """Fetch the CUR_SEASON starter-pitching page and push it to refresh_table.

    The 'stats_table' table(s) on the page are turned into a frame with
    ``bref.build_df``, sorted by ``GmScA`` in descending order, and passed to
    ``refresh_table`` together with the names 'MLB_TEAM' and 'bref_team_sp'.
    Nothing is returned.
    """
    season_url = "https://www.baseball-reference.com/leagues/MLB/{}-starter-pitching.shtml".format(
        CUR_SEASON)
    stats_tables = bref.get_table_by_class(bref.get_page(season_url), 'stats_table')
    team_sp = bref.build_df(stats_tables, 0, ['Tm'], [''])
    ranked = team_sp.sort_values(by=['GmScA'], ascending=False)
    refresh_table('MLB_TEAM', 'bref_team_sp', ranked)
예제 #4
0
def get_all_batter_fps():
    """Compute fantasy points for every batter on the CUR_SEASON league pages.

    Fetches the standard batting, standard fielding and outfield-specific
    fielding pages from Baseball Reference, left-joins outfield assists
    (renamed ``OFA``) and overall assists/errors (``A``, ``E``) onto the
    batting table by ``Name`` and ``Tm``, and applies the scoring formula
    below. Stats missing after a join are treated as 0.

    Returns:
        The result of ``build_fp_table(players, fps, fps_g)`` where ``fps`` is
        the total fantasy points and ``fps_g`` is ``fps`` divided by games.
    """
    join_keys = ['Name', 'Tm']
    dropped = ['Rk', 'Age']

    batting_page = br.get_page(
        "https://www.baseball-reference.com/leagues/MLB/{}-standard-batting.shtml".format(CUR_SEASON))
    batters = br.build_df(
        br.get_table_by_class(batting_page, 'stats_table'), 1,
        ['Name', 'Tm', 'Lg', 'Pos\xa0Summary'], dropped)

    fielding_page = br.get_page(
        "https://www.baseball-reference.com/leagues/MLB/{}-standard-fielding.shtml".format(CUR_SEASON))
    fielders = br.build_df(
        br.get_table_by_class(fielding_page, 'stats_table'), 1,
        ['Name', 'Tm', 'Lg', 'Pos\xa0Summary'], dropped)

    outfield_page = br.get_page(
        "https://www.baseball-reference.com/leagues/MLB/{}-specialpos_of-fielding.shtml".format(CUR_SEASON))
    outfielders = br.build_df(
        br.get_table_by_class(outfield_page, 'stats_table'), 1,
        ['Name', 'Tm', 'Lg'], dropped)

    # Outfield assists are renamed first so they do not collide with the
    # overall 'A' column joined in afterwards.
    with_of = pd.merge(batters, outfielders[['A', 'Name', 'Tm']], on=join_keys, how='left').fillna(0)
    with_of = with_of.rename(columns={'A': 'OFA'})
    combined = pd.merge(with_of, fielders[['A', 'E', 'Name', 'Tm']], on=join_keys, how='left').fillna(0)

    def as_float(column):
        return combined[column].astype('float64')

    players = combined['Name'].astype('str')
    games = as_float('G')
    runs = as_float('R')

    doubles = as_float('2B')
    triples = as_float('3B')
    homers = as_float('HR')
    # Singles are not a column of their own: hits minus extra-base hits.
    singles = as_float('H') - doubles - triples - homers
    rbi = as_float('RBI')
    steals = as_float('SB')
    caught = as_float('CS')
    walks = as_float('BB')
    hit_by_pitch = as_float('HBP')
    strikeouts = as_float('SO')

    errors = as_float('E')
    of_assists = as_float('OFA')
    assists = as_float('A')

    fps = (
        runs + (singles * 1.0) + (doubles * 2) + (triples * 3) + (homers * 4)
        + rbi + (steals * 1.75) - (caught * 0.5) + (walks * 0.75)
        + (hit_by_pitch * 0.5) - (strikeouts * .1) - errors
        + (of_assists * 1) + (assists * 0.05)
    )
    fps_g = fps / games

    return build_fp_table(players, fps, fps_g)
예제 #5
0
def get_ws_champs(start_year):
    """List World Series results scraped from Baseball Reference.

    Reads the 'postseason_series' table on the postseason index page and keeps
    every row whose first cell mentions "world series". The year is the first
    whitespace-separated token of that cell; the team is the text of the third
    cell up to (not including) its first "(". Cell text is lower-cased with
    '*' removed, so team names come back in lower case.

    Args:
        start_year: Earliest year (inclusive) to include in the result.

    Returns:
        DataFrame with an integer ``Year`` column and a ``Team`` column,
        restricted to rows where ``Year >= start_year``.
    """
    page = bref.get_page('https://www.baseball-reference.com/postseason/')
    table = bref.get_table_by_id(page, 'postseason_series')

    years = []
    champs = []
    for row in table[0].find_all('tr'):
        cells = [
            cell.text.replace('*', '').strip().lower()
            for cell in row.find_all(['th', 'td'])
        ]
        # Header/spacer rows can have fewer than three cells; skip them
        # instead of raising IndexError.
        if len(cells) < 3 or "world series" not in cells[0]:
            continue
        years.append(cells[0].split()[0])
        # partition() yields the same prefix as slicing at index("(") but does
        # not raise when the cell contains no parenthesis.
        champs.append(cells[2].partition("(")[0])

    df = pd.DataFrame({'Year': years, 'Team': champs})
    df['Year'] = df['Year'].astype(int)
    return df[df['Year'] >= start_year]