예제 #1
0
def main():
	"""Rebuild the ``batting`` table from season-by-season batting stats.

	Drops and recreates the ``batting`` table, then loads every season from
	1970 through 2019 via ``batting_stats`` and inserts one row per player
	season. The connection is always closed, even if a download or insert
	fails part-way through.

	NOTE(review): the ``?`` placeholders suggest a sqlite3-style connection;
	confirm against ``Database.create_connection``.
	"""
	# DataFrame columns, in the same order as the table columns created below.
	source_columns = (
		'Season', 'Name', 'Team', 'Age', 'G', 'AB', 'PA', 'H', '1B', '2B',
		'3B', 'HR', 'RBI', 'SB', 'AVG', 'BB%', 'K%', 'BB/K', 'OBP', 'SLG',
		'OPS', 'ISO', 'BABIP', 'GB/FB', 'LD%', 'GB%', 'FB%', 'IFFB%', 'wOBA',
		'WAR', 'wRC+',
	)
	insert_sql = ('INSERT into batting VALUES (' +
			', '.join(['?'] * len(source_columns)) + ')')

	db = Database()
	conn = db.create_connection(DB_NAME)
	try:
		c = conn.cursor()
		c.execute('DROP table if exists batting')
		conn.commit()
		c.execute('CREATE table if not exists batting(Season real, Name text, Team text, ' +
				'Age real, G real, AB real, PA real, H real, h_1B real, h_2B real, ' +
				'h_3B real, HR real, RBI real, SB real, AVG real, BB_p real, K_p real, ' +
				'BB_K real, OBP real, SLG real, OPS real, ISO real, BABIP real, ' +
				'GB_FB real, LD_p real, GB_p real, FB_p real, IFFB_p real, wOBA real, ' +
				'WAR real, wRC_p real)')
		conn.commit()

		for season in range(1970, 2020):
			data = batting_stats(season)

			# One executemany per season instead of one call per row.
			rows = [tuple(row[column] for column in source_columns)
					for _, row in data.iterrows()]
			c.executemany(insert_sql, rows)

		conn.commit()
	finally:
		conn.close()
예제 #2
0
def load_hitting_data():
    """Return the 2019 batting frames for ``handVar='L'`` and ``handVar='R'``.

    Both frames are fetched with ``league='all'``, ``qual=1`` and ``ind=0``,
    then reduced to rows with at least 30 at-bats (``AB >= 30``).

    Returns:
        A ``(left, right)`` tuple of filtered DataFrames, in that order.
    """
    min_at_bats = 30

    # Fetch both splits first, then filter, so the call order is L then R.
    raw_frames = [
        batting_stats(2019,
                      end_season=None,
                      league='all',
                      qual=1,
                      ind=0,
                      handVar=hand)
        for hand in ('L', 'R')
    ]
    left, right = (frame[frame['AB'] >= min_at_bats] for frame in raw_frames)
    return left, right
예제 #3
0
def test_cache(monkeypatch: MonkeyPatch, cache_type: str,
               thrower: Callable) -> None:
    """Check that a second ``batting_stats`` call is served from the cache.

    Args:
        monkeypatch: Fixture used to replace ``requests.get``.
        cache_type: Value patched into ``pybaseball.cache.config.cache_type``.
        thrower: Replacement for ``requests.get``; expected to raise so that
            any network access during the cached read fails the test.
    """
    with patch('pybaseball.cache.config.cache_type', cache_type):
        # Delete any existing data just in case
        pybaseball.cache.purge()

        try:
            # Uncached read
            result = pybaseball.batting_stats(2019)  # type: ignore

            # Make requests.get throw an error so we can be sure this is coming from the cache
            monkeypatch.setattr(requests, 'get', thrower)

            # Cached read
            result2 = pybaseball.batting_stats(2019)  # type: ignore

            pd.testing.assert_frame_equal(result, result2)
        finally:
            # Cleanup runs even when the comparison fails, so a failing run
            # does not leave cached data behind for other tests.
            pybaseball.cache.purge()
def make_period_dicts(dictionary):
    """Fetch batting and pitching stats for every key of ``dictionary``.

    Each key is converted with ``int()`` and used as the season for
    ``pyb.batting_stats`` / ``pyb.pitching_stats`` (both with ``qual=False``).
    All batting frames are fetched before any pitching frame.

    Returns:
        A ``(batter_df, pitcher_df)`` tuple of dicts keyed by the original
        (unconverted) keys.
    """
    batter_df = {}
    for season_key in dictionary.keys():
        batter_df[season_key] = pyb.batting_stats(int(season_key), qual=False)

    pitcher_df = {}
    for season_key in dictionary.keys():
        pitcher_df[season_key] = pyb.pitching_stats(int(season_key), qual=False)

    return batter_df, pitcher_df
예제 #5
0
def get_fangraphs_data(path, fn, startyear, endyear, force_API):
    """Return batting data from a cached CSV, or download and cache it.

    Args:
        path: Directory prefix; concatenated directly with ``fn``, so it must
            already end with a path separator.
        fn: CSV file name.
        startyear: First argument passed to ``batting_stats``.
        endyear: Second argument passed to ``batting_stats``.
        force_API: When equal to ``True``, ignore any cached file and download.

    Returns:
        The DataFrame read from the CSV, or the freshly downloaded frame
        reduced to the columns listed below.
    """
    cache_file = path + fn

    # Serve the cached copy unless a refresh was forced or nothing is cached.
    if not (force_API == True) and os.path.exists(cache_file):  # noqa: E712
        return pd.read_csv(cache_file)

    kept_columns = [
        'Name', 'G', 'AB', 'H', '2B', '3B', 'HR', 'SF', 'BB', 'HBP', 'OBP',
        'SLG', 'OPS'
    ]
    data = batting_stats(startyear, endyear)[kept_columns]
    data.to_csv(cache_file, index=None, header=True)
    return data
예제 #6
0
def cmd_batting_upload(start_year, end_year, db_username, db_password,
                       db_hostname, db_name, db_tablename):
    """Download batting stats for a season range and upload them to a table.

    The engine comes from ``initdb_batting``; ``%`` is stripped from the
    column names before ``upload_block`` is called. Any exception raised
    while downloading or uploading is reported with ``click.echo`` and not
    re-raised.
    """
    click.echo('[[[ PULLING BATTING DATAFRAME ]]]')

    engine = initdb_batting(db_username, db_password, db_hostname, db_name,
                            db_tablename)

    click.echo('Pulling data...')
    try:
        frame = batting_stats(start_year, end_year)
        frame.columns = frame.columns.str.replace('%', '')
        upload_block(frame, engine, db_tablename)
    except Exception as err:
        click.echo(f"ERROR pulling down data - Error was = {err}")
        return

    click.echo(f'SUCCESS, pulled {frame.shape[0]} records')
예제 #7
0
def retrieve_stats(team, year):
    """Print one team's batting and pitching tables for a season.

    Batters are listed by plate appearances and pitchers by innings pitched,
    both in descending order, with the player name as the index.

    Args:
        team: Value matched against the ``Team`` column.
        year: Season passed to ``batting_stats`` and ``pitching_stats``.
    """
    batting = batting_stats(year)
    pitching = pitching_stats(year)

    batting_columns = [
        'Name', 'PA', 'WAR', 'wRC+', 'AVG', 'OBP', 'SLG', 'OPS', 'HR'
    ]
    team_batting = (
        batting[batting['Team'] == team][batting_columns]
        .sort_values('PA', ascending=False)
        .set_index('Name')
    )

    print("***** BATTING STATS *****")
    print(team_batting)
    print("")

    pitching_columns = ['Name', 'IP', 'WAR', 'FIP']
    team_pitching = (
        pitching[pitching['Team'] == team][pitching_columns]
        .sort_values('IP', ascending=False)
        .set_index('Name')
    )

    print("***** PITCHING STATS *****")
    print(team_pitching)
예제 #8
0
# Choose the season to rank. After March the current year is used together
# with a trailing 7-day window; otherwise fall back to the previous year and
# skip the "recent" window entirely.
currentDate = datetime.now()
currentMonth = f"{currentDate.month:02d}"
if currentDate.month > 3:
    currentYear = currentDate.year
    currentDay = date.today()
    recentDay = currentDay - timedelta(days=7)
    currentDay = str(currentDay)
    recentDay = str(recentDay)
else:
    currentYear = currentDate.year - 1
    currentDay = None
    recentDay = None

pitchingData = pitching_stats(currentYear)
battingData = batting_stats(currentYear)

if currentDay is not None:
    recentPitchingData = pitching_stats_range(recentDay, currentDay)
    recentBattingData = batting_stats_range(recentDay, currentDay)

    # Build each payload before opening its file: opening with "w" truncates
    # the existing file, so a serialization error must not wipe the old data.
    recentPitchingJson = json.dumps(
        json.loads(recentPitchingData.reset_index().to_json(orient='index')),
        indent=2)
    with open("../public/json/pitcherRankingsRecent.json", "w") as recentPitchingDataFile:
        recentPitchingDataFile.write(recentPitchingJson)

    recentBattingJson = json.dumps(
        json.loads(recentBattingData.reset_index().to_json(orient='index')),
        indent=2)
    with open("../public/json/batterRankingsRecent.json", "w") as recentBattingDataFile:
        recentBattingDataFile.write(recentBattingJson)

else:
    # NOTE(review): the rest of this branch is not visible here; this handle
    # is left open for whatever code follows -- confirm it gets closed.
    recentPitchingDataFile = open("../public/json/pitcherRankingsRecent.json", "w")
예제 #9
0
def yearGrab(currentSeason):
    """Download one season of data and write it to CSV files.

    Files written under ``data/YearlyData/temp/``: ``bstats.csv`` (batting,
    ``qual=1``), ``pstats.csv`` / ``fan_pit.csv`` (pitching),
    ``team_bstats.csv`` / ``fan_team_bat.csv`` (team batting),
    ``team_pstats.csv`` / ``fan_team_pit.csv`` (team pitching),
    ``fan_bat.csv`` (batting, default ``qual``) and
    ``statcast_exit_velocity.csv``. The prospect list goes to
    ``data/YearlyData/Top_Prospects.csv``.

    Each dataset that is saved under two names is downloaded once and
    written twice.

    Args:
        currentSeason: Season passed to every per-season look-up.
    """
    import pybaseball
    from pybaseball import batting_stats
    from pybaseball import pitching_stats
    from pybaseball import statcast_batter_exitvelo_barrels
    from pybaseball import top_prospects

    # Batting Stats
    BattingStats_Year = batting_stats(currentSeason, qual=1)
    BattingStats_Year.to_csv('data/YearlyData/temp/bstats.csv')

    # Team Batting Stats (also saved under the fan_* name)
    BattingStats_Team_Year = pybaseball.team_batting(currentSeason)
    BattingStats_Team_Year.to_csv('data/YearlyData/temp/team_bstats.csv')
    BattingStats_Team_Year.to_csv('data/YearlyData/temp/fan_team_bat.csv')

    # Pitching Stats (also saved under the fan_* name)
    PitchingStats_Year = pitching_stats(currentSeason)
    PitchingStats_Year.to_csv('data/YearlyData/temp/pstats.csv')
    PitchingStats_Year.to_csv('data/YearlyData/temp/fan_pit.csv')

    # NOTE(review): this message is printed before the remaining downloads
    # have run; kept at its original position.
    print(str(currentSeason) + " : Stats Grab: Successful")

    # Team Pitching Stats (also saved under the fan_* name)
    PitchingStats_Team_Year = pybaseball.team_pitching(currentSeason)
    PitchingStats_Team_Year.to_csv('data/YearlyData/temp/team_pstats.csv')
    PitchingStats_Team_Year.to_csv('data/YearlyData/temp/fan_team_pit.csv')

    # Disabled: Year Standings
    # if currentSeason >= 1969:
    #     from pybaseball import standings
    #     # get the end-of-season division standings for each season
    #     Standings = standings(currentSeason)
    #     Standings.insert(0, 'Season', currentSeason)
    #     Standings.to_csv('data/YearlyData/temp/team_standings.csv')

    # Disabled: Amateur Draft Data
    # if currentSeason >= 1965:
    #     from pybaseball import amateur_draft
    #     # Get amateur Draft Results
    #     Number_Rounds = 30
    #     RoundRange = list(range(1, Number_Rounds + 1))
    #     for i in RoundRange:
    #         # Pull season draft data 1 round at a time
    #         Amateur_Draft = amateur_draft(currentSeason, i)
    #         # Add Draft Year
    #         Amateur_Draft.insert(0, 'Draft_Year', currentSeason)
    #         # Read file with all draft files so far
    #         A_Draft = pd.read_csv('data/YearlyData/temp/amateur_draft.csv')
    #         # Add New Round
    #         A_Draft = A_Draft.append(Amateur_Draft, ignore_index=True)
    #         # Save new round to File
    #         A_Draft.to_csv('data/YearlyData/temp/amateur_draft.csv', sep=',', index=False, encoding='utf-8')

    # Exit Velocity Data
    Exit_Velocity = statcast_batter_exitvelo_barrels(currentSeason)
    Exit_Velocity.insert(0, 'Season', currentSeason)
    Exit_Velocity.to_csv('data/YearlyData/temp/statcast_exit_velocity.csv')

    # Individual Batting Stats with the default qual (differs from bstats.csv)
    fan_bat = pybaseball.batting_stats(currentSeason)
    fan_bat.to_csv('data/YearlyData/temp/fan_bat.csv')

    # Get top overall prospects leaguewide
    topProspects = top_prospects()
    topProspects.to_csv('data/YearlyData/Top_Prospects.csv')
예제 #10
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pybaseball as pyb
from pybaseball import statcast_batter
from pybaseball import playerid_lookup

# Print frames in full and turn off the chained-assignment check.
for _option in ("display.max_rows", "display.max_columns", "chained_assignment"):
    pd.set_option(_option, None)

_SEASONS = (2018, 2019, 2020)

# Pitching frames are fetched first, then batting frames, one season at a time.
pitchers_2018, pitchers_2019, pitchers_2020 = (
    pyb.pitching_stats(_season) for _season in _SEASONS
)
df_2018, df_2019, df_2020 = (
    pyb.batting_stats(_season) for _season in _SEASONS
)

# Disabled experiments kept for reference:
#cards_df = df_2019.loc[df_2019['Team'] == "Cardinals"]
#cards_pitchers = pitchers_2019.loc[pitchers_2019['Team'] == "Cardinals"]
#
#winker_stats = statcast_batter('2020-08-01', '2020-08-03', 608385)
#print(winker_stats)
#
# Sorting the Cardinals (2019) by WAR (top 30)
# And ONLY showing the WAR and name columns.
#print(cards_df.sort_values(by = 'WAR', ascending = False).head(30))
#print(cards_pitchers)
예제 #11
0
This is the data visualization part. It lets us see graphics of the data, rather than having to read through all the numbers.
Here on line 5 we see something that is usually non-Pythonic: two statements on one line, separated by a semicolon. This is how you can put
two statements on one line in Python; without the semicolon this would not be valid code.
"""
import pandas as pd

# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)
import seaborn as sns
from matplotlib import pyplot as plt
import pybaseball as pyb
import warnings

# Silence all warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Fetch the 2019 batting stats into `data` and keep an independent copy in `batting`.
data = pyb.batting_stats(2019)
batting = data.copy()
# Uncomment to preview the first 5 rows.
#print(batting.head(5))
"""
In the following code we will be printing 2019 stats for how many homers per at bats players have, we will filter out players with less than 50 ABs.
Many of these functions from seaborn consist of us having to pass in our DataFrames columns. In some cases we will only need to provide a single 
column, but in other cases where x/y data is required, we will provide two. 
First we will plot this data using seaborn, then we will do it in matplotlib. They do the same exact thing, but in some cases one is more useful than
the other. Usually matplotlib takes more lines of code to produce the same plot. They also look different, so you can choose whichever floats your boat

Seaborn documentation for scatter plots --->  https://seaborn.pydata.org/generated/seaborn.scatterplot.html
"""

# Optional seaborn style for the visualizations (currently disabled).
#sns.set_style('whitegrid')
예제 #12
0
def points(currentSeason):
    """Score every batter and pitcher for a season and write ranked CSV files.

    Batting rows come from ``batting_stats(currentSeason, qual=1)`` and
    pitching rows from ``pitching_stats(currentSeason)``. Each player's
    ``Points`` value is a weighted sum of stat columns, rounded to a whole
    number. Both tables are sorted by ``Points`` (highest first) and written
    to ``data/bstats.csv`` and ``data/pstats.csv``.

    The weights (``R``, ``Single``, ``HR``, ``IP``, ...) are not defined in
    this function. NOTE(review): presumably ``pointAmountsBat()`` and
    ``pointAmountsPit()`` set them as module-level globals -- confirm.

    Args:
        currentSeason: Season passed to both stat look-ups.
    """
    from pybaseball import batting_stats

    data = batting_stats(currentSeason, qual=1)

    # Called before the batting weights below are read.
    pointAmountsBat()

    # Weighted batting total; the local name shadows this function inside its own body.
    points = ( ( R * data['R'] )
    + ( Single * data['1B'] )
    + ( Double * data['2B'] )
    + ( Triple * data['3B'] )
    + ( HR * data['HR'] )
    + ( TB * ( data['1B'] + (2 * data['2B']) + (3 * data['3B']) + (4 * data['HR']) ) )
    + ( RBI * data['RBI'] )
    + ( BB * data['BB'] )
    + ( K * data['SO'] )
    + ( SB * data['SB'] )
    + ( AB * data['AB'] )
    + ( Hits * data['H'])
    + ( XBH * ( data['2B'] + data['3B'] + data['HR']) )
    + ( IBB * data['IBB'] )
    + ( HBP * data['HBP'] )
    + ( CS * data['CS'] ) )


    # missing Sac and GWRBI and everything past CS

    data['Points'] = points
    data['Points'] = data['Points'].round(decimals=0)

    # Columns kept in the exported batting table.
    BattingStats = data[['Season', 'Name', 'Team', 'Age', 'Points', 'G', 'PA', 'AB', 'AVG', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SB', 'CS', 'Pitches']]

    # Highest point totals first, with a fresh 0..n-1 index.
    BattingStats = BattingStats.sort_values('Points', ascending=False)
    BattingStats = BattingStats.reset_index(drop=True)


    BattingStats.to_csv('data/bstats.csv')

    from pybaseball import pitching_stats

    pdata = pitching_stats(currentSeason)

    # Called before the pitching weights below are read. Several weight names
    # (K, HR, BB, IBB) are also used in the batting formula above.
    pointAmountsPit()

    # Weighted pitching total. Note the K weight applies to the 'SO' column
    # and the SO weight applies to the 'ShO' column.
    ppoints = ( ( IP * pdata['IP'] )
    + ( ER * pdata['ER'] )
    + ( K * pdata['SO'] )
    + ( SO * pdata['ShO'] )
    + ( W * pdata['W'] )
    + ( L * pdata['L'] )
    + ( SV * pdata['SV'] )
    + ( BS * pdata['BS'] )
    + ( G * pdata['G'] )
    + ( GS * pdata['GS'] )
    + ( H * pdata['H'] )
    + ( RA * pdata['R'] )
    + ( HR * pdata['HR'] )
    + ( BB * pdata['BB'] )
    + ( HB * pdata['HBP'] )
    + ( IBB * pdata['IBB'] )
    + ( B * pdata['BK'] )
    #+ ( PKO * pdata['PKO'] )
    # NOTE(review): the QS weight is multiplied by the 'BK' column, the same
    # column as the term above -- looks like a copy/paste slip; confirm the
    # intended column.
    + ( QS * pdata['BK'] )
    + ( CG * pdata['CG'] )
    #+ ( NH * pdata['NH'] )
    #+ ( PG * pdata['PG'] )
    #+ ( BF * pdata['BF'] )
    + ( PC * pdata['Pitches'] )
    #+ ( SOP * pdata['SOP'] )
    #+ ( HD * pdata['HD'] )
    )

    pdata['Points'] = ppoints
    pdata['Points'] = pdata['Points'].round(decimals=0)

    # Columns kept in the exported pitching table.
    PitchingStats = pdata[['Season', 'Name', 'Team', 'Age', 'Points', 'W', 'L', 'ERA', 'ER', 'WAR', 'G', 'GS', 'CG', 'ShO', 'SV', 'BS', 'IP', 'H', 'R', 'HR', 'BB', 'IBB', 'HBP', 'BK', 'SO', 'TBF', 'Pitches']]

    # Highest point totals first, with a fresh 0..n-1 index.
    PitchingStats = PitchingStats.sort_values('Points', ascending=False)
    PitchingStats = PitchingStats.reset_index(drop=True)

    PitchingStats.to_csv('data/pstats.csv')
예제 #13
0
               'Fld',
               'Rep',
               'Pos',
               'RAR',
               'WAR',
               'Spd',
               'wRC+']


# %%

# Statcast search CSV export: 2018 season (hfSea=2018), batters, grouped by
# player name, minimum 150 plate appearances (min_pas=150).
url = 'https://baseballsavant.mlb.com/statcast_search/csv?hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7C&hfC=&hfSea=2018%7C&hfSit=&player_type=batter&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt=&game_date_lt=&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_pas=150#results'
response = requests.get(url)
# Fail loudly on an HTTP error instead of handing an error page to read_csv.
response.raise_for_status()
s = response.content
xwOBA_df = pd.read_csv(io.StringIO(s.decode('utf-8')))
#xwOBA_df = pd.read_csv('xWAR/savant_data_xwOBA_20180506.csv')
fg_df = pb.batting_stats(2018)
pf_df = pd.read_csv('mlb/xWAR/park_factors.csv', usecols=['Team', 'Basic'])
guts_df = pd.read_csv('mlb/xWAR/fg_guts.csv', usecols=['Season', 'wOBA', 'wOBAScale', 'R/PA', 'R/W'])
# Teams listed in nl_teams are tagged 'NL'; every other team is tagged 'AL'.
fg_df['League'] = np.where(fg_df.Team.isin(nl_teams), 'NL', 'AL')
# Park factor as a multiplier (the 'Basic' column divided by 100).
pf_df['dec_pf'] = pf_df.Basic / 100
guts_df.rename(columns={'wOBA': 'lg_wOBA', 'wOBAScale': 'wOBA_scale'}, inplace=True)

# %%

# Join Statcast rows to FanGraphs rows by player name, then attach park
# factors by team and the per-season constants.
df = xwOBA_df.merge(fg_df, left_on='player_name', right_on='Name')[comb_fields]
df = df.merge(pf_df, how='left', on='Team')
df = df.merge(guts_df, on='Season')

# %%

# Total wRC divided by total PA across the AL rows.
al_r = df[df.League == 'AL'].wRC.sum() / df[df.League == 'AL'].PA.sum()
예제 #14
0
    'Age Rng': 'Age_Rng',
    'K-BB%': 'K_BB_pct',
    'K/9+': 'K_9_plus',
    'BB/9+': 'BB_9_plus',
    'H/9+': 'H_9_plus',
    'HR/9+': 'HR_9_plus',
    'LOB%+': 'LOB_pct_plus',
    'WHIP+': 'WHIP_plus'
}

# Upload one table per (year, side) pair, named "fangraphs.<side>_<year>".
# Every column is declared as STRING in the schema and an existing table is
# replaced.
stat_fetchers = {'batting': batting_stats, 'pitching': pitching_stats}

for year in years:
    for side in sides:
        if side not in stat_fetchers:
            # Without this check an unknown side would re-upload the previous
            # iteration's frame under the wrong table name.
            raise ValueError('Unknown side: ' + repr(side))
        table_name = side + '_' + str(year)
        table_id = "fangraphs." + table_name
        df = stat_fetchers[side](year, qual=0)
        df.rename(columns=renamed_columns, inplace=True)
        print('Loading ' + table_id)
        table_schema = [
            {'name': column, 'type': 'STRING'} for column in df.columns
        ]
        pandas_gbq.to_gbq(df,
                          table_id,
                          project_id=project_id,
                          if_exists='replace',
                          table_schema=table_schema,
                          chunksize=10000)
        # try:
        #     if side == 'batting':
예제 #15
0
def profile_batting_stats():
    """Call ``batting_stats`` for the 2019 season and discard the result."""
    season = 2019
    batting_stats(season)
ax.set_ylabel('Projected Hits')
ax.set_xlabel('Last Years Hits')

sns.regplot(data = df, x = 'hits_last_year', y = 'hits_next_year')
#plt.show()
"""

# --- Predicting hits: data preparation ---
# Print frames in full and turn off the chained-assignment check.
for _option in ('display.max_columns', 'display.max_rows', 'chained_assignment'):
    pd.set_option(_option, None)

print('Loading ten years of data...this may take a few minutes')

# Called with (2010, 2021) and (2018, 2019).
# NOTE(review): presumably both bounds are inclusive seasons -- confirm
# against the pybaseball documentation.
hits_df = pyb.batting_stats(2010, 2021)
hits_2019 = pyb.batting_stats(2018, 2019)

print('data loaded')

_feature_columns = [
    'Season', 'Name', 'AB', 'HR', 'SLG', 'LD%', 'wOBA', 'Contact%', 'Soft%',
    'Med%', 'Hard%'
]
hits_df_copy = hits_df.copy().loc[:, _feature_columns]

# With rows ordered newest season first inside each name, shift() gives every
# row the HR value of the row just before it, i.e. that name's following
# season. The result is aligned back to hits_df_copy by index.
_newest_first = hits_df_copy.sort_values(['Name', 'Season'], ascending=False)
hits_df_copy['HR_Next_Year'] = _newest_first.groupby('Name')['HR'].shift()

# Keep only rows with more than 300 at-bats.
hits_df_copy = hits_df_copy.loc[hits_df_copy['AB'] > 300]