def process_data(time, firstname, lastname, pos):
    """Fetch Statcast data for one player as a batter or as a pitcher.

    Args:
        time: Either a single date string, or a date range written as
            ``"<start> to <end>"`` (e.g. ``"2019-04-01 to 2019-04-30"``).
        firstname: Player's first name, forwarded to ``playerid_lookup``.
        lastname: Player's last name, forwarded to ``playerid_lookup``.
        pos: ``'batter'`` or ``'pitcher'``; selects which Statcast query runs.

    Returns:
        The result of ``statcast_batter`` / ``statcast_pitcher`` for the first
        row returned by the player lookup, or ``None`` when ``pos`` is neither
        ``'batter'`` nor ``'pitcher'``.
    """
    # Reject unsupported positions before doing any lookup work.
    if pos not in ('batter', 'pitcher'):
        return None
    fetch = statcast_batter if pos == 'batter' else statcast_pitcher

    if "to" in time:
        # Split on the separator instead of slicing at fixed offsets, so the
        # range still parses when spacing or date width varies.
        start, _, end = time.partition("to")
        dates = (start.strip(), end.strip())
    else:
        dates = (time,)

    playerid = playerid_lookup(lastname, firstname)
    mlbam_id = int(playerid['key_mlbam'][0])
    return fetch(*dates, player_id=mlbam_id)
def player(first_name, last_name, start_date, end_date):
    """Return a batter's Statcast data for a date range.

    Args:
        first_name: Player's first name.
        last_name: Player's last name.
        start_date: Start of the range, forwarded as ``start_dt``.
        end_date: End of the range, forwarded as ``end_dt``.

    Returns:
        The ``pybaseball.statcast_batter`` result for the first row returned
        by the player lookup, with its index reset.
    """
    player_info = pybaseball.playerid_lookup(last_name, first_name)
    player_id = player_info['key_mlbam'][0]
    # Pass the extracted MLBAM id; ``player_info[0]`` would be a column lookup
    # on the lookup table and fail with KeyError.
    data = pybaseball.statcast_batter(
        start_dt=start_date, end_dt=end_date, player_id=player_id)
    return data.reset_index(drop=True)
def batter(self, name, team, throws='R'):
    """Summarise a batter's results against each predicted pitch label.

    Args:
        name: Player name, matched case-insensitively against ``self.fgh.Name``.
        team: Team, matched case-insensitively against ``self.fgh.Team``.
        throws: Pitcher handedness to keep. ``'R'`` uses ``self.scalerR`` and
            ``self.modelR``; any other value uses ``self.scalerL`` and
            ``self.modelL``.

    Returns:
        ``(pid, batdict)`` where ``pid`` is the MLBAM id and ``batdict`` maps
        a pitch label to ``[pitch_count, wOBA]``, with wOBA computed as
        ``sum(woba_value) / sum(woba_denom)`` rounded to 3 decimals.
    """
    Xcols = ['pfx_x', 'pfx_z', 'release_speed', 'release_spin_rate']
    # ``merged`` maps a reported label to the extra label folded into it.
    if throws == 'R':
        scaler, kmeans = self.scalerR, self.modelR
        merged = {7: 12}
    else:
        scaler, kmeans = self.scalerL, self.modelL
        merged = {0: 4}

    fgh = self.fgh
    matches = fgh[(fgh.Name.str.lower() == name.lower())
                  & (fgh.Team.str.lower() == team.lower())].playerid
    lookup = playerid_reverse_lookup(matches, 'fangraphs')
    # Take the first row explicitly; int() on a whole Series is deprecated.
    pid = int(lookup.key_mlbam.iloc[0])

    bat = statcast_batter(start_dt='2015-03-28', end_dt='2019-09-29',
                          player_id=pid)
    bat = bat.dropna(subset=Xcols).reset_index(drop=True)
    # Copy the filtered rows so the new column is not written onto a slice.
    bat = bat[bat.p_throws == throws].copy()
    bat['p_type'] = kmeans.predict(scaler.transform(bat[Xcols]))

    absorbed = set(merged.values())
    batdict = {}
    for i in range(12):
        if i in absorbed:
            # Already counted under the label it was merged into.
            continue
        labels = [i, merged[i]] if i in merged else [i]
        batnum = bat[bat.p_type.isin(labels)]
        woba = round(
            (np.sum(batnum.woba_value) / np.sum(batnum.woba_denom)), 3)
        batdict[i] = [len(batnum), woba]
    return pid, batdict
def data_from_name(last, first, year1=2020, num_years=1):
    """Return a player's non-home-run balls in play from Statcast.

    When the name matches several players, the one whose MLB career
    (``mlb_played_first`` through ``mlb_played_last``, inclusive) overlaps the
    requested seasons the most is used.

    Args:
        last: Player's last name.
        first: Player's first name.
        year1: First season to query.
        num_years: Number of consecutive seasons starting at ``year1``.

    Returns:
        A DataFrame of pitches whose description contains ``'hit_into_play'``,
        excluding home runs and rows missing launch angle, launch speed or
        hit coordinates. Returns ``None`` when no single player can be
        determined.
    """
    lookup = pybaseball.playerid_lookup(last, first)

    if len(lookup) > 1:
        print('Multiple players found, determining player by years.')
        final_year = year1 + num_years - 1
        # Seasons shared by [first, last] and [year1, final_year], both ends
        # inclusive so a player's last season counts. Missing career years
        # become an overlap of zero instead of raising.
        career_start = lookup['mlb_played_first'].clip(lower=year1)
        career_end = lookup['mlb_played_last'].clip(upper=final_year)
        overlap = (career_end - career_start + 1).clip(lower=0).fillna(0)
        lookup = lookup[overlap == overlap.max()]

    if len(lookup) != 1:
        # Covers both "no match" and "still ambiguous".
        print('Unable to determine player')
        return None

    mlb_id = int(lookup['key_mlbam'].iloc[0])
    data = pybaseball.statcast_batter(f'{year1}-01-01',
                                      f'{year1+num_years}-01-01', mlb_id)
    # na=False treats a missing description as "not in play".
    in_play = data['description'].str.contains(
        'hit_into_play', regex=False, na=False)
    data = data[in_play]
    data = data[data['events'] != 'home_run']
    data = data.dropna(
        how='any',
        subset=['launch_angle', 'launch_speed', 'hc_x', 'hc_y'])
    return data
# Get players for both teams as (first name, last name) pairs.
players = [("corey", "kluber"), ("yan", "gomes"), ("yonder", "alonso"),
           ("jose", "ramirez"), ("josh", "donaldson"),
           ("francisco", "lindor"), ("melky", "cabrera"),
           ("jason", "kipnis"), ("michael", "brantley"),
           ("luis", "castillo"), ("tucker", "barnhart"), ("joey", "votto"),
           ("scooter", "gennett"), ("eugenio", "suarez"),
           ("mason", "williams"), ("billy", "hamilton"),
           ("preston", "tucker"), ("jose", "peraza")]
mycursor = mydb.cursor()

# Events that are neither a home run nor a ball in play.
not_in_play = ("strikeout", "walk", "home_run")

# Get player data and store per-pitch home-run and ball-in-play rates.
for player in players:
    lookup = playerid_lookup(player[1], player[0])
    print(len(lookup))
    if len(lookup) != 1:
        # Skip names that are missing or ambiguous in the lookup table.
        continue

    stats = statcast_batter('2018-3-29', '2018-10-02',
                            lookup.key_mlbam.iloc[0])
    hr = 0
    bip = 0
    tot = 0
    for event in stats.events:
        tot += 1
        if event == 'home_run':
            hr += 1
        # Rows with no event hold a float NaN; those are not balls in play.
        if not isinstance(event, float) and event not in not_in_play:
            bip += 1

    if tot == 0:
        # No Statcast rows in the window: the rates are undefined, so skip
        # this player rather than divide by zero.
        continue

    sql = "INSERT INTO batters (NAME, HR, BIP) VALUES (%s, %s, %s)"
    val = (f'{player[0]} {player[1]}', hr / tot, bip / tot)
    mycursor.execute(sql, val)
    mydb.commit()
def retrieve_data():
    """
    Function for retrieving data from Statcast and performing some custom
    formatting.

    Reads the selected pitcher, hitter and date range from the module-level
    widgets, pulls both players' Statcast data, and refreshes the plot title,
    the pitch-location data source, the strike zone and both pitch-frequency
    plots. Rebinds the module-level ``data``, ``pitcher_data``,
    ``batter_data`` and ``sub_batter``. When the two players never faced each
    other in the range, only the warning label is updated.
    """
    global p_dict
    global h_dict
    global data
    global data_cds
    global pitch_cds_p
    global pitches_p
    global pitcher_data
    global batter_data
    global sub_batter

    run_button.label = 'Running...'
    try:
        reset_data()

        # update plot title
        pitchername = pitcherselect.value.split(' -')[0]
        battername = hitterselect.value.split(' -')[0]
        plot.title.text = f'{pitchername} vs. {battername}'

        pitcher_id = p_dict[pitcherselect.value]
        hitter_id = h_dict[hitterselect.value]
        start = str(start_date.value)
        end = str(end_date.value)

        # all the data for the batter in the time frame
        batter_data = pitch_info(statcast_batter(start, end, hitter_id))

        # all data for the pitcher in the time frame
        pitcher_data = pitch_info(statcast_pitcher(start, end, pitcher_id))

        # filter to only the pitches thrown to selected batter
        data = pitcher_data[pitcher_data['batter'] == hitter_id].copy()
        sub_batter = batter_data[batter_data['pitcher'] == pitcher_id].copy()

        if len(data) == 0:
            warning_label.text = 'No matchups in specified time frame'
            return
        warning_label.text = ''

        # assign event names and a "balls, strikes" count label per pitch
        result = []
        count = []
        for events, description, balls, strikes in zip(
                data['events'], data['description'],
                data['balls'], data['strikes']):
            result.append(results(events, description))
            count.append(f"{balls}, {strikes}")
        data['result'] = result
        data['count'] = count

        # update column data source
        data_cds.data = {
            'pitch': data['pitch_name'],
            'speed': data['release_speed'],
            'result': data['result'],
            'count': data['count'],
            'color': data['color'],
            'plate_x': data['plate_x'],
            'plate_z': data['plate_z']
        }

        # update strike zone using the average top/bottom over the matchup
        new_top = data.sz_top.sum() / len(data.sz_top)
        new_bottom = data.sz_bot.sum() / len(data.sz_bot)
        half_width = 8.5 / 12
        strike_zone_cds.data = {
            'x': [-half_width, half_width],
            'x_side1': [-half_width, -half_width],
            'x_side2': [half_width, half_width],
            'top': [new_top, new_top],
            'bottom': [new_bottom, new_bottom],
            'side1': [new_top, new_bottom],
            'side2': [new_bottom, new_top]
        }

        # update pitch plots
        p_unique, p_matchup, p_overall = pitch_frequency(pitcher_data, data)
        pitches_p.x_range.factors = p_unique
        pitch_cds_p.data = {
            'pitches': p_unique,
            'matchup': p_matchup,
            'overall': p_overall
        }

        b_unique, b_matchup, b_overall = pitch_frequency(
            batter_data, sub_batter)
        pitches_b.x_range.factors = b_unique
        pitch_cds_b.data = {
            'pitches': b_unique,
            'matchup': b_matchup,
            'overall': b_overall
        }
    finally:
        # Always restore the button, even if a query or update step raised.
        run_button.label = 'Run'
firstName = input() print("Enter player's last name: ") lastName = input() print("Enter Start Date (YYYY-MM-DD): ") fromDate = input() print("Enter End Date (YYYY-MM-DD): ") toDate = input() playerBattedBalls = get_player_batted_balls(firstName, lastName, fromDate, toDate) playerBattedBalls = playerBattedBalls.reset_index(drop=True) xBA = playerBattedBalls["estimated_ba_using_speedangle"] playerIDtable = pybaseball.playerid_lookup(lastName, firstName) player = playerIDtable.loc[0].key_mlbam playerBattedBallsMore = pybaseball.statcast_batter(fromDate, toDate, player) BAlist = [] for x in range(100000): hit = 0 for i in range(0, len(xBA)): rand = random.uniform(0, 1) if rand < xBA[i]: hit = hit + 1 BA = hit / ((len(xBA) + len(playerBattedBallsMore.events[ playerBattedBallsMore.events == 'strikeout']))) BAlist.append(BA) hits = len(playerBattedBallsMore.events[ playerBattedBallsMore.events == 'single']) + len( playerBattedBallsMore.events[playerBattedBallsMore.events == 'double']
async def on_message(message):
    """Handle one chat message that starts with a known ``!command``.

    Replies are sent with ``client.send_message`` to the channel the message
    came from. Messages that match no command are ignored.

    Args:
        message: Incoming message; ``message.content`` is the text and
            ``message.channel`` is where replies are sent.
    """
    quote_path = "quote_file.pk1"  # holds a JSON list despite the extension

    if message.content.startswith('!istimcool'):  # Tim is cool, duh.
        await client.send_message(message.channel, 'Well of course.')
    elif message.content.startswith('!hidrricks'):  # Greets on lab day.
        await client.send_message(message.channel,
                                  'Hello! It must be lab day.')
    elif message.content.startswith('!whattimeisit'):  # Returns current time
        await client.send_message(message.channel,
                                  datetime.datetime.now().time())
        await client.send_message(message.channel, 'Central Time')
    elif message.content.startswith('!coinflip'):  # Flips a coin
        resultOfFlip = random.choice(['Heads', 'Tails'])
        await client.send_message(message.channel, resultOfFlip)
    elif message.content.startswith(
            '!addquote'):  # Adds a quote to a running quote file.
        if not os.path.isfile(quote_path):
            quote_list = []
        else:
            with open(quote_path, "r") as quote_file:
                quote_list = json.load(quote_file)
        # Strip the separator after the command so quotes are stored clean,
        # and ignore a bare "!addquote" with nothing to store.
        new_quote = message.content[9:].strip()
        if new_quote:
            quote_list.append(new_quote)
            with open(quote_path, "w") as quote_file:
                json.dump(quote_list, quote_file)
    elif message.content.startswith(
            '!quote'):  # Pulls a random quote from the quote file.
        quote_list = []
        if os.path.isfile(quote_path):
            with open(quote_path, "r") as quote_file:
                quote_list = json.load(quote_file)
        if quote_list:
            await client.send_message(message.channel,
                                      random.choice(quote_list))
        else:
            # No file yet, or an empty list: reply instead of crashing.
            await client.send_message(message.channel,
                                      'No quotes have been added yet.')
    elif message.content.startswith('!rps'):  # Play rock paper scissors.
        rps = random.choice(["Rock", "Paper", "Scissors"])
        await client.send_message(message.channel, rps)
    elif message.content.startswith('!emoji'):  # Turns to emoji text.
        Message = message.content[7:]
        if Message == 'hello':  # hello
            await client.send_message(
                message.channel,
                ':regional_indicator_h::regional_indicator_e::regional_indicator_l::regional_indicator_l::regional_indicator_o:'
            )
        elif Message == 'liam sucks':  # liam sucks
            await client.send_message(
                message.channel,
                ':regional_indicator_l::regional_indicator_i::regional_indicator_a::regional_indicator_m: :regional_indicator_s::regional_indicator_u::regional_indicator_c::regional_indicator_k::regional_indicator_s:'
            )
    elif message.content.startswith(
            '!stats'):  # Returns batting stats for player
        playerName = message.content[7:]
        print(playerName)
        if playerName == 'Mike Moustakas':
            from pybaseball import statcast_batter
            playerID = 519058
            moustakasStats = statcast_batter('2017-04-01', '2017-09-01',
                                             playerID)
            print('success')
            await client.send_message(message.channel, moustakasStats)
    elif message.content.startswith('!censor'):  # NO SWEARING
        await client.send_message(
            message.channel,
            'http://i0.kym-cdn.com/photos/images/original/001/299/189/6bb.jpg')
    elif message.content.startswith(
            '!weather'):  # Returns condition and temp at location
        location = message.content[9:]
        from weather import Weather
        weather = Weather()
        lookup = weather.lookup_by_location(location)
        condition = lookup.condition().text()
        temp = lookup.condition().temp()
        desc = lookup.description()
        await client.send_message(message.channel, condition)
        await client.send_message(message.channel, temp)
        await client.send_message(message.channel, desc)
    elif message.content.startswith('!steam'):  # Steam info
        steamapi.core.APIConnection(api_key=steamToken, validate_key=True)
        me = steamapi.user.SteamUser(userurl="acanceroustwzlr")
        steamCommand = message.content[7:]
        if steamCommand == 'level':
            await client.send_message(message.channel, me.level)
        elif steamCommand == 'friends':
            await client.send_message(message.channel, me.friends)
        elif steamCommand == 'recently_played':
            await client.send_message(message.channel, me.recently_played)
        elif steamCommand == 'games':
            await client.send_message(message.channel, me.games)
from pybaseball import playerid_lookup, statcast_batter, statcast_pitcher

# Show the player-ID lookup row(s) for Rhys Hoskins.
hoskins_id = playerid_lookup(last='hoskins', first='rhys')
print(hoskins_id)

# Pull and show his Statcast batting data; the MLBAM id is given literally
# rather than read from the lookup result above.
print("statcast stats from march 1st to april 1st")
hoskins_statcast = statcast_batter(
    start_dt='2019-03-01',
    end_dt='2019-04-01',
    player_id=656555,
)
print(hoskins_statcast)

# Disabled pitcher example, kept as an inert string.
"""
kersh = playerid_lookup('kershaw', 'clayton')
kershaw_stats = statcast_pitcher('2017-06-01', '2017-07-01', kersh)
print(kershaw_stats.head(5))
"""
def __init__(self, playerid, start="2019-03-01", end="2019-11-03"):
    """Load and summarise a batter's regular-season Statcast events.

    Args:
        playerid: Player id; converted with ``int`` for the Statcast query
            and stored unchanged on ``self.playerid``.
        start: First date of the query, ``YYYY-MM-DD``.
        end: Last date of the query, ``YYYY-MM-DD``.
    """
    frame = statcast_batter(start, end, int(playerid))

    # Keep only rows that carry an event, then regular-season games only.
    frame = frame[frame["events"].notna()]
    frame = frame[frame["game_type"] == "R"]

    # Extra features; park factors and sprint speed use the 2018 tables.
    # NOTE(review): 2018 is fixed regardless of start/end -- confirm intended.
    frame = add_sprint_speed(
        add_park_factors(add_spray_angle(frame), 2018), 2018)

    self.df = frame
    model_columns = [
        'launch_speed', 'launch_angle', 'adj_spray_angle',
        '1b_park_factor', '2b_park_factor', '3b_park_factor',
        'hr_park_factor', 'sprint_speed',
    ]
    self.model_input = frame[model_columns]

    # Keep playerid for later.
    self.playerid = playerid

    # The year is recorded only when start and end fall in the same one.
    start_year = start.split("-")[0]
    end_year = end.split("-")[0]
    self.year = start_year if start_year == end_year else None

    # Event totals; an event that never occurred is absent from the counts,
    # so fall back to 0 rather than hit a KeyError.
    event_counts = frame["events"].value_counts()
    self.bb = event_counts.get("walk", 0)
    self.single = event_counts.get("single", 0)
    self.double = event_counts.get("double", 0)
    self.triple = event_counts.get("triple", 0)
    self.hr = event_counts.get("home_run", 0)
    self.sf = event_counts.get("sac_fly", 0)
    self.hbp = event_counts.get("hit_by_pitch", 0)

    # At-bats and wOBA come from FanGraphs and are set later.
    self.fg_ab = None
    self.fg_woba = None
from typing import Any, Union

import numpy as np
import pandas as pd
import pybaseball
from pandas import DataFrame, Series
from pandas.io.parsers import TextFileReader

# Sample pulls from several pybaseball batting sources.
b = pybaseball.batting()
b2 = pybaseball.batting_stats_bref()
b3 = pybaseball.bwar_bat()
s = pybaseball.statcast_batter(start_dt='2020-7-31', end_dt='2020-08-01',
                               player_id=596019)

# Print arrays in full instead of truncating them.
np.set_printoptions(threshold=np.inf)

# Column names of the 2019 Retrosheet game logs.
r: Union[Union[TextFileReader, Series, DataFrame, None], Any] = np.array(
    pybaseball.retrosheet.season_game_logs(2019).columns)

l = pybaseball.lahman.batting()

# Column names of the local 2019 playing and teams CSV files.
path = '/Users/ryangerda/PycharmProjects/DFS_Baseball/Data/playing-2019.csv'
p = np.array(pd.read_csv(path).columns)
path = '/Users/ryangerda/PycharmProjects/DFS_Baseball/Data/teams-2019.csv'
t = np.array(pd.read_csv(path).columns)
def get_batting_player(
    start_dt=None,
    end_dt=None,
    player_last='Vogelbach',
    player_first='Daniel',
    fname_all=None,
    fname_bb=None,
    features=[
        'events', 'description', 'batter', 'stand', 'launch_angle',
        'launch_speed', 'hc_x', 'hc_y', 'pitcher', 'p_throws', 'pitch_type',
        'release_speed', 'release_spin_rate'
    ]
):
    """
    Pull player statcast batting data from baseballsavant using pybaseball

    https://github.com/jldbc/pybaseball
    https://baseballsavant.mlb.com/statcast_search

    Arguments
        start_dt: get data from start_dt forward
        end_dt: get data up to end_dt
        player_last: player's last name
        player_first: player's first name
        fname_all: export csv of all statcast at bat outcomes to this file
            **must be .csv**
        fname_bb: export csv of all outcomes with a batted ball to this file
            **must be .csv**
        features: columns to keep from the statcast data

    Returns
        (all_outcomes, batted_balls) tuple of dataframes

    Raises
        IndexError: if the player lookup returns no rows

    Saves to files 'fname_all' and 'fname_bb' if fname is not None
    """
    # Work on a private copy so the shared default list is never handed out.
    columns = list(features)

    # get player's mlbam_id (mlb advanced metrics id)
    # note: for players with the same first+last name, this takes the first
    # row the lookup returns -- for now pick players with unique names
    lookup = playerid_lookup(player_last, player_first)
    if len(lookup) == 0:
        # Same exception type as indexing the empty result, clearer message.
        raise IndexError(
            'No player found for {} {}'.format(player_first, player_last))
    player_id = lookup['key_mlbam'].values[0]

    # get statcast data (this can take awhile)
    print('Querying batting stats for {} {}'.format(player_first,
                                                    player_last))
    df = statcast_batter(start_dt, end_dt, player_id)

    # discard null events, then keep the specified features only
    all_outcomes = df[df['events'].notnull()]
    all_outcomes = all_outcomes[columns]

    if fname_all is not None:
        # export to csv
        all_outcomes.to_csv(fname_all, index=False)
        print('Exported: {}'.format(fname_all))

    # get batted balls only
    batted_balls = filter_batted_balls(all_outcomes)

    if fname_bb is not None:
        # export data
        batted_balls.to_csv(fname_bb, index=False)
        print('Exported: {}'.format(fname_bb))

    return (all_outcomes, batted_balls)
'Split-Finger': 8, 'Sinker': 9 } ''' # For the hitting stats, I only care about non-bunted balls in play, so will filter statcast data based on that ball_in_play = ['hit_into_play', 'hit_into_play_score', 'hit_into_play_no_out'] bb_types = ['ground_ball', 'fly_ball', 'line_drive', 'popup'] # Now, will fill each row w/ the respective statcast data for idx, row in test_df.iterrows(): name = row.Name id = int(row.mlb_id) start = int(row.first_played) end = int(row.last_played) stats = pybaseball.statcast_batter(f'{start}-01-01', f'{end}-12-31', id) # This is where I will filter out all events that do not result in a ball in play stats = stats[(stats.description.isin(ball_in_play))] groundBallRate = len(stats[stats.bb_type == bb_types[0]]) / len(stats) flyBallRate = len(stats[stats.bb_type == bb_types[1]]) / len(stats) lineDriveRate = len(stats[stats.bb_type == bb_types[2]]) / len(stats) popUpRate = len(stats[stats.bb_type == bb_types[3]]) / len(stats) print(groundBallRate, flyBallRate, lineDriveRate, popUpRate) #print(stats.launch_speed) This returns NaNs #print(stats.launch_angle) As does this test_df.at[idx, 'bbType_flyBall'] = flyBallRate test_df.at[idx, 'bbType_groundBall'] = groundBallRate test_df.at[idx, 'bbType_lineDrive'] = lineDriveRate test_df.at[idx, 'bbType_popUp'] = popUpRate