Example #1
0
def process_data(time, firstname, lastname, pos):
    """Fetch Statcast data for one player, as a batter or as a pitcher.

    Arguments
        time: either a single date string, or a range string whose first ten
            characters are the start date and whose characters from index 14
            onward are the end date. Any string containing "to" is treated
            as a range.
        firstname: player's first name
        lastname: player's last name
        pos: 'batter' or 'pitcher'

    Returns
        The result of statcast_batter / statcast_pitcher for the player, or
        None when pos is neither 'batter' nor 'pitcher'.
    """
    lookup = playerid_lookup(lastname, firstname)
    is_range = "to" in time

    # Choose the query function once instead of repeating each call per branch.
    if pos == 'batter':
        fetch = statcast_batter
    elif pos == 'pitcher':
        fetch = statcast_pitcher
    else:
        return None

    if is_range:
        dates = (time[0:10], time[14:])
    else:
        dates = (time,)
    return fetch(*dates, player_id=int(lookup['key_mlbam'][0]))
def player(first_name, last_name, start_date, end_date):
    """Return Statcast batting data for a player over a date range.

    Arguments
        first_name: player's first name
        last_name: player's last name
        start_date: first date of the query window
        end_date: last date of the query window

    Returns
        The frame returned by pybaseball.statcast_batter with its index reset
        to 0..n-1.
    """
    player_info = pybaseball.playerid_lookup(last_name, first_name)

    # Use the first matching row of the lookup table.
    player_id = int(player_info['key_mlbam'][0])

    # Pass the id itself; indexing the lookup frame with [0] would be read as
    # a request for a column named 0 and raise KeyError.
    data = pybaseball.statcast_batter(start_dt=start_date,
                                      end_dt=end_date,
                                      player_id=player_id)
    return data.reset_index(drop=True)
Example #3
0
 def batter(self, name, team, throws='R'):
     """Summarise a batter's results against each pitch cluster.

     Arguments
         name: batter name, matched case-insensitively against self.fgh.Name
         team: team, matched case-insensitively against self.fgh.Team
         throws: pitcher handedness; 'R' uses self.scalerR / self.modelR and
             any other value uses self.scalerL / self.modelL

     Returns
         (pid, batdict): the batter's MLBAM id and a dict mapping a cluster
         label to [number of pitches, wOBA rounded to 3 places]. For 'R',
         label 12 is folded into 7; otherwise label 4 is folded into 0 and
         has no entry of its own.
     """
     Xcols = ['pfx_x', 'pfx_z', 'release_speed', 'release_spin_rate']
     if throws == 'R':
         scaler = self.scalerR
         kmeans = self.modelR
         # Label 12 is outside range(12), so it needs no explicit skip.
         merged = {7: (7, 12)}
         skipped = ()
     else:
         scaler = self.scalerL
         kmeans = self.modelL
         merged = {0: (0, 4)}
         skipped = (4,)

     fgh = self.fgh
     player = fgh[(fgh.Name.str.lower() == name.lower())
                  & (fgh.Team.str.lower() == team.lower())].playerid
     # Take the single element explicitly; int() on a Series is deprecated.
     pid = int(playerid_reverse_lookup(player, 'fangraphs').key_mlbam.iloc[0])

     bat = statcast_batter(start_dt='2015-03-28',
                           end_dt='2019-09-29',
                           player_id=pid)
     bat = bat.dropna(subset=Xcols).reset_index(drop=True)
     # Copy the filtered rows so adding a column below does not write to a
     # slice of the original frame.
     bat = bat[bat.p_throws == throws].copy()
     bat['p_type'] = kmeans.predict(scaler.transform(bat[Xcols]))

     batdict = {}
     for i in range(12):
         if i in skipped:
             continue
         if i in merged:
             first, second = merged[i]
             batnum = bat[(bat.p_type == first) | (bat.p_type == second)]
         else:
             batnum = bat[bat.p_type == i]
         woba = round(
             (np.sum(batnum.woba_value) / np.sum(batnum.woba_denom)), 3)
         batdict[i] = [len(batnum), woba]
     return pid, batdict
Example #4
0
def data_from_name(last, first, year1=2020, num_years=1):
    """Return a batter's non-home-run batted balls for a span of seasons.

    Arguments
        last: player's last name
        first: player's first name
        year1: first season of the window
        num_years: number of seasons in the window

    Returns
        A frame of pitches whose description contains 'hit_into_play', whose
        event is not 'home_run', and that have launch_angle, launch_speed,
        hc_x and hc_y; or None when the name matches no player or cannot be
        narrowed down to a single player.
    """
    years = set(range(year1, year1 + num_years))
    lookup = pybaseball.playerid_lookup(last, first)

    def seasons_in_window(row):
        # Rows without usable career years cannot overlap the window.
        try:
            debut = int(row['mlb_played_first'])
            final = int(row['mlb_played_last'])
        except (TypeError, ValueError):
            return 0
        # The final season is part of the career, hence the + 1.
        return len(set(range(debut, final + 1)) & years)

    if len(lookup) > 1:
        print('Multiple players found, determining player by years.')
        overlap = lookup.apply(seasons_in_window, axis=1)
        lookup = lookup[overlap == overlap.max()]

    # Covers both "still ambiguous" and "no such player".
    if len(lookup) != 1:
        print('Unable to determine player')
        return None

    mlb_id = int(lookup['key_mlbam'].iloc[0])
    data = pybaseball.statcast_batter(f'{year1}-01-01',
                                      f'{year1+num_years}-01-01', mlb_id)
    in_play = data['description'].str.contains('hit_into_play',
                                               regex=False,
                                               na=False)
    data = data[in_play]
    data = data[data['events'] != 'home_run']
    data = data.dropna(
        how='any', subset=['launch_angle', 'launch_speed', 'hc_x', 'hc_y'])
    return data
Example #5
0
# Players from both teams, as (first name, last name) pairs.
players = [("corey", "kluber"), ("yan", "gomes"), ("yonder", "alonso"),
           ("jose", "ramirez"), ("josh", "donaldson"), ("francisco", "lindor"),
           ("melky", "cabrera"), ("jason", "kipnis"), ("michael", "brantley"),
           ("luis", "castillo"), ("tucker", "barnhart"), ("joey", "votto"),
           ("scooter", "gennett"), ("eugenio", "suarez"),
           ("mason", "williams"), ("billy", "hamilton"), ("preston", "tucker"),
           ("jose", "peraza")]

mycursor = mydb.cursor()
sql = "INSERT INTO batters (NAME, HR, BIP) VALUES (%s, %s, %s)"
# Events that are not counted as a ball in play.
not_in_play = ("strikeout", "walk", "home_run")

# For every uniquely identified player, store the share of returned rows that
# were home runs and the share that were balls in play.
for player in players:
    lookup = playerid_lookup(player[1], player[0])
    print(len(lookup))
    if len(lookup) != 1:
        # The name matched no player or more than one; skip it.
        continue

    stats = statcast_batter('2018-3-29', '2018-10-02',
                            lookup.key_mlbam.iloc[0])
    tot = len(stats.events)
    if tot == 0:
        # Nothing returned for this window, so there is no rate to compute.
        print(f'No data for {player[0]} {player[1]}, skipping')
        continue

    hr = 0
    bip = 0
    for event in stats.events:
        # Rows without an event hold a missing value rather than a string.
        if not isinstance(event, str):
            continue
        if event == 'home_run':
            hr = hr + 1
        elif event not in not_in_play:
            bip = bip + 1

    val = (f'{player[0]} {player[1]}', hr / tot, bip / tot)
    mycursor.execute(sql, val)
    mydb.commit()
Example #6
0
def retrieve_data():
    """
    Pull Statcast data for the selected pitcher and hitter, then refresh the
    plot title, warning label, pitch scatter, strike zone and both pitch
    frequency charts. The run button reads 'Running...' while this executes.
    """
    run_button.label = 'Running...'
    reset_data()
    global p_dict, h_dict, data, data_cds, pitch_cds_p, pitches_p
    global pitcher_data, batter_data, sub_batter

    # plot title uses the part of each selection before ' -'
    pitchername = pitcherselect.value.split(' -')[0]
    battername = hitterselect.value.split(' -')[0]
    plot.title.text = f'{pitchername} vs. {battername}'
    pitcher_id = p_dict[pitcherselect.value]
    hitter_id = h_dict[hitterselect.value]

    # everything the batter saw in the time frame
    batter_data = pitch_info(
        statcast_batter(str(start_date.value), str(end_date.value),
                        hitter_id))
    # everything the pitcher threw in the time frame
    pitcher_data = pitch_info(
        statcast_pitcher(str(start_date.value), str(end_date.value),
                         pitcher_id))

    # narrow each side down to the head-to-head matchup
    data = pitcher_data[pitcher_data['batter'] == hitter_id].copy()
    sub_batter = batter_data[batter_data['pitcher'] == pitcher_id].copy()

    if len(data) == 0:
        warning_label.text = 'No matchups in specified time frame'
        run_button.label = 'Run'
        return

    warning_label.text = ''

    # per-pitch outcome label and "balls, strikes" count string
    outcomes = []
    counts = []
    for _, pitch in data.iterrows():
        outcomes.append(results(pitch['events'], pitch['description']))
        counts.append(f"{pitch['balls']}, {pitch['strikes']}")
    data['result'] = outcomes
    data['count'] = counts

    # update column data source
    data_cds.data = {
        'pitch': data['pitch_name'],
        'speed': data['release_speed'],
        'result': data['result'],
        'count': data['count'],
        'color': data['color'],
        'plate_x': data['plate_x'],
        'plate_z': data['plate_z']
    }

    # update strike zone: top and bottom are the column sums divided by the
    # number of matchup pitches
    zone_top = data.sz_top.sum() / len(data.sz_top)
    zone_bottom = data.sz_bot.sum() / len(data.sz_bot)
    left_edge = -8.5 / 12
    right_edge = 8.5 / 12
    strike_zone_cds.data = {
        'x': [left_edge, right_edge],
        'x_side1': [left_edge, left_edge],
        'x_side2': [right_edge, right_edge],
        'top': [zone_top, zone_top],
        'bottom': [zone_bottom, zone_bottom],
        'side1': [zone_top, zone_bottom],
        'side2': [zone_bottom, zone_top]
    }

    # update pitch plots: pitcher first, then batter
    p_unique, p_matchup, p_overall = pitch_frequency(pitcher_data, data)
    pitches_p.x_range.factors = p_unique
    pitch_cds_p.data = {
        'pitches': p_unique,
        'matchup': p_matchup,
        'overall': p_overall
    }

    b_unique, b_matchup, b_overall = pitch_frequency(batter_data, sub_batter)
    pitches_b.x_range.factors = b_unique
    pitch_cds_b.data = {
        'pitches': b_unique,
        'matchup': b_matchup,
        'overall': b_overall
    }

    run_button.label = 'Run'
firstName = input()
print("Enter player's last name: ")
lastName = input()
print("Enter Start Date (YYYY-MM-DD): ")
fromDate = input()
print("Enter End Date (YYYY-MM-DD): ")
toDate = input()
playerBattedBalls = get_player_batted_balls(firstName, lastName, fromDate,
                                            toDate)
playerBattedBalls = playerBattedBalls.reset_index(drop=True)
xBA = playerBattedBalls["estimated_ba_using_speedangle"]

playerIDtable = pybaseball.playerid_lookup(lastName, firstName)
player = playerIDtable.loc[0].key_mlbam

playerBattedBallsMore = pybaseball.statcast_batter(fromDate, toDate, player)

# Neither the denominator nor the per-ball hit probabilities change between
# simulated seasons, so compute them once instead of inside the loop.
strikeoutCount = len(playerBattedBallsMore.events[
    playerBattedBallsMore.events == 'strikeout'])
simulatedAtBats = len(xBA) + strikeoutCount
hitProbabilities = list(xBA)

# Simulate 100000 seasons: each batted ball becomes a hit when a uniform
# draw falls below its expected batting average.
BAlist = []
for x in range(100000):
    hit = 0
    for probability in hitProbabilities:
        rand = random.uniform(0, 1)
        if rand < probability:
            hit = hit + 1
    BA = hit / simulatedAtBats
    BAlist.append(BA)

hits = len(playerBattedBallsMore.events[
    playerBattedBallsMore.events == 'single']) + len(
        playerBattedBallsMore.events[playerBattedBallsMore.events == 'double']
Example #8
0
async def on_message(message):
    """Dispatch a chat message to the matching '!' command.

    The command is chosen by prefix match on message.content, in the order
    the branches appear below; a message matching none of them is ignored.
    Replies are sent to message.channel through the module-level client.
    """
    if message.content.startswith('!istimcool'):  # Tim is cool, duh.
        await client.send_message(message.channel, 'Well of course.')

    elif message.content.startswith('!hidrricks'):  # Lab day greeting.
        await client.send_message(message.channel,
                                  'Hello! It must be lab day.')

    elif message.content.startswith('!whattimeisit'):  # Returns current time
        await client.send_message(message.channel,
                                  datetime.datetime.now().time())
        await client.send_message(message.channel, 'Central Time')

    elif message.content.startswith('!coinflip'):  # Flips a coin
        resultOfFlip = random.choice(['Heads', 'Tails'])
        await client.send_message(message.channel, resultOfFlip)

    elif message.content.startswith(
            '!addquote'):  # Adds a quote to a running quote file.
        quote_list = _load_quotes()
        # Drop the command itself and the whitespace that separates it from
        # the quote, so the stored text does not start with a space.
        quote_list.append(message.content[len('!addquote'):].strip())
        with open(_QUOTE_FILE, "w") as quote_file:
            json.dump(quote_list, quote_file)

    elif message.content.startswith(
            '!quote'):  # Pulls a random quote from the quote file.
        quote_list = _load_quotes()
        if quote_list:
            await client.send_message(message.channel,
                                      random.choice(quote_list))
        else:
            # random.choice raises on an empty list; answer instead.
            await client.send_message(message.channel,
                                      'No quotes have been added yet.')

    elif message.content.startswith('!rps'):  # Play rock paper scissors.
        rps = random.choice(["Rock", "Paper", "Scissors"])
        await client.send_message(message.channel, rps)

    elif message.content.startswith('!emoji'):  # Turns to emoji text.
        Message = message.content[7:]

        if Message == 'hello':  # hello
            await client.send_message(
                message.channel,
                ':regional_indicator_h::regional_indicator_e::regional_indicator_l::regional_indicator_l::regional_indicator_o:'
            )
        elif Message == 'liam sucks':  # liam sucks
            await client.send_message(
                message.channel,
                ':regional_indicator_l::regional_indicator_i::regional_indicator_a::regional_indicator_m: :regional_indicator_s::regional_indicator_u::regional_indicator_c::regional_indicator_k::regional_indicator_s:'
            )

    elif message.content.startswith(
            '!stats'):  # Returns batting stats for player
        playerName = message.content[7:]
        print(playerName)

        # Only one hard-coded player is supported.
        if playerName == 'Mike Moustakas':
            from pybaseball import statcast_batter
            playerID = 519058
            moustakasStats = statcast_batter('2017-04-01', '2017-09-01',
                                             playerID)
            print('success')
            await client.send_message(message.channel, moustakasStats)

    elif message.content.startswith('!censor'):  # NO SWEARING
        await client.send_message(
            message.channel,
            'http://i0.kym-cdn.com/photos/images/original/001/299/189/6bb.jpg')

    elif message.content.startswith(
            '!weather'):  # Returns condition and temp at location
        location = message.content[9:]

        from weather import Weather
        weather = Weather()
        lookup = weather.lookup_by_location(location)
        condition = lookup.condition().text()
        temp = lookup.condition().temp()
        desc = lookup.description()

        await client.send_message(message.channel, condition)
        await client.send_message(message.channel, temp)
        await client.send_message(message.channel, desc)

    elif message.content.startswith('!steam'):  # Steam info
        steamapi.core.APIConnection(api_key=steamToken, validate_key=True)
        me = steamapi.user.SteamUser(userurl="acanceroustwzlr")

        steamCommand = message.content[7:]

        if steamCommand == 'level':
            await client.send_message(message.channel, me.level)
        elif steamCommand == 'friends':
            await client.send_message(message.channel, me.friends)
        elif steamCommand == 'recently_played':
            await client.send_message(message.channel, me.recently_played)
        elif steamCommand == 'games':
            await client.send_message(message.channel, me.games)


# JSON file (despite the extension) holding the list of saved quotes.
_QUOTE_FILE = "quote_file.pk1"


def _load_quotes():
    """Return the saved quotes, or an empty list when no file exists yet."""
    if not os.path.isfile(_QUOTE_FILE):
        return []
    with open(_QUOTE_FILE, "r") as quote_file:
        return json.load(quote_file)
Example #9
0
from pybaseball import statcast_batter
from pybaseball import playerid_lookup
from pybaseball import statcast_pitcher


# Id passed to the Statcast query below; presumably the key_mlbam shown by
# the lookup printout -- confirm against that output.
HOSKINS_PLAYER_ID = 656555
QUERY_START, QUERY_END = '2019-03-01', '2019-04-01'

# Show the id-register rows matching the name.
hoskins_id = playerid_lookup('hoskins', 'rhys')
print(hoskins_id)

# Pull and show the Statcast rows for the query window.
print("statcast stats from march 1st to april 1st")
hoskins_statcast = statcast_batter(QUERY_START, QUERY_END, HOSKINS_PLAYER_ID)

print(hoskins_statcast)


"""
kersh = playerid_lookup('kershaw', 'clayton')

kershaw_stats = statcast_pitcher('2017-06-01', '2017-07-01', kersh)

print(kershaw_stats.head(5))
"""
    def __init__(self, playerid, start="2019-03-01", end="2019-11-03"):
        """Load a batter's Statcast data and derive model inputs and counts.

        Arguments
            playerid: id passed to statcast_batter after conversion to int
            start: first date of the query window, 'YYYY-MM-DD'
            end: last date of the query window, 'YYYY-MM-DD'
        """
        # Keep only regular-season rows that carry an event.
        frame = statcast_batter(start, end, int(playerid))
        frame = frame[frame["events"].notna()]
        frame = frame[frame["game_type"] == "R"]

        # Derived features.
        # NOTE(review): park factors and sprint speed are requested for 2018
        # regardless of start/end -- confirm that is intended.
        frame = add_spray_angle(frame)
        frame = add_park_factors(frame, 2018)
        frame = add_sprint_speed(frame, 2018)

        self.df = frame
        feature_columns = [
            'launch_speed',
            'launch_angle',
            'adj_spray_angle',
            '1b_park_factor',
            '2b_park_factor',
            '3b_park_factor',
            'hr_park_factor',
            'sprint_speed',
        ]
        self.model_input = frame[feature_columns]

        # keep playerid for later
        self.playerid = playerid

        # The year is kept only when start and end share it.
        start_year = start.split("-")[0]
        end_year = end.split("-")[0]
        self.year = start_year if start_year == end_year else None

        # Event counts; an event that never occurred is absent from the
        # value counts, so fall back to 0 for it.
        event_counts = frame["events"].value_counts()
        self.bb = event_counts.get("walk", 0)
        self.single = event_counts.get("single", 0)
        self.double = event_counts.get("double", 0)
        self.triple = event_counts.get("triple", 0)
        self.hr = event_counts.get("home_run", 0)
        self.sf = event_counts.get("sac_fly", 0)
        self.hbp = event_counts.get("hit_by_pitch", 0)

        # Still need ABs from FG, set later
        self.fg_ab = None
        self.fg_woba = None
Example #11
0
from typing import Any, Union

import pybaseball
from pandas import DataFrame, Series
from pandas.io.parsers import TextFileReader

# Exploratory script: load several batting data sources and, for some of
# them, keep only the column names for inspection.

# Batting tables from three pybaseball entry points.
b = pybaseball.batting()
b2 = pybaseball.batting_stats_bref()
b3 = pybaseball.bwar_bat()

# Statcast rows for one player id over a two-day window.
s = pybaseball.statcast_batter(start_dt='2020-7-31',
                               end_dt='2020-08-01',
                               player_id=596019)

import numpy as np
# Print arrays in full instead of summarising them with an ellipsis.
np.set_printoptions(threshold=np.inf)
# 2019 season game logs; only the column names are kept in r.
r: Union[Union[TextFileReader, Series, DataFrame, None],
         Any] = pybaseball.retrosheet.season_game_logs(2019)
r = np.array(r.columns)

# Batting table from pybaseball's lahman module.
l = pybaseball.lahman.batting()

import pandas as pd
# NOTE(review): absolute path on one developer's machine -- the two reads
# below fail anywhere else.
path = '/Users/ryangerda/PycharmProjects/DFS_Baseball/Data/playing-2019.csv'
p = pd.read_csv(path)
# Only the column names are kept in p.
p = np.array(p.columns)

path = '/Users/ryangerda/PycharmProjects/DFS_Baseball/Data/teams-2019.csv'
t = pd.read_csv(path)
# Only the column names are kept in t.
t = np.array(t.columns)
Example #12
0
def get_batting_player(
    start_dt=None, end_dt=None, player_last='Vogelbach', player_first='Daniel',
    fname_all=None, fname_bb=None, features=None
):

    """
    Pull player statcast batting data from baseballsavant using pybaseball
    https://github.com/jldbc/pybaseball
    https://baseballsavant.mlb.com/statcast_search

    Arguments
        start_dt: get data from start_dt forward
        end_dt:  get data up to end_dt
        player_last: player's last name
        player_first: player's first name
        fname_all: export csv of all statcast at bat outcomes to this file
            **must be .csv**
        fname_bb: export csv of all outcomes with a batted ball to this file
            **must be .csv**
        features: columns to keep; None selects the default set listed at
            the top of the function body

    Returns
        (all_outcomes, batted_balls) tuple of dataframes
            Saves to files 'fname_all' and 'fname_bb' if fname is not None

    Raises
        IndexError: the name lookup returned no player
    """

    # Build the default per call rather than sharing one list object between
    # calls through the signature.
    if features is None:
        features = [
            'events', 'description', 'batter', 'stand', 'launch_angle',
            'launch_speed', 'hc_x', 'hc_y', 'pitcher', 'p_throws',
            'pitch_type', 'release_speed', 'release_spin_rate'
        ]

    # get player's mlbam_id (mlb advanced metrics id)
    # note: when several players share the same first+last name, the first
    # row returned by the lookup is used -- for now pick players with unique
    # names
    matches = playerid_lookup(player_last, player_first)['key_mlbam'].values
    if len(matches) == 0:
        raise IndexError(
            'No player found for {} {}'.format(player_first, player_last))
    player_id = matches[0]

    # get statcast data (this can take awhile)
    print('Querying batting stats for {} {}'.format(player_first, player_last))
    df = statcast_batter(start_dt, end_dt, player_id)
    # discard null events
    all_outcomes = df[df['events'].notnull()]

    # get the specified features only; list() keeps tuples and other
    # iterables working as a column selection
    all_outcomes = all_outcomes[list(features)]

    if fname_all is not None:
        # export to csv
        all_outcomes.to_csv(fname_all, index=False)
        print('Exported: {}'.format(fname_all))

    # get batted balls only
    batted_balls = filter_batted_balls(all_outcomes)

    if fname_bb is not None:
        # export data
        batted_balls.to_csv(fname_bb, index=False)
        print('Exported: {}'.format(fname_bb))

    return (all_outcomes, batted_balls)
Example #13
0
    'Split-Finger': 8,
    'Sinker': 9
}
'''
# For the hitting stats, only balls put in play matter, so the statcast data
# is filtered on these descriptions.
# NOTE(review): nothing here excludes bunts -- confirm whether that is needed.
ball_in_play = ['hit_into_play', 'hit_into_play_score', 'hit_into_play_no_out']

bb_types = ['ground_ball', 'fly_ball', 'line_drive', 'popup']

# Fill each row with the player's batted-ball type rates over his career span.
for idx, row in test_df.iterrows():
    name = row.Name
    mlb_id = int(row.mlb_id)
    start = int(row.first_played)
    end = int(row.last_played)
    stats = pybaseball.statcast_batter(f'{start}-01-01', f'{end}-12-31',
                                       mlb_id)
    # Keep only the events that result in a ball in play.
    stats = stats[(stats.description.isin(ball_in_play))]

    balls_in_play = len(stats)
    if balls_in_play == 0:
        # No balls in play for this span: the rates are undefined, so record
        # NaN instead of dividing by zero.
        groundBallRate = flyBallRate = lineDriveRate = popUpRate = float('nan')
    else:
        groundBallRate = len(stats[stats.bb_type == bb_types[0]]) / balls_in_play
        flyBallRate = len(stats[stats.bb_type == bb_types[1]]) / balls_in_play
        lineDriveRate = len(stats[stats.bb_type == bb_types[2]]) / balls_in_play
        popUpRate = len(stats[stats.bb_type == bb_types[3]]) / balls_in_play
    print(groundBallRate, flyBallRate, lineDriveRate, popUpRate)
    #print(stats.launch_speed) This returns NaNs
    #print(stats.launch_angle) As does this

    test_df.at[idx, 'bbType_flyBall'] = flyBallRate
    test_df.at[idx, 'bbType_groundBall'] = groundBallRate
    test_df.at[idx, 'bbType_lineDrive'] = lineDriveRate
    test_df.at[idx, 'bbType_popUp'] = popUpRate