Esempio n. 1
0
    def hit_odds(self, batter, pitcher, bt, pt, xbo):
        """Compute hit odds for a batter/pitcher matchup.

        Args:
            batter: Either an already-resolved player id, or a list whose
                items 1 and 0 are passed (in that order) to
                ``playerid_lookup`` -- presumably ``[first_name, last_name]``;
                confirm against callers.
            pitcher: Same convention as ``batter``.
            bt: Value matched against the ``Team`` column of
                ``self.mlb_games`` to select the row that supplies
                ``moneyline`` and ``total``.
            pt: Forwarded unchanged to ``self.hit_odds_work``.
            xbo: A single value, or a list of values, each forwarded to
                ``self.hit_odds_work``.

        Returns:
            ``None`` (after printing a message) when a name lookup fails;
            a dict mapping every element of ``xbo`` to its result when
            ``xbo`` is a list; otherwise the single ``hit_odds_work`` result.
        """
        if isinstance(batter, list):
            # ``except Exception`` (not a bare except) so that Ctrl-C at the
            # interactive prompt below is not reported as a bad name.
            try:
                batter_id = playerid_lookup(batter[1], batter[0])
                if len(batter_id) > 1:
                    print(
                        "Choose the index corresponding to the desired player:"
                    )
                    print(batter_id)
                    desired_index = input("Index: ")
                    batter_id = batter_id.loc[int(desired_index), 'key_mlbam']
                else:
                    batter_id = batter_id.loc[0, 'key_mlbam']
            except Exception:
                print("Invalid batter name.")
                return None
        else:
            batter_id = batter

        if isinstance(pitcher, list):
            try:
                pitcher_id = playerid_lookup(pitcher[1],
                                             pitcher[0]).loc[0, 'key_mlbam']
            except Exception:
                print("Invalid pitcher name.")
                return None
        else:
            pitcher_id = pitcher

        many = isinstance(xbo, list)
        if many and not xbo:
            # Nothing to evaluate; skip the game lookup entirely.
            return {}

        # The game row does not depend on ``xbo``: look it up once.
        game = self.mlb_games.query("Team == @bt").reset_index()
        moneyline = game.loc[0, 'moneyline']
        total = game.loc[0, 'total']

        if many:
            hit_options = {}
            for option in xbo:
                hit_options[option] = self.hit_odds_work(
                    batter_id, pitcher_id, pt, option, moneyline, total)
            return hit_options

        return self.hit_odds_work(batter_id, pitcher_id, pt, xbo, moneyline,
                                  total)
Esempio n. 2
0
def get_mlbam():
    """Look up the MLBAM id for every entry of the module-level ``names``.

    Each name is split on whitespace; the first token is used as the first
    name and the second token as the last name (so ``"Ronald Acuna Jr."`` is
    looked up as Ronald Acuna). A name is stored only when the lookup returns
    exactly one row; anything else is reported as ``"Not added"``.

    The resulting name -> id table is printed and written to a fixed CSV path.
    """
    name_dict = {}
    for name in names:
        parts = name.split()
        if len(parts) < 2:
            # A single-token name cannot be split into first/last.
            print(name, "Not added")
            continue

        first_name = parts[0].lower()
        last_name = parts[1].lower()

        # J.A. Happ x
        # Ronald Acuna Jr. => Ronald Acuna
        # Henderson Alvarez III => Henderson Alvarez
        temp_id = playerid_lookup(last_name, first_name)
        if len(temp_id) == 1:
            # ``.iloc[0]`` extracts the scalar; ``int(Series)`` is deprecated.
            name_dict[name] = int(temp_id['key_mlbam'].iloc[0])
        else:
            print(name, "Not added")

    df = pd.DataFrame.from_dict(name_dict, orient='index', columns=['mlbam'])
    df.reset_index(level=0, inplace=True)
    df.rename(index=str, columns={"index": "name"}, inplace=True)
    print(df)
    df.to_csv(
        r'/Users/blaisepage/Documents/CUBoulder/Sabermetrics/export_dataframe.csv'
    )
Esempio n. 3
0
def get_data(first_name, last_name, start_date, end_date):
    """Return a pitcher's Statcast pitches with friendlier column names.

    Args:
        first_name: Pitcher's first name.
        last_name: Pitcher's last name.
        start_date: Start date forwarded to ``pb.statcast_pitcher``.
        end_date: End date forwarded to ``pb.statcast_pitcher``.

    Returns:
        DataFrame sorted by ``pitch_number``, without rows that lack
        ``pitch_type``, ``des``, ``description`` or ``release_spin_rate``,
        with a running ``Pitch Number`` column and renamed display columns.

    Raises:
        ValueError: If the player lookup yields no ``key_mlbam``.
    """
    try:
        # First matching row is taken as the pitcher's unique identifier.
        key = pb.playerid_lookup(last_name,
                                 first_name)["key_mlbam"].values[0]
    except (IndexError, KeyError) as err:
        # Previously swallowed, which surfaced later as an unbound ``key``.
        raise ValueError(
            f"No MLBAM id found for {first_name} {last_name}") from err

    data = pb.statcast_pitcher(start_date, end_date, key)
    data = data.sort_values(["pitch_number"])
    data = data.dropna(
        subset=["pitch_type", "des", "description", "release_spin_rate"])

    # Positional counter over the rows that survived the filtering above.
    data["order"] = range(len(data))

    return data.rename(
        {
            "des": "Play by Play",
            "description": "Result of Pitch",
            "order": "Pitch Number",
            "pitch_name": "Pitch Type",
            "release_speed": "Pitch Speed",
        },
        axis=1,
    )
Esempio n. 4
0
def get_atbats(first, last):
    """Build ``AtBat`` objects for every strikeout thrown by a pitcher.

    The first row returned by ``playerid_lookup`` is assumed to be the
    player. Statcast data is requested from January 1 of the first season to
    December 31 of the last season, except that a last season of 2019 is
    replaced by 2018.

    Returns:
        Tuple ``(at_bats, ab_arrays)``: the ``AtBat`` objects and, in the same
        order, their ``.np`` attributes.
    """
    lookup = playerid_lookup(last, first)
    pitcher_id = lookup["key_mlbam"].iat[0]  # assume only one line
    first_season = int(lookup["mlb_played_first"].iat[0])
    last_season = int(lookup["mlb_played_last"].iat[0])

    # ignore this year
    if last_season == 2019:
        last_season = 2018

    start_date = "{0}-01-01".format(first_season)
    end_date = "{0}-12-31".format(last_season)
    print("Scraping from {0} to {1}".format(start_date, end_date))

    pitches = statcast_pitcher(start_date, end_date, pitcher_id)
    feature_frame = pitches[features]

    # Index labels of the rows whose event is a strikeout.
    is_strikeout = pitches["events"] == "strikeout"
    strikeout_labels = pitches.index[is_strikeout].to_list()

    at_bats = [AtBat(feature_frame, label) for label in strikeout_labels]
    ab_arrays = [at_bat.np for at_bat in at_bats]
    return at_bats, ab_arrays
Esempio n. 5
0
def get_number(last, first):
    """Return ``key_mlbam`` of the first looked-up player whose last season is 2019."""
    lookup = playerid_lookup(last, first)
    played_2019 = lookup['mlb_played_last'].isin([2019])
    # Renumber from 0 so label 0 is the first remaining row.
    current = lookup.loc[played_2019].reset_index(drop=True)
    return current.loc[0, 'key_mlbam']
Esempio n. 6
0
def filter_batting_player(league_data='all_outcomes_2018.csv',
                          player_last='Vogelbach', player_first='Daniel',
                          fname=None):
    """
    Extract one batter's rows from an already-downloaded league-wide CSV.

    Intended to be used *after* league-wide data has been saved (the original
    author refers to get_league_batting_data()), so that Statcast does not
    have to be queried again for a single player.

    Arguments
        league_data: path of the league-wide .csv file to read
        player_last: last name of player
        player_first: first name of player
        fname: name of .csv file to export; nothing is written when None
    Returns
        dataframe holding the rows whose 'batter' equals the player's
        mlbam id, re-indexed from 0
    """
    # The first row of the lookup is used. Per the original author, for
    # players sharing a first+last name this is the one who entered the
    # league first -- so pick players with unique names for now.
    id_table = playerid_lookup(player_last, player_first)
    mlbam_id = id_table['key_mlbam'].values[0]

    league_df = pd.read_csv(league_data)
    is_player = league_df['batter'] == mlbam_id
    player_df = league_df[is_player].reset_index(drop=True)

    if fname is None:
        return player_df

    player_df.to_csv(fname, index=False)
    return player_df
Esempio n. 7
0
def _month_chunks(start, end):
    """Yield (first_day, last_day) pairs splitting start..end at month boundaries."""
    chunk_start = start
    while chunk_start <= end:
        if chunk_start.month == 12:
            next_month = datetime.date(chunk_start.year + 1, 1, 1)
        else:
            next_month = datetime.date(chunk_start.year,
                                       chunk_start.month + 1, 1)
        chunk_end = min(end, next_month - datetime.timedelta(days=1))
        yield chunk_start, chunk_end
        chunk_start = next_month


def get_player_batted_balls(playerFirst, playerLast, dateGT, dateLT):
    """Download every batted-ball event of one batter from Baseball Savant.

    The date range is requested one calendar month at a time and the monthly
    CSV responses are concatenated. Ranges that cross a year boundary are
    supported (the previous implementation ignored the end year).

    Args:
        playerFirst: Batter's first name.
        playerLast: Batter's last name.
        dateGT: Inclusive start date, formatted ``YYYY-MM-DD``.
        dateLT: Inclusive end date, formatted ``YYYY-MM-DD``.

    Returns:
        DataFrame with the concatenated rows of all monthly downloads.

    Raises:
        ValueError: If ``dateLT`` is earlier than ``dateGT`` or either date
            is not a valid calendar date.
    """
    #all batted balls for a single player
    url = 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=single%7Cdouble%7Ctriple%7Chome%5C.%5C.run%7Cfield%5C.%5C.out%7Cdouble%5C.%5C.play%7Cfield%5C.%5C.error%7Cgrounded%5C.%5C.into%5C.%5C.double%5C.%5C.play%7Cfielders%5C.%5C.choice%7Cfielders%5C.%5C.choice%5C.%5C.out%7Cforce%5C.%5C.out%7Csac%5C.%5C.bunt%7Csac%5C.%5C.bunt%5C.%5C.double%5C.%5C.play%7Csac%5C.%5C.fly%7Csac%5C.%5C.fly%5C.%5C.double%5C.%5C.play%7Ctriple%5C.%5C.play%7C&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=R%7C&hfC=&hfSea=&hfSit=&player_type=batter&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt={}&game_date_lt={}&hfInfield=&team=&position=&hfOutfield=&hfRO=&home_road=&batters_lookup%5B%5D={}&hfFlag=&hfPull=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name-event&sort_col=pitches&player_event_sort=h_launch_speed&sort_order=desc&min_pas=0&type=details&'

    #convert playername into Savant PlayerID for search
    playerIDtable = pybaseball.playerid_lookup(playerLast, playerFirst)
    player = playerIDtable.loc[0].key_mlbam

    # Fixed-position slicing keeps the original tolerance for any
    # single-character separator between year, month and day.
    start = datetime.date(int(dateGT[0:4]), int(dateGT[5:7]),
                          int(dateGT[8:10]))
    end = datetime.date(int(dateLT[0:4]), int(dateLT[5:7]), int(dateLT[8:10]))
    if end < start:
        raise ValueError(
            "dateLT ({}) is earlier than dateGT ({})".format(dateLT, dateGT))

    results = []
    for chunk_start, chunk_end in _month_chunks(start, end):
        query = url.format(chunk_start.strftime("%Y-%m-%d"),
                           chunk_end.strftime("%Y-%m-%d"), player)
        data = requests.get(query).content
        results.append(pd.read_csv(io.StringIO(data.decode('utf-8'))))

    return pd.concat(results)
Esempio n. 8
0
def collect_statcast(sample_size, target, features, pitcher_names):
    """Scrape a fixed-size sample of Statcast pitches for each pitcher.

    Arguments:
        sample_size {int} -- the number of pitches to collect for each pitcher
        target {list} -- the ``description`` values a pitch must have to be kept
        features {list} -- the columns to keep in the resulting data
        pitcher_names {list} -- one sequence of name parts per pitcher; the
            first part is the first name, the remaining parts joined by a
            space are the last name

    Returns:
        pandas dataframe -- one row per sampled pitch, one column per entry
        of ``features``. Pitchers whose name has fewer than two parts, or for
        whom the lookup/sampling raises ``ValueError`` (e.g. no id found or
        fewer than ``sample_size`` matching pitches), are skipped.
    """
    print('Begin scraping \n')

    frames = []

    for i, pitcher in enumerate(pitcher_names):
        if len(pitcher) < 2:
            # Without both a first and a last name there is nothing to look
            # up; previously this reused the names of the prior pitcher.
            continue
        fname, lname = pitcher[0], " ".join(pitcher[1:])

        print(
            f'\n Pitcher Name: {fname} {lname}, #: {i+1}/{len(pitcher_names)}  \n'
        )
        player = playerid_lookup(lname, fname)

        try:
            # When several players match, the largest key_mlbam is used.
            ID = player['key_mlbam'].iloc[player['key_mlbam'].argmax()]
            df = statcast_pitcher('2018-03-29', '2018-09-30', player_id=ID)
            df = df[df['description'].isin(target)].sample(sample_size,
                                                           random_state=2019)
        except ValueError as err:
            # Best effort: report and move on to the next pitcher.
            print(f'Skipping {fname} {lname}: {err}')
            continue

        frames.append(df[features])

    print('Finsihed Scraping')
    if not frames:
        return pd.DataFrame(columns=features)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames, ignore_index=True)
def player(first_name, last_name, start_date, end_date):
    """Return a batter's Statcast data for a date range.

    Args:
        first_name: Batter's first name.
        last_name: Batter's last name.
        start_date: Forwarded as ``start_dt`` to ``pybaseball.statcast_batter``.
        end_date: Forwarded as ``end_dt`` to ``pybaseball.statcast_batter``.

    Returns:
        The Statcast DataFrame with its index reset to 0..n-1.
    """
    player_info = pybaseball.playerid_lookup(last_name, first_name)
    # Row label 0 of the lookup is taken as the player.
    player_id = int(player_info['key_mlbam'][0])

    # Pass the id itself; ``player_info[0]`` would be a column lookup on the
    # DataFrame and fail.
    data = pybaseball.statcast_batter(start_dt=start_date,
                                      end_dt=end_date,
                                      player_id=player_id)
    return data.reset_index(drop=True)
Esempio n. 10
0
def process_data(time, firstname, lastname, pos):
    """Fetch Statcast data for a player as either a batter or a pitcher.

    Args:
        time: Either a single date string, or a range written so that the
            first 10 characters are the start date and everything from
            character 14 on is the end date (e.g. ``"2019-04-01 to
            2019-04-30"``). A value containing ``"to"`` is treated as a range.
        firstname: Player's first name.
        lastname: Player's last name.
        pos: ``'batter'`` or ``'pitcher'``.

    Returns:
        The Statcast result, or ``None`` for any other ``pos``.
    """
    lookup = playerid_lookup(lastname, firstname)
    is_range = "to" in time

    if pos == 'batter':
        fetch = statcast_batter
    elif pos == 'pitcher':
        fetch = statcast_pitcher
    else:
        return None

    dates = (time[0:10], time[14:]) if is_range else (time,)
    mlbam_id = int(lookup['key_mlbam'][0])
    return fetch(*dates, player_id=mlbam_id)
Esempio n. 11
0
def getplayer(first, last):
    """
    Build a selection list of players matching a name.

    An empty first name is passed to the lookup as None. Only rows with a
    non-null ``mlb_played_first`` are kept. Returns a dict mapping the
    title-cased label "First Last - id" to the player's ``key_mlbam``.
    """
    lookup = playerid_lookup(last, None if first == '' else first)
    debuted = lookup[lookup.mlb_played_first.notnull()]
    # TODO: figure out how to deal with players with the same name
    return {
        f"{row['name_first']} {row['name_last']} - {row['key_mlbam']}".title():
        row['key_mlbam']  # statcast ID
        for _, row in debuted.iterrows()
    }
Esempio n. 12
0
def get_mlbam_id(player_dict):
    """Return the ``key_mlbam`` for the player described by ``player_dict``.

    Returns None when ``player_dict["is_mlb"]`` is falsy or the lookup finds
    no row. When several rows match, the first row having the largest
    ``mlb_played_last`` is used.
    """
    if not player_dict["is_mlb"]:
        return None

    cleaned = clean_name(player_dict["Player Name"])
    first_name, last_name = cleaned.split(maxsplit=1)
    if "." in first_name:
        # lookup has "A.J." as "A. J." for some reason
        first_name = first_name.replace(".", ". ").strip()

    matches = playerid_lookup(last_name, first_name)
    match_count = matches.shape[0]

    if match_count == 0:
        # how could this happen?
        return None
    if match_count == 1:
        return matches.key_mlbam.values[0]

    most_recent = matches.mlb_played_last == matches.mlb_played_last.max()
    return matches.loc[most_recent].key_mlbam.values[0]
Esempio n. 13
0
 def getPlayerIDs(players):
     """Resolve "First Last" names to Baseball-Reference ids.

     For every name the last non-NaN ``key_bbref`` of the lookup result is
     used. ``"PLAYER NOT FOUND"`` is recorded when the lookup is empty or the
     name has no second token; when every returned id is NaN the last (NaN)
     entry is recorded, as before.

     Progress and the list so far are printed after each player.

     Returns:
         List with one entry per input name, in input order.
     """
     listOfIDs = []
     for i, fullName in enumerate(players):
         splitPlayer = fullName.split(' ')
         if len(splitPlayer) < 2:
             # No last name to look up.
             identList = []
         else:
             first = splitPlayer[0]
             last = splitPlayer[1]
             identList = playerid_lookup(last, first)['key_bbref'].tolist()

         validIDs = [ident for ident in identList if str(ident) != 'nan']
         if validIDs:
             listOfIDs.append(validIDs[-1])
         elif identList:
             listOfIDs.append(identList[-1])
         else:
             listOfIDs.append("PLAYER NOT FOUND")

         print('added player ' + str(i))
         print(listOfIDs)

     return listOfIDs
Esempio n. 14
0
def get_fangraphs_id(player_dict):
    """Return the ``key_fangraphs`` for the player described by ``player_dict``.

    Returns None when ``player_dict["is_mlb"]`` is falsy or the lookup finds
    no row. With several matching rows, the first row having the largest
    ``mlb_played_last`` wins.

    TODO: figure out a way to refactor this and get_mlbam_id().
    """
    if not player_dict["is_mlb"]:
        return None

    name_parts = clean_name(player_dict["Player Name"]).split(maxsplit=1)
    first_name, last_name = name_parts
    if "." in first_name:
        # lookup has "A.J." as "A. J." for some reason
        first_name = first_name.replace(".", ". ").strip()

    candidates = playerid_lookup(last_name, first_name)
    n_candidates = candidates.shape[0]

    if n_candidates > 1:
        latest_season = candidates.mlb_played_last.max()
        newest = candidates.loc[candidates.mlb_played_last == latest_season]
        return newest.key_fangraphs.values[0]
    if n_candidates == 1:
        return candidates.key_fangraphs.values[0]
    # how could this happen?
    return None
Esempio n. 15
0
def data_from_name(last, first, year1=2020, num_years=1):
    """Return a batter's in-play, non-home-run batted balls for some seasons.

    Args:
        last: Batter's last name.
        first: Batter's first name.
        year1: First season of interest.
        num_years: Number of consecutive seasons starting at ``year1``.

    Returns:
        DataFrame of Statcast rows whose ``description`` contains
        ``hit_into_play``, whose ``events`` is not ``home_run`` and that have
        launch angle, launch speed and hit coordinates; or ``None`` (after
        printing a message) when exactly one player cannot be identified.
    """
    years = set(range(year1, year1 + num_years))
    lookup = pybaseball.playerid_lookup(last, first)

    def _seasons_in_window(row):
        # Both career bounds are inclusive, hence the +1. Rows without
        # usable season values count as no overlap instead of crashing.
        try:
            career = range(int(row['mlb_played_first']),
                           int(row['mlb_played_last']) + 1)
        except (TypeError, ValueError):
            return 0
        return len(years.intersection(career))

    if len(lookup) > 1:
        print('Multiple players found, determining player by years.')
        overlap = lookup.apply(_seasons_in_window, axis=1)
        lookup = lookup[overlap == overlap.max()]

    if len(lookup) != 1:
        print('Unable to determine player')
        return None

    mlb_id = int(lookup['key_mlbam'].iloc[0])
    data = pybaseball.statcast_batter(f'{year1}-01-01',
                                      f'{year1+num_years}-01-01', mlb_id)
    in_play = data['description'].str.contains('hit_into_play',
                                               regex=False,
                                               na=False)
    data = data[in_play]
    data = data[data['events'] != 'home_run']
    data = data.dropna(
        how='any', subset=['launch_angle', 'launch_speed', 'hc_x', 'hc_y'])
    return data
Esempio n. 16
0
# Exploratory script: samples several data-pull helpers (Statcast rows, team
# and player id crosswalks) and then collects per-day starting-pitcher stats.
# Related packages:
# https://pypi.org/project/baseball-scraper/
# https://pypi.org/project/vigorish/
# Useful source to compare packages:
# https://snyk.io/advisor/python

############## Statcast data ##############
# Statcast rows for team 'SEA' over the two given dates.
data = statcast(start_dt='2021-04-01', end_dt='2021-04-02', team='SEA')
# NOTE: the next two expressions are evaluated and discarded; they only show
# output when run interactively.
data.loc[data.game_date == '2021-04-01', :]
data.head()

############## Team Crosswalk #############
# NOTE: the result of teams() is immediately overwritten by team_ids().
team_cross = teams()
team_cross = team_ids()

############# Player Crosswalk ############
# Arguments are (last name, first name).
player = playerid_lookup('Sheffield', 'Justus')

############## Date Range #################
# One 'YYYY-MM-DD' string per calendar day from 2021-04-01 to 2021-05-31.
dt_lst = pd.date_range(start='2021-04-01', end='2021-05-31', freq='D')
dt_range = []
for i in dt_lst:
    dt_range.append(i.strftime('%Y-%m-%d'))

############### Pitcher Profile ##############
# Game by Game
# TODO remove data.GS filter if want to look at all pitchers ; for now just looking at starters
# One request per day (start and end date identical); only rows with GS == 1
# are kept. Note that `data` from the Statcast section is reused/overwritten.
table_lst = []
for x in dt_range:
    data = pitching_stats_range(start_dt=x, end_dt=x)
    data = data.loc[data.GS == 1, :]
    table_lst.append(data)
    def get_data(first_name, last_name):
        """Load (or download and cache) a pitcher's train/test pitch data.

        Cached CSVs are read from ``Data/<last>_<first>_train.csv`` and
        ``Data/<last>_<first>_test.csv`` when both exist. Otherwise Statcast
        is queried for the named pitcher (2015-2017 for training, 2018-2019
        for testing) and the results are written to those files.

        Args:
            first_name: Pitcher's first name.
            last_name: Pitcher's last name.

        Returns:
            Tuple ``(train_data_input, train_data_result, test_data)``:
            the training feature columns, the matching ``pitch_code`` column
            as a one-column frame, and the test frame (features plus
            ``pitch_code``); rows with nulls are dropped from all three.
        """
        feature_columns = [
            'prev_pitch_3', 'prev_pitch_2', 'prev_pitch_1', 'balls', 'strikes',
            'stand', 'on_3b', 'on_2b', 'on_1b', 'outs_when_up', 'pitch_number'
        ]
        base_columns = ('on_3b', 'on_2b', 'on_1b')

        train_filename = 'Data/' + str(last_name) + "_" + str(
            first_name) + "_train.csv"
        test_filename = 'Data/' + str(last_name) + "_" + str(
            first_name) + "_test.csv"

        if os.path.isfile(train_filename) and os.path.isfile(test_filename):
            #If we've already gotten the data, read it in
            train_data = pd.read_csv(train_filename)
            test_data = pd.read_csv(test_filename)
        else:
            #If we haven't, get it off the web and store it for future runs
            os.makedirs('Data', exist_ok=True)
            # Look up the requested pitcher (this used to be hard-coded to
            # one player regardless of the arguments); first match is used.
            player_id = int(
                playerid_lookup(last_name, first_name)['key_mlbam'].iloc[0])
            #training is done on data from 2015 through 2017
            train_data = statcast_pitcher(start_dt='2015-01-01',
                                          end_dt='2017-12-31',
                                          player_id=player_id)
            train_data.to_csv(train_filename)
            #testing is done on data from the beginning of 2018 through 2019
            test_data = statcast_pitcher(start_dt='2018-01-01',
                                         end_dt='2019-12-31',
                                         player_id=player_id)
            test_data.to_csv(test_filename)

        #Keep only the pitch types in pitcher_pitches, then encode them
        train_data = train_data[train_data['pitch_type'].isin(pitcher_pitches)]
        train_data = train_data.dropna(subset=['pitch_type'])
        train_data['pitch_code'] = train_data.apply(
            lambda row: get_pitch_code(row, pitcher_pitches), axis=1)

        #Do the same as above but for the testing data in case they added a new pitch
        test_data = test_data[test_data['pitch_type'].isin(pitcher_pitches)]
        test_data = test_data.dropna(subset=['pitch_type'])
        test_data['pitch_code'] = test_data.apply(
            lambda row: get_pitch_code(row, pitcher_pitches), axis=1)

        train_data = get_prev_pitch(train_data)
        test_data = get_prev_pitch(test_data)

        #Fill the NA values and turn the runner id on each base into a 0/1 flag
        for frame in (train_data, test_data):
            for base in base_columns:
                frame[base] = frame[base].fillna(
                    value=0).astype(bool).astype(int)

        #Select features + target together so dropna removes the same rows
        #from both, then split them apart
        train_complete = train_data[feature_columns + ['pitch_code']].dropna()
        train_data_result = train_complete[['pitch_code']]
        train_data_input = train_complete[feature_columns]

        test_data = test_data[feature_columns + ['pitch_code']].dropna()

        return train_data_input, train_data_result, test_data
Esempio n. 18
0
def get_batting_player(
    start_dt=None, end_dt=None, player_last='Vogelbach', player_first='Daniel',
    fname_all=None, fname_bb=None, features=None
):

    """
    Pull player statcast batting data from baseballsavant using pybaseball
    https://github.com/jldbc/pybaseball
    https://baseballsavant.mlb.com/statcast_search

    Arguments
        start_dt: get data from start_dt forward
        end_dt:  get data up to end_dt
        player_last: player's last name
        player_first: player's first name
        fname_all: export csv of all statcast at bat outcomes to this file
            **must be .csv**
        fname_bb: export csv of all outcomes with a batted ball to this file
            **must be .csv**
        features: list of columns to keep; None selects the default set
            (events, description, batter, stand, launch_angle, launch_speed,
            hc_x, hc_y, pitcher, p_throws, pitch_type, release_speed,
            release_spin_rate)

    Returns
        (all_outcomes, batted_balls) tuple of dataframes
            Saves to files 'fname_all' and 'fname_bb' if fname is not None
    """
    # Build the default per call rather than sharing one mutable list
    # object across calls.
    if features is None:
        features = [
            'events', 'description', 'batter', 'stand', 'launch_angle',
            'launch_speed', 'hc_x', 'hc_y', 'pitcher', 'p_throws',
            'pitch_type', 'release_speed', 'release_spin_rate'
        ]

    # get player's mlbam_id (mlb advanced metrics id)
    # note: the first row of the lookup is used; per the original author,
    # for players with the same first+last name this is the player who
    # entered the league first -- for now pick players with unique names
    player_id = playerid_lookup(
        player_last, player_first
    )['key_mlbam'].values[0]

    # get statcast data (this can take awhile)
    print('Querying batting stats for {} {}'.format(player_first, player_last))
    df = statcast_batter(start_dt, end_dt, player_id)

    # discard null events, then keep the specified features only
    all_outcomes = df[df['events'].notnull()]
    all_outcomes = all_outcomes[features]

    if fname_all is not None:
        # export to csv
        all_outcomes.to_csv(fname_all, index=False)
        print('Exported: {}'.format(fname_all))

    # get batted balls only
    batted_balls = filter_batted_balls(all_outcomes)

    if fname_bb is not None:
        # export data
        batted_balls.to_csv(fname_bb, index=False)
        print('Exported: {}'.format(fname_bb))

    return (all_outcomes, batted_balls)
Esempio n. 19
0
df = pd.read_csv('gather_ids.csv')

# Export the first/last names of every row that still lacks an MLB id.
df_noIDs = df[df['mlb_id'].isnull()]
df_noIDs = df_noIDs[['First', 'Last']]
df_noIDs.to_csv('df_noIDs.csv')

# Loading the hitter dataframe; will amend each individual here and then
# re-save and overwrite the loaded file.
# (A stray triple quote used to turn this load into a string literal, which
# left df_hitters undefined and the following note unbalanced.)
df_hitters = pd.read_csv('./data/gather_ids.csv')

# Each player missing IDs is handled individually below, one by one.

# (target column in df_hitters, source column of the lookup, conversion)
ID_COLUMNS = [
    ('mlb_id', 'key_mlbam', int),
    ('retro_id', 'key_retro', str),
    ('bbref_id', 'key_bbref', str),
    ('fangraphs_id', 'key_fangraphs', int),
    ('first_played', 'mlb_played_first', int),
    ('last_played', 'mlb_played_last', int),
]

#16,J.P.,Arencibia
# Looked up by last name only; .item() below requires exactly one match.
jp_arencibia = pybaseball.playerid_lookup(last='arencibia')
is_arencibia = (df_hitters.First == 'J.P.') & (df_hitters.Last == 'Arencibia')
for target, source, convert in ID_COLUMNS:
    df_hitters[target] = np.where(is_arencibia,
                                  convert(jp_arencibia[source].item()),
                                  df_hitters[target])
print(df_hitters[df_hitters['Last'] == 'Arencibia'])

#28,Jose,Bautista
# Several players share this name; the second row of the lookup is used.
jose_bautista = pybaseball.playerid_lookup(last='bautista', first='jose').iloc[1]
is_bautista = (df_hitters.First == 'Jose') & (df_hitters.Last == 'Bautista')
for target, source, convert in ID_COLUMNS:
    # jose_bautista is a single row, so each field is already a scalar.
    df_hitters[target] = np.where(is_bautista,
                                  convert(jose_bautista[source]),
                                  df_hitters[target])
Esempio n. 20
0

# Interactive script: simulate a batter's average from the per-batted-ball
# expected batting average (xBA) over a date range.
print("Enter player's first name: ")
firstName = input()
print("Enter player's last name: ")
lastName = input()
print("Enter Start Date (YYYY-MM-DD): ")
fromDate = input()
print("Enter End Date (YYYY-MM-DD): ")
toDate = input()
playerBattedBalls = get_player_batted_balls(firstName, lastName, fromDate,
                                            toDate)
playerBattedBalls = playerBattedBalls.reset_index(drop=True)
xBA = playerBattedBalls["estimated_ba_using_speedangle"]

playerIDtable = pybaseball.playerid_lookup(lastName, firstName)
player = playerIDtable.loc[0].key_mlbam

playerBattedBallsMore = pybaseball.statcast_batter(fromDate, toDate, player)

# Loop invariants, computed once instead of on each of the 100000 runs:
# denominator = batted balls + strikeouts; plain list avoids label lookups.
strikeouts = len(playerBattedBallsMore.events[playerBattedBallsMore.events ==
                                              'strikeout'])
atBats = len(xBA) + strikeouts
xBAValues = xBA.tolist()

BAlist = []
for x in range(100000):
    hit = 0
    for hitProbability in xBAValues:
        # A batted ball counts as a hit when the uniform draw falls below
        # its xBA (a NaN xBA never counts).
        rand = random.uniform(0, 1)
        if rand < hitProbability:
            hit = hit + 1
    BA = hit / atBats
    BAlist.append(BA)
Esempio n. 21
0
def main():
    """Scrape an Ottoneu transactions page and enrich the added players.

    For every table row whose "Transaction Type" is "add", the player's
    Ottoneu page data, MLBAM id (for MLB players), and hitter/pitcher flags
    are collected. Hitters found in the season's exit-velocity leaderboard
    additionally get exit-velocity and barrel-rate fields. The first
    collected player (if any) is printed.
    """
    league_id = "953"  # put this in env
    base_url = "https://ottoneu.fangraphs.com"
    current_year = 2020
    auction_url = f"{base_url}/{league_id}/transactions"
    # for testing -- NOTE: this overrides the URL built above
    auction_url = "https://ottoneu.fangraphs.com/953/transactions?filters%5B%5D=cut&filters%5B%5D=increase"
    resp = requests.get(auction_url)
    soup = BeautifulSoup(resp.content, "html.parser")
    table = soup.find("table")
    thead = [th.get_text() for th in table.find("thead").find_all("th")]

    auction_players = list()
    for tr in tqdm(table.find("tbody").find_all("tr")):
        player_data = [td.get_text().strip() for td in tr.find_all("td")]
        player_dict = dict(zip(thead, player_data))
        if player_dict["Transaction Type"] != "add":
            continue

        player_links = [
            a["href"] for a in tr.find_all("a") if "playercard" in a["href"]
        ]
        if not player_links:
            # Without a playercard link there is no Ottoneu id to work with.
            continue
        player_page_url = player_links.pop()

        player_dict["ottoneu_id"] = player_page_url.rsplit("=")[1]
        player_salary_dict = get_ottoneu_player_page(player_dict["ottoneu_id"],
                                                     league_id)
        player_dict.update(player_salary_dict)

        if player_dict["is_mlb"]:
            player_name = clean_name(player_dict["Player Name"])
            first_name, last_name = player_name.split(maxsplit=1)
            if "." in first_name:
                # lookup has "A.J." as "A. J." for some reason
                first_name = first_name.replace(".", ". ").strip()
            id_lookup = playerid_lookup(last_name, first_name)
            if id_lookup.shape[0] > 1:
                # Several matches: take the one who played most recently.
                player_dict["mlbam_id"] = id_lookup.loc[
                    id_lookup.mlb_played_last ==
                    id_lookup.mlb_played_last.max()].key_mlbam.values[0]
            elif id_lookup.shape[0] == 1:
                player_dict["mlbam_id"] = id_lookup.key_mlbam.values[0]
            else:
                # No match: record that instead of raising IndexError.
                player_dict["mlbam_id"] = None

        is_hitter, is_pitcher = get_position_group(player_dict["positions"])
        player_dict["is_hitter"] = is_hitter
        player_dict["is_pitcher"] = is_pitcher

        auction_players.append(player_dict)

    hitters = [player for player in auction_players if player["is_hitter"]]
    pitchers = [player for player in auction_players if player["is_pitcher"]]
    if hitters:
        # setting minBBE = 0 to avoid not getting someone
        # get rid of this indentation and just pull exit velo #s regardless?
        exit_velo_data = statcast_batter_exitvelo_barrels(current_year,
                                                          minBBE=0)
        for player in hitters:
            if not player["is_mlb"]:
                # avoid index error for minor leaguers
                continue
            records = exit_velo_data.loc[
                exit_velo_data.player_id == player.get("mlbam_id")].to_dict(
                    "records")
            if not records:
                # Unknown id or no batted-ball data for this player.
                continue
            player_exit_velo = records.pop()
            # add anything else?
            player["avg_exit_velo"] = player_exit_velo["avg_hit_speed"]
            player["max_exit_velo"] = player_exit_velo["max_hit_speed"]
            player["barrel_pa_rate"] = player_exit_velo["brl_pa"]
            player["barrel_bbe_rate"] = player_exit_velo["brl_percent"]

    if pitchers:
        # currently pybaseball only has individual pitcher data
        pass

    if auction_players:
        print(auction_players[0])
Esempio n. 22
0
# Spellings expected by the player-id lookup table where they differ from the
# season stats table, keyed by the stats-table name.  Each value is
# (first, last); None keeps the token obtained by splitting the name on spaces.
# The list was collected by hand, so other seasons may need more entries.
_LOOKUP_NAME_OVERRIDES = {
    'Zack Wheeler': ('Zach', None),
    'Matthew Boyd': ('Matt', None),
    'C.J. Wilson': ('c. j.', None),
    'R.A. Dickey': ('R. A.', None),
    'Jon Niese': ('Jonathon', None),
    'A.J. Burnett': ('A. J.', None),
    'Jorge De La Rosa': ('Jorge', 'De La Rosa'),
    'Rubby de la Rosa': ('Rubby', 'de la Rosa'),
    'Cole DeVries': (None, 'De Vries'),
    'Samuel Deduno': ('Sam', None),
    'JC Ramirez': ('J. C.', None),
    'Nathan Karns': ('Nate', None),
    'Daniel Ponce de Leon': (None, 'Ponce de Leon'),
    'Chi Chi Gonzalez': ('Chi Chi', 'Gonzalez'),
    'Josh A. Smith': ('Josh', 'Smith'),
    'Joel De La Cruz': (None, 'De La Cruz'),
}

# Pitch-type codes that get their own group of summary columns.
_PITCH_TYPES = ('SL', 'FF', 'CU', 'FT', 'CH', 'FC', 'KC', 'SI', 'PO', 'FS',
                'EP', 'SC', 'KN')

# Per-pitch measurements that are summarised for every pitch type.
_PITCH_STATS = ('release_speed', 'release_pos_x', 'release_pos_z', 'pfx_x',
                'pfx_z', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az',
                'effective_speed', 'release_spin_rate')


def _lookup_name(name):
    """Return the (first, last) spelling to pass to ``playerid_lookup``."""
    tokens = name.split(' ')
    # The lookup table writes initials with a space after the first dot
    # ("A.J." -> "A. J.").
    first = tokens[0].replace('.', '. ', 1)
    last = tokens[1]
    first_fix, last_fix = _LOOKUP_NAME_OVERRIDES.get(name, (None, None))
    if first_fix is not None:
        first = first_fix
    if last_fix is not None:
        last = last_fix
    return first, last


def _nan_to_zero(value):
    """Return 0 when *value* is NaN, otherwise *value* unchanged."""
    return 0 if math.isnan(value) else value


def _summarize_pitches(name, player):
    """Build the one-row summary frame for one pitcher's pitch-level data."""
    row = {'Name': [name]}
    for pitch in _PITCH_TYPES:
        pitches = player[player['pitch_type'] == pitch]
        pitches = pitches[list(_PITCH_STATS)]
        for stat in _PITCH_STATS:
            values = pitches[stat]
            prefix = pitch + "_" + stat
            # A pitch type the pitcher never threw yields NaN; store 0 instead.
            row[prefix + '_std'] = [_nan_to_zero(np.std(values))]
            row[prefix + '_mean'] = [_nan_to_zero(np.mean(values))]
            row[prefix + '_min'] = [_nan_to_zero(np.min(values))]
            row[prefix + '_max'] = [_nan_to_zero(np.max(values))]
    # Building the frame in one go avoids hundreds of single-column inserts.
    return pd.DataFrame(row)


def get_data(year = 2018, minimum_starts = 5):
    """Write ``<year>.csv`` with per-pitch-type summary stats for each pitcher.

    The season table is read from ``<year>/Players_Stats_<year>.csv`` when that
    file exists; otherwise it is fetched with ``pitching_stats``, reduced to
    rows with ``GS > minimum_starts`` and cached there.  Each pitcher's
    pitch-level data is likewise read from
    ``<year>/player/<Name>-<year>.csv`` or fetched with ``statcast_pitcher``
    and cached.

    The output has one row per pitcher: ``Name`` followed by
    ``<pitch>_<stat>_{std,mean,min,max}`` columns, with 0 wherever the value
    would be NaN.

    Args:
        year: season to process; also used as the cache directory name.
        minimum_starts: pitchers need strictly more games started than this.
            Only applied when the season table is downloaded, not when it is
            read back from the cache.

    Returns:
        None.  Nothing is written when there are no pitchers to summarise.
    """
    year_dir = str(year)
    player_dir = os.path.join(year_dir, 'player')
    os.makedirs(player_dir, exist_ok=True)

    stats_path = os.path.join(year_dir, "Players_Stats_" + year_dir + ".csv")
    if os.path.exists(stats_path):
        player_stats = pd.read_csv(stats_path)
    else:
        player_stats = pitching_stats(year, year)
        player_stats = player_stats[player_stats['GS'] > minimum_starts]
        player_stats.to_csv(stats_path)

    rows = []
    for name in player_stats['Name']:
        player_path = os.path.join(player_dir, name + '-' + year_dir + '.csv')
        if os.path.exists(player_path):
            player = pd.read_csv(player_path)
        else:
            first, last = _lookup_name(name)
            matches = playerid_lookup(last, first)
            # Keep only players whose MLB career covers the requested season.
            matches = matches[(matches['mlb_played_first'] <= year)
                              & (matches['mlb_played_last'] >= year)]
            if len(matches) != 1:
                print(matches)
                print(name)
                print("Concerning")
            if matches.empty:
                # No id means nothing to download; skip instead of crashing.
                print("Skipping " + name + ": no player id found")
                continue
            # With several candidates the first one is used.
            player = statcast_pitcher(year_dir + '-1-01', year_dir + '-12-31',
                                      matches['key_mlbam'].iloc[0])
            player.to_csv(player_path)

        rows.append(_summarize_pitches(name, player))

    if not rows:
        print("No pitchers to summarise for " + year_dir)
        return
    # Concatenate once; every row keeps index 0, as single-row frames do.
    pd.concat(rows).to_csv(year_dir + ".csv")
Esempio n. 23
0
'''

Example from pybaseball github on gathering ids:
pid = pybaseball.playerid_lookup('kershaw', 'clayton')
print(pid)
print(pid.key_bbref.item())

'''

# Destination column in `hitters`, source column in the lookup result, and the
# conversion applied to the looked-up value.
id_columns = (
    ('mlb_id', 'key_mlbam', int),
    ('retro_id', 'key_retro', str),
    ('bbref_id', 'key_bbref', str),
    ('fangraphs_id', 'key_fangraphs', int),
    ('first_played', 'mlb_played_first', int),
    ('last_played', 'mlb_played_last', int),
)

# Look every hitter up by name and copy the ids onto that hitter's row
for idx, row in hitters.iterrows():
    first = str(row['First'])
    last = str(row['Last'])
    pid = pybaseball.playerid_lookup(last, first)

    # Rows without exactly one match are left untouched
    if len(pid) != 1:
        continue

    for target, source, convert in id_columns:
        hitters.at[idx, target] = convert(getattr(pid, source).item())

# Save the updated dataframe to a new csv file
hitters.to_csv('./data/gather_ids.csv')
print(hitters.head())
Esempio n. 24
0
from pybaseball import statcast
import pandas as pd
import numpy as np
from pybaseball import statcast_pitcher
from pybaseball import playerid_lookup

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.utils import to_categorical

from tensorflow import feature_column
from tensorflow.keras import layers
import tensorflow as tf

# MLBAM id of the row labelled 0 in the lookup result
berrios_lookup = playerid_lookup('berrios', 'jose')
pid = berrios_lookup["key_mlbam"][0]
print(pid)

# get all available data
data = statcast_pitcher('2017-03-01', '2019-10-10', player_id=pid)

# Columns kept for the rest of the script
kept_columns = [
    "pitch_type", "bat_score", "fld_score", "on_3b", "on_2b", "on_1b",
    "outs_when_up", "inning", "inning_topbot", "pitch_number", "p_throws",
    "balls", "strikes", "stand", "batter", "release_speed", "description"
]
data = data[kept_columns]

# Drop rows whose pitch type is 'EP' or 'PO'
data = data[(data.pitch_type != 'EP') & (data.pitch_type != 'PO')]

# Missing values in the on-base columns become 0
base_columns = ["on_3b", "on_2b", "on_1b"]
data[base_columns] = data[base_columns].replace(np.nan, 0)
Esempio n. 25
0
from pybaseball import statcast_batter
from pybaseball import playerid_lookup
from pybaseball import statcast_pitcher


# Print the id-lookup result for Rhys Hoskins
hoskins_id = playerid_lookup(last='hoskins', first='rhys')
print(hoskins_id)

# The statcast query below uses a literal id, not the lookup result above
query_start, query_end = '2019-03-01', '2019-04-01'
print("statcast stats from march 1st to april 1st")
hoskins_statcast = statcast_batter(query_start, query_end, 656555)

print(hoskins_statcast)


"""
kersh = playerid_lookup('kershaw', 'clayton')

kershaw_stats = statcast_pitcher('2017-06-01', '2017-07-01', kersh)

print(kershaw_stats.head(5))
"""
Esempio n. 26
0
def main(input_files):
    """Train a model with one fielder held out, then score every fielder.

    Prompts on stdin for a first and last name, prints the
    ``playerid_lookup`` result and asks which row label to use as the target
    player.  Every usable row of the input CSVs becomes one sample; rows
    fielded by the target player form the evaluation set and all other rows
    the training set.  After training and evaluating the estimator, each
    (player, position) pair at a position the target player fielded is scored
    per season.

    Args:
        input_files: iterable of CSV paths.  Characters ``[-8:-4]`` of each
            path are used as the season key and must be one of "2015",
            "2016", "2017" or "2018", otherwise a KeyError is raised.

    Returns:
        Tuple ``(last, yearly_results)``.  ``yearly_results`` maps each season
        to ``{(player, position): {"total_bases": ..., "opportunities": ...}}``
        where "total_bases" is the summed (predicted - actual) total bases and
        "opportunities" is the number of samples for that pair.
    """
    # NOTE(review): `target_position` is read and appended to below but never
    # assigned in this function, so it has to exist as a module-level list.
    # Confirm it is defined elsewhere in the file.

    # get the player that we'll be excluding from the neural network
    first = input('First Name: ')
    last = input('Last Name: ')
    possibles = playerid_lookup(last, first)
    print(possibles)
    index = int(input('Which player do you want?'))
    # `index` is a row label of the printed lookup table
    target_id = possibles['key_mlbam'][index]

    training = []
    testing = []
    years = {'2015': {}, '2016': {}, '2017': {}, '2018': {}}
    # CSV column holding the fielder for each hit_location value 1..9
    diamond = [
        "fielder_1", "fielder_2", "fielder_3", "fielder_4", "fielder_5",
        "fielder_6", "fielder_7", "fielder_8", "fielder_9"
    ]
    # A row is skipped (and counted as missing) when any of these is empty
    required_fields = ('hc_x', 'hc_y', 'launch_angle', 'launch_speed',
                       'estimated_ba_using_speedangle', 'outs_when_up',
                       'total_bases', 'hit_location')

    print("begin")

    for input_file in input_files:
        with open(input_file) as file_r:
            reader = csv.DictReader(file_r)
            missing = 0
            for row in reader:
                # sets the on_base variables to binary
                on_1 = 0 if row['on_1b'] == "" else 1
                on_2 = 0 if row['on_2b'] == "" else 1
                on_3 = 0 if row['on_3b'] == "" else 1

                # handles missing values
                if any(row[field] == "" for field in required_fields):
                    missing += 1
                    continue

                # creates input to be fed into the NN; the last element is
                # the label, stored as total_bases + 6
                new_input = [
                    float(row['hc_x']),
                    float(row['hc_y']),
                    float(row['launch_angle']),
                    float(row['launch_speed']),
                    float(row['estimated_ba_using_speedangle']),
                    int(float(row['outs_when_up'])), on_1, on_2, on_3,
                    int(float(row['total_bases'])) + 6
                ]
                total_bases = int(float(row['total_bases']))
                if total_bases > 6:
                    print("Above 6", total_bases)

                # keeps track of the players associated with each event
                location = int(float(row['hit_location'])) - 1

                # hit locations 1 and 2 are skipped: per the original author
                # the pitcher and catcher ids are not available in the data
                if location in (0, 1):
                    continue
                player = row[diamond[location]]
                if player == "":
                    continue

                position = location + 1
                # Add to training and testing set
                if float(player) == target_id:
                    testing.append(new_input)
                    if position not in target_position:
                        target_position.append(position)
                else:
                    training.append(new_input)

                years[input_file[-8:-4]].setdefault((player, position),
                                                    []).append(new_input)

        print(input_file, "missing data:", missing)

        print("closed file")

    print("completed split")

    # Split every sample into features (all but last) and label (last).
    # The labels keep the +6 shift applied when the sample was built.
    train_x = [sample[:-1] for sample in training]
    train_y = [sample[-1] for sample in training]
    test_x = [sample[:-1] for sample in testing]
    test_y = [sample[-1] for sample in testing]

    print("completed x/y var separation")

    train_data = numpy.asarray(train_x)
    train_labels = numpy.asarray(train_y)
    eval_data = numpy.asarray(test_x)
    eval_labels = numpy.asarray(test_y)

    print("---------------CREATING CLASSIFIER----------------")
    # create estimator
    model = tf.estimator.Estimator(model_fn=neural_net,
                                   model_dir="checkpoints/")

    print("---------------TRAINING CLASSIFIER----------------")
    # train the classifier
    model = train(train_data, train_labels, model)

    print("---------------EVALUATING CLASSIFIER----------------")
    # evaluate effectiveness
    results, _ = test(eval_data, eval_labels, model)
    print(results)

    # start evaluating players
    yearly_results = {}
    for year, players in years.items():
        print("Processing", year)
        individual_results = {}
        for key, value in players.items():
            # key is (player id, location_played)
            # value is all of the plays identified with them
            if int(key[1]) not in target_position:
                continue
            curr_x = [sample[:-1] for sample in value]
            curr_y = [sample[-1] for sample in value]
            individual_predictions = list(
                predict(numpy.asarray(curr_x), numpy.asarray(curr_y), model))
            # undo the +6 label shift on the predicted classes
            actual_pred = [
                prediction['classes'] - 6
                for prediction in individual_predictions
            ]

            # sum of (predicted - actual) total bases over the player's plays
            tally = 0
            for i, label in enumerate(curr_y):
                tally += actual_pred[i] - (label - 6)

            individual_results[key] = {
                "total_bases": tally,
                "opportunities": len(curr_y)
            }

        yearly_results[year] = individual_results

    return (last, yearly_results)
Esempio n. 27
0
playerDatapath = '../../playerStats/data_raw/'

# Fill names into `ndf` and MLBAM ids into `df`, one row at a time.
# NOTE(review): the start row 3980 looks like a resume point from an earlier
# run; confirm before reusing.
for row in range(3980, df.shape[0]):
    ID = df['RetroID'][row]
    # The first record sharing this RetroID supplies the name
    relevant = df[df['RetroID'] == ID]
    LastName, FirstName = (relevant[column].values[0]
                           for column in ('LastName', 'FirstName'))
    ndf.iloc[row, 1] = LastName
    ndf.iloc[row, 2] = FirstName

    # Of the same-named players, keep the MLBAM id(s) with a matching RetroID
    candidates = pb.playerid_lookup(last=LastName, first=FirstName)
    playerInfo = candidates.loc[candidates['key_retro'] == ID, 'key_mlbam']
    df.iloc[row, 3] = playerInfo.values
    print(row)

    # Progress line every 100 rows
    if row % 100 == 0:
        print(f'row {row} of {ndf.shape[0]}')

ndf.to_csv('player_lookup/players.csv', index=False)

for row in range(0, playerdf.shape[0]):
    try:
        playerdf['MLBAM'][row] = int(playerdf['MLBAM'][row].replace(
            ']', '').replace('[', ''))
# Made by Noah Mitchem for MLB Pitchers
# Vertical pitch breaks seem off, don't know what other data can be used
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import plot
import numpy as np
from matplotlib import cm
from pybaseball import playerid_lookup
from pybaseball import statcast_pitcher
# Pitch-level data for the row labelled 0 in the "scherzer, max" lookup
season_start, season_end = "2019-03-25", "2019-10-01"
scherzer_id = playerid_lookup("scherzer", "max")["key_mlbam"][0]
file = statcast_pitcher(season_start, season_end, scherzer_id)


def colorcode(speed):
    """Return the magma-colormap colour for *speed* as an ``rgb(r,g,b)`` string.

    The colormap index is ``int((speed - 50) * 4.3)``, so a speed of 50 maps
    to index 0.  The red, green and blue channels are scaled to 0-255 and
    truncated to integers.
    """
    palette_index = int((speed - 50) * 4.3)
    channels = np.array(cm.magma(palette_index)) * 255
    red, green, blue = (int(channel) for channel in channels[:3])
    return f"rgb({red},{green},{blue})"


# Working state for the rest of the script (its use is not visible here)
data, data1 = [], []
pitchTrack = breaks = 0
x = {}
extremes = []

# Number of distinct pitch_type values and number of rows in `file`
differentPitches = len(file["pitch_type"].unique())
totalPitches = len(file.index)
color = [
    "rgb(102, 204, 0)", "rgb(0, 214, 214)", "rgb(204, 0, 0)",
    "rgb(255, 153, 0)", "rgb(153, 0, 255)"
Esempio n. 29
0
                               user="******",
                               database="baseball")

#get players for both team
players = [
    ("corey", "kluber"), ("yan", "gomes"), ("yonder", "alonso"),
    ("jose", "ramirez"), ("josh", "donaldson"), ("francisco", "lindor"),
    ("melky", "cabrera"), ("jason", "kipnis"), ("michael", "brantley"),
    ("luis", "castillo"), ("tucker", "barnhart"), ("joey", "votto"),
    ("scooter", "gennett"), ("eugenio", "suarez"), ("mason", "williams"),
    ("billy", "hamilton"), ("preston", "tucker"), ("jose", "peraza"),
]

mycursor = mydb.cursor()

# Events that are not counted in `bip`
excluded_from_bip = ("strikeout", "walk", "home_run")

#get player data
for player in players:
    matches = playerid_lookup(player[1], player[0])
    print(len(matches))
    # Only players with exactly one lookup match are processed
    if len(matches) != 1:
        continue

    stats = statcast_batter('2018-3-29', '2018-10-02',
                            matches.key_mlbam.iloc[0])
    events = list(stats.events)
    # tot: every row; hr: home runs; bip: non-float (i.e. not NaN) events
    # other than strikeouts, walks and home runs
    tot = len(events)
    hr = sum(1 for event in events if event == 'home_run')
    bip = sum(1 for event in events
              if type(event) is not float and event not in excluded_from_bip)
Esempio n. 30
0
from pybaseball import statcast
from pybaseball import playerid_lookup
import pandas as pd

# First we have to find all of the player_ids for the Red Sox batters to filter to just their at-bats
redsox_batters = [('christian', 'vazquez'), ('christian', 'arroyo'),
                  ('xander', 'bogaerts'), ('rafael', 'devers'),
                  ('alex', 'verdugo'), ('enrique', 'hernandez'),
                  ('hunter', 'renfroe'), ('kyle', 'schwarber'),
                  ('kevin', 'plawecki')]
redsox_batters_dict = {
    "jd+martinez": 502110.0
}  # Had to look up jd martinez manually because I could not figure out what search parameter to use for his first name
for batter in redsox_batters:
    matches = playerid_lookup(batter[1], batter[0])
    # float() on a Series is deprecated and only works for a single row, so
    # require exactly one match and convert the scalar explicitly.
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one player id for {batter[0]} {batter[1]}, "
            f"found {len(matches)}")
    redsox_batters_dict[f"{batter[0]}+{batter[1]}"] = float(
        matches['key_mlbam'].iloc[0])

# One "first+last,id" line per batter, without a header row
pd.DataFrame.from_dict(data=redsox_batters_dict,
                       orient='index').to_csv('redsox_batters.csv',
                                              header=False)

# Now, collect all statcast data from the ALCS dates (15OCT21 thru 22OCT21) and filter based on player_ids found above
data = statcast(start_dt="2021-10-15", end_dt="2021-10-22")
data = data.loc[(data['batter'].isin(redsox_batters_dict.values()))]
data.to_csv('unfilteredAtBats.csv')