def ytd(self):
    """Fetch batting stats from the season start to a recent date and dump them.

    The range starts at the hard-coded date ``'2018-03-29'`` and ends at
    whatever ``DateManipulation().get_date_for_num_days(2)`` returns. Each
    row of the result is written to ``self.output_file`` as ``str(row)``
    followed by a newline. Afterwards a player object is built with
    ``self.create_player()`` and printed via its ``print_player()``.

    Returns:
        The object returned by ``batting_stats_range`` (it is iterated with
        ``iterrows()``, so presumably a pandas DataFrame).
    """
    print("\nFINDING YEAR TO DATE'S DATA:")
    prev_date = '2018-03-29'
    last_date = DateManipulation().get_date_for_num_days(2)

    # Fetch before touching the output file: if the download fails, the
    # previous file contents are left intact instead of being truncated.
    data = batting_stats_range(prev_date, last_date)

    # The context manager closes the file even if a write raises.
    with open(self.output_file, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")

    player = self.create_player()
    player.print_player()
    return data
def write_batting_for_single_date(self, output_file):
    """Fetch batting stats for the date from ``find_prev_date`` and write them.

    Only a start date is passed to ``batting_stats_range``; presumably the
    library then returns stats for that single day (as the method name
    suggests) -- confirm against the library documentation.

    Args:
        output_file: Path of the text file to (over)write. Each row is
            written as ``str(row)`` followed by a newline.

    Returns:
        The object returned by ``batting_stats_range``.
    """
    data = batting_stats_range(self.find_prev_date())

    # The context manager closes the file even if a write raises.
    with open(output_file, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")
    return data
def write_batting_for_date(self, outputfile):
    """Fetch batting stats from 2018-03-29 up to the previous date and write them.

    The end of the range is the value of ``self.find_prev_date()``, which is
    also printed. It is evaluated once so the printed date and the queried
    date are guaranteed to be the same value.

    Args:
        outputfile: Path of the text file to (over)write. Each row is
            written as ``str(row)`` followed by a newline.

    Returns:
        The object returned by ``batting_stats_range``.
    """
    end_date = self.find_prev_date()
    print(end_date)
    data = batting_stats_range("2018-03-29", end_date)

    # The context manager closes the file even if a write raises.
    with open(outputfile, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")
    return data
def write_data_to_text(self, years_before):
    """Fetch one earlier season's batting stats and write them to a text file.

    The season queried is ``self.year - years_before``, over the fixed
    window March 25 to October 1 of that year. Each row of the result is
    written to ``self.output_file`` as ``str(row)`` followed by a newline.

    Args:
        years_before: Number of years to subtract from ``self.year``.

    Returns:
        The object returned by ``batting_stats_range``.
    """
    season = str(self.year - years_before)

    # Fetch before touching the output file: if the download fails, the
    # previous file contents are left intact instead of being truncated.
    data = batting_stats_range(season + "-03-25", season + "-10-01")

    # The context manager closes the file even if a write raises.
    with open(self.output_file, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")
    return data
def get_raw_stats(season_days_so_far, len_days, ct):
    """Collect one batting-stats table per day, reporting progress as it goes.

    Args:
        season_days_so_far: Sequence of days to query; each is passed to
            ``batting_stats_range`` as ``str(day)``.
        len_days: Denominator used for the printed completion percentage.
        ct: Running count of days processed before this call.

    Returns:
        A ``(tables, ct)`` tuple: the list of per-day tables (each tagged
        with a ``'DATE'`` column) and the updated running count. Days whose
        processing raises ``IndexError`` are counted but contribute no table.
    """
    collected = []
    processed = ct
    for day in season_days_so_far:
        processed += 1
        try:
            daily = _pybaseball.batting_stats_range(str(day), )
            daily['DATE'] = day
            percent = int(round(processed / len_days * 100, 0))
            print('Compiling... ', day)
            print("{}%".format(percent))
            clear_output(wait=True)
            collected.append(daily)
        except IndexError:
            # Skip this day and move on to the next one.
            continue
    return collected, processed
def write_file_for_dates(self, prev_date, late_date):
    """Fetch batting stats for an explicit date range and write them to a file.

    Each row of the result is written to ``self.output_file`` as
    ``str(row)`` followed by a newline.

    Args:
        prev_date: Start of the range, passed straight to
            ``batting_stats_range``.
        late_date: End of the range, passed straight to
            ``batting_stats_range``.

    Returns:
        The object returned by ``batting_stats_range``.
    """
    # Fetch before touching the output file: if the download fails, the
    # previous file contents are left intact instead of being truncated.
    data = batting_stats_range(prev_date, late_date)

    # The context manager closes the file even if a write raises.
    with open(self.output_file, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")
    return data
def get_raw_stats(season_days_so_far, len_days, ct):
    """Collect per-day batting stats and combine them into one DataFrame.

    Args:
        season_days_so_far: Sequence of days to query; each is passed to
            ``batting_stats_range`` as ``str(day)``.
        len_days: Denominator used for the printed completion percentage.
        ct: Running count of days processed before this call.

    Returns:
        A ``(frame, ct)`` tuple: the concatenation of all per-day tables
        (each tagged with a ``'DATE'`` column, index reset) and the updated
        running count. If no day produced a table, an empty DataFrame is
        returned instead of letting ``concat`` raise on an empty list.
    """
    frames = []
    for day in season_days_so_far:
        ct += 1
        try:
            daily = _pybaseball.batting_stats_range(str(day))
            daily['DATE'] = day
            pct_complete = int(round(ct / len_days * 100, 0))
            print('FIRST-TIME-USE INITIALIZE')
            print('processing... ', day)
            print("{}%".format(pct_complete))
            clear_output(wait=True)
            frames.append(daily)
        except IndexError:
            # Best-effort: skip days that fail this way (presumably days
            # with no published table -- confirm against the library).
            continue

    if not frames:
        # concat() raises ValueError when given nothing to concatenate.
        return _pd.DataFrame(), ct
    return _pd.concat(frames, ignore_index=True), ct
def write_file_for_range(self, num_days):
    """Fetch batting stats for a recent window and write them to a file.

    The window runs from ``get_date_for_num_days(num_days)`` to
    ``get_date_for_num_days(1)``, both obtained from a fresh
    ``DateManipulation`` instance. Each row of the result is written to
    ``self.output_file`` as ``str(row)`` followed by a newline.

    Args:
        num_days: Passed to ``get_date_for_num_days`` to obtain the start
            of the window.

    Returns:
        The object returned by ``batting_stats_range``.
    """
    date_obj = DateManipulation()
    prev_date = date_obj.get_date_for_num_days(num_days)
    last_date = date_obj.get_date_for_num_days(1)

    # Fetch before touching the output file: if the download fails, the
    # previous file contents are left intact instead of being truncated.
    data = batting_stats_range(prev_date, last_date)

    # The context manager closes the file even if a write raises.
    with open(self.output_file, 'w') as out:
        for _, row in data.iterrows():
            out.write(str(row))
            out.write("\n")
    return data
currentYear = currentDate.year currentDay = date.today() recentDay = currentDay - timedelta(days=7) currentDay = str(currentDay) recentDay = str(recentDay) else: currentYear = currentDate.year -1 currentDay = None recentDay = None pitchingData = pitching_stats(currentYear) battingData = batting_stats(currentYear) if currentDay is not None: recentPitchingData = pitching_stats_range(recentDay, currentDay) recentBattingData = batting_stats_range(recentDay, currentDay) recentPitchingDataFile = open("../public/json/pitcherRankingsRecent.json", "w") recentPitchingDataFile.write(json.dumps(json.loads(recentPitchingData.reset_index().to_json(orient='index')), indent=2)) recentPitchingDataFile.close() recentBattingDataFile = open("../public/json/batterRankingsRecent.json", "w") recentBattingDataFile.write(json.dumps(json.loads(recentBattingData.reset_index().to_json(orient='index')), indent=2)) recentBattingDataFile.close() else: recentPitchingDataFile = open("../public/json/pitcherRankingsRecent.json", "w") recentPitchingDataFile.write('{}') recentPitchingDataFile.close() recentBattingDataFile = open("../public/json/batterRankingsRecent.json", "w")
# Download batting stats one calendar month at a time and save each month
# to its own CSV under folder_path. Months whose CSV already exists are
# skipped, so the loop can be re-run to resume an interrupted download.
# NOTE(review): the block layout below was reconstructed from a collapsed
# line; in particular, time.sleep(10) is assumed to sit inside the loop.
for month in months:
    print(
        f'Now Downloading : start_dt = {year}-{month}-01, end_dt={year}-{month}-{months_days[month]}'
    )
    # months_days maps each month to the day number used as the range end.
    start_dt = f'{year}-{month}-01'
    end_dt = f'{year}-{month}-{months_days[month]}'
    filename = f'{year}-{month}_batting.csv'
    file_path = folder_path.joinpath(filename)
    # Skip months that were already saved by an earlier run.
    if file_path.exists():
        print('-' * 40)
        print(f'{filename} already dowaloaded !')
        print('-' * 40)
        continue
    try:
        print('-' * 40)
        print(f'Start with {filename}')
        print('-' * 40)
        data = batting_stats_range(start_dt=start_dt, end_dt=end_dt)
        data.to_csv(file_path, encoding='utf_8_sig')
        print('-' * 40)
        print(f'Done with {filename}')
        print('-' * 40)
    except Exception as e:
        # Best-effort: report the error, append the failed date range to
        # errorlog.txt (the exception text itself is only printed, not
        # logged), and carry on with the next month.
        print(f'\nError\n{e}')
        with open(folder_path.joinpath('errorlog.txt'), 'a') as logfile:
            logfile.write(f'\noccur at {start_dt} ~ {end_dt}')
        continue
    # Pause between successful downloads (presumably to avoid hammering
    # the data source -- confirm). Skipped/failed months do not sleep.
    time.sleep(10)
end_date = today # get a datestring to use to cycle through to collect data date_string = pd.date_range(start_date, end_date, freq='D') '''SRART COMPILING''' #loop through all the days for active_date in date_string: #change the format of 'date' to a 'string' since that what the website needs if (active_date.month > 2) & (active_date.month < 12): loop_date = active_date.strftime("%Y-%m-%d") #try to scrape data, on error go to next day and try again try: #get stats batting = batting_stats_range(loop_date, ) pitching = pitching_stats_range(loop_date, ) #now clean up column names by adding 'p_' and 'b_' to pitching and batting stats batting_specific_columns = [ (i, 'b_' + i) for i in batting.iloc[:, 6:].columns.values ] batting.rename(columns=dict(batting_specific_columns), inplace=True) pitching_specific_columns = [ (i, 'p_' + i) for i in pitching.iloc[:, 6:].columns.values ] pitching.rename(columns=dict(pitching_specific_columns), inplace=True)
import pybaseball
import pandas as pd

# Date window to query: 2008-01-01 through 2020-12-31.
START_DATE = '2008-01-01'
END_DATE = '2020-12-31'

# Minimum plate appearances for a hitter to be kept. The sample size is
# based on XBH stabilization
# (https://library.fangraphs.com/principles/sample-size/).
MIN_PLATE_APPEARANCES = 1610

# Destination CSV, saved for easier access later.
OUTPUT_CSV = './data/gather_hitters.csv'

# Gather all hitter data for the window, then cull to hitters with at
# least MIN_PLATE_APPEARANCES plate appearances.
data = pybaseball.batting_stats_range(START_DATE, END_DATE)
data = data[data['PA'] >= MIN_PLATE_APPEARANCES]

data.to_csv(OUTPUT_CSV)
def getAllPlayers(year):
    """Return the names of all players with batting stats in a season window.

    The window is April 1 through September 30 of ``year``.

    Args:
        year: Season year, as a string or an int (e.g. ``'2019'`` or
            ``2019``); it is converted with ``str()`` before the dates
            are built.

    Returns:
        A list of the values in the ``'Name'`` column of the result of
        ``batting_stats_range``.
    """
    season = str(year)
    data = batting_stats_range(season + '-04-01', season + '-09-30')
    return data['Name'].tolist()
def test_batting_stats_range_single_date():
    """Querying with only a start date should yield a non-empty result."""
    single_day = batting_stats_range('2019-05-01')
    came_back_empty = single_day.empty
    assert not came_back_empty