def ytd(self):
        """Write year-to-date batting stats to ``self.output_file`` and return them.

        The stats cover the hard-coded start date ``2018-03-29`` up to the date
        returned by ``DateManipulation().get_date_for_num_days(2)``. Each row is
        written as ``str(row)`` followed by a newline. Afterwards a player is
        built with ``self.create_player()`` and printed via ``print_player()``.

        Returns:
            The object returned by ``batting_stats_range`` for that date range.
        """
        print("\nFINDING YEAR TO DATE'S DATA:")

        date_obj = DateManipulation()

        prev_date = '2018-03-29'
        last_date = date_obj.get_date_for_num_days(2)

        # Download before opening the output: opening with 'w' truncates the
        # file, so a failed fetch must not be allowed to wipe earlier output.
        data = batting_stats_range(prev_date, last_date)

        # The context manager closes the handle even if a write raises.
        with open(self.output_file, 'w') as out:
            for _, row in data.iterrows():
                out.write(str(row))
                out.write("\n")

        player = self.create_player()

        player.print_player()

        return data
Esempio n. 2
0
	def write_batting_for_single_date(self, output_file):
		"""Write batting stats for the date from ``self.find_prev_date()`` to a file.

		Only a start date is passed to ``batting_stats_range``. Each row is
		written to ``output_file`` as ``str(row)`` followed by a newline.

		Args:
			output_file: Path of the text file to (over)write.

		Returns:
			The object returned by ``batting_stats_range``.
		"""
		data = batting_stats_range(self.find_prev_date())

		# The context manager closes the handle even if a write raises.
		with open(output_file, 'w') as out:
			for _, row in data.iterrows():
				out.write(str(row))
				out.write("\n")

		return data
Esempio n. 3
0
	def write_batting_for_date(self, outputfile):
		"""Write batting stats from 2018-03-29 up to ``self.find_prev_date()``.

		The end date is printed, then the stats for the range are fetched and
		each row is written to ``outputfile`` as ``str(row)`` plus a newline.

		Args:
			outputfile: Path of the text file to (over)write.

		Returns:
			The object returned by ``batting_stats_range``.
		"""
		# Resolve the end date once so the printed value and the queried value
		# cannot disagree.
		end_date = self.find_prev_date()
		print(end_date)
		data = batting_stats_range("2018-03-29", end_date)

		# The context manager closes the handle even if a write raises.
		with open(outputfile, 'w') as out:
			for _, row in data.iterrows():
				out.write(str(row))
				out.write("\n")

		return data
Esempio n. 4
0
    def write_data_to_text(self, years_before):
        """Write one season window of batting stats to ``self.output_file``.

        The season year is ``self.year - years_before``; the queried range is
        ``<year>-03-25`` through ``<year>-10-01``. Each row is written as
        ``str(row)`` followed by a newline.

        Args:
            years_before: Number of years to subtract from ``self.year``.

        Returns:
            The object returned by ``batting_stats_range``.
        """
        calc_year = self.year - years_before

        # Download before opening the output: opening with 'w' truncates the
        # file, so a failed fetch must not be allowed to wipe earlier output.
        data = batting_stats_range(
            str(calc_year) + "-03-25",
            str(calc_year) + "-10-01")

        # The context manager closes the handle even if a write raises.
        with open(self.output_file, 'w') as out:
            for _, row in data.iterrows():
                out.write(str(row))
                out.write("\n")

        return data
Esempio n. 5
0
def get_raw_stats(season_days_so_far, len_days, ct):
        """Collect one batting-stats frame per day in ``season_days_so_far``.

        Each day is passed to ``batting_stats_range`` as ``str(day)``; the
        resulting frame gets a ``DATE`` column holding the day. Progress is
        printed as ``ct / len_days`` in percent. ``ct`` is incremented for
        every day, including days skipped because the lookup raised
        ``IndexError``.

        Returns:
            ``(frames, ct)``: the list of per-day frames and the updated count.
        """
        frames = []
        for day in season_days_so_far:
            ct += 1
            try:
                day_frame = _pybaseball.batting_stats_range(str(day))
                day_frame['DATE'] = day
                pct_complete = int(round(ct/len_days * 100, 0))
                print('Compiling... ', day)
                print(f"{pct_complete}%")
                clear_output(wait=True)
                frames.append(day_frame)
            except IndexError:
                # Skip this day and move on to the next one.
                continue
        return frames, ct
    def write_file_for_dates(self, prev_date, late_date):
        """Write batting stats for an explicit date range to ``self.output_file``.

        Each row is written as ``str(row)`` followed by a newline.

        Args:
            prev_date: Start date passed to ``batting_stats_range``.
            late_date: End date passed to ``batting_stats_range``.

        Returns:
            The object returned by ``batting_stats_range``.
        """
        # Download before opening the output: opening with 'w' truncates the
        # file, so a failed fetch must not be allowed to wipe earlier output.
        data = batting_stats_range(prev_date, late_date)

        # The context manager closes the handle even if a write raises.
        with open(self.output_file, 'w') as out:
            for _, row in data.iterrows():
                out.write(str(row))
                out.write("\n")

        return data
Esempio n. 7
0
def get_raw_stats(season_days_so_far, len_days, ct):
    """Fetch one day of batting stats per entry and combine them into one frame.

    Args:
        season_days_so_far: Iterable of days; each is passed to
            ``batting_stats_range`` as ``str(day)`` and stored in a ``DATE``
            column of that day's frame.
        len_days: Denominator used for the printed completion percentage.
        ct: Running count of processed days; incremented once per entry,
            including entries that end up skipped.

    Returns:
        ``(frame, ct)``: the per-day frames concatenated with
        ``ignore_index=True`` (an empty ``DataFrame`` when no day produced
        data) and the updated count.
    """
    df_lst = []
    for d in season_days_so_far:
        ct += 1
        try:
            tmp_df = _pybaseball.batting_stats_range(str(d))
            tmp_df['DATE'] = d
            pct_complete = int(round(ct / len_days * 100, 0))
            print('FIRST-TIME-USE INITIALIZE')
            print('processing... ', d)
            print("{}%".format(pct_complete))
            clear_output(wait=True)
            df_lst.append(tmp_df)
        except IndexError:
            # Skip this day and move on to the next one.
            continue
    if not df_lst:
        # concat() raises ValueError on an empty list; hand back an empty
        # frame so callers always receive a DataFrame.
        return _pd.DataFrame(), ct
    return _pd.concat(df_lst, ignore_index=True), ct
    def write_file_for_range(self, num_days):
        """Write batting stats for a computed date range to ``self.output_file``.

        The range runs from ``get_date_for_num_days(num_days)`` to
        ``get_date_for_num_days(1)``, both taken from a fresh
        ``DateManipulation`` instance. Each row is written as ``str(row)``
        followed by a newline.

        Args:
            num_days: Value passed to ``get_date_for_num_days`` to obtain the
                start date.

        Returns:
            The object returned by ``batting_stats_range``.
        """
        date_obj = DateManipulation()

        prev_date = date_obj.get_date_for_num_days(num_days)
        last_date = date_obj.get_date_for_num_days(1)

        # Download before opening the output: opening with 'w' truncates the
        # file, so a failed fetch must not be allowed to wipe earlier output.
        data = batting_stats_range(prev_date, last_date)

        # The context manager closes the handle even if a write raises.
        with open(self.output_file, 'w') as out:
            for _, row in data.iterrows():
                out.write(str(row))
                out.write("\n")

        return data
Esempio n. 9
0
    currentYear = currentDate.year
    currentDay = date.today()
    recentDay = currentDay - timedelta(days=7)
    currentDay = str(currentDay)
    recentDay = str(recentDay)
else:
    currentYear = currentDate.year -1
    currentDay = None
    recentDay = None

# Season-level pitching and batting tables for the selected year.
pitchingData = pitching_stats(currentYear)
battingData = batting_stats(currentYear)

# currentDay is a date string when a recent window was computed above, and
# None in the fallback branch that only picks the previous year.
if currentDay is not None:
    # Stats restricted to the recentDay..currentDay window.
    recentPitchingData = pitching_stats_range(recentDay, currentDay)
    recentBattingData = batting_stats_range(recentDay, currentDay)

    # to_json() output is parsed and re-dumped only to get indent=2 formatting.
    recentPitchingDataFile = open("../public/json/pitcherRankingsRecent.json", "w")
    recentPitchingDataFile.write(json.dumps(json.loads(recentPitchingData.reset_index().to_json(orient='index')), indent=2))
    recentPitchingDataFile.close()

    recentBattingDataFile = open("../public/json/batterRankingsRecent.json", "w")
    recentBattingDataFile.write(json.dumps(json.loads(recentBattingData.reset_index().to_json(orient='index')), indent=2))
    recentBattingDataFile.close()

else:
    # No recent window: write an empty JSON object to the recent pitcher file.
    recentPitchingDataFile = open("../public/json/pitcherRankingsRecent.json", "w")
    recentPitchingDataFile.write('{}')
    recentPitchingDataFile.close()

    # NOTE(review): the visible snippet ends right after this open(); the
    # matching write/close for the batter file is not shown here - confirm.
    recentBattingDataFile = open("../public/json/batterRankingsRecent.json", "w")
Esempio n. 10
0
    for month in months:
        # Date window and destination for this month's download.
        start_dt = f'{year}-{month}-01'
        end_dt = f'{year}-{month}-{months_days[month]}'
        print(f'Now Downloading : start_dt = {start_dt}, end_dt={end_dt}')
        filename = f'{year}-{month}_batting.csv'
        file_path = folder_path.joinpath(filename)
        rule = '-' * 40

        # Months already on disk are reported and skipped.
        if file_path.exists():
            print(rule)
            print(f'{filename} already dowaloaded !')
            print(rule)
            continue

        try:
            print(rule)
            print(f'Start with {filename}')
            print(rule)
            data = batting_stats_range(start_dt=start_dt, end_dt=end_dt)
            data.to_csv(file_path, encoding='utf_8_sig')
            print(rule)
            print(f'Done with {filename}')
            print(rule)
        except Exception as e:
            # Report the failure, append the window to the error log, and
            # move on to the next month without pausing.
            print(f'\nError\n{e}')
            with open(folder_path.joinpath('errorlog.txt'), 'a') as logfile:
                logfile.write(f'\noccur at {start_dt} ~ {end_dt}')
        else:
            # Pause only after a successful download.
            time.sleep(10)
Esempio n. 11
0
    end_date = today

# Build a daily (freq='D') range of dates from start_date to end_date; the
# loop below walks it one day at a time.
date_string = pd.date_range(start_date, end_date, freq='D')
'''SRART COMPILING'''
# Loop through every day in the range.
for active_date in date_string:

    # Only months 3 through 11 are processed; other months are skipped.
    # The date is formatted as a 'YYYY-MM-DD' string for the stats lookups.
    if (active_date.month > 2) & (active_date.month < 12):
        loop_date = active_date.strftime("%Y-%m-%d")

        # Attempt the scrape for this day.
        # NOTE(review): the matching except/finally clause lies outside the
        # visible snippet - confirm how failures are handled.
        try:
            # Single-date lookups: only the start date is supplied.
            batting = batting_stats_range(loop_date, )
            pitching = pitching_stats_range(loop_date, )

            # Prefix every column from position 6 onward with 'b_' (batting)
            # or 'p_' (pitching); the first six columns keep their names.
            batting_specific_columns = [
                (i, 'b_' + i) for i in batting.iloc[:, 6:].columns.values
            ]
            batting.rename(columns=dict(batting_specific_columns),
                           inplace=True)

            pitching_specific_columns = [
                (i, 'p_' + i) for i in pitching.iloc[:, 6:].columns.values
            ]
            pitching.rename(columns=dict(pitching_specific_columns),
                            inplace=True)
Esempio n. 12
0
import pybaseball
import pandas as pd

# Gather hitter data for the window 2008-01-01 through 2020-12-31
# (the dates actually passed to the query below).
data = pybaseball.batting_stats_range('2008-01-01', '2020-12-31')

# Keep only hitters with at least 1610 PAs.
# Sample-size threshold based on XBH stabilization
# (https://library.fangraphs.com/principles/sample-size/)
data = data.loc[data['PA'] >= 1610]

# Persist the filtered table as CSV for easier access later.
data.to_csv('./data/gather_hitters.csv')
Esempio n. 13
0
 def getAllPlayers(year):
     """Return the ``Name`` column of the batting stats for *year* as a list.

     The queried window is ``<year>-04-01`` through ``<year>-09-30``. ``year``
     is joined to the date suffixes with ``+``, so it must be a string.
     """
     season_start = year + '-04-01'
     season_end = year + '-09-30'
     stats = pd.DataFrame(batting_stats_range(season_start, season_end))
     return stats['Name'].tolist()
def test_batting_stats_range_single_date():
    """A query given only a start date should return a non-empty result."""
    single_day = batting_stats_range('2019-05-01')
    assert not single_day.empty