Exemple #1
0
def scrape_elo():
    """Return the winner and loser Elo values in effect before each match.

    For every row of the module-level ``atp_bet_data`` frame, the index of
    the matching Elo ranking date is chosen by ``closest_past_date``, the
    ranking table for that date is downloaded from
    ultimatetennisstatistics.com, and ``get_elo`` extracts the winner/loser
    values for that row.  A ranking table is only downloaded again when the
    ranking date differs from the one used for the previous row.

    Returns:
        tuple: ``(elo_winners, elo_losers)`` -- two lists with one entry per
        row of ``atp_bet_data``, in row order.  Both lists are empty when
        there are no rows to process.
    """
    # Get the Elo ranking dates of every covered season.  DataFrame.append
    # was removed in pandas 2.0, so the yearly frames are concatenated.
    yearly_dates = [
        pd.read_json(f"https://www.ultimatetennisstatistics.com/seasonRankingDates?rankType=ELO_RANK&season={year}")
        for year in [2018, 2019, 2020]
    ]
    elo_dates = pd.concat(yearly_dates)
    elo_dates.columns = ['Date']
    elo_dates.sort_values('Date', inplace=True)
    elo_dates.reset_index(inplace=True, drop=True)
    elo_dates = pd.to_datetime(elo_dates['Date']).dt.date

    # Get the date of every match
    tourney_dates = atp_bet_data['Date'].dt.date

    # Get the Elo ranking date that applies to each match
    date_indices = tourney_dates.apply(lambda x: closest_past_date(elo_dates, x))
    dates_before = elo_dates[date_indices]
    dates_before = [date.strftime('%d-%m-%Y') for date in dates_before]

    # Get the Elo values before each match
    elo_winners = []
    elo_losers = []
    elo = None
    previous_date = None
    for i, date in enumerate(dates_before):
        # previous_date starts as None, so the first row always downloads;
        # afterwards a download only happens when the ranking date changes.
        if date != previous_date:
            elo = get_json(f"https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date={date}&_=1578626200145")
            previous_date = date

        elo_winner, elo_loser = get_elo(elo, atp_bet_data.iloc[i])
        elo_winners.append(elo_winner)
        elo_losers.append(elo_loser)

    return elo_winners, elo_losers
Exemple #2
0
def get_player_ids():
    """Return a dataframe of winner names and their player ids.

    Every value of the ``Winner`` column of the module-level
    ``atp_bet_data`` frame is looked up in the current Elo ranking table
    downloaded from ultimatetennisstatistics.com.  The second-to-last word
    of the winner name is used as the surname and the first character of the
    last word as the first-name initial; a ranked player matches when the
    last word of its ``name`` equals the surname and its ``name`` contains
    the initial.

    Returns:
        pandas.DataFrame: columns ``Name`` and ``PlayerId`` with duplicate
        rows removed.  ``PlayerId`` is ``None`` when the name cannot be
        split into surname and initial or when no ranked player matches.
    """
    elo = get_json("https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145")
    # Loop-invariant: last word of every ranked player's name.
    elo_surnames = elo['name'].str.split().str[-1]
    player_ids = []

    for player_name in atp_bet_data['Winner']:
        # Drop a single trailing space (endswith is safe on an empty name).
        if player_name.endswith(' '):
            player_name = player_name[:-1]

        name_parts = player_name.replace('-', ' ').replace(',', '').split(' ')
        player_id = None
        # A surname and a non-empty last word are both needed for a lookup.
        if len(name_parts) >= 2 and name_parts[-1]:
            player_surname = name_parts[-2]
            player_first_name = name_parts[-1][0]
            # The initial is a literal character, not a regex pattern, and
            # missing names must count as "no match" rather than NaN.
            initial_matches = elo['name'].str.contains(player_first_name, regex=False, na=False)
            player = elo[(elo_surnames == player_surname) & initial_matches]
            if not player.empty:
                player_id = player['playerId'].iloc[0]

        player_ids.append([player_name, player_id])

    player_id = pd.DataFrame(player_ids, columns=['Name', 'PlayerId']).drop_duplicates()
    return player_id
    players['won'], players['lost'], players['matches'] = 0, 0, 0
    for index, row in players.iterrows():a
        try:
            # players.at[index,'won'] += match_hist['winner_name'].value_counts()[row['name']]
            # players.at[index,'lost'] += match_hist['loser_name'].value_counts()[row['name']]
            players.at[index,'won'] += match_hist[match_hist['tourney_date']<='20200106']['winner_name'].value_counts()[row['name']]
            players.at[index,'lost'] += match_hist[match_hist['tourney_date']<='20200106']['loser_name'].value_counts()[row['name']]
        except:
            pass
    players['matches'] = players['won'] + players['lost']
    # players.to_csv('atpdata/elo_ratings_current.csv')
    players.to_csv('atpdata/current_elo_ratings.csv')
get_num_matches(players)
"""
# Download the current Elo ranking table.
elo = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145"
)
# Rename every player whose name contains 'Wawrinka'
# (presumably to match the spelling used by the other data sources -- TODO confirm).
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'

# Download the UTR table, sort it by three-month rating (highest first) and
# rename its columns to 'name' / 'points'.
utr = get_utr("https://agw-prod.myutr.com/v2/player/top?gender=M&tags=Pro")
utr = utr.sort_values('threeMonthRating', ascending=False)
utr = utr.rename(columns={'displayName': 'name', 'threeMonthRating': 'points'})

atp_bet_data_2019 = pd.read_excel('atpdata/bet_data_2019.xlsx')

# Download the 2020 archive, extract it into the working directory and move
# the workbook next to the 2019 one.  The archive is opened in a ``with``
# block so the ZipFile is closed once extraction is done.
r = requests.get("http://www.tennis-data.co.uk/2020/2020.zip")
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall()
os.rename('2020.xlsx', 'atpdata/bet_data_2020.xlsx')
atp_bet_data_2020 = pd.read_excel('atpdata/bet_data_2020.xlsx')
curr_date = datetime.today()
#curr_date = datetime.strptime('2019-02-10', '%Y-%m-%d')
from_date = (curr_date - timedelta(days=90)).strftime('%d-%m-%Y')
to_date = curr_date.strftime('%d-%m-%Y')

# NOTE(review): atp_bet_data is not assigned in the visible code; presumably
# it is built from the 2019/2020 frames above -- verify.
# NOTE(review): with time_filter equal to curr_date the two strict
# comparisons below can never both hold, so the selection is always empty;
# the commented-out 7-day window is the alternative.
#time_filter = (curr_date-timedelta(days=7))
time_filter = curr_date
ausopen_2019_matches = atp_bet_data[(atp_bet_data['Date'] > time_filter)
                                    & (atp_bet_data['Date'] < curr_date)]
#ausopen_2019_matches = ausopen_2019_matches[ausopen_2019_matches['Location']=='Cordoba']
#ausopen_2019_matches = atp_bet_data[(atp_bet_data['Tournament']=='Cordoba Open')]

# Renumber the selected rows 0..n-1.
ausopen_2019_matches.index = range(len(ausopen_2019_matches))

# NOTE(review): elo and utr are downloaded a second time here, replacing the
# frames built at the top of this script.
elo = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145"
)
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'
elo.loc[elo['name'].str.contains('Ramos'), 'name'] = 'Albert Ramos Vinolas'

utr = get_utr("https://agw-prod.myutr.com/v2/player/top?gender=M&tags=Pro")
utr = utr.sort_values('threeMonthRating', ascending=False)
utr = utr.rename(columns={'displayName': 'name', 'threeMonthRating': 'points'})
# Replace hyphens in UTR names with spaces.
utr['name'] = utr['name'].str.replace('-', ' ')

# 90-day window ending at time_filter, formatted as dd-mm-YYYY for the URL.
from_date = (time_filter - timedelta(days=90)).strftime('%d-%m-%Y')
to_date = time_filter.strftime('%d-%m-%Y')
#to_date = datetime.today().strftime('%d-%m-%Y')
# Download the matchesWonPct leaders table for that window.
win_perc = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase=&category=matchesWonPct&season=&fromDate={0}&toDate={1}&level=&bestOf=&surface=&indoor=&speed=&round=&result=&tournamentId=&opponent=&countryId=&minEntries=&_=1579235798414"
    .format(from_date, to_date))
            # players.at[index,'won'] += match_hist['winner_name'].value_counts()[row['name']]
            # players.at[index,'lost'] += match_hist['loser_name'].value_counts()[row['name']]
            players.at[index,'won'] += match_hist[match_hist['tourney_date']<='20200106']['winner_name'].value_counts()[row['name']]
            players.at[index,'lost'] += match_hist[match_hist['tourney_date']<='20200106']['loser_name'].value_counts()[row['name']]
        except:
            pass
    players['matches'] = players['won'] + players['lost']
    # players.to_csv('atpdata/elo_ratings_current.csv')
    players.to_csv('atpdata/current2_elo_ratings.csv')
get_num_matches(players)
"""
# Load the stored Elo ratings and rename every player whose name contains
# 'Wawrinka'.
elo = pd.read_csv('atpdata/test3_elo_ratings.csv')
elo.loc[elo['name'].str.contains('Wawrinka'), 'name'] = 'Stanislas Wawrinka'

# Stats-leader tables from ultimatetennisstatistics.com, one request per
# category.  Each URL is written as adjacent string literals, which Python
# joins into a single string.

# Category acePct (rowCount=100, season=-1).
aces = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable"
    "?current=1&rowCount=100&sort%5Bvalue%5D=desc&searchPhrase="
    "&category=acePct&season=-1&fromDate=&toDate=&level=&bestOf="
    "&surface=&indoor=&speed=&round=&result=&tournamentId="
    "&opponent=&countryId=&minEntries=&_=1578621981846"
)

# Category firstServePct (rowCount=-1, season=-1).
first_serve = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable"
    "?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase="
    "&category=firstServePct&season=-1&fromDate=&toDate=&level=&bestOf="
    "&surface=&indoor=&speed=&round=&result=&tournamentId="
    "&opponent=&countryId=&minEntries=&_=1578641519820"
)

# Category upsetsScoredPct (rowCount=-1, no season filter).
upsets_scored = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable"
    "?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase="
    "&category=upsetsScoredPct&season=&fromDate=&toDate=&level=&bestOf="
    "&surface=&indoor=&speed=&round=&result=&tournamentId="
    "&opponent=&countryId=&minEntries=&_=1578626448297"
)

# Category upsetsAgainstPct (rowCount=-1, no season filter).
upsets_against = get_json(
    "https://www.ultimatetennisstatistics.com/statsLeadersTable"
    "?current=1&rowCount=-1&sort%5Bvalue%5D=desc&searchPhrase="
    "&category=upsetsAgainstPct&season=&fromDate=&toDate=&level=&bestOf="
    "&surface=&indoor=&speed=&round=&result=&tournamentId="
    "&opponent=&countryId=&minEntries=&_=1578626448300"
)

# test_elo = get_json("https://www.ultimatetennisstatistics.com/rankingsTableTable?current=1&rowCount=-1&searchPhrase=&rankType=ELO_RANK&season=&date=&_=1578626200145")

# Rankings table requested with rankType=RANK.
rankings = get_json(
    "https://www.ultimatetennisstatistics.com/rankingsTableTable"
    "?current=1&rowCount=-1&searchPhrase="
    "&rankType=RANK&season=&date=&_=1578782369780"
)
win_perc = get_json(