async def main():
    """Print the EPL player list for the season ``CURRENT_DATE.year`` as JSON."""
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        # The season is taken from the module-level CURRENT_DATE.
        league_players = await api.get_league_players('epl', CURRENT_DATE.year)
        serialised = json.dumps(league_players)
        print(serialised)
async def get_team_data(team, season='2018'):
    """Return one team's Understat results for a season as a DataFrame.

    Args:
        team: Team name forwarded to ``Understat.get_team_results``.
        season: Season forwarded to ``get_team_results``. Defaults to
            ``'2018'``, the value that used to be hard-coded here.

    Returns:
        The DataFrame produced by ``pd.read_json(..., orient='records')``.
    """
    # Local import keeps the block self-contained.
    from io import StringIO

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        team_json = await understat.get_team_results(team, season)

    # The data is still round-tripped through JSON so pandas applies the same
    # read_json type inference as before. The text is wrapped in StringIO
    # because passing a literal JSON string to read_json is deprecated.
    return pd.read_json(StringIO(json.dumps(team_json)), orient='records')
async def update_Total(league):
    """Fill ``league`` with 2019 EPL player and team totals, then pickle it.

    For every team in the module-level ``teams`` the players with more than
    nine games are added through ``league.addPlayer``. Player positions are
    cached in the module-level ``names`` / ``positions`` lists, which are
    rewritten to ``player_data.csv`` whenever a new player is registered.
    Afterwards each team's summed ``xG`` is added through ``league.addTeam``
    and ``league`` is pickled to ``player_data.pkl``.

    Args:
        league: Object exposing ``addPlayer`` and ``addTeam``.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        print(Fore.RED + 'Extracting season data')
        for team in tqdm(teams):
            players = await understat.get_league_players("epl", 2019, team_title=team)
            for player in players:
                if int(player['games']) <= 9:
                    continue

                player_name = player['player_name']
                if player_name in names:
                    pos = positions[names.index(player_name)]
                else:
                    # Previously `pos` was never assigned on this path, so a
                    # new player got the previous player's position (or an
                    # UnboundLocalError if this was the first player seen).
                    pos = positionfunction(player['position'])
                    names.append(player_name)
                    positions.append(pos)
                    with open('player_data.csv', mode='w', newline='',
                              encoding='utf-8-sig') as file:
                        writer = csv.writer(file)
                        writer.writerow(names)
                        writer.writerow(positions)

                league.addPlayer(player_name, player['team_title'], pos,
                                 player['goals'], player['xG'],
                                 player['assists'], player['xA'], player['id'])

        teams_league = await understat.get_teams('epl', 2019)
        # Iterate over whatever the API returned instead of assuming 20 teams.
        for team_info in teams_league:
            season_xg = sum(match['xG'] for match in team_info['history'])
            league.addTeam(team_info['title'], season_xg, team_info['id'])

    with open('player_data.pkl', 'wb') as output:
        pickle.dump(league, output, pickle.HIGHEST_PROTOCOL)
async def main():
    """Fetch the 2019 EPL team data for ``away_team_name`` into ``res_a``.

    Both ``away_team_name`` and ``res_a`` are defined outside this function.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        away_team_data = await api.get_teams("epl", 2019, title=away_team_name)
        res_a.append(away_team_data)
async def load_match_shots(league, season):
    """Fetch the shots of every result in a league season and upload them.

    Args:
        league: League name forwarded to ``Understat.get_league_results``.
        season: Mapping; its ``"param_season"`` entry selects the season.

    For each fixture the home and away shots are combined into one DataFrame,
    serialised as JSON lines and sent through ``client.put_object``.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        # Fixtures are needed first: shots are fetched per match id.
        fixtures = await understat.get_league_results(league, season["param_season"])

        for fixture in fixtures:
            try:
                match_shots = await understat.get_match_shots(fixture['id'])
            except Exception:
                # Skip this fixture. The old bare `except` fell through and
                # re-used the previous fixture's shots (or hit a NameError on
                # the first iteration).
                print("Unexpected error for fixture-id " + fixture['id'])
                continue

            if not match_shots["a"]:
                print("sorry, no away shots for the fixture-id " + fixture['id'])

            # Home shots first, then away shots. DataFrame.append no longer
            # exists in pandas 2, so the rows are combined up front.
            df_match_shots = pd.DataFrame(
                list(match_shots["h"]) + list(match_shots["a"]))

            payload = df_match_shots.to_json(orient='records', lines=True).encode('UTF-8')
            # NOTE(review): v_bucket / v_file_name are not changed in this
            # function, so every fixture is written to the same key - confirm
            # that this is intended.
            client.put_object(Body=payload, Bucket=v_bucket, Key=v_file_name)
    # No explicit session.close(): leaving the `async with` block closes it.
async def main():
    """Build per-team result tables for every league/season and save them.

    For each combination of the module-level ``leagues`` and ``seasons`` the
    league results are fetched and recorded once per side (home and away).
    Each team's table is then written to
    ``<root_dir>/raw_data/<league>/<season>/<team>/results`` as a pickle and a
    CSV, unless the pickle already exists.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        # league -> season -> team -> match id -> result record
        result_dict = {league: {season: {} for season in seasons} for league in leagues}

        for league, season in it.product(leagues, seasons):
            print('Importing {} {} results'.format(league.replace('_', ' ').title(), season))
            season_results = result_dict[league][season]

            teams = set()
            for team_info in await understat.get_teams(league, season):
                teams.add(team_info['title'])
                season_results[team_info['title']] = {}

            league_results = await understat.get_league_results(league, season)
            for location, result_info in it.product(['h', 'a'], league_results):
                opponent_side = 'a' if location == 'h' else 'h'
                # The former `if datetime.strptime(...)` guard was always true
                # (datetime objects are truthy), so the timestamp is simply
                # parsed once here.
                match_date = datetime.strptime(result_info['datetime'], timestamp_format)
                season_results[result_info[location]['title']][int(result_info['id'])] = {
                    'team': result_info[location]['title'],
                    'opponent': result_info[opponent_side]['title'],
                    'xG_scored': float(result_info['xG'][location]),
                    'xG_conceded': float(result_info['xG'][opponent_side]),
                    'goals_scored': int(result_info['goals'][location]),
                    'goals_conceded': int(result_info['goals'][opponent_side]),
                    'location': location,
                    'date': match_date,
                }

            for team in teams:
                path = '{}/raw_data/{}/{}/{}/results'.format(root_dir, league, season, team)
                pickle_file = '{}/{}_{}_result_df.pickle'.format(path, team, season)
                # Check for the file that is actually written below. The old
                # check used a different name ("<season>_<team>_results_df"),
                # so it never matched and files were always rewritten.
                if os.path.isfile(pickle_file):
                    continue

                Path(path).mkdir(parents=True, exist_ok=True)
                team_frame = pd.DataFrame(season_results[team]).T
                if season == 2020:
                    print(team_frame)
                df = team_frame.sort_values(by='date')
                df.to_pickle(pickle_file)
                df.to_csv('{}/{}_{}_result_df.csv'.format(path, team, season))
async def get_match_shots_async(match_id, options=None):
    """Return the result of ``Understat.get_match_shots`` as a JSON string.

    Args:
        match_id: Forwarded as ``match_id``.
        options: Forwarded as ``options``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        shots = await api.get_match_shots(match_id=match_id, options=options)
        return json.dumps(shots)
async def get_team_stats_async(team_name, season):
    """Return the result of ``Understat.get_team_stats`` as a JSON string.

    Args:
        team_name: Forwarded as ``team_name``.
        season: Forwarded as ``season``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        team_stats = await api.get_team_stats(team_name=team_name, season=season)
        return json.dumps(team_stats)
async def get_player_stats_async(player_id, positions=None):
    """Return the result of ``Understat.get_player_stats`` as a JSON string.

    Args:
        player_id: Forwarded as ``player_id``.
        positions: Forwarded as ``positions``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        player_stats = await api.get_player_stats(player_id=player_id, positions=positions)
        return json.dumps(player_stats)
async def update_Form(league, form):
    """Fill ``form`` with recent-form figures for teams and players, then pickle it.

    The number of recent games is read from the first cell of
    ``code_settings.csv``. Each 2019 EPL team gets the summed ``xG`` of its
    last ``games_no`` history entries. Each player in ``league.players`` gets
    summed ``xG`` / ``xA`` over the first ``games_no`` entries returned by
    ``get_player_matches`` - but only when that many entries exist, otherwise
    the sums stay 0. ``form`` is finally pickled to ``recent_data.pkl``.

    Args:
        league: Object whose ``players`` mapping holds entries with ``name``,
            ``team`` and ``player_id`` attributes.
        form: Object exposing ``addTeam``, ``addPlayer`` and a ``teams``
            mapping whose values have an ``xG`` attribute.
    """
    # Local import keeps the block self-contained.
    import asyncio

    games_no = int(CSV_reader('code_settings.csv')[0][0])

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        teams_league = await understat.get_teams('epl', 2019)
        # Iterate over whatever the API returned instead of assuming 20 teams.
        for team_info in teams_league:
            history = team_info['history']
            # Clamp the start at 0: with fewer than games_no entries the old
            # negative range start wrapped around and counted games twice.
            recent_games = history[max(len(history) - games_no, 0):]
            team_xg = sum(game['xG'] for game in recent_games)
            form.addTeam(team_info['title'], team_xg, team_info['id'])

        print(Fore.RED + 'Updating player form data')
        # Same half-second pause as before, without blocking the event loop.
        await asyncio.sleep(0.5)

        for player in tqdm(league.players.values()):
            recent_matches = await understat.get_player_matches(player.player_id)
            recent_matches = recent_matches[:games_no]

            xG = 0
            xA = 0
            # Players with fewer than games_no matches keep zero totals.
            if len(recent_matches) >= games_no:
                xG = sum(float(match['xG']) for match in recent_matches)
                xA = sum(float(match['xA']) for match in recent_matches)

            team_xg = form.teams[player.team].xG
            xGr = xG / team_xg
            # NOTE(review): xA is divided by the team's xG, not a team xA -
            # unchanged from the original, confirm this is intended.
            xAr = xA / team_xg
            form.addPlayer(player.name, player.team, xGr, xG, xAr, xA, player.player_id)

    with open('recent_data.pkl', 'wb') as output:
        pickle.dump(form, output, pickle.HIGHEST_PROTOCOL)
async def get_game_df(league_name, year):
    """Return a DataFrame with one row per result of a league season.

    Args:
        league_name: League forwarded to ``Understat.get_league_results``;
            also stored in the ``league`` column.
        year: Season start year as an int (``year + 1`` is computed for the
            ``season`` label).

    Returns:
        DataFrame whose columns are ``league``, ``season``, then ids, date,
        team names/ids, xG, forecast probabilities, scores, ``result``
        ('home' / 'draw' / 'away' from the score) and ``pred`` (the outcome
        with the highest forecast probability).
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        fixtures = await understat.get_league_results(league_name, year)

    a = pd.DataFrame({
        'game_id': [fx['id'] for fx in fixtures],
        'date': [fx['datetime'] for fx in fixtures],
        'home_team': [fx['h']['title'] for fx in fixtures],
        'away_team': [fx['a']['title'] for fx in fixtures],
        'home_team_id': [fx['h']['id'] for fx in fixtures],
        'away_team_id': [fx['a']['id'] for fx in fixtures],
        'xg_home': [fx['xG']['h'] for fx in fixtures],
        'xg_away': [fx['xG']['a'] for fx in fixtures],
        'home_prob': [fx['forecast']['w'] for fx in fixtures],
        'draw_prob': [fx['forecast']['d'] for fx in fixtures],
        'away_prob': [fx['forecast']['l'] for fx in fixtures],
        'home_score': [fx['goals']['h'] for fx in fixtures],
        'away_score': [fx['goals']['a'] for fx in fixtures],
    })

    # Cast before comparing. `result` used to be derived from the raw values,
    # so scores delivered as text were compared lexicographically
    # ("10" > "9" is False).
    for column in ('home_prob', 'away_prob', 'draw_prob', 'home_score', 'away_score'):
        a[column] = a[column].astype(float)

    a['result'] = np.where(
        a['home_score'] > a['away_score'], 'home',
        np.where(a['home_score'] == a['away_score'], 'draw', 'away'))

    # idxmax yields e.g. 'home_prob'; the first four characters are the label.
    a['pred'] = a[['home_prob', 'draw_prob', 'away_prob']].idxmax(axis=1).str.slice(0, 4)
    a['date'] = pd.to_datetime(a['date'])
    a['league'] = league_name
    a['season'] = str(year) + '/' + str(year + 1)

    # Move the two columns added last (league, season) to the front.
    column_order = list(a.columns[-2:]) + list(a.columns[:-2])
    return a[column_order]
async def update_results():
    """Write 2019 EPL results to ``all_results_xG.csv`` and ``all_results.csv``.

    Both files share the same header. The first holds the ``xG`` values per
    side, the second the goals converted to int. Team names are passed
    through ``teamAbbreviator``.
    """
    header = ['HomeTeam', 'AwayTeam', 'HomeGoals', 'AwayGoals']

    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        games_data = await api.get_league_results('epl', 2019)

        # Expected-goals file is built and written first, as before.
        xg_rows = [
            [
                teamAbbreviator(match['h']['title']),
                teamAbbreviator(match['a']['title']),
                match['xG']['h'],
                match['xG']['a'],
            ]
            for match in games_data
        ]
        with open('all_results_xG.csv', mode='w', newline='') as xg_file:
            xg_writer = csv.writer(xg_file)
            xg_writer.writerow(header)
            xg_writer.writerows(xg_rows)

        # Actual-goals file.
        goal_rows = [
            [
                teamAbbreviator(match['h']['title']),
                teamAbbreviator(match['a']['title']),
                int(match['goals']['h']),
                int(match['goals']['a']),
            ]
            for match in games_data
        ]
        with open('all_results.csv', mode='w', newline='') as goals_file:
            goals_writer = csv.writer(goals_file)
            goals_writer.writerow(header)
            goals_writer.writerows(goal_rows)
async def get_league_fixtures_async(league_name, season, options=None):
    """Return the result of ``Understat.get_league_fixtures`` as a JSON string.

    Args:
        league_name: Forwarded as ``league_name``.
        season: Forwarded as ``season``.
        options: Forwarded as ``options``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        fixtures = await api.get_league_fixtures(
            league_name=league_name, season=season, options=options)
        return json.dumps(fixtures)
async def get_team_players_async(team_name, season, options=None):
    """Return the result of ``Understat.get_team_players`` as a JSON string.

    Args:
        team_name: Forwarded as ``team_name``.
        season: Forwarded as ``season``.
        options: Forwarded as ``options``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        squad = await api.get_team_players(
            team_name=team_name, season=season, options=options)
        return json.dumps(squad)
async def main():
    """Save the 2019 EPL player list to ``understat.csv`` and ``data.pkl``."""
    # Local import keeps the block self-contained.
    from io import StringIO

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        data_json = await understat.get_league_players('epl', 2019)

    # The data is still round-tripped through JSON so pandas applies the same
    # read_json type inference as before. The text is wrapped in StringIO
    # because passing a literal JSON string to read_json is deprecated.
    data_df = pd.read_json(StringIO(json.dumps(data_json)), orient='records')
    data_df.to_csv('understat.csv')
    data_df.to_pickle('data.pkl')
async def getFixtures(season):
    """Write the EPL fixtures of ``season`` to ``tempfiles/unders_fixtures.json``.

    Args:
        season: Season forwarded to ``Understat.get_league_fixtures``.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        fixtures = await api.get_league_fixtures("epl", season)
        # Serialise before opening so a failure does not truncate the file.
        payload = json.dumps(fixtures)
        with open('tempfiles/unders_fixtures.json', 'w') as out_file:
            out_file.write(payload)
async def main():
    """Write a player's 2019 shots to ``jsonfiles/<name>.json``.

    ``id`` and ``name`` are not assigned in this function; they are resolved
    from outside it.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        season_filter = {"season": "2019"}
        player_shots = await api.get_player_shots(id, season_filter)
        shots = json.dumps(player_shots)
        # NOTE(review): `shots` is already a JSON string and is encoded again
        # by json.dump, so the file holds a JSON string literal. Kept as-is
        # because readers of the file may rely on it - confirm.
        with open(f'jsonfiles/{name}.json', 'w') as out_file:
            json.dump(shots, out_file)
async def main():
    """Store the 2019 EPL league results in the module-level ``results``."""
    global results
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        # Only the season requested as 2019 is fetched here.
        results = await api.get_league_results("epl", 2019)
async def main():
    """Append the 2020 EPL player list, as a JSON string, to ``ustat``.

    ``ustat`` is defined outside this function.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        league_players = await api.get_league_players("epl", 2020)
        serialised = json.dumps(league_players)
        ustat.append(serialised)
async def get_data(league, year):
    """Write one CSV per statistic with every player's per-fixture values.

    For each team of the league season, every player's matches are placed in
    the column of the corresponding team fixture. Fixtures a player has no
    entry for stay ``"-"``. Each row ends with the total of the numeric
    cells. One file per statistic is written to
    ``player/<year>-<yy>/<league>/<stat>.csv``, sorted by total descending.

    Args:
        league: League name forwarded to the Understat calls and used in the
            output path.
        year: Season start year as an int (``year + 1`` is computed for the
            directory name).
    """
    int_stats = ["goals", "shots", "time", "assists", "key_passes", "npg"]
    float_stats = ["xG", "xA", "npxG", "xGChain", "xGBuildup"]
    stats = int_stats + float_stats

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        t = await understat.get_teams(league, year)
        max_fixtures = max(len(x["history"]) for x in t)
        teams = [x["title"] for x in t]

        data = []
        for team in teams:
            fixtures = await understat.get_team_results(team, year)
            # Fixture id -> column position. setdefault keeps the first
            # occurrence, matching what list.index() returned before.
            fixture_slot = {}
            for slot, fixture in enumerate(fixtures):
                fixture_slot.setdefault(fixture["id"], slot)

            players = await understat.get_team_players(team, year)
            for player in players:
                matches = await understat.get_player_matches(
                    player["id"], season=str(year))
                stat_rows = [["-"] * max_fixtures for _ in stats]

                for match in matches:
                    slot = fixture_slot.get(match["id"])
                    if slot is None:
                        # Match is not one of this team's fixtures.
                        continue
                    try:
                        for row, stat in zip(stat_rows, stats):
                            if stat in int_stats:
                                row[slot] = int(match[stat])
                            else:
                                row[slot] = round(float(match[stat]), 4)
                    except (IndexError, KeyError, TypeError, ValueError):
                        # Best effort, as before: skip the rest of a match
                        # whose data cannot be placed or converted, but no
                        # longer swallow unrelated exceptions (cancellation,
                        # KeyboardInterrupt).
                        continue

                identity = [
                    player["id"],
                    html.unescape(player["player_name"]),
                    player["team_title"],
                ]
                data.append([
                    identity + row + [round(sum(x for x in row if x != "-"), 4)]
                    for row in stat_rows
                ])

    outdir = f"player/{str(year)}-{str(year + 1)[2:]}/{league}"
    # makedirs creates missing parent directories; os.mkdir raised
    # FileNotFoundError when e.g. "player/" did not exist yet.
    os.makedirs(outdir, exist_ok=True)

    columns = ["id", "name", "team"] + list(range(1, max_fixtures + 1)) + ["total"]
    for i, stat in enumerate(stats):
        df = pd.DataFrame([player_rows[i] for player_rows in data], columns=columns)
        df = df.sort_values(by="total", ascending=False)
        df.to_csv(f"{outdir}/{stat}.csv", index=False)
async def main():
    """Print xG, xG against and minutes for each Chelsea 2020 formation."""
    # The session is opened asynchronously and closed when the block exits.
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        team_stats = await api.get_team_stats("Chelsea", 2020)
        for formation, figures in team_stats["formation"].items():
            xg = figures["xG"]
            xg_against = figures["against"]["xG"]
            minutes = figures["time"]
            print(f"{formation}: xG: {xg}, xGA: {xg_against} in {minutes} mins")
async def main(league: str, season: str):
    """Save a league season's results to ``tempfiles/<league>_<season>_res.json``.

    Args:
        league: League name forwarded to ``Understat.get_league_results``.
        season: Season forwarded to the same call; also part of the file name.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        league_results = await api.get_league_results(league, season)
        print(f'Getting {league}:{season}')
        payload = json.dumps(league_results)
        target = f'tempfiles/{league}_{season}_res.json'
        with open(target, 'w') as out_file:
            out_file.write(payload)
async def main(team: str, year: str):
    """Print EPL players of ``year`` as JSON, optionally limited to one team.

    Args:
        team: ``"All"`` for no filter; any other value is passed on as a
            ``{"team_title": team}`` options dict.
        year: Season forwarded to ``Understat.get_league_players``.
    """
    # An empty tuple means "no third positional argument".
    extra_args = () if team == "All" else ({"team_title": team},)

    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        data = await api.get_league_players("epl", year, *extra_args)
        print(json.dumps(data))
async def main(match_ids):
    """Return one DataFrame with the shots of all given matches.

    Args:
        match_ids: Iterable of match ids passed to
            ``Understat.get_match_shots`` one by one.

    Returns:
        The per-match frames (home rows followed by away rows, nested keys
        flattened with ``_``) concatenated with a fresh index. An empty
        DataFrame when ``match_ids`` yields nothing.
    """
    dflist = []
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        for match_id in tqdm(match_ids):
            shots = await understat.get_match_shots(match_id)
            dfh = json_normalize(shots['h'], sep='_')
            dfa = json_normalize(shots['a'], sep='_')
            dflist.append(pd.concat([dfh, dfa], ignore_index=True))

    # pd.concat raises ValueError on an empty list.
    if not dflist:
        return pd.DataFrame()
    return pd.concat(dflist, ignore_index=True)
async def get_game_future_df(league_name, year=2019):
    """Return a DataFrame with one row per fixture of a league season.

    Args:
        league_name: League forwarded to ``Understat.get_league_fixtures``.
        year: Season forwarded to the same call. Defaults to 2019, the value
            that used to be hard-coded here.

    Returns:
        DataFrame with columns ``game_id``, ``date``, ``home_team_id``,
        ``home_team``, ``away_team_id`` and ``away_team``.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        fixtures = await understat.get_league_fixtures(league_name, year)

    return pd.DataFrame({
        'game_id': [fx['id'] for fx in fixtures],
        'date': [fx['datetime'] for fx in fixtures],
        'home_team_id': [fx['h']['id'] for fx in fixtures],
        'home_team': [fx['h']['title'] for fx in fixtures],
        'away_team_id': [fx['a']['id'] for fx in fixtures],
        'away_team': [fx['a']['title'] for fx in fixtures],
    })
async def main():
    """Export Arsenal's 2019 results to ``ARS2019-20.xlsx``."""
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        # Team results are looked up by team name and season.
        arsenal_results = await api.get_team_results("Arsenal", 2019)
        results_frame = pd.DataFrame(arsenal_results)
        # Write the table as an Excel workbook.
        results_frame.to_excel("ARS2019-20.xlsx")
async def main():
    """Collect player names and Understat ids and save them once.

    Entries are appended to ``players``, a list defined outside this
    function, for every combination of the module-level ``leagues`` and
    ``seasons``. The de-duplicated table is written to
    ``<root_dir>/raw_data/player_df.csv`` and ``.pickle`` only when the
    pickle does not exist yet.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)

        for league, season in it.product(leagues, seasons):
            league_label = league.replace('_', ' ').title()
            print('Importing {} {} players'.format(league_label, season))
            season_players = await api.get_league_players(league, season)
            for entry in season_players:
                record = {
                    'name': entry['player_name'],
                    'understat_id': int(entry['id']),
                }
                players.append(record)

        # The same player appears in several seasons; keep one row each.
        df = pd.DataFrame(players).drop_duplicates()

        pickle_path = '{}/raw_data/player_df.pickle'.format(root_dir)
        if not os.path.isfile(pickle_path):
            df.to_csv('{}/raw_data/player_df.csv'.format(root_dir))
            df.to_pickle(pickle_path)
async def load_fixtures(league, season):
    """Return a league season's results as a DataFrame.

    Args:
        league: League name forwarded to ``Understat.get_league_results``.
        season: Mapping; its ``"param_season"`` entry selects the season.
    """
    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        season_key = season["param_season"]
        league_results = await api.get_league_results(league, season_key)
        fixtures_frame = pd.DataFrame(league_results)
        # Explicit close kept from the original, inside the `async with`.
        await http_session.close()
        return fixtures_frame
async def get_data(league, year):
    """Write each team's per-fixture clean-sheet probability to ``xCS.csv``.

    For every fixture of every team, the product of ``1 - xG`` over the
    opposition's shots is computed and rounded to four decimals. Fixtures
    without shot data stay ``"-"``. Rows end with the total of the numeric
    cells and are sorted by it, descending. Output goes to
    ``team/<year>-<yy>/<league>/xCS.csv``.

    Args:
        league: League name forwarded to the Understat calls and used in the
            output path.
        year: Season start year as an int (``year + 1`` is computed for the
            directory name).
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        t = await understat.get_teams(league, year)
        max_fixtures = max(len(x["history"]) for x in t)
        teams = [x["title"] for x in t]

        data = []
        for team in teams:
            team_data = ["-"] * max_fixtures
            fixtures = await understat.get_team_results(team, year)
            for i, fixture in enumerate(fixtures):
                try:
                    shot_data = await understat.get_match_shots(fixture["id"])
                except UnboundLocalError:
                    # Per the original author's note this is raised when no
                    # match data is present, e.g. for abandoned matches.
                    continue

                prob_cs = 1
                # Any shot names the home team; prefer the home list and fall
                # back to the away list when the home side had no shots. This
                # replaces the nested try/except IndexError blocks.
                reference_shots = shot_data["h"] or shot_data["a"]
                if reference_shots:
                    # We want the shots of the **opposition** team.
                    wanted = "a" if reference_shots[0]["h_team"] == team else "h"
                    for shot in shot_data[wanted]:
                        prob_cs *= 1 - float(shot["xG"])
                team_data[i] = round(prob_cs, 4)

            total = round(sum(x for x in team_data if x != "-"), 4)
            data.append([team] + team_data + [total])

    df = pd.DataFrame(
        data, columns=["team"] + list(range(1, max_fixtures + 1)) + ["total"])
    df = df.sort_values(by="total", ascending=False)

    outdir = f"team/{str(year)}-{str(year + 1)[2:]}/{league}"
    # makedirs creates missing parent directories; os.mkdir raised
    # FileNotFoundError when e.g. "team/" did not exist yet.
    os.makedirs(outdir, exist_ok=True)
    df.to_csv(f"{outdir}/xCS.csv", index=False)
async def get_game_player_df(game_id):
    """Return the per-player statistics of one match as a DataFrame.

    Args:
        game_id: Match id forwarded to ``Understat.get_match_players``; also
            stored in the ``game_id`` column.

    Returns:
        DataFrame with ``game_id`` first, followed by the player columns.
        Away-side rows come before home-side rows and each side keeps its
        own index.
    """
    player_columns = (
        'player_id', 'player', 'position', 'team_id', 'h_a', 'time',
        'xG', 'xA', 'xGChain', 'xGBuildup', 'shots', 'goals', 'own_goals',
        'key_passes', 'assists', 'yellow_card', 'red_card',
        'roster_in', 'roster_out', 'positionOrder',
    )

    async with aiohttp.ClientSession() as http_session:
        api = Understat(http_session)
        players = await api.get_match_players(game_id)

        # One frame per side, built column by column so that an empty side
        # still yields every column.
        side_frames = {}
        for side in ('h', 'a'):
            roster = players[side]
            side_frames[side] = pd.DataFrame({
                column: [roster[key][column] for key in roster]
                for column in player_columns
            })

        final = pd.concat([side_frames['a'], side_frames['h']])
        final['game_id'] = game_id
        # game_id was added last; move it to the front.
        final = final[['game_id'] + list(player_columns)]
        return final