def players(self):
    """Collect results for every qualifying player and pickle them.

    Rows of ``self.plyrs`` after the first one are scanned.  A row
    qualifies when the column selected by ``self.RoP`` (index 2 for
    ``'regular'``, index 4 otherwise) is non-empty and its integer value
    exceeds the threshold (500 for ``'regular'``, 50 otherwise).  For each
    qualifying row ``self.singleplayer`` is called with the player mark and
    truthy results are stored as ``{mark: [result, extra]}``.

    The mapping is written to ``./data/playerRATs_<RoP>.pickle``.
    """
    is_regular = self.RoP == 'regular'
    col = 2 if is_regular else 4
    threshold = 500 if is_regular else 50

    collected = {}
    for row in tqdm(self.plyrs[1:]):
        if not row[col] or int(row[col]) <= threshold:
            continue
        # Player mark: last path component of row[1] without its final five
        # characters (presumably a '.html' suffix -- confirm with the data).
        mark = row[1].split('/')[-1][:-5]
        result, extra = self.singleplayer(mark)
        if result:
            collected[mark] = [result, extra]

    writeToPickle('./data/playerRATs_%s.pickle' % self.RoP, collected)
def _add_sublineups(team_totals, lineup, totals):
    """Add one lineup's totals to every 1- to 4-player subset of it.

    ``team_totals[k]`` maps a space-joined, sorted (k + 1)-player key to a
    two-element list ``[time, plus_minus]`` that is updated in place.
    """
    members = lineup.split(' ')
    for size in range(1, 5):
        bucket = team_totals[size - 1]
        for combo in itertools.combinations(members, size):
            # Sort inside the key so the same players always share one entry
            # regardless of their order in the lineup string.
            key = ' '.join(sorted(combo))
            if key not in bucket:
                bucket[key] = [MPTime('0:00.0'), 0]
            bucket[key][0] += totals[0]
            bucket[key][1] += totals[1]


def splitLineups(lines5):
    """Break full-lineup totals down into 1- to 4-player combination totals.

    Args:
        lines5: two-element sequence (0 = regular season, 1 = playoffs) of
            mappings ``season -> team -> lineup -> values``, where ``lineup``
            is a space-separated string of player marks and ``values[0]`` /
            ``values[1]`` are the quantities to accumulate (time played and
            plus/minus according to the original comments).

    Side effects:
        Prints the first and last season key of each game type and every
        processed season, then writes four pickles (one per combination
        size) to ``D:/sunyiwu/stat/data/Lineups/anaSeason<N>Lineups.pickle``.
        Each pickle holds ``[regular, playoff]`` mappings of
        ``season -> team -> combination -> [time, plus_minus]`` with the
        combinations of each team ordered by descending value.

    Raises:
        KeyError: if a season between 2002_2003 and 2020_2021 is missing
            from either game type in ``lines5``.
    """
    for phase in range(2):
        season_keys = list(lines5[phase])
        print(season_keys[0], '...', season_keys[-1])

    # lines_all[size - 1][phase][season][team][combination] -> [time, +/-]
    lines_all = [[{}, {}] for _ in range(4)]

    for season in range(2002, 2021):
        ss = '%d_%d' % (season, season + 1)
        print(ss)
        for phase in range(2):
            teams = lines5[phase][ss]
            # Create the season entry once instead of resetting it for every
            # team; this also yields an empty entry for a season without teams.
            for size_ix in range(4):
                lines_all[size_ix][phase][ss] = {}

            for tm, line5_tm in teams.items():
                team_totals = [{}, {}, {}, {}]
                for lineup, totals in line5_tm.items():
                    _add_sublineups(team_totals, lineup, totals)

                for size_ix, bucket in enumerate(team_totals):
                    # Stable descending sort by value, as in a dict sorted by
                    # its values; ties keep their insertion order.
                    ranked = sorted(bucket.items(),
                                    key=lambda item: item[1],
                                    reverse=True)
                    lines_all[size_ix][phase][ss][tm] = dict(ranked)

    for size_ix in range(4):
        writeToPickle(
            'D:/sunyiwu/stat/data/Lineups/anaSeason%dLineups.pickle' %
            (size_ix + 1), lines_all[size_ix])
print(ave_score) # [28317, 1933] # [435372.0, 27686.0] # [68541:22.2, 4634:40.1] # ['0:9.4', '0:10.0'] # [538495.0, 33785.0] # [1.2368618101301876, 1.2202918442534132] for pm in plyrs: for i in range(2): if plyrs[pm][i][0]: plyrs[pm][i][1] = plyrs[pm][i][1].average_acc(plyrs[pm][i][0]) # plyrs[pm][i][2] /= plyrs[pm][i][0] plyrs[pm][i][3]['TOV'][0] /= plyrs[pm][i][0] for ss in plyrs[pm][i][-1]: if plyrs[pm][i][-1][ss][0]: plyrs[pm][i][-1][ss][1] = plyrs[pm][i][-1][ss][1].average_acc(plyrs[pm][i][-1][ss][0]) # plyrs[pm][i][-1][ss][2] /= plyrs[pm][i][-1][ss][0] plyrs[pm][i][-1][ss][3]['TOV'][0] /= plyrs[pm][i][-1][ss][0] print('共%d名球员' % len(plyrs)) # for pm in plyrs: # print(pm, plyrs[pm]) print(exchange_plays) print(MSerror) writeToPickle('./data/Enforce/player%sEnforce.pickle' % tartext[tar_item], plyrs) writeToPickle('./data/Enforce/season%sEnforceRecord.pickle' % tartext[tar_item], [count_games_all, count_item_all, count_time_all, average_time_all, count_score_all, average_score_all])
th = ss.find_all('th')[0].text tds = ss.find_all('td') if (th or tds[2].text == 'NBA') and\ not (th == 'Career' and tds[2].text == 'TOT') and\ tds[2].text == 'NBA': if not th: th = 'Career' seasonAVE.append([th] + [x.text for x in tds]) # 写入csv文件 if seasonAVE: if not os.path.exists('./data/players/%s' % pm): os.mkdir('./data/players/%s' % pm) if not os.path.exists('./data/players/%s/regularGames' % pm): os.mkdir('./data/players/%s/regularGames' % pm) df = pd.DataFrame(seasonAVE, columns=items) writeToPickle( './data/players/%s/regularGames/seasonAVE.pickle' % pm, df) df = pd.DataFrame(singleGAMES, columns=regular_items_en.keys()) df[df == ''] = np.nan for col in df.columns: df[col] = df[col].astype('category') writeToPickle( './data/players/%s/regularGames/regularGameBasicStat.pickle' % pm, df) else: print('球员未参加过NBA常规赛') #%% # -----季后赛----- if last_season > 2018: seasonAVE = [] singleGAMES = []
np.set_printoptions(suppress=True) pd.options.display.expand_frame_repr = False pd.options.display.width = 50 RoF = 0 gm = '201201070NJN' game = Game(gm, 'playoff' if RoF else 'regular') record = game.game_scanner() # for i in record: # print(i) record = game.game_analyser(record) record = game.game_analyser(record, T=1) ss = gameMarkToSeason(gm) rof = 'playoff' if RoF else 'regular' season_dir = 'D:/sunyiwu/stat/data/seasons_scanned/%s/%s/' % (ss, rof) writeToPickle(season_dir + gm + '_scanned.pickle', record) for i in record: print(i) # print() game.find_time_series(record) game.start_of_quarter(record) # 判断胜者 scores = [game.bxscr[0][x][0] for x in game.bxscr[0]] print(list(game.bxscr[0])) print(scores) print('主队胜' if scores[0] < scores[1] else '客队胜') rot = game.rotation(record) for i in rot:
# 表头 items = [x.text for x in charts[1].find_all('th')] items[0] = 'players' chart.append(items) for row in charts[2:]: stats = row.find_all('td') if len(stats) > 0: if row.find('th').a: pm = row.find('th').a.attrs['href'].split('/')[-1][:-5] else: if 'game' not in j: continue pm = row.find('th').text chart.append([pm] + [x.text for x in stats]) cs.append(chart) if cs: boxscores.append(cs) # 保存单场比赛数据 if regularOrPlayoff: writeToPickle(seasonDir + '/playoff/%s_boxscores.pickle' % date, boxscores) else: writeToPickle(seasonDir + '/regular/%s_boxscores.pickle' % date, boxscores) else: ths = tr.find_all('th') if len(ths) == 1 and ths[0].get_text().strip() == 'Playoffs': # 找到季后赛分割线 print('switch to Playoffs') regularOrPlayoff = 1 print('=' * 50)
if ivs_pm: for pm in ivs_pm: flag = 1 for r in game.gameflow[qtr]: ind = 1 if r[1] else 5 if (len(r) > 2 and pm in r[ind]) or ( len(r) == 2 and 'Jump' in r[ind] and pm in r[ind]): flag = 0 # print(qtr, pm, r) break if flag: if pm not in plyrs: plyrs[pm] = [ [], [], [], [], [] ] # 0gm, 1节次, 2+/-, 3胜负, 4是否比赛最后一节 diff = plus_minus( rot[tick] if tick < len(rot) else record[-1], rot[SoQ]['S'], tm) plyrs[pm][0].append(gm) plyrs[pm][1].append(qtr) plyrs[pm][2].append(diff) plyrs[pm][3].append(int(winner == tm)) plyrs[pm][4].append( int(qtr == game.quarters - 1)) # print('ivs', pm, gm, qtr) for pm in plyrs: print(pm, plyrs[pm]) writeToPickle('./data/anaInvisiblePlayers.pickle', plyrs)
from klasses.stats_items import *
from tqdm import tqdm
import pandas as pd


def _extend_labelled(rows, label, new_rows):
    """Append ``new_rows`` to ``rows`` and prefix the last two rows with ``label``."""
    rows += new_rows
    rows[-2] = [label] + rows[-2]
    rows[-1] = [label] + rows[-1]


# Compute each player's per-season and career averages and totals, then save
# one table per player and game type.
pm2pn = LoadPickle('../data/playermark2playername.pickle')
for p in tqdm(list(pm2pn.keys())):
    for RoP in ['regular', 'playoff']:
        player = Player(p, RoP)
        # Skip players without data for this game type, or whose data is
        # still a plain list.
        if not player.exists or isinstance(player.data, list):
            continue

        res = []
        for sss, ss in player.yieldSeasons():
            sss = player.on_board_games(sss)
            _extend_labelled(res, ss, player.ave_and_sum(sss, type=2))

        # Career rows are computed over all of the player's games.
        sss = player.on_board_games(player.data)
        _extend_labelled(res, 'career', player.ave_and_sum(sss, type=2))

        if RoP == 'regular':
            stat_names = regular_items_en.keys()
        else:
            stat_names = playoff_items_en.keys()
        res = pd.DataFrame(res, columns=['Season'] + list(stat_names))

        writeToPickle(
            '../data/players/%s/%sGames/%sSaCAaS.pickle' % (p, RoP, RoP),
            res)
rs = i.find_all('a') if rs != None: for r in rs: url, rn = r.attrs['href'], r.text if rn not in refs: refs[rn] = url reftmp.append(rn) else: # 裁判无个人URL reftmp = i.text.split('\xa0')[1].split(', ') elif 'Att' in i.text: gtmp['A'] = int(i.text.split('\xa0')[1].replace(',', '')) else: gtmp['T'] = i.text.split('\xa0')[1] if reftmp or gtmp: res.append([gm[0], ', '.join(reftmp), gtmp['A'] if 'A' in gtmp else -1, gtmp['T'] if 'T' in gtmp else '']) # print(time.time() - s) if res: df = pd.DataFrame(res, columns=['gm', 'Referees', 'Attendance', 'Time of Game']) writeToPickle('./data/seasons_RAT/%s.pickle' % ss, df) writeToPickle('./data/refereeURLs.pickle', refs)
for pm in tmplyrs[rh]: if pm not in plyrs: plyrs[pm] = {} if ss not in plyrs[pm]: # 0本队1对手 0reg1plf 0全部比赛数1客场数2主场数3stats 0客场1主场2全部 0第一节1第二节2上半场3第三节4第四节5下半场6全场 # 0FG 1FGA 2FG% 33P 43PA 53P% 6FT 7FTA 8FT% 9ORB 10DRB 11TRB 12AST 13STL 14BLK 15TOV 16PF 17PTS 18PACE plyrs[pm][ss] = [[[0, 0, 0, 0, np.zeros((3, 7, 19))], [0, 0, 0, 0, np.zeros((3, 7, 19))]], [[0, 0, 0, 0, np.zeros((3, 7, 19))], [0, 0, 0, 0, np.zeros((3, 7, 19))]]] # 记录本队 plyrs[pm][ss][0][i][0] += 1 plyrs[pm][ss][0][i][rh + 1] += 1 plyrs[pm][ss][0][i][3] += gametime plyrs[pm][ss][0][i][-1][rh, 6, :] += bx[rh] plyrs[pm][ss][0][i][-1][2, 6, :] += bx[rh] # 记录对手 op = 0 if rh else 1 plyrs[pm][ss][1][i][0] += 1 plyrs[pm][ss][1][i][op + 1] += 1 plyrs[pm][ss][0][i][3] += gametime plyrs[pm][ss][1][i][-1][op, 6, :] += bx[op] plyrs[pm][ss][1][i][-1][2, 6, :] += bx[op] print(seasons[ss][0][0], list(seasons[ss][0][-1][2, 6, :] / seasons[ss][0][0] / 2)) print(seasons[ss][1][0], list(seasons[ss][1][-1][2, 6, :] / seasons[ss][1][0] / 2)) # if 'jamesle01' in plyrs: # print(plyrs['jamesle01'][ss][0][0], list(plyrs['jamesle01'][ss][0][-1][2, 6, :] / plyrs['jamesle01'][ss][0][0])) # print(plyrs['jamesle01'][ss][1][0], list(plyrs['jamesle01'][ss][1][-1][2, 6, :] / plyrs['jamesle01'][ss][1][0])) writeToPickle('./data/leagueSeasonAverage.pickle', seasons) writeToPickle('./data/playerSeasonAverage.pickle', plyrs)
gms = os.listdir(season_dir) for gm in gms: c = LoadPickle(season_dir + gm) for q, qtr in enumerate(c): for ix, r in enumerate(qtr): if len(r) == 6 and 'enters' in (r[1] if r[1] else r[-1]): ind = 1 if r[1] else 5 tmp = r[ind].split(' ') pm1, pm2 = tmp[0], tmp[-1] if pm1 == pm2: print(gm, c[q][ix][ind]) url = 'https://www.basketball-reference.com/boxscores/pbp/%s.html' % gm[:-7] plays = getCode(url, 'UTF-8') plays = plays.find('table', class_='stats_table').find_all('tr') for play in plays: tdPlays = play.find_all('td') if len(tdPlays) == 6: for p in tdPlays: if p.find_all('a'): s = p.get_text().strip() if 'enters' in s: ps = s.split(' enters the game for ') if len(ps) > 1 and ps[0] == ps[1]: pms = [] for a in p.find_all('a'): pms.append(a.attrs['href'].split('/')[-1].split('.')[0]) correct = '%s enters the game for %s' % (pms[0], pms[1]) c[q][ix][ind] = correct print(c[q][ix][ind]) writeToPickle(season_dir + gm, c)
sentence_o = sentence_o + a.attrs[ 'href'].split('/')[-1].rstrip( '.html') elif 'ejected from game' == sentence_o: sentence_o = a.attrs['href'].split( '/')[-1].rstrip( '.html') + sentence_o pTmp.append(sentence_o) else: pTmp.append(p.get_text().strip()) gameProcess[qtr].append(pTmp) else: pass # 保存单场比赛数据 if regularOrPlayoff: writeToPickle(seasonDir + '/playoff/%s.pickle' % date, gameProcess) else: writeToPickle(seasonDir + '/regular/%s.pickle' % date, gameProcess) # 更新赛季比赛列表 if regularOrPlayoff: seasonuPlayoffSmmary.append(gameDetails) else: seasonuRegularSmmary.append(gameDetails) else: ths = tr.find_all('th') if len(ths) == 1 and ths[0].get_text().strip() == 'Playoffs': # 找到季后赛分割线 print('switch to Playoffs') regularOrPlayoff = 1 writeToPickle(seasonDir + '/seasonRegularSummary.pickle',
os.path.exists(seasonDir + '/regular/%s_shot.pickle' % date)): # 比赛详细过程 if not tds[-4].a: continue gameURL = 'https://www.basketball-reference.com' + '/boxscores/shot-chart/' + tds[-4].a.attrs['href'].lstrip('/boxscores') gamePage = getCode(gameURL, 'UTF-8') charts = gamePage.find_all('div', class_='shot-area') try: assert len(charts) == 2 except: print('%s:本场比赛缺失投篮点数据!' % date) continue shootings = [[], []] for i in range(2): shoots = charts[i].find_all('div') for shoot in shoots: shootings[i].append([shoot.attrs['style'], shoot.attrs['tip'], shoot.attrs['class']]) # 保存单场比赛数据 if regularOrPlayoff: writeToPickle(seasonDir + '/playoff/%s_shot.pickle' % date, shootings) else: writeToPickle(seasonDir + '/regular/%s_shot.pickle' % date, shootings) else: ths = tr.find_all('th') if len(ths) == 1 and ths[0].get_text().strip() == 'Playoffs': # 找到季后赛分割线 print('switch to Playoffs') regularOrPlayoff = 1 print('=' * 50)
def write2file(self):
    """Pickle ``self.gameflow`` to ``self.file_name`` and report the outcome.

    When either ``self.gameflow`` or ``self.file_name`` is falsy nothing is
    written and an information dialog is shown instead.
    """
    has_content = self.gameflow and self.file_name
    if not has_content:
        messagebox.showinfo('提示', '没有内容!')
        return

    writeToPickle(self.file_name, self.gameflow)
    print('写入文件%s成功' % self.file_name)
'Jalen Harris', '2021', '2021', 'G', '6-5', '195', 'August 14, 1998', 'Louisiana Tech, Nevada' ] players['winstca01'] = [ 'Cassius Winston', '2021', '2021', 'G', '6-1', '185', '', '' ] players['beyty01'] = [ 'Tyler Bey', '2021', '2021', 'F', '6-7', '215', 'February 10, 1998', '' ] players['marshna01'] = [ 'Naji Marshall', '2021', '2021', 'F', '6-7', '220', 'January 24, 1998', '' ] players['okongon01'] = [ 'Onyeka Okongwu', '2021', '2021', 'C', '6-8', '235', 'December 11, 2000', '' ] players['haganas01'] = [ 'Ashton Hagans', '2021', '2021', 'G', '6-3', '190', 'July 8, 1999', '' ] writeToPickle('./data/playerBasicInformation.pickle', players) #%% pm2pn = {} for k in list(players): pm2pn[k] = players[k][0] writeToPickle('./data/playermark2playername.pickle', pm2pn)
for y in range(cmps_shts_area_all.shape[0]): for x in range(cmps_shts_area_all.shape[1]): text = ax.text(x, y, '%.2f' % cmps_shts_area_all[cols[x]][y], ha="center", va="center", color="w") # h1 = plt.contourf(cmps_shts_area_all) fig.colorbar(img, ax=ax) plt.show() writeToPickle('D:/sunyiwu/stat/data/winning_point.pickle', [[cmps_all, cmps_shts_area_all, basic_all, count_game], cmps_by_season]) # 5048 2016-2021 # [0.8098256735340729, ' eFG%'] # [0.803486529318542, ' biggest lead'] # [0.7977416798732171, ' biggest lead pts%'] # [0.7022583201267829, ' 3PT %'] # [0.6816561014263075, ' 2PT %'] # [0.6024167987321711, ' TOV%'] # [0.5919175911251982, ' pts off tovs'] # [0.5845879556259905, ' FT/FGA'] # [0.5711172741679873, ' ORB%'] # [0.5673534072900158, ' FTA/FGA'] # [0.5635895404120443, ' 3PT PTS%'] # [0.563391442155309, ' pts in the paint']