def getChampions(soup, season):
    """Append ``[team_id, season]`` for the team linked in the page's meta block.

    The team name is taken from the first ``<a>`` inside the paragraph matched
    by ``#meta > div:nth-child(2) > p`` (presumably the league-champion link --
    confirm against the page layout) and resolved through ``searchTeamByName``.

    Args:
        soup: parsed page exposing ``select_one``.
        season: value stored unchanged next to the resolved team id.

    Side effects:
        Appends one two-element list to the module-level ``allChampions``.
    """
    # The list is only mutated, never rebound, so no ``global`` statement
    # is required to reach the module-level ``allChampions``.
    team_link = soup.select_one('#meta > div:nth-child(2) > p').select_one('a')
    team_record = searchTeamByName(team_link.get_text())
    allChampions.append([int(team_record[0]), season])
def helperTwo(row, id, season):
    """Build one flat record from a table row of ``opp_*`` stat cells.

    Args:
        row: table-row element exposing ``select_one``.
        id: identifier placed first in the record (name shadows the builtin
            but is part of the existing signature).
        season: season label placed third in the record.

    Returns:
        ``[id, team_id, season, g, mp, fg, fga, fg_pct, fg3, fg3a, fg3_pct,
        fg2, fg2a, fg2_pct, ft, fta, ft_pct, orb, drb, trb, ast, stl, blk,
        tov, pf, pts]`` where every stat from ``fg`` onward is read from the
        matching ``opp_``-prefixed ``data-stat`` cell.
    """
    # ``data-stat`` keys in output order; ``g`` and ``mp`` carry no prefix.
    stat_keys = (
        'g', 'mp',
        'opp_fg', 'opp_fga', 'opp_fg_pct',
        'opp_fg3', 'opp_fg3a', 'opp_fg3_pct',
        'opp_fg2', 'opp_fg2a', 'opp_fg2_pct',
        'opp_ft', 'opp_fta', 'opp_ft_pct',
        'opp_orb', 'opp_drb', 'opp_trb',
        'opp_ast', 'opp_stl', 'opp_blk', 'opp_tov', 'opp_pf', 'opp_pts',
    )

    team_name = row.select_one('[data-stat="team_name"] > a').get_text()
    team_id = int(searchTeamByName(team_name)[0])

    # toIntOrFloat is defined elsewhere; presumably it converts the cell
    # text to a number -- confirm against its definition.
    stats = [
        toIntOrFloat(row, '[data-stat="{}"]'.format(key))
        for key in stat_keys
    ]
    return [id, team_id, season] + stats
def helperFive(row, id, season):
    """Build one flat record from a table row of ``opp_*`` shooting cells.

    Args:
        row: table-row element exposing ``select_one``.
        id: identifier placed first in the record (name shadows the builtin
            but is part of the existing signature).
        season: season label placed third in the record.

    Returns:
        ``[id, team_id, season, g, mp, fg_pct, avg_dist, fg2a_ct_fga,
        pct_fga_00_03, pct_fga_03_10, pct_fga_10_16, pct_fga_16_xx,
        fg3a_pct_fga, fg2_pct, fg_pct_00_03, fg_pct_03_10, fg_pct_10_16,
        fg_pct_16_xx, fg3_pct, fg2_pct_ast, pct_fg2_dunk, fg2_dunk,
        pct_fg2_layup, fg2_layup, fg3_pct_ast, pct_fg3a_corner,
        fg3_pct_corner, fg3a_heave, fg3_heave]``.
    """
    # ``data-stat`` keys in output order; ``g`` and ``mp`` carry no prefix.
    stat_keys = (
        'g', 'mp',
        'opp_fg_pct', 'opp_avg_dist',
        # NOTE(review): 'opp_fg2a_ct_fga' looks like a typo for
        # 'opp_fg2a_pct_fga' (compare 'opp_fg3a_pct_fga' below) -- confirm
        # against the page markup before changing it.
        'opp_fg2a_ct_fga',
        'opp_pct_fga_00_03', 'opp_pct_fga_03_10',
        'opp_pct_fga_10_16', 'opp_pct_fga_16_xx',
        'opp_fg3a_pct_fga', 'opp_fg2_pct',
        'opp_fg_pct_00_03', 'opp_fg_pct_03_10',
        'opp_fg_pct_10_16', 'opp_fg_pct_16_xx',
        'opp_fg3_pct', 'opp_fg2_pct_ast',
        'opp_pct_fg2_dunk', 'opp_fg2_dunk',
        'opp_pct_fg2_layup', 'opp_fg2_layup',
        'opp_fg3_pct_ast', 'opp_pct_fg3a_corner', 'opp_fg3_pct_corner',
        'opp_fg3a_heave', 'opp_fg3_heave',
    )

    team_name = row.select_one('[data-stat="team_name"] > a').get_text()
    team_id = int(searchTeamByName(team_name)[0])

    # toIntOrFloat is defined elsewhere; presumably it converts the cell
    # text to a number -- confirm against its definition.
    stats = [
        toIntOrFloat(row, '[data-stat="{}"]'.format(key))
        for key in stat_keys
    ]
    return [id, team_id, season] + stats
def helperThree(row, id, season):
    """Build one flat record from a table row of team summary/rating cells.

    Args:
        row: table-row element exposing ``select_one``.
        id: identifier placed first in the record (name shadows the builtin
            but is part of the existing signature).
        season: season label placed third in the record.

    Returns:
        ``[id, team_id, season, age, wins, losses, wins_pyth, losses_pyth,
        mov, sos, srs, off_rtg, def_rtg, net_rtg, pace, fta_per_fga_pct,
        fg3a_per_fga_pct, ts_pct, efg_pct, tov_pct, orb_pct, ft_rate,
        opp_efg_pct, opp_tov_pct, drb_pct, opp_ft_rate, arena_name,
        attendance, attendance_per_g]``. ``arena_name`` is the raw cell text;
        every other stat goes through ``toIntOrFloat``.
    """
    # Numeric ``data-stat`` keys that precede the arena name, in output order.
    # 'losses' was previously read from the 'g' cell, which stored the wrong
    # column under the losses field; each key now matches its field name.
    keys_before_arena = (
        'age', 'wins', 'losses', 'wins_pyth', 'losses_pyth',
        'mov', 'sos', 'srs', 'off_rtg', 'def_rtg', 'net_rtg', 'pace',
        'fta_per_fga_pct', 'fg3a_per_fga_pct', 'ts_pct',
        'efg_pct', 'tov_pct', 'orb_pct', 'ft_rate',
        'opp_efg_pct', 'opp_tov_pct', 'drb_pct', 'opp_ft_rate',
    )
    keys_after_arena = ('attendance', 'attendance_per_g')

    def read_stats(keys):
        # toIntOrFloat is defined elsewhere; presumably it converts the cell
        # text to a number -- confirm against its definition.
        return [
            toIntOrFloat(row, '[data-stat="{}"]'.format(key)) for key in keys
        ]

    team_name = row.select_one('[data-stat="team_name"] > a').get_text()
    team_id = int(searchTeamByName(team_name)[0])

    leading = read_stats(keys_before_arena)
    arena_name = row.select_one('[data-stat="arena_name"]').get_text()
    trailing = read_stats(keys_after_arena)

    return [id, team_id, season] + leading + [arena_name] + trailing
def getGameData(url, year):
    """Scrape one schedule page and record every game row found on it.

    For each row of the ``#schedule`` table that has no ``class`` attribute,
    one record is appended to the module-level ``games`` list and one to
    ``boxScoreLinks``; the module-level counter ``id`` is incremented per
    game. Once a row whose first child is a tag with text ``'Playoffs'`` is
    seen, the module-level ``playoff`` flag is set to True and stays set
    (this function never resets it).

    Args:
        url: address of the schedule page to download.
        year: unused by this function; kept for caller compatibility.

    Returns:
        None. Returns early, recording nothing, when the page heading is
        ``'Page Not Found (404 error)'``.
    """
    global playoff
    global id  # module-level running game id (shadows the builtin)

    site_root = 'https://www.basketball-reference.com'
    # This page is treated as playoff games from its first non-empty row on.
    forced_playoff_url = (
        'https://www.basketball-reference.com/leagues/NBA_1980_games-april.html'
    )

    html_source = requests.get(url)
    soup = BeautifulSoup(html_source.content, 'html.parser')
    if soup.find('h1').get_text() == 'Page Not Found (404 error)':
        return

    season = soup.select_one(
        'ul.hoversmooth > li.index > a > u').get_text()[:7]

    for tr in soup.select('#schedule > tbody > tr'):
        # An explicit emptiness check replaces the former bare ``except``,
        # which only ever guarded ``tr.contents[0]`` on an empty row.
        children = tr.contents
        if children and (
                (isinstance(children[0], Tag)
                 and children[0].get_text() == 'Playoffs')
                or url == forced_playoff_url):
            playoff = True

        # Rows carrying a class attribute are skipped; only plain rows are
        # parsed as games.
        if tr.get('class'):
            continue

        date = tr.select_one('[data-stat="date_game"]').get_text()

        awayTeamName = tr.select_one(
            '[data-stat="visitor_team_name"]').get_text()
        awayTeamId = int(searchTeamByName(awayTeamName)[0])
        awayTeamScore = tr.select_one('[data-stat="visitor_pts"]').get_text()

        homeTeamName = tr.select_one(
            '[data-stat="home_team_name"]').get_text()
        # Looked up once and reused for the short name in the fallback link
        # (assumes searchTeamByName is a pure lookup).
        homeTeam = searchTeamByName(homeTeamName)
        homeTeamId = int(homeTeam[0])
        homeTeamScore = tr.select_one('[data-stat="home_pts"]').get_text()

        overtime = tr.select_one('[data-stat="overtimes"]').get_text()

        # Attendance stays '' when the cell is empty, otherwise becomes an
        # int with thousands separators removed.
        attendance = tr.select_one('[data-stat="attendance"]').get_text()
        if attendance != '':
            attendance = int(attendance.replace(',', ''))

        notes = tr.select_one('[data-stat="game_remarks"]').get_text()

        # Prefer the link present in the row; when the anchor or its href is
        # missing, rebuild the box-score address from date and home team.
        anchor = tr.select_one('[data-stat="box_score_text"] > a')
        href = anchor.get("href") if anchor is not None else None
        if href is not None:
            boxScoreLink = site_root + href
        else:
            game_day = datetime.strptime(date, '%a, %b %d, %Y')
            boxScoreLink = '{}/boxscores/{}{:02d}{:02d}0{}.html'.format(
                site_root, game_day.year, game_day.month, game_day.day,
                homeTeam[2])

        storeValue = [
            id, homeTeamId, homeTeamScore, awayTeamId, awayTeamScore, date,
            overtime, attendance, playoff, season, notes
        ]
        boxScoreLinks.append([
            id, boxScoreLink, awayTeamId, homeTeamId, season, playoff, date
        ])
        id += 1
        # Regular-season and playoff games share one list; the ``playoff``
        # field of the record tells them apart.
        games.append(storeValue)
def getPlayerData(url):
    """Scrape one player page and append a row for it to ``players.csv``.

    Args:
        url: indexable record. ``url[1]`` is the page address to download;
            ``url[0]``, ``url[2]`` and ``url[3]`` are written through to the
            CSV row unchanged (``url[3]`` in the "retired" position).

    Side effects:
        Performs an HTTP GET and appends one row to ``players.csv`` in the
        current working directory:
        ``[url[0], name, fullName, url[2], height, weight, birth_date,
        birth_place, retired, playerUrl, current_team_id, teams, injury]``.

    Optional fields (height, weight, birth date, birth place, current team)
    are left as ``''`` when the corresponding element is absent. The name and
    full-name elements are required; a page without them raises
    ``AttributeError``.
    """
    html = requests.get(url[1])
    soup = BeautifulSoup(html.content, 'html.parser')
    div = soup.select_one('#meta > div[itemtype="https://schema.org/Person"]')

    def optional_text(selector):
        """Return the text of the first match inside ``div``, or None."""
        node = div.select_one(selector)
        return None if node is None else node.get_text()

    playerUrl = '/'.join(url[1].split('/')[-3:])
    retired = url[3]
    name = div.select_one('h1').get_text().strip()

    # Team ids from the footer rows of the per-game table; short names the
    # lookup does not recognise are skipped.
    teams = []
    for cell in soup.select('#per_game > tfoot > tr [data-stat=team_id]'):
        found = searchTeamByShortName(cell.get_text())
        if found is not None:
            teams.append(int(found))

    # When the second paragraph is the pronunciation line, the full name
    # sits one paragraph further down.
    fullName = div.select_one('p:nth-child(2) > strong').get_text().strip()
    if fullName == 'Pronunciation':
        fullName = div.select_one(
            'p:nth-child(3) > strong').get_text().strip()

    height_text = optional_text('span[itemprop="height"]')
    height = '' if height_text is None else height_text

    # The last two characters of the weight text are dropped (presumably a
    # unit suffix such as "lb" -- confirm against the page).
    weight_text = optional_text('span[itemprop="weight"]')
    weight = '' if weight_text is None else weight_text[:-2]

    # Both birth links must be present for a date to be recorded.
    birth_first = optional_text('#necro-birth > a:first-child')
    birth_second = optional_text('#necro-birth > a:nth-child(2)')
    if birth_first is None or birth_second is None:
        birth_date = ''
    else:
        birth_date = birth_first.strip() + " " + birth_second.strip()

    current_team_id = ''
    current_team_name = optional_text('[href*="/2020.html"]')
    if current_team_name is not None:
        try:
            current_team_id = searchTeamByName(current_team_name)[0]
        except (TypeError, LookupError):
            # Unknown team name: assumes the lookup signals a miss by
            # returning None or an empty/short record -- confirm against
            # searchTeamByName. The field stays blank in that case.
            current_team_id = ''
        # NOTE(review): ids from the table above are ints, while this
        # fallback id is appended exactly as the lookup returned it.
        if not teams and current_team_id:
            teams.append(current_team_id)

    # The first three characters of the stripped birthplace text are dropped
    # (presumably a leading "in " -- confirm against the page).
    birth_place_text = optional_text('span[itemprop="birthPlace"]')
    if birth_place_text is None:
        birth_place = ''
    else:
        birth_place = birth_place_text.strip()[3:]

    # Any element with id "injury" marks the player as injured.
    injury = bool(soup.select_one('#injury'))

    with open('players.csv', 'a', newline='') as file:
        csv.writer(file).writerow([
            url[0], name, fullName, url[2], height, weight, birth_date,
            birth_place, retired, playerUrl, current_team_id, teams, injury
        ])