def stats_grab(url, file):
    """Scrape the stats table found at *url* and pass it on for writing.

    Looks up the ``div`` with class ``p402_premium``, collects every ``tr``
    of the ``table`` with id ``stats`` inside it, hands those rows to
    ``table_grab`` and finally hands the container to ``next_page_grab``.

    Args:
        url: Address of the page, fetched through ``open_page``.
        file: Output handle forwarded untouched to ``table_grab`` and
            ``next_page_grab``.
    """
    # NOTE(review): assumes open_page returns a BeautifulSoup-style document
    # (it is only used through find/find_all here) -- confirm.
    soup = open_page(url)
    premium_div = soup.find("div", {"class": "p402_premium"})
    rows = premium_div.find("table", {"id": "stats"}).find_all("tr")

    table_grab(rows, file)
    # The container (not the table) is what next_page_grab receives.
    next_page_grab(premium_div, file)
def lead_changes_count_grab(game_url, file):
    """Count the lead-change cells of a game page and record the total.

    The game address is built as ``root_url + game_url`` and converted with
    ``url_pbp_alter``; the resulting page is fetched and every ``td`` whose
    class is ``bbr-play-leadchange center`` is counted.

    Args:
        game_url: Game path appended to the module-level ``root_url``.
        file: Writable handle; receives the count followed by a comma.
    """
    pbp_url = url_pbp_alter(root_url + game_url)
    pbp_page = open_page(pbp_url)  # fetch the page at the converted address

    lead_change_cells = pbp_page.find_all(
        "td", {"class": "bbr-play-leadchange center"}
    )
    total = str(len(lead_change_cells))

    file.write(f"{total},")
    # Echo the count to stdout; the explicit "\n" leaves a blank line after it.
    print(f"{total}\n")
def stats_grab_caller(url, file_name):
    """Open the output file, write the stats header, then scrape the stats.

    Fetches *url*, locates the ``table`` with id ``stats`` inside the ``div``
    with class ``p402_premium``, passes that table to ``header_grab`` and then
    delegates the row scraping to ``stats_grab``.

    Args:
        url: Address of the first stats page, fetched through ``open_page``.
        file_name: Name handed to ``open_file`` to obtain the output handle.

    Raises:
        AttributeError: If the ``p402_premium`` container is missing from the
            page (``find`` returned ``None``).
    """
    page = open_page(url)
    file = open_file(file_name)
    try:
        container = page.find("div", {"class": "p402_premium"})
        table = container.find("table", {"id": "stats"})
        header_grab(table, file)
        # stats_grab fetches the same url again and writes the table rows.
        stats_grab(url, file)
    finally:
        # Close the handle even when scraping fails part-way through.
        file.close()
def bandcamp_grab(url_to_grab, file_name):
    """Walk the album list of a bandcamp page and scrape each linked album.

    Every ``li`` of the ordered list inside the ``div`` with class
    ``leftMiddleColumns`` is inspected; for each one that contains an anchor
    with an ``href``, ``album_grab`` is called on ``url_to_grab + href``.

    Args:
        url_to_grab: Address of the bandcamp page; also used as the prefix
            for the album links found on it.
        file_name: Name of the csv file, forwarded to ``album_grab``.
    """
    soup = open_page(url_to_grab)
    album_items = soup.find("div", {"class": "leftMiddleColumns"}).ol.find_all("li")

    for item in album_items:
        anchor = item.find("a", href=True)
        if anchor is None:
            # List entries without a link are skipped.
            continue
        # The href is appended directly to the page address.
        album_grab(url_to_grab + anchor["href"], file_name)
def album_grab(url_to_grab, file_name):
    """Scrape every track of one bandcamp album page.

    Finds each ``tr`` with ``itemprop="tracks"`` and calls ``song_grab`` on
    the track's link, numbering the tracks from 1 in page order. This
    function writes nothing itself; ``song_grab`` does the csv writing.

    Args:
        url_to_grab: Address of the album page.
        file_name: Name of the csv file, forwarded to ``song_grab``.
    """
    soup = open_page(url_to_grab)
    track_rows = soup.find_all("tr", {"itemprop": "tracks"})

    # Original author's note: add album name write and track number.
    for position, row in enumerate(track_rows, start=1):
        href = row.find("a", {"itemprop": "url"})["href"]
        # Track links are prefixed with this fixed host.
        song_grab("https://8102.bandcamp.com" + href, file_name, position)
def song_grab(url, file_name, track_num):
    """Scrape one bandcamp track page and write a single csv row for it.

    The row is ``track_num, title, duration, about, credit`` followed by a
    newline. ``about`` and ``credit`` are empty strings when the page has no
    matching section. ``credit`` is the third comma-separated field of the
    whitespace-normalised credits text, or empty when there are fewer than
    three fields.

    Args:
        url: Address of the track page, fetched through ``open_page``.
        file_name: Name handed to ``open_file`` to obtain the output handle.
        track_num: Position of the track on its album; written first.

    Raises:
        AttributeError: If the page has no ``h2`` with class ``trackTitle``.
        TypeError: If the page has no ``meta`` with ``itemprop="duration"``.
    """
    page = open_page(url)
    file = open_file(file_name)
    try:
        song_name = page.find("h2", {"class": "trackTitle"}).text.strip()

        # Collapse all whitespace runs so the text cannot introduce a line
        # break into the csv row.
        about_div = page.find("div", {"class": "tralbumData tralbum-about"})
        abt = " ".join(about_div.text.split()) if about_div is not None else ""

        # Default to empty so pages without a credits section (or with too
        # few comma-separated fields) still produce a row instead of raising.
        creds = ""
        credits_div = page.find("div", {"class": "tralbumData tralbum-credits"})
        if credits_div is not None:
            credit_fields = " ".join(credits_div.text.split()).split(",")
            if len(credit_fields) > 2:
                creds = credit_fields[2]

        song_duration = page.find("meta", {"itemprop": "duration"})["content"]

        # The whole row goes out in one write call.
        file.write(f"{track_num}, {song_name}, {song_duration}, {abt}, {creds}\n")
    finally:
        # Release the handle even if parsing or writing fails.
        file.close()
def url_pbp_alter(url_to_change):
    """Return the absolute address linked from a page's second filter entry.

    Fetches *url_to_change*, takes the second ``div`` found inside the
    ``div`` with class ``filter``, reads the ``href`` of its first anchor and
    prefixes it with the basketball-reference host.

    Args:
        url_to_change: Address of the page to inspect.

    Returns:
        ``"https://www.basketball-reference.com"`` followed by the ``href``.
    """
    soup = open_page(url_to_change)
    filter_divs = soup.find("div", {"class": "filter"}).find_all("div")
    # NOTE(review): index 1 is presumably the play-by-play tab, judging by
    # the function name -- confirm against the page layout.
    pbp_anchor = filter_divs[1].find("a", href=True)
    return "https://www.basketball-reference.com" + pbp_anchor["href"]