def metacritic(oldest_first=False):
    """Print the newest Metacritic album releases, ordered by metascore.

    The listing page is rendered through a webdriver; at most the first 35
    entries are used. For every entry with a usable link the album's own page
    is fetched to obtain the review lede and the record label.

    Args:
        oldest_first: When true, the entries are printed in reverse listing
            order (the printed index still reflects the listing position).
    """
    header = """ .-. /|/| / .-. / .-. / | .-.---/---.-. .-. ).--.`-'---/---`-'.-. / |./.-'_ / ( | ( / / / / ( .-' / |(__.' / `-'-'`---'/ _.(__. / _.(__. `---' (__.' `. .-. .-. (_) )-. / /|/| / / __) .-. . ---/--- / | .-.---/---.-. . .-. .-._.).--..-. / `. ./.-'_/ \ / / |./.-'_ / ( | / \( ( )/ ./.-'_ /' )(__.'/ ._)/ .-' / |(__.' / `-'-'/ ._)`---'`-'/ (__.' (_/ `----' / (__.' `. / --------------------------------------------------------------------------------- """
    driver = Animate(initialize_webdriver, "Initializing webdriver...")()
    try:
        print(header)
        url = "http://www.metacritic.com/browse/albums/release-date/new-releases/metascore?view=detailed"
        html = Animate(partial(render_html, url, driver), "", trailing_newline=True)()
        soup = BeautifulSoup(html, "html5lib")
        records = soup.find_all("td", class_="clamp-summary-wrap")[:35]
        if oldest_first:
            records = records[::-1]

        for ix, record in enumerate(records):
            album = try_except(
                lambda: record.find("h3", class_="product_title").find("a").text.strip(),
                "album",
            )
            artist = try_except(
                lambda: SPACE_HYPHEN_SPACE.split(
                    record.find("span", class_="product_artist").text
                )[-1].strip(),
                "artist",
            )
            genres = try_except(
                lambda: record.find("li", class_="stat genre")
                .find("span", class_="data")
                .text.split(", "),
                "genre",
            )
            href = try_except(
                lambda: record.find("h3", class_="product_title")
                .find("a")
                .attrs["href"]
                .strip(),
                "link",
            )
            rating = try_except(
                lambda: record.find("a", class_="basic_stat product_score").text.strip(),
                "rating",
            )

            # NOTE(review): try_except is defined elsewhere; the "Unknown link"
            # check below suggests it returns a placeholder string on failure.
            # Only a real list of genres is cleaned and joined -- iterating a
            # placeholder string would join its individual characters.
            if isinstance(genres, list):
                cleaned = [g.strip() for g in genres]
                genre = " / ".join([g for g in cleaned if "..." not in g])
            else:
                genre = genres

            label, lede, link = "Unknown label", "Unknown lede", "Unknown link"
            if "Unknown link" not in href:
                link = "http://www.metacritic.com{}".format(href)

                # visit the album page to get the review lede and the label
                review_html = render(link)
                review = BeautifulSoup(review_html, "html5lib")

                lede = try_except(
                    lambda: review.find("li", class_="summary_detail product_summary")
                    .find("span", class_="data")
                    .text.strip(),
                    "review",
                )
                label = try_except(
                    lambda: review.find("li", class_="summary_detail product_company")
                    .find("span", class_="data")
                    .text.strip(),
                    "label",
                )

            # When reversed, number the entries by their original position.
            position = len(records) - ix if oldest_first else ix + 1

            print_record(
                artist=artist,
                album=album,
                label=label,
                genre=genre,
                link=link,
                lede=lede,
                rating=rating,
                index="{}. ".format(position),
            )
    finally:
        # Shut the browser down even if scraping fails part-way through.
        driver.quit()
def allmusic(oldest_first=False, n_items=None):
    """Print the AllMusic "Editors' Choice" new releases.

    The page is rendered through a webdriver and every field is read from the
    listing itself; no per-album page is visited.

    Args:
        oldest_first: When true, the entries are printed in reverse page order.
        n_items: Maximum number of entries to print; a falsy value prints all.
    """
    header = """ _ _ __ __ _ /\ | | | \/ | (_) / \ | | | \ / |_ _ ___ _ ___ / /\ \ | | | |\/| | | | / __| |/ __| / ____ \| | | | | | |_| \__ \ | (__ /_/____\_\_|_|_| |_|\__,_|___/_|\___|_____ _ _ | ____| | (_) | ( ) / ____| | (_) | |__ __| |_| |_ ___ _ __|/ ___ | | | |__ ___ _ ___ ___ | __| / _` | | __/ _ \| '__| / __| | | | '_ \ / _ \| |/ __/ _ \\ | |___| (_| | | || (_) | | \__ \ | |____| | | | (_) | | (_| __/ |______\__,_|_|\__\___/|_| |___/ \_____|_| |_|\___/|_|\___\___| ------------------------------------------------------------------- """

    def star_rating(item):
        """Return the star rating as text: full stars plus 0.5 per half star."""
        stars = item.find("div", class_="allmusic-rating-new")
        rating = len(stars.find_all("img", class_="blue star"))
        rating += len(stars.find_all("img", class_="blue half")) * 0.5
        return str(rating)

    driver = Animate(initialize_webdriver, "Initializing webdriver...")()
    try:
        print(header)
        url = "https://www.allmusic.com/newreleases/editorschoice"
        html = Animate(partial(render_html, url, driver), "", trailing_newline=True)()
        soup = BeautifulSoup(html, "html5lib")
        records = soup.find_all("div", class_="editors-choice-item")
        if oldest_first:
            records = records[::-1]

        for ix, record in enumerate(records):
            if n_items and ix == n_items:
                break

            if ix > 0:
                time.sleep(SLEEP_BTWN_ITEMS)

            artist = try_except(
                lambda: record.find("div", class_="artist").text.strip(), "artist"
            )
            album = try_except(
                lambda: record.find("div", class_="title").text.strip(), "album"
            )
            genre = try_except(
                lambda: record.find("div", class_="genres").text.strip(), "genre"
            )
            lede = try_except(
                lambda: record.find("div", class_="headline-review").text.strip(),
                "review",
            )
            label = try_except(
                lambda: record.find("div", class_="labels").text.strip(), "label"
            )
            link = try_except(
                lambda: record.find("div", class_="title").find("a").attrs["href"].strip(),
                "link",
            )
            rating = try_except(lambda: star_rating(record), "rating")

            print_record(
                trailing_newline=True,
                artist=artist,
                album=album,
                label=label,
                link=link,
                genre=genre,
                lede=lede,
                rating=rating,
            )
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window) and now also runs when scraping raises.
        driver.quit()
def midheaven(oldest_first=False, n_items=None):
    """Print the Midheaven top-selling releases.

    Artist, album and label come from the listing page; the review lede is
    read from each release's own page when a link could be extracted.

    Args:
        oldest_first: When true, the entries are printed in reverse page order
            (the printed index still reflects the listing position).
        n_items: Maximum number of entries to print; a falsy value prints all.
    """
    header = """ d8, d8b d8b `8P 88P ?88 d88 88b 88bd8b,d88b 88b d888888 888888b d8888b d888b8b ?88 d8P d8888b 88bd88b 88P'`?8P'?8b 88Pd8P' ?88 88P `?8bd8b_,dPd8P' ?88 d88 d8P'd8b_,dP 88P' ?8b d88 d88 88P d88 88b ,88b d88 88P88b 88b ,88b ?8b ,88' 88b d88 88P d88' d88' 88bd88' `?88P'`88bd88' 88b`?888P'`?88P'`88b`?888P' `?888P'd88' 88b d8b d8b d8b ?88 d8P 88P 88P 88b d888888P d88 d88 888888b d8888b .d888b, ?88' .d888b, d8888b888 888 d8888b 88bd88b .d888b, 88P `?8bd8b_,dP ?8b, 88P ?8b, d8b_,dP?88 ?88 d8b_,dP 88P' ` ?8b, d88, d8888b `?8b 88b `?8b 88b 88b 88b 88b d88 `?8b d88'`?88P'`?888P'`?888P' `?8b `?888P' `?888P' 88b 88b`?888P'd88' `?888P' -------------------------------------------------------------------------------------- """
    print(header)

    url = "https://www.midheaven.com/top-selling"
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html5lib")
    records = soup.find_all("div", class_="uk-panel uk-panel-box")
    if oldest_first:
        records = records[::-1]

    for ix, record in enumerate(records):
        if n_items and ix == n_items:
            break

        if ix > 0:
            time.sleep(SLEEP_BTWN_ITEMS)

        artist = try_except(lambda: record.find("h4").text.strip(), "artist")
        album = try_except(lambda: record.find("h5").text.strip(), "album")
        label = try_except(lambda: record.find("h6").text.strip(), "label")
        href = try_except(
            lambda: record.find("div", class_="uk-panel-teaser")
            .find("a")
            .attrs["href"]
            .strip(),
            "link",
        )

        # Reset per record: without these defaults a failed link lookup would
        # raise on the first record or reuse the previous record's values.
        link, lede = "Unknown link", "Unknown review"
        if "Unknown link" not in href:
            link = "http://www.midheaven.com{}".format(href)

            # visit the release page to get the review lede
            review_html = requests.get(link).text
            review = BeautifulSoup(review_html, "html5lib")
            lede = try_except(
                lambda: review.find("div", class_="item-meta").text.strip(), "review"
            )

        # When reversed, number the entries by their original position.
        position = len(records) - ix if oldest_first else ix + 1

        print_record(
            artist=artist,
            album=album,
            label=label,
            link=link,
            lede=lede,
            index="{}. ".format(position),
        )
def boomkat(period="last-week", oldest_first=False, n_items=None):
    """Print the Boomkat bestsellers for the given period.

    The listing page and each product page (for the review lede) are rendered
    through a webdriver.

    Args:
        period: One of "last-week", "last-month" or "last-year".
        oldest_first: When true, the entries are printed in reverse page order
            (the printed index still reflects the listing position).
        n_items: Maximum number of entries to print; a falsy value prints all.

    Raises:
        ValueError: If ``period`` is not one of the recognized values.
    """
    # Validate before launching the browser so a bad argument cannot leak one.
    if period not in ["last-week", "last-month", "last-year"]:
        raise ValueError("Unrecognized period: {}".format(period))

    header = """ ____ __ __ / __ )____ ____ ____ ___ / /______ _/ /_ / __ / __ \/ __ \/ __ `__ \/ //_/ __ `/ __/ / /_/ / /_/ / /_/ / / / / / / ,< / /_/ / /_ /___________/\____/___/_/ /______|\__,_____/ / __ )___ _____/ /_ / ___/___ / / /__ __________ / __ / _ \/ ___/ __/ \__ \/ _ \/ / / _ \/ ___/ ___/ / /_/ / __(__ ) /_ ___/ / __/ / / __/ / (__ ) /_____/\___/____/\__/ /____/\___/_/_/\___/_/ /____/ ------------------------------------------------------- """
    driver = Animate(initialize_webdriver, "Initializing webdriver...")()
    try:
        print(header)
        url = "https://boomkat.com/bestsellers?q[release_date]={}".format(period)
        html = Animate(partial(render_html, url, driver), "", trailing_newline=False)()
        soup = BeautifulSoup(html, "html5lib")
        records = soup.find_all("li", class_="bestsellers-item")
        if oldest_first:
            records = records[::-1]

        for ix, record in enumerate(records):
            if n_items and ix == n_items:
                break

            titles = try_except(
                lambda: record.find("div", class_="product-name").text.strip(), "album"
            )
            genres = try_except(
                lambda: record.find("div", class_="product-label-genre").text.strip(),
                "genre",
            )
            link = try_except(
                lambda: record.find("a", class_="full-listing").attrs["href"].strip(),
                "link",
            )

            # Both text blocks are multi-line: the first line holds one field
            # and the last line the other.
            label = try_except(lambda: genres.split("\n")[0].strip(), "label")
            genre = try_except(lambda: genres.split("\n")[-1].strip(), "genre")
            artist = try_except(lambda: titles.split("\n")[0].strip(), "artist")
            album = try_except(lambda: titles.split("\n")[-1].strip(), "album")

            # visit the review page to get the review lede
            lede = "Unknown review"
            if "Unknown link" not in link:
                review_html = Animate(
                    partial(render_html, link, driver), "", trailing_newline=True
                )()
                review = BeautifulSoup(review_html, "html5lib")
                lede = try_except(
                    lambda: review.find("div", class_="product-review")
                    .find("strong")
                    .text.strip(),
                    "review",
                )

            # When reversed, number the entries by their original position.
            position = len(records) - ix if oldest_first else ix + 1

            print_record(
                trailing_newline=False,
                artist=artist,
                album=album,
                label=label,
                genre=genre,
                link=link,
                lede=lede,
                index="{}. ".format(position),
            )
        print("\n")
    finally:
        # Always end the browser session, even when scraping fails.
        driver.quit()
def resident_advisor(oldest_first=False, n_items=None):
    """Print the Resident Advisor recommended reviews.

    The page is rendered through a webdriver; every field is read from the
    listing itself. No genre or rating is printed.

    Args:
        oldest_first: When true, the reviews are printed in reverse page order.
        n_items: Maximum number of reviews to print; a falsy value prints all.
    """
    header = """ __ _ __ _ /__\ /_\ /__\ ___ ___ ___ _ __ ___ _ __ ___ ___ _ __ __| |___ / \// //_\\\\ / \/// _ \/ __/ _ \| '_ ` _ \| '_ ` _ \ / _ \ '_ \ / _` / __| / _ \/ _ \ / _ \ __/ (_| (_) | | | | | | | | | | | __/ | | | (_| \__ \\ \/ \_/\_/ \_/ \/ \_/\___|\___\___/|_| |_| |_|_| |_| |_|\___|_| |_|\__,_|___/ --------------------------------------------------------------------------- """
    driver = Animate(initialize_webdriver, "Initializing webdriver...")()
    try:
        print(header)
        url = "https://ra.co/reviews/recommends"
        html = Animate(partial(render_html, url, driver), "", trailing_newline=True)()
        soup = BeautifulSoup(html, "html5lib")
        reviews = soup.find("main", {"data-tracking-id": "reviews-archive"}).find_all(
            "li", class_="Column-sc-18hsrnn-0 iBzIXi"
        )
        if oldest_first:
            # The original reversed an undefined name here and crashed.
            reviews = reviews[::-1]

        tattr = {"data-tracking-id": re.compile("^/reviews/")}
        lattr = {"data-tracking-id": re.compile("^/labels/")}

        for ix, record in enumerate(reviews):
            if n_items and ix == n_items:
                break

            if ix > 0:
                time.sleep(SLEEP_BTWN_ITEMS)

            title = try_except(lambda: record.find("span", tattr).text.strip(), "album")

            artist, album = ("Unknown artist", "Unknown album")
            if "Unknown album" not in title:
                # Prefer the spaced separator; fall back to the looser one.
                parts = SPACE_HYPHEN_SPACE.split(title, 1)
                if len(parts) != 2:
                    parts = HYPHEN_SPACE.split(title, 1)
                if len(parts) == 2:
                    artist, album = parts
                else:
                    # No separator at all: keep the whole title as the album
                    # instead of raising on the failed unpacking.
                    album = title

            link = try_except(
                lambda: "https://ra.co{}".format(
                    record.find("span", tattr).attrs["href"]
                ),
                "link",
            )
            label = try_except(lambda: record.find("span", lattr).text.strip(), "label")
            lede = try_except(
                lambda: record.find("span", class_="kyXmTt").text.strip(), "review"
            )

            # RA doesn't include ratings anymore, so none is printed.
            print_record(
                artist=artist,
                album=album,
                label=label,
                link=link,
                lede=lede,
            )
    finally:
        # quit() ends the whole browser session and also runs on failure.
        driver.quit()
def pitchfork(n_pages=2, oldest_first=False, n_items=None):
    """Print Pitchfork's high-scoring albums (8.0+).

    Each listing page supplies artist, album, genre and the "Best New ..."
    badge; rating, label and review lede come from the individual review page
    when a link could be extracted. A legend for the badge symbols is printed
    at the end.

    Args:
        n_pages: Number of listing pages to walk (pages 1..n_pages).
        oldest_first: When true, pages and the records within each page are
            visited in reverse order.
        n_items: Maximum number of records to print across all pages; a falsy
            value prints everything.
    """
    header = """ ___ _ _ _ __ _ / _ (_) |_ ___| |__ / _| ___ _ __| | __ / /_)/ | __/ __| '_ \| |_ / _ \| '__| |/ / / ___/| | || (__| | | | _| (_) | | | < \/ |_|\__\___|_| |_|_| \___/|_| |_|\_\\ ___ ___ _ _ _ ( _ ) / _ \ _ /_\ | | |__ _ _ _ __ ___ ___ / _ \| | | |_| |_ //_\\\\| | '_ \| | | | '_ ` _ \/ __| | (_) | |_| |_ _| / _ \ | |_) | |_| | | | | | \__ \\ \___(_)___/ |_| \_/ \_/_|_.__/ \__,_|_| |_| |_|___/ --------------------------------------------------------- """
    print(header)

    pages = range(1, n_pages + 1)
    if oldest_first:
        pages = reversed(pages)

    shown = 0  # records printed so far, across all pages
    for pn in pages:
        # Stop before downloading further pages once the limit is reached.
        if n_items and shown >= n_items:
            break

        url = "https://pitchfork.com/best/high-scoring-albums/?page={}".format(pn)
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html5lib")
        records = soup.find_all("div", class_="review")
        if oldest_first:
            records = records[::-1]

        for record in records:
            if n_items and shown >= n_items:
                break
            shown += 1

            albums = record.find_all("h2", class_="review__title-album")
            genres = record.find_all("a", class_="genre-list__link")
            bnms = record.find_all("a", class_="review__meta-bnm")
            artists = record.find_all("ul", class_="artist-list review__title-artist")

            link = try_except(
                lambda: record.find("a", class_="review__link").attrs["href"].strip(),
                "link",
            )
            artist = try_except(
                lambda: {g.text.strip() for g in artists[0].children}, "artist"
            )
            album = try_except(lambda: {g.text.strip() for g in albums}, "album")
            genre = try_except(lambda: {g.text.strip() for g in genres}, "genre")
            bnm = try_except(lambda: {g.text for g in bnms}, "")

            if isinstance(artist, (list, set)):
                artist = " + ".join(artist)

            if isinstance(album, (list, set)):
                album = " / ".join(album)

            if isinstance(genre, (list, set)):
                genre = " / ".join(genre)

            label, lede, rating = ("Unknown label", "Unknown review", "Unknown rating")
            if "Unknown link" not in link:
                link = "https://pitchfork.com{}".format(link)

                # visit the review page to get rating, label, & review lede
                review_html = requests.get(link).text
                review = BeautifulSoup(review_html, "html5lib")

                rating = try_except(
                    lambda: review.find(
                        "div", class_="ScoreCircle-cJwsOz"
                    ).text.strip(),
                    "rating",
                )

                labels = review.find_all("li", class_="InfoSliceListItem-gMxDho gigHWh")
                labels = [i for i in labels if "Label:" in i.text]
                label = try_except(
                    lambda: {g.text.replace("Label:", "").strip() for g in labels},
                    "label",
                )

                lede = try_except(
                    lambda: review.find(
                        "div",
                        class_="BaseWrap-sc-TURhJ BaseText-fFzBQt SplitScreenContentHeaderDekDown-fkIOvp eTiIvU ifBumJ etXaLE",
                    ).text.strip(),
                    "review",
                )

                if isinstance(label, (list, set)):
                    label = ", ".join(label)

                if not labels:
                    label = "Unknown label"

            symbol = ""
            if "Best New Reissue" in bnm:
                symbol = colored("**", "red", attrs=["bold"])
            elif "Best New Album" in bnm:
                symbol = colored("*", "red", attrs=["bold"])

            print_record(
                artist=artist,
                album=album,
                label=label,
                genre=genre,
                link=link,
                lede=lede,
                rating=rating,
                symbol=symbol,
            )

    print("{} = Best New Album".format(colored("*", "red", attrs=["bold"])))
    print("{} = Best New Reissue".format(colored("**", "red", attrs=["bold"])))
def forced_exposure(oldest_first=False, n_items=None):
    """Print the Forced Exposure bestsellers.

    Everything is read from a single index page. Rows are addressed through
    element ids containing a two-digit counter running from 02 to 51; the
    printed index is that counter minus one.

    Args:
        oldest_first: When true, the rows are visited from the last counter
            down to the first.
        n_items: Maximum number of rows to print, counted in visiting order;
            a falsy value prints all.
    """
    header = """ ___ __ __ __ ___ __ ___ __ __ __ __ ___ |__ / \ |__) / ` |__ | \ |__ \_/ |__) / \ /__` | | |__) |__ | \__/ | \ \__, |___ |__/ |___ / \ | \__/ .__/ \__/ | \ |___ __ ___ __ ___ __ ___ ___ __ __ |__) |__ /__` | /__` |__ | | |__ |__) /__` |__) |___ .__/ | .__/ |___ |___ |___ |___ | \ .__/ ----------------------------------------------------------------------- """
    print(header)

    url = "https://www.forcedexposure.com/Best/BestIndex.html"
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html5lib")

    indices = range(2, 52)
    if oldest_first:
        indices = reversed(indices)

    for i, ix in enumerate(indices):
        # Count visited rows, not the row counter: with oldest_first the
        # counter runs downwards and cannot serve as a limit check.
        if n_items and i == n_items:
            break

        if i > 0:
            time.sleep(SLEEP_BTWN_ITEMS)

        # The element ids use a zero-padded two-digit counter.
        prefix = "ctl00_ContentPlaceHolder1_gvRecBestSeller_ctl{:02d}_".format(ix)

        artist = try_except(
            lambda: soup.find("a", {"id": prefix + "hlnkArtistId"})
            .text.strip()
            .title(),
            "artist",
        )
        album = try_except(
            lambda: soup.find("a", {"id": prefix + "hrTitle"}).text.strip(), "album"
        )
        label = try_except(
            lambda: soup.find("a", {"id": prefix + "hlnkLabel"}).text.title(), "label"
        )
        lede = try_except(
            lambda: soup.find("span", {"id": prefix + "lblTx_Desc"}).text.strip(),
            "review",
        )
        status = try_except(
            lambda: soup.find("span", {"id": prefix + "lblStockStatus"}).text.strip(),
            "status",
        )
        link = try_except(
            lambda: soup.find("a", {"id": prefix + "hrTitle"}).attrs["href"].strip(),
            "link",
        )

        # Turn the relative catalog href into an absolute URL; an href without
        # the expected prefix is left as extracted rather than raising.
        if "Unknown link" not in link and "../Catalog/" in link:
            link = "https://www.forcedexposure.com/Catalog/{}".format(
                link.split("../Catalog/")[1]
            )

        print_record(
            artist=artist,
            album=album,
            label=label,
            status=status,
            link=link,
            lede=lede,
            index="{}. ".format(ix - 1),
        )