def parse_for_songs(url):
    """Build a song list from the "Artist - Album" entry titles of a page.

    The page at *url* is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  Each tag accepted by the ``is_entry_title`` predicate
    is expected to contain a link whose text has the form
    ``Artist <en dash> Album`` (the separator is U+2013).  The album name is
    handed to ``spotifier.fetch_album``; the first returned album whose
    ``'artist'`` equals the parsed artist (case-insensitively) and which has
    at least one track contributes the ``'uri'`` of its first track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts collected over *all* titles on the page and
        ``page_title`` is always ``'DOA'``.
    """
    print('Getting songs from  %s' % (url,))
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    title_tags = soup.find_all(is_entry_title)
    # The first group is greedy, so the split happens at the last en dash.
    regex = re.compile(u'^(.*)\u2013(.*)$')

    # Created once, before the loop: results accumulate across titles and
    # the name is bound even when the page has no matching title at all.
    songs = []
    for title in title_tags:
        link = title.a
        if link is None:
            # No <a> inside this title tag, so there is no text to parse.
            continue
        matches = regex.findall(link.text)
        if not matches:
            print('\tno matches...? %s' % (matches,))
            continue
        artist, album_name = (part.strip() for part in matches[0])
        # Self-titled releases are searched for under the artist's name.
        if album_name.lower() in ('s/t', 'self-titled'):
            album_name = artist

        for album in spotifier.fetch_album(album_name):
            # Results lacking an artist cannot be matched; skip them
            # instead of failing with KeyError.
            if 'artist' not in album:
                continue
            album_artist = album['artist']
            if album_artist.lower() != artist.lower():
                continue
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            songs.append({'uri': tracks[0]['uri']})
            print("{}: {}".format(album_artist, album['name']))
            # One song per title is enough.
            break

    page_title = 'DOA'
    return songs, page_title
def parse_for_songs(url):
    """Build a song list from the table rows of the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  For every ``<tr>`` that yields at least three
    non-empty text fragments, the second fragment is taken as the artist and
    the third as the album name.  The album name is handed to
    ``spotifier.fetch_album``; the first returned album whose ``'artist'``
    equals the row's artist (case-insensitively) and which has at least one
    track contributes the ``'uri'`` of its first track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is always ``'KEXP'``.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.content)

    songs = []
    for tr in soup.find_all('tr'):
        row_raw_text = list(tr.stripped_strings)
        if len(row_raw_text) < 3:
            # Not enough cells to hold both an artist and an album.
            continue
        # errors='replace' substitutes '?' for every non-ASCII character.
        artist = row_raw_text[1].encode('ascii', errors='replace').strip()
        album_name = row_raw_text[2].encode('ascii', errors='replace').strip()

        for album in spotifier.fetch_album(album_name):
            if 'artist' not in album:
                continue
            album_artist = album['artist']
            if album_artist.lower() != artist.lower():
                continue
            tracks = spotifier.get_album_tracks(album['id'])
            if len(tracks) < 1:
                continue
            songs.append({'uri': tracks[0]['uri']})
            print("{}: {}".format(album_artist, album['name']))
            # One song per row is enough.
            break

    page_title = 'KEXP'
    return songs, page_title
def parse_for_songs(url):
    """Collect one song per band named on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The text of every tag accepted by the ``is_band_tag``
    predicate is split on commas; each resulting name is looked up with
    ``spotifier.get_albums_for_artist`` and the first album that has any
    tracks contributes the ``'uri'`` of its first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'U_STREET'``.
    """
    print('Getting songs from  %s' % (url,))
    response = requests.get(url)
    document = BeautifulSoup(response.content)
    matched_tags = document.find_all(is_band_tag)

    songs = []
    for tag in matched_tags:
        for raw_name in tag.text.strip().split(','):
            act = raw_name.strip()
            discography = spotifier.get_albums_for_artist(act)
            if len(discography) > 0:
                print('Band: {} -- Found {} albums'.format(act, len(discography)))
                for record in discography:
                    listing = spotifier.get_album_tracks(record['id'])
                    if len(listing) >= 1:
                        songs.append({'uri': listing[0]['uri']})
                        # Only the first album with tracks is used.
                        break
    return songs, 'U_STREET'
def parse_for_songs(url):
    """Collect one song per band named on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  Tags accepted by the ``is_band_tag`` predicate whose
    text is exactly ``'Tickets'`` or ``'More Info'`` are ignored; the text of
    the remaining tags is split on commas.  Every name is printed, looked up
    with ``spotifier.get_albums_for_artist``, and the first album that has
    any tracks contributes the ``'uri'`` of its first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'CATSCRADLE'``.
    """
    print('Getting songs from  %s' % (url,))
    page = requests.get(url)
    matched_tags = BeautifulSoup(page.content).find_all(is_band_tag)

    songs = []
    for tag in matched_tags:
        label = tag.text
        if label in ('Tickets', 'More Info'):
            continue
        acts = [piece.strip() for piece in label.split(',')]
        for act in acts:
            print(act)
            discography = spotifier.get_albums_for_artist(act)
            if len(discography) == 0:
                continue
            # Lazily fetch track lists and stop at the first non-empty one.
            listings = (
                spotifier.get_album_tracks(record['id'])
                for record in discography
            )
            first_listing = next(
                (listing for listing in listings if len(listing) >= 1),
                None,
            )
            if first_listing is not None:
                songs.append({'uri': first_listing[0]['uri']})
    return songs, 'CATSCRADLE'
def parse_for_songs(url):
    """Build a song list from the "Artist - Album" review titles of a page.

    The page at *url* is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  For each tag accepted by the ``is_review_title``
    predicate, its first non-empty text fragment is expected to have the
    form ``Artist <en dash> Album`` (the separator is U+2013).  The album
    name is handed to ``spotifier.fetch_album`` and every returned album
    whose ``'artist'`` equals the parsed artist (case-insensitively) and
    which has at least one track contributes the ``'uri'`` of its first
    track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is always ``'skinny'``.
    """
    songs = []
    page_title = 'skinny'
    print('Getting songs from  %s' % (url,))
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    title_tags = soup.find_all(is_review_title)
    # Compiled once instead of once per title.  The first group is greedy,
    # so the split happens at the last en dash.
    regex = re.compile(u'^(.*)\u2013(.*)$')

    for title in title_tags:
        title_text = list(title.stripped_strings)
        if not title_text:
            # A tag without any text has nothing to parse.
            continue
        matches = regex.findall(title_text[0])
        if not matches:
            continue
        artist, album_name = (part.strip() for part in matches[0])
        print('%s > %s' % (artist, album_name))

        for album in spotifier.fetch_album(album_name):
            # Results lacking an artist cannot be matched; skip them
            # instead of failing with KeyError.
            if 'artist' not in album:
                continue
            if album['artist'].lower() != artist.lower():
                continue
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            # No break here: every matching album adds its first track.
            songs.append({'uri': tracks[0]['uri']})

    return songs, page_title
def parse_for_songs(url):
    """Collect one song per band named on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The text of every tag accepted by the ``is_band_tag``
    predicate is split on ``'|'``; each resulting name is looked up with
    ``spotifier.get_albums_for_artist`` and the first album that has any
    tracks contributes the ``'uri'`` of its first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'ROCK_HOTEL'``.
    """
    def lead_song(discography):
        # Song dict for the first album that has tracks, or None.
        for record in discography:
            listing = spotifier.get_album_tracks(record['id'])
            if len(listing) >= 1:
                return {'uri': listing[0]['uri']}
        return None

    print('Getting songs from  %s' % (url,))
    page = requests.get(url)
    markup = BeautifulSoup(page.content)
    matched_tags = markup.find_all(is_band_tag)

    songs = []
    for tag in matched_tags:
        for fragment in tag.text.strip().split('|'):
            act = fragment.strip()
            discography = spotifier.get_albums_for_artist(act)
            if len(discography) == 0:
                continue
            print('Band: {} -- Found {} albums'.format(act, len(discography)))
            song = lead_song(discography)
            if song is not None:
                songs.append(song)
    return songs, 'ROCK_HOTEL'
def parse_for_songs(url):
    """Collect one song per band named on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  Tags accepted by the ``is_band_tag`` predicate whose
    text is exactly ``'Tickets'`` or ``'More Info'`` are ignored; the text of
    the remaining tags is split on commas.  Every name is printed, looked up
    with ``spotifier.get_albums_for_artist``, and the first album that has
    any tracks contributes the ``'uri'`` of its first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'CATSCRADLE'``.
    """
    print('Getting songs from  %s' % (url,))
    response = requests.get(url)
    document = BeautifulSoup(response.content)
    matched_tags = document.find_all(is_band_tag)

    non_band_labels = ('Tickets', 'More Info')
    songs = []
    for tag in matched_tags:
        label = tag.text
        if label not in non_band_labels:
            acts = [piece.strip() for piece in label.split(',')]
            for act in acts:
                print(act)
                discography = spotifier.get_albums_for_artist(act)
                if len(discography) > 0:
                    for record in discography:
                        listing = spotifier.get_album_tracks(record['id'])
                        if len(listing) >= 1:
                            songs.append({'uri': listing[0]['uri']})
                            # Only the first album with tracks is used.
                            break
    return songs, 'CATSCRADLE'
def parse_for_songs(url):
    """Build a song list from the album-review links of the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  Every tag whose ``href`` matches
    ``/reviews/albums`` and which yields at least two non-empty text
    fragments is read as (artist, album).  The album name is handed to
    ``spotifier.fetch_album`` and the first returned album that has any
    tracks contributes the ``'uri'`` of its first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'PITCHFORK'``.
    """
    print('Getting songs from  %s' % (url,))
    page = requests.get(url)
    document = BeautifulSoup(page.content)
    review_links = document.find_all(href=re.compile("/reviews/albums"))

    songs = []
    for link in review_links:
        fragments = list(link.stripped_strings)
        if len(fragments) >= 2:
            # errors='replace' substitutes '?' for non-ASCII characters.
            artist = fragments[0].encode('ascii', errors='replace').strip()
            album_title = fragments[1].encode('ascii', errors='replace').strip()
            for candidate in spotifier.fetch_album(album_title):
                # NOTE: the returned album's artist is not compared with
                # `artist`; the artist is only used in the message below.
                listing = spotifier.get_album_tracks(candidate['id'])
                if len(listing) >= 1:
                    songs.append({'uri': listing[0]['uri']})
                    print("{}: {}".format(artist, candidate['name']))
                    break
    return songs, 'PITCHFORK'
def parse_for_songs(url):
    """Build a song list from the "Artist - Album" entry titles of a page.

    The page at *url* is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  Each tag accepted by the ``is_entry_title`` predicate
    is expected to contain a link whose text has the form
    ``Artist <en dash> Album`` (the separator is U+2013).  The album name is
    handed to ``spotifier.fetch_album``; the first returned album whose
    ``'artist'`` equals the parsed artist (case-insensitively) and which has
    at least one track contributes the ``'uri'`` of its first track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts collected over *all* titles on the page and
        ``page_title`` is always ``'DOA'``.
    """
    print('Getting songs from  %s' % (url,))
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    title_tags = soup.find_all(is_entry_title)
    # The first group is greedy, so the split happens at the last en dash.
    regex = re.compile(u'^(.*)\u2013(.*)$')

    # Created once, before the loop: results accumulate across titles and
    # the name is bound even when the page has no matching title at all.
    songs = []
    for title in title_tags:
        link = title.a
        if link is None:
            # No <a> inside this title tag, so there is no text to parse.
            continue
        matches = regex.findall(link.text)
        if not matches:
            print('\tno matches...? %s' % (matches,))
            continue
        artist, album_name = (part.strip() for part in matches[0])
        # Self-titled releases are searched for under the artist's name.
        if album_name.lower() in ('s/t', 'self-titled'):
            album_name = artist

        for album in spotifier.fetch_album(album_name):
            # Results lacking an artist cannot be matched; skip them
            # instead of failing with KeyError.
            if 'artist' not in album:
                continue
            album_artist = album['artist']
            if album_artist.lower() != artist.lower():
                continue
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            songs.append({'uri': tracks[0]['uri']})
            print("{}: {}".format(album_artist, album['name']))
            # One song per title is enough.
            break

    page_title = 'DOA'
    return songs, page_title
def parse_for_songs(url):
    """Collect one song per band tag found on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The stripped text of every tag accepted by the
    ``is_band`` predicate is treated as one band name, looked up with
    ``spotifier.get_albums_for_artist``, and the first album that has any
    tracks contributes the ``'uri'`` of its first track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is always ``'ROCKNESS'``.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.content)

    songs = []
    for tag in soup.find_all(is_band):
        band_name = tag.text.strip()
        if not band_name:
            # A tag with blank text carries no name to look up.
            continue
        albums = spotifier.get_albums_for_artist(band_name)
        if not albums:
            continue
        print('\tBand: {} -- Found {} albums'.format(band_name, len(albums)))
        for album in albums:
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            songs.append({'uri': tracks[0]['uri']})
            # One song per band is enough.
            break

    page_title = 'ROCKNESS'
    return songs, page_title
def parse_for_songs(url):
    """Collect one song per band tag found on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The stripped text of every tag accepted by the
    ``is_band`` predicate is treated as one band name, looked up with
    ``spotifier.get_albums_for_artist``, and the first album that has any
    tracks contributes the ``'uri'`` of its first track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is always ``'SXSW'``.
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.content)

    songs = []
    for tag in soup.find_all(is_band):
        band_name = tag.text.strip()
        if not band_name:
            # A tag with blank text carries no name to look up.
            continue
        albums = spotifier.get_albums_for_artist(band_name)
        if not albums:
            continue
        print('\tBand: {} -- Found {} albums'.format(band_name, len(albums)))
        for album in albums:
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            songs.append({'uri': tracks[0]['uri']})
            # One song per band is enough.
            break

    page_title = 'SXSW'
    return songs, page_title
def _band_name_candidates(band_text):
    """Return every spelling found in *band_text* that is worth looking up.

    *band_text* is split on commas.  For each piece the result holds the raw
    piece, the piece with a leading ``'and '`` and any ``'(Record Release)'``
    marker removed, and the parts of that cleaned piece split on ``'w/'``.
    Entries are not stripped or de-duplicated here.
    """
    candidates = []
    for piece in band_text.split(','):
        candidates.append(piece)
        name = piece.strip()
        if name.startswith('and '):
            name = name[4:]
        name = name.replace('(Record Release)', '').strip()
        candidates.append(name)
        candidates.extend(name.split('w/'))
    return candidates


def parse_for_songs(url):
    """Collect one song per distinct band name found on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The text of every tag accepted by the ``is_band_tag``
    predicate is expanded into candidate names.  Each distinct, non-empty
    candidate is looked up with ``spotifier.get_albums_for_artist`` and the
    first album that has any tracks contributes the ``'uri'`` of its first
    track.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(songs, page_title)`` tuple.  ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is always ``'COMET'``.
    """
    print('Getting songs from  %s' % (url,))
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    band_tags = soup.find_all(is_band_tag)

    # Strip BEFORE de-duplicating so that "B" and " B" count as one name,
    # drop empty fragments, and keep page order so runs are repeatable.
    seen = set()
    band_names = []
    for band in band_tags:
        for candidate in _band_name_candidates(band.text.strip()):
            candidate = candidate.strip()
            if candidate and candidate not in seen:
                seen.add(candidate)
                band_names.append(candidate)

    songs = []
    for band_name in band_names:
        albums = spotifier.get_albums_for_artist(band_name)
        if not albums:
            continue
        print('\tBand: {} -- Found {} albums'.format(band_name, len(albums)))
        for album in albums:
            tracks = spotifier.get_album_tracks(album['id'])
            if not tracks:
                continue
            songs.append({'uri': tracks[0]['uri']})
            # One song per band name is enough.
            break

    page_title = 'COMET'
    return songs, page_title
def parse_for_songs(url):
    """Collect one song per band tag found on the page at *url*.

    The page is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``.  The text of every tag accepted by the ``is_band_tag``
    predicate is passed unchanged to ``spotifier.get_albums_for_artist`` and
    the first album that has any tracks contributes the ``'uri'`` of its
    first track.

    Returns:
        A ``(songs, page_title)`` tuple where ``songs`` is a list of
        ``{'uri': ...}`` dicts and ``page_title`` is ``'BLACKCAT'``.
    """
    print('Getting songs from  %s' % (url,))
    page = requests.get(url)
    matched_tags = BeautifulSoup(page.content).find_all(is_band_tag)

    songs = []
    for tag in matched_tags:
        discography = spotifier.get_albums_for_artist(tag.text)
        if len(discography) == 0:
            continue
        # Lazily fetch track lists and stop at the first non-empty one.
        listings = (
            spotifier.get_album_tracks(record['id'])
            for record in discography
        )
        first_listing = next(
            (listing for listing in listings if len(listing) >= 1),
            None,
        )
        if first_listing is not None:
            songs.append({'uri': first_listing[0]['uri']})
    return songs, 'BLACKCAT'