def search_song(self, song_title="", band_name="", album_type="any", excluded_album_types=None):
    """Query the song search endpoint and lazily yield one dict per match.

    The request is sent when the generator is first advanced, not when
    this method is called.

    Args:
        song_title: Sent as the ``songTitle`` query parameter.
        band_name: Sent as the ``bandName`` query parameter.
        album_type: When not ``"any"``, only rows whose third column
            equals this value are yielded.
        excluded_album_types: Optional collection of album types to drop;
            ``None`` (or any falsy value) means nothing is excluded.

    Yields:
        dict with the keys ``album_url``, ``band_name``, ``album_name``,
        ``album_type``, ``song_name`` and ``song_id``.

    Raises:
        AttributeError: If ``self.lyric_id_re`` does not match the fifth
            column of a row (``search`` returns ``None``).
    """
    query = {"bandName": band_name, "songTitle": song_title}
    endpoint = self.site_url + self.url_search_songs
    response = requests.get(
        endpoint,
        params=query,
        headers={'User-Agent': get_random_user_agent()},
    )
    rows = response.json()['aaData']
    skipped_types = excluded_album_types or []

    for row in rows:
        row_type = row[2]
        # Guard clauses: drop rows that fail the type filter or are excluded.
        if album_type != "any" and row_type != album_type:
            continue
        if row_type in skipped_types:
            continue

        # The first two columns are HTML anchors; slice out the pieces
        # between fixed markers instead of parsing the markup.
        band_cell = row[0]
        # NOTE(review): "album_url" is taken from the same cell as
        # "band_name" -- confirm this is the intended link.
        link = band_cell[band_cell.find('href="') + 6:band_cell.find('" title=')]
        band = band_cell[band_cell.find('>') + 1:band_cell.find('</a')]

        album_cell = row[1]
        album = album_cell[album_cell.find('">') + 2:album_cell.find('</a')]

        title = row[3]
        lyric_id = self.lyric_id_re.search(row[4]).group("id")

        yield {
            "album_url": link,
            "band_name": band,
            "album_name": album,
            "album_type": row_type,
            "song_name": title,
            "song_id": lyric_id,
        }
def search_band(self, band_name="", genre="", index=0):
    """Query the band search endpoint and lazily yield one dict per match.

    The request is sent when the generator is first advanced, not when
    this method is called.

    Args:
        band_name: Sent as the ``bandName`` query parameter.
        genre: Sent as the ``genre`` query parameter.
        index: Sent as the ``iDisplayStart`` query parameter.

    Yields:
        dict with the keys ``url``, ``name``, ``genre`` and ``country``.
    """
    params = dict(bandName=band_name, genre=genre, iDisplayStart=index)
    url = self.site_url + self.url_search_bands
    response = requests.get(
        url,
        params=params,
        headers={'User-Agent': get_random_user_agent()},
    )
    # Only the result rows are needed; the record count in the payload was
    # previously read into an unused local and is no longer required.
    bands = response.json()['aaData']

    for band in bands:
        # The first column is an HTML anchor; the href ends at the same
        # '">' marker that precedes the visible band name.
        anchor = band[0]
        tag_end = anchor.find('">')
        yield {
            "url": anchor[anchor.find('href="') + 6:tag_end],
            "name": anchor[tag_end + 2:anchor.find('</a>')],
            "genre": band[1],
            "country": band[2],
        }
def get_band_data(url):
    """Download a band page and extract its basic facts.

    Args:
        url: Address of the band page to fetch.

    Returns:
        dict with the keys ``name``, ``url``, ``genre``, ``theme``,
        ``label``, ``country``, ``location``, ``status`` and ``date``.
        Each value is normalised as follows:

        * ``None`` when the XPath matched nothing or the text is ``"N/A"``;
        * the matched string when exactly one node matched;
        * the raw list of matches when several nodes matched.

        ``theme`` is additionally split on ``","`` into a list when it is
        a single string.
    """
    # Insertion order defines the key order of the returned dict.
    field_xpaths = {
        "name": '//*[@id="band_info"]/h1/a/text()',
        "url": '//*[@id="band_info"]/h1/a/@href',
        "genre": ".//*[@id='band_stats']/dl[2]/dd[1]/text()",
        "theme": ".//*[@id='band_stats']/dl[2]/dd[2]/text()",
        "label": ".//*[@id='band_stats']/dl[2]/dd[3]/text()",
        "country": ".//*[@id='band_stats']/dl[1]/dd[1]/a/text()",
        "location": ".//*[@id='band_stats']/dl[1]/dd[2]/text()",
        "status": ".//*[@id='band_stats']/dl[1]/dd[3]/text()",
        "date": ".//*[@id='band_stats']/dl[1]/dd[4]/text()",
    }
    # NOTE: the "years active" entry (dl[3]) is not extracted; no "years"
    # key has ever been returned, so the lookup and its post-processing
    # branch were dead code and have been removed.

    response = requests.get(
        url, headers={'User-Agent': get_random_user_agent()})
    tree = html.fromstring(response.content)

    result = {}
    for field, xpath in field_xpaths.items():
        value = tree.xpath(xpath)

        # Collapse single-match lists to the match and empty lists to None;
        # multi-match lists are passed through unchanged.
        if isinstance(value, list):
            if len(value) == 1:
                value = value[0]
            elif not value:
                value = None

        if isinstance(value, str) and value == 'N/A':
            value = None

        # Only a plain string can be split; a multi-match list used to
        # raise AttributeError here.
        if field == "theme" and isinstance(value, str):
            value = value.split(",")

        result[field] = value
    return result
def get_lyrics_by_song_id(self, song_id):
    """Fetch the lyrics document for ``song_id`` and return it as text.

    Args:
        song_id: String appended to ``self.site_url + self.url_lyrics``
            to form the request URL.

    Returns:
        The response text, stripped of surrounding whitespace, with every
        match of ``self.tags_re`` removed (presumably markup tags --
        confirm against the pattern's definition).
    """
    lyrics_url = self.site_url + self.url_lyrics + song_id
    request_headers = {'User-Agent': get_random_user_agent()}
    response = requests.get(lyrics_url, headers=request_headers)
    remove_tags = self.tags_re.sub
    # Whitespace is trimmed before the substitution, so whitespace exposed
    # by removing a leading or trailing match is kept.
    return remove_tags('', response.text.strip())