Esempio n. 1
0
 def search_song(self,
                 song_title="",
                 band_name="",
                 album_type="any",
                 excluded_album_types=None):
     """Search for songs and lazily yield one dict per matching result row.

     This is a generator: the HTTP request is only sent when the first
     item is requested.

     Args:
         song_title: Value sent as the ``songTitle`` query parameter.
         band_name: Value sent as the ``bandName`` query parameter.
         album_type: When not ``"any"``, only rows whose third column
             equals this value are yielded.
         excluded_album_types: Optional collection of third-column values
             to skip; ``None`` (or any falsy value) excludes nothing.

     Yields:
         dict with the keys ``album_url``, ``band_name``, ``album_name``,
         ``album_type``, ``song_name`` and ``song_id``.

     Raises:
         AttributeError: If ``self.lyric_id_re`` does not match the fifth
             column of a row (``search`` returns ``None``).
     """
     query = dict(bandName=band_name, songTitle=song_title)
     endpoint = self.site_url + self.url_search_songs
     response = requests.get(
         endpoint,
         params=query,
         headers={'User-Agent': get_random_user_agent()})
     rows = response.json()['aaData']
     skipped_types = excluded_album_types or []
     for row in rows:
         row_type = row[2]
         # Type filter: an explicit album_type must match exactly, and
         # excluded types are always dropped.
         if album_type != "any" and row_type != album_type:
             continue
         if row_type in skipped_types:
             continue
         # Columns 0 and 1 hold anchor markup; the values are cut out by
         # plain substring search rather than by an HTML parser.
         band_cell, album_cell = row[0], row[1]
         # NOTE(review): "album_url" is taken from the same cell as
         # "band_name" (column 0), not from the album cell -- confirm
         # this is intended.
         yield {
             "album_url":
             band_cell[band_cell.find('href="') + 6:
                       band_cell.find('" title=')],
             "band_name":
             band_cell[band_cell.find('>') + 1:band_cell.find('</a')],
             "album_name":
             album_cell[album_cell.find('">') + 2:album_cell.find('</a')],
             "album_type": row_type,
             "song_name": row[3],
             "song_id": self.lyric_id_re.search(row[4]).group("id"),
         }
Esempio n. 2
0
 def search_band(self, band_name="", genre="", index=0):
     """Search for bands and lazily yield one dict per result row.

     This is a generator: the HTTP request is only sent when the first
     item is requested.

     Args:
         band_name: Value sent as the ``bandName`` query parameter.
         genre: Value sent as the ``genre`` query parameter.
         index: Value sent as the ``iDisplayStart`` query parameter
             (presumably the offset of the first row to return -- confirm
             against the remote API).

     Yields:
         dict with the keys ``url``, ``name``, ``genre`` and ``country``.
     """
     params = dict(bandName=band_name, genre=genre, iDisplayStart=index)
     url = self.site_url + self.url_search_bands
     payload = requests.get(
         url,
         params=params,
         headers={'User-Agent': get_random_user_agent()}).json()
     # Only the row list is needed; the record total in the payload was
     # previously read into an unused local, which made the search fail
     # with KeyError whenever that field was missing.
     for band in payload['aaData']:
         # Column 0 holds anchor markup; URL and name are cut out by
         # plain substring search.
         link = band[0]
         yield {
             "url": link[link.find('href="') + 6:link.find('">')],
             "name": link[link.find('">') + 2:link.find('</a>')],
             "genre": band[1],
             "country": band[2],
         }
Esempio n. 3
0
    def get_band_data(url):
        """Fetch a band page and extract its basic facts into a dict.

        Args:
            url: Address of the band page to download.

        Returns:
            dict with the keys ``name``, ``url``, ``genre``, ``theme``,
            ``label``, ``country``, ``location``, ``status`` and ``date``.
            Each value is normalised from the XPath result list:

            * exactly one match -> that single value,
            * no match, or the literal text ``'N/A'`` -> ``None``,
            * several matches -> the list is kept as is.

            ``theme`` is additionally split on ``","`` into a list when it
            is a single string.
        """
        response = requests.get(
            url, headers={'User-Agent': get_random_user_agent()})
        tree = html.fromstring(response.content)

        # (field name, XPath) pairs, in the order the keys appear in the
        # returned dict.
        field_xpaths = (
            ("name", '//*[@id="band_info"]/h1/a/text()'),
            ("url", '//*[@id="band_info"]/h1/a/@href'),
            ("genre", ".//*[@id='band_stats']/dl[2]/dd[1]/text()"),
            ("theme", ".//*[@id='band_stats']/dl[2]/dd[2]/text()"),
            ("label", ".//*[@id='band_stats']/dl[2]/dd[3]/text()"),
            ("country", ".//*[@id='band_stats']/dl[1]/dd[1]/a/text()"),
            ("location", ".//*[@id='band_stats']/dl[1]/dd[2]/text()"),
            ("status", ".//*[@id='band_stats']/dl[1]/dd[3]/text()"),
            ("date", ".//*[@id='band_stats']/dl[1]/dd[4]/text()"),
        )

        result = {}
        for field, xpath in field_xpaths:
            value = tree.xpath(xpath)
            if isinstance(value, list):
                if len(value) == 1:
                    value = value[0]
                elif not value:
                    value = None
            if isinstance(value, str) and value == 'N/A':
                value = None
            # Split only a plain string: when the XPath matched several
            # text nodes the value is still a list, and calling .split on
            # it would raise AttributeError.
            if field == "theme" and isinstance(value, str):
                value = value.split(",")
            result[field] = value
        return result
Esempio n. 4
0
 def get_lyrics_by_song_id(self, song_id):
     """Download the lyrics page for ``song_id`` and return its cleaned text.

     Args:
         song_id: String appended to ``self.site_url + self.url_lyrics``
             to form the request URL.

     Returns:
         The response text, stripped of surrounding whitespace, with every
         match of ``self.tags_re`` removed (presumably markup tags --
         confirm against the pattern's definition).
     """
     response = requests.get(
         self.site_url + self.url_lyrics + song_id,
         headers={'User-Agent': get_random_user_agent()})
     page_text = response.text.strip()
     return self.tags_re.sub('', page_text)