コード例 #1
0
def _fetch_from_lyricsdotcom(artist, song):
    '''
    Downloads a song page from lyrics.com and returns its lyrics as text.

    The URL is built with the song first and the artist second; both values
    are interpolated as given (no URL escaping is applied here).

    Returns the text of the page's id="lyrics" div(s) after it has been
    passed through mlstripper.strip_tags (presumably removing the HTML
    markup -- confirm against that helper).
    '''
    url      = 'http://www.lyrics.com/%s-lyrics-%s.html' % (song, artist)
    response = requests.get(url)
    document = BeautifulSoup(response.text, 'lxml')

    # findAll returns every matching element; the string form of that whole
    # result is what gets handed to the tag stripper.
    # NOTE(review): that string form presumably carries the enclosing list
    # brackets, which may leak into the returned text -- confirm.
    matches = document.findAll('div', {'id' : 'lyrics'})
    markup  = str(matches)

    text = mlstripper.strip_tags(markup)

    # Turn literal backslash-n sequences into real line breaks.
    return text.replace('\\n', '\n')
コード例 #2
0
ファイル: scraper.py プロジェクト: tylermcdonnell/billboard
    def parse(self):
        '''
        Parses this Billboard 200 page and returns one tuple per chart row,
        ordered as:

        (date, position, album, artist)

        date is self._date, position is the integer taken from the row's
        "current week" span, and album / artist are the cleaned strings
        taken from the row's h2 / h3 elements. Rows are paired up by
        position in the page; pairing stops at the shortest of the three
        extracted lists.
        '''
        document = BeautifulSoup(self.read_page(), "lxml")

        # Chart positions: the first child of each span is converted to int.
        positions = []
        for tag in document.findAll("span", {"class" : "chart-row__current-week"}):
            positions.append(int(tag.contents[0]))

        # Album titles: first child of each h2, stringified and cleaned.
        titles = []
        for tag in document.findAll("h2", {"class" : "chart-row__song"}):
            titles.append(self.clean(str(tag.contents[0])))

        # Artist names: the whole h3 is stringified, run through the tag
        # stripper, and then cleaned.
        performers = []
        for tag in document.findAll("h3", {"class" : "chart-row__artist"}):
            performers.append(self.clean(mlstripper.strip_tags(str(tag))))

        return [
            (self._date, position, title, performer)
            for position, title, performer in zip(positions, titles, performers)
        ]