def parse(self):
    """Store the last listed artist and title on the instance.

    Call feed first. Reads ``self.pagecontent``, collects the ``.string``
    of every node matched by ``span.tracktitle`` and ``span.artist``,
    pairs them positionally and assigns the final pair to
    ``self.artist`` and ``self.title``.

    Raises:
        IndexError: if no (artist, title) pair could be formed.
    """
    soup = base.Soup(self.pagecontent)
    # get the titles and the artists
    titles = [node.string for node in base.select(soup, 'span.tracktitle')]
    artists = [node.string for node in base.select(soup, 'span.artist')]
    # zip() yields a one-shot iterator on Python 3 and cannot be indexed,
    # so materialise the pairs before picking the last one.
    pairs = list(zip(artists, titles))
    # get the last artist and title
    self.artist, self.title = pairs[-1]
def parse(self):
    """Store the first listed artist and title on the instance.

    Call feed first. Reads ``self.pagecontent`` and collects the
    ``.string`` of every node matched by ``table tr a``. Entries at even
    positions are taken as artists, entries at odd positions as titles.
    The first (artist, title) pair is used: the title is stored as-is in
    ``self.title`` and the artist is passed through ``self.uncommafy``
    before being stored in ``self.artist``.

    Raises:
        IndexError: if fewer than two matching entries were found.
    """
    soup = base.Soup(self.pagecontent)
    elements = [element.string for element in base.select(soup, 'table tr a')]
    artists = elements[0::2]
    titles = elements[1::2]
    # zip() yields a one-shot iterator on Python 3 and cannot be indexed,
    # so materialise the pairs before picking the first one.
    pairs = list(zip(artists, titles))
    artist, self.title = pairs[0]
    self.artist = self.uncommafy(artist)
def parse(self):
    """Extract artist and title from the page content.

    Reads ``self.pagecontent``, locates the first ``tr td i`` match and
    collects every text node below that match's parent. The second text
    node becomes the artist and the last one the title; both are stripped
    and run through ``self.capstext`` before being stored on the instance.
    """
    document = base.Soup(self.pagecontent)
    # Workaround territory (some trouble with BeautifulSoup): anchor on one
    # known element and read the text nodes of its parent instead of
    # selecting the wanted values directly.
    anchor = base.select(document, 'tr td i')[0]
    fragments = anchor.parent.findAll(text=True)
    raw_artist, raw_title = fragments[1].strip(), fragments[-1].strip()
    self.artist = self.capstext(raw_artist)
    self.title = self.capstext(raw_title)
def parse(self):
    """Store the first listed artist and title on the instance.

    Call feed first. Builds the soup from ``self.pagecontent`` with HTML
    entity conversion enabled, pairs the ``.string`` of every ``td.bold``
    element (the artist) with the ``.string`` of the element returned by
    its ``findNextSibling()`` (hopefully the title), and assigns the first
    pair to ``self.artist`` and ``self.title``.
    """
    document = base.Soup(self.pagecontent, convertEntities='html')
    # One (artist, title) tuple per td.bold cell.
    pairs = [
        (cell.string, cell.findNextSibling().string)
        for cell in base.select(document, 'td.bold')
    ]
    self.artist, self.title = pairs[0]
def parse(self):
    """Store the first title and artist found after the last ``h2``.

    Call feed first. Reads ``self.pagecontent``, takes the element
    returned by ``findNextSibling()`` on the last ``h2`` match and scans
    its ``td`` cells. Cells that contain a link, whose string equals the
    filler literal below, or whose string matches the module-level
    ``timestamp`` pattern are skipped. Of the remaining cells the first
    is used as the title and the second as the artist, each passed
    through ``self.capstext``.

    Raises:
        IndexError: if fewer than two usable cells remain.
    """
    soup = base.Soup(self.pagecontent)
    heading_last = base.select(soup, "h2")[-1]
    track_table = heading_last.findNextSibling()
    useful = []
    for td in track_table.findAll("td"):
        # skip cells that contain a link
        if td.findAll("a"):
            continue
        # filter out non breaking spaces
        # NOTE(review): the literal is compared exactly; confirm it is the
        # intended filler character.
        if td.string == " ":
            continue
        # filter out dates
        if timestamp.match(td.string):
            continue
        useful.append(td)
    # The remaining cells alternate (title, artist). Only the first pair is
    # needed, so read it directly: pairing up every cell raised IndexError
    # on a trailing unpaired cell even when the first pair was intact.
    title_cell, artist_cell = useful[0], useful[1]
    self.title = self.capstext(title_cell.string)
    self.artist = self.capstext(artist_cell.string)