def _appendAlbumNote(df, album, modulesInfo):
    """Append the album name to *df* and attach a search hyperlink to it.

    With no *modulesInfo* (None) the element links straight to the lyrics
    search. Otherwise a popup menu is built from the lyrics search plus one
    entry for each of the "Amazon" / "ListsOfBests" flags that is truthy;
    the popup is only used when it ends up with more than one entry.
    """
    albumElement = df.TextElement(album)
    lyricsLink = "s+lyricssearch:;;" + album + ";;"
    target = lyricsLink
    if modulesInfo is not None:
        # NOTE(review): the three trailing booleans are passed through to
        # buildPopupMenu unchanged; their meaning is not visible here.
        menu = [["Search Lyrics", lyricsLink, False, True, False]]
        if modulesInfo["Amazon"]:
            menu.append([
                "Search Amazon",
                "s+amazonsearch:Music;;1;" + album,
                False, False, False,
            ])
        if modulesInfo["ListsOfBests"]:
            menu.append([
                "Search Lists of Bests",
                "s+listsofbestssearch:" + album + ";Music;Title",
                False, False, False,
            ])
        # TODO: more?
        if len(menu) > 1:
            target = buildPopupMenu(menu)
    albumElement.setHyperlink(target)


def _appendPerformerNote(df, performer, modulesInfo):
    """Append the performer to *df*, splitting off a trailing " in yyyy".

    When the text is longer than 8 characters and ends with " in " followed
    by four digits, that suffix is emitted as its own text element so that
    only the bare artist name is handed to setArtistLink.
    """
    yearSuffix = ""
    endsWithYear = (
        len(performer) > 8
        and performer[-8:-4] == " in "
        and performer[-4:].isdigit()
    )
    if endsWithYear:
        yearSuffix = performer[-8:]
        performer = performer[:-8]
    performerElement = df.TextElement(performer)
    if yearSuffix:
        df.TextElement(yearSuffix)
    setArtistLink(performerElement, performer, modulesInfo)


def _appendLyricsBody(df, text):
    """Append the "<br>"-separated lyrics *text* to *df*, line by line.

    If every "<br>" in the text belongs to a "<br> <br>" pair, each pair is
    first collapsed to a single "<br>"; otherwise the text is used as is.
    """
    # Placeholder trick: we hope there is no "###2br###" in the text itself.
    collapsed = text.replace("<br> <br>", "###2br###")
    if "<br>" not in collapsed:
        text = collapsed.replace("###2br###", "<br>")

    for position, rawLine in enumerate(text.split("<br>")):
        lyricLine = rawLine.strip()
        if lyricLine:
            df.TextElement(lyricLine)
        # A plain break follows every non-empty line and the very first
        # segment; any later empty segment gets the (1, 2) break instead.
        if lyricLine or position == 0:
            df.LineBreakElement()
        else:
            df.LineBreakElement(1, 2)


def tryParseItemDefinition(htmlTxt, modulesInfo):
    """Parse a lyrics item page into a definition.

    Looks up the "pageheader1" span (title), the optional "pageheader2"
    span (album / performer notes) and the "content1" td (lyrics text).

    Returns (UNKNOWN_FORMAT, None) when the title span or the content td is
    missing or falsy. Otherwise returns (LYRICS_ITEM, data) where data is
    built by universalDataFormatWithDefinition from the definition and an
    [["H", title]] entry.

    modulesInfo may be None; otherwise it is indexed with "Amazon" and
    "ListsOfBests" and forwarded to setArtistLink.
    """
    soup = BeautifulSoup()
    soup.feed(htmlTxt)
    titleSpan = soup.first("span", {"class": "pageheader1"})
    noteSpan = soup.first("span", {"class": "pageheader2"})
    contentTd = soup.first("td", {"class": "content1"})
    if not (contentTd and titleSpan):
        return UNKNOWN_FORMAT, None

    df = Definition()
    title = getAllTextFromTag(titleSpan).strip()
    if noteSpan:
        noteEnd = getLastElementFromTag(noteSpan).next
        note = getAllTextFromToInBrFormat(noteSpan, noteEnd).strip()
    else:
        note = ""
    contentEnd = getLastElementFromTag(contentTd).next
    text = getAllTextFromToInBrFormat(contentTd, contentEnd).strip()

    # Heading: bold title followed inline by the album / performer notes.
    df.TextElement(title).setStyle('bold')

    albumPrefix = "from the album "
    performerPrefix = "performed by "
    for rawNote in note.split("<br>"):
        noteLine = rawNote.strip()
        if noteLine.startswith(albumPrefix):
            df.TextElement(" from ")
            _appendAlbumNote(df, noteLine[len(albumPrefix):], modulesInfo)
        elif noteLine.startswith(performerPrefix):
            df.TextElement(" by ")
            _appendPerformerNote(
                df, noteLine[len(performerPrefix):], modulesInfo)
        else:
            df.TextElement(noteLine)
    df.LineBreakElement(1, 2)

    # Lyrics body goes into its own centered paragraph.
    paragraph = df.ParagraphElement(False)
    paragraph.setJustification(justCenter)
    _appendLyricsBody(df, text)
    df.PopParentElement()

    return LYRICS_ITEM, universalDataFormatWithDefinition(df, [["H", title]])