# Code example #1
0
def _attachAlbumLink(element, album, modulesInfo):
    """Give *element* a lyrics-search link for *album*, or a popup menu.

    With modulesInfo set to None the element always gets the plain lyrics
    search link.  Otherwise the "Amazon" and "ListsOfBests" entries of
    modulesInfo are read; each truthy one adds a search item, and a popup
    menu is used only when there is more than one item to offer.
    """
    lyricsLink = "s+lyricssearch:;;" + album + ";;"
    if modulesInfo is None:
        element.setHyperlink(lyricsLink)
        return

    menuItems = [["Search Lyrics", lyricsLink, False, True, False]]
    if modulesInfo["Amazon"]:
        amazonLink = "s+amazonsearch:Music;;1;" + album
        menuItems.append(["Search Amazon", amazonLink, False, False, False])
    if modulesInfo["ListsOfBests"]:
        bestsLink = "s+listsofbestssearch:" + album + ";Music;Title"
        menuItems.append(
            ["Search Lists of Bests", bestsLink, False, False, False])
    # TODO: more?

    if len(menuItems) > 1:
        element.setHyperlink(buildPopupMenu(menuItems))
    else:
        element.setHyperlink(lyricsLink)


def _splitTrailingYear(credit):
    """Split a trailing " in yyyy" off *credit*.

    Returns (artist, yearSuffix).  yearSuffix keeps its leading " in " and
    is None when credit does not end with " in " followed by four digits,
    or when nothing would be left in front of that suffix.
    """
    hasYear = (len(credit) > 8
               and credit[-8:-4] == " in "
               and credit[-4:].isdigit())
    if hasYear:
        return credit[:-8], credit[-8:]
    return credit, None


def tryParseItemDefinition(htmlTxt, modulesInfo):
    """Build a Definition for a single lyrics page.

    The page is fed to BeautifulSoup and three nodes are looked up: the
    "pageheader1" span (title), the optional "pageheader2" span (note
    lines separated by "<br>") and the "content1" cell (lyrics body).

    htmlTxt     -- HTML text of the page.
    modulesInfo -- None, or an object indexed by "Amazon" and
                   "ListsOfBests"; also handed on to setArtistLink.

    Returns (UNKNOWN_FORMAT, None) when the title span or the content cell
    is missing; otherwise (LYRICS_ITEM, result of
    universalDataFormatWithDefinition for the built definition and the
    [["H", title]] header list).
    """
    soup = BeautifulSoup()
    soup.feed(htmlTxt)

    titleSpan = soup.first("span", {"class": "pageheader1"})
    noteSpan = soup.first("span", {"class": "pageheader2"})
    lyricsCell = soup.first("td", {"class": "content1"})
    if not lyricsCell or not titleSpan:
        return UNKNOWN_FORMAT, None

    df = Definition()
    title = getAllTextFromTag(titleSpan).strip()
    if noteSpan:
        noteEnd = getLastElementFromTag(noteSpan).next
        note = getAllTextFromToInBrFormat(noteSpan, noteEnd).strip()
    else:
        note = ""
    lyricsEnd = getLastElementFromTag(lyricsCell).next
    text = getAllTextFromToInBrFormat(lyricsCell, lyricsEnd).strip()

    # Header: bold title followed by the note lines.
    df.TextElement(title).setStyle('bold')

    albumPrefix = "from the album "
    performerPrefix = "performed by "
    for rawLine in note.split("<br>"):
        line = rawLine.strip()
        if line.startswith(albumPrefix):
            df.TextElement(" from ")
            album = line[len(albumPrefix):]
            albumElement = df.TextElement(album)
            _attachAlbumLink(albumElement, album, modulesInfo)
        elif line.startswith(performerPrefix):
            df.TextElement(" by ")
            # The year, if any, is shown as plain text outside the link.
            artist, yearSuffix = _splitTrailingYear(
                line[len(performerPrefix):])
            artistElement = df.TextElement(artist)
            if yearSuffix is not None:
                df.TextElement(yearSuffix)
            setArtistLink(artistElement, artist, modulesInfo)
        else:
            df.TextElement(line)

    df.LineBreakElement(1, 2)

    # Body: centred paragraph holding the lyrics.
    paragraph = df.ParagraphElement(False)
    paragraph.setJustification(justCenter)

    # When every break in the lyrics is a doubled "<br> <br>", fold each
    # pair into a single "<br>".  The "###2br###" placeholder is assumed
    # not to occur in the lyrics themselves.
    folded = text.replace("<br> <br>", "###2br###")
    if "<br>" not in folded:
        text = folded.replace("###2br###", "<br>")

    afterBreak = False
    for chunk in text.split("<br>"):
        content = chunk.strip()
        if not content and afterBreak:
            # Blank line directly after a break: use the (1, 2) break.
            df.LineBreakElement(1, 2)
            continue
        if content:
            df.TextElement(content)
        df.LineBreakElement()
        afterBreak = True
    df.PopParentElement()

    return LYRICS_ITEM, universalDataFormatWithDefinition(df, [["H", title]])