Python get_pages Beispiele

Programmiersprache: Python

Namespace / Paketname: OpenPages

Methode / Funktion: get_pages

Beispiele auf hotexamples.com: 8

Python get_pages – 8 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für OpenPages.get_pages, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

def get_glinfobasic(file, startpage=499, stoppage=712):
    """Build a basic info-table for every gloss in an inclusive page range.

    Args:
        file: Source document, passed through unchanged to ``get_pages``.
        startpage: First page to process (inclusive).
        stoppage: Last page to process (inclusive).

    Returns:
        A list of lists. The first sublist holds the column headers
        ``["Epistle", "Page", "Folio", "Gloss No.", "Gloss Text"]``. Every
        following sublist describes one gloss: the most recent "H2" heading
        seen (or "Unknown"), the page number, the folio label(s) whose text
        contains the gloss (or "No Folio Information Found"), and then, for
        each gloss number matched in the gloss, the number without its
        trailing ". " followed by the text after that number.
    """
    # Compiled once for the whole run: matches gloss numbers such as
    # "12. ", "3a. " or "10, 11. ".
    glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
    curepist = "Unknown"
    infolist = [["Epistle", "Page", "Folio", "Gloss No.", "Gloss Text"]]
    for page in range(startpage, stoppage + 1):
        # Fetch the page and its "SG" sections a single time and derive both
        # the gloss list and the folio list from the same text.
        pagetext = get_pages(file, page, page)
        epfunc = get_tagtext(pagetext, "H2")
        if epfunc:
            # An "H2" heading on this page replaces the current epistle name.
            curepist = epfunc[0]
        sgtext = "\n\n".join(get_section(pagetext, "SG"))
        glosslist = order_glosslist(clear_tags(sgtext))
        # get_fol yields (text, folio) pairs; store them as [folio, text].
        foliolist = [
            [folinfo[1], folinfo[0]]
            for folinfo in get_fol(order_glosses(clear_tags(sgtext, "fol")))
        ]
        for gloss in glosslist:
            thisglosslist = [curepist, page]
            # Every folio whose text contains the whole gloss is recorded.
            folios = [fol for fol, text in foliolist if gloss in text]
            if not folios:
                # Fall back to the first 11 characters of the gloss —
                # presumably for glosses split by a folio marker; confirm.
                glossstub = gloss[:11]
                folios = [fol for fol, text in foliolist if glossstub in text]
            thisglosslist.extend(folios or ["No Folio Information Found"])
            for match in glossnopat.finditer(gloss):
                number = match.group()
                # Text is taken from the first occurrence of the number
                # string in the gloss, exactly as before.
                thisglosslist.extend([
                    number[:-2],
                    gloss[gloss.find(number) + len(number):]
                ])
            infolist.append(thisglosslist)
    return infolist

Beispiel #2

Datei anzeigen

def get_pageinfo(file, startpage=499, stoppage=712):
    """Collect the text of every page in an inclusive page range.

    Each page is fetched individually with ``get_pages(file, n, n)``.

    Returns:
        A list with one ``[page_number, page_text]`` pair per page, in
        ascending page order.
    """
    return [[pageno, get_pages(file, pageno, pageno)]
            for pageno in range(startpage, stoppage + 1)]

Beispiel #3

Datei anzeigen

def list_numbered_glosses(file, startpage, stoppage):
    """List glosses by their folio ID and gloss number.

    Args:
        file: Source document, passed through unchanged to ``get_pages``.
        startpage: First page to process (inclusive).
        stoppage: Last page to process (inclusive).

    Returns:
        A list of ``[gloss_id, gloss_text]`` pairs. ``gloss_id`` is the folio
        label with its first three characters dropped, followed by the
        matched gloss number without its trailing space; ``gloss_text`` is
        ``cleangloss`` applied to the gloss. One pair is added per gloss
        number matched, for every folio whose text contains the tag-cleared
        gloss.
    """
    # Compiled once: matches gloss numbers such as "12. ", "3a. ", "10, 11. ".
    numpat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
    glist = []
    for p in range(startpage, stoppage + 1):
        # Fetch the page's "SG" text once; both the folio contents and the
        # gloss list are derived from it.
        sgtext = "\n\n".join(get_section(get_pages(file, p, p), "SG"))
        fcont = get_fol(order_glosses(clear_tags(sgtext, "fol")))
        for g in order_glosslist(sgtext):
            # The tag-cleared gloss does not depend on the folio, so compute
            # it once per gloss rather than once per (gloss, folio) pair.
            raw_gloss = clear_tags(g)
            for fol in fcont:
                if raw_gloss not in fol[0]:
                    continue
                # Every match comes from raw_gloss itself, so no further
                # membership check is needed before recording it.
                for i in numpat.finditer(raw_gloss):
                    glist.append(
                        [fol[1][3:] + i.group()[:-1],
                         cleangloss(g)])
    return glist

Beispiel #4

Datei anzeigen

Datei: GetTrans.py Projekt: berstearns/WurzburgGlossParser

def get_transpageinfo(file, page):
    """Return the numbered English translations found on one page.

    The page's "Eng" sections are fetched via ``get_pages`` / ``get_section``,
    joined with blank lines, passed through ``clear_spectags(..., "fol")`` and
    then split on translation numbers such as "12. ", "3a. " or "27 – 29. ".

    Args:
        file: Source document, passed through unchanged to ``get_pages``.
        page: Page number; used as both the start and the stop page.

    Returns:
        A list of ``[number, translation]`` pairs in the order the numbers
        occur. ``number`` has everything from its last "." onwards removed;
        ``translation`` has newlines replaced by spaces, is stripped, and has
        footnote tags such as "[a]" rewritten as "<sup>a</sup>".
    """
    english = clear_spectags(
        "\n\n".join(get_section(get_pages(file, page, page), "Eng")), "fol")
    englishnums = []
    englishlines = []
    # Translation number: optional "NN – " range prefix, 1-2 digits, optional
    # letter, then ". ".
    engpat = re.compile(r'(\d{1,2} – )?\d{1,2}[a-z]?\. ')
    engpatitir = engpat.finditer(english)
    # find the numbers in the english text, add them to a list
    for i in engpatitir:
        englishnums.append(i.group())
    # using the numbers as markers, identify the strings associated with the numbers, add them to a list
    for i in range(len(englishnums)):
        numlen = len(englishnums[i])
        # if the current number isn't the last number, the string is from this number to the next number
        if i != len(englishnums) - 1:
            # substring test: is the next number's text contained in the current number's text?
            if englishnums[i + 1] not in englishnums[i]:
                line = english[english.find(englishnums[i]) +
                               numlen:english.find(englishnums[i + 1])]
            # account for situations where the next number is in the current number
            # eg. "1. " is in "21. " on page 503
            else:
                # skip the first occurrence of the next number (the one inside the
                # current number) and look for its second occurrence
                scrap = english
                firstspot = scrap.find(englishnums[i + 1])
                scrap = scrap[firstspot + len(englishnums[i + 1]):]
                # NOTE(review): secondspot is computed relative to `scrap` (a suffix of
                # `english`) but is used below as an index into `english`; it looks
                # offset by `firstspot` — confirm this is intended.
                secondspot = scrap.find(englishnums[i + 1]) + len(
                    englishnums[i + 1])
                line = english[english.find(englishnums[i]) +
                               numlen:secondspot]
            # consume the processed text: drop the first numlen characters, then
            # everything before the next number, so later find() calls only see
            # the remaining text
            # NOTE(review): the first slice presumably assumes `english` starts with
            # the current number — verify for the first number on a page.
            english = english[numlen:]
            english = english[english.find(englishnums[i + 1]):]
        # if the current number is the last number, the string is from this number to the end
        else:
            line = english[english.find(englishnums[i]) + numlen:]
        # flatten the translation onto a single line and trim surrounding whitespace
        line = line.split("\n")
        line = " ".join(line)
        line = line.strip()
        englishlines.append(line)
    translist = []
    # remove full stop and space from end of number
    for i in range(len(englishnums)):
        thistransnum = englishnums[i]
        englishnums[i] = thistransnum[:thistransnum.rfind(".")]
    # add number and trans to list, then list to translist
    for i in range(len(englishnums)):
        thislist = [englishnums[i], englishlines[i]]
        translist.append(thislist)
    # edit translations to include html superscript footnotes instead of footnote tags
    for i in range(len(translist)):
        translationpair = translist[i]
        if "[" in translationpair[1]:
            fixedtrans = translationpair[1]
            newpair = [translationpair[0]]
            # footnote tag: a single word character in square brackets, e.g. "[a]"
            fnpat = re.compile(r'\[\w\]')
            fnpatitir = fnpat.finditer(translationpair[1])
            for fn in fnpatitir:
                fntags = fn.group()
                fntagless = fntags[1:-1]
                ss = "<sup>" + fntagless + "</sup>"
                # replace every occurrence of this tag with its superscript form
                fixedlist = fixedtrans.split(fntags)
                fixedtrans = ss.join(fixedlist)
            # only swap the pair in if a tag was actually rewritten
            if fixedtrans != translationpair[1]:
                newpair.append(fixedtrans)
                translist[i] = newpair
    return translist

Beispiel #5

Datei anzeigen

Datei: OrderFootnotes.py Projekt: AdeDoyle/WurzburgGlossParser

def order_footnotes(file, page):
    """Return the footnotes of a selected page as a single string.

    The page's "FN" sections are extracted and joined with blank lines
    between them; nothing is printed.
    """
    fn_sections = get_section(get_pages(file, page, page), "FN")
    return "\n\n".join(fn_sections)

Beispiel #6

Datei anzeigen

def get_glinfo(file, startpage=499, stoppage=712):
    """Build the full info-table for every gloss in an inclusive page range.

    Args:
        file: Source document, passed through unchanged to the page helpers.
        startpage: First page to process (inclusive).
        stoppage: Last page to process (inclusive).

    Returns:
        A list of lists. The first sublist contains the column headers.
        Each following sublist describes one gloss, in header order: epistle,
        page number, folio, gloss number, gloss text with all markup tags,
        display gloss text ([GLat]/[/GLat] converted to <em>/</em> and then
        passed through ``clear_tags``), gloss text with footnote markers as
        <sup> tags, the footnotes relevant to the gloss and its translation
        (a list, or "" when there are none), personal notes, and the
        translation.

    Raises:
        RuntimeError: If a personal note does not start with a gloss number.
    """
    curepist = "Unknown"
    infolist = [[
        "Epistle", "Page", "Folio", "Gloss No.", "Gloss Full-Tags",
        "Gloss Text", "Gloss Footnotes", "Relevant Footnotes",
        "Adrian's Notes", "Gloss Translation"
    ]]
    # Translations for the whole range; consumed front-to-back further below.
    # presumably a flat list of [translation id, translation text] pairs — verify
    pagestrans = get_transpagesinfo(file, startpage, stoppage)
    for page in range(startpage, stoppage + 1):
        thispage = page
        pagetext = get_pages(file, thispage, thispage)
        # Gets all page Footnotes for the first time (for the gloss)
        footnotelist = order_footlist(file, page)
        # Gets all notes supplied by me (for the gloss)
        notelist = order_newlist(file, page)
        # Parsed notes as [folio, gloss number, note text] triples.
        newnotelist = list()
        notefol = False
        if notelist != ['']:
            for notenum, note in enumerate(notelist):
                # Folio tags inside a note, e.g. "[f. 1a]" or "[/f. 1a]".
                noteidpat = re.compile(r'\[/?f\. \d{1,2}[a-d]\]')
                noteiditer = noteidpat.findall(note)
                if noteiditer:
                    # Strip every folio tag from the note text.
                    for folinfo in noteiditer:
                        note = "".join(note.split(folinfo))
                    # Folio label = first tag without "[", "]" and "/".
                    folinfo = "".join(i for i in noteiditer[0]
                                      if i not in ["[", "]", "/"])
                    # Both branches assign the same value: notefol tracks the
                    # most recent folio tag seen, and carries over to later
                    # notes that have no tag of their own.
                    if notenum == 0:
                        notefol = folinfo
                    elif notefol != folinfo:
                        notefol = folinfo
                # A note must begin with the number of the gloss it belongs to.
                notenumpat = re.compile(r'^\d{1,2}[a-z]?\. ')
                notenumiter = notenumpat.findall(note)
                if not notenumiter:
                    raise RuntimeError(
                        f"Personal note found without link to gloss number.\nNote: {note}"
                    )
                # NOTE(review): the pattern is anchored with "^" and compiled
                # without re.MULTILINE, so findall should return at most one
                # match and that match is the note's prefix — this branch
                # looks unreachable; confirm.
                elif len(notenumiter
                         ) > 1 or notenumiter[0] != note[:len(notenumiter[0])]:
                    raise RuntimeError(
                        f"Multiple possible gloss numbers found for personal note.\nNote: {note}"
                    )
                elif notenumiter[0] == note[:len(notenumiter[0])]:
                    # Drop the trailing ". " from the number and the number
                    # itself from the note text.
                    glossnum = notenumiter[0][:-2]
                    note = note[len(notenumiter[0]):].strip()
                newnotelist.append([notefol, glossnum, note])
        # Checks for a new epistle on the current page.
        epfunc = get_tagtext(pagetext, "H2")
        if epfunc:
            curepist = epfunc[0]
        # Identifies individual glosses on the current page, and adds them to a gloss-list.
        glosslist = order_glosslist(
            clear_spectags("\n\n".join(get_section(pagetext, "SG")), "fol"))
        foliolist = []
        # Creates a list of folios and related gloss text for the current page.
        for folinfo in get_fol(
                order_glosses("\n\n".join(
                    get_section(get_pages(file, thispage, thispage), "SG")))):
            folio = folinfo[1]
            foliotext = folinfo[0]
            foliolist.append([folio, foliotext])
        # Creates a list with the current epistle name and page,
        # Checks for each gloss on the current page which folio it is in,
        # Adds folio information to the list.
        for gloss in glosslist:
            thisglosslist = [curepist, thispage]
            glossfound = False
            # NOTE(review): a folio is appended for every folio text containing
            # the gloss; more than one hit would shift all later columns —
            # confirm a gloss can only match one folio.
            for foltextlist in foliolist:
                if gloss in foltextlist[1]:
                    thisfolio = foltextlist[0]
                    thisglosslist.append(thisfolio)
                    glossfound = True
            # Fallback: match on the first 11 characters of the gloss only.
            if not glossfound:
                glossstub = gloss[:11]
                for foltextlist in foliolist:
                    if glossstub in foltextlist[1]:
                        thisfolio = foltextlist[0]
                        thisglosslist.append(thisfolio)
                        glossfound = True
            if not glossfound:
                thisglosslist.append("No Folio Information Found")
            # Identifies gloss numbers and removes them from the gloss text.
            glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
            glosspatitir = glossnopat.finditer(gloss)
            # NOTE(review): the row is extended once per number matched; the
            # fixed column indices used later assume exactly one match per
            # gloss — confirm.
            for i in glosspatitir:
                # Adds gloss number to list.
                thisglosslist.append(i.group()[:-2])
                # Identifies foundational gloss including all markup tags.
                glossfulltags = gloss[gloss.find(i.group()) + len(i.group()):]
                # Creates a display copy of the gloss text, replacing Latin tags with html emphasis tags.
                glosstext = glossfulltags
                if "[GLat]" in glosstext:
                    glosstextlist = glosstext.split("[GLat]")
                    glosstext = "<em>".join(glosstextlist)
                if "[/GLat]" in glosstext:
                    glosstextlist = glosstext.split("[/GLat]")
                    glosstext = "</em>".join(glosstextlist)
                # Creates 2 copies of display gloss text, one primary, one retaining footnotes in superscript tags.
                basegloss = clear_tags(glosstext)
                footnotesgloss = glosstext[:]
                # Footnote tags: "[a]" (opening / single) or "[/a]" (closing).
                footnotepat = re.compile(r'\[/?[a-z]\]')
                fnpatitir = footnotepat.finditer(footnotesgloss)
                fnlist = []
                for j in fnpatitir:
                    fnlist.append(j.group())
                # No footnote tags: the "Relevant Footnotes" column is "".
                if not fnlist:
                    fnstring = ""
                    thisglosslist.extend([
                        glossfulltags, basegloss,
                        clear_tags(footnotesgloss), fnstring
                    ])
                if fnlist:
                    # For each closing tag "[/x]": remove the matching opening
                    # tag "[x]" from the text, replace the closing tag with
                    # "<sup>x</sup>", and drop the opening tag from fnlist so
                    # the footnote is only processed once below.
                    # NOTE(review): fnlist is modified while being iterated;
                    # deleting an earlier element can make the loop skip the
                    # next tag — confirm this is harmless for the data.
                    for fntag in fnlist:
                        if "[/" in fntag:
                            endtag = fntag
                            begintag = "".join(endtag.split("/"))
                            footnotesgloss = "".join(
                                footnotesgloss.split(begintag))
                            tagplace = footnotesgloss.find(endtag)
                            footnotesgloss = footnotesgloss[:tagplace] + "<sup>" +\
                                             footnotesgloss[tagplace + 2: tagplace + 3] + "</sup>" +\
                                             footnotesgloss[tagplace + 4:]
                            if begintag in fnlist:
                                del fnlist[fnlist.index(begintag)]
                    glossfnlist = []
                    for footnote in fnlist:
                        # The letter is the second-to-last character of both
                        # "[x]" and "[/x]".
                        fnletter = footnote[-2]
                        # Collects footnotes relevant to this gloss and adds them to a list.
                        for fnote in footnotelist:
                            fnoteid = fnote[:1]
                            if fnletter == fnoteid:
                                glossfnlist.append(
                                    clear_tags(fnote[:1] + ":" + fnote[1:]))
                        # Any remaining occurrence of the tag becomes a superscript letter.
                        fnsuperscript = "<sup>" + fnletter + "</sup>"
                        footnotesgloss = fnsuperscript.join(
                            footnotesgloss.split(footnote))
                    thisglosslist.extend([
                        glossfulltags, basegloss,
                        clear_tags(footnotesgloss), glossfnlist
                    ])
                # Personal notes: fill column 8 with the note whose folio and
                # gloss number match columns 2 and 3, otherwise pad with "".
                if newnotelist:
                    for note in newnotelist:
                        folinfo = note[0]
                        glossnum = note[1]
                        notetext = note[2]
                        if folinfo == thisglosslist[
                                2] and glossnum == thisglosslist[3]:
                            if len(thisglosslist) == 8:
                                thisglosslist.extend([notetext])
                            elif len(thisglosslist) == 9:
                                # Overwrite a placeholder left by an earlier non-matching note.
                                if thisglosslist[8] == "":
                                    thisglosslist[8] = notetext
                        else:
                            anstring = ""
                            if len(thisglosslist) == 8:
                                thisglosslist.extend([anstring])
                elif not newnotelist:
                    anstring = ""
                    thisglosslist.extend([anstring])
            infolist.append(thisglosslist)
    # add translations to the end of the info-lists where they are available
    for infoset in infolist[
            1:]:  # exclude the first info-set containing the titles
        glossid = infoset[3]
        # The translation at the front of the queue is the candidate for this gloss.
        # NOTE(review): pagestrans[0] raises IndexError if the translations
        # run out before the glosses do — confirm that cannot happen.
        curpagetrans = pagestrans[0]
        curtransid = curpagetrans[0]
        curtrans = curpagetrans[1]
        # deal with the conjoined gloss on TPH p. 500 (1b10 + 1b11)
        # split the gloss id into the two numbers, use these to identify the two translations
        # conjoin the two translations and append them to the info-set for the conjoined gloss ids
        if ", " in glossid:
            glossidlist = glossid.split(", ")
            splittranslations = []
            for newid in glossidlist:
                if newid == curtransid:
                    splittranslations.append(curtrans)
                    # Consume the used translation and move on to the next one.
                    del pagestrans[0]
                    curpagetrans = pagestrans[0]
                    curtransid = curpagetrans[0]
                    curtrans = curpagetrans[1]
            joinedtrans = " i.e. ".join(splittranslations)
            if "[GLat]" in joinedtrans:
                transtextlist = joinedtrans.split("[GLat]")
                joinedtrans = "<em>".join(transtextlist)
            if "[/GLat]" in joinedtrans:
                transtextlist = joinedtrans.split("[/GLat]")
                joinedtrans = "</em>".join(transtextlist)
            infoset.append(joinedtrans)
        else:
            if glossid == curtransid:
                if "[GLat]" in curtrans:
                    transtextlist = curtrans.split("[GLat]")
                    curtrans = "<em>".join(transtextlist)
                if "[/GLat]" in curtrans:
                    transtextlist = curtrans.split("[/GLat]")
                    curtrans = "</em>".join(transtextlist)
                infoset.append(curtrans)
                del pagestrans[0]
            # deal with page 587 where glosses 27, 28, and 29 share the one translation, numbered '27 – 29.'.
            elif " – " in curtransid:
                # Expand "27 – 29" into ["27", "28", "29"].
                curtransidlist = curtransid.split(" – ")
                curtransidrange = [
                    int(curtransidlist[0]),
                    int(curtransidlist[1])
                ]
                idstart = curtransidrange[0]
                idstop = curtransidrange[1]
                curtransidlist = []
                for i in range(idstart, idstop + 1):
                    curtransidlist.append(str(i))
                # NOTE(review): if glossid is not in the range nothing is
                # appended, leaving this info-set without a translation column.
                if glossid in curtransidlist:
                    if "[GLat]" in curtrans:
                        transtextlist = curtrans.split("[GLat]")
                        curtrans = "<em>".join(transtextlist)
                    if "[/GLat]" in curtrans:
                        transtextlist = curtrans.split("[/GLat]")
                        curtrans = "</em>".join(transtextlist)
                    infoset.append(curtrans)
                # The shared translation is only consumed by the last gloss in the range.
                if glossid == curtransidlist[-1]:
                    del pagestrans[0]
            # if no translation is given in TPH
            else:
                infoset.append("No translation available.")
    # Gets all page Footnotes for the second time (for the translation)
    curpage = None
    for infoset in infolist[
            1:]:  # exclude the first info-set containing the titles
        thistransfns = []
        # ensures page footnotes are only generated once per page, and not for every gloss
        if curpage:
            if curpage != infoset[1]:
                curpage = infoset[1]
                footnotelist = order_footlist(file, curpage)
        elif not curpage:
            curpage = infoset[1]
            footnotelist = order_footlist(file, curpage)
        # NOTE(review): assumes the translation sits at index 9, i.e. every
        # info-set has exactly ten columns at this point — confirm.
        trans = infoset[9]
        # finds which translations have footnotes, looks for the associated footnote in the list generated above
        if "<sup>" in trans:
            superscriptpat = re.compile(r'<sup>\w</sup>')
            superscriptpatitir = superscriptpat.finditer(trans)
            for i in superscriptpatitir:
                # Character 5 of "<sup>x</sup>" is the footnote letter.
                fnid = i.group()[5]
                for footnote in footnotelist:
                    if footnote[0] == fnid:
                        # if the footnote is found and not already in the footnote list for the gloss it is added
                        if infoset[7]:
                            if clear_tags(footnote[:1] + ":" +
                                          footnote[1:]) not in infoset[7]:
                                thistransfns.append(
                                    clear_tags(footnote[:1] + ":" +
                                               footnote[1:]))
                        else:
                            thistransfns.append(
                                clear_tags(footnote[:1] + ":" + footnote[1:]))
        # all footnotes found for the gloss are combined
        # if there are translation footnotes
        if thistransfns:
            # if there are no gloss footnotes to add them to
            if not infoset[7]:
                infoset[7] = thistransfns
            # if there are gloss footnotes to add them to
            elif infoset[7]:
                for i in thistransfns:
                    infoset[7].append(i)
    return infolist

Beispiel #7

Datei anzeigen

Datei: GetAllInfo.py Projekt: berstearns/WurzburgGlossParser

def get_allinfo(file, startpage, stoppage=None):
    """Build an info-table linking each gloss to the Latin text it glosses.

    Args:
        file: Source document, passed through unchanged to the page helpers.
        startpage: First page to process (inclusive).
        stoppage: Last page to process (inclusive); defaults to ``startpage``
            so that a single page is processed.

    Returns:
        A list of lists. The first sublist contains the nine column headers.
        Each following sublist describes one gloss, in header order: page
        number, folio, gloss number, gloss text, Latin lemma, position of the
        lemma in the Latin text, verse reference (prefixed with chapter
        numeral and epistle name where available), the glossed Latin text,
        and a list of footnotes referenced from the Latin text.
    """
    if stoppage is None:
        stoppage = startpage
    infolist = [[
        "Page", "Folio", "Gloss No.", "Gloss Text", "Lemma", "Lemma Position",
        "Verse", "Glossed Latin", "Latin Footnotes"
    ]]
    for page in range(startpage, stoppage + 1):
        thispage = page
        # Gets all page Footnotes (for the Latin)
        footnotelist = order_footlist(file, page)
        # Collect folio information, one page at a time
        foliolist = []
        for folinfo in get_fol(
                order_glosses(
                    clear_tags(
                        "\n\n".join(
                            get_section(get_pages(file, thispage, thispage),
                                        "SG")), "fol"))):
            folio = folinfo[1]
            foliotext = folinfo[0]
            foliolist.append([folio, foliotext])
        # Gets gloss, glossed latin, lemma and lemma position for all glosses, one page at a time
        for sublist in get_latpageinfo(file, page):
            glosslistplus = [thispage]
            thisgloss = sublist[0]
            glossfound = False
            # For each folio on the page compares gloss from Latin list to the gloss in the folio list
            # If found, folio is identified for gloss
            for foltextlist in foliolist:
                if thisgloss in foltextlist[1]:
                    thisfolio = foltextlist[0]
                    glosslistplus.append(thisfolio)
                    glossfound = True
            # If gloss not found still, compares first eleven characters of gloss from Latin list to gloss in folio list
            # Only seems to affect f.2a 21 ([f. 2b]) marker bisects gloss in TPH
            # NOTE(review): unlike the full-text match, this fallback has no
            # placeholder when nothing matches; the row then has no folio
            # column and every later index (including [6], the verse) shifts
            # by one — confirm this cannot occur.
            if not glossfound:
                glossstub = thisgloss[:11]
                for foltextlist in foliolist:
                    if glossstub in foltextlist[1]:
                        thisfolio = foltextlist[0]
                        glosslistplus.append(thisfolio)
            # Gets each gloss on this page by finding its number
            # Returns gloss number and then the gloss in a list for each gloss
            # Adds each list to a gloss list for the page
            glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
            glosspatitir = glossnopat.finditer(thisgloss)
            glosslist = []
            for i in glosspatitir:
                glosslist.extend([
                    i.group(),
                    thisgloss[thisgloss.find(i.group()) + len(i.group()):]
                ])
            # Identifies glossno, glosstext, latin text, and latin lemma from their lists (all already found)
            # NOTE(review): glosslist[0] raises IndexError if the gloss
            # contains no gloss number; only the first match is used.
            glossno = glosslist[0]
            glosstext = glosslist[1]
            lempos = sublist[3]
            lemma = sublist[2]
            latin = sublist[1]
            rawfns = []
            fns = []
            # Footnote markers in the Latin text, e.g. "<sup>a</sup>".
            fnpat = re.compile(r'<sup>[a-d]</sup>')
            fnpatitir = fnpat.finditer(latin)
            for fn in fnpatitir:
                rawfns.append(fn.group())
            if rawfns:
                for rawfn in rawfns:
                    # Character 5 of "<sup>x</sup>" is the footnote letter.
                    fnletter = rawfn[5:6]
                    for fnote in footnotelist:
                        fnoteid = fnote[:1]
                        if fnletter == fnoteid:
                            fns.append(clear_tags(fnote[:1] + ":" + fnote[1:]))
            # Identifies Latin Verse Numbers and Latin text for that verse
            # Adds '00. ' to the start of every latin line in the page's Latin list (for folios that start with no no.)
            # 'Rom. ' is removed later, but must be included here for regex to work
            versenopat = re.compile(
                r'00\. (\[NV\]|((Rom\. )?([IVX]{1,4}\. )?(\d{1,2}[a-z]?, )?(\d{1,2}[a-z]?\. )?))'
            )
            latpatitir = versenopat.finditer("00. " + latin)
            for i in latpatitir:
                # Bare prefix: the line carries no verse number of its own.
                if i.group() == "00. ":
                    verseno = "0"
                elif i.group() == "00. [NV]":
                    verseno = "[NV]"
                else:
                    # Drop the "00. " prefix and the trailing ". ", then cut
                    # the verse number (plus ". ") off the front of the Latin.
                    verseno = (i.group())[4:-2]
                    latin = latin[len(verseno) + 2:]
            # Adds glossno, glosstext, lemma, verseno, and Latin text to glosslistplus
            # Now glosslistplus is: pageno, folio, glossno, glosstext, lemma, lempos, verseno, Latin text, and footnotes
            # Adds glosslistplus to infolist (which is returned once fixed)
            glosslistplus.extend([
                glossno[:glossno.rfind(".")], glosstext, lemma, lempos,
                verseno,
                clear_spectags(latin, ["NV"]), fns
            ])
            infolist.append(glosslistplus)
    # Fixes versenos in infolist (combines numberless verses, adds chapter to verses with verseno only)
    # Adds all versenos from infolist to versenofixlist
    # NOTE(review): this iterates over the header row as well, so the header
    # text "Verse" takes part in the fixing below (and becomes the initial
    # "previous verse") — confirm that is harmless.
    versenofixlist = []
    for info in infolist:
        versenofixlist.append(info[6])
    # Joins all versenos together
    numsearch = "".join(versenofixlist)
    romnumlist = []
    # Finds all chapter numerals throughout all the verse info, adds these to romnumlist
    numeralpat = re.compile(r'(\[NV\]|[IVX]{1,4}\. )')
    numpatitir = numeralpat.finditer(numsearch)
    for numfind in numpatitir:
        if numfind.group() not in romnumlist:
            romnumlist.append(numfind.group())
    # Goes through every verseno in the versenofixlist
    curverse = "0"
    poscount = 0
    curnum = ""
    for item in versenofixlist:
        # Removes 'Rom. ' from the numeral
        if "Rom. " in item:
            versenofixlist[poscount] = item[5:]
            item = versenofixlist[poscount]
        if item != "0":
            curverse = item
        # Replaces no-number verses not at epistle boundaries with the number of the previous verse
        elif item == "0":
            versenofixlist[poscount] = curverse
            item = versenofixlist[poscount]
        # Replaces no-number verses at epistle boundaries with a comment that no information is available about verse.
        if item == "[NV]":
            versenofixlist[
                poscount] = "- No Chapter or Verse Information Available"
            item = versenofixlist[poscount]
        # Updates the current (previous) verse to the roman numeral of the current verse
        for romnum in romnumlist:
            if romnum in item:
                # Only a numeral at the very start of the verse counts as its chapter.
                if item.find(romnum) == 0:
                    curnum = romnum
        # Combines chapter numerals to verse numbers that don't have them already
        if curnum not in item:
            if item != "- No Chapter or Verse Information Available":
                versenofixlist[poscount] = curnum + item
        poscount += 1
    fixcount = 0
    # The versenos in infolist are updated with the corrected forms from versenofixlist
    for wronglist in infolist:
        wronglist[6] = versenofixlist[fixcount]
        fixcount += 1
    # Adds Epistle Name to chapter and verse
    epnames = [
        "Rom.", "1 Cor.", "2 Cor.", "Gal.", "Eph.", "Phil.", "1 Thes.",
        "2 Thes.", "Col.", "1 Tim.", "2 Tim.", "Tit.", "Philem.", "Heb."
    ]
    # eppages[k] is the page at which the lookup switches from epnames[k] to
    # epnames[k + 1].
    eppages = [
        543, 591, 619, 631, 643, 654, 663, 669, 679, 690, 698, 703, 705, 713
    ]
    spot = 0
    # NOTE(review): `spot` only advances when a row's page equals
    # eppages[spot]; a row whose page is already past that boundary (e.g. a
    # startpage after 543, or a boundary page with no rows) gets no epistle
    # prefix, and a row on page 713 would index past the end of epnames —
    # confirm the intended page ranges.
    for item in infolist[1:]:
        if int(item[0]) < eppages[spot]:
            item[6] = epnames[spot] + " " + item[6]
        elif int(item[0]) == eppages[spot]:
            spot += 1
            item[6] = epnames[spot] + " " + item[6]
    return infolist

Beispiel #8

Datei anzeigen

Datei: GetLatInfo.py Projekt: berstearns/WurzburgGlossParser

# Gloss numbers as they appear in the Irish gloss text, e.g. "3. " or "3, 4. ".
_GLOSS_NUM_PAT = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
# Epistle / chapter / verse numbering (or the "[NV]" placeholder) in a Latin line.
_VERSE_PREFIX_PAT = re.compile(
    r'(\[NV\]|((Rom\. )?([IVX]{1,4}\. )?(\d{1,2}[a-z]?, )?(\d{1,2}[a-z]?\. )?))'
)
# Footnote markers "[a]".."[d]" and their closing forms "[/a]".."[/d]".
_FOOTNOTE_PAT = re.compile(r'\[/?[a-d]\]')
_SUPERSCRIPT = "</em><sup>{}</sup><em>"


def _superscript_footnotes(text):
    """Return text with its footnote markers rewritten as HTML superscripts.

    For every closing marker "[/x]" found, the opening "[x]" markers are
    removed and the closing marker is replaced by a superscript "x".  Any
    marker still present afterwards (an opening marker with no closing
    partner) is replaced by a superscript of its own letter.
    """
    markers = _FOOTNOTE_PAT.findall(text)
    for marker in markers:
        if "[/" in marker:
            # "[/x]" -> drop every "[x]", then turn "[/x]" into the superscript.
            text = text.replace(marker[0] + marker[-2:], "")
            text = text.replace(marker, _SUPERSCRIPT.format(marker[2:3]))
    for marker in markers:
        if marker in text:
            text = text.replace(marker, _SUPERSCRIPT.format(marker[1:2]))
    return text


def _lemma_and_position(line, num):
    """Return (lemma, position) for the gloss tag num within a Latin line.

    The lemma is the last space-delimited token before the tag.  The position
    is the last index of that lemma in the text preceding the tag, after the
    "let" tags are cleared, a numbering prefix is cut off and footnote markers
    are converted to superscripts (-1 if the lemma is not found there).
    """
    before = line[:line.find(num)]
    lemma = before[before.rfind(" ") + 1:]
    if "[" in lemma:
        lemma = clear_tags(lemma)
    context = clear_tags(before, ["let"])
    # Defaults to "" for pages which begin mid-Latin-line (no numbering found).
    prefix = ""
    # NOTE(review): this keeps the LAST non-empty match found anywhere in the
    # text and cuts that many characters from the front; preserved as-is from
    # the original implementation - confirm this is the intended behaviour.
    for match in _VERSE_PREFIX_PAT.finditer(context):
        if match.group():
            prefix = match.group()
    context = _superscript_footnotes(context[len(prefix):])
    return lemma, context.rfind(lemma)


def get_latpageinfo(file, page):
    """Return a list of gloss-lists for a specified page of TPH.

    Each gloss-list contains the gloss [0], the Latin line it is tagged in
    [1], the lemma [2], and the lemma position [3].

    Args:
        file: Passed through to get_pages to identify the source text.
        page: The page number; used as both start and stop page for get_pages.

    Returns:
        A list with one four-item list per gloss number found in the page's
        gloss ("SG") section, in the order the numbers occur there.

    Raises:
        ValueError: If a gloss number from the gloss section has no matching
            tag in the page's Latin lines (previously this looped forever).
    """
    # The page text is fetched once and reused for both sections.
    pagetext = get_pages(file, page, page)
    latlines = order_latlist("\n\n".join(get_section(pagetext, "Lat")))
    sgtext = clear_tags("\n\n".join(get_section(pagetext, "SG")))
    eachgloss = order_glosslist(sgtext)
    glosses = order_glosses(sgtext)
    # Gets gloss numbers from the Irish text and converts them to match the
    # tags in the Latin text: "3. " -> "[3]", "3, 4. " -> "[3–4]".
    glossnums = [
        "[" + found.group()[:-2].replace(", ", "–") + "]"
        for found in _GLOSS_NUM_PAT.finditer(glosses)
    ]
    # A reversed copy of latlines is searched for repeated gloss numbers, so
    # that two glosses with the same number on one page do not both resolve
    # to the same (first) Latin line.
    backlist = list(latlines)[::-1]
    usednums = set()
    latininfolist = []
    for index, num in enumerate(glossnums):
        if num in usednums:
            searchlines = backlist
        else:
            usednums.add(num)
            searchlines = latlines
        latline = next(
            (candidate for candidate in searchlines if num in candidate), None)
        if latline is None:
            raise ValueError(
                "Gloss number {} not found in the Latin text of page {}".format(
                    num, page))
        lemma, position = _lemma_and_position(latline, num)
        latininfolist.append([
            eachgloss[index],
            clear_tags(_superscript_footnotes(latline), ["NV"]),
            clear_tags(lemma), position
        ])
    return latininfolist