def get_glinfobasic(file, startpage=499, stoppage=712):
    """Build a basic info-table of every gloss in a page range.

    The first sublist holds the column headers. Every later sublist describes
    one gloss: Epistle, Page No., Folio, Gloss No. and tag-free Gloss Text.

    Args:
        file: source identifier handed unchanged to ``get_pages``.
        startpage: first page to process (inclusive).
        stoppage: last page to process (inclusive).

    Returns:
        A list of lists; ``infolist[0]`` is the header row.
    """
    # Compiled once: the pattern is the same for every gloss on every page.
    glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
    curepist = "Unknown"
    infolist = [["Epistle", "Page", "Folio", "Gloss No.", "Gloss Text"]]
    for page in range(startpage, stoppage + 1):
        pagetext = get_pages(file, page, page)
        # An "H2" tag on the page replaces the epistle name carried forward.
        epfunc = get_tagtext(pagetext, "H2")
        if epfunc:
            curepist = epfunc[0]
        # The page text is reused for both views of the "SG" section.
        sgtext = "\n\n".join(get_section(pagetext, "SG"))
        glosslist = order_glosslist(clear_tags(sgtext))
        # Each get_fol item carries the folio text at [0] and the folio at [1].
        foliolist = [
            [folinfo[1], folinfo[0]]
            for folinfo in get_fol(order_glosses(clear_tags(sgtext, "fol")))
        ]
        for gloss in glosslist:
            thisglosslist = [curepist, page]
            # Look for the whole gloss first, then for its first 11
            # characters. Only the first matching folio is recorded so every
            # row keeps exactly one folio cell.
            for needle in (gloss, gloss[:11]):
                matches = [fol for fol, text in foliolist if needle in text]
                if matches:
                    thisglosslist.append(matches[0])
                    break
            else:
                thisglosslist.append("No Folio Information Found")
            # Split the gloss number (minus its trailing ". ") from the text.
            for i in glossnopat.finditer(gloss):
                thisglosslist.extend([
                    (i.group())[:-2],
                    gloss[gloss.find(i.group()) + len(i.group()):]
                ])
            infolist.append(thisglosslist)
    return infolist
def testsectext(sec, startpage, stoppage, file="Wurzburg Glosses"):
    """Collect the text of one section for every page in a range.

    Args:
        sec: section tag handed to ``get_section``.
        startpage: first page to read (inclusive).
        stoppage: last page to read (inclusive).
        file: source identifier handed to ``get_pages``. Defaults to the
            value that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        A list of ``[page number as str, section text]`` lists, one per page.
        The matching sections of a page are joined with blank lines.
    """
    return [
        [str(page), "\n\n".join(get_section(get_pages(file, page, page), sec))]
        for page in range(startpage, stoppage + 1)
    ]
def list_numbered_glosses(file, startpage, stoppage):
    """List glosses by their folio ID and gloss number.

    Args:
        file: source identifier handed unchanged to ``get_pages``.
        startpage: first page to process (inclusive).
        stoppage: last page to process (inclusive).

    Returns:
        A list of ``[identifier, cleaned gloss]`` lists. The identifier is the
        folio label without its first three characters followed by the gloss
        number including its full stop; the gloss is passed through
        ``cleangloss``.
    """
    # Compiled once instead of once per gloss/folio combination.
    numpat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
    glist = []
    for p in range(startpage, stoppage + 1):
        # Fetch the page's "SG" text once and reuse it for both views.
        sgtext = "\n\n".join(get_section(get_pages(file, p, p), "SG"))
        fcont = get_fol(order_glosses(clear_tags(sgtext, "fol")))
        for g in order_glosslist(sgtext):
            # The tag-free gloss does not depend on the folio being checked.
            raw_gloss = clear_tags(g)
            numbers = [match.group() for match in numpat.finditer(raw_gloss)]
            for fol in fcont:
                if raw_gloss in fol[0]:
                    for number in numbers:
                        glist.append(
                            [fol[1][3:] + number[:-1], cleangloss(g)])
    return glist
def get_transpageinfo(file, page):
    """Return the numbered English translations found on one page.

    The page's "Eng" sections are joined, passed through
    ``clear_spectags(..., "fol")`` and cut up at every translation number
    (for example ``"12. "``, ``"3a. "`` or the range form ``"27 – 29. "``).

    Args:
        file: source identifier handed unchanged to ``get_pages``.
        page: the page to read.

    Returns:
        A list of translation lists. Each holds the translation number [0]
        (text before its last full stop) and the translation [1] with line
        breaks turned into spaces. Bracketed one-character footnote tags such
        as ``[a]`` in a translation are replaced by ``<sup>a</sup>``.
    """
    english = clear_spectags(
        "\n\n".join(get_section(get_pages(file, page, page), "Eng")), "fol")
    englishnums = []
    englishlines = []
    engpat = re.compile(r'(\d{1,2} – )?\d{1,2}[a-z]?\. ')
    engpatitir = engpat.finditer(english)
    # find the numbers in the english text, add them to a list
    for i in engpatitir:
        englishnums.append(i.group())
    # using the numbers as markers, identify the strings associated with the numbers, add them to a list
    # NOTE: `english` is consumed as the loop advances, so every find() below
    # is relative to the not-yet-processed remainder of the text.
    for i in range(len(englishnums)):
        numlen = len(englishnums[i])
        # if the current number isn't the last number, the string is from this number to the next number
        if i != len(englishnums) - 1:
            if englishnums[i + 1] not in englishnums[i]:
                line = english[english.find(englishnums[i]) +
                               numlen:english.find(englishnums[i + 1])]
            # account for situations where the next number is in the current number
            # eg. "1. " is in "21. " on page 503
            else:
                # Skip the occurrence of the next number that lies inside the
                # current one and look for the following occurrence.
                # NOTE(review): secondspot is an index into `scrap` (plus the
                # marker length) but is used to slice `english` - confirm the
                # offsets are intended.
                scrap = english
                firstspot = scrap.find(englishnums[i + 1])
                scrap = scrap[firstspot + len(englishnums[i + 1]):]
                secondspot = scrap.find(englishnums[i + 1]) + len(
                    englishnums[i + 1])
                line = english[english.find(englishnums[i]) +
                               numlen:secondspot]
            # Drop the text handled so far; restart at the next number.
            english = english[numlen:]
            english = english[english.find(englishnums[i + 1]):]
        # if the current number is the last number, the string is from this number to the end
        else:
            line = english[english.find(englishnums[i]) + numlen:]
        # Flatten the translation onto a single line.
        line = line.split("\n")
        line = " ".join(line)
        line = line.strip()
        englishlines.append(line)
    translist = []
    # remove full stop and space from end of number
    for i in range(len(englishnums)):
        thistransnum = englishnums[i]
        englishnums[i] = thistransnum[:thistransnum.rfind(".")]
    # add number and trans to list, then list to translist
    for i in range(len(englishnums)):
        thislist = [englishnums[i], englishlines[i]]
        translist.append(thislist)
    # edit translations to include html superscript footnotes instead of footnote tags
    for i in range(len(translist)):
        translationpair = translist[i]
        if "[" in translationpair[1]:
            fixedtrans = translationpair[1]
            newpair = [translationpair[0]]
            fnpat = re.compile(r'\[\w\]')
            fnpatitir = fnpat.finditer(translationpair[1])
            for fn in fnpatitir:
                fntags = fn.group()
                fntagless = fntags[1:-1]
                ss = "<sup>" + fntagless + "</sup>"
                fixedlist = fixedtrans.split(fntags)
                fixedtrans = ss.join(fixedlist)
            # Only replace the pair when a tag was actually converted.
            if fixedtrans != translationpair[1]:
                newpair.append(fixedtrans)
                translist[i] = newpair
    return translist
def order_footnotes(file, page):
    """Return the footnote ("FN") sections of one page as a single string.

    The sections are joined with blank lines; nothing is printed.
    """
    pagetext = get_pages(file, page, page)
    sections = get_section(pagetext, "FN")
    return "\n\n".join(sections)
def get_glinfo(file, startpage=499, stoppage=712):
    """Build the full info-table of every gloss in a page range.

    The first sublist holds the column headers. Every later sublist describes
    one gloss, by index:
        0 Epistle, 1 Page, 2 Folio, 3 Gloss No.,
        4 Gloss Full-Tags (gloss text with its markup, number removed),
        5 Gloss Text ([GLat] tags turned into <em> tags, other tags cleared),
        6 Gloss Footnotes (as 5, footnote tags turned into <sup> markers),
        7 Relevant Footnotes ("" or a list of "letter:footnote" strings),
        8 Adrian's Notes ("" or the personal note for this gloss),
        9 Gloss Translation (or "No translation available.").

    Args:
        file: source identifier handed unchanged to the page helpers.
        startpage: first page to process (inclusive).
        stoppage: last page to process (inclusive).

    Returns:
        The list of lists described above.

    Raises:
        RuntimeError: if a personal note does not start with exactly one
            gloss number.
    """
    curepist = "Unknown"
    infolist = [[
        "Epistle", "Page", "Folio", "Gloss No.", "Gloss Full-Tags",
        "Gloss Text", "Gloss Footnotes", "Relevant Footnotes",
        "Adrian's Notes", "Gloss Translation"
    ]]
    # Translations for the whole range; consumed from the front further down.
    pagestrans = get_transpagesinfo(file, startpage, stoppage)
    for page in range(startpage, stoppage + 1):
        thispage = page
        pagetext = get_pages(file, thispage, thispage)
        # Gets all page Footnotes for the first time (for the gloss)
        footnotelist = order_footlist(file, page)
        # Gets all notes supplied by me (for the gloss)
        notelist = order_newlist(file, page)
        # Each entry becomes [folio, gloss number, note text].
        newnotelist = list()
        notefol = False
        if notelist != ['']:
            for notenum, note in enumerate(notelist):
                # Folio markers such as "[f. 1a]" / "[/f. 1a]" are stripped
                # from the note; the first one names the note's folio.
                noteidpat = re.compile(r'\[/?f\. \d{1,2}[a-d]\]')
                noteiditer = noteidpat.findall(note)
                if noteiditer:
                    for folinfo in noteiditer:
                        note = "".join(note.split(folinfo))
                    folinfo = "".join(i for i in noteiditer[0]
                                      if i not in ["[", "]", "/"])
                    if notenum == 0:
                        notefol = folinfo
                    elif notefol != folinfo:
                        notefol = folinfo
                # A note must begin with the number of the gloss it refers to.
                notenumpat = re.compile(r'^\d{1,2}[a-z]?\. ')
                notenumiter = notenumpat.findall(note)
                if not notenumiter:
                    raise RuntimeError(
                        f"Personal note found without link to gloss number.\nNote: {note}"
                    )
                elif len(notenumiter) > 1 or notenumiter[0] != note[:len(notenumiter[0])]:
                    raise RuntimeError(
                        f"Multiple possible gloss numbers found for personal note.\nNote: {note}"
                    )
                elif notenumiter[0] == note[:len(notenumiter[0])]:
                    glossnum = notenumiter[0][:-2]
                    note = note[len(notenumiter[0]):].strip()
                newnotelist.append([notefol, glossnum, note])
        # Checks for a new epistle on the current page.
        epfunc = get_tagtext(pagetext, "H2")
        if epfunc:
            curepist = epfunc[0]
        # Identifies individual glosses on the current page, and adds them to a gloss-list.
        glosslist = order_glosslist(
            clear_spectags("\n\n".join(get_section(pagetext, "SG")), "fol"))
        foliolist = []
        # Creates a list of folios and related gloss text for the current page.
        for folinfo in get_fol(
                order_glosses("\n\n".join(
                    get_section(get_pages(file, thispage, thispage), "SG")))):
            folio = folinfo[1]
            foliotext = folinfo[0]
            foliolist.append([folio, foliotext])
        # Creates a list with the current epistle name and page,
        # Checks for each gloss on the current page which folio it is in,
        # Adds folio information to the list.
        for gloss in glosslist:
            thisglosslist = [curepist, thispage]
            glossfound = False
            for foltextlist in foliolist:
                if gloss in foltextlist[1]:
                    thisfolio = foltextlist[0]
                    thisglosslist.append(thisfolio)
                    glossfound = True
            # Falls back to the first 11 characters of the gloss.
            if not glossfound:
                glossstub = gloss[:11]
                for foltextlist in foliolist:
                    if glossstub in foltextlist[1]:
                        thisfolio = foltextlist[0]
                        thisglosslist.append(thisfolio)
                        glossfound = True
            if not glossfound:
                thisglosslist.append("No Folio Information Found")
            # Identifies gloss numbers and removes them from the gloss text.
            glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
            glosspatitir = glossnopat.finditer(gloss)
            for i in glosspatitir:
                # Adds gloss number to list.
                thisglosslist.append(i.group()[:-2])
                # Identifies foundational gloss including all markup tags.
                glossfulltags = gloss[gloss.find(i.group()) + len(i.group()):]
                # Creates a display copy of the gloss text, replacing Latin tags with html emphasis tags.
                glosstext = glossfulltags
                if "[GLat]" in glosstext:
                    glosstextlist = glosstext.split("[GLat]")
                    glosstext = "<em>".join(glosstextlist)
                if "[/GLat]" in glosstext:
                    glosstextlist = glosstext.split("[/GLat]")
                    glosstext = "</em>".join(glosstextlist)
                # Creates 2 copies of display gloss text, one primary, one retaining footnotes in superscript tags.
                basegloss = clear_tags(glosstext)
                footnotesgloss = glosstext[:]
                footnotepat = re.compile(r'\[/?[a-z]\]')
                fnpatitir = footnotepat.finditer(footnotesgloss)
                fnlist = []
                for j in fnpatitir:
                    fnlist.append(j.group())
                # No footnote tags: column 7 is an empty string.
                if not fnlist:
                    fnstring = ""
                    thisglosslist.extend([
                        glossfulltags, basegloss, clear_tags(footnotesgloss),
                        fnstring
                    ])
                if fnlist:
                    # For a closing tag "[/x]": drop its opening tag "[x]"
                    # and replace the closing tag with "<sup>x</sup>".
                    # NOTE(review): fnlist is shortened while it is being
                    # iterated - confirm no tag is skipped.
                    for fntag in fnlist:
                        if "[/" in fntag:
                            endtag = fntag
                            begintag = "".join(endtag.split("/"))
                            footnotesgloss = "".join(
                                footnotesgloss.split(begintag))
                            tagplace = footnotesgloss.find(endtag)
                            footnotesgloss = footnotesgloss[:tagplace] + "<sup>" +\
                                footnotesgloss[tagplace + 2: tagplace + 3] + "</sup>" +\
                                footnotesgloss[tagplace + 4:]
                            if begintag in fnlist:
                                del fnlist[fnlist.index(begintag)]
                    glossfnlist = []
                    for footnote in fnlist:
                        fnletter = footnote[-2]
                        # Collects footnotes relevant to this gloss and adds them to a list.
                        for fnote in footnotelist:
                            fnoteid = fnote[:1]
                            if fnletter == fnoteid:
                                glossfnlist.append(
                                    clear_tags(fnote[:1] + ":" + fnote[1:]))
                        fnsuperscript = "<sup>" + fnletter + "</sup>"
                        footnotesgloss = fnsuperscript.join(
                            footnotesgloss.split(footnote))
                    thisglosslist.extend([
                        glossfulltags, basegloss, clear_tags(footnotesgloss),
                        glossfnlist
                    ])
            # NOTE(review): the source reached review with its indentation
            # collapsed; the nesting of the note handling and of the final
            # append relative to the loop above was inferred - confirm.
            # Column 8: the personal note whose folio and gloss number match
            # this gloss, otherwise an empty string.
            if newnotelist:
                for note in newnotelist:
                    folinfo = note[0]
                    glossnum = note[1]
                    notetext = note[2]
                    if folinfo == thisglosslist[
                            2] and glossnum == thisglosslist[3]:
                        if len(thisglosslist) == 8:
                            thisglosslist.extend([notetext])
                        elif len(thisglosslist) == 9:
                            # Replace a placeholder left by an earlier,
                            # non-matching note.
                            if thisglosslist[8] == "":
                                thisglosslist[8] = notetext
                    else:
                        anstring = ""
                        if len(thisglosslist) == 8:
                            thisglosslist.extend([anstring])
            elif not newnotelist:
                anstring = ""
                thisglosslist.extend([anstring])
            infolist.append(thisglosslist)
    # add translations to the end of the info-lists where they are available
    for infoset in infolist[1:]:  # exclude the first info-set containing the titles
        glossid = infoset[3]
        # The next unused translation is always at the front of pagestrans.
        curpagetrans = pagestrans[0]
        curtransid = curpagetrans[0]
        curtrans = curpagetrans[1]
        # deal with the conjoined gloss on TPH p. 500 (1b10 + 1b11)
        # split the gloss id into the two numbers, use these to identify the two translations
        # conjoin the two translations and append them to the info-set for the conjoined gloss ids
        if ", " in glossid:
            glossidlist = glossid.split(", ")
            splittranslations = []
            for newid in glossidlist:
                if newid == curtransid:
                    splittranslations.append(curtrans)
                    del pagestrans[0]
                    curpagetrans = pagestrans[0]
                    curtransid = curpagetrans[0]
                    curtrans = curpagetrans[1]
            joinedtrans = " i.e. ".join(splittranslations)
            if "[GLat]" in joinedtrans:
                transtextlist = joinedtrans.split("[GLat]")
                joinedtrans = "<em>".join(transtextlist)
            if "[/GLat]" in joinedtrans:
                transtextlist = joinedtrans.split("[/GLat]")
                joinedtrans = "</em>".join(transtextlist)
            infoset.append(joinedtrans)
        else:
            if glossid == curtransid:
                if "[GLat]" in curtrans:
                    transtextlist = curtrans.split("[GLat]")
                    curtrans = "<em>".join(transtextlist)
                if "[/GLat]" in curtrans:
                    transtextlist = curtrans.split("[/GLat]")
                    curtrans = "</em>".join(transtextlist)
                infoset.append(curtrans)
                del pagestrans[0]
            # deal with page 587 where glosses 27, 28, and 29 share the one translation, numbered '27 – 29.'.
            elif " – " in curtransid:
                curtransidlist = curtransid.split(" – ")
                curtransidrange = [
                    int(curtransidlist[0]),
                    int(curtransidlist[1])
                ]
                idstart = curtransidrange[0]
                idstop = curtransidrange[1]
                curtransidlist = []
                for i in range(idstart, idstop + 1):
                    curtransidlist.append(str(i))
                if glossid in curtransidlist:
                    if "[GLat]" in curtrans:
                        transtextlist = curtrans.split("[GLat]")
                        curtrans = "<em>".join(transtextlist)
                    if "[/GLat]" in curtrans:
                        transtextlist = curtrans.split("[/GLat]")
                        curtrans = "</em>".join(transtextlist)
                    infoset.append(curtrans)
                    # The shared translation is used up by the last gloss
                    # of the range.
                    if glossid == curtransidlist[-1]:
                        del pagestrans[0]
            # if no translation is given in TPH
            else:
                infoset.append("No translation available.")
    # Gets all page Footnotes for the second time (for the translation)
    curpage = None
    for infoset in infolist[1:]:  # exclude the first info-set containing the titles
        thistransfns = []
        # ensures page footnotes are only generated once per page, and not for every gloss
        if curpage:
            if curpage != infoset[1]:
                curpage = infoset[1]
                footnotelist = order_footlist(file, curpage)
        elif not curpage:
            curpage = infoset[1]
            footnotelist = order_footlist(file, curpage)
        trans = infoset[9]
        # finds which translations have footnotes, looks for the associated footnote in the list generated above
        if "<sup>" in trans:
            superscriptpat = re.compile(r'<sup>\w</sup>')
            superscriptpatitir = superscriptpat.finditer(trans)
            for i in superscriptpatitir:
                # Character 5 of "<sup>x</sup>" is the footnote letter.
                fnid = i.group()[5]
                for footnote in footnotelist:
                    if footnote[0] == fnid:
                        # if the footnote is found and not already in the footnote list for the gloss it is added
                        if infoset[7]:
                            if clear_tags(footnote[:1] + ":" +
                                          footnote[1:]) not in infoset[7]:
                                thistransfns.append(
                                    clear_tags(footnote[:1] + ":" +
                                               footnote[1:]))
                        else:
                            thistransfns.append(
                                clear_tags(footnote[:1] + ":" + footnote[1:]))
        # all footnotes found for the gloss are combined
        # if there are translation footnotes
        if thistransfns:
            # if there are no gloss footnotes to add them to
            if not infoset[7]:
                infoset[7] = thistransfns
            # if there are gloss footnotes to add them to
            elif infoset[7]:
                for i in thistransfns:
                    infoset[7].append(i)
    return infolist
def get_allinfo(file, startpage, stoppage=None):
    """Build an info-table linking every gloss to its Latin context.

    The first sublist holds the column headers. Every later sublist describes
    one gloss: page no., folio, gloss no., gloss text, Latin lemma, position
    of the lemma in the Latin text, Latin verse reference, the glossed Latin
    text, and the Latin footnotes that apply.

    Args:
        file: source identifier handed unchanged to the page helpers.
        startpage: first page to process (inclusive).
        stoppage: last page to process (inclusive); defaults to ``startpage``.

    Returns:
        A list of lists; ``infolist[0]`` is the header row.
    """
    if stoppage is None:
        stoppage = startpage
    infolist = [[
        "Page", "Folio", "Gloss No.", "Gloss Text", "Lemma", "Lemma Position",
        "Verse", "Glossed Latin", "Latin Footnotes"
    ]]
    for page in range(startpage, stoppage + 1):
        thispage = page
        # Gets all page Footnotes (for the Latin)
        footnotelist = order_footlist(file, page)
        # Collect folio information, one page at a time
        foliolist = []
        for folinfo in get_fol(
                order_glosses(
                    clear_tags(
                        "\n\n".join(
                            get_section(get_pages(file, thispage, thispage),
                                        "SG")), "fol"))):
            folio = folinfo[1]
            foliotext = folinfo[0]
            foliolist.append([folio, foliotext])
        # Gets gloss, glossed latin, lemma and lemma position for all glosses, one page at a time
        for sublist in get_latpageinfo(file, page):
            glosslistplus = [thispage]
            thisgloss = sublist[0]
            glossfound = False
            # For each folio on the page compares gloss from Latin list to the gloss in the folio list
            # If found, folio is identified for gloss
            for foltextlist in foliolist:
                if thisgloss in foltextlist[1]:
                    thisfolio = foltextlist[0]
                    glosslistplus.append(thisfolio)
                    glossfound = True
            # If gloss not found still, compares the first 11 characters of gloss from Latin list to gloss in folio list
            # Only seems to affect f.2a 21 ([f. 2b]) marker bisects gloss in TPH
            if not glossfound:
                glossstub = thisgloss[:11]
                for foltextlist in foliolist:
                    if glossstub in foltextlist[1]:
                        thisfolio = foltextlist[0]
                        glosslistplus.append(thisfolio)
            # Gets each gloss on this page by finding its number
            # Returns gloss number and then the gloss in a list for each gloss
            # Adds each list to a gloss list for the page
            glossnopat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
            glosspatitir = glossnopat.finditer(thisgloss)
            glosslist = []
            for i in glosspatitir:
                glosslist.extend([
                    i.group(),
                    thisgloss[thisgloss.find(i.group()) + len(i.group()):]
                ])
            # Identifies glossno, glosstext, latin text, and latin lemma from their lists (all already found)
            # Only the first number/text pair is used.
            glossno = glosslist[0]
            glosstext = glosslist[1]
            lempos = sublist[3]
            lemma = sublist[2]
            latin = sublist[1]
            # Collects the page footnotes whose letter appears as a
            # superscript marker in the Latin text.
            rawfns = []
            fns = []
            fnpat = re.compile(r'<sup>[a-d]</sup>')
            fnpatitir = fnpat.finditer(latin)
            for fn in fnpatitir:
                rawfns.append(fn.group())
            if rawfns:
                for rawfn in rawfns:
                    fnletter = rawfn[5:6]
                    for fnote in footnotelist:
                        fnoteid = fnote[:1]
                        if fnletter == fnoteid:
                            fns.append(
                                clear_tags(fnote[:1] + ":" + fnote[1:]))
            # Identifies Latin Verse Numbers and Latin text for that verse
            # Adds '00. ' to the start of every latin line in the page's Latin list (for folios that start with no no.)
            # 'Rom. ' is removed later, but must be included here for regex to work
            versenopat = re.compile(
                r'00\. (\[NV\]|((Rom\. )?([IVX]{1,4}\. )?(\d{1,2}[a-z]?, )?(\d{1,2}[a-z]?\. )?))'
            )
            latpatitir = versenopat.finditer("00. " + latin)
            for i in latpatitir:
                if i.group() == "00. ":
                    verseno = "0"
                elif i.group() == "00. [NV]":
                    verseno = "[NV]"
                else:
                    # Strip the "00. " prefix and trailing ". " from the
                    # match, then cut the verse number off the Latin text.
                    verseno = (i.group())[4:-2]
                    latin = latin[len(verseno) + 2:]
            # Adds glossno, glosstext, lemma, verseno, and Latin text to glosslistplus
            # Now glosslistplus is: pageno, folio, glossno, glosstext, lemma, lempos, verseno, Latin text, footnotes
            # Adds glosslistplus to infolist (which is returned once fixed)
            glosslistplus.extend([
                glossno[:glossno.rfind(".")], glosstext, lemma, lempos,
                verseno,
                clear_spectags(latin, ["NV"]), fns
            ])
            infolist.append(glosslistplus)
    # Fixes versenos in infolist (combines numberless verses, adds chapter to verses with verseno only)
    # Adds all versenos from infolist to versenofixlist
    versenofixlist = []
    for info in infolist:
        versenofixlist.append(info[6])
    # Joins all versenos together
    numsearch = "".join(versenofixlist)
    romnumlist = []
    # Finds all chapter numerals throughout all the verse info, adds these to romnumlist
    numeralpat = re.compile(r'(\[NV\]|[IVX]{1,4}\. )')
    numpatitir = numeralpat.finditer(numsearch)
    for numfind in numpatitir:
        if numfind.group() not in romnumlist:
            romnumlist.append(numfind.group())
    # Goes through every verseno in the versenofixlist
    curverse = "0"
    poscount = 0
    curnum = ""
    for item in versenofixlist:
        # Removes 'Rom. ' from the numeral
        if "Rom. " in item:
            versenofixlist[poscount] = item[5:]
            item = versenofixlist[poscount]
        if item != "0":
            curverse = item
        # Replaces no-number verses not at epistle boundaries with the number of the previous verse
        elif item == "0":
            versenofixlist[poscount] = curverse
            item = versenofixlist[poscount]
        # Replaces no-number verses at epistle boundaries with a comment that no information is available about verse.
        if item == "[NV]":
            versenofixlist[
                poscount] = "- No Chapter or Verse Information Available"
            item = versenofixlist[poscount]
        # Updates the current chapter numeral when this verse starts with one
        for romnum in romnumlist:
            if romnum in item:
                if item.find(romnum) == 0:
                    curnum = romnum
        # Combines chapter numerals to verse numbers that don't have them already
        if curnum not in item:
            if item != "- No Chapter or Verse Information Available":
                versenofixlist[poscount] = curnum + item
        poscount += 1
    fixcount = 0
    # The versenos in infolist are updated with the corrected forms from versenofixlist
    for wronglist in infolist:
        wronglist[6] = versenofixlist[fixcount]
        fixcount += 1
    # Adds Epistle Name to chapter and verse
    epnames = [
        "Rom.", "1 Cor.", "2 Cor.", "Gal.", "Eph.", "Phil.", "1 Thes.",
        "2 Thes.", "Col.", "1 Tim.", "2 Tim.", "Tit.", "Philem.", "Heb."
    ]
    # eppages[k] is the page from which epnames[k + 1] applies; the last
    # entry marks the page on which the final epistle no longer applies.
    eppages = [
        543, 591, 619, 631, 643, 654, 663, 669, 679, 690, 698, 703, 705, 713
    ]
    for item in infolist[1:]:
        pageno = int(item[0])
        # The epistle is looked up from the page number of each row on its
        # own, so the result no longer depends on the range starting in
        # Romans or on every boundary page containing a gloss.
        spot = sum(1 for boundary in eppages if boundary <= pageno)
        if spot < len(epnames):
            item[6] = epnames[spot] + " " + item[6]
    return infolist
def scribe_split(glossfile, startpage=499, stoppage=712):
    """Split the glosses of a page range between the three scribal hands.

    Glosses carrying a footnote that contains "prima" are assigned to the
    prima manus. The remaining glosses go to hand two until a gloss containing
    the marker "[f. 33a]" is reached, and to hand three from that gloss on.

    Args:
        glossfile: source identifier handed unchanged to the page helpers.
        startpage: first page to process (inclusive).
        stoppage: last page to process (inclusive).

    Returns:
        ``[allglosses, primanlist, handiilist, handiiilist]``. Each list
        starts with a title string followed by its glosses.
    """
    # Pair every page number with the glosses of its first "SG" section.
    # The gloss list is built once per page and reused by all passes below.
    pagesdir = []
    for page in get_pageinfo(glossfile, startpage, stoppage):
        pageno = page[0]
        irish = get_section(get_pages(glossfile, pageno, pageno), "SG")[0]
        pagesdir.append([pageno, list(order_glosslist(irish))])

    allglosses = ['All Glosses']
    primanlist = ['Prima Manus']
    handiilist = ['Hand Two']
    handiiilist = ['Hand Three']

    # Pass 1: every gloss, in page order.
    for _, glosslist in pagesdir:
        allglosses.extend(glosslist)

    # Pass 2: glosses with a footnote marker whose footnote mentions "prima".
    # primaseen mirrors primanlist (title included) for constant-time
    # membership tests.
    glossfnpat = re.compile(r'\[[a-z]\]')
    primaseen = set(primanlist)
    for pageno, glosslist in pagesdir:
        footnotes = order_footlist(glossfile, pageno)
        for curgloss in glosslist:
            for marker in glossfnpat.finditer(curgloss):
                let = marker.group()[1:-1]
                for fn in footnotes:
                    # A footnote belongs to the marker when it starts with
                    # the marker's letter.
                    if (fn[0] == let and "prima" in fn
                            and curgloss not in primaseen):
                        primaseen.add(curgloss)
                        primanlist.append(curgloss)

    # Pass 3: the non-prima glosses, divided at the "[f. 33a]" marker. This
    # must run after pass 2 has seen every page.
    handtwo = True
    for _, glosslist in pagesdir:
        for curgloss in glosslist:
            if "[f. 33a]" in curgloss:
                handtwo = False
            if curgloss in primaseen:
                continue
            if handtwo:
                handiilist.append(curgloss)
            else:
                handiiilist.append(curgloss)

    return [allglosses, primanlist, handiilist, handiiilist]
def _superscript_footnotes(text):
    """Turn ``[a]``-``[d]`` footnote tags in *text* into superscript markup."""
    markers = re.findall(r'\[/?[a-d]\]', text)
    # Paired tags: drop the opening "[x]" and replace the closing "[/x]".
    for marker in markers:
        if "[/" in marker:
            text = text.replace(marker[0] + marker[-2:], "")
            text = text.replace(
                marker, "</em><sup>{}</sup><em>".format(marker[2:3]))
    # Whatever tag is still present is replaced in place.
    for marker in markers:
        if marker in text:
            text = text.replace(
                marker, "</em><sup>{}</sup><em>".format(marker[1:2]))
    return text


def _lemma_and_position(line, num):
    """Return the lemma glossed by *num* in *line* and its position."""
    # The lemma is the last space-separated item before the gloss number tag.
    linetext = line[:line.find(num)]
    lemma = linetext[linetext.rfind(" ") + 1:]
    if "[" in lemma:
        lemma = clear_tags(lemma)
    notagtext = clear_tags(linetext, ["let"])
    remnumpat = re.compile(
        r'(\[NV\]|((Rom\. )?([IVX]{1,4}\. )?(\d{1,2}[a-z]?, )?(\d{1,2}[a-z]?\. )?))'
    )
    # Stays empty for pages which begin mid-Latin-line (no verse number).
    thisglossremnum = ""
    for remnum in remnumpat.finditer(notagtext):
        if remnum.group() != "":
            thisglossremnum = remnum.group()
    # The text is shortened by the length of the last non-empty match
    # before the lemma is located in it.
    notagtext = _superscript_footnotes(notagtext[len(thisglossremnum):])
    return lemma, notagtext.rfind(lemma)


def get_latpageinfo(file, page):
    """Return a list of gloss-lists for a specified page of TPH.

    Each gloss-list contains the gloss [0], the Latin line it belongs to [1]
    (footnote tags turned into superscripts, other tags cleared), the lemma
    [2], and the lemma position [3].

    Args:
        file: source identifier handed unchanged to ``get_pages``.
        page: the page to read.

    Raises:
        RuntimeError: if a gloss number found in the Irish text has no
            matching tag in any Latin line of the page. This case previously
            never returned.
    """
    pagetext = get_pages(file, page, page)
    latlines = order_latlist("\n\n".join(get_section(pagetext, "Lat")))
    sgtext = clear_tags("\n\n".join(get_section(pagetext, "SG")))
    eachgloss = order_glosslist(sgtext)
    glosses = order_glosses(sgtext)
    # Gets gloss numbers from the Irish text and converts them to match the
    # tags in the Latin text: "10, 11. " becomes "[10–11]".
    numpat = re.compile(r'(\d{1,2}[a-z]?, )?\d{1,2}[a-z]?\. ')
    glossnums = []
    for match in numpat.finditer(glosses):
        glossnum = match.group()[:-2]
        if ", " in glossnum:
            glossnum = "–".join(glossnum.split(", "))
        glossnums.append("[" + glossnum + "]")
    latpergloss = []
    lemmata = []
    positions = []
    usednums = set()
    # A repeated gloss number is searched from the end of the page, so two
    # glosses with the same number do not pick up each other's Latin line.
    backlist = list(latlines)
    backlist.reverse()
    for num in glossnums:
        if num not in usednums:
            usednums.add(num)
            searchlines = latlines
        else:
            searchlines = backlist
        for line in searchlines:
            if num in line:
                lemma, lempos = _lemma_and_position(line, num)
                latpergloss.append(line)
                lemmata.append(lemma)
                positions.append(lempos)
                break
        else:
            raise RuntimeError(
                f"Gloss number {num} not found in the Latin text of page {page}."
            )
    # Compiles a list of the gloss, the Latin line, and the lemma for the
    # gloss within the Latin line.
    latininfolist = []
    for i, latline in enumerate(latpergloss):
        latininfolist.append([
            eachgloss[i],
            clear_tags(_superscript_footnotes(latline), ["NV"]),
            clear_tags(lemmata[i]), positions[i]
        ])
    return latininfolist