def browserfindlinenumberfromperseus(citationlist: list, workobject: dbOpus, resultmessage: str, dbcursor) -> tuple:
    """
    here comes the fun part: alien format; inconsistent citation style; incorrect data...

    sample url: /browse/perseus/gr0016w001/7:130
    is Herodotus, Historiae, Book 7, section 130

    :param citationlist: perseus-style citation components, e.g. ['7', '130']
    :param workobject: the dbOpus the perseus data claims the citation belongs to
    :param resultmessage: status string; will be overwritten by the lookup's 'code'
    :param dbcursor: active database cursor
    :return: (thelinenumber, resultmessage)
    """
    try:
        # the perseus dict does not always agree with our ids...
        # do an imperfect test for this by inviting the exception
        # you can still get a valid but wrong work, of course,
        # but if you ask for w001 and only w003 exists, this is supposed to take care of that
        returnfirstorlastlinenumber(workobject.universalid, dbcursor)
    except Exception:
        # was a bare 'except:', which would also trap SystemExit/KeyboardInterrupt;
        # Exception is wide enough for the "invite the exception" probe above
        # dict did not agree with our ids...: euripides, esp
        # what follows is a 'hope for the best' approach
        workid = perseusidmismatch(workobject.universalid, dbcursor)
        workobject = workdict[workid]

    # life=cal. or section=32
    # this has already been nuked?
    needscleaning = [True for c in citationlist if len(c.split('=')) > 1]
    if True in needscleaning:
        citationlist = perseusdelabeler(citationlist, workobject)

    # another problem: 'J. ' in sallust
    #   <bibl id="perseus/lt0631w001/J. 79:3" default="NO" valid="yes"><author>Sall.</author> J. 79, 3</bibl>
    # 'lt0631w002/79:3' is what you need to send to finddblinefromincompletelocus()
    # note that the work number is wrong, so the next is only a partial fix and valid only if wNNN has been set right
    if ' ' in citationlist[-1]:
        citationlist[-1] = citationlist[-1].split(' ')[-1]

    p = finddblinefromincompletelocus(workobject, citationlist, dbcursor)
    resultmessage = p['code']
    thelinenumber = p['line']

    return thelinenumber, resultmessage
def textsegmentfindstartandstop(authorobject, workobject, passageaslist, cursor) -> dict:
    """
    find the first and last lines of a work segment

    :param authorobject: dbAuthor whose table will be queried
    :param workobject: dbOpus containing the segment
    :param passageaslist: citation components for the segment, most-specific first
    :param cursor: active database cursor
    :return: {'startline': int, 'endline': int}
    """
    # resolve the (possibly incomplete) locus into its first db line
    locustuple = tuple(passageaslist)
    lookforline = finddblinefromincompletelocus(workobject, locustuple, cursor)
    # assuming that lookforline['code'] == 'success'
    # lookforline['code'] is (allegedly) only relevant to the Perseus lookup problem where a bad locus can be sent
    firstline = lookforline['line']
    firstlineobject = dblineintolineobject(grabonelinefromwork(authorobject.universalid, firstline, cursor))

    # let's say you looked for 'book 2' of something that has 'book, chapter, line'
    # that means that you want everything that has the same level2 value as the lineobject
    # build a where clause
    passageaslist.reverse()
    selection = '{uid}_AT_{line}'.format(uid=workobject.universalid, line='|'.join(passageaslist))
    whereclauses = atsignwhereclauses(selection, '=', {authorobject.universalid: authorobject})

    qw = str()
    d = [workobject.universalid]
    for clause, value in whereclauses:
        qw += 'AND (' + clause + ') '
        d.append(value)

    query = 'SELECT index FROM {au} WHERE wkuniversalid=%s {whr} ORDER BY index DESC LIMIT 1'.format(au=authorobject.universalid, whr=qw)
    cursor.execute(query, tuple(d))
    lastline = cursor.fetchone()

    return {'startline': firstlineobject.index, 'endline': lastline[0]}
def browserfindlinenumberfromlocus(citationlist: list, workobject: dbOpus, resultmessage: str, dbcursor) -> tuple:
    """
    you were sent here by the citation builder autofill boxes

    note that browse/locus/gr0016w001/3|11|5 is Herodotus, Historiae, Book 3, section 11, line 5
    and locus/lt1056w001/3|5|_0 is Vitruvius, De Architectura, book 3, chapter 5, section 1, line 1

    unfortunately you might need to find '300,19' as in 'Democritus, Fragmenta: Fragment 300,19, line 4'

    '-' is used for something like Ar., Nubes 1510-1511
    '_' is used for '_0' (which really means 'first available line at this level')

    ( ) and / should be converted to equivalents in the builder: they do us no good here
    see dbswapoutbadcharsfromciations() in HipparchiaBuilder
    see also citationcharacterset() in InputParsingObject()

    :param citationlist: citation components, most-specific first
    :param workobject: the dbOpus being browsed
    :param resultmessage: status string; replaced only when the locus is incomplete
    :param dbcursor: active database cursor
    :return: (thelinenumber, resultmessage)
    """
    fullcitation = tuple(citationlist)

    if len(fullcitation) != workobject.availablelevels:
        # an incomplete locus: take the best available match and report its status
        bestguess = finddblinefromincompletelocus(workobject, citationlist, dbcursor)
        return bestguess['line'], bestguess['code']

    # every level supplied: the locus can be resolved directly
    return finddblinefromlocus(workobject, fullcitation, dbcursor), resultmessage
def textmaker(author: str, work=None, passage=None, endpoint=None, citationdelimiter='|') -> JSON_STR:
    """
    build a text suitable for display

    "GET /textof/lt0474/024/20/30"

    :param author: author universalid, e.g. 'lt0474'
    :param work: work number within the author, e.g. '024'
    :param passage: citation of the first line of the desired segment
    :param endpoint: citation of the last line (selects a span when present)
    :param citationdelimiter: character separating citation levels in passage/endpoint
    :return: JSON string with authorname, title, structure, worksegment, texthtml
    """
    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            # a span: resolve both endpoints to absolute line numbers
            firstlinenumber = finddblinefromincompletelocus(wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo, po.endpointlist, dbcursor, findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                # fetch the actual line objects so the segment label can cite them
                startlnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                # lookup failed: warn and fall back to a minimal two-line dummy span
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(), b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            # a single (possibly partial) citation: expand it to a start/stop pair
            startandstop = textsegmentfindstartandstop(ao, wo, po.passageaslist, dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery, dbcursor)
    else:
        # no valid author+work: nothing to render
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        # placeholder objects so the results dict below can be filled in uniformly
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
def buildindexto(searchid: str, author: str, work=None, passage=None, endpoint=None, citationdelimiter='|', justvocab=False) -> JSON_STR:
    """
    build a complete index to a an author, work, or segment of a work

    :param searchid: poll id so the browser can track indexing progress
    :param author: author universalid, e.g. 'gr0016'
    :param work: work number within the author; None indexes the whole author
    :param passage: citation of the first line of the desired segment
    :param endpoint: citation of the last line (selects a span when present)
    :param citationdelimiter: character separating citation levels
    :param justvocab: when True, return only the {workid: (start, end)} dict
    :return: JSON string with the index HTML and metadata (or cdict if justvocab)
    """
    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        # an explicit span within one work: resolve both endpoints
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo, stop, dbcursor, findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
            cdict = {wo.universalid: (firstlinenumber['line'], lastlinenumber['line'])}
            startln = dblineintolineobject(grabonelinefromwork(ao.universalid, firstlinenumber['line'], dbcursor))
            stopln = dblineintolineobject(grabonelinefromwork(ao.universalid, lastlinenumber['line'], dbcursor))
        else:
            # lookup failed: warn, fall back to blank lines, and mark the request invalid
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(), b=stopln.shortlocus())
    elif ao and work and psg:
        # subsection of a work of an author
        progresspolldict[pollid].statusis('Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # one work
        progresspolldict[pollid].statusis('Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # whole author: index every work; also build a key so entries can cite works
        allworks = ['{w} ⇒ {t}'.format(w=w.universalid[6:10], t=w.title) for w in ao.listofworks]
        allworks.sort()
        progresspolldict[pollid].statusis('Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        # caller only wants the line-range dict; clean up and bail early
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid], useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        # humanize the count with thousands separators when the locale is available
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        # placeholder author so the results dict below can be filled in uniformly
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
def selectionmade(requestargs: MultiDict) -> JSON_STR:
    """
    once a choice is made, parse and register it inside session['selections']
    then return the human readable version of the same for display on the page

    '_AT_' syntax is used to restrict the scope of a search

    "GET /selection/make/_?auth=lt0474&work=001&locus=13|4&endpoint= HTTP/1.1"
    request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '13|4'), ('endpoint', '')])

    "GET /selection/make/_?auth=lt0474&work=001&locus=10&endpoint=20&raw=t HTTP/1.1"
    request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '10'), ('endpoint', '20'), ('raw', 't')])

    "GET /selection/make/_?auth=lt0474&work=001&exclude=t HTTP/1.1"
    request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('exclude', 't')])

    :param requestargs: the request's query parameters
    :return: JSON describing the current selections (via getcurrentselections())
    """
    probeforsessionvariables()

    # sanitize every incoming parameter before touching the session
    uid = depunct(requestargs.get('auth', str()))
    workid = depunct(requestargs.get('work', str()))
    genre = depunct(requestargs.get('genre', str()))
    auloc = depunct(requestargs.get('auloc', str()))
    rawdataentry = re.sub('[^tf]', str(), requestargs.get('raw', str()))
    exclude = re.sub('[^tf]', str(), requestargs.get('exclude', str()))

    # loci may carry citation punctuation
    allowedpunct = '|,.'
    locus = depunct(requestargs.get('locus', str()), allowedpunct)
    endpoint = depunct(requestargs.get('endpoint', str()), allowedpunct)

    allowedpunct = '.-?():'
    wkprov = depunct(requestargs.get('wkprov', str()), allowedpunct)

    allowedpunct = '.'
    wkgenre = depunct(requestargs.get('wkgenre', str()), allowedpunct)

    # 'exclude=t' flips which session bucket gains and which one gets pruned
    if exclude != 't':
        suffix = 'selections'
        other = 'exclusions'
    else:
        suffix = 'exclusions'
        other = 'selections'

    if rawdataentry == 't':
        # raw entry uses '.' between citation levels; convert to the internal '|'
        locus = re.sub(r'\.', '|', locus)
        endpoint = re.sub(r'\.', '|', endpoint)

    # the selection box might contain stale info if you deselect a corpus while items are still in the box
    uid = selectionisactive(uid)

    # drop any genre/location value that is not in the active lists
    if genre and genre not in returnactivelist(authorgenresdict):
        genre = str()

    if wkgenre and wkgenre not in returnactivelist(workgenresdict):
        wkgenre = str()

    if auloc and auloc not in returnactivelist(authorlocationdict):
        auloc = str()

    if wkprov and wkprov not in returnactivelist(workprovenancedict):
        wkprov = str()

    # you have validated the input, now do something with it...

    if uid and workid and locus and endpoint:
        # a span in an author: 3 verrine orations, e.g. [note that the selection is 'greedy': 1start - 3end]
        # http://127.0.0.1:5000/makeselection?auth=lt0474&work=005&locus=2|1&endpoint=2|3
        # convert this into a 'firstline' through 'lastline' format
        emptycursor = None
        workobject = None
        try:
            workobject = workdict['{a}w{b}'.format(a=uid, b=workid)]
        except KeyError:
            consolewarning('"/selection/make/" sent a bad workuniversalid: {a}w{b}'.format(a=uid, b=workid))
        start = locus.split('|')
        stop = endpoint.split('|')
        # locus components arrive most-general first; the lookup wants them reversed
        start.reverse()
        stop.reverse()
        if workobject:
            firstline = finddblinefromincompletelocus(workobject, start, emptycursor)
            lastline = finddblinefromincompletelocus(workobject, stop, emptycursor, findlastline=True)
            citationtemplate = '{a}w{b}_FROM_{c}_TO_{d}'
            if firstline['code'] == 'success' and lastline['code'] == 'success':
                fl = firstline['line']
                ll = lastline['line']
                loc = citationtemplate.format(a=uid, b=workid, c=fl, d=ll)
                # print('span selected:', loc)
                # span selected: lt0474w005_FROM_4501_TO_11915
                # Cicero, In Verrem: 2.1.t.1
                # Cicero, In Verrem: 2.3.228.15
                # NOTE(review): a span whose endpoints resolve to the same line (ll == fl)
                # falls into the warning branch below — confirm that is intended
                if ll > fl:
                    session['psg' + suffix].append(loc)
                    session['psg' + suffix] = tidyuplist(session['psg' + suffix])
                else:
                    msg = '"makeselection/" sent a firstline greater than the lastine value: {a} > {b} [{c}; {d}]'
                    consolewarning(msg.format(a=fl, b=ll, c=locus, d=endpoint))
                rationalizeselections(loc, suffix)
            else:
                msg = '"makeselection/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(msg.format(a=uid, b=workid, c=locus, d=endpoint))
    elif uid and workid and locus:
        # a specific passage
        session['psg' + suffix].append(uid + 'w' + workid + '_AT_' + locus)
        session['psg' + suffix] = tidyuplist(session['psg' + suffix])
        rationalizeselections(uid + 'w' + workid + '_AT_' + locus, suffix)
    elif uid and workid:
        # a specific work
        session['wk' + suffix].append(uid + 'w' + workid)
        session['wk' + suffix] = tidyuplist(session['wk' + suffix])
        rationalizeselections(uid + 'w' + workid, suffix)
    elif uid and not workid:
        # a specific author
        session['au' + suffix].append(uid)
        session['au' + suffix] = tidyuplist(session['au' + suffix])
        rationalizeselections(uid, suffix)

    # if vs elif: allow multiple simultaneous instance

    if genre:
        # add to the +/- genre list and then subtract from the -/+ list
        session['agn' + suffix].append(genre)
        session['agn' + suffix] = tidyuplist(session['agn' + suffix])
        session['agn' + other] = dropdupes(session['agn' + other], session['agn' + suffix])

    if wkgenre:
        # add to the +/- genre list and then subtract from the -/+ list
        session['wkgn' + suffix].append(wkgenre)
        session['wkgn' + suffix] = tidyuplist(session['wkgn' + suffix])
        session['wkgn' + other] = dropdupes(session['wkgn' + other], session['wkgn' + suffix])

    if auloc:
        # add to the +/- locations list and then subtract from the -/+ list
        session['aloc' + suffix].append(auloc)
        session['aloc' + suffix] = tidyuplist(session['aloc' + suffix])
        session['aloc' + other] = dropdupes(session['aloc' + other], session['aloc' + suffix])

    if wkprov:
        # add to the +/- locations list and then subtract from the -/+ list
        session['wloc' + suffix].append(wkprov)
        session['wloc' + suffix] = tidyuplist(session['wloc' + suffix])
        session['wloc' + other] = dropdupes(session['wloc' + other], session['wloc' + suffix])

    # after the update to the session, you need to update the page html to reflect the changes
    # print('session["psgselections"]=', session['psgselections'])
    # print('session["psgexclusions"]=', session['psgexclusions'])

    return getcurrentselections()