def sampleworkcitation(authorid: str, workid: str) -> JSON_STR:
    """
    called by loadsamplecitation() in autocomplete.js

    we are using the manual input style on the web page
    so we need some hint on how to do things: check the end line for a sample citation

    "In Timarchum (w001)" yields...

    127.0.0.1 - - [04/Apr/2021 13:48:53] "GET /get/json/samplecitation/gr0026/001 HTTP/1.1" 200 -
    /get/json/samplecitation {"firstline": "1.1", "lastline": "196.7"}

    :param authorid: unvetted author id from the web request, e.g. 'gr0026'
    :param workid: unvetted work number from the web request, e.g. '001'
    :return: JSON string with 'firstline' and 'lastline' citation strings
    """
    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    returnvals = dict()
    returnvals['firstline'] = str()
    returnvals['lastline'] = str()

    # strip punctuation from the raw web input before using it as a dict key
    authorid = depunct(authorid)
    workid = depunct(workid)

    try:
        # the author lookup only validates the id; 'ao' itself is not used below
        ao = authordict[authorid]
        wo = workdict[authorid + 'w' + workid]
    except KeyError:
        returnvals['firstline'] = 'no such author/work combination'
        # BUGFIX: the early return used to skip cleanup and leak the connection
        dbconnection.connectioncleanup()
        return json.dumps(returnvals)

    # the top level of the citation (e.g. 'book') is excluded when finding the first line
    toplevel = wo.availablelevels - 1
    firstlineindex = returnfirstorlastlinenumber(wo.universalid, dbcursor, disallowt=True, disallowlevel=toplevel)
    flo = dblineintolineobject(grabonelinefromwork(authorid, firstlineindex, dbcursor))

    lastlineidx = returnfirstorlastlinenumber(wo.universalid, dbcursor, findlastline=True)
    llo = dblineintolineobject(grabonelinefromwork(authorid, lastlineidx, dbcursor))

    returnvals['firstline'] = flo.prolixlocus()
    returnvals['lastline'] = llo.prolixlocus()

    results = json.dumps(returnvals)

    dbconnection.connectioncleanup()

    return results
def textsegmentfindstartandstop(authorobject, workobject, passageaslist, cursor) -> dict:
    """
    find the first and last lines of a work segment

    e.g. you looked for 'book 2' of something that has 'book, chapter, line':
    the end line is the highest index that still shares the level-2 value

    NOTE: this reverses 'passageaslist' IN PLACE; callers can observe the
    mutation afterwards (and some appear to rely on the reversed order)

    :param authorobject: dbAuthor-like object (supplies universalid)
    :param workobject: dbOpus-like object (supplies universalid)
    :param passageaslist: citation components as a list of strings
    :param cursor: active db cursor
    :return: {'startline': int, 'endline': int}
    """
    p = tuple(passageaslist)
    lookforline = finddblinefromincompletelocus(workobject, p, cursor)
    # assuming that lookforline['code'] == 'success'
    # lookforline['code'] is (allegedly) only relevant to the Perseus lookup problem where a bad locus can be sent
    foundline = lookforline['line']
    line = grabonelinefromwork(authorobject.universalid, foundline, cursor)
    lo = dblineintolineobject(line)

    # build a where clause that matches everything at the same citation level
    passageaslist.reverse()
    atloc = '|'.join(passageaslist)
    selection = '{uid}_AT_{line}'.format(uid=workobject.universalid, line=atloc)

    w = atsignwhereclauses(selection, '=', {authorobject.universalid: authorobject})
    d = [workobject.universalid]
    qw = str()
    # each clause is a (sql-fragment, bind-value) pair; iterate directly rather than by index
    for fragment, value in w:
        qw += 'AND (' + fragment + ') '
        d.append(value)

    query = 'SELECT index FROM {au} WHERE wkuniversalid=%s {whr} ORDER BY index DESC LIMIT 1'.format(
        au=authorobject.universalid, whr=qw)
    data = tuple(d)
    cursor.execute(query, data)
    # NOTE(review): fetchone() can return None if nothing matched; 'found[0]'
    # would then raise TypeError — presumably unreachable after a successful locus lookup
    found = cursor.fetchone()

    startandstop = dict()
    startandstop['startline'] = lo.index
    startandstop['endline'] = found[0]

    return startandstop
def grableadingandlagging(hitline: dbWorkLine, searchobject: SearchObject, cursor, override=None) -> dict:
    """
    take a dbline and grab the N words in front of it and after it

    it would be a good idea to have an autocommit connection here?

    override was added so that the rewritten so of precomposedphraseandproximitysearch() can set 'seeking' as it wishes

    :param hitline: the dbWorkLine at the center of the context window
    :param searchobject: supplies distance, lemma, termone, and usewordlist
    :param cursor: active db cursor
    :param override: optional explicit regex to use as 'seeking'
    :return: {'lag': str, 'lead': str}
    """
    so = searchobject
    # look out for off-by-one errors
    distance = so.distance + 1

    if override:
        seeking = override
    elif so.lemma:
        seeking = wordlistintoregex(so.lemma.formlist)
        so.usewordlist = 'polytonic'
    else:
        seeking = so.termone

    # expanded searchzone because "seeking" might be a multi-line phrase
    # [renamed from 'prev'/'next': 'next' shadowed the builtin]
    previousline = dbWorkLine(*grabonelinefromwork(hitline.authorid, hitline.index - 1, cursor))
    followingline = dbWorkLine(*grabonelinefromwork(hitline.authorid, hitline.index + 1, cursor))

    searchzone = ' '.join([
        getattr(previousline, so.usewordlist),
        getattr(hitline, so.usewordlist),
        getattr(followingline, so.usewordlist)
    ])

    # 'seeking' is already a regex pattern: r'{s}'.format(s=seeking) was an identity wrapper
    match = re.search(seeking, searchzone)

    # but what if you just found 'paucitate' inside of 'paucitatem'?
    # you will have 'm' left over and this will throw off your distance-in-words count

    past = None
    upto = None
    lagging = list()
    leading = list()
    ucount = 0
    pcount = 0

    try:
        past = searchzone[match.end():].strip()
    except AttributeError:
        # AttributeError: 'NoneType' object has no attribute 'end'
        pass

    try:
        upto = searchzone[:match.start()].strip()
    except AttributeError:
        pass

    if upto:
        # split once and reuse (the original split the same string twice)
        lagging = [x for x in upto.split(' ') if x]
        ucount = len(lagging)

    if past:
        leading = [x for x in past.split(' ') if x]
        pcount = len(leading)

    # walk backwards through the work until enough lagging words have accumulated
    atline = hitline.index
    while ucount < distance + 1:
        atline -= 1
        try:
            previous = dblineintolineobject(grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # TypeError: 'NoneType' object is not subscriptable — ran off the start of the work
            previous = makeablankline(hitline.authorid, -1)
            ucount = 999
        lagging = previous.wordlist(so.usewordlist) + lagging
        ucount += previous.wordcount()
    lagging = lagging[-1 * (distance - 1):]
    lagging = ' '.join(lagging)

    # walk forwards likewise for the leading words
    atline = hitline.index
    while pcount < distance + 1:
        atline += 1
        try:
            nextline = dblineintolineobject(grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # TypeError: 'NoneType' object is not subscriptable — ran off the end of the work
            nextline = makeablankline(hitline.authorid, -1)
            pcount = 999
        leading += nextline.wordlist(so.usewordlist)
        pcount += nextline.wordcount()
    leading = leading[:distance - 1]
    leading = ' '.join(leading)

    returndict = {'lag': lagging, 'lead': leading}

    return returndict
def textmaker(author: str, work=None, passage=None, endpoint=None, citationdelimiter='|') -> JSON_STR:
    """
    build a text suitable for display

    "GET /textof/lt0474/024/20/30"

    :param author: author id string
    :param work: optional work number
    :param passage: optional starting citation
    :param endpoint: optional ending citation (requests a span)
    :param citationdelimiter: character separating citation components
    :return: JSON string: authorname, title, structure, worksegment, texthtml
    """
    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()
    texthtml = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            # an explicit span: locate both termini
            first = finddblinefromincompletelocus(wo, po.passageaslist, dbcursor)
            last = finddblinefromincompletelocus(wo, po.endpointlist, dbcursor, findlastline=True)
            bothfound = first['code'] == 'success' and last['code'] == 'success'
            if bothfound:
                startline = first['line']
                endline = last['line']
                startlnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(), b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # no citation at all: the whole work
            startline = wo.starts
            endline = wo.ends
        else:
            # one citation: a segment of the work
            startandstop = textsegmentfindstartandstop(ao, wo, po.passageaslist, dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery, dbcursor)

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        # fall back to placeholder objects so the payload below is always well-formed
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = {
        'authorname': avoidsmallvariants(ao.shortname),
        'title': avoidsmallvariants(wo.title),
        'structure': avoidsmallvariants(wo.citation()),
        'worksegment': segmenttext,
        'texthtml': texthtml
    }

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
def buildindexto(searchid: str, author: str, work=None, passage=None, endpoint=None, citationdelimiter='|', justvocab=False) -> JSON_STR:
    """
    build a complete index to an author, work, or segment of a work

    registers a ProgressPoll under the validated searchid so the front end
    can watch the build; the poll is always deleted before returning

    :param searchid: poll id supplied by the client
    :param author: author id string
    :param work: optional work number
    :param passage: optional starting citation
    :param endpoint: optional ending citation (requests a span)
    :param citationdelimiter: character separating citation components
    :param justvocab: if True, return the raw cdict instead of the JSON payload
        (NOTE: this diverges from the JSON_STR annotation; internal callers only)
    :return: JSON string with the index HTML and its metadata (or cdict, see above)
    """
    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        # placeholder work so wo.title / wo.citation() are safe below
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    # cdict maps work universalid -> (first line index, last line index) to index
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        # case 1: a span within a work ('from X to Y')
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo, stop, dbcursor, findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
            cdict = {wo.universalid: (firstlinenumber['line'], lastlinenumber['line'])}
            startln = dblineintolineobject(grabonelinefromwork(ao.universalid, firstlinenumber['line'], dbcursor))
            stopln = dblineintolineobject(grabonelinefromwork(ao.universalid, lastlinenumber['line'], dbcursor))
        else:
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(), b=stopln.shortlocus())
    elif ao and work and psg:
        # case 2: subsection of a work of an author
        progresspolldict[pollid].statusis('Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # case 3: one whole work
        progresspolldict[pollid].statusis('Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # case 4: whole author — index every work and list them for the key
        allworks = ['{w} ⇒ {t}'.format(w=w.universalid[6:10], t=w.title) for w in ao.listofworks]
        allworks.sort()
        progresspolldict[pollid].statusis('Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        # internal shortcut: hand the line ranges back without building HTML
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid], useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        # thousands separators for the word count; falls back to a plain str
        # if the 'en_US' locale is unavailable on this system
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
def sessionselectionsinfo(authordict: dict, workdict: dict) -> dict:
    """
    build the selections html either for a or b:
        #selectionstable + #selectioninfocell
        #selectionstable + #exclusioninfocell

    there are seven headings to populate
        [a] author classes
        [b] work genres
        [c] author location
        [d] work provenance
        [e] author selections
        [f] work selections
        [g] passage selections

    id numbers need to be attached to the selections so that they can be
    double-clicked so as to delete them

    :param authordict: universalid -> author object
    :param workdict: universalid -> work object
    :return: {'selections': html, 'exclusions': html, 'numberofselections': int, 'jstuples': list}
    """
    returndict = dict()
    thejs = list()

    tit = 'title="Double-click to remove this item"'

    try:
        # it is possible to hit this function before the session has been set, so...
        session['auselections']
    except KeyError:
        probeforsessionvariables()

    # everything currently selected, across all seven selection categories
    sessionsearchlist = session['auselections'] + session['agnselections'] + session['wkgnselections'] + \
        session['psgselections'] + session['wkselections'] + session['alocselections'] + \
        session['wlocselections']

    for selectionorexclusion in ['selections', 'exclusions']:
        thehtml = list()

        # if there are no explicit selections, then
        if not sessionsearchlist and selectionorexclusion == 'selections':
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[All in active corpora less exclusions]<br />')

        if selectionorexclusion == 'exclusions' and not sessionsearchlist and session['spuria'] == 'Y' and \
                not session['wkgnexclusions'] and not session['agnexclusions'] and not session['auexclusions']:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[No exclusions]<br />')

        # [a] author classes
        v = 'agn'
        # session keys are '<category><selections|exclusions>', e.g. 'agnselections'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Author categories</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [b] work genres
        v = 'wkgn'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Work genres</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [c] author location
        v = 'aloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Author location</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [d] work provenance
        v = 'wloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Work provenance</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [e] authors
        v = 'au'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            # localval numbers each item so the js can address it for deletion
            localval = -1
            for s in session[var]:
                localval += 1
                ao = authordict[s]
                thehtml.append('<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{s}</span>'
                               '<br />'.format(v=v, soe=selectionorexclusion, var=var, lv=localval, s=ao.akaname, tit=tit))
                thejs.append((var, localval))

        # [f] works
        v = 'wk'
        var = v + selectionorexclusion
        if session[var] and selectionorexclusion == 'exclusions' and session['spuria'] == 'N':
            thehtml.append('<span class="picklabel">Works</span><br />')
            thehtml.append('[All non-selected spurious works]<br />')

        if session[var]:
            thehtml.append('<span class="picklabel">Works</span><br />')
            if selectionorexclusion == 'exclusions' and session['spuria'] == 'N':
                thehtml.append('[Non-selected spurious works]<br />')
            localval = -1
            for s in session[var]:
                localval += 1
                # first six characters of a work id are the author id
                uid = s[:6]
                ao = authordict[uid]
                wk = workdict[s]
                thehtml.append('<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, '
                               '<span class="pickedwork">{wk}</span></span>'
                               '<br />'.format(v=v, var=var, soe=selectionorexclusion, lv=localval, au=ao.akaname, tit=tit, wk=wk.title))
                thejs.append((var, localval))

        # [g] passages
        v = 'psg'
        var = v + selectionorexclusion
        if session[var]:
            psgtemplate = '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, <span class="pickedwork">{wk}</span> <span class="pickedsubsection">{loc}</span></span><br />'
            spantemplate = 'from {a} to {b}'
            thehtml.append('<span class="picklabel">Passages</span><br />')
            localval = -1
            for s in session[var]:
                localval += 1
                uid = s[:6]
                ao = authordict[uid]
                loc = str()
                # watch out for heterogenous passage selection formats; only _AT_ and _FROM_ exist ATM
                # session[psgselections] = ['lt0474w005_FROM_4501_TO_11915', 'lt2806w002_AT_3|4|5']
                if '_AT_' in s:
                    # a single citation: 'lt2806w002_AT_3|4|5'
                    locus = s.split('_AT_')[1].split('|')
                    locus.reverse()
                    citationtuple = tuple(locus)
                    # find the matching work object by its universalid
                    for w in ao.listofworks:
                        if w.universalid == s[0:10]:
                            wk = w
                    loc = prolixlocus(wk, citationtuple)
                elif '_FROM_' in s:
                    # a line-index span: 'lt0474w005_FROM_4501_TO_11915'
                    # needs a short-lived db connection to resolve indices into citations
                    dbconnection = ConnectionObject()
                    dbcursor = dbconnection.cursor()
                    wk = workdict[s[0:10]]
                    locus = s.split('_FROM_')[1]
                    start = locus.split('_TO_')[0]
                    stop = locus.split('_TO_')[1]
                    startln = dblineintolineobject(grabonelinefromwork(uid, start, dbcursor))
                    stopln = dblineintolineobject(grabonelinefromwork(uid, stop, dbcursor))
                    dbconnection.connectioncleanup()
                    # print('_FROM_', start, stop, startln.uncleanlocustuple(), stopln.uncleanlocustuple())
                    loc = spantemplate.format(a=startln.prolixlocus(), b=stopln.prolixlocus())
                thehtml.append(psgtemplate.format(v=v, var=var, soe=selectionorexclusion, lv=localval, au=ao.akaname, wk=wk.title, loc=loc, tit=tit))
                thejs.append((var, localval))

        returndict[selectionorexclusion] = '\n'.join(thehtml)

    # total count across both selections and exclusions
    scount = len(session['auselections'] + session['wkselections'] + session['agnselections'] +
                 session['wkgnselections'] + session['psgselections'] + session['alocselections'] +
                 session['wlocselections'])
    scount += len(session['auexclusions'] + session['wkexclusions'] + session['agnexclusions'] +
                  session['wkgnexclusions'] + session['psgexclusions'] + session['alocexclusions'] +
                  session['wlocexclusions'])

    # -1 signals 'nothing selected' to the front end
    returndict['numberofselections'] = -1
    if scount > 0:
        returndict['numberofselections'] = scount

    returndict['jstuples'] = thejs

    return returndict