예제 #1
0
def sampleworkcitation(authorid: str, workid: str) -> JSON_STR:
    """
	called by loadsamplecitation() in autocomplete.js

	the page uses the manual citation input style, so the user needs a hint
	about the format: return the first and last citations of the work

	"In Timarchum (w001)" yields...

	127.0.0.1 - - [04/Apr/2021 13:48:53] "GET /get/json/samplecitation/gr0026/001 HTTP/1.1" 200 -
	/get/json/samplecitation
		{"firstline": "1.1", "lastline": "196.7"}

	:param authorid: author id, e.g. 'gr0026'
	:param workid: work number, e.g. '001'
	:return: JSON string: {'firstline': ..., 'lastline': ...}
	"""
    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    citation = {'firstline': str(), 'lastline': str()}

    authorid = depunct(authorid)
    workid = depunct(workid)

    try:
        ao = authordict[authorid]
        wo = workdict[authorid + 'w' + workid]
    except KeyError:
        citation['firstline'] = 'no such author/work combination'
        return json.dumps(citation)

    # disallow the top citation level so the sample shows a real passage
    toplevel = wo.availablelevels - 1
    firstindex = returnfirstorlastlinenumber(wo.universalid,
                                             dbcursor,
                                             disallowt=True,
                                             disallowlevel=toplevel)
    firstline = dblineintolineobject(
        grabonelinefromwork(authorid, firstindex, dbcursor))

    lastindex = returnfirstorlastlinenumber(wo.universalid,
                                            dbcursor,
                                            findlastline=True)
    lastline = dblineintolineobject(
        grabonelinefromwork(authorid, lastindex, dbcursor))

    citation['firstline'] = firstline.prolixlocus()
    citation['lastline'] = lastline.prolixlocus()

    jsonpacket = json.dumps(citation)

    dbconnection.connectioncleanup()

    return jsonpacket
def textsegmentfindstartandstop(authorobject, workobject, passageaslist,
                                cursor) -> dict:
    """
	find the first and last database line numbers of a work segment

	NB: reverses passageaslist in place

	:param authorobject: author whose database table will be queried
	:param workobject: work that contains the passage
	:param passageaslist: citation levels of the segment, e.g. ['2']
	:param cursor: active database cursor
	:return: {'startline': ..., 'endline': ...}
	"""

    citationtuple = tuple(passageaslist)
    lookforline = finddblinefromincompletelocus(workobject, citationtuple,
                                                cursor)
    # assuming that lookforline['code'] == 'success'
    # lookforline['code'] is (allegedly) only relevant to the Perseus lookup problem where a bad locus can be sent
    foundline = lookforline['line']
    lo = dblineintolineobject(
        grabonelinefromwork(authorobject.universalid, foundline, cursor))

    # e.g., asking for 'book 2' of a 'book, chapter, line' work means:
    # grab everything whose level2 value matches that of the lineobject;
    # build a where clause to express that
    passageaslist.reverse()
    selection = '{uid}_AT_{line}'.format(uid=workobject.universalid,
                                         line='|'.join(passageaslist))

    whereclauses = atsignwhereclauses(selection, '=',
                                      {authorobject.universalid: authorobject})
    data = [workobject.universalid]
    qw = str()
    for clause in whereclauses:
        qw += 'AND (' + clause[0] + ') '
        data.append(clause[1])

    query = 'SELECT index FROM {au} WHERE wkuniversalid=%s {whr} ORDER BY index DESC LIMIT 1'.format(
        au=authorobject.universalid, whr=qw)

    cursor.execute(query, tuple(data))
    found = cursor.fetchone()

    return {'startline': lo.index, 'endline': found[0]}
예제 #3
0
def grableadingandlagging(hitline: dbWorkLine,
                          searchobject: SearchObject,
                          cursor,
                          override=None) -> dict:
    """
	take a dbline and grab the N words in front of it and after it

	it would be a good idea to have an autocommit connection here?

	override was added so that the rewritten so of precomposedphraseandproximitysearch() can set 'seeking' as it
	wishes

	:param hitline: the line on which the hit was found
	:param searchobject: supplies distance, lemma, termone, and usewordlist
	:param cursor: active database cursor
	:param override: optional regex to use as 'seeking' instead of deriving it
	:return: {'lag': str, 'lead': str}
	"""

    so = searchobject
    # look out for off-by-one errors
    distance = so.distance + 1

    if override:
        seeking = override
    elif so.lemma:
        seeking = wordlistintoregex(so.lemma.formlist)
        so.usewordlist = 'polytonic'
    else:
        seeking = so.termone

    # expanded searchzone because "seeking" might be a multi-line phrase
    # NB: local names chosen to avoid shadowing the builtin next()
    prevline = dbWorkLine(
        *grabonelinefromwork(hitline.authorid, hitline.index - 1, cursor))
    nextline = dbWorkLine(
        *grabonelinefromwork(hitline.authorid, hitline.index + 1, cursor))

    searchzone = ' '.join([
        getattr(prevline, so.usewordlist),
        getattr(hitline, so.usewordlist),
        getattr(nextline, so.usewordlist)
    ])

    match = re.search(r'{s}'.format(s=seeking), searchzone)
    # but what if you just found 'paucitate' inside of 'paucitatem'?
    # you will have 'm' left over and this will throw off your distance-in-words count
    lagging = list()
    leading = list()
    ucount = 0
    pcount = 0

    # explicit None-check instead of catching AttributeError on match.end()
    if match:
        upto = searchzone[:match.start()].strip()
        past = searchzone[match.end():].strip()
    else:
        upto = None
        past = None

    if upto:
        # split once and reuse (the original split the same string twice)
        lagging = [x for x in upto.split(' ') if x]
        ucount = len(lagging)

    if past:
        leading = [x for x in past.split(' ') if x]
        pcount = len(leading)

    # walk backwards through the work until enough words are banked for 'lag'
    atline = hitline.index
    while ucount < distance + 1:
        atline -= 1
        try:
            previous = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the start of the work
            previous = makeablankline(hitline.authorid, -1)
            ucount = 999
        lagging = previous.wordlist(so.usewordlist) + lagging
        ucount += previous.wordcount()
    lagging = lagging[-1 * (distance - 1):]
    lagging = ' '.join(lagging)

    # walk forwards until enough words are banked for 'lead'
    atline = hitline.index
    while pcount < distance + 1:
        atline += 1
        try:
            subsequent = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the end of the work
            subsequent = makeablankline(hitline.authorid, -1)
            pcount = 999
        leading += subsequent.wordlist(so.usewordlist)
        pcount += subsequent.wordcount()
    leading = leading[:distance - 1]
    leading = ' '.join(leading)

    returndict = {'lag': lagging, 'lead': leading}

    return returndict
예제 #4
0
def textmaker(author: str,
              work=None,
              passage=None,
              endpoint=None,
              citationdelimiter='|') -> JSON_STR:
    """
	build a text suitable for display

		"GET /textof/lt0474/024/20/30"

	:param author: author id, e.g. 'lt0474'
	:param work: work number, e.g. '024'
	:param passage: starting citation, levels joined by citationdelimiter
	:param endpoint: optional ending citation; if set, a span is built
	:param citationdelimiter: separator between citation levels
	:return: JSON string with authorname, title, structure, worksegment, texthtml
	"""

    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint,
                                     citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            # a span: resolve both citations into absolute db line numbers
            firstlinenumber = finddblinefromincompletelocus(
                wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo,
                                                           po.endpointlist,
                                                           dbcursor,
                                                           findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber[
                    'code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                startlnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                # bad span: warn on the console and fall back to a dummy 0-1 range
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(
                    msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(),
                                                   b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            # a single citation: expand it into the full segment it names
            startandstop = textsegmentfindstartandstop(ao, wo,
                                                       po.passageaslist,
                                                       dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery,
                             dbcursor)
    else:
        # no valid author/work combination: send an empty text
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        # placeholder objects so the result dict below can always be filled in
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
예제 #5
0
def buildindexto(searchid: str,
                 author: str,
                 work=None,
                 passage=None,
                 endpoint=None,
                 citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
	build a complete index to a an author, work, or segment of a work

	:param searchid: poll id used to report progress to the browser
	:param author: author id, e.g. 'gr0026'
	:param work: work number, e.g. '001'
	:param passage: starting citation, levels joined by citationdelimiter
	:param endpoint: optional ending citation for a span
	:param citationdelimiter: separator between citation levels
	:param justvocab: if True, return only the cdict of line ranges
	:return: JSON string with the index HTML and metadata
		NOTE(review): when justvocab is True this returns a plain dict,
		not a JSON_STR as annotated — callers of that path expect the dict
	"""

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                      citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    # cdict maps work universalid -> (first line, last line) to index
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        # a span within one work: resolve both endpoints to db line numbers
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo,
                                                       stop,
                                                       dbcursor,
                                                       findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber[
                'code'] == 'success':
            cdict = {
                wo.universalid:
                (firstlinenumber['line'], lastlinenumber['line'])
            }
            startln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, firstlinenumber['line'],
                                    dbcursor))
            stopln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, lastlinenumber['line'],
                                    dbcursor))
        else:
            # bad span: warn and mark invalid so no index is built
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(),
                                               b=stopln.shortlocus())
    elif ao and work and psg:
        # subsection of a work of an author
        progresspolldict[pollid].statusis(
            'Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # one work
        progresspolldict[pollid].statusis(
            'Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # whole author
        allworks = [
            '{w}  ⇒ {t}'.format(w=w.universalid[6:10], t=w.title)
            for w in ao.listofworks
        ]
        allworks.sort()
        progresspolldict[pollid].statusis(
            'Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        # vocabulary callers only want the line ranges, not the HTML index
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid],
                                  useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        # thousands separators for the word count; fall back if the locale
        # is unavailable on this system
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        # placeholder author so the result dict below can always be filled in
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
예제 #6
0
def sessionselectionsinfo(authordict: dict, workdict: dict) -> dict:
    """
	build the selections html either for a or b:
		#selectionstable + #selectioninfocell
		#selectionstable + #exclusioninfocell
	there are seven headings to populate
		[a] author classes
		[b] work genres
		[c] author location
		[d] work provenance
		[e] author selections
		[f] work selections
		[g] passage selections

	id numbers need to be attached to the selections so that they can be double-clicked so as to delete them

	:param authordict: map of author id -> author object
	:param workdict: map of work universalid -> work object
	:return: {'selections': html, 'exclusions': html,
		'numberofselections': int, 'jstuples': [(var, localval), ...]}
	"""

    returndict = dict()
    thejs = list()

    tit = 'title="Double-click to remove this item"'

    try:
        # it is possible to hit this function before the session has been set, so...
        session['auselections']
    except KeyError:
        probeforsessionvariables()

    sessionsearchlist = session['auselections'] + session['agnselections'] + session['wkgnselections'] + \
                        session['psgselections'] + session['wkselections'] + session['alocselections'] + \
                        session['wlocselections']

    for selectionorexclusion in ['selections', 'exclusions']:
        thehtml = list()
        # if there are no explicit selections, then
        if not sessionsearchlist and selectionorexclusion == 'selections':
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[All in active corpora less exclusions]<br />')

        if selectionorexclusion == 'exclusions' and not sessionsearchlist and session['spuria'] == 'Y' and \
          not session['wkgnexclusions'] and not session['agnexclusions'] and not session['auexclusions']:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[No exclusions]<br />')

        # [a] author classes
        v = 'agn'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Author categories</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [b] work genres
        v = 'wkgn'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Work genres</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [c] author location
        v = 'aloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Author location</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [d] work provenance
        v = 'wloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Work provenance</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [e] authors
        v = 'au'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            localval = -1
            for s in session[var]:
                localval += 1
                ao = authordict[s]
                thehtml.append(
                    '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{s}</span>'
                    '<br />'.format(v=v,
                                    soe=selectionorexclusion,
                                    var=var,
                                    lv=localval,
                                    s=ao.akaname,
                                    tit=tit))
                thejs.append((var, localval))

        # [f] works
        v = 'wk'
        var = v + selectionorexclusion
        if session[var] and selectionorexclusion == 'exclusions' and session[
                'spuria'] == 'N':
            thehtml.append('<span class="picklabel">Works</span><br />')
            thehtml.append('[All non-selected spurious works]<br />')

        if session[var]:
            thehtml.append('<span class="picklabel">Works</span><br />')
            if selectionorexclusion == 'exclusions' and session[
                    'spuria'] == 'N':
                thehtml.append('[Non-selected spurious works]<br />')
            localval = -1
            for s in session[var]:
                localval += 1
                # first six characters of a work id name its author
                uid = s[:6]
                ao = authordict[uid]
                wk = workdict[s]
                thehtml.append(
                    '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, '
                    '<span class="pickedwork">{wk}</span></span>'
                    '<br />'.format(v=v,
                                    var=var,
                                    soe=selectionorexclusion,
                                    lv=localval,
                                    au=ao.akaname,
                                    tit=tit,
                                    wk=wk.title))
                thejs.append((var, localval))

        # [g] passages
        v = 'psg'
        var = v + selectionorexclusion
        if session[var]:
            psgtemplate = '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, <span class="pickedwork">{wk}</span>&nbsp; <span class="pickedsubsection">{loc}</span></span><br />'
            spantemplate = 'from {a} to {b}'
            thehtml.append('<span class="picklabel">Passages</span><br />')
            localval = -1
            for s in session[var]:
                localval += 1
                uid = s[:6]
                ao = authordict[uid]
                loc = str()
                # watch out for heterogenous passage selection formats; only _AT_ and _FROM_ exist ATM
                # session[psgselections] = ['lt0474w005_FROM_4501_TO_11915', 'lt2806w002_AT_3|4|5']
                # NOTE(review): if a third format ever appears, 'wk' below
                # will be unbound (or stale) and 'loc' will stay empty —
                # the template call would then raise; confirm before adding formats
                if '_AT_' in s:
                    locus = s.split('_AT_')[1].split('|')
                    locus.reverse()
                    citationtuple = tuple(locus)
                    for w in ao.listofworks:
                        if w.universalid == s[0:10]:
                            wk = w
                    loc = prolixlocus(wk, citationtuple)
                elif '_FROM_' in s:
                    # span selections must be resolved against the database
                    dbconnection = ConnectionObject()
                    dbcursor = dbconnection.cursor()
                    wk = workdict[s[0:10]]
                    locus = s.split('_FROM_')[1]
                    start = locus.split('_TO_')[0]
                    stop = locus.split('_TO_')[1]
                    startln = dblineintolineobject(
                        grabonelinefromwork(uid, start, dbcursor))
                    stopln = dblineintolineobject(
                        grabonelinefromwork(uid, stop, dbcursor))
                    dbconnection.connectioncleanup()
                    # print('_FROM_', start, stop, startln.uncleanlocustuple(), stopln.uncleanlocustuple())
                    loc = spantemplate.format(a=startln.prolixlocus(),
                                              b=stopln.prolixlocus())

                thehtml.append(
                    psgtemplate.format(v=v,
                                       var=var,
                                       soe=selectionorexclusion,
                                       lv=localval,
                                       au=ao.akaname,
                                       wk=wk.title,
                                       loc=loc,
                                       tit=tit))
                thejs.append((var, localval))

        returndict[selectionorexclusion] = '\n'.join(thehtml)

    scount = len(session['auselections'] + session['wkselections'] +
                 session['agnselections'] + session['wkgnselections'] +
                 session['psgselections'] + session['alocselections'] +
                 session['wlocselections'])
    scount += len(session['auexclusions'] + session['wkexclusions'] +
                  session['agnexclusions'] + session['wkgnexclusions'] +
                  session['psgexclusions'] + session['alocexclusions'] +
                  session['wlocexclusions'])

    # -1 signals "nothing selected" to the front end
    returndict['numberofselections'] = -1
    if scount > 0:
        returndict['numberofselections'] = scount

    returndict['jstuples'] = thejs

    return returndict