Example #1
def sessiontimeexclusionsinfo():
    """
	build time exlusion html for #selectionstable + #timerestrictions
	:return:
	"""

    try:
        # it is possible to hit this function before the session has been set, so...
        session['latestdate']
    except KeyError:
        probeforsessionvariables()

    info = 'Unless specifically listed, authors/works must come from {early}&nbsp;to {late}'
    timerestrictions = ''

    if session['latestdate'] != '1500' or session['earliestdate'] != '-850':
        if int(session['earliestdate']) < 0:
            early = session['earliestdate'][1:] + ' B.C.E'
        else:
            early = session['earliestdate'] + ' C.E'
        if int(session['latestdate']) < 0:
            late = session['latestdate'][1:] + ' B.C.E'
        else:
            late = session['latestdate'] + ' C.E'

        timerestrictions = info.format(early=early, late=late)

    return timerestrictions
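
The sign-flip formatting above can be factored into a tiny helper; a minimal sketch (the name eralabel is hypothetical, not part of the codebase):

def eralabel(datestring: str) -> str:
    # negative years are B.C.E.; strip the leading '-' rather than printing it
    if int(datestring) < 0:
        return datestring[1:] + ' B.C.E'
    return datestring + ' C.E'

assert eralabel('-850') == '850 B.C.E'
assert eralabel('1500') == '1500 C.E'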
Example #2
def loadcssfile(cssrequest) -> FlaskResponse:
	"""

	send the CSS, but insert config-based fonts, etc. into it first

	:return:
	"""

	probeforsessionvariables()

	# extremely unsafe to allow user to supply a path

	validcss = [hipparchia.config['CSSSTYLESHEET'], 'ldavis.css']

	cssfile = hipparchia.config['CSSSTYLESHEET']

	if cssrequest in validcss:
		cssfile = cssrequest

	with open(hipparchia.root_path+'/css/'+cssfile, encoding='utf8') as f:
		css = f.read()

	cfo = CssFormattingObject(css)
	cfo.runcleaningsuite()
	css = cfo.css

	# return send_from_directory('css', cssfile)

	response = make_response(css)
	response.headers.set('Content-Type', 'text/css')
	response.headers.set('Content-Disposition', 'attachment', filename=cssfile)

	return response
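
Note that the route never opens a user-supplied path directly: the request is honored only if it matches a known filename, otherwise the configured default wins. A generic sketch of that whitelist pattern (names are illustrative, not Hipparchia's):

def choosefile(requested: str, whitelist: list, default: str) -> str:
    # user input selects among known-good names; it never becomes a path fragment
    return requested if requested in whitelist else default

# choosefile('../../etc/passwd', ['hipparchiastyles.css', 'ldavis.css'], 'hipparchiastyles.css')
# -> 'hipparchiastyles.css'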
Example #3
def headwordsearch(searchid, headform) -> JSON_STR:
    """

	you get sent here via the morphology tables

	this is a restricted version of executesearch(): a dictionary headword

	:param searchid:
	:param headform:
	:return:
	"""

    probeforsessionvariables()
    inputlemma = cleaninitialquery(headform)

    try:
        lemma = lemmatadict[inputlemma]
    except KeyError:
        lemma = None

    pollid = validatepollid(searchid)
    seeking = str()
    proximate = str()

    proximatelemma = str()

    so = SearchObject(pollid, seeking, proximate, lemma, proximatelemma,
                      session)

    jsonoutput = executesearch(pollid, so)

    return jsonoutput
Example #4
def singlewordsearch(searchid, searchterm) -> JSON_STR:
    """

	you get sent here via the morphology tables

	this is a restricted version of executesearch(): single, exact term

	WINDOWS ONLY ERROR: this function will trigger a recursion error

	the situation looks a lot like case #3 @ https://bugs.python.org/issue9592

	but that is supposed to be a closed bug

	cf the complaints at https://forums.fast.ai/t/recursion-error-fastai-v1-0-27-windows-10/30673/10

	"multiprocessing\popen_spawn_win32.py" is the culprit?

	the current 'solution' is to send things to executesearch() instead "if osname == 'nt'"
	this test is inside morphologychartjs(); this is a potential source of future brittleness
	to the extent that one wants to explore refactoring executesearch()

	:param searchid:
	:param searchterm:
	:return:
	"""

    probeforsessionvariables()

    pollid = validatepollid(searchid)
    searchterm = cleaninitialquery(searchterm)
    seeking = ' {s} '.format(s=searchterm)
    proximate = str()
    lemma = None
    proximatelemma = None

    so = SearchObject(pollid, seeking, proximate, lemma, proximatelemma,
                      session)

    jsonoutput = executesearch(pollid, so)

    return jsonoutput
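
Per the docstring, the Windows workaround lives where the morphology-table URLs are generated; a hedged sketch of that platform test (simplified; this is not the real morphologychartjs()):

from os import name as osname

def picksearchendpoint() -> str:
    # singlewordsearch() trips a multiprocessing recursion error on Windows,
    # so 'nt' hosts are routed to the generic executesearch() instead
    return 'executesearch' if osname == 'nt' else 'singlewordsearch'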
Example #5
def frontpage() -> PAGE_STR:
    """

	the front page. it used to do stuff; now it just loads the JS which then calls all of the routes:
	regular users never leave the front page
	the only other pages are basically debug pages as seen in debuggingroutes.py

	:return:
	"""

    probeforsessionvariables()

    fonts = hipparchia.config['FONTPICKERLIST']
    fonts.sort()
    if fonts:
        picker = hipparchia.config['ENBALEFONTPICKER']
    else:
        picker = False

    debugpanel = hipparchia.config['ALLOWUSERTOSETDEBUGMODES']

    havevectors = hipparchia.config['SEMANTICVECTORSENABLED']

    knowncorpora = [
        'greekcorpus', 'latincorpus', 'papyruscorpus', 'inscriptioncorpus',
        'christiancorpus'
    ]

    # check to see which dbs we search by default or are presently active
    activecorpora = [c for c in knowncorpora if session[c]]

    if not hipparchia.config['AVOIDCIRCLEDLETTERS']:
        corporalabels = {'g': 'Ⓖ', 'l': 'Ⓛ', 'd': 'Ⓓ', 'i': 'Ⓘ', 'c': 'Ⓒ'}
    elif hipparchia.config['FALLBACKTODOUBLESTRIKES']:
        corporalabels = {'g': '𝔾', 'l': '𝕃', 'd': '𝔻', 'i': '𝕀', 'c': 'ℂ'}
    else:
        corporalabels = {'g': 'G', 'l': 'L', 'd': 'D', 'i': 'I', 'c': 'C'}

    icanzap = 'yes'
    if osname == 'nt':
        # windows can't have the UI σ/ς option because it can't fork()
        # the 'fix' is to have frozensession always available when building a dbWorkLine
        # but that involves a lot of kludge just to make a very optional option work
        icanzap = 'no'

    loginform = None

    if hipparchia.config['LIMITACCESSTOLOGGEDINUSERS']:
        loginform = LoginForm()

    page = render_template('search.html',
                           activelists=activelists,
                           activecorpora=activecorpora,
                           clab=corporalabels,
                           css=stylesheet,
                           backend=theenvironment,
                           buildinfo=buildinfo,
                           onehit=session['onehit'],
                           picker=picker,
                           fonts=fonts,
                           hwindexing=session['headwordindexing'],
                           indexbyfrequency=session['indexbyfrequency'],
                           spuria=session['spuria'],
                           varia=session['varia'],
                           undated=session['incerta'],
                           debug=debugpanel,
                           vectorhtml=vectorhtmlforfrontpage(),
                           vectoroptionshtml=vectorhtmlforoptionsbar(),
                           havevectors=havevectors,
                           version=version,
                           shortversion=shortversion,
                           searchfieldbuttons=getsearchfieldbuttonshtml(),
                           holdingshtml=getauthorholdingfieldhtml(),
                           datesearchinghtml=getdaterangefieldhtml(),
                           lexicalthml=getlexicafieldhtml(),
                           icanzap=icanzap,
                           loginform=loginform)

    return page
Example #6
def buildsearchobject(searchid: str, therequest: request,
                      thesession: session) -> SearchObject:
    """

	generic searchobject builder

	:param searchid:
	:param therequest:
	:param thesession:
	:return:
	"""

    whitespace = ' '

    if not searchid:
        searchid = str(int(time.time()))

    probeforsessionvariables()

    # a search can take 30s or more and the user might alter the session while the search is running
    # by toggling onehit, etc.; that can be a problem, so freeze the values now and rely on this instead
    # of some moving target
    frozensession = thesession.copy()

    # need to sanitize input at least a bit: remove digits and punctuation
    # dispatcher will do searchtermcharactersubstitutions() and massagesearchtermsforwhitespace() to take
    # care of lunate sigma, etc.

    seeking = cleaninitialquery(therequest.args.get('skg', ''))
    proximate = cleaninitialquery(therequest.args.get('prx', ''))
    inputlemma = cleaninitialquery(therequest.args.get('lem', ''))
    inputproximatelemma = cleaninitialquery(therequest.args.get('plm', ''))

    try:
        lemma = lemmatadict[inputlemma]
    except KeyError:
        lemma = None

    # print('lo forms', lemma.formlist)

    try:
        proximatelemma = lemmatadict[inputproximatelemma]
    except KeyError:
        proximatelemma = None

    replacebeta = False

    if hipparchia.config['UNIVERSALASSUMESBETACODE'] and re.search(
            '[a-zA-Z]', seeking):
        # why the 'and' condition:
        #   sending unicode 'οὐθενὸϲ' to the betacode function will result in 0 hits
        #   this is something that could/should be debugged within that function,
        #   but in practice it is silly to allow hybrid betacode/unicode; this only
        #   makes the life of a person who wants unicode+regex w/ a betacode option more difficult
        replacebeta = True

    if hipparchia.config['TLGASSUMESBETACODE']:
        if justtlg() and (re.search('[a-zA-Z]', seeking) or re.search(
                '[a-zA-Z]', proximate)) and not re.search(
                    minimumgreek, seeking) and not re.search(
                        minimumgreek, proximate):
            replacebeta = True

    if replacebeta:
        seeking = seeking.upper()
        seeking = replacegreekbetacode(seeking)
        seeking = seeking.lower()
        proximate = proximate.upper()
        proximate = replacegreekbetacode(proximate)
        proximate = proximate.lower()

    if seeking == whitespace:
        seeking = str()

    if proximate == whitespace:
        proximate = str()

    so = SearchObject(searchid, seeking, proximate, lemma, proximatelemma,
                      frozensession)

    return so
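
The TLGASSUMESBETACODE test above is dense; restated in isolation (a sketch: the real test also requires justtlg(), and minimumgreek is assumed to be the module's compiled 'contains Greek' pattern):

import re

def lookslikebetacode(seeking: str, proximate: str, minimumgreek) -> bool:
    # latin letters present in either term, and no Greek in either,
    # suggests the user typed betacode rather than unicode
    haslatin = re.search('[a-zA-Z]', seeking) or re.search('[a-zA-Z]', proximate)
    hasgreek = re.search(minimumgreek, seeking) or re.search(minimumgreek, proximate)
    return bool(haslatin and not hasgreek)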
Example #7
def textmaker(author: str,
              work=None,
              passage=None,
              endpoint=None,
              citationdelimiter='|') -> JSON_STR:
    """
	build a text suitable for display

		"GET /textof/lt0474/024/20/30"

	:return:
	"""

    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint,
                                     citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            firstlinenumber = finddblinefromincompletelocus(
                wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo,
                                                           po.endpointlist,
                                                           dbcursor,
                                                           findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber[
                    'code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                startlnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(
                    msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(),
                                                   b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            startandstop = textsegmentfindstartandstop(ao, wo,
                                                       po.passageaslist,
                                                       dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery,
                             dbcursor)
    else:
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
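
A hedged reading of the docstring's sample request against the signature (the route decorator itself is not shown in this excerpt):

# "GET /textof/lt0474/024/20/30" presumably arrives as
#   textmaker(author='lt0474', work='024', passage='20', endpoint='30')
# i.e. finddblinefromincompletelocus() resolves '20' and '30' to db line
# numbers and buildtext() renders everything between them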
Example #8
def buildindexto(searchid: str,
                 author: str,
                 work=None,
                 passage=None,
                 endpoint=None,
                 citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
	build a complete index to an author, work, or segment of a work

	:return:
	"""

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                      citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo,
                                                       stop,
                                                       dbcursor,
                                                       findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber[
                'code'] == 'success':
            cdict = {
                wo.universalid:
                (firstlinenumber['line'], lastlinenumber['line'])
            }
            startln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, firstlinenumber['line'],
                                    dbcursor))
            stopln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, lastlinenumber['line'],
                                    dbcursor))
        else:
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(),
                                               b=stopln.shortlocus())
    elif ao and work and psg:
        # subsection of a work of an author
        progresspolldict[pollid].statusis(
            'Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # one work
        progresspolldict[pollid].statusis(
            'Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # whole author
        allworks = [
            '{w}  ⇒ {t}'.format(w=w.universalid[6:10], t=w.title)
            for w in ao.listofworks
        ]
        allworks.sort()
        progresspolldict[pollid].statusis(
            'Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid],
                                  useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
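
The locale dance near the end exists only to group digits in the word count; a quick demonstration (assumes the en_US locale is installed on the host, hence the locale.Error fallback above):

import locale

locale.setlocale(locale.LC_ALL, 'en_US')
count = locale.format_string('%d', 1234567, grouping=True)  # -> '1,234,567'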
Example #9
def sessionselectionsinfo(authordict: dict, workdict: dict) -> dict:
    """
	build the selections html either for a or b:
		#selectionstable + #selectioninfocell
		#selectionstable + #exclusioninfocell
	there are seven headings to populate
		[a] author classes
		[b] work genres
		[c] author location
		[d] work provenance
		[e] author selections
		[f] work selections
		[g] passage selections

	id numbers need to be attached to the selections so that they can be double-clicked so as to delete them

	:param authordict:
	:param workdict:
	:return:
	"""

    returndict = dict()
    thejs = list()

    tit = 'title="Double-click to remove this item"'

    try:
        # it is possible to hit this function before the session has been set, so...
        session['auselections']
    except KeyError:
        probeforsessionvariables()

    sessionsearchlist = session['auselections'] + session['agnselections'] + session['wkgnselections'] + \
                        session['psgselections'] + session['wkselections'] + session['alocselections'] + \
                        session['wlocselections']

    for selectionorexclusion in ['selections', 'exclusions']:
        thehtml = list()
        # if there are no explicit selections, then
        if not sessionsearchlist and selectionorexclusion == 'selections':
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[All in active corpora less exclusions]<br />')

        if selectionorexclusion == 'exclusions' and not sessionsearchlist and session['spuria'] == 'Y' and \
          not session['wkgnexclusions'] and not session['agnexclusions'] and not session['auexclusions']:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            thehtml.append('[No exclusions]<br />')

        # [a] author classes
        v = 'agn'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Author categories</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [b] work genres
        v = 'wkgn'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Work genres</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [c] author location
        v = 'aloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Author location</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [d] work provenance
        v = 'wloc'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append(
                '<span class="picklabel">Work provenance</span><br />')
            htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion,
                                               session)
            thehtml += htmlandjs['html']
            thejs += htmlandjs['js']

        # [e] authors
        v = 'au'
        var = v + selectionorexclusion
        if session[var]:
            thehtml.append('<span class="picklabel">Authors</span><br />')
            localval = -1
            for s in session[var]:
                localval += 1
                ao = authordict[s]
                thehtml.append(
                    '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{s}</span>'
                    '<br />'.format(v=v,
                                    soe=selectionorexclusion,
                                    var=var,
                                    lv=localval,
                                    s=ao.akaname,
                                    tit=tit))
                thejs.append((var, localval))

        # [f] works
        v = 'wk'
        var = v + selectionorexclusion
        # no explicit exclusions, but spuria are implicitly excluded
        if not session[var] and selectionorexclusion == 'exclusions' and session[
                'spuria'] == 'N':
            thehtml.append('<span class="picklabel">Works</span><br />')
            thehtml.append('[All non-selected spurious works]<br />')

        if session[var]:
            thehtml.append('<span class="picklabel">Works</span><br />')
            if selectionorexclusion == 'exclusions' and session[
                    'spuria'] == 'N':
                thehtml.append('[Non-selected spurious works]<br />')
            localval = -1
            for s in session[var]:
                localval += 1
                uid = s[:6]
                ao = authordict[uid]
                wk = workdict[s]
                thehtml.append(
                    '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, '
                    '<span class="pickedwork">{wk}</span></span>'
                    '<br />'.format(v=v,
                                    var=var,
                                    soe=selectionorexclusion,
                                    lv=localval,
                                    au=ao.akaname,
                                    tit=tit,
                                    wk=wk.title))
                thejs.append((var, localval))

        # [g] passages
        v = 'psg'
        var = v + selectionorexclusion
        if session[var]:
            psgtemplate = '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, <span class="pickedwork">{wk}</span>&nbsp; <span class="pickedsubsection">{loc}</span></span><br />'
            spantemplate = 'from {a} to {b}'
            thehtml.append('<span class="picklabel">Passages</span><br />')
            localval = -1
            for s in session[var]:
                localval += 1
                uid = s[:6]
                ao = authordict[uid]
                loc = str()
                # watch out for heterogeneous passage selection formats; only _AT_ and _FROM_ exist ATM
                # session[psgselections] = ['lt0474w005_FROM_4501_TO_11915', 'lt2806w002_AT_3|4|5']
                if '_AT_' in s:
                    locus = s.split('_AT_')[1].split('|')
                    locus.reverse()
                    citationtuple = tuple(locus)
                    for w in ao.listofworks:
                        if w.universalid == s[0:10]:
                            wk = w
                    loc = prolixlocus(wk, citationtuple)
                elif '_FROM_' in s:
                    dbconnection = ConnectionObject()
                    dbcursor = dbconnection.cursor()
                    wk = workdict[s[0:10]]
                    locus = s.split('_FROM_')[1]
                    start = locus.split('_TO_')[0]
                    stop = locus.split('_TO_')[1]
                    startln = dblineintolineobject(
                        grabonelinefromwork(uid, start, dbcursor))
                    stopln = dblineintolineobject(
                        grabonelinefromwork(uid, stop, dbcursor))
                    dbconnection.connectioncleanup()
                    # print('_FROM_', start, stop, startln.uncleanlocustuple(), stopln.uncleanlocustuple())
                    loc = spantemplate.format(a=startln.prolixlocus(),
                                              b=stopln.prolixlocus())

                thehtml.append(
                    psgtemplate.format(v=v,
                                       var=var,
                                       soe=selectionorexclusion,
                                       lv=localval,
                                       au=ao.akaname,
                                       wk=wk.title,
                                       loc=loc,
                                       tit=tit))
                thejs.append((var, localval))

        returndict[selectionorexclusion] = '\n'.join(thehtml)

    scount = len(session['auselections'] + session['wkselections'] +
                 session['agnselections'] + session['wkgnselections'] +
                 session['psgselections'] + session['alocselections'] +
                 session['wlocselections'])
    scount += len(session['auexclusions'] + session['wkexclusions'] +
                  session['agnexclusions'] + session['wkgnexclusions'] +
                  session['psgexclusions'] + session['alocexclusions'] +
                  session['wlocexclusions'])

    returndict['numberofselections'] = -1
    if scount > 0:
        returndict['numberofselections'] = scount

    returndict['jstuples'] = thejs

    return returndict
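
The two passage-selection encodings noted in the comments unpack as follows (a sketch using the sample values from the comment above):

s = 'lt2806w002_AT_3|4|5'
locus = s.split('_AT_')[1].split('|')  # ['3', '4', '5']
locus.reverse()
citationtuple = tuple(locus)           # ('5', '4', '3'): reversed for prolixlocus()

s = 'lt0474w005_FROM_4501_TO_11915'
locus = s.split('_FROM_')[1]
start = locus.split('_TO_')[0]         # '4501': an absolute db line number
stop = locus.split('_TO_')[1]          # '11915'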
Example #10
def executesearch(searchid: str, so=None, req=request) -> JSON_STR:
    """

	the interface to all of the other search functions

	tell me what you are looking for and i'll try to find it

	the results are returned in a json bundle that will be used to update the html on the page

	note that cosdistbysentence vector queries also flow through here: they need a hitdict

	overview:
		buildsearchobject() and then start modifying elements of the SearchObject

		build a search list via compilesearchlist()
			modify search list via flagexclusions()
			modify search list via calculatewholeauthorsearches()
		build search list restrictions via indexrestrictions()

		search via searchdispatcher()

		format results via buildresultobjects()

	:return:
	"""

    pollid = validatepollid(searchid)

    if not so:
        # there is a so if singlewordsearch() sent you here
        probeforsessionvariables()
        so = buildsearchobject(pollid, req, session)

    frozensession = so.session

    progresspolldict[pollid] = ProgressPoll(pollid)
    so.poll = progresspolldict[pollid]

    so.poll.activate()
    so.poll.statusis('Preparing to search')

    nosearch = True
    output = SearchOutputObject(so)

    allcorpora = [
        'greekcorpus', 'latincorpus', 'papyruscorpus', 'inscriptioncorpus',
        'christiancorpus'
    ]
    activecorpora = [c for c in allcorpora if frozensession[c]]

    if (len(so.seeking) > 0 or so.lemma or frozensession['tensorflowgraph']
            or frozensession['topicmodel']) and activecorpora:
        so.poll.statusis('Compiling the list of works to search')
        so.searchlist = compilesearchlist(listmapper, frozensession)

    if so.searchlist:
        # do this before updatesearchlistandsearchobject() which collapses items and cuts your total
        workssearched = len(so.searchlist)

        # calculatewholeauthorsearches() + configurewhereclausedata()
        so = updatesearchlistandsearchobject(so)

        nosearch = False
        skg = None
        prx = None

        isgreek = re.compile(
            '[α-ωϲἀἁἂἃἄἅἆἇᾀᾁᾂᾃᾄᾅᾆᾇᾲᾳᾴᾶᾷᾰᾱὰάἐἑἒἓἔἕὲέἰἱἲἳἴἵἶἷὶίῐῑῒΐῖῗὀὁὂὃὄὅόὸὐὑὒὓὔὕὖὗϋῠῡῢΰῦῧύὺᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇἤἢἥἣὴήἠἡἦἧὠὡὢὣὤὥὦὧᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷώὼ]'
        )

        if so.lemmaone:
            so.termone = wordlistintoregex(so.lemma.formlist)
            skg = so.termone
            if re.search(isgreek, skg):
                # 'v' is a problem because the lemmata list is going to send 'u'
                # but the greek lemmata are accented
                so.usecolumn = 'accented_line'

        if so.lemmatwo:
            so.termtwo = wordlistintoregex(so.lemmatwo.formlist)
            prx = so.termtwo
            if re.search(isgreek, prx):
                so.usecolumn = 'accented_line'

        so.setsearchtype()
        thesearch = so.generatesearchdescription()
        htmlsearch = so.generatehtmlsearchdescription()

        # now that the SearchObject is built, do the search...
        hits = precomposedsqlsearch(so)
        so.poll.statusis('Putting the results in context')

        # hits is List[dbWorkLine]
        hitdict = sortresultslist(hits, so, authordict, workdict)

        if so.vectorquerytype == 'cosdistbylineorword':
            # print('executesearch(): h - cosdistbylineorword')
            # take these hits and head on over to the vector worker
            output = findabsolutevectorsfromhits(so, hitdict, workssearched)
            del progresspolldict[pollid]
            return output

        resultlist = buildresultobjects(hitdict, authordict, workdict, so)

        so.poll.statusis('Converting results to HTML')

        sandp = rewriteskgandprx(skg, prx, htmlsearch, so)
        skg = sandp['skg']
        prx = sandp['prx']
        htmlsearch = sandp['html']

        for r in resultlist:
            r.lineobjects = flagsearchterms(r, skg, prx, so)

        if so.context > 0:
            findshtml = htmlifysearchfinds(resultlist, so)
        else:
            findshtml = nocontexthtmlifysearchfinds(resultlist)

        if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
            findshtml = gtltsubstitutes(findshtml)

        findsjs = insertbrowserclickjs('browser')

        resultcount = len(resultlist)

        if resultcount < so.cap:
            hitmax = False
        else:
            hitmax = True

        output.title = thesearch
        output.found = findshtml
        output.js = findsjs
        output.setresultcount(resultcount, 'passages')
        output.setscope(workssearched)
        output.searchtime = so.getelapsedtime()
        output.thesearch = thesearch
        output.htmlsearch = htmlsearch
        output.hitmax = hitmax

    if nosearch:
        if not activecorpora:
            output.reasons.append('there are no active databases')
        if len(so.seeking) == 0:
            output.reasons.append('there is no search term')
        if len(so.seeking) > 0 and len(so.searchlist) == 0:
            output.reasons.append('zero works match the search criteria')

        output.title = '(empty query)'
        output.setresultcount(0, 'passages')
        output.explainemptysearch()

    so.poll.deactivate()
    jsonoutput = json.dumps(output.generateoutput())

    del progresspolldict[pollid]

    return jsonoutput
Example #11
def dictsearch(searchterm) -> JSON_STR:
	"""
	look up words
	return dictionary entries
	json packing
	:return:
	"""
	returndict = dict()

	searchterm = searchterm[:hipparchia.config['MAXIMUMLEXICALLENGTH']]
	probeforsessionvariables()

	dbconnection = ConnectionObject()
	dbcursor = dbconnection.cursor()

	if hipparchia.config['UNIVERSALASSUMESBETACODE']:
		searchterm = replacegreekbetacode(searchterm.upper())

	allowedpunct = '^$.'
	seeking = depunct(searchterm, allowedpunct)
	seeking = seeking.lower()
	seeking = re.sub('[σς]', 'ϲ', seeking)
	stripped = stripaccents(seeking)

	# don't turn 'injurius' into '[iiII]n[iiII][uuVV]r[iiII][uuVV]s'
	# that will happen if you call stripaccents() prematurely
	stripped = re.sub(r'[uv]', '[uvUV]', stripped)
	stripped = re.sub(r'[ij]', '[ijIJ]', stripped)

	if re.search(r'[a-z]', seeking):
		usedictionary = 'latin'
		usecolumn = 'entry_name'
	else:
		usedictionary = 'greek'
		usecolumn = 'unaccented_entry'

	if not session['available'][usedictionary + '_dictionary']:
		returndict['newhtml'] = 'cannot look up {w}: {d} dictionary is not installed'.format(d=usedictionary, w=seeking)
		return json.dumps(returndict)

	limit = hipparchia.config['CAPONDICTIONARYFINDS']

	foundtuples = headwordsearch(stripped, limit, usedictionary, usecolumn)

	# example:
	# results are presorted by ID# via the postgres query
	# foundentries [('scrofa¹', 43118), ('scrofinus', 43120), ('scrofipascus', 43121), ('Scrofa²', 43119), ('scrofulae', 43122)]

	returnlist = list()

	if len(foundtuples) == limit:
		returnlist.append('[stopped searching after {lim} finds]<br>'.format(lim=limit))

	if len(foundtuples) > 0:

		if len(foundtuples) == 1:
			# sending '0' to browserdictionarylookup() will hide the count number
			usecounter = False
		else:
			usecounter = True

		wordobjects = [probedictionary(setdictionarylanguage(f[0]) + '_dictionary', 'entry_name', f[0], '=', dbcursor=dbcursor, trialnumber=0) for f in foundtuples]
		wordobjects = flattenlistoflists(wordobjects)
		outputobjects = [lexicalOutputObject(w) for w in wordobjects]

		# very top: list the finds
		if usecounter:
			findstemplate = '({n})&nbsp;<a class="nounderline" href="#{w}_{wdid}">{w}</a>'
			findslist = [findstemplate.format(n=f[0]+1, w=f[1][0], wdid=f[1][1]) for f in enumerate(foundtuples)]
			returnlist.append('\n<br>\n'.join(findslist))

		# the actual entries
		count = 0
		for oo in outputobjects:
			count += 1
			if usecounter:
				entry = oo.generatelexicaloutput(countervalue=count)
			else:
				entry = oo.generatelexicaloutput()
			returnlist.append(entry)
	else:
		returnlist.append('[nothing found]')

	if session['zaplunates']:
		returnlist = [attemptsigmadifferentiation(x) for x in returnlist]
		returnlist = [abbreviatedsigmarestoration(x) for x in returnlist]

	returndict['newhtml'] = '\n'.join(returnlist)
	returndict['newjs'] = '\n'.join([dictionaryentryjs(), insertlexicalbrowserjs()])

	jsondict = json.dumps(returndict)

	dbconnection.connectioncleanup()

	return jsondict
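
The u/v and i/j substitutions turn a plain Latin lookup into a spelling-insensitive regex; worked through on the comment's own example:

import re

stripped = 'iniurius'
stripped = re.sub(r'[uv]', '[uvUV]', stripped)
stripped = re.sub(r'[ij]', '[ijIJ]', stripped)
# -> '[ijIJ]n[ijIJ][uvUV]r[ijIJ][uvUV]s'
# a pattern that matches iniurius, injurius, inivrivs, ...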
Example #12
def reverselexiconsearch(searchid, searchterm) -> JSON_STR:
	"""
	attempt to find all of the greek/latin dictionary entries that might go with the english search term

	'ape' will drive this crazy; what is needed is a lookup for only the senses

	this can be built into the dictionary

	:param searchid:
	:param searchterm:
	:return:
	"""

	searchterm = searchterm[:hipparchia.config['MAXIMUMLEXICALLENGTH']]
	pollid = validatepollid(searchid)
	progresspolldict[pollid] = ProgressPoll(pollid)
	activepoll = progresspolldict[pollid]
	activepoll.activate()
	activepoll.statusis('Searching lexical entries for "{t}"'.format(t=searchterm))

	probeforsessionvariables()

	returndict = dict()
	returnarray = list()

	seeking = depunct(searchterm)

	if justlatin():
		searchunder = [('latin', 'hi')]
	elif justtlg():
		searchunder = [('greek', 'tr')]
	else:
		searchunder = [('greek', 'tr'), ('latin', 'hi')]

	limit = hipparchia.config['CAPONDICTIONARYFINDS']

	entriestuples = list()
	for s in searchunder:
		usedict = s[0]
		translationlabel = s[1]
		# first see if your term is mentioned at all
		wordobjects = reversedictionarylookup(seeking, usedict, limit)
		entriestuples += [(w.entry, w.id) for w in wordobjects]

	if len(entriestuples) == limit:
		returnarray.append('[stopped searching after {lim} finds]\n<br>\n'.format(lim=limit))

	entriestuples = list(set(entriestuples))

	unsortedentries = [(querytotalwordcounts(e[0]), e[0], e[1]) for e in entriestuples]
	entries = list()
	for e in unsortedentries:
		hwcountobject = e[0]
		term = e[1]
		idval = e[2]
		if hwcountobject:
			entries.append((hwcountobject.t, term, idval))
		else:
			entries.append((0, term, idval))
	entries = sorted(entries, reverse=True)
	entriestuples = [(e[1], e[2]) for e in entries]

	# now we retrieve and format the entries
	if entriestuples:
		# summary of entry values first
		countobjectdict = {e: querytotalwordcounts(e[0]) for e in entriestuples}
		summary = list()
		count = 0
		for c in countobjectdict.keys():
			count += 1
			try:
				totalhits = countobjectdict[c].t
			except AttributeError:
				# querytotalwordcounts() may have returned None for this entry
				totalhits = 0
			# c[0]: the word; c[1]: the id
			summary.append((count, c[0], c[1], totalhits))

		summarytemplate = """
		<span class="sensesum">({n})&nbsp;
			<a class="nounderline" href="#{w}_{wdid}">{w}</a>&nbsp;
			<span class="small">({t:,})</span>
		</span>
		"""

		summary = sorted(summary, key=lambda x: x[3], reverse=True)
		summary = [summarytemplate.format(n=e[0], w=e[1], wdid=e[2], t=e[3]) for e in summary]
		returnarray.append('\n<br />\n'.join(summary))

		# then the entries proper
		dbconnection = ConnectionObject()
		dbconnection.setautocommit()
		dbcursor = dbconnection.cursor()

		wordobjects = [probedictionary(setdictionarylanguage(e[0]) + '_dictionary', 'entry_name', e[0], '=', dbcursor=dbcursor, trialnumber=0) for e in entriestuples]
		wordobjects = flattenlistoflists(wordobjects)
		outputobjects = [lexicalOutputObject(w) for w in wordobjects]
		if len(outputobjects) > 1:
			usecounter = True
		else:
			usecounter = False

		count = 0
		for oo in outputobjects:
			count += 1
			if usecounter:
				entry = oo.generatelexicaloutput(countervalue=count)
			else:
				entry = oo.generatelexicaloutput()
			returnarray.append(entry)
	else:
		returnarray.append('<br />[nothing found under "{skg}"]'.format(skg=seeking))

	returndict['newhtml'] = '\n'.join(returnarray)
	returndict['newjs'] = '\n'.join([dictionaryentryjs(), insertlexicalbrowserjs()])

	jsondict = json.dumps(returndict)

	del progresspolldict[pollid]

	return jsondict
Example #13
def findbyform(observedword, authorid=None) -> JSON_STR:
	"""
	this function sets off a chain of other functions
	find dictionary form
	find the other possible forms
	look up the dictionary form
	return a formatted set of info
	:return:
	"""

	if authorid and authorid not in authordict:
		authorid = None

	observedword = observedword[:hipparchia.config['MAXIMUMLEXICALLENGTH']]

	probeforsessionvariables()

	dbconnection = ConnectionObject()
	dbcursor = dbconnection.cursor()

	sanitationerror = '[empty search: <span class="emph">{w}</span> was sanitized into nothingness]'
	dberror = '<br />[the {lang} morphology data has not been installed]'
	notfounderror = '<br />[could not find a match for <span class="emph">{cw}</span> in the morphology table]'
	nodataerror = '<br /><br />no prevalence data for {w}'

	# the next is pointless because: 'po/lemon' will generate a URL '/parse/po/lemon'
	# that will 404 before you can get to replacegreekbetacode()
	# this is a bug in the interaction between Flask and the JS

	# if hipparchia.config['UNIVERSALASSUMESBETACODE']:
	# 	observedword = replacegreekbetacode(observedword.upper())

	# the next makes sense only in the context of pointedly invalid input
	w = depunct(observedword)
	w = w.strip()
	w = tidyupterm(w)
	w = re.sub(r'[σς]', 'ϲ', w)

	# python seems to know how to do this with greek...
	w = w.lower()
	retainedgravity = w
	cleanedword = removegravity(retainedgravity)

	# index clicks will send you things like 'αὖ²'
	cleanedword = re.sub(r'[⁰¹²³⁴⁵⁶⁷⁸⁹]', str(), cleanedword)

	# the search syntax is '=' and not '~', so the next should be avoided unless a lot of refactoring will happen
	# cleanedword = re.sub(r'[uv]', r'[uv]', cleanedword)
	# cleanedword = re.sub(r'[ij]', r'[ij]', cleanedword)

	# a collection of HTML items that the JS will just dump out later; i.e. a sort of pseudo-page
	returndict = dict()

	try:
		cleanedword[0]
	except IndexError:
		returndict['newhtml'] = sanitationerror.format(w=observedword)
		return json.dumps(returndict)

	isgreek = True
	if re.search(r'[a-z]', cleanedword[0]):
		cleanedword = stripaccents(cleanedword)
		isgreek = False

	morphologyobject = lookformorphologymatches(cleanedword, dbcursor)
	# print('findbyform() mm',morphologyobject.getpossible()[0].transandanal)
	# φέρεται --> morphologymatches [('<possibility_1>', '1', 'φέρω', '122883104', '<transl>fero</transl><analysis>pres ind mp 3rd sg</analysis>')]

	if morphologyobject:
		oo = multipleWordOutputObject(cleanedword, morphologyobject, authorid)
		returndict['newhtml'] = oo.generateoutput()
	else:
		newhtml = list()
		if isgreek and not session['available']['greek_morphology']:
			newhtml.append(dberror.format(lang='Greek'))
		elif not isgreek and not session['available']['latin_morphology']:
			newhtml.append(dberror.format(lang='Latin'))
		else:
			newhtml.append(notfounderror.format(cw=cleanedword))

		prev = getobservedwordprevalencedata(cleanedword)
		if not prev:
			# fall back to the accented form before giving up
			prev = getobservedwordprevalencedata(retainedgravity)
		if not prev:
			newhtml.append(nodataerror.format(w=retainedgravity))
		else:
			newhtml.append(prev)
		try:
			returndict['newhtml'] = '\n'.join(newhtml)
		except TypeError:
			returndict['newhtml'] = '[nothing found]'

	returndict['newjs'] = '\n'.join([dictionaryentryjs(), insertlexicalbrowserjs()])
	jsondict = json.dumps(returndict)

	dbconnection.connectioncleanup()

	return jsondict
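
Two of the scrubbing steps above in isolation (sample values taken from the comments):

import re

re.sub(r'[σς]', 'ϲ', 'σοφός')           # -> 'ϲοφόϲ': normalize sigmas to lunate
re.sub(r'[⁰¹²³⁴⁵⁶⁷⁸⁹]', '', 'αὖ²')      # -> 'αὖ': index clicks send homonym superscripts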
Example #14
def selectionmade(requestargs: MultiDict) -> JSON_STR:
	"""

	once a choice is made, parse and register it inside session['selections']
	then return the human readable version of the same for display on the page

	'_AT_' syntax is used to restrict the scope of a search

	"GET /selection/make/_?auth=lt0474&work=001&locus=13|4&endpoint= HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '13|4'), ('endpoint', '')])

	"GET /selection/make/_?auth=lt0474&work=001&locus=10&endpoint=20&raw=t HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '10'), ('endpoint', '20'), ('raw', 't')])

	"GET /selection/make/_?auth=lt0474&work=001&exclude=t HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('exclude', 't')])

	:return:
	"""

	probeforsessionvariables()

	uid = depunct(requestargs.get('auth', str()))
	workid = depunct(requestargs.get('work', str()))
	genre = depunct(requestargs.get('genre', str()))
	auloc = depunct(requestargs.get('auloc', str()))

	rawdataentry = re.sub('[^tf]', str(), requestargs.get('raw', str()))
	exclude = re.sub('[^tf]', str(), requestargs.get('exclude', str()))

	allowedpunct = '|,.'
	locus = depunct(requestargs.get('locus', str()), allowedpunct)
	endpoint = depunct(requestargs.get('endpoint', str()), allowedpunct)

	allowedpunct = '.-?():'
	wkprov = depunct(requestargs.get('wkprov', str()), allowedpunct)

	allowedpunct = '.'
	wkgenre = depunct(requestargs.get('wkgenre', str()), allowedpunct)

	if exclude != 't':
		suffix = 'selections'
		other = 'exclusions'
	else:
		suffix = 'exclusions'
		other = 'selections'

	if rawdataentry == 't':
		locus = re.sub(r'\.', '|', locus)
		endpoint = re.sub(r'\.', '|', endpoint)

	# the selection box might contain stale info if you deselect a corpus while items are still in the box
	uid = selectionisactive(uid)

	if genre and genre not in returnactivelist(authorgenresdict):
		genre = str()

	if wkgenre and wkgenre not in returnactivelist(workgenresdict):
		wkgenre = str()

	if auloc and auloc not in returnactivelist(authorlocationdict):
		auloc = str()

	if wkprov and wkprov not in returnactivelist(workprovenancedict):
		wkprov = str()

	# you have validated the input, now do something with it...
	if uid and workid and locus and endpoint:
		# a span in an author: 3 verrine orations, e.g. [note that the selection is 'greedy': 1start - 3end]
		# http://127.0.0.1:5000/makeselection?auth=lt0474&work=005&locus=2|1&endpoint=2|3
		# convert this into a 'firstline' through 'lastline' format
		emptycursor = None
		workobject = None
		try:
			workobject = workdict['{a}w{b}'.format(a=uid, b=workid)]
		except KeyError:
			consolewarning('"/selection/make/" sent a bad workuniversalid: {a}w{b}'.format(a=uid, b=workid))
		start = locus.split('|')
		stop = endpoint.split('|')
		start.reverse()
		stop.reverse()
		if workobject:
			firstline = finddblinefromincompletelocus(workobject, start, emptycursor)
			lastline = finddblinefromincompletelocus(workobject, stop, emptycursor, findlastline=True)
			citationtemplate = '{a}w{b}_FROM_{c}_TO_{d}'
			if firstline['code'] == 'success' and lastline['code'] == 'success':
				fl = firstline['line']
				ll = lastline['line']
				loc = citationtemplate.format(a=uid, b=workid, c=fl, d=ll)
				# print('span selected:', loc)
				# span selected: lt0474w005_FROM_4501_TO_11915
				# Cicero, In Verrem: 2.1.t.1
				# Cicero, In Verrem: 2.3.228.15
				if ll > fl:
					session['psg' + suffix].append(loc)
					session['psg' + suffix] = tidyuplist(session['psg' + suffix])
				else:
					msg = '"makeselection/" sent a firstline greater than the lastine value: {a} > {b} [{c}; {d}]'
					consolewarning(msg.format(a=fl, b=ll, c=locus, d=endpoint))
				rationalizeselections(loc, suffix)
			else:
				msg = '"makeselection/" could not find first and last: {a}w{b} - {c} TO {d}'
				consolewarning(msg.format(a=uid, b=workid, c=locus, d=endpoint))
	elif uid and workid and locus:
		# a specific passage
		session['psg' + suffix].append(uid + 'w' + workid + '_AT_' + locus)
		session['psg' + suffix] = tidyuplist(session['psg' + suffix])
		rationalizeselections(uid + 'w' + workid + '_AT_' + locus, suffix)
	elif uid and workid:
		# a specific work
		session['wk' + suffix].append(uid + 'w' + workid)
		session['wk' + suffix] = tidyuplist(session['wk' + suffix])
		rationalizeselections(uid + 'w' + workid, suffix)
	elif uid and not workid:
		# a specific author
		session['au' + suffix].append(uid)
		session['au' + suffix] = tidyuplist(session['au' + suffix])
		rationalizeselections(uid, suffix)

	# if vs elif: allow multiple simultaneous instances
	if genre:
		# add to the +/- genre list and then subtract from the -/+ list
		session['agn' + suffix].append(genre)
		session['agn' + suffix] = tidyuplist(session['agn' + suffix])
		session['agn' + other] = dropdupes(session['agn' + other], session['agn' + suffix])
	if wkgenre:
		# add to the +/- genre list and then subtract from the -/+ list
		session['wkgn' + suffix].append(wkgenre)
		session['wkgn' + suffix] = tidyuplist(session['wkgn' + suffix])
		session['wkgn' + other] = dropdupes(session['wkgn' + other], session['wkgn' + suffix])
	if auloc:
		# add to the +/- locations list and then subtract from the -/+ list
		session['aloc' + suffix].append(auloc)
		session['aloc' + suffix] = tidyuplist(session['aloc' + suffix])
		session['aloc' + other] = dropdupes(session['aloc' + other], session['aloc' + suffix])
	if wkprov:
		# add to the +/- locations list and then subtract from the -/+ list
		session['wloc' + suffix].append(wkprov)
		session['wloc' + suffix] = tidyuplist(session['wloc' + suffix])
		session['wloc' + other] = dropdupes(session['wloc' + other], session['wloc' + suffix])

	# after the update to the session, you need to update the page html to reflect the changes
	# print('session["psgselections"]=', session['psgselections'])
	# print('session["psgexclusions"]=', session['psgexclusions'])

	return getcurrentselections()
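
The suffix/other pairing drives everything after validation; a compact restatement (sketch):

# exclude == 't' flips which session list gains the item and which gets pruned
suffix, other = ('exclusions', 'selections') if exclude == 't' else ('selections', 'exclusions')
# e.g. picking an author genre:
#   session['agn' + suffix] gains the genre (then tidyuplist() dedupes it)
#   session['agn' + other]  has the same genre removed via dropdupes()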