def formatexternalgrabberarguments(command: str, so: SearchObject) -> list:
    """
	Usage of ./HipparchiaGoDBHelper:
	  -c int
			max hit count (default 200)
	  -k string
			redis key to use (default "go")
	  -l int
			logging level: 0 is silent; 5 is very noisy (default 1)
	  -p string
			psql logon information (as a JSON string) (default "{\"Host\": \"localhost\", \"Port\": 5432, \"User\": \"hippa_wr\", \"Pass\": \"\", \"DBName\": \"hipparchiaDB\"}")
	  -r string
			redis logon information (as a JSON string) (default "{\"Addr\": \"localhost:6379\", \"Password\": \"\", \"DB\": 0}")
	  -sv
			[vectors] assert that this is a vectorizing run
	  -svb string
			[vectors] the bagging method: choices are alternates, flat, unlemmatized, winnertakesall (default "winnertakesall")
	  -svdb string
			[vectors][for manual debugging] db to grab from (default "lt0448")
	  -sve int
			[vectors][for manual debugging] last line to grab (default 26)
	  -svs int
			[vectors][for manual debugging] first line to grab (default 1)
	  -t int
			number of goroutines to dispatch (default 5)
	  -v    print version and exit

	"""
    if 'Rust' not in hipparchia.config['EXTERNALBINARYNAME']:
        # irritating '--x' vs '-x' issue...
        prefix = '-'
    else:
        prefix = '--'

    arguments = dict()

    arguments['k'] = so.searchid
    arguments['c'] = so.cap
    arguments['t'] = setthreadcount()
    arguments['l'] = hipparchia.config['EXTERNALCLILOGLEVEL']

    rld = {
        'Addr':
        '{a}:{b}'.format(a=hipparchia.config['REDISHOST'],
                         b=hipparchia.config['REDISPORT']),
        'Password':
        str(),
        'DB':
        hipparchia.config['REDISDBID']
    }
    arguments['r'] = json.dumps(rld)

    # rw user by default atm; can do this smarter...
    psd = {
        'Host': hipparchia.config['DBHOST'],
        'Port': hipparchia.config['DBPORT'],
        'User': hipparchia.config['DBWRITEUSER'],
        'Pass': hipparchia.config['DBWRITEPASS'],
        'DBName': hipparchia.config['DBNAME']
    }

    if not hipparchia.config['EXTERNALBINARYKNOWSLOGININFO']:
        arguments['p'] = json.dumps(psd)

    argumentlist = [[
        '{p}{k}'.format(k=k, p=prefix), '{v}'.format(v=arguments[k])
    ] for k in arguments]
    argumentlist = flattenlistoflists(argumentlist)
    commandandarguments = [command] + argumentlist

    return commandandarguments
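
# a minimal sketch of the dict-to-argv flattening performed above; the key/value
# pairs here are invented stand-ins for the real config-driven values
arguments = {'k': 'searchid123', 'c': 200, 't': 5, 'l': 1}
prefix = '-'  # the Go helper style; a Rust binary would get '--'
argumentlist = [['{p}{k}'.format(k=k, p=prefix), '{v}'.format(v=arguments[k])] for k in arguments]
argumentlist = [item for pair in argumentlist for item in pair]  # what flattenlistoflists() does here
commandandarguments = ['./HipparchiaGoDBHelper'] + argumentlist
# ['./HipparchiaGoDBHelper', '-k', 'searchid123', '-c', '200', '-t', '5', '-l', '1']
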
def generatevocabfor(searchid: str,
                     author: str,
                     work=None,
                     passage=None,
                     endpoint=None,
                     citationdelimiter='|') -> JSON_STR:
    """

	given a text span
		figure out what words are used by this span
		then provide a vocabulary list from that list

	ex:
		http://localhost:5000/vocabularyfor/SEARCHID/lt0631/001/1/20

	this is a lot like building an index so we just leverage buildindexto() but pull away from it after the initial
	bit where we establish endpoints and get ready to gather the lines

	:param searchid:
	:param author:
	:param work:
	:param passage:
	:param endpoint:
	:param citationdelimiter:
	:return:
	"""

    starttime = time.time()
    segmenttext = str()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    justvocab = True

    cdict = buildindexto(searchid, author, work, passage, endpoint,
                         citationdelimiter, justvocab)
    lineobjects = grabbundlesoflines(cdict, dbcursor)

    allwords = [l.wordset() for l in lineobjects]
    allwords = set(flattenlistoflists(allwords))

    morphobjects = getrequiredmorphobjects(allwords)
    # 'dominatio': <server.hipparchiaobjects.dbtextobjects.dbMorphologyObject object at 0x14ab92d68>, ...

    baseformsmorphobjects = list()
    for m in morphobjects:
        try:
            baseformsmorphobjects.extend(morphobjects[m].getpossible())
        except AttributeError:
            # 'NoneType' object has no attribute 'getpossible'
            pass

    vocabset = {
        '{w} ~~~ {t}'.format(w=b.getbaseform(), t=b.gettranslation())
        for b in baseformsmorphobjects if b.gettranslation()
    }
    vocabset = {
        v.split(' ~~~ ')[0]: v.split(' ~~~ ')[1].strip()
        for v in vocabset
    }
    vocabset = {v: vocabset[v] for v in vocabset if vocabset[v]}

    # the following can be in entries and will cause problems...:
    #   <tr opt="n">which had become milder</tr>

    vocabset = {
        v: re.sub(r'<(|/)tr.*?>', str(), vocabset[v])
        for v in vocabset
    }

    # now you have { word1: definition1, word2: definition2, ...}

    vocabcounter = [
        b.getbaseform() for b in baseformsmorphobjects if b.gettranslation()
    ]
    vocabcount = dict()
    for v in vocabcounter:
        try:
            vocabcount[v] += 1
        except KeyError:
            vocabcount[v] = 1

    po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                      citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    tableheadtemplate = """
	<tr>
		<th class="vocabtable">word</th>
		<th class="vocabtable">count</th>
		<th class="vocabtable">definitions</th>
	</tr>
	"""

    tablerowtemplate = """
	<tr>
		<td class="word"><vocabobserved id="{w}">{w}</vocabobserved></td>
		<td class="count">{c}</td>
		<td class="trans">{t}</td>
	</tr>
	"""

    tablehtml = """
	<table>
		{head}
		{rows}
	</table>
	"""

    # flip 'byfrequency' to True to sort the table by frequency instead of alphabetically
    byfrequency = False
    if not byfrequency:
        rowhtml = [
            tablerowtemplate.format(w=k, t=vocabset[k], c=vocabcount[k])
            for k in polytonicsort(vocabset.keys())
        ]
    else:
        vc = [(vocabcount[v], v) for v in vocabcount]
        vc.sort(reverse=True)
        vk = [v[1] for v in vc]
        vk = [v for v in vk if v in vocabset]
        rowhtml = [
            tablerowtemplate.format(w=k, t=vocabset[k], c=vocabcount[k])
            for k in vk
        ]

    wordsfound = len(rowhtml)
    rowhtml = '\n'.join(rowhtml)

    vocabhtml = tablehtml.format(head=tableheadtemplate, rows=rowhtml)

    if not ao:
        ao = makeanemptyauthor('gr0000')

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)

    if not stop:
        segmenttext = '.'.join(psg)

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = wordsfound
    results['texthtml'] = vocabhtml
    results['keytoworks'] = str()
    results['newjs'] = supplementalvocablistjs()
    results = json.dumps(results)

    # print('vocabhtml', vocabhtml)

    return results
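
# a minimal sketch of the ' ~~~ ' join-and-split used above: it is just a way of
# deduplicating (baseform, translation) pairs through a set; words and glosses invented
pairs = [('amo', 'to love'), ('amo', 'to love'), ('bellum', 'war'), ('res', '')]
vocabset = {'{w} ~~~ {t}'.format(w=w, t=t) for w, t in pairs if t}
vocabset = {v.split(' ~~~ ')[0]: v.split(' ~~~ ')[1].strip() for v in vocabset}
# {'amo': 'to love', 'bellum': 'war'}: the duplicate and the gloss-less entry are gone
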
def lookformorphologymatches(word: str,
                             dbcursor,
                             trialnumber=0,
                             revertword=None,
                             rewrite=None,
                             furtherdeabbreviate=False) -> dbMorphologyObject:
    """

	hipparchiaDB=# select * from greek_morphology limit 1;
	 observed_form |   xrefs   | prefixrefs |                                                             possible_dictionary_forms
	---------------+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------
	 Τηνίουϲ       | 114793123 |            | <possibility_1>Τήνιοϲ<xref_value>114793123</xref_value><xref_kind>0</xref_kind><transl> </transl><analysis>masc acc pl</analysis></possibility_1>+
               |           |            |
	hipparchiaDB=# select * from greek_lemmata where xref_number=114793123;
	 dictionary_entry | xref_number |                  derivative_forms
	------------------+-------------+----------------------------------------------------
	 τήνιοϲ           |   114793123 | {τηνίων,τήνια,τηνίουϲ,τήνιοι,τηνίοιϲ,τηνία,τήνιοϲ}

	funky because we need to poke at words several times and to try combinations of fixes

	ought to pass a cursor to this one because this function will have trouble cleaning the connection properly

	:param word:
	:param dbcursor:
	:param trialnumber:
	:param revertword:
	:param rewrite:
	:param furtherdeabbreviate: a vector run has already turned 'm.' into Marcus, so it is safe to turn 'm' into 'mille'
	:return:
	"""

    if re.search(r'[a-z]', word):
        usedictionary = 'latin'
    else:
        usedictionary = 'greek'

    # βοῶ̣ντεϲ -> βοῶντεϲ
    word = re.sub(r'̣', str(), word)

    ihavesession = True
    try:
        session['available'][usedictionary + '_morphology']
    except RuntimeError:
        # vectorbot thread does not have access to the session...
        # we will *dangerously guess* that we can skip the next check because vectorbotters
        # are quite likely to have beefy installations...
        ihavesession = False

    if ihavesession and not session['available'][usedictionary +
                                                 '_morphology']:
        return None

    maxtrials = 4
    retrywithcapitalization = 1
    trialnumber += 1

    # the things that can confuse me
    terminalacute = re.compile(r'[άέίόύήώ]')

    morphobjects = None

    # syntax = '~' if you have to deal with '[uv]' problems, e.g.
    # but that opens up a whole new can of worms

    query = 'SELECT * FROM {d}_morphology WHERE observed_form = %s'.format(
        d=usedictionary)
    data = (word, )

    # print('lookformorphologymatches() q/d', query, data)

    dbcursor.execute(query, data)
    # do not just fetchone(): the idea that all possibilities are stored inside a single analysis is NOT TRUE
    # loss of case sensitivity is a problem here: Latro vs latro
    analyses = dbcursor.fetchall()

    if analyses:
        morphobjects = [dbMorphologyObject(*a) for a in analyses]
        if rewrite:
            for m in morphobjects:
                m.observed = rewrite
                m.rewritten = True
    elif trialnumber < maxtrials:
        # turn 'kal' into 'kalends', etc.
        # not very costly as this is a dict lookup, and less costly than any call to the db
        newword = unpackcommonabbreviations(word, furtherdeabbreviate)
        if newword != word:
            return lookformorphologymatches(newword, dbcursor, 0, rewrite=word)

        if revertword:
            word = revertword
        # this code lets you make multiple stabs at an answer if you have already failed once
        # need to be careful about the retries that reset the trialnumber: could infinite loop if not careful
        # [a] something like πλακουντάριόν τι will fail because of the enclitic (greek_morphology can find πλακουντάριον and πλακουντάριοϲ)
        # [b] something like προχοίδιόν τι will fail twice over because of the enclitic and the diaresis

        try:
            # have to 'try...' because there might not be a word[-2]
            if trialnumber == 1:
                # elided ending? you will ask for ἀλλ, but you need to look for ἀλλ'
                newword = word + "'"
                morphobjects = lookformorphologymatches(newword,
                                                        dbcursor,
                                                        trialnumber,
                                                        revertword=word)
            elif trialnumber == 2:
                # a proper noun?
                newword = word[0].upper() + word[1:]
                morphobjects = lookformorphologymatches(newword,
                                                        dbcursor,
                                                        trialnumber,
                                                        revertword=word)
            elif re.search(r'\'$', word):
                # the last word in a greek quotation might have a 'close quote' that was mistaken for an elision
                newword = re.sub(r'\'', '', word)
                morphobjects = lookformorphologymatches(
                    newword, dbcursor, trialnumber)
            elif re.search(r'[ΐϊΰῧϋî]', word):
                # desperate: ῥηϊδίωϲ --> ῥηιδίωϲ
                diacritical = 'ΐϊΰῧϋî'
                plain = 'ίιύῦυi'
                xform = str.maketrans(diacritical, plain)
                newword = word.translate(xform)
                morphobjects = lookformorphologymatches(
                    newword, dbcursor, trialnumber=retrywithcapitalization)
            elif re.search(terminalacute, word[-1]):
                # an enclitic problem?
                sub = stripaccents(word[-1])
                newword = word[:-1] + sub
                morphobjects = lookformorphologymatches(
                    newword, dbcursor, trialnumber=retrywithcapitalization)
            elif re.search(terminalacute, word[-2]):
                # πλακουντάριόν?
                sub = stripaccents(word[-2])
                newword = word[:-2] + sub + word[-1]
                morphobjects = lookformorphologymatches(
                    newword, dbcursor, trialnumber=retrywithcapitalization)
            else:
                return None
        except IndexError:
            morphobjects = None

    if not morphobjects:
        return None

    # OK: we have a list of dbMorphologyObjects; this needs to be turned into a single object...
    # def __init__(self, observed, xrefs, prefixrefs, possibleforms):

    if isinstance(morphobjects, dbMorphologyObject):
        # you got here after multiple tries
        # if you don't do the next, the len() check will fail
        morphobjects = [morphobjects]

    if len(morphobjects) == 1:
        morphobject = morphobjects[0]
    else:
        ob = morphobjects[0].observed
        xr = flattenlistoflists([m.xrefs for m in morphobjects])
        xr = ', '.join(xr)
        pr = flattenlistoflists([m.prefixrefs for m in morphobjects])
        pr = ', '.join(pr)
        pf = [m.possibleforms for m in morphobjects]
        hw = flattenlistoflists([m.headwords for m in morphobjects])

        # note that you will have multiple '<possibility_1>' entries now... Does not matter ATM, but a bug waiting to bite
        mergedpf = dict()
        for p in pf:
            mergedpf = {**mergedpf, **p}

        morphobject = dbMorphologyObject(ob, xr, pr, mergedpf, hw)

    return morphobject
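
# a standalone sketch of the 'desperate' diacritic fallback above, reproducing
# the ῥηϊδίωϲ --> ῥηιδίωϲ example from the comments
diacritical = 'ΐϊΰῧϋî'
plain = 'ίιύῦυi'
xform = str.maketrans(diacritical, plain)
print('ῥηϊδίωϲ'.translate(xform))  # ῥηιδίωϲ
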
def _gettokens(self) -> set:
    """split a morphological analysis string into a set of its individual tokens"""
    tokens = self.analysis.split(' ')
    tokens = flattenlistoflists([t.split('/') for t in tokens])
    tokens = set(tokens)
    return tokens
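
# what _gettokens() yields for an analysis string of the sort shown in the
# greek_morphology sample above; this particular analysis string is invented
analysis = 'fem nom/voc/acc dual'
tokens = [part for t in analysis.split(' ') for part in t.split('/')]
print(set(tokens))  # {'fem', 'nom', 'voc', 'acc', 'dual'}
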
def helperappwebsocketserver(theport):
	"""

	use the golang websocket server

	it will lock python if you try to use it as a module; so we will invoke it via a binary

	Usage of ./HipparchiaGoDBHelper:
	  -c int
			[searches] max hit count (default 200)
	  -k string
			[searches] redis key to use (default "go")
	  -l int
			[common] logging level: 0 is silent; 5 is very noisy (default 1)
	  -p string
			[common] psql logon information (as a JSON string) (default "{\"Host\": \"localhost\", \"Port\": 5432, \"User\": \"hippa_wr\", \"Pass\": \"\", \"DBName\": \"hipparchiaDB\"}")
	  -r string
			[common] redis logon information (as a JSON string) (default "{\"Addr\": \"localhost:6379\", \"Password\": \"\", \"DB\": 0}")
	  -sv
			[vectors] assert that this is a vectorizing run
	  -svb string
			[vectors] the bagging method: choices are alternates, flat, unlemmatized, winnertakesall (default "winnertakesall")
	  -svdb string
			[vectors][for manual debugging] db to grab from (default "lt0448")
	  -sve int
			[vectors][for manual debugging] last line to grab (default 26)
	  -svs int
			[vectors][for manual debugging] first line to grab (default 1)
	  -t int
			[common] number of goroutines to dispatch (default 5)
	  -v    [common] print version and exit
	  -ws
			[websockets] assert that you are requesting the websocket server
	  -wsf int
			[websockets] fail threshold before messages stop being sent (default 4)
	  -wsp int
			[websockets] port on which to open the websocket server (default 5010)
	  -wss int
			[websockets] save the polls instead of deleting them: 0 is no; 1 is yes

	"""

	if 'Rust' not in hipparchia.config['EXTERNALBINARYNAME']:
		# irritating '--x' vs '-x' issue...
		prefix = '-'
	else:
		prefix = '--'

	command = getexternalhelperpath()

	arguments = dict()

	rld = {'Addr': '{a}:{b}'.format(a=hipparchia.config['REDISHOST'], b=hipparchia.config['REDISPORT']),
		   'Password': str(),
		   'DB': hipparchia.config['REDISDBID']}
	arguments['r'] = json.dumps(rld)
	if hipparchia.config['RETAINREDISPOLLS']:
		arguments['wss'] = 1
	else:
		arguments['wss'] = 0
	arguments['l'] = hipparchia.config['EXTERNALWSSLOGLEVEL']
	arguments['wsp'] = theport
	arguments['wsf'] = hipparchia.config['EXTERNALBINARYFAILTHRESHOLD']

	argumentlist = [['{p}{k}'.format(p=prefix, k=k), '{v}'.format(v=arguments[k])] for k in arguments]

	# debugmessage('argumentlist={a}'.format(a=argumentlist))

	argumentlist = flattenlistoflists(argumentlist)
	commandandarguments = [command] + ['{p}ws'.format(p=prefix)] + argumentlist

	subprocess.Popen(commandandarguments)

	# debugmessage('successfully opened {b}'.format(b=hipparchia.config['EXTERNALBINARYNAME']))

	return
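
# the shape of the command line that helperappwebsocketserver() hands to Popen
# when the prefix is '-'; the helper path and option values here are hypothetical
commandandarguments = [
	'/path/to/HipparchiaGoDBHelper', '-ws',
	'-r', '{"Addr": "localhost:6379", "Password": "", "DB": 0}',
	'-wss', '0', '-l', '1', '-wsp', '5010', '-wsf', '4'
]
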
def lemmatizedwithinxlines(searchobject: SearchObject, hitlist: List[tuple],
                           dbcursor):
    """

    BROKEN ATM: 1.7.4 (probably most/all of 1.7.x)

    the alternate way of doing withinxlines

    this will ask regex to do the heavy lifting

    nasty edge case 'fire' near 'burn' in Homer:

    simplewithinxlines()
      Sought all 5 known forms of »πῦρ« within 1 lines of all 359 known forms of »καίω«
      Searched 3 texts and found 24 passages (621.25s)

    lemmatizedwithinxlines()
      Sought all 5 known forms of »πῦρ« within 1 lines of all 359 known forms of »καίω«
      Searched 3 texts and found 24 passages (2.82s)

    note that this function is often slightly slower than simplewithinxlines(), but it does seem to be able
    to avoid the catastrophe

    lemmatized vs non-lemmatized is probably the key difference when it comes to speed

    :param hitlist:
    :return:
    """

    so = searchobject

    columnconverter = {
        'marked_up_line': 'markedup',
        'accented_line': 'polytonic',
        'stripped_line': 'stripped'
    }
    col = columnconverter[so.usecolumn]

    prox = int(so.session['proximity'])

    # note that at the moment we arrive here with a one-work per worker policy
    # that is all of the hits will come from the same table
    # this means extra/useless sifting below, but perhaps it is safer to be wasteful now lest we break later

    fullmatches = set()  # set to avoid duplicate hits
    linesintheauthors = dict()

    hitlinelist = [dblineintolineobject(h) for h in hitlist]
    for l in hitlinelist:
        wkid = l.universalid
        # prox = 2
        # l = 100
        # list(range(l-prox, l+prox+1))
        # [98, 99, 100, 101, 102]
        environs = set(range(l.index - prox, l.index + prox + 1))
        environs = ['{w}_ln_{x}'.format(w=wkid, x=e) for e in environs]
        try:
            linesintheauthors[wkid[0:6]]
        except KeyError:
            linesintheauthors[wkid[0:6]] = set()
        linesintheauthors[wkid[0:6]].update(environs)

    # now grab all of the lines you might need
    # accumulate into a dict so that hits spread over multiple authors do not overwrite one another
    linecollection = dict()
    for author in linesintheauthors:
        if linesintheauthors[author]:
            # example: {'lt0803': {952, 953, 951}}
            grabbedlines = grablistoflines(author, list(linesintheauthors[author]),
                                           dbcursor)
            linecollection.update({
                '{w}_ln_{x}'.format(w=l.wkuinversalid, x=l.index): l
                for l in grabbedlines
            })

    # then associate all of the surrounding words with those lines
    wordbundles = dict()
    for l in hitlinelist:
        wkid = l.universalid
        environs = set(range(l.index - prox, l.index + prox + 1))
        mylines = list()
        for e in environs:
            try:
                mylines.append(linecollection['{w}_ln_{x}'.format(w=wkid,
                                                                  x=e)])
            except KeyError:
                # you went out of bounds and tried to grab something that is not really there
                # KeyError: 'lt1515w001_ln_1175'
                # line 1175 is actually the first line of lt1515w002...
                pass

        mywords = [getattr(l, col) for l in mylines]
        mywords = [w.split(' ') for w in mywords if w]  # skip any empty lines
        mywords = flattenlistoflists(mywords)
        mywords = ' '.join(mywords)
        wordbundles[l] = mywords

    # then see if we have any hits...
    for provisionalhitline in wordbundles:
        if len(fullmatches) > so.cap:
            break
        if so.near and re.search(so.termtwo,
                                 wordbundles[provisionalhitline]):
            fullmatches.add(provisionalhitline)
        elif not so.near and not re.search(
                so.termtwo, wordbundles[provisionalhitline]):
            fullmatches.add(provisionalhitline)

    fullmatches = [m.decompose() for m in fullmatches]

    return fullmatches
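
# a toy version of the proximity test at the heart of lemmatizedwithinxlines();
# the line index and line text are invented
import re

prox = 1
index = 100
environs = set(range(index - prox, index + prox + 1))  # {99, 100, 101}
wordbundle = 'arma uirumque cano troiae qui primus ab oris'  # the neighboring lines, joined
if re.search('uirum', wordbundle):
    print('fullmatch')  # a 'near' hit
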
def dictsearch(searchterm) -> JSON_STR:
	"""
	look up words
	return dictionary entries
	json packing
	:return:
	"""
	returndict = dict()

	searchterm = searchterm[:hipparchia.config['MAXIMUMLEXICALLENGTH']]
	probeforsessionvariables()

	dbconnection = ConnectionObject()
	dbcursor = dbconnection.cursor()

	if hipparchia.config['UNIVERSALASSUMESBETACODE']:
		searchterm = replacegreekbetacode(searchterm.upper())

	allowedpunct = '^$.'
	seeking = depunct(searchterm, allowedpunct)
	seeking = seeking.lower()
	seeking = re.sub('[σς]', 'ϲ', seeking)
	stripped = stripaccents(seeking)

	# don't turn 'injurius' into '[iiII]n[iiII][uuVV]r[iiII][uuVV]s'
	# that will happen if you call stripaccents() prematurely
	stripped = re.sub(r'[uv]', '[uvUV]', stripped)
	stripped = re.sub(r'[ij]', '[ijIJ]', stripped)

	if re.search(r'[a-z]', seeking):
		usedictionary = 'latin'
		usecolumn = 'entry_name'
	else:
		usedictionary = 'greek'
		usecolumn = 'unaccented_entry'

	if not session['available'][usedictionary + '_dictionary']:
		returndict['newhtml'] = 'cannot look up {w}: {d} dictionary is not installed'.format(d=usedictionary, w=seeking)
		return json.dumps(returndict)

	limit = hipparchia.config['CAPONDICTIONARYFINDS']

	foundtuples = headwordsearch(stripped, limit, usedictionary, usecolumn)

	# example:
	# results are presorted by ID# via the postgres query
	# foundentries [('scrofa¹', 43118), ('scrofinus', 43120), ('scrofipascus', 43121), ('Scrofa²', 43119), ('scrofulae', 43122)]

	returnlist = list()

	if len(foundtuples) == limit:
		returnlist.append('[stopped searching after {lim} finds]<br>'.format(lim=limit))

	if len(foundtuples) > 0:

		if len(foundtuples) == 1:
			# calling generatelexicaloutput() without a countervalue hides the count number
			usecounter = False
		else:
			usecounter = True

		wordobjects = [probedictionary(setdictionarylanguage(f[0]) + '_dictionary', 'entry_name', f[0], '=', dbcursor=dbcursor, trialnumber=0) for f in foundtuples]
		wordobjects = flattenlistoflists(wordobjects)
		outputobjects = [lexicalOutputObject(w) for w in wordobjects]

		# very top: list the finds
		if usecounter:
			findstemplate = '({n})&nbsp;<a class="nounderline" href="#{w}_{wdid}">{w}</a>'
			findslist = [findstemplate.format(n=f[0]+1, w=f[1][0], wdid=f[1][1]) for f in enumerate(foundtuples)]
			returnlist.append('\n<br>\n'.join(findslist))

		# the actual entries
		count = 0
		for oo in outputobjects:
			count += 1
			if usecounter:
				entry = oo.generatelexicaloutput(countervalue=count)
			else:
				entry = oo.generatelexicaloutput()
			returnlist.append(entry)
	else:
		returnlist.append('[nothing found]')

	if session['zaplunates']:
		returnlist = [attemptsigmadifferentiation(x) for x in returnlist]
		returnlist = [abbreviatedsigmarestoration(x) for x in returnlist]

	returndict['newhtml'] = '\n'.join(returnlist)
	returndict['newjs'] = '\n'.join([dictionaryentryjs(), insertlexicalbrowserjs()])

	jsondict = json.dumps(returndict)

	dbconnection.connectioncleanup()

	return jsondict
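
# what the u/v and i/j widening above does to a Latin probe: either spelling
# convention will now match
import re

stripped = 'iniuria'
stripped = re.sub(r'[uv]', '[uvUV]', stripped)
stripped = re.sub(r'[ij]', '[ijIJ]', stripped)
print(stripped)  # [ijIJ]n[ijIJ][uvUV]r[ijIJ]a
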
def reverselexiconsearch(searchid, searchterm) -> JSON_STR:
	"""
	attempt to find all of the greek/latin dictionary entries that might go with the english search term

	'ape' will drive this crazy; what is needed is a lookup for only the senses

	this can be built into the dictionary

	:param searchid:
	:param searchterm:
	:return:
	"""

	searchterm = searchterm[:hipparchia.config['MAXIMUMLEXICALLENGTH']]
	pollid = validatepollid(searchid)
	progresspolldict[pollid] = ProgressPoll(pollid)
	activepoll = progresspolldict[pollid]
	activepoll.activate()
	activepoll.statusis('Searching lexical entries for "{t}"'.format(t=searchterm))

	probeforsessionvariables()

	returndict = dict()
	returnarray = list()

	seeking = depunct(searchterm)

	if justlatin():
		searchunder = [('latin', 'hi')]
	elif justtlg():
		searchunder = [('greek', 'tr')]
	else:
		searchunder = [('greek', 'tr'), ('latin', 'hi')]

	limit = hipparchia.config['CAPONDICTIONARYFINDS']

	entriestuples = list()
	for s in searchunder:
		usedict = s[0]
		translationlabel = s[1]
		# first see if your term is mentioned at all
		wordobjects = reversedictionarylookup(seeking, usedict, limit)
		entriestuples += [(w.entry, w.id) for w in wordobjects]

	if len(entriestuples) == limit:
		returnarray.append('[stopped searching after {lim} finds]\n<br>\n'.format(lim=limit))

	entriestuples = list(set(entriestuples))

	unsortedentries = [(querytotalwordcounts(e[0]), e[0], e[1]) for e in entriestuples]
	entries = list()
	for e in unsortedentries:
		hwcountobject = e[0]
		term = e[1]
		idval = e[2]
		if hwcountobject:
			entries.append((hwcountobject.t, term, idval))
		else:
			entries.append((0, term, idval))
	entries = sorted(entries, reverse=True)
	entriestuples = [(e[1], e[2]) for e in entries]

	# now we retrieve and format the entries
	if entriestuples:
		# summary of entry values first
		countobjectdict = {e: querytotalwordcounts(e[0]) for e in entriestuples}
		summary = list()
		count = 0
		for c in countobjectdict.keys():
			count += 1
			try:
				totalhits = countobjectdict[c].t
			except AttributeError:
				# querytotalwordcounts() can return None
				totalhits = 0
			# c[0]: the word; c[1]: the id
			summary.append((count, c[0], c[1], totalhits))

		summarytemplate = """
		<span class="sensesum">({n})&nbsp;
			<a class="nounderline" href="#{w}_{wdid}">{w}</a>&nbsp;
			<span class="small">({t:,})</span>
		</span>
		"""

		summary = sorted(summary, key=lambda x: x[3], reverse=True)
		summary = [summarytemplate.format(n=e[0], w=e[1], wdid=e[2], t=e[3]) for e in summary]
		returnarray.append('\n<br />\n'.join(summary))

		# then the entries proper
		dbconnection = ConnectionObject()
		dbconnection.setautocommit()
		dbcursor = dbconnection.cursor()

		wordobjects = [probedictionary(setdictionarylanguage(e[0]) + '_dictionary', 'entry_name', e[0], '=', dbcursor=dbcursor, trialnumber=0) for e in entriestuples]
		wordobjects = flattenlistoflists(wordobjects)
		outputobjects = [lexicalOutputObject(w) for w in wordobjects]
		if len(outputobjects) > 1:
			usecounter = True
		else:
			usecounter = False

		count = 0
		for oo in outputobjects:
			count += 1
			if usecounter:
				entry = oo.generatelexicaloutput(countervalue=count)
			else:
				entry = oo.generatelexicaloutput()
			returnarray.append(entry)
	else:
		returnarray.append('<br />[nothing found under "{skg}"]'.format(skg=seeking))

	returndict['newhtml'] = '\n'.join(returnarray)
	returndict['newjs'] = '\n'.join([dictionaryentryjs(), insertlexicalbrowserjs()])

	jsondict = json.dumps(returndict)

	del progresspolldict[pollid]

	return jsondict
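
# a toy illustration of the frequency-first ordering built above from
# (count, term, idval) tuples; the headwords and counts are invented
entries = [(12, 'amo', 101), (340, 'bellum', 202), (12, 'amor', 103)]
entries = sorted(entries, reverse=True)
entriestuples = [(e[1], e[2]) for e in entries]
print(entriestuples)  # [('bellum', 202), ('amor', 103), ('amo', 101)]
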