def rewritesqlsearchdictforlemmata(so: SearchObject) -> dict:
    """
    Rewrite so.searchsqldict so that each query's 'data' carries lemma regexes.

    you have
    { table1: {query: q, data: d, temptable: t},
    table2: {query: q, data: d, temptable: t},
    ... }

    but the 'data' needs to be swapped out: a lemma can have 100+ forms, so the
    forms are split into chunks and each author table is expanded into one
    entry per chunk:

    { ...,
    'gr0059_20': {
        'data': '(^|\\s)δηλώϲητε(\\s|$)|(^|\\s)δηλώϲωϲι(\\s|$)|...',
        'query': 'SELECT ... FROM gr0059 WHERE ... ( accented_line ~* %s )  LIMIT 200',
        'temptable': ''
    },
    ... }

    :param so: the SearchObject whose searchsqldict is being rewritten
    :return: a new dict keyed '{authortable}_{chunknumber}'
    """

    searchdict = so.searchsqldict

    terms = so.lemmaone.formlist

    # guard against a chunksize of 0 (few forms + many workers): a 0 step in
    # the slicing range() below would raise ValueError
    chunksize = max(1, min(int(len(terms) / (hipparchia.config['WORKERS'] * 2)), 25))

    newtablenames = '{t}_{c}'

    chunked = [terms[i:i + chunksize] for i in range(0, len(terms), chunksize)]
    chunked = [wordlistintoregex(c) for c in chunked]

    modifieddict = dict()
    for authortable in searchdict:
        for count, c in enumerate(chunked):
            newname = newtablenames.format(t=authortable, c=count)
            modifieddict[newname] = {
                'data': c,
                'query': searchdict[authortable]['query'],
                'temptable': searchdict[authortable]['temptable']
            }

    return modifieddict
def withinxlines(workdbname: str, searchobject: SearchObject,
                 dbconnection) -> List[tuple]:
    """

    after finding x, look for y within n lines of x

    people who send phrases to both halves and/or a lot of regex will not always get what they want

    :param workdbname:
    :param searchobject:
    :return:
    """

    so = searchobject
    dbcursor = dbconnection.cursor()
    dbconnection.setautocommit()

    # substringsearch() normally caps output at session['maxresults']; lift that cap:
    # "Roman" near "Aetol" will get 3786 hits in Livy, but only maxresults would come
    # back for checking and the Aetolians are likely not among those 200 or so passages...
    templimit = 2000000

    if so.lemma:
        chunksize = hipparchia.config['LEMMACHUNKSIZE']
        allforms = so.lemma.formlist
        # ORed regexes, one per chunk of lemma forms
        regexchunks = [wordlistintoregex(allforms[j:j + chunksize])
                       for j in range(0, len(allforms), chunksize)]
        hitlist = list()
        for rgx in regexchunks:
            hitlist.extend(substringsearch(rgx, workdbname, so, dbcursor, templimit))
    else:
        hitlist = list(substringsearch(so.termone, workdbname, so, dbcursor, templimit))

    # lemmatized and plain proximity checks take different paths
    if so.lemmaone or so.lemmatwo:
        fullmatches = lemmatizedwithinxlines(searchobject, hitlist, dbcursor)
    else:
        fullmatches = simplewithinxlines(searchobject, hitlist, dbcursor)

    return fullmatches
def grableadingandlagging(hitline: dbWorkLine,
                          searchobject: SearchObject,
                          cursor,
                          override=None) -> dict:
    """

    take a dbline and grab the N words in front of it and after it

    it would be a good idea to have an autocommit connection here?

    override was added so that the rewritten so of precomposedphraseandproximitysearch() can set 'seeking' as it
    wishes

    :param hitline: the dbWorkLine at the center of the context window
    :param searchobject: the active SearchObject (supplies distance, terms, wordlist style)
    :param cursor: a db cursor used to fetch the neighboring lines
    :param override: optional regex that replaces the SearchObject-derived 'seeking'
    :return: {'lag': <words before the match>, 'lead': <words after the match>}
    """

    so = searchobject
    # look out for off-by-one errors
    distance = so.distance + 1

    if override:
        seeking = override
    elif so.lemma:
        seeking = wordlistintoregex(so.lemma.formlist)
        so.usewordlist = 'polytonic'
    else:
        seeking = so.termone

    # expanded searchzone because "seeking" might be a multi-line phrase
    # (locals renamed from 'prev'/'next': 'next' shadowed the builtin next())
    previousline = grabonelinefromwork(hitline.authorid, hitline.index - 1, cursor)
    followingline = grabonelinefromwork(hitline.authorid, hitline.index + 1, cursor)
    previousline = dbWorkLine(*previousline)
    followingline = dbWorkLine(*followingline)

    searchzone = ' '.join([
        getattr(previousline, so.usewordlist),
        getattr(hitline, so.usewordlist),
        getattr(followingline, so.usewordlist)
    ])

    match = re.search(r'{s}'.format(s=seeking), searchzone)
    # but what if you just found 'paucitate' inside of 'paucitatem'?
    # you will have 'm' left over and this will throw off your distance-in-words count
    past = None
    upto = None
    lagging = list()
    leading = list()
    ucount = 0
    pcount = 0

    try:
        past = searchzone[match.end():].strip()
    except AttributeError:
        # AttributeError: 'NoneType' object has no attribute 'end' (no match)
        pass

    try:
        upto = searchzone[:match.start()].strip()
    except AttributeError:
        pass

    if upto:
        ucount = len([x for x in upto.split(' ') if x])
        lagging = [x for x in upto.split(' ') if x]

    if past:
        pcount = len([x for x in past.split(' ') if x])
        leading = [x for x in past.split(' ') if x]

    atline = hitline.index

    # walk backwards through the work until enough lagging words are collected
    while ucount < distance + 1:
        atline -= 1
        try:
            previous = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the start of the work
            previous = makeablankline(hitline.authorid, -1)
            ucount = 999
        lagging = previous.wordlist(so.usewordlist) + lagging
        ucount += previous.wordcount()
    lagging = lagging[-1 * (distance - 1):]
    lagging = ' '.join(lagging)

    atline = hitline.index
    # walk forwards for the leading words
    while pcount < distance + 1:
        atline += 1
        try:
            nextline = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the end of the work
            nextline = makeablankline(hitline.authorid, -1)
            pcount = 999
        leading += nextline.wordlist(so.usewordlist)
        pcount += nextline.wordcount()
    leading = leading[:distance - 1]
    leading = ' '.join(leading)

    returndict = {'lag': lagging, 'lead': leading}

    return returndict
# Beispiel #4
# 0
def dynamicsqlsearchdispatcher(searchobject: SearchObject) -> List[dbWorkLine]:
    """
    Assign the search to multiprocessing workers.

    Picks a worker function based on so.searchtype ('simple', 'simplelemma',
    'phrase', 'proximity'), builds the shared argument tuple, spawns one
    Process per worker, and collects the found lines either from a Manager
    list or from redis (when so.redisresultlist is set).

        searchobject:
            <server.hipparchiaclasses.SearchObject object at 0x1102c15f8>

    :param searchobject: the fully configured SearchObject for this search
    :return: a list of dbWorkLine hits
    """

    # clean out the pool if necessary before starting
    # this seems like the safest time for a reset of the pool: otherwise you could have workers working
    # but if you have a multi-user environment AND pool problems this code might make things worse
    cleanpoolifneeded()

    so = searchobject
    activepoll = so.poll

    # recompose 'searchingfor' (if it exists)
    # note that 'proximate' does not need as many checks
    if so.seeking:
        searchingfor = massagesearchtermsforwhitespace(so.seeking)
    else:
        searchingfor = str()

    # lunate sigmas / UV / JI issues
    # (keep the originals around: the proximity branch below may need to swap terms)
    unomdifiedskg = searchingfor
    unmodifiedprx = so.proximate

    activepoll.statusis('Loading the the dispatcher...')

    # of long-term interest is the new shared_memory module; using it will break the 3.6-3.7 installations
    # https://docs.python.org/3.8/library/multiprocessing.shared_memory.html#module-multiprocessing.shared_memory

    manager = Manager()
    founddblineobjects = manager.list()

    workers = setthreadcount()

    # either the work list lives in redis or in a Manager-proxied list
    if so.redisresultlist and so.redissearchlist:
        listofplacestosearch = None
        buildredissearchlist(list(so.indexrestrictions.keys()), so.searchid)
    else:
        listofplacestosearch = manager.list(so.indexrestrictions.keys())

    activepoll.allworkis(len(so.searchlist))
    activepoll.remain(len(so.indexrestrictions.keys()))
    activepoll.sethits(0)

    # be careful about getting mp aware args into the function

    targetfunction = None
    argumentuple = None

    if so.searchtype == 'simple':
        activepoll.statusis('Executing a simple word search...')
        targetfunction = workonsimplesearch
        argumentuple = (founddblineobjects, listofplacestosearch, so)
    elif so.searchtype == 'simplelemma':
        activepoll.statusis(
            'Executing a lemmatized word search for the {n} known forms of {w}...'
            .format(n=len(so.lemma.formlist), w=so.lemma.dictionaryentry))
        # don't search for every form at once (100+?)
        # instead build a list of tuples: [(ORed_regex_forms_part_01, authortable1), ...]
        chunksize = hipparchia.config['LEMMACHUNKSIZE']
        terms = so.lemma.formlist
        chunked = [
            terms[i:i + chunksize] for i in range(0, len(terms), chunksize)
        ]
        chunked = [wordlistintoregex(c) for c in chunked]
        searchtuples = manager.list()
        masterlist = so.indexrestrictions.keys()
        for c in chunked:
            for item in masterlist:
                searchtuples.append((c, item))
        activepoll.allworkis(len(searchtuples))
        if so.usequeue:
            searchtuples = loadsearchqueue([t for t in searchtuples], workers)
        if so.redissearchlist:
            ptuples = [pickle.dumps(s) for s in searchtuples]
            buildredissearchlist(ptuples, so.searchid)
        targetfunction = workonsimplelemmasearch
        argumentuple = (founddblineobjects, searchtuples, so)
    elif so.searchtype == 'phrase':
        activepoll.statusis('Executing a phrase search.')
        so.leastcommon = findleastcommonterm(so.termone, so.accented)
        lccount = findleastcommontermcount(so.termone, so.accented)

        # print('least common word in phrase:', lccount, ':', so.leastcommon, so.termone)
        # longestterm = max([len(t) for t in so.termone.split(' ') if t])
        # need to figure out when it will be faster to go to subqueryphrasesearch() and when not to
        # logic + trial and error
        #   e.g., any phrase involving λιποταξίου (e.g., γράψομαι λιποταξίου) can be very fast because that form appears 36x:
        #   you can find it in 1s but if you go through subqueryphrasesearch() you will spend about 17s per full TLG search
        # lccount = -1 if you are unaccented
        #   'if 0 < lccount < 500 or longestterm > 5' got burned badly with 'ἐξ ἀρχῆϲ πρῶτον'
        #   'or (lccount == -1 and longestterm > 6)' would take 1m to find διαφοραϲ ιδεαν via workonphrasesearch()
        #   but the same can be found in 16.45s via subqueryphrasesearch()
        # it looks like unaccented searches are very regularly faster via subqueryphrasesearch()
        #   when is this not true? being wrong about sqs() means spending an extra 10s; being wrong about phs() means an extra 40s...
        if 0 < lccount < 500:
            # print('workonphrasesearch()', searchingfor)
            targetfunction = workonphrasesearch
            argumentuple = (founddblineobjects, listofplacestosearch, so)
        else:
            # print('subqueryphrasesearch()', searchingfor)
            targetfunction = subqueryphrasesearch
            argumentuple = (founddblineobjects, so.termone,
                            listofplacestosearch, so)
    elif so.searchtype == 'proximity':
        activepoll.statusis('Executing a proximity search...')
        if so.lemma or so.proximatelemma:
            pass
        elif so.accented or re.search(r'^[a-z]', so.termone) and so.near:
            # choose the necessarily faster option
            leastcommon = findleastcommonterm(
                unomdifiedskg + ' ' + unmodifiedprx, so.accented)
            if leastcommon != unomdifiedskg:
                tmp = so.termone
                so.termone = so.termtwo
                so.termtwo = tmp
        elif len(so.termtwo) > len(so.termone) and so.near:
            # look for the longest word first since that is probably the quicker route
            # but you can't swap searchingfor and proximate this way in a 'is not near' search without yielding the wrong focus
            tmp = so.termone
            so.termone = so.termtwo
            so.termtwo = tmp
        targetfunction = workonproximitysearch
        argumentuple = (founddblineobjects, listofplacestosearch, so)
    else:
        # impossible, but...
        workers = 0

    # non-parallel multiprocessing implementation across platforms: widows can't pickle a connection;
    # everyone else needs to pickle the connection
    if icanpickleconnections():
        # you need to give each job its own connection if you use a connection pool
        # otherwise there will be problems with threading
        # note that we are not yet taking care of connection types: 'autocommit', etc
        oneconnectionperworker = {
            i: ConnectionObject()
            for i in range(workers)
        }
    else:
        # will grab a connection later once inside of 'sfo'
        oneconnectionperworker = {i: None for i in range(workers)}

    # note that the following (when fully implemented...) does not produce speedups
    # operedisconnectionperworker = {i: establishredisconnection() for i in range(workers)}

    # each worker gets: (workernumber, *argumentuple, its own connection)
    argumentswithconnections = [
        tuple([i] + list(argumentuple) + [oneconnectionperworker[i]])
        for i in range(workers)
    ]
    jobs = [
        Process(target=targetfunction, args=argumentswithconnections[i])
        for i in range(workers)
    ]

    for j in jobs:
        j.start()

    for j in jobs:
        j.join()

    if so.redisresultlist:
        foundlineobjects = loadredisresults(so.searchid)
    else:
        # foundlineobjects = [dblineintolineobject(item) for item in founddblineobjects]
        foundlineobjects = list(founddblineobjects)

    # real connections (not the None placeholders) need explicit cleanup
    if oneconnectionperworker[0]:
        for c in oneconnectionperworker:
            oneconnectionperworker[c].connectioncleanup()

    return foundlineobjects
# Beispiel #5
# 0
def executesearch(searchid: str, so=None, req=request) -> JSON_STR:
    """
    the interface to all of the other search functions

    tell me what you are looking for and i'll try to find it

    the results are returned in a json bundle that will be used to update the html on the page

    note that cosdistbysentence vector queries also flow through here: they need a hitdict

    overview:
        buildsearchobject() and then start modifying elements of the SearchObject

        build a search list via compilesearchlist()
            modify search list via flagexclusions()
            modify search list via calculatewholeauthorsearches()
        build search list restrictions via indexrestrictions()

        search via searchdispatcher()

        format results via buildresultobjects()

    :param searchid: the id used to register/track this search's ProgressPoll
    :param so: an optional prebuilt SearchObject (singlewordsearch() supplies one)
    :param req: the flask request (defaults to the active request)
    :return: a JSON string built from a SearchOutputObject
    """

    pollid = validatepollid(searchid)

    if not so:
        # there is a so if singlewordsearch() sent you here
        probeforsessionvariables()
        so = buildsearchobject(pollid, req, session)

    frozensession = so.session

    # register a poll so the client can follow progress
    progresspolldict[pollid] = ProgressPoll(pollid)
    so.poll = progresspolldict[pollid]

    so.poll.activate()
    so.poll.statusis('Preparing to search')

    nosearch = True
    output = SearchOutputObject(so)

    allcorpora = [
        'greekcorpus', 'latincorpus', 'papyruscorpus', 'inscriptioncorpus',
        'christiancorpus'
    ]
    # corpora whose session flag is truthy; assumes frozensession has all these keys
    activecorpora = [c for c in allcorpora if frozensession[c]]

    # only build a searchlist if there is something to look for and somewhere to look
    if (len(so.seeking) > 0 or so.lemma or frozensession['tensorflowgraph']
            or frozensession['topicmodel']) and activecorpora:
        so.poll.statusis('Compiling the list of works to search')
        so.searchlist = compilesearchlist(listmapper, frozensession)

    if so.searchlist:
        # do this before updatesearchlistandsearchobject() which collapses items and cuts your total
        workssearched = len(so.searchlist)

        # calculatewholeauthorsearches() + configurewhereclausedata()
        so = updatesearchlistandsearchobject(so)

        nosearch = False
        skg = None
        prx = None

        # one character class covering the lowercase polytonic Greek range
        isgreek = re.compile(
            '[α-ωϲἀἁἂἃἄἅἆἇᾀᾁᾂᾃᾄᾅᾆᾇᾲᾳᾴᾶᾷᾰᾱὰάἐἑἒἓἔἕὲέἰἱἲἳἴἵἶἷὶίῐῑῒΐῖῗὀὁὂὃὄὅόὸὐὑὒὓὔὕὖὗϋῠῡῢΰῦῧύὺᾐᾑᾒᾓᾔᾕᾖᾗῂῃῄῆῇἤἢἥἣὴήἠἡἦἧὠὡὢὣὤὥὦὧᾠᾡᾢᾣᾤᾥᾦᾧῲῳῴῶῷώὼ]'
        )

        if so.lemmaone:
            # NOTE(review): guarded by 'so.lemmaone' but reads 'so.lemma.formlist';
            # presumably so.lemma aliases so.lemmaone here -- confirm against SearchObject
            so.termone = wordlistintoregex(so.lemma.formlist)
            skg = so.termone
            if re.search(isgreek, skg):
                # 'v' is a problem because the lemmata list is going to send 'u'
                # but the greek lemmata are accented
                so.usecolumn = 'accented_line'

        if so.lemmatwo:
            so.termtwo = wordlistintoregex(so.lemmatwo.formlist)
            prx = so.termtwo
            if re.search(isgreek, prx):
                so.usecolumn = 'accented_line'

        so.setsearchtype()
        thesearch = so.generatesearchdescription()
        htmlsearch = so.generatehtmlsearchdescription()

        # now that the SearchObject is built, do the search...
        hits = precomposedsqlsearch(so)
        so.poll.statusis('Putting the results in context')

        # hits is List[dbWorkLine]
        hitdict = sortresultslist(hits, so, authordict, workdict)

        if so.vectorquerytype == 'cosdistbylineorword':
            # print('executesearch(): h - cosdistbylineorword')
            # take these hits and head on over to the vector worker
            output = findabsolutevectorsfromhits(so, hitdict, workssearched)
            del progresspolldict[pollid]
            return output

        resultlist = buildresultobjects(hitdict, authordict, workdict, so)

        so.poll.statusis('Converting results to HTML')

        sandp = rewriteskgandprx(skg, prx, htmlsearch, so)
        skg = sandp['skg']
        prx = sandp['prx']
        htmlsearch = sandp['html']

        for r in resultlist:
            r.lineobjects = flagsearchterms(r, skg, prx, so)

        if so.context > 0:
            findshtml = htmlifysearchfinds(resultlist, so)
        else:
            findshtml = nocontexthtmlifysearchfinds(resultlist)

        if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
            findshtml = gtltsubstitutes(findshtml)

        findsjs = insertbrowserclickjs('browser')

        resultcount = len(resultlist)

        # hitting the cap means there may be more matches than were returned
        if resultcount < so.cap:
            hitmax = False
        else:
            hitmax = True

        output.title = thesearch
        output.found = findshtml
        output.js = findsjs
        output.setresultcount(resultcount, 'passages')
        output.setscope(workssearched)
        output.searchtime = so.getelapsedtime()
        output.thesearch = thesearch
        output.htmlsearch = htmlsearch
        output.hitmax = hitmax

    if nosearch:
        # explain to the user why nothing was searched
        if not activecorpora:
            output.reasons.append('there are no active databases')
        if len(so.seeking) == 0:
            output.reasons.append('there is no search term')
        if len(so.seeking) > 0 and len(so.searchlist) == 0:
            output.reasons.append('zero works match the search criteria')

        output.title = '(empty query)'
        output.setresultcount(0, 'passages')
        output.explainemptysearch()

    so.poll.deactivate()
    jsonoutput = json.dumps(output.generateoutput())

    # retire the poll now that the search is finished
    del progresspolldict[pollid]

    return jsonoutput
def withinxwords(workdbname: str, searchobject: SearchObject,
                 dbconnection) -> List[dbWorkLine]:
    """

    int(session['proximity']), searchingfor, proximate, curs, wkid, whereclauseinfo

    after finding x, look for y within n words of x

    getting to y:
        find the search term x and slice it out of its line
        then build forwards and backwards within the requisite range
        then see if you get a match in the range

    if looking for 'paucitate' near 'imperator' you will find:
        'romani paucitate seruorum gloriatos itane tandem ne'
    this will become:
        'romani' + 'seruorum gloriatos itane tandem ne'

    :param workdbname:
    :param searchobject:
    :return:
    """

    so = searchobject
    dbcursor = dbconnection.cursor()
    dbconnection.setautocommit()

    # substringsearch() normally caps output at session['maxresults']; lift that cap:
    # "Roman" near "Aetol" will get 3786 hits in Livy, but only maxresults would come
    # back for checking, and the Aetolians are likely not among those passages...
    templimit = 9999

    if so.lemma:
        chunksize = hipparchia.config['LEMMACHUNKSIZE']
        forms = so.lemma.formlist
        # one ORed regex per chunk of lemma forms
        regexes = [wordlistintoregex(forms[j:j + chunksize])
                   for j in range(0, len(forms), chunksize)]

        hits = list()
        for rgx in regexes:
            hits.extend(substringsearch(rgx, workdbname, so, dbcursor, templimit))
        so.usewordlist = 'polytonic'
    else:
        hits = list(substringsearch(so.termone, workdbname, so, dbcursor, templimit))

    fullmatches = list()

    for hit in hits:
        hitline = dblineintolineobject(hit)

        environs = grableadingandlagging(hitline, so, dbcursor)
        lagging = environs['lag']
        leading = environs['lead']
        # print(hitline.universalid, so.termtwo, '\n\t[lag] ', lagging, '\n\t[lead]', leading)

        # does the second term show up on either side of the hit?
        foundnearby = re.search(so.termtwo, leading) or re.search(so.termtwo, lagging)

        if so.near and foundnearby:
            fullmatches.append(hit)
        elif not so.near and not foundnearby:
            fullmatches.append(hit)

    return fullmatches
# Beispiel #7
# 0
def precomposedphraseandproximitysearch(so: SearchObject) -> List[dbWorkLine]:
    """

    do a precomposedsqlsubqueryphrasesearch() and then search inside the results for part two...

    corner case tester: two line-enders: non solum + temporum dignitatem

    [12]   Caesar, De Bello Gallico: book 7, chapter 54, section 4, line 2

    7.54.3.3 multatos agris, omnibus ereptis sociis, imposito stipendio,
    7.54.4.1 obsidibus summa cum contumelia extortis, et quam in
    7.54.4.2 fortunam quamque in amplitudinem deduxisset, ut non
    7.54.4.3 solum in pristinum statum redissent, sed omnium tem-
    7.54.4.4 porum dignitatem et gratiam antecessisse viderentur.


    corner case tester: two distant line-enders: temporum dignitatem + obsides Galliae

    ut non
    solum in pristinum statum redissent, sed omnium tem- 	7.54.4.3
    porum dignitatem et gratiam antecessisse viderentur.
    his datis mandatis eos ab se dimisit.
          Noviodunum erat oppidum Haeduorum ad ripas 	7.55.1.1
    Ligeris opportuno loco positum. huc Caesar omnes ob- 	7.55.2.1
    sides Galliae, frumentum, pecuniam publicam, suorum

    the old code will trick you by pretending it is doing a valid search even though it is not really set up
    to handle this situation and was not supposed to promise that it could do phrase+
    [it's the phrase-spanning-two-lines bit that yields the problem since you do "lemma+" but have no handler for
    the multi-line issue]

    0.0.0-1.8.1

    Sought all 19 known forms of »χώρα« within 1 lines of »μεγάλην δύναμιν«
    Searched 3,182 works and found 1 passage (0.77s)
    Searched between 850 B.C.E. and 300 B.C.E.
    Sorted by name
    [1]   Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 5, line 47

    3c,688,F.5.45 τόπουϲ. (3) γενόμενον δ’ ἀποϲτάτην καὶ πείϲαντα τὸ ϲύμπαν ἔθνοϲ ἀντέχεϲθαι
    3c,688,F.5.46 τῆϲ ἐλευθερίαϲ, αἱρεθῆναι ϲτρατηγὸν διὰ τὴν ἀνδρείαν. ἔπειτα πυνθανόμενον
    3c,688,F.5.47 ἀθροιζομένην ἐπ’ αὐτὸν μεγάλην δύναμιν, καθοπλίϲαι τοὺϲ Καδουϲίουϲ παν-
    3c,688,F.5.48 δημεί, καὶ καταϲτρατοπεδεῦϲαι πρὸϲ ταῖϲ εἰϲ τὴν χώραν εἰϲβολαῖϲ, ἔχοντα
    3c,688,F.5.49 τοὺϲ ϲύμπανταϲ οὐκ ἐλάττουϲ εἴκοϲι μυριάδων. (4) τοῦ δὲ βαϲιλέωϲ Ἀρταίου

    1.8.2+

    Sought all 19 known forms of »χώρα« within 1 lines of »μεγάλην δύναμιν«
    Searched 2,346 works and found 2 passages (2.2s)
    Searched between 850 B.C.E. and 300 B.C.E.
    Sorted by name
    [1]   Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 5, line 47

    3c,688,F.5.45 τόπουϲ. (3) γενόμενον δ’ ἀποϲτάτην καὶ πείϲαντα τὸ ϲύμπαν ἔθνοϲ ἀντέχεϲθαι
    3c,688,F.5.46 τῆϲ ἐλευθερίαϲ, αἱρεθῆναι ϲτρατηγὸν διὰ τὴν ἀνδρείαν. ἔπειτα πυνθανόμενον
    3c,688,F.5.47 ἀθροιζομένην ἐπ’ αὐτὸν μεγάλην δύναμιν, καθοπλίϲαι τοὺϲ Καδουϲίουϲ παν-
    3c,688,F.5.48 δημεί, καὶ καταϲτρατοπεδεῦϲαι πρὸϲ ταῖϲ εἰϲ τὴν χώραν εἰϲβολαῖϲ, ἔχοντα
    3c,688,F.5.49 τοὺϲ ϲύμπανταϲ οὐκ ἐλάττουϲ εἴκοϲι μυριάδων. (4) τοῦ δὲ βαϲιλέωϲ Ἀρταίου
    [2]   Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 14, line 54

    3c,688,F.14.52    (40) καὶ ἐλυπήθη λύπην ϲφοδρὰν Μεγάβυζοϲ, καὶ ἐπένθηϲε, καὶ ἠιτήϲατο
    3c,688,F.14.53 ἐπὶ Ϲυρίαν τὴν ἑαυτοῦ χώραν ἀπιέναι. ἐνταῦθα λάθραι καὶ τοὺϲ ἄλλουϲ τῶν
    3c,688,F.14.54 Ἑλλήνων προέπεμπε. καὶ ἀπήιει, καὶ ἀπέϲτη βαϲιλέωϲ, καὶ ἀθροίζει μεγάλην
    3c,688,F.14.55 δύναμιν ἄχρι πεντεκαίδεκα μυριάδων χωρὶϲ τῶν ἱππέων [καὶ τῶν πεζῶν].
    3c,688,F.14.56 καὶ πέμπεται Οὔϲιριϲ κατ’ αὐτοῦ ϲὺν ⟨κ⟩ μυριάϲι, καὶ ϲυνάπτεται πόλεμοϲ, καὶ

    :param so: the SearchObject; its seeking/proximate/lemma slots are mutated in place
    :return: the list of dbWorkLine hits that satisfy both halves of the search
    """

    #
    # initially do "within x lines"
    #

    # a "phrase" is any term with a non-space character on each side of a space
    phrasefinder = re.compile(r'[^\s]\s[^\s]')
    if re.search(phrasefinder, so.seeking) and re.search(
            phrasefinder, so.proximate):
        secondsearch = precomposedsqlsubqueryphrasesearch
    elif not re.search(phrasefinder, so.seeking) and re.search(
            phrasefinder, so.proximate):
        # only the proximate half is a phrase: swap halves so the phrase search runs first
        so.swapseekingandproxmate()
        so.swaplemmaoneandtwo()
        secondsearch = basicprecomposedsqlsearcher
    else:
        secondsearch = basicprecomposedsqlsearcher

    # stash the caller's settings and blank the proximate half:
    # the first pass must search only for the phrase
    c = so.cap
    ps = so.proximate
    so.proximate = str()
    pl = so.lemmatwo
    so.lemmatwo = str()
    so.phrase = so.seeking
    firstterm = so.phrase

    # raise the cap: the first pass feeds the second, so do not truncate it early
    so.cap = hipparchia.config['INTERMEDIATESEARCHCAP']

    initialhitlines = precomposedsqlsubqueryphrasesearch(so)

    # rewire the SearchObject so the second pass looks for the (former) proximate term
    so.seeking = ps
    so.lemmaone = pl
    so.setsearchtype()
    so.cap = c

    if secondsearch == precomposedsqlsubqueryphrasesearch:
        so.phrase = ps
    else:
        so.phrase = str()

    # NOTE(review): 'perparesoforsecondsqldict' looks misspelled but presumably
    # matches the helper's actual name elsewhere in the project -- confirm
    so = perparesoforsecondsqldict(so, initialhitlines)
    so.searchsqldict = searchlistintosqldict(so, so.seeking)

    if so.lemmaone:
        so.searchsqldict = rewritesqlsearchdictforlemmata(so)

    so.poll.sethits(0)

    newhitlines = secondsearch(so)

    # index the first-pass hits by their unique line ids, then build the set of
    # line ids within so.distance of any second-pass hit
    initialhitlinedict = {hl.uniqueid: hl for hl in initialhitlines}
    newhitlineids = set()

    for nhl in newhitlines:
        indices = list(
            range(nhl.index - so.distance, nhl.index + so.distance + 1))
        # NOTE(review): 'wkuinversalid' looks misspelled but presumably matches
        # the dbWorkLine attribute name -- confirm before renaming
        ids = ['{a}_{b}'.format(a=nhl.wkuinversalid, b=i) for i in indices]
        newhitlineids.update(ids)

    maybefinalhitines = list()
    if so.near:
        # "is near"
        maybefinalhitines = [
            initialhitlinedict[hl] for hl in initialhitlinedict
            if hl in newhitlineids
        ]
    elif not so.near:
        # "is not near"
        maybefinalhitines = [
            initialhitlinedict[hl] for hl in initialhitlinedict
            if hl not in newhitlineids
        ]

    #
    # if neccessary, do "within x words" as x lines hits will always be a subset of the first set
    #

    if so.lemmaone:
        secondterm = wordlistintoregex(so.lemmaone.formlist)
    else:
        secondterm = so.seeking

    if so.scope == 'words':
        finalhitlines = paredowntowithinxwords(so, firstterm, secondterm,
                                               maybefinalhitines)
    else:
        finalhitlines = maybefinalhitines

    # to humor rewriteskgandprx()
    # but that formatting doesn't 100% work yet...

    so.termone = firstterm
    so.termtwo = secondterm
    so.lemmatwo = so.lemmaone

    return finalhitlines