Ejemplo n.º 1
0
def updatesearchlistandsearchobject(so: SearchObject) -> SearchObject:
    """

    Given a SearchObject that already carries a searchlist, derive the
    remaining search bookkeeping from that list and store it on the object.

    In order, this:
        - rewrites so.searchlist via flagexclusions()
        - rewrites so.searchlist via calculatewholeauthorsearches()
        - records so.usedcorpora from so.wholecorporasearched()
        - records so.indexrestrictions from configurewhereclausedata()

    The poll attached to the SearchObject is told about the two main stages.

    Kept as a separate function so that golangvectors() can call it as well.

    :param so: the SearchObject to update (modified in place)
    :return: the same SearchObject
    """

    # Works that have passage exclusions attached get flagged in the list:
    # e.g. gr0001x001 rather than gr0001w001 when part of w001 is skipped.
    so.searchlist = flagexclusions(
        so.searchlist,
        so.session,
    )

    so.poll.statusis('Calculating full authors to search')
    so.searchlist = calculatewholeauthorsearches(
        so.searchlist,
        authordict,
    )
    so.usedcorpora = so.wholecorporasearched()

    so.poll.statusis('Configuring the search restrictions')
    so.indexrestrictions = configurewhereclausedata(
        so.searchlist,
        workdict,
        so,
    )

    return so
Ejemplo n.º 2
0
def checkneedtoabort(so: SearchObject) -> str:
    """

    Decide whether a vector search can/should be attempted at all.

    Side effects: updates the poll status and sets so.searchlist from
    compilesearchlist().

    The checks run in this order:
        1. a robot (so.iamarobot) is never told to abort
        2. a term (lemmaone / tovectorize / seeking) plus active corpora is
           accepted without further checks on the query itself
        3. otherwise: no active corpora, an empty searchlist, an 'analogies'
           query lacking any of its three lemmata, or any non-'topicmodel'
           query with no term are all reasons to abort
        4. finally, the total word count of the works in the searchlist is
           compared against MAXVECTORSPACE; exceeding it replaces any
           earlier abort message

    :param so: the SearchObject describing the requested search
    :return: an empty string if the search may proceed; otherwise whatever
        emptyvectoroutput() returns for the list of reasons
    """

    if so.iamarobot:
        return str()

    def abort(reasons: list):
        # every abort message is handed over as a list of reason strings
        return emptyvectoroutput(so, reasons)

    abortjson = str()
    activecorpora = so.getactivecorpora()
    so.poll.statusis('Compiling the list of works to search')
    so.searchlist = compilesearchlist(listmapper, so.session)

    # so.seeking should only be set via a fallback when session['baggingmethod'] == 'unlemmatized'
    if (so.lemmaone or so.tovectorize or so.seeking) and activecorpora:
        pass
    elif not activecorpora:
        abortjson = abort(['no active corpora'])
    elif not so.searchlist:
        abortjson = abort(['empty list of places to look'])
    elif so.vectorquerytype == 'topicmodel':
        # we don't have and don't need a lemmaone, etc.
        pass
    elif so.vectorquerytype == 'analogies':
        if not so.lemmaone or not so.lemmatwo or not so.lemmathree:
            # a list, like every other call to abort(): the brackets used to
            # sit inside the string, so a bare str was passed instead
            abortjson = abort(['did not have three lemmata'])
    else:
        # note that some vector queries do not need a term; fix this later...
        abortjson = abort(['there was no search term'])

    maxwords = hipparchia.config['MAXVECTORSPACE']
    wordstotal = 0
    for work in so.searchlist:
        # only the first ten characters of a searchlist item are used as the workdict key
        workid = work[:10]
        try:
            wordstotal += workdict[workid].wordcount
        except TypeError:
            # a work whose wordcount is None contributes nothing:
            # TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'
            pass

    if wordstotal > maxwords:
        m = 'the vector scope max exceeded: {a} > {b} '
        abortjson = abort([
            m.format(a=locale.format_string('%d', wordstotal, grouping=True),
                     b=locale.format_string('%d', maxwords, grouping=True))
        ])

    return abortjson
Ejemplo n.º 3
0
def perparesoforsecondsqldict(so: SearchObject, initialhitlines: List[dbWorkLine], usebetweensyntax=True) -> SearchObject:
    """

    after finding initialhitlines sqlwithinxlinessearch() will run a second query

    it needs a new sqldict: this resets so.indexrestrictions and so.searchlist so
    that the second query only looks at lines within so.distance of an initial hit

    so.searchlist becomes the list of author ids that had hits (empty if there were
    no hits); so.indexrestrictions gets one entry per author:

        usebetweensyntax=True:
            {'where': {'listofboundaries': [(low, high), ...], 'listofomissions': []},
             'type': 'between'}

        usebetweensyntax=False:
            {'type': 'temptable', 'where': wholeworktemptablecontents(author, lineset)}

    note that "usebetweensyntax=False" will break precomposedphraseandproximitysearch()

    :param so: the SearchObject to reconfigure (modified in place)
    :param initialhitlines: the lines found by the first query; each needs .index and .authorid
    :param usebetweensyntax: choose 'between' restrictions (default) over temp tables
    :return: the same SearchObject
    """

    so.indexrestrictions = dict()
    authorsandlines = dict()

    if not usebetweensyntax:
        # time trials for the temptable variant...
        # Sought all 13 known forms of »ὕβριϲ« within 4 lines of all 230 known forms of »φεύγω«
        # Searched 7,873 texts and found 9 passages (11.87s)
        # Searched between 400 B.C.E. and 350 B.C.E.

        # Sought all 230 known forms of »φεύγω« within 4 lines of all 16 known forms of »κρίϲιϲ«
        # Searched 7,873 texts and found 12 passages (14.64s)
        # Searched between 400 B.C.E. and 350 B.C.E.

        for hl in initialhitlines:
            # every line index in the window around the hit, both ends included
            linestosearch = range(hl.index - so.distance, hl.index + so.distance + 1)
            authorsandlines.setdefault(hl.authorid, list()).extend(linestosearch)

        so.searchlist = list(authorsandlines)

        for a, lines in authorsandlines.items():
            so.indexrestrictions[a] = dict()
            so.indexrestrictions[a]['type'] = 'temptable'
            # set(): overlapping windows should not yield duplicate line numbers
            so.indexrestrictions[a]['where'] = wholeworktemptablecontents(a, set(lines))
    else:
        # time trials for the between variant...
        # Sought all 13 known forms of »ὕβριϲ« within 4 lines of all 230 known forms of »φεύγω«
        # Searched 7,873 texts and found 9 passages (9.35s)
        # Searched between 400 B.C.E. and 350 B.C.E.

        # Sought all 230 known forms of »φεύγω« within 4 lines of all 16 known forms of »κρίϲιϲ«
        # Searched 7,873 texts and found 12 passages (11.35s)
        # Searched between 400 B.C.E. and 350 B.C.E.

        for hl in initialhitlines:
            boundaries = (hl.index - so.distance, hl.index + so.distance)
            authorsandlines.setdefault(hl.authorid, list()).append(boundaries)

        # set once, outside the loop: with no hits the searchlist must become
        # empty (as in the temptable branch) rather than keep its old contents
        so.searchlist = list(authorsandlines)

        for a, boundarylist in authorsandlines.items():
            so.indexrestrictions[a] = dict()
            so.indexrestrictions[a]['where'] = dict()
            so.indexrestrictions[a]['type'] = 'between'
            so.indexrestrictions[a]['where']['listofboundaries'] = boundarylist
            so.indexrestrictions[a]['where']['listofomissions'] = list()

    return so