Example #1
0
def breaktextsintosentences(foundsentences: ListProxy, searchlist: ListProxy,
                            so: SearchObject,
                            dbconnection: ConnectionObject) -> List[tuple]:
    """
    Drain searchlist, collecting the sentences findsentences() reports for each entry.

    findsentences() both chunks and searches; one of its results looks like:

        ('line/gr0014w001/1', 'ἀντὶ πολλῶν ἄν ὦ ἄνδρεϲ ἀθηναῖοι χρημάτων ὑμᾶϲ ἑλέϲθαι νομίζω εἰ φανερὸν γένοιτο τὸ μέλλον ϲυνοίϲειν τῇ πόλει περὶ ὧν νυνὶ ϲκοπεῖτε')

    :param foundsentences: list that is extended in place with every batch of results
    :param searchlist: work items; consumed from the end via pop() until empty
    :param so: search object; handed to findsentences() and the source of the progress poll (so.poll)
    :param dbconnection: supplies the cursor and is asked to checkneedtocommit() after each searched item
    :return: foundsentences (the same object that was passed in)
    """

    poll = so.poll
    cursor = dbconnection.cursor()

    passes = 0
    while searchlist:
        passes += 1

        try:
            table = searchlist.pop()
        except IndexError:
            # the list emptied between the truth test and the pop
            table = None

        if table:
            sentences = findsentences(table, so, cursor)
            foundsentences.extend(sentences)
            dbconnection.checkneedtocommit(passes)

        try:
            stillwaiting = len(searchlist)
            poll.remain(stillwaiting)
        except TypeError:
            # reporting progress is best-effort only
            pass

    return foundsentences
def mpmorphology(terms: list, furtherdeabbreviate: bool, dictofmorphobjects,
                 dbconnection: ConnectionObject) -> dict:
    """
    Build a dict of morphology objects, one entry per term.

    Terms are popped off the end of `terms` until it is empty; each one is looked up with
    lookformorphologymatches() and the result is stored under the term itself.

    :param terms: the words to look up; emptied as a side effect
    :param furtherdeabbreviate: passed straight through to lookformorphologymatches()
    :param dictofmorphobjects: mapping that receives term -> lookup result (None when the lookup yields nothing truthy)
    :param dbconnection: connection to use; when falsy a new ConnectionObject is created here
    :return: dictofmorphobjects (the same object that was passed in)
    """

    # remember whether the connection is ours: one created here must always be released here
    ownconnection = not dbconnection
    if ownconnection:
        dbconnection = ConnectionObject()

    try:
        dbcursor = dbconnection.cursor()

        commitcount = 0
        while terms:
            commitcount += 1
            dbconnection.checkneedtocommit(commitcount)
            try:
                term = terms.pop()
            except IndexError:
                # presumably `terms` can be shared with other workers and be drained
                # between the truth test and the pop -- TODO confirm against callers
                term = None

            if term:
                mo = lookformorphologymatches(
                    term, dbcursor, furtherdeabbreviate=furtherdeabbreviate)
                dictofmorphobjects[term] = mo if mo else None
    finally:
        # the finally clause keeps a failed lookup from leaking the connection
        if not icanpickleconnections() or ownconnection:
            dbconnection.connectioncleanup()

    return dictofmorphobjects
class GenericSearchFunctionObject(object):
    """
    a class to hold repeated code for the searches

    the chief difference between most search types is the number and names of the parameters passed
    to self.searchfunction

    one also has the option of storing the searchlist either in shared memory or in a redis set
    the retrieval, parsing, and ending checks for the two structures are different

    handling the matrix of possibilities for search_type and list_type combinations produces the
    somewhat tangled set of options below

    the "options" are plain attributes set in __init__ (getnetxitem, emptytest, remainder,
    emptyerror, remaindererror, parameterswapper, searchfunctionparameters); the methods only
    ever go through those attributes, so rebinding them after construction changes how the
    main loop fetches items, detects exhaustion and reports progress
    """

    def __init__(self, workerid, foundlineobjects: ListProxy,
                 listofplacestosearch, searchobject: SearchObject,
                 dbconnection, searchfunction):
        """
        :param workerid: identifier of this worker; stored but not otherwise used by this class
        :param foundlineobjects: list that receives the line objects generated from the finds
        :param listofplacestosearch: the work queue; its pop() is used to fetch items if it has one
        :param searchobject: the search object; its poll becomes self.activepoll
        :param dbconnection: connection to use; when falsy a ConnectionObject is created and later cleaned up here
        :param searchfunction: callable invoked with the swapped-in parameters for every item
        """
        self.workerid = workerid
        self.commitcount = 0
        if dbconnection:
            self.dbconnection = dbconnection
            self.needconnectioncleanup = False
        else:
            # you are running Windows and can't pickle your connections
            self.dbconnection = ConnectionObject()
            self.needconnectioncleanup = True
        self.dbcursor = self.dbconnection.cursor()
        self.so = searchobject
        self.foundlineobjects = foundlineobjects
        self.listofplacestosearch = listofplacestosearch
        self.searchfunction = searchfunction
        # must be set to a list containing the marker 'parametertoswap' before iteratethroughsearchlist() runs
        self.searchfunctionparameters = None
        self.activepoll = self.so.poll
        # how a fetched item is merged into searchfunctionparameters; tupleparamswapper is the alternative
        self.parameterswapper = self.simpleparamswapper
        # truthiness of this object tells the main loop whether anything is left to search
        self.emptytest = self.listofplacestosearch
        try:
            self.getnetxitem = self.listofplacestosearch.pop
        except AttributeError:
            # this should get implemented momentarily after this GenericObject has been initialized;
            # until then any attempt to fetch an item raises instead of quietly handing an
            # exception object to the search function
            self.getnetxitem = self._nonextitemsource
        # len() of this object is what gets reported to the poll as "remaining"
        self.remainder = self.listofplacestosearch
        # raised by getnetxitem when nothing is left
        self.emptyerror = IndexError
        # raised when the remaining count cannot be determined or reported
        self.remaindererror = TypeError

    def _nonextitemsource(self, *args):
        """stand-in for getnetxitem when listofplacestosearch has no pop(); always raises"""
        raise NotImplementedError('getnetxitem has not been set for this search list type')

    def authorsamongthefinds(self) -> set:
        """
        :return: the set of authorid values of everything currently in foundlineobjects
        """
        authorset = {f.authorid for f in self.foundlineobjects}
        return authorset

    def getnextfnc(self):
        """
        bump the commit counter, let the connection decide whether to commit, then fetch an item

        :return: the next search location, or None if fetching raised self.emptyerror
        """
        self.commitcount += 1
        self.dbconnection.checkneedtocommit(self.commitcount)
        try:
            nextsearchlocation = self.getnetxitem(0)
        except self.emptyerror:
            nextsearchlocation = None
        return nextsearchlocation

    def getremain(self):
        """
        :return: len() of self.remainder
        """
        return len(self.remainder)

    def listcleanup(self):
        """
        hook called once after the main loop; a no-op here
        """
        pass

    def addnewfindstolistoffinds(self, newfinds: list):
        """
        :param newfinds: line objects to append to self.foundlineobjects
        """
        self.foundlineobjects.extend(newfinds)
        # nf = ', '.join([f.universalid for f in newfinds])
        # print('{c} {u}\tadded\t{ln}'.format(c=self.workerid, u=self.dbconnection.uniquename, ln=nf))

    def updatepollremaining(self):
        """
        report the remaining count to the poll; leave a note on the poll if that fails with self.remaindererror
        """
        try:
            self.activepoll.remain(self.getremain())
        except self.remaindererror:
            self.activepoll.setnotes(
                'Number remaining unavailable: % complete will be inaccurate')

    def updatepollfinds(self, lines: list):
        """
        :param lines: the newest finds; their count is added to the poll's hits when non-empty
        """
        if lines:
            numberoffinds = len(lines)
            self.activepoll.addhits(numberoffinds)
        return

    def simpleparamswapper(self, texttoinsert: str,
                           insertposition: int) -> list:
        """
        the various searchfunctions have different interfaces

        this lets you get the right collection of parameters into the various functions

        note that self.searchfunctionparameters is modified in place and is itself what gets returned

        :param texttoinsert: the value to put into the parameter list
        :param insertposition: index of the slot to overwrite
        :return: self.searchfunctionparameters with the slot overwritten
        """
        parameters = self.searchfunctionparameters
        parameters[insertposition] = texttoinsert
        return parameters

    def tupleparamswapper(self, tupletoinsert: tuple,
                          insertposition: int) -> list:
        """
        somewhat brittle, but...

        this handles the non-standard case of a tuple that needs swapping instead of an individual name
        (i.e., it works with the lemmatized search)

        the single slot at insertposition is replaced by all of the tuple's elements;
        unlike simpleparamswapper() this builds a new list and leaves self.searchfunctionparameters alone

        :param tupletoinsert: the tuple to splice in; unpickled first when so.redissearchlist is set
        :param insertposition: index of the slot that the tuple's elements replace
        :return: a new parameter list
        """
        if self.so.redissearchlist:
            # NOTE(review): pickle.loads() must only ever see data this application stored itself;
            # confirm that nothing untrusted can write to the redis search list
            tupletoinsert = pickle.loads(tupletoinsert)

        parameters = self.searchfunctionparameters
        head = parameters[:insertposition]
        tail = parameters[insertposition + 1:]
        newparams = head + list(tupletoinsert) + tail
        return newparams

    def iteratethroughsearchlist(self):
        """
        this is the simple core of the whole thing; the rest is about feeding it properly

        if you do not pickle the lineobjects here and now you will need to generate line objects at the other end
            foundlineobjects = [dblineintolineobject(item) for item in founddblineobjects]

        you will also need to use lo.decompose() in phrasesearching.py to feed the findslist

        the loop runs while self.emptytest is truthy and the poll's hit count has not passed so.cap;
        it also stops as soon as no further item can be fetched

        a connection that was created by this object is cleaned up even if the search raises

        :return: None; ask for self.foundlineobjects as the search result instead
        """

        try:
            insertposition = self.searchfunctionparameters.index('parametertoswap')
            while self.emptytest and self.activepoll.gethits() <= self.so.cap:
                nextitem = self.getnextfnc()
                if not nextitem:
                    # listofplacestosearch has been exhausted: the fetch can come back empty
                    # even though emptytest was truthy a moment ago, so this has to be checked
                    # before anything tries to look inside nextitem
                    break

                srchfunct = self.searchfunction
                if self.so.session['onehit'] and (self.so.lemma or self.so.proximatelemma):
                    # simplelemma chunk might have already searched and found in an author
                    # nextitem looks like '(chunk, item)'
                    if nextitem[1] in self.authorsamongthefinds():
                        srchfunct = None

                if srchfunct:
                    params = self.parameterswapper(nextitem, insertposition)
                    foundlines = srchfunct(*tuple(params))
                    lineobjects = [dblineintolineobject(f) for f in foundlines]
                    self.addnewfindstolistoffinds(lineobjects)
                    self.updatepollfinds(lineobjects)
                    self.updatepollremaining()
                # otherwise this item is skipped and the loop moves on to the next one

            self.listcleanup()
        finally:
            if self.needconnectioncleanup:
                self.dbconnection.connectioncleanup()

        # empty return because foundlineobjects is a ListProxy:
        # ask for self.foundlineobjects as the search result instead
        # print('{i} finished'.format(i=self.workerid))
        return
def workonprecomposedsqlsearch(workerid: int, foundlineobjects: ListProxy, listofplacestosearch: ListProxy,
                               searchobject: SearchObject, dbconnection) -> ListProxy:
    """

    iterate through listofplacestosearch

    execute precomposedsqlsearcher() on each item in the list

    gather the results...

    listofplacestosearch elements are dicts and the whole looks like:

        [{'temptable': '', 'query': 'SELECT ...', 'data': ('ὕβριν',)},
        {'temptable': '', 'query': 'SELECT ...', 'data': ('ὕβριν',)} ...]

    this is supposed to give you one query per hipparchiaDB table unless you are lemmatizing

    items are taken from the front of the list; work stops when the list is empty, when an
    item cannot be fetched, or when the poll reports more hits than searchobject.cap

    :param workerid: identifier of the worker; not used in the body
    :param foundlineobjects: list that is extended in place with the line objects built from the finds
    :param listofplacestosearch: the query dicts to execute; consumed as a side effect
    :param searchobject: supplies the progress poll (searchobject.poll) and the hit cap (searchobject.cap)
    :param dbconnection: connection to use; when falsy a new ConnectionObject is created here
    :return: foundlineobjects (the same object that was passed in)
    """

    # remember whether the connection is ours: one created here must always be released here
    ownconnection = not dbconnection
    if ownconnection:
        dbconnection = ConnectionObject()

    try:
        so = searchobject
        activepoll = so.poll
        dbconnection.setreadonly(False)
        dbcursor = dbconnection.cursor()
        commitcount = 0

        while listofplacestosearch and activepoll.gethits() <= so.cap:
            commitcount += 1
            dbconnection.checkneedtocommit(commitcount)

            try:
                querydict = listofplacestosearch.pop(0)
            except IndexError:
                # the list emptied between the truth test and the pop
                querydict = None

            if not querydict:
                # nothing (more) to execute
                break

            foundlines = precomposedsqlsearcher(querydict, dbcursor)
            lineobjects = [dblineintolineobject(f) for f in foundlines]
            foundlineobjects.extend(lineobjects)

            if lineobjects:
                activepoll.addhits(len(lineobjects))

            try:
                activepoll.remain(len(listofplacestosearch))
            except TypeError:
                # reporting progress is best-effort only
                pass
    finally:
        # the finally clause keeps a failed query from leaking the connection
        if not icanpickleconnections() or ownconnection:
            dbconnection.connectioncleanup()

    return foundlineobjects