Ejemplo n.º 1
0
def perseusidmismatch(badworkdbnumber: str, cursor) -> str:
	"""
	exception handling
	Perseus says you can look something up in gr0006w16: but there is no such thing
	go through the work list and pick the 16th: hope for the best

	more common is asking for w001 when really 002 or 003 is the 1st valid work number

	:param badworkdbnumber: the invalid work id, e.g. 'gr0006w016'
	:param cursor: an active db cursor
	:return: a (hopefully) valid universalid
	"""

	newworkid = '[na]'

	# loop until returnfirstwork() or the index lookup yields something other than '[na]'
	while newworkid == '[na]':
		query = 'SELECT universalid FROM works WHERE universalid LIKE %s ORDER BY universalid ASC'
		data = (badworkdbnumber[0:6]+'%',)
		try:
			cursor.execute(query, data)
			works = cursor.fetchall()
			try:
				oldnumber = int(badworkdbnumber[8:10])
				newworkid = works[oldnumber][0]
			except IndexError:
				newworkid = returnfirstwork(badworkdbnumber[0:6], cursor)
		except psycopg2.DatabaseError as e:
			# BUG FIX: consolewarning()'s second positional parameter is the color;
			# the query and the error must be folded into the message itself
			consolewarning('perseusidmismatch() - could not execute {q}'.format(q=query))
			consolewarning('Error: {e}'.format(e=e))
			newworkid = returnfirstwork(badworkdbnumber[0:6], cursor)

	return newworkid
Ejemplo n.º 2
0
    def _findpartofspeech(self):
        """
        classify this form as conjugated, declined, or not implemented

        the first analysis component decides: tense markers mean a verb,
        gender markers mean a noun/adjective; sets self.analysis as a side effect
        """
        tensemarkers = {'pres', 'aor', 'fut', 'perf', 'imperf', 'plup', 'futperf', 'part'}
        gendermarkers = {'masc', 'fem', 'neut', 'masc/neut', 'masc/fem'}
        marker = self.analyssiscomponents[0]

        if marker in tensemarkers:
            pos = 'conjugated'
            try:
                self.analysis = ConjugatedFormAnalysis(
                    self.word, self.language, self.dialects,
                    self.analyssiscomponents)
            except IndexError:
                # bad original data; too few items in analyssiscomponents
                warnstring = 'cannot parse {w}: analysis list is too short: {a}'.format(
                    w=self.word, a=self.analyssiscomponents)
                consolewarning(warnstring, 'yellow')
                self.analysis = None
        elif marker in gendermarkers:
            pos = 'declined'
            try:
                self.analysis = DeclinedFormAnalysis(
                    self.word, self.language, self.dialects,
                    self.analyssiscomponents)
            except AssertionError:
                self.analysis = None
        else:
            pos = 'notimplem'
            self.analysis = None

        return pos
Ejemplo n.º 3
0
def setthreadcount(startup=False) -> int:
    """
    determine the worker count for multithreaded functions

    return either the manual config value or derive one from the cpu count

    :param startup: emit the oversubscription warning only at startup
    :return: number of workers (always >= 1)
    """

    if hipparchia.config['AUTOCONFIGWORKERS']:
        workers = int(cpu_count() / 2) + 1
    else:
        workers = hipparchia.config['WORKERS']

    workers = max(workers, 1)

    if startup and workers > cpu_count():
        consolewarning(
            '\nWARNING: thread count exceeds total available number of threads: {a} > {b}'
            .format(a=workers, b=cpu_count()))
        consolewarning(
            'consider editing "WORKERS" and/or "AUTOCONFIGWORKERS" in "HipparchiaServer/server/settings/performancesettings.py"'
        )

    return workers
Ejemplo n.º 4
0
def determinevectorworkpile(tempcap=False) -> List[tuple]:
    """
    probe the db for potential vectorization targets

    :param tempcap: use a fixed near-maximal cap instead of the configured one
    :return: list of ([universalid, ...], wordcount) tuples whose count is below the cap
    """

    if tempcap:
        # real number is just over 93596456
        cap = 94000000
    else:
        cap = hipparchia.config['MAXVECTORSPACE']

    if multiprocessing.current_process().name == 'MainProcess':
        consolewarning(
            'the vectorbot is active and searching for items that need to be vectorized',
            color='green')
        consolewarning('bagging method has been set to: {b}'.format(
            b=hipparchia.config['DEFAULTBAGGINGMETHOD']))

    sizedauthors = sorted(
        ((authordict[a].universalid, authordict[a].countwordsinworks()) for a in authordict),
        key=lambda pair: pair[1])

    # single authors become one-item lists: corpus entries below are multi-author lists
    authortuples = [([uid], count) for uid, count in sizedauthors]

    activelists = [c for c in listmapper if len(listmapper[c]['a']) > 0]

    corpustuples = list()
    for corpus in activelists:
        members = [authordict[a].universalid for a in authordict
                   if authordict[a].universalid[:2] == corpus]
        corpussize = sum([authordict[a].countwordsinworks() for a in authordict
                          if authordict[a].universalid[:2] == corpus])
        corpustuples.append((members, corpussize))

    # approximate corpus sizes for reference:
    # gk 75233496 / lt 7548164 / db 4276074 / in 5485166 / ch 1053646
    # the same exercise could be done for genres and time slices
    # (gr up to 300BCE: 13,518,315)

    workpile = [w for w in authortuples + corpustuples if w[1] < cap]

    return workpile
Ejemplo n.º 5
0
def startwspolling(theport=None):
	"""

	you need a websocket poll server

	pick between python and golang as a delivery medium

	:param theport: port to serve on; falls back to PROGRESSPOLLDEFAULTPORT
	"""

	if not theport:
		theport = hipparchia.config['PROGRESSPOLLDEFAULTPORT']

	if not gosearch:
		debugmessage('websockets are to be provided via the python socket server')
		# NOTE(review): startpythonwspolling() runs an event loop that never
		# returns, so presumably the checks below only matter when gosearch is set
		startpythonwspolling(theport)

	if hipparchia.config['EXTERNALWEBSOCKETS']:
		debugmessage('websockets are to be provided via the helper app socket server')
		helperappwebsocketserver(theport)
		return

	if not hipparchia.config['GRABBERCALLEDVIACLI']:
		debugmessage('websockets are to be provided via the helper app socket server')
		helperappwebsocketserver(theport)
		return

	# fall-through: gosearch available, no external websockets, grabber via cli
	startpythonwspolling(theport)

	# actually this function never returns
	consolewarning(failurestring.format(f='startwspolling()'), color='red')
	return
Ejemplo n.º 6
0
def clearselections(category, index=-1) -> JSON_STR:
	"""
	a selection gets thrown into the trash

	:param category: which selection/exclusion list to pop from
	:param index: position of the item to remove
	:return: json of the current selections
	"""

	validcategories = ['auselections', 'wkselections', 'psgselections', 'agnselections', 'wkgnselections',
						'alocselections', 'wlocselections', 'auexclusions', 'wkexclusions', 'psgexclusions',
						'agnexclusions', 'wkgnexclusions', 'alocexclusions', 'wlocexclusions']

	if category not in validcategories:
		category = None

	try:
		index = int(index)
	except ValueError:
		index = -1

	if category and index > -1:
		try:
			session[category].pop(index)
		except IndexError:
			consolewarning('\tclearselections() IndexError when popping {c}[{i}]'.format(c=category, i=str(index)), color='red')
		except KeyError:
			consolewarning('\tclearselections() KeyError when popping {c}[{i}]'.format(c=category, i=str(index)), color='red')
		session.modified = True

	return getcurrentselections()
Ejemplo n.º 7
0
def checkcompatability():
    """
    make sure the external helper binary is recent enough

    run the helper with its version flag, parse "(v.X.Y.Z)" out of the output,
    and compare against the relevant minimum version; forcibly disable the
    grabber when the binary is too old

    :return: None
    """
    if hipparchia.config['EXTERNALGRABBER']:
        # renamed from 'bin': avoid shadowing the builtin
        binaryname = hipparchia.config['EXTERNALBINARYNAME']
        p = getexternalhelperpath(binaryname)

        if 'Rust' in binaryname:
            vmin = RUSTHELPERMIN
            pre = '--'
        else:
            vmin = GOHELPERMIN
            pre = '-'

        commandandarguments = [p, '{p}v'.format(p=pre)]
        version = subprocess.run(commandandarguments, capture_output=True)
        version = version.stdout

        # b'Hipparchia Golang Helper CLI Debugging Interface (v.1.3.2)\n'

        vfinder = re.compile(r'\(v\.((\d+)\.(\d+)\.(\d+))\)')
        # extra brackets let you find the components, but you only need v[1]: "1.3.2"
        # note how "1.3.2b", etc. are doomed to fail...

        v = re.search(vfinder, str(version))

        try:
            minversion = LooseVersion(vmin)
        except ValueError:
            minversion = None
            consolewarning(
                'checkcompatability() failed to parse minimum version string "{v}"'
                .format(v=vmin))

        try:
            binversion = LooseVersion(v[1])
        except (ValueError, TypeError):
            # BUG FIX: TypeError must be caught too; re.search() returns None
            # when the pattern does not match and 'None[1]' raises TypeError
            binversion = None
            consolewarning(
                'checkcompatability() failed to parse version info string "{v}"'
                .format(v=version))

        if not binversion or not minversion:
            return

        if binversion >= minversion:
            debugmessage(
                'checkcompatability() says that {b} {x} >= {y}'.format(b=binaryname,
                                                                       x=v[1],
                                                                       y=vmin))
        else:
            w = '{b} is out of date. You have {x}. You need {y}. Some/Many functions are likely to fail.'
            consolewarning(w.format(b=binaryname, x=v[1], y=vmin), color='red')
            w = 'You should either upgrade {b} or disable the helper in "settings/helpersettings.py"'
            consolewarning(w.format(b=binaryname))
            consolewarning('I am now forcibly disabling the grabber...')
            hipparchia.config['EXTERNALGRABBER'] = False

    return
Ejemplo n.º 8
0
def startupprint(message: str,
                 color='white',
                 isbold=False,
                 colorcoded=False,
                 baremessage=True):
    """Forward a message to consolewarning(), but only from the main process."""
    # parameter order mirrors consolewarning(message, color, isbold, colorcoded, baremessage)
    if current_process().name != 'MainProcess':
        return
    consolewarning(message, color, isbold, colorcoded, baremessage)
    return
Ejemplo n.º 9
0
def fetchvectorgraph(imagename) -> bytes:
	"""

	grab a graph from the image table so that you can subsequently display it in the browser

	note that images get deleted after use unless RETAINFIGURES is set: the data
	was handed to the db and is immediately pulled back out because of
	constraints imposed by the way flask works

	:param imagename:
	:return: the image as bytes (b'' when nothing was stored under that name)
	"""

	deletewhendone = not hipparchia.config['RETAINFIGURES']

	dbconnection = ConnectionObject(ctype='rw')
	dbconnection.setautocommit()
	cursor = dbconnection.cursor()

	query = 'SELECT imagedata FROM public.storedvectorimages WHERE imagename=%s'
	cursor.execute(query, (imagename,))
	fetched = cursor.fetchone()

	try:
		# convert the memoryview; otherwise:
		# AttributeError: 'memoryview' object has no attribute 'read'
		imagedata = bytes(fetched[0])
	except TypeError:
		# TypeError: 'NoneType' object is not subscriptable
		# if you right click and download a graph in Firefox it will try to pull via the URL
		# but that figure is almost certainly gone unless you are a debugger retaining figures...
		imagedata = b''
		consolewarning('fetchvectorgraph() failed to fetch image {i}'.format(i=imagename))

	# now we should delete the image because we are done with it
	if deletewhendone:
		query = 'DELETE FROM public.storedvectorimages WHERE imagename=%s'
		cursor.execute(query, (imagename,))

	dbconnection.connectioncleanup()

	return imagedata
def precomposedexternalsearcher(so: SearchObject) -> List[dbWorkLine]:
    """
    hand the search off to the external helper

    [1] send the searchdict to redis as a list of json.dumps(items) (keyed to the searchid)
    [2] send the external fnc the searchid, cap value, worker #, psql login info, redis login info
    [3] wait for the function to (a) gather; (b) search; (c) store
    [4] pull the results back from redis via the searchid
    NB: redis makes sense because the activity poll is going to have to be done via redis anyway...

    the searched items are stored under the redis key 'searchid_results'
    json.loads() will leave you with a dictionary of k/v pairs that can be turned into a dbWorkLine

    """

    warning = 'attempted to search via external helper but {x} is not available using precomposedsqlsearchmanager() instead'

    if not gosearch and not haveexternalhelper(getexternalhelperpath()):
        missing = 'the external module'
        if not haveexternalhelper(getexternalhelperpath()):
            missing = hipparchia.config['EXTERNALBINARYNAME']
        consolewarning(warning.format(x=missing), color='red')
        return precomposedsqlsearchmanager(so)

    if not canuseredis:
        consolewarning(warning.format(x='redis'), color='red')
        return precomposedsqlsearchmanager(so)

    rc = establishredisconnection()

    so.searchsqldict = rewritesqlsearchdictforexternalhelper(so)

    # store each sql chunk in redis under the searchid
    for chunk in so.searchsqldict:
        rc.sadd(so.searchid, json.dumps(so.searchsqldict[chunk]))

    if hipparchia.config['GRABBERCALLEDVIACLI']:
        resultrediskey = helperclibinarysearcher(so)
    else:
        resultrediskey = helpersharedlibrarysearcher(so)

    redisresults = redisfetch(resultrediskey)

    return [redishitintodbworkline(r) for r in redisresults]
Ejemplo n.º 11
0
    def getbaseform(self):
        """
        dispatch to the greek or latin base form finder

        returns None (optionally with a warning) when the language cannot be determined
        """
        warn = not hipparchia.config['SUPPRESSWARNINGS']

        if self.amgreek():
            return self._getgreekbaseform()

        if self.amlatin():
            return self._getlatinbaseform()

        if warn:
            consolewarning(
                'MorphPossibilityObject failed to determine its own language: {e}'
                .format(e=self.entry))
        return None
Ejemplo n.º 12
0
def createvectorstable():
    """
    zap and reconstitute the storedvectors table

    drops and recreates public.storedvectors, then resets its ownership and grants

    :return:
    """

    consolewarning('resetting the stored vectors table', color='green')

    dbconnection = ConnectionObject(ctype='rw')
    dbcursor = dbconnection.cursor()

    # NOTE(review): the owner is hard-coded to 'hippa_wr' while the reader/writer
    # grants come from the config — confirm that is intended
    query = """
	DROP TABLE IF EXISTS public.storedvectors;

	CREATE TABLE public.storedvectors
	(
		ts timestamp without time zone,
		thumbprint character varying(32) COLLATE pg_catalog."default",
		uidlist character varying(32) COLLATE pg_catalog."default",
		vectortype character varying(24) COLLATE pg_catalog."default",
		baggingmethod character varying(24) COLLATE pg_catalog."default",
		calculatedvectorspace bytea
	)
	WITH (
		OIDS = FALSE
	)
	TABLESPACE pg_default;
	
	ALTER TABLE public.storedvectors
		OWNER to hippa_wr;
	
	GRANT SELECT ON TABLE public.storedvectors TO {reader};
	
	GRANT ALL ON TABLE public.storedvectors TO {writer};
	"""

    query = query.format(reader=hipparchia.config['DBUSER'],
                         writer=hipparchia.config['DBWRITEUSER'])

    dbcursor.execute(query)

    dbconnection.connectioncleanup()

    return
Ejemplo n.º 13
0
 def checkneedtocommit(self, commitcountervalue):
     """
     commit when the counter reaches a multiple of self.commitcount

     :param commitcountervalue: an MPCounter (or a plain int)
     """
     try:
         count = commitcountervalue.value
     except AttributeError:
         # a bare int was handed in instead of an MPCounter
         count = commitcountervalue

     if count % self.commitcount != 0:
         return

     try:
         getattr(self.dbconnection, 'commit')()
     except psycopg2.DatabaseError:
         # psycopg2.DatabaseError: error with status PGRES_TUPLES_OK and no message from the libpq
         # will return often-but-not-always '2' as the status: i.e., STATUS_IN_TRANSACTION
         consolewarning(
             '{c} failed its commit()'.format(c=self.uniquename),
             color='red')
         status = self.dbconnection.get_transaction_status()
         consolewarning('\tConnectionObject {me} status is {s}'.format(
             me=self.uniquename, s=status))
     return
Ejemplo n.º 14
0
def loadusersdict(knownusersandpasswords=None):
    """
    return the userobjects we know about

    anyone with ambitions re. a collection of users should insert them via securitysettings.py

        KNOWNUSERSDICT = {'user1': 'pass1', 'user2': 'pass2'}

    elaborate user and authentication schemes are a non-priority (as is encryption...)

    :param knownusersandpasswords: optional {username: password} dict; falls back
        to hipparchia.config['KNOWNUSERSDICT'] when empty/None
    :return: {username: PassUser}
    """

    if not knownusersandpasswords:
        knownusersandpasswords = hipparchia.config['KNOWNUSERSDICT']

    # BUG FIX: a caller-supplied dict used to be ignored entirely; build the
    # userobjects from whichever dict we ended up with
    userlist = [
        PassUser(k, knownusersandpasswords[k])
        for k in knownusersandpasswords
    ]

    if hipparchia.config['SETADEFAULTUSER']:
        thepass = hipparchia.config['DEFAULTREMOTEPASS']
        if thepass == 'yourremoteuserpassheretrytomakeitstrongplease':
            # refuse the shipped placeholder password; mint a one-time value instead
            thepass = assignuniquename()
            consolewarning(
                'DEFAULTREMOTEPASS cannot be left as "yourremoteuserpassheretrytomakeitstrongplease"'
            )
            consolewarning(
                'temporary one-time password is "{p}"'.format(p=thepass))
        defaultuser = PassUser(hipparchia.config['DEFAULTREMOTEUSER'], thepass)
        userlist.append(defaultuser)

    usersdict = {u.username: u for u in userlist}

    return usersdict
Ejemplo n.º 15
0
def createstoredimagestable():
    """
    zap and reconstitute the storedimages table

    drops and recreates public.storedvectorimages, then resets its ownership and grants

    :return:
    """

    consolewarning('resetting the stored images table', color='green')

    dbconnection = ConnectionObject(ctype='rw')
    dbcursor = dbconnection.cursor()

    # NOTE(review): the owner is hard-coded to 'hippa_wr' while the reader/writer
    # grants come from the config — confirm that is intended
    query = """
	DROP TABLE IF EXISTS public.storedvectorimages;
	
	CREATE TABLE public.storedvectorimages
	(
		imagename character varying(12),
		imagedata bytea
	)
	WITH (
		OIDS = FALSE
	)
	TABLESPACE pg_default;
	
	ALTER TABLE public.storedvectorimages
		OWNER to hippa_wr;
	
	GRANT SELECT ON TABLE public.storedvectorimages TO {reader};
	
	GRANT ALL ON TABLE public.storedvectorimages TO {writer};
	"""

    query = query.format(reader=hipparchia.config['DBUSER'],
                         writer=hipparchia.config['DBWRITEUSER'])

    dbcursor.execute(query)

    dbconnection.connectioncleanup()

    return
Ejemplo n.º 16
0
def genericexternalcliexecution(theprogram: str, formatterfunction,
                                so: SearchObject) -> str:
    """
    call a golang cli helper; report the result key

    this basically sets you up for either GOLANGCLIBINARYNAME or GOLANGVECTORBINARYNAME

    and you will need the relevant formatgolangXXXarguments() too

    note that the last line of the output of the binary is super-important: it needs to be the result key

    :param theprogram: name of the helper binary
    :param formatterfunction: builds the cli argument list for the binary
    :param so: the SearchObject being executed
    :return: the redis key holding the results ('' on failure)
    """
    resultrediskey = str()

    command = getexternalhelperpath(theprogram)
    commandandarguments = formatterfunction(command, so)

    try:
        result = subprocess.run(commandandarguments, capture_output=True)
    except FileNotFoundError:
        # BUG FIX: message was missing its closing quotation mark ('"{x}')
        consolewarning(
            'cannot find the golang executable "{x}"'.format(x=command),
            color='red')
        return resultrediskey

    if result.returncode == 0:
        stdo = result.stdout.decode('UTF-8')
        outputlist = stdo.split('\n')
        for o in outputlist:
            debugmessage(o)
        nonemptylines = [o for o in outputlist if o]
        # the last non-empty line looks like: 'results sent to redis as ff128837_results'
        # the key is its final whitespace-delimited token
        resultrediskey = nonemptylines[-1].split()[-1]
    else:
        o = str(result.stdout)
        e = str(result.stderr)
        e = re.sub(r'\\n', '\n', e)
        e = re.sub(r'\\t', '\t', e)
        consolewarning('{c} returned an error'.format(
            c=hipparchia.config['EXTERNALBINARYNAME']),
                       color='red')
        consolewarning('{e}'.format(e=o), color='cyan')
        consolewarning('{e}'.format(e=e), color='yellow')

    return resultrediskey
Ejemplo n.º 17
0
    def __init__(self,
                 autocommit='defaultisno',
                 readonlyconnection=True,
                 ctype='ro'):
        """
        open a psycopg2 connection using the class-level credentials

        :param autocommit: 'autocommit' turns autocommit on; any other value leaves it off
        :param readonlyconnection: request a read-only connection (forced False for ctype='rw')
        :param ctype: 'ro' or 'rw'; selects which db user/password pair to use
        """
        super().__init__(autocommit, readonlyconnection)
        assert ctype in ['ro',
                         'rw'], 'connection type must be either "ro" or "rw"'
        if ctype != 'rw':
            u = GenericConnectionObject.dbuser
            p = GenericConnectionObject.dbpass
        else:
            u = GenericConnectionObject.dbwriteuser
            p = GenericConnectionObject.dbwritepass
            self.readonlyconnection = False

        try:
            self.dbconnection = psycopg2.connect(
                user=u,
                host=GenericConnectionObject.dbhost,
                port=GenericConnectionObject.dbport,
                database=GenericConnectionObject.dbname,
                password=p)
        except psycopg2.OperationalError as operror:
            thefailure = operror.args[0]
            unknown = 'no pg_hba.conf entry for'
            if unknown in thefailure:
                # translate the cryptic libpq message into actionable advice
                thefailure = 'username and password problem for "DBWRITEUSER": check "securitysettings.py"'
            consolewarning(
                GenericConnectionObject.postgresproblem.format(e=thefailure),
                color='red')
            # NOTE(review): exit code 0 signals success to the shell despite the
            # failure — confirm that is intended
            sys.exit(0)

        if self.autocommit == 'autocommit':
            self.setautocommit()

        self.setreadonly(self.readonlyconnection)
        self.curs = getattr(self.dbconnection, 'cursor')()
        # presumably marks this as a non-fallback connection; set elsewhere when falling back
        self.thisisafallback = False
Ejemplo n.º 18
0
def startpythonwspolling(theport):
	"""

	launch a websocket poll server

	tricky because loop.run_forever() will run forever: requires threading

	the poll is more or less eternal: the library was coded that way, and it is kind of irritating

	multiple servers on multiple ports is possible, but not yet implemented: a multi-client model is not a priority

	:param theport: port to bind; falls back to PROGRESSPOLLDEFAULTPORT if not an int
	:return:
	"""

	try:
		theport = int(theport)
	except ValueError:
		theport = hipparchia.config['PROGRESSPOLLDEFAULTPORT']

	theip = hipparchia.config['MYEXTERNALIPADDRESS']

	# because we are not in the main thread we cannot ask for the default loop
	loop = asyncio.new_event_loop()
	asyncio.set_event_loop(loop)

	wspolling = websockets.serve(wscheckpoll, theip, port=theport, loop=loop)
	consolewarning('opening websocket at {p}'.format(p=theport), color='cyan', isbold=False)

	try:
		loop.run_until_complete(wspolling)
	except OSError:
		# NOTE(review): after this warning we still fall through to run_forever()
		# on a loop with no server bound — confirm that is intended
		consolewarning('websocket could not be launched: cannot get access to {i}:{p}'.format(p=theport, i=theip),
					   color='red')
		pass

	try:
		loop.run_forever()
	finally:
		# drain async generators before closing so the loop shuts down cleanly
		loop.run_until_complete(loop.shutdown_asyncgens())
		loop.close()

	# actually this function never returns
	consolewarning(failurestring.format(f='startpythonwspolling()'), color='red')
	return
Ejemplo n.º 19
0
# choose the ProgressPoll implementation at import time: shared memory unless
# the config asks for redis (or external websockets)
if hipparchia.config['POLLCONNECTIONTYPE'] != 'redis' and not hipparchia.config[
        'EXTERNALWEBSOCKETS']:

    class ProgressPoll(SharedMemoryProgressPoll):
        pass
else:
    # redis was requested: verify the module imports AND the server answers a ping
    try:
        import redis
        c = establishredisconnection()
        c.ping()
        canuseredis = True
        del c
    except ImportError:
        canuseredis = False
    except redis.exceptions.ConnectionError:
        canuseredis = False

    if canuseredis:
        debugmessage('RedisProgressPoll selected')

        class ProgressPoll(RedisProgressPoll):
            pass
    else:
        # fall back to shared memory when redis is unavailable
        consolewarning(
            'configuration asked for a RedisProgressPoll, but you cannot connect to redis'
        )

        class ProgressPoll(SharedMemoryProgressPoll):
            pass
Ejemplo n.º 20
0
        (see LICENSE in the top level directory of the distribution)
"""

from server import hipparchia
from server.formatting.miscformatting import consolewarning

# login support is optional: without flask_wtf/wtforms we cannot build the
# login form, so force open access and warn loudly
try:
    from flask_wtf import FlaskForm
    from werkzeug.security import generate_password_hash, check_password_hash
    from wtforms import StringField
    from wtforms.validators import DataRequired
except ModuleNotFoundError:
    if hipparchia.config['LIMITACCESSTOLOGGEDINUSERS']:
        hipparchia.config['LIMITACCESSTOLOGGEDINUSERS'] = False
        consolewarning(
            'flask_wtf and/or wtforms not found: ~/hipparchia_venv/bin/pip install flask_wtf',
            color='red')
        consolewarning('forcibly setting LIMITACCESSTOLOGGEDINUSERS to False',
                       color='red')
    # placeholders so later references to these names do not raise NameError
    FlaskForm = None
    generate_password_hash = None
    check_password_hash = None
    StringField = None
    DataRequired = None


class PassUser(object):
    """

    log people into hipparchia
Ejemplo n.º 21
0
def textmaker(author: str,
              work=None,
              passage=None,
              endpoint=None,
              citationdelimiter='|') -> JSON_STR:
    """
    build a text suitable for display

        "GET /textof/lt0474/024/20/30"

    :param author: author universalid, e.g. 'lt0474'
    :param work: work number within that author
    :param passage: citation of the first line to show
    :param endpoint: optional citation of the last line to show
    :param citationdelimiter: separator used inside citation strings
    :return: json dict with authorname, title, structure, worksegment, texthtml
    """

    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint,
                                     citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            # a span was requested: resolve first and last citations into db line numbers
            firstlinenumber = finddblinefromincompletelocus(
                wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo,
                                                           po.endpointlist,
                                                           dbcursor,
                                                           findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber[
                    'code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                startlnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                # span could not be resolved: warn and fall back to a dummy two-line range
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(
                    msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(),
                                                   b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            # a partial citation: derive the start and stop lines of that segment
            startandstop = textsegmentfindstartandstop(ao, wo,
                                                       po.passageaslist,
                                                       dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery,
                             dbcursor)
    else:
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        # placeholder objects keep the response shape stable when the query was invalid
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
Ejemplo n.º 22
0
def buildindexto(searchid: str,
                 author: str,
                 work=None,
                 passage=None,
                 endpoint=None,
                 citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
    Build a complete word index to an author, to a single work, or to a
    segment of a work, and return it as a JSON string for the page.

    :param searchid: id under which a ProgressPoll is registered for this job
    :param author: author id, e.g. 'gr0007'
    :param work: work number within the author (None/empty -> whole author)
    :param passage: citation of the start point inside the work
    :param endpoint: citation of the end point (index a span of the work)
    :param citationdelimiter: separator between citation levels in passage/endpoint
    :param justvocab: if True, short-circuit and return the raw cdict
        ({workid: (startline, endline)}) instead of JSON — internal callers only
    :return: JSON dict with the index HTML plus metadata (title, elapsed, ...)
    """

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    # register a poll so the front end can watch the build's progress
    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                      citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        # placeholder work object so the results dict below can be filled in
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    # cdict maps work universalid -> (first line index, last line index)
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        # [case 1] a span: from passage to endpoint within one work
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo,
                                                       stop,
                                                       dbcursor,
                                                       findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber[
                'code'] == 'success':
            cdict = {
                wo.universalid:
                (firstlinenumber['line'], lastlinenumber['line'])
            }
            startln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, firstlinenumber['line'],
                                    dbcursor))
            stopln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, lastlinenumber['line'],
                                    dbcursor))
        else:
            # the citations could not be resolved: warn, emit placeholder
            # line objects, and mark the request invalid
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(),
                                               b=stopln.shortlocus())
    elif ao and work and psg:
        # [case 2] subsection of a work of an author
        progresspolldict[pollid].statusis(
            'Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # [case 3] one whole work
        progresspolldict[pollid].statusis(
            'Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # [case 4] whole author: every work, plus a key mapping work ids to titles
        allworks = [
            '{w}  ⇒ {t}'.format(w=w.universalid[6:10], t=w.title)
            for w in ao.listofworks
        ]
        allworks.sort()
        progresspolldict[pollid].statusis(
            'Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        # vocab callers want the raw {workid: (start, end)} map, not JSON
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid],
                                  useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        # group digits for display: 1234567 -> '1,234,567'; the locale may be
        # unavailable on some systems, in which case fall back to a plain str
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        # placeholder author so the results dict can still be populated
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
Ejemplo n.º 23
0
    def _getgreekbaseform(self) -> str:
        """
        Extract the dictionary base form from a morphology entry. The tricky bit:

        some are quite easy: 'ἐπώνυμοϲ'
        others are compounds with a ', ' separation

        there is a HUGE PROBLEM in the original data here:
           [a] 'ὑπό, ἐκ-ἀράω²': what comes before the comma is a prefix to the verb
           [b] 'ἠχούϲαϲ, ἠχέω': what comes before the comma is an observed form of the verb
        when you .split() what do you have at wordandform[0]?

        you have to look at the full db entry for the word:
        the number of items in prefixrefs corresponds to the number of prefix
        checks you will need to make to recompose the verb

        :return: the base form (empty string if the entry cannot be parsed)
        """
        # honor the global warning-suppression switch
        warn = not hipparchia.config['SUPPRESSWARNINGS']

        # need an aspiration check; incl εκ -⟩ εξ

        baseform = str()
        segments = self.entry.split(', ')

        if len(segments) == 1 and '-' not in segments[-1]:
            # [a] the simplest case where what you see is what you should seek: 'ἐπώνυμοϲ'
            baseform = segments[-1]
        elif len(segments) == 2 and '-' not in segments[-1] and self.prefixcount == 0:
            # [b] a compound case, but it does not involve prefixes just morphology: 'ἠχούϲαϲ, ἠχέω'
            baseform = segments[-1]
        elif len(segments) == 1 and '-' in segments[-1]:
            # [c] the simplest version of a prefix: ἐκ-ϲύρω
            baseform = gkattemptelision(segments[-1])
        elif len(segments) == 2 and '-' in segments[-1] and self.prefixcount == 1:
            # [d] more info, but we do not need it: ἐκϲύρωμεν, ἐκ-ϲύρω
            baseform = gkattemptelision(segments[-1])
        elif len(segments) > 1 and '-' in segments[-1] and self.prefixcount > 1:
            # [e] all bets are off: ὑπό,κατά,ἐκ-λάω
            # BUG FIX: the elision of the final segment used to be recomputed
            # INSIDE the loop, which clobbered the prefixes accumulated on
            # earlier iterations (any entry with more than two prefixes lost
            # all but the last prepend). Elide once, then attach the remaining
            # prefixes from the innermost outward.
            baseform = gkattemptelision(segments[-1])
            for i in range(self.prefixcount - 2, -1, -1):
                try:
                    baseform = segments[i] + '-' + baseform
                except IndexError:
                    # bad original data: fewer segments than prefixcount implies
                    if warn:
                        consolewarning(
                            'abandoning efforts to parse {e}'.format(
                                e=self.entry))
                    baseform = segments[-1]
                    break
        else:
            if warn:
                consolewarning(
                    'MorphPossibilityObject.getbaseform() is confused: {e} - {s}'
                    .format(e=self.entry, s=segments))

        # not sure this ever happens with the greek data
        baseform = re.sub(r'^\s', str(), baseform)

        return baseform
Ejemplo n.º 24
0
def verbtabletemplate(mood: str,
                      voice: str,
                      dialect='attic',
                      duals=True,
                      lang='greek') -> str:
    """
    Build the HTML for a verb paradigm table for one mood + voice (+ dialect).

	Smythe §383ff

	cells look like:

		<td class="morphcell">_attic_subj_pass_pl_2nd_pres_</td>

    The '_..._' tokens are placeholders that a later pass replaces with the
    actual forms.

    :param mood: parser abbreviation, e.g. 'ind', 'subj', 'imperat', 'inf', 'part'
    :param voice: parser abbreviation, e.g. 'act', 'mid', 'pass'
    :param dialect: dialect tag baked into every placeholder token
    :param duals: show dual-number rows (forced off by the session setting)
    :param lang: 'greek' or 'latin'
    :return: the assembled table HTML
    """

    if not session['morphduals']:
        duals = False

    # mytenses: keys are tense numbers (see tensedict below); a falsy value
    # means "this tense has no column in the table"
    mytenses = dict()
    if lang == 'greek':
        try:
            mytenses = findmygreektenses(mood, voice)
        except AssertionError:
            # hipparchiaDB=# select * from greek_morphology where possible_dictionary_forms like '%ἀπαλλάϲϲω%' and possible_dictionary_forms like '%2nd%' and possible_dictionary_forms like '%imperat%' and possible_dictionary_forms like '%attic%';
            # you will see: <possibility_4>ἀπαλλάϲϲω<xref_value>11723310</xref_value><xref_kind>9</xref_kind><transl>set free</transl><analysis>aor imperat 2nd dual</analysis></possibility_4>
            # contrast: <possibility_6>ἀπαλλάϲϲω<xref_value>11723310</xref_value><xref_kind>9</xref_kind><transl>set free</transl><analysis>aor ind pass 2nd dual (homeric ionic)</analysis></possibility_6>
            consolewarning(
                'invalid parser data: cannot build a table where mood = "{m}" and voice = "{v}"'
                .format(m=mood, v=voice))
            mytenses = dict()

    if lang == 'latin':
        mytenses = findmylatintenses(mood, voice)

    # outer shell of the table
    tabletemplate = """
	<table class="verbanalysis">
	<tbody>
	{header}
	{rows}
	</tbody>
	</table>
	<hr class="styled">
	"""

    # stacked labels: dialect / voice / mood, then the row of tense headers
    headerrowtemplate = """
	<tr align="center">
		<td rowspan="1" colspan="{s}" class="dialectlabel">{dialect}<br>
		</td>
	</tr>
	<tr align="center">
		<td rowspan="1" colspan="{s}" class="voicelabel">{voice}<br>
		</td>
	</tr>
	<tr align="center">
		<td rowspan="1" colspan="{s}" class="moodlabel">{mood}<br>
		</td>
	{tenseheader}
	</tr>"""

    tensestemplate = """<tr>
		<td class="tenselabel">&nbsp;</td>
		{alltenses}
	</tr>"""

    blank = """
	<tr><td>&nbsp;</td>{columns}</tr>
	"""

    # an all-empty spacer row: one cell per active tense column
    blankrow = blank.format(columns=''.join(
        ['<td>&nbsp;</td>' for k in sorted(mytenses.keys()) if mytenses[k]]))

    # column headers: one labelled cell per active tense
    tensecell = '<td class="tensecell">{t}<br></td>'
    tenserows = [
        tensecell.format(t=mytenses[k]) for k in sorted(mytenses.keys())
        if mytenses[k]
    ]
    tenserows = '\n\t\t'.join(tenserows)
    tenseheader = tensestemplate.format(alltenses=tenserows)
    fullheader = headerrowtemplate.format(dialect=dialect,
                                          voice=voice,
                                          mood=mood,
                                          tenseheader=tenseheader,
                                          s=len(mytenses))

    allrows = list()

    # cell arrangement: left to right and top to bottom is vmnpt
    # i.e., voice, mood, number, person, tense
    # we are doing the npt part here

    # the lists of numbers, persons, tenses, etc is set to match the parser abbreviations

    cases = list()
    if lang == 'latin':
        cases = ['nom', 'gen', 'dat', 'acc', 'abl', 'voc']

    if lang == 'greek':
        cases = ['nom', 'gen', 'dat', 'acc', 'voc']

    genders = ['masc', 'fem', 'neut']

    if duals:
        numbers = ['sg', 'dual', 'pl']
    else:
        numbers = ['sg', 'pl']

    persons = ['1st', '2nd', '3rd']
    tensedict = {
        1: 'pres',
        2: 'imperf',
        3: 'fut',
        4: 'aor',
        5: 'perf',
        6: 'plup',
        7: 'futperf'
    }
    tenses = [tensedict[k] for k in sorted(mytenses.keys()) if mytenses[k]]

    morphrowtemplate = """
	<tr class="morphrow">
		{allcells}
	</tr>
	"""

    morphlabelcell = '<td class="morphlabelcell">{ml}</td>'
    morphcell = '<td class="morphcell">{mo}</td>'
    # finite forms: dialect_mood_voice_number_person_tense
    regextemplate = '_{d}_{m}_{v}_{n}_{p}_{t}_'
    # participles: dialect_mood_voice_number_tense_gender_case
    pcpltemplate = '_{d}_{m}_{v}_{n}_{t}_{g}_{c}_'

    # finite forms first; participles and infinitives get their own branches below

    if mood != 'part' and mood != 'inf':
        for n in numbers:
            for p in persons:
                if p == '1st' and n == 'dual':
                    # there is no 1st person dual
                    pass
                else:
                    allcellsinrow = list()
                    ml = '{n} {p}'.format(n=n, p=p)
                    allcellsinrow.append(morphlabelcell.format(ml=ml))
                    for t in tenses:
                        mo = regextemplate.format(d=dialect,
                                                  m=mood,
                                                  v=voice,
                                                  n=n,
                                                  p=p,
                                                  t=t)
                        allcellsinrow.append(morphcell.format(mo=mo))
                    thisrow = '\n\t\t'.join(allcellsinrow)
                    allrows.append(morphrowtemplate.format(allcells=thisrow))
    elif mood == 'part':
        # participles decline: number x gender x case
        for n in numbers:
            for g in genders:
                for c in cases:
                    allcellsinrow = list()
                    ml = '{g} {n} {c}'.format(n=n, c=c, g=g)
                    allcellsinrow.append(morphlabelcell.format(ml=ml))
                    for t in tenses:
                        mo = pcpltemplate.format(d=dialect,
                                                 m=mood,
                                                 v=voice,
                                                 n=n,
                                                 c=c,
                                                 t=t,
                                                 g=g)
                        allcellsinrow.append(morphcell.format(mo=mo))
                    thisrow = '\n\t\t'.join(allcellsinrow)
                    allrows.append(morphrowtemplate.format(allcells=thisrow))
                if session['morphemptyrows']:
                    # visual separator between genders
                    allrows.append(blankrow)
    elif mood == 'inf':
        # infinitives: a single row, one cell per tense
        allcellsinrow = list()
        allcellsinrow.append(morphlabelcell.format(ml='infinitive'))
        for t in tenses:
            mo = regextemplate.format(d=dialect,
                                      m=mood,
                                      v=voice,
                                      n=None,
                                      p=None,
                                      t=t)
            allcellsinrow.append(morphcell.format(mo=mo))
        thisrow = '\n\t\t'.join(allcellsinrow)
        allrows.append(morphrowtemplate.format(allcells=thisrow))

    rows = '\n'.join(allrows)
    thetablehtml = tabletemplate.format(header=fullheader, rows=rows)

    return thetablehtml
Ejemplo n.º 25
0
def finddblinefromlocus(workobject: dbOpus,
                        citationtuple: tuple,
                        dbcursor,
                        findlastline=False) -> int:
    """
    Resolve a citation tuple into the db 'index' value of the matching line.

    citationtuple ('9','109','8') means: line 9, section 109, book 8
    finddblinefromlocus(h, 1, ('130', '24')) ---> 15033

    findlastline=True grabs the end of a segment instead of its start:
    needed for the "endpoint" selection code.

    :param workobject: the work being browsed
    :param citationtuple: citation values, lowest level first
    :param dbcursor: an active db cursor
    :param findlastline: pick the highest matching index instead of the lowest
    :return: the db index of the located line
    """

    wkid = workobject.universalid
    authortable = wkid[0:6]

    # citation depth -> column name: 0 -> 'level_00_value', 1 -> 'level_01_value', ...
    lmap = {lvl: 'level_0{lvl}_value'.format(lvl=lvl) for lvl in range(6)}

    if wkid[0:2] in ['in', 'dp', 'ch']:
        # these corpora always cite on exactly two levels
        availablelevels = 2
    else:
        availablelevels = workobject.availablelevels

    if availablelevels != len(citationtuple):
        consolewarning(
            'mismatch between shape of work and browsing request: impossible citation of {w}.'
            .format(w=wkid))
        print(str(availablelevels), ' levels vs', list(citationtuple))
        print('safe to ignore if you requested the first line of a work')

    # step one: find the index number of the passage
    whereclauses = ['{l}=%s'.format(l=lmap[lvl]) for lvl in range(len(citationtuple))]
    direction = 'DESC' if findlastline else 'ASC'
    query = 'SELECT index FROM {w} WHERE ( wkuniversalid=%s ) AND '.format(
        w=authortable)
    query = query + ' AND '.join(whereclauses) + ' ORDER BY index ' + direction

    # if the last selection box was empty you are sent '_0' instead of a real value
    # (because the first line of lvl05 is not necc. '1')
    # so we need to kill off 'level_00_value=%s AND ', etc
    # example: ('-1', '256', 'beta') [here the 1st line is actually '10t', btw]

    citation = list(citationtuple)

    if citation[0] == '_0':
        query = re.sub(r'level_00_value=%s AND ', '', query)
        citation = citation[1:]

    if not citation:
        # nothing left to match on: hand back the first line of the work
        return workdict[wkid].starts

    data = tuple([wkid] + citation)

    try:
        dbcursor.execute(query, data)
        hit = dbcursor.fetchone()
        indexvalue = hit[0]
    except TypeError:
        # fetchone() returned None: 'NoneType' object is not subscriptable
        indexvalue = returnfirstorlastlinenumber(authortable, dbcursor)

    return indexvalue
Ejemplo n.º 26
0
 def resetpool():
     """
     Throw away every cached connection pool so it is rebuilt on next use.

     NOTE(review): no self/cls parameter, so presumably declared as a
     @staticmethod on PooledConnectionObject — confirm the decorator.
     """
     # dangerous to do this while anything interesting is going on
     # currently checking to see if need cleaning at head of searchdispatcher
     consolewarning('emptying out PooledConnectionObject._pools()')
     PooledConnectionObject._pools = dict()
     PooledConnectionObject.poolneedscleaning = False
Ejemplo n.º 27
0
except ImportError:
    from multiprocessing import current_process

    if current_process().name == 'MainProcess':
        print('gensim not available')
    Word2Vec = None

try:
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
    from sklearn.linear_model import SGDClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
except ImportError:
    if current_process().name == 'MainProcess':
        consolewarning('sklearn is unavailable', color='black')
    CountVectorizer = None
    TfidfTransformer = None
    SGDClassifier = None
    GridSearchCV = None
    Pipeline = None

try:
    # will hurl out a bunch of DeprecationWarning messages at the moment...
    # lib/python3.6/re.py:191: DeprecationWarning: bad escape \s
    import pyLDAvis
    import pyLDAvis.sklearn as ldavis
except ImportError:
    if current_process().name == 'MainProcess':
        consolewarning('pyLDAvis is unavailable', color='black')
    pyLDAvis = None
Ejemplo n.º 28
0
async def wscheckpoll(websocket, path):
	"""
	A poll checker started by startwspolling(): the client sends the name of a
	poll and this coroutine streams the status of that poll back over the
	websocket for as long as the poll remains registered.

	example of one update sent to the client:
		progress {'active': 1, 'total': 20, 'remaining': 20, 'hits': 48, 'message': 'Putting the results in context', 'elapsed': 14.0, 'extrainfo': '<span class="small"></span>'}

	:param websocket: the open websocket connection to the browser
	:param path: required by the websockets handler signature; unused here
	:return: None (the loop ends when the poll disappears or the socket closes)
	"""

	try:
		pollid = await websocket.recv()
	except websockets.exceptions.ConnectionClosed:
		# you reloaded the page
		return

	# comes to us with quotes: "eb91fb11" --> eb91fb11
	pollid = re.sub(r'"', str(), pollid)
	pollid = validatepollid(pollid)

	while True:
		progress = dict()
		try:
			active = progresspolldict[pollid].getactivity()
			progress['ID'] = pollid
			progress['Poolofwork'] = progresspolldict[pollid].worktotal()
			progress['Remaining'] = progresspolldict[pollid].getremaining()
			progress['Hitcount'] = progresspolldict[pollid].gethits()
			progress['Statusmessage'] = progresspolldict[pollid].getstatus()
			progress['Launchtime'] = progresspolldict[pollid].getlaunchtime()
			if not hipparchia.config['SUPPRESSLONGREQUESTMESSAGE']:
				if progresspolldict[pollid].getnotes():
					progress['Notes'] = progresspolldict[pollid].getnotes()
			else:
				progress['Notes'] = str()
		except KeyError:
			# the poll key is deleted from progresspolldict when the query ends; you will always end up here
			# send a final 'inactive' message (best effort) and stop polling
			progress['Active'] = 'inactive'
			try:
				await websocket.send(json.dumps(progress))
			except websockets.exceptions.ConnectionClosed:
				# you reloaded the page in the middle of a search and both the poll and the socket vanished
				pass
			break
		except TypeError:
			# TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType
			# the poll is gone...
			break

		# throttle: one status update every .4s
		await asyncio.sleep(.4)
		# print(progress)
		# print('progress %', ((progress['Poolofwork'] - progress['Remaining']) / progress['Poolofwork']) * 100)

		try:
			# something changed amid backend updates and json.dumps() started choking on progresspolldict[pollid].getactivity()
			# active is (now) a <Synchronized wrapper for c_byte(1)>; that was the unexpected change: it was 'bool'
			# <class 'multiprocessing.sharedctypes.Synchronized'>
			progress['Active'] = active.value
		except AttributeError:
			# AttributeError: 'str' (or 'int' or 'bool') object has no attribute 'value'
			progress['Active'] = active

		try:
			await websocket.send(json.dumps(progress))
		except websockets.exceptions.ConnectionClosed:
			# websockets.exceptions.ConnectionClosed because you reloaded the page in the middle of a search
			pass
		except TypeError as e:
			# "Object of type Synchronized is not JSON serializable"
			# macOS and indexmaker combo is a problem; macOS is the real problem?
			consolewarning('websocket non-fatal error: "{e}"'.format(e=e), color='yellow', isbold=False)
			pass
	return
Ejemplo n.º 29
0
def searchlistintosqldict(searchobject: SearchObject, seeking: str, subqueryphrasesearch=False, vectors=False) -> dict:
    """
    Take a searchobject, grab its searchlist and its exceptionlist and convert
    them into a collection of sql queries.

    The old strategy would generate the queries as needed and on the fly: this
    version is slower and costs more memory by definition; it generates all
    possible queries and it holds them in memory; nevertheless the speed cost
    should be negligible relative to the total cost of a search; the memory
    cost can only get interesting if you have lots of users; but here too the
    overload problem should come from too much postgres and not too much prep.

    In any case these lists of queries can be handed off to a simple MP-aware
    helper binary that can dodge MP forking in python; this binary can be in
    rust or go or ...

    Returns:

    { table1: {query: q, data: d, temptable: t},
    table2: {query: q, data: d, temptable: t},
    ... }

    note that a temptable is seldom used
    but something like searching inside a date range in an inscriptional corpus will trigger the need for one

    example: δηλοῖ in Aristotle + 3 works of Plato
    {
    'gr0086': {
        'temptable': '',
        'query': 'SELECT wkuniversalid, index, level_05_value, level_04_value, level_03_value, level_02_value, level_01_value, level_00_value, marked_up_line, accented_line, stripped_line, hyphenated_words, annotations FROM gr0086 WHERE  ( accented_line ~* %s )  LIMIT 200',
        'data': ('δηλοῖ',)
    },
    'gr0059': {
        'temptable': '',
        'query': 'SELECT wkuniversalid, index, level_05_value, level_04_value, level_03_value, level_02_value, level_01_value, level_00_value, marked_up_line, accented_line, stripped_line, hyphenated_words, annotations FROM gr0059 WHERE ( (index BETWEEN 40842 AND 52799) OR (index BETWEEN 2172 AND 4884) OR (index BETWEEN 1 AND 677) ) AND ( accented_line ~* %s )  LIMIT 200',
        'data': ('δηλοῖ',)
    }
    }

    a bit fiddly because more than one class of query is constructed here: vanilla, subquery, vector...

    :param searchobject: the search whose searchlist/restrictions drive the queries
    :param seeking: the regex/string being sought; becomes the bound '%s' datum
    :param subqueryphrasesearch: build subquery-style phrase-search SQL instead
    :param vectors: build vector-collection SQL (no search term in the WHERE)
    :return: {authortable: {'temptable': t, 'query': q, 'data': d}, ...}
    """

    returndict = dict()

    so = searchobject
    searchlist = so.indexrestrictions.keys()

    # templimits are used by proximity searching but so.cap should have been temporarily swapped out
    lim = str(so.cap)

    if so.onehit:
        mylimit = ' ORDER BY index ASC LIMIT 1'
    else:
        mylimit = ' ORDER BY index ASC LIMIT {lim}'.format(lim=lim)

    mysyntax = '~*'

    for authortable in searchlist:
        r = so.indexrestrictions[authortable]
        whereextensions = str()
        returndict[authortable] = dict()
        returndict[authortable]['temptable'] = str()

        if r['type'] == 'between':
            # restrict the hits to index ranges within the author table
            whereextensions = buildbetweenwhereextension(authortable, so)
            if not subqueryphrasesearch and not vectors:
                whr = 'WHERE {xtn} ( {c} {sy} %s )'.format(c=so.usecolumn, sy=mysyntax, xtn=whereextensions)
            else:
                # whereextensions will come back with an extraneous ' AND'
                whereextensions = whereextensions[:-4]
                whr = 'WHERE {xtn}'.format(xtn=whereextensions)
        elif r['type'] == 'unrestricted':
            if not subqueryphrasesearch and not vectors:
                whr = 'WHERE {xtn} ( {c} {sy} %s )'.format(c=so.usecolumn, sy=mysyntax, xtn=whereextensions)
            else:
                whr = str()
        elif r['type'] == 'temptable':
            # how to construct the table...
            # note that the temp table name can't be assigned yet because you can get collisions via lemmatization
            # since that will give you more than one query per author table: gr1001_0, gr1001_1, ...
            q = r['where']['tempquery']
            q = re.sub('_includelist', '_includelist_UNIQUENAME', q)
            returndict[authortable]['temptable'] = q

            # how to SELECT inside the table...
            wtemplate = """
            EXISTS
                (SELECT 1 FROM {tbl}_includelist_UNIQUENAME incl WHERE incl.includeindex = {tbl}.index
            """
            whereextensions = wtemplate.format(tbl=authortable)
            if not vectors:
                whr = 'WHERE {xtn} AND {au}.{col} {sy} %s)'.format(au=authortable, col=so.usecolumn, sy=mysyntax,
                                                                   xtn=whereextensions)
            else:
                whr = 'WHERE {xtn} )'.format(xtn=whereextensions)
        else:
            # should never see this
            # BUG FIX: the old warning named the wrong function (substringsearch)
            # and passed r['type'] as a stray extra argument to consolewarning;
            # embed the offending type in the message instead
            consolewarning('error in searchlistintosqldict(): unknown whereclause type "{t}"'.format(t=r['type']))
            whr = 'WHERE ( {c} {sy} %s )'.format(c=so.usecolumn, sy=mysyntax)

        if not subqueryphrasesearch and not vectors:
            # vanilla search
            qtemplate = 'SELECT {wtmpl} FROM {db} {whr} {lm}'
            q = qtemplate.format(wtmpl=worklinetemplate, db=authortable, whr=whr, lm=mylimit)
        elif vectors:
            # vector collection: no LIMIT; grab everything in range
            q = 'SELECT {wtmpl} FROM {db} {whr}'.format(wtmpl=worklinetemplate, db=authortable, whr=whr)
        else:
            # subquery phrase search; temptable queries need their wrapper stripped
            ttstripper = (r['type'] == 'temptable')
            q = rewritequerystringforsubqueryphrasesearching(authortable, whr, ttstripper, so)

        returndict[authortable]['query'] = q
        returndict[authortable]['data'] = (seeking,)
        # consolewarning("{a}:\nq\t{q}\nd\t{d}\nt\t{t}".format(a=authortable, q=q, d=d, t=returndict[authortable]['temptable']), color="cyan")
    return returndict
Ejemplo n.º 30
0
    def __init__(self,
                 autocommit='defaultisno',
                 readonlyconnection=True,
                 ctype='ro'):
        """
        Hand out a db connection from the shared pools, building the pools
        lazily on first use (borg pattern: _pools lives on the class).

        :param autocommit: 'autocommit' turns autocommit on; any other value leaves it off
        :param readonlyconnection: request a read-only connection (forced False for 'rw')
        :param ctype: which pool to draw from: 'ro' (default) or 'rw'
        """
        super().__init__(autocommit, readonlyconnection)
        # NOTE(review): 'cytpe' looks like a typo for 'ctype', but it is used
        # consistently below (and possibly elsewhere in the class) — verify
        # all usages before renaming
        self.cytpe = ctype
        if not PooledConnectionObject._pools:
            # initialize the borg
            # note that poolsize is implicitly a claim about how many concurrent users you imagine having
            poolsize = setthreadcount() + 3

            # three known pool types; simple should be faster as you are avoiding locking
            pooltype = connectionpool.SimpleConnectionPool
            # pooltype = connectionpool.ThreadedConnectionPool
            # pooltype = connectionpool.PersistentConnectionPool

            # [A] 'ro' pool
            kwds = {
                'user': GenericConnectionObject.dbuser,
                'host': GenericConnectionObject.dbhost,
                'port': GenericConnectionObject.dbport,
                'database': GenericConnectionObject.dbname,
                'password': GenericConnectionObject.dbpass
            }

            try:
                readonlypool = pooltype(poolsize, poolsize * 2, **kwds)
            except psycopg2.OperationalError as operror:
                # diagnose the two common failure modes: no server vs bad password
                thefailure = operror.args[0]
                noconnection = 'could not connect to server'
                # NOTE(review): this literal looks garbled/redacted; presumably
                # it was a substring of postgres' 'password authentication
                # failed' message — confirm against the upstream source
                badpass = '******'
                if noconnection in thefailure:
                    e = GenericConnectionObject.noserverproblem.format(
                        h=GenericConnectionObject.dbhost,
                        p=GenericConnectionObject.dbport)
                    consolewarning(
                        GenericConnectionObject.postgresproblem.format(e=e))
                    if sys.platform == 'darwin':
                        consolewarning(GenericConnectionObject.darwinproblem)

                if badpass in thefailure:
                    e = GenericConnectionObject.badpassproblem.format(
                        h=GenericConnectionObject.dbhost,
                        p=GenericConnectionObject.dbport)
                    consolewarning(
                        GenericConnectionObject.postgresproblem.format(e=e))

                # NOTE(review): exits with status 0 despite a fatal error —
                # arguably should be non-zero; confirm no caller depends on it
                sys.exit(0)

            # [B] 'rw' pool: only used by the vector graphing functions
            # and these are always going to be single-threaded
            littlepool = max(int(setthreadcount() / 2), 2)
            kwds['user'] = GenericConnectionObject.dbwriteuser
            kwds['password'] = GenericConnectionObject.dbwritepass
            # this can be smaller because only vectors do rw and the vectorbot is not allowed in the pool
            # but you also need to be free to leave rw unset
            try:
                readandwritepool = pooltype(littlepool, littlepool, **kwds)
            except psycopg2.OperationalError:
                # rw credentials may legitimately be unset
                readandwritepool = None

            PooledConnectionObject._pools['ro'] = readonlypool
            PooledConnectionObject._pools['rw'] = readandwritepool

        assert self.cytpe in ['ro', 'rw'
                              ], 'connection type must be either "ro" or "rw"'
        self.pool = PooledConnectionObject._pools[self.cytpe]

        if self.cytpe == 'rw':
            # writes require a writable connection regardless of the request
            self.readonlyconnection = False

        if threading.current_thread().name == 'vectorbot':
            # the vectobot lives in a thread and it will exhaust the pool
            self.simpleconnectionfallback()
        else:
            try:
                self.dbconnection = self.pool.getconn(key=self.uniquename)
            except psycopg2.pool.PoolError:
                # the pool is exhausted: try a basic connection instead
                # but in the long run should probably make a bigger pool/debug something
                # at the moment the only way to hit this error is via some sort of platform bug that yields a hung search
                # that is, something like a ryzen c-state aborted search damages the pool in the long run...
                consolewarning(
                    'PoolError: emergency fallback to SimpleConnectionObject()'
                )
                self.simpleconnectionfallback()
                PooledConnectionObject.poolneedscleaning = True

        if self.autocommit == 'autocommit':
            self.setautocommit()

        self.setreadonly(self.readonlyconnection)
        self.curs = getattr(self.dbconnection, 'cursor')()