def __init__(self, authorobject, workobject, dblinenumber, resultmessage='success'):
    self.authorobject = authorobject
    self.workobject = workobject
    self.index = dblinenumber
    self.resultmessage = resultmessage

    # to be calculated on initialization
    if self.workobject.isliterary():
        self.name = authorobject.shortname
    else:
        self.name = authorobject.idxname
    self.name = avoidsmallvariants(self.name)
    self.title = avoidsmallvariants(workobject.title)
    self.uid = authorobject.universalid

    try:
        if int(workobject.converted_date) < 1500:
            self.date = int(workobject.converted_date)
        else:
            self.date = None
    except (TypeError, ValueError):
        # converted_date is missing or not a number ('varia', 'incertum', ...)
        self.date = None

    self.linetemplate = self.getlinetemplate()

    # to be populated later, mostly by generatepassageheader()
    self.browsedlines = list()
    self.focusline = None
    self.biblio = str()
    self.citation = str()
    self.header = str()
    self.authorandwork = str()
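# A standalone sketch of the date logic above, assuming converted_date arrives
# as a string or number: only values that parse as integers below 1500 are
# kept; anything else ('varia', None, 2000, ...) becomes None. The helper name
# is hypothetical and exists only for illustration.
def normalizeworkdate(converted_date):
    try:
        if int(converted_date) < 1500:
            return int(converted_date)
    except (TypeError, ValueError):
        pass
    return None

# e.g.: normalizeworkdate('25') == 25; normalizeworkdate('varia') is None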
def __init__(self, hitnumber, author, work, citationstring, worknumber, clickurl, lineobjects):
    self.hitnumber = hitnumber
    self.author = avoidsmallvariants(author)
    self.work = avoidsmallvariants(work)
    self.citationstring = citationstring
    self.clickurl = clickurl
    self.lineobjects = lineobjects
    self.worknumber = worknumber
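# Hypothetical instantiation of the formatted-hit object above; the class name
# and the lineobjects value are placeholders for whatever the search route
# actually passes in:
#
#   hit = FormattedSearchResult(hitnumber=1, author='Homerus', work='Ilias',
#                               citationstring='Book 1, line 1', worknumber='001',
#                               clickurl='linenumber/gr0012w001/1', lineobjects=lines)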
def locusintocitation(workobject: dbOpus, lineobject: dbWorkLine) -> str:
    """
    generate a prolix citation like "Book 8, section 108, line 9"

    :param workobject:
    :param lineobject:
    :return:
    """

    wklvls = list(workobject.structure.keys())
    cite = list(lineobject.locustuple())
    wklvls.reverse()
    citation = list()

    for level in wklvls:
        try:
            if workobject.isnotliterary() and workobject.structure[level] == ' ' and cite[level] == 'recto':
                # ' ' ==> 'face' which is likely 'recto'
                # this check will make it so you don't see 'recto' over and over again when looking at inscriptions
                pass
            else:
                citation.append(workobject.structure[level] + ' ' + cite[level])
        except KeyError:
            # did you send me a partial citation like "book 2"?
            pass

    citation = ', '.join(citation)
    citation = avoidsmallvariants(citation)

    return citation
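# A dependency-free sketch of the loop in locusintocitation() above; the
# structure labels and locus values are hypothetical. structure maps level
# numbers to labels with the highest key as the top level, while locustuple()
# runs bottom level first, so reversing the keys walks the citation top-down.
def _locusintocitation_demo() -> str:
    structure = {0: 'line', 1: 'section', 2: 'book'}  # highest key = top level
    cite = ('9', '108', '8')  # bottom level first
    levels = sorted(structure.keys(), reverse=True)
    # yields 'book 8, section 108, line 9'
    return ', '.join('{s} {c}'.format(s=structure[lvl], c=cite[lvl]) for lvl in levels)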
def locus(self) -> str:
    """
    turn the funky substitutes into standard characters:

    in:  B❨1❩, line 2
    out: B(1), line 2

    NB: these might not be in the data...

    :return:
    """

    return avoidsmallvariants(self.uncleanlocus())
def locustuple(self):
    """
    turn the funky substitutes into standard characters:

    in:  B❨1❩, line 2
    out: B(1), line 2

    NB: these might not be in the data...

    :return:
    """

    ltuple = self.uncleanlocustuple()
    # wrap in tuple(): a bare generator expression would not be indexable
    # and would be exhausted after one pass
    newtuple = tuple(avoidsmallvariants(t) for t in ltuple)
    return newtuple
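# A minimal sketch of what avoidsmallvariants() does to loci, assuming it is
# essentially a character translation; only the parenthesis mapping is attested
# in the docstrings above, so this table is illustrative rather than complete.
_SMALLVARIANTS = str.maketrans({'❨': '(', '❩': ')'})

def _avoidsmallvariants_sketch(text: str) -> str:
    return text.translate(_SMALLVARIANTS)

# _avoidsmallvariants_sketch('B❨1❩, line 2') == 'B(1), line 2'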
def textmaker(author: str, work=None, passage=None, endpoint=None, citationdelimiter='|') -> JSON_STR:
    """
    build a text suitable for display

    "GET /textof/lt0474/024/20/30"

    :return:
    """

    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            firstlinenumber = finddblinefromincompletelocus(wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo, po.endpointlist, dbcursor, findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                startlnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(), b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            startandstop = textsegmentfindstartandstop(ao, wo, po.passageaslist, dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery, dbcursor)
    else:
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
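# Shape of the JSON string returned by textmaker(); the keys come straight
# from the results dict above, but the values here are purely illustrative:
#
#   {
#       "authorname": "Cicero",
#       "title": "Epistulae ad Atticum",
#       "structure": "book, letter, section, line",
#       "worksegment": "from 2.20.1 to 2.30.1",
#       "texthtml": "<table>...</table>"
#   }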
def generatevocabfor(searchid: str, author: str, work=None, passage=None, endpoint=None,
                     citationdelimiter='|') -> JSON_STR:
    """
    given a text span, figure out what words are used in that span and then
    build a vocabulary list from those words

    ex: http://localhost:5000/vocabularyfor/SEARCHID/lt0631/001/1/20

    this is a lot like building an index, so we just leverage buildindexto()
    but pull away from it after the initial bit where we establish endpoints
    and get ready to gather the lines

    :param searchid:
    :param author:
    :param work:
    :param passage:
    :param endpoint:
    :param citationdelimiter:
    :return:
    """

    starttime = time.time()
    segmenttext = str()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    justvocab = True

    cdict = buildindexto(searchid, author, work, passage, endpoint, citationdelimiter, justvocab)
    lineobjects = grabbundlesoflines(cdict, dbcursor)

    allwords = [l.wordset() for l in lineobjects]
    allwords = set(flattenlistoflists(allwords))

    morphobjects = getrequiredmorphobjects(allwords)
    # 'dominatio': <server.hipparchiaobjects.dbtextobjects.dbMorphologyObject object at 0x14ab92d68>, ...

    baseformsmorphobjects = list()
    for m in morphobjects:
        try:
            baseformsmorphobjects.extend(morphobjects[m].getpossible())
        except AttributeError:
            # 'NoneType' object has no attribute 'getpossible'
            pass

    vocabset = {'{w} ~~~ {t}'.format(w=b.getbaseform(), t=b.gettranslation())
                for b in baseformsmorphobjects if b.gettranslation()}
    vocabset = {v.split(' ~~~ ')[0]: v.split(' ~~~ ')[1].strip() for v in vocabset}

    vocabset = {v: vocabset[v] for v in vocabset if vocabset[v]}

    # the following can be in entries and will cause problems...:
    #   <tr opt="n">which had become milder</tr>
    vocabset = {v: re.sub(r'<(|/)tr.*?>', str(), vocabset[v]) for v in vocabset}

    # now you have { word1: definition1, word2: definition2, ...}

    vocabcounter = [b.getbaseform() for b in baseformsmorphobjects if b.gettranslation()]
    vocabcount = dict()
    for v in vocabcounter:
        try:
            vocabcount[v] += 1
        except KeyError:
            vocabcount[v] = 1

    po = IndexmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    tableheadtemplate = """
    <tr>
        <th class="vocabtable">word</th>
        <th class="vocabtable">count</th>
        <th class="vocabtable">definitions</th>
    </tr>
    """

    tablerowtemplate = """
    <tr>
        <td class="word"><vocabobserved id="{w}">{w}</vocabobserved></td>
        <td class="count">{c}</td>
        <td class="trans">{t}</td>
    </tr>
    """

    tablehtml = """
    <table>
    {head}
    {rows}
    </table>
    """

    byfrequency = False
    if not byfrequency:
        rowhtml = [tablerowtemplate.format(w=k, t=vocabset[k], c=vocabcount[k])
                   for k in polytonicsort(vocabset.keys())]
    else:
        vc = [(vocabcount[v], v) for v in vocabcount]
        vc.sort(reverse=True)
        vk = [v[1] for v in vc]
        vk = [v for v in vk if v in vocabset]
        rowhtml = [tablerowtemplate.format(w=k, t=vocabset[k], c=vocabcount[k]) for k in vk]

    wordsfound = len(rowhtml)
    rowhtml = '\n'.join(rowhtml)

    vocabhtml = tablehtml.format(head=tableheadtemplate, rows=rowhtml)

    if not ao:
        ao = makeanemptyauthor('gr0000')

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)

    if not stop:
        segmenttext = '.'.join(psg)

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = wordsfound
    results['texthtml'] = vocabhtml
    results['keytoworks'] = str()
    results['newjs'] = supplementalvocablistjs()

    results = json.dumps(results)

    # print('vocabhtml', vocabhtml)

    dbconnection.connectioncleanup()

    return results
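# A dependency-free sketch of the vocabset cleanup chain above: deduplicate
# (baseform, translation) pairs via the ' ~~~ ' joiner, split back into a
# dict, drop empty translations, and strip stray <tr> markup. The sample
# entries are hypothetical.
def _vocabset_demo() -> dict:
    pairs = {'dominatio ~~~ mastery',
             'mitesco ~~~ <tr opt="n">which had become milder</tr>',
             'atque ~~~ '}
    vocab = {p.split(' ~~~ ')[0]: p.split(' ~~~ ')[1].strip() for p in pairs}
    vocab = {w: t for w, t in vocab.items() if t}
    vocab = {w: re.sub(r'<(|/)tr.*?>', str(), t) for w, t in vocab.items()}
    # {'dominatio': 'mastery', 'mitesco': 'which had become milder'}
    return vocab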
def buildindexto(searchid: str, author: str, work=None, passage=None, endpoint=None, citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
    build a complete index to an author, work, or segment of a work

    :return:
    """

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    cdict = dict()
    segmenttext = str()
    valid = True

    if ao and work and psg and stop:
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo, stop, dbcursor, findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
            cdict = {wo.universalid: (firstlinenumber['line'], lastlinenumber['line'])}
            startln = dblineintolineobject(grabonelinefromwork(ao.universalid, firstlinenumber['line'], dbcursor))
            stopln = dblineintolineobject(grabonelinefromwork(ao.universalid, lastlinenumber['line'], dbcursor))
        else:
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(), b=stopln.shortlocus())
    elif ao and work and psg:
        # subsection of a work of an author
        progresspolldict[pollid].statusis('Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # one work
        progresspolldict[pollid].statusis('Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # whole author
        allworks = ['{w} ⇒ {t}'.format(w=w.universalid[6:10], t=w.title) for w in ao.listofworks]
        allworks.sort()
        progresspolldict[pollid].statusis('Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid], useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
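# Shape of the cdict that buildindexto() hands to buildindextowork() (and
# returns directly when justvocab=True): work ids map to (first, last) db
# line numbers. The ids and numbers below are illustrative:
#
#   cdict = {'gr0012w001': (1, 15693),       # whole work: wo.starts .. wo.ends
#            'gr0012w002': (15694, 27803)}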