예제 #1
0
def lsiformatoutput(findshtml: str, workssearched: int, matches: list,
                    searchobject: SearchObject) -> str:
    """
    Package the results of a "reminiscent sentences" search as a JSON string.

    A SearchOutputObject is built from the search object, filled in with the
    pre-rendered results and some descriptive text, and then serialized. The
    search object's poll is deactivated just before serialization.

    NOTE (carried over from the original author): should use OutputObject() instead

    :param findshtml: pre-rendered HTML of the finds; stored as-is on the output
    :param workssearched: number of works searched; handed to output.setscope()
    :param matches: the matches found; only their count is reported
    :param searchobject: supplies the sought text, the poll, and the elapsed time
    :return: the JSON-encoded result of output.generateoutput()
    """

    poll = searchobject.poll
    report = SearchOutputObject(searchobject)

    titletemplate = 'Sentences that are reminiscent of »{skg}«'
    htmltemplate = 'sentences that are reminiscent of <span class="sought">»{skg}«</span>'
    counttemplate = '{n} sentences above the cutoff'

    report.found = findshtml
    report.js = insertbrowserclickjs('browser')
    report.setscope(workssearched)

    # the plain-text description of the search doubles as the title
    report.title = titletemplate.format(skg=searchobject.seeking)
    report.thesearch = report.title
    report.htmlsearch = htmltemplate.format(skg=searchobject.seeking)
    report.resultcount = counttemplate.format(n=len(matches))
    report.searchtime = searchobject.getelapsedtime()

    poll.deactivate()

    return json.dumps(report.generateoutput())
예제 #2
0
def ldatopicsgenerateoutput(ldavishtmlandjs: str, searchobject: SearchObject) -> str:
    """
    Split pyLDAvis output into its HTML and JS parts and package them as a
    JSON-encoded search result.

    pyLDAvis.prepared_data_to_html() outputs something that is almost pure JS and looks like this:

        <link rel="stylesheet" type="text/css" href="https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css">


        <div id="ldavis_el7428760626948328485476648"></div>
        <script type="text/javascript">

        var ldavis_el7428760626948328485476648_data = {"mdsDat": ...

        }
        </script>

    Every non-empty line before the first '<script type="text/javascript">'
    line becomes the HTML of the result. The lines after it, minus the final
    line (expected to be '</script>'), become the JS. If no script line is
    present, everything is treated as HTML and the JS is empty.

    The CDN URLs for the ldavis CSS and JS are swapped for local paths.

    :param ldavishtmlandjs: the text produced by pyLDAvis.prepared_data_to_html()
    :param searchobject: supplies the search list, the poll, the elapsed time,
        and vectorvalues.ldacomponents (the number of topics)
    :return: the JSON-encoded result of output.generateoutput()
    :raises KeyError: if exactly one author was searched and the first six
        characters of the first search list item are not a key of authordict
    """

    so = searchobject
    activepoll = so.poll
    output = SearchOutputObject(so)

    workssearched = len(so.searchlist)
    topiccount = so.vectorvalues.ldacomponents

    scripttag = '<script type="text/javascript">'

    # drop empty lines first, then strip tabs (a tabs-only line survives as '')
    lines = [l.replace('\t', '') for l in ldavishtmlandjs.split('\n') if l]

    # index of the line that opens the script; len(lines) if we never find it
    cut = next((i for i, l in enumerate(lines) if scripttag in l), len(lines))

    # the leading empty item keeps the historical output, which starts with a newline
    html = [''] + lines[:cut]
    # we cut '<script>' via the index; the '-1' drops the closing '</script>'
    js = lines[cut + 1:-1]

    # brittle: ldavis might change its URLs between versions, etc.
    # should probably make this conditional upon the presence of the file locally...
    # these are literal URLs, so use a plain substring replacement: as regex
    # patterns their dots would match any character
    ldacssurl = 'https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css'
    ldacsslocal = '/css/ldavis.css'
    findshtml = '\n'.join(html).replace(ldacssurl, ldacsslocal)

    ldajsurl = 'https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js'
    ldajslocal = '/static/jsforldavis.js'
    findsjs = '\n'.join(js).replace(ldajsurl, ldajslocal)

    # NOTE (from the original author): also swapping the d3 CDN URL
    # ('https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min' -> '/static/jsd3')
    # will break the reloaded figure, so that URL is left alone

    authorcount = so.numberofauthorssearched()
    who = str()
    where = '{n} authors'.format(n=authorcount)

    if authorcount == 1:
        # the first six characters of a search list item are used as the authordict key
        who = authordict[so.searchlist[0][:6]].akaname
        where = who

    if workssearched == 1:
        try:
            worktitle = workdict[so.searchlist[0]].title
        except KeyError:
            # the item is not a key of workdict: show a blank title
            worktitle = str()
        where = '{a}, <worktitle>{w}</worktitle>'.format(a=who, w=worktitle)

    output.title = 'Latent Dirichlet Allocation'
    output.found = findshtml
    output.js = findsjs

    output.setscope(workssearched)
    output.sortby = 'weight'
    output.thesearch = 'thesearch'
    output.resultcount = 'the following topics'
    output.htmlsearch = '{n} topics in {w}'.format(n=topiccount, w=where)
    output.searchtime = so.getelapsedtime()
    activepoll.deactivate()

    return json.dumps(output.generateoutput())