Exemples Python de SearchObject.cap

Langage de programmation: Python

Espace de nommage/Pack: server.hipparchiaobjects.searchobjects

Class/Type: SearchObject

Méthode/Fonction: cap

Exemples sur hotexamples.com : 3

Python SearchObject.cap - 3 exemples trouvés. Ce sont les exemples réels les mieux notés de server.hipparchiaobjects.searchobjects.SearchObject.cap extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

SearchObject(7)

searchsqldict(7)

searchlist(3)

lemmaone(3)

termone(3)

cap(3)

vectorquerytype(2)

usecolumn(2)

termtwo(2)

seeking(2)

searchtermcleanup(2)

phrase(2)

lemmatwo(2)

indexrestrictions(2)

proximate(1)

lemmathree(1)

getactivecorpora(1)

usedcorpora(1)

iamarobot(1)

termthree(1)

swaplemmaoneandtwo(1)

swapseekingandproxmate(1)

poll(1)

sortorder(1)

setsearchtype(1)

setsearchlistthumbprint(1)

numberofauthorssearched(1)

wholecorporasearched(1)

Méthodes fréquemment utilisées

SearchObject (7)

searchsqldict (7)

searchlist (3)

lemmaone (3)

termone (3)

cap (3)

vectorquerytype (2)

usecolumn (2)

termtwo (2)

seeking (2)

Méthodes fréquemment utilisées

searchtermcleanup (2)

phrase (2)

lemmatwo (2)

indexrestrictions (2)

proximate (1)

lemmathree (1)

getactivecorpora (1)

usedcorpora (1)

iamarobot (1)

termthree (1)

swaplemmaoneandtwo (1)

swapseekingandproxmate (1)

poll (1)

sortorder (1)

setsearchtype (1)

setsearchlistthumbprint (1)

numberofauthorssearched (1)

wholecorporasearched (1)

Méthodes fréquemment utilisées

swaplemmaoneandtwo (1)

swapseekingandproxmate (1)

poll (1)

sortorder (1)

setsearchtype (1)

setsearchlistthumbprint (1)

numberofauthorssearched (1)

wholecorporasearched (1)

Exemple #1

0

Afficher le fichier

def generatepreliminaryhitlist(
        so: SearchObject,
        recap=hipparchia.config['INTERMEDIATESEARCHCAP']) -> List[dbWorkLine]:
    """
    grab the hits for part one of a two part search

    so.cap is temporarily replaced by `recap` while basicprecomposedsqlsearcher()
    runs; the original cap is always put back, even if the search raises.

    INTERMEDIATESEARCHCAP is interesting...

    you can test via "Sought »α« within 1 lines of »ι«"

    400k or so seems to be the practical worst case: if you search for "α" in all of the
    databases you will get 392275 lines back as your intermediate result. You just grabbed
    a huge % of the total possible collection of lines.

    you can pull this in about 5s, so there is really no reason to worry about the cap
    if using the grabber

    :param so: the search in progress; its poll receives the status message
    :param recap: the cap applied to this intermediate search only
    :return: the result of basicprecomposedsqlsearcher(so)
    """

    actualcap = so.cap
    so.cap = recap

    try:
        # later status messages deliberately overwrite earlier ones:
        # plain term < phrase < lemma
        so.poll.statusis('Searching for "{x}"'.format(x=so.termone))

        if so.searchtype == 'phraseandproximity':
            so.poll.statusis('Searching for "{x}"'.format(x=so.phrase))

        if so.lemmaone:
            so.poll.statusis('Searching for all forms of "{x}"'.format(
                x=so.lemmaone.dictionaryentry))

        hitlines = basicprecomposedsqlsearcher(so)
    finally:
        # never leave the caller's SearchObject stuck with the intermediate cap
        so.cap = actualcap

    return hitlines

Exemple #2

0

Afficher le fichier

Fichier : vectorpipeline.py Projet : e-gun/HipparchiaServer

def pythonvectors(so: SearchObject) -> JSON_STR:
    """
    the python counterpart of golangvectors(): build (or fetch) a vector model
    for the search and answer the query with JSON

    the overall plan:

    [0] preflight
        [a] abort early if checkneedtoabort() says so
        [b] reuse a stored model if checkforstoredvector() finds one [skip to 5]
    [1] generate a searchlist
    [2] do a searchlistintosqldict()
    [3] acquire and bag the words
        [a] grab db lines that are relevant to the search
        [b] turn them into a unified text block
        [c] do some preliminary cleanups
        [d] break the text into sentences and assemble []SentenceWithLocus
            (NB: these are "unlemmatized bags of words")
        [e] figure out all of the words used in the passage
        [f] find all of the parsing info relative to these words
        [g] figure out which headwords to associate with the collection of words
        [h] build the lemmatized bags of words ('unlemmatized' can skip [f] and [g]...)
    [4] hand the bags over to Word2Vec(), etc.
    [5] run queries against the model and return the JSON results

    a robot caller (so.iamarobot) only ever gets an empty string back
    """

    # debugmessage('pythonvectors()')

    assert so.vectorquerytype in [
        'analogies', 'nearestneighborsquery', 'topicmodel'
    ]

    # [0a] is this search allowed to run at all?
    so.poll.statusis('Checking for valid search')
    abortjson = checkneedtoabort(so)
    if abortjson:
        # bailing out: drop the poll and hand back the abort message as-is
        del so.poll
        return abortjson

    # [0b] is there a stored model we can reuse?
    so.poll.statusis('Checking for stored search')
    # calculatewholeauthorsearches() + configurewhereclausedata()
    so = updatesearchlistandsearchobject(so)
    so.setsearchlistthumbprint()
    # -1 turns off the % completed notice in the JS
    so.poll.allworkis(-1)
    so.poll.sethits(0)

    themodel = checkforstoredvector(so)

    if themodel:
        if so.iamarobot:
            # the bot is attempting to build something that has already been built
            return ''
    else:
        # [1] generate a searchlist: executesearch() is the template
        so.usecolumn = 'marked_up_line'
        so.cap = 199999999

        # [2] searchlistintosqldict() [this is killing lda...]
        so.searchsqldict = searchlistintosqldict(so, '', vectors=True)

        # [3] acquire and bag the words
        bagsofwords = acquireandbagthewords(so)

        # [4] hand the bags over to Word2Vec(), etc.
        so.poll.statusis('Building the model')

        if so.vectorquerytype in ('nearestneighborsquery', 'analogies'):
            # one gensim model serves both neighbors and analogies
            themodel = buildgensimmodel(so, bagsofwords)
        elif so.vectorquerytype == 'topicmodel':
            stops = list(mostcommonwordsviaheadwords())
            bagsofsentences = [
                removestopwords(' '.join(bag), stops) for bag in bagsofwords
            ]
            themodel = buildsklearnselectedworks(so, bagsofsentences)

    # [5] run queries against the model; robots get nothing back
    if so.iamarobot:
        return ''

    if so.vectorquerytype == 'nearestneighborsquery':
        return generatenearestneighbordata(None, len(so.searchlist), so, themodel)

    if so.vectorquerytype == 'analogies':
        return gensimgenerateanalogies(themodel, so)

    if so.vectorquerytype == 'topicmodel':
        return ldatopicsgenerateoutput(themodel, so)

    return json.dumps(
        'golang cannot execute {s} queries'.format(s=so.vectorquerytype))

Exemple #3

0

Afficher le fichier

def precomposedphraseandproximitysearch(so: SearchObject) -> List[dbWorkLine]:
    """
    do a precomposedsqlsubqueryphrasesearch() and then search inside the results for part two...

    outline of what the code below does:

        [1] pick the searcher for the second pass according to whether so.seeking
            and so.proximate look like multi-word phrases
        [2] run the first pass with precomposedsqlsubqueryphrasesearch() under
            INTERMEDIATESEARCHCAP, with so.proximate and so.lemmatwo blanked out
        [3] move the stashed second term into so.seeking / so.lemmaone, rebuild
            so.searchsqldict, and run the second pass
        [4] keep the first-pass hits that are (or are not, if not so.near) within
            so.distance lines of a second-pass hit
        [5] if so.scope == 'words', narrow further via paredowntowithinxwords()

    NB: `so` is mutated throughout and is not returned to its initial state

    corner case tester: two line-enders: non solum + temporum dignitatem

        [12] Caesar, De Bello Gallico: book 7, chapter 54, section 4, line 2

        7.54.3.3    multatos agris, omnibus ereptis sociis, imposito stipendio,
        7.54.4.1    obsidibus summa cum contumelia extortis, et quam in
        7.54.4.2    fortunam quamque in amplitudinem deduxisset, ut non
        7.54.4.3    solum in pristinum statum redissent, sed omnium tem-
        7.54.4.4    porum dignitatem et gratiam antecessisse viderentur.

    corner case tester: two distant line-enders: temporum dignitatem + obsides Galliae

        ut non
        solum in pristinum statum redissent, sed omnium tem-     7.54.4.3
        porum dignitatem et gratiam antecessisse viderentur.
        his datis mandatis eos ab se dimisit.
        Noviodunum erat oppidum Haeduorum ad ripas     7.55.1.1
        Ligeris opportuno loco positum. huc Caesar omnes ob-     7.55.2.1
        sides Galliae, frumentum, pecuniam publicam, suorum

    the old code will trick you by pretending it is doing a valid search even though it is not
    really set up to handle this situation and was not supposed to promise that it could do
    phrase+ [it's the phrase-spanning-two-lines bit that yields the problem since you do "lemma+"
    but have no handler for the multi-line issue]

    0.0.0-1.8.1

        Sought all 19 known forms of »χώρα« within 1 lines of »μεγάλην δύναμιν«
        Searched 3,182 works and found 1 passage (0.77s)
        Searched between 850 B.C.E. and 300 B.C.E.
        Sorted by name

        [1] Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 5, line 47

        3c,688,F.5.45    τόπουϲ. (3) γενόμενον δ’ ἀποϲτάτην καὶ πείϲαντα τὸ ϲύμπαν ἔθνοϲ ἀντέχεϲθαι
        3c,688,F.5.46    τῆϲ ἐλευθερίαϲ, αἱρεθῆναι ϲτρατηγὸν διὰ τὴν ἀνδρείαν. ἔπειτα πυνθανόμενον
        3c,688,F.5.47    ἀθροιζομένην ἐπ’ αὐτὸν μεγάλην δύναμιν, καθοπλίϲαι τοὺϲ Καδουϲίουϲ παν-
        3c,688,F.5.48    δημεί, καὶ καταϲτρατοπεδεῦϲαι πρὸϲ ταῖϲ εἰϲ τὴν χώραν εἰϲβολαῖϲ, ἔχοντα
        3c,688,F.5.49    τοὺϲ ϲύμπανταϲ οὐκ ἐλάττουϲ εἴκοϲι μυριάδων. (4) τοῦ δὲ βαϲιλέωϲ Ἀρταίου

    1.8.2+

        Sought all 19 known forms of »χώρα« within 1 lines of »μεγάλην δύναμιν«
        Searched 2,346 works and found 2 passages (2.2s)
        Searched between 850 B.C.E. and 300 B.C.E.
        Sorted by name

        [1] Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 5, line 47

        3c,688,F.5.45    τόπουϲ. (3) γενόμενον δ’ ἀποϲτάτην καὶ πείϲαντα τὸ ϲύμπαν ἔθνοϲ ἀντέχεϲθαι
        3c,688,F.5.46    τῆϲ ἐλευθερίαϲ, αἱρεθῆναι ϲτρατηγὸν διὰ τὴν ἀνδρείαν. ἔπειτα πυνθανόμενον
        3c,688,F.5.47    ἀθροιζομένην ἐπ’ αὐτὸν μεγάλην δύναμιν, καθοπλίϲαι τοὺϲ Καδουϲίουϲ παν-
        3c,688,F.5.48    δημεί, καὶ καταϲτρατοπεδεῦϲαι πρὸϲ ταῖϲ εἰϲ τὴν χώραν εἰϲβολαῖϲ, ἔχοντα
        3c,688,F.5.49    τοὺϲ ϲύμπανταϲ οὐκ ἐλάττουϲ εἴκοϲι μυριάδων. (4) τοῦ δὲ βαϲιλέωϲ Ἀρταίου

        [2] Ctesias, Fragmenta: Volume-Jacoby#-F 3c,688,F, fragment 14, line 54

        3c,688,F.14.52    (40) καὶ ἐλυπήθη λύπην ϲφοδρὰν Μεγάβυζοϲ, καὶ ἐπένθηϲε, καὶ ἠιτήϲατο
        3c,688,F.14.53    ἐπὶ Ϲυρίαν τὴν ἑαυτοῦ χώραν ἀπιέναι. ἐνταῦθα λάθραι καὶ τοὺϲ ἄλλουϲ τῶν
        3c,688,F.14.54    Ἑλλήνων προέπεμπε. καὶ ἀπήιει, καὶ ἀπέϲτη βαϲιλέωϲ, καὶ ἀθροίζει μεγάλην
        3c,688,F.14.55    δύναμιν ἄχρι πεντεκαίδεκα μυριάδων χωρὶϲ τῶν ἱππέων [καὶ τῶν πεζῶν].
        3c,688,F.14.56    καὶ πέμπεται Οὔϲιριϲ κατ’ αὐτοῦ ϲὺν ⟨κ⟩ μυριάϲι, καὶ ϲυνάπτεται πόλεμοϲ, καὶ

    :param so: the search to execute; read and mutated in place
    :return: the first-pass hit lines that survive the proximity test
    """

    #
    # initially do "within x lines"
    #

    # "a phrase" == a non-space, one whitespace character, then another non-space
    phrasefinder = re.compile(r'[^\s]\s[^\s]')
    if re.search(phrasefinder, so.seeking) and re.search(
            phrasefinder, so.proximate):
        # both terms are phrases: the second pass must be a phrase search too
        secondsearch = precomposedsqlsubqueryphrasesearch
    elif not re.search(phrasefinder, so.seeking) and re.search(
            phrasefinder, so.proximate):
        # only so.proximate is a phrase
        # NOTE(review): presumably the swaps put the phrase into so.seeking so that
        # the first pass looks for it -- confirm against SearchObject
        so.swapseekingandproxmate()
        so.swaplemmaoneandtwo()
        secondsearch = basicprecomposedsqlsearcher
    else:
        secondsearch = basicprecomposedsqlsearcher

    # stash what the first pass must not see: c = cap, ps = proximate, pl = lemmatwo
    c = so.cap
    ps = so.proximate
    so.proximate = str()
    pl = so.lemmatwo
    so.lemmatwo = str()
    so.phrase = so.seeking
    firstterm = so.phrase

    # first pass: always a phrase search, run under the intermediate cap
    so.cap = hipparchia.config['INTERMEDIATESEARCHCAP']
    initialhitlines = precomposedsqlsubqueryphrasesearch(so)

    # promote the stashed second term to be the thing now sought; restore the cap
    so.seeking = ps
    so.lemmaone = pl
    so.setsearchtype()
    so.cap = c

    # so.phrase is only set for the second pass if that pass is a phrase search
    if secondsearch == precomposedsqlsubqueryphrasesearch:
        so.phrase = ps
    else:
        so.phrase = str()

    # NOTE(review): perparesoforsecondsqldict() is opaque from here; presumably it
    # confines the second pass to the neighborhood of the initial hits -- confirm
    so = perparesoforsecondsqldict(so, initialhitlines)
    so.searchsqldict = searchlistintosqldict(so, so.seeking)
    if so.lemmaone:
        so.searchsqldict = rewritesqlsearchdictforlemmata(so)

    so.poll.sethits(0)
    newhitlines = secondsearch(so)

    # index the first-pass hits by uniqueid
    initialhitlinedict = {hl.uniqueid: hl for hl in initialhitlines}

    # collect an id for every line within so.distance lines of a second-pass hit
    # NOTE(review): this assumes hl.uniqueid has the shape '<wkuinversalid>_<index>' -- confirm
    newhitlineids = set()
    for nhl in newhitlines:
        indices = list(
            range(nhl.index - so.distance, nhl.index + so.distance + 1))
        ids = ['{a}_{b}'.format(a=nhl.wkuinversalid, b=i) for i in indices]
        newhitlineids.update(ids)

    maybefinalhitines = list()
    if so.near:
        # "is near"
        maybefinalhitines = [
            initialhitlinedict[hl] for hl in initialhitlinedict
            if hl in newhitlineids
        ]
    elif not so.near:
        # "is not near"
        maybefinalhitines = [
            initialhitlinedict[hl] for hl in initialhitlinedict
            if hl not in newhitlineids
        ]

    #
    # if necessary, do "within x words" as x lines hits will always be a subset of the first set
    #

    if so.lemmaone:
        secondterm = wordlistintoregex(so.lemmaone.formlist)
    else:
        secondterm = so.seeking

    if so.scope == 'words':
        finalhitlines = paredowntowithinxwords(so, firstterm, secondterm,
                                               maybefinalhitines)
    else:
        finalhitlines = maybefinalhitines

    # to humor rewriteskgandprx()
    # but that formatting doesn't 100% work yet...

    so.termone = firstterm
    so.termtwo = secondterm
    so.lemmatwo = so.lemmaone

    return finalhitlines