def search(self, query, lang, nostem=False):
    """Run a search and build the concept-lattice navigation result.

    Executes *query* via the underlying engine (stemmed or not, per
    *nostem*), rebuilds a formal context from the matched terms, locates
    the search concept inside it, and returns a dict with the original
    results plus specialization / generalization / sibling navigation
    data, lattice neighborhoods, and meta counters.

    NOTE(review): relies on ``self.siblings`` being populated as a side
    effect of ``self.getSiblings`` before the named-intent section runs
    — confirm against the helper's implementation.
    """
    # Stemmed vs. exact-token search, caller's choice.
    originResults = (self.engine.nostemSearch(query, lang) if nostem
                     else self.engine.search(query, lang))
    terms = originResults['terms']
    wordsTerms = originResults['wordsTerms']
    # Map normalized (stemmed) form -> original word for later display.
    queryStems = {normalizeWord(x, lang): x for x in wordsTerms}

    # Build a modified context from an OR-query over all result terms.
    modResult = self.engine.nostemSearch(' OR '.join(terms), lang)
    modDoc, modTerms = self._getDocsAndTerms(modResult)
    modContext = getContextFromSR(modDoc, modTerms,
                                  self.index.contains_term,
                                  self.maxKeywords)
    modAttrsID = modContext.attrs2ids(terms)

    # Keep only documents present in both the original and modified runs.
    originDocumentsID = {x['url'] for x in originResults['documents']}
    modDocumentsID = {x['url'] for x in modDoc}
    documentsID = originDocumentsID & modDocumentsID
    documentsID = modContext.objects2ids(documentsID)

    # Queries containing OR / NOT are anchored by objects, plain
    # conjunctive queries by attributes.
    pq = str(originResults['parsedQuery'])
    notAndQuery = ' OR ' in pq or 'NOT(' in pq
    if notAndQuery:
        modSearchConcept = self.getSearchConceptByObjects(modContext,
                                                          documentsID)
    else:
        modSearchConcept = self._getSearchConceptByAttr(modContext,
                                                        modAttrsID)

    lowerN, upperN = self.getLowerUpper(modContext, modSearchConcept)
    self.totalConcepts = set(lowerN | upperN)

    jsonContext = modContext.getJSON()
    # Present attributes to the user as words, not stems.
    jsonContext['attributes'] = [self.index.stem2word(x)
                                 for x in jsonContext['attributes']]

    res = {
        'origin': originResults,
        'specialization': self.getSpecialization(lowerN, modContext,
                                                 terms, modSearchConcept),
        'generalization': self.getGeneralization(upperN, modContext,
                                                 terms, modSearchConcept,
                                                 queryStems, notAndQuery),
        'siblings': self.getSiblings(upperN, lowerN, modContext,
                                     modSearchConcept, queryStems),
        'meta': {'objects': modContext.height,
                 'attributes': modContext.width,
                 'context': jsonContext},
        'suggestions': self.getSuggestions(wordsTerms,
                                           len(originResults['documents'])),
    }

    # Human-readable intent labels for every lattice neighborhood.
    namedLower = self.getNamedIntents(lowerN, modContext)
    namedUpper = self.getNamedIntents(upperN, modContext)
    namedSearchConcept = self.getNamedIntents([modSearchConcept],
                                              modContext)[0]
    namedSiblings = self.getNamedIntents(self.siblings, modContext)
    # "Trash" = concepts that belong to no named neighborhood.
    namedTrash = self.getNamedIntents(
        self.totalConcepts - (upperN | lowerN | self.siblings
                              | {modSearchConcept}),
        modContext)

    res['lattice'] = {'lower': namedLower,
                      'upper': namedUpper,
                      'siblings': namedSiblings,
                      'conceptintent': namedSearchConcept,
                      'trash': namedTrash,
                      'conceptextent': list(modSearchConcept.extent)}
    res['meta'].update({'lower': len(lowerN),
                        'upper': len(upperN),
                        'neighbor': len(self.totalConcepts)})
    return res
def search(self, query, lang, nostem=False):
    """Search *query* and assemble the lattice-navigation response.

    Runs the engine search (exact-token when *nostem* is true), derives a
    modified formal context from the result terms, finds the search
    concept within it, and returns a dict holding the origin results,
    navigation sections (specialization, generalization, siblings,
    suggestions), the named lattice neighborhoods, and size metadata.

    NOTE(review): ``self.siblings`` appears to be filled by
    ``self.getSiblings`` as a side effect before it is read below —
    verify with that helper.
    """
    if nostem:
        originResults = self.engine.nostemSearch(query, lang)
    else:
        originResults = self.engine.search(query, lang)

    terms = originResults['terms']
    wordsTerms = originResults['wordsTerms']
    # Normalized form of each query word, kept for display lookups.
    queryStems = {normalizeWord(w, lang): w for w in wordsTerms}

    # Re-query with every term OR-ed together to widen the context.
    modResult = self.engine.nostemSearch(' OR '.join(terms), lang)
    modDoc, modTerms = self._getDocsAndTerms(modResult)
    modContext = getContextFromSR(
        modDoc, modTerms, self.index.contains_term, self.maxKeywords)
    modAttrsID = modContext.attrs2ids(terms)

    # Intersect document URLs across the original and widened searches.
    sharedUrls = ({d['url'] for d in originResults['documents']}
                  & {d['url'] for d in modDoc})
    documentsID = modContext.objects2ids(sharedUrls)

    parsed = str(originResults['parsedQuery'])
    notAndQuery = ' OR ' in parsed or 'NOT(' in parsed
    # Disjunctive/negated queries anchor on objects; pure conjunctions
    # anchor on attributes.
    if notAndQuery:
        modSearchConcept = self.getSearchConceptByObjects(
            modContext, documentsID)
    else:
        modSearchConcept = self._getSearchConceptByAttr(
            modContext, modAttrsID)

    lowerN, upperN = self.getLowerUpper(modContext, modSearchConcept)
    self.totalConcepts = set(lowerN | upperN)

    jsonContext = modContext.getJSON()
    # Convert stemmed attribute labels back to words for output.
    jsonContext['attributes'] = [
        self.index.stem2word(a) for a in jsonContext['attributes']
    ]

    # Assemble the response section by section.
    res = {'origin': originResults}
    res['specialization'] = self.getSpecialization(
        lowerN, modContext, terms, modSearchConcept)
    res['generalization'] = self.getGeneralization(
        upperN, modContext, terms, modSearchConcept, queryStems,
        notAndQuery)
    res['siblings'] = self.getSiblings(
        upperN, lowerN, modContext, modSearchConcept, queryStems)
    res['meta'] = {'objects': modContext.height,
                   'attributes': modContext.width,
                   'context': jsonContext}
    res['suggestions'] = self.getSuggestions(
        wordsTerms, len(originResults['documents']))

    # Named intents for each neighborhood of the search concept.
    namedLower = self.getNamedIntents(lowerN, modContext)
    namedUpper = self.getNamedIntents(upperN, modContext)
    namedSearchConcept = self.getNamedIntents(
        [modSearchConcept], modContext)[0]
    namedSiblings = self.getNamedIntents(self.siblings, modContext)
    leftovers = self.totalConcepts - (
        upperN | lowerN | self.siblings | {modSearchConcept})
    namedTrash = self.getNamedIntents(leftovers, modContext)

    res['lattice'] = {
        'lower': namedLower,
        'upper': namedUpper,
        'siblings': namedSiblings,
        'conceptintent': namedSearchConcept,
        'trash': namedTrash,
        'conceptextent': list(modSearchConcept.extent),
    }
    res['meta'].update({'lower': len(lowerN),
                        'upper': len(upperN),
                        'neighbor': len(self.totalConcepts)})
    return res
def findWordIndex(self, content, word, lang):
    """Return the index of the first token in *content* whose normalized
    form equals *word*, or None when no token matches."""
    matches = (idx for idx, token in enumerate(content)
               if normalizeWord(token, lang) == word)
    return next(matches, None)