def _update_doc_in_index(self, index_writer, doc):
    """
    Add or update a document in the index.

    The indexed content is made of every text line of every page (one line
    each), followed by the document's label names, with accents stripped.

    Arguments:
        index_writer --- writer whose update_document() is called once
        doc --- document to index; its last_mod, docid, doctype, pages and
            labels are read

    Returns:
        Always True
    """
    modified_at = datetime.datetime.fromtimestamp(doc.last_mod)
    doc_key = unicode(doc.docid)

    # Each text line is followed by a newline, each label name is preceded
    # by a space; the surrounding whitespace is trimmed afterwards.
    pieces = [
        unicode(text_line) + u"\n"
        for page in doc.pages
        for text_line in page.text
    ]
    pieces.extend([u" " + unicode(label.name) for label in doc.labels])
    content = strip_accents(u"".join(pieces).strip())
    if content == u"":
        # make sure the text field is not empty. Whoosh doesn't like that
        content = u"empty"

    label_field = u",".join(
        [strip_accents(unicode(label.name)) for label in doc.labels]
    )
    index_writer.update_document(
        docid=doc_key,
        doctype=doc.doctype,
        content=content,
        label=label_field,
        last_read=modified_at
    )
    return True
def _update_doc_in_index(index_writer, doc):
    """
    Add/Update a document in the index

    The indexed content is made of the text of every page, the document's
    extra text (if any) and its label names, with accents stripped.

    Arguments:
        index_writer --- writer whose update_document() is called once
        doc --- document to index; its last_mod, docid, doctype, date,
            pages, extra_text and labels are read

    Returns:
        Always True
    """
    last_mod = datetime.datetime.fromtimestamp(doc.last_mod)
    docid = unicode(doc.docid)

    # One chunk per page, plus the extra text. Chunks are joined with a
    # newline so that the last word of a page is never glued to the first
    # word of the next page (or of the extra text), which would index a
    # bogus merged word instead of the two real ones.
    chunks = [
        u"\n".join([unicode(line) for line in page.text])
        for page in doc.pages
    ]
    extra_txt = doc.extra_text
    if extra_txt:
        # falsy covers both the empty string and a missing (None) value
        chunks.append(extra_txt)

    label_names = [unicode(label.name) for label in doc.labels]

    txt = u"\n".join(chunks)
    txt += u"".join([u" " + name for name in label_names])
    txt = strip_accents(txt.strip())
    if txt == u"":
        # make sure the text field is not empty. Whoosh doesn't like that
        txt = u"empty"

    labels = u",".join([strip_accents(name) for name in label_names])

    index_writer.update_document(
        docid=docid,
        doctype=doc.doctype,
        content=txt,
        label=labels,
        date=doc.date,
        last_read=last_mod
    )
    return True
def find_suggestions(self, sentence):
    """
    Search all possible suggestions. A suggestion is only returned if at
    least one document matches it.

    Each keyword longer than MIN_KEYWORD_LEN (once its accents are stripped)
    is submitted to the index corrector for the "content" field. Every
    correction proposed is substituted for that keyword in the original
    sentence to build a candidate.

    Arguments:
        sentence --- keywords (single string, keywords separated by spaces)
            for which we want suggestions

    Returns:
        A sorted list of strings. Each string is a full suggested sentence.
    """
    words = sentence.split(" ")
    suggestions = []
    corrector = self.__searcher.corrector("content")

    for position, word in enumerate(words):
        normalized = strip_accents(word)
        if len(normalized) <= MIN_KEYWORD_LEN:
            continue
        for replacement in corrector.suggest(normalized, limit=5)[:]:
            # Same sentence, with only the current keyword swapped out
            candidate = u" ".join(
                words[:position] + [replacement] + words[position + 1:]
            )
            if len(self.find_documents(candidate)) > 0:
                suggestions.append(candidate)

    suggestions.sort()
    return suggestions
def find_documents(self, sentence):
    """
    Return all the documents matching the given keywords.

    Arguments:
        sentence --- keywords (single string)

    Returns:
        self.docs if the sentence is blank (empty or whitespace only).
        Otherwise, the result of running the parsed, accent-stripped
        sentence through self.__find_documents().
    """
    keywords = sentence.strip()
    if keywords != u"":
        query = self.__qparser.parse(strip_accents(keywords))
        return self.__find_documents(query)
    # Nothing to search for: fall back on self.docs
    return self.docs