Ejemplo n.º 1
0
    def _update_doc_in_index(self, index_writer, doc):
        """
        Add or update the index entry of a document.

        The indexed content is made of every line of every page (one line
        of text per line), followed by the label names separated by spaces.
        Accents are stripped from both the content and the label list.

        Arguments:
            index_writer --- object on which update_document() is called
            doc --- document to index; its docid, doctype, last_mod, pages
                and labels attributes are read

        Returns:
            Always True
        """
        last_mod = datetime.datetime.fromtimestamp(doc.last_mod)
        docid = unicode(doc.docid)
        label_names = [unicode(label.name) for label in doc.labels]

        # Collect all the pieces first, then assemble them in one go
        fragments = []
        for page in doc.pages:
            fragments.extend(unicode(line) + u"\n" for line in page.text)
        fragments.extend(u" " + name for name in label_names)

        content = strip_accents(u"".join(fragments).strip())
        if content == u"":
            # make sure the text field is not empty. Whoosh doesn't like that
            content = u"empty"

        index_writer.update_document(
            docid=docid,
            doctype=doc.doctype,
            content=content,
            label=u",".join(strip_accents(name) for name in label_names),
            last_read=last_mod
        )
        return True
Ejemplo n.º 2
0
    def _update_doc_in_index(index_writer, doc):
        """
        Add/Update a document in the index

        The indexed content is made of every line of every page, then the
        document's extra text (if any), then the label names. Accents are
        stripped from both the content and the label list.

        Arguments:
            index_writer --- object on which update_document() is called
            doc --- document to index; its docid, doctype, date, last_mod,
                pages, extra_text and labels attributes are read

        Returns:
            Always True
        """
        # NOTE(review): no 'self' parameter; presumably decorated as a
        # static method outside of this block. Confirm before calling.
        last_mod = datetime.datetime.fromtimestamp(doc.last_mod)
        docid = unicode(doc.docid)

        # Gather every piece of text and join them with a single separator.
        # Joining page by page left nothing between the last line of a page
        # and the first line of the next one (or the extra text), fusing
        # two unrelated words into one in the indexed content.
        chunks = []
        for page in doc.pages:
            chunks.extend(unicode(line) for line in page.text)
        extra_txt = doc.extra_text
        if extra_txt != u"":
            chunks.append(extra_txt)

        label_names = [unicode(label.name) for label in doc.labels]

        txt = u"\n".join(chunks) + u"\n"
        txt += u"".join(u" " + name for name in label_names)
        txt = txt.strip()
        txt = strip_accents(txt)
        if txt == u"":
            # make sure the text field is not empty. Whoosh doesn't like that
            txt = u"empty"
        labels = u",".join(strip_accents(name) for name in label_names)

        index_writer.update_document(docid=docid,
                                     doctype=doc.doctype,
                                     content=txt,
                                     label=labels,
                                     date=doc.date,
                                     last_read=last_mod)
        return True
Ejemplo n.º 3
0
    def find_suggestions(self, sentence):
        """
        Build alternative search sentences by replacing one keyword at a
        time with a correction proposed for the "content" field.

        Only alternatives for which find_documents() returns at least one
        document are kept.

        Arguments:
            sentence --- keywords separated by single spaces (one string)
        Return:
            A sorted list of strings. Each string is the original sentence
            with exactly one keyword replaced.
        """
        words = sentence.split(" ")
        corrector = self.__searcher.corrector("content")

        suggestions = []
        for position, word in enumerate(words):
            normalized = strip_accents(word)
            if len(normalized) <= MIN_KEYWORD_LEN:
                # too short to be worth correcting
                continue
            for replacement in corrector.suggest(normalized, limit=5):
                candidate = u" ".join(
                    words[:position] + [replacement] + words[position + 1:])
                if len(self.find_documents(candidate)) > 0:
                    suggestions.append(candidate)
        return sorted(suggestions)
Ejemplo n.º 4
0
    def find_suggestions(self, sentence):
        """
        Look for variants of the given sentence that match documents.

        Each keyword longer than MIN_KEYWORD_LEN (once its accents are
        stripped) is swapped, in turn, for up to 5 corrections suggested
        for the "content" field. A variant is returned only if
        find_documents() finds something for it.

        Arguments:
            sentence --- keywords separated by single spaces (one string)
        Return:
            The matching variants (one string each), sorted.
        """
        keywords = sentence.split(" ")
        corrector = self.__searcher.corrector("content")
        matching_variants = []

        for idx, raw_keyword in enumerate(keywords):
            keyword = strip_accents(raw_keyword)
            if len(keyword) <= MIN_KEYWORD_LEN:
                continue
            for alternative in corrector.suggest(keyword, limit=5):
                # work on a copy so the other variants start from the
                # untouched keyword list
                variant_words = list(keywords)
                variant_words[idx] = alternative
                variant = u" ".join(variant_words)
                if len(self.find_documents(variant)) > 0:
                    matching_variants.append(variant)

        matching_variants.sort()
        return matching_variants
Ejemplo n.º 5
0
    def find_documents(self, sentence):
        """
        Return the documents matching a search sentence.

        Arguments:
            sentence --- search keywords (single string)

        Returns:
            self.docs if the sentence is blank; otherwise whatever
            __find_documents() returns for the parsed query
        """
        cleaned = sentence.strip()
        if not cleaned:
            # nothing to search for: hand back self.docs unfiltered
            return self.docs

        query = self.__qparser.parse(strip_accents(cleaned))
        return self.__find_documents(query)
Ejemplo n.º 6
0
    def find_documents(self, sentence):
        """
        Look up the documents matching the given keywords.

        Arguments:
            sentence --- keywords to look for (single string)

        Returns:
            The result of __find_documents() for the parsed keywords, or
            self.docs when the sentence contains nothing but whitespace
        """
        sentence = sentence.strip()
        if sentence != u"":
            # accents are removed before the sentence is parsed
            sentence = strip_accents(sentence)
            return self.__find_documents(self.__qparser.parse(sentence))
        return self.docs