Example #1
0
    def suggestions_and_scores(self, text, weighting=None):
        """Look up alternative spellings of ``text`` and return them with
        their raw scores.

        Each result is a ``(word, score, weight)`` triple: ``word`` and
        ``score`` are read from the stored fields of a matching dictionary
        entry (the score being the one assigned through
        :meth:`SpellChecker.add_field` or
        :meth:`SpellChecker.add_scored_words`), and ``weight`` is the score
        the entry received in the search over the n-grams of ``text``.
        Entries whose word equals ``text`` are left out.

        Words must already have been added to the dictionary (add_field,
        add_words and/or add_scored_words) before calling this.

        This is the low-level entry point for callers that want the raw
        numbers, e.g. to build their own ranking; most callers want
        :meth:`~SpellChecker.suggest` instead.

        :param text: The word to check.
        :param weighting: Weighting object handed to the index searcher; a
            fresh ``TF_IDF()`` is used when this is None.
        :rtype: list
        """

        scorer = TF_IDF() if weighting is None else weighting

        # Phase 1: tokenize the word once for every configured n-gram size.
        ngrams_by_field = {}
        for n in range(self.mingram, self.maxgram + 1):
            tokenizer = analysis.NgramAnalyzer(n)
            # Read .text while iterating rather than keeping the token objects.
            ngrams_by_field["gram%s" % n] = [tok.text for tok in tokenizer(text)]

        # Phase 2: only sizes strictly shorter than the word are queried.
        upper = min(self.maxgram + 1, len(text))
        clauses = []
        for n in range(self.mingram, upper):
            field = "gram%s" % n
            pieces = ngrams_by_field[field]
            # The first and last n-grams get their own boosted fields.
            head = query.Term("start%s" % n, pieces[0], boost=self.booststart)
            tail = query.Term("end%s" % n, pieces[-1], boost=self.boostend)
            clauses.append(head)
            clauses.append(tail)
            clauses.extend(query.Term(field, piece) for piece in pieces)

        combined = query.Or(clauses)
        searcher = self.index().searcher(weighting=scorer)
        try:
            hits = searcher.search(combined)
            suggestions = []
            for rank, fields in enumerate(hits):
                # Skip the word that was looked up.
                if fields["word"] != text:
                    suggestions.append(
                        (fields["word"], fields["score"], hits.score(rank)))
            return suggestions
        finally:
            # Always release the searcher, even if the search fails.
            searcher.close()
Example #2
0
    def parse_query(self, fieldname, qstring, boost=1.0):
        """Build a query for a true/false style field.

        ``qstring`` is matched against ``self.falses`` first and then
        ``self.trues``; a match selects ``self.strings[0]`` or
        ``self.strings[1]`` respectively as the term text.

        :param fieldname: Name of the field the term is searched in.
        :param qstring: The user-supplied value to interpret.
        :param boost: Boost passed through to the resulting term query.
        :returns: ``query.Term`` for a recognized value, otherwise
            ``query.NullQuery``.
        """
        from alfanous.Support.whoosh import query

        if qstring in self.falses:
            indexed = self.strings[0]
        elif qstring in self.trues:
            indexed = self.strings[1]
        else:
            indexed = None

        if indexed is not None:
            return query.Term(fieldname, indexed, boost=boost)
        # Unrecognized value (or no text configured for it): match nothing.
        return query.NullQuery
Example #3
0
    def document_numbers(self, **kw):
        """Returns an iterable of the document numbers for documents matching
        the given keyword arguments, where the keyword keys are field names and
        the values are terms that must appear in the field.

        >>> docnums = list(searcher.document_numbers(emailto=u"user@example.com"))

        If the combined query normalizes to a false value (presumably the
        case when no keyword arguments are given), an empty iterator is
        returned rather than None, so the result can always be iterated.

        :param kw: ``fieldname=term`` pairs; all of them must match.
        :rtype: iterable
        """

        # items() is available on both Python 2 and Python 3 dictionaries.
        terms = [query.Term(fieldname, value) for fieldname, value in kw.items()]
        q = query.And(terms).normalize()
        if q:
            return q.docs(self)
        # Nothing to search for: hand back an empty iterator instead of
        # falling through to an implicit None, which would break
        # ``list(...)`` / ``for`` on the result.
        return iter(())
Example #4
0
 def parse_query(self, fieldname, qstring, boost=1.0):
     """Build a single-term query for ``fieldname``.

     ``qstring`` is converted with ``self.to_text`` and the result is used
     as the term text.

     :param fieldname: Name of the field the term is searched in.
     :param qstring: The user-supplied value to convert.
     :param boost: Boost passed through to the resulting term query.
     :returns: A ``query.Term`` instance.
     """
     from alfanous.Support.whoosh import query

     term_text = self.to_text(qstring)
     return query.Term(fieldname, term_text, boost=boost)