def testOutOfOrderDocsScoringSort(self):
        """
        Check which out-of-order TopFieldCollector is created per option set.

        Two Sort criteria to instantiate the multi/single comparators.
        For every sort and every combination of the three boolean options
        handed to TopFieldCollector.create(), the test asserts the name of
        the collector class that comes back, runs the collector against
        self.full and expects exactly 10 score docs.
        """

        sorts = [Sort(SortField.FIELD_DOC), Sort()]

        # All eight combinations of the three boolean options.  They are
        # presumably (fillFields, trackDocScores, trackMaxScore), matching
        # the Lucene 3.x TopFieldCollector.create signature -- confirm
        # against the Lucene version in use.
        tfcOptions = [[False, False, False],
                      [False, False, True],
                      [False, True, False],
                      [False, True, True],
                      [True, False, False],
                      [True, False, True],
                      [True, True, False],
                      [True, True, True]]

        # Expected inner-class name for the option row at the same index.
        # Per this table the first option never changes the class, and a
        # True third option always selects the MaxScore variant.
        actualTFCClasses = [
            "OutOfOrderOneComparatorNonScoringCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorNonScoringCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector"
        ]

        bq = BooleanQuery()

        # Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
        # which delegates to BS if there are no mandatory clauses.
        bq.add(MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)

        # Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to
        # return the clause instead of BQ.
        bq.setMinimumNumberShouldMatch(1)

        for sort in sorts:
            # Builtin zip instead of itertools.izip: same pairs for these
            # short lists, and it exists on both Python 2 and Python 3.
            for tfcOption, actualTFCClass in zip(tfcOptions,
                                                 actualTFCClasses):
                # The trailing False is the fixed last argument of create();
                # presumably docsScoredInOrder -- confirm.
                tdc = TopFieldCollector.create(sort, 10, tfcOption[0],
                                               tfcOption[1], tfcOption[2],
                                               False)

                # assertTrue replaces the deprecated assert_ alias; the
                # message reports the class that was actually created.
                className = tdc.getClass().getName()
                self.assertTrue(
                    className.endswith("$" + actualTFCClass),
                    "%s does not end with $%s for options %s"
                    % (className, actualTFCClass, tfcOption))

                self.full.search(bq, tdc)

                tds = tdc.topDocs()
                sds = tds.scoreDocs
                self.assertEqual(10, len(sds))
# Example #2
# 0
def searcher_text(text):
    """Return up to 25 tag names ranked by their normalised search score.

    The text is split into sentences with tokenize.sent_tokenize.  For
    each sentence a BooleanQuery of SHOULD TermQuery clauses on the "Body"
    field is built from the lower-cased words of the sentence plus the
    tokens of whatever getPhrases(sentence) returns.  The top 5 hits of
    that query are fetched from the module-level searcher, and every hit
    with a positive score has its "Tags" value and score passed to
    addTags, which accumulates into a per-call dict.

    Each accumulated value is then divided by
    1 + log(stack_tags_count[tag]) and the tags are returned ordered by
    the resulting value, highest first.
    """
    # Attach the calling thread to the Lucene JVM before using it.
    lucene.getVMEnv().attachCurrentThread()

    max_hits = 5
    # Presumably keeps the query below BooleanQuery's clause limit --
    # TODO confirm.
    max_clauses = 1000
    max_tags = 25

    tag_scores = {}
    for raw_sentence in tokenize.sent_tokenize(text):
        sentence = raw_sentence.replace('\n', ' ')
        lowered_words = [word.lower() for word in sentence.split()]

        query = BooleanQuery()
        query.setMinimumNumberShouldMatch(1)

        # Single words first, then the phrases; each entry is split on
        # whitespace so multi-word phrases contribute one term per word.
        candidates = lowered_words + getPhrases(sentence)
        tokens = [token
                  for candidate in candidates
                  for token in candidate.split()]
        for token in tokens[:max_clauses]:
            query.add(TermQuery(Term("Body", token)),
                      BooleanClause.Occur.SHOULD)

        for hit in searcher.search(query, max_hits).scoreDocs:
            if not hit.score > 0.0:
                continue
            tags = searcher.doc(hit.doc).get("Tags")
            addTags(tags, hit.score, tag_scores)

    # Only values are rewritten, so iterating the dict while assigning to
    # existing keys is safe.
    for tag in tag_scores:
        damping = float(1 + math.log(stack_tags_count[tag]))
        tag_scores[tag] = tag_scores[tag] / damping

    # Stable sort of the keys by value, highest first, then keep the top.
    ranked_tags = sorted(tag_scores, key=tag_scores.get, reverse=True)
    return ranked_tags[:max_tags]