Example #1
0
    def searchWithRequestAndQuery(cls, query, indexReader, taxoReader,
                                  indexingParams, facetRequest):
        """
        Search an index with facets for given query and facet requests.
        returns a List<FacetResult>
        """
        # prepare searcher to search against
        searcher = IndexSearcher(indexReader)
        # collect matching documents into a collector
        topDocsCollector = TopScoreDocCollector.create(10, True)
        if not indexingParams:
            indexingParams = DefaultFacetIndexingParams()

        # Faceted search parameters indicate which facets are we interested in
        facetSearchParams = FacetSearchParams(indexingParams)
        # Add the facet request of interest to the search params
        facetSearchParams.addFacetRequest(facetRequest)
        facetsCollector = FacetsCollector(facetSearchParams, indexReader,
                                          taxoReader)
        # perform documents search and facets accumulation
        searcher.search(
            query, MultiCollector.wrap([topDocsCollector, facetsCollector]))
        # Obtain facets results and print them
        res = facetsCollector.getFacetResults()
        i = 0
        for facetResult in res:
            print "Result #%d has %d descendants" % (
                i, facetResult.getNumValidDescendants())
            print "Result #%d : %s" % (i, facetResult)
            i += 1

        return res
Example #2
0
 def getCrowds(self, query, field = CrowdFields.text): 
     searcher = IndexSearcher(self.index, True)
     q = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(query)
     collector = TopScoreDocCollector.create(hitsPerPage, True)
     searcher.search(q, collector)
     hits = collector.topDocs().scoreDocs
     
     return [
         searcher.doc(scoreDoc.doc).get(CrowdFields.id)
         for scoreDoc in hits]
Example #3
0
    def getCrowds(self, query, field=CrowdFields.text):
        searcher = IndexSearcher(self.index, True)
        q = QueryParser(Version.LUCENE_CURRENT, field,
                        self.analyzer).parse(query)
        collector = TopScoreDocCollector.create(hitsPerPage, True)
        searcher.search(q, collector)
        hits = collector.topDocs().scoreDocs

        return [
            searcher.doc(scoreDoc.doc).get(CrowdFields.id) for scoreDoc in hits
        ]
Example #4
0
    def query(indexName, queryFile, runName):
        indReader = IndexReader.open(SimpleFSDirectory(File(indexName)))
        indSearcher = IndexSearcher(indReader)
        ir = indSearcher.getIndexReader()

        qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT))

        f = open('results-'+runName, 'w')

        while(True):
            id = queryFile.readline()

            if id == "":
                break

            id = id.replace("C","")
            id = id.replace("\n","")

            queryString = queryFile.readline()
            queryString = queryString.replace("?","")
            queryString = queryString.replace("*","")
            queryString = queryString.replace("-","_")
            queryString = queryString.replace("\n","")

            query = qp.parse(queryString)

            queryFile.readline()

            returnedDocs = 1000
            collector = TopScoreDocCollector.create(returnedDocs, True)

            indSearcher.search(query, collector)

            hits = collector.topDocs().scoreDocs

            size = len(hits)
            print "Total hits for query " +id+ ": "+str(size)

            i = 0
            for hit in hits:        
                docId = hits[i].doc
                score = hits[i].score
                doc = ir.document(docId)
                j = i + 1
                f.write(id + " 0 " + doc.get('id') + " " + str(j) + " " + str(score) +" " + runName +"\n")
                i+=1

        f.close()