예제 #1
0
def retrieve(indexdir, queries):
    """Run every query against a Lucene index and write the ranked hits to disk.

    Each query text is split into ``\\w+`` tokens, every token is Porter-stemmed,
    and the stemmed text is parsed against the ``title``, ``abstract`` and
    ``authors`` fields with OR as the default operator. Up to 1000 hits per
    query are written to ``results_lucene.txt`` (overwritten on each call), one
    line per hit in the form
    ``<query id> Q0 <doc number + 1> <rank> <score> G17R3``.

    Args:
        indexdir: Path of the directory holding the Lucene index.
        queries: Mapping of query id to raw query text.

    Returns:
        None. The only result is the file written to the working directory.
    """
    lucene.initVM()
    # ``with`` guarantees the run file is flushed and closed even when a query
    # fails half-way through.
    with open("results_lucene.txt", "w") as out:
        analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
        reader = IndexReader.open(SimpleFSDirectory(File(indexdir)))
        try:
            searcher = IndexSearcher(reader)

            fields = ["title", "abstract", "authors"]
            max_hits = 1000

            # Loop-invariant helpers are built once instead of once per query.
            stemmer = PorterStemmer()
            tokenizer = RegexpTokenizer(r'\w+')
            parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, fields,
                                           analyzer)
            parser.setDefaultOperator(QueryParserBase.OR_OPERATOR)

            # ``items()`` works on both Python 2 and Python 3 mappings.
            for query_id, text in queries.items():
                stemmed = " ".join(
                    [stemmer.stem(word) for word in tokenizer.tokenize(text)])
                # Called through the class so PyLucene resolves the
                # single-string instance overload of parse().
                query = MultiFieldQueryParser.parse(parser, stemmed)
                hits = searcher.search(query, max_hits)
                # Ranks and document numbers are written 1-based.
                for rank, hit in enumerate(hits.scoreDocs, 1):
                    out.write("%s Q0 %s %s %s G17R3\n" %
                              (query_id, hit.doc + 1, rank, hit.score))
        finally:
            # Release the index files held by the reader.
            reader.close()
예제 #2
0
def search(searcher, analyzer, directory, query2):
    """Run ``query2`` against the index and collect the top matching tweets.

    The query text is parsed with AND as the default operator and at most 10
    hits are fetched. Every hit is echoed to stdout and appended to ``results``
    as a dict with the keys ``u_name``, ``tweet`` and ``score`` (the score is
    stored as a string).

    NOTE(review): ``fields`` and ``results`` are not defined inside this
    function; they are presumably module-level names. If ``results`` is shared,
    hits accumulate across calls unless the caller resets it -- confirm.

    Args:
        searcher: Object providing ``search(query, n)`` and ``doc(doc_id)``.
        analyzer: Analyzer handed to the query parser.
        directory: Unused; kept so existing callers keep working.
        query2: Query text. An empty string makes the function return early.

    Returns:
        The ``results`` list, or ``None`` when ``query2`` is empty.
    """
    # Every print below is a single-argument call: under Python 2 that is the
    # print statement applied to a parenthesised expression (same output as
    # before), and it is also valid Python 3.
    print("")
    print("Empty to quit.")
    command = query2
    if command == '':
        return

    print("")
    # Two spaces on purpose: the original ``print "Searching for ", command``
    # emitted the literal's trailing space plus the separator space.
    print("Searching for  %s" % (command,))
    parser = MultiFieldQueryParser(fields, analyzer)
    parser.setDefaultOperator(QueryParserBase.AND_OPERATOR)
    query = MultiFieldQueryParser.parse(parser, command)

    # 10 is the maximum number of matching documents returned.
    scoreDocs = searcher.search(query, 10).scoreDocs
    print("total matching documents in: " + str(len(scoreDocs)))
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        user = doc.get("u_name")
        tweet = doc.get("tweet")
        score = str(scoreDoc.score)
        print("@" + user + ": " + tweet + " Score:" + score)
        results.append({'u_name': user, 'tweet': tweet, 'score': score})
    print("")
    print("\n------------------------------------------------------")
    return results
예제 #3
0
 def brand_scent_search(brand, scent):
     """Build a parsed query for a brand plus its scents.

     The query text is ``brand``, a space, and the items of ``scent``
     concatenated without a separator. It is parsed against the ``name`` and
     ``scents`` fields with AND as the default operator.

     NOTE(review): ``analyzer`` is not defined in this function; it is
     presumably provided by the enclosing scope -- confirm.

     Args:
         brand: Brand text placed first in the query string.
         scent: Iterable of strings (or a single string) appended after the
             brand.

     Returns:
         The query object produced by the parser.
     """
     # Use the ``scent`` parameter: the previous body read an undefined
     # ``scents`` name, so the argument was ignored.
     text = brand + ' ' + ''.join(scent)
     fields = ["name", "scents"]
     parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, fields,
                                    analyzer)
     parser.setDefaultOperator(QueryParserBase.AND_OPERATOR)
     return MultiFieldQueryParser.parse(parser, text)
예제 #4
0
 def scents_search(former, mid, last):
     """Build a parsed query over the three scent-note fields.

     Each argument is concatenated without a separator, the three results are
     joined with single spaces, and the text is parsed against the
     ``former_scents``, ``mid_scents`` and ``last_scents`` fields with AND as
     the default operator.

     NOTE(review): ``analyzer`` is not defined in this function; it is
     presumably provided by the enclosing scope -- confirm.

     Args:
         former: Iterable of strings for the first part of the query text.
         mid: Iterable of strings for the middle part of the query text.
         last: Iterable of strings for the final part of the query text.

     Returns:
         The query object produced by the parser.
     """
     # The unused ``clauses`` list was dropped; it was never handed to the
     # parser, so it had no effect on the resulting query.
     text = ' '.join([''.join(former), ''.join(mid), ''.join(last)])
     fields = ["former_scents", "mid_scents", "last_scents"]
     parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, fields,
                                    analyzer)
     parser.setDefaultOperator(QueryParserBase.AND_OPERATOR)
     return MultiFieldQueryParser.parse(parser, text)
예제 #5
0
    def multiFieldsSearch(self, query, sim):
        """Search section content, section titles and article titles.

        The raw text is escaped before parsing and the terms are combined with
        OR. ``sim`` is installed on ``self.searcher`` before the search runs.

        Args:
            query: Raw query text.
            sim: Similarity object passed to ``self.searcher.setSimilarity``.

        Returns:
            The ``scoreDocs`` of a search limited to 6 hits.
        """
        # The calling thread is attached to the JVM before any Lucene call.
        lucene.getVMEnv().attachCurrentThread()

        search_fields = ["content_section", "title_section", 'title_article']
        field_parser = MultiFieldQueryParser(search_fields, self.analyzer)
        field_parser.setDefaultOperator(QueryParserBase.OR_OPERATOR)
        escaped_text = QueryParser.escape(query)
        parsed_query = MultiFieldQueryParser.parse(field_parser, escaped_text)

        self.searcher.setSimilarity(sim)
        top_docs = self.searcher.search(parsed_query, 6)
        return top_docs.scoreDocs
예제 #6
0
    def multiFieldsSearch(self, query, sim):
        """Search the ``content_section`` and ``title_article`` fields.

        The raw text is escaped before parsing and the terms are combined with
        OR. ``sim`` is installed on ``self.searcher`` before the search runs.

        Args:
            query: Raw query text.
            sim: Similarity object passed to ``self.searcher.setSimilarity``.

        Returns:
            The ``scoreDocs`` of a search limited to 6 hits.
        """
        # The calling thread is attached to the JVM before any Lucene call.
        lucene.getVMEnv().attachCurrentThread()

        search_fields = ["content_section", "title_article"]
        field_parser = MultiFieldQueryParser(search_fields, self.analyzer)
        field_parser.setDefaultOperator(QueryParserBase.OR_OPERATOR)
        escaped_text = QueryParser.escape(query)
        parsed_query = MultiFieldQueryParser.parse(field_parser, escaped_text)

        self.searcher.setSimilarity(sim)
        top_docs = self.searcher.search(parsed_query, 6)
        return top_docs.scoreDocs
예제 #7
0
def func_cross(former, mid, last):
    """Search the ``index_tb_new`` index across the three note fields.

    The three arguments are concatenated into one query string, parsed against
    the ``former``, ``mid`` and ``last`` fields with AND as the default
    operator, and the top 200 hits are handed to ``process`` together with the
    searcher.

    NOTE(review): the ``DirectoryReader`` opened here is never closed. It is
    left open because ``process`` is not visible from this block and may still
    need the searcher after returning -- confirm, then close it.

    Args:
        former: Text for the first part of the query string.
        mid: Text for the middle part of the query string.
        last: Text for the final part of the query string.

    Returns:
        Whatever ``process(scoreDocs, searcher)`` returns.
    """
    # The calling thread is attached to the JVM before any Lucene call.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    STORE_DIR = "index_tb_new"
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    # Two spaces between ``former`` and ``mid`` are kept exactly as before.
    text = former + ' ' + ' ' + mid + ' ' + last
    fields = ["former", "mid", "last"]
    # The unused ``clauses`` list was dropped; it was never handed to the
    # parser, so it had no effect on the resulting query.
    parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer)
    parser.setDefaultOperator(QueryParserBase.AND_OPERATOR)
    query = MultiFieldQueryParser.parse(parser, text)

    scoreDocs = searcher.search(query, 200).scoreDocs
    return process(scoreDocs, searcher)
예제 #8
0
    def multiFieldsPairSearch(self, pair, sim):
        """Search with a (title, content) pair over two fields.

        Both elements are escaped and parsed against ``content_section`` and
        ``title_article`` with OR as the default operator. The title query is
        added as a FILTER clause and the content query as a SHOULD clause.
        ``sim`` is installed on ``self.searcher`` before the search runs.

        Args:
            pair: Indexable whose item 0 is the title (underscores are
                replaced with spaces) and item 1 is the content text.
            sim: Similarity object passed to ``self.searcher.setSimilarity``.

        Returns:
            The ``scoreDocs`` of a search limited to 6 hits.
        """
        title_text = pair[0].replace('_', ' ')
        content_text = pair[1]

        search_fields = ["content_section", "title_article"]
        field_parser = MultiFieldQueryParser(search_fields, self.analyzer)
        field_parser.setDefaultOperator(QueryParserBase.OR_OPERATOR)
        title_query = MultiFieldQueryParser.parse(
            field_parser, QueryParser.escape(title_text))
        content_query = MultiFieldQueryParser.parse(
            field_parser, QueryParser.escape(content_text))

        builder = BooleanQuery.Builder()
        builder.add(title_query, BooleanClause.Occur.FILTER)
        builder.add(content_query, BooleanClause.Occur.SHOULD)

        self.searcher.setSimilarity(sim)
        top_docs = self.searcher.search(builder.build(), 6)
        return top_docs.scoreDocs