def topicsearch(query): global topic_searcher_, LAST_QUERY, LAST_RESPONSE if DEBUG: print "Raw Query: ", query analyzer = PerFieldAnalyzerWrapper(StandardAnalyzer(Version.LUCENE_CURRENT)) analyzer.addAnalyzer("loc", KeywordAnalyzer(Version.LUCENE_CURRENT)) #analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) parsed_query = process_query(query) if parsed_query is None: return "" #create query for lucene real_query = text_query = parsed_query["text_query"] parsed_query["real_query"] = real_query if DEBUG: print "Real Query: ", real_query if LAST_QUERY == real_query: return LAST_RESPONSE #parse query using lucene parser and get docs p_query = QueryParser(Version.LUCENE_CURRENT, "text", analyzer).parse(real_query) print str(p_query) scoreDocs = topic_searcher_.search(p_query, 500).scoreDocs print "%s total matching documents." % len(scoreDocs) #rank results experts = [] rankedDocs = rankDocs_topic(parsed_query, topic_searcher_, scoreDocs) for i in rankedDocs: if parsed_query["profile"] == "yes": experts.append({"u": i["user"], "d": i["details"], "p": i["profile"]}) else: experts.append({"u": i["user"], "d": i["details"]}) response = {"sid": str(uuid.uuid1()), "es": experts} if "with_request" in parsed_query and parsed_query["with_request"] == "yes": response = {"q": parsed_query, "e": experts} if "user_study" in parsed_query and parsed_query["user_study"] == "yes": #write session to db session = {"q": text_query, "l": parsed_query["location"], "_id": response["sid"], "ur": {}} conn = Connection("wheezy.cs.tamu.edu", 27017) db = conn["ole_evaluation"] db["user_response"].insert(session) #print response LAST_QUERY = real_query LAST_RESPONSE = cjson.encode(response) #return cjson.encode(response) return LAST_RESPONSE
def testPerFieldAnalyzer(self):
    """A keyword analyzer bound to "partnum" keeps Q36 intact, so the doc matches."""
    wrapper = PerFieldAnalyzerWrapper(SimpleAnalyzer())
    wrapper.addAnalyzer("partnum", KeywordAnalyzer())
    parser = QueryParser(Version.LUCENE_CURRENT, "description", wrapper)
    parsed = parser.parse("partnum:Q36 AND SPACE")
    hits = self.searcher.search(parsed, 50).scoreDocs
    #self.assertEqual("+partnum:Q36 +space", parsed.toString("description"))
    self.assertEqual(1, len(hits), "doc found!")
def build_perfield_analyzer(index_fields):
    """
    Build a PerFieldAnalyzerWrapper that associates a dedicated Lucene
    Analyzer with each field in our index; fields marked "standard" fall
    back to the wrapper's default StandardAnalyzer.
    """
    wrapper = PerFieldAnalyzerWrapper(StandardAnalyzer(Version.LUCENE_CURRENT))
    # Only non-"standard" fields need an explicit per-field analyzer.
    overrides = ((name, kind) for name, kind in index_fields.iteritems()
                 if kind != "standard")
    for name, kind in overrides:
        wrapper.addAnalyzer(name, ANALYZER_LOOKUP.get(kind))
    return wrapper
def build_perfield_analyzer(index_fields):
    """
    Create a PerFieldAnalyzerWrapper so different Lucene Analyzers can be
    attached to specific fields of our index. Fields typed "standard" use
    the default StandardAnalyzer and get no explicit entry.
    """
    default_analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    per_field = PerFieldAnalyzerWrapper(default_analyzer)
    for field_name, analyzer_kind in index_fields.iteritems():
        if analyzer_kind != "standard":
            per_field.addAnalyzer(field_name,
                                  ANALYZER_LOOKUP.get(analyzer_kind))
    return per_field
def testAnalyzer(self):
    """StandardAnalyzer splits a slash path into a phrase; a per-field
    WhitespaceAnalyzer on "category" leaves it untouched."""
    base = StandardAnalyzer(Version.LUCENE_CURRENT)
    raw = "category:/philosophy/eastern"

    # Default analysis tokenizes on the slashes -> phrase query.
    split_query = QueryParser(Version.LUCENE_CURRENT, "contents", base).parse(raw)
    self.assertEqual("category:\"philosophy eastern\"",
                     split_query.toString("contents"),
                     "path got split, yikes!")

    # Override just the "category" field with whitespace analysis.
    per_field = PerFieldAnalyzerWrapper(base)
    per_field.addAnalyzer("category", WhitespaceAnalyzer())
    kept_query = QueryParser(Version.LUCENE_CURRENT, "contents",
                             per_field).parse(raw)
    self.assertEqual("category:/philosophy/eastern",
                     kept_query.toString("contents"),
                     "leave category field alone")
def testAnalyzer(self):
    """With auto-generated phrase queries enabled, StandardAnalyzer turns a
    slash path into a phrase; a per-field WhitespaceAnalyzer preserves it."""
    base = StandardAnalyzer(Version.LUCENE_CURRENT)
    raw = "category:/philosophy/eastern"

    parser = QueryParser(Version.LUCENE_CURRENT, "contents", base)
    parser.setAutoGeneratePhraseQueries(True)
    split_query = parser.parse(raw)
    self.assertEqual("category:\"philosophy eastern\"",
                     split_query.toString("contents"),
                     "path got split, yikes!")

    # Override only "category" so the path survives analysis intact.
    per_field = PerFieldAnalyzerWrapper(base)
    per_field.addAnalyzer("category", WhitespaceAnalyzer())
    kept_query = QueryParser(Version.LUCENE_CURRENT, "contents",
                             per_field).parse(raw)
    self.assertEqual("category:/philosophy/eastern",
                     kept_query.toString("contents"),
                     "leave category field alone")
# running as foreground process #indexer.setDaemon(True) #indexer.start() #print 'Starting Indexer in background...' # If return from Searcher, then call the signal handler to clean up the indexer cleanly quit_gracefully() """ # what directory we want to index directoryToWalk = dir_user_location_map env=lucene.initVM() # For now I just use the StandardAnalyzer, but you can change this #analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) analyzer = PerFieldAnalyzerWrapper(StandardAnalyzer(Version.LUCENE_CURRENT)) analyzer.addAnalyzer("loc", UnderscoreSeparatorAnalyzer(Version.LUCENE_CURRENT)) """ #location indexer LOCATION_INDEX_STORE_DIR = location_index_store_dir print location_index_store_dir location_index_dir = createIndexDir(LOCATION_INDEX_STORE_DIR) # we will need a writer writer = IndexWriter(location_index_dir, analyzer, True, IndexWriter.MaxFieldLength.LIMITED) writer.setMaxFieldLength(1048576) # and start the indexer location_indexer = LocationIndexer(LOCATION_INDEX_STORE_DIR, writer, directoryToWalk) location_indexer.run() writer.close()
def search(query=None): global location_searcher_, LAST_QUERY, LAST_RESPONSE if DEBUG: print "Raw Query: ", query analyzer = PerFieldAnalyzerWrapper(StandardAnalyzer(Version.LUCENE_CURRENT)) analyzer.addAnalyzer("loc", KeywordAnalyzer(Version.LUCENE_CURRENT)) #analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) parsed_query = process_query(query, "l") if parsed_query is None: return "" #create query for lucene real_query = "" locations = {} if "epsilon" in parsed_query: locations = get_nearby_locations(parsed_query["location"], parsed_query["epsilon"]) else: locations = get_nearby_locations(parsed_query["location"], 0) parsed_query["locations"] = locations if len(locations.keys()) == 1: real_query = real_query + "\"" + parsed_query["location"] + "\"" else: """ locations_query = [] for l in locations: locations_query.append(l + "^" + str(locations[l]["dwt"])) real_query = real_query + " OR ".join(locations_query) """ real_query = real_query + " OR ".join(["\"" + l + "\"" for l in locations]) parsed_query["real_query"] = real_query if DEBUG: print "Lucene Query: ", real_query if LAST_QUERY == real_query: return LAST_RESPONSE #parse query using lucene parser and get docs p_query = QueryParser(Version.LUCENE_CURRENT, "loc", analyzer).parse(real_query) scoreDocs = location_searcher_.search(p_query, 500).scoreDocs print "%s total matching documents." 
% len(scoreDocs) #rank results experts = [] rankedDocs = rankDocs(parsed_query, location_searcher_, scoreDocs) for i in rankedDocs: if parsed_query["profile"] == "yes": experts.append({"u": i["user"], "d": i["details"], "p": i["profile"], "t": i["tweets"]}) else: experts.append({"u": i["user"], "d": i["details"]}) response = {"sid": str(uuid.uuid1()), "es": experts} if "with_request" in parsed_query and parsed_query["with_request"] == "yes": response = {"q": parsed_query, "e": experts} if "user_study" in parsed_query and parsed_query["user_study"] == "yes": #write session to db session = {"l": parsed_query["location"], "_id": response["sid"], "ur": {}} conn = Connection("wheezy.cs.tamu.edu", 27017) db = conn["ole_evaluation"] db["user_response"].insert(session) LAST_QUERY = real_query LAST_RESPONSE = cjson.encode(response) #return cjson.encode(response) return LAST_RESPONSE