def printResult(result, query):
    """Print a human-readable summary of every search hit in ``result``.

    Args:
        result: Iterable of mappings; each must provide ``'Title'``,
            ``'FileName'`` and ``'Content'`` entries that can be concatenated
            with ``str``.
        query: Raw query string. It is lower-cased and passed, together with
            the hit's content, to ``getRelevantText``.

    Returns:
        None. All output is written to stdout.
    """
    for hit in result:
        # A single parenthesised argument makes ``print`` behave the same
        # whether it is parsed as a Python 2 statement or a Python 3 call.
        print('Title : ' + hit['Title'])
        print('Filename : ' + hit['FileName'])
        # Only the first 20 characters of the content are shown as a teaser.
        print('Short description : ' + hit['Content'][0:20] + '...')
        relevant_text = getRelevantText(hit['Content'], query.lower())
        # The explicit '\n' leaves a blank line between consecutive hits.
        print('Relevant text : ' + relevant_text + '\n')
def search(input_query, query, directory, weighting):
    """Search the index stored in ``directory`` and return every hit.

    Args:
        input_query: Query handed unchanged to ``searcher.search``
            (presumably an already-parsed Whoosh query object -- confirm
            against the caller).
        query: Raw query string. It is lower-cased and passed, together with
            each hit's content, to ``getRelevantText``.
        directory: Location of the index, passed to ``open_dir``.
        weighting: Scoring model, passed to ``ix.searcher(weighting=...)``.

    Returns:
        A list with one dict per hit, holding the keys ``'FileName'``,
        ``'Title'``, ``'Content'`` (first 20 characters only) and
        ``'RelevantText'``.
    """
    ix = open_dir(directory)
    # No writer is opened here: searching is read-only, and a writer that is
    # never committed or cancelled would keep the index write-locked.
    try:
        with ix.searcher(weighting=weighting) as searcher:
            # limit=None asks for all matching documents, not just a top-N.
            results = searcher.search(input_query, limit=None)
            response = []
            # Hits must be read while the searcher is still open.
            for hit in results:
                response.append({
                    'FileName': hit['FileName'],
                    'Title': hit['Title'],
                    'Content': hit['Content'][0:20],
                    'RelevantText': getRelevantText(hit['Content'],
                                                    query.lower()),
                })
    finally:
        # Release the index even when opening the searcher or the search
        # itself raises.
        ix.close()
    return response
def search(input_query, query, directory, weighting):
    """Search the index stored in ``directory`` and return the top hits.

    Args:
        input_query: Query handed unchanged to ``searcher.search``
            (presumably an already-parsed Whoosh query object -- confirm
            against the caller).
        query: Raw query string. It is lower-cased and passed, together with
            each hit's content, to ``getRelevantText``.
        directory: Location of the index, passed to ``open_dir``.
        weighting: Scoring model, passed to ``ix.searcher(weighting=...)``.

    Returns:
        A list with one dict per hit, holding the keys ``'FileName'``,
        ``'Title'``, ``'Content'`` (first 20 characters only) and
        ``'RelevantText'``. No explicit limit is given, so the number of
        hits is capped by the searcher's default.
    """
    ix = open_dir(directory)
    # No writer is opened here: searching is read-only, and a writer that is
    # never committed or cancelled would keep the index write-locked.
    try:
        with ix.searcher(weighting=weighting) as searcher:
            results = searcher.search(input_query)
            response = []
            # Hits must be read while the searcher is still open.
            for hit in results:
                response.append({
                    'FileName': hit['FileName'],
                    'Title': hit['Title'],
                    'Content': hit['Content'][0:20],
                    'RelevantText': getRelevantText(hit['Content'],
                                                    query.lower()),
                })
    finally:
        # Release the index even when opening the searcher or the search
        # itself raises.
        ix.close()
    return response
"""Entry point that ties together indexing, query parsing, search and the
other parts.

Other files should contain functions only; the input to them should be
passed in from this script.
"""
import os

from search import search_BM25, searchPhrasal
from query import query_OR, query_AND
from util_functions import getRelevantText

if __name__ == "__main__":
    # ``raw_input`` exists only on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    query = read_line("Please Enter the query to search for:")
    # Build the AND form of the query for the BM25 search.
    input_query = query_AND(query)

    # Single-argument parenthesised ``print`` produces the same output as a
    # Python 2 statement and as a Python 3 function call.
    print("BM25 Results")
    result = search_BM25(input_query)
    for hit in result:
        print('Title : ' + hit['Title'])
        print('Filename : ' + hit['FileName'])
        # Only the first 20 characters of the content are shown as a teaser.
        print('Short description : ' + hit['Content'][0:20] + '...')
        relevant_text = getRelevantText(hit['Content'], query.lower())
        print('Relevant text : ' + relevant_text)

    print("Phrasal Query Results")
    # The phrasal search receives the raw query string, not the AND form.
    # NOTE(review): these hits are fetched but never displayed; confirm the
    # shape returned by searchPhrasal before printing them.
    result = searchPhrasal(query)
"""Entry point that ties together indexing, query parsing, search and the
other parts.

Other files should contain functions only; the input to them should be
passed in from this script.
"""
import os

from search import search_BM25, searchPhrasal
from query import query_OR, query_AND
from util_functions import getRelevantText

if __name__ == "__main__":
    # ``raw_input`` exists only on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    query = read_line("Please Enter the query to search for:")
    # Build the AND form of the query for the BM25 search.
    input_query = query_AND(query)

    # Single-argument parenthesised ``print`` produces the same output as a
    # Python 2 statement and as a Python 3 function call.
    print("BM25 Results")
    result = search_BM25(input_query)
    for hit in result:
        print('Title : ' + hit['Title'])
        print('Filename : ' + hit['FileName'])
        # Only the first 20 characters of the content are shown as a teaser.
        print('Short description : ' + hit['Content'][0:20] + '...')
        relevant_text = getRelevantText(hit['Content'], query.lower())
        print('Relevant text : ' + relevant_text)

    print("Phrasal Query Results")
    # The phrasal search receives the raw query string, not the AND form.
    # NOTE(review): these hits are fetched but never displayed; confirm the
    # shape returned by searchPhrasal before printing them.
    result = searchPhrasal(query)