#!/usr/bin/env python
'''
@note: to be removed 
'''


import argparse
from lucenesearch.lucene_index import lucene_index
from lucenesearch.lucene_search import lucene_search

if __name__ == "__main__":
    # Script entry point: index the folder given with -d into the folder
    # given with -o, then run a single query against that output folder.

    # Each entry pairs a flag with the keyword arguments for add_argument;
    # the order here is the order the options are registered in.
    option_table = (
        ("-d", dict(dest="input_folder", type=str, required=True,
                    help="The root directory to index and search")),
        ("-o", dict(dest="output_folder", type=str, required=False,
                    default="/home/abhiramj/code/temp/index",
                    help="Output directory of index")),
        ("-q", dict(dest="query_text", type=str,
                    help="Query to search for")),
        ("-l", dict(dest="limit", type=int,
                    help="Maximum number of results")),
    )

    cli_parser = argparse.ArgumentParser(description='Lucene index and search')
    for flag, settings in option_table:
        cli_parser.add_argument(flag, **settings)

    # -q and -l are neither required nor given a default, so argparse leaves
    # them as None when they are omitted on the command line.
    args = cli_parser.parse_args()

    lucene_index(args.input_folder, args.output_folder)
    # NOTE(review): the search result is bound but never used or printed here.
    responsive = lucene_search(args.output_folder, args.limit, args.query_text)
    
# Example #2
# 0
# Paths of the three fs_enron.* data files, each joined onto DATA_PATH
# (DATA_PATH is defined outside the lines shown here).
doc_paths_file = os.path.join(DATA_PATH, 'fs_enron.email_paths')
lda_mdl_file = os.path.join(DATA_PATH, 'fs_enron.lda_mdl')
lda_index_file = os.path.join(DATA_PATH, 'fs_enron.lda_index_kl')
# NOTE(review): not referenced in the visible code; presumably a random
# seed -- confirm against the rest of the file.
SEEDCONSTANT = 2013 

# Loads the LDA model and file details 
# (currently disabled: both calls are commented out, and the second one
# needs `dictionary_file`, which is not defined in the visible lines)

#doc_paths = load_docs_info(doc_paths_file)
#dictionary, lda, index = load_lda_variables(dictionary_file, lda_mdl_file, lda_index_file)

#Lucene specific settings and indexing 
# Prompt for the folder that holds the Lucene index (raw_input: Python 2).
output_folder = raw_input('Enter output folder: ')
# Initial value for the `while choice <> 'y'` loop that follows.
choice = 'n'
lucene_index_file = os.path.join(output_folder, 'lucene.index')
# Run the indexer over DATA_PATH only when nothing exists at the index path
# yet; when the path already exists this step is skipped.
if not os.path.exists(lucene_index_file):
    lucene_index(DATA_PATH, lucene_index_file)
while choice <> 'y':  

    ## Enter query 
    search_algorithm = raw_input('Search algorithm [LDA or Lucene]: ').strip()
    
    query = raw_input('Enter query: ')  # 'Human computer interaction'
    limit = int(raw_input('Limit: '))
    
    
    if search_algorithm == 'LDA':
    
        None
        # Process the query 
        
#        responsive_docs, non_responsive_docs = process_query(query, dictionary, lda, index, doc_paths, limit)