#!/usr/bin/env python
'''
Command-line driver: index a directory with Lucene, then search that index.

@note: to be removed
'''
import argparse

from lucenesearch.lucene_index import lucene_index
from lucenesearch.lucene_search import lucene_search


def _parse_cli_args():
    '''Declare the -d/-o/-q/-l options and return the parsed namespace.'''
    parser = argparse.ArgumentParser(description='Lucene index and search')
    parser.add_argument(
        "-d", dest="input_folder", type=str, required=True,
        help="The root directory to index and search")
    # NOTE(review): the default below is a developer-specific absolute path.
    parser.add_argument(
        "-o", dest="output_folder", type=str, required=False,
        default="/home/abhiramj/code/temp/index",
        help="Output directory of index")
    parser.add_argument(
        "-q", dest="query_text", type=str,
        help="Query to search for")
    parser.add_argument(
        "-l", dest="limit", type=int,
        help="Maximum number of results")
    return parser.parse_args()


if __name__ == "__main__":
    args = _parse_cli_args()

    # Index first: the search below reads from the same output folder.
    lucene_index(args.input_folder, args.output_folder)

    # NOTE(review): -q and -l are optional, so limit/query_text may be None
    # here; the result is bound but not used anywhere in this script.
    responsive = lucene_search(args.output_folder, args.limit, args.query_text)
# Locations of the Enron artefacts, all resolved relative to DATA_PATH.
doc_paths_file = os.path.join(DATA_PATH, 'fs_enron.email_paths')
lda_mdl_file = os.path.join(DATA_PATH, 'fs_enron.lda_mdl')
lda_index_file = os.path.join(DATA_PATH, 'fs_enron.lda_index_kl')

SEEDCONSTANT = 2013

# Loading of the LDA model and file details is currently disabled:
# doc_paths = load_docs_info(doc_paths_file)
# dictionary, lda, index = load_lda_variables(dictionary_file, lda_mdl_file, lda_index_file)

# Lucene-specific settings: ask where the index lives and build it from
# DATA_PATH only when no index exists at that location yet.
output_folder = raw_input('Enter output folder: ')
lucene_index_file = os.path.join(output_folder, 'lucene.index')
if not os.path.exists(lucene_index_file):
    lucene_index(DATA_PATH, lucene_index_file)

# Keep prompting for queries until `choice` becomes 'y'.
# NOTE(review): nothing in the visible part of the loop updates `choice`.
choice = 'n'
while choice != 'y':
    # Collect the query parameters from the user.
    search_algorithm = raw_input('Search algorithm [LDA or Lucene]: ').strip()
    query = raw_input('Enter query: ')  # e.g. 'Human computer interaction'
    limit = int(raw_input('Limit: '))

    if search_algorithm == 'LDA':
        # Placeholder: LDA query processing is commented out for now.
        # responsive_docs, non_responsive_docs = process_query(query, dictionary, lda, index, doc_paths, limit)
        pass