#!/usr/bin/env python
'''
Command-line driver: index a directory with lucene_index(), then run one
query against the resulting index with lucene_search().

@note: to be removed
'''
import argparse

from lucenesearch.lucene_index import lucene_index
from lucenesearch.lucene_search import lucene_search


def _build_parser():
    '''Return the argument parser describing the -d, -o, -q and -l options.'''
    parser = argparse.ArgumentParser(description='Lucene index and search')
    # Only -d is mandatory; -o falls back to a fixed default location.
    parser.add_argument(
        "-d",
        dest="input_folder",
        type=str,
        required=True,
        help="The root directory to index and search",
    )
    parser.add_argument(
        "-o",
        dest="output_folder",
        type=str,
        required=False,
        default="/home/abhiramj/code/temp/index",
        help="Output directory of index",
    )
    # -q and -l have no default, so argparse yields None when they are omitted.
    parser.add_argument(
        "-q",
        dest="query_text",
        type=str,
        help="Query to search for",
    )
    parser.add_argument(
        "-l",
        dest="limit",
        type=int,
        help="Maximum number of results",
    )
    return parser


def main():
    '''Parse the command line, index the input folder, then run the search.'''
    options = _build_parser().parse_args()

    # Indexing always runs first and writes to the chosen output folder.
    lucene_index(options.input_folder, options.output_folder)

    # NOTE: the search result is neither printed nor returned by this script.
    lucene_search(options.output_folder, options.limit, options.query_text)


if __name__ == "__main__":
    main()
query = raw_input('Enter query: ') # 'Human computer interaction' limit = int(raw_input('Limit: ')) if search_algorithm == 'LDA': None # Process the query # responsive_docs, non_responsive_docs = process_query(query, dictionary, lda, index, doc_paths, limit) # nrd = np.array(non_responsive_docs) # nrd_paths = [os.path.join(dir_path, nrd[idx,2]) for idx, dir_path in enumerate(nrd[:,1])] # looks like i'm not getting full file paths elif search_algorithm == 'Lucene': # None responsive_docs = lucene_search(lucene_index_file, limit, query) non_responsive_docs = [] for file_name in find_files_in_folder(DATA_PATH): if os.path.dirname(file_name) is not lucene_index_file: # skipping index directory if file_name not in responsive_docs: non_responsive_docs.append(file_name) nrd_paths=non_responsive_docs print 'Number of responsive documents:', len(responsive_docs) print 'Number of non responsive documents:', len(non_responsive_docs) print 'The responsive files are: ' for f in responsive_docs: print f