def complex_search_benchmarks(gen_index=False, no_of_docs=1000):
    """Benchmark complex search queries against the Whoosh index and print a report.

    A keyword list is obtained from ``bench_utils.generate_keywords``. For
    every keyword, ``complex_search`` is handed to ``bench_utils.timer`` and
    to ``bench_utils.memory_consumption``; the two measurements are printed
    per keyword, followed by their averages over all keywords.

    Args:
        gen_index: When true, ``bench_utils.MAX_INDEX_ENTRIES`` is set to
            ``no_of_docs`` and ``create_index()`` is called before searching.
            When false, whatever is under ``WHOOSH_INDEX_DIR`` is used as is
            (presumably the index must already exist -- confirm with callers).
        no_of_docs: Number of documents to index; only read when
            ``gen_index`` is true.

    Returns:
        None. All results are written to standard output.
    """
    if gen_index:
        # NOTE: this overwrites a module-level setting in bench_utils and is
        # not restored afterwards.
        bench_utils.MAX_INDEX_ENTRIES = no_of_docs
        create_index()

    keywords = bench_utils.generate_keywords(no_of_keywords=MAX_NO_OF_KEYWORDS)
    index_size = bench_utils.get_dir_size(WHOOSH_INDEX_DIR)

    # Each print takes one parenthesised expression so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    print("\n===== Performance of searching of complex queries in Whoosh =====")
    print("Size of the index: %f MB" % index_size)
    print("No. of indexed documents: %d " % bench_utils.MAX_INDEX_ENTRIES)
    print("No of search queries: %d" % len(keywords))
    print("------------------------------------------------------------------")
    print("Search Word Time(sec) Memory(MB)")
    print("----------------------------------------------")

    # Float accumulators keep the averages below from being truncated by
    # Python 2 integer division when a measurement comes back as an int.
    total_time = 0.0
    total_memory = 0.0
    for word in keywords:
        # The search runs twice per keyword: once timed, once memory-profiled.
        tt = bench_utils.timer(complex_search, word)
        mu = bench_utils.memory_consumption(complex_search, (word,))
        total_time += tt
        total_memory += mu
        print("%-10s %10f %10f" % (word, tt, mu))

    query_count = len(keywords)
    if query_count:
        avg_time = total_time / query_count
        avg_memory = total_memory / query_count
    else:
        # No queries were run; report zero instead of raising
        # ZeroDivisionError after the header has already been printed.
        avg_time = 0.0
        avg_memory = 0.0

    print("\nAverage time taken: %f secs" % avg_time)
    print("Average memory used: %f MB" % avg_memory)
def create_index_benchmark(verbose=True, use_multiprocessing=False):
    """Measure index creation and optionally print a short report.

    ``create_index`` is handed first to ``bench_utils.timer`` and then to
    ``bench_utils.memory_consumption`` (presumably each helper invokes it
    once, so the index is built twice -- confirm in bench_utils). The size
    of ``WHOOSH_INDEX_DIR`` is read afterwards.

    Args:
        verbose: When true, print the benchmark settings taken from
            ``bench_utils`` together with the measured time and memory.
        use_multiprocessing: Forwarded to ``create_index`` -- positionally
            through the timer and as a keyword argument through the memory
            profiler.

    Returns:
        A ``(time_taken, memory_used, index_size)`` tuple. The printed
        labels report the first two as seconds and MB.
    """
    elapsed = bench_utils.timer(create_index, use_multiprocessing)

    create_kwargs = {'use_multiprocessing': use_multiprocessing}
    peak_memory = bench_utils.memory_consumption(create_index, (), create_kwargs)

    size_on_disk = bench_utils.get_dir_size(WHOOSH_INDEX_DIR)
    result = (elapsed, peak_memory, size_on_disk)

    if not verbose:
        return result

    # Format every line up front, then print them one by one; each print
    # takes a single parenthesised expression so it behaves the same under
    # the Python 2 print statement and the Python 3 print function.
    report_lines = [
        "\n===== Performance for index creation =====",
        "No. of indexed documents: %d" % bench_utils.MAX_INDEX_ENTRIES,
        "No. of words in each document: %d" % bench_utils.MAX_WORDS_IN_TEXT,
        "Length of each word: %d chars" % bench_utils.MAX_WORD_LENGTH,
        "Average time taken: %f secs" % elapsed,
        "Average memory used: %f MB" % peak_memory,
    ]
    for line in report_lines:
        print(line)

    return result