def query_index(query, hit_logs_for_each, score_logs_for_each):
    """Search the 'bigclonebench_4_text' index for code similar to *query*.

    Args:
        query: Raw user query; it is wrapped in ``Generator`` before searching.
        hit_logs_for_each: Tab-separated hit-count log. Two columns are
            appended: the raw hit count and the recommended count.
        score_logs_for_each: Score log, handed to and returned by
            ``BenchSearcher.more_like_this3``.

    Returns:
        A ``(recommended, hit_logs_for_each, score_logs_for_each)`` tuple,
        where ``recommended`` is the output of ``recommend`` applied to the
        hits sorted by descending ``score``.
    """
    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 8_Querying for the Final Results
    # Nested try/finally so the searcher, the manager and the directory are
    # released (in that order) even when the search itself raises.
    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_4_text'))
    try:
        searchermgr = SearcherManager(directory, SearcherFactory())
        try:
            searchermgr.maybeRefresh()
            searcher = searchermgr.acquire()
            try:
                benchsearcher = BenchSearcher(searcher)  # BigCloneBench
                bench_result, score_logs_for_each = (
                    benchsearcher.more_like_this3(
                        5000, score_logs_for_each, user_code_query))
            finally:
                searchermgr.release(searcher)
        finally:
            searchermgr.close()
    finally:
        directory.close()

    # A falsy result (None or empty) is treated as "no hits" so that the
    # sort below cannot fail on None.
    bench_result = bench_result or []

    # Log : Bench_result for the query
    hit_logs_for_each += str(len(bench_result)) + '\t'

    sorted_bench_results = sorted(bench_result, key=attrgetter('score'),
                                  reverse=True)

    # Single pre-formatted argument: prints exactly what the Python 2
    # statement "print 'Search Count : ', n" printed, and also parses on
    # Python 3.
    print('Search Count :  %d' % len(sorted_bench_results))
    recommended = recommend(sorted_bench_results)
    print('Final Count :  %d' % len(recommended))

    # Log : recommended count (forced to 0 when the search found nothing)
    if bench_result:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += '0' + '\t'

    return recommended, hit_logs_for_each, score_logs_for_each
def _hit_column(items):
    """Return one tab-terminated log column: len(items), or '0' if falsy."""
    if items:
        return str(len(items)) + '\t'
    return '0' + '\t'


def _with_searcher(index_name, action):
    """Run ``action(searcher)`` against the index INDICES_PATH + index_name.

    The searcher is released, then the manager and the directory are closed,
    whether or not ``action`` raises. Returns whatever ``action`` returns.
    """
    directory = SimpleFSDirectory(File(INDICES_PATH + index_name))
    try:
        searchermgr = SearcherManager(directory, SearcherFactory())
        try:
            searchermgr.maybeRefresh()
            searcher = searchermgr.acquire()
            try:
                return action(searcher)
            finally:
                searchermgr.release(searcher)
        finally:
            searchermgr.close()
    finally:
        directory.close()


def query_index(query, hit_logs_for_each, score_logs_for_each):
    """Expand *query* through the Q&A indices, then search 'bigclonebench_2'.

    Pipeline: similar answer snippets ('stackoverflow') -> their questions
    -> top question docs ('questionIndex') -> similar questions -> their
    answers -> answer docs ('stackoverflow') -> one BigCloneBench search per
    answer doc -> ``recommend`` on the hits sorted by descending ``score``.

    Args:
        query: Raw user query; it is wrapped in ``Generator`` before searching.
        hit_logs_for_each: Tab-separated hit-count log; one column is
            appended per pipeline stage (see the "Log :" comments below).
        score_logs_for_each: Score log, threaded through every
            ``BenchSearcher.more_like_this2`` call.

    Returns:
        A ``(recommended, hit_logs_for_each, score_logs_for_each)`` tuple.
    """
    # Columns are collected here and joined once at the end.
    columns = []

    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 2_Finding up to 20 Answer Snippets using the User Query (refined)
    def find_answers(searcher):
        snippet_searcher = SnippetSearcher(searcher, user_code_query)
        ids = snippet_searcher.more_like_this(20, query=user_code_query)
        return snippet_searcher, ids

    answers, answer_ids = _with_searcher('stackoverflow', find_answers)
    # Log : Answer count
    columns.append(_hit_column(answer_ids))

    ### 3_Finding the Associated Questions
    # Called after the index has been closed, exactly as before.
    question_ids = answers.find_question_ids(answer_ids)
    # Log : Answer - Question count
    columns.append(_hit_column(question_ids))

    def top_question_docs(searcher):
        # Keep only the 7 top-ranked question docs.
        return GettingQuestionDocs(searcher).search(question_ids, 20)[0:7]

    item_docs = _with_searcher('questionIndex', top_question_docs)
    # Log : Question ItemDoc count
    columns.append(_hit_column(item_docs))

    ### 4_Finding up to 7 Similar Questions per Question (7 X 7)
    def find_similar(searcher):
        finder = SimilarQsSearcher(searcher)
        # Look up 7 similar questions for each question doc.
        return [finder.more_like_this2(item_doc, 7) for item_doc in item_docs]

    per_question = _with_searcher('questionIndex', find_similar)

    similar_questions = []
    # Log : Similar Question count for each of Question ItemDoc
    for found in per_question:
        columns.append(_hit_column(found))
        if found:  # a None/empty result must not be concatenated
            similar_questions += found
    # Always pad to 7 columns. The original wrote the 7 zeros only when
    # item_docs was empty, so a partial result produced a narrower row.
    columns.append(('0' + '\t') * (7 - len(per_question)))

    # Log : Similar Question result count
    columns.append(_hit_column(similar_questions))

    ### 5_Finding Associated Answers for each Question
    answer_ids = find_answer_ids(similar_questions)
    # Log : Question - Answer count
    columns.append(_hit_column(answer_ids))

    ### 6_Getting Answer Docs for the Final Query
    def load_answer_docs(searcher):
        return GettingAnswerDocs(searcher).search(answer_ids)

    answer_docs = _with_searcher('stackoverflow', load_answer_docs)
    # Log : Answer Docs count
    columns.append(_hit_column(answer_docs))
    # Treat a falsy result as "no docs" so the loop below cannot fail on None.
    answer_docs = answer_docs or []

    ### 7_Appending for the user query results
    ### 8_Querying for the Final Results
    def search_bench(searcher):
        benchsearcher = BenchSearcher(searcher)  # BigCloneBench
        score_logs = score_logs_for_each
        hits_per_doc = []
        for answer_doc in answer_docs:
            bench_result, score_logs = benchsearcher.more_like_this2(
                100, answer_doc, score_logs, user_code_query, 0)
            hits_per_doc.append(bench_result)
        return hits_per_doc, score_logs

    hits_per_doc, score_logs_for_each = _with_searcher(
        'bigclonebench_2', search_bench)

    bench_results = []
    # Log : Bench_result for each query
    for bench_result in hits_per_doc:
        columns.append(_hit_column(bench_result))
        if bench_result:
            bench_results += bench_result
    # Pad to 49 columns (7 x 7). The original test `answer_docs < 49`
    # compared the list itself with an int, so the padding never ran.
    columns.append(('0' + '\t') * (49 - len(hits_per_doc)))

    columns.append(_hit_column(bench_results))

    sorted_bench_results = sorted(bench_results, key=attrgetter('score'),
                                  reverse=True)

    # Single pre-formatted argument: prints exactly what the Python 2
    # statement "print 'Search Count : ', n" printed, and also parses on
    # Python 3.
    print('Search Count :  %d' % len(sorted_bench_results))
    recommended = recommend(sorted_bench_results)
    print('Final Count :  %d' % len(recommended))

    # Log : recommended count (forced to 0 when nothing was found at all)
    if bench_results:
        columns.append(str(len(recommended)) + '\t')
    else:
        columns.append('0' + '\t')

    hit_logs_for_each += ''.join(columns)
    return recommended, hit_logs_for_each, score_logs_for_each