def query_index(query, hit_logs_for_each, score_logs_for_each):
    """Search the ``bigclonebench_3`` index directly with the user's code query.

    Args:
        query: Raw user input; handed to ``Generator`` to build the code query.
        hit_logs_for_each: Tab-separated hit-count log string. Two columns are
            appended: the raw search hit count and the recommended hit count.
        score_logs_for_each: Score log that is passed into, and returned
            updated from, ``BenchSearcher.more_like_this3``.

    Returns:
        Tuple ``(recommended, hit_logs_for_each, score_logs_for_each)`` where
        ``recommended`` is whatever ``recommend`` returns for the hits sorted
        by descending ``score``.
    """

    def emit(label, value):
        # Writes the same bytes as the Python 2 statement ``print label, value``
        # (items joined by one space) while also parsing under Python 3.
        print('%s %s' % (label, value))

    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 8_Querying for the Final Results
    # Every acquired resource is released in a ``finally`` so that a failing
    # search no longer leaks the searcher, the manager or the directory.
    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_3'))
    try:
        searchermgr = SearcherManager(directory, SearcherFactory())
        try:
            searchermgr.maybeRefresh()
            searcher = searchermgr.acquire()
            try:
                benchsearcher = BenchSearcher(searcher)  # BigCloneBench
                bench_result, score_logs_for_each = benchsearcher.more_like_this3(
                    5000, score_logs_for_each, user_code_query)
            finally:
                searchermgr.release(searcher)
        finally:
            searchermgr.close()
    finally:
        directory.close()

    # Log : Bench_result for the query
    if bench_result:
        hit_logs_for_each += str(len(bench_result)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_result, key=attrgetter('score'), reverse=True)
    emit('Search Count : ', len(sorted_bench_results))

    recommended = recommend(sorted_bench_results)
    emit('Final Count : ', len(recommended))

    # Log : Recommended count (forced to 0 when the search itself found nothing)
    if bench_result:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    return recommended, hit_logs_for_each, score_logs_for_each
def query_index(query, hit_logs_for_each, score_logs_for_each):
    """Run the Stack Overflow -> questions -> BigCloneBench search pipeline.

    The user query is matched against the ``stackoverflow`` index, the hits
    are mapped to questions, similar questions are looked up in
    ``questionIndex``, their answers are fetched from ``stackoverflow`` again
    and each answer document is finally searched in ``bigclonebench_2``.

    Args:
        query: Raw user input; handed to ``Generator`` to build the code query.
        hit_logs_for_each: Tab-separated hit-count log string. One column is
            appended per pipeline stage (see the ``# Log`` comments below).
        score_logs_for_each: Score log that is passed into, and returned
            updated from, ``BenchSearcher.more_like_this2``.

    Returns:
        Tuple ``(recommended, hit_logs_for_each, score_logs_for_each)`` where
        ``recommended`` is whatever ``recommend`` returns for the combined
        hits sorted by descending ``score``.
    """
    from contextlib import contextmanager

    @contextmanager
    def open_searcher(index_name):
        # Opens the named index under INDICES_PATH, yields an acquired
        # searcher and always releases/closes everything, even on error.
        directory = SimpleFSDirectory(File(INDICES_PATH + index_name))
        try:
            searchermgr = SearcherManager(directory, SearcherFactory())
            try:
                searchermgr.maybeRefresh()
                searcher = searchermgr.acquire()
                try:
                    yield searcher
                finally:
                    searchermgr.release(searcher)
            finally:
                searchermgr.close()
        finally:
            directory.close()

    def count_field(items):
        # One tab-terminated log column: len(items), or 0 for a falsy result.
        if items:
            return str(len(items)) + '\t'
        return '0' + '\t'

    def emit(label, value):
        # Writes the same bytes as the Python 2 statement ``print label, value``
        # (items joined by one space) while also parsing under Python 3.
        print('%s %s' % (label, value))

    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    with open_searcher('stackoverflow') as searcher:
        answers = SnippetSearcher(searcher, user_code_query)
        answer_ids = answers.more_like_this(20, query=user_code_query)

    # Log : Answer count
    hit_logs_for_each += count_field(answer_ids)

    ### 3_Finding the Associated Questions
    # As in the original, this runs after the stackoverflow searcher is closed.
    question_ids = answers.find_question_ids(answer_ids)

    # Log : Answer - Question count
    hit_logs_for_each += count_field(question_ids)

    with open_searcher('questionIndex') as searcher:
        # Keep only the 7 top-ranked question documents.
        item_docs = GettingQuestionDocs(searcher).search(question_ids, 20)[0:7]

    # Log : Question ItemDoc count
    hit_logs_for_each += count_field(item_docs)

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    with open_searcher('questionIndex') as searcher:
        question = SimilarQsSearcher(searcher)
        # Log : Similar Question count for each of Question ItemDoc
        if item_docs:
            for item_doc in item_docs:
                # Look up 7 similar questions for each question document.
                similar_question = question.more_like_this2(item_doc, 7)
                hit_logs_for_each += count_field(similar_question)
                similar_questions += similar_question
        else:
            # No question documents at all: emit 7 zero columns.
            # NOTE(review): with 1-6 documents the row is not padded to 7
            # columns; confirm whether the log reader expects a fixed width.
            hit_logs_for_each += ('0' + '\t') * 7

    # Log : Similar Question result count
    hit_logs_for_each += count_field(similar_questions)

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    hit_logs_for_each += count_field(answer_ids)

    ### 6_Getting Answer Docs for the Final Query
    with open_searcher('stackoverflow') as searcher:
        answer_docs = GettingAnswerDocs(searcher).search(answer_ids)

    # Log : Answer Docs count
    hit_logs_for_each += count_field(answer_docs)

    ### 8_Querying for the Final Results
    bench_results = []
    with open_searcher('bigclonebench_2') as searcher:
        benchsearcher = BenchSearcher(searcher)  # BigCloneBench
        # Log : Bench_result for each query
        for answer_doc in answer_docs:
            bench_result, score_logs_for_each = benchsearcher.more_like_this2(
                100, answer_doc, score_logs_for_each, user_code_query, 0)
            hit_logs_for_each += count_field(bench_result)
            bench_results += bench_result

    # Pad the per-answer columns up to 49. The original tested
    # ``answer_docs < 49`` (list vs. int), which is never true in Python 2,
    # so the padding was silently skipped.
    hit_logs_for_each += ('0' + '\t') * max(0, 49 - len(answer_docs))

    # Log : Results count
    hit_logs_for_each += count_field(bench_results)

    sorted_bench_results = sorted(bench_results, key=attrgetter('score'), reverse=True)
    emit('Search Count : ', len(sorted_bench_results))

    recommended = recommend(sorted_bench_results)
    emit('Final Count : ', len(recommended))

    # Log : Recommended count (forced to 0 when nothing was found at all)
    if bench_results:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    return recommended, hit_logs_for_each, score_logs_for_each
def query_index(query):
    """Recommend GitHub code for a user query via Stack Overflow answers.

    The user query is matched against the ``stackoverflow`` index, the hits
    are mapped to questions, similar questions are looked up in
    ``questionIndex``, their answers are fetched from ``stackoverflow`` again
    and each answer document is finally searched in the ``github`` index.
    Intermediate results are printed to stdout.

    Args:
        query: Raw user input; handed to ``Generator`` to build the code query.

    Returns:
        Whatever ``recommend`` returns for the selected hits, or the empty
        string ``''`` when no answer ids or no answer documents were found.
    """
    from contextlib import contextmanager

    @contextmanager
    def open_searcher(index_name):
        # Opens the named index under INDICES_PATH, yields an acquired
        # searcher and always releases/closes everything, even on error.
        directory = SimpleFSDirectory(File(INDICES_PATH + index_name))
        try:
            searchermgr = SearcherManager(directory, SearcherFactory())
            try:
                searchermgr.maybeRefresh()
                searcher = searchermgr.acquire()
                try:
                    yield searcher
                finally:
                    searchermgr.release(searcher)
            finally:
                searchermgr.close()
        finally:
            directory.close()

    def emit(label, value):
        # Writes the same bytes as the Python 2 statement ``print label, value``
        # (items joined by one space) while also parsing under Python 3.
        print('%s %s' % (label, value))

    ### 1_Query Alternation
    user_code_query = Generator(query)
    emit('query: ', query)
    emit('user_code_query: ', user_code_query)

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    with open_searcher('stackoverflow') as searcher:
        answers = SnippetSearcher(searcher, user_code_query)
        answer_ids = answers.more_like_this(10, query=user_code_query)
        emit('answer_ids: ', answer_ids)

    ### 3_Finding the Associated Questions
    # As in the original, this runs after the stackoverflow searcher is closed.
    question_ids = answers.find_question_ids(answer_ids)
    emit('question ids: ', question_ids)

    similar_questions = []
    with open_searcher('questionIndex') as searcher:
        ### 4_Cutting items
        # Keep only the 3 top-ranked question documents.
        item_docs = GettingQuestionDocs(searcher).search(question_ids, 10)[0:3]

        ### 5_Finding 3 Similar Questions per a Question (3 X 3)
        question = SimilarQsSearcher(searcher)
        if item_docs:
            for item_doc in item_docs:
                # Look up 3 similar questions for each question document.
                similar_questions += question.more_like_this2(item_doc, 3)
        emit('similar_questions: ', similar_questions)

    ### 6_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)
    emit('answer ids: ', answer_ids)
    if not answer_ids:
        return ''

    ### 7_Getting Answer Docs for the Final Query
    with open_searcher('stackoverflow') as searcher:
        answer_docs = GettingAnswerDocs(searcher).search(answer_ids)
    if not answer_docs:
        # The original indexed answer_docs[0] unconditionally and raised
        # IndexError here; report "nothing found" like the branch above.
        return ''

    git_results = []
    with open_searcher('github') as searcher:
        gitsearcher = GitSearcher(searcher)

        ### 7_Appending for the user query results
        # Same call as in the loop below but with 1 as the last argument;
        # the flag's meaning is defined by GitSearcher.more_like_this2.
        git_results += gitsearcher.more_like_this2(10, answer_docs[0], user_code_query, 1)

        ### 8_Querying for the Final Results
        for answer_doc in answer_docs:
            git_results += gitsearcher.more_like_this2(10, answer_doc, user_code_query, 0)

    # Group hits of the same answer next to each other, then keep at most
    # 3 hits per answer id so a single answer cannot dominate the output.
    git_results = sorted(git_results, key=attrgetter('so_item.answer_id'), reverse=True)
    current_answer_id = object()  # sentinel: differs from every real id
    kept_for_answer = 0
    temp_result = []
    for item in git_results:
        answer_id = item.so_item.answer_id
        if answer_id != current_answer_id:
            current_answer_id = answer_id
            kept_for_answer = 0
        if kept_for_answer < 3:
            kept_for_answer += 1
            temp_result.append(item)

    sorted_git_results = sorted(temp_result, key=attrgetter('score'), reverse=True)
    emit('Search Count : ', len(sorted_git_results))

    recommended = recommend(sorted_git_results)
    emit('Final Count : ', len(recommended))

    # NOTE: an earlier revision had commented-out code here that dumped the
    # recommended results to files for the Defect4J experiments.
    return recommended
def query_index(query, hit_logs_for_each, score_logs_for_each):
    """Run the Stack Overflow -> questions -> BigCloneBench search pipeline.

    This variant builds every searcher from an index path; each searcher
    object exposes the ``reader`` and ``directory`` it opened, and both are
    closed here once the stage is finished.

    Args:
        query: Raw user input; handed to ``Generator`` to build the code query.
        hit_logs_for_each: Tab-separated hit-count log string. One column is
            appended per pipeline stage (see the ``# Log`` comments below).
        score_logs_for_each: Score log that is passed into, and returned
            updated from, ``BenchSearcher.more_like_this2``.

    Returns:
        Tuple ``(recommended, hit_logs_for_each, score_logs_for_each)`` where
        ``recommended`` is whatever ``recommend`` returns for the combined
        hits sorted by descending ``score``.
    """

    def count_field(items):
        # One tab-terminated log column: len(items), or 0 for a falsy result.
        if items:
            return str(len(items)) + '\t'
        return '0' + '\t'

    def close_index(index_user):
        # Close the reader and the directory held by a searcher object; the
        # directory is closed even if closing the reader fails.
        try:
            index_user.reader.close()
        finally:
            index_user.directory.close()

    def emit(label, value):
        # Writes the same bytes as the Python 2 statement ``print label, value``
        # (items joined by one space) while also parsing under Python 3.
        print('%s %s' % (label, value))

    print("*************** Searching Starts ***************")

    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher("%sstackoverflow" % (INDICES_PATH), user_code_query)
    try:
        # Cutting to 3 here would leave far too few results whenever some of
        # those 3 answers have no question id, so 20 are requested.
        answer_ids = answers.more_like_this(20, query=user_code_query)
    finally:
        close_index(answers)

    # Log : Answer count
    hit_logs_for_each += count_field(answer_ids)

    ### 3_Finding the Associated Questions
    # As in the original, this runs after the reader/directory are closed.
    question_ids = answers.find_question_ids(answer_ids)

    # Log : Answer - Question count
    hit_logs_for_each += count_field(question_ids)

    getDoc = GettingQuestionDocs("%squestionIndex" % (INDICES_PATH))
    try:
        # Keep only the 7 top-ranked question documents.
        item_docs = getDoc.search(question_ids, 20)[0:7]
    finally:
        close_index(getDoc)

    # Log : Question ItemDoc count
    hit_logs_for_each += count_field(item_docs)

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher("%squestionIndex" % (INDICES_PATH))
    try:
        # Log : Similar Question count for each of Question ItemDoc
        if item_docs:
            for item_doc in item_docs:
                # Look up 7 similar questions for each question document.
                similar_question = question.more_like_this2(item_doc, 7)
                hit_logs_for_each += count_field(similar_question)
                similar_questions += similar_question
        else:
            # No question documents at all: emit 7 zero columns.
            hit_logs_for_each += ('0' + '\t') * 7
    finally:
        close_index(question)

    # Log : Similar Question result count
    hit_logs_for_each += count_field(similar_questions)

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    hit_logs_for_each += count_field(answer_ids)

    ### 6_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs("%sstackoverflow" % (INDICES_PATH))
    try:
        answer_docs = getDoc.search(answer_ids)
    finally:
        close_index(getDoc)

    # Log : Answer Docs count
    hit_logs_for_each += count_field(answer_docs)

    bench_results = []
    benchsearcher = BenchSearcher("%sbigclonebench" % (INDICES_PATH))  # BigCloneBench
    try:
        ### 7_Appending for the user query results
        # Log : Bench_result for UQ
        if answer_docs:
            # Same call as in the loop below but with 1 as the last argument;
            # the flag's meaning is defined by BenchSearcher.more_like_this2.
            bench_result, score_logs_for_each = benchsearcher.more_like_this2(
                1, answer_docs[0], score_logs_for_each, user_code_query, 1)
            # The original logged len(bench_results), the still-empty
            # accumulator, so this column was always 0.
            hit_logs_for_each += count_field(bench_result)
            bench_results += bench_result
        else:
            # The original raised IndexError on answer_docs[0] here; keep the
            # column count stable by logging a zero instead.
            hit_logs_for_each += ('0' + '\t')

        ### 8_Querying for the Final Results
        # Log : Bench_result for each query
        for answer_doc in answer_docs:
            bench_result, score_logs_for_each = benchsearcher.more_like_this2(
                1, answer_doc, score_logs_for_each, user_code_query, 0)
            hit_logs_for_each += count_field(bench_result)
            bench_results += bench_result
    finally:
        close_index(benchsearcher)

    # Pad the per-answer columns up to 49. The original tested
    # ``answer_docs < 49`` (list vs. int), which is never true in Python 2,
    # so the padding was silently skipped.
    hit_logs_for_each += ('0' + '\t') * max(0, 49 - len(answer_docs))

    # Log : Results count
    hit_logs_for_each += count_field(bench_results)

    sorted_bench_results = sorted(bench_results, key=attrgetter('score'), reverse=True)
    print("***********************************************************************************")

    # NOTE: an earlier revision had commented-out code here that capped the
    # number of hits per answer id, to stop snippets for the same answer from
    # being listed repeatedly; all hits are passed on now.
    emit('Count(Final results) : ', len(sorted_bench_results))
    recommended = recommend(sorted_bench_results)
    return recommended, hit_logs_for_each, score_logs_for_each