Beispiel #1
0
    def load_index(self):
        """Open the index at ``self.index_path`` and acquire a searcher.

        Populates four attributes on the instance:

        * ``self.analyzer`` -- a ``PerFieldAnalyzerWrapper`` whose default is
          a ``KeywordAnalyzer`` and whose per-field overrides are built below.
        * ``self.directory`` -- the ``SimpleFSDirectory`` for the index path.
        * ``self.searchermgr`` -- the ``SearcherManager`` over that directory.
        * ``self.searcher`` -- the searcher acquired from the manager.

        The acquired searcher is not released here; whoever tears this
        object down is responsible for releasing it and closing the manager.
        """
        field_analyzers = {}

        # Fields analyzed with the instance's ``porter_analyzer``.
        for field in ("code", "description", "literals"):
            field_analyzers[field] = self.porter_analyzer

        # Fields analyzed with their own ``KeywordAnalyzer`` instance.
        for field in ("typed_method_call", "extends", "used_classes",
                      "methods", "class_instance_creation", "id"):
            field_analyzers[field] = KeywordAnalyzer()

        self.analyzer = PerFieldAnalyzerWrapper(KeywordAnalyzer(),
                                                field_analyzers)
        self.directory = SimpleFSDirectory(self.index_path)

        manager = SearcherManager(self.directory, SearcherFactory())
        self.searchermgr = manager
        manager.maybeRefresh()
        self.searcher = manager.acquire()
Beispiel #2
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_4_text'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    benchsearcher = BenchSearcher(searcher)  # BigCloneBench
    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    bench_result, score_logs_for_each = benchsearcher.more_like_this3(
        5000, score_logs_for_each, user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if bench_result:
        hit_logs_for_each += str(len(bench_result)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_result,
                                  key=attrgetter('score'),
                                  reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_result:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each
Beispiel #3
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher(searcher, user_code_query)
    answer_ids = answers.more_like_this(20, query=user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    # Log : Answer - Question count
    if question_ids:
        hit_logs_for_each += str(len(question_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    getDoc = GettingQuestionDocs(searcher)
    item_docs = getDoc.search(
        question_ids, 20)[0:7]  # 순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Question ItemDoc count
    if item_docs:
        hit_logs_for_each += str(len(item_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher(searcher)

    # Log : Similar Question count for each of Question ItemDoc
    i = 1
    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 7)  # 각 question 들에 대해 7개씩 비슷한 것들 찾음.
            if similar_question:
                hit_logs_for_each += str(len(similar_question)) + '\t'
            else:
                hit_logs_for_each += ('0' + '\t')
            similar_questions += similar_question
            i += 1
    else:
        hit_logs_for_each += ('0' + '\t' + '0' + '\t' + '0' + '\t' + '0' +
                              '\t' + '0' + '\t' + '0' + '\t' + '0' + '\t'
                              )  # 7개

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Similar Question result count
    if similar_questions:
        hit_logs_for_each += str(len(similar_questions)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 6_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs(searcher)
    answer_docs = getDoc.search(answer_ids)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer Docs count
    if answer_docs:
        hit_logs_for_each += str(len(answer_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_2'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    bench_results = []
    benchsearcher = BenchSearcher(searcher)  # BigCloneBench

    # Exceptional
    ### 7_Appending for the user query results

    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    for answer_doc in answer_docs:
        bench_result, score_logs_for_each = benchsearcher.more_like_this2(
            100, answer_doc, score_logs_for_each, user_code_query,
            0)  # , user_query=user_code_query)
        if bench_result:
            hit_logs_for_each += str(len(bench_result)) + '\t'
        else:
            hit_logs_for_each += ('0' + '\t')
        bench_results += bench_result

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if answer_docs < 49:
        for a in range(49 - len(answer_docs)):
            hit_logs_for_each += ('0' + '\t')

    if bench_results:
        hit_logs_for_each += str(len(bench_results)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_results,
                                  key=attrgetter('score'),
                                  reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_results:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each
Beispiel #4
0
def query_index(query):
    ### 1_Query Alternation
    user_code_query = Generator(query)
    print 'query: ', query
    print 'user_code_query: ', user_code_query

    #open
    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher(searcher, user_code_query)
    answer_ids = answers.more_like_this(10, query=user_code_query)
    print 'answer_ids: ', answer_ids

    #close
    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    print 'question ids: ', question_ids

    #open
    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 4_Cutting items
    getDoc = GettingQuestionDocs(searcher)
    item_docs = getDoc.search(
        question_ids, 10)[0:3]  # 순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.
    # print 'item docs: ', item_docs

    ### 5_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher(searcher)

    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 3)  # 각 question 들에 대해 7개씩 비슷한 것들 찾음.
            similar_questions += similar_question

    print 'similar_questions: ', similar_questions

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    ### 6_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)
    print 'answer ids: ', answer_ids

    if not answer_ids:
        recommended = ''
        return recommended
        # dest_path = u'/Users/Falcon/Desktop/***Ongoing***/***[4]_FaCoY_Defect4J_Data_Share_Kui/Defect4J_Results/'
        # project_name = u'Chart/'###################################################
        # write_file()

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 7_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs(searcher)
    answer_docs = getDoc.search(answer_ids)

    # print 'answer docs: ', answer_docs

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    directory = SimpleFSDirectory(File(INDICES_PATH + 'github'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    git_results = []
    gitsearcher = GitSearcher(searcher)

    ### 7_Appending for the user query results
    git_result = gitsearcher.more_like_this2(10, answer_docs[0],
                                             user_code_query, 1)
    git_results += git_result

    # print 'answer docs: ', answer_docs

    ### 8_Querying for the Final Results
    for answer_doc in answer_docs:
        git_result = gitsearcher.more_like_this2(10, answer_doc,
                                                 user_code_query, 0)
        git_results += git_result

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    git_results = sorted(git_results,
                         key=attrgetter('so_item.answer_id'),
                         reverse=True)
    id = 0
    i = 0
    temp_result = []
    for item in git_results:
        if id != item.so_item.answer_id:
            id = item.so_item.answer_id
            i = 1
            temp_result.append(item)

        elif id == item.so_item.answer_id and i < 3:
            i += 1
            temp_result.append(item)
        elif id == item.so_item.answer_id and i > 3:
            continue

    sorted_git_results = sorted(temp_result,
                                key=attrgetter('score'),
                                reverse=True)

    print 'Search Count : ', len(sorted_git_results)
    recommended = recommend(sorted_git_results)
    print 'Final Count : ', len(recommended)

    # Defect4J 쿼리 결과저장
    # cot = 0
    # for c, item in enumerate(recommended):
    #     cot += 1
    #     if cot > 10:
    #         break
    #     result_file = "/Users/Falcon/Desktop/Pycharm_Project/FaCoY_Project/GitSearch/Defect4J_FaCoY/" + str(c+1) + "_" + str('_'.join(str(item[0]).split("/")[6:]))
    #     write_file_over(result_file, str(item.file_content))

    # result_file = '/Users/Falcon/Desktop/test.txt'
    # if os.path.exists(result_file):
    #     os.remove(result_file)
    #
    # write_file(result_file, 'User Code Query \n' + str(query) + '\n' + '---------------------------' + '\n')
    # for c, i in enumerate(recommended):
    #     contents = ''
    #     contents = 'Rank: %d' % (int(c)+int(1))
    #     contents += '\nFile path: %s' % str(i.file[6:]) + '\n' + '---------------------------' + '\n'
    #     contents += str(i.file_content) +'\n' + '=================================================================' + '\n\n\n'
    #     write_file(result_file, contents)

    return recommended