Example #1
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_4_text'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    benchsearcher = BenchSearcher(searcher)  # BigCloneBench
    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    bench_result, score_logs_for_each = benchsearcher.more_like_this3(
        5000, score_logs_for_each, user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if bench_result:
        hit_logs_for_each += str(len(bench_result)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_result,
                                  key=attrgetter('score'),
                                  reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_result:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each
Example #2
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher(searcher, user_code_query)
    answer_ids = answers.more_like_this(20, query=user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    # Log : Answer - Question count
    if question_ids:
        hit_logs_for_each += str(len(question_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    getDoc = GettingQuestionDocs(searcher)
    item_docs = getDoc.search(
        question_ids, 20)[0:7]  # 순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Question ItemDoc count
    if item_docs:
        hit_logs_for_each += str(len(item_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher(searcher)

    # Log : Similar Question count for each of Question ItemDoc
    i = 1
    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 7)  # 각 question 들에 대해 7개씩 비슷한 것들 찾음.
            if similar_question:
                hit_logs_for_each += str(len(similar_question)) + '\t'
            else:
                hit_logs_for_each += ('0' + '\t')
            similar_questions += similar_question
            i += 1
    else:
        hit_logs_for_each += ('0' + '\t' + '0' + '\t' + '0' + '\t' + '0' +
                              '\t' + '0' + '\t' + '0' + '\t' + '0' + '\t'
                              )  # 7개

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Similar Question result count
    if similar_questions:
        hit_logs_for_each += str(len(similar_questions)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 6_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs(searcher)
    answer_docs = getDoc.search(answer_ids)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer Docs count
    if answer_docs:
        hit_logs_for_each += str(len(answer_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_2'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    bench_results = []
    benchsearcher = BenchSearcher(searcher)  # BigCloneBench

    # Exceptional
    ### 7_Appending for the user query results

    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    for answer_doc in answer_docs:
        bench_result, score_logs_for_each = benchsearcher.more_like_this2(
            100, answer_doc, score_logs_for_each, user_code_query,
            0)  # , user_query=user_code_query)
        if bench_result:
            hit_logs_for_each += str(len(bench_result)) + '\t'
        else:
            hit_logs_for_each += ('0' + '\t')
        bench_results += bench_result

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if answer_docs < 49:
        for a in range(49 - len(answer_docs)):
            hit_logs_for_each += ('0' + '\t')

    if bench_results:
        hit_logs_for_each += str(len(bench_results)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_results,
                                  key=attrgetter('score'),
                                  reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_results:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each