def summarize_api_class(api_class, top_questions, questions, javadoc,
                        javadoc_dict_classes):
    """Print the JavaDoc of an API class and rank the questions that mention it.

    Args:
        api_class: Fully qualified class name (package name + '.' + class
            name), the form that is compared against the javadoc entries.
        top_questions: Mapping of question id -> similarity score; only
            questions whose id is a key here are inspected.
        questions: Iterable of question objects exposing ``id``, ``title``
            and ``answers`` (each answer exposing an HTML ``body``).
        javadoc: Iterable of API class objects exposing ``package_name``,
            ``class_name``, ``class_description`` and ``methods``.
        javadoc_dict_classes: Mapping from the text of a ``<code>`` element
            (truncated at its first '(') to a fully qualified class name.

    Returns:
        A list of ``(question title, similarity)`` tuples for the questions
        whose answers mention ``api_class``, sorted by similarity in
        descending order.
    """
    for api in javadoc:
        # The JavaDoc block has only ever been printed for classes that list
        # at least one method; that behaviour is kept.
        if not api.methods:
            continue
        if api.package_name + '.' + api.class_name != api_class:
            continue
        print('>>>JavaDoc<<<')
        print(api.class_name)
        print(api.class_description)
        # NOTE(review): the previous code indexed class_description with the
        # method index, which is always 0 at the point of the match. Kept
        # for identical output -- confirm whether this line is wanted.
        print(api.class_description[0])

    titles = dict()

    for question in questions:
        if question.id not in top_questions:
            continue

        mentions_class = False

        for answer in question.answers:
            soup = BeautifulSoup(answer.body,
                                 'html.parser',
                                 from_encoding='utf-8')

            # Evidence 1: a link to the official JavaDoc page of the class.
            for anchor in soup.find_all('a'):
                # Anchors without an href (e.g. <a name="...">) are skipped;
                # indexing them with ['href'] raises KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if ('docs.oracle.com/javase/' in href and '/api/' in href
                        and 'html' in href):
                    # parse_api_link returns (class name, method name).
                    if util.parse_api_link(href)[0] == api_class:
                        mentions_class = True
                        break

            # Evidence 2: an inline <code> element naming the class.
            if not mentions_class:
                for code_tag in soup.find_all('code'):
                    text = code_tag.get_text()
                    paren = text.find('(')
                    if paren != -1:
                        text = text[:paren]
                    if (text in javadoc_dict_classes
                            and javadoc_dict_classes[text] == api_class):
                        mentions_class = True
                        break

            # One mention is enough; no need to parse the remaining answers.
            if mentions_class:
                break

        if mentions_class:
            titles[question.title] = top_questions[question.id]

    return sorted(titles.items(), key=lambda item: item[1], reverse=True)
def recommend_api(query_matrix, query_idf_vector, top_questions, questions,
                  javadoc, javadoc_dict_methods, topk):
    """Recommend API methods for a query from Stack Overflow and JavaDoc.

    Each method mentioned in the non-negatively scored answers of the top
    questions receives a Stack Overflow score. For those methods a
    documentation similarity is computed with ``similarity.sim_doc_pair``,
    and the two are combined with a harmonic mean.

    Args:
        query_matrix: Query representation handed to
            ``similarity.sim_doc_pair``.
        query_idf_vector: Query IDF vector handed to
            ``similarity.sim_doc_pair``.
        top_questions: Mapping of question id -> similarity between that
            question and the query (the top-k most relevant questions).
        questions: Iterable of all questions; each exposes ``id`` and
            ``answers`` (answers expose ``score`` and an HTML ``body``).
        javadoc: Iterable of API classes exposing ``package_name``,
            ``class_name``, ``methods``, ``methods_matrix`` and
            ``methods_idf_vector``.
        javadoc_dict_methods: Mapping from the text of a ``<code>`` element
            (truncated at its first '(') to a fully qualified method name.
        topk: Maximum number of methods to return; -1 returns all of them.

    Returns:
        A list of fully qualified method names, best match first.
    """
    score_sum = dict()  # method name -> summed similarity of its questions
    mention_count = dict()  # method name -> number of questions naming it

    for question in questions:
        if question.id not in top_questions:
            continue
        question_sim = top_questions[question.id]

        # A set, so that a question counts at most once per method no matter
        # how many of its answers mention that method.
        mentioned = set()

        for answer in question.answers:
            if int(answer.score) < 0:
                continue

            soup = BeautifulSoup(answer.body, 'html.parser')

            for anchor in soup.find_all('a'):
                # Anchors without an href (e.g. <a name="...">) are skipped;
                # indexing them with ['href'] raises KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if ('docs.oracle.com/javase/' in href and '/api/' in href
                        and 'html' in href):
                    # pair[0] is the class name, pair[1] the method name.
                    pair = util.parse_api_link(href)
                    if pair[1] != '':
                        mentioned.add(pair[0] + '.' + pair[1])

            for code_tag in soup.find_all('code'):
                text = code_tag.get_text()
                paren = text.find('(')
                if paren != -1:
                    text = text[:paren]
                if text in javadoc_dict_methods:
                    mentioned.add(javadoc_dict_methods[text])

        for method_name in mentioned:
            # Starting the sum at 0.0 keeps the average below a float
            # division even on Python 2.
            score_sum[method_name] = (score_sum.get(method_name, 0.0) +
                                      question_sim)
            mention_count[method_name] = mention_count.get(method_name, 0) + 1

    # Average similarity, boosted by the log of the mention count, capped at 1.
    so_sims = dict()
    for method_name, total in score_sum.items():
        count = mention_count[method_name]
        so_sims[method_name] = min(
            1.0, total / count * (1.0 + math.log(count, 2) / 10))

    api_sim = {}

    for api in javadoc:
        class_name = api.package_name + '.' + api.class_name

        for i, method in enumerate(api.methods):
            method_name = class_name + '.' + method
            if method_name not in so_sims:
                continue

            doc_sim = similarity.sim_doc_pair(query_matrix,
                                              api.methods_matrix[i],
                                              query_idf_vector,
                                              api.methods_idf_vector[i])
            so_sim = so_sims[method_name]

            # Harmonic mean of both similarities. When both are zero the
            # denominator is zero as well, so the score is defined as 0.0
            # instead of dividing by zero.
            denominator = doc_sim + so_sim
            if denominator != 0:
                combined = 2 * doc_sim * so_sim / denominator
            else:
                combined = 0.0

            # The same name can occur more than once; keep the best score.
            if method_name in api_sim:
                api_sim[method_name] = max(api_sim[method_name], combined)
            else:
                api_sim[method_name] = combined

    ranked = sorted(api_sim.items(), key=lambda item: item[1], reverse=True)

    recommended_api = list()
    for method_name, _ in ranked:
        recommended_api.append(method_name)
        if topk != -1 and len(recommended_api) >= topk:
            break

    return recommended_api
def recommend_api_class(query_matrix, query_idf_vector, top_questions,
                        questions, javadoc, javadoc_dict_classes, topk):
    """Recommend API classes for a query from Stack Overflow and JavaDoc.

    Each class mentioned in the non-negatively scored answers of the top
    questions receives a Stack Overflow score. The documentation similarity
    of a class is the best ``similarity.sim_doc_pair`` value over its
    methods, and the two are combined with a harmonic mean.

    Args:
        query_matrix: Query representation handed to
            ``similarity.sim_doc_pair``.
        query_idf_vector: Query IDF vector handed to
            ``similarity.sim_doc_pair``.
        top_questions: Mapping of question id -> similarity between that
            question and the query (the top-k most relevant questions).
        questions: Iterable of all questions; each exposes ``id`` and
            ``answers`` (answers expose ``score`` and an HTML ``body``).
        javadoc: Iterable of API classes exposing ``package_name``,
            ``class_name``, ``methods_matrix`` and ``methods_idf_vector``.
        javadoc_dict_classes: Mapping from the text of a ``<code>`` element
            (truncated at its first '(') to a fully qualified class name.
        topk: Maximum number of classes to return; -1 returns all of them.

    Returns:
        A list of fully qualified class names, best match first.
    """
    score_sum = dict()  # class name -> summed similarity over its mentions
    mention_count = dict()  # class name -> number of mentions

    for question in questions:
        if question.id not in top_questions:
            continue
        question_sim = top_questions[question.id]

        for answer in question.answers:
            if int(answer.score) < 0:
                continue

            soup = BeautifulSoup(answer.body,
                                 'html.parser',
                                 from_encoding='utf-8')

            # Every mention counts here (links first, then <code> elements);
            # there is no per-question de-duplication.
            mentioned = list()

            for anchor in soup.find_all('a'):
                # Anchors without an href (e.g. <a name="...">) are skipped;
                # indexing them with ['href'] raises KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if ('docs.oracle.com/javase/' in href and '/api/' in href
                        and 'html' in href):
                    # pair[0] is the class name including its package,
                    # e.g. java.util.Calendar; pair[1] is the method name.
                    pair = util.parse_api_link(href)
                    mentioned.append(pair[0])

            for code_tag in soup.find_all('code'):
                text = code_tag.get_text()
                paren = text.find('(')
                if paren != -1:
                    text = text[:paren]
                if text in javadoc_dict_classes:
                    mentioned.append(javadoc_dict_classes[text])

            for class_name in mentioned:
                # Starting the sum at 0.0 keeps the average below a float
                # division even on Python 2.
                score_sum[class_name] = (score_sum.get(class_name, 0.0) +
                                         question_sim)
                mention_count[class_name] = (
                    mention_count.get(class_name, 0) + 1)

    # Average similarity, boosted by the log of the mention count, capped at 1.
    so_sims = dict()
    for class_name, total in score_sum.items():
        count = mention_count[class_name]
        so_sims[class_name] = min(
            1.0, total / count * (1.0 + math.log(count, 2) / 10))

    api_sim = {}

    for api in javadoc:
        class_name = api.package_name + '.' + api.class_name
        if class_name not in so_sims:
            continue

        # Best documentation match over all methods of the class; stays 0.0
        # when the class has no method matrices.
        doc_sim = 0.0
        for i, method_matrix in enumerate(api.methods_matrix):
            doc_sim = max(
                doc_sim,
                similarity.sim_doc_pair(query_matrix, method_matrix,
                                        query_idf_vector,
                                        api.methods_idf_vector[i]))

        so_sim = so_sims[class_name]

        # Harmonic mean of both similarities. When both are zero the
        # denominator is zero as well, so the score is defined as 0.0
        # instead of dividing by zero.
        denominator = doc_sim + so_sim
        if denominator != 0:
            api_sim[class_name] = 2 * doc_sim * so_sim / denominator
        else:
            api_sim[class_name] = 0.0

    ranked = sorted(api_sim.items(), key=lambda item: item[1], reverse=True)

    recommended_api = list()
    for class_name, _ in ranked:
        recommended_api.append(class_name)
        if topk != -1 and len(recommended_api) >= topk:
            break

    return recommended_api
# Example #4
# 0
def summarize_api_method(api_method, top_questions, questions, javadoc,
                         javadoc_dict_methods):
    """Print a summary of an API method and return the collected material.

    The summary consists of the first sentence of the method's JavaDoc
    description, up to three titles of questions whose answers mention the
    method (titles with a usable snippet first) and up to three short code
    snippets.

    Args:
        api_method: Fully qualified method name (package name + '.' + class
            name + '.' + method name).
        top_questions: Mapping of question id -> similarity score; only
            questions whose id is a key here are inspected.
        questions: Iterable of question objects exposing ``id``, ``title``
            and ``answers`` (each answer exposing an HTML ``body``).
        javadoc: Iterable of API class objects exposing ``package_name``,
            ``class_name``, ``methods`` and
            ``methods_descriptions_pure_text``.
        javadoc_dict_methods: Mapping from the text of a ``<code>`` element
            (truncated at its first '(') to a fully qualified method name.

    Returns:
        A tuple ``(description, titles, code_snippets)``: the first sentence
        of the JavaDoc description ('' when the method is not found in
        ``javadoc``), the list of ``(title, similarity)`` tuples sorted by
        similarity in descending order, and a dict mapping each of those
        titles to its list of snippet texts.
    """
    # Defined up front so that the return statement cannot hit an unbound
    # local when api_method does not occur in javadoc.
    methods_descriptions_pure_text_changed = ''

    for api in javadoc:
        class_prefix = api.package_name + '.' + api.class_name + '.'
        for i, method in enumerate(api.methods):
            if class_prefix + method == api_method:
                # Keep only the first sentence, with whitespace flattened.
                description = api.methods_descriptions_pure_text[i]
                description = description.replace('\n', ' ')
                description = description.replace('  ', ' ')
                methods_descriptions_pure_text_changed = (
                    description.split('.')[0] + '.')
                print('>>>JavaDoc<<<')
                print(methods_descriptions_pure_text_changed)
                break

    titles = dict()
    code_snippets = dict()

    # A snippet qualifies only if it visibly calls the method: ".name(".
    call_marker = '.' + api_method.split('.')[-1] + '('

    for question in questions:
        if question.id not in top_questions:
            continue

        # Parse each answer once; the soups are reused for the snippets.
        soups = [
            BeautifulSoup(answer.body, 'html.parser', from_encoding='utf-8')
            for answer in question.answers
        ]

        contains_api = False

        for soup in soups:
            # Evidence 1: a link to the official JavaDoc of the method.
            for anchor in soup.find_all('a'):
                # Anchors without an href (e.g. <a name="...">) are skipped;
                # indexing them with ['href'] raises KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if ('docs.oracle.com/javase/' in href and '/api/' in href
                        and 'html' in href):
                    # pair[0] is the class name, pair[1] the method name.
                    pair = util.parse_api_link(href)
                    if (pair[1] != ''
                            and pair[0] + '.' + pair[1] == api_method):
                        contains_api = True
                        break

            # Evidence 2: an inline <code> element naming the method.
            if not contains_api:
                for code_tag in soup.find_all('code'):
                    text = code_tag.get_text()
                    paren = text.find('(')
                    if paren != -1:
                        text = text[:paren]
                    if (text in javadoc_dict_methods
                            and javadoc_dict_methods[text] == api_method):
                        contains_api = True
                        break

            if contains_api:
                break

        if not contains_api:
            continue

        titles[question.title] = top_questions[question.id]

        # Only the first <pre> of each answer is considered, and only when
        # it is short (at most 5 newlines) and calls the method.
        snippet_list = list()
        for soup in soups:
            pre = soup.find('pre')
            if pre is None:
                continue
            pre_text = pre.get_text()
            if pre_text.count('\n') <= 5 and call_marker in pre_text:
                snippet_list.append(pre_text)
        code_snippets[question.title] = snippet_list

    titles = sorted(titles.items(), key=lambda item: item[1], reverse=True)

    with_snippet = [title for title, _ in titles if code_snippets[title]]
    without_snippet = [
        title for title, _ in titles if not code_snippets[title]
    ]

    # Titles with a snippet are listed first; the rest fill up to three.
    print('>>>Relevant Questions<<<')
    for rank, title in enumerate((with_snippet + without_snippet)[:3], 1):
        print(str(rank) + '.' + title)

    shown = with_snippet[:3]
    if shown:
        print('>>>Code Snippets<<<')
    for rank, title in enumerate(shown, 1):
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('/**********code snippet %d **********/' % rank)
        print(code_snippets[title][0])

    if shown:
        print('-----------------------------------------------\n')
    else:
        print('\n-----------------------------------------------\n')

    return methods_descriptions_pure_text_changed, titles, code_snippets