Code example #1
0
def java_plugin(g, issue_url):
    ex_dir, repo_name, issue_dict = prepare_repo(g, issue_url)
    # Dependency
    gradle_paths = find_all_gradle(ex_dir)
    if gradle_paths:
        raw_depens = []
        for p in gradle_paths:
            tmp = gradle_dependency(p)
            if tmp:
                raw_depens.extend(tmp)
    else:
        pom_paths = find_all_pom(ex_dir)
        raw_depens = []
        for p in pom_paths:
            tmp = pom_dependency(p)
            if tmp:
                raw_depens.extend(tmp)
    if raw_depens:
        # logger.debug(raw_depens)
        depen_ob = Dependencies(repo_name, raw_depens)
        # logger.debug(depen_ob)
        _denp_keys = depen_ob.keywords()
        # logger.debug(_denp_keys)
        denp_key_w = search_keywords(issue_dict["body_tokens"], _denp_keys, mode='depen', min_len=2)
        denp_key_w.sort(key=lambda k: k[-1], reverse=True)
        logger.info(f"Dependency {len(denp_key_w)}=={denp_key_w}")
        return {
            "depen": util.get_col(denp_key_w, 0)
        }
    else:
        return {
            "depen": list()
        }
Code example #2
0
def android_plugin(g, issue_url):
    """Rank permission / dependency / UI keyword overlap for an Android issue.

    Prepares the repo behind *issue_url*, then matches three keyword sources
    (manifest permissions, Gradle dependencies, UI descriptions) against the
    issue text.

    Returns:
        dict with keys "depen", "permit", "ui": keyword columns of the
        matches (each empty when the corresponding source is missing).
    """
    ex_dir, repo_name, issue_dict = prepare_repo(g, issue_url)
    root = find_app_root(ex_dir)
    body_tokens = issue_dict["body_tokens"]

    # Permission
    per_key_w = []
    has_manifest, manifest_path = check_manifest_at_root(root)
    if has_manifest:
        name, p_list = get_permission(manifest_path)
        permit_ob = Permissions(manifest_path, name, ex_dir, p_list)
        per_key_w = search_keywords(body_tokens, permit_ob.keywords(),
                                    mode='permit', min_len=2)
        per_key_w.sort(key=lambda k: k[-1], reverse=True)
    logger.info(f"Permission {len(per_key_w)}=={per_key_w}")

    # Dependency
    denp_key_w = []
    has_gradle, gradle_path = check_gradle_at_root(root)
    if has_gradle:
        depen_ob = Dependencies(repo_name, gradle_dependency(gradle_path))
        denp_key_w = search_keywords(body_tokens, depen_ob.keywords(),
                                     mode='depen', min_len=2)
        denp_key_w.sort(key=lambda k: k[-1], reverse=True)
    logger.info(f"Dependency {len(denp_key_w)}=={denp_key_w}")

    # UI — matches below the 0.5 threshold are dropped by search_keywords
    ui_key_w = search_keywords(issue_dict["body"], get_ui_descript(ex_dir),
                               mode='ui', threshold=0.5, min_len=3)
    logger.info(f"UI {len(ui_key_w)}=={ui_key_w}")

    return {
        "depen": util.get_col(denp_key_w, 0),
        "permit": util.get_col(per_key_w, 0),
        "ui": util.get_col(ui_key_w, 0)
    }
Code example #3
0
def get_top_java_repo(g, id, only_android=False):
    """Append star-ranked Java repos to openissues_repo_<id>.csv.

    Repos already present in the CSV are skipped, so the function can be
    re-run to resume an interrupted crawl.

    Args:
        g: authenticated Github client.
        id: numeric suffix of the CSV work file (kept as-is for callers,
            although it shadows the builtin).
        only_android: when True keep only Android repos, when False keep
            only non-Android repos (rows where the flag and the check
            disagree are skipped — see the XOR below).
    """
    file_name = f"openissues_repo_{str(id)}.csv"
    data = util.read_csv(file_name)
    repos = util.get_col(data, 0)
    # Repos recorded by a previous run; membership test below is O(1).
    old_repos = set(repos)

    repos = g.search_repositories(query='language:java',
                                  sort="stars",
                                  order="desc",
                                  language="Java")

    with open(file_name, "a+", encoding="utf-8") as file:
        for index in range(repos.totalCount):
            api_wait_search(g)  # throttle to respect the search-API rate limit
            repo = repos[index]
            if repo.full_name in old_repos:
                continue
            # XOR: skip when only_android and is_android_repo() differ,
            # i.e. keep Android repos iff only_android is True.
            if only_android ^ is_android_repo(g, repo.full_name):
                logger.info(f"skip {repo.full_name}")
                continue
            file.write(repo.full_name + "," + repo.html_url + "\n")
            file.flush()
Code example #4
0
def mian():
    """Main driver: for each open-issue URL, search for similar closed issues
    and append ranked candidate rows to data2.csv.

    NOTE(review): the name is presumably a typo for "main"; kept because
    callers may reference "mian".

    Reads module-level globals: open_urls, is_android.
    Appends each processed URL to hist.txt so re-runs can skip it.
    """
    from persontoken import MY_TOKEN
    g = Github(MY_TOKEN)
    # URLs already handled in a previous run (column 0 of hist.txt).
    done_open_urls = util.read_csv('./hist.txt')
    done_open_urls = util.get_col(done_open_urls, 0)

    # Append mode: partial progress from earlier runs is preserved.
    _f = open("data2.csv", 'a+', encoding='utf-8-sig', newline='')
    _f2 = open("hist.txt", 'a+', encoding='utf-8', newline='')

    try:
        csvwriter = csv.writer(_f, delimiter=',')
        for i, open_url in enumerate(open_urls):
            if open_url in done_open_urls:
                continue
            # Row layout: 0 open url, 1 query, 2 offline-overlap info,
            # 3 close url, 4 close info.
            this_row = [""] * 5
            # open_url = 'https://github.com/json-path/JsonPath/issues/460'
            logger.info("-" * 100)
            logger.info(open_url)
            open_iss_ob = util.get_issue(g, open_url)
            this_row[0] = open_url

            # stacktrace / condition / title
            repo_name = open_iss_ob.repository.full_name
            extra_rm = nlp_util.full_name_token(
                repo_name)  # remove the number and its name
            curr_q = fast_query(open_iss_ob.title, open_iss_ob.body)
            logger.info(f"curr_q, {curr_q}")

            # check sequence stacktrace, condition, title
            try_pair = [
                (True, False, 'body'),  # stacktrace in body
                (False, True, 'title'),  # condition in title
                (False, False, 'title'),  # title in title
                (False, False, 'other')  # title (no field constraint)
            ]
            try_hist = []
            all_fail = True
            # Try each query strategy in order; stop at the first NORMAL hit.
            for _fi, pair in enumerate(try_pair):
                trace, condition, pos = pair
                query_list = form_query(curr_q,
                                        extra_rm,
                                        trace=trace,
                                        condition=condition)
                query_chars = " ".join(query_list)
                query_chars = add_pos(query_chars, pos)
                logger.debug(f"query_chars, {query_chars}")
                if query_list:
                    close_iss = run_close_query(g,
                                                query_chars,
                                                is_android,
                                                depth=10,
                                                fallback_size=5)
                    try_hist.append(query_chars)
                    this_row[1] = query_chars
                    # close_iss["info"] is one of NORMAL / FALLBACK / EMPTY
                    # (semantics defined in run_close_query).
                    if close_iss["info"] == 'NORMAL':
                        all_fail = False
                    else:
                        if close_iss["info"] == 'FALLBACK':
                            all_fail = False
                            logger.info(
                                f"[try {_fi}] FALLBACK failed query [Too few results], {query_chars}"
                            )
                        elif close_iss["info"] == 'EMPTY':
                            logger.info(
                                f"[try {_fi}] FALLBACK failed query [Zero results], {query_chars}"
                            )

                    if close_iss["iss"]:
                        # open url, open info online, open info offline, close url, close info (rank property)
                        # 0       , 1               , 2                , 3        , 4

                        rank_list = []
                        for _c in close_iss["iss"]:
                            close_url, close_info = _c
                            # Same plugin is applied to both the open and the
                            # candidate closed issue so the key sets line up.
                            if is_android:
                                plugin = android_plugin
                            else:
                                plugin = java_plugin
                            # open url, open info online, open info offline, close url, close info (rank property)
                            # 0       , 1               , 2                , 3        , 4
                            open_off = plugin(g, open_url)
                            close_off = plugin(g, close_url)
                            logger.debug(f"open {open_url}")
                            logger.debug(
                                f"open offline rank, {open_off}=={len(open_off.keys())}"
                            )
                            logger.debug(f"close {close_url}")
                            logger.debug(
                                f"close offline rank, {close_off}=={len(close_off.keys())}"
                            )
                            assert len(open_off.keys()) == len(
                                close_off.keys())
                            # Intersect each offline keyword category; tag the
                            # close_info with every category that overlaps.
                            all_empty = True
                            join_off = dict()
                            for _k in open_off.keys():
                                join = set(open_off[_k]) & set(close_off[_k])
                                join_off[_k] = list(join)
                                if join:
                                    all_empty = False
                                    close_info.insert(0, f"Off-SIM-{_k}")
                            logger.debug(f"join_off, {join_off}")
                            if all_empty:
                                this_row[2] = "empty offline"
                            else:
                                this_row[2] = json.dumps(join_off)

                            this_row[3] = close_url
                            flag = code_sim_wrap(g, open_url, close_url)
                            if flag:
                                close_info.insert(0, "Code-SIM")
                            this_row[4] = json.dumps(close_info)
                            # deepcopy: this_row is reused for the next candidate
                            rank_list.append(deepcopy(this_row))
                        rank_list = rank_issue(rank_list)
                        if rank_list:
                            csvwriter.writerows(rank_list)

                        if close_iss["info"] == 'NORMAL':
                            break

            # No strategy produced usable results: record every attempted query.
            if all_fail:
                write_list = []
                this_row[4] = 'NONE close issue'
                for col1 in try_hist:
                    this_row[1] = col1
                    write_list.append(deepcopy(this_row))
                csvwriter.writerows(write_list)

            # Mark this URL done so a re-run skips it.
            print(open_url, file=_f2)

            _f.flush()
            _f2.flush()
    except Exception as e:
        # NOTE(review): `raise e` resets less context than a bare `raise`;
        # also, if open_urls is empty, open_url is unbound here.
        logger.error(f"{open_url}, skip")
        print(f"{open_url}, skip", file=_f2)
        raise e
    finally:
        _f.close()
        _f2.close()
Code example #5
0
from github import Github
from loguru import logger

import nlp_util
import util
from crawlermy import fast_query, run_close_query, form_query, add_pos
from rank_issue import code_sim_wrap, rank_issue, android_plugin, java_plugin
from util import SS

#  before running, delete data2.csv,hist.txt,log/main_one2.log

# Module-level setup: logging, run mode, and the work list of open-issue URLs.
util.init_logger('main_one2.log', mode='fixed', clear=False)
is_android = False  # False: plain-Java repos; True: Android repos
open_urls = util.read_csv('./openlist.txt', encoding='utf-8')
open_urls = util.get_col(open_urls, 0)
open_urls = util.uniq_list(open_urls)  # drop duplicate URLs (presumably order-preserving — see util)
ss = SS(ip="vm.userx.cn", port=7891)  # NOTE(review): network/proxy side effect at import time


def mian():
    from persontoken import MY_TOKEN
    g = Github(MY_TOKEN)
    done_open_urls = util.read_csv('./hist.txt')
    done_open_urls = util.get_col(done_open_urls, 0)

    _f = open("data2.csv", 'a+', encoding='utf-8-sig', newline='')
    _f2 = open("hist.txt", 'a+', encoding='utf-8', newline='')

    try:
        csvwriter = csv.writer(_f, delimiter=',')
Code example #6
0
    # test code
    # src = "tsv/nextcloud_android_master.tsv"
    # src = select_dir(SRC_DIR)
    src = _item
    src_out = util.read_tsv(src)
    src_out = nlp_util.process_tsv(src_out)

    # Candidate TSVs: every file under SRC_DIR except the source app itself.
    file_list = os.listdir(SRC_DIR)
    file_list = [os.path.join(SRC_DIR, f) for f in file_list]
    if src in file_list:
        file_list.remove(src)
    # file_list = ['tsv/owncloud_android_master.tsv'] # one test

    scan_output = scan_match(src_out, file_list, match_name.ngram_compare, [1, 0.5, 0.5], threshold=0.7)
    # overall similarity between the src app and every app in the database
    logger.debug(pp.pformat(util.get_col(scan_output, [0, 1])))

    rdb = issuedb.ISSuedb()
    sql = """select issue_num, comments, state, title, body, commit_id, labels from {}
                    order by length(body) desc"""
    # remove constrain "where labels like '%bug%' or commit_id is not null"

    overall_table = {}
    # all related apps and items
    # for i in range(len(scan_output)):
    for i in range(4):  # NOTE(review): hard-coded top-4 apps; the full loop is commented out above
        one_dict = {}
        app = scan_output[i][0]
        one_dict['sim'] = scan_output[i][1]

        tab_name = table2tsv.file2table(app)
Code example #7
0
                                i.html_url)


if __name__ == "__main__":
    init_logger(__file__)

    from persontoken import MY_TOKENs

    # Rotate through multiple tokens so a rate-limited one can be swapped out.
    tklen = len(MY_TOKENs)
    tk_i = 0
    ss = SS(port=7890)  # proxy/session helper
    android = False  # choose the Android or plain-Java repo list below
    id = 12  # work-file suffix passed to download_new_issues
    while True:
        g = Github(MY_TOKENs[tk_i % tklen])
        try:
            # get_top_java_repo(g, 6, only_android=False)

            if android:
                urls = util.read_csv('f-droid/f-droid-github-filter.csv')
                urls = util.get_col(urls, 3)
            else:
                urls = util.read_csv('java_repo_list.csv')
                urls = util.get_col(urls, 1)
            download_new_issues(g, urls, id, shuffle=True)
        except RateLimitExceededException:
            # Rate limit hit: log it and advance to the next token.
            logger.error(traceback.format_exc())
            tk_i += 1
        else:
            # NOTE(review): this `else` runs only when NO exception occurred,
            # so traceback.format_exc() logs "NoneType: None" here — likely a
            # bug; a broader `except Exception` (or a `break` on success) was
            # probably intended. Left unchanged pending confirmation.
            logger.error(traceback.format_exc())