Beispiel #1
0
    def cos_test_info(self, train_time):
        """Gather the two inputs used by the cosine-similarity test.

        Args:
            train_time: Forwarded unchanged to ``self.get_corpus_result``.

        Returns:
            A ``(corpus_result, project_reviewers)`` tuple holding the value
            returned by ``self.get_corpus_result(train_time)`` and the value
            returned by ``get_reviewer(self.project_name)``.
        """
        # Corpus data first (the original comment calls this the word vector),
        # then the reviewer data for this project.
        corpus_result = self.get_corpus_result(train_time)
        project_reviewers = get_reviewer(self.project_name)

        # Echo the reviewer data to stdout before handing both values back.
        print(project_reviewers)
        return corpus_result, project_reviewers
Beispiel #2
0
from storage.utils import get_reviewer
from storage.utils import get_test_number
from itertools import product
import re
# Create the output directory on first use.
if not os.path.exists(tie_path):
    os.mkdir(tie_path)
# Only the files directly inside ir_path are needed, so take the first
# os.walk() entry instead of materialising the walk of the whole tree.
ir_list = next(os.walk(ir_path))[2]
# fps_file = [i for i in os.walk(fps_path)][0][2]
print(ir_list)
# Second '/'-separated component of every PROJECT_NAME entry. It does not
# depend on the file being processed, so it is built once, outside the loop.
project_index = [name.split('/')[1] for name in PROJECT_NAME]
for i in ir_list:
    # Map the result file back to its PROJECT_NAME entry through the first
    # "<letters><any char><letters>" match in the file name.
    project = re.search(r'[a-z]+.[a-z]+', i)[0]
    realproject = PROJECT_NAME[project_index.index(project)]
    test_number = get_test_number(realproject)
    print(test_number)
    all_reviewer = get_reviewer(realproject)
    print(realproject)
    # The same file name is used in the ir, fps and tie directories.
    ir_file = os.path.join(ir_path, i)
    fps_file = os.path.join(fps_path, i)
    tie_file = os.path.join(tie_path, i)
    # Read each result file exactly once; the body lines and the trailing
    # timing line are both taken from the same in-memory list.
    with open(ir_file, 'r') as f:
        ir_lines = f.readlines()
    with open(fps_file, 'r') as f2:
        fps_lines = f2.readlines()
    # ir drops one trailing line, fps drops two (kept exactly as before;
    # presumably the fps file carries one extra trailer line - TODO confirm).
    ir_content = ir_lines[:-1]
    fps_content = fps_lines[:-2]
    # The last line of each file holds the elapsed time; the pattern keeps the
    # integer part plus, when present, exactly two decimals.
    ir_spend_time = float(re.search(r"\d+(\.\d{2})?", ir_lines[-1])[0])
    fps_spend_time = float(re.search(r"\d+(\.\d{2})?", fps_lines[-1])[0])
Beispiel #3
0
def _rank_candidates(fps_scores, cos_scores, limit=10,
                     fps_weight=0.3, cos_weight=0.7):
    """Merge two scored candidate lists and return the best ``limit`` names.

    Args:
        fps_scores: Iterable of sequences whose first item is the candidate
            and whose second item is its file-path-similarity score.
        cos_scores: Same layout, holding the cosine-similarity scores.
        limit: Maximum number of candidates to return.
        fps_weight: Multiplier applied to every score in ``fps_scores``.
        cos_weight: Multiplier applied to every score in ``cos_scores``.

    Returns:
        Candidate names ordered by descending combined weighted score.
        Candidates whose combined score is not positive are dropped, because
        ``Counter`` addition discards them.
    """
    weighted_fps = Counter(
        {item[0]: item[1] * fps_weight for item in fps_scores})
    weighted_cos = Counter(
        {item[0]: item[1] * cos_weight for item in cos_scores})
    combined = weighted_fps + weighted_cos
    # most_common() sorts by the combined score; plain dict iteration would
    # only reproduce insertion order and ignore the weights entirely.
    return [name for name, _ in combined.most_common(limit)]


def main(project, train_time):
    """Run the combined reviewer prediction for one project and record timings.

    Prepares the corpus and comment data, then walks over every entry of the
    test set, merges the ``CountFps`` and cosine-similarity candidate lists,
    and measures how long each prediction takes. Three timing lines (train,
    predict, total) are written through ``write_to_file``.

    Args:
        project: Project identifier containing a ``/``; the part after the
            first ``/`` is used to build the output file name.
        train_time: String identifying the training period; ``-`` is replaced
            by ``_`` when building the output file name.

    Returns:
        None. Errors raised while handling a single test entry are logged
        through ``LOG.error`` and the loop moves on to the next entry.
    """
    cf = CountFps()
    train_start_number = get_train_start_number(project, train_time)
    all_reviewer = get_reviewer(project, train_start_number)
    all_file = get_file_name(project, train_start_number)
    start_time = time.time()
    proc = Process(project)
    file_name = project.split('/')[1] + train_time.replace('-', '_')
    print('Start Prepare Corpus')
    info_dict, reviewer = proc.cos_test_info(train_time)
    cn = CommentNetwork(project, train_time)
    test_info, comment_info_all = cn.corpus_test()
    end_train_time = time.time()

    train_spend_time = end_train_time - start_time
    print('start test-----')
    # Top-1 / top-3 / top-5 hit counters for the accuracy evaluation, which is
    # currently disabled (see the commented compare_result/save_rate calls).
    count_dict1 = {'right': 0, 'wrong': 0, 'all': 0}
    count_dict3 = {'right': 0, 'wrong': 0, 'all': 0}
    count_dict5 = {'right': 0, 'wrong': 0, 'all': 0}
    predict_time_list = []
    for number, pull_request_user in test_info.items():
        try:
            start_predict_time = time.time()
            # Only entries with a smaller key than the current one may be used
            # as history for the prediction.
            Rpf = {k: v for k, v in all_file.items() if k < number}
            Rp_viewers_all = {
                k: v
                for k, v in all_reviewer.items() if k < number
            }

            f = all_file.get(number)
            comment_info = comment_info_all.get(number, None)
            if not comment_info or not f:
                # Nothing to predict from or to compare against.
                continue

            # Ground truth for the (disabled) accuracy evaluation.
            comment_info_key = list(comment_info.keys())
            _result, predict = cf.main(f, Rpf, Rp_viewers_all, 5)
            score_list = cos_func(info_dict, number, reviewer, proc)
            print('----', number, pull_request_user)

            # 0.3 * file-path score + 0.7 * cosine score, best ten first.
            commented_user = _rank_candidates(predict, score_list, limit=10)
            end_predict_time = time.time()
            predict_time_list.append(end_predict_time - start_predict_time)

            # NOTE: a graph-based re-ranking step (graph_func / sort_priority)
            # used to run here and is currently disabled.

            # Indexing [0] raises IndexError when no candidate was produced;
            # that case is reported by the handler below, as before.
            commented_user_1 = [commented_user[0]]
            commented_user_3 = commented_user[0:3]
            commented_user_5 = commented_user[0:5]
            # count_dict1 = compare_result(commented_user_1, comment_info_key,count_dict1)
            # count_dict3 = compare_result(commented_user_3, comment_info_key,count_dict3)
            # count_dict5 = compare_result(commented_user_5, comment_info_key,count_dict5)

        except Exception as e:
            # Best effort: one failing entry must not abort the whole run.
            LOG.error('Error in {}:{},  Cause {}'.format(
                number, pull_request_user, e),
                      exc_info=True)
    # save_rate(project, file_name, count_dict1)
    # save_rate(project, file_name, count_dict3)
    # save_rate(project, file_name, count_dict5)
    predict_all_time = numpy.sum(predict_time_list)
    write_to_file(file_name, f'train spend time is {train_spend_time}')
    write_to_file(file_name, f'predict all spend time is {predict_all_time}')

    end_time = time.time()
    spend_time = end_time - start_time
    write_to_file(file_name, f'spend time is {spend_time}')
    print('end test----')