def testEARECAlgorithm(project, dates, filter_train=False, filter_test=False, a=0.5):
    """Evaluate the EAREC recommender on every date window and log the scores to Excel.

    For each entry of ``dates`` the trainer ``EARECTrain.algorithmBody`` is run,
    its recommendation list is scored against the answer list with
    ``DataProcessUtils.judgeRecommend``, and the five resulting scores are
    written to the workbook. After the last window the accumulated scores are
    handed to ``DataProcessUtils.saveFinallyResult``.

    Args:
        project: Project name; part of the workbook file name and forwarded
            to the trainer.
        dates: Iterable of date windows, each passed to the trainer unchanged.
        filter_train: Forwarded to ``EARECTrain.algorithmBody``.
        filter_test: Forwarded to ``EARECTrain.algorithmBody``.
        a: Forwarded to ``EARECTrain.algorithmBody`` as keyword ``a``.
    """
    top_n = 5  # number of reviewers recommended
    workbook = f'outputEAREC_{project}_{filter_train}_{filter_test}.xls'
    sheet = 'result'

    def append_row(cells):
        # Every row goes out with the helper's normal cell style.
        ExcelHelper().appendExcelRow(workbook, sheet, cells, style=ExcelHelper.getNormalStyle())

    # Accumulated scores, one list per value returned by judgeRecommend
    # (topk, mrr, precisionk, recallk, fmeasurek - in that order).
    history = ([], [], [], [], [])

    # Create the workbook before any window is processed.
    ExcelHelper().initExcelFile(fileName=workbook, sheetName=sheet, excel_key_list=['训练集', '测试集'])

    for window in dates:
        began = datetime.now()
        recommended, answers, _prs, _name_map, _train_size = EARECTrain.algorithmBody(
            window, project, top_n,
            filter_train=filter_train, filter_test=filter_test, a=a)

        # Score the recommendation list against the answers.
        topk, mrr, precisionk, recallk, fmeasurek = \
            DataProcessUtils.judgeRecommend(recommended, answers, top_n)
        for bucket, value in zip(history, (topk, mrr, precisionk, recallk, fmeasurek)):
            bucket.append(value)

        # Per-window result.
        DataProcessUtils.saveResult(workbook, sheet, topk, mrr, precisionk, recallk, fmeasurek, window)

        # Separator: a blank row followed by a fresh header row.
        append_row([''])
        append_row(['训练集', '测试集'])

        print("cost time:", datetime.now() - began)

    # Summary over all windows.
    DataProcessUtils.saveFinallyResult(workbook, sheet, *history)
def testAlgorithm(project, dates, filter_train=False, filter_test=False, error_analysis=False,
                  test_type=StringKeyUtils.STR_TEST_TYPE_SLIDE):
    """Evaluate the information-retrieval based (IR_AC) recommender over every date window.

    Per the original author's note, each tuple in ``dates`` describes the whole
    time span of one case and its last month is used for testing. When error
    analysis is on, the tuple is indexed as
    ``(start_year, start_month, end_year, end_month)`` to build the names of
    the change-trigger ``.tsv`` files.

    Args:
        project: Project name; part of the workbook and data file names.
        dates: Iterable of date tuples, one per case.
        filter_train: Forwarded to ``IR_ACTrain.algorithmBody``.
        filter_test: Forwarded to ``IR_ACTrain.algorithmBody``.
        error_analysis: When true, answers loaded from the change-trigger data
            are fed to ``DataProcessUtils.errorAnalysis`` and the four
            resulting ratios are accumulated per window.
        test_type: ``StringKeyUtils.STR_TEST_TYPE_SLIDE`` reads the single
            file of the end month; ``STR_TEST_TYPE_INCREMENT`` reads one file
            per month from the start month through the end month.
    """
    top_n = 5  # number of reviewers recommended
    workbook = f'outputIR_AC_{project}_{filter_train}_{filter_test}_{error_analysis}.xlsx'
    sheet = 'result'

    def trigger_file(year, month):
        # Change-trigger data is stored as one file per calendar month.
        return projectConfig.getIR_ACDataPath() + os.sep \
            + f'IR_AC_ALL_{project}_data_change_trigger_{year}_{month}_to_{year}_{month}.tsv'

    def append_row(cells):
        ExcelHelper().appendExcelRow(workbook, sheet, cells, style=ExcelHelper.getNormalStyle())

    # Accumulated scores in judgeRecommend's return order
    # (topk, mrr, precisionk, recallk, fmeasurek).
    history = ([], [], [], [], [])

    # Accumulated error-analysis ratios, per the original comments:
    #   [0] share of PRs with a successfully recommended reviewer
    #   [1] share of PRs whose recommended reviewer hit but was filtered out
    #   [2] share of PRs with a wrong recommendation
    #   [3] share of PRs where correctness of the recommendation is unknown
    ratio_history = ([], [], [], [])
    error_analysis_datas = None

    # Create the workbook before any window is processed.
    ExcelHelper().initExcelFile(fileName=workbook, sheetName=sheet, excel_key_list=['训练集', '测试集'])

    for window in dates:
        began = datetime.now()

        recommended, answers, prs, name_map, _train_size = IR_ACTrain.algorithmBody(
            window, project, top_n,
            filter_train=filter_train, filter_test=filter_test, test_type=test_type)
        topk, mrr, precisionk, recallk, fmeasurek = \
            DataProcessUtils.judgeRecommend(recommended, answers, top_n)
        for bucket, value in zip(history, (topk, mrr, precisionk, recallk, fmeasurek)):
            bucket.append(value)

        window_ratios = None
        reference_answers = None
        if error_analysis:
            if test_type == StringKeyUtils.STR_TEST_TYPE_SLIDE:
                reference_answers = DataProcessUtils.getAnswerListFromChangeTriggerData(
                    project, window, prs, name_map,
                    trigger_file(window[2], window[3]),
                    'review_user_login', 'pr_number')
            elif test_type == StringKeyUtils.STR_TEST_TYPE_INCREMENT:
                # The data is split by month, so collect every monthly file of the span.
                monthly_files = []
                first = window[0] * 12 + window[1]
                last = window[2] * 12 + window[3]
                for month_index in range(first, last + 1):
                    # Shifting by one maps a multiple of 12 to December of the previous year.
                    year, month_zero_based = divmod(month_index - 1, 12)
                    monthly_files.append(trigger_file(year, month_zero_based + 1))
                reference_answers = DataProcessUtils.getAnswerListFromChangeTriggerDataByIncrement(
                    project, prs, name_map, monthly_files,
                    'review_user_login', 'pr_number')

            positive_success, negative_success, positive_fail, negative_fail = \
                DataProcessUtils.errorAnalysis(recommended, answers, reference_answers, top_n)
            window_ratios = [positive_success, negative_success, positive_fail, negative_fail]
            for bucket, value in zip(ratio_history, window_ratios):
                bucket.append(value)

        if window_ratios:
            error_analysis_datas = list(ratio_history)

        # Per-window result.
        DataProcessUtils.saveResult(workbook, sheet, topk, mrr, precisionk, recallk, fmeasurek, window)

        # Separator: a blank row followed by a fresh header row.
        append_row([''])
        append_row(['训练集', '测试集'])

        print("cost time:", datetime.now() - began)

    # Visualise the recommendation errors.
    DataProcessUtils.recommendErrorAnalyzer2(error_analysis_datas, project,
                                             f'IR_AC_{test_type}_{filter_train}_{filter_test}')

    # Summary over all windows.
    DataProcessUtils.saveFinallyResult(workbook, sheet, *history, error_analysis_datas)
def testCBAlgorithmsByMultipleLabels(projects, dates, algorithms): """ algorithm : 混合算法,提供算法的排列组合 项目 -> 日期 -> 算法排列组合 每一个项目占一个文件位置 每一个算法组合占一页 """ recommendNum = 5 # 推荐数量 for project in projects: excelName = f'outputCB_{project}.xlsx' sheetName = 'result' """初始化excel文件""" ExcelHelper().initExcelFile(fileName=excelName, sheetName=sheetName, excel_key_list=['训练集', '测试集']) """对不同时间做一个综合统计 组合的int -> [[],[]....] """ topks = {} mrrs = {} precisionks = {} recallks = {} fmeasureks = {} """初始化""" for i in range(1, 2 ** algorithms.__len__()): topks[i] = [] mrrs[i] = [] precisionks[i] = [] recallks[i] = [] fmeasureks[i] = [] for date in dates: """获得不同算法的推荐列表,答案和pr列表""" """不同算法预处理可能会筛去一些pr pr列表用于做统一""" prs = [] recommendLists = [] answerLists = [] """计算不同人之前在训练集review的次数 作为后面综合统计的第二依据""" reviewerFreq = DataProcessUtils.getReviewerFrequencyDict(project, date) for algorithm in algorithms: print(f"project:{project}, date:{date}, algorithm:{algorithm}") """根据算法获得推荐列表""" recommendList, answerList, prList, convertDict, trainSize = CBTrain.algorithmBody(date, project, algorithm, recommendNum) # print(recommendList) print("trainSize:", trainSize) """人名还原""" recommendList, answerList = CBTrain.recoverName(recommendList, answerList, convertDict) # print(recommendList) prs.append(prList) recommendLists.append(recommendList) answerLists.append(answerList) """不同算法按照共有的pr 顺序调整""" prs, recommendLists, answerLists = CBTrain.normList(prs, recommendLists, answerLists) """貌似推荐是人名也可以做效果评估 暂时不转化""" # CBTrain.convertNameToNumber(recommendLists, answerLists) """对不同算法做排列组合""" for i in range(1, 2 ** algorithms.__len__()): tempRecommendList = [] """不同算法测试的 answer列表相同,取一个即可""" answer = answerLists[0] involve = [0] * algorithms.__len__() k = i for j in range(0, algorithms.__len__()): involve[algorithms.__len__() - j - 1] = k % 2 k = floor(k / 2) """组合算法label为excel sheetName""" label = '' for j in range(0, algorithms.__len__()): if involve[j] == 1: if label != '': label = label + '_' label = label + 
algorithms[j] tempRecommendList.append(recommendLists[j]) sheetName = label ExcelHelper().addSheet(filename=excelName, sheetName=sheetName) """波达计数 结合不同投票选出最终名单""" finalRecommendList = [] for j in range(0, answer.__len__()): recommendList = SortAlgorithmUtils.BordaCountSortWithFreq([x[j] for x in tempRecommendList], reviewerFreq) finalRecommendList.append(recommendList) """评价指标""" topk, mrr, precisionk, recallk, fmeasurek = \ DataProcessUtils.judgeRecommend(finalRecommendList, answer, recommendNum) """结果写入excel""" DataProcessUtils.saveResult(excelName, sheetName, topk, mrr, precisionk, recallk, fmeasurek, date) """累积评价指标""" topks[i].append(topk) mrrs[i].append(mrr) precisionks[i].append(precisionk) recallks[i].append(recallk) fmeasureks[i].append(fmeasurek) """对指标做综合评判""" for i in range(1, 2 ** algorithms.__len__()): involve = [0] * algorithms.__len__() k = i for j in range(0, algorithms.__len__()): involve[algorithms.__len__() - j - 1] = k % 2 k = floor(k / 2) """组合算法label为excel sheetName""" label = '' for j in range(0, algorithms.__len__()): if involve[j] == 1: if label != '': label = label + '_' label = label + algorithms[j] sheetName = label DataProcessUtils.saveFinallyResult(excelName, sheetName, topks[i], mrrs[i], precisionks[i], recallks[i], fmeasureks[i])
def testRF_AAlgorithms(projects, dates, filter_train=False, filter_test=False, error_analysis=True): """ RF 算法由于特征和输入无法和ML兼容,单独开一个文件 """ startTime = datetime.now() for project in projects: excelName = f'outputRF_A_{project}_{filter_train}_{filter_test}_{error_analysis}.xlsx' recommendNum = 5 # 推荐数量 sheetName = 'result' """初始化excel文件""" ExcelHelper().initExcelFile(fileName=excelName, sheetName=sheetName, excel_key_list=['训练集', '测试集']) """初始化项目抬头""" content = ["项目名称:", project] ExcelHelper().appendExcelRow(excelName, sheetName, content, style=ExcelHelper.getNormalStyle()) ExcelHelper().appendExcelRow(excelName, sheetName, content, style=ExcelHelper.getNormalStyle()) """计算累积数据""" topks = [] mrrs = [] precisionks = [] recallks = [] fmeasureks = [] recommend_positive_success_pr_ratios = [] # pr 中有推荐成功人选的比例 recommend_positive_success_time_ratios = [] # 推荐pr * 人次 中有推荐成功人选的频次比例 recommend_negative_success_pr_ratios = [] # pr 中有推荐人选Hit 但被滤掉的pr的比例 recommend_negative_success_time_ratios = [] # 推荐pr * 人次中有推荐人选Hit 但是被滤掉的pr的比例 recommend_positive_fail_pr_ratios = [] # pr 中有推荐人选推荐错误的pr比例 recommend_positive_fail_time_ratios = [] # pr 中有pr * 人次有推荐错误的频次比例 recommend_negative_fail_pr_ratios = [] # pr 中有推荐人选不知道是否正确的比例 recommend_negative_fail_time_ratios = [] # pr中有pr * 人次有不知道是否正确的比例 error_analysis_datas = None for date in dates: recommendList, answerList, prList, convertDict, trainSize = RF_ATrain.algorithmBody(date, project, recommendNum, filter_train=filter_train, filter_test=filter_test) """根据推荐列表做评价""" topk, mrr, precisionk, recallk, fmeasurek = \ DataProcessUtils.judgeRecommend(recommendList, answerList, recommendNum) topks.append(topk) mrrs.append(mrr) precisionks.append(precisionk) recallks.append(recallk) fmeasureks.append(fmeasurek) error_analysis_data = None if error_analysis: y = date[2] m = date[3] filename = projectConfig.getRF_ADataPath() + os.sep + f'RF_A_ALL_{project}_data_change_trigger_{y}_{m}_to_{y}_{m}.tsv' filter_answer_list = 
DataProcessUtils.getAnswerListFromChangeTriggerData(project, date, prList, convertDict, filename, 'review_user_login', 'pr_number') # recommend_positive_success_pr_ratio, recommend_positive_success_time_ratio, recommend_negative_success_pr_ratio, \ # recommend_negative_success_time_ratio, recommend_positive_fail_pr_ratio, recommend_positive_fail_time_ratio, \ # recommend_negative_fail_pr_ratio, recommend_negative_fail_time_ratio = DataProcessUtils.errorAnalysis( # recommendList, answerList, filter_answer_list, recommendNum) # error_analysis_data = [recommend_positive_success_pr_ratio, recommend_positive_success_time_ratio, # recommend_negative_success_pr_ratio, recommend_negative_success_time_ratio, # recommend_positive_fail_pr_ratio, recommend_positive_fail_time_ratio, # recommend_negative_fail_pr_ratio, recommend_negative_fail_time_ratio] recommend_positive_success_pr_ratio, recommend_negative_success_pr_ratio, recommend_positive_fail_pr_ratio, \ recommend_negative_fail_pr_ratio = DataProcessUtils.errorAnalysis( recommendList, answerList, filter_answer_list, recommendNum) error_analysis_data = [recommend_positive_success_pr_ratio, recommend_negative_success_pr_ratio, recommend_positive_fail_pr_ratio, recommend_negative_fail_pr_ratio] # recommend_positive_success_pr_ratios.append(recommend_positive_success_pr_ratio) # recommend_positive_success_time_ratios.append(recommend_positive_success_time_ratio) # recommend_negative_success_pr_ratios.append(recommend_negative_success_pr_ratio) # recommend_negative_success_time_ratios.append(recommend_negative_success_time_ratio) # recommend_positive_fail_pr_ratios.append(recommend_positive_fail_pr_ratio) # recommend_positive_fail_time_ratios.append(recommend_positive_fail_time_ratio) # recommend_negative_fail_pr_ratios.append(recommend_negative_fail_pr_ratio) # recommend_negative_fail_time_ratios.append(recommend_negative_fail_time_ratio) recommend_positive_success_pr_ratios.append(recommend_positive_success_pr_ratio) 
recommend_negative_success_pr_ratios.append(recommend_negative_success_pr_ratio) recommend_positive_fail_pr_ratios.append(recommend_positive_fail_pr_ratio) recommend_negative_fail_pr_ratios.append(recommend_negative_fail_pr_ratio) if error_analysis_data: # error_analysis_datas = [recommend_positive_success_pr_ratios, recommend_positive_success_time_ratios, # recommend_negative_success_pr_ratios, recommend_negative_success_time_ratios, # recommend_positive_fail_pr_ratios, recommend_positive_fail_time_ratios, # recommend_negative_fail_pr_ratios, recommend_negative_fail_time_ratios] error_analysis_datas = [recommend_positive_success_pr_ratios, recommend_negative_success_pr_ratios, recommend_positive_fail_pr_ratios, recommend_negative_fail_pr_ratios] """结果写入excel""" DataProcessUtils.saveResult(excelName, sheetName, topk, mrr, precisionk, recallk, fmeasurek, date, error_analysis_data) """文件分割""" content = [''] ExcelHelper().appendExcelRow(excelName, sheetName, content, style=ExcelHelper.getNormalStyle()) content = ['训练集', '测试集'] ExcelHelper().appendExcelRow(excelName, sheetName, content, style=ExcelHelper.getNormalStyle()) print("cost time:", datetime.now() - startTime) """推荐错误可视化""" DataProcessUtils.recommendErrorAnalyzer2(error_analysis_datas, project, f'RF_{filter_train}_{filter_test}') """计算历史累积数据""" DataProcessUtils.saveFinallyResult(excelName, sheetName, topks, mrrs, precisionks, recallks, fmeasureks, error_analysis_datas)
def testCNAlgorithm(project, dates, filter_train=False, filter_test=False, is_split=False, error_analysis=False):
    """Evaluate the CN recommender over every date window, per community and as a whole.

    For each window ``CNTrain.algorithmBody`` returns ``communities_data``, a
    mapping from community id to a record holding ``recommend_list`` and
    ``answer_list``; the entry under ``'whole'`` also carries ``modularity``,
    ``entropy`` and ``avg_variance``. Every community is scored with
    ``DataProcessUtils.judgeRecommend`` and the scores are stored back into
    its record. The ``'whole'`` scores are the ones accumulated for the final
    summary.

    Parameters added on 2020-08-07, per the original note:

    Args:
        project: Project name; part of the workbook and data file names.
        dates: Iterable of date tuples. With error analysis on, items 2 and 3
            of a tuple are used as year and month of the change-trigger file.
        filter_train: Whether to use training data filtered by change trigger.
        filter_test: Whether to use validation data filtered by change trigger.
        is_split: Forwarded to ``CNTrain.algorithmBody``.
        error_analysis: Whether to collect error statistics against the
            change-trigger filtered answers.
    """
    recommendNum = 5  # number of reviewers recommended
    excelName = f'outputCN_{project}_{filter_train}_{filter_test}_{error_analysis}.xlsx'
    sheetName = 'result'

    def append_row(cells):
        ExcelHelper().appendExcelRow(excelName, sheetName, cells, style=ExcelHelper.getNormalStyle())

    # Accumulated 'whole' scores, one entry per window.
    topks = []
    mrrs = []
    precisionks = []
    recallks = []
    fmeasureks = []

    # Accumulated error-analysis ratios, one entry per window.
    recommend_positive_success_pr_ratios = []  # share of PRs with a successfully recommended reviewer
    recommend_negative_success_pr_ratios = []  # share of PRs whose recommended reviewer hit but was filtered out
    recommend_positive_fail_pr_ratios = []  # share of PRs with a wrong recommendation
    recommend_negative_fail_pr_ratios = []  # share of PRs where correctness of the recommendation is unknown
    error_analysis_datas = None

    # Create the workbook before any window is processed.
    ExcelHelper().initExcelFile(fileName=excelName, sheetName=sheetName, excel_key_list=['训练集', '测试集'])

    for date in dates:
        CNTrain.clean()
        startTime = datetime.now()
        prList, convertDict, trainSize, communities_data = CNTrain.algorithmBody(
            date, project, recommendNum,
            filter_train=filter_train, filter_test=filter_test, is_split=is_split)

        # Score each community in key order and keep the scores on its record.
        for cid, c_data in sorted(communities_data.items(), key=lambda x: x[0]):
            topk, mrr, precisionk, recallk, fmeasurek = \
                DataProcessUtils.judgeRecommend(c_data['recommend_list'], c_data['answer_list'], recommendNum)
            communities_data[cid]['topk'] = topk
            communities_data[cid]['mrr'] = mrr
            communities_data[cid]['precisionk'] = precisionk
            communities_data[cid]['recallk'] = recallk
            communities_data[cid]['fmeasurek'] = fmeasurek

        whole = communities_data['whole']
        print("project: {0}, modularity: {1}, entropy: {2}, avg_variance: {3}".format(
            project, whole['modularity'], whole['entropy'], whole['avg_variance']))

        # Error analysis runs ONCE per window. It used to be pasted twice in
        # this loop, which loaded the change-trigger file twice and appended
        # every ratio twice, so the accumulated lists held each window in
        # duplicate.
        error_analysis_data = None
        if error_analysis:
            y = date[2]
            m = date[3]
            filename = projectConfig.getCNDataPath() + os.sep \
                + f'CN_{project}_data_change_trigger_{y}_{m}_to_{y}_{m}.tsv'
            filter_answer_list = DataProcessUtils.getAnswerListFromChangeTriggerData(
                project, date, prList, convertDict, filename, 'reviewer', 'pull_number')

            recommend_positive_success_pr_ratio, recommend_negative_success_pr_ratio, \
                recommend_positive_fail_pr_ratio, recommend_negative_fail_pr_ratio = \
                DataProcessUtils.errorAnalysis(whole['recommend_list'], whole['answer_list'],
                                               filter_answer_list, recommendNum)
            error_analysis_data = [recommend_positive_success_pr_ratio, recommend_negative_success_pr_ratio,
                                   recommend_positive_fail_pr_ratio, recommend_negative_fail_pr_ratio]

            recommend_positive_success_pr_ratios.append(recommend_positive_success_pr_ratio)
            recommend_negative_success_pr_ratios.append(recommend_negative_success_pr_ratio)
            recommend_positive_fail_pr_ratios.append(recommend_positive_fail_pr_ratio)
            recommend_negative_fail_pr_ratios.append(recommend_negative_fail_pr_ratio)

        if error_analysis_data:
            error_analysis_datas = [recommend_positive_success_pr_ratios, recommend_negative_success_pr_ratios,
                                    recommend_positive_fail_pr_ratios, recommend_negative_fail_pr_ratios]

        topks.append(whole['topk'])
        mrrs.append(whole['mrr'])
        precisionks.append(whole['precisionk'])
        recallks.append(whole['recallk'])
        fmeasureks.append(whole['fmeasurek'])

        # Per-window result, community by community.
        DataProcessUtils.saveResult_Community_Version(excelName, sheetName, communities_data, date)

        # Per-window result in the flat layout. NOTE(review): topk .. fmeasurek
        # are the values left over from the last community in key order,
        # unchanged from the previous behaviour - presumably that is 'whole';
        # confirm against the keys CNTrain produces.
        DataProcessUtils.saveResult(excelName, sheetName, topk, mrr, precisionk, recallk, fmeasurek, date,
                                    error_analysis_data)

        # Separator: a blank row followed by a fresh header row.
        append_row([''])
        append_row(['训练集', '测试集'])

        print("cost time:", datetime.now() - startTime)

    # Visualise the recommendation errors.
    DataProcessUtils.recommendErrorAnalyzer2(error_analysis_datas, project, f'CN_{filter_train}_{filter_test}')

    # Summary over all windows.
    DataProcessUtils.saveFinallyResult(excelName, sheetName, topks, mrrs, precisionks, recallks, fmeasureks,
                                       error_analysis_datas)