def server_cache(msg, res):
    """Populate the text, TF-IDF and document-count caches.

    A log line is written before the caches are filled and another one
    once all three cache_all_data() calls have returned.

    Args:
        msg: not read by this function.
        res: not read by this function.

    Returns:
        SIGNAL_CONTINUE, a name defined outside this function.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')

    # Order is preserved: text, then TF-IDF, then document counts.
    for cache_owner in (IRText, IRTFIDF, IRDocumentCount):
        cache_owner.cache_all_data()

    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
def test_cache_all_data(self):
    """Load the bug test configuration and run IRText.cache_all_data().

    The method makes no assertions of its own; it only exercises the
    caching call after the configuration file has been loaded.
    """
    # IRLog is not referenced below; the import is kept as it was.
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_text import IRText

    config = IRConfig.get_instance()
    config.load('../data/test/bug_test.cfg')

    IRText.cache_all_data()
def do_test_over_file(self, filename):
    """Evaluate similar/duplicate detection over a test file.

    Each non-blank line of the file is parsed with IRReport.from_string.
    The ids returned by similarities_and_duplicates() are compared with
    the report's real duplicate group through
    IRDuplicateGroup.is_in_same_duplicate_group, the per-report
    precision and recall come from self.__report_result, and the
    averages are written to the log as two comma-separated lines
    (a header line followed by the values).

    Reports whose real duplicate group is empty are not counted. When
    no report (or no report with detected duplicates) was evaluated,
    the corresponding averages are logged as 0.0 and the counts as 0.

    Args:
        filename: str, the input file generated by
            generate_incomplete_test_file.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_duplicate_group import IRDuplicateGroup
    from ir_text import IRText
    from ir_term_count import IRTermCount
    from ir_tfidf import IRTFIDF
    from ir_report import IRReport
    from ir_document_count import IRDocumentCount

    IRText.cache_all_data()
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()

    remove_self_bug_id = IRConfig.get_instance().get_bool(
        'remove_self_bug_id', True)

    # Running totals for the "similar reports" result list.
    sim_tot_precision = 0.0
    sim_tot_recall = 0.0
    sim_bi_tot_recall = 0.0
    sim_tot_size = 0
    # Running totals for the "duplicate reports" result list.
    dup_tot_precision = 0.0
    dup_tot_recall = 0.0
    dup_bi_tot_recall = 0.0
    dup_num = 0
    test_num = 0

    # The with-statement closes the file even if a report fails midway.
    with open(filename, 'r') as infile:
        for line in infile:
            # str.strip() returns a new string; the result has to be
            # kept, otherwise the parser still sees the line terminator.
            line = line.strip()
            if not line:
                continue

            IRLog.get_instance().println('----test----')
            new_report = IRReport.from_string(line)
            bug_id = new_report.get_dummy_bug_id()
            # NOTE(review): ori_report is only needed by the disabled
            # comparison below. It is still constructed because the
            # side effects of IRReport(...) are not visible from here;
            # confirm and drop it together with the IRTermCount import.
            ori_report = IRReport(bug_id)
            #IRLog.get_instance().println('Summary')
            #IRTermCount.show_dict_compare(
            #    ori_report.get_summary_termcount(),
            #    new_report.get_summary_termcount())
            #IRLog.get_instance().println('Description')
            #IRTermCount.show_dict_compare(
            #    ori_report.get_description_termcount(),
            #    new_report.get_description_termcount())

            # do test for single
            similarities, duplicates = \
                new_report.similarities_and_duplicates()
            sim_ids = [sim[0] for sim in similarities]
            dup_ids = [dup[0] for dup in duplicates]
            IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
            IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))

            # evaluate sim
            sim_hit, sim_nothit, real_duplicates = \
                IRDuplicateGroup.is_in_same_duplicate_group(
                    bug_id, sim_ids, remove_self_bug_id)
            # Some groups contain only one report: nothing to evaluate,
            # so the case is not counted at all.
            if len(real_duplicates) == 0:
                continue
            test_num += 1

            precision, recall = self.__report_result(
                bug_id, sim_hit, sim_nothit, real_duplicates)
            sim_tot_precision += precision
            sim_tot_recall += recall
            sim_tot_size += len(sim_ids)
            # "Binary" recall: 1 when at least one real duplicate is hit.
            sim_bi_tot_recall += 1 if recall > 0.0 else 0

            # evaluate dup, only when duplicates were actually proposed
            if len(dup_ids) > 0:
                dup_num += 1
                dup_hit, dup_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, dup_ids, remove_self_bug_id)
                precision, recall = self.__report_result(
                    bug_id, dup_hit, dup_nothit, real_duplicates)
                dup_tot_precision += precision
                dup_tot_recall += recall
                dup_bi_tot_recall += 1 if recall > 0.0 else 0

    # general conclusion
    # Separate divisors keep the logged counts truthful (0 stays 0) and
    # avoid ZeroDivisionError when nothing was evaluated; the totals
    # are all zero in that case, so the averages come out as 0.0.
    sim_divisor = test_num if test_num else 1
    dup_divisor = dup_num if dup_num else 1

    header = ['#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
              '#dup', 'dup pre', 'dup rec', 'dup birec']
    values = [str(test_num),
              str(sim_tot_precision / sim_divisor),
              str(sim_tot_recall / sim_divisor),
              str(sim_bi_tot_recall / sim_divisor),
              str(float(sim_tot_size) / sim_divisor),
              str(dup_num),
              str(dup_tot_precision / dup_divisor),
              str(dup_tot_recall / dup_divisor),
              str(dup_bi_tot_recall / dup_divisor)]
    IRLog.get_instance().println(','.join(header))
    IRLog.get_instance().println(','.join(values))
def do_test_over_file(self, filename):
    """Evaluate similar/duplicate detection over a test file.

    Each non-blank line of the file is parsed with IRReport.from_string.
    The ids returned by similarities_and_duplicates() are compared with
    the report's real duplicate group through
    IRDuplicateGroup.is_in_same_duplicate_group, the per-report
    precision and recall come from self.__report_result, and the
    averages are written to the log as two comma-separated lines
    (a header line followed by the values).

    Reports whose real duplicate group is empty are not counted. When
    no report (or no report with detected duplicates) was evaluated,
    the corresponding averages are logged as 0.0 and the counts as 0.

    Args:
        filename: str, the input file generated by
            generate_incomplete_test_file.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_duplicate_group import IRDuplicateGroup
    from ir_text import IRText
    from ir_term_count import IRTermCount
    from ir_tfidf import IRTFIDF
    from ir_report import IRReport
    from ir_document_count import IRDocumentCount

    IRText.cache_all_data()
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()

    remove_self_bug_id = IRConfig.get_instance().get_bool(
        'remove_self_bug_id', True)

    # Running totals for the "similar reports" result list.
    sim_tot_precision = 0.0
    sim_tot_recall = 0.0
    sim_bi_tot_recall = 0.0
    sim_tot_size = 0
    # Running totals for the "duplicate reports" result list.
    dup_tot_precision = 0.0
    dup_tot_recall = 0.0
    dup_bi_tot_recall = 0.0
    dup_num = 0
    test_num = 0

    # The with-statement closes the file even if a report fails midway.
    with open(filename, 'r') as infile:
        for line in infile:
            # str.strip() returns a new string; the result has to be
            # kept, otherwise the parser still sees the line terminator.
            line = line.strip()
            if not line:
                continue

            IRLog.get_instance().println('----test----')
            new_report = IRReport.from_string(line)
            bug_id = new_report.get_dummy_bug_id()
            # NOTE(review): ori_report is only needed by the disabled
            # comparison below. It is still constructed because the
            # side effects of IRReport(...) are not visible from here;
            # confirm and drop it together with the IRTermCount import.
            ori_report = IRReport(bug_id)
            #IRLog.get_instance().println('Summary')
            #IRTermCount.show_dict_compare(
            #    ori_report.get_summary_termcount(),
            #    new_report.get_summary_termcount())
            #IRLog.get_instance().println('Description')
            #IRTermCount.show_dict_compare(
            #    ori_report.get_description_termcount(),
            #    new_report.get_description_termcount())

            # do test for single
            similarities, duplicates = \
                new_report.similarities_and_duplicates()
            sim_ids = [sim[0] for sim in similarities]
            dup_ids = [dup[0] for dup in duplicates]
            IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
            IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))

            # evaluate sim
            sim_hit, sim_nothit, real_duplicates = \
                IRDuplicateGroup.is_in_same_duplicate_group(
                    bug_id, sim_ids, remove_self_bug_id)
            # Some groups contain only one report: nothing to evaluate,
            # so the case is not counted at all.
            if len(real_duplicates) == 0:
                continue
            test_num += 1

            precision, recall = self.__report_result(
                bug_id, sim_hit, sim_nothit, real_duplicates)
            sim_tot_precision += precision
            sim_tot_recall += recall
            sim_tot_size += len(sim_ids)
            # "Binary" recall: 1 when at least one real duplicate is hit.
            sim_bi_tot_recall += 1 if recall > 0.0 else 0

            # evaluate dup, only when duplicates were actually proposed
            if len(dup_ids) > 0:
                dup_num += 1
                dup_hit, dup_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, dup_ids, remove_self_bug_id)
                precision, recall = self.__report_result(
                    bug_id, dup_hit, dup_nothit, real_duplicates)
                dup_tot_precision += precision
                dup_tot_recall += recall
                dup_bi_tot_recall += 1 if recall > 0.0 else 0

    # general conclusion
    # Separate divisors keep the logged counts truthful (0 stays 0) and
    # avoid ZeroDivisionError when nothing was evaluated; the totals
    # are all zero in that case, so the averages come out as 0.0.
    sim_divisor = test_num if test_num else 1
    dup_divisor = dup_num if dup_num else 1

    header = ['#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
              '#dup', 'dup pre', 'dup rec', 'dup birec']
    values = [str(test_num),
              str(sim_tot_precision / sim_divisor),
              str(sim_tot_recall / sim_divisor),
              str(sim_bi_tot_recall / sim_divisor),
              str(float(sim_tot_size) / sim_divisor),
              str(dup_num),
              str(dup_tot_precision / dup_divisor),
              str(dup_tot_recall / dup_divisor),
              str(dup_bi_tot_recall / dup_divisor)]
    IRLog.get_instance().println(','.join(header))
    IRLog.get_instance().println(','.join(values))