def test_cache_all_data(self):
    """Run IRTFIDF.cache_all_data() against the test configuration.

    Starts the log, loads '../data/test/bug_test.cfg', runs the cache
    and stops the log again. The log is stopped even when loading the
    configuration or caching raises.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_tfidf import IRTFIDF

    IRLog.get_instance().start_log()
    try:
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        IRTFIDF.cache_all_data()
    finally:
        # Always pair start_log() with stop_log(), so a failing run does
        # not leave the log started for whatever executes next.
        IRLog.get_instance().stop_log()
def server_cache(msg, res):
    """Run cache_all_data() on IRText, IRTFIDF and IRDocumentCount.

    A log line is printed before and after the three cache calls.

    Args:
        msg: not read by this handler.
        res: not read by this handler.

    Returns:
        SIGNAL_CONTINUE.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')
    # Same order as before: text first, then TF-IDF, then document count.
    for cacher in (IRText, IRTFIDF, IRDocumentCount):
        cacher.cache_all_data()
    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
def start_shell(cls):
    """Start a shell that does recommending interactively.

    Caches the TF-IDF and document-count data once, then reads commands
    from standard input until 'exit' is entered:

        new   prompt for time, product, summary and description and
              build a new report from them
        do    call cls.do_recommend() on the current report
        ls    print the current report
        ad    append text to the description of the current report
        ap    append penalty terms to the current report
        sd    set the dummy bug id of the current report
        help  show the help text

    'do', 'ls', 'ad', 'ap' and 'sd' log an error when no report has been
    created yet. An unparsable time or bug id is prompted for again.
    EOF or Ctrl-C while reading input propagates to the caller instead
    of being swallowed by the retry loops.
    """
    import time

    from ir_log import IRLog
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    from ir_report import IRReport

    def report_missing():
        """Log the error shared by every command that needs a report."""
        IRLog.get_instance().println('Error! Please create '
                                     'report first.')

    IRLog.get_instance().println("Starting Intereport...")
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRLog.get_instance().println("Intereport Started. Waiting for input")

    new_report = None
    while True:
        cmd = raw_input("Input command:").strip()
        if cmd == 'exit':
            IRLog.get_instance().println('Exiting')
            break
        elif cmd == 'new':
            IRLog.get_instance().println('Creating New Report')
            cur_time = -1
            while cur_time < 0:
                # Read outside the try block: only a malformed date should
                # trigger a retry. A bare except here used to swallow
                # EOFError/KeyboardInterrupt and loop forever.
                raw_time = raw_input("Input Time (e.g., 2011-05-05): ")
                try:
                    cur_time = int(time.mktime(
                        time.strptime(raw_time, '%Y-%m-%d')))
                except (ValueError, OverflowError):
                    cur_time = -1
            product = raw_input("Input Product: ")
            summary = raw_input("Summary: ")
            raw_description = raw_input("Description:\n")
            # The two trailing fields of the serialized report are left empty.
            new_report = IRReport.from_string(IRReport.separator.join([
                str(cur_time), product.lower(), summary, raw_description,
                '', '']))
            cls.__print_report(new_report)
        elif cmd == 'do':
            IRLog.get_instance().println('Do Recommending')
            if new_report is None:
                report_missing()
            else:
                cls.do_recommend(new_report)
        elif cmd == 'ls':
            IRLog.get_instance().println('Show Current Report')
            if new_report is None:
                report_missing()
            else:
                cls.__print_report(new_report)
        elif cmd == 'ad':
            IRLog.get_instance().println('Appending Description')
            if new_report is None:
                report_missing()
            else:
                append_description = raw_input("Append Description:\n")
                description = ' '.join([new_report.get_description_text(),
                                        append_description])
                # Build a fresh report around the longer description and
                # copy every other property over from the current one.
                dummy_report = IRReport(new_report.get_summary_text(),
                                        description)
                dummy_report.set_stacktrace(new_report.get_stacktrace())
                dummy_report.set_basic_info(new_report.get_create_ts(),
                                            new_report.get_product())
                dummy_report.set_penalty_terms(
                    new_report.get_penalty_terms())
                dummy_report.set_dummy_bug_id(
                    new_report.get_dummy_bug_id())
                new_report = dummy_report
                IRLog.get_instance().println('Description: %s' % description)
        elif cmd == 'ap':
            IRLog.get_instance().println('Appending Penalties')
            if new_report is None:
                report_missing()
            else:
                from ir_term_count import IRTermCount
                raw = []
                while not raw:
                    answer = raw_input("Input Penalties (split by ','):")
                    # ''.split(',') is [''], so a plain length check never
                    # re-prompts; drop blank entries before testing.
                    raw = [term for term in answer.split(',')
                           if term.strip()]
                current = new_report.get_penalty_terms()
                # Copy so the report's own collection is not mutated in
                # place (and so a non-list collection is accepted too).
                penalty = [] if current is None else list(current)
                penalty += IRTermCount.do_stemming(raw)
                new_report.set_penalty_terms(penalty)
                # Same output as the former print statement, written so it
                # parses with or without print_function.
                print('%d %s' % (len(penalty), penalty))
                IRLog.get_instance().println('Penalties: %s'
                                             % ', '.join(penalty))
        elif cmd == 'sd':
            IRLog.get_instance().println('Set Dummy Bug ID')
            if new_report is None:
                report_missing()
            else:
                bug_id = -1
                while bug_id <= 0:
                    raw_id = raw_input('Dummy Bug ID: ')
                    try:
                        bug_id = int(raw_id)
                    except ValueError:
                        bug_id = -1
                new_report.set_dummy_bug_id(bug_id)
                IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
        elif cmd == 'help':
            cls.__show_help()
        else:
            IRLog.get_instance().println('Error! Unkown command: %s'
                                         % cmd)
            cls.__show_help()
    IRLog.get_instance().println("Bye")
# Build the report to recommend for, depending on ``mode``:
#   'file': sys.argv[3] is a test file, sys.argv[4] the bug id to look up
#           in it, and an optional sys.argv[5] a comma-separated list of
#           penalty terms.
#   'text': sys.argv[3] is a serialized report string.
#   'inte': run the interactive shell, then exit.
new_report = None
if mode == 'file':
    test_file = sys.argv[3]
    bug_id = int(sys.argv[4])
    from ir_sim_bug_evaluator import IRSimBugEvaluator
    new_report = IRSimBugEvaluator.get_report_from_test_file(test_file,
                                                             bug_id)
    if new_report is None:
        IRLog.get_instance().println('Error! Cannot find report %d in %s'
                                     % (bug_id, test_file))
    elif len(sys.argv) > 5:
        from ir_term_count import IRTermCount
        # sys.argv[4] is the bug id parsed above; the penalty terms are
        # the extra argument whose presence the length check guards.
        penalty_terms_raw = sys.argv[5].split(',')
        penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw))
        IRLog.get_instance().println(
            '%d penalty terms: %s:'
            % (len(penalty_terms), ','.join(penalty_terms)))
        new_report.set_penalty_terms(penalty_terms)
elif mode == 'text':
    text = sys.argv[3]
    new_report = IRReport.from_string(text)
elif mode == 'inte':
    IRRecommender.start_shell()
    # Stop the log here as well; exit() skips the stop_log() call below.
    IRLog.get_instance().stop_log()
    exit()
else:
    IRLog.get_instance().println('Error! Unknown mode %s' % mode)

# Recommend only when a report was actually obtained. Without this guard
# an unknown mode raised NameError and a missing report passed None on
# to do_recommend().
if new_report is not None:
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRRecommender.do_recommend(new_report)
IRLog.get_instance().stop_log()
def start_shell(cls):
    """Start a shell that does recommending interactively.

    Caches the TF-IDF and document-count data once, then reads commands
    from standard input until 'exit' is entered:

        new   prompt for time, product, summary and description and
              build a new report from them
        do    call cls.do_recommend() on the current report
        ls    print the current report
        ad    append text to the description of the current report
        ap    append penalty terms to the current report
        sd    set the dummy bug id of the current report
        help  show the help text

    'do', 'ls', 'ad', 'ap' and 'sd' log an error when no report has been
    created yet. An unparsable time or bug id is prompted for again.
    EOF or Ctrl-C while reading input propagates to the caller instead
    of being swallowed by the retry loops.
    """
    import time

    from ir_log import IRLog
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    from ir_report import IRReport

    def report_missing():
        """Log the error shared by every command that needs a report."""
        IRLog.get_instance().println('Error! Please create '
                                     'report first.')

    IRLog.get_instance().println("Starting Intereport...")
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRLog.get_instance().println("Intereport Started. Waiting for input")

    new_report = None
    while True:
        cmd = raw_input("Input command:").strip()
        if cmd == 'exit':
            IRLog.get_instance().println('Exiting')
            break
        elif cmd == 'new':
            IRLog.get_instance().println('Creating New Report')
            cur_time = -1
            while cur_time < 0:
                # Read outside the try block: only a malformed date should
                # trigger a retry. A bare except here used to swallow
                # EOFError/KeyboardInterrupt and loop forever.
                raw_time = raw_input("Input Time (e.g., 2011-05-05): ")
                try:
                    cur_time = int(time.mktime(
                        time.strptime(raw_time, '%Y-%m-%d')))
                except (ValueError, OverflowError):
                    cur_time = -1
            product = raw_input("Input Product: ")
            summary = raw_input("Summary: ")
            raw_description = raw_input("Description:\n")
            # The two trailing fields of the serialized report are left empty.
            new_report = IRReport.from_string(IRReport.separator.join([
                str(cur_time), product.lower(), summary, raw_description,
                '', '']))
            cls.__print_report(new_report)
        elif cmd == 'do':
            IRLog.get_instance().println('Do Recommending')
            if new_report is None:
                report_missing()
            else:
                cls.do_recommend(new_report)
        elif cmd == 'ls':
            IRLog.get_instance().println('Show Current Report')
            if new_report is None:
                report_missing()
            else:
                cls.__print_report(new_report)
        elif cmd == 'ad':
            IRLog.get_instance().println('Appending Description')
            if new_report is None:
                report_missing()
            else:
                append_description = raw_input("Append Description:\n")
                description = ' '.join([new_report.get_description_text(),
                                        append_description])
                # Build a fresh report around the longer description and
                # copy every other property over from the current one.
                dummy_report = IRReport(new_report.get_summary_text(),
                                        description)
                dummy_report.set_stacktrace(new_report.get_stacktrace())
                dummy_report.set_basic_info(new_report.get_create_ts(),
                                            new_report.get_product())
                dummy_report.set_penalty_terms(
                    new_report.get_penalty_terms())
                dummy_report.set_dummy_bug_id(
                    new_report.get_dummy_bug_id())
                new_report = dummy_report
                IRLog.get_instance().println('Description: %s' % description)
        elif cmd == 'ap':
            IRLog.get_instance().println('Appending Penalties')
            if new_report is None:
                report_missing()
            else:
                from ir_term_count import IRTermCount
                raw = []
                while not raw:
                    answer = raw_input("Input Penalties (split by ','):")
                    # ''.split(',') is [''], so a plain length check never
                    # re-prompts; drop blank entries before testing.
                    raw = [term for term in answer.split(',')
                           if term.strip()]
                current = new_report.get_penalty_terms()
                # Copy so the report's own collection is not mutated in
                # place (and so a non-list collection is accepted too).
                penalty = [] if current is None else list(current)
                penalty += IRTermCount.do_stemming(raw)
                new_report.set_penalty_terms(penalty)
                # Same output as the former print statement, written so it
                # parses with or without print_function.
                print('%d %s' % (len(penalty), penalty))
                IRLog.get_instance().println('Penalties: %s'
                                             % ', '.join(penalty))
        elif cmd == 'sd':
            IRLog.get_instance().println('Set Dummy Bug ID')
            if new_report is None:
                report_missing()
            else:
                bug_id = -1
                while bug_id <= 0:
                    raw_id = raw_input('Dummy Bug ID: ')
                    try:
                        bug_id = int(raw_id)
                    except ValueError:
                        bug_id = -1
                new_report.set_dummy_bug_id(bug_id)
                IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
        elif cmd == 'help':
            cls.__show_help()
        else:
            IRLog.get_instance().println('Error! Unkown command: %s'
                                         % cmd)
            cls.__show_help()
    IRLog.get_instance().println("Bye")
test_file = sys.argv[3] bug_id = int(sys.argv[4]) from ir_sim_bug_evaluator import IRSimBugEvaluator new_report = IRSimBugEvaluator.get_report_from_test_file( test_file, bug_id) if new_report is None: IRLog.get_instance().println('Error! Cannot find report %d in %s' % \ (bug_id, test_file)) else: if sys.argv.__len__() > 5: from ir_term_count import IRTermCount penalty_terms_raw = sys.argv[4].split(',') penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw)) IRLog.get_instance().println('%d penalty terms: %s:' \ % (penalty_terms.__len__(), ','.join(penalty_terms))) new_report.set_penalty_terms(penalty_terms) elif mode == 'text': text = sys.argv[3] new_report = IRReport.from_string(text) elif mode == 'inte': IRRecommender.start_shell() exit() else: IRLog.get_instance().println('Error! Known mode %s' % mode) from ir_tfidf import IRTFIDF from ir_document_count import IRDocumentCount IRTFIDF.cache_all_data() IRDocumentCount.cache_all_data() IRRecommender.do_recommend(new_report) IRLog.get_instance().stop_log()
def do_test_over_file(self, filename):
    """Do test over the file.

    Every line of the file is parsed into a report. Its similar and
    duplicate candidates are computed and each candidate list is checked
    against the report's duplicate group. Reports for which no real
    duplicates are returned are skipped and not counted. At the end a
    CSV header and one CSV row of averaged results are written to the
    log; averages over zero cases are reported as 0.0.

    Args:
        filename: str, the input file which generated by
            generate_incomplete_test_file.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_duplicate_group import IRDuplicateGroup
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_report import IRReport
    from ir_document_count import IRDocumentCount

    def average(total, count):
        """Return total / count as a float, or 0.0 when count is zero."""
        return float(total) / count if count else 0.0

    IRText.cache_all_data()
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()

    remove_self_bug_id = IRConfig.get_instance().get_bool(
        'remove_self_bug_id', True)
    sim_tot_precision = 0.0
    sim_tot_recall = 0.0
    sim_bi_tot_recall = 0.0
    sim_tot_size = 0
    dup_tot_precision = 0.0
    dup_tot_recall = 0.0
    dup_bi_tot_recall = 0.0
    dup_num = 0
    test_num = 0

    # The with-block closes the file even when a report fails to parse.
    with open(filename, 'r') as infile:
        for line in infile:
            IRLog.get_instance().println('----test----')
            # Drop only the line terminator. The old bare line.strip()
            # discarded its result, and a full strip could also eat
            # trailing empty fields if the separator is whitespace.
            new_report = IRReport.from_string(line.rstrip('\r\n'))
            bug_id = new_report.get_dummy_bug_id()

            similarities, duplicates = \
                new_report.similarities_and_duplicates()
            sim_ids = [sim[0] for sim in similarities]
            dup_ids = [dup[0] for dup in duplicates]
            IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
            IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))

            # evaluate sim
            sim_hit, sim_nothit, real_duplicates = \
                IRDuplicateGroup.is_in_same_duplicate_group(
                    bug_id, sim_ids, remove_self_bug_id)
            # some group contain only one: nothing to score, not counted
            if len(real_duplicates) == 0:
                continue
            test_num += 1
            precision, recall = self.__report_result(
                bug_id, sim_hit, sim_nothit, real_duplicates)
            sim_tot_precision += precision
            sim_tot_recall += recall
            sim_tot_size += len(sim_ids)
            # "birec": 1 when at least one real duplicate was found
            sim_bi_tot_recall += 1 if recall > 0.0 else 0

            # evaluate dup, only when duplicates were proposed
            if dup_ids:
                dup_num += 1
                dup_hit, dup_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, dup_ids, remove_self_bug_id)
                precision, recall = self.__report_result(
                    bug_id, dup_hit, dup_nothit, real_duplicates)
                dup_tot_precision += precision
                dup_tot_recall += recall
                dup_bi_tot_recall += 1 if recall > 0.0 else 0

    # general conclusion
    IRLog.get_instance().println(','.join([
        '#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
        '#dup', 'dup pre', 'dup rec', 'dup birec']))
    # The counts are printed as counted (0 stays 0); average() guards the
    # divisions instead of overwriting a zero count with 1.0.
    IRLog.get_instance().println(','.join([
        str(test_num),
        str(average(sim_tot_precision, test_num)),
        str(average(sim_tot_recall, test_num)),
        str(average(sim_bi_tot_recall, test_num)),
        str(average(sim_tot_size, test_num)),
        str(dup_num),
        str(average(dup_tot_precision, dup_num)),
        str(average(dup_tot_recall, dup_num)),
        str(average(dup_bi_tot_recall, dup_num))]))
def do_test_over_file(self, filename):
    """Do test over the file.

    Every line of the file is parsed into a report. Its similar and
    duplicate candidates are computed and each candidate list is checked
    against the report's duplicate group. Reports for which no real
    duplicates are returned are skipped and not counted. At the end a
    CSV header and one CSV row of averaged results are written to the
    log; averages over zero cases are reported as 0.0.

    Args:
        filename: str, the input file which generated by
            generate_incomplete_test_file.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_duplicate_group import IRDuplicateGroup
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_report import IRReport
    from ir_document_count import IRDocumentCount

    def average(total, count):
        """Return total / count as a float, or 0.0 when count is zero."""
        return float(total) / count if count else 0.0

    IRText.cache_all_data()
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()

    remove_self_bug_id = IRConfig.get_instance().get_bool(
        'remove_self_bug_id', True)
    sim_tot_precision = 0.0
    sim_tot_recall = 0.0
    sim_bi_tot_recall = 0.0
    sim_tot_size = 0
    dup_tot_precision = 0.0
    dup_tot_recall = 0.0
    dup_bi_tot_recall = 0.0
    dup_num = 0
    test_num = 0

    # The with-block closes the file even when a report fails to parse.
    with open(filename, 'r') as infile:
        for line in infile:
            IRLog.get_instance().println('----test----')
            # Drop only the line terminator. The old bare line.strip()
            # discarded its result, and a full strip could also eat
            # trailing empty fields if the separator is whitespace.
            new_report = IRReport.from_string(line.rstrip('\r\n'))
            bug_id = new_report.get_dummy_bug_id()

            similarities, duplicates = \
                new_report.similarities_and_duplicates()
            sim_ids = [sim[0] for sim in similarities]
            dup_ids = [dup[0] for dup in duplicates]
            IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
            IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))

            # evaluate sim
            sim_hit, sim_nothit, real_duplicates = \
                IRDuplicateGroup.is_in_same_duplicate_group(
                    bug_id, sim_ids, remove_self_bug_id)
            # some group contain only one: nothing to score, not counted
            if len(real_duplicates) == 0:
                continue
            test_num += 1
            precision, recall = self.__report_result(
                bug_id, sim_hit, sim_nothit, real_duplicates)
            sim_tot_precision += precision
            sim_tot_recall += recall
            sim_tot_size += len(sim_ids)
            # "birec": 1 when at least one real duplicate was found
            sim_bi_tot_recall += 1 if recall > 0.0 else 0

            # evaluate dup, only when duplicates were proposed
            if dup_ids:
                dup_num += 1
                dup_hit, dup_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, dup_ids, remove_self_bug_id)
                precision, recall = self.__report_result(
                    bug_id, dup_hit, dup_nothit, real_duplicates)
                dup_tot_precision += precision
                dup_tot_recall += recall
                dup_bi_tot_recall += 1 if recall > 0.0 else 0

    # general conclusion
    IRLog.get_instance().println(','.join([
        '#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
        '#dup', 'dup pre', 'dup rec', 'dup birec']))
    # The counts are printed as counted (0 stays 0); average() guards the
    # divisions instead of overwriting a zero count with 1.0.
    IRLog.get_instance().println(','.join([
        str(test_num),
        str(average(sim_tot_precision, test_num)),
        str(average(sim_tot_recall, test_num)),
        str(average(sim_bi_tot_recall, test_num)),
        str(average(sim_tot_size, test_num)),
        str(dup_num),
        str(average(dup_tot_precision, dup_num)),
        str(average(dup_tot_recall, dup_num)),
        str(average(dup_bi_tot_recall, dup_num))]))