def test_create_new_report_from_string(self):
    """Round-trip a fully populated IRReport through to_string/from_string.

    Every serialized field -- summary, description, basic info, dummy
    bug id, penalty terms, excluded report ids, skip terms -- must
    survive the round trip unchanged.
    """
    from nose.tools import eq_
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_report import IRReport
    from ir_term_count import IRTermCount
    IRLog.get_instance().start_log()
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary_text = 'Firefox crashed'
    description_text = 'When I was openning history folder, the f**king' \
        ' Firefox just crashed!\n'
    original = IRReport(summary_text, description_text)
    original.set_basic_info(12345, 'core')
    # term lists are stored stemmed, so stem the fixtures the same way
    original.set_penalty_terms(IRTermCount.do_stemming(['ie', 'explore']))
    original.set_exclude_report_ids([100100])
    original.set_dummy_bug_id(12345)
    original.set_skip_terms(IRTermCount.do_stemming(['new', 'please']))
    # serialize to text ...
    serialized = original.to_string()
    IRLog.get_instance().println('Serialized report: %s' % (serialized))
    # ... and rebuild a fresh report from that text
    restored = IRReport.from_string(serialized)
    assert restored.get_summary_text() == original.get_summary_text()
    # descriptions may pick up trailing whitespace; compare stripped
    eq_(restored.get_description_text().strip(),
        original.get_description_text().strip())
    assert restored.get_create_ts() == original.get_create_ts()
    assert restored.get_product() == original.get_product()
    assert restored.get_dummy_bug_id() == original.get_dummy_bug_id()
    assert restored.get_penalty_terms() == original.get_penalty_terms()
    assert restored.get_exclude_report_ids() == \
        original.get_exclude_report_ids()
    eq_(restored.get_skip_terms(), original.get_skip_terms())
    IRLog.get_instance().stop_log()
def test_create_new_report_from_string(self):
    """Round-trip test: IRReport.to_string followed by IRReport.from_string.

    The deserialized report must reproduce summary, description,
    creation timestamp, product, dummy bug id, penalty terms, excluded
    report ids and skip terms of the source report.
    """
    from nose.tools import eq_
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_report import IRReport
    from ir_term_count import IRTermCount
    IRLog.get_instance().start_log()
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary_text = 'Firefox crashed'
    description_text = 'When I was openning history folder, the f**king' \
        ' Firefox just crashed!\n'
    report = IRReport(summary_text, description_text)
    report.set_basic_info(12345, 'core')
    # terms are stored in stemmed form, so stem the fixtures too
    report.set_penalty_terms(IRTermCount.do_stemming(['ie', 'explore']))
    report.set_exclude_report_ids([100100])
    report.set_dummy_bug_id(12345)
    report.set_skip_terms(IRTermCount.do_stemming(['new', 'please']))
    # save to text
    text = report.to_string()
    IRLog.get_instance().println('Serialized report: %s' % (text))
    # load from text
    new_report = IRReport.from_string(text)
    assert new_report.get_summary_text() == report.get_summary_text()
    # compare stripped: the round trip may alter trailing whitespace
    eq_(new_report.get_description_text().strip(),
        report.get_description_text().strip())
    assert new_report.get_create_ts() == report.get_create_ts()
    assert new_report.get_product() == report.get_product()
    assert new_report.get_dummy_bug_id() == report.get_dummy_bug_id()
    assert new_report.get_penalty_terms() == report.get_penalty_terms()
    assert new_report.get_exclude_report_ids() == \
        report.get_exclude_report_ids()
    eq_(new_report.get_skip_terms(), report.get_skip_terms())
    IRLog.get_instance().stop_log()
def test_get_termcount_of_bug(self):
    """Term counts for a known fixture bug must exist; dump them to the log."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary, description = IRTermCount.get_termcount_of_bug(100000)
    # both parts must be present for a bug that exists in the fixture db
    assert summary is not None
    assert description is not None
    log = IRLog.get_instance()
    log.println('Summary')
    IRTermCount.show_dict_compare(summary, {})
    log.println('Description')
    IRTermCount.show_dict_compare(description, {})
def test_get_termcount_of_bug(self):
    """Fetching term counts for bug 100000 must return both parts.

    Bug 100000 is expected to exist in the test fixture database.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary, description = IRTermCount.get_termcount_of_bug(100000)
    assert None != summary
    assert None != description
    # dump both term counts to the log for manual inspection
    IRLog.get_instance().println('Summary')
    IRTermCount.show_dict_compare(summary, {})
    IRLog.get_instance().println('Description')
    IRTermCount.show_dict_compare(description, {})
def compare_and_print_termcount(cls, title_a, report_a, title_b, report_b):
    """Log a side-by-side term-count comparison of two reports.

    Args:
        title_a: label for the first report in the log output.
        report_a: IRReport, first report.
        title_b: label for the second report.
        report_b: IRReport, second report.
    """
    from ir_log import IRLog
    from ir_term_count import IRTermCount
    sum_a, desc_a = report_a.get_summary_and_description_termcount()
    sum_b, desc_b = report_b.get_summary_and_description_termcount()
    log = IRLog.get_instance()
    log.println('[Termcount][Summary][%s][%s]' % (title_a, title_b))
    IRTermCount.show_dict_compare(sum_a, sum_b)
    log.println('[Termcount][Description][%s][%s]' % (title_a, title_b))
    IRTermCount.show_dict_compare(desc_a, desc_b)
def compare_and_print_termcount(cls, title_a, report_a, title_b, report_b):
    """Log the term counts of two reports side by side.

    Args:
        title_a: label used for report_a in the log output.
        report_a: IRReport, first report to compare.
        title_b: label used for report_b.
        report_b: IRReport, second report to compare.
    """
    from ir_log import IRLog
    from ir_term_count import IRTermCount
    summary_a, description_a = \
        report_a.get_summary_and_description_termcount()
    summary_b, description_b = \
        report_b.get_summary_and_description_termcount()
    # summaries first, then descriptions, each under a tagged header
    IRLog.get_instance().println('[Termcount][Summary][%s][%s]' \
        % (title_a, title_b))
    IRTermCount.show_dict_compare(summary_a, summary_b)
    IRLog.get_instance().println('[Termcount][Description][%s][%s]' \
        % (title_a, title_b))
    IRTermCount.show_dict_compare(description_a, description_b)
def test_batch_report_term_count(self):
    """After batch generation the termcount and text collections match in size."""
    from ir_config import IRConfig
    from ir_mongodb_helper import IRMongodbHelper
    from ir_term_count import IRTermCount
    config = IRConfig.get_instance()
    config.load('../data/test/bug_test.cfg')
    IRTermCount.batch_generate_term_count()
    # simple size check: one termcount document per text document
    con = IRMongodbHelper.get_instance().get_connection()
    db = con[config.get('bug_db_name')]
    text_collection = db[config.get('bug_text_collection_name')]
    termcount_collection = db[config.get('bug_termcount_collection_name')]
    assert text_collection.count() == termcount_collection.count()
def test_batch_report_term_count(self):
    """Batch term-count generation yields one output document per text document."""
    from ir_config import IRConfig
    from ir_mongodb_helper import IRMongodbHelper
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    IRTermCount.batch_generate_term_count()
    # simple size test
    con = IRMongodbHelper.get_instance().get_connection()
    db = con[IRConfig.get_instance().get('bug_db_name')]
    text_collection = db[IRConfig.get_instance().
                         get('bug_text_collection_name')]
    termcount_collection = db[IRConfig.get_instance().
                              get('bug_termcount_collection_name')]
    assert text_collection.count() == termcount_collection.count()
def __generate_single_bug(self, bug_id, drop_rate):
    """Generate an incomplete bug report text.

    Args:
        bug_id: int, original bug id.
        drop_rate: float, 0.0 for not drop, 1.0 for totally drop.

    Returns:
        IRReport, built from the (possibly truncated) text of the
        original bug, carrying over its stacktrace and basic info and
        tagged with the original id as dummy bug id.
    """
    from ir_text import IRText
    from ir_term_count import IRTermCount
    from ir_report import IRReport
    # get description and summary
    summary, description = \
        IRText.get_summary_and_description_of_bug(bug_id)
    create_ts, product = IRText.get_basic_info_of_bug(bug_id)
    # epsilon threshold: rates at or below 0.001 mean "keep full text"
    if drop_rate > 0.001:
        summary, description = IRTermCount.create_incomplete_report(
            summary, description, drop_rate)
    # (BUGFIX: removed a leftover debug `print description` that dumped
    # the whole description to stdout on every call)
    new_report = IRReport(summary, description)
    new_report.set_stacktrace(IRText.get_stacktrace_of_bug(bug_id))
    new_report.set_dummy_bug_id(bug_id)
    new_report.set_basic_info(create_ts, product)
    return new_report
def get_report_difference(cls, new_report, similar_report):
    """Compute terms present in the similar report but not in the new one.

    Args:
        new_report: IRReport, the report being completed.
        similar_report: IRReport, a retrieved similar report.

    Returns:
        (set, set): (diff of summary terms, diff of description terms).
        The description diff additionally excludes terms already in the
        new summary, the new report's skip terms, and the stemmed
        product name.
    """
    new_sum_tc, new_desc_tc = \
        new_report.get_summary_and_description_termcount()
    sim_sum_tc, sim_desc_tc = \
        similar_report.get_summary_and_description_termcount()
    diff_summary = cls.__get_dict_difference(new_sum_tc, sim_sum_tc)
    diff_description = cls.__get_dict_difference(new_desc_tc, sim_desc_tc)
    # terms the reporter already typed in the summary are not suggestions
    diff_description -= set(new_sum_tc.keys())
    # honor the report's explicit skip list
    diff_description -= set(new_report.get_skip_terms())
    # the product name itself is never a useful recommendation
    from ir_term_count import IRTermCount
    product = new_report.get_product()
    if product is not None:
        diff_description.discard(IRTermCount.do_stemming([product])[0])
    return diff_summary, diff_description
def get_report_difference(cls, new_report, similar_report):
    """Get the difference of terms of reports in similar_reports.

    return the dict difference of its summary and description respectly

    Args:
        new_report: IRReport, The new report.
        similar_report: IRReport, The similar report.

    Returns:
        (set, set), (diff of summary, diff of description)
    """
    new_summary_termcount, new_description_termcount = \
        new_report.get_summary_and_description_termcount()
    sim_summary_termcount, sim_description_termcount = \
        similar_report.get_summary_and_description_termcount()
    diff_summary = cls.__get_dict_difference(
        new_summary_termcount, sim_summary_termcount)
    diff_description = cls.__get_dict_difference(
        new_description_termcount, sim_description_termcount)
    # still, we don't want the term in summary to be recommended
    diff_description -= set(new_summary_termcount.keys())
    # skip the skip_terms in new report
    diff_description -= set(new_report.get_skip_terms())
    # and product should not be recommended
    from ir_term_count import IRTermCount
    product = new_report.get_product()
    if product is not None:
        # stem the product name so it matches the stemmed term space
        product_term = IRTermCount.do_stemming([product])[0]
        if product_term in diff_description:
            diff_description.remove(product_term)
    return diff_summary, diff_description
def batch_generate_tfidf(cls):
    """Batch calculate TFIDF.

    Iterates every per-bug term-count document, computes summary and
    description TFIDF vectors with the configured algorithm, and writes
    them to the TFIDF collection indexed by bug id.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_document_count import IRDocumentCount
    from ir_term_count import IRTermCount
    # get config
    bug_id_name = IRConfig.get_instance().get('bug_id_name')
    summary_name = IRConfig.get_instance().get('bug_summary_name')
    description_name = IRConfig.get_instance().get('bug_description_name')
    tfidf_algorithm = IRConfig.get_instance().get('tfidf_algorithm')
    # prepare collections
    IRDocumentCount.cache_all_data()
    tfidf_collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'w')
    # batch calculate tfidf
    termcount_iterator = IRTermCount.get_iterator()
    bug_count = termcount_iterator.count()

    def iter_term_count(bug):
        # per-bug callback: one TFIDF vector per text part, one insert
        summary_tfidf = cls.calculate_tfidf(bug[summary_name],
                                            summary_name, bug_count,
                                            None, tfidf_algorithm)
        description_tfidf = cls.calculate_tfidf(bug[description_name],
                                                description_name,
                                                bug_count, None,
                                                tfidf_algorithm)
        tfidf_collection.insert({bug_id_name: bug[bug_id_name],
                                 summary_name: summary_tfidf,
                                 description_name: description_tfidf})
    IRProgressBar.execute_iteration_for_cursor(termcount_iterator,
                                               iter_term_count,
                                               "Calculating TFIDF")
    tfidf_collection.create_index([(bug_id_name, IRCollection.ASCENDING)])
    tfidf_collection.close()
def __generate_single_bug(self, bug_id, drop_rate):
    """Generate an incomplete bug report text.

    Args:
        bug_id: int, original bug id.
        drop_rate: float, 0.0 for not drop, 1.0 for totally drop.

    Returns:
        IRReport
    """
    from ir_text import IRText
    from ir_term_count import IRTermCount
    from ir_report import IRReport
    # get description and summary
    summary, description = IRText.get_summary_and_description_of_bug(
        bug_id)
    create_ts, product = IRText.get_basic_info_of_bug(bug_id)
    # epsilon threshold: rates at or below 0.001 keep the full text
    if drop_rate > 0.001:
        summary, description = \
            IRTermCount.create_incomplete_report(summary, description,
                                                 drop_rate)
    # NOTE(review): debug print left in -- dumps the whole description
    # to stdout on every call; consider removing.
    print description
    new_report = IRReport(summary, description)
    new_report.set_stacktrace(IRText.get_stacktrace_of_bug(bug_id))
    new_report.set_dummy_bug_id(bug_id)
    new_report.set_basic_info(create_ts, product)
    return new_report
def test_create_incomplete_report(self):
    """Drop ~40% of a report's terms and log original vs incomplete BoWs."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary = 'This is a test of calculation for single report term count.'
    description = 'This is the description of the test report. Just a test.'
    full_summary_bow, full_description_bow = \
        IRTermCount.calculate_term_count(summary, description)
    inc_summary, inc_description = \
        IRTermCount.create_incomplete_report(summary, description, 0.4)
    inc_summary_bow, inc_description_bow = \
        IRTermCount.calculate_term_count(inc_summary, inc_description)
    log = IRLog.get_instance()
    log.println('Original Summary: %s' % (summary))
    log.println('Original Description: %s' % (description))
    log.println('Incomplete Summary: %s' % (inc_summary))
    log.println('Incomplete Description: %s' % (inc_description))
    log.println('Compare original BoW with incomplete BoW')
    log.println('%16s\t%8s\t%8s' % ('Summary', 'Ori', 'Inc'))
    IRTermCount.show_dict_compare(full_summary_bow, inc_summary_bow)
    log.println('%16s\t%8s\t%8s' % ('Description', 'Ori', 'Inc'))
    IRTermCount.show_dict_compare(full_description_bow,
                                  inc_description_bow)
def __update_summary_and_description_termcount_from_text(self):
    """Fill any missing cached term counts from the raw report text."""
    from ir_term_count import IRTermCount
    summary_text, description_text = \
        self.get_summary_and_description_text()
    computed_summary, computed_description = \
        IRTermCount.calculate_term_count(summary_text, description_text)
    # only populate fields that are still unset; keep existing caches
    if self.__summary_termcount is None:
        self.__summary_termcount = computed_summary
    if self.__description_termcount is None:
        self.__description_termcount = computed_description
def __update_summary_and_description_termcount_from_text(self):
    """Compute term counts from the report's raw text, filling unset fields.

    Only fields that are still None are assigned; already-cached counts
    are left untouched.
    """
    from ir_term_count import IRTermCount
    summary_text, description_text = \
        self.get_summary_and_description_text()
    summary_termcount, description_termcount = \
        IRTermCount.calculate_term_count(summary_text, description_text)
    if self.__summary_termcount is None:
        self.__summary_termcount = summary_termcount
    if self.__description_termcount is None:
        self.__description_termcount = description_termcount
def test_tokenization(self):
    """Hyphenated words survive tokenization; trailing '()' is stripped."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    from nose.tools import assert_equals
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    cases = [('mouse-down', ['mouse-down']),
             ('set_background_color()', ['set_background_color'])]
    for raw, expected in cases:
        assert_equals(expected, IRTermCount.do_tokenization(raw))
def test_tokenization(self):
    """Tokenizer keeps hyphenated words intact and drops trailing '()'."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    from nose.tools import assert_equals
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    # inputs paired positionally with their expected token lists
    tests = ['mouse-down', 'set_background_color()']
    expects = [['mouse-down'], ['set_background_color']]
    for index, test in enumerate(tests):
        assert_equals(expects[index], IRTermCount.do_tokenization(test))
def test_single_report_term_count(self):
    """Term counts of a small fixture report match known stemmed values."""
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary = 'This is a test of calculation for single report term count.'
    description = 'This is the description of the test report. Just a test.'
    summary_bow, description_bow = \
        IRTermCount.calculate_term_count(summary, description)
    # 'calculation' stems to 'calcul'; 'test' appears twice in description
    assert summary_bow['calcul'] == 1
    assert description_bow['test'] == 2
def test_single_report_term_count(self):
    """Term counting on a fixed summary/description pair gives known counts."""
    #import sys
    #sys.path.append('../bin/')
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary = 'This is a test of calculation for single report term count.'
    description = 'This is the description of the test report. Just a test.'
    summary_BoW, description_BoW = \
        IRTermCount.calculate_term_count(summary, description)
    # 'calculation' is stemmed to 'calcul'; 'test' occurs twice in the
    # description
    assert summary_BoW['calcul'] == 1
    assert description_BoW['test'] == 2
def batch_generate_document_count(cls):
    """Batch calculate term count over documents.

    Input is from mongodb, termcount collection.  For every term the
    number of bug summaries and descriptions it occurs in is
    accumulated in memory, then written out one document per term.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_term_count import IRTermCount
    bug_id_name = IRConfig.get_instance().get('bug_id_name')
    term_name = IRConfig.get_instance().get('bug_term_name')
    summary_name = IRConfig.get_instance().get('bug_summary_name')
    description_name = IRConfig.get_instance().get('bug_description_name')
    # Calculate document count and stored in document_count
    document_count = {}

    def iter_term_count(bug):
        # assumes each term appears once per stored bug field, so the
        # increment counts documents rather than occurrences -- TODO
        # confirm against the termcount schema
        for term in bug[summary_name]:
            if not term in document_count:
                document_count[term] = {term_name: term,
                                        summary_name: 0,
                                        description_name: 0}
            document_count[term][summary_name] += 1
        for term in bug[description_name]:
            if not term in document_count:
                document_count[term] = {term_name: term,
                                        summary_name: 0,
                                        description_name: 0}
            document_count[term][description_name] += 1
    IRProgressBar.execute_iteration_for_cursor(
        IRTermCount.get_iterator({}), iter_term_count,
        "Counting Document Count")
    # Write to db
    documentcount_collection = IRCollection(
        'bug_db_name', 'bug_documentcount_collection_name', 'w')

    def write_to_mongo(term):
        documentcount_collection.insert(document_count[term])
    IRProgressBar.execute_iteration_for_dict(document_count,
                                             write_to_mongo,
                                             "Write to database")
    # NOTE(review): the index is created on the bug-id field, but the
    # documents in this collection are keyed by term -- verify whether
    # this should be term_name instead.
    documentcount_collection.create_index([(bug_id_name,
                                            IRCollection.ASCENDING)])
    documentcount_collection.close()
def query(cls, summary, description, top_n):
    """Return the top_n most similar stored reports for the given text.

    Args:
        summary: str, summary text of the query report.
        description: str, description text of the query report.
        top_n: int, number of similar reports to return.

    Returns:
        The similarity ranking produced by
        IRTFIDF.get_top_n_similarity_over_all for the query's vectors.
    """
    from ir_term_count import IRTermCount
    from ir_tfidf import IRTFIDF
    # pipeline: text -> bag of words -> tfidf -> similarity ranking
    bows = IRTermCount.calculate_term_count(summary, description)
    tfidfs = IRTFIDF.calculate_tfidf_for_report_termcount(bows[0], bows[1])
    return IRTFIDF.get_top_n_similarity_over_all(tfidfs[0], tfidfs[1],
                                                 top_n)
def test_stemming(self):
    """Log how each configured stemmer reduces a few sample words."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    words = ['discrimination', 'disgusting', 'visualization',
             'configuration']
    stemmer_names = ['porter', 'lancaster', 'snowball']
    for word in words:
        results = []
        for name in stemmer_names:
            # switch the active stemmer via config, then stem one word
            IRConfig.get_instance().set('stemmer', name)
            stemmed = IRTermCount.do_stemming([word])
            results.append(':'.join([name, stemmed[0]]))
        IRLog.get_instance().println('%s > %s' % (word,
                                                  ', '.join(results)))
def test_stemming(self):
    """Print the output of each supported stemmer for sample words.

    No assertions: this is a diagnostic that logs porter, lancaster and
    snowball stems side by side.
    """
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    tests = [
        'discrimination', 'disgusting', 'visualization', 'configuration'
    ]
    stemmers = ['porter', 'lancaster', 'snowball']
    for test in tests:
        out = []
        for stemmer in stemmers:
            # the active stemmer is selected through config
            IRConfig.get_instance().set('stemmer', stemmer)
            out_token = IRTermCount.do_stemming([test])
            out.append(':'.join([stemmer, out_token[0]]))
        IRLog.get_instance().println('%s > %s' % (test, ', '.join(out)))
def get_summary_and_description_termcount(self):
    """Return (summary, description) term counts, computing or caching.

    Text-only reports (no bug id) derive the counts from their raw text
    on first use.  Db-backed reports fetch counts by bug id, serving
    from the instance cache when caching is enabled and populated.
    """
    if self.__bug_id is None:
        # ad-hoc report: derive counts from text the first time
        if self.__summary_termcount is None or \
                self.__description_termcount is None:
            self.__update_summary_and_description_termcount_from_text()
        return [self.__summary_termcount, self.__description_termcount]
    # db-backed report: serve from cache when allowed and populated
    if self.__allow_cache and \
            self.__summary_termcount is not None and \
            self.__description_termcount is not None:
        return [self.__summary_termcount, self.__description_termcount]
    from ir_term_count import IRTermCount
    summary, description = \
        IRTermCount.get_termcount_of_bug(self.__bug_id)
    if self.__allow_cache:
        self.__summary_termcount = summary
        self.__description_termcount = description
    return summary, description
def get_summary_and_description_termcount(self):
    """Return the summary and description term counts of this report.

    Text-only reports (no bug id) compute counts from their text and
    always cache them.  Db-backed reports fetch counts by bug id and
    cache only when caching is enabled.

    NOTE(review): the text path returns a list while the fetch path
    returns a tuple -- callers should not rely on the sequence type.
    """
    if self.__bug_id is None:
        if self.__summary_termcount is None or \
                self.__description_termcount is None:
            self.__update_summary_and_description_termcount_from_text()
        return [self.__summary_termcount, self.__description_termcount]
    else:
        if self.__allow_cache and \
                self.__summary_termcount is not None and \
                self.__description_termcount is not None:
            return [self.__summary_termcount, self.__description_termcount]
        from ir_term_count import IRTermCount
        summary, description = \
            IRTermCount.get_termcount_of_bug(self.__bug_id)
        if self.__allow_cache:
            self.__summary_termcount, self.__description_termcount = \
                summary, description
        return summary, description
def batch_generate_tfidf(cls):
    """Compute TFIDF vectors for every bug and persist them.

    Iterates the term-count collection once, deriving summary and
    description TFIDF per bug with the configured algorithm, and stores
    the result keyed by bug id.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_document_count import IRDocumentCount
    from ir_term_count import IRTermCount
    config = IRConfig.get_instance()
    id_field = config.get('bug_id_name')
    summary_field = config.get('bug_summary_name')
    description_field = config.get('bug_description_name')
    algorithm = config.get('tfidf_algorithm')
    # warm the document-count cache and open the output collection
    IRDocumentCount.cache_all_data()
    out_collection = IRCollection(
        'bug_db_name', 'bug_tfidf_collection_name', 'w')
    cursor = IRTermCount.get_iterator()
    total_bugs = cursor.count()

    def handle_bug(bug):
        # one TFIDF vector per text part, then a single insert
        summary_vec = cls.calculate_tfidf(
            bug[summary_field], summary_field, total_bugs, None,
            algorithm)
        description_vec = cls.calculate_tfidf(
            bug[description_field], description_field, total_bugs, None,
            algorithm)
        out_collection.insert({id_field: bug[id_field],
                               summary_field: summary_vec,
                               description_field: description_vec})
    IRProgressBar.execute_iteration_for_cursor(
        cursor, handle_bug, "Calculating TFIDF")
    out_collection.create_index([(id_field, IRCollection.ASCENDING)])
    out_collection.close()
def batch_generate_document_count(cls):
    """Count, per term, how many bug documents contain it.

    Scans the termcount collection, accumulates per-term summary and
    description document frequencies in memory, then bulk-writes the
    result to the document-count collection.
    """
    from ir_log import IRProgressBar
    from ir_config import IRConfig
    from ir_mongodb_helper import IRCollection
    from ir_term_count import IRTermCount
    config = IRConfig.get_instance()
    bug_id_field = config.get('bug_id_name')
    term_field = config.get('bug_term_name')
    summary_field = config.get('bug_summary_name')
    description_field = config.get('bug_description_name')
    # in-memory accumulator: term -> its document-count record
    counts = {}

    def bump(term, field):
        # create the record on first sight, then increment one field
        if term not in counts:
            counts[term] = {term_field: term,
                            summary_field: 0,
                            description_field: 0}
        counts[term][field] += 1

    def scan_bug(bug):
        for term in bug[summary_field]:
            bump(term, summary_field)
        for term in bug[description_field]:
            bump(term, description_field)
    IRProgressBar.execute_iteration_for_cursor(
        IRTermCount.get_iterator({}), scan_bug,
        "Counting Document Count")
    out_collection = IRCollection(
        'bug_db_name', 'bug_documentcount_collection_name', 'w')

    def flush(term):
        out_collection.insert(counts[term])
    IRProgressBar.execute_iteration_for_dict(counts, flush,
                                             "Write to database")
    out_collection.create_index([(bug_id_field, IRCollection.ASCENDING)])
    out_collection.close()
def test_create_incomplete_report(self):
    """Dropping 40% of terms yields an 'incomplete' report; log the diff."""
    from ir_log import IRLog
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    summary = 'This is a test of calculation for single report term count.'
    description = 'This is the description of the test report. Just a test.'
    summary_BoW, description_BoW = \
        IRTermCount.calculate_term_count(summary, description)
    # drop_rate 0.4 (0.0 = keep everything, 1.0 = drop everything)
    inc_summary, inc_description = \
        IRTermCount.create_incomplete_report(summary, description, 0.4)
    inc_summary_bow, inc_description_bow = \
        IRTermCount.calculate_term_count(inc_summary, inc_description)
    IRLog.get_instance().println('Original Summary: %s' % (summary))
    IRLog.get_instance().println('Original Description: %s' %
                                 (description))
    IRLog.get_instance().println('Incomplete Summary: %s' % (inc_summary))
    IRLog.get_instance().println('Incomplete Description: %s' %
                                 (inc_description))
    IRLog.get_instance().println('Compare original BoW with incomplete BoW')
    IRLog.get_instance().println('%16s\t%8s\t%8s' %
                                 ('Summary', 'Ori', 'Inc'))
    IRTermCount.show_dict_compare(summary_BoW, inc_summary_bow)
    IRLog.get_instance().println('%16s\t%8s\t%8s' %
                                 ('Description', 'Ori', 'Inc'))
    IRTermCount.show_dict_compare(description_BoW, inc_description_bow)
def test_cache_all(self):
    """Smoke test: caching the whole termcount collection must not raise."""
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    config = IRConfig.get_instance()
    config.load('../data/test/bug_test.cfg')
    IRTermCount.cache_all_data()
def test_cache_all(self):
    """Smoke test: IRTermCount.cache_all_data runs against the fixture db."""
    from ir_config import IRConfig
    from ir_term_count import IRTermCount
    IRConfig.get_instance().load('../data/test/bug_test.cfg')
    IRTermCount.cache_all_data()
def start_shell(cls):
    """Start a shell that do recommending interactively.

    Commands: new (create a report from prompts), do (run the
    recommender), ls (show the current report), ad (append to the
    description), ap (append penalty terms), sd (set a dummy bug id),
    help, exit.
    """
    from ir_log import IRLog
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    from ir_report import IRReport
    IRLog.get_instance().println("Starting Intereport...")
    # warm both caches once before entering the command loop
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRLog.get_instance().println("Intereport Started. Waiting for input")
    new_report = None
    while 1:
        cmd = raw_input("Input command:").strip()
        if cmd == 'exit':
            IRLog.get_instance().println('Exiting')
            break
        elif cmd == 'new':
            IRLog.get_instance().println('Creating New Report')
            import time
            # re-prompt until the date parses as %Y-%m-%d
            cur_time = -1
            while cur_time < 0:
                try:
                    cur_time = int(time.mktime(time.strptime(
                        raw_input("Input Time (e.g., 2011-05-05): "),
                        '%Y-%m-%d')))
                except:
                    cur_time = -1
            product = raw_input("Input Product: ")
            summary = raw_input("Summary: ")
            raw_description = raw_input("Description:\n")
            # build the report through the same serialized form that
            # from_string parses
            new_report = IRReport.from_string(IRReport.separator.join([
                str(cur_time), product.lower(), summary,
                raw_description, '', '']))
            cls.__print_report(new_report)
        elif cmd == 'do':
            IRLog.get_instance().println('Do Recommending')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                cls.do_recommend(new_report)
        elif cmd == 'ls':
            IRLog.get_instance().println('Show Current Report')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                cls.__print_report(new_report)
        elif cmd == 'ad':
            IRLog.get_instance().println('Appending Description')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                append_description = raw_input("Append Description:\n")
                description = ' '.join([new_report.get_description_text(),
                                        append_description])
                # a replacement report is built rather than mutating the
                # current one; every other field is carried over
                dummy_report = IRReport(new_report.get_summary_text(),
                                        description)
                dummy_report.set_stacktrace(new_report.get_stacktrace())
                dummy_report.set_basic_info(new_report.get_create_ts(),
                                            new_report.get_product())
                dummy_report.set_penalty_terms(
                    new_report.get_penalty_terms())
                dummy_report.set_dummy_bug_id(
                    new_report.get_dummy_bug_id())
                new_report = dummy_report
                IRLog.get_instance().println('Description: %s' %
                                             description)
        elif cmd == 'ap':
            IRLog.get_instance().println('Appending Penalties')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                # re-prompt until at least one token is entered
                raw = []
                while raw.__len__() < 1:
                    raw = raw_input(
                        'Input Penalties (split by \',\'):').split(',')
                from ir_term_count import IRTermCount
                penalty = new_report.get_penalty_terms()
                if penalty is None:
                    penalty = []
                # penalties are stored stemmed, like all other terms
                penalty += IRTermCount.do_stemming(raw)
                new_report.set_penalty_terms(penalty)
                print len(penalty), penalty
                IRLog.get_instance().println('Penalties: %s' % \
                    (', '.join(penalty)))
        elif cmd == 'sd':
            IRLog.get_instance().println('Set Dummy Bug ID')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                # re-prompt until a positive integer is entered
                bug_id = -1
                while bug_id <= 0:
                    try:
                        bug_id = int(raw_input('Dummy Bug ID: '))
                    except:
                        bug_id = -1
                new_report.set_dummy_bug_id(bug_id)
                IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
        elif cmd == 'help':
            cls.__show_help()
        else:
            IRLog.get_instance().println('Error! Unkown command: %s' \
                % cmd)
            cls.__show_help()
    # end of while 1
    IRLog.get_instance().println("Bye")
mode = sys.argv[2]
new_report = None
if mode == 'file':
    # load a report identified by bug id from a prepared test file
    test_file = sys.argv[3]
    bug_id = int(sys.argv[4])
    from ir_sim_bug_evaluator import IRSimBugEvaluator
    new_report = IRSimBugEvaluator.get_report_from_test_file(test_file,
                                                            bug_id)
    if new_report is None:
        IRLog.get_instance().println('Error! Cannot find report %d in %s' % \
            (bug_id, test_file))
    else:
        if len(sys.argv) > 5:
            from ir_term_count import IRTermCount
            # BUGFIX: penalty terms are the 6th argument (argv[5]);
            # the code previously split argv[4], which is the bug id.
            penalty_terms_raw = sys.argv[5].split(',')
            penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw))
            IRLog.get_instance().println('%d penalty terms: %s:' \
                % (len(penalty_terms), ','.join(penalty_terms)))
            new_report.set_penalty_terms(penalty_terms)
elif mode == 'text':
    # the report is passed serialized on the command line
    text = sys.argv[3]
    new_report = IRReport.from_string(text)
elif mode == 'inte':
    # interactive shell; never falls through to the cache warm-up below
    IRRecommender.start_shell()
    exit()
else:
    # BUGFIX: message previously read 'Known mode'
    IRLog.get_instance().println('Error! Unknown mode %s' % mode)
from ir_tfidf import IRTFIDF
from ir_document_count import IRDocumentCount
IRTFIDF.cache_all_data()
IRDocumentCount.cache_all_data()
def get_termcount(self):
    """Return the bag-of-words of this text, computing it lazily once."""
    if self.__termcount is None:
        # first access: build and memoize the term counts
        from ir_term_count import IRTermCount
        self.__termcount = IRTermCount.get_bow(self.get_text(), True)
    return self.__termcount
def start_shell(cls):
    """Start a shell that do recommending interactively.

    Loops on raw_input commands: new, do, ls, ad, ap, sd, help, exit.
    """
    from ir_log import IRLog
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    from ir_report import IRReport
    IRLog.get_instance().println("Starting Intereport...")
    # caches are warmed once at startup
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRLog.get_instance().println("Intereport Started. Waiting for input")
    new_report = None
    while 1:
        cmd = raw_input("Input command:").strip()
        if cmd == 'exit':
            IRLog.get_instance().println('Exiting')
            break
        elif cmd == 'new':
            # build a report from interactive prompts
            IRLog.get_instance().println('Creating New Report')
            import time
            cur_time = -1
            while cur_time < 0:
                try:
                    cur_time = int(time.mktime(time.strptime(
                        raw_input("Input Time (e.g., 2011-05-05): "),
                        '%Y-%m-%d')))
                except:
                    cur_time = -1
            product = raw_input("Input Product: ")
            summary = raw_input("Summary: ")
            raw_description = raw_input("Description:\n")
            new_report = IRReport.from_string(IRReport.separator.join([
                str(cur_time), product.lower(), summary,
                raw_description, '', '']))
            cls.__print_report(new_report)
        elif cmd == 'do':
            IRLog.get_instance().println('Do Recommending')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                cls.do_recommend(new_report)
        elif cmd == 'ls':
            IRLog.get_instance().println('Show Current Report')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                cls.__print_report(new_report)
        elif cmd == 'ad':
            # append text to the description by rebuilding the report
            IRLog.get_instance().println('Appending Description')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                append_description = raw_input("Append Description:\n")
                description = ' '.join([
                    new_report.get_description_text(),
                    append_description])
                dummy_report = IRReport(new_report.get_summary_text(),
                                        description)
                dummy_report.set_stacktrace(new_report.get_stacktrace())
                dummy_report.set_basic_info(new_report.get_create_ts(),
                                            new_report.get_product())
                dummy_report.set_penalty_terms(
                    new_report.get_penalty_terms())
                dummy_report.set_dummy_bug_id(
                    new_report.get_dummy_bug_id())
                new_report = dummy_report
                IRLog.get_instance().println('Description: %s' %
                                             description)
        elif cmd == 'ap':
            # append stemmed penalty terms to the current report
            IRLog.get_instance().println('Appending Penalties')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                raw = []
                while raw.__len__() < 1:
                    raw = raw_input(
                        'Input Penalties (split by \',\'):').split(',')
                from ir_term_count import IRTermCount
                penalty = new_report.get_penalty_terms()
                if penalty is None:
                    penalty = []
                penalty += IRTermCount.do_stemming(raw)
                new_report.set_penalty_terms(penalty)
                print len(penalty), penalty
                IRLog.get_instance().println('Penalties: %s' % \
                    (', '.join(penalty)))
        elif cmd == 'sd':
            # attach a positive dummy bug id
            IRLog.get_instance().println('Set Dummy Bug ID')
            if new_report is None:
                IRLog.get_instance().println('Error! Please create '
                                             'report first.')
            else:
                bug_id = -1
                while bug_id <= 0:
                    try:
                        bug_id = int(raw_input('Dummy Bug ID: '))
                    except:
                        bug_id = -1
                new_report.set_dummy_bug_id(bug_id)
                IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
        elif cmd == 'help':
            cls.__show_help()
        else:
            IRLog.get_instance().println('Error! Unkown command: %s' \
                % cmd)
            cls.__show_help()
    # end of while 1
    IRLog.get_instance().println("Bye")
new_report = None
if mode == 'file':
    # load a report identified by bug id from a prepared test file
    test_file = sys.argv[3]
    bug_id = int(sys.argv[4])
    from ir_sim_bug_evaluator import IRSimBugEvaluator
    new_report = IRSimBugEvaluator.get_report_from_test_file(
        test_file, bug_id)
    if new_report is None:
        IRLog.get_instance().println('Error! Cannot find report %d in %s' % \
            (bug_id, test_file))
    else:
        if len(sys.argv) > 5:
            from ir_term_count import IRTermCount
            # BUGFIX: penalty terms are the 6th argument (argv[5]);
            # the code previously split argv[4], which is the bug id.
            penalty_terms_raw = sys.argv[5].split(',')
            penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw))
            IRLog.get_instance().println('%d penalty terms: %s:' \
                % (len(penalty_terms), ','.join(penalty_terms)))
            new_report.set_penalty_terms(penalty_terms)
elif mode == 'text':
    # the report is passed serialized on the command line
    text = sys.argv[3]
    new_report = IRReport.from_string(text)
elif mode == 'inte':
    # interactive shell; never falls through to the cache warm-up below
    IRRecommender.start_shell()
    exit()
else:
    # BUGFIX: message previously read 'Known mode'
    IRLog.get_instance().println('Error! Unknown mode %s' % mode)
from ir_tfidf import IRTFIDF
from ir_document_count import IRDocumentCount
IRTFIDF.cache_all_data()
IRDocumentCount.cache_all_data()