Beispiel #1
0
    def test_cluster_sentences(test):
        """Cluster the sentences of a fixed description and log the groups.

        Loads the test configuration, splits a hard-coded bug description
        into sentences, clusters them into three groups with
        IRSentence.cluster_sentences, and logs every group headed by the
        text of its selected representative sentence.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_sentence import IRSentence

        cluster_count = 3
        log = IRLog.get_instance()
        log.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        bug_id = 10000
        description = ('Version: 12.43\n'
                       'Distribution: Gnome 12.03\n'
                       '\n'
                       'Steps to repreduce:\n'
                       '1. Open firefox.\n'
                       '2. Click Option\n'
                       '3. Open firefox\n'
                       '\n'
                       'Additional information:\n'
                       'This is really crazy when it crashed.')
        sentences = IRSentence.get_sentence_from_description(
            description, bug_id)
        cluster_of_sentence, representative_of = IRSentence.cluster_sentences(
            sentences, cluster_count)

        # Invert the sentence -> cluster assignment into cluster -> sentences.
        groups = [[] for _ in range(cluster_count)]
        for sentence_index, cluster in enumerate(cluster_of_sentence):
            groups[cluster].append(sentence_index)

        for group_index, members in enumerate(groups):
            representative = sentences[representative_of[group_index]]
            log.println('Group %d. Representative: %s' %
                        (group_index, representative.get_text()))
            for sentence_index in members:
                log.println(sentences[sentence_index].get_text())
Beispiel #2
0
 def batch_generate_term_count(cls):
     """Compute the term count of every stored bug text and persist it.

     Iterates over all texts returned by IRText.get_iterator({}), computes
     the summary and description term counts with
     cls.calculate_term_count, inserts one document per bug into the
     term count collection, then indexes that collection by bug id and
     closes it.
     """
     from ir_log import IRProgressBar
     from ir_text import IRText
     from ir_config import IRConfig
     from ir_mongodb_helper import IRCollection

     # Document field names; the second argument to get() is presumably
     # the fallback used when the key is not configured.
     config = IRConfig.get_instance()
     id_field = config.get('bug_id_name', 'bug_id')
     summary_field = config.get('bug_summary_name', 'summ')
     description_field = config.get('bug_description_name', 'desc')

     output = IRCollection(
         'bug_db_name', 'bug_termcount_collection_name', 'w')

     def store_term_count(bug):
         # One output document per bug, reusing the input field names.
         summary_bow, description_bow = cls.calculate_term_count(
             bug[summary_field], bug[description_field])
         document = {
             id_field: bug[id_field],
             summary_field: summary_bow,
             description_field: description_bow,
         }
         output.insert(document)

     IRProgressBar.execute_iteration_for_cursor(
         IRText.get_iterator({}), store_term_count,
         "From Text to Term Count")
     output.create_index([(id_field, IRCollection.ASCENDING)])
     output.close()
Beispiel #3
0
    def get_summary_and_description_of_bug(cls, bug_id):
        """Fetch the summary and description text of a bug.

        When caching is enabled and the bug is already cached, the cached
        pair is returned without touching the database. Otherwise the text
        collection is queried and, with caching enabled, the result is
        stored in the cache.

        Args:
            bug_id: int

        Returns:
            (str, str), (summary, description); two empty strings when no
            record matches the bug id.
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        if cls.__is_cache and bug_id in cls.__cache_summary_description:
            return cls.__cache_summary_description[bug_id]

        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')

        texts = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        matches = texts.find({id_field: bug_id})
        if matches.count() > 0:
            record = matches[0]
            text_pair = (record[summary_field], record[description_field])
        else:
            text_pair = ('', '')

        if cls.__is_cache:
            cls.__cache_summary_description[bug_id] = text_pair
        return text_pair
Beispiel #4
0
    def test_create_new_report_from_string(self):
        """Round-trip an IRReport through to_string() and from_string().

        Builds a report with basic info, penalty terms, excluded report
        ids, a dummy bug id and skip terms, serializes it, restores it and
        checks that every one of those fields survives the round trip.
        """
        from nose.tools import eq_
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_report import IRReport
        from ir_term_count import IRTermCount

        log = IRLog.get_instance()
        log.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        original = IRReport(
            'Firefox crashed',
            'When I was openning history folder, the f**king'
            ' Firefox just crashed!\n')
        original.set_basic_info(12345, 'core')
        original.set_penalty_terms(IRTermCount.do_stemming(['ie', 'explore']))
        original.set_exclude_report_ids([100100])
        original.set_dummy_bug_id(12345)
        original.set_skip_terms(IRTermCount.do_stemming(['new', 'please']))

        # Serialize, then rebuild a second report from the text.
        serialized = original.to_string()
        log.println('Serialized report: %s' % (serialized))
        restored = IRReport.from_string(serialized)

        assert restored.get_summary_text() == original.get_summary_text()
        # Descriptions are compared with surrounding whitespace stripped.
        eq_(restored.get_description_text().strip(),
            original.get_description_text().strip())
        assert restored.get_create_ts() == original.get_create_ts()
        assert restored.get_product() == original.get_product()
        assert restored.get_dummy_bug_id() == original.get_dummy_bug_id()
        assert restored.get_penalty_terms() == original.get_penalty_terms()
        assert (restored.get_exclude_report_ids() ==
                original.get_exclude_report_ids())
        eq_(restored.get_skip_terms(), original.get_skip_terms())
        log.stop_log()
Beispiel #5
0
    def __assert_collection_change(self, db_cfg_name, collection_cfg_name,
                                   is_finished):
        """Record in the meta collection that a collection is changing.

        Looks the collection up in the database's meta collection and
        either updates its existing entry or inserts a new one, storing the
        current time and the given success flag.

        Args:
            db_cfg_name: str, Config name of database in config file
            collection_cfg_name: str, Config name of the collection in
                config file.
            is_finished: boolean, Whether the modification has finished.
                If not, the call announces the intention to modify.
        """
        import time
        from ir_config import IRConfig

        config = IRConfig.get_instance()
        db_name = config.get(db_cfg_name)
        collection_name = config.get(collection_cfg_name)
        meta_collection = self.__get_meta_collection(db_name)
        existing = self.__find_collection_in_meta(db_name, collection_name)
        already_tracked = existing.count() > 0

        # Status fields shared by the update and the insert path.
        status = {
            self.__meta_lastmodified_name: int(time.time()),
            self.__meta_success_name: is_finished,
        }
        if already_tracked:
            meta_collection.update(
                {self.__meta_key_name: collection_name}, {'$set': status})
            return

        entry = {self.__meta_key_name: collection_name}
        entry.update(status)
        meta_collection.insert(entry)
Beispiel #6
0
    def similarity_over_all(self):
        """Calculate the similarity between this report and stored reports.

        Candidate reports are those of the same product whose creation
        timestamp is later than this report's timestamp minus
        ``search_time_span`` and whose id is not in the exclude list. The
        report carrying this report's dummy bug id is skipped.

        Returns:
            dict, {bug_id -> [score, summary_score, description_score, stacktrace_score]}
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        # 2 * 365 days; assumes timestamps are in seconds — TODO confirm.
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = IRConfig.get_instance().get('bug_id_name')

        create_ts_name = IRConfig.get_instance().get('bug_create_ts_name')
        product_name = IRConfig.get_instance().get('bug_product_name')

        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {
                '$gt': self.get_create_ts() - search_time_span
            },
            bug_id_name: {
                '$nin': self.__exclude_report_ids
            }
        })
        result = {}
        logger.println('Comparing with %d reports.'
                       % (reports2scan.count()))

        # Debug output kept from the original; written as a call so the
        # method parses on both Python 2 and Python 3.
        print(self.__summary_text)
        print(self.__description_text)

        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being none, we create and fill the info
            #    of report manually
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            # if self.__stacktrace is empty, we don't need to do this
            if self.get_stacktrace() is not None and \
                    len(self.get_stacktrace()) > 0:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(
                    bug_id)
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
Beispiel #7
0
    def test_parse_info_level1(self):
        """Parse the level-1 info fixture and check the stored bug texts.

        Runs IRText.parse_info_level1 on the test fixture, then inspects the
        text collection directly: the number of stored documents, the
        summary of bug 100000, and that no summary matches the HTML-entity
        pattern.
        """
        from ir_log import IRLog
        from ir_text import IRText
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        assert IRConfig.get_instance() is not None
        IRText.parse_info_level1('../data/test/info_level1_test')
        IRLog.get_instance().stop_log()

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None
        # in the test data, we have 1000 in total.
        # within, 40 have no resolution, 154 are incomplete
        # NOTE(review): 1000 - 40 - 154 is 806, not 833; presumably the two
        # groups overlap — confirm against the fixture.
        assert 833 == col.count()
        assert 'gnome is full of bugs ! (100000 currently)' == \
                col.find({'bug_id': 100000})[0]["summ"]

        # No stored summary may match the HTML-entity pattern.
        res = col.find(
            {"summ": {'$regex': '(&gt)|(&lt)|(&quot)|(&apo)s|(&amp)'}})
        assert res.count() == 0
Beispiel #8
0
    def cache_all_data(cls):
        """Load every document count record into the in-memory cache.

        Turns caching on and stores, for each term in the document count
        collection, a (summary count, description count) pair. A field
        missing from a record is cached as 0.
        """
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        config = IRConfig.get_instance()
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')
        term_field = config.get('bug_term_name')

        cls.__is_cache = True
        counts = IRCollection(
            'bug_db_name', 'bug_documentcount_collection_name', 'r')

        def remember(record):
            summary_count = 0
            if summary_field in record:
                summary_count = record[summary_field]
            description_count = 0
            if description_field in record:
                description_count = record[description_field]
            cls.__cache_document_count[record[term_field]] = (
                summary_count, description_count)

        IRProgressBar.execute_iteration_for_cursor(
            counts.find({}), remember, "Caching Document Count")
Beispiel #9
0
    def test_compare_stackinfo(self):
        """Compare the first bug's stacktrace with every stored stacktrace.

        Parses the stacktrace fixture, then logs the 'weight' and 'max'
        results of IRSTTools.compare_stackinfo between the first stored bug
        and each stored bug (including itself).
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper
        from ir_gnome_st_tools import IRSTTools
        from ir_text import IRText

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        IRText.parse_info_level1('../data/test/stacktrace_test')

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None

        # Read the documents once; indexing the cursor inside the loop would
        # issue a separate query for every position.
        bugs = list(col.find())
        assert bugs, 'no bug reports were parsed from the stacktrace fixture'
        st1 = bugs[0]["stacktrace"]

        for bug in bugs:
            st2 = bug["stacktrace"]
            result_weight = IRSTTools.compare_stackinfo(st1, st2, 'weight')
            result_max = IRSTTools.compare_stackinfo(st1, st2, 'max')
            IRLog.get_instance().println('Weight: %f, Max: %f'
                                         % (result_weight, result_max))

        IRLog.get_instance().stop_log()
Beispiel #10
0
    def cache_all_data(cls):
        """Load the text data of every bug into the in-memory caches.

        Enables caching, resets both caches and fills them from the text
        collection: (summary, description) pairs and stacktraces, each
        keyed by bug id. The collection is closed afterwards.
        """
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        # Document field names.
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')
        stacktrace_field = config.get('bug_stacktrace_name')

        cls.set_is_cache(True)
        texts = IRCollection('bug_db_name', 'bug_text_collection_name', 'r')
        cls.__cache_summary_description = {}
        cls.__cache_stacktrace = {}

        def remember(bug):
            text_pair = (bug[summary_field], bug[description_field])
            key = bug[id_field]
            cls.__cache_summary_description[key] = text_pair
            cls.__cache_stacktrace[key] = bug[stacktrace_field]

        IRProgressBar.execute_iteration_for_cursor(
            texts.find(), remember, 'Caching Text Data')
        texts.close()
Beispiel #11
0
    def test_parse_info_level1(self):
        """Parse the level-1 info fixture and verify what was stored.

        After IRText.parse_info_level1 has processed the fixture, the text
        collection is read directly to check the document count, the
        summary of bug 100000, and that no summary matches the HTML-entity
        pattern.
        """
        from ir_log import IRLog
        from ir_text import IRText
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        assert IRConfig.get_instance() is not None
        IRText.parse_info_level1('../data/test/info_level1_test')
        IRLog.get_instance().stop_log()

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None
        # in the test data, we have 1000 in total.
        # within, 40 have no resolution, 154 are incomplete
        # NOTE(review): 1000 - 40 - 154 is 806, not 833; presumably the two
        # groups overlap — confirm against the fixture.
        assert 833 == col.count()
        assert 'gnome is full of bugs ! (100000 currently)' == \
                col.find({'bug_id': 100000})[0]["summ"]

        # No stored summary may match the HTML-entity pattern.
        res = col.find(
            {"summ": {
                '$regex': '(&gt)|(&lt)|(&quot)|(&apo)s|(&amp)'
            }})
        assert res.count() == 0
Beispiel #12
0
    def test_cluster_sentences(test):
        """Log the sentence clusters found in a fixed bug description.

        The description is split into sentences, the sentences are
        clustered into three groups, and each group is logged: first the
        text of its representative sentence, then the text of every member.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_sentence import IRSentence

        logger = IRLog.get_instance()
        logger.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        number_of_groups = 3
        bug_id = 10000
        description = (
            'Version: 12.43\n'
            'Distribution: Gnome 12.03\n'
            '\n'
            'Steps to repreduce:\n'
            '1. Open firefox.\n'
            '2. Click Option\n'
            '3. Open firefox\n'
            '\n'
            'Additional information:\n'
            'This is really crazy when it crashed.'
        )
        sentences = IRSentence.get_sentence_from_description(
            description, bug_id)
        group_id, selected_id = IRSentence.cluster_sentences(
            sentences, number_of_groups)

        # members_of[g] lists the positions of the sentences put in group g.
        members_of = [[] for _ in range(number_of_groups)]
        for position, group_number in enumerate(group_id):
            members_of[group_number].append(position)

        for group_number, positions in enumerate(members_of):
            headline = 'Group %d. Representative: %s' % (
                group_number, sentences[selected_id[group_number]].get_text())
            logger.println(headline)
            for position in positions:
                logger.println(sentences[position].get_text())
Beispiel #13
0
    def get_termcount_of_bug(cls, bug_id):
        """Fetch the term counts of a bug.

        A cached entry is returned directly when caching is enabled.
        Otherwise the term count collection is queried and, with caching
        enabled, the result is cached.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (termcount of summary, termcount of description);
            two empty dicts when no record matches the bug id.
        """

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        if cls.__is_cache and bug_id in cls.__cache_term_count:
            return cls.__cache_term_count[bug_id]

        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')

        term_counts = IRCollection(
            'bug_db_name', 'bug_termcount_collection_name', 'r')
        matches = term_counts.find({id_field: bug_id})
        if matches.count() > 0:
            record = matches[0]
            count_pair = (record[summary_field], record[description_field])
        else:
            count_pair = ({}, {})

        if cls.__is_cache:
            cls.__cache_term_count[bug_id] = count_pair
        return count_pair
Beispiel #14
0
 def test_get_stacktrace_text_of_bug(self):
     """Load the test config and log the stacktrace text of bug 104400."""
     from ir_log import IRLog
     from ir_config import IRConfig
     from ir_text import IRText

     bug_id = 104400
     IRConfig.get_instance().load('../data/test/bug_test.cfg')
     text = IRText.get_stacktrace_text_of_bug(bug_id)
     message = 'stacktrace_text: %s' % (text)
     IRLog.get_instance().println(message)
Beispiel #15
0
    def test_create_incomplete_report(self):
        """Log a report next to its incomplete variant and their term counts.

        Computes the term counts of a fixed summary/description, derives an
        incomplete report with IRTermCount.create_incomplete_report (third
        argument 0.4), computes its term counts as well, and logs both
        texts and a side-by-side comparison of the term counts.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_term_count import IRTermCount

        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        summary = 'This is a test of calculation for single report term count.'
        description = 'This is the description of the test report. Just a test.'

        full_summary_bow, full_description_bow = (
            IRTermCount.calculate_term_count(summary, description))
        partial_summary, partial_description = (
            IRTermCount.create_incomplete_report(summary, description, 0.4))
        partial_summary_bow, partial_description_bow = (
            IRTermCount.calculate_term_count(partial_summary,
                                             partial_description))

        log = IRLog.get_instance()
        log.println('Original Summary: %s' % (summary))
        log.println('Original Description: %s' % (description))
        log.println('Incomplete Summary: %s' % (partial_summary))
        log.println('Incomplete Description: %s' % (partial_description))
        log.println('Compare original BoW with incomplete BoW')

        # Table header, then the original vs incomplete counts per field.
        log.println('%16s\t%8s\t%8s' % ('Summary', 'Ori', 'Inc'))
        IRTermCount.show_dict_compare(full_summary_bow, partial_summary_bow)
        log.println('%16s\t%8s\t%8s' % ('Description', 'Ori', 'Inc'))
        IRTermCount.show_dict_compare(full_description_bow,
                                      partial_description_bow)
Beispiel #16
0
    def get_tfidf_of_bug(cls, bug_id):
        """Fetch the TFIDF vectors of a bug.

        A cached entry is returned directly when caching is enabled.
        Otherwise the TFIDF collection is queried and, with caching
        enabled, the result is cached.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (TFIDF of summary, TFIDF of description); two
            empty dicts when no record matches the bug id.
        """

        if cls.__is_cache and bug_id in cls.__cache:
            return cls.__cache[bug_id]

        # Not served from the cache: read the record from the database.
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')

        tfidf_records = IRCollection('bug_db_name',
                                     'bug_tfidf_collection_name', 'r')
        matches = tfidf_records.find({id_field: bug_id})
        if matches.count() > 0:
            record = matches[0]
            tfidf_pair = (record[summary_field], record[description_field])
        else:
            tfidf_pair = ({}, {})

        if cls.__is_cache:
            cls.__cache[bug_id] = tfidf_pair
        return tfidf_pair
Beispiel #17
0
    def get_summary_and_description_of_bug(cls, bug_id):
        """Return the stored summary and description of a bug.

        Served from the cache when caching is on and the bug is cached;
        otherwise read from the text collection and, with caching on,
        remembered for later calls.

        Args:
            bug_id: int

        Returns:
            (str, str), (summary, description); both are empty strings
            when the bug id is not found.
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        cached_hit = (cls.__is_cache and
                      bug_id in cls.__cache_summary_description)
        if cached_hit:
            return cls.__cache_summary_description[bug_id]

        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')

        found = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r'
        ).find({bug_id_name: bug_id})

        summary, description = '', ''
        if found.count() > 0:
            first = found[0]
            summary = first[summary_name]
            description = first[description_name]

        answer = (summary, description)
        if cls.__is_cache:
            cls.__cache_summary_description[bug_id] = answer
        return answer
    def test_get_report_difference(self):
        """Check IRRecommender.get_report_difference on two small reports.

        Logs both reports and the computed differences, then asserts the
        summary difference is {'ghost', 'crash'} and the description
        difference is {'wow'}.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_report import IRReport
        from ir_recommender import IRRecommender

        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        new_report = IRReport('apple for summary', 'linux description')
        sim_report = IRReport('apple of ghost crashed', 'description linux wow')

        summary_diff, description_diff = IRRecommender.get_report_difference(
            new_report, sim_report)

        log = IRLog.get_instance()
        log.println('New summary: %s' % (new_report.get_summary_text()))
        log.println('Sim summary: %s' % (sim_report.get_summary_text()))
        log.println(
            'New description: %s' % (new_report.get_description_text()))
        log.println(
            'Sim description: %s' % (sim_report.get_description_text()))
        log.println('Diff of summary: %s' % (summary_diff))
        log.println('Diff of description: %s' % (description_diff))

        assert summary_diff == {'ghost', 'crash'}
        assert description_diff == {'wow'}
    def test_filter(self):
        """Run IRSTTools.filter over the description of every parsed bug.

        Parses the level-1 info fixture, then calls IRSTTools.filter on the
        "desc" field of each document in the text collection. The call must
        return two values; they are not checked further.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper
        from ir_gnome_st_tools import IRSTTools
        from ir_text import IRText

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        IRText.parse_info_level1('../data/test/info_level1_test')

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None
        # Maybe a bug here:
        # The test of filter (originally) depends on parse_info_level1
        # But parse_info_level1 seems to invoke filter...
        for bug in col.find():
            # TODO: it's not correct. no stacktrace in desc
            # The unpacking only checks that filter returns a pair.
            desc, stack = IRSTTools.filter(bug["desc"])

        IRLog.get_instance().stop_log()
    def test_compare_stackinfo(self):
        """Log stacktrace similarity of the first bug against all bugs.

        Parses the stacktrace fixture and, for every stored bug, logs the
        'weight' and 'max' results of IRSTTools.compare_stackinfo against
        the stacktrace of the first stored bug.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper
        from ir_gnome_st_tools import IRSTTools
        from ir_text import IRText

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        IRText.parse_info_level1('../data/test/stacktrace_test')

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None

        # Fetch the documents in one pass instead of indexing the cursor
        # once per position, which would query the database every time.
        bugs = list(col.find())
        assert bugs, 'no bug reports were parsed from the stacktrace fixture'
        st1 = bugs[0]["stacktrace"]

        for bug in bugs:
            st2 = bug["stacktrace"]
            result_weight = IRSTTools.compare_stackinfo(st1, st2, 'weight')
            result_max = IRSTTools.compare_stackinfo(st1, st2, 'max')
            IRLog.get_instance().println('Weight: %f, Max: %f'
                                         % (result_weight, result_max))

        IRLog.get_instance().stop_log()
Beispiel #21
0
    def get_stacktrace_of_bug(cls, bug_id):
        """Fetch the stacktrace of a bug.

        A cached entry is returned directly when caching is enabled.
        Otherwise the text collection is queried and, with caching enabled,
        the result is cached.

        Args:
            bug_id: int

        Returns:
            [[str]], [[signature]]; an empty list when no record matches
            the bug id.
        """
        if cls.__is_cache and bug_id in cls.__cache_stacktrace:
            return cls.__cache_stacktrace[bug_id]

        # Not served from the cache: read the record from the database.
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        stacktrace_field = config.get('bug_stacktrace_name')

        texts = IRCollection('bug_db_name',
                             'bug_text_collection_name', 'r')
        matches = texts.find({id_field: bug_id})
        if matches.count() > 0:
            stacktrace = matches[0][stacktrace_field]
        else:
            stacktrace = []

        if cls.__is_cache:
            cls.__cache_stacktrace[bug_id] = stacktrace
        return stacktrace
Beispiel #22
0
    def calculate_tfidf_for_report_termcount(cls,
                                             summary_termcount,
                                             description_termcount):
        """Calculate the TFIDF of a single report from its term counts.

        Both fields are scored with cls.calculate_tfidf against the
        document count collection and the total report number.

        Args:
            summary_termcount: dict, {term -> termcount}
            description_termcount: dict, {term -> termcount}

        Returns:
            (dict, dict), (tfidf of summary, tfidf of description)
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        document_counts = IRCollection(
            'bug_db_name', 'bug_documentcount_collection_name', 'r')
        report_total = cls.get_total_report_number()

        def tfidf_for(termcount, field_cfg_name):
            # The field name is looked up in the config by its config key.
            field_name = IRConfig.get_instance().get(field_cfg_name)
            return cls.calculate_tfidf(
                termcount, field_name, report_total, document_counts)

        summary_tfidf = tfidf_for(summary_termcount, 'bug_summary_name')
        description_tfidf = tfidf_for(
            description_termcount, 'bug_description_name')
        return summary_tfidf, description_tfidf
Beispiel #23
0
    def test_create_new_report_from_string(self):
        """Check that a serialized IRReport can be restored unchanged.

        A fully configured report is turned into text with to_string(),
        rebuilt with IRReport.from_string(), and compared field by field
        with the report it came from.
        """
        from nose.tools import eq_
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_report import IRReport
        from ir_term_count import IRTermCount

        logger = IRLog.get_instance()
        logger.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        summary_text = 'Firefox crashed'
        description_text = ('When I was openning history folder, the f**king'
                            ' Firefox just crashed!\n')
        source_report = IRReport(summary_text, description_text)
        source_report.set_basic_info(12345, 'core')
        penalty_terms = IRTermCount.do_stemming(['ie', 'explore'])
        source_report.set_penalty_terms(penalty_terms)
        source_report.set_exclude_report_ids([100100])
        source_report.set_dummy_bug_id(12345)
        skip_terms = IRTermCount.do_stemming(['new', 'please'])
        source_report.set_skip_terms(skip_terms)

        # save to text
        as_text = source_report.to_string()
        logger.println('Serialized report: %s' % (as_text))
        # load from text
        loaded_report = IRReport.from_string(as_text)

        assert (loaded_report.get_summary_text() ==
                source_report.get_summary_text())
        # Surrounding whitespace of the description is ignored.
        eq_(loaded_report.get_description_text().strip(),
            source_report.get_description_text().strip())
        assert loaded_report.get_create_ts() == source_report.get_create_ts()
        assert loaded_report.get_product() == source_report.get_product()
        assert (loaded_report.get_dummy_bug_id() ==
                source_report.get_dummy_bug_id())
        assert (loaded_report.get_penalty_terms() ==
                source_report.get_penalty_terms())
        assert (loaded_report.get_exclude_report_ids() ==
                source_report.get_exclude_report_ids())
        eq_(loaded_report.get_skip_terms(), source_report.get_skip_terms())
        logger.stop_log()
Beispiel #24
0
 def test_get_stacktrace_text_of_bug(self):
     """Log the stacktrace text IRText returns for bug 104400."""
     from ir_log import IRLog
     from ir_config import IRConfig
     from ir_text import IRText

     config_path = '../data/test/bug_test.cfg'
     IRConfig.get_instance().load(config_path)
     fetched = IRText.get_stacktrace_text_of_bug(104400)
     IRLog.get_instance().println(
         'stacktrace_text: %s' % (fetched))
    def test_get_collection_status(self):
        """Check the success flag reported before and after update_meta.

        After get_collection is called with True as its third argument the
        collection status must report success == False; after
        update_meta(..., True) it must report success == True.
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper

        db_cfg_key = 'bug_db_name'
        collection_cfg_key = 'bug_mongodb_helper_collection_name'

        helper = IRMongodbHelper.get_instance()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        # Only the call matters here; the returned collection is not used.
        helper.get_collection(db_cfg_key, collection_cfg_key, True)
        _, success = helper.get_collection_status(
            db_cfg_key, collection_cfg_key)
        assert success == False

        config = IRConfig.get_instance()
        db_name = config.get(db_cfg_key)
        collection_name = config.get(collection_cfg_key)
        helper.update_meta(db_name, collection_name, True)
        _, success = helper.get_collection_status(
            db_cfg_key, collection_cfg_key)
        assert success == True
Beispiel #26
0
    def batch_generate_tfidf(cls):
        """Compute the TFIDF of every bug and store it in the database.

        Caches all document counts, iterates over the term counts from
        IRTermCount.get_iterator(), computes summary and description TFIDF
        for each bug with cls.calculate_tfidf, inserts one document per bug
        into the TFIDF collection, then indexes that collection by bug id
        and closes it.
        """

        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_document_count import IRDocumentCount
        from ir_term_count import IRTermCount

        # Document field names and the configured algorithm.
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        summary_field = config.get('bug_summary_name')
        description_field = config.get('bug_description_name')
        algorithm = config.get('tfidf_algorithm')

        IRDocumentCount.cache_all_data()
        output = IRCollection(
            'bug_db_name', 'bug_tfidf_collection_name', 'w')

        term_counts = IRTermCount.get_iterator()
        bug_total = term_counts.count()

        def store_tfidf(bug):
            # None is passed where a document count collection would go;
            # presumably the cached counts are used instead — TODO confirm.
            summary_tfidf = cls.calculate_tfidf(
                bug[summary_field], summary_field, bug_total, None,
                algorithm)
            description_tfidf = cls.calculate_tfidf(
                bug[description_field], description_field, bug_total, None,
                algorithm)
            document = {
                id_field: bug[id_field],
                summary_field: summary_tfidf,
                description_field: description_tfidf,
            }
            output.insert(document)

        IRProgressBar.execute_iteration_for_cursor(
            term_counts, store_tfidf, "Calculating TFIDF")
        output.create_index([(id_field, IRCollection.ASCENDING)])
        output.close()
Beispiel #27
0
    def calculate_tfidf_for_report_termcount(cls, summary_termcount,
                                             description_termcount):
        """Calculate the TFIDF of one report from its term counts.

        Args:
            summary_termcount: dict, {term -> termcount}
            description_termcount: dict, {term -> termcount}

        Returns:
            (dict, dict), (tfidf of summary, tfidf of description)
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        doc_counts = IRCollection(
            'bug_db_name', 'bug_documentcount_collection_name', 'r')
        report_total = cls.get_total_report_number()
        config = IRConfig.get_instance()
        # (term counts, config key of the field name), summary first
        inputs = ((summary_termcount, 'bug_summary_name'),
                  (description_termcount, 'bug_description_name'))
        return tuple(
            cls.calculate_tfidf(
                termcount, config.get(name_key), report_total, doc_counts)
            for termcount, name_key in inputs)
Beispiel #28
0
    def get_tfidf_of_bug(cls, bug_id):
        """Get tfidf of a bug.

        When caching is enabled and the bug is already cached, the cached
        pair is returned without touching the database. Otherwise the
        collection configured as 'bug_tfidf_collection_name' is queried and,
        if caching is enabled, the result is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (TFIDF of summary, TFIDF of description). Both
            dicts are empty when no record exists for bug_id.
        """

        if cls.__is_cache and bug_id in cls.__cache:
            return cls.__cache[bug_id]
        # load from db
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        tfidf_collection = IRCollection(
            'bug_db_name', 'bug_tfidf_collection_name', 'r')
        find_result = tfidf_collection.find({bug_id_name: bug_id})
        summary = {}
        description = {}
        if find_result.count() > 0:
            # Fetch the first record once. NOTE(review): if find() returns
            # a pymongo cursor, each [0] is a separate fetch -- confirm.
            record = find_result[0]
            summary = record[summary_name]
            description = record[description_name]
        tfidf_pair = (summary, description)
        if cls.__is_cache:
            cls.__cache[bug_id] = tfidf_pair
        return tfidf_pair
Beispiel #29
0
    def test_filter(self):
        """Run IRSTTools.filter over every document of the text collection.

        Loads the test configuration, parses the level-1 info test file and
        then filters the "desc" field of each stored bug. The only explicit
        checks are that the database and collection handles are not None.
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRMongodbHelper
        from ir_gnome_st_tools import IRSTTools
        from ir_text import IRText
        import pymongo

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        IRText.parse_info_level1('../data/test/info_level1_test')

        con = IRMongodbHelper.get_instance().get_connection()
        db = con[IRConfig.get_instance().get('bug_db_name')]
        # identity check: comparing to None with != goes through __ne__
        assert db is not None
        col = db[IRConfig.get_instance().get('bug_text_collection_name')]
        assert col is not None
        # Maybe a bug here:
        # The test of filter (originally) depends on parse_info_level1
        # But parse_info_level1 seems to invoke filter...
        for bug in col.find():
            # TODO: it's not correct. no stacktrace in desc
            # the values are unused; unpacking only checks that filter
            # returns a pair
            desc, stack = IRSTTools.filter(bug["desc"])

        IRLog.get_instance().stop_log()
Beispiel #30
0
    def get_stacktrace_of_bug(cls, bug_id):
        """Fetch the stacktrace of a bug, consulting the cache first.

        Args:
            bug_id: int

        Returns:
            [[str]], [[signature]]; an empty list when the text collection
            holds no record for bug_id.
        """
        use_cache = cls.__is_cache
        if use_cache and bug_id in cls.__cache_stacktrace:
            return cls.__cache_stacktrace[bug_id]

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        stacktrace_field = config.get('bug_stacktrace_name')
        collection = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        matches = collection.find({id_field: bug_id})
        # fall back to an empty stacktrace when nothing matches
        found = matches[0][stacktrace_field] if matches.count() > 0 else []
        if use_cache:
            cls.__cache_stacktrace[bug_id] = found
        return found
    def test_generate_and_test_complete_test_file(self):
        """Generate 'complete_test_file' and run the evaluator over it."""
        from ir_config import IRConfig
        from ir_sim_bug_evaluator import IRSimBugEvaluator

        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')

        test_file = 'complete_test_file'
        evaluator = IRSimBugEvaluator()
        evaluator.generate_test_file(test_file, 0.0)
        evaluator.do_test_over_file(test_file)
Beispiel #32
0
    def test_cache_all_data(self):
        """Load the test config and call IRDocumentCount.cache_all_data()."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_document_count import IRDocumentCount

        logger = IRLog.get_instance()
        logger.start_log()
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')
        IRDocumentCount.cache_all_data()
Beispiel #33
0
    def test_generate_and_test_complete_test_file(self):
        """Write 'complete_test_file' with the evaluator, then test over it."""
        from ir_config import IRConfig
        from ir_sim_bug_evaluator import IRSimBugEvaluator

        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        file_name = 'complete_test_file'
        sim_evaluator = IRSimBugEvaluator()
        # the file is generated first so the second call has data to read
        sim_evaluator.generate_test_file(file_name, 0.0)
        sim_evaluator.do_test_over_file(file_name)
Beispiel #34
0
    def test_get_summary_and_description_of_bug(self):
        """Fetch summary and description of bug 100000 and log both."""

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_text import IRText
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        texts = IRText.get_summary_and_description_of_bug(100000)
        # unpacking requires the call to return exactly two values
        summary_text, description_text = texts
        logger = IRLog.get_instance()
        logger.println('summary: %s' % (summary_text))
        logger.println('description: %s' % (description_text))
Beispiel #35
0
    def test_get_squared_length(self):
        """Check get_squared_length on a two-term vector (0.4^2 + 0.6^2)."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        vector = {'firefox':0.4, 'chrome':0.6}
        squared_length = IRTFIDF.get_squared_length(vector)
        # compare with a tolerance because the values are floats
        error = abs(squared_length - 0.52)
        assert error < 0.00001
Beispiel #36
0
    def test_cache_all_data(self):
        """Load the test config and call IRTFIDF.cache_all_data()."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        logger.start_log()
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')
        IRTFIDF.cache_all_data()
        logger.stop_log()
Beispiel #37
0
 def __weighted_scoring(self, summary_similarity, description_similarity,
                        stacktrace_similarity):
     """Return the weighted sum of the three similarity values.

     The weights are read as floats from the configuration keys
     'bug_summary_ratio', 'bug_description_ratio' and
     'bug_stacktrace_ratio'.
     """
     from ir_config import IRConfig
     config = IRConfig.get_instance()
     # read all weights before any arithmetic is done
     summary_weight = config.get_float('bug_summary_ratio')
     description_weight = config.get_float('bug_description_ratio')
     stacktrace_weight = config.get_float('bug_stacktrace_ratio')
     summary_part = summary_similarity * summary_weight
     description_part = description_similarity * description_weight
     stacktrace_part = stacktrace_similarity * stacktrace_weight
     return summary_part + description_part + stacktrace_part
Beispiel #38
0
    def get_documentcount(cls,
                          term,
                          field=None,
                          documentcount_collection=None):
        """Get documentcount of a term.

        The cache is consulted first when caching is enabled; otherwise the
        document-count collection is queried and, if caching is enabled, the
        (summary, description) pair is stored in the cache.

        Args:
            term: str, the term to look up.
            field: str or None, the configured summary or description field
                name; None asks for both counts.
            documentcount_collection: collection to query. When None, the
                collection configured as 'bug_documentcount_collection_name'
                is opened with mode 'r'.

        Returns:
            if field is None: (int, int), (summary document count,
                description document count)
            else: int, the document count of the corresponding field, or 0
                when field matches neither configured field name.
        """

        def pick(counts):
            # Select what the caller asked for out of (summary, description).
            if field is None:
                return counts
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            if field == config.get('bug_summary_name'):
                return counts[0]
            if field == config.get('bug_description_name'):
                return counts[1]
            return 0

        if cls.__is_cache and term in cls.__cache_document_count:
            return pick(cls.__cache_document_count[term])
        # load from db
        from ir_mongodb_helper import IRCollection
        from ir_config import IRConfig
        config = IRConfig.get_instance()
        term_name = config.get('bug_term_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        if documentcount_collection is None:
            documentcount_collection = IRCollection(
                'bug_db_name', 'bug_documentcount_collection_name', 'r')
        res = documentcount_collection.find({term_name: term})
        summary = 0
        description = 0
        if res.count() > 0:
            # Fetch the first record once. NOTE(review): if find() returns
            # a pymongo cursor, each [0] is a separate fetch -- confirm.
            record = res[0]
            if summary_name in record:
                summary = record[summary_name]
            if description_name in record:
                description = record[description_name]
        counts = (summary, description)
        if cls.__is_cache:
            cls.__cache_document_count[term] = counts
        return pick(counts)
Beispiel #39
0
    def test_generate_document_count(self):
        """Run IRDocumentCount.batch_generate_document_count() with logging."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_document_count import IRDocumentCount

        logger = IRLog.get_instance()
        logger.start_log()
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')

        IRDocumentCount.batch_generate_document_count()
        logger.stop_log()
Beispiel #40
0
 def get_connection(self):
     """Get the connection, using db_host and db_port set in config file.

     The connection is created on the first call and the same object is
     returned afterwards. Host and port fall back to this helper's
     defaults when the configuration does not provide them.

     Returns:
         the pymongo connection object.
     """
     if self.__connection is None:
         import pymongo
         from ir_config import IRConfig
         config = IRConfig.get_instance()
         # pymongo 3.0 removed Connection in favour of MongoClient. Keep
         # using Connection where it exists so behaviour on old drivers is
         # unchanged, and fall back to MongoClient on newer ones.
         connection_class = getattr(pymongo, 'Connection', None)
         if connection_class is None:
             connection_class = pymongo.MongoClient
         self.__connection = connection_class(
             config.get('db_host', self.__default_host),
             config.get_int('db_port', self.__default_port))
     return self.__connection
Beispiel #41
0
    def test_get_summary_and_description_of_bug(self):
        """Log the summary and description returned for bug 100000."""

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_text import IRText
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')
        bug_id = 100000
        pair = IRText.get_summary_and_description_of_bug(bug_id)
        # unpacking requires the call to return exactly two values
        summary_text, description_text = pair
        log = IRLog.get_instance()
        log.println('summary: %s' % (summary_text))
        log.println('description: %s' % (description_text))
Beispiel #42
0
    def __store_to_mongodb(cls, bug2group, group2bug):
        """Store duplicate group information into Mongodb.

        Writes one {bug id, group id} document per bug into the collection
        configured as 'bug_duplicate_collection_name', and one
        {group id, group size} document per group into the collection
        configured as 'bug_duplicate_group_count_collection_name'.
        Ascending indexes are created on the bug id and group id of the
        first collection and on the group id of the second.

        Args:
            bug2group: dict, {bug_id -> group_id}
            group2bug: dict, {group_id -> [bug_id]}
        """

        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        bug_group_name = config.get('bug_group_name')
        duplicate_collection = IRCollection('bug_db_name',
                                            'bug_duplicate_collection_name',
                                            'w')

        def iter_bug_group(bug):
            duplicate_collection.insert({
                bug_id_name: bug,
                bug_group_name: bug2group[bug]
            })

        IRProgressBar.execute_iteration_for_dict(bug2group, iter_bug_group,
                                                 "Store to db")
        duplicate_collection.create_index([(bug_id_name,
                                            IRCollection.ASCENDING)])
        duplicate_collection.create_index([(bug_group_name,
                                            IRCollection.ASCENDING)])
        duplicate_collection.close()

        # duplicate group size collection; the group field uses the same
        # 'bug_group_name' key as above, so the value is reused
        group_size_name = config.get('bug_group_size')
        duplicate_group_count_collection = IRCollection(
            'bug_db_name', 'bug_duplicate_group_count_collection_name', 'w')

        def iter_group_bug(group):
            duplicate_group_count_collection.insert({
                bug_group_name: group,
                group_size_name: len(group2bug[group])
            })

        IRProgressBar.execute_iteration_for_dict(group2bug, iter_group_bug,
                                                 'Store Index')
        duplicate_group_count_collection.create_index([
            (bug_group_name, IRCollection.ASCENDING)
        ])
        duplicate_group_count_collection.close()
Beispiel #43
0
    def calculate_tfidf(cls,
                        termcount,
                        field_name,
                        document_num,
                        documentcount_collection=None,
                        algorithm=None):
        """Calculate TFIDF for a BoW.

        Args:
            termcount: dict, {term -> count}
            field_name: str, 'summary' or 'description', in order to get
                document count
            document_num: int, Total number of documents. Not used by this
                implementation; kept so existing callers keep working.
            documentcount_collection: passed through to
                IRDocumentCount.get_documentcount; None lets that method
                pick its own collection.
            algorithm: str, 'tfidf' for term-frequency and normalized tfidf.
                            'bidf' for 0-1 counting without normalized
                            if None, fetch config from file
        Returns:
            dict, {term -> tfidf}

        Raises:
            ValueError: if termcount is not empty and algorithm is neither
                'tfidf' nor 'bidf'.
        """
        from math import sqrt
        from ir_document_count import IRDocumentCount

        if algorithm is None:
            from ir_config import IRConfig
            algorithm = IRConfig.get_instance().get('tfidf_algorithm')

        tfidfs = {}
        length_2 = 0
        # calculate raw tfidf
        for term, count in termcount.items():
            documentcount = IRDocumentCount.get_documentcount(
                term, field_name, documentcount_collection)
            idf = cls.get_idf(documentcount)
            # Warning: there're two types of tf: term count or term frequency
            #   We need to compare their performance
            # If we normalize the vector, we just use occurrence of term
            if algorithm == 'tfidf':
                tfidf = float(count) * idf
                length_2 += tfidf**2
            elif algorithm == 'bidf':
                tfidf = (1 if count > 0 else 0) * idf
            else:
                # Without this branch `tfidf` would be unbound on the first
                # term, or silently reuse the previous term's value.
                raise ValueError(
                    'unknown tfidf algorithm: %r' % (algorithm,))
            tfidfs[term] = tfidf
        # normalize raw tfidf
        if algorithm == 'tfidf':
            length = sqrt(length_2)
            # skip normalization for a (near) zero-length vector
            if abs(length) > 0.0001:
                for term in tfidfs:
                    tfidfs[term] /= length
        return tfidfs
Beispiel #44
0
 def __weighted_scoring(self, summary_similarity, description_similarity,
                        stacktrace_similarity):
     """Combine the three similarities into a single weighted score.

     Each similarity is multiplied by the float read from its
     configuration key ('bug_summary_ratio', 'bug_description_ratio',
     'bug_stacktrace_ratio') and the products are added together.
     """
     from ir_config import IRConfig
     config = IRConfig.get_instance()
     # all three ratios are fetched before the score is computed
     w_summary = config.get_float('bug_summary_ratio')
     w_description = config.get_float('bug_description_ratio')
     w_stacktrace = config.get_float('bug_stacktrace_ratio')
     score = summary_similarity * w_summary
     score = score + description_similarity * w_description
     score = score + stacktrace_similarity * w_stacktrace
     return score
Beispiel #45
0
    def similarity_over_all(self):
        """Calculate similarity between this report and stored reports.

        Candidates are the reports in the basic collection that share this
        report's product, were created later than two years (2 * 365 days)
        before this report's creation timestamp, and whose id is not in the
        excluded id list. The dummy bug id is skipped as well.

        Returns:
            dict, {bug_id -> result of self.similarity_with(other_report)},
            i.e. [score, summary_score, description_score, stacktrace_score]
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        config = IRConfig.get_instance()
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {'$gt': self.get_create_ts() - search_time_span},
            bug_id_name: {'$nin': self.__exclude_report_ids}})
        result = {}
        logger.println('Comparing with %d reports.'
                       % (reports2scan.count()))

        # Debug output of the query texts. The single-argument call form
        # prints the same on Python 2 and is also valid Python 3 syntax.
        print(self.__summary_text)
        print(self.__description_text)

        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being none, we create and fill the info
            #    of report manually
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            # if our own stacktrace is empty, we don't need to load theirs
            own_stacktrace = self.get_stacktrace()
            if own_stacktrace is not None and len(own_stacktrace) > 0:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
Beispiel #46
0
    def test_tokenization(self):
        """Check IRTermCount.do_tokenization against known token lists."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_term_count import IRTermCount
        from nose.tools import assert_equals

        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        # (input text, expected tokens)
        cases = [('mouse-down', ['mouse-down']),
                 ('set_background_color()', ['set_background_color'])]
        for text, expected_tokens in cases:
            tokens = IRTermCount.do_tokenization(text)
            assert_equals(expected_tokens, tokens)
Beispiel #47
0
    def test_batch_generate_tfidf(self):
        """Run IRTFIDF.batch_generate_tfidf() against the test config."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        logger.start_log()
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')
        IRTFIDF.batch_generate_tfidf()
        logger.stop_log()
    def test_get_duplicate_group_information(self):
        """Query duplicate groups for the bounds (3, 10) and log their ids."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        duplicate_group = IRDuplicateGroup()
        # presumably the lower/upper group size bounds, per the log message
        # below -- confirm against get_duplicate_group_information
        min_size, max_size = 3, 10
        group_ids = duplicate_group.get_duplicate_group_information(
            min_size, max_size)
        # log the bounds that were actually queried (the message used to
        # report 0 and 100)
        IRLog.get_instance().println('Groups with size between %d, %d: %s' \
                % (min_size, max_size,
                   ' '.join([str(group_id) for group_id in group_ids])))
        IRLog.get_instance().stop_log()
Beispiel #49
0
    def test_top_n_similarity_over_all(self):
        """Log the result of top_n_similarity_over_all(10) for bug 100000."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_report import IRReport

        logger = IRLog.get_instance()
        logger.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        bug_id = 100000
        report = IRReport(bug_id)
        top_similar = report.top_n_similarity_over_all(10)
        message = 'Bugs with top similarities with bug %d: %s' \
                % (bug_id, str(top_similar))
        logger.println(message)
        logger.stop_log()
    def test_get_term_by_simple_entropy(self):
        """Build the term-difference fixture for the simple-entropy test.

        NOTE(review): the visible code only constructs `diff`; nothing is
        called with it and nothing is asserted.
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_report import IRReport
        from ir_recommender import IRRecommender

        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        terms = ['a', 'b', 'c', 'd']
        # pairs of (empty set, first N terms) for N = 4, 3, 2, 1
        diff = [(set(), set(terms[:size])) for size in (4, 3, 2, 1)]
    def test_get_bugs_in_group(self):
        """Log the bug ids returned by get_bugs_in_group for group 1."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup

        logger = IRLog.get_instance()
        logger.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        groups = IRDuplicateGroup()
        members = groups.get_bugs_in_group(1)
        header = 'Group %d has bugs: ' % (1)
        listing = ' '.join([str(member) for member in members])
        logger.println(header + listing)
        logger.stop_log()
Beispiel #52
0
    def get_documentcount(cls, term, field=None, documentcount_collection=None):
        """Get documentcount of a term.

        The cache is consulted first when caching is enabled; otherwise the
        document-count collection is queried and, if caching is enabled, the
        (summary, description) pair is stored in the cache.

        Args:
            term: str, the term to look up.
            field: str or None, the configured summary or description field
                name; None asks for both counts.
            documentcount_collection: collection to query. When None, the
                collection configured as 'bug_documentcount_collection_name'
                is opened with mode 'r'.

        Returns:
            if field is None: (int, int), (summary document count,
                description document count)
            else: int, the document count of the corresponding field, or 0
                when field matches neither configured field name.
        """

        def select_counts(counts):
            # Return the whole pair or the single count the caller asked for.
            if field is None:
                return counts
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            if field == config.get('bug_summary_name'):
                return counts[0]
            if field == config.get('bug_description_name'):
                return counts[1]
            return 0

        if cls.__is_cache and term in cls.__cache_document_count:
            return select_counts(cls.__cache_document_count[term])
        # load from db
        from ir_mongodb_helper import IRCollection
        from ir_config import IRConfig
        config = IRConfig.get_instance()
        term_name = config.get('bug_term_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        if documentcount_collection is None:
            documentcount_collection = IRCollection(
                'bug_db_name', 'bug_documentcount_collection_name', 'r')
        res = documentcount_collection.find({term_name: term})
        summary = 0
        description = 0
        if res.count() > 0:
            # Fetch the first record once. NOTE(review): if find() returns
            # a pymongo cursor, each [0] is a separate fetch -- confirm.
            first = res[0]
            if summary_name in first:
                summary = first[summary_name]
            if description_name in first:
                description = first[description_name]
        counts = (summary, description)
        if cls.__is_cache:
            cls.__cache_document_count[term] = counts
        return select_counts(counts)
Beispiel #53
0
    def test_get_tfidf_of_bug(self):
        """Log the summary and description TFIDF of bug 100000."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        logger.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        tfidf_pair = IRTFIDF.get_tfidf_of_bug(100000)
        # unpacking requires the call to return exactly two values
        summary_tfidf, description_tfidf = tfidf_pair
        logger.println('Summary tfidf: %s' % (str(summary_tfidf)))
        logger.println('Description tfidf: %s' % (str(description_tfidf)))
        logger.stop_log()
Beispiel #54
0
    def test_get_termcount_of_bug(self):
        """Fetch the term counts of bug 100000 and print both of them.

        Checks that neither the summary nor the description term count is
        None, then shows each one via IRTermCount.show_dict_compare.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_term_count import IRTermCount

        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        summary, description = IRTermCount.get_termcount_of_bug(100000)
        assert None != summary
        assert None != description
        IRLog.get_instance().println('Summary')
        # the second argument is an empty dict, so there is nothing to
        # compare the term counts against
        IRTermCount.show_dict_compare(summary, {})
        IRLog.get_instance().println('Description')
        IRTermCount.show_dict_compare(description, {})