Esempio n. 1
0
    def similarity_over_all(self):
        """Score this report against every candidate report in the bug store.

        Candidates are the documents of the basic bug collection that
          * have the same product as this report,
          * were created later than this report's creation timestamp minus
            ``search_time_span``, and
          * whose id is not listed in ``self.__exclude_report_ids``.
        The document whose id equals ``self.get_dummy_bug_id()`` is skipped.

        Side effects: logs the number of candidates and prints this report's
        summary and description text to stdout.

        Returns:
            dict, {bug_id -> value returned by ``self.similarity_with``};
            documented by the original author as
            [score, summary_score, description_score, stacktrace_score].
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        config = IRConfig.get_instance()
        # 2 * 365 days in seconds -- assumes creation timestamps are expressed
        # in seconds (TODO confirm against the stored data).
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {
                '$gt': self.get_create_ts() - search_time_span
            },
            bug_id_name: {
                '$nin': self.__exclude_report_ids
            }
        })
        logger.println('Comparing with %d reports.' % (reports2scan.count()))

        # Single-argument print(...) emits the same output whether print is a
        # statement or a function.
        print(self.__summary_text)
        print(self.__description_text)

        # These values do not depend on the scanned report, so fetch them once
        # instead of on every iteration (get_stacktrace() may have to load the
        # stacktrace again on each call).
        own_stacktrace = self.get_stacktrace()
        compare_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0
        dummy_bug_id = self.get_dummy_bug_id()

        result = {}
        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == dummy_bug_id:
                continue
            # The other report is created empty and filled in by hand so that
            # its stacktrace is only loaded when this report has one to
            # compare against.
            # NOTE(review): the double-underscore attributes are name-mangled
            # with the enclosing class name, so this presumably only works
            # because the enclosing class is IRReport itself -- confirm.
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            if compare_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(
                    bug_id)
            # Normalise a missing stacktrace to an empty list before scoring.
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
Esempio n. 2
0
    def __generate_single_bug(self, bug_id, drop_rate):
        """Build an IRReport from a stored bug, optionally made incomplete.

        The summary, description, basic info and stacktrace are read through
        IRText. When ``drop_rate`` exceeds 0.001 the summary and description
        are replaced by the result of IRTermCount.create_incomplete_report and
        the resulting description is printed to stdout.

        Args:
            bug_id: int, original bug id.
            drop_rate: float, 0.0 for not drop, 1.0 for totally drop.

        Returns:
            IRReport, carrying the (possibly reduced) text, the stacktrace of
            the original bug, ``bug_id`` as its dummy bug id and the original
            bug's basic info.
        """
        from ir_text import IRText
        from ir_term_count import IRTermCount
        from ir_report import IRReport

        # Load the stored text and metadata of the original bug.
        summary_text, description_text = \
            IRText.get_summary_and_description_of_bug(bug_id)
        created_at, product_of_bug = IRText.get_basic_info_of_bug(bug_id)

        # Rates at or below the threshold are treated as "keep everything".
        if drop_rate > 0.001:
            summary_text, description_text = \
                IRTermCount.create_incomplete_report(
                    summary_text, description_text, drop_rate)
            print(description_text)

        generated = IRReport(summary_text, description_text)
        original_stacktrace = IRText.get_stacktrace_of_bug(bug_id)
        generated.set_stacktrace(original_stacktrace)
        generated.set_dummy_bug_id(bug_id)
        generated.set_basic_info(created_at, product_of_bug)
        return generated
Esempio n. 3
0
    def __generate_single_bug(self, bug_id, drop_rate):
        """Build an IRReport from a stored bug, optionally made incomplete.

        The summary, description, basic info and stacktrace are read through
        IRText. When ``drop_rate`` exceeds 0.001 the summary and description
        are replaced by the result of IRTermCount.create_incomplete_report and
        the resulting description is printed to stdout.

        Args:
            bug_id: int, original bug id.
            drop_rate: float, 0.0 for not drop, 1.0 for totally drop.

        Returns:
            IRReport, carrying the (possibly reduced) text, the stacktrace of
            the original bug, ``bug_id`` as its dummy bug id and the original
            bug's basic info.
        """
        from ir_text import IRText
        from ir_term_count import IRTermCount
        from ir_report import IRReport

        # Load the stored text and metadata of the original bug.
        summary_text, description_text = \
            IRText.get_summary_and_description_of_bug(bug_id)
        created_at, product_of_bug = IRText.get_basic_info_of_bug(bug_id)

        # Rates at or below the threshold are treated as "keep everything".
        if drop_rate > 0.001:
            summary_text, description_text = \
                IRTermCount.create_incomplete_report(
                    summary_text, description_text, drop_rate)
            print(description_text)

        generated = IRReport(summary_text, description_text)
        original_stacktrace = IRText.get_stacktrace_of_bug(bug_id)
        generated.set_stacktrace(original_stacktrace)
        generated.set_dummy_bug_id(bug_id)
        generated.set_basic_info(created_at, product_of_bug)
        return generated
Esempio n. 4
0
 def get_stacktrace(self):
     """Return the stacktrace of this report.

     When the report has no bug id, the stacktrace stored on the instance
     is returned as is (it may be None). Otherwise a stored stacktrace is
     reused only if caching is allowed and one is present; in every other
     case it is loaded with IRText.get_stacktrace_of_bug and kept on the
     instance only when caching is allowed.
     """
     # No bug id: the value held on the instance is all there is.
     if self.__bug_id is None:
         return self.__stacktrace

     caching_allowed = self.__allow_cache
     if caching_allowed and self.__stacktrace is not None:
         return self.__stacktrace

     # Imported here, at function scope, as in the rest of the file.
     from ir_text import IRText
     loaded = IRText.get_stacktrace_of_bug(self.__bug_id)
     if caching_allowed:
         self.__stacktrace = loaded
     return loaded
Esempio n. 5
0
    def similarity_over_all(self):
        """Score this report against every candidate report in the bug store.

        Candidates are the documents of the basic bug collection that
          * have the same product as this report,
          * were created later than this report's creation timestamp minus
            ``search_time_span``, and
          * whose id is not listed in ``self.__exclude_report_ids``.
        The document whose id equals ``self.get_dummy_bug_id()`` is skipped.

        Side effects: logs the number of candidates and prints this report's
        summary and description text to stdout.

        Returns:
            dict, {bug_id -> value returned by ``self.similarity_with``};
            documented by the original author as
            [score, summary_score, description_score, stacktrace_score].
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        config = IRConfig.get_instance()
        # 2 * 365 days in seconds -- assumes creation timestamps are expressed
        # in seconds (TODO confirm against the stored data).
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {'$gt': self.get_create_ts() - search_time_span},
            bug_id_name: {'$nin': self.__exclude_report_ids}})
        logger.println('Comparing with %d reports.' % (reports2scan.count()))

        # Single-argument print(...) emits the same output whether print is a
        # statement or a function.
        print(self.__summary_text)
        print(self.__description_text)

        # These values do not depend on the scanned report, so fetch them once
        # instead of on every iteration (get_stacktrace() may have to load the
        # stacktrace again on each call).
        own_stacktrace = self.get_stacktrace()
        compare_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0
        dummy_bug_id = self.get_dummy_bug_id()

        result = {}
        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == dummy_bug_id:
                continue
            # The other report is created empty and filled in by hand so that
            # its stacktrace is only loaded when this report has one to
            # compare against.
            # NOTE(review): the double-underscore attributes are name-mangled
            # with the enclosing class name, so this presumably only works
            # because the enclosing class is IRReport itself -- confirm.
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            if compare_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
            # Normalise a missing stacktrace to an empty list before scoring.
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
Esempio n. 6
0
 def get_stacktrace(self):
     """Return the stacktrace of this report.

     When the report has no bug id, the stacktrace stored on the instance
     is returned as is (it may be None). Otherwise a stored stacktrace is
     reused only if caching is allowed and one is present; in every other
     case it is loaded with IRText.get_stacktrace_of_bug and kept on the
     instance only when caching is allowed.
     """
     # No bug id: the value held on the instance is all there is.
     if self.__bug_id is None:
         return self.__stacktrace

     caching_allowed = self.__allow_cache
     if caching_allowed and self.__stacktrace is not None:
         return self.__stacktrace

     # Imported here, at function scope, as in the rest of the file.
     from ir_text import IRText
     loaded = IRText.get_stacktrace_of_bug(self.__bug_id)
     if caching_allowed:
         self.__stacktrace = loaded
     return loaded