Example #1
0
    def test_tfidf_asm_similarity(self):
        """Check IRTFIDF.tfidf_asm_similarity on three tiny term vectors."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        # Start logging and load the test configuration before scoring.
        logger = IRLog.get_instance()
        logger.start_log()
        config = IRConfig.get_instance()
        config.load('../data/test/bug_test.cfg')

        # Term -> weight vectors; every term carries the same weight of 1.
        browsers = dict.fromkeys(('firefox', 'chrome'), 1)
        browsers_plus_ie = dict.fromkeys(('firefox', 'chrome', 'ie'), 1)
        firefox_and_oses = dict.fromkeys(('firefox', 'windows', 'linux'), 1)

        tolerance = 0.0001

        # Every term of the first vector also occurs in the second one.
        superset_score = IRTFIDF.tfidf_asm_similarity(
            browsers, browsers_plus_ie)
        assert abs(1.0 - superset_score) < tolerance

        # Only 'firefox' is shared between these two vectors.
        partial_score = IRTFIDF.tfidf_asm_similarity(
            browsers, firefox_and_oses)
        assert abs(0.5 - partial_score) < tolerance

        # Supplying ['ie'] with the trailing 100 must strictly lower the score.
        # NOTE(review): presumably these are penalty terms and their weight;
        # confirm against IRTFIDF.tfidf_asm_similarity.
        plain_score = IRTFIDF.tfidf_asm_similarity(
            browsers, browsers_plus_ie)
        penalised_score = IRTFIDF.tfidf_asm_similarity(
            browsers, browsers_plus_ie, None, ['ie'], 100)
        assert plain_score > penalised_score
Example #2
0
    def similarity_with(self, other_report):
        """Score how similar this report is to ``other_report``.

        The summary and description TF-IDF vectors of both reports are
        compared with the algorithm named by the ``tfidf_algorithm`` config
        entry (``'tfidf'`` or ``'bidf'``), the stacktraces are compared with
        ``IRSTTools.compare_stackinfo``, and the three partial similarities
        are combined by the scorer named by the ``scoring_strategy`` config
        entry (``'weighted'``, ``'heuristic'`` or ``'distweighted'``;
        ``'heuristic'`` is the default passed to the config lookup).

        Args:
            other_report: the report to compare against. It must provide
                ``get_summary_and_description_tfidf()`` and
                ``get_stacktrace()``.

        Returns:
            [float, float, float, float], [total score, summary,
            description, stacktrace]

        Raises:
            AssertionError: if ``tfidf_algorithm`` or ``scoring_strategy``
                is not one of the supported values.
        """
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF
        from ir_gnome_st_tools import IRSTTools

        config = IRConfig.get_instance()

        summary_tfidf_a, description_tfidf_a = \
                self.get_summary_and_description_tfidf()
        summary_tfidf_b, description_tfidf_b = \
                other_report.get_summary_and_description_tfidf()

        tfidf_algorithm = config.get('tfidf_algorithm')
        stacktrace_algorithm = config.get('stacktrace_algorithm')
        if tfidf_algorithm == 'tfidf':
            summary_similarity = IRTFIDF.tfidf_similarity(
                summary_tfidf_a, summary_tfidf_b)
            description_similarity = IRTFIDF.tfidf_similarity(
                description_tfidf_a, description_tfidf_b)
        elif tfidf_algorithm == 'bidf':
            summary_squared_length, description_squared_length = \
                    self.get_summary_and_description_tfidf_squared_length()
            summary_similarity = IRTFIDF.tfidf_asm_similarity(
                summary_tfidf_a, summary_tfidf_b, summary_squared_length)
            # Only the description comparison is given the penalty terms.
            description_similarity = IRTFIDF.tfidf_asm_similarity(
                description_tfidf_a, description_tfidf_b,
                description_squared_length, self.__penalty_terms)
        else:
            # Fail here with a clear message instead of hitting an
            # UnboundLocalError further down when the similarities are used.
            raise AssertionError('invalid tfidf algorithm')

        own_stacktrace = self.__stacktrace
        if own_stacktrace is None or \
                len(own_stacktrace) == 0 or \
                len(own_stacktrace[0]) == 0:
            # This report has no usable stacktrace: the stacktrace component
            # is scored as a full match rather than compared.
            stacktrace_similarity = 1.0
        else:
            stacktrace_similarity = IRSTTools.compare_stackinfo(
                self.get_stacktrace(), other_report.get_stacktrace(),
                stacktrace_algorithm)

        scoring_strategy = config.get('scoring_strategy', 'heuristic')
        if scoring_strategy == 'weighted':
            score = self.__weighted_scoring(summary_similarity,
                                            description_similarity,
                                            stacktrace_similarity)
        elif scoring_strategy == 'heuristic':
            score = self.__heuristic_scoring(summary_similarity,
                                             description_similarity,
                                             stacktrace_similarity)
        elif scoring_strategy == 'distweighted':
            score = self.__distweighted_scoring(summary_similarity,
                                                description_similarity,
                                                stacktrace_similarity)
        else:
            # Raised explicitly (not via ``assert``) so the check is still
            # performed when Python runs with -O; the exception type is
            # unchanged for existing callers.
            raise AssertionError('invalid scoring strategy')
        return [
            score, summary_similarity, description_similarity,
            stacktrace_similarity
        ]
Example #3
0
    def similarity_with(self, other_report):
        """Score how similar this report is to ``other_report``.

        The summary and description TF-IDF vectors of both reports are
        compared with the algorithm named by the ``tfidf_algorithm`` config
        entry (``'tfidf'`` or ``'bidf'``), the stacktraces are compared with
        ``IRSTTools.compare_stackinfo``, and the three partial similarities
        are combined by the scorer named by the ``scoring_strategy`` config
        entry (``'weighted'``, ``'heuristic'`` or ``'distweighted'``;
        ``'heuristic'`` is the default passed to the config lookup).

        Args:
            other_report: the report to compare against. It must provide
                ``get_summary_and_description_tfidf()`` and
                ``get_stacktrace()``.

        Returns:
            [float, float, float, float], [total score, summary,
            description, stacktrace]

        Raises:
            AssertionError: if ``tfidf_algorithm`` or ``scoring_strategy``
                is not one of the supported values.
        """
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF
        from ir_gnome_st_tools import IRSTTools

        config = IRConfig.get_instance()

        summary_tfidf_a, description_tfidf_a = \
                self.get_summary_and_description_tfidf()
        summary_tfidf_b, description_tfidf_b = \
                other_report.get_summary_and_description_tfidf()

        tfidf_algorithm = config.get('tfidf_algorithm')
        stacktrace_algorithm = config.get('stacktrace_algorithm')
        if tfidf_algorithm == 'tfidf':
            summary_similarity = IRTFIDF.tfidf_similarity(
                summary_tfidf_a, summary_tfidf_b)
            description_similarity = IRTFIDF.tfidf_similarity(
                description_tfidf_a, description_tfidf_b)
        elif tfidf_algorithm == 'bidf':
            summary_squared_length, description_squared_length = \
                    self.get_summary_and_description_tfidf_squared_length()
            summary_similarity = IRTFIDF.tfidf_asm_similarity(
                summary_tfidf_a, summary_tfidf_b, summary_squared_length)
            # Only the description comparison is given the penalty terms.
            description_similarity = IRTFIDF.tfidf_asm_similarity(
                description_tfidf_a, description_tfidf_b,
                description_squared_length,
                self.__penalty_terms)
        else:
            # Fail here with a clear message instead of hitting an
            # UnboundLocalError further down when the similarities are used.
            raise AssertionError('invalid tfidf algorithm')

        own_stacktrace = self.__stacktrace
        if own_stacktrace is None or \
                len(own_stacktrace) == 0 or \
                len(own_stacktrace[0]) == 0:
            # This report has no usable stacktrace: the stacktrace component
            # is scored as a full match rather than compared.
            stacktrace_similarity = 1.0
        else:
            stacktrace_similarity = IRSTTools.compare_stackinfo(
                self.get_stacktrace(), other_report.get_stacktrace(),
                stacktrace_algorithm)

        scoring_strategy = config.get('scoring_strategy', 'heuristic')
        if scoring_strategy == 'weighted':
            score = self.__weighted_scoring(summary_similarity,
                                            description_similarity,
                                            stacktrace_similarity)
        elif scoring_strategy == 'heuristic':
            score = self.__heuristic_scoring(summary_similarity,
                                             description_similarity,
                                             stacktrace_similarity)
        elif scoring_strategy == 'distweighted':
            score = self.__distweighted_scoring(summary_similarity,
                                                description_similarity,
                                                stacktrace_similarity)
        else:
            # Raised explicitly (not via ``assert``) so the check is still
            # performed when Python runs with -O; the exception type is
            # unchanged for existing callers.
            raise AssertionError('invalid scoring strategy')
        return [score,
                summary_similarity,
                description_similarity,
                stacktrace_similarity]