Exemple #1
0
    def __update_summary_and_description_tfidf_from_termcount(self):
        """Fill in unset TF-IDF attributes from this object's term counts.

        The term counts are read through
        self.get_summary_and_description_termcount() and converted with
        IRTFIDF.calculate_tfidf_for_report_termcount.  A computed value is
        stored only where the matching attribute is still None; an attribute
        that already holds a value is left untouched.
        """
        from ir_tfidf import IRTFIDF

        term_counts = self.get_summary_and_description_termcount()
        summary_counts, description_counts = term_counts
        fresh_summary, fresh_description = (
            IRTFIDF.calculate_tfidf_for_report_termcount(
                summary_counts, description_counts))

        # Only fill the gaps; never overwrite an existing value.
        if self.__summary_tfidf is None:
            self.__summary_tfidf = fresh_summary
        if self.__description_tfidf is None:
            self.__description_tfidf = fresh_description
Exemple #2
0
 def __update_summary_and_description_tfidf_from_termcount(self):
     """Derive TF-IDF values from term counts and store the missing ones.

     Reads the (summary, description) term counts via
     self.get_summary_and_description_termcount(), passes them to
     IRTFIDF.calculate_tfidf_for_report_termcount, and assigns each result
     to its attribute only when that attribute is currently None.
     """
     from ir_tfidf import IRTFIDF

     counts_pair = self.get_summary_and_description_termcount()
     counts_for_summary, counts_for_description = counts_pair
     tfidf_pair = IRTFIDF.calculate_tfidf_for_report_termcount(
         counts_for_summary, counts_for_description)
     new_summary_tfidf, new_description_tfidf = tfidf_pair

     # Existing (non-None) values take precedence over the computed ones.
     if self.__summary_tfidf is None:
         self.__summary_tfidf = new_summary_tfidf
     if self.__description_tfidf is None:
         self.__description_tfidf = new_description_tfidf
Exemple #3
0
    def query(cls, summary, description, top_n):
        """Return the top_n similarity results for a summary/description pair.

        The inputs are turned into term counts with
        IRTermCount.calculate_term_count, the counts are converted with
        IRTFIDF.calculate_tfidf_for_report_termcount, and the resulting pair
        is handed to IRTFIDF.get_top_n_similarity_over_all.

        Args:
            summary: summary input, forwarded to IRTermCount
                (presumably raw text -- confirm against IRTermCount).
            description: description input, forwarded the same way.
            top_n: passed through unchanged to
                get_top_n_similarity_over_all.

        Returns:
            Whatever IRTFIDF.get_top_n_similarity_over_all returns.
        """
        from ir_term_count import IRTermCount
        from ir_tfidf import IRTFIDF

        term_counts = IRTermCount.calculate_term_count(summary, description)
        summary_counts, description_counts = term_counts

        weighted = IRTFIDF.calculate_tfidf_for_report_termcount(
            summary_counts, description_counts)
        summary_weights, description_weights = weighted

        return IRTFIDF.get_top_n_similarity_over_all(
            summary_weights, description_weights, top_n)
Exemple #4
0
    def test_calculate_tfidf_for_report_termcount_bidf(self):
        """Smoke-test TF-IDF calculation with 'tfidf_algorithm' set to 'bidf'.

        Loads the test configuration, selects the 'bidf' algorithm, runs
        IRTFIDF.calculate_tfidf_for_report_termcount on two small term-count
        dicts and prints the results through the log.  The test makes no
        assertions; it only checks that the calls complete without raising.

        The log is stopped in a ``finally`` clause so that a failure in the
        configuration or calculation steps does not leave it started.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        IRLog.get_instance().start_log()
        try:
            IRConfig.get_instance().load('../data/test/bug_test.cfg')
            IRConfig.get_instance().set('tfidf_algorithm', 'bidf')

            summary = {'firefox': 5, 'chrome': 12}
            description = {'max': 10, 'min': 30, 'fix': 10}
            summary_tfidf, description_tfidf = \
                IRTFIDF.calculate_tfidf_for_report_termcount(summary,
                                                             description)

            # Each dict is compared against itself: this is used purely to
            # display the computed values, not to diff two results.
            IRLog.get_instance().println('Summary')
            IRTFIDF.show_dict_compare(summary_tfidf, summary_tfidf)
            IRLog.get_instance().println('Description')
            IRTFIDF.show_dict_compare(description_tfidf, description_tfidf)
        finally:
            # Always pair start_log() with stop_log(), even on failure.
            IRLog.get_instance().stop_log()
Exemple #5
0
    def test_calcualte_tfidf_for_report_termcount_tfidf(self):
        """Check that the computed TF-IDF weights have a squared sum of ~1.

        Loads the test configuration, runs
        IRTFIDF.calculate_tfidf_for_report_termcount on two small term-count
        dicts and asserts, for both the summary and the description result,
        that the sum of squared weights differs from 1.0 by a squared
        deviation of less than 0.00001.

        The log started at the beginning is stopped in a ``finally`` clause
        so that it is released whether or not the assertions pass.

        NOTE: the method name is misspelled ("calcualte"); it is kept as-is
        because renaming would change the externally visible test name.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        IRLog.get_instance().start_log()
        try:
            IRConfig.get_instance().load('../data/test/bug_test.cfg')

            summary = {'firefox': 5, 'chrome': 12}
            description = {'max': 10, 'min': 30, 'fix': 10}
            summary_tfidf, description_tfidf = \
                IRTFIDF.calculate_tfidf_for_report_termcount(summary,
                                                             description)

            # Only the weights matter here, so iterate the values directly.
            summary_sum = sum(
                (weight ** 2 for weight in summary_tfidf.values()), 0.0)
            description_sum = sum(
                (weight ** 2 for weight in description_tfidf.values()), 0.0)

            assert (summary_sum - 1.0) ** 2 < 0.00001
            assert (description_sum - 1.0) ** 2 < 0.00001
        finally:
            # Pair start_log() with stop_log(), even when an assert fails.
            IRLog.get_instance().stop_log()