Esempio n. 1
0
def server_cache(msg, res):
    """Call cache_all_data() on IRText, IRTFIDF and IRDocumentCount.

    A log line is printed before and after the three calls.

    Args:
        msg: not read by this handler.
        res: not read by this handler.

    Returns:
        SIGNAL_CONTINUE.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')
    # Same order as before: text, then tf-idf, then document counts.
    for cacheable in (IRText, IRTFIDF, IRDocumentCount):
        cacheable.cache_all_data()
    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
Esempio n. 2
0
def server_cache(msg, res):
    """Call cache_all_data() on IRText, IRTFIDF and IRDocumentCount.

    A log line is printed before and after the three calls.

    Args:
        msg: not read by this handler.
        res: not read by this handler.

    Returns:
        SIGNAL_CONTINUE.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')
    # Same order as before: text, then tf-idf, then document counts.
    for cacheable in (IRText, IRTFIDF, IRDocumentCount):
        cacheable.cache_all_data()
    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
Esempio n. 3
0
 def test_cache_all_data(self):
     """Load the bug_test configuration, then call IRText.cache_all_data()."""
     from ir_log import IRLog
     from ir_config import IRConfig
     from ir_text import IRText

     # The configuration is loaded before the cache call, as in the original.
     config = IRConfig.get_instance()
     config.load('../data/test/bug_test.cfg')
     IRText.cache_all_data()
Esempio n. 4
0
    def do_test_over_file(self, filename):
        """Do test over the file and log averaged precision/recall figures.

        Each line of the file is turned into a report with
        IRReport.from_string. The ids returned by
        similarities_and_duplicates() are checked against the report's
        duplicate group, and two comma-separated rows (a header, then the
        averaged values) are printed through IRLog at the end.

        Args:
            filename: str, the input file which is generated by
                generate_incomplete_test_file.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup
        from ir_text import IRText
        from ir_term_count import IRTermCount
        from ir_tfidf import IRTFIDF
        from ir_report import IRReport
        from ir_document_count import IRDocumentCount

        IRText.cache_all_data()
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()

        remove_self_bug_id = IRConfig.get_instance().get_bool(
            'remove_self_bug_id', True)

        sim_tot_precision = 0.0
        sim_tot_recall = 0.0
        sim_bi_tot_recall = 0.0
        sim_tot_size = 0

        dup_tot_precision = 0.0
        dup_tot_recall = 0.0
        dup_bi_tot_recall = 0.0
        dup_num = 0
        test_num = 0

        # The context manager closes the file even when a report raises
        # part-way through the run.
        with open(filename, 'r') as infile:
            for line in infile:
                IRLog.get_instance().println('----test----')
                test_num += 1
                # NOTE(review): the original called line.strip() here and
                # discarded the result, so from_string has always received
                # the raw line. Confirm whether stripping was intended.
                new_report = IRReport.from_string(line)
                bug_id = new_report.get_dummy_bug_id()
                # NOTE(review): only the disabled comparison below reads
                # ori_report; the construction is kept in case it has side
                # effects -- TODO confirm and drop.
                ori_report = IRReport(bug_id)
                #IRLog.get_instance().println('Summary')
                #IRTermCount.show_dict_compare(ori_report.get_summary_termcount(),
                #                              new_report.get_summary_termcount())
                #IRLog.get_instance().println('Description')
                #IRTermCount.show_dict_compare(ori_report.get_description_termcount(),
                #                              new_report.get_description_termcount())
                # do test for single
                similarities, duplicates = \
                    new_report.similarities_and_duplicates()
                sim_ids = [sim[0] for sim in similarities]
                dup_ids = [dup[0] for dup in duplicates]
                IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
                IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))
                # evaluate sim
                sim_hit, sim_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, sim_ids, remove_self_bug_id)
                # some groups contain only one report; such a case is not
                # counted
                if len(real_duplicates) == 0:
                    test_num -= 1
                    continue

                precision, recall = self.__report_result(
                    bug_id, sim_hit, sim_nothit, real_duplicates)

                sim_tot_precision += precision
                sim_tot_recall += recall
                sim_tot_size += len(sim_ids)
                sim_bi_tot_recall += 1 if recall > 0.0 else 0

                if len(dup_ids) > 0:
                    dup_num += 1
                    dup_hit, dup_nothit, real_duplicates = \
                        IRDuplicateGroup.is_in_same_duplicate_group(
                            bug_id, dup_ids, remove_self_bug_id)
                    precision, recall = self.__report_result(
                        bug_id, dup_hit, dup_nothit, real_duplicates)
                    dup_tot_precision += precision
                    dup_tot_recall += recall
                    dup_bi_tot_recall += 1 if recall > 0.0 else 0

        # general conclusion
        # An empty file, or one where every case was skipped, leaves test_num
        # at 0. Divide by 1 in that case so the summary row is still logged
        # (all totals are 0.0) instead of raising ZeroDivisionError.
        case_divisor = test_num if test_num else 1
        # Kept from the original: a zero dup count is replaced by 1.0 for
        # both the division and the '#dup' column, so that column reads 1.0
        # when no case produced duplicates.
        if dup_num == 0:
            dup_num = 1.0
        log = IRLog.get_instance()
        log.println(','.join([
            '#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
            '#dup', 'dup pre', 'dup rec', 'dup birec']))
        log.println(','.join([
            str(test_num),
            str(sim_tot_precision / case_divisor),
            str(sim_tot_recall / case_divisor),
            str(sim_bi_tot_recall / case_divisor),
            str(float(sim_tot_size) / case_divisor),
            str(dup_num),
            str(dup_tot_precision / dup_num),
            str(dup_tot_recall / dup_num),
            str(dup_bi_tot_recall / dup_num)]))
Esempio n. 5
0
    def do_test_over_file(self, filename):
        """Do test over the file and log averaged precision/recall figures.

        Each line of the file is turned into a report with
        IRReport.from_string. The ids returned by
        similarities_and_duplicates() are checked against the report's
        duplicate group, and two comma-separated rows (a header, then the
        averaged values) are printed through IRLog at the end.

        Args:
            filename: str, the input file which is generated by
                generate_incomplete_test_file.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup
        from ir_text import IRText
        from ir_term_count import IRTermCount
        from ir_tfidf import IRTFIDF
        from ir_report import IRReport
        from ir_document_count import IRDocumentCount

        IRText.cache_all_data()
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()

        remove_self_bug_id = IRConfig.get_instance().get_bool(
            'remove_self_bug_id', True)

        sim_tot_precision = 0.0
        sim_tot_recall = 0.0
        sim_bi_tot_recall = 0.0
        sim_tot_size = 0

        dup_tot_precision = 0.0
        dup_tot_recall = 0.0
        dup_bi_tot_recall = 0.0
        dup_num = 0
        test_num = 0

        # The context manager closes the file even when a report raises
        # part-way through the run.
        with open(filename, 'r') as infile:
            for line in infile:
                IRLog.get_instance().println('----test----')
                test_num += 1
                # NOTE(review): the original called line.strip() here and
                # discarded the result, so from_string has always received
                # the raw line. Confirm whether stripping was intended.
                new_report = IRReport.from_string(line)
                bug_id = new_report.get_dummy_bug_id()
                # NOTE(review): only the disabled comparison below reads
                # ori_report; the construction is kept in case it has side
                # effects -- TODO confirm and drop.
                ori_report = IRReport(bug_id)
                #IRLog.get_instance().println('Summary')
                #IRTermCount.show_dict_compare(ori_report.get_summary_termcount(),
                #                              new_report.get_summary_termcount())
                #IRLog.get_instance().println('Description')
                #IRTermCount.show_dict_compare(ori_report.get_description_termcount(),
                #                              new_report.get_description_termcount())
                # do test for single
                similarities, duplicates = \
                    new_report.similarities_and_duplicates()
                sim_ids = [sim[0] for sim in similarities]
                dup_ids = [dup[0] for dup in duplicates]
                IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
                IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))
                # evaluate sim
                sim_hit, sim_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, sim_ids, remove_self_bug_id)
                # some groups contain only one report; such a case is not
                # counted
                if len(real_duplicates) == 0:
                    test_num -= 1
                    continue

                precision, recall = self.__report_result(
                    bug_id, sim_hit, sim_nothit, real_duplicates)

                sim_tot_precision += precision
                sim_tot_recall += recall
                sim_tot_size += len(sim_ids)
                sim_bi_tot_recall += 1 if recall > 0.0 else 0

                if len(dup_ids) > 0:
                    dup_num += 1
                    dup_hit, dup_nothit, real_duplicates = \
                        IRDuplicateGroup.is_in_same_duplicate_group(
                            bug_id, dup_ids, remove_self_bug_id)
                    precision, recall = self.__report_result(
                        bug_id, dup_hit, dup_nothit, real_duplicates)
                    dup_tot_precision += precision
                    dup_tot_recall += recall
                    dup_bi_tot_recall += 1 if recall > 0.0 else 0

        # general conclusion
        # An empty file, or one where every case was skipped, leaves test_num
        # at 0. Divide by 1 in that case so the summary row is still logged
        # (all totals are 0.0) instead of raising ZeroDivisionError.
        case_divisor = test_num if test_num else 1
        # Kept from the original: a zero dup count is replaced by 1.0 for
        # both the division and the '#dup' column, so that column reads 1.0
        # when no case produced duplicates.
        if dup_num == 0:
            dup_num = 1.0
        log = IRLog.get_instance()
        log.println(','.join([
            '#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
            '#dup', 'dup pre', 'dup rec', 'dup birec']))
        log.println(','.join([
            str(test_num),
            str(sim_tot_precision / case_divisor),
            str(sim_tot_recall / case_divisor),
            str(sim_bi_tot_recall / case_divisor),
            str(float(sim_tot_size) / case_divisor),
            str(dup_num),
            str(dup_tot_precision / dup_num),
            str(dup_tot_recall / dup_num),
            str(dup_bi_tot_recall / dup_num)]))
Esempio n. 6
0
 def test_cache_all_data(self):
     """Load the bug_test configuration, then call IRText.cache_all_data()."""
     from ir_log import IRLog
     from ir_config import IRConfig
     from ir_text import IRText

     # The configuration is loaded before the cache call, as in the original.
     config = IRConfig.get_instance()
     config.load('../data/test/bug_test.cfg')
     IRText.cache_all_data()