Ejemplo n.º 1
0
    def test_cache_all_data(self):
        """Smoke-test ``IRTFIDF.cache_all_data`` with the test configuration.

        Starts logging, loads ``../data/test/bug_test.cfg`` and runs the
        TF-IDF caching routine. The test makes no assertions; it passes when
        nothing raises.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        IRLog.get_instance().start_log()
        try:
            IRConfig.get_instance().load('../data/test/bug_test.cfg')
            IRTFIDF.cache_all_data()
        finally:
            # Always pair start_log() with stop_log(), even when loading the
            # configuration or caching the data fails.
            IRLog.get_instance().stop_log()
Ejemplo n.º 2
0
def server_cache(msg, res):
    """Cache the text, TF-IDF and document-count data.

    Args:
        msg: not read by this handler.
        res: not read by this handler.

    Returns:
        SIGNAL_CONTINUE.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')
    # Order: text first, then TF-IDF, then document counts.
    for cache_owner in (IRText, IRTFIDF, IRDocumentCount):
        cache_owner.cache_all_data()
    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
Ejemplo n.º 3
0
def server_cache(msg, res):
    """Cache the text, TF-IDF and document-count data.

    Args:
        msg: not read by this handler.
        res: not read by this handler.

    Returns:
        SIGNAL_CONTINUE.
    """
    from ir_log import IRLog
    from ir_text import IRText
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount

    IRLog.get_instance().println('Server is caching data')
    # Order: text first, then TF-IDF, then document counts.
    for cache_owner in (IRText, IRTFIDF, IRDocumentCount):
        cache_owner.cache_all_data()
    IRLog.get_instance().println('Server cached data')
    return SIGNAL_CONTINUE
Ejemplo n.º 4
0
    def start_shell(cls):
        """Start a shell that does recommending interactively.

        Caches the TF-IDF and document-count data, then reads commands from
        standard input until ``exit`` is entered:

            new   create a new report (time, product, summary, description)
            do    run the recommendation on the current report
            ls    print the current report
            ad    append text to the current report's description
            ap    append penalty terms to the current report
            sd    set the dummy bug id of the current report
            help  show the help text
            exit  leave the shell

        ``do``, ``ls``, ``ad``, ``ap`` and ``sd`` only log an error until a
        report has been created with ``new``.

        EOFError / KeyboardInterrupt raised while reading input are not
        caught here and therefore end the shell.
        """
        from ir_log import IRLog
        from ir_tfidf import IRTFIDF
        from ir_document_count import IRDocumentCount
        from ir_report import IRReport

        IRLog.get_instance().println("Starting Intereport...")
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()
        IRLog.get_instance().println("Intereport Started. Waiting for input")

        new_report = None
        while 1:
            cmd = raw_input("Input command:").strip()
            if cmd == 'exit':
                IRLog.get_instance().println('Exiting')
                break
            elif cmd == 'new':
                IRLog.get_instance().println('Creating New Report')
                import time
                cur_time = -1
                while cur_time < 0:
                    date_text = raw_input("Input Time (e.g., 2011-05-05): ")
                    try:
                        cur_time = int(time.mktime(
                            time.strptime(date_text, '%Y-%m-%d')))
                    except (ValueError, OverflowError):
                        # Malformed or unrepresentable date: ask again.
                        # Only these errors are retried, so a closed stdin
                        # cannot make this loop spin forever.
                        cur_time = -1
                product = raw_input("Input Product: ")
                summary = raw_input("Summary: ")
                raw_description = raw_input("Description:\n")
                # Same field layout IRReport.from_string is given elsewhere
                # in this method; the last two fields are left empty.
                new_report = IRReport.from_string(IRReport.separator.join([
                    str(cur_time), product.lower(), summary, raw_description,
                    '', '']))
                cls.__print_report(new_report)
            elif cmd == 'do':
                IRLog.get_instance().println('Do Recommending')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    cls.do_recommend(new_report)
            elif cmd == 'ls':
                IRLog.get_instance().println('Show Current Report')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    cls.__print_report(new_report)
            elif cmd == 'ad':
                IRLog.get_instance().println('Appending Description')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    append_description = raw_input("Append Description:\n")
                    description = ' '.join([new_report.get_description_text(),
                                            append_description])
                    # Build a fresh report with the longer description and
                    # copy every other attribute over from the current one.
                    dummy_report = IRReport(new_report.get_summary_text(),
                                            description)
                    dummy_report.set_stacktrace(new_report.get_stacktrace())
                    dummy_report.set_basic_info(new_report.get_create_ts(),
                                                new_report.get_product())
                    dummy_report.set_penalty_terms(
                        new_report.get_penalty_terms())
                    dummy_report.set_dummy_bug_id(
                        new_report.get_dummy_bug_id())
                    new_report = dummy_report
                    IRLog.get_instance().println('Description: %s' %
                                                 description)
            elif cmd == 'ap':
                IRLog.get_instance().println('Appending Penalties')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    raw = []
                    while not raw:
                        answer = raw_input('Input Penalties (split by \',\'):')
                        # str.split always yields at least one item, so blank
                        # entries are dropped to make empty input re-prompt.
                        raw = [term for term in answer.split(',')
                               if term.strip()]
                    from ir_term_count import IRTermCount
                    current = new_report.get_penalty_terms()
                    # Copy instead of extending in place: works for any
                    # iterable and leaves the object held by the report alone.
                    penalty = [] if current is None else list(current)
                    penalty.extend(IRTermCount.do_stemming(raw))
                    new_report.set_penalty_terms(penalty)
                    IRLog.get_instance().println('Penalties: %s' %
                                                 (', '.join(penalty)))
            elif cmd == 'sd':
                IRLog.get_instance().println('Set Dummy Bug ID')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    bug_id = -1
                    while bug_id <= 0:
                        id_text = raw_input('Dummy Bug ID: ')
                        try:
                            bug_id = int(id_text)
                        except ValueError:
                            # Not an integer: ask again.
                            bug_id = -1
                    new_report.set_dummy_bug_id(bug_id)
                    IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
            elif cmd == 'help':
                cls.__show_help()
            else:
                IRLog.get_instance().println('Error! Unkown command: %s'
                                             % cmd)
                cls.__show_help()
        # end of while 1
        IRLog.get_instance().println("Bye")
Ejemplo n.º 5
0
    # Build the report to recommend for, depending on the run mode:
    #   file  argv[3] = test file, argv[4] = bug id,
    #         argv[5] = optional comma-separated penalty terms
    #   text  argv[3] = report text handed to IRReport.from_string
    #   inte  interactive shell; the process exits afterwards
    new_report = None
    if mode == 'file':
        test_file = sys.argv[3]
        bug_id = int(sys.argv[4])
        from ir_sim_bug_evaluator import IRSimBugEvaluator
        new_report = IRSimBugEvaluator.get_report_from_test_file(
            test_file, bug_id)
        if new_report is None:
            IRLog.get_instance().println(
                'Error! Cannot find report %d in %s' % (bug_id, test_file))
        elif len(sys.argv) > 5:
            from ir_term_count import IRTermCount
            # argv[4] is the bug id parsed above; the penalty terms are the
            # following argument.
            penalty_terms_raw = sys.argv[5].split(',')
            penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw))
            IRLog.get_instance().println(
                '%d penalty terms: %s:'
                % (len(penalty_terms), ','.join(penalty_terms)))
            new_report.set_penalty_terms(penalty_terms)
    elif mode == 'text':
        text = sys.argv[3]
        new_report = IRReport.from_string(text)
    elif mode == 'inte':
        IRRecommender.start_shell()
        exit()
    else:
        IRLog.get_instance().println('Error! Unknown mode %s' % mode)
    # Only recommend when a report was actually obtained; the error cases
    # above have already been logged.
    if new_report is not None:
        from ir_tfidf import IRTFIDF
        from ir_document_count import IRDocumentCount
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()
        IRRecommender.do_recommend(new_report)
    IRLog.get_instance().stop_log()
Ejemplo n.º 6
0
    def start_shell(cls):
        """Start a shell that does recommending interactively.

        Caches the TF-IDF and document-count data, then reads commands from
        standard input until ``exit`` is entered:

            new   create a new report (time, product, summary, description)
            do    run the recommendation on the current report
            ls    print the current report
            ad    append text to the current report's description
            ap    append penalty terms to the current report
            sd    set the dummy bug id of the current report
            help  show the help text
            exit  leave the shell

        ``do``, ``ls``, ``ad``, ``ap`` and ``sd`` only log an error until a
        report has been created with ``new``.

        EOFError / KeyboardInterrupt raised while reading input are not
        caught here and therefore end the shell.
        """
        from ir_log import IRLog
        from ir_tfidf import IRTFIDF
        from ir_document_count import IRDocumentCount
        from ir_report import IRReport

        IRLog.get_instance().println("Starting Intereport...")
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()
        IRLog.get_instance().println("Intereport Started. Waiting for input")

        new_report = None
        while 1:
            cmd = raw_input("Input command:").strip()
            if cmd == 'exit':
                IRLog.get_instance().println('Exiting')
                break
            elif cmd == 'new':
                IRLog.get_instance().println('Creating New Report')
                import time
                cur_time = -1
                while cur_time < 0:
                    date_text = raw_input("Input Time (e.g., 2011-05-05): ")
                    try:
                        cur_time = int(time.mktime(
                            time.strptime(date_text, '%Y-%m-%d')))
                    except (ValueError, OverflowError):
                        # Malformed or unrepresentable date: ask again.
                        # Only these errors are retried, so a closed stdin
                        # cannot make this loop spin forever.
                        cur_time = -1
                product = raw_input("Input Product: ")
                summary = raw_input("Summary: ")
                raw_description = raw_input("Description:\n")
                # Same field layout IRReport.from_string is given elsewhere
                # in this method; the last two fields are left empty.
                new_report = IRReport.from_string(
                    IRReport.separator.join([
                        str(cur_time),
                        product.lower(), summary, raw_description, '', ''
                    ]))
                cls.__print_report(new_report)
            elif cmd == 'do':
                IRLog.get_instance().println('Do Recommending')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    cls.do_recommend(new_report)
            elif cmd == 'ls':
                IRLog.get_instance().println('Show Current Report')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    cls.__print_report(new_report)
            elif cmd == 'ad':
                IRLog.get_instance().println('Appending Description')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    append_description = raw_input("Append Description:\n")
                    description = ' '.join([
                        new_report.get_description_text(), append_description
                    ])
                    # Build a fresh report with the longer description and
                    # copy every other attribute over from the current one.
                    dummy_report = IRReport(new_report.get_summary_text(),
                                            description)
                    dummy_report.set_stacktrace(new_report.get_stacktrace())
                    dummy_report.set_basic_info(new_report.get_create_ts(),
                                                new_report.get_product())
                    dummy_report.set_penalty_terms(
                        new_report.get_penalty_terms())
                    dummy_report.set_dummy_bug_id(
                        new_report.get_dummy_bug_id())
                    new_report = dummy_report
                    IRLog.get_instance().println('Description: %s' %
                                                 description)
            elif cmd == 'ap':
                IRLog.get_instance().println('Appending Penalties')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    raw = []
                    while not raw:
                        answer = raw_input(
                            'Input Penalties (split by \',\'):')
                        # str.split always yields at least one item, so blank
                        # entries are dropped to make empty input re-prompt.
                        raw = [term for term in answer.split(',')
                               if term.strip()]
                    from ir_term_count import IRTermCount
                    current = new_report.get_penalty_terms()
                    # Copy instead of extending in place: works for any
                    # iterable and leaves the object held by the report alone.
                    penalty = [] if current is None else list(current)
                    penalty.extend(IRTermCount.do_stemming(raw))
                    new_report.set_penalty_terms(penalty)
                    IRLog.get_instance().println('Penalties: %s' %
                                                 (', '.join(penalty)))
            elif cmd == 'sd':
                IRLog.get_instance().println('Set Dummy Bug ID')
                if new_report is None:
                    IRLog.get_instance().println('Error! Please create '
                                                 'report first.')
                else:
                    bug_id = -1
                    while bug_id <= 0:
                        id_text = raw_input('Dummy Bug ID: ')
                        try:
                            bug_id = int(id_text)
                        except ValueError:
                            # Not an integer: ask again.
                            bug_id = -1
                    new_report.set_dummy_bug_id(bug_id)
                    IRLog.get_instance().println('Dummy Bug ID: %d' % bug_id)
            elif cmd == 'help':
                cls.__show_help()
            else:
                IRLog.get_instance().println('Error! Unkown command: %s'
                                             % cmd)
                cls.__show_help()
        # end of while 1
        IRLog.get_instance().println("Bye")
Ejemplo n.º 7
0
        test_file = sys.argv[3]
        bug_id = int(sys.argv[4])
        from ir_sim_bug_evaluator import IRSimBugEvaluator
        new_report = IRSimBugEvaluator.get_report_from_test_file(
            test_file, bug_id)
        if new_report is None:
            IRLog.get_instance().println('Error! Cannot find report %d in %s' % \
                    (bug_id, test_file))
        else:
            if sys.argv.__len__() > 5:
                from ir_term_count import IRTermCount
                penalty_terms_raw = sys.argv[4].split(',')
                penalty_terms = set(IRTermCount.do_stemming(penalty_terms_raw))
                IRLog.get_instance().println('%d penalty terms: %s:' \
                    % (penalty_terms.__len__(), ','.join(penalty_terms)))
                new_report.set_penalty_terms(penalty_terms)
    elif mode == 'text':
        text = sys.argv[3]
        new_report = IRReport.from_string(text)
    elif mode == 'inte':
        IRRecommender.start_shell()
        exit()
    else:
        IRLog.get_instance().println('Error! Known mode %s' % mode)
    from ir_tfidf import IRTFIDF
    from ir_document_count import IRDocumentCount
    IRTFIDF.cache_all_data()
    IRDocumentCount.cache_all_data()
    IRRecommender.do_recommend(new_report)
    IRLog.get_instance().stop_log()
Ejemplo n.º 8
0
    def do_test_over_file(self, filename):
        """Run the similar/duplicate evaluation over a test file.

        Every non-empty line of the file is parsed with
        ``IRReport.from_string``. The ids returned by
        ``similarities_and_duplicates()`` are checked against the report's
        duplicate group, and precision/recall are accumulated. Cases whose
        duplicate group comes back empty are not counted. Two comma-separated
        lines (a header and the averaged values) are logged at the end.

        Args:
            filename: str, the input file which is generated by
                generate_incomplete_test_file.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup
        from ir_text import IRText
        from ir_tfidf import IRTFIDF
        from ir_report import IRReport
        from ir_document_count import IRDocumentCount

        IRText.cache_all_data()
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()

        remove_self_bug_id = IRConfig.get_instance().get_bool(
            'remove_self_bug_id', True)

        def mean(total, count):
            # Average that yields 0.0 instead of dividing by zero.
            return total / count if count else 0.0

        sim_tot_precision = 0.0
        sim_tot_recall = 0.0
        sim_bi_tot_recall = 0.0
        sim_tot_size = 0

        dup_tot_precision = 0.0
        dup_tot_recall = 0.0
        dup_bi_tot_recall = 0.0
        dup_num = 0
        test_num = 0

        # The with-block closes the file even if a case raises.
        with open(filename, 'r') as infile:
            for line in infile:
                # Drop only the line terminator; other whitespace may belong
                # to the record itself.
                line = line.rstrip('\r\n')
                if not line:
                    continue
                IRLog.get_instance().println('----test----')
                new_report = IRReport.from_string(line)
                bug_id = new_report.get_dummy_bug_id()
                # do test for single
                similarities, duplicates = \
                    new_report.similarities_and_duplicates()
                sim_ids = [sim[0] for sim in similarities]
                dup_ids = [dup[0] for dup in duplicates]
                IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
                IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))
                # evaluate sim
                sim_hit, sim_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, sim_ids, remove_self_bug_id)
                # some groups contain only one report: nothing to evaluate
                if len(real_duplicates) == 0:
                    continue
                test_num += 1

                precision, recall = self.__report_result(
                    bug_id, sim_hit, sim_nothit, real_duplicates)

                sim_tot_precision += precision
                sim_tot_recall += recall
                sim_tot_size += len(sim_ids)
                sim_bi_tot_recall += 1 if recall > 0.0 else 0

                if len(dup_ids) > 0:
                    dup_num += 1
                    dup_hit, dup_nothit, real_duplicates = \
                        IRDuplicateGroup.is_in_same_duplicate_group(
                            bug_id, dup_ids, remove_self_bug_id)
                    precision, recall = self.__report_result(
                        bug_id, dup_hit, dup_nothit, real_duplicates)
                    dup_tot_precision += precision
                    dup_tot_recall += recall
                    dup_bi_tot_recall += 1 if recall > 0.0 else 0

        # general conclusion
        header = ['#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
                  '#dup', 'dup pre', 'dup rec', 'dup birec']
        values = [
            test_num,
            mean(sim_tot_precision, test_num),
            mean(sim_tot_recall, test_num),
            mean(sim_bi_tot_recall, test_num),
            mean(float(sim_tot_size), test_num),
            # The real count is reported, also when it is zero.
            dup_num,
            mean(dup_tot_precision, dup_num),
            mean(dup_tot_recall, dup_num),
            mean(dup_bi_tot_recall, dup_num),
        ]
        IRLog.get_instance().println(','.join(header))
        IRLog.get_instance().println(','.join([str(v) for v in values]))
Ejemplo n.º 9
0
    def do_test_over_file(self, filename):
        """Run the similar/duplicate evaluation over a test file.

        Every non-empty line of the file is parsed with
        ``IRReport.from_string``. The ids returned by
        ``similarities_and_duplicates()`` are checked against the report's
        duplicate group, and precision/recall are accumulated. Cases whose
        duplicate group comes back empty are not counted. Two comma-separated
        lines (a header and the averaged values) are logged at the end.

        Args:
            filename: str, the input file which is generated by
                generate_incomplete_test_file.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_duplicate_group import IRDuplicateGroup
        from ir_text import IRText
        from ir_tfidf import IRTFIDF
        from ir_report import IRReport
        from ir_document_count import IRDocumentCount

        IRText.cache_all_data()
        IRTFIDF.cache_all_data()
        IRDocumentCount.cache_all_data()

        remove_self_bug_id = IRConfig.get_instance().get_bool(
            'remove_self_bug_id', True)

        def mean(total, count):
            # Average that yields 0.0 instead of dividing by zero.
            return total / count if count else 0.0

        sim_tot_precision = 0.0
        sim_tot_recall = 0.0
        sim_bi_tot_recall = 0.0
        sim_tot_size = 0

        dup_tot_precision = 0.0
        dup_tot_recall = 0.0
        dup_bi_tot_recall = 0.0
        dup_num = 0
        test_num = 0

        # The with-block closes the file even if a case raises.
        with open(filename, 'r') as infile:
            for line in infile:
                # Drop only the line terminator; other whitespace may belong
                # to the record itself.
                line = line.rstrip('\r\n')
                if not line:
                    continue
                IRLog.get_instance().println('----test----')
                new_report = IRReport.from_string(line)
                bug_id = new_report.get_dummy_bug_id()
                # do test for single
                similarities, duplicates = \
                    new_report.similarities_and_duplicates()
                sim_ids = [sim[0] for sim in similarities]
                dup_ids = [dup[0] for dup in duplicates]
                IRLog.get_instance().println('Sim ids: %s' % str(sim_ids))
                IRLog.get_instance().println('Dup ids: %s' % str(dup_ids))
                # evaluate sim
                sim_hit, sim_nothit, real_duplicates = \
                    IRDuplicateGroup.is_in_same_duplicate_group(
                        bug_id, sim_ids, remove_self_bug_id)
                # some groups contain only one report: nothing to evaluate
                if len(real_duplicates) == 0:
                    continue
                test_num += 1

                precision, recall = self.__report_result(
                    bug_id, sim_hit, sim_nothit, real_duplicates)

                sim_tot_precision += precision
                sim_tot_recall += recall
                sim_tot_size += len(sim_ids)
                sim_bi_tot_recall += 1 if recall > 0.0 else 0

                if len(dup_ids) > 0:
                    dup_num += 1
                    dup_hit, dup_nothit, real_duplicates = \
                        IRDuplicateGroup.is_in_same_duplicate_group(
                            bug_id, dup_ids, remove_self_bug_id)
                    precision, recall = self.__report_result(
                        bug_id, dup_hit, dup_nothit, real_duplicates)
                    dup_tot_precision += precision
                    dup_tot_recall += recall
                    dup_bi_tot_recall += 1 if recall > 0.0 else 0

        # general conclusion
        header = ['#cases', 'sim pre', 'sim rec', 'sim birec', 'sim size',
                  '#dup', 'dup pre', 'dup rec', 'dup birec']
        values = [
            test_num,
            mean(sim_tot_precision, test_num),
            mean(sim_tot_recall, test_num),
            mean(sim_bi_tot_recall, test_num),
            mean(float(sim_tot_size), test_num),
            # The real count is reported, also when it is zero.
            dup_num,
            mean(dup_tot_precision, dup_num),
            mean(dup_tot_recall, dup_num),
            mean(dup_bi_tot_recall, dup_num),
        ]
        IRLog.get_instance().println(','.join(header))
        IRLog.get_instance().println(','.join([str(v) for v in values]))