コード例 #1
0
    def get_summary_and_description_of_bug(cls, bug_id):
        """Get summary and description of a bug from mongodb.

        When caching is enabled the cache is consulted first; on a miss the
        text collection is queried and the outcome (including the empty
        default) is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (str, str), (summary, description); ('', '') if no document
            matches bug_id.
        """
        if cls.__is_cache and bug_id in cls.__cache_summary_description:
            return cls.__cache_summary_description[bug_id]
        # Imports are deferred past the cache check so that a cache hit needs
        # neither the config nor the database helper.
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        text_collection = IRCollection('bug_db_name',
                                       'bug_text_collection_name', 'r')
        res = text_collection.find({bug_id_name: bug_id})
        summary = ''
        description = ''
        if res.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every res[0] issues its own query, so the document is fetched
            # once and reused for both fields.
            bug = res[0]
            summary = bug[summary_name]
            description = bug[description_name]
        if cls.__is_cache:
            cls.__cache_summary_description[bug_id] = (summary, description)
        return summary, description
コード例 #2
0
ファイル: ir_report.py プロジェクト: LeonXJ/Intereport
    def similarity_over_all(self):
        """Calculate similarity between this report and stored reports.

        Candidates are the documents of the basic collection that have this
        report's product, a creation timestamp greater than this report's
        creation timestamp minus the search span, and a bug id that is not
        in the excluded report ids. The dummy bug id is skipped as well.

        Returns:
            dict, {bug_id -> result of similarity_with() for that bug},
            documented as
            [score, summary_score, description_score, stacktrace_score]
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        # 2 * 365 days; presumably seconds, i.e. timestamps are epoch
        # seconds -- TODO confirm.
        search_time_span = 2 * 3600 * 24 * 365

        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {
                '$gt': self.get_create_ts() - search_time_span
            },
            bug_id_name: {
                '$nin': self.__exclude_report_ids
            }
        })
        result = {}
        logger.println('Comparing with %d reports.' % (reports2scan.count()))

        # Single-argument parenthesized print produces the same output under
        # the Python 2 print statement and is valid Python 3 syntax.
        print(self.__summary_text)
        print(self.__description_text)

        # Whether this report has a stacktrace does not depend on the
        # candidate, so decide once instead of once per candidate.
        own_stacktrace = self.get_stacktrace()
        need_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0

        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being none, we create and fill the info of
            #    report manually
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            # if self.__stacktrace is empty, we don't need to do this
            if need_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(
                    bug_id)
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
コード例 #3
0
ファイル: ir_text.py プロジェクト: LeonXJ/Intereport
    def get_summary_and_description_of_bug(cls, bug_id):
        """Get summary and description of a bug from mongodb.

        When caching is enabled the cache is consulted first; on a miss the
        text collection is queried and the outcome (including the empty
        default) is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (str, str), (summary, description); ('', '') if no document
            matches bug_id.
        """
        if cls.__is_cache and bug_id in cls.__cache_summary_description:
            return cls.__cache_summary_description[bug_id]
        # Imports are deferred past the cache check so that a cache hit needs
        # neither the config nor the database helper.
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        text_collection = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        res = text_collection.find({bug_id_name: bug_id})
        summary = ''
        description = ''
        if res.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every res[0] issues its own query, so the document is fetched
            # once and reused for both fields.
            bug = res[0]
            summary = bug[summary_name]
            description = bug[description_name]
        if cls.__is_cache:
            cls.__cache_summary_description[bug_id] = (summary, description)
        return summary, description
コード例 #4
0
    def cache_all_data(cls):
        """Load all text data into memory.

        Enables caching, resets the summary/description cache and the
        stacktrace cache, then fills both from every document of the text
        collection. The collection is closed afterwards, also when the
        iteration raises.
        """
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        # get config
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        stacktrace_name = config.get('bug_stacktrace_name')
        # caching data
        cls.set_is_cache(True)
        text_collection = \
                IRCollection('bug_db_name', 'bug_text_collection_name', 'r')
        cls.__cache_summary_description = {}
        cls.__cache_stacktrace = {}

        def iter_func(bug):
            # Called once per document; fills both caches for that bug.
            cls.__cache_summary_description[bug[bug_id_name]] = \
                    (bug[summary_name], bug[description_name])
            cls.__cache_stacktrace[bug[bug_id_name]] = bug[stacktrace_name]

        try:
            IRProgressBar.execute_iteration_for_cursor(text_collection.find(),
                                                       iter_func,
                                                       'Caching Text Data')
        finally:
            # Previously skipped whenever the iteration raised.
            text_collection.close()
コード例 #5
0
ファイル: ir_text.py プロジェクト: LeonXJ/Intereport
    def get_stacktrace_of_bug(cls, bug_id):
        """Fetch the stacktrace of a bug, preferring the in-memory cache.

        Args:
            bug_id: int

        Returns:
            [[str]], [[signature]]; an empty list when no document of the
            text collection matches bug_id.
        """
        if cls.__is_cache and bug_id in cls.__cache_stacktrace:
            return cls.__cache_stacktrace[bug_id]
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        def setting(key):
            # Resolve a configured field name.
            return IRConfig.get_instance().get(key)

        id_field = setting('bug_id_name')
        trace_field = setting('bug_stacktrace_name')
        collection = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        cursor = collection.find({id_field: bug_id})
        trace = cursor[0][trace_field] if cursor.count() > 0 else []
        if cls.__is_cache:
            # Remember the outcome, including the empty default.
            cls.__cache_stacktrace[bug_id] = trace
        return trace
コード例 #6
0
    def get_tfidf_of_bug(cls, bug_id):
        """Get tfidf of a bug.

        The cache is checked first when caching is enabled; on a miss the
        tfidf collection is queried and, when caching is enabled, the outcome
        (including the empty default) is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (TFIDF of summary, TFIDF of description); two
            empty dicts if no document matches bug_id.
        """

        if cls.__is_cache and bug_id in cls.__cache:
            return cls.__cache[bug_id]
        # load from db
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        tfidf_collection = IRCollection('bug_db_name',
                                        'bug_tfidf_collection_name', 'r')
        find_result = tfidf_collection.find({bug_id_name: bug_id})
        summary = {}
        description = {}
        if find_result.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every find_result[0] issues its own query, so the document is
            # fetched once and reused for both fields.
            bug = find_result[0]
            summary = bug[summary_name]
            description = bug[description_name]
        if cls.__is_cache:
            cls.__cache[bug_id] = (summary, description)
        return summary, description
コード例 #7
0
    def cache_all_data(cls):
        """Load the document count of every term into memory.

        Turns caching on and stores, for each document of the document
        count collection, a (summary count, description count) pair keyed
        by the document's term. A count that is absent from a document is
        stored as 0.
        """
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        def setting(key):
            # Resolve a configured field name.
            return IRConfig.get_instance().get(key)

        # config
        summary_field = setting('bug_summary_name')
        description_field = setting('bug_description_name')
        term_field = setting('bug_term_name')

        cls.__is_cache = True
        collection = IRCollection(
            'bug_db_name', 'bug_documentcount_collection_name', 'r')

        def store_counts(entry):
            # Missing fields count as zero documents.
            if summary_field in entry:
                summary_count = entry[summary_field]
            else:
                summary_count = 0
            if description_field in entry:
                description_count = entry[description_field]
            else:
                description_count = 0
            counts = (summary_count, description_count)
            cls.__cache_document_count[entry[term_field]] = counts

        IRProgressBar.execute_iteration_for_cursor(
            collection.find({}), store_counts, "Caching Document Count")
コード例 #8
0
ファイル: ir_tfidf.py プロジェクト: LeonXJ/Intereport
    def get_tfidf_of_bug(cls, bug_id):
        """Get tfidf of a bug.

        The cache is checked first when caching is enabled; on a miss the
        tfidf collection is queried and, when caching is enabled, the outcome
        (including the empty default) is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (TFIDF of summary, TFIDF of description); two
            empty dicts if no document matches bug_id.
        """

        if cls.__is_cache and bug_id in cls.__cache:
            return cls.__cache[bug_id]
        # load from db
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        tfidf_collection = IRCollection(
            'bug_db_name', 'bug_tfidf_collection_name', 'r')
        find_result = tfidf_collection.find({bug_id_name: bug_id})
        summary = {}
        description = {}
        if find_result.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every find_result[0] issues its own query, so the document is
            # fetched once and reused for both fields.
            bug = find_result[0]
            summary = bug[summary_name]
            description = bug[description_name]
        if cls.__is_cache:
            cls.__cache[bug_id] = (summary, description)
        return summary, description
コード例 #9
0
    def get_termcount_of_bug(cls, bug_id):
        """Get termcount of a bug.

        The cache is checked first when caching is enabled; on a miss the
        termcount collection is queried and, when caching is enabled, the
        outcome (including the empty default) is stored in the cache.

        Args:
            bug_id: int

        Returns:
            (dict, dict), (termcount of summary, termcount of description);
            two empty dicts if no document matches bug_id.
        """

        if cls.__is_cache and bug_id in cls.__cache_term_count:
            return cls.__cache_term_count[bug_id]
        # Imports are deferred past the cache check so that a cache hit needs
        # neither the config nor the database helper.
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        summary_name = config.get('bug_summary_name')
        description_name = config.get('bug_description_name')
        termcount_collection = IRCollection(
            'bug_db_name', 'bug_termcount_collection_name', 'r')
        res = termcount_collection.find({bug_id_name: bug_id})
        summary = {}
        description = {}
        if res.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every res[0] issues its own query, so the document is fetched
            # once and reused for both fields.
            bug = res[0]
            summary = bug[summary_name]
            description = bug[description_name]
        if cls.__is_cache:
            cls.__cache_term_count[bug_id] = (summary, description)
        return summary, description
コード例 #10
0
    def get_stacktrace_of_bug(cls, bug_id):
        """Fetch the stacktrace of a bug, preferring the in-memory cache.

        Args:
            bug_id: int

        Returns:
            [[str]], [[signature]]; an empty list when no document of the
            text collection matches bug_id.
        """
        if cls.__is_cache and bug_id in cls.__cache_stacktrace:
            return cls.__cache_stacktrace[bug_id]
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        def setting(key):
            # Resolve a configured field name.
            return IRConfig.get_instance().get(key)

        id_field = setting('bug_id_name')
        trace_field = setting('bug_stacktrace_name')
        collection = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        cursor = collection.find({id_field: bug_id})
        trace = cursor[0][trace_field] if cursor.count() > 0 else []
        if cls.__is_cache:
            # Remember the outcome, including the empty default.
            cls.__cache_stacktrace[bug_id] = trace
        return trace
コード例 #11
0
    def show_distribution_on_product_and_create_ts(cls):
        """Show the distribution of create time and number of products on
        each duplicate group.

        For every distinct group id one line is logged with the difference
        between the largest and the smallest creation timestamp of the
        group's bugs and the number of distinct products among them. Bugs
        without a document in the basic collection are ignored.
        """
        from ir_log import IRLog
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        bug2group_collection = IRCollection('bug_db_name',
                                            'bug_duplicate_collection_name',
                                            'r')
        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        group_name = config.get('bug_group_name')
        product_name = config.get('bug_product_name')
        create_ts_name = config.get('bug_create_ts_name')

        group_ids = bug2group_collection.distinct(group_name)
        progress_bar = IRProgressBar(len(group_ids), "group", False, 0, 1)
        for group_num, group_id in enumerate(group_ids, 1):
            progress_bar.set_value(group_num)
            bugs = bug2group_collection.find({group_name: group_id})
            # Sentinels; if no bug of the group has basic info they are
            # never replaced and the logged span is their difference.
            min_ts = 9999999999
            max_ts = -1000
            product_set = set()
            for bug in bugs:
                bug_id = bug[bug_id_name]
                basic = basic_collection.find({bug_id_name: bug_id})
                if basic.count() == 0:
                    continue
                # NOTE(review): assuming find() returns a pymongo-style
                # cursor, every basic[0] issues its own query, so the
                # document is fetched once and reused for both fields.
                info = basic[0]
                ts = info[create_ts_name]
                # ts
                if ts > max_ts:
                    max_ts = ts
                if ts < min_ts:
                    min_ts = ts
                # product
                product_set.add(info[product_name])
            IRLog.get_instance().println('ts span:%d;product number:%d' \
                    % (max_ts - min_ts, len(product_set)), 2)
コード例 #12
0
    def show_distribution_on_product_and_create_ts(cls):
        """Show the distribution of create time and number of products on
        each duplicate group.

        For every distinct group id one line is logged with the difference
        between the largest and the smallest creation timestamp of the
        group's bugs and the number of distinct products among them. Bugs
        without a document in the basic collection are ignored.
        """
        from ir_log import IRLog
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        bug2group_collection = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        group_name = config.get('bug_group_name')
        product_name = config.get('bug_product_name')
        create_ts_name = config.get('bug_create_ts_name')

        group_ids = bug2group_collection.distinct(group_name)
        progress_bar = IRProgressBar(len(group_ids), "group", False, 0, 1)
        for group_num, group_id in enumerate(group_ids, 1):
            progress_bar.set_value(group_num)
            bugs = bug2group_collection.find({group_name: group_id})
            # Sentinels; if no bug of the group has basic info they are
            # never replaced and the logged span is their difference.
            min_ts = 9999999999
            max_ts = -1000
            product_set = set()
            for bug in bugs:
                bug_id = bug[bug_id_name]
                basic = basic_collection.find({bug_id_name: bug_id})
                if basic.count() == 0:
                    continue
                # NOTE(review): assuming find() returns a pymongo-style
                # cursor, every basic[0] issues its own query, so the
                # document is fetched once and reused for both fields.
                info = basic[0]
                ts = info[create_ts_name]
                # ts
                if ts > max_ts:
                    max_ts = ts
                if ts < min_ts:
                    min_ts = ts
                # product
                product_set.add(info[product_name])
            IRLog.get_instance().println('ts span:%d;product number:%d' \
                    % (max_ts - min_ts, len(product_set)), 2)
コード例 #13
0
    def get_documentcount(cls,
                          term,
                          field=None,
                          documentcount_collection=None):
        """Get documentcount of a term.

        With caching enabled a cached entry is used when present; otherwise
        the document count collection is queried and, with caching enabled,
        the outcome is stored in the cache. Counts missing from the document
        (or a missing document) are reported as 0.

        Args:
            term, str
            field, str or None, the configured summary or description field
                name; None selects both counts
            documentcount_collection, collection to query; when None the
                document count collection is opened here

        Returns:
            if field == None: (int, int), (summary document count, description document count)
            else: int, the document count of corresponding field, 0 for a
                field that is neither the summary nor the description field
        """

        if cls.__is_cache and term in cls.__cache_document_count:
            counts = cls.__cache_document_count[term]
            if field is None:
                return counts
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            summary_name = config.get('bug_summary_name')
            description_name = config.get('bug_description_name')
        else:
            # load from db
            from ir_mongodb_helper import IRCollection
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            term_name = config.get('bug_term_name')
            summary_name = config.get('bug_summary_name')
            description_name = config.get('bug_description_name')
            if documentcount_collection is None:
                documentcount_collection = IRCollection(
                    'bug_db_name', 'bug_documentcount_collection_name', 'r')
            res = documentcount_collection.find({term_name: term})
            summary = 0
            description = 0
            if res.count() > 0:
                # NOTE(review): assuming find() returns a pymongo-style
                # cursor, every res[0] issues its own query, so the document
                # is fetched once and reused for all checks.
                entry = res[0]
                if summary_name in entry:
                    summary = entry[summary_name]
                if description_name in entry:
                    description = entry[description_name]
            counts = (summary, description)
            if cls.__is_cache:
                cls.__cache_document_count[term] = counts
            if field is None:
                return counts
        # return value for a single field (shared by both paths)
        if field == summary_name:
            return counts[0]
        elif field == description_name:
            return counts[1]
        else:
            return 0
コード例 #14
0
ファイル: ir_report.py プロジェクト: LeonXJ/Intereport
    def similarity_over_all(self):
        """Calculate similarity between this report and stored reports.

        Candidates are the documents of the basic collection that have this
        report's product, a creation timestamp greater than this report's
        creation timestamp minus the search span, and a bug id that is not
        in the excluded report ids. The dummy bug id is skipped as well.

        Returns:
            dict, {bug_id -> result of similarity_with() for that bug},
            documented as
            [score, summary_score, description_score, stacktrace_score]
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        # 2 * 365 days; presumably seconds, i.e. timestamps are epoch
        # seconds -- TODO confirm.
        search_time_span = 2 * 3600 * 24 * 365

        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {'$gt': self.get_create_ts() - search_time_span},
            bug_id_name: {'$nin': self.__exclude_report_ids}})
        result = {}
        logger.println('Comparing with %d reports.' % (reports2scan.count()))

        # Single-argument parenthesized print produces the same output under
        # the Python 2 print statement and is valid Python 3 syntax.
        print(self.__summary_text)
        print(self.__description_text)

        # Whether this report has a stacktrace does not depend on the
        # candidate, so decide once instead of once per candidate.
        own_stacktrace = self.get_stacktrace()
        need_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0

        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being none, we create and fill the info of
            #    report manually
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            # if self.__stacktrace is empty, we don't need to do this
            if need_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
コード例 #15
0
    def get_documentcount(cls, term, field=None, documentcount_collection=None):
        """Get documentcount of a term.

        With caching enabled a cached entry is used when present; otherwise
        the document count collection is queried and, with caching enabled,
        the outcome is stored in the cache. Counts missing from the document
        (or a missing document) are reported as 0.

        Args:
            term, str
            field, str or None, the configured summary or description field
                name; None selects both counts
            documentcount_collection, collection to query; when None the
                document count collection is opened here

        Returns:
            if field == None: (int, int), (summary document count, description document count)
            else: int, the document count of corresponding field, 0 for a
                field that is neither the summary nor the description field
        """

        if cls.__is_cache and term in cls.__cache_document_count:
            counts = cls.__cache_document_count[term]
            if field is None:
                return counts
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            summary_name = config.get('bug_summary_name')
            description_name = config.get('bug_description_name')
        else:
            # load from db
            from ir_mongodb_helper import IRCollection
            from ir_config import IRConfig
            config = IRConfig.get_instance()
            term_name = config.get('bug_term_name')
            summary_name = config.get('bug_summary_name')
            description_name = config.get('bug_description_name')
            if documentcount_collection is None:
                documentcount_collection = IRCollection(
                    'bug_db_name', 'bug_documentcount_collection_name', 'r')
            res = documentcount_collection.find({term_name: term})
            summary = 0
            description = 0
            if res.count() > 0:
                # NOTE(review): assuming find() returns a pymongo-style
                # cursor, every res[0] issues its own query, so the document
                # is fetched once and reused for all checks.
                entry = res[0]
                if summary_name in entry:
                    summary = entry[summary_name]
                if description_name in entry:
                    description = entry[description_name]
            counts = (summary, description)
            if cls.__is_cache:
                cls.__cache_document_count[term] = counts
            if field is None:
                return counts
        # return value for a single field (shared by both paths)
        if field == summary_name:
            return counts[0]
        elif field == description_name:
            return counts[1]
        else:
            return 0
コード例 #16
0
    def get_iterator(cls, arg):
        """Query the text collection with the given condition.

        Args:
            arg: dict, condition

        Returns:
            cursor, the result of find() on the text collection
        """
        from ir_mongodb_helper import IRCollection
        # Collection is created with mode 'r'.
        collection = IRCollection(
            'bug_db_name', 'bug_text_collection_name', 'r')
        cursor = collection.find(arg)
        return cursor
コード例 #17
0
ファイル: ir_text.py プロジェクト: LeonXJ/Intereport
    def get_iterator(cls, arg):
        """Query the text collection with the given condition.

        Args:
            arg: dict, condition

        Returns:
            cursor, the result of find() on the text collection
        """
        from ir_mongodb_helper import IRCollection
        # Collection is created with mode 'r'.
        collection = IRCollection('bug_db_name',
                                  'bug_text_collection_name', 'r')
        cursor = collection.find(arg)
        return cursor
コード例 #18
0
 def get_iterator(cls, arg=None):
     """Get iterator of termcounts fulfilling arg.

     Args:
         arg: dict, condition; a falsy value (None, {}) is replaced by an
             empty condition.

     Returns:
         cursor
     """
     condition = arg or {}
     from ir_mongodb_helper import IRCollection
     collection = IRCollection('bug_db_name',
                               'bug_termcount_collection_name', 'r')
     return collection.find(condition)
コード例 #19
0
ファイル: ir_tfidf.py プロジェクト: LeonXJ/Intereport
 def cache_all_data(cls):
     """Load the TFIDF of every bug into memory.

     Enables caching, empties the cache, then stores a
     (summary TFIDF, description TFIDF) pair per bug id for every
     document of the tfidf collection.
     """
     from ir_log import IRProgressBar
     from ir_config import IRConfig
     from ir_mongodb_helper import IRCollection

     def setting(key):
         # Resolve a configured field name.
         return IRConfig.get_instance().get(key)

     id_field = setting('bug_id_name')
     summary_field = setting('bug_summary_name')
     description_field = setting('bug_description_name')
     collection = IRCollection('bug_db_name',
                               'bug_tfidf_collection_name', 'r')
     cls.set_is_cache(True)
     cls.__cache = {}

     def remember(entry):
         # Called once per document of the collection.
         pair = (entry[summary_field], entry[description_field])
         cls.__cache[entry[id_field]] = pair

     IRProgressBar.execute_iteration_for_cursor(
         collection.find(), remember, "Caching TFIDF")
コード例 #20
0
    def test_parse_info_level0(self):
        """Parse the level-0 test info and check that bug 102500 is not
        present in the duplicate collection afterwards."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_duplicate_group import IRDuplicateGroup

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        parser = IRDuplicateGroup()
        parser.parse_info_level0('../data/test/info_level0_test')

        # test if incomplete bugs have been removed
        duplicates = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        assert duplicates is not None
        matches = duplicates.find({'bug_id': 102500})
        assert matches.count() == 0

        IRLog.get_instance().stop_log()
コード例 #21
0
    def test_parse_info_level0(self):
        """Parse the level-0 test info and check that bug 102500 is not
        present in the duplicate collection afterwards."""
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_duplicate_group import IRDuplicateGroup

        IRLog.get_instance().start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')
        parser = IRDuplicateGroup()
        parser.parse_info_level0('../data/test/info_level0_test')

        # test if incomplete bugs have been removed
        duplicates = IRCollection('bug_db_name',
                                  'bug_duplicate_collection_name', 'r')
        assert duplicates is not None
        matches = duplicates.find({'bug_id': 102500})
        assert matches.count() == 0

        IRLog.get_instance().stop_log()
コード例 #22
0
    def cache_all_data(cls):
        """Load the TFIDF of every bug into memory.

        Enables caching, empties the cache, then stores a
        (summary TFIDF, description TFIDF) pair per bug id for every
        document of the tfidf collection.
        """
        from ir_log import IRProgressBar
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        def setting(key):
            # Resolve a configured field name.
            return IRConfig.get_instance().get(key)

        id_field = setting('bug_id_name')
        summary_field = setting('bug_summary_name')
        description_field = setting('bug_description_name')
        collection = IRCollection(
            'bug_db_name', 'bug_tfidf_collection_name', 'r')
        cls.set_is_cache(True)
        cls.__cache = {}

        def remember(entry):
            # Called once per document of the collection.
            pair = (entry[summary_field], entry[description_field])
            cls.__cache[entry[id_field]] = pair

        IRProgressBar.execute_iteration_for_cursor(
            collection.find(), remember, "Caching TFIDF")
コード例 #23
0
    def get_bugs_in_group(cls, group_id):
        """Collect the ids of the bugs that belong to a duplicate group.

        Args:
            group_id: int

        Returns:
            [int], [bug_id]; empty when no document of the duplicate
            collection carries group_id.
        """

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        collection = IRCollection('bug_db_name',
                                  'bug_duplicate_collection_name', 'r')
        id_field = IRConfig.get_instance().get('bug_id_name')
        group_field = IRConfig.get_instance().get('bug_group_name')
        bug_ids = []
        for member in collection.find({group_field: group_id}):
            bug_ids.append(member[id_field])
        return bug_ids
コード例 #24
0
    def get_bugs_in_group(cls, group_id):
        """Collect the ids of the bugs that belong to a duplicate group.

        Args:
            group_id: int

        Returns:
            [int], [bug_id]; empty when no document of the duplicate
            collection carries group_id.
        """

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        collection = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        id_field = IRConfig.get_instance().get('bug_id_name')
        group_field = IRConfig.get_instance().get('bug_group_name')
        bug_ids = []
        for member in collection.find({group_field: group_id}):
            bug_ids.append(member[id_field])
        return bug_ids
コード例 #25
0
    def get_basic_info_of_bug(cls, bug_id):
        """Get basic info from mongodb.

        Args:
            bug_id: int

        Returns:
            (int, str): (create_ts, product); (-1, '') if no document of
            the basic collection matches bug_id.
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')
        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')
        res = basic_collection.find({bug_id_name: bug_id})
        if res.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every res[0] issues its own query, so the document is fetched
            # once and reused for both fields.
            info = res[0]
            return info[create_ts_name], info[product_name]
        return -1, ''
コード例 #26
0
ファイル: ir_text.py プロジェクト: LeonXJ/Intereport
    def get_basic_info_of_bug(cls, bug_id):
        """Get basic info from mongodb.

        Args:
            bug_id: int

        Returns:
            (int, str): (create_ts, product); (-1, '') if no document of
            the basic collection matches bug_id.
        """
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        config = IRConfig.get_instance()
        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')
        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')
        res = basic_collection.find({bug_id_name: bug_id})
        if res.count() > 0:
            # NOTE(review): assuming find() returns a pymongo-style cursor,
            # every res[0] issues its own query, so the document is fetched
            # once and reused for both fields.
            info = res[0]
            return info[create_ts_name], info[product_name]
        return -1, ''
コード例 #27
0
    def get_duplicate_group_information(cls, group_size_min, group_size_max):
        """Find the duplicate groups whose size lies between two bounds.

        Both bounds are exclusive: the query uses "$gt" for the minimum and
        "$lt" for the maximum.

        Args:
            group_size_min: int, Wanted groups are larger than this.
            group_size_max: int, Wanted groups are smaller than this.

        Returns:
            [int], [group_id]
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        count_collection = IRCollection(
            'bug_db_name', 'bug_duplicate_group_count_collection_name', 'r')
        group_field = IRConfig.get_instance().get('bug_group_name')
        size_field = IRConfig.get_instance().get('bug_group_size')
        size_range = {"$gt": group_size_min, "$lt": group_size_max}
        group_ids = []
        for group in count_collection.find({size_field: size_range}):
            group_ids.append(group[group_field])
        return group_ids
コード例 #28
0
    def get_duplicate_group_information(cls, group_size_min, group_size_max):
        """Find the duplicate groups whose size lies between two bounds.

        Both bounds are exclusive: the query uses "$gt" for the minimum and
        "$lt" for the maximum.

        Args:
            group_size_min: int, Wanted groups are larger than this.
            group_size_max: int, Wanted groups are smaller than this.

        Returns:
            [int], [group_id]
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        count_collection = IRCollection(
            'bug_db_name', 'bug_duplicate_group_count_collection_name', 'r')
        group_field = IRConfig.get_instance().get('bug_group_name')
        size_field = IRConfig.get_instance().get('bug_group_size')
        size_range = {"$gt": group_size_min, "$lt": group_size_max}
        group_ids = []
        for group in count_collection.find({size_field: size_range}):
            group_ids.append(group[group_field])
        return group_ids
コード例 #29
0
ファイル: ir_text.py プロジェクト: LeonXJ/Intereport
 def cache_all_data(cls):
     """Load all text data into memory.

     Switches caching on via ``cls.set_is_cache(True)``, resets both class
     caches, then walks every document of the text collection and stores

     * ``(summary, description)`` in the summary/description cache, and
     * the stacktrace field in the stacktrace cache,

     both keyed by the document's bug id. The collection is closed when
     the iteration ends, whether it succeeds or raises.
     """
     from ir_log import IRProgressBar
     from ir_config import IRConfig
     from ir_mongodb_helper import IRCollection
     # get config
     config = IRConfig.get_instance()
     bug_id_name = config.get('bug_id_name')
     summary_name = config.get('bug_summary_name')
     description_name = config.get('bug_description_name')
     stacktrace_name = config.get('bug_stacktrace_name')
     # caching data
     cls.set_is_cache(True)
     text_collection = \
             IRCollection('bug_db_name', 'bug_text_collection_name', 'r')
     cls.__cache_summary_description = {}
     cls.__cache_stacktrace = {}

     def iter_func(bug):
         # Called once per document by the progress-bar helper.
         bug_id = bug[bug_id_name]
         cls.__cache_summary_description[bug_id] = \
                 (bug[summary_name], bug[description_name])
         cls.__cache_stacktrace[bug_id] = bug[stacktrace_name]

     try:
         IRProgressBar.execute_iteration_for_cursor(
             text_collection.find(), iter_func, 'Caching Text Data')
     finally:
         # Release the collection even when a document is malformed or the
         # iteration fails part-way through.
         text_collection.close()
コード例 #30
0
 def cache_all_data(cls):
     """Cache every entry of the document-count collection in memory.

     Turns the class cache flag on, then visits each document of the
     document-count collection and stores a ``(summary, description)``
     pair under the document's term. A field that is absent from a
     document is recorded as 0.
     """
     from ir_log import IRProgressBar
     from ir_config import IRConfig
     from ir_mongodb_helper import IRCollection
     # field names come from the configuration
     config = IRConfig.get_instance()
     summary_name = config.get('bug_summary_name')
     description_name = config.get('bug_description_name')
     term_name = config.get('bug_term_name')

     cls.__is_cache = True
     documentcount_collection = IRCollection(
         'bug_db_name', 'bug_documentcount_collection_name', 'r')

     def iter_document_count(term):
         # Default both values to 0 and overwrite only when present.
         summary_value = 0
         if summary_name in term:
             summary_value = term[summary_name]
         description_value = 0
         if description_name in term:
             description_value = term[description_name]
         entry = (summary_value, description_value)
         cls.__cache_document_count[term[term_name]] = entry

     cursor = documentcount_collection.find({})
     IRProgressBar.execute_iteration_for_cursor(
         cursor, iter_document_count, "Caching Document Count")
コード例 #31
0
    def get_group_of_bug(cls, bug_id):
        """Look up the duplicate group a bug belongs to.

        Args:
            bug_id: int

        Returns:
            The group field (configured as 'bug_group_name') of the first
            document in the duplicate collection matching ``bug_id``, or
            None when no document matches.
        """

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        collection = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        config = IRConfig.get_instance()
        id_field = config.get('bug_id_name')
        group_field = config.get('bug_group_name')

        matches = collection.find({id_field: bug_id})
        if matches.count() != 0:
            return matches[0][group_field]
        return None
コード例 #32
0
    def get_group_of_bug(cls, bug_id):
        """Return the group id recorded for a bug, if any.

        Args:
            bug_id: int

        Returns:
            The value stored under the configured group field of the first
            duplicate-collection document whose id field equals ``bug_id``;
            None if the query matches nothing.
        """

        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection

        duplicates = IRCollection(
            'bug_db_name', 'bug_duplicate_collection_name', 'r')
        settings = IRConfig.get_instance()
        bug_id_key = settings.get('bug_id_name')
        group_key = settings.get('bug_group_name')

        found = duplicates.find({bug_id_key: bug_id})
        # Guard clause: nothing recorded for this bug.
        if found.count() == 0:
            return None
        return found[0][group_key]
コード例 #33
0
ファイル: get_product.py プロジェクト: LeonXJ/Intereport
if __name__ == '__main__':
    import sys
  
    from ir_config import IRConfig
    from ir_text import IRText
    from ir_mongodb_helper import IRCollection

    config = IRConfig.get_instance()
    config.load(sys.argv[1])
    product_name = config.get('bug_product_name')
    
    products = dict()

    basic = IRCollection('bug_db_name', 'bug_basic_collection_name', 'r')
    cursor = basic.find(None)
    for bug in cursor:
        product = bug[product_name]
        if product not in products:
            products[product] = 0
        products[product] += 1

    product_list = products.items()
    product_list.sort(cmp=lambda x,y:cmp(x[1],y[1]), reverse=True)

    prefix = '' if sys.argv.__len__() < 3 else sys.argv[2]
    surfix = '' if sys.argv.__len__() < 4 else sys.argv[3]
    threshold = 100 if sys.argv.__len__() <5 else int(sys.argv[4])
    for product in product_list:
        if product[1] < threshold:
            break