def _print_index_stats(self):
        """
        Print internal Index structures stats. Used for debugging and testing.
        """
        try:
            from pympler.asizeof import asizeof as size_of
        except ImportError:
            print('Index statistics will be approximate: `pip install pympler` for correct structure sizes')
            from sys import getsizeof as size_of

        internal_structures = [
            'postings_by_rid    ',
            'dictionary         ',
            'tokens_by_tid      ',
            'frequencies_by_tid ',
            'rules_by_rid       ',
            'frequencies_by_rid ',
        ]

        print('Index statistics:')
        total_size = 0
        for struct_name in internal_structures:
            struct = getattr(self, struct_name.strip())
            print('  ', struct_name, ':', 'length    :', len(struct))
            siz = size_of(struct)
            total_size += siz
            print('  ', struct_name, ':', 'size in MB:', round(siz / (1024 * 1024.0), 2))
        print('    TOTAL internals in MB:', round(total_size / (1024 * 1024.0), 2))
        print('    TOTAL real size in MB:', round(size_of(self) / (1024 * 1024.0), 2))
Exemple #2
0
    def _print_index_stats(self):
        """
        Print internal Index structures stats. Used for debugging and testing.
        """
        try:
            from pympler.asizeof import asizeof as size_of
        except ImportError:
            print(
                'Index statistics will be approximate: `pip install pympler` for correct structure sizes'
            )
            from sys import getsizeof as size_of

        fields = [
            'dictionary',
            'tokens_by_tid',
            'rid_by_hash',
            'rules_by_rid',
            'tids_by_rid',
            'tids_sets_by_rid',
            'tids_msets_by_rid',
            'regular_rids',
            'negative_rids',
            'small_rids',
            'false_positive_rids',
            'false_positive_rid_by_hash',
        ]

        plen = max(map(len, fields)) + 1
        internal_structures = [s + (' ' * (plen - len(s))) for s in fields]

        print('Index statistics:')
        total_size = 0
        for struct_name in internal_structures:
            struct = getattr(self, struct_name.strip())
            try:
                print('  ', struct_name, ':', 'length    :', len(struct))
            except:
                print('  ', struct_name, ':', 'repr      :', repr(struct))
            siz = size_of(struct)
            total_size += siz
            print('  ', struct_name, ':', 'size in MB:',
                  round(siz / (1024 * 1024), 2))
        print('    TOTAL internals in MB:', round(total_size / (1024 * 1024),
                                                  2))
        print('    TOTAL real size in MB:',
              round(size_of(self) / (1024 * 1024), 2))
Exemple #3
0
    def _print_index_stats(self):
        """
        Print internal Index structures stats. Used for debugging and testing.
        """
        try:
            from pympler.asizeof import asizeof as size_of
        except ImportError:
            print('Index statistics will be approximate: `pip install pympler` for correct structure sizes')
            from sys import getsizeof as size_of

        fields = [
        'dictionary',
        'tokens_by_tid',
        'rid_by_hash',
        'rules_by_rid',
        'tids_by_rid',

        'tids_sets_by_rid',
        'tids_msets_by_rid',

        'regular_rids',
        'negative_rids',
        'small_rids',
        'false_positive_rids',
        ]

        plen = max(map(len, fields)) + 1
        internal_structures = [s + (' ' * (plen - len(s))) for s in fields]

        print('Index statistics:')
        total_size = 0
        for struct_name in internal_structures:
            struct = getattr(self, struct_name.strip())
            try:
                print('  ', struct_name, ':', 'length    :', len(struct))
            except:
                print('  ', struct_name, ':', 'repr      :', repr(struct))
            siz = size_of(struct)
            total_size += siz
            print('  ', struct_name, ':', 'size in MB:', round(siz / (1024 * 1024), 2))
        print('    TOTAL internals in MB:', round(total_size / (1024 * 1024), 2))
        print('    TOTAL real size in MB:', round(size_of(self) / (1024 * 1024), 2))