Example #1
0
 def __init__(self):
     """Create an empty set of accumulators.

     Everything starts in its "nothing seen yet" state: zero counters, an empty
     length histogram, an empty date histogram and an empty set of extensions.
     """
     # Running tallies.
     self.count: int = 0
     self.total_size: int = 0
     # Distribution of sizes, held in the project's LengthDict.
     self._size_hist = statistics.LengthDict()
     # The extremes start at opposite ends of the integer range.
     # NOTE(review): presumably so that the first size recorded replaces both - confirm
     # against the method that updates them.
     self.size_min, self.size_max = sys.maxsize, -sys.maxsize - 1
     # Counts keyed by a pair of ints; a missing key reads as zero.
     # NOTE(review): the attribute name suggests the pair is a date such as
     # (year, month) - confirm against the code that fills it.
     self._date_hist: typing.Dict[typing.Tuple[int, int], int] = collections.defaultdict(int)
     self.extensions: typing.Set[str] = set()
Example #2
0
def test_length_dict_histogram_power_2_skip_a_value():
    """The power-of-two histogram keeps a row for a bucket that has no entries.

    Only the lengths 2 and 8 are added, so the 2**2 row is expected to appear
    between the 2**1 and 2**3 rows with a zero count and an empty bar.
    """
    lengths = statistics.LengthDict()
    for value in (2, 8):
        lengths.add(value)
    rows = lengths.histogram_power_of_2()
    # No zero-length entry was added.
    assert lengths.zero_count == 0
    assert rows == [
        '>=2**1  [     1] | ++++++++++++++++++++++++++++++++++++++++',
        '>=2**2  [     0] | ',
        '>=2**3  [     1] | ++++++++++++++++++++++++++++++++++++++++',
    ]
Example #3
0
def test_length_dict_histogram_power_2():
    """The power-of-two histogram of the lengths 0..63 doubles row by row.

    The single zero length is reported through ``zero_count`` and does not get
    a row of its own in the expected output.
    """
    lengths = statistics.LengthDict()
    for value in range(64):
        lengths.add(value)
    rows = lengths.histogram_power_of_2()
    assert lengths.zero_count == 1
    assert rows == [
        '>=2**0  [     1] | +',
        '>=2**1  [     2] | +++',
        '>=2**2  [     4] | +++++',
        '>=2**3  [     8] | ++++++++++',
        '>=2**4  [    16] | ++++++++++++++++++++',
        '>=2**5  [    32] | ++++++++++++++++++++++++++++++++++++++++',
    ]
Example #4
0
def test_length_dict():
    """``count`` reports how many lengths have been added."""
    number_of_lengths = 8
    lengths = statistics.LengthDict()
    for value in range(number_of_lengths):
        lengths.add(value)
    assert lengths.count == number_of_lengths
Example #5
0
def test_length_dict_reduced_power_2():
    """``reduced_power_2()`` maps each power-of-two exponent to its number of entries."""
    lengths = statistics.LengthDict()
    for value in range(64):
        lengths.add(value)
    # Exponent -> count for the lengths 0..63.
    expected = {0: 1, 1: 2, 2: 4, 3: 8, 4: 16, 5: 32}
    assert lengths.reduced_power_2() == expected
Example #6
0
def test_length_dict_add_raises():
    """Adding a negative length raises ``ValueError`` with a descriptive message."""
    lengths = statistics.LengthDict()
    with pytest.raises(ValueError) as exc_info:
        lengths.add(-1)
    message = exc_info.value.args[0]
    assert message == 'Length must be >= 0 not -1'
Example #7
0
def scan_RP66V1_file_visible_records(fobj: typing.BinaryIO, fout: typing.TextIO, **kwargs) -> None:
    """Scans the file reporting Visible Records, optionally Logical Record Segments as well.

    The file is read through ``File.FileRead(fobj)`` and the report is written to ``fout``.

    Recognised keyword arguments:

    ``verbose``
        Defaults to 0. When truthy every Visible Record (and every LRSH, if they are
        being scanned) is written as it is read, together with its stride from the
        previous position, and the LRSH summary gains the RLE of LRSH positions and
        the count for each individual length. When falsy a one line hint about
        ``-v`` is written first.
    ``lrsh_dump``
        Defaults to False. When truthy the Logical Record Segment Headers (LRSH) of
        every Visible Record are scanned and summarised.
    """
    verbose = kwargs.get('verbose', 0)
    # Read with a default, like 'verbose', so that a caller that only wants Visible
    # Records can omit the key rather than hit a KeyError after output has started.
    lrsh_dump = kwargs.get('lrsh_dump', False)
    if not verbose:
        # NOTE(review): no colour reset is written after any of the colorama codes in
        # this function; presumably colorama is initialised with autoreset elsewhere - confirm.
        fout.write(
            colorama.Fore.YELLOW
            + 'Use -v to see individual records, RLE of LRSH positions and length distribution of LRS.\n'
        )
    with _output_section_header_trailer('RP66V1 Visible and LRSH Records', '*', os=fout):
        with File.FileRead(fobj) as rp66_file:
            # Previous positions, used to compute the stride between successive records.
            vr_position = lr_position = 0
            count_vr = 0
            count_lrsh = 0
            count_lrsh_first = 0
            # Record type -> count, taken from first segments only.
            count_lrsh_type = {
                'EFLR': collections.Counter(),
                'IFLR': collections.Counter(),
            }
            count_lrsh_length = statistics.LengthDict()
            # (is_first, is_last) -> count over all segments.
            lrsh_first_last = {
                (False, False): 0,
                (False, True): 0,
                (True, False): 0,
                (True, True): 0,
            }
            rle_visible_record_positions = Rle.RLE()
            rle_lrsh_positions = Rle.RLE()
            for visible_record in rp66_file.iter_visible_records():
                vr_stride = visible_record.position - vr_position
                rle_visible_record_positions.add(visible_record.position)
                if verbose:
                    fout.write(f'{visible_record} Stride: 0x{vr_stride:08x} {vr_stride:6,d}\n')
                if lrsh_dump:
                    for lrsh in rp66_file.iter_LRSHs_for_visible_record(visible_record):
                        attributes = lrsh.attributes
                        count_lrsh_length.add(lrsh.length)
                        if attributes.is_first:
                            rle_lrsh_positions.add(lrsh.position)
                            count_lrsh_first += 1
                            flr_kind = 'EFLR' if attributes.is_eflr else 'IFLR'
                            count_lrsh_type[flr_kind][lrsh.record_type] += 1
                        if verbose:
                            # The coloured text is only ever written here so only build it here.
                            if attributes.is_first:
                                output = colorama.Fore.GREEN + f' {lrsh}'
                            elif attributes.is_last:
                                output = colorama.Fore.RED + f'  --{lrsh}'
                            else:
                                output = colorama.Fore.YELLOW + f'  ..{lrsh}'
                            lr_stride = lrsh.position - lr_position
                            fout.write(f'  {output} Stride: 0x{lr_stride:08x} {lr_stride:6,d}\n')
                        lr_position = lrsh.position
                        lrsh_first_last[(attributes.is_first, attributes.is_last)] += 1
                        count_lrsh += 1
                vr_position = visible_record.position
                count_vr += 1
            with _output_section_header_trailer('Summary of Visible Records', '=', os=fout):
                fout.write(f'Visible records: {count_vr:,d}\n')
                with _output_section_header_trailer('RLE Visible Record Position', '-', os=fout):
                    _write_position_rle(rle_visible_record_positions, fout)
            if lrsh_dump:
                with _output_section_header_trailer('Summary of LRSH', '=', os=fout):
                    fout.write(f'LRSH: total={count_lrsh:,d} is_first={count_lrsh_first}\n')
                    with _output_section_header_trailer('Summary of Logical Record Types', '-', os=fout):
                        fout.write('LRSH: record types and counts (first segments only):\n')
                        for flr_type in ('EFLR', 'IFLR'):
                            type_counts = count_lrsh_type[flr_type]
                            fout.write(f'Count of Logical Record types for "{flr_type}" [{len(type_counts)}]:\n')
                            # Keys are unique so sorting the items orders by record type.
                            for record_type, record_count in sorted(type_counts.items()):
                                fout.write(f'{record_type:3d} : {record_count:8,d}\n')
                    with _output_section_header_trailer('Summary of LRSH Lengths', '-', os=fout):
                        distinct_lengths = len(count_lrsh_length)
                        fout.write(
                            'LRSH: record lengths and counts (all segments)'
                            f' [{distinct_lengths}]'
                        )
                        if distinct_lengths:
                            # min()/max() would raise on an empty collection.
                            fout.write(f' range: {min(count_lrsh_length.keys())}...{max(count_lrsh_length.keys())}')
                        fout.write('\n')
                        fout.write('\n'.join(count_lrsh_length.histogram_power_of_2()))
                        fout.write('\n')
                    with _output_section_header_trailer('Summary of LRSH First/last', '-', os=fout):
                        fout.write(f'{"(First, Last)":16} : {"Count":8}\n')
                        for k in sorted(lrsh_first_last.keys()):
                            fout.write(f'{str(k):16} : {lrsh_first_last[k]:8d}\n')
                    if verbose:
                        with _output_section_header_trailer('RLE LRSH Position', '-', os=fout):
                            _write_position_rle(rle_lrsh_positions, fout)
                        # NOTE(review): these per-length counts are written after the RLE section
                        # has closed, so they sit directly inside 'Summary of LRSH' without a
                        # sub-section of their own - confirm that is the intended placement.
                        for length in sorted(count_lrsh_length.keys()):
                            fout.write(f'{length:3d} : {count_lrsh_length[length]:8,d}\n')