def __init__(self):
    """Start with empty accumulators: totals, size range, histograms and extensions."""
    # Running totals.
    self.count: int = 0
    self.total_size: int = 0
    # Distribution of sizes, held in the project's LengthDict.
    self._size_hist = statistics.LengthDict()
    # The minimum starts at the largest sentinel and the maximum at the
    # smallest; presumably the first observed size then replaces both.
    self.size_min = sys.maxsize
    self.size_max = -(sys.maxsize + 1)
    # Counts keyed by an (int, int) tuple; unseen keys read as 0.
    self._date_hist: typing.Dict[typing.Tuple[int, int], int] = collections.defaultdict(int)
    # Distinct extension strings.
    self.extensions: typing.Set[str] = set()
def test_length_dict_histogram_power_2_skip_a_value():
    """The histogram still emits a row for a power-of-two bucket with no entries."""
    expected_rows = [
        '>=2**1 [ 1] | ++++++++++++++++++++++++++++++++++++++++',
        '>=2**2 [ 0] | ',
        '>=2**3 [ 1] | ++++++++++++++++++++++++++++++++++++++++',
    ]
    length_dict = statistics.LengthDict()
    # Lengths 2 and 8 leave the 2**2 bucket unpopulated.
    for length in (2, 8):
        length_dict.add(length)
    rows = length_dict.histogram_power_of_2()
    assert length_dict.zero_count == 0
    assert rows == expected_rows
def test_length_dict_histogram_power_2():
    """Lengths 0..63 produce one histogram row per power of two from 2**0 to 2**5."""
    expected_rows = [
        '>=2**0 [ 1] | +',
        '>=2**1 [ 2] | +++',
        '>=2**2 [ 4] | +++++',
        '>=2**3 [ 8] | ++++++++++',
        '>=2**4 [ 16] | ++++++++++++++++++++',
        '>=2**5 [ 32] | ++++++++++++++++++++++++++++++++++++++++',
    ]
    length_dict = statistics.LengthDict()
    for length in range(64):
        length_dict.add(length)
    rows = length_dict.histogram_power_of_2()
    # The single zero-length entry is tracked separately.
    assert length_dict.zero_count == 1
    assert rows == expected_rows
def test_length_dict():
    """Adding the eight lengths 0..7 yields a count of eight."""
    number_of_lengths = 8
    length_dict = statistics.LengthDict()
    for length in range(number_of_lengths):
        length_dict.add(length)
    assert length_dict.count == 8
def test_length_dict_reduced_power_2():
    """Lengths 0..63 reduce to a mapping of power-of-two exponent to entry count."""
    expected_counts = {
        0: 1,
        1: 2,
        2: 4,
        3: 8,
        4: 16,
        5: 32,
    }
    length_dict = statistics.LengthDict()
    for length in range(64):
        length_dict.add(length)
    reduced = length_dict.reduced_power_2()
    assert reduced == expected_counts
def test_length_dict_add_raises():
    """Adding a negative length raises ValueError carrying the expected message."""
    length_dict = statistics.LengthDict()
    with pytest.raises(ValueError) as exc_info:
        length_dict.add(-1)
    # Checked after the context manager exits, once the exception is captured.
    message = exc_info.value.args[0]
    assert message == 'Length must be >= 0 not -1'
def scan_RP66V1_file_visible_records(fobj: typing.BinaryIO, fout: typing.TextIO, **kwargs) -> None:
    """Scans the file reporting Visible Records, optionally Logical Record Segments as well.

    The file is read through ``File.FileRead`` and every Visible Record is visited.
    All reporting is written to ``fout``; nothing is returned.

    :param fobj: Binary file object handed to ``File.FileRead``.
    :param fout: Text stream that receives the report.
    :param kwargs: Recognised keys:

        * ``verbose`` (optional, default 0): when truthy, each record is written with
          its stride, and the LRSH position RLE and per-length counts are added to the
          LRSH summary.
        * ``lrsh_dump`` (required): when truthy, the Logical Record Segment Headers of
          each Visible Record are iterated, counted and summarised.

    :raises KeyError: If ``lrsh_dump`` is not present in ``kwargs``.
    """
    verbose = kwargs.get('verbose', 0)
    if not verbose:
        # Hint at what the terse output leaves out.
        fout.write(
            colorama.Fore.YELLOW
            + 'Use -v to see individual records, RLE of LRSH positions and length distribution of LRS.\n'
        )
    with _output_section_header_trailer('RP66V1 Visible and LRSH Records', '*', os=fout):
        # Unlike 'verbose' this key has no default, so it must be supplied.
        lrsh_dump = kwargs['lrsh_dump']
        with File.FileRead(fobj) as rp66_file:
            # Previous positions, used to compute the stride between successive records.
            vr_position = lr_position = 0
            count_vr = 0
            count_lrsh = 0
            count_lrsh_first = 0
            # Record type tallies, updated for first segments only.
            count_lrsh_type = {
                'EFLR': collections.Counter(),
                'IFLR': collections.Counter(),
            }
            # Lengths of every segment seen (first or not).
            count_lrsh_length = statistics.LengthDict()
            # Tally of segments by their (is_first, is_last) flags.
            lrsh_first_last = {
                (False, False): 0,
                (False, True): 0,
                (True, False): 0,
                (True, True): 0,
            }
            rle_visible_record_positions = Rle.RLE()
            rle_lrsh_positions = Rle.RLE()
            for visible_record in rp66_file.iter_visible_records():
                # Distance from the previous Visible Record (from 0 for the first one).
                vr_stride = visible_record.position - vr_position
                rle_visible_record_positions.add(visible_record.position)
                if verbose:
                    fout.write(f'{visible_record} Stride: 0x{vr_stride:08x} {vr_stride:6,d}\n')
                if lrsh_dump:
                    for lrsh in rp66_file.iter_LRSHs_for_visible_record(visible_record):
                        count_lrsh_length.add(lrsh.length)
                        if lrsh.attributes.is_first:
                            # Only first segments contribute a position and a record type.
                            rle_lrsh_positions.add(lrsh.position)
                            count_lrsh_first += 1
                            if lrsh.attributes.is_eflr:
                                count_lrsh_type['EFLR'].update([lrsh.record_type])
                            else:
                                count_lrsh_type['IFLR'].update([lrsh.record_type])
                            # A segment that is both first and last is reported as first.
                            output = colorama.Fore.GREEN + f' {lrsh}'
                        elif lrsh.attributes.is_last:
                            output = colorama.Fore.RED + f' --{lrsh}'
                        else:
                            # Neither first nor last.
                            output = colorama.Fore.YELLOW + f' ..{lrsh}'
                        if verbose:
                            # Distance from the previous LRSH, across Visible Record boundaries.
                            lr_stride = lrsh.position - lr_position
                            fout.write(f' {output} Stride: 0x{lr_stride:08x} {lr_stride:6,d}\n')
                        lr_position = lrsh.position
                        lrsh_first_last[(lrsh.attributes.is_first, lrsh.attributes.is_last)] += 1
                        count_lrsh += 1
                vr_position = visible_record.position
                count_vr += 1
            with _output_section_header_trailer('Summary of Visible Records', '=', os=fout):
                fout.write(f'Visible records: {count_vr:,d}\n')
                with _output_section_header_trailer('RLE Visible Record Position', '-', os=fout):
                    _write_position_rle(rle_visible_record_positions, fout)
            if lrsh_dump:
                with _output_section_header_trailer('Summary of LRSH', '=', os=fout):
                    fout.write(f'LRSH: total={count_lrsh:,d} is_first={count_lrsh_first}\n')
                    with _output_section_header_trailer('Summary of Logical Record Types', '-', os=fout):
                        fout.write(f'LRSH: record types and counts (first segments only):\n')
                        for flr_type in ('EFLR', 'IFLR'):
                            # The bracketed figure is the number of distinct record types seen.
                            fout.write(f'Count of Logical Record types for "{flr_type}" [{len(count_lrsh_type[flr_type])}]:\n')
                            for record_type in sorted(count_lrsh_type[flr_type].keys()):
                                fout.write(f'{record_type:3d} : {count_lrsh_type[flr_type][record_type]:8,d}\n')
                    with _output_section_header_trailer('Summary of LRSH Lengths', '-', os=fout):
                        fout.write(
                            f'LRSH: record lengths and counts (all segments)'
                            f' [{len(count_lrsh_length)}]'
                        )
                        # min()/max() would raise on an empty key set, so the range is
                        # written only when at least one length was recorded.
                        if len(count_lrsh_length):
                            fout.write(f' range: {min(count_lrsh_length.keys())}...{max(count_lrsh_length.keys())}')
                        fout.write(f'\n')
                        fout.write('\n'.join(count_lrsh_length.histogram_power_of_2()))
                        fout.write(f'\n')
                    with _output_section_header_trailer('Summary of LRSH First/last', '-', os=fout):
                        fout.write(f'{"(First, Last)":16} : {"Count":8}\n')
                        for k in sorted(lrsh_first_last.keys()):
                            fout.write(f'{str(k):16} : {lrsh_first_last[k]:8d}\n')
                    if verbose:
                        with _output_section_header_trailer('RLE LRSH Position', '-', os=fout):
                            _write_position_rle(rle_lrsh_positions, fout)
                        # NOTE(review): the per-length counts are emitted in verbose mode only;
                        # confirm this loop belongs after, not inside, the section above.
                        for length in sorted(count_lrsh_length.keys()):
                            fout.write(f'{length:3d} : {count_lrsh_length[length]:8,d}\n')