def _trydecode(self, data, codec: Optional[str], width: int, linecount: int) -> str: remaining = linecount result = [] if codec is None: from refinery.units.encoding.esc import esc decoded = data[:abs(width * linecount)] decoded = str(decoded | -esc(bare=True)) limit = min(abs(linecount) * width, len(decoded)) for k in range(0, limit, width): result.append(decoded[k:k + width]) return result try: import unicodedata unprintable = {'Cc', 'Cf', 'Co', 'Cs'} self.log_info(F'trying to decode as {codec}.') decoded = codecs.decode(data, codec, errors='strict') count = sum( unicodedata.category(c) not in unprintable for c in decoded) ratio = count / len(decoded) except UnicodeDecodeError as DE: self.log_info('decoding failed:', DE.reason) return None except ValueError as V: self.log_info('decoding failed:', V) return None if ratio < 0.8: self.log_info( F'data contains {ratio * 100:.2f}% printable characters, this is too low.' ) return None for paragraph in decoded.splitlines(False): if not remaining: break wrapped = [ line for chunk in textwrap.wrap( paragraph, width, break_long_words=True, break_on_hyphens=False, drop_whitespace=False, expand_tabs=True, max_lines=abs(remaining + 1), replace_whitespace=False, tabsize=4, ) for line in chunk.splitlines(keepends=False) ] remaining -= len(wrapped) result.extend(wrapped) return result[:abs(linecount)]
def test_quoted_string_01(self):
    """
    Processing a double-quoted string of hex escapes with `esc(quoted=True)` is expected
    to return the bytes `refinery` without the surrounding quotes.
    """
    decoder = esc(quoted=True)
    escaped = RB'"r\x65\x66\x69\x6ee\x72\x79"'
    decoded = decoder.process(escaped)
    self.assertEqual(decoded, B'refinery')
def test_quoted_string_04(self):
    """
    Calling `esc(quoted=True)` on input that opens with a double quote but never closes
    it is expected to raise a ValueError.
    """
    decoder = esc(quoted=True)
    unterminated = RB'"r\x65\x66\x69\x6ee\x72\x79'
    self.assertRaises(ValueError, decoder, unterminated)
def test_quoted_string_03(self):
    """
    Reversing `esc(quoted=True, hex=False)` on bytes that contain control characters and
    double quotes is expected to return a double-quoted string in which those characters
    appear as symbolic escape sequences and the inner quotes are escaped.
    """
    encoder = esc(quoted=True, hex=False)
    plain = B'binary\n\a\t.."refinery"!'
    expected = BR'"binary\n\a\t..\"refinery\"!"'
    self.assertEqual(encoder.reverse(plain), expected)
def test_quoted_string_02(self):
    """
    Reversing `esc(quoted=True, hex=True)` on the bytes `refinery` is expected to return
    a double-quoted string in which every byte is written as a hex escape.
    """
    encoder = esc(quoted=True, hex=True)
    expected = BR'"\x72\x65\x66\x69\x6e\x65\x72\x79"'
    encoded = encoder.reverse(RB'refinery')
    self.assertEqual(encoded, expected)
def REG_SZ(data: str) -> bytes:
    """
    Encode `data` with `self.codec`, pipe the result through `esc(quoted=True)`, and
    collect the output as bytes.

    This function reads `self` from an enclosing scope rather than taking it as a
    parameter. NOTE(review): presumably this formats a REG_SZ registry string value as
    a quoted, escaped literal; confirm against the caller.
    """
    encoded = data.encode(self.codec)
    quoted = encoded | esc(quoted=True)
    return quoted | bytes