def setUp(self):
    super().setUp()
    self.detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # token for 0
            tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # token for 2
            tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # token for 3
            tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # token for 2
        ]))
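# The '$' strings above are recursively detokenizable: each wraps a 4-byte
# little-endian token in Base64. A minimal sketch of decoding one of them,
# using only the standard library:
import base64
import struct

token, = struct.unpack('<I', base64.b64decode('AgAAAA=='))
assert token == 2  # so detokenizing token 1 yields a reference to token 2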
def _read_tokenized_entries(
        data: bytes,
        domain: Pattern[str]) -> Iterator[tokens.TokenizedStringEntry]:
    index = 0

    while index + _ENTRY.size <= len(data):
        magic, token, domain_len, string_len = _ENTRY.unpack_from(data, index)

        if magic != _TOKENIZED_ENTRY_MAGIC:
            raise Error(
                f'Expected magic number 0x{_TOKENIZED_ENTRY_MAGIC:08x}, '
                f'found 0x{magic:08x}')

        start = index + _ENTRY.size
        index = start + domain_len + string_len

        # Create the entries, trimming null terminators.
        entry = tokens.TokenizedStringEntry(
            token,
            data[start + domain_len:index - 1].decode(errors=_ERROR_HANDLER),
            data[start:start + domain_len - 1].decode(errors=_ERROR_HANDLER),
        )

        if data[start + domain_len - 1] != 0:
            raise Error(
                f'Domain {entry.domain} for {entry.string} not null terminated'
            )

        if data[index - 1] != 0:
            raise Error(f'String {entry.string} is not null terminated')

        if domain.fullmatch(entry.domain):
            yield entry
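# Usage sketch: pack one well-formed entry with the module's own constants
# and parse it back. The layout assumed here follows the parser above: a
# fixed header (magic, token, domain length, string length), then the
# null-terminated domain bytes, then the null-terminated string bytes.
import re

domain_bytes, string_bytes = b'\0', b'Hello, %s!\0'
blob = _ENTRY.pack(_TOKENIZED_ENTRY_MAGIC, 0x1234, len(domain_bytes),
                   len(string_bytes)) + domain_bytes + string_bytes

for entry in _read_tokenized_entries(blob, re.compile('.*')):
    print(f'{entry.token:08x} -> {entry.string!r}')  # 00001234 -> 'Hello, %s!'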
def test_simple(self):
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(0xcdab, '%02d %s %c%%',
                                        dt.datetime.now())
        ]))
    self.assertEqual(str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
                     '01 Two 3%')
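# Why that payload produces '01 Two 3%': the first four bytes are the token
# 0xcdab in little-endian order; the rest are the arguments. Integers are
# zigzag-varint encoded and strings are length prefixed. A sketch of the
# zigzag step, assuming the standard encoding:
def zigzag_decode(value: int) -> int:
    return (value >> 1) ^ -(value & 1)

assert zigzag_decode(0x02) == 1   # '%02d' formats as '01'
assert zigzag_decode(0x66) == 51  # '%c' formats chr(51), i.e. '3'
# b'\x03Two' is a length-prefixed string argument: 3 bytes spelling 'Two'.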
def setUp(self):
    super().setUp()
    db = database.load_token_database(
        io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
    db.add(
        tokens.TokenizedStringEntry(tokens.default_hash(s), s)
        for s in [self.RECURSION_STRING, self.RECURSION_STRING_2])
    self.detok = detokenize.Detokenizer(db)
def setUp(self):
    self.db = tokens.Database([
        tokens.TokenizedStringEntry(1, 'Luke'),
        tokens.TokenizedStringEntry(2, 'Leia'),
        tokens.TokenizedStringEntry(2, 'Darth Vader'),
        tokens.TokenizedStringEntry(2, 'Emperor Palpatine'),
        tokens.TokenizedStringEntry(3, 'Han'),
        tokens.TokenizedStringEntry(4, 'Chewbacca'),
        tokens.TokenizedStringEntry(5, 'Darth Maul'),
        tokens.TokenizedStringEntry(6, 'Han Solo'),
    ])
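# Token 2 deliberately collides: three strings share it, so lookups by token
# must return every candidate. A quick sketch of that behavior, using the
# token_to_entries mapping exercised in the merge test below:
db = tokens.Database([tokens.TokenizedStringEntry(2, 'Leia'),
                      tokens.TokenizedStringEntry(2, 'Darth Vader')])
assert len(db.token_to_entries[2]) == 2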
def test_update_csv_file(self):
    self._path.write_text(CSV_DATABASE)
    db = tokens.DatabaseFile(self._path)
    self.assertEqual(str(db), CSV_DATABASE)

    db.add([tokens.TokenizedStringEntry(0xffffffff, 'New entry!')])

    db.write_to_file()

    self.assertEqual(self._path.read_text(),
                     CSV_DATABASE + 'ffffffff,          ,"New entry!"\n')
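# The expected line reflects the CSV layout assumed in this test: an 8-digit
# hex token, a 10-character date-removed column (the width of YYYY-MM-DD,
# blank padded when the entry is still live), and the double-quoted string.
# A sketch of formatting one such line under that assumption:
line = f'{0xffffffff:08x},{"":10},"New entry!"\n'
assert line == 'ffffffff,          ,"New entry!"\n'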
def test_unparsed_data(self):
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(1, 'no args',
                                        dt.datetime(100, 1, 1)),
        ]))
    result = detok.detokenize(b'\x01\0\0\0o_o')
    self.assertFalse(result.ok())
    self.assertEqual('no args', str(result))
    self.assertIn('o_o', repr(result))
    self.assertIn('decoding failed', result.error_message())
def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
    # With show_errors=True, missing arguments render as an inline error
    # marker in the output instead of the bare format string.
    detok = detokenize.Detokenizer(
        tokens.Database(
            [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]),
        show_errors=True)
    result = detok.detokenize(b'\x02\0\0\0')

    string, args, remaining = result.failures[0]
    self.assertIn('%s MISSING', string)
    self.assertEqual(len(args), 1)
    self.assertEqual(b'', remaining)
    self.assertEqual(len(result.failures), 1)
    self.assertIn('%s MISSING', str(result))
def test_merge_multiple(self):
    db = tokens.Database.merged(
        tokens.Database(
            [tokens.TokenizedStringEntry(1, 'one', datetime.datetime.max)]),
        tokens.Database(
            [tokens.TokenizedStringEntry(2, 'two', datetime.datetime.min)]),
        tokens.Database(
            [tokens.TokenizedStringEntry(1, 'one', datetime.datetime.min)]))
    self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

    db.merge(
        tokens.Database(
            [tokens.TokenizedStringEntry(4, 'four', datetime.datetime.max)]),
        tokens.Database(
            [tokens.TokenizedStringEntry(2, 'two', datetime.datetime.max)]),
        tokens.Database(
            [tokens.TokenizedStringEntry(3, 'three', datetime.datetime.min)]))
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four'})
def test_detokenize_extra_data_is_unsuccessful(self):
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(1, 'no args', dt.datetime(1, 1, 1))
        ]))

    result = detok.detokenize(b'\x01\0\0\0\x04args')
    self.assertEqual(len(result.failures), 1)
    string, args, remaining = result.failures[0]
    self.assertEqual('no args', string)
    self.assertFalse(args)
    self.assertEqual(b'\x04args', remaining)
    self.assertEqual('no args', str(result))
def test_detokenize_missing_data_is_unsuccessful(self):
    # Without show_errors, the decoded output is just the raw format string.
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(2, '%s',
                                        date_removed=dt.datetime(1, 1, 1))
        ]))
    result = detok.detokenize(b'\x02\0\0\0')

    string, args, remaining = result.failures[0]
    self.assertEqual('%s', string)
    self.assertEqual(len(args), 1)
    self.assertEqual(b'', remaining)
    self.assertEqual(len(result.failures), 1)
    self.assertEqual('%s', str(result))
def test_merge_multiple_databases_in_one_call(self):
    """Tests the merge and merged methods with multiple databases."""
    db = tokens.Database.merged(
        tokens.Database([
            tokens.TokenizedStringEntry(1, 'one',
                                        date_removed=datetime.datetime.max)
        ]),
        tokens.Database([
            tokens.TokenizedStringEntry(2, 'two',
                                        date_removed=datetime.datetime.min)
        ]),
        tokens.Database([
            tokens.TokenizedStringEntry(1, 'one',
                                        date_removed=datetime.datetime.min)
        ]))
    self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

    db.merge(
        tokens.Database([
            tokens.TokenizedStringEntry(4, 'four',
                                        date_removed=datetime.datetime.max)
        ]),
        tokens.Database([
            tokens.TokenizedStringEntry(2, 'two',
                                        date_removed=datetime.datetime.max)
        ]),
        tokens.Database([
            tokens.TokenizedStringEntry(3, 'three',
                                        date_removed=datetime.datetime.min)
        ]))
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four'})
def _entries(*strings: str) -> Iterator[tokens.TokenizedStringEntry]:
    for string in strings:
        yield tokens.TokenizedStringEntry(default_hash(string), string)
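# Usage sketch: build a database from plain strings, hashing each string the
# same way the tokenizer would.
db = tokens.Database(_entries('Luke', 'Leia'))
assert len(db.entries()) == 2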
def test_merge(self):
    """Tests the tokens.Database merge method."""
    db = tokens.Database()

    # Test basic merging into an empty database.
    db.merge(
        tokens.Database([
            tokens.TokenizedStringEntry(
                1, 'one', date_removed=datetime.datetime.min),
            tokens.TokenizedStringEntry(
                2, 'two', date_removed=datetime.datetime.min),
        ]))
    self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})
    self.assertEqual(db.token_to_entries[1][0].date_removed,
                     datetime.datetime.min)
    self.assertEqual(db.token_to_entries[2][0].date_removed,
                     datetime.datetime.min)

    # Test merging in entries with and without removal dates.
    db.merge(
        tokens.Database([
            tokens.TokenizedStringEntry(3, 'three'),
            tokens.TokenizedStringEntry(
                4, 'four', date_removed=datetime.datetime.min),
        ]))
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four'})
    self.assertIsNone(db.token_to_entries[3][0].date_removed)
    self.assertEqual(db.token_to_entries[4][0].date_removed,
                     datetime.datetime.min)

    # Test merging in one entry.
    db.merge(tokens.Database([
        tokens.TokenizedStringEntry(5, 'five'),
    ]))
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four', 'five'})
    self.assertEqual(db.token_to_entries[4][0].date_removed,
                     datetime.datetime.min)
    self.assertIsNone(db.token_to_entries[5][0].date_removed)

    # Merge in repeated entries with different removal dates.
    db.merge(
        tokens.Database([
            tokens.TokenizedStringEntry(
                4, 'four', date_removed=datetime.datetime.max),
            tokens.TokenizedStringEntry(
                5, 'five', date_removed=datetime.datetime.max),
        ]))
    self.assertEqual(len(db.entries()), 5)
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four', 'five'})
    self.assertEqual(db.token_to_entries[4][0].date_removed,
                     datetime.datetime.max)
    self.assertIsNone(db.token_to_entries[5][0].date_removed)

    # Merge in the same repeated entries, now without removal dates.
    db.merge(
        tokens.Database([
            tokens.TokenizedStringEntry(4, 'four'),
            tokens.TokenizedStringEntry(5, 'five')
        ]))
    self.assertEqual(len(db.entries()), 5)
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four', 'five'})
    self.assertIsNone(db.token_to_entries[4][0].date_removed)
    self.assertIsNone(db.token_to_entries[5][0].date_removed)

    # Merge in an empty database.
    db.merge(tokens.Database([]))
    self.assertEqual({str(e) for e in db.entries()},
                     {'one', 'two', 'three', 'four', 'five'})
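# The rule the assertions above verify: when the same (token, string) pair is
# merged, the newest removal state wins, and a missing date_removed (meaning
# the entry is still live) beats any date. A compact restatement:
db = tokens.Database([
    tokens.TokenizedStringEntry(9, 'nine', date_removed=datetime.datetime.min)
])
db.merge(tokens.Database([tokens.TokenizedStringEntry(9, 'nine')]))
assert db.token_to_entries[9][0].date_removed is None  # live entry wins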
def setUp(self):
    super().setUp()
    token = 0xbaad

    # Database with several conflicting tokens.
    self.detok = detokenize.Detokenizer(tokens.Database([
        tokens.TokenizedStringEntry(
            token, 'REMOVED', date_removed=dt.datetime(9, 1, 1)),
        tokens.TokenizedStringEntry(token, 'newer'),
        tokens.TokenizedStringEntry(
            token, 'A: %d', date_removed=dt.datetime(30, 5, 9)),
        tokens.TokenizedStringEntry(
            token, 'B: %c', date_removed=dt.datetime(30, 5, 10)),
        tokens.TokenizedStringEntry(token, 'C: %s'),
        tokens.TokenizedStringEntry(token, '%d%u'),
        tokens.TokenizedStringEntry(token, '%s%u %d'),
        tokens.TokenizedStringEntry(1, '%s'),
        tokens.TokenizedStringEntry(1, '%d'),
        tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
        tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
    ]))  # yapf: disable