Example #1
def setUp(self):
    super().setUp()
    self.detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # Base64 of token 0
            tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # Base64 of token 2
            tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # Base64 of token 3
            tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # Base64 of token 2
        ]))
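These replacement strings are themselves Base64-encoded tokens (the '$' prefix marks nested tokenized data), so detokenization can recurse through them. The payloads decode to 4-byte little-endian token values, which can be verified with the standard library alone (nothing below is pw_tokenizer API):

import base64
import struct

# Each '$'-prefixed payload is the Base64 of a 4-byte little-endian token.
assert struct.unpack('<I', base64.b64decode('AAAAAA=='))[0] == 0
assert struct.unpack('<I', base64.b64decode('AgAAAA=='))[0] == 2
assert struct.unpack('<I', base64.b64decode('AwAAAA=='))[0] == 3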
Example #2
def _read_tokenized_entries(
        data: bytes,
        domain: Pattern[str]) -> Iterator[tokens.TokenizedStringEntry]:
    index = 0

    while index + _ENTRY.size <= len(data):
        magic, token, domain_len, string_len = _ENTRY.unpack_from(data, index)

        if magic != _TOKENIZED_ENTRY_MAGIC:
            raise Error(
                f'Expected magic number 0x{_TOKENIZED_ENTRY_MAGIC:08x}, '
                f'found 0x{magic:08x}')

        start = index + _ENTRY.size
        index = start + domain_len + string_len

        # Create the entry, trimming the null terminators from the domain
        # and string.
        entry = tokens.TokenizedStringEntry(
            token,
            data[start + domain_len:index - 1].decode(errors=_ERROR_HANDLER),
            data[start:start + domain_len - 1].decode(errors=_ERROR_HANDLER),
        )

        if data[start + domain_len - 1] != 0:
            raise Error(
                f'Domain {entry.domain} for {entry.string} not null terminated'
            )

        if data[index - 1] != 0:
            raise Error(f'String {entry.string} is not null terminated')

        if domain.fullmatch(entry.domain):
            yield entry
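This parser depends on module-level definitions that sit outside the snippet. A plausible reconstruction for context (the four-field little-endian layout follows from the unpack above; the magic value, error handler, and Error class are assumptions, not confirmed by this snippet):

import struct
from typing import Iterator, Pattern

# Four little-endian uint32 fields: magic, token, domain length, string length.
_ENTRY = struct.Struct('<4I')
_TOKENIZED_ENTRY_MAGIC = 0xBAA98DEE  # assumed magic value
_ERROR_HANDLER = 'surrogateescape'   # assumed decode error handler

class Error(Exception):
    """Raised when tokenized-entry data is malformed."""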
Example #3
def test_simple(self):
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(0xcdab, '%02d %s %c%%',
                                        dt.datetime.now())
        ]))
    self.assertEqual(str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
                     '01 Two 3%')
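The encoded message is the 4-byte little-endian token (0xcdab) followed by the arguments. Integer and character arguments appear to be ZigZag-encoded varints and strings are length-prefixed, which accounts for the expected output. A stdlib-only check of that reading (the wire-format details are inferred from this test, not from documented API):

def zigzag_decode(value: int) -> int:
    # Standard ZigZag decoding, as used by protobuf-style varints.
    return (value >> 1) ^ -(value & 1)

assert zigzag_decode(0x02) == 1         # '%02d' -> '01'
assert b'\x03Two'[1:1 + 3] == b'Two'    # length-prefixed '%s' -> 'Two'
assert chr(zigzag_decode(0x66)) == '3'  # '%c' -> '3'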
Example #4
def setUp(self):
    super().setUp()
    db = database.load_token_database(
        io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
    db.add(
        tokens.TokenizedStringEntry(tokens.default_hash(s), s)
        for s in [self.RECURSION_STRING, self.RECURSION_STRING_2])
    self.detok = detokenize.Detokenizer(db)
Example #5
def setUp(self):
    self.db = tokens.Database([
        tokens.TokenizedStringEntry(1, 'Luke'),
        tokens.TokenizedStringEntry(2, 'Leia'),
        tokens.TokenizedStringEntry(2, 'Darth Vader'),
        tokens.TokenizedStringEntry(2, 'Emperor Palpatine'),
        tokens.TokenizedStringEntry(3, 'Han'),
        tokens.TokenizedStringEntry(4, 'Chewbacca'),
        tokens.TokenizedStringEntry(5, 'Darth Maul'),
        tokens.TokenizedStringEntry(6, 'Han Solo'),
    ])
Example #6
    def test_update_csv_file(self):
        self._path.write_text(CSV_DATABASE)
        db = tokens.DatabaseFile(self._path)
        self.assertEqual(str(db), CSV_DATABASE)

        db.add([tokens.TokenizedStringEntry(0xffffffff, 'New entry!')])

        db.write_to_file()

        self.assertEqual(self._path.read_text(),
                         CSV_DATABASE + 'ffffffff,          ,"New entry!"\n')
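The expected CSV row suggests three columns: the token as eight hex digits, a removal-date column (blank-padded while the entry is still active), and the quoted string. A quick stdlib check of that reading (the column meanings are inferred from this test, not from a documented schema):

import csv
import io

row = next(csv.reader(io.StringIO('ffffffff,          ,"New entry!"\n')))
assert row == ['ffffffff', '          ', 'New entry!']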
Example #7
def test_unparsed_data(self):
    detok = detokenize.Detokenizer(
        tokens.Database([
            tokens.TokenizedStringEntry(1, 'no args',
                                        dt.datetime(100, 1, 1)),
        ]))
    result = detok.detokenize(b'\x01\0\0\0o_o')
    self.assertFalse(result.ok())
    self.assertEqual('no args', str(result))
    self.assertIn('o_o', repr(result))
    self.assertIn('decoding failed', result.error_message())
Example #8
    def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
        detok = detokenize.Detokenizer(tokens.Database(
            [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]),
                                       show_errors=True)

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertIn('%s MISSING', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertIn('%s MISSING', str(result))
Example #9
    def test_merge_multiple(self):
        db = tokens.Database.merged(
            tokens.Database(
                [tokens.TokenizedStringEntry(1, 'one',
                                             datetime.datetime.max)]),
            tokens.Database(
                [tokens.TokenizedStringEntry(2, 'two',
                                             datetime.datetime.min)]),
            tokens.Database(
                [tokens.TokenizedStringEntry(1, 'one',
                                             datetime.datetime.min)]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4, 'four', datetime.datetime.max)
            ]),
            tokens.Database(
                [tokens.TokenizedStringEntry(2, 'two',
                                             datetime.datetime.max)]),
            tokens.Database([
                tokens.TokenizedStringEntry(3, 'three', datetime.datetime.min)
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
Example #10
    def test_detokenize_extra_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args', dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x01\0\0\0\x04args')
        self.assertEqual(len(result.failures), 1)
        string, args, remaining = result.failures[0]
        self.assertEqual('no args', string)
        self.assertFalse(args)
        self.assertEqual(b'\x04args', remaining)
        self.assertEqual('no args', str(result))
Example #11
    def test_detokenize_missing_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            '%s',
                                            date_removed=dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertEqual('%s', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertEqual('%s', str(result))
Example #12
    def test_merge_multiple_databases_in_one_call(self):
        """Tests the merge and merged methods with multiple databases."""
        db = tokens.Database.merged(
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'one',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            'two',
                                            date_removed=datetime.datetime.min)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'one',
                                            date_removed=datetime.datetime.min)
            ]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4,
                                            'four',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            'two',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(3,
                                            'three',
                                            date_removed=datetime.datetime.min)
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
Example #13
def _entries(*strings: str) -> Iterator[tokens.TokenizedStringEntry]:
    for string in strings:
        yield tokens.TokenizedStringEntry(default_hash(string), string)
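A sketch of how this helper might be used to build a database from plain strings (the import paths are assumptions; default_hash is presumed to come from the same tokens module, as in Example #4):

from pw_tokenizer import tokens
from pw_tokenizer.tokens import default_hash

db = tokens.Database(_entries('hello', 'world'))
assert all(db.token_to_entries[default_hash(s)] for s in ('hello', 'world'))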
Example #14
    def test_merge(self):
        """Tests the tokens.Database merge method."""

        db = tokens.Database()

        # Test basic merging into an empty database.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(
                    1, 'one', date_removed=datetime.datetime.min),
                tokens.TokenizedStringEntry(
                    2, 'two', date_removed=datetime.datetime.min),
            ]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})
        self.assertEqual(db.token_to_entries[1][0].date_removed,
                         datetime.datetime.min)
        self.assertEqual(db.token_to_entries[2][0].date_removed,
                         datetime.datetime.min)

        # Test merging in an entry with a removal date.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(3, 'three'),
                tokens.TokenizedStringEntry(
                    4, 'four', date_removed=datetime.datetime.min),
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
        self.assertIsNone(db.token_to_entries[3][0].date_removed)
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.min)

        # Test merging in one entry.
        db.merge(tokens.Database([
            tokens.TokenizedStringEntry(5, 'five'),
        ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.min)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in repeated entries with different removal dates.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(
                    4, 'four', date_removed=datetime.datetime.max),
                tokens.TokenizedStringEntry(
                    5, 'five', date_removed=datetime.datetime.max),
            ]))
        self.assertEqual(len(db.entries()), 5)
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.max)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in the same repeated entries now without removal dates.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4, 'four'),
                tokens.TokenizedStringEntry(5, 'five')
            ]))
        self.assertEqual(len(db.entries()), 5)
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertIsNone(db.token_to_entries[4][0].date_removed)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in an empty database.
        db.merge(tokens.Database([]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
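Taken together, these assertions imply a merge rule for duplicate tokens: an entry with date_removed=None (still present) outranks any dated entry, and otherwise the later removal date wins. A minimal illustration with a hypothetical token 9 (this mirrors the assertions above rather than documented behavior):

merged = tokens.Database.merged(
    tokens.Database([
        tokens.TokenizedStringEntry(9, 'nine',
                                    date_removed=datetime.datetime.min)
    ]),
    tokens.Database([tokens.TokenizedStringEntry(9, 'nine')]),  # no date
)
assert merged.token_to_entries[9][0].date_removed is None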
Example #15
    def setUp(self):
        super().setUp()
        token = 0xbaad

        # Database with several conflicting tokens.
        self.detok = detokenize.Detokenizer(tokens.Database([
            tokens.TokenizedStringEntry(
                token, 'REMOVED', date_removed=dt.datetime(9, 1, 1)),
            tokens.TokenizedStringEntry(token, 'newer'),
            tokens.TokenizedStringEntry(
                token, 'A: %d', date_removed=dt.datetime(30, 5, 9)),
            tokens.TokenizedStringEntry(
                token, 'B: %c', date_removed=dt.datetime(30, 5, 10)),
            tokens.TokenizedStringEntry(token, 'C: %s'),
            tokens.TokenizedStringEntry(token, '%d%u'),
            tokens.TokenizedStringEntry(token, '%s%u %d'),
            tokens.TokenizedStringEntry(1, '%s'),
            tokens.TokenizedStringEntry(1, '%d'),
            tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
            tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
        ]))  # yapf: disable
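With seven strings sharing token 0xbaad, detokenization must choose among candidates. Judging from these fixtures, the detokenizer presumably prefers entries whose arguments decode cleanly and that have no removal date. A hypothetical lookup under that assumption (the expected value is inferred, not asserted by the source):

        # With no argument bytes, only the argument-free strings ('REMOVED'
        # and 'newer') decode cleanly; the still-active 'newer' presumably
        # wins.
        result = self.detok.detokenize(b'\xad\xba\x00\x00')
        # Expected under the assumption above: str(result) == 'newer'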