Example #1
0
    def test_string_too_long_warning(self):
        """An over-long (unterminated) string literal must be reported.

        The stray quote after "Bla bla" opens a string that swallows the
        following directives; with the per-builder line limit lowered to 8,
        the lexer is expected to record a 'String too long' error.
        """
        # This tests the maximum string length implemented in Python, which is used
        # to detect input errors.
        test_input = """
          ;; This is a typical error that should get detected for long strings.
          2014-01-01 note Assets:Temporary "Bla bla" "

          2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
          2014-02-02 open Liabilities:US:BankWithLongName:Credit-Card:Account02
          2014-02-03 open Liabilities:US:BankWithLongName:Credit-Card:Account03
          2014-02-04 open Liabilities:US:BankWithLongName:Credit-Card:Account04
          2014-02-05 open Liabilities:US:BankWithLongName:Credit-Card:Account05
          2014-02-06 open Liabilities:US:BankWithLongName:Credit-Card:Account06
          2014-02-07 open Liabilities:US:BankWithLongName:Credit-Card:Account07
          2014-02-08 open Liabilities:US:BankWithLongName:Credit-Card:Account08
          2014-02-09 open Liabilities:US:BankWithLongName:Credit-Card:Account09
          2014-02-10 open Liabilities:US:BankWithLongName:Credit-Card:Account10

          2014-02-02 note Assets:Temporary "Bla bla"
        """
        builder = lexer.LexBuilder()
        # Lower the limit so this input trips it without a huge fixture.
        builder.long_string_maxlines_default = 8
        # Token stream is discarded; only the recorded errors matter here.
        list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
        self.assertLessEqual(1, len(builder.errors))
        self.assertRegex(builder.errors[0].message, 'String too long')
 def test_very_long_string(self):
     """Lexing one very large (256 KiB) quoted string must produce a matching token."""
     # This tests lexing with a string of 256k.
     test_input = '"' + ('1234567890ABCDEF' * (256*64)) + '"'
     builder = lexer.LexBuilder()
     # dedent() is a no-op on this single-line input; kept for symmetry.
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     # The first token's value must be the input minus the surrounding quotes.
     self.assertEqual(tokens[0][3], test_input[1:-1])
     # NOTE(review): assertLessEqual(0, len(...)) is vacuously true — any
     # length is >= 0.  Probably assertEqual(0, ...) was meant; confirm intent.
     self.assertLessEqual(0, len(builder.errors))
Example #3
0
 def test_bytes_encoded_utf8(self):
     """Valid UTF-8 bytes input decodes cleanly back to the source string."""
     encoded = self.test_utf8_string.encode('utf8')
     lex_builder = lexer.LexBuilder()
     produced = list(lexer.lex_iter_string(encoded, lex_builder))
     # No decoding problems are expected for well-formed UTF-8 input.
     self.assertFalse(lex_builder.errors)
     # The first STRING token carries the decoded text.
     strings = [tok for tok in produced if tok[0] == 'STRING']
     self.assertEqual(self.expected_utf8_string, strings[0][3])
Example #4
0
 def test_bytes_encoded_latin1_invalid(self):
     """Latin-1 bytes fed to the default (UTF-8) decoder surface a decode error."""
     raw = self.test_utf8_string.encode('latin1')
     lex_builder = lexer.LexBuilder()
     # The token stream itself is irrelevant; only the errors are checked.
     list(lexer.lex_iter_string(raw, lex_builder))
     errors = lex_builder.errors
     self.assertTrue(errors)
     self.assertRegex(errors[0].message,
                      "^UnicodeDecodeError: 'utf-8' codec ")
Example #5
0
 def test_string_newline_toolong(self):
     """A 128-line quoted string (127 'a's per line) busts the lexer limits.

     Bug fix: the original used ``self.assertTrue(tokens[0], 'error')``
     and ``self.assertTrue(tokens[1], 'EOL')``; assertTrue's second
     argument is only the failure *message*, so those lines merely checked
     that the token tuples were truthy and never compared anything.  They
     are replaced by real equality checks on the token-type field.
     """
     # Testing a string that busts the limits.
     line = 'a' * 127 + '\n'
     string = '"' + line * 128 + '"'
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(string, builder))
     # NOTE(review): the original message literal was 'error'; the error
     # token emitted elsewhere in this suite is 'LEX_ERROR' — confirm the
     # exact token name against the lexer module.
     self.assertEqual('LEX_ERROR', tokens[0][0])
     self.assertEqual('EOL', tokens[1][0])
Example #6
0
 def _run_lexer_with_raising_builder_method(self, test_input, method_name,
                                            expected_tokens):
     """Run the lexer with one builder callback replaced by a raiser.

     Verifies that the exception is converted into exactly one recorded
     error and that the produced token stream equals ``expected_tokens``.
     """
     lex_builder = lexer.LexBuilder()

     def _always_raise(string):
         raise ValueError

     # Override the named callback (e.g. 'STRING') with the raising stub.
     setattr(lex_builder, method_name, _always_raise)
     produced = list(
         lexer.lex_iter_string(textwrap.dedent(test_input), lex_builder))
     self.assertEqual(expected_tokens, produced)
     self.assertEqual(1, len(lex_builder.errors))
Example #7
0
 def test_lexer_builder_returns_none(self):
     """A callback returning None yields a LEX_ERROR token plus one error."""
     lex_builder = lexer.LexBuilder()

     def _return_none(string):
         return None

     # Equivalent to setattr(builder, 'STRING', ...): swap in the stub handler.
     lex_builder.STRING = _return_none
     produced = list(lexer.lex_iter_string('"Something"', lex_builder))
     expected = [('LEX_ERROR', 1, '"', None),
                 ('EOL', 1, '\x00', None)]
     self.assertEqual(expected, produced)
     self.assertEqual(1, len(lex_builder.errors))
     self.assertRegex(lex_builder.errors[0].message, "None result from lexer")
    def test_bytes_encoded_latin1(self):
        """Bytes input decodes correctly with an explicit 'latin1' override."""
        raw = self.test_latin1_string.encode('latin1')
        lex_builder = lexer.LexBuilder()
        produced = list(
            lexer.lex_iter_string(raw, lex_builder, encoding='latin1'))

        # No errors: the declared encoding matches the payload.
        self.assertFalse(lex_builder.errors)

        # The decoded STRING token round-trips back to the expected text.
        strings = [tok for tok in produced if tok[0] == 'STRING']
        self.assertEqual(self.expected_latin1_string, strings[0][3])
Example #9
0
    def test_bytes_encoded_utf8(self):
        """Valid UTF-8 bytes input is decoded transparently into STRING tokens."""
        utf8_bytes = self.test_utf8_string.encode('utf8')
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(utf8_bytes, builder))

        # The lexer outputs no errors.
        self.assertFalse(builder.errors)

        # Check that the lexer correctly parsed the UTF8 string.
        str_tokens = [token for token in tokens if token[0] == 'STRING']
        self.assertEqual(self.expected_utf8_string, str_tokens[0][3])
Example #10
0
    def test_bytes_encoded_latin1_invalid(self):
        """Latin-1 bytes decoded without an encoding override yield a mismatch.

        NOTE(review): this variant expects NO recorded errors — the bytes
        apparently decode (to the wrong text) rather than raising; confirm
        against the lexer's default decoding behavior.
        """
        latin1_bytes = self.test_utf8_string.encode('latin1')
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(latin1_bytes, builder))

        # The lexer outputs no errors.
        self.assertFalse(builder.errors)

        # Check that the lexer failed to convert the string but did not cause
        # other errors.
        str_tokens = [token for token in tokens if token[0] == 'STRING']
        self.assertNotEqual(self.expected_utf8_string, str_tokens[0][3])
Example #11
0
 def test_lexer_exception_CURRENCY(self):
     """An exception raised inside a builder callback becomes a LEX_ERROR token."""
     test_input = """
       USD
     """
     builder = lexer.LexBuilder()
     builder.commodities = {}  # This will force an exception because the
                               # parser calls add() on it.
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     # The currency token is replaced by LEX_ERROR; EOL tokens still flow.
     self.assertEqual([('EOL', 2, '\n', None),
                       ('LEX_ERROR', 2, 'USD', None),
                       ('EOL', 3, '\n', None),
                       ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(1, len(builder.errors))
 def test_string_too_long_warning(self):
     """An unterminated quote spanning many lines triggers the too-long error.

     The stray quote after "Bla bla" opens a string that runs through 64
     injected blank lines; the exact error message (68 lines) is pinned.
     """
     input_text = """
       ;; This is a typical error that should get detected for long strings.
       2014-01-01 note Assets:Temporary "Bla bla" "
       2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
     """ + "\n" * 64 + """
       2014-02-02 note Assets:Temporary "Bla bla"
     """
     lex_builder = lexer.LexBuilder()
     # Only the recorded errors matter; the token stream is discarded.
     list(lexer.lex_iter_string(textwrap.dedent(input_text), lex_builder))
     self.assertLessEqual(1, len(lex_builder.errors))
     self.assertEqual(lex_builder.errors[0].message,
                      'ValueError: String too long (68 lines)')
Example #13
0
 def test_lexer_exception_ACCOUNT(self):
     """An account name rejected by the account regexp becomes a LEX_ERROR."""
     test_input = """
       Invalid:Something
     """
     builder = lexer.LexBuilder()
     # This modification is similar to what the options do, and will cause a
     # ValueError exception to be raised in the lexer.
     builder.account_regexp = re.compile('(Assets|Liabilities|Equity)'
                                         '(:[A-Z][A-Za-z0-9-]*)*$')
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     # 'Invalid:Something' does not match the restricted regexp above.
     self.assertEqual([('EOL', 2, '\n', None),
                       ('LEX_ERROR', 2, 'Invalid:Something', None),
                       ('EOL', 3, '\n', None),
                       ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(1, len(builder.errors))
Example #14
0
 def test_lexer_exception_substring_with_quotes(self):
     """Single quotes inside a double-quoted string must lex cleanly."""
     test_input = """
       2016-07-15 query "hotels" "SELECT * WHERE account ~ 'Expenses:Accommodation'"
     """
     builder = lexer.LexBuilder()
     tokens = list(
         lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     # The embedded single-quoted substring is preserved verbatim in the
     # STRING token's value, and no errors are recorded.
     self.assertEqual(
         [('EOL', 2, '\n', None),
          ('DATE', 2, '2016-07-15', datetime.date(2016, 7, 15)),
          ('QUERY', 2, 'query', None), ('STRING', 2, '"', 'hotels'),
          ('STRING', 2, '"',
           "SELECT * WHERE account ~ 'Expenses:Accommodation'"),
          ('EOL', 3, '\n', None), ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(0, len(builder.errors))
Example #15
0
 def test_bytes_encoded_utf16_invalid(self):
     """UTF-16 bytes are not valid UTF-8 input, so errors must be recorded."""
     raw = self.test_utf8_string.encode('utf16')
     lex_builder = lexer.LexBuilder()
     # Token contents are irrelevant; only the presence of errors is checked.
     list(lexer.lex_iter_string(raw, lex_builder))
     self.assertTrue(lex_builder.errors)
Example #16
0
 def wrapped(self):
     """Lex the decorated function's docstring and delegate to it.

     NOTE(review): closes over ``fun`` from an enclosing decorator not
     visible here; the decorated test receives (self, tokens, errors).
     """
     string = fun.__doc__
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(textwrap.dedent(string), builder))
     return fun(self, tokens, builder.errors)
Example #17
0
 def test_bytes_encoded_utf16(self):
     """Feeding UTF-16 bytes to the lexer is expected to fail hard.

     NOTE(review): SystemError suggests the failure originates in a C
     extension; confirm the exact exception type against the lexer module.
     """
     utf16_bytes = self.test_utf8_string.encode('utf16')
     builder = lexer.LexBuilder()
     with self.assertRaises(SystemError):
         tokens = list(lexer.lex_iter_string(utf16_bytes, builder))