Example 1
    def test_string_too_long_warning(self):
        # This tests the maximum string length (in lines) implemented in Python,
        # which is used to detect input errors such as an unclosed quote.
        test_input = """
          ;; This is a typical error that should get detected for long strings.
          2014-01-01 note Assets:Temporary "Bla bla" "

          2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
          2014-02-02 open Liabilities:US:BankWithLongName:Credit-Card:Account02
          2014-02-03 open Liabilities:US:BankWithLongName:Credit-Card:Account03
          2014-02-04 open Liabilities:US:BankWithLongName:Credit-Card:Account04
          2014-02-05 open Liabilities:US:BankWithLongName:Credit-Card:Account05
          2014-02-06 open Liabilities:US:BankWithLongName:Credit-Card:Account06
          2014-02-07 open Liabilities:US:BankWithLongName:Credit-Card:Account07
          2014-02-08 open Liabilities:US:BankWithLongName:Credit-Card:Account08
          2014-02-09 open Liabilities:US:BankWithLongName:Credit-Card:Account09
          2014-02-10 open Liabilities:US:BankWithLongName:Credit-Card:Account10

          2014-02-02 note Assets:Temporary "Bla bla"
        """
        builder = lexer.LexBuilder()
        builder.long_string_maxlines_default = 8
        list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
        self.assertLessEqual(1, len(builder.errors))
        self.assertRegex(builder.errors[0].message, 'String too long')
Example 2
    def test_parser_lex_filename(self):
        # Do not use a string to avoid issues due to string interning.
        name = object()
        self.assertEqual(sys.getrefcount(name), 2)

        f = io.BytesIO(b"")
        f.name = object()
        self.assertEqual(sys.getrefcount(f.name), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        iterator = parser.lex(f, filename=name)
        tokens = list(iterator)
        # The Parser object keeps references to the input file and to
        # the name while iterating over the tokens in the input file.
        self.assertEqual(sys.getrefcount(name), 3)
        self.assertEqual(sys.getrefcount(f), 3)
        # The name attribute of the file object is not referenced.
        self.assertEqual(sys.getrefcount(f.name), 2)

        del parser
        del iterator
        # Once the Parser object is gone we should have just the local
        # references to the file object and to the name.
        self.assertEqual(sys.getrefcount(name), 2)
        self.assertEqual(sys.getrefcount(f), 2)
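
The string-interning concern noted at the top of this test can be shown in isolation; a minimal sketch of the CPython behavior, independent of the lexer:

import sys

# CPython interns short identifier-like strings, so unrelated code and
# internal caches may also hold references to them, which makes their
# reference counts unpredictable.
interned = 'a'
print(sys.getrefcount(interned))   # typically a large number

# A bare object() is referenced only where this code puts it: one local
# name plus the temporary reference from getrefcount()'s own argument.
unique = object()
print(sys.getrefcount(unique))     # 2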
Example 3
 def test_lex_lineno(self):
     f = io.BytesIO(b"1.0")
     builder = lexer.LexBuilder()
     parser = _parser.Parser(builder)
     tokens = list(parser.lex(f, lineno=42))
     token, lineno, matched, value = tokens[0]
     self.assertEqual(lineno, 42)
Example 4
 def test_very_long_string(self):
     # This tests lexing with a string of 256k.
     test_input = '"' + ('1234567890ABCDEF' * (256*64)) + '"'
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertEqual(tokens[0][3], test_input[1:-1])
     # The lexer should handle the long string without recording errors.
     self.assertEqual(0, len(builder.errors))
Example 5
 def test_bytes_encoded_utf8(self):
     utf8_bytes = self.test_utf8_string.encode('utf8')
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(utf8_bytes, builder))
     self.assertFalse(builder.errors)
     str_tokens = [token for token in tokens if token[0] == 'STRING']
     self.assertEqual(self.expected_utf8_string, str_tokens[0][3])
Example 6
 def test_bytes_encoded_latin1_invalid(self):
     latin1_bytes = self.test_utf8_string.encode('latin1')
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(latin1_bytes, builder))
     errors = builder.errors
     self.assertTrue(errors)
     self.assertRegex(errors[0].message,
                      "^UnicodeDecodeError: 'utf-8' codec ")
Example 7
 def test_string_newline_toolong(self):
     # Testing a string that busts the limits.
     line = 'a' * 127 + '\n'
     string = '"' + line * 128 + '"'
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(string, builder))
     # The first token should be an error token, followed by an EOL.
     self.assertEqual('LEX_ERROR', tokens[0][0])
     self.assertEqual('EOL', tokens[1][0])
Example 8
 def _run_lexer_with_raising_builder_method(self, test_input, method_name,
                                            expected_tokens):
     builder = lexer.LexBuilder()
     def raise_error(string):
         raise ValueError
     setattr(builder, method_name, raise_error)
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertEqual(expected_tokens, tokens)
     self.assertEqual(1, len(builder.errors))
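
A sketch of how this helper might be driven; the DATE override and the expected token tuples are illustrative assumptions modeled on the LEX_ERROR examples further down, not tests taken from the suite:

 def test_lexer_exception_DATE(self):
     # Hypothetical: making the builder's DATE callback raise should
     # turn the date token into a LEX_ERROR and record one error.
     test_input = """
       2000-01-01 open Assets:Temporary
     """
     self._run_lexer_with_raising_builder_method(
         test_input, 'DATE',
         [('EOL', 2, '\n', None),
          ('LEX_ERROR', 2, '2000-01-01', None),
          ('OPEN', 2, 'open', None),
          ('ACCOUNT', 2, 'Assets:Temporary', 'Assets:Temporary'),
          ('EOL', 3, '\n', None),
          ('EOL', 3, '\x00', None)])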
Example 9
 def test_lexer_builder_returns_none(self):
     builder = lexer.LexBuilder()
     def return_none(string):
         return None
     setattr(builder, 'STRING', return_none)
     tokens = list(lexer.lex_iter_string('"Something"', builder))
     self.assertEqual([('LEX_ERROR', 1, '"', None),
                       ('EOL', 1, '\x00', None)], tokens)
     self.assertEqual(1, len(builder.errors))
     self.assertRegex(builder.errors[0].message, "None result from lexer")
Example 10
    def test_bytes_encoded_utf8(self):
        utf8_bytes = self.test_utf8_string.encode('utf8')
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(utf8_bytes, builder))

        # The lexer outputs no errors.
        self.assertFalse(builder.errors)

        # Check that the lexer correctly parsed the UTF8 string.
        str_tokens = [token for token in tokens if token[0] == 'STRING']
        self.assertEqual(self.expected_utf8_string, str_tokens[0][3])
Example 11
    def test_bytes_encoded_latin1(self):
        latin1_bytes = self.test_latin1_string.encode('latin1')
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(latin1_bytes, builder, encoding='latin1'))

        # The lexer outputs no errors.
        self.assertFalse(builder.errors)

        # Check that the lexer correctly parsed the latin1 string.
        str_tokens = [token for token in tokens if token[0] == 'STRING']
        self.assertEqual(self.expected_latin1_string, str_tokens[0][3])
Example 12
    def test_bytes_encoded_latin1_invalid(self):
        latin1_bytes = self.test_utf8_string.encode('latin1')
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(latin1_bytes, builder))

        # The lexer outputs no errors.
        self.assertFalse(builder.errors)

        # Check that the lexer failed to convert the string but did not cause
        # other errors.
        str_tokens = [token for token in tokens if token[0] == 'STRING']
        self.assertNotEqual(self.expected_utf8_string, str_tokens[0][3])
Example 13
 def test_string_too_long_warning(self):
     test_input = """
       ;; This is a typical error that should get detected for long strings.
       2014-01-01 note Assets:Temporary "Bla bla" "
       2014-02-01 open Liabilities:US:BankWithLongName:Credit-Card:Account01
     """ + "\n" * 64 + """
       2014-02-02 note Assets:Temporary "Bla bla"
     """
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertLessEqual(1, len(builder.errors))
     self.assertEqual(builder.errors[0].message,
                      'ValueError: String too long (68 lines)')
Example 14
 def test_lexer_exception_CURRENCY(self):
     test_input = """
       USD
     """
     builder = lexer.LexBuilder()
     builder.commodities = {}  # This will force an exception because the
                               # lexer calls add() on it, and dicts have
                               # no add() method.
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertEqual([('EOL', 2, '\n', None),
                       ('LEX_ERROR', 2, 'USD', None),
                       ('EOL', 3, '\n', None),
                       ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(1, len(builder.errors))
Example 15
 def test_lexer_exception_ACCOUNT(self):
     test_input = """
       Invalid:Something
     """
     builder = lexer.LexBuilder()
     # This modification is similar to what the account-name options do, and
     # will cause a ValueError exception to be raised in the lexer.
     builder.account_regexp = re.compile('(Assets|Liabilities|Equity)'
                                         '(:[A-Z][A-Za-z0-9-]*)*$')
     tokens = list(lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertEqual([('EOL', 2, '\n', None),
                       ('LEX_ERROR', 2, 'Invalid:Something', None),
                       ('EOL', 3, '\n', None),
                       ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(1, len(builder.errors))
Example 16
 def test_lexer_exception_substring_with_quotes(self):
     test_input = """
       2016-07-15 query "hotels" "SELECT * WHERE account ~ 'Expenses:Accommodation'"
     """
     builder = lexer.LexBuilder()
     tokens = list(
         lexer.lex_iter_string(textwrap.dedent(test_input), builder))
     self.assertEqual(
         [('EOL', 2, '\n', None),
          ('DATE', 2, '2016-07-15', datetime.date(2016, 7, 15)),
          ('QUERY', 2, 'query', None), ('STRING', 2, '"', 'hotels'),
          ('STRING', 2, '"',
           "SELECT * WHERE account ~ 'Expenses:Accommodation'"),
          ('EOL', 3, '\n', None), ('EOL', 3, '\x00', None)], tokens)
     self.assertEqual(0, len(builder.errors))
Example 17
    def test_parser_lex(self):
        # Do not use a string to avoid issues due to string interning.
        name = object()
        # Note that passing name as an argument to sys.getrefcount()
        # counts as one reference, thus the minimum reference count
        # returned for any object is 2.
        self.assertEqual(sys.getrefcount(name), 2)

        f = io.BytesIO(b"")
        f.name = name
        # One more reference from the 'name' attribute.
        self.assertEqual(sys.getrefcount(name), 3)
        # Just one reference to the BytesIO object.
        self.assertEqual(sys.getrefcount(f), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        iterator = parser.lex(f)
        # The Parser object keeps references to the input file and to
        # the name while iterating over the tokens in the input file.
        self.assertEqual(sys.getrefcount(name), 4)
        self.assertEqual(sys.getrefcount(f), 3)
        # The iterator holds one reference to the parser.
        self.assertEqual(sys.getrefcount(parser), 3)

        tokens = list(iterator)
        # Just the EOL token.
        self.assertEqual(len(tokens), 1)
        # Once scanning is completed, the Parser object still has
        # references to the input file and to the name.
        self.assertEqual(sys.getrefcount(name), 4)
        self.assertEqual(sys.getrefcount(f), 3)

        del parser
        del iterator
        # Once the Parser object is gone we should have just the local
        # reference to the file object and two references to name.
        self.assertEqual(sys.getrefcount(name), 3)
        self.assertEqual(sys.getrefcount(f), 2)

        del f
        # With the file object gone there is one reference to name.
        self.assertEqual(sys.getrefcount(name), 2)
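
The counting convention explained at the top of this test can be checked in isolation; a minimal sketch, independent of the lexer:

import sys

obj = object()
# getrefcount()'s own argument adds one temporary reference, so a fresh
# object bound to a single local name reports 2, not 1.
print(sys.getrefcount(obj))   # 2

alias = obj
print(sys.getrefcount(obj))   # 3: local name + alias + the argument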
Example 18
    def test_parser_lex_multi(self):
        file1 = io.BytesIO(b"")
        file1.name = object()
        self.assertEqual(sys.getrefcount(file1.name), 2)

        file2 = io.BytesIO(b"")
        file2.name = object()
        self.assertEqual(sys.getrefcount(file2.name), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        tokens = list(parser.lex(file1))
        tokens = list(parser.lex(file2))

        del parser
        # Once the Parser object is gone we should have just the local
        # references to the file objects and a single reference to each name.
        self.assertEqual(sys.getrefcount(file1), 2)
        self.assertEqual(sys.getrefcount(file1.name), 2)
        self.assertEqual(sys.getrefcount(file2), 2)
        self.assertEqual(sys.getrefcount(file2.name), 2)
Example 19
 def test_bytes_encoded_utf16_invalid(self):
     utf16_bytes = self.test_utf8_string.encode('utf16')
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(utf16_bytes, builder))
     self.assertTrue(builder.errors)
Example 20
 def test_bytes_encoded_utf16(self):
     utf16_bytes = self.test_utf8_string.encode('utf16')
     builder = lexer.LexBuilder()
     with self.assertRaises(SystemError):
         tokens = list(lexer.lex_iter_string(utf16_bytes, builder))
Example 21
 def wrapped(self):
     string = fun.__doc__
     builder = lexer.LexBuilder()
     tokens = list(lexer.lex_iter_string(textwrap.dedent(string), builder))
     return fun(self, tokens, builder.errors)
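
This wrapped function reads like the inner half of a decorator; a minimal sketch of the plausible enclosing decorator and its use (the name lex_tokens and the import path are assumptions):

import functools
import textwrap

from beancount.parser import lexer

def lex_tokens(fun):
    # Lex the decorated test method's docstring and pass the resulting
    # tokens and any accumulated errors to the test as extra arguments.
    @functools.wraps(fun)
    def wrapped(self):
        string = fun.__doc__
        builder = lexer.LexBuilder()
        tokens = list(lexer.lex_iter_string(textwrap.dedent(string), builder))
        return fun(self, tokens, builder.errors)
    return wrapped

# Hypothetical usage: the docstring is the lexer input.
@lex_tokens
def test_comment(self, tokens, errors):
    """
      ; A comment line
    """
    self.assertFalse(errors)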