Code Example #1
def parse_file(file,
               report_filename=None,
               report_firstline=1,
               encoding=None,
               **kw):
    """Parse a beancount input file and return Ledger with the list of
    transactions and tree of accounts.

    Args:
      file: file object or path to the file to be parsed.
      kw: a dict of keywords to be applied to the C parser.
    Returns:
      A tuple of (
        list of entries parsed in the file,
        list of errors that were encountered during parsing, and
        a dict of the option values that were parsed from the file.)
    """
    if encoding is not None and codecs.lookup(encoding).name != 'utf-8':
        raise ValueError('Only UTF-8 encoded files are supported.')
    with contextlib.ExitStack() as ctx:
        if file == '-':
            file = sys.stdin.buffer
        # It would be more appropriate here to check for io.RawIOBase but
        # that does not work for io.BytesIO despite it implementing the
        # readinto() method.
        elif not isinstance(file, io.IOBase):
            file = ctx.enter_context(open(file, 'rb'))
        builder = grammar.Builder()
        parser = _parser.Parser(builder)
        parser.parse(file,
                     filename=report_filename,
                     lineno=report_firstline,
                     **kw)
    return builder.finalize()
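A minimal usage sketch for the function above, assuming it is importable as beancount.parser.parser.parse_file; the input path 'ledger.beancount' is hypothetical:

from beancount.parser import parser

# Parse a ledger file; a file object or '-' for stdin would also work.
entries, errors, options = parser.parse_file('ledger.beancount')
for error in errors:
    print(error)
print(len(entries), 'entries parsed')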
Code Example #2
File: lexer.py  Project: quangkr/beancount
def lex_iter(file, builder=None, encoding=None):
    """An iterator that yields all the tokens in the given file.

    Args:
      file: A string, the filename to run the lexer on, or a file object.
      builder: A builder of your choice. If not specified, a LexBuilder is
        used and discarded (along with its errors).
      encoding: A string (or None), the default encoding to use for strings.
    Yields:
      All the tokens in the input file as ``(token, lineno, text,
      value)`` tuples where ``token`` is a string representing the
      token kind, ``lineno`` is the line number in the input file
      where the token was matched, ``text`` is a bytes object
      containing the exact text matched, and ``value`` is the semantic
      value of the token or None.
    """
    # It would be more appropriate here to check for io.RawIOBase but
    # that does not work for io.BytesIO despite it implementing the
    # readinto() method.
    if not isinstance(file, io.IOBase):
        file = open(file, 'rb')
    if builder is None:
        builder = LexBuilder()
    parser = _parser.Parser(builder)
    yield from parser.lex(file, encoding=encoding)
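A short usage sketch, assuming lex_iter() is importable from beancount.parser.lexer; the input path is hypothetical:

from beancount.parser import lexer

# Print every token with its position and semantic value.
for token, lineno, text, value in lexer.lex_iter('ledger.beancount'):
    print(lineno, token, text, value)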
Code Example #3
    def test_parser_parse(self):
        # Do not use a string to avoid issues due to string interning.
        name = object()
        self.assertEqual(sys.getrefcount(name), 2)

        f = io.BytesIO(b"")
        f.name = name
        self.assertEqual(sys.getrefcount(f.name), 3)

        builder = grammar.Builder()
        parser = _parser.Parser(builder)
        parser.parse(f)
        # The Parser object keeps a reference to the input file.
        self.assertEqual(sys.getrefcount(f), 3)
        # There are references to the file name from the Parser object
        # and from the parsing results: in the case of an empty input
        # file, the reference is from the options dictionary stored in
        # the builder.
        self.assertEqual(sys.getrefcount(name), 5)
        builder.options = {}
        self.assertEqual(sys.getrefcount(name), 4)

        del parser
        # Once the Parser object is gone we should have just the local
        # reference to the file object and two references to name.
        self.assertEqual(sys.getrefcount(name), 3)
        self.assertEqual(sys.getrefcount(f), 2)
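Note that sys.getrefcount() counts the temporary reference created by passing its argument, so a freshly created object bound to a single local name reports 2; Code Example #7 below makes this explicit.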
Code Example #4
File: parser.py  Project: quangkr/beancount
def parse_file(file, report_filename=None, report_firstline=1, **kw):
    """Parse a beancount input file and return Ledger with the list of
    transactions and tree of accounts.

    Args:
      file: file object or path to the file to be parsed.
      kw: a dict of keywords to be applied to the C parser.
    Returns:
      A tuple of (
        list of entries parsed in the file,
        list of errors that were encountered during parsing, and
        a dict of the option values that were parsed from the file.)
    """
    close_file = None
    if file == '-':
        close_file = file = sys.stdin.buffer
    # It would be more appropriate here to check for io.RawIOBase but
    # that does not work for io.BytesIO despite it implementing the
    # readinto() method.
    elif not isinstance(file, io.IOBase):
        close_file = file = open(file, 'rb')

    builder = grammar.Builder()
    parser = _parser.Parser(builder)
    parser.parse(file, filename=report_filename, lineno=report_firstline, **kw)

    if close_file:
        close_file.close()
    return builder.finalize()
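Unlike the contextlib.ExitStack variant in Code Example #1, this revision closes the file it opened only on the success path: if parser.parse() raises, the handle assigned to close_file is never closed. The ExitStack version avoids the leak by tying the file's lifetime to the with block.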
Code Example #5
    def test_parser_lex_filename(self):
        # Do not use a string to avoid issues due to string interning.
        name = object()
        self.assertEqual(sys.getrefcount(name), 2)

        f = io.BytesIO(b"")
        f.name = object()
        self.assertEqual(sys.getrefcount(f.name), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        iterator = parser.lex(f, filename=name)
        tokens = list(iterator)
        # The Parser object keeps references to the input file and to
        # the name while iterating over the tokens in the input file.
        self.assertEqual(sys.getrefcount(name), 3)
        self.assertEqual(sys.getrefcount(f), 3)
        # The name attribute of the file object is not referenced.
        self.assertEqual(sys.getrefcount(f.name), 2)

        del parser
        del iterator
        # Once the Parser object is gone we should have just the local
        # references to the file object and to name.
        self.assertEqual(sys.getrefcount(name), 2)
        self.assertEqual(sys.getrefcount(f), 2)
Code Example #6
File: parser_test.py  Project: yd188/beancount
    def test_lex_lineno(self):
        f = io.BytesIO(b"1.0")
        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        tokens = list(parser.lex(f, lineno=42))
        token, lineno, matched, value = tokens[0]
        self.assertEqual(lineno, 42)
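The lineno keyword shifts the line numbers the lexer reports, which is useful when the input buffer is a fragment cut out of a larger file: here the first token of the in-memory buffer is reported at line 42 instead of line 1.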
Code Example #7
    def test_parser_lex(self):
        # Do not use a string to avoid issues due to string interning.
        name = object()
        # Note that passing name as an argument to sys.getrefcount()
        # counts as one reference, thus the minimum reference count
        # returned for any object is 2.
        self.assertEqual(sys.getrefcount(name), 2)

        f = io.BytesIO(b"")
        f.name = name
        # One more reference from the 'name' attribute.
        self.assertEqual(sys.getrefcount(name), 3)
        # Just one reference to the BytesIO object.
        self.assertEqual(sys.getrefcount(f), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        iterator = parser.lex(f)
        # The Parser object keeps references to the input file and to
        # the name while iterating over the tokens in the input file.
        self.assertEqual(sys.getrefcount(name), 4)
        self.assertEqual(sys.getrefcount(f), 3)
        # The iterator holds one reference to the parser.
        self.assertEqual(sys.getrefcount(parser), 3)

        tokens = list(iterator)
        # Just the EOL token.
        self.assertEqual(len(tokens), 1)
        # Once scanning is completed the Parser object still has
        # references to the input file and to the name.
        self.assertEqual(sys.getrefcount(name), 4)
        self.assertEqual(sys.getrefcount(f), 3)

        del parser
        del iterator
        # Once the Parser object is gone we should have just the local
        # reference to the file object and two references to name.
        self.assertEqual(sys.getrefcount(name), 3)
        self.assertEqual(sys.getrefcount(f), 2)

        del f
        # With the file object gone there is one reference to name.
        self.assertEqual(sys.getrefcount(name), 2)
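Because parser.lex() is evaluated lazily, the returned iterator holds a reference to the Parser, and the Parser in turn keeps the input file and its name alive; both del statements are needed before the reference counts drop back to their baselines.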
Code Example #8
File: lexer.py  Project: wzyboy/beancount
def lex_iter(file, builder=None, encoding=None):
    """An iterator that yields all the tokens in the given file.

    Args:
      file: A string, the filename to run the lexer on, or a file object.
      builder: A builder of your choice. If not specified, a LexBuilder is
        used and discarded (along with its errors).
      encoding: A string (or None), the default encoding to use for strings.
    Yields:
      Tuples of the token kind (a string), the matched text (a string),
      and the line number (an integer).
    """
    # It would be more appropriate here to check for io.RawIOBase but
    # that does not work for io.BytesIO despite it implementing the
    # readinto() method.
    if not isinstance(file, io.IOBase):
        file = open(file, 'rb')
    if builder is None:
        builder = LexBuilder()
    parser = _parser.Parser(builder)
    yield from parser.lex(file, encoding=encoding)
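This revision is near-identical to the lex_iter() in Code Example #2; only the Yields documentation differs, describing three-element tuples without the semantic value that the quangkr version documents.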
Code Example #9
    def test_parser_lex_multi(self):
        file1 = io.BytesIO(b"")
        file1.name = object()
        self.assertEqual(sys.getrefcount(file1.name), 2)

        file2 = io.BytesIO(b"")
        file2.name = object()
        self.assertEqual(sys.getrefcount(file2.name), 2)

        builder = lexer.LexBuilder()
        parser = _parser.Parser(builder)
        tokens = list(parser.lex(file1))
        tokens = list(parser.lex(file2))

        del parser
        # Once the Parser object is gone we should have just the local
        # references to the file objects and single references to the names.
        self.assertEqual(sys.getrefcount(file1), 2)
        self.assertEqual(sys.getrefcount(file1.name), 2)
        self.assertEqual(sys.getrefcount(file2), 2)
        self.assertEqual(sys.getrefcount(file2.name), 2)
Code Example #10
File: parser_test.py  Project: yd188/beancount
    def test_parse_lineno(self):
        f = io.BytesIO(b"2020-07-30 open Assets:Test")
        builder = grammar.Builder()
        parser = _parser.Parser(builder)
        parser.parse(f, lineno=42)
        self.assertEqual(builder.entries[0].meta['lineno'], 42)
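The same offset applies to parsing: the lineno passed to parse() propagates into each directive's metadata, so entries parsed from an embedded fragment carry line numbers relative to the enclosing file, as the meta['lineno'] assertion shows.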