Esempio n. 1
0
 def test_from_sequence(self):
     '''
     Exercise BitString.from_sequence, using BitString.from_byte as the
     per-item converter, for one item, an out-of-range item and two items.
     '''
     # one item: expected length 8 and value b'\x01'
     single = BitString.from_sequence([1], BitString.from_byte)
     self.assert_length_value(8, b'\x01', single)

     # 256 is wrapped in a callable so that assert_error triggers the
     # construction itself
     oversized = lambda: BitString.from_sequence([256], BitString.from_byte)
     self.assert_error(oversized)

     # two items: expected length 16 and value b'\x01\x02'
     pair = BitString.from_sequence([1, 2], BitString.from_byte)
     self.assert_length_value(16, b'\x01\x02', pair)
Esempio n. 2
0
 def test_from_sequence(self):
     '''
     Check BitString.from_sequence (with BitString.from_byte converting
     each item) against expected lengths and values, plus one input that
     is handed to assert_error.
     '''
     from_sequence = BitString.from_sequence
     from_byte = BitString.from_byte

     self.assert_length_value(
         8, b'\x01', from_sequence([1], from_byte))
     self.assert_error(
         lambda: from_sequence([256], from_byte))
     self.assert_length_value(
         16, b'\x01\x02', from_sequence([1, 2], from_byte))
Esempio n. 3
0
    def make_binary_parser():
        '''
        Create a parser for binary data.

        The grammar accepts a parenthesised, comma-separated list of one or
        more entries, optionally preceded by ``Node`` or by a name (a name
        selects a Node sub-class that is created on first use).  An entry is
        a value, a repeated value (``value * count``), a named value
        (``name = ...``) or a nested expression.  A value is a numeric
        literal, optionally followed by ``/ length``.

        Returns the result of ``get_parse_sequence()`` on the top-level
        matcher, after compilation to regexps has been disabled.
        '''
        
        # avoid import loops
        from lepl import Word, Letter, Digit, UnsignedInteger, \
            Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
            args, Trace, TraceVariables
        from lepl.bin.bits import BitString
        from lepl.support.node import Node

        # cache of dynamically created Node sub-classes, keyed by name
        classes = {}
        
        def named_class(name, *args):
            '''
            Given a name and some args, look up (creating and caching it in
            `classes` if necessary) a sub-class of Node with that name and 
            return an instance built from the given args.

            Note that the `*args` parameter shadows the `args` imported 
            from lepl, but only inside this function.
            '''
            if name not in classes:
                classes[name] = type(name, (Node,), {})
            return classes[name](*args)
        
        # NOTE(review): TraceVariables is constructed with False, which
        # presumably switches the variable tracing off - confirm in lepl docs
        with TraceVariables(False):

            # build a BitString from n copies of l; n arrives as text and
            # int(n, 0) infers the base from any 0b/0o/0x prefix
            mult    = lambda l, n: BitString.from_sequence([l] * int(n, 0)) 
                
            # an attribute or class name
            name    = Word(Letter(), Letter() | Digit() | '_')
        
            # lengths can be integers (bits) or floats (bytes.bits)
            # but if we have a float, we do not want to parse as an int
            # (or we will get a conversion error due to too small length)
            # so the float form is listed first
            length  = First(UnsignedInteger() + '.' + Optional(UnsignedInteger()),
                            UnsignedInteger())
        
            # a literal decimal
            decimal = UnsignedInteger()
        
            # a binary number (without pre/postfix)
            binary  = Any('01')[1:]
        
            # an octal number (without pre/postfix)
            octal   = Any('01234567')[1:]
        
            # a hex number (without pre/postfix)
            hex_     = Regexp('[a-fA-F0-9]')[1:]
            
            # the letters used for binary, octal, hex and decimal values 
            # (eg the 'x' in 0xffee), in either case
            # pylint: disable-msg=C0103
            b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')
        
            # a decimal with optional pre/postfix (0d123, 123d0 or plain 123)
            dec     = '0' + d + decimal | decimal + d + '0' | decimal
        
            # little-endian literals have normal prefix syntax (eg 0xffee) 
            # or are plain decimals
            little  = decimal | '0' + (b + binary | o + octal | x + hex_)
        
            # big-endian literals have postfix (eg ffeex0)
            big     = (binary + b | octal + o | hex_ + x) + '0'
        
            # optional spaces - will be ignored 
            # (use DFA here because it's multi-line, so \n will match ok)
            spaces  = Drop(DfaRegexp('[ \t\n\r]*'))
            
            # NOTE(review): presumably Separator makes & place `spaces`
            # between the matchers it joins - confirm in lepl docs
            with Separator(spaces):
                
                # the grammar is recursive - expressions can contain expressions - 
                # so we use a delayed matcher here as a placeholder, so that we can 
                # use them before they are defined.
                expr = Delayed()
                
                # an implicit length value can be big or little-endian
                # (| binds tighter than >, so the whole alternation is passed
                # on to BitString.from_int)
                ivalue = big | little                 > args(BitString.from_int)
                
                # a value with a length can also be decimal; the '/' is 
                # dropped, so from_int receives the literal and the length
                lvalue = (big | little | dec) & Drop('/') & length  \
                                                      > args(BitString.from_int)
                
                # explicit-length form is tried before the implicit one
                value = lvalue | ivalue
                
                # a value repeated a number of times (value * count)
                repeat = value & Drop('*') & little   > args(mult)
                
                # a named value is also a tuple
                named = name & Drop('=') & (expr | value | repeat)  > tuple
                
                # an entry in the expression could be any of these
                entry = named | value | repeat | expr
                
                # and an expression itself consists of a comma-separated list of
                # one or more entries, surrounded by parentheses
                entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')
                
                # the 'Node' prefix may be given explicitly or omitted; either
                # way the list of entries is handed to Node
                node = Optional(Drop('Node')) & entries             > Node
                
                # alternatively, we can give a name and create a named sub-class
                other = name & entries                > args(named_class)
                
                # and finally, we "tie the knot" by giving a definition for the
                # delayed matcher we introduced earlier, which is either a plain
                # node or a named subclass
                expr += spaces & (node | other) & spaces
        
        # debugging aid, left disabled (this is the only use of Trace):
        #expr = Trace(expr)
        # this changes order, making 0800x0 parse as binary
        # NOTE(review): unclear whether "this" is the regexp compilation
        # being disabled or the call that disables it - confirm
        expr.config.no_compile_to_regexp()
        # use sequence to force regexp over multiple lines
        return expr.get_parse_sequence()
Esempio n. 4
0
    def make_binary_parser():
        '''
        Create a parser for binary data.

        The grammar accepts a parenthesised, comma-separated list of one or
        more entries, optionally preceded by ``Node`` or by a name (a name
        selects a Node sub-class that is created on first use).  An entry is
        a value, a repeated value (``value * count``), a named value
        (``name = ...``) or a nested expression.  A value is a numeric
        literal, optionally followed by ``/ length``.

        Returns the result of ``get_parse_sequence()`` on the top-level
        matcher, after compilation to regexps has been disabled.
        '''

        # avoid import loops
        from lepl import Word, Letter, Digit, UnsignedInteger, \
            Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
            args, Trace, TraceVariables
        from lepl.bin.bits import BitString
        from lepl.support.node import Node

        # cache of dynamically created Node sub-classes, keyed by name
        classes = {}

        def named_class(name, *args):
            '''
            Given a name and some args, look up (creating and caching it in
            `classes` if necessary) a sub-class of Node with that name and 
            return an instance built from the given args.

            Note that the `*args` parameter shadows the `args` imported 
            from lepl, but only inside this function.
            '''
            if name not in classes:
                classes[name] = type(name, (Node, ), {})
            return classes[name](*args)

        # NOTE(review): TraceVariables is constructed with False, which
        # presumably switches the variable tracing off - confirm in lepl docs
        with TraceVariables(False):

            # build a BitString from n copies of l; n arrives as text and
            # int(n, 0) infers the base from any 0b/0o/0x prefix
            mult = lambda l, n: BitString.from_sequence([l] * int(n, 0))

            # an attribute or class name
            name = Word(Letter(), Letter() | Digit() | '_')

            # lengths can be integers (bits) or floats (bytes.bits)
            # but if we have a float, we do not want to parse as an int
            # (or we will get a conversion error due to too small length)
            # so the float form is listed first
            length = First(
                UnsignedInteger() + '.' + Optional(UnsignedInteger()),
                UnsignedInteger())

            # a literal decimal
            decimal = UnsignedInteger()

            # a binary number (without pre/postfix)
            binary = Any('01')[1:]

            # an octal number (without pre/postfix)
            octal = Any('01234567')[1:]

            # a hex number (without pre/postfix)
            hex_ = Regexp('[a-fA-F0-9]')[1:]

            # the letters used for binary, octal, hex and decimal values
            # (eg the 'x' in 0xffee), in either case
            # pylint: disable-msg=C0103
            b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')

            # a decimal with optional pre/postfix (0d123, 123d0 or plain 123)
            dec = '0' + d + decimal | decimal + d + '0' | decimal

            # little-endian literals have normal prefix syntax (eg 0xffee)
            # or are plain decimals
            little = decimal | '0' + (b + binary | o + octal | x + hex_)

            # big-endian literals have postfix (eg ffeex0)
            big = (binary + b | octal + o | hex_ + x) + '0'

            # optional spaces - will be ignored
            # (use DFA here because it's multi-line, so \n will match ok)
            spaces = Drop(DfaRegexp('[ \t\n\r]*'))

            # NOTE(review): presumably Separator makes & place `spaces`
            # between the matchers it joins - confirm in lepl docs
            with Separator(spaces):

                # the grammar is recursive - expressions can contain expressions -
                # so we use a delayed matcher here as a placeholder, so that we can
                # use them before they are defined.
                expr = Delayed()

                # an implicit length value can be big or little-endian
                # (| binds tighter than >, so the whole alternation is passed
                # on to BitString.from_int)
                ivalue = big | little > args(BitString.from_int)

                # a value with a length can also be decimal; the '/' is
                # dropped, so from_int receives the literal and the length
                lvalue = (big | little | dec) & Drop('/') & length  \
                                                      > args(BitString.from_int)

                # explicit-length form is tried before the implicit one
                value = lvalue | ivalue

                # a value repeated a number of times (value * count)
                repeat = value & Drop('*') & little > args(mult)

                # a named value is also a tuple
                named = name & Drop('=') & (expr | value | repeat) > tuple

                # an entry in the expression could be any of these
                entry = named | value | repeat | expr

                # and an expression itself consists of a comma-separated list of
                # one or more entries, surrounded by parentheses
                entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')

                # the 'Node' prefix may be given explicitly or omitted; either
                # way the list of entries is handed to Node
                node = Optional(Drop('Node')) & entries > Node

                # alternatively, we can give a name and create a named sub-class
                other = name & entries > args(named_class)

                # and finally, we "tie the knot" by giving a definition for the
                # delayed matcher we introduced earlier, which is either a plain
                # node or a named subclass
                expr += spaces & (node | other) & spaces

        # debugging aid, left disabled (this is the only use of Trace):
        #expr = Trace(expr)
        # this changes order, making 0800x0 parse as binary
        # NOTE(review): unclear whether "this" is the regexp compilation
        # being disabled or the call that disables it - confirm
        expr.config.no_compile_to_regexp()
        # use sequence to force regexp over multiple lines
        return expr.get_parse_sequence()