예제 #1
0
def test_tnet_machinery():
    # parsing integers
    path = "machinery"
    data = cpppo.dotdict()
    source = cpppo.chainable(b'123:')
    with cpppo.integer_bytes(name="SIZE", context="size",
                             terminal=True) as SIZE:
        with contextlib.closing(SIZE.run(source=source, data=data,
                                         path=path)) as engine:
            for m, s in engine:
                if s is None:
                    break
        log.info("After SIZE: %r", data)
        assert SIZE.terminal
    assert data.machinery.size == 123

    # repeat, limited by parent context's 'value' in data
    source.chain(b"abc" * 123)
    with tnet.data_parser(name="DATA", context="data",
                          repeat="..size") as DATA:
        with contextlib.closing(DATA.run(source=source, data=data,
                                         path=path)) as engine:
            for m, s in engine:
                if s is None:
                    break
        log.info("After DATA: %r", data)
예제 #2
0
def tnet_machine( name="TNET", context="tnet" ):
    """Accept a sentence of input bytes matching a tnetstring, and then
    loop.  Sub-machine terminates at earliest match (non-greedy), causing
    echo.transition to trigger .process (which resets our sub-machine to initial
    state), and then we move to the next state (loops), allowing us to
    immediately run."""

    class tnet_parser( cpppo.state_input ):
        TYPES			= (b'#'[0], b'}'[0], b']'[0], b','[0],
                                   b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type"""
            tntype		= next( source )
            ours		= self.context( path )
            raw			= ours + '...data.input'
            src			= b'' if raw not in data else (
                data[raw].tostring() if sys.version_info[0] < 3
                else data[raw].tobytes() )

            if tntype == b','[0]:
                log.info("%5d bytes  data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours]	= src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours]	= src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours]	= int( src )
                log.info("%5d int    data: %s == %s", len( src ), cpppo.reprlib.repr( src ),
                         cpppo.reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                assert 0 == len( src )
                data[ours]	= None
            else:
                assert False, "Invalid tnetstring type: %s" % tntype
                
    bytes_conf 			= {
        "alphabet":	cpppo.type_bytes_iter,
        "typecode":	cpppo.type_bytes_array_symbol,
    }

    SIZE			= cpppo.dfa( name="SIZE", 
                                             initial=cpppo.integer_bytes(
                                                 name="INT", context="size", decode='ascii', terminal=True ))
    COLON			= cpppo.state_drop( name="COLON", **bytes_conf )
    DATA			= data_parser( name="DATA", context="data", repeat="..size" )
    TYPE			= tnet_parser( name="TYPE", context="type", terminal=True,
                                                     **bytes_conf )

    SIZE[b':'[0]]		= COLON
    COLON[None]			= DATA
    for t in tnet_parser.TYPES:
        DATA[t]			= TYPE

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
예제 #3
0
파일: tnet.py 프로젝트: dpasquazzo/cpppo
def tnet_machine( name="TNET", context="tnet" ):
    """Accept a sentence of input bytes matching a tnetstring, and then
    loop.  Sub-machine terminates at earliest match (non-greedy), causing
    echo.transition to trigger .process (which resets our sub-machine to initial
    state), and then we move to the next state (loops), allowing us to
    immediately run."""

    class tnet_parser( cpppo.state_input ):
        codes			= (b'#'[0], b'}'[0], b']'[0], b','[0],
                                   b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type"""
            tntype		= next( source )
            ours		= self.context( path )
            raw			= ours + '...data.input'
            src			= ( data[raw].tostring() if sys.version_info.major < 3
                                    else data[raw].tobytes() )

            if tntype == b','[0]:
                log.info("%5d bytes  data: %s", len( src ), reprlib.repr( src ))
                data[ours]	= src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), reprlib.repr( src ))
                data[ours]	= src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours]	= int( src )
                log.info("%5d int    data: %s == %s", len( src ), reprlib.repr( src ),
                         reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                assert 0 == len( src )
                data[ours]	= None
            else:
                assert False, "Invalid tnetstring type: %s" % tntype
                
    bytes_conf 			= {
        "alphabet":	cpppo.type_bytes_iter,
        "typecode":	cpppo.type_bytes_array_symbol,
    }

    SIZE			= cpppo.integer_bytes( name="SIZE", context="size", decode='ascii' )
    COLON			= cpppo.state_drop( name="COLON", **bytes_conf )
    DATA			= data_parser( name="DATA", context="data", repeat="..size" )
    TYPE			= tnet_parser( name="TYPE", context="type", terminal=True,
                                                     **bytes_conf )
    SIZE[b':'[0]]		= COLON
    SIZE[True]			= None  # SIZE terminal, but only : acceptable

    COLON[None]			= DATA
    for t in tnet_parser.codes:
        DATA[t]			= TYPE
    DATA[True]			= None # DATA terminal, but only TNET codes acceptable

    # Recognize a TNET string and then terminate, resetting automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
예제 #4
0
def test_tnet_machinery():
    # parsing integers
    path = "machinery"
    SIZE = cpppo.integer_bytes(name="SIZE", context="size")
    data = cpppo.dotdict()
    source = cpppo.chainable(b'123:')
    with SIZE:
        for m, s in SIZE.run(source=source, data=data, path=path):
            if s is None:
                break
    log.info("After SIZE: %r", data)
    assert s and s.terminal
    assert data.machinery.size == 123

    # repeat, limited by parent context's 'value' in data
    DATA = tnet.data_parser(name="DATA", context="data", repeat="..size")
    source.chain(b"abc" * 123)
    with DATA:
        for m, s in DATA.run(source=source, data=data, path=path):
            if s is None:
                break
    log.info("After DATA: %r", data)
예제 #5
0
def test_tnet_machinery():
    # parsing integers
    path			= "machinery"
    SIZE			= cpppo.integer_bytes( name="SIZE", context="size", terminal=True )
    data			= cpppo.dotdict()
    source			= cpppo.chainable( b'123:' )
    with SIZE:
        for m,s in SIZE.run( source=source, data=data, path=path ):
            if s is None:
                break
    log.info( "After SIZE: %r", data )
    assert SIZE.terminal
    assert data.machinery.size == 123

    # repeat, limited by parent context's 'value' in data
    DATA			= tnet.data_parser(
        name="DATA", context="data", repeat="..size" )
    source.chain( b"abc" * 123 )
    with DATA:
        for m,s in DATA.run( source=source, data=data, path=path ):
            if s is None:
                break
    log.info( "After DATA: %r", data )
예제 #6
0
def test_decode():
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols., works in Python 2/3.
    source = cpppo.peekable('π'.encode('utf-8'))
    data = cpppo.dotdict()
    with cpppo.string_bytes('pi',
                            initial='.*',
                            greedy=True,
                            context='pi',
                            decode='utf-8') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'

    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force non-unicode (counteracts
        # import unicode_literals above).  We can't use greenery.lego regexes on unicode data in
        # Python 2...
        source = cpppo.peekable(str('pi'))
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True,
                          context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'

    else:
        # Test regexes over Python 3 unicode string data (no decode required).  Operates in native
        # unicode symbols.
        source = cpppo.peekable('π')
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True,
                          context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    source = cpppo.peekable(str('123'))
    data = cpppo.dotdict()
    with cpppo.integer('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    source = cpppo.peekable('123'.encode('ascii'))
    data = cpppo.dotdict()
    with cpppo.integer_bytes('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using a integer (str) parser over bytes data.  Works in Python 2, not so much in Python 3
    try:
        source = cpppo.peekable('123'.encode('ascii'))
        data = cpppo.dotdict()
        with cpppo.integer('value') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
        assert sys.version_info[0] < 3, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"
    except AssertionError:
        assert not sys.version_info[0] < 3, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"
예제 #7
0
def test_decode():
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols., works in Python 2/3.
    source			= cpppo.peekable( 'π'.encode( 'utf-8' ))
    data			= cpppo.dotdict()
    with cpppo.string_bytes( 'pi', initial='.*', greedy=True, context='pi', decode='utf-8' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'
    
    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force non-unicode (counteracts
        # import unicode_literals above).  We can't use greenery.lego regexes on unicode data in
        # Python 2...
        source			= cpppo.peekable( str( 'pi' ))
        data			= cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                          i, s, source.sent, source.peek(), data )
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'

    else:
        # Test regexes over Python 3 unicode string data (no decode required).  Operates in native
        # unicode symbols.
        source			= cpppo.peekable( 'π' )
        data			= cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                          i, s, source.sent, source.peek(), data )
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    source			= cpppo.peekable( str( '123' ))
    data			= cpppo.dotdict()
    with cpppo.integer( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123


    source			= cpppo.peekable( '123'.encode( 'ascii' ))
    data			= cpppo.dotdict()
    with cpppo.integer_bytes( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using a integer (str) parser over bytes data.  Works in Python 2, not so much in Python 3
    try:
        source			= cpppo.peekable( '123'.encode( 'ascii' ))
        data			= cpppo.dotdict()
        with cpppo.integer( 'value' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                          i, s, source.sent, source.peek(), data )
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
        assert sys.version_info[0] < 3, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"
    except AssertionError:
        assert not sys.version_info[0] < 3, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"