def test_tnet_machinery():
    """Drive the SIZE integer parser, then the size-limited DATA parser.

    First runs a ``cpppo.integer_bytes`` machine over ``b'123:'`` and checks
    that 123 is stored at ``data.machinery.size``.  Then chains 123 copies of
    ``b"abc"`` onto the same source and runs a ``tnet.data_parser`` whose
    ``repeat`` is given as the relative path ``"..size"``.
    """
    path = "machinery"
    data = cpppo.dotdict()
    source = cpppo.chainable(b'123:')

    # Stage 1: parse the integer prefix.  The run generator is wrapped in
    # contextlib.closing so it is closed when the block exits.
    size_parser = cpppo.integer_bytes(
        name="SIZE",
        context="size",
        terminal=True,
    )
    with size_parser as SIZE, \
            contextlib.closing(
                SIZE.run(source=source, data=data, path=path)) as engine:
        for _machine, state in engine:
            if state is None:
                break
    log.info("After SIZE: %r", data)
    assert SIZE.terminal
    assert data.machinery.size == 123

    # Stage 2: repeat, limited by the parent context's 'value' in data.
    source.chain(b"abc" * 123)
    data_parser = tnet.data_parser(
        name="DATA",
        context="data",
        repeat="..size",
    )
    with data_parser as DATA, \
            contextlib.closing(
                DATA.run(source=source, data=data, path=path)) as engine:
        for _machine, state in engine:
            if state is None:
                break
    log.info("After DATA: %r", data)
def tnet_machine( name="TNET", context="tnet" ):
    """Build and return a state machine accepting one tnetstring from a bytes source.

    The machine is assembled from four states wired in sequence:

        SIZE  --b':'-->  COLON  --(None)-->  DATA  --<type code>-->  TYPE

    SIZE parses an ASCII integer into the "size" context, COLON drops the
    separator, DATA is a ``data_parser`` whose repeat count is given as the
    relative path "..size", and TYPE (a ``tnet_parser``) converts the
    collected payload according to the trailing type code.

    Only the type codes b',' (bytes), b'$' (UTF-8 string), b'#' (integer) and
    b'~' (null) are converted; the other codes listed in ``tnet_parser.TYPES``
    are accepted as transitions but rejected in ``process``.

    :param name: name given to the returned ``cpppo.dfa``
    :param context: context given to the returned ``cpppo.dfa``
    :returns: a terminal ``cpppo.dfa`` whose initial state is SIZE
    """

    class tnet_parser( cpppo.state_input ):
        """State that consumes the tnetstring type code and converts the payload."""

        # Every type code that may follow the payload.  b'x'[0] yields the
        # native symbol for a bytes source (int on Python 3, str on Python 2).
        TYPES = (b'#'[0], b'}'[0], b']'[0], b','[0], b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type.

            Consumes the next symbol from ``source`` as the type code, reads
            the raw payload from the relative path '...data.input' (empty
            bytes when nothing was collected), and stores the converted value
            at this state's own context path in ``data``.

            :raises AssertionError: if a null ('~') tnetstring has a non-empty
                payload, or if the type code is not one of , $ # ~
            """
            tntype = next( source )
            ours = self.context( path )
            raw = ours + '...data.input'
            # The payload may be absent entirely (e.g. a zero-length
            # tnetstring); treat that as empty bytes.
            if raw not in data:
                src = b''
            elif sys.version_info[0] < 3:
                src = data[raw].tostring()
            else:
                src = data[raw].tobytes()

            if tntype == b','[0]:
                log.info("%5d bytes data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours] = src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours] = src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours] = int( src )
                log.info("%5d int data: %s == %s", len( src ), cpppo.reprlib.repr( src ),
                         cpppo.reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                # Raised explicitly (rather than via `assert`) so the check
                # still runs under `python -O`.
                if len( src ) != 0:
                    raise AssertionError( "Null tnetstring must have empty data" )
                data[ours] = None
            else:
                # Explicit raise: an `assert False` would be stripped under
                # `python -O`, silently leaving data[ours] unset.
                raise AssertionError( "Invalid tnetstring type: %s" % tntype )

    bytes_conf = {
        "alphabet": cpppo.type_bytes_iter,
        "typecode": cpppo.type_bytes_array_symbol,
    }

    SIZE = cpppo.dfa(
        name="SIZE",
        initial=cpppo.integer_bytes( name="INT", context="size", decode='ascii', terminal=True ))
    COLON = cpppo.state_drop( name="COLON", **bytes_conf )
    DATA = data_parser( name="DATA", context="data", repeat="..size" )
    TYPE = tnet_parser( name="TYPE", context="type", terminal=True, **bytes_conf )

    SIZE[b':'[0]] = COLON
    COLON[None] = DATA
    for t in tnet_parser.TYPES:
        DATA[t] = TYPE

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
def tnet_machine( name="TNET", context="tnet" ):
    """Build and return a state machine accepting one tnetstring from a bytes source.

    The machine is assembled from four states wired in sequence:

        SIZE  --b':'-->  COLON  --(None)-->  DATA  --<type code>-->  TYPE

    SIZE parses an ASCII integer into the "size" context, COLON drops the
    separator, DATA is a ``data_parser`` whose repeat count is given as the
    relative path "..size", and TYPE (a ``tnet_parser``) converts the
    collected payload according to the trailing type code.

    Only the type codes b',' (bytes), b'$' (UTF-8 string), b'#' (integer) and
    b'~' (null) are converted; the other codes listed in ``tnet_parser.codes``
    are accepted as transitions but rejected in ``process``.

    :param name: name given to the returned ``cpppo.dfa``
    :param context: context given to the returned ``cpppo.dfa``
    :returns: a terminal ``cpppo.dfa`` whose initial state is SIZE
    """

    class tnet_parser( cpppo.state_input ):
        """State that consumes the tnetstring type code and converts the payload."""

        # Every type code that may follow the payload.  b'x'[0] yields the
        # native symbol for a bytes source (int on Python 3, str on Python 2).
        codes = (b'#'[0], b'}'[0], b']'[0], b','[0], b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type.

            Consumes the next symbol from ``source`` as the type code, reads
            the raw payload from the relative path '...data.input' (empty
            bytes when nothing was collected), and stores the converted value
            at this state's own context path in ``data``.

            :raises AssertionError: if a null ('~') tnetstring has a non-empty
                payload, or if the type code is not one of , $ # ~
            """
            tntype = next( source )
            ours = self.context( path )
            raw = ours + '...data.input'
            # NOTE(review): presumably no input is recorded for a zero-length
            # payload; fall back to empty bytes instead of failing the lookup.
            if raw not in data:
                src = b''
            elif sys.version_info.major < 3:
                src = data[raw].tostring()
            else:
                src = data[raw].tobytes()

            if tntype == b','[0]:
                log.info("%5d bytes data: %s", len( src ), reprlib.repr( src ))
                data[ours] = src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), reprlib.repr( src ))
                data[ours] = src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours] = int( src )
                log.info("%5d int data: %s == %s", len( src ), reprlib.repr( src ),
                         reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                # Raised explicitly (rather than via `assert`) so the check
                # still runs under `python -O`.
                if len( src ) != 0:
                    raise AssertionError( "Null tnetstring must have empty data" )
                data[ours] = None
            else:
                # Explicit raise: an `assert False` would be stripped under
                # `python -O`, silently leaving data[ours] unset.
                raise AssertionError( "Invalid tnetstring type: %s" % tntype )

    bytes_conf = {
        "alphabet": cpppo.type_bytes_iter,
        "typecode": cpppo.type_bytes_array_symbol,
    }

    SIZE = cpppo.integer_bytes( name="SIZE", context="size", decode='ascii' )
    COLON = cpppo.state_drop( name="COLON", **bytes_conf )
    DATA = data_parser( name="DATA", context="data", repeat="..size" )
    TYPE = tnet_parser( name="TYPE", context="type", terminal=True, **bytes_conf )

    SIZE[b':'[0]] = COLON
    SIZE[True] = None  # SIZE terminal, but only : acceptable
    COLON[None] = DATA
    for t in tnet_parser.codes:
        DATA[t] = TYPE
    DATA[True] = None  # DATA terminal, but only TNET codes acceptable

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
def test_tnet_machinery():
    """Drive the SIZE integer parser, then the size-limited DATA parser.

    Runs a ``cpppo.integer_bytes`` machine over ``b'123:'``, checks the last
    state yielded is truthy and terminal and that 123 is stored at
    ``data.machinery.size``; then chains 123 copies of ``b"abc"`` onto the
    source and runs a ``tnet.data_parser`` with ``repeat="..size"``.
    """
    path = "machinery"

    # --- integer prefix ---------------------------------------------------
    SIZE = cpppo.integer_bytes(
        name="SIZE",
        context="size",
    )
    data = cpppo.dotdict()
    source = cpppo.chainable(b'123:')
    with SIZE:
        for _machine, state in SIZE.run(
                source=source, data=data, path=path):
            if state is None:
                break
    log.info("After SIZE: %r", data)
    assert state and state.terminal
    assert data.machinery.size == 123

    # --- payload: repeat count comes from the parent context's 'size' -----
    DATA = tnet.data_parser(
        name="DATA",
        context="data",
        repeat="..size",
    )
    source.chain(b"abc" * 123)
    with DATA:
        for _machine, state in DATA.run(
                source=source, data=data, path=path):
            if state is None:
                break
    log.info("After DATA: %r", data)
def test_tnet_machinery():
    """Drive the SIZE integer parser, then the size-limited DATA parser.

    Runs a terminal ``cpppo.integer_bytes`` machine over ``b'123:'`` and
    checks that 123 is stored at ``data.machinery.size``; then chains 123
    copies of ``b"abc"`` onto the source and runs a ``tnet.data_parser`` with
    ``repeat="..size"``.
    """
    path = "machinery"
    SIZE = cpppo.integer_bytes( name="SIZE", context="size", terminal=True )
    data = cpppo.dotdict()
    source = cpppo.chainable( b'123:' )

    # Both machines are run against the same source, data and path.
    run_kwargs = dict( source=source, data=data, path=path )

    # Parse the integer prefix.
    with SIZE:
        for _machine, state in SIZE.run( **run_kwargs ):
            if state is None:
                break
    log.info( "After SIZE: %r", data )
    assert SIZE.terminal
    assert data.machinery.size == 123

    # Repeat, limited by the parent context's 'value' in data.
    DATA = tnet.data_parser( name="DATA", context="data", repeat="..size" )
    source.chain( b"abc" * 123 )
    with DATA:
        for _machine, state in DATA.run( **run_kwargs ):
            if state is None:
                break
    log.info( "After DATA: %r", data )
def test_decode():
    """Check string/integer parsers over bytes and native-string sources.

    Sections, in order:
      1. ``cpppo.string_bytes`` with ``decode='utf-8'`` over UTF-8 bytes.
      2. ``cpppo.string`` over a native string (byte str on Python 2,
         unicode str on Python 3).
      3. ``cpppo.integer`` over a native string.
      4. ``cpppo.integer_bytes`` over ASCII bytes.
      5. ``cpppo.integer`` (str parser) over bytes: must pass on Python 2
         and must fail with AssertionError on Python 3.
    """
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols;
    # works in Python 2/3.
    source = cpppo.peekable('π'.encode('utf-8'))
    data = cpppo.dotdict()
    with cpppo.string_bytes('pi', initial='.*', greedy=True, context='pi',
                            decode='utf-8') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'

    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force
        # non-unicode (counteracts import unicode_literals above).  We can't
        # use greenery.lego regexes on unicode data in Python 2...
        source = cpppo.peekable(str('pi'))
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True, context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'
    else:
        # Test regexes over Python 3 unicode string data (no decode
        # required).  Operates in native unicode symbols.
        source = cpppo.peekable('π')
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True, context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    # Integer parser over a native string.
    source = cpppo.peekable(str('123'))
    data = cpppo.dotdict()
    with cpppo.integer('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Integer-bytes parser over ASCII bytes.
    source = cpppo.peekable('123'.encode('ascii'))
    data = cpppo.dotdict()
    with cpppo.integer_bytes('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using an integer (str) parser over bytes data.  Works in Python 2,
    # not so much in Python 3.  The "should have failed" check lives in the
    # `else` clause so that it is not swallowed by the `except AssertionError`
    # handler guarding the parse itself.
    python2 = sys.version_info[0] < 3
    try:
        source = cpppo.peekable('123'.encode('ascii'))
        data = cpppo.dotdict()
        with cpppo.integer('value') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
    except AssertionError:
        assert not python2, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"
    else:
        assert python2, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"
def test_decode():
    """Check string/integer parsers over bytes and native-string sources.

    Sections, in order:
      1. ``cpppo.string_bytes`` with ``decode='utf-8'`` over UTF-8 bytes.
      2. ``cpppo.string`` over a native string (byte str on Python 2,
         unicode str on Python 3).
      3. ``cpppo.integer`` over a native string.
      4. ``cpppo.integer_bytes`` over ASCII bytes.
      5. ``cpppo.integer`` (str parser) over bytes: must pass on Python 2
         and must fail with AssertionError on Python 3.
    """
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols;
    # works in Python 2/3.
    source = cpppo.peekable( 'π'.encode( 'utf-8' ))
    data = cpppo.dotdict()
    with cpppo.string_bytes( 'pi', initial='.*', greedy=True, context='pi',
                             decode='utf-8' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'

    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force
        # non-unicode (counteracts import unicode_literals above).  We can't
        # use greenery.lego regexes on unicode data in Python 2...
        source = cpppo.peekable( str( 'pi' ))
        data = cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'
    else:
        # Test regexes over Python 3 unicode string data (no decode
        # required).  Operates in native unicode symbols.
        source = cpppo.peekable( 'π' )
        data = cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    # Integer parser over a native string.
    source = cpppo.peekable( str( '123' ))
    data = cpppo.dotdict()
    with cpppo.integer( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Integer-bytes parser over ASCII bytes.
    source = cpppo.peekable( '123'.encode( 'ascii' ))
    data = cpppo.dotdict()
    with cpppo.integer_bytes( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using an integer (str) parser over bytes data.  Works in Python 2,
    # not so much in Python 3.  The "should have failed" check lives in the
    # `else` clause so that it is not swallowed by the `except AssertionError`
    # handler guarding the parse itself.
    python2 = sys.version_info[0] < 3
    try:
        source = cpppo.peekable( '123'.encode( 'ascii' ))
        data = cpppo.dotdict()
        with cpppo.integer( 'value' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
    except AssertionError:
        assert not python2, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"
    else:
        assert python2, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"