def test_decide():
    """Check that cpppo.decide can pick a transition from collected data.

    The machine accepts 'a', then two space-separated integers collected under
    the 'i1' and 'i2' contexts.  The state for the second integer carries two
    decide transitions (comparing data.i1 with data.i2) and a plain fallback,
    leading to the 'less', 'greater' or 'equal' terminal state.
    """
    enter = cpppo.state("enter")
    letter = cpppo.state_input("a", context="a")
    enter["a"] = letter

    # One or more spaces are dropped before the first integer.
    gap1 = cpppo.state_drop("s1")
    letter[" "] = gap1
    gap1[" "] = gap1
    first = cpppo.integer("i1", context="i1")
    gap1[None] = first

    # Same again between the first and second integer.
    gap2 = cpppo.state_drop("s2")
    first[" "] = gap2
    gap2[" "] = gap2
    second = cpppo.integer("i2", context="i2")
    gap2[None] = second

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)

    # Three successive [None] assignments, kept in their original order.
    # NOTE(review): presumably the decide transitions are consulted before the
    # plain 'equal' fallback -- confirm against cpppo.
    second[None] = cpppo.decide(
        "isless", less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2)
    second[None] = cpppo.decide(
        "isgreater", greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2)
    second[None] = equal

    # (input text, index of the last yielded transition, final state yielded)
    cases = (
        ("a 1 2", 11, less),
        ("a 33 33", 13, equal),
    )
    for text, last_index, final_state in cases:
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=enter) as comparo:
            for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
                log.info(
                    "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                    m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == last_index
            assert s is final_state
def main():
    """Run the README's three example machines over the same input text.

    Each machine (a basic DFA, a composite CSV machine, and a regex) is run
    against 'abbbb, ab'; every transition is printed, followed by what was
    accepted and what input remains.  The accumulated data is printed last.
    """
    # Basic DFA that accepts ab+
    start = cpppo.state( 'E' )
    seen_a = cpppo.state_input( 'A' )
    seen_b = cpppo.state_input( 'B', terminal=True )
    start['a'] = seen_a
    seen_a['b'] = seen_b
    seen_b['b'] = seen_b
    basic = cpppo.dfa( 'ab+', initial=start, context='basic' )

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    ab_plus = cpppo.dfa( 'ab+', initial=start, terminal=True )
    separator = cpppo.state_drop( 'SEP' )
    ab_plus[','] = separator
    separator[' '] = separator
    separator[None] = ab_plus
    csv = cpppo.dfa( 'CSV', initial=ab_plus, context='csv' )

    # A regular expression; the default dfa name is the regular expression itself.
    regex = cpppo.regex( initial='(ab+)((,[ ]*)(ab+))*', context='regex' )

    data = cpppo.dotdict()
    for machine in ( basic, csv, regex ):
        # state_input collects under '<context>.input' by default
        path = machine.context() + '.input'
        source = cpppo.peekable( str( 'abbbb, ab' ))
        with machine:
            for step, (mach, _target) in enumerate( machine.run( source=source, data=data )):
                report = ( mach.name_centered(), step, source.sent, source.peek(),
                           data.get( path ))
                print( "%s #%3d; next byte %3d: %-10.10r: %r" % report )
        accepted = data.get( path )
        leftover = ''.join( source )
        print( "Accepted: %r; remaining: %r\n" % ( accepted, leftover ))
    print( "Final: %r" % ( data, ))
def test_readme():
    """Run the README's ab+ and CSV example machines and check where each stops.

    Both runs share one data artifact; each asserts the index of the last
    transition yielded and the symbol left at the front of the source.
    """
    # Basic DFA that accepts ab+
    start = cpppo.state( "E" )
    seen_a = cpppo.state_input( "A" )
    seen_b = cpppo.state_input( "B", terminal=True )
    start['a'] = seen_a
    seen_a['b'] = seen_b
    seen_b['b'] = seen_b

    data = cpppo.dotdict()

    source = cpppo.peekable( str( 'abbbb,ab' ))
    with cpppo.dfa( initial=start ) as abplus:
        events = abplus.run( source=source, path="ab+", data=data )
        for i,(m,s) in enumerate( events ):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 5
        # The plain ab+ machine stops at the comma, leaving it unconsumed.
        assert source.peek() == str(',')

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    csv_machine = cpppo.dfa( "CSV", initial=start, terminal=True )
    separator = cpppo.state_drop( "SEP" )
    csv_machine[','] = separator
    separator[' '] = separator
    separator[None] = csv_machine

    source = cpppo.peekable( str( 'abbbb, ab' ))
    with cpppo.dfa( initial=csv_machine ) as r2:
        events = r2.run( source=source, path="readme_CSV", data=data )
        for i,(m,s) in enumerate( events ):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 14
        # With separators handled, the whole input is consumed.
        assert source.peek() is None
def tnet_machine( name="TNET", context="tnet" ):
    """Build a dfa recognizing one tnetstring: <size> ':' <data> <type-code>.

    The returned machine is terminal, so it accepts a single tnetstring and
    then terminates; the intent (per the original notes) is that an enclosing
    machine resets it so another tnetstring can be recognized immediately.

    name    -- name given to the returned cpppo.dfa
    context -- context given to the returned cpppo.dfa
    """

    class tnet_parser( cpppo.state_input ):
        # Symbols accepted as a tnetstring type code.  Only ',', '$', '#' and
        # '~' are converted by process(); any other code trips its final assert.
        TYPES = (b'#'[0], b'}'[0], b']'[0], b','[0], b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Consume the type symbol and store the converted payload at our context."""
            kind = next( source )
            target = self.context( path )
            raw_key = target + '...data.input'

            # Nothing collected means an empty payload; otherwise take the
            # collected array as bytes (array method name differs on Python 2).
            if raw_key not in data:
                payload = b''
            elif sys.version_info[0] < 3:
                payload = data[raw_key].tostring()
            else:
                payload = data[raw_key].tobytes()

            # The type codes are mutually exclusive, so handle each and return.
            if kind == b'~'[0]:
                assert 0 == len( payload )
                data[target] = None
                return
            if kind == b'#'[0]:
                data[target] = int( payload )
                log.info("%5d int data: %s == %s", len( payload ),
                         cpppo.reprlib.repr( payload ), cpppo.reprlib.repr( data[target] ))
                return
            if kind == b'$'[0]:
                log.info("%5d string data: %s", len( payload ), cpppo.reprlib.repr( payload ))
                data[target] = payload.decode( 'utf-8' )
                return
            if kind == b','[0]:
                log.info("%5d bytes data: %s", len( payload ), cpppo.reprlib.repr( payload ))
                data[target] = payload
                return
            assert False, "Invalid tnetstring type: %s" % kind

    bytes_conf = {
        "alphabet": cpppo.type_bytes_iter,
        "typecode": cpppo.type_bytes_array_symbol,
    }

    # The ASCII size is parsed by its own sub-machine, storing under 'size'.
    size_digits = cpppo.integer_bytes( name="INT", context="size", decode='ascii',
                                       terminal=True )
    SIZE = cpppo.dfa( name="SIZE", initial=size_digits )
    COLON = cpppo.state_drop( name="COLON", **bytes_conf )
    DATA = data_parser( name="DATA", context="data", repeat="..size" )
    TYPE = tnet_parser( name="TYPE", context="type", terminal=True, **bytes_conf )

    SIZE[b':'[0]] = COLON
    COLON[None] = DATA
    for code in tnet_parser.TYPES:
        DATA[code] = TYPE

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
def test_decide():
    """Verify data-driven transitions built with cpppo.decide.

    After an 'a' and two space-separated integers (stored under 'i1' and
    'i2'), the machine ends in the 'less', 'greater' or 'equal' terminal
    state depending on how data.i1 compares with data.i2.
    """
    e = cpppo.state("enter")

    a = cpppo.state_input("a", context='a')
    e['a'] = a

    s1 = cpppo.state_drop("s1")
    a[' '] = s1
    s1[' '] = s1              # swallow any further spaces

    i1 = cpppo.integer("i1", context='i1')
    s1[None] = i1

    s2 = cpppo.state_drop("s2")
    i1[' '] = s2
    s2[' '] = s2              # swallow any further spaces

    i2 = cpppo.integer("i2", context='i2')
    s2[None] = i2

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)

    # Order preserved from the original: the two decide transitions, then the
    # unconditional fallback to 'equal'.
    for label, outcome, test in (
            ("isless", less,
             lambda machine, source, path, data: data.i1 < data.i2),
            ("isgreater", greater,
             lambda machine, source, path, data: data.i1 > data.i2)):
        i2[None] = cpppo.decide(label, outcome, predicate=test)
    i2[None] = equal

    # Each scenario: input, expected index of the final transition, expected
    # final state.
    scenarios = [
        ('a 1 2', 12, less),
        ('a 33 33', 14, equal),
    ]
    for text, expected_index, expected_state in scenarios:
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=e) as comparo:
            transitions = comparo.run(source=source, data=data)
            for i, (m, s) in enumerate(transitions):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
            assert i == expected_index
            assert s is expected_state
def tnet_machine( name="TNET", context="tnet" ):
    """Accept a sentence of input bytes matching a tnetstring, and then loop.

    The machine is SIZE --':'--> COLON --> DATA --<type code>--> TYPE, where
    TYPE is terminal and converts the collected payload.  The returned dfa is
    itself terminal; per the original notes, the sub-machine terminates at the
    earliest match (non-greedy), so that an enclosing machine can reset it and
    immediately run it again.

    name    -- name given to the returned cpppo.dfa
    context -- context given to the returned cpppo.dfa
    """

    class tnet_parser( cpppo.state_input ):
        # Symbols accepted as a tnetstring type code.  Only ',', '$', '#' and
        # '~' are converted by process(); the rest trip its final assert.
        codes = (b'#'[0], b'}'[0], b']'[0], b','[0], b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type"""
            tntype = next( source )
            ours = self.context( path )
            raw = ours + '...data.input'
            try:
                collected = data[raw]
            except KeyError:
                # No payload was collected; treat it as empty instead of
                # failing, so the checks below (e.g. the zero-length assert
                # for '~') see b''.
                # NOTE(review): presumably this happens for zero-size
                # tnetstrings such as "0:~" -- confirm against data_parser.
                src = b''
            else:
                # array.tostring() is the Python 2 spelling of tobytes().
                src = ( collected.tostring() if sys.version_info.major < 3
                        else collected.tobytes() )

            if tntype == b','[0]:
                log.info("%5d bytes data: %s", len( src ), reprlib.repr( src ))
                data[ours] = src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), reprlib.repr( src ))
                data[ours] = src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours] = int( src )
                log.info("%5d int data: %s == %s", len( src ), reprlib.repr( src ),
                         reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                assert 0 == len( src )
                data[ours] = None
            else:
                assert False, "Invalid tnetstring type: %s" % tntype

    bytes_conf = {
        "alphabet": cpppo.type_bytes_iter,
        "typecode": cpppo.type_bytes_array_symbol,
    }

    SIZE = cpppo.integer_bytes( name="SIZE", context="size", decode='ascii' )
    COLON = cpppo.state_drop( name="COLON", **bytes_conf )
    DATA = data_parser( name="DATA", context="data", repeat="..size" )
    TYPE = tnet_parser( name="TYPE", context="type", terminal=True, **bytes_conf )

    SIZE[b':'[0]] = COLON
    SIZE[True] = None  # SIZE terminal, but only : acceptable
    COLON[None] = DATA
    for t in tnet_parser.codes:
        DATA[t] = TYPE
    DATA[True] = None  # DATA terminal, but only TNET codes acceptable

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
def main():
    """Demonstrate the README examples: a basic DFA, a composite CSV machine
    and a regex, all run over 'abbbb, ab' with every transition printed.

    After each machine, the accepted data and the remaining input are
    printed; the shared data artifact is printed once all machines have run.
    """
    # Basic DFA that accepts ab+
    E = cpppo.state('E')
    A = cpppo.state_input('A')
    B = cpppo.state_input('B', terminal=True)
    E['a'] = A
    A['b'] = B
    B['b'] = B
    machines = [cpppo.dfa('ab+', initial=E, context='basic')]

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    ABP = cpppo.dfa('ab+', initial=E, terminal=True)
    SEP = cpppo.state_drop('SEP')
    ABP[','] = SEP
    SEP[' '] = SEP
    SEP[None] = ABP
    machines.append(cpppo.dfa('CSV', initial=ABP, context='csv'))

    # A regular expression; the default dfa name is the regular expression
    # itself.
    machines.append(
        cpppo.regex(initial='(ab+)((,[ ]*)(ab+))*', context='regex'))

    step_format = "%s #%3d; next byte %3d: %-10.10r: %r"
    data = cpppo.dotdict()
    for machine in machines:
        path = machine.context() + '.input'  # default for state_input data
        source = cpppo.peekable(str('abbbb, ab'))
        with machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                print(step_format % (
                    m.name_centered(), i, source.sent, source.peek(),
                    data.get(path)))
        # Whatever the machine did not consume is still in the source.
        print("Accepted: %r; remaining: %r\n" % (
            data.get(path), ''.join(source)))
    print("Final: %r" % (data,))
def test_state():
    """Exercise basic state transitions, a simple dfa, and a composite dfa.

    A state processes its input (possibly none) and then uses the next input
    symbol to transition to another state, collecting results into a data
    artifact at its context.  All transitions must be configured in the target
    alphabet; when an encoder is supplied, input and transition symbols are
    encoded with it.  The string literals here are Unicode on both Python 2
    and 3, hence the unicode encoder.
    """
    if sys.version_info[0] < 3:
        text_type = unicode
    else:
        text_type = str
    unicodekwds = {
        'alphabet': text_type,
        'encoder': cpppo.type_unicode_encoder,
    }

    one = cpppo.state('one', **unicodekwds)
    two = cpppo.state_drop('two', **unicodekwds)
    one['a'] = two
    assert one['a'] is two

    source = cpppo.peeking('abc')

    # We can run state instances with/without acquisition
    runner = one.run(source=source)
    assert next(runner) == (None, two)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next(runner)

    with one:
        runner = one.run(source=source)
        assert source.peek() == 'a'
        assert next(runner) == (None, two)
        assert source.peek() == 'a'
        try:
            next(runner)
        except StopIteration:
            pass
        else:
            assert False, "Should have terminated"
        # A lone state yields its transition without consuming the symbol.
        assert source.peek() == 'a'

    # A state machine accepting a sequence of unicode a's
    a_s = cpppo.state("a_s", **unicodekwds)
    an_a = cpppo.state_input("a", terminal=True,
                             typecode=cpppo.type_unicode_array_symbol,
                             **unicodekwds)
    a_s['a'] = an_a
    an_a['a'] = an_a

    source = cpppo.peeking('aaaa')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=a_s) as aplus:
        # No data artifact is passed to run(), so ours must stay empty.
        for step, (mach, target) in enumerate(aplus.run(source=source)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     mach.name_centered(), step, target, source.sent,
                     source.peek(), data)
        assert step == 5
        assert source.peek() is None
        assert len(data) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  When the
    # lower level a's machine doesn't recognize the symbol, then the higher
    # level machine will recognize and discard
    sep = cpppo.state_drop("sep", **unicodekwds)
    csv = cpppo.dfa("csv", initial=a_s, terminal=True, **unicodekwds)
    csv[','] = sep
    sep[' '] = sep
    sep['π'] = sep
    sep[None] = csv

    source = cpppo.peeking('aaaa, a,π a')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=csv) as csvaplus:
        transitions = csvaplus.run(source=source, path="csv", data=data)
        for step, (mach, target) in enumerate(transitions):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     mach.name_centered(), step, target, source.sent,
                     source.peek(), data)
        assert step == 18
        assert source.peek() is None
        # Only the a's are collected; separators were dropped.
        assert data.csv.input.tounicode() == 'aaaaaa'
def test_state():
    """Check single-state runs, an a+ machine, and a separator-skipping csv machine.

    A state is expected to process its input (perhaps nothing, for a no-input
    state) and then use the next input symbol to transition to another state,
    collecting results into a data artifact at its context.  Every transition
    must be configured in the target alphabet; if an encoder is supplied, all
    input and transition symbols are encoded using it.  All string literals in
    this test are Unicode (Python 2 and 3), so a unicode encoder is used.
    """
    unicodekwds = dict(
        alphabet=( unicode if sys.version_info[0] < 3 else str ),
        encoder=cpppo.type_unicode_encoder,
    )

    s1 = cpppo.state( 'one', **unicodekwds )
    s2 = cpppo.state_drop( 'two', **unicodekwds )
    s1['a'] = s2
    assert s1['a'] is s2

    source = cpppo.peeking( 'abc' )
    expected = (None, s2)

    # We can run state instances with/without acquisition
    g = s1.run( source=source )
    assert next( g ) == expected
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next( g )

    with s1:
        g = s1.run( source=source )
        assert source.peek() == 'a'
        assert next( g ) == expected
        assert source.peek() == 'a'
        try:
            next( g )
        except StopIteration:
            pass
        else:
            assert False, "Should have terminated"
        assert source.peek() == 'a'

    # A state machine accepting a sequence of unicode a's
    a_s = cpppo.state( "a_s", **unicodekwds )
    an_a = cpppo.state_input( "a", terminal=True,
                              typecode=cpppo.type_unicode_array_symbol,
                              **unicodekwds )
    a_s['a'] = an_a
    an_a['a'] = an_a

    source = cpppo.peeking( 'aaaa' )
    data = cpppo.dotdict()
    with cpppo.dfa( initial=a_s ) as aplus:
        events = aplus.run( source=source )
        for i,(m,s) in enumerate( events ):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 5
        assert source.peek() is None
        # run() was not given our data artifact, so nothing lands in it.
        assert len( data ) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  When the lower
    # level a's machine doesn't recognize the symbol, then the higher level
    # machine will recognize and discard
    sep = cpppo.state_drop( "sep", **unicodekwds )
    csv = cpppo.dfa( "csv", initial=a_s, terminal=True, **unicodekwds )
    csv[','] = sep
    for skipped in ( ' ', 'π' ):
        sep[skipped] = sep
    sep[None] = csv

    source = cpppo.peeking( 'aaaa, a,π a' )
    data = cpppo.dotdict()
    with cpppo.dfa( initial=csv ) as csvaplus:
        events = csvaplus.run( source=source, path="csv", data=data )
        for i,(m,s) in enumerate( events ):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 18
        assert source.peek() is None
        assert data.csv.input.tounicode() == 'aaaaaa'