def main():
    """Run the README's three "ab+" recognizers over one input and print each step.

    A hand-wired DFA, a composite machine that ignores ",[ ]*" separators, and
    a regular-expression machine are each run over 'abbbb, ab'.  They all
    collect into one shared dotdict; each machine's result is read back from
    '<context>.input'.
    """
    # Hand-wired DFA accepting ab+
    start = cpppo.state('E')
    seen_a = cpppo.state_input('A')
    seen_b = cpppo.state_input('B', terminal=True)
    start['a'] = seen_a
    seen_a['b'] = seen_b
    seen_b['b'] = seen_b
    basic = cpppo.dfa('ab+', initial=start, context='basic')

    # Composite machine: the ab+ sub-machine, ignoring ,[ ]* separators
    ab_plus = cpppo.dfa('ab+', initial=start, terminal=True)
    separator = cpppo.state_drop('SEP')
    ab_plus[','] = separator
    separator[' '] = separator
    separator[None] = ab_plus
    csv = cpppo.dfa('CSV', initial=ab_plus, context='csv')

    # A regular expression; the default dfa name is the regular expression itself.
    regex = cpppo.regex(initial='(ab+)((,[ ]*)(ab+))*', context='regex')

    step_format = "%s #%3d; next byte %3d: %-10.10r: %r"
    data = cpppo.dotdict()
    for machine in (basic, csv, regex):
        # '.input' is the default location for state_input data
        path = machine.context() + '.input'
        source = cpppo.peekable(str('abbbb, ab'))
        with machine:
            transitions = machine.run(source=source, data=data)
            for step, (sub, _target) in enumerate(transitions):
                print(step_format % (
                    sub.name_centered(), step, source.sent, source.peek(),
                    data.get(path)))
        # Whatever the machine did not consume is still in the source.
        leftover = ''.join(source)
        print("Accepted: %r; remaining: %r\n" % (data.get(path), leftover))
    print("Final: %r" % (data))
def test_readme():
    """Check the README's ab+ DFA on its own, then wrapped in a separator-skipping composite.

    The first run stops at the ',' it cannot accept; the composite run
    consumes the whole input.
    """
    step_format = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    # Hand-wired DFA accepting ab+
    start = cpppo.state("E")
    seen_a = cpppo.state_input("A")
    seen_b = cpppo.state_input("B", terminal=True)
    start['a'] = seen_a
    seen_a['b'] = seen_b
    seen_b['b'] = seen_b

    data = cpppo.dotdict()
    source = cpppo.peekable(str('abbbb,ab'))
    with cpppo.dfa(initial=start) as abplus:
        transitions = abplus.run(source=source, path="ab+", data=data)
        for step, (sub, target) in enumerate(transitions):
            log.info(step_format, sub.name_centered(), step, target,
                     source.sent, source.peek(), data)
        assert step == 5
    # The plain ab+ machine leaves the separator unconsumed.
    assert source.peek() == str(',')

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    csv = cpppo.dfa("CSV", initial=start, terminal=True)
    separator = cpppo.state_drop("SEP")
    csv[','] = separator
    separator[' '] = separator
    separator[None] = csv

    source = cpppo.peekable(str('abbbb, ab'))
    with cpppo.dfa(initial=csv) as r2:
        transitions = r2.run(source=source, path="readme_CSV", data=data)
        for step, (sub, target) in enumerate(transitions):
            log.info(step_format, sub.name_centered(), step, target,
                     source.sent, source.peek(), data)
        assert step == 14
    assert source.peek() is None
def test_struct():
    """Drive a four-byte-input + "<i" struct machine from a source fed in 3-byte segments.

    Five bytes of material are chained into an initially empty source whenever
    it runs dry.  The test checks the (next symbol, reported state) pair at
    each of the first seven steps, and finally the value stored at
    data.struct.val.
    """
    shared = dict(
        alphabet=cpppo.type_bytes_iter,
        typecode=cpppo.type_bytes_array_symbol,
        context='val',
    )
    first = cpppo.state_input("First", **shared)
    second = cpppo.state_input("Second", **shared)
    first[True] = second
    third = cpppo.state_input("Third", **shared)
    second[True] = third
    fourth = cpppo.state_input("Fourth", **shared)
    third[True] = fourth
    fourth[None] = cpppo.state_struct(
        "int32", context=shared['context'], format=str("<i"), terminal=True)

    # Expected (peeked symbol, state name) per step; a None name means the
    # machine reported no next state (step 3: the first segment is exhausted).
    expected = (
        (b'\x01'[0], "First"),
        (b'\x02'[0], "Second"),
        (b'\x03'[0], "Third"),
        (b'\x80'[0], None),
        (b'\x80'[0], "Fourth"),
        (b'\x99'[0], "int32"),
        (b'\x99'[0], "int32"),
    )

    machine = cpppo.dfa(initial=first)
    with machine:
        material = b'\x01\x02\x03\x80\x99'
        segment = 3
        source = cpppo.chainable()
        log.info("States; %r input, by %d", material, segment)
        inp = None
        data = cpppo.dotdict()
        path = "struct"
        sequence = machine.run(source=source, path=path, data=data)
        for num in range(10):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd",
                     cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                if not material:
                    log.info("%s <- %-10.10r test source finished",
                             cpppo.centeraxis(mch, 25, clip=True), inp)
                # Will load consecutive empty iterables; chainable must handle
                source.chain(material[:segment])
                material = material[segment:]
                inp = source.peek()
                log.info("%s <- %-10.10r test chain",
                         cpppo.centeraxis(mch, 25, clip=True), inp)

            # Steps beyond the table are not checked individually.
            if num < len(expected):
                want_inp, want_name = expected[num]
                assert inp == want_inp
                if want_name is None:
                    assert sta is None
                else:
                    assert sta.name == want_name

        assert inp == b'\x99'[0]
        assert num == 6
        assert sta.name == "int32"
        # Bytes 01 02 03 80 read as a little-endian signed 32-bit integer.
        assert data.struct.val == -2147286527
def data_parser(**kwds):
    """Build a dfa around a single terminal "BYTE" input state, for parsing raw bytes.

    Unless the caller overrides them, the machine is named "DATA", collects
    into the "data" context, and takes its repeat count from "..size".  All
    keywords are forwarded to cpppo.dfa.
    """
    # Fill in only the keywords the caller left out.
    for keyword, fallback in (
        ("name", "DATA"),
        ("context", "data"),
        ("repeat", "..size"),
    ):
        kwds.setdefault(keyword, fallback)
    byte_state = cpppo.state_input(name="BYTE", terminal=True, **bytes_conf)
    return cpppo.dfa(initial=byte_state, **kwds)
def test_decide():
    """Transitions may be chosen from collected data, not only from the next source symbol.

    The machine reads 'a', then two space-separated integers into data.i1 and
    data.i2, and then picks one of three terminal states by comparing them.
    """
    enter = cpppo.state("enter")
    letter = cpppo.state_input("a", context="a")
    enter["a"] = letter
    gap1 = cpppo.state_drop("s1")
    letter[" "] = gap1
    gap1[" "] = gap1
    first = cpppo.integer("i1", context="i1")
    gap1[None] = first
    gap2 = cpppo.state_drop("s2")
    first[" "] = gap2
    gap2[" "] = gap2
    second = cpppo.integer("i2", context="i2")
    gap2[None] = second

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)

    # Registered in this order: the two predicate-guarded decisions, then the
    # unconditional target.
    second[None] = cpppo.decide(
        "isless",
        less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2,
    )
    second[None] = cpppo.decide(
        "isgreater",
        greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2,
    )
    second[None] = equal

    step_format = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"
    # (input text, index of the final transition, expected final state)
    cases = (
        ("a 1 2", 11, less),
        ("a 33 33", 13, equal),
    )
    for text, last_step, final_state in cases:
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=enter) as comparo:
            for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
                log.info(step_format, m.name_centered(), i, s,
                         source.sent, source.peek(), data)
            assert i == last_step
            assert s is final_state
def main():
    """Demonstrate three equivalent ways of recognizing "ab+" with cpppo, as in the README.

    Each machine (plain DFA, composite with ",[ ]*" separators ignored, and
    regular expression) is run over the text 'abbbb, ab'.  Every transition is
    printed, followed by what was accepted and what remains in the source.
    """
    # -- Plain DFA accepting ab+ -------------------------------------------
    entry = cpppo.state('E')
    state_a = cpppo.state_input('A')
    state_b = cpppo.state_input('B', terminal=True)
    entry['a'] = state_a
    state_a['b'] = state_b
    state_b['b'] = state_b
    machines = [cpppo.dfa('ab+', initial=entry, context='basic')]

    # -- Composite machine: ab+ with ,[ ]* separators ignored --------------
    inner = cpppo.dfa('ab+', initial=entry, terminal=True)
    drop = cpppo.state_drop('SEP')
    inner[','] = drop
    drop[' '] = drop
    drop[None] = inner
    machines.append(cpppo.dfa('CSV', initial=inner, context='csv'))

    # -- Regular expression; the default dfa name is the expression itself -
    machines.append(
        cpppo.regex(initial='(ab+)((,[ ]*)(ab+))*', context='regex'))

    data = cpppo.dotdict()
    for machine in machines:
        # state_input data lands under '.input' by default
        path = machine.context() + '.input'
        source = cpppo.peekable(str('abbbb, ab'))
        with machine:
            for index, (current, _next_state) in enumerate(
                    machine.run(source=source, data=data)):
                line = "%s #%3d; next byte %3d: %-10.10r: %r" % (
                    current.name_centered(), index, source.sent,
                    source.peek(), data.get(path))
                print(line)
        print("Accepted: %r; remaining: %r\n" % (
            data.get(path), ''.join(source)))
    print("Final: %r" % (data))
def test_decide():
    """State transitions can be decided from the data collected so far.

    After an 'a' and two space-separated integers (stored as data.i1 and
    data.i2), the machine ends in "less", "greater" or "equal" according to
    how the two integers compare.
    """
    start = cpppo.state("enter")
    got_a = cpppo.state_input("a", context='a')
    start['a'] = got_a
    skip1 = cpppo.state_drop("s1")
    got_a[' '] = skip1
    skip1[' '] = skip1
    int1 = cpppo.integer("i1", context='i1')
    skip1[None] = int1
    skip2 = cpppo.state_drop("s2")
    int1[' '] = skip2
    skip2[' '] = skip2
    int2 = cpppo.integer("i2", context='i2')
    skip2[None] = int2

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)

    # Order matters here: the guarded decisions are added first, and the
    # plain transition to "equal" last.
    is_less = cpppo.decide(
        "isless", less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2)
    int2[None] = is_less
    is_greater = cpppo.decide(
        "isgreater", greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2)
    int2[None] = is_greater
    int2[None] = equal

    # Each scenario: input text, index of the last transition, final state.
    scenarios = [
        ('a 1 2', 12, less),
        ('a 33 33', 14, equal),
    ]
    for text, expected_last, expected_state in scenarios:
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=start) as comparo:
            transitions = comparo.run(source=source, data=data)
            for i, (m, s) in enumerate(transitions):
                log.info(
                    "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                    m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == expected_last
            assert s is expected_state
def test_state():
    """Exercise single states and small machines configured for a Unicode alphabet.

    A state processes its input (possibly none) and then uses the next input
    symbol to choose a transition; results are collected through its context
    into a data artifact.  Transitions must be expressed in the target
    alphabet, so every state here is given a unicode alphabet and encoder: all
    string literals in this test are Unicode on both Python 2 and 3.
    """
    unicodekwds = dict(
        alphabet=unicode if sys.version_info[0] < 3 else str,
        encoder=cpppo.type_unicode_encoder,
    )
    one = cpppo.state('one', **unicodekwds)
    two = cpppo.state_drop('two', **unicodekwds)
    one['a'] = two
    assert one['a'] is two

    source = cpppo.peeking('abc')

    # A state can be run without being acquired first ...
    runner = one.run(source=source)
    assert next(runner) == (None, two)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next(runner)
    # ... and also while acquired; neither run consumes the 'a'.
    with one:
        runner = one.run(source=source)
        assert source.peek() == 'a'
        assert next(runner) == (None, two)
        assert source.peek() == 'a'
        try:
            next(runner)
        except StopIteration:
            pass
        else:
            assert False, "Should have terminated"
        assert source.peek() == 'a'

    step_format = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    # A state machine accepting a sequence of unicode a's
    a_s = cpppo.state("a_s", **unicodekwds)
    an_a = cpppo.state_input(
        "a", terminal=True, typecode=cpppo.type_unicode_array_symbol,
        **unicodekwds)
    a_s['a'] = an_a
    an_a['a'] = an_a

    source = cpppo.peeking('aaaa')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=a_s) as aplus:
        # No data is handed to run(), so the local dotdict must stay empty.
        for i, (m, s) in enumerate(aplus.run(source=source)):
            log.info(step_format, m.name_centered(), i, s,
                     source.sent, source.peek(), data)
        assert i == 5
        assert source.peek() is None
        assert len(data) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  When the
    # lower level a's machine doesn't recognize the symbol, the higher level
    # machine will recognize and discard it.
    sep = cpppo.state_drop("sep", **unicodekwds)
    csv = cpppo.dfa("csv", initial=a_s, terminal=True, **unicodekwds)
    csv[','] = sep
    sep[' '] = sep
    sep['π'] = sep
    sep[None] = csv

    source = cpppo.peeking('aaaa, a,π a')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=csv) as csvaplus:
        transitions = csvaplus.run(source=source, path="csv", data=data)
        for i, (m, s) in enumerate(transitions):
            log.info(step_format, m.name_centered(), i, s,
                     source.sent, source.peek(), data)
        assert i == 18
    assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'
def test_state():
    """Verify state transitions and input collection when symbols are Unicode.

    Each state handles its own input (if any) and then transitions on the
    next symbol, collecting results via its context into a data artifact.
    Because an encoder is supplied, input and transition symbols are encoded
    with it; the string literals below are Unicode in Python 2 and 3 alike,
    hence the unicode alphabet/encoder passed to every state.
    """
    if sys.version_info[0] < 3:
        text_type = unicode
    else:
        text_type = str
    unicodekwds = {
        'alphabet': text_type,
        'encoder': cpppo.type_unicode_encoder,
    }

    first = cpppo.state('one', **unicodekwds)
    dropper = cpppo.state_drop('two', **unicodekwds)
    first['a'] = dropper
    assert first['a'] is dropper

    source = cpppo.peeking('abc')

    # We can run state instances with/without acquisition
    steps = first.run(source=source)
    assert next(steps) == (None, dropper)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next(steps)

    with first:
        steps = first.run(source=source)
        assert source.peek() == 'a'
        assert next(steps) == (None, dropper)
        assert source.peek() == 'a'
        try:
            next(steps)
        except StopIteration:
            pass  # expected: the generator is exhausted after one transition
        else:
            assert False, "Should have terminated"
        assert source.peek() == 'a'

    # A state machine accepting a sequence of unicode a's
    a_s = cpppo.state("a_s", **unicodekwds)
    an_a = cpppo.state_input(
        "a",
        terminal=True,
        typecode=cpppo.type_unicode_array_symbol,
        **unicodekwds
    )
    a_s['a'] = an_a
    an_a['a'] = an_a

    source = cpppo.peeking('aaaa')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=a_s) as aplus:
        # run() is not given the dotdict, so nothing may be stored in it.
        for i, (m, s) in enumerate(aplus.run(source=source)):
            log.info(
                "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 5
        assert source.peek() is None
        assert len(data) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  Symbols the
    # lower level a's machine doesn't recognize are recognized and discarded
    # by the higher level machine.
    sep = cpppo.state_drop("sep", **unicodekwds)
    csv = cpppo.dfa("csv", initial=a_s, terminal=True, **unicodekwds)
    csv[','] = sep
    for skipped in (' ', 'π'):
        sep[skipped] = sep
    sep[None] = csv

    source = cpppo.peeking('aaaa, a,π a')
    data = cpppo.dotdict()
    with cpppo.dfa(initial=csv) as csvaplus:
        for i, (m, s) in enumerate(
                csvaplus.run(source=source, path="csv", data=data)):
            log.info(
                "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 18
    assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'