def test_struct():
    """Collect four bytes through chained input states and parse them as '<i'.

    The five bytes 01 02 03 80 99 are chained into the source three at a time,
    each time the source is found empty.  Every step's peeked input and next
    state are checked, and the parsed value at data.struct.val must finally be
    -2147286527, leaving the 0x99 byte unconsumed in the source.
    """
    typecode = cpppo.type_bytes_array_symbol
    alphabet = cpppo.type_bytes_iter
    context = 'val'

    def collector(name):
        # All byte-collecting states share the same alphabet, typecode and context.
        return cpppo.state_input(name, alphabet=alphabet, typecode=typecode, context=context)

    first = collector("First")
    second = collector("Second")
    first[True] = second
    third = collector("Third")
    second[True] = third
    fourth = collector("Fourth")
    third[True] = fourth
    fourth[None] = cpppo.state_struct("int32", context=context, format=str("<i"), terminal=True)

    # (peeked input, next state name) expected at each step; a name of None
    # means the machine reported no next state on that step.
    expected = (
        (b'\x01'[0], "First"),
        (b'\x02'[0], "Second"),
        (b'\x03'[0], "Third"),
        (b'\x80'[0], None),
        (b'\x80'[0], "Fourth"),
        (b'\x99'[0], "int32"),
        (b'\x99'[0], "int32"),
    )

    machine = cpppo.dfa(initial=first)
    with machine:
        remaining = b'\x01\x02\x03\x80\x99'
        chunk = 3
        source = cpppo.chainable()
        log.info("States; %r input, by %d", remaining, chunk)
        inp = None
        data = cpppo.dotdict()
        sequence = machine.run(source=source, path="struct", data=data)
        for num in range(10):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break

            label = cpppo.centeraxis(mch, 25, clip=True)
            log.info("%s <- %-10.10r test rcvd", label, inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", label, inp)
            if inp is None:
                if not remaining:
                    log.info("%s <- %-10.10r test source finished", label, inp)
                # Will load consecutive empty iterables; chainable must handle
                source.chain(remaining[:chunk])
                remaining = remaining[chunk:]
                inp = source.peek()
                log.info("%s <- %-10.10r test chain", label, inp)

            if num < len(expected):
                want_inp, want_name = expected[num]
                assert inp == want_inp
                if want_name is None:
                    assert sta is None
                else:
                    assert sta.name == want_name

        assert inp == b'\x99'[0]
        assert num == 6
        assert sta.name == "int32"
        assert data.struct.val == -2147286527
def test_regex():
    """Check regex-built machines step by step and by total symbols consumed.

    First the machine for 'a*b.*x' is stepped over 'aaab1230xoxx', checking
    the peeked input and the next state's name at every step.  Then the
    machines for '.*', '[^xyz]*' and '[^\\x00]*' are run over the same text
    followed by a NUL, checking the step count, the number of symbols sent
    and the text collected at data.input.input.
    """
    # NOTE(review): test_regex is defined more than once in this file; a later
    # definition shadows this one at import time.

    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)

    # (peeked input, next state name, source.sent) expected per step.  A None
    # input or name must match by identity; a None sent count is not checked.
    expected = [
        ('a', "0'", 0),  # Initial state does *not* consume a source symbol
        ('a', "0", 0),
        ('a', "0", 1),
        ('a', "0", 2),
        ('b', "2", None),
        ('1', "2", None),
        ('2', "2", None),
        ('3', "2", None),
        ('0', "2", None),
        ('x', "3", None),
        ('o', "2", None),  # Trans. from term. to non-term. state!
        ('x', "3", None),
        ('x', "3", None),
        (None, None, None),
    ]

    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # The machine must stop before stepping past the expectation table.
            assert num < len(expected)
            want_inp, want_name, want_sent = expected[num]
            if want_inp is None:
                assert inp is None
            else:
                assert inp == want_inp
            if want_name is None:
                assert sta is None
            else:
                assert sta.name == want_name
            if want_sent is not None:
                assert source.sent == want_sent

        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'

    regex = str('.*')
    machine = cpppo.regex(name=str('dot'), initial=regex, terminal=True)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the outcome.
            pass
        assert machine.terminal
        assert i == 14
        assert source.sent == 13
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx\x00'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx\x00'

    regex = str('[^xyz]*')
    machine = cpppo.regex(name=str('not_xyz'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the outcome.
            pass
        assert not machine.terminal
        assert i == 9
        assert source.sent == 8
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230'
        else:
            assert data.input.input.tounicode() == 'aaab1230'

    regex = str('[^\x00]*')
    machine = cpppo.regex(name=str('not_NUL'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 13
        assert source.sent == 12
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx'
def test_dfa():
    """Exercise a three-state DFA whose states consume no input.

    The states are run once directly from the initial state, then wrapped in
    a cpppo.dfa and run twice more: first with an empty source, then with
    b'abc' chained in.  Each step's peeked input and next state are checked.
    """
    # NOTE(review): test_dfa is defined more than once in this file; a later
    # definition shadows this one at import time.

    # Simple DFA with states consuming no input.  A NULL (None) state transition
    # doesn't require input for state change.  The Default (True) transition
    # requires input to make the transition, but none of these states consume
    # it, so it'll be left over at the end.
    start = cpppo.state("Initial")
    start[None] = middle = cpppo.state("Middle")
    middle[True] = cpppo.state("Terminal", terminal=True)

    source = cpppo.chainable()
    events = start.run(source=source)
    m, s = next(events)
    assert m is None
    assert s is not None and s.name == "Middle"
    try:
        next(events)
        assert False, "Expected no more non-transition events"
    except StopIteration:
        pass

    machine = cpppo.dfa(initial=start)

    with machine:
        log.info("DFA:")
        for initial in machine.initial.nodes():
            for inp, target in initial.edges():
                # Lazy logging arguments, matching the other log calls in this test.
                log.info("%s <- %-10.10r -> %s",
                         cpppo.centeraxis(initial, 25, clip=True), inp, target)

        # Running with no input will yield the initial state, with None input; since it is a NULL
        # state (no input processed), it will simply attempt to transition.  This will require the
        # next input from source, which is empty, so it will return input,state=(None, None)
        # indicating a non-terminal state and no input left.  This gives the caller an opportunity
        # to reload input and try again.  If a loop is detected (same state and input conditions
        # seen repeatedly), the DFA will terminate; if not in a terminal state, an exception will be
        # raised.
        log.info("States; No input")
        source = cpppo.chainable()
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                sequence = None
                break
            except cpppo.NonTerminal as e:
                assert "non-terminal state" in str(e)
                break

            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp is None
                assert sta.name == "Initial"
            if num == 1:
                assert inp is None
                assert sta.name == "Middle"
            if num == 2:
                assert inp is None
                assert sta is None  # And no more no-input transitions
            assert num < 3  # If we get here, we didn't detect loop
        assert num == 3
        # since the iterator did not stop cleanly (after processing a state's input,
        # and then trying to determine the next state), it'll continue indefinitely
        assert sta is None
        assert sequence is not None

        # Try with some input loaded into source stream, using an identical generator sequence.
        # Only the first element is gotten, and is reused for every NULL state transition, and is
        # left over at the end.
        log.info("States; 'abc' input")
        assert source.peek() is None
        source.chain(b'abc')
        assert source.peek() == b'a'[0]  # python2: str, python3: int
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                break
            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp == b'a'[0]
                assert sta.name == "Initial"
            if num == 1:
                assert inp == b'a'[0]
                assert sta.name == "Middle"
            if num == 2:
                assert inp == b'a'[0]
                assert sta.name == "Terminal"
            assert num < 3
        assert num == 3
        assert inp == b'a'[0]
        assert sta.name == "Terminal"
def test_regex():
    """Check regex-built machines step by step and by total symbols consumed.

    First the machine for 'a*b.*x' is stepped over 'aaab1230xoxx', checking
    the peeked input and the next state's name at every step.  Then the
    machines for '.*', '[^xyz]*' and '[^\\x00]*' are run over the same text
    followed by a NUL, checking the step count, the number of symbols sent
    and the text collected at data.input.input.
    """
    # NOTE(review): test_regex is defined more than once in this file; only
    # the last definition is visible after import.

    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)

    # (peeked input, next state name, source.sent) expected per step.  A None
    # input or name must match by identity; a None sent count is not checked.
    steps = [
        ('a', "0'", 0),  # Initial state does *not* consume a source symbol
        ('a', "0", 0),
        ('a', "0", 1),
        ('a', "0", 2),
        ('b', "2", None),
        ('1', "2", None),
        ('2', "2", None),
        ('3', "2", None),
        ('0', "2", None),
        ('x', "3", None),
        ('o', "2", None),  # Trans. from term. to non-term. state!
        ('x', "3", None),
        ('x', "3", None),
        (None, None, None),
    ]

    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # The machine must stop before stepping past the expectation table.
            assert num < len(steps)
            want_inp, want_name, want_sent = steps[num]
            if want_inp is None:
                assert inp is None
            else:
                assert inp == want_inp
            if want_name is None:
                assert sta is None
            else:
                assert sta.name == want_name
            if want_sent is not None:
                assert source.sent == want_sent

        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'

    regex = str('.*')
    machine = cpppo.regex(name=str('dot'), initial=regex, terminal=True)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the outcome.
            pass
        assert machine.terminal
        assert i == 14
        assert source.sent == 13
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx\x00'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx\x00'

    regex = str('[^xyz]*')
    machine = cpppo.regex(name=str('not_xyz'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the outcome.
            pass
        assert not machine.terminal
        assert i == 9
        assert source.sent == 8
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230'
        else:
            assert data.input.input.tounicode() == 'aaab1230'

    regex = str('[^\x00]*')
    machine = cpppo.regex(name=str('not_NUL'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 13
        assert source.sent == 12
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx'
def test_dfa():
    """Exercise a three-state DFA whose states consume no input.

    The states are run once directly from the initial state, then wrapped in
    a cpppo.dfa and run twice more: first with an empty source, then with
    b'abc' chained in.  Each step's peeked input and next state are checked.
    """
    # NOTE(review): test_dfa is defined more than once in this file; only the
    # last definition is visible after import.

    # Simple DFA with states consuming no input.  A NULL (None) state transition
    # doesn't require input for state change.  The Default (True) transition
    # requires input to make the transition, but none of these states consume
    # it, so it'll be left over at the end.
    a = cpppo.state("Initial")
    a[None] = b = cpppo.state("Middle")
    b[True] = cpppo.state("Terminal", terminal=True)

    source = cpppo.chainable()
    i = a.run(source=source)
    m, s = next(i)
    assert m is None
    assert s is not None and s.name == "Middle"
    try:
        next(i)
        assert False, "Expected no more non-transition events"
    except StopIteration:
        pass

    machine = cpppo.dfa(initial=a)

    with machine:
        log.info("DFA:")
        for initial in machine.initial.nodes():
            for inp, target in initial.edges():
                # Lazy logging arguments, matching the other log calls in this test.
                log.info("%s <- %-10.10r -> %s",
                         cpppo.centeraxis(initial, 25, clip=True), inp, target)

        # Running with no input will yield the initial state, with None input; since it is a NULL
        # state (no input processed), it will simply attempt to transition.  This will require the
        # next input from source, which is empty, so it will return input,state=(None, None)
        # indicating a non-terminal state and no input left.  This gives the caller an opportunity
        # to reload input and try again.  If a loop is detected (same state and input conditions
        # seen repeatedly), the DFA will terminate; if not in a terminal state, an exception will be
        # raised.
        log.info("States; No input")
        source = cpppo.chainable()
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                sequence = None
                break
            except cpppo.NonTerminal as e:
                assert "non-terminal state" in str(e)
                break

            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp is None
                assert sta.name == "Initial"
            if num == 1:
                assert inp is None
                assert sta.name == "Middle"
            if num == 2:
                assert inp is None
                assert sta is None  # And no more no-input transitions
            assert num < 3  # If we get here, we didn't detect loop
        assert num == 3
        # since the iterator did not stop cleanly (after processing a state's input,
        # and then trying to determine the next state), it'll continue indefinitely
        assert sta is None
        assert sequence is not None

        # Try with some input loaded into source stream, using an identical generator sequence.
        # Only the first element is gotten, and is reused for every NULL state transition, and is
        # left over at the end.
        log.info("States; 'abc' input")
        assert source.peek() is None
        source.chain(b'abc')
        assert source.peek() == b'a'[0]  # python2: str, python3: int
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                break
            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp == b'a'[0]
                assert sta.name == "Initial"
            if num == 1:
                assert inp == b'a'[0]
                assert sta.name == "Middle"
            if num == 2:
                assert inp == b'a'[0]
                assert sta.name == "Terminal"
            assert num < 3
        assert num == 3
        assert inp == b'a'[0]
        assert sta.name == "Terminal"
def test_regex():
    """Step the machine for 'a*b.*x' over 'aaab1230xoxx'.

    The peeked input and the next state's name are checked at every step
    (plus source.sent for the first four), and the run must end on step 14
    with no input left and the machine's current state named '3'.
    """
    # NOTE(review): test_regex is defined more than once in this file; only
    # the last definition is visible after import.

    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)

    # (peeked input, next state name, source.sent) expected per step.  A None
    # input or name must match by identity; a None sent count is not checked.
    expected = [
        ('a', "0'", 0),  # Initial state does *not* consume a source symbol
        ('a', "0", 0),
        ('a', "0", 1),
        ('a', "0", 2),
        ('b', "2", None),
        ('1', "2", None),
        ('2', "2", None),
        ('3', "2", None),
        ('0', "2", None),
        ('x', "3", None),
        ('o', "2", None),  # Trans. from term. to non-term. state!
        ('x', "3", None),
        ('x', "3", None),
        (None, None, None),
    ]

    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # The machine must stop before stepping past the expectation table.
            assert num < len(expected)
            want_inp, want_name, want_sent = expected[num]
            if want_inp is None:
                assert inp is None
            else:
                assert inp == want_inp
            if want_name is None:
                assert sta is None
            else:
                assert sta.name == want_name
            if want_sent is not None:
                assert source.sent == want_sent

        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'
def test_regex():
    """Step the machine for "a*b.*x" over "aaab1230xoxx".

    The peeked input and the next state's name are checked at every step
    (plus source.sent for the first four), and the run must end on step 14
    with no input left and the machine's current state named "3".
    """
    # NOTE(review): test_regex is defined more than once in this file; earlier
    # definitions are shadowed by later ones at import time.

    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str("a*b.*x")
    machine = cpppo.regex(name=str("test1"), initial=regex)

    # (peeked input, next state name, source.sent) expected per step.  A None
    # input or name must match by identity; a None sent count is not checked.
    expected = [
        ("a", "0'", 0),  # Initial state does *not* consume a source symbol
        ("a", "0", 0),
        ("a", "0", 1),
        ("a", "0", 2),
        ("b", "2", None),
        ("1", "2", None),
        ("2", "2", None),
        ("3", "2", None),
        ("0", "2", None),
        ("x", "3", None),
        ("o", "2", None),  # Trans. from term. to non-term. state!
        ("x", "3", None),
        ("x", "3", None),
        (None, None, None),
    ]

    with machine:
        source = cpppo.chainable(str("aaab1230xoxx"))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # The machine must stop before stepping past the expectation table.
            assert num < len(expected)
            want_inp, want_name, want_sent = expected[num]
            if want_inp is None:
                assert inp is None
            else:
                assert inp == want_inp
            if want_name is None:
                assert sta is None
            else:
                assert sta.name == want_name
            if want_sent is not None:
                assert source.sent == want_sent

        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == "3"