Esempio n. 1
0
def test_struct():
    """Drive a chain of four input states into a terminal int32 struct state.

    Four ``state_input`` states ("First" through "Fourth") are linked by
    ``True`` transitions, and "Fourth" has a ``None`` transition to a terminal
    ``state_struct`` using format ``"<i"``.  The five bytes of ``material`` are
    chained onto the source ``segment`` bytes at a time, whenever the source
    peeks as empty.  Each step's (input, state) pair is checked against a
    table, and finally the value stored at ``data.struct.val`` is checked.
    """
    symbol_type = cpppo.type_bytes_array_symbol
    alphabet = cpppo.type_bytes_iter
    context = 'val'

    def input_state(name):
        # All four input states share the same alphabet, typecode and context.
        return cpppo.state_input(name, alphabet=alphabet, typecode=symbol_type, context=context)

    first = input_state("First")
    second = input_state("Second")
    first[True] = second
    third = input_state("Third")
    second[True] = third
    fourth = input_state("Fourth")
    third[True] = fourth
    fourth[None] = cpppo.state_struct("int32", context=context, format=str("<i"), terminal=True)

    # Expected (peeked input, next state name) per step; a state name of None
    # means the machine reported no next state on that step.
    expected = (
        (b'\x01'[0], "First"),
        (b'\x02'[0], "Second"),
        (b'\x03'[0], "Third"),
        (b'\x80'[0], None),
        (b'\x80'[0], "Fourth"),
        (b'\x99'[0], "int32"),
        (b'\x99'[0], "int32"),
    )

    machine = cpppo.dfa(initial=first)
    with machine:
        material = b'\x01\x02\x03\x80\x99'
        segment = 3
        source = cpppo.chainable()
        log.info("States; %r input, by %d", material, segment)
        inp = None
        data = cpppo.dotdict()
        sequence = machine.run(source=source, path="struct", data=data)

        def report(fmt):
            # Log the current machine and peeked input using the given format.
            log.info(fmt, cpppo.centeraxis(mch, 25, clip=True), inp)

        for num in range(10):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                report("%s <- %-10.10r test done")
                break
            report("%s <- %-10.10r test rcvd")
            if sta is None:
                report("%s <- %-10.10r test no next state")
            if inp is None:
                if not material:
                    report("%s <- %-10.10r test source finished")
                # Will load consecutive empty iterables; chainable must handle
                source.chain(material[:segment])
                material = material[segment:]
                inp = source.peek()
                report("%s <- %-10.10r test chain")

            if num < len(expected):
                want_inp, want_state = expected[num]
                assert inp == want_inp
                if want_state is None:
                    assert sta is None
                else:
                    assert sta.name == want_state
        assert inp == b'\x99'[0]
        assert num == 6
        assert sta.name == "int32"
        assert data.struct.val == -2147286527
Esempio n. 2
0
def test_regex():
    """Exercise ``cpppo.regex`` machines built from four different patterns.

    The first machine ('a*b.*x') is stepped manually and the peeked input,
    next state name and (for the first steps) ``source.sent`` are checked on
    every step.  The remaining three machines (any-character, a negated
    character class, and a not-NUL class) are run to completion and only the
    step count, ``source.sent`` and the collected input are checked.
    """
    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd",
                     cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished",
                         cpppo.centeraxis(mch, 25, clip=True), inp)

            # Initial state does *not* consume a source symbol
            if num == 0:
                assert inp == 'a'
                assert sta.name == "0'"
                assert source.sent == 0
            if num == 1:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 0
            if num == 2:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 1
            if num == 3:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 2
            if num == 4:
                assert inp == 'b'
                assert sta.name == "2"
            if num == 5:
                assert inp == '1'
                assert sta.name == "2"
            if num == 6:
                assert inp == '2'
                assert sta.name == "2"
            if num == 7:
                assert inp == '3'
                assert sta.name == "2"
            if num == 8:
                assert inp == '0'
                assert sta.name == "2"
            if num == 9:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 10:
                assert inp == 'o'
                assert sta.name == "2"  # Trans. from term. to non-term. state!))
            if num == 11:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 12:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 13:
                # Identity comparison: None is a singleton.
                assert inp is None
                assert sta is None
            assert num < 14
        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'

    # Any-character pattern on a machine constructed with terminal=True.
    regex = str('.*')
    machine = cpppo.regex(name=str('dot'), initial=regex, terminal=True)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the end state.
            pass
        assert machine.terminal
        assert i == 14
        assert source.sent == 13
        # The collected input is converted with a version-specific method.
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx\x00'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx\x00'

    # Negated class: only the symbols before the first 'x' are expected to be collected.
    regex = str('[^xyz]*')
    machine = cpppo.regex(name=str('not_xyz'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(),
                         data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the assertions below check the end state.
            pass
        assert not machine.terminal
        assert i == 9
        assert source.sent == 8
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230'
        else:
            assert data.input.input.tounicode() == 'aaab1230'

    # Not-NUL class: everything before the trailing NUL is expected to be collected.
    regex = str('[^\x00]*')
    machine = cpppo.regex(name=str('not_NUL'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 13
        assert source.sent == 12
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx'
Esempio n. 3
0
def test_dfa():
    """Check NULL and default transitions of a three-state DFA.

    Builds "Initial" -> (None) -> "Middle" -> (True) -> "Terminal", first
    running the bare initial state, then wrapping it in a ``cpppo.dfa`` and
    running that once with an empty source and once with b'abc' chained on.
    """
    # Simple DFA with states consuming no input.  A NULL (None) state transition
    # doesn't require input for state change.  The Default (True) transition
    # requires input to make the transition, but none of these states consume
    # it, so it'll be left over at the end.
    a = cpppo.state("Initial")
    a[None] = b = cpppo.state("Middle")
    b[True] = cpppo.state("Terminal", terminal=True)

    source = cpppo.chainable()
    i = a.run(source=source)
    m, s = next(i)
    assert m is None
    assert s is not None and s.name == "Middle"
    try:
        next(i)
    except StopIteration:
        pass
    else:
        assert False, "Expected no more non-transition events"

    machine = cpppo.dfa(initial=a)

    with machine:
        log.info("DFA:")
        for initial in machine.initial.nodes():
            for inp, target in initial.edges():
                # Pass arguments lazily, as the other log calls in this test do.
                log.info("%s <- %-10.10r -> %s",
                         cpppo.centeraxis(initial, 25, clip=True), inp, target)

        # Running with no input will yield the initial state, with None input; since it is a NULL
        # state (no input processed), it will simply attempt to transition.  This will require the
        # next input from source, which is empty, so it will return input,state=(None, None)
        # indicating a non-terminal state and no input left.  This gives the caller an opportunity
        # to reload input and try again.  If a loop is detected (same state and input conditions
        # seen repeatedly), the DFA will terminate; if not in a terminal state, an exception will be
        # raised.
        log.info("States; No input")
        source = cpppo.chainable()
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                # Marks a clean stop; the assertion after the loop expects this NOT to happen.
                sequence = None
                break
            except cpppo.NonTerminal as e:
                assert "non-terminal state" in str(e)
                break

            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp is None
                assert sta.name == "Initial"
            if num == 1:
                assert inp is None
                assert sta.name == "Middle"
            if num == 2:
                assert inp is None
                assert sta is None  # And no more no-input transitions
            assert num < 3  # If we get here, we didn't detect loop
        assert num == 3

        # since the iterator did not stop cleanly (after processing a state's input,
        # and then trying to determine the next state), it'll continue indefinitely
        assert sta is None
        assert sequence is not None

        # Try with some input loaded into source stream, using an identical generator sequence.
        # Only the first element is gotten, and is reused for every NULL state transition, and is
        # left over at the end.
        log.info("States; 'abc' input")
        assert source.peek() is None
        source.chain(b'abc')
        assert source.peek() == b'a'[0]  # python2: str, python3: int
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                break
            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp == b'a'[0]
                assert sta.name == "Initial"
            if num == 1:
                assert inp == b'a'[0]
                assert sta.name == "Middle"
            if num == 2:
                assert inp == b'a'[0]
                assert sta.name == "Terminal"
            assert num < 3
        assert num == 3
        assert inp == b'a'[0]
        assert sta.name == "Terminal"
Esempio n. 4
0
def test_regex():
    """Exercise ``cpppo.regex`` machines built from four different patterns.

    The 'a*b.*x' machine is stepped manually, checking the peeked input and
    next state name on each step (plus ``source.sent`` on the first four).
    The other three machines are run to completion, checking the number of
    steps, ``source.sent`` and the input collected into ``data``.
    """
    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # Initial state does *not* consume a source symbol
            if num == 0:
                assert inp == 'a'
                assert sta.name == "0'"
                assert source.sent == 0
            if num == 1:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 0
            if num == 2:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 1
            if num == 3:
                assert inp == 'a'
                assert sta.name == "0"
                assert source.sent == 2
            if num == 4:
                assert inp == 'b'
                assert sta.name == "2"
            if num == 5:
                assert inp == '1'
                assert sta.name == "2"
            if num == 6:
                assert inp == '2'
                assert sta.name == "2"
            if num == 7:
                assert inp == '3'
                assert sta.name == "2"
            if num == 8:
                assert inp == '0'
                assert sta.name == "2"
            if num == 9:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 10:
                assert inp == 'o'
                assert sta.name == "2"  # Trans. from term. to non-term. state!))
            if num == 11:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 12:
                assert inp == 'x'
                assert sta.name == "3"
            if num == 13:
                # None is a singleton; compare by identity rather than equality.
                assert inp is None
                assert sta is None
            assert num < 14
        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'

    # Any-character pattern on a machine constructed with terminal=True.
    regex = str('.*')
    machine = cpppo.regex(name=str('dot'), initial=regex, terminal=True)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                         i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the end state is asserted below.
            pass
        assert machine.terminal
        assert i == 14
        assert source.sent == 13
        # The collected input is converted with a version-specific method.
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx\x00'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx\x00'

    # Negated class: only the symbols before the first 'x' are expected to be collected.
    regex = str('[^xyz]*')
    machine = cpppo.regex(name=str('not_xyz'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        try:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                         i, s, source.sent, source.peek(), data)
        except cpppo.NonTerminal:
            # Deliberately tolerated; the end state is asserted below.
            pass
        assert not machine.terminal
        assert i == 9
        assert source.sent == 8
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230'
        else:
            assert data.input.input.tounicode() == 'aaab1230'

    # Not-NUL class: everything before the trailing NUL is expected to be collected.
    regex = str('[^\x00]*')
    machine = cpppo.regex(name=str('not_NUL'), initial=regex)
    data = cpppo.dotdict()
    with machine:
        source = cpppo.chainable(str('aaab1230xoxx\0'))
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                     i, s, source.sent, source.peek(), data)
        assert i == 13
        assert source.sent == 12
        if sys.version_info[0] < 3:
            assert data.input.input.tostring() == 'aaab1230xoxx'
        else:
            assert data.input.input.tounicode() == 'aaab1230xoxx'
Esempio n. 5
0
def test_dfa():
    """Check NULL and default transitions of a three-state DFA.

    The states "Initial", "Middle" and "Terminal" are linked by a ``None``
    transition and then a ``True`` transition.  The initial state is run on
    its own, then a ``cpppo.dfa`` wrapping it is run with an empty source and
    again after b'abc' has been chained onto that source.
    """
    # Simple DFA with states consuming no input.  A NULL (None) state transition
    # doesn't require input for state change.  The Default (True) transition
    # requires input to make the transition, but none of these states consume
    # it, so it'll be left over at the end.
    a = cpppo.state("Initial")
    a[None] = b = cpppo.state("Middle")
    b[True] = cpppo.state("Terminal", terminal=True)

    source = cpppo.chainable()
    i = a.run(source=source)
    m, s = next(i)
    assert m is None
    assert s is not None and s.name == "Middle"
    try:
        next(i)
    except StopIteration:
        pass
    else:
        assert False, "Expected no more non-transition events"

    machine = cpppo.dfa(initial=a)

    with machine:
        log.info("DFA:")
        for initial in machine.initial.nodes():
            for inp, target in initial.edges():
                # Lazy logging arguments, matching the final log call in this test.
                log.info("%s <- %-10.10r -> %s",
                         cpppo.centeraxis(initial, 25, clip=True), inp, target)

        # Running with no input will yield the initial state, with None input; since it is a NULL
        # state (no input processed), it will simply attempt to transition.  This will require the
        # next input from source, which is empty, so it will return input,state=(None, None)
        # indicating a non-terminal state and no input left.  This gives the caller an opportunity
        # to reload input and try again.  If a loop is detected (same state and input conditions
        # seen repeatedly), the DFA will terminate; if not in a terminal state, an exception will be
        # raised.
        log.info("States; No input")
        source = cpppo.chainable()
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                # Records a clean stop, which the assertion after the loop rejects.
                sequence = None
                break
            except cpppo.NonTerminal as e:
                assert "non-terminal state" in str(e)
                break

            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp is None
                assert sta.name == "Initial"
            if num == 1:
                assert inp is None
                assert sta.name == "Middle"
            if num == 2:
                assert inp is None
                assert sta is None  # And no more no-input transitions
            assert num < 3  # If we get here, we didn't detect loop
        assert num == 3

        # since the iterator did not stop cleanly (after processing a state's input,
        # and then trying to determine the next state), it'll continue indefinitely
        assert sta is None
        assert sequence is not None

        # Try with some input loaded into source stream, using an identical generator sequence.
        # Only the first element is gotten, and is reused for every NULL state transition, and is
        # left over at the end.
        log.info("States; 'abc' input")
        assert source.peek() is None
        source.chain(b'abc')
        assert source.peek() == b'a'[0]  # python2: str, python3: int
        sequence = machine.run(source=source)
        for num in range(10):
            try:
                mch, sta = next(sequence)
            except StopIteration:
                break
            inp = source.peek()
            log.info("%s <- %r", cpppo.centeraxis(mch, 25, clip=True), inp)
            if num == 0:
                assert inp == b'a'[0]
                assert sta.name == "Initial"
            if num == 1:
                assert inp == b'a'[0]
                assert sta.name == "Middle"
            if num == 2:
                assert inp == b'a'[0]
                assert sta.name == "Terminal"
            assert num < 3
        assert num == 3
        assert inp == b'a'[0]
        assert sta.name == "Terminal"
Esempio n. 6
0
def test_regex():
    """Step a 'a*b.*x' ``cpppo.regex`` machine over 'aaab1230xoxx'.

    On each step the peeked input and the next state's name are compared
    against a table; the first four steps also check ``source.sent``.  The
    run is expected to stop on step 14 with no input left, no next state, and
    the machine's current state named '3'.
    """
    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str('a*b.*x')
    machine = cpppo.regex(name=str('test1'), initial=regex)

    # (peeked input, next state name, source.sent) per step.  A None input or
    # state name means "is None"; a None sent count means "not checked".
    expected = (
        # Initial state does *not* consume a source symbol
        ('a', "0'", 0),
        ('a', "0", 0),
        ('a', "0", 1),
        ('a', "0", 2),
        ('b', "2", None),
        ('1', "2", None),
        ('2', "2", None),
        ('3', "2", None),
        ('0', "2", None),
        ('x', "3", None),
        ('o', "2", None),  # Trans. from term. to non-term. state!))
        ('x', "3", None),
        ('x', "3", None),
        (None, None, None),
    )

    with machine:
        source = cpppo.chainable(str('aaab1230xoxx'))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd",
                     cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state",
                         cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished",
                         cpppo.centeraxis(mch, 25, clip=True), inp)

            if num < len(expected):
                want_inp, want_state, want_sent = expected[num]
                if want_inp is None:
                    # None is a singleton; compare by identity.
                    assert inp is None
                else:
                    assert inp == want_inp
                if want_state is None:
                    assert sta is None
                else:
                    assert sta.name == want_state
                if want_sent is not None:
                    assert source.sent == want_sent
            assert num < 14
        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == '3'
Esempio n. 7
0
def test_regex():
    """Step a "a*b.*x" ``cpppo.regex`` machine over "aaab1230xoxx".

    Each step checks the peeked input and the next state's name (and, for the
    first four steps, ``source.sent``).  The run is expected to stop on step
    14 with the source empty, no next state, and the machine's current state
    named "3".
    """
    # This forces plain strings in 2.x, unicode in 3.x (counteracts import unicode_literals above)
    regex = str("a*b.*x")
    machine = cpppo.regex(name=str("test1"), initial=regex)
    with machine:
        source = cpppo.chainable(str("aaab1230xoxx"))
        sequence = machine.run(source=source)
        for num in range(20):
            try:
                mch, sta = next(sequence)
                inp = source.peek()
            except StopIteration:
                inp = source.peek()
                log.info("%s <- %-10.10r test done", cpppo.centeraxis(mch, 25, clip=True), inp)
                break
            log.info("%s <- %-10.10r test rcvd", cpppo.centeraxis(mch, 25, clip=True), inp)
            if sta is None:
                log.info("%s <- %-10.10r test no next state", cpppo.centeraxis(mch, 25, clip=True), inp)
            if inp is None:
                log.info("%s <- %-10.10r test source finished", cpppo.centeraxis(mch, 25, clip=True), inp)

            # Initial state does *not* consume a source symbol
            if num == 0:
                assert inp == "a"
                assert sta.name == "0'"
                assert source.sent == 0
            elif num == 1:
                assert inp == "a"
                assert sta.name == "0"
                assert source.sent == 0
            elif num == 2:
                assert inp == "a"
                assert sta.name == "0"
                assert source.sent == 1
            elif num == 3:
                assert inp == "a"
                assert sta.name == "0"
                assert source.sent == 2
            elif num == 4:
                assert inp == "b"
                assert sta.name == "2"
            elif num == 5:
                assert inp == "1"
                assert sta.name == "2"
            elif num == 6:
                assert inp == "2"
                assert sta.name == "2"
            elif num == 7:
                assert inp == "3"
                assert sta.name == "2"
            elif num == 8:
                assert inp == "0"
                assert sta.name == "2"
            elif num == 9:
                assert inp == "x"
                assert sta.name == "3"
            elif num == 10:
                assert inp == "o"
                assert sta.name == "2"  # Trans. from term. to non-term. state!))
            elif num == 11:
                assert inp == "x"
                assert sta.name == "3"
            elif num == 12:
                assert inp == "x"
                assert sta.name == "3"
            elif num == 13:
                # None is a singleton; compare by identity rather than equality.
                assert inp is None
                assert sta is None
            assert num < 14
        assert inp is None
        assert num == 14
        assert sta is None and machine.current.name == "3"