Esempio n. 1
0
def test_decide():
    """Allow state transition decisions based on collected context other than just
    the next source symbol.

    """
    e = cpppo.state("enter")
    e["a"] = a = cpppo.state_input("a", context="a")
    a[" "] = s1 = cpppo.state_drop("s1")
    s1[" "] = s1
    s1[None] = i1 = cpppo.integer("i1", context="i1")

    i1[" "] = s2 = cpppo.state_drop("s2")
    s2[" "] = s2
    s2[None] = i2 = cpppo.integer("i2", context="i2")
    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)
    i2[None] = cpppo.decide("isless", less, predicate=lambda machine, source, path, data: data.i1 < data.i2)
    i2[None] = cpppo.decide("isgreater", greater, predicate=lambda machine, source, path, data: data.i1 > data.i2)
    i2[None] = equal

    source = cpppo.peekable(str("a 1 2"))
    data = cpppo.dotdict()
    with cpppo.dfa("comparo", initial=e) as comparo:
        for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
            log.info(
                "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                m.name_centered(),
                i,
                s,
                source.sent,
                source.peek(),
                data,
            )
        assert i == 11
        assert s is less

    source = cpppo.peekable(str("a 33 33"))
    data = cpppo.dotdict()
    with cpppo.dfa("comparo", initial=e) as comparo:
        for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
            log.info(
                "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                m.name_centered(),
                i,
                s,
                source.sent,
                source.peek(),
                data,
            )
        assert i == 13
        assert s is equal
Esempio n. 2
0
def main():
    """The basic examples in the README"""

    # Basic DFA that accepts ab+
    E			= cpppo.state( 'E' )
    A			= cpppo.state_input( 'A' )
    B			= cpppo.state_input( 'B', terminal=True )
    E['a']		= A
    A['b']		= B
    B['b']		= B

    BASIC		= cpppo.dfa( 'ab+', initial=E, context='basic' )

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    ABP			= cpppo.dfa( 'ab+', initial=E, terminal=True )
    SEP			= cpppo.state_drop( 'SEP' )
    ABP[',']		= SEP
    SEP[' ']		= SEP
    SEP[None]		= ABP

    CSV			= cpppo.dfa( 'CSV', initial=ABP, context='csv' )

    # A regular expression; he default dfa name is the regular expression itself.
    REGEX		= cpppo.regex( initial='(ab+)((,[ ]*)(ab+))*', context='regex' )

    data		= cpppo.dotdict()
    for machine in [ BASIC, CSV, REGEX ]:
        path		= machine.context() + '.input' # default for state_input data
        source		= cpppo.peekable( str( 'abbbb, ab' ))
        with machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                print( "%s #%3d; next byte %3d: %-10.10r: %r" % (
                       m.name_centered(), i, source.sent, source.peek(), data.get(path) ))
        print( "Accepted: %r; remaining: %r\n" % ( data.get(path), ''.join( source )))
    print( "Final: %r" % ( data ))
Esempio n. 3
0
def test_readme():
    """The basic examples in the README"""

    # Basic DFA that accepts ab+
    E				= cpppo.state( "E" )
    A				= cpppo.state_input( "A" )
    B				= cpppo.state_input( "B", terminal=True )
    E['a']			= A
    A['b']			= B
    B['b']			= B

    data			= cpppo.dotdict()
    source			= cpppo.peekable( str( 'abbbb,ab' ))
    with cpppo.dfa( initial=E ) as abplus:
        for i,(m,s) in enumerate( abplus.run( source=source, path="ab+", data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 5
    assert source.peek() == str(',')
    
    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    CSV				= cpppo.dfa( "CSV", initial=E, terminal=True )
    SEP				= cpppo.state_drop( "SEP" )

    CSV[',']			= SEP
    SEP[' ']			= SEP
    SEP[None]			= CSV

    source			= cpppo.peekable( str( 'abbbb, ab' ))
    with cpppo.dfa( initial=CSV ) as r2:
        for i,(m,s) in enumerate( r2.run( source=source, path="readme_CSV", data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 14
    assert source.peek() is None
Esempio n. 4
0
def tnet_machine( name="TNET", context="tnet" ):
    """Accept a sentence of input bytes matching a tnetstring, and then
    loop.  Sub-machine terminates at earliest match (non-greedy), causing
    echo.transition to trigger .process (which resets our sub-machine to initial
    state), and then we move to the next state (loops), allowing us to
    immediately run."""

    class tnet_parser( cpppo.state_input ):
        TYPES			= (b'#'[0], b'}'[0], b']'[0], b','[0],
                                   b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type"""
            tntype		= next( source )
            ours		= self.context( path )
            raw			= ours + '...data.input'
            src			= b'' if raw not in data else (
                data[raw].tostring() if sys.version_info[0] < 3
                else data[raw].tobytes() )

            if tntype == b','[0]:
                log.info("%5d bytes  data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours]	= src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), cpppo.reprlib.repr( src ))
                data[ours]	= src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours]	= int( src )
                log.info("%5d int    data: %s == %s", len( src ), cpppo.reprlib.repr( src ),
                         cpppo.reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                assert 0 == len( src )
                data[ours]	= None
            else:
                assert False, "Invalid tnetstring type: %s" % tntype
                
    bytes_conf 			= {
        "alphabet":	cpppo.type_bytes_iter,
        "typecode":	cpppo.type_bytes_array_symbol,
    }

    SIZE			= cpppo.dfa( name="SIZE", 
                                             initial=cpppo.integer_bytes(
                                                 name="INT", context="size", decode='ascii', terminal=True ))
    COLON			= cpppo.state_drop( name="COLON", **bytes_conf )
    DATA			= data_parser( name="DATA", context="data", repeat="..size" )
    TYPE			= tnet_parser( name="TYPE", context="type", terminal=True,
                                                     **bytes_conf )

    SIZE[b':'[0]]		= COLON
    COLON[None]			= DATA
    for t in tnet_parser.TYPES:
        DATA[t]			= TYPE

    # Recognize a TNET string and then terminate, resetting to automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
Esempio n. 5
0
def test_decide():
    """Allow state transition decisions based on collected context other than just
    the next source symbol.

    """
    e = cpppo.state("enter")
    e['a'] = a = cpppo.state_input("a", context='a')
    a[' '] = s1 = cpppo.state_drop("s1")
    s1[' '] = s1
    s1[None] = i1 = cpppo.integer("i1", context='i1')

    i1[' '] = s2 = cpppo.state_drop("s2")
    s2[' '] = s2
    s2[None] = i2 = cpppo.integer("i2", context='i2')
    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)
    i2[None] = cpppo.decide(
        "isless",
        less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2)
    i2[None] = cpppo.decide(
        "isgreater",
        greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2)
    i2[None] = equal

    source = cpppo.peekable(str('a 1 2'))
    data = cpppo.dotdict()
    with cpppo.dfa("comparo", initial=e) as comparo:
        for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 12
        assert s is less

    source = cpppo.peekable(str('a 33 33'))
    data = cpppo.dotdict()
    with cpppo.dfa("comparo", initial=e) as comparo:
        for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 14
        assert s is equal
Esempio n. 6
0
def tnet_machine( name="TNET", context="tnet" ):
    """Accept a sentence of input bytes matching a tnetstring, and then
    loop.  Sub-machine terminates at earliest match (non-greedy), causing
    echo.transition to trigger .process (which resets our sub-machine to initial
    state), and then we move to the next state (loops), allowing us to
    immediately run."""

    class tnet_parser( cpppo.state_input ):
        codes			= (b'#'[0], b'}'[0], b']'[0], b','[0],
                                   b'$'[0], b'!'[0], b'~'[0], b'^'[0])

        def process( self, source, machine=None, path=None, data=None ):
            """Convert the collected data according to the type"""
            tntype		= next( source )
            ours		= self.context( path )
            raw			= ours + '...data.input'
            src			= ( data[raw].tostring() if sys.version_info.major < 3
                                    else data[raw].tobytes() )

            if tntype == b','[0]:
                log.info("%5d bytes  data: %s", len( src ), reprlib.repr( src ))
                data[ours]	= src
            elif tntype == b'$'[0]:
                log.info("%5d string data: %s", len( src ), reprlib.repr( src ))
                data[ours]	= src.decode( 'utf-8' )
            elif tntype == b'#'[0]:
                data[ours]	= int( src )
                log.info("%5d int    data: %s == %s", len( src ), reprlib.repr( src ),
                         reprlib.repr( data[ours] ))
            elif tntype == b'~'[0]:
                assert 0 == len( src )
                data[ours]	= None
            else:
                assert False, "Invalid tnetstring type: %s" % tntype
                
    bytes_conf 			= {
        "alphabet":	cpppo.type_bytes_iter,
        "typecode":	cpppo.type_bytes_array_symbol,
    }

    SIZE			= cpppo.integer_bytes( name="SIZE", context="size", decode='ascii' )
    COLON			= cpppo.state_drop( name="COLON", **bytes_conf )
    DATA			= data_parser( name="DATA", context="data", repeat="..size" )
    TYPE			= tnet_parser( name="TYPE", context="type", terminal=True,
                                                     **bytes_conf )
    SIZE[b':'[0]]		= COLON
    SIZE[True]			= None  # SIZE terminal, but only : acceptable

    COLON[None]			= DATA
    for t in tnet_parser.codes:
        DATA[t]			= TYPE
    DATA[True]			= None # DATA terminal, but only TNET codes acceptable

    # Recognize a TNET string and then terminate, resetting automatically
    # recognize another
    return cpppo.dfa( name=name, context=context, initial=SIZE, terminal=True )
Esempio n. 7
0
def main():
    """The basic examples in the README"""

    # Basic DFA that accepts ab+
    E = cpppo.state('E')
    A = cpppo.state_input('A')
    B = cpppo.state_input('B', terminal=True)
    E['a'] = A
    A['b'] = B
    B['b'] = B

    BASIC = cpppo.dfa('ab+', initial=E, context='basic')

    # Composite state machine accepting ab+, ignoring ,[ ]* separators
    ABP = cpppo.dfa('ab+', initial=E, terminal=True)
    SEP = cpppo.state_drop('SEP')
    ABP[','] = SEP
    SEP[' '] = SEP
    SEP[None] = ABP

    CSV = cpppo.dfa('CSV', initial=ABP, context='csv')

    # A regular expression; he default dfa name is the regular expression itself.
    REGEX = cpppo.regex(initial='(ab+)((,[ ]*)(ab+))*', context='regex')

    data = cpppo.dotdict()
    for machine in [BASIC, CSV, REGEX]:
        path = machine.context() + '.input'  # default for state_input data
        source = cpppo.peekable(str('abbbb, ab'))
        with machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                print("%s #%3d; next byte %3d: %-10.10r: %r" %
                      (m.name_centered(), i, source.sent, source.peek(),
                       data.get(path)))
        print("Accepted: %r; remaining: %r\n" %
              (data.get(path), ''.join(source)))
    print("Final: %r" % (data))
Esempio n. 8
0
def test_state():
    """A state is expected to process its input (perhaps nothing, if its a no-input state), and then use
    the next input symbol to transition to another state.  Each state has a context into a data
    artifact, into which it will collect its results.

    We must ensure that all state transitions are configured in the target alphabet; if an encoder
    is supplied, then all input symbols and all transition symbols will be encoded using it.  In
    this test, all string literals are Unicode (in both Python 2 and 3), so we use a unicode encoder
    to convert them to symbols."""

    unicodekwds = {
        'alphabet': unicode if sys.version_info[0] < 3 else str,
        'encoder': cpppo.type_unicode_encoder,
    }
    s1 = cpppo.state('one', **unicodekwds)
    s2 = cpppo.state_drop('two', **unicodekwds)

    s1['a'] = s2
    assert s1['a'] is s2

    source = cpppo.peeking('abc')

    # We can run state instances with/without acquisition
    g = s1.run(source=source)
    assert next(g) == (None, s2)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next(g)
    with s1:
        g = s1.run(source=source)
        assert source.peek() == 'a'
        assert next(g) == (None, s2)
        assert source.peek() == 'a'
        try:
            next(g)
            assert False, "Should have terminated"
        except StopIteration:
            pass
        assert source.peek() == 'a'

    # A state machine accepting a sequence of unicode a's
    a_s = cpppo.state("a_s", **unicodekwds)
    an_a = cpppo.state_input("a",
                             terminal=True,
                             typecode=cpppo.type_unicode_array_symbol,
                             **unicodekwds)
    a_s['a'] = an_a
    an_a['a'] = an_a

    source = cpppo.peeking('aaaa')
    data = cpppo.dotdict()

    with cpppo.dfa(initial=a_s) as aplus:
        for i, (m, s) in enumerate(aplus.run(source=source)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 5
        assert source.peek() is None
        assert len(data) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  When the lower level a's machine
    # doesn't recognize the symbol, then the higher level machine will recognize and discard
    sep = cpppo.state_drop("sep", **unicodekwds)
    csv = cpppo.dfa("csv", initial=a_s, terminal=True, **unicodekwds)
    csv[','] = sep
    sep[' '] = sep
    sep['π'] = sep
    sep[None] = csv

    source = cpppo.peeking('aaaa, a,π a')
    data = cpppo.dotdict()

    with cpppo.dfa(initial=csv) as csvaplus:
        for i, (m, s) in enumerate(
                csvaplus.run(source=source, path="csv", data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 18
        assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'
Esempio n. 9
0
def test_state():
    """A state is expected to process its input (perhaps nothing, if its a no-input state), and then use
    the next input symbol to transition to another state.  Each state has a context into a data
    artifact, into which it will collect its results.

    We must ensure that all state transitions are configured in the target alphabet; if an encoder
    is supplied, then all input symbols and all transition symbols will be encoded using it.  In
    this test, all string literals are Unicode (in both Python 2 and 3), so we use a unicode encoder
    to convert them to symbols."""

    unicodekwds			= {
        'alphabet':	unicode if sys.version_info[0] < 3 else str,
        'encoder':	cpppo.type_unicode_encoder,
    }
    s1				= cpppo.state(
        'one', **unicodekwds )
    s2				= cpppo.state_drop(
        'two', **unicodekwds )

    s1['a']			= s2
    assert s1['a'] is s2

    source			= cpppo.peeking( 'abc' )

    # We can run state instances with/without acquisition
    g				= s1.run( source=source )
    assert next( g ) == (None, s2)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next( g )
    with s1:
        g			= s1.run( source=source )
        assert source.peek() == 'a'
        assert next( g ) == (None, s2)
        assert source.peek() == 'a'
        try:
            next( g )
            assert False, "Should have terminated"
        except StopIteration:
            pass
        assert source.peek() == 'a'

    
    # A state machine accepting a sequence of unicode a's
    a_s				= cpppo.state( 		"a_s", **unicodekwds )
    an_a			= cpppo.state_input(	"a",   terminal=True,
                                                        typecode=cpppo.type_unicode_array_symbol,
                                                        **unicodekwds )
    a_s['a']			= an_a
    an_a['a']			= an_a

    source			= cpppo.peeking( 'aaaa' )
    data			= cpppo.dotdict()

    with cpppo.dfa( initial=a_s ) as aplus:
        for i,(m,s) in enumerate( aplus.run( source=source )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                      i, s, source.sent, source.peek(), data )
        assert i == 5
        assert source.peek() is None
        assert len( data ) == 0

    # Accepting a's separated by comma and space/pi (for kicks).  When the lower level a's machine
    # doesn't recognize the symbol, then the higher level machine will recognize and discard
    sep				= cpppo.state_drop(	"sep", **unicodekwds )
    csv				= cpppo.dfa( "csv", initial=a_s , terminal=True, **unicodekwds )
    csv[',']			= sep
    sep[' ']			= sep
    sep['π']			= sep
    sep[None]			= csv
    
    source			= cpppo.peeking( 'aaaa, a,π a' )
    data			= cpppo.dotdict()

    with cpppo.dfa( initial=csv ) as csvaplus:
        for i,(m,s) in enumerate( csvaplus.run( source=source, path="csv", data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r", m.name_centered(),
                i, s, source.sent, source.peek(), data )
        assert i == 18
        assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'