Ejemplo n.º 1
0
def main():
    """Demonstrate the README state machines on one shared sample input.

    Three recognizers are built -- a hand-wired DFA for ab+, a composite that
    also handles ,[ ]* separators between ab+ runs, and a regex-defined
    machine.  Each is run over 'abbbb, ab'; every step is printed, followed by
    what was collected under that machine's context and whatever input remains.
    """
    step_report = "%s #%3d; next byte %3d: %-10.10r: %r"
    done_report = "Accepted: %r; remaining: %r\n"

    # Hand-wired ab+: 'a' leads from E to A; 'b' enters, and then loops on, the terminal state B.
    entry = cpppo.state( 'E' )
    after_a = cpppo.state_input( 'A' )
    after_b = cpppo.state_input( 'B', terminal=True )
    entry['a'] = after_a
    after_a['b'] = after_b
    after_b['b'] = after_b

    basic = cpppo.dfa( 'ab+', initial=entry, context='basic' )

    # Composite: a terminal ab+ sub-machine whose ',' transition goes to the SEP state; SEP loops
    # on ' ' and goes back to the ab+ sub-machine through its None transition.
    ab_plus = cpppo.dfa( 'ab+', initial=entry, terminal=True )
    separator = cpppo.state_drop( 'SEP' )
    ab_plus[','] = separator
    separator[' '] = separator
    separator[None] = ab_plus

    composite = cpppo.dfa( 'CSV', initial=ab_plus, context='csv' )

    # Regex-defined machine; the default dfa name is the regular expression itself.
    by_regex = cpppo.regex( initial='(ab+)((,[ ]*)(ab+))*', context='regex' )

    collected = cpppo.dotdict()
    for machine in ( basic, composite, by_regex ):
        path = machine.context() + '.input' # default location of state_input data
        source = cpppo.peekable( str( 'abbbb, ab' ))
        with machine:
            for step,(mach,_) in enumerate( machine.run( source=source, data=collected )):
                print( step_report % (
                    mach.name_centered(), step, source.sent, source.peek(),
                    collected.get( path )))
        print( done_report % ( collected.get( path ), ''.join( source )))
    print( "Final: %r" % ( collected ))
Ejemplo n.º 2
0
def test_readme():
    """Exercise the README examples: the plain ab+ DFA, then the comma-separated composite."""
    trace = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    # ab+ recognizer: E --a--> A --b--> B (terminal), with B looping on further 'b's.
    start = cpppo.state( "E" )
    saw_a = cpppo.state_input( "A" )
    saw_b = cpppo.state_input( "B", terminal=True )
    start['a'] = saw_a
    saw_a['b'] = saw_b
    saw_b['b'] = saw_b

    results = cpppo.dotdict()
    text = cpppo.peekable( str( 'abbbb,ab' ))
    with cpppo.dfa( initial=start ) as abplus:
        for count,(mach,nxt) in enumerate( abplus.run( source=text, path="ab+", data=results )):
            log.info( trace, mach.name_centered(), count, nxt, text.sent, text.peek(), results )
        assert count == 5
    # The plain ab+ machine must stop with the ',' still waiting in the source.
    assert text.peek() == str(',')

    # Composite machine: ab+ runs, with ',' followed by any number of spaces handled by SEP.
    csv_machine = cpppo.dfa( "CSV", initial=start, terminal=True )
    separator = cpppo.state_drop( "SEP" )

    csv_machine[','] = separator
    separator[' '] = separator
    separator[None] = csv_machine

    text = cpppo.peekable( str( 'abbbb, ab' ))
    with cpppo.dfa( initial=csv_machine ) as r2:
        for count,(mach,nxt) in enumerate( r2.run( source=text, path="readme_CSV", data=results )):
            log.info( trace, mach.name_centered(), count, nxt, text.sent, text.peek(), results )
        assert count == 14
    # This time the whole input is expected to be consumed.
    assert text.peek() is None
Ejemplo n.º 3
0
def test_struct():
    """Step a 4-byte-input + struct machine over a source that is fed in 3-byte segments.

    Four chained state_input states ("First" .. "Fourth") lead to a state_struct named "int32"
    with format "<i".  The 5 bytes of material are chained into an initially empty
    cpppo.chainable source, 3 at a time, whenever the source has nothing left to peek.  The test
    asserts the peeked symbol and the reported state at every step, and finally the value found at
    data.struct.val.

    """
    dtp				= cpppo.type_bytes_array_symbol
    abt				= cpppo.type_bytes_iter
    ctx				= 'val'
    # Each state transitions to the next on True, and all share the same context 'val'.
    # NOTE(review): True is presumably cpppo's "any input symbol" transition key -- confirm.
    a				= cpppo.state_input( "First",  alphabet=abt, typecode=dtp, context=ctx )
    a[True] = b 		= cpppo.state_input( "Second", alphabet=abt, typecode=dtp, context=ctx )
    b[True] = c 		= cpppo.state_input( "Third",  alphabet=abt, typecode=dtp, context=ctx )
    c[True] = d			= cpppo.state_input( "Fourth", alphabet=abt, typecode=dtp, context=ctx )
    # The None-keyed transition out of "Fourth" leads to the terminal struct state.
    d[None] 			= cpppo.state_struct( "int32", context=ctx,
                                                      format=str("<i"),
                                                      terminal=True )
    machine			= cpppo.dfa( initial=a )
    with machine:
        material		= b'\x01\x02\x03\x80\x99'
        segment			= 3
        source			= cpppo.chainable()
        log.info( "States; %r input, by %d", material, segment )
        inp			= None
        data			= cpppo.dotdict()
        path			= "struct"
        sequence		= machine.run( source=source, path=path, data=data )
        # range( 10 ) is only an upper bound; the asserts below expect the run to end at num == 6.
        for num in range( 10 ):
            try:
                mch,sta		= next( sequence )
                inp		= source.peek()
            except StopIteration:
                # Generator exhausted: mch/sta keep their values from the previous iteration
                # (they would be unbound if the very first next() raised).
                inp		= source.peek()
                log.info( "%s <- %-10.10r test done", cpppo.centeraxis( mch, 25, clip=True ), inp )
                break
            log.info( "%s <- %-10.10r test rcvd", cpppo.centeraxis( mch, 25, clip=True ), inp )
            if sta is None:
                log.info( "%s <- %-10.10r test no next state", cpppo.centeraxis( mch, 25, clip=True ), inp )
            if inp is None:
                # Nothing left to peek: chain in the next segment (possibly empty) of material.
                if not material:
                    log.info( "%s <- %-10.10r test source finished", cpppo.centeraxis( mch, 25, clip=True ), inp )
                # Will load consecutive empty iterables; chainable must handle
                source.chain( material[:segment] )
                material		= material[segment:]
                inp			= source.peek()
                log.info( "%s <- %-10.10r test chain", cpppo.centeraxis( mch, 25, clip=True ), inp )
    
            # Expected (peeked symbol, reported state) at each step.  At num == 3 no next state is
            # reported, and the peeked symbol is \x80 -- the first byte of the second segment.
            if num == 0: assert inp == b'\x01'[0]; assert sta.name == "First"
            if num == 1: assert inp == b'\x02'[0]; assert sta.name == "Second"
            if num == 2: assert inp == b'\x03'[0]; assert sta.name == "Third"
            if num == 3: assert inp == b'\x80'[0]; assert sta is None
            if num == 4: assert inp == b'\x80'[0]; assert sta.name == "Fourth"
            if num == 5: assert inp == b'\x99'[0]; assert sta.name == "int32"
            if num == 6: assert inp == b'\x99'[0]; assert sta.name == "int32"
        # The fifth byte (\x99) is still the next symbol to peek when the run ends.
        assert inp == b'\x99'[0]
        assert num == 6
        assert sta.name == "int32"
        # -2147286527 is b'\x01\x02\x03\x80' read as a little-endian signed 32-bit int ("<i").
        assert data.struct.val == -2147286527
Ejemplo n.º 4
0
def data_parser( **kwds ):
    """Build a dfa that parses raw bytes into .data, by default using ..size to denote the amount.

    Any of the "name", "context" or "repeat" keywords supplied by the caller take precedence over
    the defaults; all keywords are forwarded to cpppo.dfa.
    """
    fallbacks = (
        ( "name",    "DATA" ),
        ( "context", "data" ),
        ( "repeat",  "..size" ),
    )
    for option, value in fallbacks:
        kwds.setdefault( option, value )
    # A single terminal byte-input state, configured by the module's bytes_conf.
    byte_state = cpppo.state_input( name="BYTE", terminal=True, **bytes_conf )
    return cpppo.dfa( initial=byte_state, **kwds )
Ejemplo n.º 5
0
def data_parser( **kwds ):
    """Return a dfa collecting raw bytes into .data; unless overridden, ..size gives the amount."""
    # Fill in only the options the caller left out, then pass everything through to the dfa.
    if "name" not in kwds:
        kwds["name"] = "DATA"
    if "context" not in kwds:
        kwds["context"] = "data"
    if "repeat" not in kwds:
        kwds["repeat"] = "..size"
    one_byte = cpppo.state_input( name="BYTE", terminal=True, **bytes_conf )
    parser = cpppo.dfa( initial=one_byte, **kwds )
    return parser
Ejemplo n.º 6
0
def test_decide():
    """Check that a transition can be chosen from collected context, not only from the next
    source symbol.

    The machine reads the letter 'a' followed by two space-separated integers (stored under i1
    and i2), then ends in the "less", "greater" or "equal" state according to how they compare.

    """
    trace = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    enter = cpppo.state("enter")
    letter = cpppo.state_input("a", context="a")
    enter["a"] = letter
    gap1 = cpppo.state_drop("s1")
    letter[" "] = gap1
    gap1[" "] = gap1
    first = cpppo.integer("i1", context="i1")
    gap1[None] = first

    gap2 = cpppo.state_drop("s2")
    first[" "] = gap2
    gap2[" "] = gap2
    second = cpppo.integer("i2", context="i2")
    gap2[None] = second

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)
    # Three None-keyed transitions are registered in this order: the two predicate-guarded
    # decide states first, then the plain transition to "equal".
    second[None] = cpppo.decide(
        "isless", less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2,
    )
    second[None] = cpppo.decide(
        "isgreater", greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2,
    )
    second[None] = equal

    def run_and_check(text, last_step, final_state):
        # Run a fresh "comparo" machine over text; verify the last step index and final state.
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=enter) as comparo:
            for step, (mach, state) in enumerate(comparo.run(source=source, data=data)):
                log.info(trace, mach.name_centered(), step, state,
                         source.sent, source.peek(), data)
            assert step == last_step
            assert state is final_state

    run_and_check("a 1 2", 11, less)
    run_and_check("a 33 33", 13, equal)
Ejemplo n.º 7
0
def main():
    """Build the README's three ab+ recognizers and print a trace of each on 'abbbb, ab'."""

    # 1) Explicit states: E --a--> A --b--> B, where B is terminal and repeats on 'b'.
    e_state = cpppo.state('E')
    a_state = cpppo.state_input('A')
    b_state = cpppo.state_input('B', terminal=True)
    e_state['a'] = a_state
    a_state['b'] = b_state
    b_state['b'] = b_state

    basic_machine = cpppo.dfa('ab+', initial=e_state, context='basic')

    # 2) Composite: the ab+ sub-machine plus a SEP state reached on ',' that repeats on ' ' and
    #    transitions back to ab+ on None.
    abp_machine = cpppo.dfa('ab+', initial=e_state, terminal=True)
    sep_state = cpppo.state_drop('SEP')
    abp_machine[','] = sep_state
    sep_state[' '] = sep_state
    sep_state[None] = abp_machine

    csv_machine = cpppo.dfa('CSV', initial=abp_machine, context='csv')

    # 3) The same language as a regular expression; the default dfa name is the expression itself.
    regex_machine = cpppo.regex(initial='(ab+)((,[ ]*)(ab+))*', context='regex')

    data = cpppo.dotdict()

    def demonstrate(machine):
        # Run one machine over a fresh source, printing every step and then the outcome.
        path = machine.context() + '.input'  # default for state_input data
        source = cpppo.peekable(str('abbbb, ab'))
        with machine:
            for index, (sub, _unused) in enumerate(
                    machine.run(source=source, data=data)):
                print("%s #%3d; next byte %3d: %-10.10r: %r" % (
                    sub.name_centered(), index, source.sent, source.peek(),
                    data.get(path)))
        print("Accepted: %r; remaining: %r\n" % (
            data.get(path), ''.join(source)))

    for machine in [basic_machine, csv_machine, regex_machine]:
        demonstrate(machine)
    print("Final: %r" % (data))
Ejemplo n.º 8
0
def test_decide():
    """Verify state transitions decided by collected context instead of the next source symbol.

    After parsing 'a', then two integers into i1 and i2, the machine must finish in the "less",
    "greater" or "equal" terminal state depending on the comparison of i1 and i2.

    """
    fmt = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    start = cpppo.state("enter")
    start['a'] = got_a = cpppo.state_input("a", context='a')
    got_a[' '] = skip1 = cpppo.state_drop("s1")
    skip1[' '] = skip1
    skip1[None] = num1 = cpppo.integer("i1", context='i1')

    num1[' '] = skip2 = cpppo.state_drop("s2")
    skip2[' '] = skip2
    skip2[None] = num2 = cpppo.integer("i2", context='i2')

    less = cpppo.state("less", terminal=True)
    greater = cpppo.state("greater", terminal=True)
    equal = cpppo.state("equal", terminal=True)

    # Registration order of the None transitions is kept: isless, isgreater, then equal.
    is_less = cpppo.decide(
        "isless", less,
        predicate=lambda machine, source, path, data: data.i1 < data.i2)
    num2[None] = is_less
    is_greater = cpppo.decide(
        "isgreater", greater,
        predicate=lambda machine, source, path, data: data.i1 > data.i2)
    num2[None] = is_greater
    num2[None] = equal

    # (input text, index of the final step, expected final state)
    cases = (
        ('a 1 2', 12, less),
        ('a 33 33', 14, equal),
    )
    for text, final_index, final_state in cases:
        source = cpppo.peekable(str(text))
        data = cpppo.dotdict()
        with cpppo.dfa("comparo", initial=start) as comparo:
            for i, (m, s) in enumerate(comparo.run(source=source, data=data)):
                log.info(fmt, m.name_centered(), i, s,
                         source.sent, source.peek(), data)
            assert i == final_index
            assert s is final_state
Ejemplo n.º 9
0
def test_state():
    """Exercise single states, a state machine built from them, and a composite machine.

    A state processes its input (possibly none, for a no-input state) and then uses the next
    input symbol to transition to another state; each state has a context into a data artifact
    where it collects its results.

    All transitions must be configured in the target alphabet; when an encoder is supplied, every
    input symbol and transition symbol is encoded with it.  The string literals in this test are
    Unicode (in both Python 2 and 3), so a unicode encoder is used to convert them to symbols.
    """
    trace = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"
    text_type = unicode if sys.version_info[0] < 3 else str
    unicodekwds = dict(alphabet=text_type, encoder=cpppo.type_unicode_encoder)

    one = cpppo.state('one', **unicodekwds)
    two = cpppo.state_drop('two', **unicodekwds)

    one['a'] = two
    assert one['a'] is two

    source = cpppo.peeking('abc')

    # A state instance can be run without acquiring it first ...
    stepper = one.run(source=source)
    assert next(stepper) == (None, two)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next(stepper)
    # ... and also while it is acquired; 'a' is still the next symbol throughout.
    with one:
        stepper = one.run(source=source)
        assert source.peek() == 'a'
        assert next(stepper) == (None, two)
        assert source.peek() == 'a'
        try:
            next(stepper)
        except StopIteration:
            pass
        else:
            assert False, "Should have terminated"
        assert source.peek() == 'a'

    # A machine accepting a sequence of unicode a's
    a_start = cpppo.state("a_s", **unicodekwds)
    a_loop = cpppo.state_input(
        "a", terminal=True, typecode=cpppo.type_unicode_array_symbol, **unicodekwds)
    a_start['a'] = a_loop
    a_loop['a'] = a_loop

    source = cpppo.peeking('aaaa')
    data = cpppo.dotdict()

    with cpppo.dfa(initial=a_start) as aplus:
        # No data is passed to run(), so the local dotdict is expected to stay empty.
        for step, (mach, state) in enumerate(aplus.run(source=source)):
            log.info(trace, mach.name_centered(), step, state,
                     source.sent, source.peek(), data)
        assert step == 5
        assert source.peek() is None
        assert len(data) == 0

    # a's separated by comma and space/pi (for kicks): symbols the lower-level a's machine does
    # not recognize are recognized and discarded by the higher-level machine.
    separator = cpppo.state_drop("sep", **unicodekwds)
    a_list = cpppo.dfa("csv", initial=a_start, terminal=True, **unicodekwds)
    a_list[','] = separator
    separator[' '] = separator
    separator['π'] = separator
    separator[None] = a_list

    source = cpppo.peeking('aaaa, a,π a')
    data = cpppo.dotdict()

    with cpppo.dfa(initial=a_list) as csvaplus:
        for step, (mach, state) in enumerate(
                csvaplus.run(source=source, path="csv", data=data)):
            log.info(trace, mach.name_centered(), step, state,
                     source.sent, source.peek(), data)
        assert step == 18
        assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'
Ejemplo n.º 10
0
def test_state():
    """Test individual states, then machines composed of them, over unicode input.

    Each state handles its own input (none at all for a no-input state), then picks its successor
    from the next input symbol, collecting any results through its context into a data artifact.

    Transitions have to be expressed in the target alphabet.  If an encoder is given, all input
    and transition symbols pass through it; since every string literal here is Unicode (under
    both Python 2 and 3), the unicode encoder is used to turn them into symbols.
    """
    log_format = "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r"

    unicodekwds = {}
    unicodekwds['alphabet'] = unicode if sys.version_info[0] < 3 else str
    unicodekwds['encoder'] = cpppo.type_unicode_encoder

    first = cpppo.state( 'one', **unicodekwds )
    dropper = cpppo.state_drop( 'two', **unicodekwds )

    first['a'] = dropper
    assert first['a'] is dropper

    source = cpppo.peeking( 'abc' )

    # State instances may be run either without or with acquisition
    runner = first.run( source=source )
    assert next( runner ) == (None, dropper)
    assert source.peek() == 'a'
    with pytest.raises(StopIteration):
        next( runner )
    with first:
        runner = first.run( source=source )
        assert source.peek() == 'a'
        assert next( runner ) == (None, dropper)
        assert source.peek() == 'a'
        finished = False
        try:
            next( runner )
        except StopIteration:
            finished = True
        if not finished:
            assert False, "Should have terminated"
        assert source.peek() == 'a'

    # State machine that accepts a run of unicode a's
    before_a = cpppo.state( "a_s", **unicodekwds )
    on_a = cpppo.state_input(
        "a", terminal=True, typecode=cpppo.type_unicode_array_symbol, **unicodekwds )
    before_a['a'] = on_a
    on_a['a'] = on_a

    source = cpppo.peeking( 'aaaa' )
    data = cpppo.dotdict()

    with cpppo.dfa( initial=before_a ) as aplus:
        for n,(machine,following) in enumerate( aplus.run( source=source )):
            log.info( log_format, machine.name_centered(),
                      n, following, source.sent, source.peek(), data )
        assert n == 5
        assert source.peek() is None
        assert len( data ) == 0

    # Now accept a's separated by comma and space/pi (for kicks).  Whatever the lower level a's
    # machine fails to recognize is recognized and discarded by the higher level machine.
    gap = cpppo.state_drop( "sep", **unicodekwds )
    listing = cpppo.dfa( "csv", initial=before_a, terminal=True, **unicodekwds )
    listing[','] = gap
    gap[' '] = gap
    gap['π'] = gap
    gap[None] = listing

    source = cpppo.peeking( 'aaaa, a,π a' )
    data = cpppo.dotdict()

    with cpppo.dfa( initial=listing ) as csvaplus:
        for n,(machine,following) in enumerate( csvaplus.run( source=source, path="csv", data=data )):
            log.info( log_format, machine.name_centered(),
                      n, following, source.sent, source.peek(), data )
        assert n == 18
        assert source.peek() is None
    assert data.csv.input.tounicode() == 'aaaaaa'