def test_paren():
    """Check parenthesised groups in CFG expressions, with and without recursion.

    Covers three grammars built through ``CFG`` plus one hand-built
    ``Symbol`` grammar used as a reference for the recursive case.
    The assertions treat ``parse`` as returning a 2-tuple whose first item is
    the unconsumed remainder of the input.
    """
    # Non-recursive alternation with a parenthesised group.
    expr = "\\A:( ab ) | c"
    cfg = CFG(expr)

    assert str(cfg) == "A: r'ab|c'"

    assert cfg.parse('')[1] == False
    assert cfg.parse('ab') == ('', 'ab')
    assert cfg.parse('c') == ('', 'c')
    assert cfg.parse('ac') == ('ac', False)

    # Reference grammar built by hand: g -> a g b | ''.
    # These expectations were previously bare comparisons (never asserted);
    # they match what test_rec asserts for the same construction.
    d = {'g': Symbol(None)}
    _g = (Symbol('a') + d['g'] + Symbol('b')) | Symbol('')
    d['g'].update(_g)
    assert d['g'].parse('') == ('', True)
    assert d['g'].parse('ab') == ('', 'b')
    assert d['g'].parse('aabb') == ('', 'b')

    # Recursive rule written without parentheses or an explicit empty branch.
    expr = "\\A:a + \\A + b"
    cfg = CFG(expr)
    assert cfg.parse('') == ('', True)
    assert cfg.parse('ab') == ('', 'b')
    assert cfg.parse('aabb') == ('', 'b')

    # Recursive rule with a parenthesised group and an explicit \0 branch.
    # Only the spaced form "( a + ... )" is exercised here; the unspaced
    # form "(a + ... )" is not covered by this test.
    expr = "\\A:( a + \\A + b ) | \\0"
    cfg = CFG(expr)

    print(cfg)

    assert cfg.parse('') == ('', True)
    assert cfg.parse('ab') == ('', 'b')
    assert cfg.parse('aabb') == ('', 'b')
def test_add_or():
    """Mix concatenation (+) and alternation (|) under both groupings."""
    sym_a, sym_b, sym_c = Symbol('a'), Symbol('b'), Symbol('c')

    # Grouping 1: (a b) | c
    left_grouped = (sym_a + sym_b) | sym_c

    assert left_grouped.parse('')[1] == False
    for text in ('ab', 'c'):
        # Both alternatives must consume the whole input.
        assert left_grouped.parse(text)[0] == ''

    # Grouping 2: a (b | c)
    right_grouped = sym_a + (sym_b | sym_c)

    assert right_grouped.parse('')[1] == False
    for text, expected in (('ab', ('', 'b')), ('ac', ('', 'c'))):
        assert right_grouped.parse(text) == expected
def test_add():
    """Exercise Symbol concatenation for plain and starred patterns."""
    # Plain literals: 'a' followed by 'b'.
    literal_seq = Symbol('a') + Symbol('b')

    assert literal_seq.parse('')[1] == False
    assert literal_seq.parse('ab')[0] == ''
    # The trailing 'b' must be left unconsumed.
    assert literal_seq.parse('abb')[0] != ''

    # Starred patterns: 'a*' followed by 'b*'.
    starred_seq = Symbol('a*') + Symbol('b*')

    assert starred_seq.parse('')[1] == True
    for text in ('ab', 'aab', 'abb', 'aa', 'bb'):
        # Each of these is expected to be consumed completely.
        assert starred_seq.parse(text)[0] == ''
    # An 'a' after the b-run cannot be consumed by a* b*.
    assert starred_seq.parse('aba')[0] != ''
def test_cfg_manual():
    """
    test:
        - create mapping from cfg expr
        - create aggregate Symbol with arithmetic and parse

    Smoke test only: the parse result is not asserted.
    """
    # Backslashes are escaped explicitly. "\G", "\W", "\S" and "\w" are not
    # valid string escapes, so the unescaped spelling triggers a
    # SyntaxWarning on Python 3.12+. The runtime value is unchanged.
    in_cfg = (
        "\\G :\\WORD \\SPACE \\G | \\0\n"
        "\\WORD :\\w+\n"
        "\\SPACE : "
    )

    # Lines are filtered but NOT stripped, so the trailing space of the
    # SPACE rule is kept.
    lines = [line for line in in_cfg.split('\n') if line.strip()]
    mappings = Symbol.create_mappings(lines)

    sw = Symbol(mappings['WORD'])
    ss = Symbol(mappings['SPACE'])

    g = sw + ss
    # NOTE(review): rrec() is defined on Symbol outside this file; presumably
    # it makes the sequence recursive -- confirm against its definition.
    g.rrec()

    st = "first second third fourth fifth"
    g.parse(st)
def test_or():
    """Exercise Symbol alternation for plain and starred patterns."""
    # Plain literals: 'a' | 'b'.
    either_literal = Symbol('a') | Symbol('b')

    assert either_literal.parse('')[1] == False
    for text in ('a', 'b'):
        assert either_literal.parse(text)[0] == ''

    # Starred patterns: 'a*' | 'b*'.
    either_starred = Symbol('a*') | Symbol('b*')

    # Each case is (input, index into the parse result, expected value).
    cases = (
        ('', 1, True),
        ('aa', 0, ''),
        ('aa', 1, 'aa'),
        ('aab', 0, 'b'),
        ('aaba', 0, 'ba'),
        ('bb', 0, ''),
        ('bb', 1, 'bb'),
        ('bba', 0, 'a'),
        ('bbab', 0, 'ab'),
    )
    for text, index, expected in cases:
        assert either_starred.parse(text)[index] == expected
def test_star():
    """Check a single starred Symbol against empty, full and partial matches."""
    starred = Symbol('a*')

    # Each case is (input, index into the parse result, expected value).
    cases = (
        ('', 1, True),
        ('aa', 0, ''),
        ('aa', 1, 'aa'),
        ('aab', 0, 'b'),
        ('aaba', 0, 'ba'),
    )
    for text, index, expected in cases:
        assert starred.parse(text)[index] == expected
def test_inp_cfg():
    """
    Simple example:
    arg1: CFG that only has two important nonterminals: WORD and SPACE
    arg2: "WORD", meaning "return all parts of input that are WORD tokens"

    Smoke test only: it checks that Symbol.create_mappings accepts the
    grammar lines without raising; the result is not inspected.
    """

    # Open questions carried over from the original author:
    # -> is a space included if it's part of the regex?
    # here, there is NO SPACE after the colon
    # however, each symbol must be separated by a space
    #   does a space in your regex mean there must be two spaces?
    #
    # Backslashes are escaped explicitly. "\G", "\W", "\S" and "\w" are not
    # valid string escapes, so the unescaped spelling triggers a
    # SyntaxWarning on Python 3.12+. The runtime value is unchanged.
    in_cfg = (
        "\\G :\\WORD \\SPACE \\G | \\0\n"
        "\\WORD :\\w+\n"
        "\\SPACE : "
    )

    # NOTE(review): line.strip() drops the trailing space of the SPACE rule,
    # unlike test_cfg_manual which keeps lines unstripped -- confirm this is
    # intended.
    lines = [line.strip() for line in in_cfg.split('\n') if line.strip()]
    Symbol.create_mappings(lines)
def test_rec():
    """Check self-referential Symbols built with a placeholder and update()."""
    # Right-recursive grammar: A -> a A | ''
    rec_a = Symbol(None)
    rec_a.update((Symbol('a') + rec_a) | Symbol(''))

    for text in ('', 'a', 'aa'):
        assert rec_a.parse(text) == ('', True)

    # Nested grammar: G -> a G b | ''
    rec_g = Symbol(None)
    rec_g.update((Symbol('a') + rec_g + Symbol('b')) | Symbol(''))

    # Each case is (input, expected full parse result).
    cases = (
        ('', ('', True)),
        ('ab', ('', 'b')),
        ('aabb', ('', 'b')),
        ('aaabb', ('', False)),
        ('aabbb', ('b', 'b')),
    )
    for text, expected in cases:
        assert rec_g.parse(text) == expected
def test_create_symbol():
    """
    old test:
        - manual symbol creation
        - manual parsing

    Alternates a word Symbol and a space Symbol over the input until nothing
    is left, printing each parsed word.
    """
    st = "first second third fourth fifth"

    # Raw string: '\w' is not a valid string escape and triggers a
    # SyntaxWarning on Python 3.12+ when written in a normal literal.
    sw = Symbol(r'\w+')
    ss = Symbol(' ')

    while st:
        before = st
        st, curr = sw.parse(st)
        print("parsed: {}".format(curr))
        st, _ = ss.parse(st)
        # Fail instead of looping forever if neither Symbol consumed input.
        assert st != before, "parser made no progress on {!r}".format(before)