Esempio n. 1
0
 def testRegularExpressionExtract(self):
     """Check extract() against a digits-only regular expression grammar.

     The input 'abc1234abc' holds one run of digits at positions 3..7; the
     expected result is every non-empty contiguous slice of that run,
     ordered by start position and then by end position. The same check is
     repeated for list input (Token objects and plain characters), where
     the expected content is a list of characters instead of a string.
     """
     self.maxDiff = None
     gd = RegularExpression('^[0123456789]*$')
     source = 'abc1234abc'
     digits = '1234'
     offset = 3  # index of the first digit inside ``source``

     # (start, stop) slice bounds inside ``digits``, in the expected order.
     spans = [(start, stop)
              for start in range(len(digits))
              for stop in range(start + 1, len(digits) + 1)]

     # String input: token content is the matched substring.
     expected_result = [
         PositionToken(content=digits[start:stop], gd=gd,
                       left=offset + start, right=offset + stop)
         for start, stop in spans]
     self.assertListEqual(extract(gd, source), expected_result)

     # List input: token content is the list of matched items.
     expected_result = [
         PositionToken(content=list(digits[start:stop]), gd=gd,
                       left=offset + start, right=offset + stop)
         for start, stop in spans]
     as_tokens = [Token(char, gd) for char in source]
     self.assertListEqual(extract(gd, as_tokens), expected_result)
     self.assertListEqual(extract(gd, list(source)), expected_result)

     # NOTE(review): only one argument is forwarded to extract here, while
     # every other call passes two -- confirm this is the intended check.
     self.assertRaises(Exception, extract, None)
     self.assertListEqual(extract(gd, ''), [])  # Empty input
Esempio n. 2
0
 def testEncoding(self):
     """Check extract() with the ascii encoding grammar.

     Covers empty input, a string mixing an accepted character with '£',
     and the same data given as a list of characters. In both non-empty
     cases only the leading 'a' (positions 0..1) is expected back.
     """
     ad = ascii_encoding
     cases = (
         ('', []),
         ('a£', [PositionToken('a', ad, 0, 1)]),
         (['a', '£'], [PositionToken(['a'], ad, 0, 1)]),
     )
     for data, expected in cases:
         self.assertListEqual(extract(ad, data), expected)

     # NOTE(review): extract receives a single argument here, unlike the
     # calls above -- confirm this is the intended failure scenario.
     with self.assertRaises(Exception):
         extract(None)
Esempio n. 3
0
File: lex.py Progetto: nesaro/pydsl
def my_call_back(graph, element):
    """Compute and cache the 'parsed' entry of ``element``'s graph node.

    Does nothing when ``graph.node[element]`` already has a 'parsed' key.
    Otherwise every successor lacking a 'parsed' entry is processed
    recursively, the successors' 'parsed' tokens are gathered and passed
    through ``remove_subsets``, and the result is checked for contiguity:
    each token's ``left`` must equal the previous token's ``right``,
    starting from 0. The validated tokens are then handed to
    ``extract(element, tokens)`` and the outcome stored under 'parsed'.

    Raises:
        Exception: if the tokens coming from the successors leave a gap
            or overlap.
    """
    node_data = graph.node[element]
    if 'parsed' in node_data:
        return  # Already parsed
    gathered = []
    for child in graph.successors(element):
        # Make sure the child has been processed before reading its result.
        if not (child in graph.node and 'parsed' in graph.node[child]):
            my_call_back(graph, child)
        gathered.extend(graph.node[child]['parsed'])
    candidates = remove_subsets(gathered)
    contiguous = []
    expected_left = 0
    for candidate in candidates:
        if expected_left != candidate.left:
            raise Exception("Non contiguous parsing from sucessors")
        expected_left = candidate.right
        contiguous.append(candidate)
    # Imported at this point, as in the original, so the early-return and
    # error paths never trigger the import.
    from pydsl.extract import extract
    node_data['parsed'] = extract(element, contiguous)
Esempio n. 4
0
def my_call_back(graph, element):
    """Fill in ``graph.node[element]['parsed']`` from the node's successors.

    A node that already carries a 'parsed' key is left untouched. For any
    other node, successors without a 'parsed' entry are handled first by
    a recursive call; their 'parsed' tokens are then concatenated,
    filtered by ``remove_subsets`` and verified to form an unbroken chain
    (first ``left`` is 0, each following ``left`` equals the preceding
    ``right``). The chain is given to ``extract`` together with
    ``element`` and the return value is stored on the node.

    Raises:
        Exception: when the successor tokens do not form an unbroken chain.
    """
    entry = graph.node[element]
    if 'parsed' in entry:
        return  # Already parsed

    pending = []
    for succ in graph.successors(element):
        if succ not in graph.node or 'parsed' not in graph.node[succ]:
            my_call_back(graph, succ)
        pending += list(graph.node[succ]['parsed'])

    chain = []
    cursor = 0  # right edge of the last accepted token
    for item in remove_subsets(pending):
        if cursor == item.left:
            cursor = item.right
            chain.append(item)
        else:
            raise Exception("Non contiguous parsing from sucessors")

    # Kept as a late, function-local import exactly where the original
    # performed it.
    from pydsl.extract import extract
    entry['parsed'] = extract(element, chain)
Esempio n. 5
0
def my_call_back(graph, element):
    """Compute the 'parsed' entry of ``element`` from initialised successors.

    Returns immediately when the node already has a 'parsed' key. Each
    successor must already carry a 'parsed' entry; its 4-item records are
    rebuilt as ``PositionToken`` objects, ordered by ``left``, filtered by
    ``remove_subsets`` and checked for contiguity (each ``left`` equals
    the previous ``right``, starting at 0). The records are converted to
    ``Token(content, grammar)`` objects, passed to ``extract`` with
    ``element``, and the result is stored under 'parsed'.

    Raises:
        Exception: if a successor has no 'parsed' entry, or if the
            successors' records are not contiguous.
    """
    node_data = graph.node[element]
    if 'parsed' in node_data:
        return  # Already parsed
    gathered = []
    for child in graph.successors(element):
        if not (child in graph.node and 'parsed' in graph.node[child]):
            raise Exception("Uninitialized graph %s" % child)
        gathered.extend(
            PositionToken(content, grammar, start, end)
            for content, grammar, start, end in graph.node[child]['parsed'])
    # Order the records from all successors by their left edge (stable sort).
    gathered.sort(key=lambda x: x.left)
    remaining = remove_subsets(gathered)
    tokens = []
    expected_left = 0
    for content, grammar, start, end in remaining:
        if expected_left != start:
            raise Exception("Non contiguous parsing from sucessors")
        expected_left = end
        tokens.append(Token(content, grammar))
    # Late import kept in its original position so the early-return and
    # error paths never trigger it.
    from pydsl.extract import extract
    node_data['parsed'] = extract(element, tokens)
Esempio n. 6
0
 def testEncoding(self):
     """Check extract() with the ascii encoding grammar.

     Empty input must give an empty list; for 'a£', given either as a
     string or as a list of characters, only the leading 'a' spanning
     positions 0..1 is expected.
     """
     ad = ascii_encoding
     check = self.assertListEqual

     check(extract(ad, ''), [])

     from_string = [PositionToken('a', ad, 0, 1)]
     check(extract(ad, 'a£'), from_string)

     from_list = [PositionToken(['a'], ad, 0, 1)]
     check(extract(ad, ['a', '£']), from_list)

     # NOTE(review): extract is invoked with a single argument here --
     # confirm this is the failure mode the test means to cover.
     with self.assertRaises(Exception):
         extract(None)