Exemple #1
0
def extract_alphabet(alphabet, inputdata, fixed_start = False):
    """
    Find every slice of ``inputdata`` that the alphabet's lexer accepts.

    Each candidate slice ``inputdata[i:j]`` is handed to a lexer built with
    ``lexer_factory(alphabet, alphabet.alphabet)``. A slice is kept when the
    lexer returns a truthy value and skipped when the lexer raises. The kept
    spans are then passed through ``filter_subsets`` (presumably to drop spans
    contained in larger ones -- confirm against that helper).

    Args:
        alphabet: object exposing an ``alphabet`` attribute, used as the
            lexer's base alphabet.
        inputdata: sliceable sequence. If its first element is a ``Token`` or
            ``PositionToken``, every element is replaced by its ``content``.
        fixed_start: when true, only slices starting at index 0 are tried.

    Returns:
        A list of ``PositionToken(content, None, left, right)``, one per
        surviving span; an empty list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    base_alphabet = alphabet.alphabet

    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]

    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    start_positions = range(1) if fixed_start else range(totallen)
    result = []
    for i in start_positions:
        for j in range(i + 1, totallen + 1):
            piece = inputdata[i:j]
            try:
                accepted = bool(lexer(piece))
            except Exception:
                # A lexing failure only means this slice is not part of the
                # alphabet. KeyboardInterrupt/SystemExit still propagate.
                continue
            if accepted:
                result.append((i, j, piece))
    result = filter_subsets(result)
    return [PositionToken(content, None, left, right) for (left, right, content) in result]
Exemple #2
0
    def test_Concept(self):
        """Lex "red" and translate the resulting tokens into concept strings.

        The input is given once as a plain string and once as a list of
        one-character PositionTokens; both must translate to ["color red"].
        """
        colour_grammars = [String(word) for word in ("red", "green", "blue")]
        lexer = lexer_factory(Choice(colour_grammars, ascii_encoding), ascii_encoding)

        concept_by_text = {
            "red": "color red",
            "green": "color green",
            "blue": "color blue",
        }

        def concept_translator_fun(inputtokens):
            # Map each token's string form to its concept; unknown text is an error.
            concepts = []
            for token in inputtokens:
                text = str(token)
                if text not in concept_by_text:
                    raise Exception("%s,%s" % (text, text.__class__.__name__))
                concepts.append(concept_by_text[text])
            return concepts

        expected = ["color red"]
        self.assertListEqual(concept_translator_fun(lexer("red")), expected)

        red_list = []
        for position, character in enumerate("red"):
            red_list.append(PositionToken(content=character, gd=ascii_encoding,
                                          left=position, right=position + 1))
        self.assertListEqual(concept_translator_fun(lexer(red_list)), expected)
Exemple #3
0
 def testOverlappingLexing(self):
     """Lex "123411/11/2001" as a string and as a list of characters.

     Both calls are expected to yield an integer entry followed by a date entry.
     """
     integer = RegularExpression("^[0123456789]*$")
     date_dependencies = {
         'integer': integer,
         'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py'),
     }
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", date_dependencies)
     lexer = lexer_factory(Alphabet([integer, date]))

     text = "123411/11/2001"
     expected = [("1234", integer), ("11/11/2001", date)]
     self.assertListEqual(lexer(text), expected)
     self.assertListEqual(lexer(list(text)), expected)
Exemple #4
0
    def test_Concept(self):
        """Lex "red" and translate the lexed (content, grammar) pairs to concepts.

        The content of each pair may be a string or a list of characters, so
        both spellings are compared for every colour.
        """
        red, green, blue = (Sequence.from_string(word)
                            for word in ("red", "green", "blue"))
        lexer = lexer_factory(Choice([red, green, blue]))

        translations = (
            ("red", "color red"),
            ("green", "color green"),
            ("blue", "color blue"),
        )

        def concept_translator_fun(inputtokens):
            # The first colour whose string or character-list form equals the content wins.
            concepts = []
            for content, _ in inputtokens:
                for word, concept in translations:
                    if content == word or content == list(word):
                        concepts.append(concept)
                        break
                else:
                    raise Exception("%s,%s" % (content, content.__class__.__name__))
            return concepts

        expected = ["color red"]
        self.assertListEqual(concept_translator_fun(lexer("red")), expected)
        self.assertListEqual(concept_translator_fun(lexer(list("red"))), expected)
Exemple #5
0
    def test_Concept(self):
        """Check that lexing "red" yields tokens that translate to "color red".

        The lexer is fed a plain string first and then a list of
        single-character PositionTokens covering the same text.
        """
        red, green, blue = String("red"), String("green"), String("blue")
        lexer = lexer_factory(Choice([red, green, blue], ascii_encoding),
                              ascii_encoding)

        translations = (
            ("red", "color red"),
            ("green", "color green"),
            ("blue", "color blue"),
        )

        def concept_translator_fun(inputtokens):
            # Translate by the token's str() form; anything unexpected raises.
            translated = []
            for token in inputtokens:
                text = str(token)
                for word, concept in translations:
                    if text == word:
                        translated.append(concept)
                        break
                else:
                    raise Exception("%s,%s" % (text, text.__class__.__name__))
            return translated

        self.assertListEqual(concept_translator_fun(lexer("red")), ["color red"])

        red_list = []
        for index, character in enumerate("red"):
            red_list.append(
                PositionToken(content=character,
                              gd=ascii_encoding,
                              left=index,
                              right=index + 1))
        self.assertListEqual(concept_translator_fun(lexer(red_list)),
                             ["color red"])
Exemple #6
0
def extract_alphabet(alphabet, inputdata, fixed_start=False):
    """
    Find every slice of ``inputdata`` that lexes to exactly one token.

    Each candidate slice ``inputdata[i:j]`` is handed to a lexer built with
    ``lexer_factory(alphabet, alphabet.alphabet)``. A slice is kept only when
    the lexer returns a truthy result of length 1; that single element's
    ``gd`` attribute is recorded alongside the span. Slices for which lexing
    raises, or which produce zero or several tokens, are skipped. The kept
    spans are then passed through ``filter_subsets`` (presumably to drop spans
    contained in larger ones -- confirm against that helper).

    Args:
        alphabet: object exposing an ``alphabet`` attribute, used as the
            lexer's base alphabet.
        inputdata: sliceable sequence to scan.
        fixed_start: when true, only slices starting at index 0 are tried.

    Returns:
        A list of ``PositionToken(content, gd, left, right)``, one per
        surviving span; an empty list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    base_alphabet = alphabet.alphabet

    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    start_positions = range(1) if fixed_start else range(totallen)
    result = []
    for i in start_positions:
        for j in range(i + 1, totallen + 1):
            piece = inputdata[i:j]
            try:
                lexed = lexer(piece)
                if not lexed or len(lexed) != 1:
                    # Empty or multi-token results are not a single symbol.
                    continue
                gd = lexed[0].gd
            except Exception:
                # A lexing failure only means this slice is not part of the
                # alphabet. KeyboardInterrupt/SystemExit still propagate.
                continue
            result.append((i, j, piece, gd))
    result = filter_subsets(result)
    return [
        PositionToken(content, gd, left, right)
        for (left, right, content, gd) in result
    ]
Exemple #7
0
 def testSimpleLexing(self):
     """Lex "1234" from a string and from ascii Tokens; expect one integer Token."""
     integer = RegularExpression("^[0123456789]*$")
     date_dependencies = {
         'integer': integer,
         'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py'),
     }
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", date_dependencies)
     lexer = lexer_factory(Choice([integer, date]), ascii_encoding)

     digits = "1234"
     self.assertListEqual(lexer(digits), [Token("1234", integer)])

     digit_tokens = []
     for character in digits:
         digit_tokens.append(Token(character, ascii_encoding))
     self.assertListEqual(lexer(digit_tokens), [Token("1234", integer)])
Exemple #8
0
 def testSimpleLexing(self):
     """Lex "1234" as a string and as a character list; expect one integer entry."""
     integer = RegularExpression("^[0123456789]*$")
     date_dependencies = {
         'integer': integer,
         'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py'),
     }
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", date_dependencies)
     lexer = lexer_factory(Alphabet([integer, date]))

     digits = "1234"
     # The lexed content is expected as a list of single characters.
     expected = [(list(digits), integer)]
     self.assertListEqual(lexer(digits), expected)
     self.assertListEqual(lexer(list(digits)), expected)
Exemple #9
0
 def testEmptyInput(self):
     """Lexing the empty string must give a falsy result."""
     integer = RegularExpression("^[0123456789]*$")
     day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf",
                          {'integer': integer, 'DayOfMonth': day_of_month})
     lexer = lexer_factory(Choice([integer, date]), ascii_encoding)
     lexed = lexer("")
     self.assertFalse(lexed)
Exemple #10
0
    def test_main_case(self):
        """Run "1+2" through lexing, concept translation, parsing and evaluation.

        The ascii tokens are mapped to the words one/addition/two, parsed with
        a small grammar, and the parse tree is walked to compute 3.
        """
        ascii_lexer = lexer_factory(ascii_encoding, None)
        ascii_tokens = list(ascii_lexer("1+2"))
        self.assertListEqual(list(map(str, ascii_tokens)), ['1', '+', '2'])

        concept_names = {"1": "one", "2": "two", "+": "addition"}

        def concept_translator_fun(inputtokens):
            # Translate each token by its str() form; unknown tokens raise.
            concepts = []
            for token in inputtokens:
                name = concept_names.get(str(token))
                if name is None:
                    raise Exception(token.__class__.__name__)
                concepts.append(name)
            return concepts

        def to_number(number):
            # Returns None when the value matches neither word.
            for word, value in (("one", 1), ("two", 2)):
                if number == word:
                    return value
            return None

        math_expression_concepts = concept_translator_fun(ascii_tokens)
        self.assertListEqual(math_expression_concepts,
                             ['one', 'addition', 'two'])

        grammar_def = [
            "S ::= E",
            "E ::= one addition two",
            "one := String,one",
            "two := String,two",
            "addition := String,addition",
        ]
        from pydsl.file.BNF import strlist_to_production_set
        production_set = strlist_to_production_set(grammar_def, {})
        from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
        parser = BacktracingErrorRecursiveDescentParser(production_set)
        parse_tree = parser(math_expression_concepts)
        from pydsl.grammar.symbol import NonTerminalSymbol

        def parse_tree_walker(tree):
            # S delegates to its first child; E adds its first and third children.
            symbol = tree.symbol
            if symbol == NonTerminalSymbol("S"):
                return parse_tree_walker(tree.childlist[0])
            if symbol == NonTerminalSymbol("E"):
                operands = tree.childlist
                left = to_number(operands[0].symbol.gd)
                right = to_number(operands[2].symbol.gd)
                return left + right
            raise Exception

        self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
Exemple #11
0
    def test_main_case(self):
        """Run "1+2" through lexing, concept translation, parsing and evaluation.

        Token contents are mapped to the words one/addition/two, parsed with a
        small grammar, and the parse tree is walked to compute 3.
        """
        ascii_lexer = lexer_factory(ascii_encoding)
        ascii_tokens = [token.content for token in ascii_lexer("1+2")]
        self.assertListEqual(list(map(str, ascii_tokens)), ['1', '+', '2'])

        translations = (("1", "one"), ("2", "two"), ("+", "addition"))

        def concept_translator_fun(inputtokens):
            # Compare with == in a fixed order; the contents are not assumed hashable.
            concepts = []
            for content in inputtokens:
                for literal, concept in translations:
                    if content == literal:
                        concepts.append(concept)
                        break
                else:
                    raise Exception(content.__class__.__name__)
            return concepts

        def to_number(number):
            # The value is compared against character lists; None when neither matches.
            for word, value in (("one", 1), ("two", 2)):
                if number == list(word):
                    return value
            return None

        math_expression_concepts = concept_translator_fun(ascii_tokens)
        self.assertListEqual(math_expression_concepts, ['one', 'addition', 'two'])

        grammar_def = [
            "S ::= E",
            "E ::= one addition two",
            "one := String,one",
            "two := String,two",
            "addition := String,addition",
        ]
        from pydsl.file.BNF import strlist_to_production_set
        production_set = strlist_to_production_set(grammar_def, {})
        from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
        parser = BacktracingErrorRecursiveDescentParser(production_set)
        parse_tree = parser(math_expression_concepts)
        from pydsl.grammar.symbol import NonTerminalSymbol

        def parse_tree_walker(tree):
            # S delegates to its first child; E adds its first and third children.
            symbol = tree.symbol
            if symbol == NonTerminalSymbol("S"):
                return parse_tree_walker(tree.childlist[0])
            if symbol == NonTerminalSymbol("E"):
                operands = tree.childlist
                left = to_number(operands[0].symbol.gd)
                right = to_number(operands[2].symbol.gd)
                return left + right
            raise Exception

        self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
Exemple #12
0
 def testSimpleLexing(self):
     """Lex "1234" from a string and from ascii Tokens; expect one integer Token."""
     integer = RegularExpression("^[0123456789]*$")
     day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf",
                          {'integer': integer, 'DayOfMonth': day_of_month})
     lexer = lexer_factory(Choice([integer, date]), ascii_encoding)

     digits = "1234"
     self.assertListEqual(lexer(digits), [Token("1234", integer)])

     digit_tokens = []
     for character in digits:
         digit_tokens.append(Token(character, ascii_encoding))
     self.assertListEqual(lexer(digit_tokens), [Token("1234", integer)])
Exemple #13
0
 def testOverlappingLexing(self):
     """Lex "123411/11/2001" as a string and as a list of characters.

     Both calls are expected to yield an integer entry followed by a date entry.
     """
     integer = RegularExpression("^[0123456789]*$")
     day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf",
                          {'integer': integer, 'DayOfMonth': day_of_month})
     lexer = lexer_factory(Choice([integer, date]), ascii_encoding)

     text = "123411/11/2001"
     expected = [("1234", integer), ("11/11/2001", date)]
     self.assertListEqual(lexer(text), expected)
     self.assertListEqual(lexer(list(text)), expected)
Exemple #14
0
 def testSecondLevelGrammar(self):
     """Check a Sequence built over a Choice, then lex "ab" with a two-choice alphabet.

     Each lexed letter is expected to be paired with ``first_level``.
     """
     a, b, c, x, y, z = (String(letter) for letter in "abcxyz")
     first_level = Choice([a, b, c])
     first_levelb = Choice([x, y, z])
     second_level = Sequence([a, b], base_alphabet=first_level)

     from pydsl.Check import checker_factory
     checker = checker_factory(second_level)
     self.assertTrue(checker([a, b]))

     lexer = lexer_factory(Choice([first_level, first_levelb]),
                           base=first_level + first_levelb)
     expected = [(letter, first_level) for letter in "ab"]
     self.assertListEqual(lexer("ab"), expected)
Exemple #15
0
 def testSecondLevelGrammar(self):
     """Check a Sequence built over a Choice, then lex "ab" with a two-choice alphabet.

     Each lexed letter is expected as a Token whose grammar is ``first_level``.
     """
     a, b, c, x, y, z = (String(letter) for letter in "abcxyz")
     first_level = Choice([a, b, c])
     first_levelb = Choice([x, y, z])
     second_level = Sequence([a, b], base_alphabet=first_level)

     from pydsl.check import checker_factory
     checker = checker_factory(second_level)
     self.assertTrue(checker([a, b]))

     lexer = lexer_factory(Choice([first_level, first_levelb]),
                           base=first_level + first_levelb)
     lexed = lexer("ab")
     expected = [Token(letter, first_level) for letter in "ab"]
     self.assertListEqual(lexed, expected)
Exemple #16
0
 def __init__(self, bnfgrammar):
     """Store a lexer for the grammar's alphabet, then initialise the base class."""
     grammar_alphabet = bnfgrammar.alphabet
     # The lexer is assigned before the parent initialiser runs.
     self._lexer = lexer_factory(grammar_alphabet, ascii_encoding)
     super().__init__(bnfgrammar)
Exemple #17
0
 def testLexer(self):
     """Lexing string1 with productionset1's alphabet must yield at least one item."""
     tokenize = lexer_factory(productionset1.alphabet, ascii_encoding)
     tokens = list(tokenize(string1))
     self.assertTrue(tokens)
Exemple #18
0
 def testEmptyInput(self):
     """Lexing the empty string must give a falsy result."""
     integer = RegularExpression("^[0123456789]*$")
     day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
     date = load_bnf_file("pydsl/contrib/grammar/Date.bnf",
                          {'integer': integer, 'DayOfMonth': day_of_month})
     lexer = lexer_factory(Alphabet([integer, date]))
     lexed = lexer("")
     self.assertFalse(lexed)
Exemple #19
0
 def testLexer(self):
     """Lexing string1 with productionset1's alphabet must yield at least one item."""
     tokenize = lexer_factory(productionset1.alphabet)
     tokens = list(tokenize(string1))
     self.assertTrue(tokens)
Exemple #20
0
 def __init__(self, bnfgrammar):
     """Keep the grammar as the production set and store a lexer for its alphabet."""
     grammar_alphabet = bnfgrammar.alphabet
     # The lexer is assigned first, then the production set.
     self._lexer = lexer_factory(grammar_alphabet)
     self._productionset = bnfgrammar