Example #1
    def nextToken(self, include_gd=False):
        tree = PositionResultList()  # This is the extract algorithm
        valid_alternatives = []
        for gd in self.alphabet:
            checker = checker_factory(gd)
            for left in range(0, len(self.string)):
                for right in range(left +1, len(self.string) +1 ):
                    if checker.check(self.string[left:right]):
                        valid_alternatives.append((left, right, gd))
        if not valid_alternatives:
            raise Exception("Nothing consumed")
        for left, right, gd in valid_alternatives:
            string = self.string[left:right]
            tree.append(left, right, string, gd, check_position=False)

        right_length_seq = []
        for x in tree.valid_sequences():
            if x[-1]['right'] == len(self.string):
                right_length_seq.append(x)
        if not right_length_seq:
            raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))
        for y in sorted(right_length_seq, key=lambda x:len(x))[0]: #Always gets the match with less tokens
            if include_gd:
                yield Token(y['content'], y.get('gd'))
            else:
                yield Token(y['content'], None)
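The method above is the exhaustive variant: it collects every slice of the input that some grammar definition in the alphabet accepts, keeps only the token sequences that span the whole string, and yields the one with the fewest tokens. A minimal, self-contained sketch of the same idea, with plain predicates standing in for pydsl checkers (find_slices and shortest_cover are illustrative names, not pydsl API):

from collections import deque

def find_slices(string, alphabet):
    """All (left, right, name) triples whose slice is accepted by some checker."""
    return [(left, right, name)
            for name, check in alphabet.items()
            for left in range(len(string))
            for right in range(left + 1, len(string) + 1)
            if check(string[left:right])]

def shortest_cover(string, slices):
    """Fewest-token chain of adjacent accepted slices covering the whole string (BFS)."""
    queue = deque([(0, [])])  # (position reached, tokens emitted so far)
    while queue:
        pos, tokens = queue.popleft()
        if pos == len(string):
            return tokens
        for left, right, name in slices:
            if left == pos:
                queue.append((right, tokens + [(string[left:right], name)]))
    return None  # no sequence reaches the end of the input

alphabet = {'integer': str.isdigit, 'slash': lambda s: s == "/"}
print(shortest_cover("11/11", find_slices("11/11", alphabet)))
# [('11', 'integer'), ('/', 'slash'), ('11', 'integer')]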
Example #2
 def _load_checker(originaldic):
     """Converts {"channelname","type"} into {"channelname",instance}"""
     result = {}
     for key in originaldic:
         from pydsl.Check import checker_factory
         result[key] = checker_factory(load(str(originaldic[key]))) #FIXME: load is no longer available
     return result
Example #3
    def nextToken(self, include_gd=False):
        tree = PositionResultList()  # This is the extract algorithm
        valid_alternatives = []
        for gd in self.alphabet:
            checker = checker_factory(gd)
            for left in range(0, len(self.string)):
                for right in range(left + 1, len(self.string) + 1):
                    if checker.check(self.string[left:right]):
                        valid_alternatives.append((left, right, gd))
        if not valid_alternatives:
            raise Exception("Nothing consumed")
        for left, right, gd in valid_alternatives:
            string = self.string[left:right]
            tree.append(left, right, string, gd, check_position=False)

        right_length_seq = []
        for x in tree.valid_sequences():
            if x[-1]['right'] == len(self.string):
                right_length_seq.append(x)
        if not right_length_seq:
            raise Exception("No sequence found for input %s alphabet %s" %
                            (self.string, self.alphabet))
        for y in sorted(right_length_seq, key=lambda x: len(x))[0]:  # Always picks the match with the fewest tokens
            if include_gd:
                yield Token(y['content'], y.get('gd'))
            else:
                yield Token(y['content'], None)
Example #4
def extract(grammar, inputdata, fixed_start=False):
    """
    Receives a sequence and a grammar, 
    returns a list of PositionTokens with all of the parts of the sequence that 
    are recognized by the grammar
    """
    if not inputdata:
        return []
    checker = checker_factory(grammar)

    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]

    totallen = len(inputdata)
    try:
        maxl = grammar.maxsize or totallen
    except NotImplementedError:
        maxl = totallen
    try:
        #minl = grammar.minsize #FIXME: It won't work with incompatible alphabets
        minl = 1
    except NotImplementedError:
        minl = 1
    if fixed_start:
        max_start = 1
    else:
        max_start = totallen
    result = []
    for i in range(max_start):
        for j in range(i + minl, min(i + maxl, totallen) + 1):
            check = checker.check(inputdata[i:j])
            if check:
                result.append(PositionToken(inputdata[i:j], None, i, j))
    return result
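This extract walks every start index i (or only index 0 when fixed_start is set) and every end index j within the grammar's size bounds, keeping the slices the checker accepts. A standalone sketch of that loop under the same bounds, using a plain callable in place of a pydsl checker (extract_slices and is_integer are illustrative names):

def extract_slices(check, inputdata, minl=1, maxl=None, fixed_start=False):
    """Return (start, end, slice) for every slice accepted by check()."""
    maxl = maxl or len(inputdata)
    max_start = 1 if fixed_start else len(inputdata)
    result = []
    for i in range(max_start):
        for j in range(i + minl, min(i + maxl, len(inputdata)) + 1):
            if check(inputdata[i:j]):
                result.append((i, j, inputdata[i:j]))
    return result

is_integer = lambda s: s.isdigit()          # stand-in for a pydsl checker
print(extract_slices(is_integer, "ab12c"))  # [(2, 3, '1'), (2, 4, '12'), (3, 4, '2')]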
Example #5
 def testLogLine(self):
     repository = {
         'space':
         RegularExpression("^ $"),
         'integer':
         RegularExpression("^[0123456789]*$"),
         'ipv4':
         RegularExpression(
             "^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"),
         'characters':
         RegularExpression("^[A-z]+$")
     }
     grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf",
                             repository)
     checker = checker_factory(grammar)
     original_string = "1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GET\" 1 1 \"referer\" \"useragent\""
     tokenized = [
         x.content
         for x in lex(grammar.alphabet, ascii_encoding, original_string)
     ]
     self.assertTrue(checker.check(tokenized))
     self.assertFalse(
         checker.check(
             "1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GOT\" 1 1 \"referer\" \"useragent\""
         ))
Example #6
 def __init__(self, rules, root_rule, repository={}):
     import parsley
     Grammar.__init__(self)
     repo = {}
     for k, v in repository.items():
         repo[k] = (v, checker_factory(v))[isinstance(v, Grammar)]
     self.grammar = parsley.makeGrammar(rules, repo)
     self.root_rule = root_rule
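The tuple-indexing expression in the loop above selects between v and checker_factory(v) by indexing with a bool; unlike a conditional expression, it evaluates both alternatives, so checker_factory is called even for values that are not Grammar instances. A more direct spelling, shown only as an illustrative rewrite (assuming checker_factory is not needed for non-Grammar values):

    repo[k] = checker_factory(v) if isinstance(v, Grammar) else v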
Example #7
 def __init__(self, rules, root_rule, repository={}):
     import parsley
     Grammar.__init__(self)
     repo = {}
     for k, v in repository.items():
         repo[k] = (v, checker_factory(v))[isinstance(v, Grammar)]
     self.grammar = parsley.makeGrammar(rules, repo)
     self.root_rule = root_rule
Example #8
 def testFileLoader(self):
     repository = {'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py')}  # DayOfMonth loaded as checker
     G = load_parsley_grammar_file("pydsl/contrib/grammar/Date.parsley", "expr", repository)
     C = checker_factory(G)
     T = translator_factory(G)
     self.assertTrue(C("2/4/12"))
     self.assertEqual(T("2/4/12"), (2, 4, 12))
     self.assertRaises(parsley.ParseError, T, "40/4/12")
Example #9
 def testEcho(self):
     from pydsl.Translator import translate, PythonTranslator
     from pydsl.Grammar.Definition import RegularExpression
     from pydsl.Check import checker_factory
     cstring = checker_factory(RegularExpression('.*'))
     def function(my_input):
         return my_input
     pt = PythonTranslator({'my_input': cstring}, {'output': cstring}, function)
     self.assertEqual(translate(pt, {'my_input': "1234"}), "1234")
Example #10
 def __init__(self, rules, root_rule="expr", repository=None):
     import parsley
     Grammar.__init__(self)
     repo = dict(repository or {})
     for key in repo:
         if isinstance(repo[key], Grammar):
             repo[key] = checker_factory(repo[key])
     self.grammar = parsley.makeGrammar(rules, repo)
     self.root_rule = root_rule
Example #11
 def testLogLine(self):
     repository = {'space': RegularExpression("^ $"),
             'integer': RegularExpression("^[0123456789]*$"),
             'ipv4': RegularExpression(r"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"),
             'characters': RegularExpression("^[A-z]+$")}
     grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf", repository)
     checker = checker_factory(grammar)
     self.assertTrue(checker.check("1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GET\" 1 1 \"referer\" \"useragent\""))
     self.assertFalse(checker.check("1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GOT\" 1 1 \"referer\" \"useragent\""))
Example #12
 def testChecker(self):
     alphabet = GrammarCollection([self.integer, self.date])
     checker = checker_factory(alphabet)
     self.assertTrue(checker.check("1234"))
     self.assertTrue(checker.check([x for x in "1234"]))
     self.assertFalse(checker.check("11/11/1991")) #Non tokenized input
     self.assertFalse(checker.check([x for x in "11/11/1991"])) #Non tokenized input
     self.assertTrue(checker.check(["11","/","11","/","1991"])) #tokenized input
     self.assertFalse(checker.check("bcdf"))
     self.assertFalse(checker.check([x for x in "bcdf"]))
Example #13
 def testLogLine(self):
     repository = {'space': RegularExpression("^ $"),
             'integer': RegularExpression("^[0123456789]*$"),
             'ipv4': RegularExpression(r"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"),
             'characters': RegularExpression("^[A-z]+$")}
     grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf", repository)
     checker = checker_factory(grammar)
     original_string = "1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GET\" 1 1 \"referer\" \"useragent\""
     tokenized = [x.content for x in lex(grammar.alphabet, ascii_encoding, original_string)]
     self.assertTrue(checker.check(tokenized))
     self.assertFalse(checker.check("1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GOT\" 1 1 \"referer\" \"useragent\""))
Example #14
 def testFileLoader(self):
     import parsley
     from pydsl.File.Parsley import load_parsley_grammar_file
     repository = {
         'DayOfMonth':
         load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
     }  #DayOfMonth loaded as checker
     G = load_parsley_grammar_file("pydsl/contrib/grammar/Date.parsley",
                                   "expr", repository)
     C = checker_factory(G)
     T = translator_factory(G)
     self.assertTrue(C("2/4/12"))
     self.assertEqual(T("2/4/12"), (2, 4, 12))
     self.assertRaises(parsley.ParseError, T, "40/4/12")
Example #15
 def testSecondLevelGrammar(self):
     a = String("a")
     b = String("b")
     c = String("c")
     x = String("x")
     y = String("y")
     z = String("z")
     first_level = Choice([a, b, c])
     first_levelb = Choice([x, y, z])
     second_level = Sequence([a, b], base_alphabet=first_level)
     from pydsl.Check import checker_factory
     checker = checker_factory(second_level)
     self.assertTrue(checker([a, b]))
     second_level_alphabet = Choice([first_level, first_levelb])
     lexer = lexer_factory(second_level_alphabet, base=first_level + first_levelb)
     self.assertListEqual(lexer("ab"), [("a", first_level), ("b", first_level)])
Example #16
 def testSecondLevelGrammar(self):
     a = String("a")
     b = String("b")
     c = String("c")
     x = String("x")
     y = String("y")
     z = String("z")
     first_level = Choice([a, b, c])
     first_levelb = Choice([x, y, z])
     second_level = Sequence([a, b], base_alphabet=first_level)
     from pydsl.Check import checker_factory
     checker = checker_factory(second_level)
     self.assertTrue(checker([a, b]))
     second_level_alphabet = Choice([first_level, first_levelb])
     lexer = lexer_factory(second_level_alphabet,
                           base=first_level + first_levelb)
     self.assertListEqual(lexer("ab"), [("a", first_level),
                                        ("b", first_level)])
Example #17
 def nextToken(self, include_gd=False):
     best_right = 0
     best_gd = None
     for gd in self.alphabet:
         checker = checker_factory(gd)
         left = self.index
         for right in range(left + 1, len(self.string) + 1):
             if checker.check(self.string[left:right]):  #TODO: Use match
                 if right > best_right:
                     best_right = right
                     best_gd = gd
     if not best_gd:
         raise Exception("Nothing consumed")
     if include_gd:
         result = self.string[self.index:best_right], best_gd
     else:
         result = self.string[self.index:best_right]
      self.index = best_right  # advance past the longest match
     return result
Example #18
 def nextToken(self, include_gd=False):
     best_right = 0
     best_gd = None
     for gd in self.alphabet:
         checker = checker_factory(gd)
         left = self.index
          for right in range(left + 1, len(self.string) + 1):
              if checker.check(self.string[left:right]):  #TODO: Use match
                 if right > best_right:
                     best_right = right
                     best_gd = gd
     if not best_gd:
         raise Exception("Nothing consumed")
     if include_gd:
         result = self.string[self.index:best_right], best_gd
     else:
         result = self.string[self.index:best_right]
      self.index = best_right  # advance past the longest match
     return result
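Both nextToken variants above are greedy longest-match lexers: at the current index they try every grammar definition, keep the longest accepted slice, and advance the index past it. A self-contained sketch of that full loop, with plain predicates standing in for pydsl checkers (tokenize_greedy is an illustrative name, not pydsl API):

def tokenize_greedy(string, alphabet):
    """Greedy longest-match lexer; raises if nothing matches at the current index."""
    index = 0
    tokens = []
    while index < len(string):
        best_right, best_name = index, None
        for name, check in alphabet.items():
            for right in range(index + 1, len(string) + 1):
                if check(string[index:right]) and right > best_right:
                    best_right, best_name = right, name
        if best_name is None:
            raise Exception("Nothing consumed at position %d" % index)
        tokens.append((string[index:best_right], best_name))
        index = best_right  # advance past the longest match
    return tokens

alphabet = {'integer': str.isdigit, 'slash': lambda s: s == "/"}
print(tokenize_greedy("11/11", alphabet))
# [('11', 'integer'), ('/', 'slash'), ('11', 'integer')]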
Example #19
def extract(grammar, inputdata, fixed_start=False, return_first=False):
    """
    Receives a sequence and a grammar and returns a list of PositionTokens
    covering every part of the sequence that is recognized by the grammar.
    If return_first is True, the first matching PositionToken is returned instead.
    """
    if not inputdata:
        return []
    checker = checker_factory(grammar)

    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]

    totallen = len(inputdata)
    try:
        maxl = grammar.maxsize or totallen
    except NotImplementedError:
        maxl = totallen
    try:
        #minl = grammar.minsize #FIXME: It won't work with incompatible alphabets
        minl = 1
    except NotImplementedError:
        minl = 1
    if fixed_start:
        max_start = 1
    else:
        max_start = totallen
    result = []
    for i in range(max_start):
        for j in range(i+minl, min(i+maxl, totallen) + 1):
            check = checker.check(inputdata[i:j])
            if check:
                this_pt = PositionToken(inputdata[i:j], None, i, j)
                if return_first:
                    return this_pt
                result.append(this_pt)
    return result
Example #20
def extract(grammar, inputdata, fixed_start=False):
    """Extract every slice of the input data that belongs to the Grammar Definition"""
    checker = checker_factory(grammar)
    totallen = len(inputdata)
    try:
        maxl = grammar.maxsize or totallen
    except NotImplementedError:
        maxl = totallen
    try:
        #minl = grammar.minsize #FIXME: It won't work with incompatible alphabets
        minl = 1
    except NotImplementedError:
        minl = 1
    if fixed_start:
        max_start = 1
    else:
        max_start = totallen
    result = []
    for i in range(max_start):
        for j in range(i+minl, min(i+maxl, totallen) + 1):
            check = checker.check(inputdata[i:j])
            if check:
                result.append((i,j, inputdata[i:j]))
    return result
Example #21
 def testChecker(self):
     checker = checker_factory(self.alphabet)
     self.assertTrue(checker.check("1234"))
     self.assertTrue(checker.check("11/11/1991"))
     self.assertFalse(checker.check("bcdf"))