Beispiel #1
0
    def nextToken(self, include_gd=False):
        """Yield the tokens of the shortest tokenization of ``self.string``.

        Every substring of ``self.string`` is tested against a checker built
        for each member of ``self.alphabet``; accepted spans are stored in a
        PositionResultList, and among the valid sequences that end exactly at
        the end of the input the one with the fewest elements is emitted.

        Args:
            include_gd: when true, each Token carries the entry's ``'gd'``
                value; otherwise ``None`` is passed as the second argument.

        Yields:
            Token objects, in sequence order.

        Raises:
            Exception: if no substring is accepted by any checker, or if no
                valid sequence reaches the end of the input. As this is a
                generator, the error surfaces on first iteration.
        """
        tree = PositionResultList()  # This is the extract algorithm
        text = self.string
        end = len(text)

        # Phase 1: collect every (start, stop, definition) span that checks.
        candidates = []
        for definition in self.alphabet:
            checker = checker_factory(definition)
            candidates.extend(
                (start, stop, definition)
                for start in range(end)
                for stop in range(start + 1, end + 1)
                if checker.check(text[start:stop])
            )
        if not candidates:
            raise Exception("Nothing consumed")

        # Phase 2: register the spans so valid sequences can be derived.
        for start, stop, definition in candidates:
            tree.append(start, stop, text[start:stop], definition, check_position=False)

        # Keep only the sequences whose last element ends at the end of input.
        complete = [seq for seq in tree.valid_sequences()
                    if seq[-1]['right'] == end]
        if not complete:
            raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))

        # Always emit the match with the fewest tokens (first one on ties).
        shortest = min(complete, key=len)
        for entry in shortest:
            definition = entry.get('gd') if include_gd else None
            yield Token(entry['content'], definition)
Beispiel #2
0
    def nextToken(self):
        """Yield the tokens of the shortest tokenization of ``self.string``.

        For each member of ``self.alphabet`` a checker is built and every
        substring (capped at the member's ``maxsize`` when it has a truthy
        one) is tested. Accepted spans are appended to a PositionResultList;
        of its valid sequences that end at the end of the input, the one with
        the fewest elements is emitted.

        Yields:
            Token(content, gd) for each element of the chosen sequence.

        Raises:
            Exception: if the tree is falsy after scanning ("Nothing
                consumed"), or if no valid sequence reaches the end of the
                input. As this is a generator, the error surfaces on first
                iteration.
        """
        tree = PositionResultList()  # This is the extract algorithm
        text = self.string
        text_end = len(text)
        for gd in self.alphabet:
            checker = checker_factory(gd)
            # A missing or falsy ``maxsize`` means no length cap for this member.
            maxsize = getattr(gd, 'maxsize', None)
            for left in range(text_end):
                right_stop = text_end + 1
                if maxsize:
                    # Longest candidate is text[left:left + maxsize].
                    right_stop = min(left + maxsize + 1, right_stop)
                for right in range(left + 1, right_stop):
                    fragment = text[left:right]
                    if checker.check(fragment):
                        tree.append(left, right, fragment, gd, check_position=False)
        if not tree:
            raise Exception("Nothing consumed")

        # Keep only sequences whose last element ends at the end of the input.
        right_length_seq = []
        for sequence in tree.valid_sequences():
            elements = list(sequence)
            if elements[-1]['right'] == text_end:
                right_length_seq.append(elements)
        if not right_length_seq:
            raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))

        # Always gets the match with fewest tokens; min() returns the first
        # minimal element, the same one a stable sort would place first.
        for element in min(right_length_seq, key=len):
            yield Token(element['content'], element.get('gd'))
Beispiel #3
0
    def nextToken(self):
        """Tokenize ``self.string`` and yield the shortest complete match.

        Each member of ``self.alphabet`` gets a checker from
        ``checker_factory``. Every substring starting at each position is
        checked, limited to ``maxsize`` characters when the member defines a
        truthy ``maxsize``. Matching spans go into a PositionResultList, and
        the valid sequence with the fewest elements that ends at the end of
        the input is yielded as tokens.

        Yields:
            Token(content, gd) for every element of the selected sequence.

        Raises:
            Exception: "Nothing consumed" if the tree is falsy after the
                scan; "No sequence found ..." if no valid sequence covers the
                input up to its end. Raised lazily, on first iteration of the
                generator.
        """
        tree = PositionResultList()  # This is the extract algorithm
        source = self.string
        length = len(source)
        for gd in self.alphabet:
            checker = checker_factory(gd)
            # Looked up once per alphabet member; falsy/missing means no cap.
            cap = getattr(gd, 'maxsize', None)
            for left in range(length):
                if cap:
                    upper = min(left + cap + 1, length + 1)
                else:
                    upper = length + 1
                for right in range(left + 1, upper):
                    piece = source[left:right]
                    if checker.check(piece):
                        tree.append(left,
                                    right,
                                    piece,
                                    gd,
                                    check_position=False)
        if not tree:
            raise Exception("Nothing consumed")

        # Materialize each sequence and keep those ending at the input's end.
        materialized = (list(seq) for seq in tree.valid_sequences())
        right_length_seq = [
            items for items in materialized if items[-1]['right'] == length
        ]
        if not right_length_seq:
            raise Exception("No sequence found for input %s alphabet %s" %
                            (self.string, self.alphabet))

        # Always gets the match with fewest tokens (first one on ties).
        fewest = min(right_length_seq, key=len)
        for item in fewest:
            yield Token(item['content'], item.get('gd'))
Beispiel #4
0
 def testMain(self):
     # Five contiguous one-character results; the last valid sequence
     # reported by the list is expected to contain all five of them.
     from pydsl.tree import PositionResultList
     results = PositionResultList()
     for start in range(5):
         results.append(start, start + 1, ".")
     last_sequence = results.valid_sequences()[-1]
     self.assertEqual(len(last_sequence), 5)
Beispiel #5
0
 def testMain(self):
     # Append the spans (0,1) .. (4,5), each holding ".", then check that
     # the final valid sequence is made of five elements.
     from pydsl.tree import PositionResultList
     seq = PositionResultList()
     for left, right in zip(range(0, 5), range(1, 6)):
         seq.append(left, right, ".")
     sequences = seq.valid_sequences()
     self.assertEqual(len(sequences[-1]), 5)
Beispiel #6
0
    def __recursive_parser(self, onlysymbol, data, production, showerrors = False):
        """Recursively match ``onlysymbol`` against ``data``; helper for check_word.

        Args:
            onlysymbol: grammar symbol to match; handled cases are
                TerminalSymbol, NullSymbol and NonTerminalSymbol.
            data: input to parse; it is indexed and sliced, so it must be a
                sequence.
            production: only consulted when building the error tree, where its
                ``rightside`` filters which invalid results are attached.
            showerrors: forwarded to ``_reduce_terminal`` and to recursive
                calls. When true and a non-terminal produced no result, a
                single invalid ParseTree is returned instead of an empty list.

        Returns:
            ``[]`` when ``data`` is empty (checked before the symbol type).
            For a terminal, whatever ``_reduce_terminal`` returns. For a null
            symbol, a one-element list with an empty ParseTree. For a
            non-terminal, the list of ParseTree results built from its
            productions (or the single error tree described above).

        Raises:
            Exception: if ``data`` is non-empty and ``onlysymbol`` is none of
                the three handled symbol types.
        """
        LOG.debug("__recursive_parser: Begin ")
        if not data:
            return []
        from pydsl.grammar.symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
        if isinstance(onlysymbol, TerminalSymbol):
            LOG.debug("Iteration: terminalsymbol")
            # Only the first element of data is handed to the terminal reducer.
            return self._reduce_terminal(onlysymbol,data[0], showerrors)
        elif isinstance(onlysymbol, NullSymbol):
            # The null symbol matches with a zero-width tree and empty content.
            return [ParseTree(0, 0, onlysymbol, "")]
        elif isinstance(onlysymbol, NonTerminalSymbol):
            validstack = []    # valid sequences gathered from every alternative
            invalidstack = []  # falsy sub-results kept for error reporting
            # Phase 1: for each production of this symbol, try to chain its
            # right-side symbols over data.
            for alternative in self._productionset.getProductionsBySide(onlysymbol): #Alternative
                alternativetree = PositionResultList()
                alternativeinvalidstack = []
                for symbol in alternative.rightside: # Symbol
                    symbol_success = False
                    # presumably the positions where results appended so far
                    # end, i.e. where the next symbol may start — TODO confirm
                    # against PositionResultList.right_limit_list
                    for totalpos in alternativetree.right_limit_list(): # Right limit
                        if totalpos >= len(data):
                            # Nothing left to parse from this position.
                            continue
                        thisresult =  self.__recursive_parser(symbol, data[totalpos:], alternative, showerrors)
                        # An empty result, or one containing any falsy entry,
                        # is a failure at this position: remember the falsy
                        # entries and move on to the next position.
                        if not (thisresult and all(thisresult)):
                            alternativeinvalidstack += [x for x in thisresult if not x]
                            continue
                        symbol_success = True
                        for x in thisresult:
                            # Sub-results are relative to data[totalpos:];
                            # shift them by the slice offset before storing.
                            x.shift(totalpos)
                            success = alternativetree.append(x.left, x.right, x)
                            if not success:
                                #TODO: Add as an error to the tree or to another place
                                LOG.debug("Discarded symbol :" + str(symbol) + " position:" + str(totalpos))
                            else:
                                LOG.debug("Added symbol :" + str(symbol) + " position:" + str(totalpos))
                    if not symbol_success:
                        LOG.debug("Symbol doesn't work" + str(symbol))
                        break #Try next alternative
                else: # Alternative success (no break happened)
                    # The invalid entries are merged only when every symbol of
                    # the alternative matched somewhere.
                    invalidstack += alternativeinvalidstack
                # Runs whether or not the symbol loop broke: any valid
                # sequences of this alternative's tree are collected.
                for x in alternativetree.valid_sequences():
                    validstack.append(x)
            result = []

            LOG.debug("iteration result collection finished:" + str(validstack))
            # Phase 2: pair every collected sequence with every production and
            # keep the combinations that pass the checks below.
            for alternative in self._productionset.getProductionsBySide(onlysymbol):
                nullcount = alternative.rightside.count(NullSymbol())
                for results in validstack:
                    # NOTE(review): this initial value is overwritten three
                    # lines below before it is ever read.
                    nnullresults = 0
                    left = results[0]['left']
                    right = results[-1]['right']
                    nnullresults = len([x for x in results if x['content'].symbol == NullSymbol()])
                    # The number of non-null results must equal the number of
                    # non-null symbols on the production's right side.
                    if len(results) - nnullresults != len(alternative.rightside) - nullcount:
                        LOG.debug("Discarded: incorrect number of non null symbols")
                        continue
                    if right > len(data):
                        LOG.debug("Discarded: length mismatch")
                        continue
                    # NOTE(review): the `continue` below applies to this inner
                    # `for x` loop, not to the `for results` loop, so a
                    # mismatch is logged but the sequence is NOT discarded.
                    # Confirm the intended behavior before changing it.
                    for x in range(min(len(alternative.rightside), len(results))):
                        if results[x]['content'] != alternative.rightside[x]:
                            LOG.debug("Discarded: rule doesn't match partial result")
                            continue
                    childlist = [x['content'] for x in results]
                    allvalid = all([x.valid for x in childlist])
                    if allvalid:
                        # The new tree is positioned relative to its own start
                        # (0 .. right - left) and holds the matched slice.
                        newresult = ParseTree(0, right - left, onlysymbol,
                                data[left:right], childlist = childlist)
                        newresult.valid = True
                        result.append(newresult)
            if showerrors and not result:
                # Nothing matched: return one invalid tree spanning all of data,
                # with the invalid sub-results whose content appears in the
                # caller-supplied production's right side.
                erroresult = ParseTree(0,len(data), onlysymbol , data, valid = False)
                for invalid in invalidstack:
                    if invalid.content in production.rightside:
                        erroresult.append(invalid)
                return [erroresult]
            return result
        raise Exception("Unknown symbol:" + str(onlysymbol))
Beispiel #7
0
    def __recursive_parser(self,
                           onlysymbol,
                           data,
                           production,
                           showerrors=False):
        """Recursively match ``onlysymbol`` against ``data``; helper for check_word.

        Args:
            onlysymbol: grammar symbol to match; handled cases are
                TerminalSymbol, NullSymbol and NonTerminalSymbol.
            data: input to parse; it is indexed and sliced, so it must be a
                sequence.
            production: only consulted when building the error tree, where its
                ``rightside`` filters which invalid results are attached.
            showerrors: forwarded to ``_reduce_terminal`` and to recursive
                calls. When true and a non-terminal produced no result, a
                single invalid ParseTree is returned instead of an empty list.

        Returns:
            ``[]`` when ``data`` is empty (checked before the symbol type).
            For a terminal, whatever ``_reduce_terminal`` returns. For a null
            symbol, a one-element list with an empty ParseTree. For a
            non-terminal, the list of ParseTree results built from its
            productions (or the single error tree described above).

        Raises:
            Exception: if ``data`` is non-empty and ``onlysymbol`` is none of
                the three handled symbol types.
        """
        LOG.debug("__recursive_parser: Begin ")
        if not data:
            return []
        from pydsl.grammar.symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
        if isinstance(onlysymbol, TerminalSymbol):
            LOG.debug("Iteration: terminalsymbol")
            # Only the first element of data is handed to the terminal reducer.
            return self._reduce_terminal(onlysymbol, data[0], showerrors)
        elif isinstance(onlysymbol, NullSymbol):
            # The null symbol matches with a zero-width tree and empty content.
            return [ParseTree(0, 0, onlysymbol, "")]
        elif isinstance(onlysymbol, NonTerminalSymbol):
            validstack = []  # valid sequences gathered from every alternative
            invalidstack = []  # falsy sub-results kept for error reporting
            # Phase 1: for each production of this symbol, try to chain its
            # right-side symbols over data.
            for alternative in self._productionset.getProductionsBySide(
                    onlysymbol):  #Alternative
                alternativetree = PositionResultList()
                alternativeinvalidstack = []
                for symbol in alternative.rightside:  # Symbol
                    symbol_success = False
                    # presumably the positions where results appended so far
                    # end, i.e. where the next symbol may start — TODO confirm
                    # against PositionResultList.right_limit_list
                    for totalpos in alternativetree.right_limit_list(
                    ):  # Right limit
                        if totalpos >= len(data):
                            # Nothing left to parse from this position.
                            continue
                        thisresult = self.__recursive_parser(
                            symbol, data[totalpos:], alternative, showerrors)
                        # An empty result, or one containing any falsy entry,
                        # is a failure at this position: remember the falsy
                        # entries and move on to the next position.
                        if not (thisresult and all(thisresult)):
                            alternativeinvalidstack += [
                                x for x in thisresult if not x
                            ]
                            continue
                        symbol_success = True
                        for x in thisresult:
                            # Sub-results are relative to data[totalpos:];
                            # shift them by the slice offset before storing.
                            x.shift(totalpos)
                            success = alternativetree.append(
                                x.left, x.right, x)
                            if not success:
                                #TODO: Add as an error to the tree or to another place
                                LOG.debug("Discarded symbol :" + str(symbol) +
                                          " position:" + str(totalpos))
                            else:
                                LOG.debug("Added symbol :" + str(symbol) +
                                          " position:" + str(totalpos))
                    if not symbol_success:
                        LOG.debug("Symbol doesn't work" + str(symbol))
                        break  #Try next alternative
                else:  # Alternative success (no break happened)
                    # The invalid entries are merged only when every symbol of
                    # the alternative matched somewhere.
                    invalidstack += alternativeinvalidstack
                # Runs whether or not the symbol loop broke: any valid
                # sequences of this alternative's tree are collected.
                for x in alternativetree.valid_sequences():
                    validstack.append(x)
            result = []

            LOG.debug("iteration result collection finished:" +
                      str(validstack))
            # Phase 2: pair every collected sequence with every production and
            # keep the combinations that pass the checks below.
            for alternative in self._productionset.getProductionsBySide(
                    onlysymbol):
                nullcount = alternative.rightside.count(NullSymbol())
                for results in validstack:
                    # NOTE(review): this initial value is overwritten a few
                    # lines below before it is ever read.
                    nnullresults = 0
                    left = results[0]['left']
                    right = results[-1]['right']
                    nnullresults = len([
                        x for x in results
                        if x['content'].symbol == NullSymbol()
                    ])
                    # The number of non-null results must equal the number of
                    # non-null symbols on the production's right side.
                    if len(results) - nnullresults != len(
                            alternative.rightside) - nullcount:
                        LOG.debug(
                            "Discarded: incorrect number of non null symbols")
                        continue
                    if right > len(data):
                        LOG.debug("Discarded: length mismatch")
                        continue
                    # NOTE(review): the `continue` below applies to this inner
                    # `for x` loop, not to the `for results` loop, so a
                    # mismatch is logged but the sequence is NOT discarded.
                    # Confirm the intended behavior before changing it.
                    for x in range(
                            min(len(alternative.rightside), len(results))):
                        if results[x]['content'] != alternative.rightside[x]:
                            LOG.debug(
                                "Discarded: rule doesn't match partial result")
                            continue
                    childlist = [x['content'] for x in results]
                    allvalid = all([x.valid for x in childlist])
                    if allvalid:
                        # The new tree is positioned relative to its own start
                        # (0 .. right - left) and holds the matched slice.
                        newresult = ParseTree(0,
                                              right - left,
                                              onlysymbol,
                                              data[left:right],
                                              childlist=childlist)
                        newresult.valid = True
                        result.append(newresult)
            if showerrors and not result:
                # Nothing matched: return one invalid tree spanning all of data,
                # with the invalid sub-results whose content appears in the
                # caller-supplied production's right side.
                erroresult = ParseTree(0,
                                       len(data),
                                       onlysymbol,
                                       data,
                                       valid=False)
                for invalid in invalidstack:
                    if invalid.content in production.rightside:
                        erroresult.append(invalid)
                return [erroresult]
            return result
        raise Exception("Unknown symbol:" + str(onlysymbol))