def parse_table(name, table, subsets):
    """Build a KimmoFSARule from a textual state table.

    Layout of ``table`` (split on newlines):

    * line 0 is not read by this function;
    * lines 1 and 2 hold, column by column, the two halves of each pair
      (whitespace separated); column *k* becomes
      ``KimmoPair(line1[k], line2[k])``;
    * every following non-blank line is a state row of the form
      ``<state><.|:> <target> <target> ...`` with one target per pair.
      A ``:`` after the state name marks the state as final.

    ``name`` is used in error messages and, like ``subsets``, is handed to
    the resulting rule unchanged.

    Returns ``KimmoFSARule(name, fsa, subsets)``.

    Raises ValueError if the table has fewer than four lines, if the two
    pair lines differ in length, if a state row cannot be parsed, or if a
    row's number of targets differs from the number of pairs.
    """
    lines = table.split('\n')
    if len(lines) < 4:
        raise ValueError(
            "Rule %s has too few lines to be an FSA table." % name)

    pairs1 = lines[1].split()
    pairs2 = lines[2].split()
    if len(pairs1) != len(pairs2):
        raise ValueError(
            "Rule %s has pair definitions that don't line up." % name)
    pairs = [KimmoPair(p1, p2) for p1, p2 in zip(pairs1, pairs2)]

    finals = []
    fsa = FSA()
    for line in lines[3:]:
        line = line.strip()
        if not line:
            continue
        groups = re.match(r'(\w+)(\.|:)\s*(.*)', line)
        if groups is None:
            raise ValueError(
                "Can't parse this line of the state table for rule %s:\n%s"
                % (name, line))
        state, char, morestates = groups.groups()
        # The start state is taken from a row only while fsa.start() is
        # still 0 (presumably the FSA default -- confirm against FSA).
        if fsa.start() == 0:
            fsa.set_start(state)
        if char == ':':
            finals.append(state)
        fsa.add_state(state)
        morestates = morestates.split()
        if len(morestates) != len(pairs):
            raise ValueError(
                "Rule %s has a row of the wrong length:\n%s\ngot %d items, should be %d"
                % (name, line, len(morestates), len(pairs)))
        for pair, nextstate in zip(pairs, morestates):
            fsa.insert_safe(state, pair, nextstate)
    fsa.set_final(finals)
    return KimmoFSARule(name, fsa, subsets)
    def from_dfa_dict(name, states, subsets):
        """Build a KimmoFSARule from a dictionary description of a DFA.

        ``states`` maps a state name (a string) to a mapping of
        ``label -> target``.  The last whitespace-separated word of the
        state name is used as the state identifier; the state is marked
        final unless the first word starts with ``'rej'``.

        Start state selection: a state called ``'start'``/``'Start'`` is
        always made the start state; ``'begin'``/``'Begin'``/``'1'`` are
        used only while ``fsa.start()`` is still 0.

        A label equal to ``'others'`` (in any letter case) is a fallback:
        it adds a transition on the ``'@'`` pair and on every pair that
        appears somewhere in ``states`` but is not listed explicitly for
        this state.  Every other label is converted with
        ``KimmoPair.make(label)``.

        ``name`` and ``subsets`` are passed to the rule unchanged.
        Returns ``KimmoFSARule(name, fsa, subsets)``.
        """
        def is_others(label):
            # Single definition of the fallback label so that collecting
            # pairs and inserting transitions agree on its spelling.
            return label.lower() == 'others'

        fsa = FSA()

        # Inventory of every explicit pair used anywhere, plus '@'.
        pairs = set([KimmoPair.make('@')])
        for (statename, trans) in states.items():
            for label in trans:
                if not is_others(label):
                    pairs.add(KimmoPair.make(label))

        for (statename, trans) in states.items():
            parts = statename.split()
            source = parts[-1]
            if not parts[0].startswith('rej'):
                fsa.add_final(source)

            if fsa.start() == 0 and source in ['begin', 'Begin', '1', 1]:
                fsa.set_start(source)
            if source in ['start', 'Start']:
                fsa.set_start(source)

            # Pairs this state handles explicitly; the fallback must not
            # override them.
            used_pairs = set()
            for label in trans:
                if not is_others(label):
                    used_pairs.add(KimmoPair.make(label))

            for label, target in trans.items():
                if is_others(label):
                    fsa.insert_safe(source, KimmoPair.make('@'), target)
                    for pair in pairs.difference(used_pairs):
                        fsa.insert_safe(source, pair, target)
                else:
                    fsa.insert_safe(source, KimmoPair.make(label), target)
        return KimmoFSARule(name, fsa, subsets)
Esempio n. 3
0
 def parse_table(name, table, subsets):
     """Parse a textual FSA state table into a KimmoFSARule.

     ``table`` is split on newlines.  Line 0 is not read here.  Lines 1
     and 2 give the two halves of each pair, whitespace separated and
     aligned by column.  Each later non-blank line is a state row:
     a state name, then ``.`` or ``:`` (``:`` marks a final state), then
     one target state per pair.

     ``name`` appears in error messages; ``name`` and ``subsets`` are
     passed unchanged to the returned ``KimmoFSARule``.

     Raises ValueError when the table has fewer than four lines, the pair
     lines have different lengths, a state row does not match the expected
     pattern, or a row has the wrong number of targets.
     """
     lines = table.split('\n')
     if len(lines) < 4:
         raise ValueError(
             "Rule %s has too few lines to be an FSA table." % name)

     pairs1 = lines[1].split()
     pairs2 = lines[2].split()
     if len(pairs1) != len(pairs2):
         raise ValueError(
             "Rule %s has pair definitions that don't line up." % name)
     pairs = [KimmoPair(p1, p2) for p1, p2 in zip(pairs1, pairs2)]

     finals = []
     fsa = FSA()
     for line in lines[3:]:
         line = line.strip()
         if not line:
             continue
         groups = re.match(r'(\w+)(\.|:)\s*(.*)', line)
         if groups is None:
             raise ValueError(
                 "Can't parse this line of the state table for rule %s:\n%s"
                 % (name, line))
         state, char, morestates = groups.groups()
         # Only a row seen while fsa.start() is still 0 sets the start
         # state (0 is presumably the FSA default -- confirm against FSA).
         if fsa.start() == 0:
             fsa.set_start(state)
         if char == ':':
             finals.append(state)
         fsa.add_state(state)
         morestates = morestates.split()
         if len(morestates) != len(pairs):
             raise ValueError(
                 "Rule %s has a row of the wrong length:\n%s\ngot %d items, should be %d"
                 % (name, line, len(morestates), len(pairs)))
         for pair, nextstate in zip(pairs, morestates):
             fsa.insert_safe(state, pair, nextstate)
     fsa.set_final(finals)
     return KimmoFSARule(name, fsa, subsets)
Esempio n. 4
0
 def from_dfa_dict(name, states, subsets):
     """Build a KimmoFSARule from a dictionary description of a DFA.

     ``states`` maps a state name (a string) to a mapping of
     ``label -> target``.  The last whitespace-separated word of the state
     name is the state identifier; the state is final unless the first
     word starts with ``'rej'``.

     A state called ``'start'``/``'Start'`` always becomes the start
     state; ``'begin'``/``'Begin'``/``'1'`` do so only while
     ``fsa.start()`` is still 0.

     The label ``'others'`` (matched case-insensitively) is a fallback: it
     adds a transition on the ``'@'`` pair and on every pair used anywhere
     in ``states`` that this state does not list explicitly.  Any other
     label is converted with ``KimmoPair.make(label)``.

     ``name`` and ``subsets`` are passed to the rule unchanged.
     Returns ``KimmoFSARule(name, fsa, subsets)``.
     """
     def is_others(label):
         # One predicate for the fallback label, so pair collection and
         # transition insertion cannot disagree about its letter case.
         return label.lower() == 'others'

     fsa = FSA()

     # Every explicit pair mentioned by any state, plus '@'.
     pairs = set([KimmoPair.make('@')])
     for (statename, trans) in states.items():
         for label in trans:
             if not is_others(label):
                 pairs.add(KimmoPair.make(label))

     for (statename, trans) in states.items():
         parts = statename.split()
         source = parts[-1]
         if not parts[0].startswith('rej'):
             fsa.add_final(source)

         if fsa.start() == 0 and source in ['begin', 'Begin', '1', 1]:
             fsa.set_start(source)
         if source in ['start', 'Start']:
             fsa.set_start(source)

         # Pairs with an explicit transition from this state; the fallback
         # must leave them alone.
         used_pairs = set()
         for label in trans:
             if not is_others(label):
                 used_pairs.add(KimmoPair.make(label))

         for label, target in trans.items():
             if is_others(label):
                 fsa.insert_safe(source, KimmoPair.make('@'), target)
                 for pair in pairs.difference(used_pairs):
                     fsa.insert_safe(source, pair, target)
             else:
                 fsa.insert_safe(source, KimmoPair.make(label), target)
     return KimmoFSARule(name, fsa, subsets)
    def _parse_context(self, tokens, i, reverse):
        """Parse a context starting at ``tokens[i]`` and compile it.

        Delegates the parsing to ``self._parse_list``.  If that call
        consumes nothing (the returned position equals ``i``), the result
        is ``(i, None)``.  Otherwise the parsed tree is turned into an FSA
        over the alphabet collected from the tree, converted with
        ``fsa.dfa()`` and pruned; the result is ``(new_position, dfa)``.

        ``reverse`` is forwarded unchanged to ``self._build_fsa``.
        """
        end, tree = self._parse_list(tokens, i)
        if end == i:
            return (i, None)

        # The alphabet must be known before the FSA is constructed.
        alphabet = set()
        self._collect_alphabet(tree, alphabet)

        nfa = FSA(alphabet)
        accept = self._build_fsa(nfa, nfa.start(), tree, reverse)
        nfa.set_final([accept])

        result = nfa.dfa()
        result.prune()
        return (end, result)
Esempio n. 6
0
    def _parse_context(self, tokens, i, reverse):
        """Parse one context from ``tokens`` at position ``i``.

        Returns a ``(position, automaton)`` tuple.  When
        ``self._parse_list`` makes no progress (its returned position
        equals ``i``) the tuple is ``(i, None)``.  Otherwise the automaton
        is the pruned result of ``dfa()`` on an FSA built from the parsed
        tree by ``self._build_fsa``, which also receives ``reverse``.
        """
        stop, parsed = self._parse_list(tokens, i)
        if stop == i:
            return (i, None)

        # Gather the symbols used by the tree; FSA takes them up front.
        symbols = set()
        self._collect_alphabet(parsed, symbols)

        machine = FSA(symbols)
        last_state = self._build_fsa(machine, machine.start(), parsed, reverse)
        machine.set_final([last_state])

        deterministic = machine.dfa()
        deterministic.prune()
        return (stop, deterministic)