def makeCountNFA(count, nfa):
    """Chain ``count`` copies of ``nfa`` behind a fresh single-state NFA.

    The seed automaton has one freshly generated state that is both its
    start and its only end state, an empty alphabet and no transitions.
    ``nfa`` is then appended ``count`` times through ``concatenate``.

    Args:
        count: Number of times ``nfa`` is concatenated; passed to ``range``.
        nfa: Automaton handed to ``concatenate`` on every iteration.

    Returns:
        The automaton produced by the last ``concatenate`` call, or the
        seed automaton itself when ``count`` yields no iterations.
    """
    seed_state = FA.gensym()
    chained = FA.Nfa(
        states=[seed_state],
        alphabet=[],
        transitions={},
        start=seed_state,
        ends=[seed_state],
    )
    for _ in range(count):
        chained = chained.concatenate(nfa)
    return chained
def concatenate(self, other):
    """Return a new automaton that runs ``self`` and then ``other``.

    Every end state of ``self`` gets an empty-string (``""``) transition
    to the start state of ``other``; the end states of the result are the
    (possibly renamed) end states of ``other``.

    The result is built from copies of this automaton's state list and
    transition table, so neither ``self`` nor ``other`` is modified. This
    also makes ``x.concatenate(x)`` terminate, which it could not do while
    the state list being iterated was the same list being appended to.

    Args:
        other: Automaton exposing ``al``, ``states``, ``trans``, ``start``
            and ``ends`` in the same layout as ``self``.

    Returns:
        ``IterNfa(states, al, trans, start, ends)`` for the combined
        automaton.
    """
    start = self.start
    al = self.al + [a for a in other.al if a not in self.al]

    # Copy the receiver's containers: the result must not share mutable
    # state with ``self``.
    states = list(self.states)
    trans = {
        state: {sym: list(targets) for sym, targets in arcs.items()}
        for state, arcs in self.trans.items()
    }

    # Map every state of ``other`` to a name that is unused in the result,
    # generating a fresh symbol whenever the original name is taken.
    otn = {}
    for state in other.states:
        c = state
        while c in states:
            c = FA.gensym()
        states.append(c)
        otn[state] = c

    ends = [otn[end] for end in other.ends]

    # Re-create the transitions of ``other`` under the new state names.
    for state in other.states:
        arcs = trans.setdefault(otn[state], {})
        if state in other.trans:
            for sym, finstates in other.trans[state].items():
                targets = arcs.setdefault(sym, [])
                for finstate in finstates:
                    nfs = otn[finstate]
                    if nfs not in targets:
                        targets.append(nfs)

    # Glue the two automata together with empty-string transitions.
    nos = otn[other.start]
    for f in self.ends:
        eps = trans.setdefault(f, {}).setdefault("", [])
        if nos not in eps:
            eps.append(nos)

    return IterNfa(states, al, trans, start, ends)
def union(self, other):
    """Return a new automaton accepting what ``self`` or ``other`` accepts.

    A new start state (the smallest non-negative integer not already used
    as a state name) gets empty-string (``""``) transitions to the start
    states of both operands. The end states of the result are the end
    states of ``self`` plus the (possibly renamed) end states of ``other``.

    The result is built from copies of this automaton's state list and
    transition table, so neither ``self`` nor ``other`` is modified and
    ``x.union(x)`` terminates.

    Args:
        other: Automaton exposing ``al``, ``states``, ``trans``, ``start``
            and ``ends`` in the same layout as ``self``.

    Returns:
        ``IterNfa(states, al, trans, start, ends)`` for the combined
        automaton.
    """
    al = self.al + [a for a in other.al if a not in self.al]

    # Copy the receiver's containers: the result must not share mutable
    # state with ``self``.
    states = list(self.states)
    trans = {
        state: {sym: list(targets) for sym, targets in arcs.items()}
        for state, arcs in self.trans.items()
    }

    # Map every state of ``other`` to a name that is unused in the result,
    # generating a fresh symbol whenever the original name is taken.
    otn = {}
    for state in other.states:
        c = state
        while c in states:
            c = FA.gensym()
        states.append(c)
        otn[state] = c

    ends = self.ends + [otn[f] for f in other.ends]

    # Pick the new start state after all renaming so it cannot collide.
    start = 0
    while start in states:
        start += 1
    states.append(start)

    # Re-create the transitions of ``other`` under the new state names.
    for state in other.states:
        arcs = trans.setdefault(otn[state], {})
        if state in other.trans:
            for sym, finstates in other.trans[state].items():
                targets = arcs.setdefault(sym, [])
                for finstate in finstates:
                    nfs = otn[finstate]
                    if nfs not in targets:
                        targets.append(nfs)

    trans[start] = {"": [self.start, otn[other.start]]}
    return IterNfa(states, al, trans, start, ends)
def star(self):
    """Return a new automaton that repeats ``self`` zero or more times.

    A freshly generated state becomes both the start state and the only
    end state of the result. It has an empty-string (``""``) transition to
    the original start state, and every original end state gets an
    empty-string transition back to it.

    The result is built from copies of this automaton's alphabet, state
    list and transition table, so ``self`` is left untouched; adding the
    back-edges to the receiver itself would change what it accepts.

    Returns:
        ``IterNfa`` describing the repeated automaton.
    """
    al = list(self.al)
    states = list(self.states)
    trans = {
        state: {sym: list(targets) for sym, targets in arcs.items()}
        for state, arcs in self.trans.items()
    }

    # Generate a state name that does not clash with any existing state.
    start = FA.gensym()
    while start in states:
        start = FA.gensym()
    ends = [start]

    # Loop back from every original end state to the new start state.
    for end in self.ends:
        eps = trans.setdefault(end, {}).setdefault("", [])
        if start not in eps:
            eps.append(start)

    trans[start] = {"": [self.start]}
    states.append(start)
    return IterNfa(states=states, alphabet=al, transitions=trans,
                   start=start, ends=ends)
def tonfa(self):
    """Build an NFA for this regex section, dispatching on ``self.type``.

    * ``CHAR_SECTION``: two-state automaton with one transition labelled
      ``self.content``.
    * ``SET_SECTION``: union of the automata of all sub-sections.
    * ``SEQUENCE_SECTION``: concatenation of the automata of all
      sub-sections.
    * ``STAR_SECTION``: ``star()`` of a copy of the content's automaton.
    * ``PLUS_SECTION``: the content's automaton concatenated with its
      ``star()``.
    * ``QUESTION_SECTION``: union of the content's automaton with a
      single-state automaton whose state is both start and end.
    * ``REPEAT_SECTION``: ``self.start`` repetitions; followed by a
      ``star()`` when ``self.end`` is None and ``self.andmore`` is true;
      or any count from ``self.start`` to ``self.end`` inclusive when
      ``self.end`` is given.

    Returns:
        The automaton built through ``FA.Nfa`` and its ``concatenate`` /
        ``union`` / ``star`` operations.

    Raises:
        ValueError: If the section lacks the content or the repeat bounds
            it needs, or if the repeat bounds are not positive or are
            given in the wrong order.
        RuntimeError: If ``self.type`` is not one of the known section
            types.
    """

    def makeCountNFA(count, nfa):
        # Concatenate ``nfa`` ``count`` times onto a single-state automaton
        # whose only state is both start and end.
        s = FA.gensym()
        ret = FA.Nfa(states=[s], alphabet=[], transitions={}, start=s,
                     ends=[s])
        for _ in range(count):
            ret = ret.concatenate(nfa)
        return ret

    if self.type == self.CHAR_SECTION:
        return FA.Nfa(states=[0, 1],
                      alphabet=[self.content],
                      start=0,
                      ends=[1],
                      transitions={0: {self.content: [1]}})
    elif self.type == self.SET_SECTION:
        if len(self.content) == 0:
            raise ValueError("Set Section is given an empty set of regexs")
        unionreg = self.content[0].tonfa()
        for section in self.content[1:]:
            unionreg = unionreg.union(section.tonfa())
        return unionreg
    elif self.type == self.SEQUENCE_SECTION:
        if len(self.content) == 0:
            raise ValueError(
                "Sequence Section is given an empty set of regexs")
        concatreg = self.content[0].tonfa()
        for section in self.content[1:]:
            concatreg = concatreg.concatenate(section.tonfa())
        return concatreg
    elif self.type == self.STAR_SECTION:
        if self.content is None:
            raise ValueError("Star Section has no preceding regex")
        return self.content.tonfa().copy().star()
    elif self.type == self.PLUS_SECTION:
        if self.content is None:
            raise ValueError("Plus Section has no preceding regex")
        return self.content.tonfa().concatenate(
            self.content.tonfa().star())
    elif self.type == self.QUESTION_SECTION:
        if self.content is None:
            raise ValueError("Question Section has no preceding regex")
        s = FA.gensym()
        return self.content.tonfa().union(
            FA.Nfa(states=[s], alphabet=[], transitions={}, start=s,
                   ends=[s]))
    elif self.type == self.REPEAT_SECTION:
        # Validate everything before building any automaton.
        if self.content is None:
            raise ValueError("Repeat Section has no preceding regex")
        if self.andmore is None:
            raise ValueError("Repeat Section has no andmore arg")
        if self.start is None:
            raise ValueError("Repeat Section has no repeat start number")
        if self.start <= 0:
            raise ValueError(
                "Repeat Section has an invalid repeat start number")
        if self.end is not None:
            if self.end <= 0:
                raise ValueError(
                    "Repeat Section has an invalid repeat end number")
            if self.end < self.start:
                raise ValueError(
                    "Repeat Section has a repeat end number smaller than "
                    "its repeat start number")

        cnfa = self.content.tonfa()
        if self.end is None:
            # Only a start count was given.
            repnfa = makeCountNFA(self.start, cnfa)
            if self.andmore:
                # At least ``start`` repetitions: append a star.
                return repnfa.concatenate(cnfa.star())
            # Exactly ``start`` repetitions.
            return repnfa

        # Between ``start`` and ``end`` repetitions. Seed the union with
        # the ``start``-repetition automaton rather than with a bare
        # single-state automaton: that state is both start and end, so
        # keeping it in the union would make the empty string acceptable.
        ret = makeCountNFA(self.start, cnfa)
        for i in range(self.start + 1, self.end + 1):
            ret = ret.union(makeCountNFA(i, cnfa))
        return ret
    else:
        raise RuntimeError("Unknown Section type: ", self.type)