def build_PTA(dataset):
    """Build a prefix tree acceptor (PTA) from *dataset*.

    Based on: de la Higuera, Grammatical Inference (2010), p. 239,
    Algorithm 12.1.

    Each sample is expected to be indexable, with ``sample[0]`` the symbol
    sequence and — when *dataset* is an ``AnnotatedDataset`` — ``sample[1]``
    a truthy/falsy acceptance label.  Unlabeled samples are all treated as
    accepting.

    Returns a ``DFA`` whose states form the prefix tree of the dataset.
    """
    labels_available = isinstance(dataset, AnnotatedDataset)
    name_generator = state_generator()
    start = next(name_generator)
    states = {start}
    alp = dataset.alphabet
    accept = set()
    reject = set()
    tr = collections.defaultdict(lambda: None)
    for sample in dataset:
        curr_state = start
        for symbol in sample[0]:
            next_state = tr.get((curr_state, symbol), None)
            if next_state is None:
                # no outgoing edge yet: extend the prefix tree
                next_state = next(name_generator)
                states.add(next_state)
                tr[curr_state, symbol] = next_state
            curr_state = next_state
        if labels_available:
            if sample[1]:
                accept.add(curr_state)
            else:
                reject.add(curr_state)
        else:
            accept.add(curr_state)

    # Use .get instead of subscripting: defaultdict.__getitem__ would
    # silently insert a None entry into tr for every missed lookup,
    # polluting the transition table that is also passed to DFA below.
    def delta(q, a):
        return tr.get((q, a))

    return DFA(alp, states, start, accept, reject, delta, tr)
def build_DFA(self):
    """Build a DFA from this observation table.

    The table must be closed and complete before calling this.

    Each red-set row becomes a state unless it is compatible with an
    already-mapped row, in which case it is merged into that state
    (in the Gold algorithm this never happens, but in L* it can).
    Acceptance of a state is read off the empty-suffix column.
    """
    state_map = dict()
    name_gen = state_generator()
    # Map every red row to a state id, shortest rows first so that a
    # longer row can merge into an already-named shorter one.
    for v in sorted(self.red_set, key=len):
        unique = True
        for r in sorted(state_map.keys(), key=len):
            # check if there exists a compatible red state;
            # if several are compatible the last one examined wins
            if self.states_compatible(r, v):
                unique = False
                state_map[v] = state_map[r]
        if unique:
            state_map[v] = next(name_gen)
    states = set(state_map.values())
    accept = set()
    reject = set()
    start = state_map[tuple()]
    tr = collections.defaultdict(lambda: None)
    # Partition states by the empty-suffix column of the table.
    for r in self.red_set:
        if self(r, tuple()) == 1:
            accept.add(state_map[r])
        else:
            reject.add(state_map[r])
    # Transition of q on sym goes to any red row compatible with q + sym.
    for q in self.red_set:
        for sym in self.alphabet:
            for u in self.red_set:
                if self.states_compatible(u, q + (sym, )):
                    tr[state_map[q], sym] = state_map[u]

    # Use .get instead of subscripting: defaultdict.__getitem__ would
    # silently insert a None entry into tr for every missed lookup.
    def delta(q, a):
        return tr.get((q, a))

    # NOTE(review): other builders in this file pass tr as a 7th argument
    # to DFA; this call omits it — confirm DFA's signature makes it optional.
    return DFA(set(self.alphabet), states, start, accept, reject, delta)
def write_graphviz(fsa, fd, exclude_labels=False, exclude_states=None):
    """Emit *fsa* as a Graphviz digraph on the writable stream *fd*.

    States in *exclude_states* (and edges touching them) are skipped;
    with *exclude_labels* the edges are drawn without symbol labels.
    """
    # header
    fd.write("digraph G {\n")
    fd.write(" rankdir=LR;\n")
    id_gen = state_generator()
    state_id = collections.defaultdict(lambda: next(id_gen))

    def keep(state):
        return exclude_states is None or state not in exclude_states

    # content: one node per (non-excluded) state, shaped by its class
    for s in fsa.states:
        if not keep(s):
            continue
        sid = state_id[s]
        if s in fsa.accept:
            fd.write(" n%d [label=\"%s\",shape=\"doublecircle\"];\n" % (sid, s))
        elif s in fsa.reject:
            fd.write(" n%d [label=\"%s\",shape=\"circle\"];\n" % (sid, s))
        else:
            fd.write(" n%d [label=\"%s\",shape=\"circle\",style=\"filled\"];\n" % (sid, s))

    # dummy start: an invisible point node with an arrow into each start state
    starts = fsa.start if isinstance(fsa.start, set) else [fsa.start]
    for s in starts:
        sid = state_id[s]
        fd.write(" START%d [shape=\"point\",color=\"white\",fontcolor=\"white\"];\n" % sid)
        fd.write(" START%d -> n%d;\n" % (sid, sid))

    # edges between nodes: gather all symbols per (source, target) pair first
    store = collections.defaultdict(lambda: [])
    for s, sym, ns in fsa.itertransitions():
        if keep(s) and keep(ns):
            store[s, ns].append("ε" if sym == epsilon else sym)  # greek epsilon

    # then emit one edge per pair, optionally labeled with the sorted symbols
    for (src, dst), syms in store.items():
        sid = state_id[src]
        nsid = state_id[dst]
        if exclude_labels:
            fd.write(" n%d -> n%d;\n" % (sid, nsid))
        else:
            fd.write(" n%d -> n%d [label=\"%s\"];\n" % (sid, nsid, ','.join(sorted(map(str, syms)))))

    # trailer
    fd.write("}\n")
def write_graphviz(self, path):
    """Write this tree as an undirected Graphviz graph to the file *path*."""
    with open(path, "w") as out:
        # header
        out.write("graph G {\n")
        out.write(" graph [ordering=\"out\"];\n")
        gen = state_generator()
        ids = {}
        for n in self._parent.keys():
            ids[n] = next(gen)
        # content: one node per tree node, labeled with its state if set
        for node in self.breadth_first_traverse():
            nid = ids[node]
            if node.state is not None:
                out.write(" n%d [label=\"%s:%s\"];\n" % (nid, str(node), node.state))
            else:
                out.write(" n%d [label=\"%s\"];\n" % (nid, str(node)))
        # edges: connect each non-root node to its parent
        for node in self.breadth_first_traverse():
            if self.is_root(node):
                continue
            out.write(" n%d -- n%d;\n" % (ids[self._parent[node]], ids[node]))
        # trailer
        out.write("}\n")
def _build_DFA_from_TSS(alphabet, prefix_set, suffix_set, segment_set, shorts_set):
    """Assemble a DFA from a TSS (prefix/suffix/segment/shorts) description.

    States are created for every prefix of the strings in
    ``prefix_set | shorts_set`` and for the one-symbol-trimmed forms of
    each segment; transitions follow the prefix structure, segments add
    the looping/back edges, and states named by ``suffix_set | shorts_set``
    become accepting.

    NOTE(review): relies on the helpers ``prefixes`` and
    ``prefix_symbol_suffix`` defined elsewhere; presumably the latter yields
    (prefix, 1-symbol-tuple, suffix) decompositions — confirm.  Strings in
    ``suffix_set`` are assumed to already have states in ``state_map``
    (otherwise the accept loop raises KeyError) — verify against callers.
    """
    name_gen = state_generator()
    alp = set(alphabet)
    start_id = next(name_gen)
    reject = set()
    accept = set()
    tr = collections.defaultdict(lambda: None)
    # the empty tuple (empty string) is the start state
    state_map = {tuple(): start_id}
    prefshorts = prefix_set.union(shorts_set)
    # one state per prefix of every prefix/short string
    for strng in prefshorts:
        for substrng in prefixes(strng):
            if substrng not in state_map:
                state_map[substrng] = next(name_gen)
    # segments contribute states for their head- and tail-trimmed forms
    for strng in segment_set:
        if strng[1:] not in state_map:
            state_map[strng[1:]] = next(name_gen)
        if strng[:-1] not in state_map:
            state_map[strng[:-1]] = next(name_gen)
    # transitions along the prefix tree
    for strng in prefshorts:
        for pref, sym, suff in prefix_symbol_suffix(strng):
            if not pref:
                tr[start_id, sym[0]] = state_map[sym]
            else:
                tr[state_map[pref], sym[0]] = state_map[pref + sym]
    # segment transitions: last symbol of the segment links trimmed forms
    for strng in segment_set:
        tr[state_map[strng[:-1]], strng[-1]] = state_map[strng[1:]]
    # accepting states come from suffixes and shorts
    for strng in suffix_set.union(shorts_set):
        accept.add(state_map[strng])

    # Use .get instead of subscripting: defaultdict.__getitem__ would
    # silently insert a None entry into tr for every missed lookup,
    # polluting the transition table that is also passed to DFA below.
    def delta(q, a):
        return tr.get((q, a))

    return DFA(alp, set(state_map.values()), start_id, accept, reject, delta, tr)
def __init__(self, expression, alphabet=(), syntax="textbook", whitespace=False):
    """Parse *expression* and build its minimal DFA.

    Parameters:
        expression: the expression string to parse; if falsy, nothing is
            parsed and no DFA is built.
        alphabet: iterable of symbols; stored as a set.  (Default changed
            from a mutable ``[]`` to an immutable ``()`` — behavior is
            identical since it is only ever passed to ``set()``.)
        syntax: grammar flavor, either "textbook" or "regex"; any other
            value raises KeyError.
        whitespace: flag stored as-is; consumed by the grammar builders.
    """
    self.grammar_def = {
        "textbook": self._textbook_grammar,
        "regex": self._regex_grammar
    }
    self.expression = expression
    self.whitespace = whitespace
    self.alphabet = set(alphabet)
    self.sgen = state_generator()
    self.states = set()
    self.start = None
    self.accept = None
    self.tr = collections.defaultdict(set)
    # grammar_def[syntax] raises KeyError for an unknown syntax name
    self.grammar = self.grammar_def[syntax]()
    self._dfa = None
    if expression:
        self.grammar.parseString(expression)
        self._dfa = self.mindfa()