Ejemplo n.º 1
0
def build_PTA(dataset):
    """Build a prefix tree acceptor (PTA) from the samples in ``dataset``.

    Based on de la Higuera: Grammatical Inference (2010), p. 239,
    Algorithm 12.1.

    Each sample is walked symbol by symbol (``sample[0]``) from the start
    state; a fresh state is created for every transition that has not been
    seen before. If ``dataset`` is an ``AnnotatedDataset``, the state reached
    at the end of a sample is put into the accepting set when ``sample[1]``
    is truthy and into the rejecting set otherwise. For any other dataset
    every reached state is accepting.

    Returns a ``DFA`` constructed from ``dataset.alphabet``, the set of
    states, the start state, the accepting set, the rejecting set, a
    transition function and the transition table itself.
    """
    annotated = isinstance(dataset, AnnotatedDataset)
    fresh_state = state_generator()
    root = next(fresh_state)
    all_states = {root}
    alphabet = dataset.alphabet
    accepting, rejecting = set(), set()
    # Indexing a missing key yields None through the default factory.
    table = collections.defaultdict(lambda: None)

    for sample in dataset:
        here = root
        for symbol in sample[0]:
            edge = (here, symbol)
            # .get() probes the table without inserting a default entry.
            there = table.get(edge, None)
            if there is None:
                there = next(fresh_state)
                all_states.add(there)
                table[edge] = there
            here = there
        # Unlabelled datasets only contain positive samples from this
        # function's point of view: every end state is accepting.
        bucket = accepting if (not annotated or sample[1]) else rejecting
        bucket.add(here)

    def delta(q, a):
        return table[q, a]

    return DFA(alphabet, all_states, root, accepting, rejecting, delta, table)
Ejemplo n.º 2
0
    def build_DFA(self):
        """Construct a ``DFA`` from the current observation table.

        The observation table must be closed and complete.

        Red-set entries are visited from shortest to longest. An entry that
        is compatible with an already mapped entry shares that entry's state
        (if several are compatible, the last one checked wins); otherwise it
        receives a fresh state. A state is accepting when the table value
        ``self(r, ())`` equals 1 and rejecting otherwise. Transitions are
        derived by finding red entries compatible with ``q + (sym,)``.
        """
        name_gen = state_generator()
        state_map = {}
        for candidate in sorted(self.red_set, key=len):
            merged = False
            # Snapshot of the keys mapped so far, shortest first.
            for known in sorted(state_map, key=len):
                # A compatible red state can never exist in the gold
                # algorithm, but it can in lstar.
                if not self.states_compatible(known, candidate):
                    continue
                merged = True
                state_map[candidate] = state_map[known]
            if not merged:
                state_map[candidate] = next(name_gen)

        states = set(state_map.values())
        accept, reject = set(), set()
        start = state_map[()]
        tr = collections.defaultdict(lambda: None)

        for entry in self.red_set:
            bucket = accept if self(entry, ()) == 1 else reject
            bucket.add(state_map[entry])

        for source in self.red_set:
            for sym in self.alphabet:
                extended = source + (sym, )
                for target in self.red_set:
                    if self.states_compatible(target, extended):
                        tr[state_map[source], sym] = state_map[target]

        def delta(q, a):
            return tr[q, a]

        return DFA(set(self.alphabet), states, start, accept, reject, delta)
Ejemplo n.º 3
0
def write_graphviz(fsa, fd, exclude_labels=False, exclude_states=None):
    """Write ``fsa`` to the file-like object ``fd`` as a Graphviz digraph.

    Accepting states are drawn as double circles, rejecting states as
    circles, and states in neither set as filled circles. Each start state
    gets an invisible ``START`` point node with an edge to it. All symbols
    on transitions between the same pair of states are merged into one
    edge whose label is the sorted, comma-separated list of symbols.

    ``exclude_labels``: when truthy, edges are written without labels.
    ``exclude_states``: optional container of states to leave out; their
    nodes and every edge touching them are skipped.
    """
    # header
    fd.write("digraph G {\n")
    fd.write("  rankdir=LR;\n")
    fresh_id = state_generator()
    # Ids are assigned lazily, the first time a state is looked up.
    node_id = collections.defaultdict(lambda: next(fresh_id))

    def hidden(state):
        return exclude_states is not None and state in exclude_states

    # nodes
    for state in fsa.states:
        if hidden(state):
            continue
        num = node_id[state]
        if state in fsa.accept:
            template = "  n%d [label=\"%s\",shape=\"doublecircle\"];\n"
        elif state in fsa.reject:
            template = "  n%d [label=\"%s\",shape=\"circle\"];\n"
        else:
            template = "  n%d [label=\"%s\",shape=\"circle\",style=\"filled\"];\n"
        fd.write(template % (num, state))

    # dummy start markers; fsa.start is either a set of states or one state
    start = fsa.start
    entry_states = start if isinstance(start, set) else (start,)
    for state in entry_states:
        num = node_id[state]
        fd.write("  START%d [shape=\"point\",color=\"white\",fontcolor=\"white\"];\n" % num)
        fd.write("  START%d -> n%d;\n" % (num, num))

    # edges: first gather every label that belongs to the same edge
    labels_by_edge = collections.defaultdict(list)
    for src, sym, dst in fsa.itertransitions():
        if hidden(src) or hidden(dst):
            continue
        label = "ε" if sym == epsilon else sym   # greek epsilon
        labels_by_edge[src, dst].append(label)

    # ... then write one line per edge
    for (src, dst), labels in labels_by_edge.items():
        src_num = node_id[src]
        dst_num = node_id[dst]
        if exclude_labels:
            fd.write("  n%d -> n%d;\n" % (src_num, dst_num))
        else:
            text = ','.join(sorted(map(str, labels)))
            fd.write("  n%d -> n%d [label=\"%s\"];\n" % (src_num, dst_num, text))

    # trailer
    fd.write("}\n")
Ejemplo n.º 4
0
 def write_graphviz(self, path):
     """Write this tree to the file ``path`` as an undirected Graphviz graph.

     Every key of ``self._parent`` is given a numeric id. Nodes are written
     in breadth-first order, labelled with ``str(node)`` and, when
     ``node.state`` is not None, followed by ``:`` and that state. One edge
     is then written from the parent to each non-root node.
     """
     with open(path, "w") as out:
         # header
         out.write("graph G {\n")
         out.write("  graph [ordering=\"out\"];\n")
         fresh_id = state_generator()
         ids = {}
         for member in self._parent.keys():
             ids[member] = next(fresh_id)
         # node declarations
         for node in self.breadth_first_traverse():
             num = ids[node]
             if node.state is None:
                 label = str(node)
             else:
                 label = "%s:%s" % (str(node), node.state)
             out.write("  n%d [label=\"%s\"];\n" % (num, label))
         # parent -- child edges
         for node in self.breadth_first_traverse():
             if self.is_root(node):
                 continue
             parent_num = ids[self._parent[node]]
             out.write("  n%d -- n%d;\n" % (parent_num, ids[node]))
         # trailer
         out.write("}\n")
Ejemplo n.º 5
0
def _build_DFA_from_TSS(alphabet, prefix_set, suffix_set, segment_set,
                        shorts_set):
    """Build a ``DFA`` from prefix, suffix, segment and short-string sets.

    States are named after strings: the empty tuple is the start state,
    every prefix of a string in ``prefix_set | shorts_set`` gets a state,
    and so do ``segment[1:]`` and ``segment[:-1]`` for each segment.
    Transitions follow the prefixes symbol by symbol, and each segment
    adds a transition from ``segment[:-1]`` to ``segment[1:]`` on its last
    symbol. States of strings in ``suffix_set | shorts_set`` are
    accepting; the rejecting set stays empty.
    """
    name_gen = state_generator()
    alp = set(alphabet)
    start_id = next(name_gen)
    state_map = {(): start_id}
    tr = collections.defaultdict(lambda: None)

    prefshorts = prefix_set.union(shorts_set)

    # One state per prefix of every prefix/short string.
    for word in prefshorts:
        for head in prefixes(word):
            if head not in state_map:
                state_map[head] = next(name_gen)

    # Segments need states for both of their overlapping halves.
    for segment in segment_set:
        for piece in (segment[1:], segment[:-1]):
            if piece not in state_map:
                state_map[piece] = next(name_gen)

    for word in prefshorts:
        for pref, sym, suff in prefix_symbol_suffix(word):
            # The target is looked up before the source, matching the
            # evaluation order of a direct subscript assignment.
            if pref:
                target = state_map[pref + sym]
                source = state_map[pref]
            else:
                target = state_map[sym]
                source = start_id
            tr[source, sym[0]] = target

    for segment in segment_set:
        target = state_map[segment[1:]]
        tr[state_map[segment[:-1]], segment[-1]] = target

    accept = {state_map[word] for word in suffix_set.union(shorts_set)}
    reject = set()

    def delta(q, a):
        return tr[q, a]

    return DFA(alp, set(state_map.values()), start_id, accept, reject, delta,
               tr)
Ejemplo n.º 6
0
    def __init__(self,
                 expression,
                 alphabet=(),
                 syntax="textbook",
                 whitespace=False):
        """Set up the parser state and, if given, parse ``expression``.

        expression: the expression to parse; when falsy nothing is parsed
            and ``self._dfa`` stays None.
        alphabet: iterable of symbols, copied into a set. The default is an
            immutable empty tuple so no mutable object is shared between
            calls.
        syntax: key selecting the grammar builder, ``"textbook"`` or
            ``"regex"``. Any other key raises ``KeyError``.
        whitespace: stored on the instance as ``self.whitespace``.
        """
        # Maps each supported syntax name to the method that builds its
        # grammar.
        self.grammar_def = {
            "textbook": self._textbook_grammar,
            "regex": self._regex_grammar
        }
        self.expression = expression
        self.whitespace = whitespace
        self.alphabet = set(alphabet)
        self.sgen = state_generator()
        self.states = set()
        self.start = None
        self.accept = None
        self.tr = collections.defaultdict(set)

        # Built only after the attributes above are set, in case the grammar
        # builder reads them.
        self.grammar = self.grammar_def[syntax]()

        self._dfa = None
        if expression:
            # NOTE(review): the parse result is discarded, so parsing
            # presumably fills in the automaton via side effects - confirm
            # against the grammar builders.
            self.grammar.parseString(expression)
            self._dfa = self.mindfa()