def gen_delta_table(nr, delta, no_states, qr, direction=True):
    """
    Flatten the transition function into one list.

    Layout of the returned list:
      [0]   number of states
      [1]   number of tokens (all known tokens plus the 'unknown' token)
      [2]   nr.state_id (the start state)
      then  one offset per (state, token) pair, in the order produced by
            enumerate_inputs
      then  the set table: each distinct target set is stored once as its
            length followed by its members in sorted order

    An offset is the position inside the set table at which the target
    set of that (state, token) pair begins.

    @param nr: start node; only its state_id is read here
    @param delta: callable delta(q, t, direction) returning the targets
    @param no_states: number of states
    @param qr: root of the query tree
    @param direction: forwarded unchanged to delta
    """
    # all known tokens plus the 'unknown' token
    no_tokens = len(qtree.extract_token_set(qr)) + 1

    seen_at = {}        # target set (as sorted tuple) -> offset in set table
    offsets = []
    packed_sets = []

    for state, token in enumerate_inputs(no_states, qr):
        targets = tuple(sorted(delta(state, token, direction)))
        if targets not in seen_at:
            # A new set always starts at the current end of the set table.
            seen_at[targets] = len(packed_sets)
            packed_sets.append(len(targets))
            packed_sets.extend(targets)
        offsets.append(seen_at[targets])

    header = [
        no_states,      # number of states
        no_tokens,      # number of tokens
        nr.state_id,    # start state
    ]
    delta_table = header + offsets + packed_sets

    assert len(delta_table) == 3 + no_states * no_tokens + len(packed_sets)
    return delta_table
def print_delta_table(delta_table, qr):
    """
    Dump a flattened delta table to stdout (only used for sanity checking).

    Prints the offset table (preceded by its length), the set table, and
    then, for every (state index, token index) pair, the pair itself
    followed by the target set found through its offset.

    @param delta_table: list laid out as
        [no_states, no_tokens, start state, offsets..., sets...]
    @param qr: root of the query tree
    """
    # Index -> token lookup: sorted known tokens followed by the unknown
    # token '[u]'. It is built here but not consulted by the output below.
    names = sorted(qtree.extract_token_set(qr))
    names.append('[u]')
    token_map = dict(enumerate(names))

    no_states, no_tokens = delta_table[0], delta_table[1]
    sets_start = 3 + no_states * no_tokens
    offset_table = delta_table[3:sets_start]
    set_table = delta_table[sets_start:]

    print("{0} {1}".format(len(offset_table), offset_table))
    print(set_table)

    for q in xrange(no_states):
        for t in xrange(no_tokens):
            print("{0} {1}".format(q, t))
            start = offset_table[q * no_tokens + t]
            size = set_table[start]     # each set is stored length-first
            members = set_table[start + 1:start + 1 + size]
            print("({0}, {1}) -> {2}".format(q, t, members))
def gen_delta_table(nr, delta, no_states, qr, direction=True):
    """
    Serialise the transition function delta into a single flat list.

    The result consists of a three element header (number of states,
    number of tokens, nr.state_id), followed by an offset table with one
    entry per input yielded by enumerate_inputs, followed by the set
    table. Every distinct target set is written to the set table exactly
    once, as its length followed by its sorted members; the offset table
    holds the index in the set table at which the matching set starts.

    @param nr: start node; only its state_id is read here
    @param delta: callable delta(q, t, direction) returning the targets
    @param no_states: number of states
    @param qr: root of the query tree
    @param direction: forwarded unchanged to delta
    """
    token_count = len(qtree.extract_token_set(qr))
    no_tokens = token_count + 1  # all known tokens plus the 'unknown' token

    first_seen = dict()
    index = list()
    flat_sets = list()

    for q, t in enumerate_inputs(no_states, qr):
        key = tuple(sorted(delta(q, t, direction)))
        try:
            position = first_seen[key]
        except KeyError:
            # Unseen set: it is appended, so it begins at the current end.
            position = first_seen[key] = len(flat_sets)
            flat_sets.append(len(key))
            flat_sets.extend(key)
        index.append(position)

    delta_table = [no_states, no_tokens, nr.state_id]
    delta_table.extend(index)       # offset table
    delta_table.extend(flat_sets)   # table containing the sets

    expected_length = 3 + no_states * no_tokens + len(flat_sets)
    assert len(delta_table) == expected_length
    return delta_table
def qtree2nfa(qt, alphabet=None):
    """
    Build a fa.FiniteAutomaton from a query tree.

    A tree node whose matching value is >= 0 becomes an accepting state
    and uses that value as its state id; any other node is created with
    state_id=None. The root becomes the start state. A child reached via
    '//' gets an extra intermediate state: it is linked from the parent
    with a None label and loops to itself on the whole alphabet.

    @param qt: root of the query tree
    @param alphabet: token set to use; taken from the query tree when
        None. The unknown token '[u]' is added to this object in place.
    @return: the populated automaton
    """
    if alphabet is None:
        alphabet = qtree.extract_token_set(qt)
    alphabet.add('[u]')  # the unknown token

    no_queries = find_max_query(qt) + 1

    def _default_state_id(fa):
        # The parameter keeps its original name; inside this callback it
        # refers to the automaton passed in, not to the fa module.
        return no_queries + len(fa.states) - len(fa.accepting)

    nfa = fa.FiniteAutomaton(alphabet=alphabet,
                             states=None,
                             default_state_id=_default_state_id)

    def _build(node, parent_state=None, label=None):
        if node.matching >= 0:
            state_id, accepting = node.matching, True
        else:
            state_id, accepting = None, False

        is_root = parent_state is None
        state = nfa.new_state(accepting, start_state=is_root,
                              state_id=state_id)
        if not is_root:
            nfa.add_transition(parent_state, state, label)

        for (typ, tag), child in node.children.items():
            source = state
            if typ == '//':
                # Intermediate state that may consume any token sequence.
                source = nfa.new_state()
                nfa.add_transition(state, source, None)
                nfa.add_transition(source, source, set(nfa.alphabet))
            _build(child, source, tag2alphabet(nfa, tag))

    _build(qt)
    return nfa
def qtree2eps_nfa(qr):
    """
    Build an epsilon-NFA made of EpsNFANode objects from a query tree.

    After the tree has been translated, the states are enumerated with
    enum_states and one additional node is appended: every enumerated
    state, and the additional node itself, gets a '[o]' transition to it.

    @param qr: root of the query tree
    @return: tuple (start node, number of states including the additional
        node, number of queries, mapping from state id to node)
    """
    # Boxed in a list so the nested function can update it.
    highest_query = [-1]

    def _translate(qn, nn, tokens):
        """
        @param qn: current query tree node
        @param nn: current nfa node
        @param tokens: token set handed to every add_transition call
        """
        nn.accepting = qn.matching
        for (typ, tag), child in qn.children.items():
            highest_query[0] = max(highest_query[0], child.matching)
            target = EpsNFANode(accepting=child.matching)
            if typ == '//':
                label, destination = EPS, gen_rec_node(tag, tokens, target)
            else:
                label, destination = tag, target
            nn.add_transition(label, destination, tokens)
            _translate(child, target, tokens)

    nr = EpsNFANode()
    token_set = qtree.extract_token_set(qr)
    print(token_set)
    _translate(qr, nr, token_set)

    no_queries = highest_query[0] + 1
    no_states, state_mapping = enum_states(nr, no_queries)

    # Additional node, numbered directly after the enumerated states.
    extra = EpsNFANode()
    extra.state_id = no_states
    for node in state_mapping.values():
        node.add_transition('[o]', extra, token_set)
    extra.add_transition('[o]', extra, token_set)
    state_mapping[extra.state_id] = extra

    return nr, no_states + 1, no_queries, state_mapping
def qtree2eps_nfa(qr):
    """
    Translate a query tree into an epsilon-NFA of EpsNFANode objects.

    Each tree node's matching value is copied to the accepting attribute
    of its NFA node. A child reached via '//' is attached with an EPS
    transition to the node returned by gen_rec_node; any other child is
    attached directly under its tag. Finally the states are enumerated
    and one more node is added, reachable via '[o]' from every enumerated
    state and from itself.

    @param qr: root of the query tree
    @return: tuple (start node, state count including the added node,
        number of queries, mapping from state id to node)
    """
    # One-element list: lets the nested function update the running max.
    max_seen = [-1]

    nr = EpsNFANode()
    token_set = qtree.extract_token_set(qr)
    print(token_set)

    def _walk(tree_node, nfa_node):
        """
        @param tree_node: current query tree node
        @param nfa_node: current nfa node
        """
        nfa_node.accepting = tree_node.matching
        for edge, child in tree_node.children.items():
            typ, tag = edge
            if child.matching > max_seen[0]:
                max_seen[0] = child.matching
            child_node = EpsNFANode(accepting=child.matching)
            if typ == '//':
                nfa_node.add_transition(
                    EPS, gen_rec_node(tag, token_set, child_node), token_set)
            else:
                nfa_node.add_transition(tag, child_node, token_set)
            _walk(child, child_node)

    _walk(qr, nr)

    no_queries = max_seen[0] + 1
    no_states, state_mapping = enum_states(nr, no_queries)

    q_last = EpsNFANode()
    q_last.state_id = no_states  # numbered right after the existing states
    for state in state_mapping.values():
        state.add_transition('[o]', q_last, token_set)
    q_last.add_transition('[o]', q_last, token_set)
    state_mapping[q_last.state_id] = q_last

    total_states = no_states + 1
    return nr, total_states, no_queries, state_mapping
def qtree2nfa(qt, alphabet=None):
    """
    Convert a query tree into a fa.FiniteAutomaton.

    Nodes with matching >= 0 are created as accepting states whose id is
    the matching value; all other nodes are created with state_id=None.
    The root is the start state. For a '//' child an additional state is
    inserted between parent and child: the parent links to it with a
    None label and it loops to itself on every symbol of the alphabet.

    @param qt: root of the query tree
    @param alphabet: token set to use; extracted from qt when None. The
        unknown token '[u]' is added to the given object in place.
    @return: the constructed automaton
    """
    if alphabet is None:
        alphabet = qtree.extract_token_set(qt)
    alphabet.add('[u]')  # the unknown token

    no_queries = find_max_query(qt) + 1
    # Note: inside the lambda, `fa` is the automaton argument, which
    # shadows the fa module for the duration of the call.
    id_source = lambda fa: no_queries + len(fa.states) - len(fa.accepting)
    nfa = fa.FiniteAutomaton(
        alphabet=alphabet, states=None, default_state_id=id_source)

    def _add_subtree(qtn, q_parent=None, label=None):
        matched = qtn.matching >= 0
        accepting = True if matched else False
        state_id = qtn.matching if matched else None

        if q_parent is None:
            q = nfa.new_state(accepting, start_state=True, state_id=state_id)
        else:
            q = nfa.new_state(accepting, start_state=False, state_id=state_id)
            nfa.add_transition(q_parent, q, label)

        for key, child in qtn.children.items():
            typ, tag = key
            if typ != '//':
                _add_subtree(child, q, tag2alphabet(nfa, tag))
                continue
            # Descendant axis: route through a state that loops on the
            # whole alphabet before the child's own label is consumed.
            q_rec = nfa.new_state()
            nfa.add_transition(q, q_rec, None)
            nfa.add_transition(q_rec, q_rec, set(nfa.alphabet))
            _add_subtree(child, q_rec, tag2alphabet(nfa, tag))

    _add_subtree(qt)
    return nfa
def enumerate_inputs(no_states, qr):
    """
    Yield every (state, token) input pair in state-major order.

    States run from 0 to no_states - 1. For each state the tokens of the
    query tree are yielded in sorted order, followed by the unknown
    token '[u]'.

    @param no_states: number of states
    @param qr: root of the query tree
    """
    alphabet = list(qtree.extract_token_set(qr))
    alphabet.sort()
    alphabet.append('[u]')

    for state in xrange(no_states):
        for token in alphabet:
            yield state, token