Esempio n. 1
0
def gen_delta_table(nr, delta, no_states, qr, direction=True):
    """
    Flatten a transition function into a single list.

    Layout of the result:
    [no_states, no_tokens, start state id, offset table..., set table...].
    The offset table holds one entry per (state, token) pair in the order
    produced by enumerate_inputs; each entry is an index into the set table.
    In the set table every distinct target set is stored once, as its
    length followed by its sorted members.

    @param nr: start node; only its state_id is read
    @param delta: callable invoked as delta(state, token, direction) that
        returns an iterable of target states
    @param no_states: number of states
    @param qr: root of the query tree
    @param direction: forwarded unchanged to delta
    @return: the flattened table as a list
    """
    # all known tokens plus the 'unknown' token
    no_tokens = 1 + len(qtree.extract_token_set(qr))

    first_seen_at = {}   # target set -> index of its entry in packed_sets
    packed_sets = []
    offsets = []

    for state, token in enumerate_inputs(no_states, qr):
        targets = tuple(sorted(delta(state, token, direction)))
        if targets not in first_seen_at:
            # A new set always starts at the current end of the set table.
            first_seen_at[targets] = len(packed_sets)
            packed_sets.append(len(targets))
            packed_sets.extend(targets)
        offsets.append(first_seen_at[targets])

    # header: number of states, number of tokens, start state
    delta_table = [no_states, no_tokens, nr.state_id]
    delta_table += offsets
    delta_table += packed_sets

    assert len(delta_table) == 3 + no_states * no_tokens + len(packed_sets)
    return delta_table
Esempio n. 2
0
def print_delta_table(delta_table, qr):
    """
    Dump a flattened delta table to stdout; only used for sanity checking.

    Expects the layout
    [no_states, no_tokens, start state, offset table..., set table...]
    and prints, for every (state index, token index) pair, the target set
    found through the offset table.

    @param delta_table: flattened table as a sequence
    @param qr: root of the query tree
    """
    alphabet = sorted(qtree.extract_token_set(qr))
    alphabet.append('[u]')

    # token index -> token
    # NOTE(review): this map is never read below; the dump prints the raw
    # token indices.
    token_map = dict(enumerate(alphabet))

    no_states, no_tokens = delta_table[0], delta_table[1]

    sets_start = 3 + no_states * no_tokens
    offset_table = delta_table[3:sets_start]
    set_table = delta_table[sets_start:]

    print("{0} {1}".format(len(offset_table), offset_table))
    print(set_table)
    for q in xrange(no_states):
        for t in xrange(no_tokens):
            print("{0} {1}".format(q, t))
            begin = offset_table[q * no_tokens + t]
            # Each entry is stored as its length followed by its members.
            size = set_table[begin]
            members = set_table[begin + 1:begin + 1 + size]

            print("({0}, {1}) -> {2}".format(q, t, members))
Esempio n. 3
0
def gen_delta_table(nr, delta, no_states, qr, direction=True):
    """
    Serialise the transition function delta into one flat list.

    The list starts with a three-element header (number of states, number
    of tokens, id of the start state), followed by the offset table (one
    entry per (state, token) pair, in enumerate_inputs order) and finally
    the set table. Each distinct target set appears once in the set table,
    encoded as its size followed by its members in sorted order; offset
    table entries are indices into the set table.

    @param nr: start node; only its state_id is read
    @param delta: callable invoked as delta(state, token, direction) that
        returns an iterable of target states
    @param no_states: number of states
    @param qr: root of the query tree
    @param direction: forwarded unchanged to delta
    @return: the flat table as a list
    """
    known_tokens = qtree.extract_token_set(qr)
    no_tokens = len(known_tokens) + 1  # plus the 'unknown' token

    set_positions = {}
    flat_sets = []
    offsets = []

    for state, token in enumerate_inputs(no_states, qr):
        targets = tuple(sorted(delta(state, token, direction)))
        try:
            position = set_positions[targets]
        except KeyError:
            # First occurrence: the set is appended at the end of flat_sets.
            position = len(flat_sets)
            set_positions[targets] = position
            flat_sets += [len(targets)] + list(targets)
        offsets.append(position)

    header = [
        no_states,    # number of states
        no_tokens,    # number of tokens
        nr.state_id,  # start state
    ]
    delta_table = header + offsets + flat_sets

    assert len(delta_table) == 3 + no_states * no_tokens + len(flat_sets)
    return delta_table
Esempio n. 4
0
def qtree2nfa(qt, alphabet=None):
    """
    Build an fa.FiniteAutomaton from a query tree.

    Every query tree node becomes one state. A node whose matching value is
    >= 0 becomes an accepting state that uses that value as its state id.
    A child reached through a '//' edge gets an extra intermediate state
    that loops on the whole alphabet and is entered from the parent state
    through a transition labelled None.

    @param qt: root of the query tree
    @param alphabet: token alphabet; when None, the token set extracted
        from qt extended by the unknown token '[u]'
    @return: the automaton
    """
    if alphabet is None:
        alphabet = qtree.extract_token_set(qt)
        alphabet.add('[u]')  # the unknown token

    no_queries = find_max_query(qt) + 1

    # Default id: no_queries plus the number of non-accepting states.
    # NOTE(review): presumably this keeps the ids below no_queries free for
    # accepting states -- confirm against fa.FiniteAutomaton.
    # The parameter name `fa` hides the fa module inside the lambda only.
    next_default_id = lambda fa: (
        no_queries + len(fa.states) - len(fa.accepting))

    nfa = fa.FiniteAutomaton(alphabet=alphabet, states=None,
                             default_state_id=next_default_id)

    def attach(node, parent_state=None, label=None):
        if node.matching >= 0:
            state_id, accepting = node.matching, True
        else:
            state_id, accepting = None, False

        # Only the root call has no parent; its state is the start state.
        state = nfa.new_state(accepting,
                              start_state=parent_state is None,
                              state_id=state_id)
        if parent_state is not None:
            nfa.add_transition(parent_state, state, label)

        for (axis, tag), child in node.children.items():
            source = state
            if axis == '//':
                source = nfa.new_state()
                nfa.add_transition(state, source, None)
                nfa.add_transition(source, source, set(nfa.alphabet))
            attach(child, source, tag2alphabet(nfa, tag))

    attach(qt)

    return nfa
Esempio n. 5
0
def qtree2eps_nfa(qr):
    """
    Build an NFA of EpsNFANode objects from a query tree.

    After the tree has been translated and its states enumerated, one
    additional node q_N is created. Every enumerated node and q_N itself
    get a '[o]' transition to q_N, and q_N is added to the state mapping
    under its own state id.

    @param qr: root of the query tree
    @return: tuple (start node, number of states including q_N, number of
        queries, state mapping)
    """
    # One-element list so the nested function can update the maximum.
    highest_query = [-1]

    def translate(qn, nn, token_set):
        """
        @param qn: current query tree node
        @param nn: current nfa node
        @param token_set: token set passed along to every add_transition
        """
        nn.accepting = qn.matching
        for (typ, tag), child in qn.children.items():
            highest_query[0] = max(highest_query[0], child.matching)
            child_node = EpsNFANode(accepting=child.matching)
            if typ == '//':
                label = EPS
                target = gen_rec_node(tag, token_set, child_node)
            else:
                label = tag
                target = child_node
            nn.add_transition(label, target, token_set)
            translate(child, child_node, token_set)

    nr = EpsNFANode()
    token_set = qtree.extract_token_set(qr)
    print(token_set)
    translate(qr, nr, token_set)

    no_queries = highest_query[0] + 1

    no_states, state_mapping = enum_states(nr, no_queries)

    # The extra node takes the first id after the enumerated states.
    q_N = EpsNFANode()
    q_N.state_id = no_states

    for node in state_mapping.values():
        node.add_transition('[o]', q_N, token_set)

    q_N.add_transition('[o]', q_N, token_set)
    state_mapping[q_N.state_id] = q_N

    total_states = no_states + 1
    return nr, total_states, no_queries, state_mapping
Esempio n. 6
0
def qtree2eps_nfa(qr):
    """
    Translate a query tree into an NFA made of EpsNFANode objects.

    Once the tree is translated and its states are enumerated, a further
    node q_N is appended: all enumerated nodes, and q_N itself, receive a
    '[o]' transition leading to q_N, and q_N is registered in the state
    mapping under its state id.

    @param qr: root of the query tree
    @return: tuple (start node, number of states including q_N, number of
        queries, state mapping)
    """
    # Kept in a list so that the nested function can write to it.
    max_seen = [-1]

    def walk(tree_node, nfa_node, tokens):
        """
        @param tree_node: current query tree node
        @param nfa_node: current nfa node
        @param tokens: token set passed along to every add_transition
        """
        nfa_node.accepting = tree_node.matching
        for (typ, tag), child in tree_node.children.items():
            if child.matching > max_seen[0]:
                max_seen[0] = child.matching
            successor = EpsNFANode(accepting=child.matching)
            edge = ((EPS, gen_rec_node(tag, tokens, successor))
                    if typ == '//' else (tag, successor))
            nfa_node.add_transition(edge[0], edge[1], tokens)
            walk(child, successor, tokens)

    nr = EpsNFANode()
    token_set = qtree.extract_token_set(qr)
    print(token_set)
    walk(qr, nr, token_set)

    no_queries = 1 + max_seen[0]
    no_states, state_mapping = enum_states(nr, no_queries)

    # q_N gets the first state id after the enumerated ones.
    q_N = EpsNFANode()
    q_N.state_id = no_states

    for state in state_mapping.values():
        state.add_transition('[o]', q_N, token_set)
    q_N.add_transition('[o]', q_N, token_set)

    state_mapping[q_N.state_id] = q_N

    return nr, 1 + no_states, no_queries, state_mapping
Esempio n. 7
0
def qtree2nfa(qt, alphabet=None):
    """
    Convert a query tree into an fa.FiniteAutomaton.

    One state is created per query tree node; nodes with a matching value
    >= 0 yield accepting states whose state id is that value. For a child
    attached through a '//' edge an additional state is inserted: it is
    entered from the parent state via a transition labelled None and
    carries a self-loop over the complete alphabet.

    @param qt: root of the query tree
    @param alphabet: token alphabet; when None, the token set extracted
        from qt extended by the unknown token '[u]'
    @return: the automaton
    """
    if alphabet is None:
        alphabet = qtree.extract_token_set(qt)
        alphabet.add('[u]')  # the unknown token

    no_queries = find_max_query(qt) + 1

    def default_state_id(fa):
        # no_queries plus the count of non-accepting states. The parameter
        # name `fa` hides the fa module inside this helper only.
        # NOTE(review): presumably reserves the ids below no_queries for
        # accepting states -- confirm against fa.FiniteAutomaton.
        return no_queries + len(fa.states) - len(fa.accepting)

    nfa = fa.FiniteAutomaton(
        alphabet=alphabet,
        states=None,
        default_state_id=default_state_id)

    def add_subtree(tree_node, q_parent=None, label=None):
        matched = tree_node.matching >= 0
        if matched:
            state_id, accepting = tree_node.matching, True
        else:
            state_id, accepting = None, False

        is_root = q_parent is None
        q = nfa.new_state(accepting, start_state=is_root, state_id=state_id)
        if not is_root:
            nfa.add_transition(q_parent, q, label)

        for (typ, tag), child in tree_node.children.items():
            if typ != '//':
                add_subtree(child, q, tag2alphabet(nfa, tag))
                continue
            q_rec = nfa.new_state()
            nfa.add_transition(q, q_rec, None)
            nfa.add_transition(q_rec, q_rec, set(nfa.alphabet))
            add_subtree(child, q_rec, tag2alphabet(nfa, tag))

    add_subtree(qt)

    return nfa
Esempio n. 8
0
def enumerate_inputs(no_states, qr):
    """
    Generate every (state, token) input pair.

    States run from 0 to no_states - 1. For each state the tokens are the
    sorted token set extracted from qr, followed by the token '[u]'.

    @param no_states: number of states
    @param qr: root of the query tree
    @return: generator of (state, token) tuples, grouped by state
    """
    alphabet = sorted(qtree.extract_token_set(qr))
    alphabet += ['[u]']
    for state in xrange(no_states):
        for token in alphabet:
            yield state, token
Esempio n. 9
0
def enumerate_inputs(no_states, qr):
    """
    Yield all (state, token) combinations, state-major.

    The token sequence is the sorted token set extracted from qr with the
    token '[u]' placed last; it is repeated for each state index
    0 .. no_states - 1.

    @param no_states: number of states
    @param qr: root of the query tree
    @return: generator of (state, token) tuples
    """
    known_tokens = qtree.extract_token_set(qr)
    ordered_tokens = sorted(known_tokens) + ['[u]']
    state = 0
    while state < no_states:
        for token in ordered_tokens:
            yield state, token
        state += 1