def _substitute(self, tree, nodes, node_stack):
    '''
    Substitute a node in the parental hierarchy of `tree` by `nodes`.

    Let (P1, ..., Pn) with P1 = parent(tree), P(i+1) = parent(P(i)) be the
    parental hierarchy of `tree`. The algorithm seeks the innermost P(i)
    with node id repl_id == nodes[0][0]. If found it replaces P(i) inside
    P(i+1) with nodes = (N1, ..., Nk). The caller guarantees that the node
    id of each N(j) is repl_id.

    @param tree: CST node at which substitution starts.
    @param nodes: sequence of replacement CST nodes; nodes[0][0] is the
                  node id that must be matched in the hierarchy.
    @param node_stack: list of ancestors of `tree`, indexed from the end
                       (innermost parent last).
    @return: (replaced_node, remaining_node_stack); on failure a
             TranslationError is raised via _create_translation_error.
    '''
    repl_id = nodes[0][0]
    if tree[0] == repl_id:
        # tree itself already carries the target id -- nothing to search
        return (tree, node_stack)
    if is_token(repl_id):
        # replace token
        if is_token(tree[0]):
            tree[:] = nodes[0]
            return (tree, node_stack)
    else:
        # walk the ancestor stack from the innermost parent outwards
        i = len(node_stack)-1
        while i>=0:
            P = node_stack[i]
            i-=1
            if repl_id == P[0]:
                try:
                    nd_list = node_stack[i]  # parent list of P
                    i-=1
                    for j, N in enumerate(nd_list):
                        # identity test: locate P itself, not an
                        # equal-valued sibling
                        if id(N) == id(P):
                            nd_list[:] = nd_list[:j]+list(nodes)+nd_list[j+1:]
                            return (nd_list, node_stack[i-1:])
                except IndexError:
                    # nothing to pop from node_stack
                    # NOTE(review): node_stack[i] with a small negative i
                    # wraps around instead of raising IndexError, so this
                    # guard may not trigger where intended -- verify.
                    P[:] = nodes[0]
                    return (P, node_stack[i-1:])
    self._create_translation_error(tree, nodes, node_stack)
def check(r, state, tracer, visited):
    '''
    Check one NFA state of rule `r` for first-set conflicts, then recurse
    into all follow states.

    @param r: rule (node id) being checked.
    @param state: current NFA state; state[0] is its node id.
    @param tracer: object whose select() yields follow states of a node id
                   and which is clone()d per recursive branch.
    @param visited: set of states already checked (cycle protection).

    Side effects: emits a NeedsMoreExpansionWarning (when `print_warning`
    is set) and records `r` in the module-level `backtracking` set whenever
    the reach/first sets of two selectable follow symbols intersect.
    '''
    R = set()           # union of all reach sets accepted so far
    C = {}              # symbol -> its reach set
    msg = ""
    must_trace = False  # NOTE(review): never used in this function
    states = tracer.select(state[0])
    # distinct selectable node ids (FIN pseudo-state filtered out)
    selection = list(set(s[0] for s in states if s and s[0]!=FIN))
    # NOTE(review): the enumerate() index i is unused below
    for i,s in enumerate(selection):
        if is_token(s):
            S = set([s])
        else:
            S = nfamodule.reachables[s]
        if R&S:
            # conflict: find an earlier symbol u whose set intersects S
            # and build a diagnostic message for the pair (u, s)
            for u in C:
                if C[u]&S:
                    if is_token(s):
                        msg = "%s : %s -> FirstSet(%s) /\\ {%s} = %s\n"%(r, state, u, s, C[u]&S)
                    elif is_token(u):
                        msg = "%s : %s -> {%s} /\\ FirstSet(%s) = %s\n"%(r, state, u, s, C[u]&S)
                    else:
                        msg = "%s : %s -> FirstSet(%s) /\\ FirstSet(%s) = %s\n"%(r, state, u, s, C[u]&S)
                    # attribute the warning to the source line following
                    # this statement
                    lineno = sys._getframe(0).f_lineno +1
                    if print_warning:
                        warnings.warn_explicit(msg, NeedsMoreExpansionWarning, "nfadatagen.py", lineno)
                    backtracking.add(r)
                    break
        else:
            R.update(S)
            C[s] = S
    # recurse into every unvisited follow state with a fresh tracer clone
    # NOTE(review): the loop variable shadows the `state` parameter
    for state in states:
        if state[0] is not FIN and state not in visited:
            visited.add(state)
            subtracer = tracer.clone()
            check(r, state, subtracer, visited)
def _substitute(self, tree, nodes, node_stack):
    '''
    Replace a node of the parental hierarchy of `tree` by `nodes`.

    Let (P1, ..., Pn) be the parental hierarchy of `tree` where P(i) is
    the i-th grandparent. The substitution algorithm seeks P(i) with
    repl_id == P(i)[0]; if found it splices nodes = (N1, ..., Nk) into
    P(i+1) in place of P(i). It is guaranteed by the caller that the node
    id of every N(j) is repl_id = nodes[0][0].

    @return: (replaced_node, remaining_node_stack); raises a
             TranslationError via _create_translation_error on failure.
    '''
    repl_id = nodes[0][0]
    if tree[0] == repl_id:
        # fast path: tree already has the requested node id
        return (tree, node_stack)
    if is_token(repl_id):
        # replace token
        if is_token(tree[0]):
            tree[:] = nodes[0]
            return (tree, node_stack)
    else:
        # search the ancestor stack, innermost parent first
        i = len(node_stack) - 1
        while i >= 0:
            P = node_stack[i]
            i -= 1
            if repl_id == P[0]:
                try:
                    nd_list = node_stack[i]  # P's own parent
                    i -= 1
                    for j, N in enumerate(nd_list):
                        # compare identities -- equal-valued siblings of P
                        # must not be replaced
                        if id(N) == id(P):
                            nd_list[:] = nd_list[:j] + list(nodes) + nd_list[j + 1:]
                            return (nd_list, node_stack[i - 1:])
                except IndexError:
                    # nothing to pop from node_stack
                    P[:] = nodes[0]
                    return (P, node_stack[i - 1:])
    self._create_translation_error(tree, nodes, node_stack)
def node_cmp(cls, tree, node_id):
    '''
    Compares first node of cst tree with node_id.

    @param tree: CST
    @param node_id: integer representing a py_symbol or a py_token
    @return:
        -  0 if node_id is tree-root.
        - -1 if tree is a py_token or node_id cannot be the node_id of
             any subtree.
        -  1 otherwise
    '''
    # project both ids into the langlet-independent range so that nodes
    # of two different langlets stay comparable (see TODO below)
    tree_id = tree[0] % LANGLET_ID_OFFSET
    node_id = node_id % LANGLET_ID_OFFSET
    if tree_id == node_id:   # do we still want this? it makes nodes of two
        return 0             # different langlets comparable.
    elif is_token(tree_id):  # tokens are leaves: nothing can occur below
        return -1
    if cls.activated and is_symbol(node_id):
        try:
            s0 = hierarchy[tree_id]  # global ???
            s1 = hierarchy[node_id]
            if s0 > s1:
                return -1
        except KeyError:
            # unknown symbol in the hierarchy table -- cannot exclude it
            pass
    # FIX: the original implementation fell off the end of the function
    # on some paths (e.g. cls.activated with hierarchy[tree_id] <=
    # hierarchy[node_id]) and returned None, violating the documented
    # contract of returning 1 in all remaining cases.
    return 1
def node_cmp(cls, tree, node_id):
    '''
    Compares first node of cst tree with node_id.

    @param tree: CST
    @param node_id: integer representing a py_symbol or a py_token
    @return:
        -  0 if node_id is tree-root.
        - -1 if tree is a py_token or node_id cannot be the node_id of
             any subtree.
        -  1 otherwise

    NOTE(review): when cls.activated holds, node_id is a symbol, both
    hierarchy lookups succeed and s0 <= s1, control falls off the end of
    the function and None is returned instead of the documented 1 --
    confirm whether callers rely on this.
    '''
    tree_id = tree[0] % LANGLET_ID_OFFSET
    node_id = node_id % LANGLET_ID_OFFSET
    if tree_id == node_id:   # do we still want this? it makes nodes of two different langlets
        return 0             # comparable.
    elif is_token(tree_id):  # is token
        return -1
    if cls.activated:
        if is_symbol(node_id):
            try:
                s0 = hierarchy[tree_id]  # global ???
                s1 = hierarchy[node_id]
                if s0>s1:
                    return -1
            except KeyError:
                return 1
        else:
            return 1
    else:
        return 1
def check_first_follow_conflict(self): ''' A FirstFollowConflict warning is raised when a conflict occurs. Background :: This method checks for first/follow conflicts. A first/follow conflict is an ambiguity that occur in grammars like G: A* A If a word 'AA' is given one can match it with the first A* ( which leads to an error ) or the first character by A* and the second by A. A first/follow conflict shall always be removed and it needs to be removed manually! ''' last_sets = self.nfadata.compute_last_set() fin_cycles = self.nfadata.compute_fin_cycles() warned = set() def format_stream(T, k): stream = [] for t in [t for t in T[:k+2]]: if isinstance(t, int): stream.append(self.node_name(t)) else: stream.append("'"+t+"'") return stream[0]+': '+' '.join(stream[1:]) for r, traces in self._cyclefree_traces().items(): for T in traces: for i in range(1,len(T)-1): A = T[i] B = T[i+1] if (r,A,B) in warned or A == B: continue if is_token(B): if B in last_sets.get(A, set()): if B in fin_cycles[A]: warn_text = "%s -> LastSet(%s) /\\ set([%s]) != {}"%(self.node_name(r),self.node_name(A),self.node_name(B)) #, format_stream(T,i)) warnings.warn_explicit(warn_text, FirstFollowConflict, "nfadatagen.py", sys._getframe(0).f_lineno-1) warned.add((r,A,B)) break else: S = last_sets.get(A, set()) & self.nfadata.reachables.get(B, set()) if S: C = S & fin_cycles[A] if C: warn_text = warn_text = "%s -> LastSet(%s) /\\ FirstSet(%s) != {}"%(self.node_name(r), self.node_name(A),self.node_name(B)) #,format_stream(T,i)) warnings.warn_explicit(warn_text, FirstFollowConflict, "nfadatagen.py", sys._getframe(0).f_lineno-1) warned.add((r,A,B)) print " /\\", set([self.node_name(c) for c in C]) break
def find_token_gen(tree, depth=MAX_DEPTH):
    '''
    Generator yielding every token node of `tree` in document order,
    descending at most `depth` levels below the root.

    @param tree: CST -- a list whose first element is a node id.
    @param depth: remaining recursion depth.
    '''
    if is_token(tree[0]):
        yield tree
    elif depth < 0:
        # FIX: was `raise StopIteration`. Raising StopIteration inside a
        # generator is deprecated and, under PEP 479 (Python 3.7+), is
        # converted into a RuntimeError. A bare `return` terminates the
        # generator with identical semantics in every Python version.
        return
    for sub in tree[1:]:
        # non-list children (e.g. line numbers) are skipped
        if isinstance(sub, list):
            for item in find_token_gen(sub, depth=depth - 1):
                if item:
                    yield item
def find_token_gen(tree, depth = MAX_DEPTH):
    '''
    Yield all token nodes of the CST `tree`, left to right, limited to
    `depth` levels of nesting.
    '''
    if is_token(tree[0]):
        yield tree
    elif depth<0:
        # FIX: replaced `raise StopIteration` with `return`. Inside a
        # generator the former is deprecated and becomes a RuntimeError
        # under PEP 479; `return` ends iteration identically everywhere.
        return
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_gen(sub, depth=depth-1):
                if item:
                    yield item
def find_token_chains_gen(tree, depth = MAX_DEPTH, chain=[]):
    '''
    Generator yielding, for every token node of `tree`, a Chain of the
    ancestor nodes from the search root down to (and including) the token.

    @param chain: ancestor accumulator. The mutable default is harmless
                  here because `chain` is never mutated -- each call
                  builds a fresh list via `chain+[tree]`.
    '''
    if is_token(tree[0]):
        yield Chain(chain+[tree])
    elif depth<0:
        # FIX: was `raise StopIteration` -- deprecated inside generators
        # and a RuntimeError under PEP 479 (Python 3.7+); `return` has the
        # same effect in all versions.
        return
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_chains_gen(sub, depth=depth-1, chain = chain+[tree]):
                if item:
                    yield item
def find_token_chain(tree, depth = MAX_DEPTH, chain = []):
    '''
    Depth-first search for the first token node in `tree`.

    @param tree: CST.
    @param depth: remaining recursion depth; search stops below 0.
    @param chain: ancestor accumulator (never mutated, so the mutable
                  default is safe here).
    @return: a Chain of ancestors ending in the token, or None.
    '''
    if is_token(tree[0]):
        return Chain(chain+[tree])
    if depth<0:
        return
    # NOTE(review): the middle children are recursed into without an
    # isinstance(list) check while the last child gets one -- presumably
    # because the final element may be a non-list (e.g. a line number);
    # confirm against the CST layout.
    for sub in tree[1:-1]:
        res = find_token_chain(sub, depth=depth-1, chain = chain+[tree])
        if res:
            return res
    if isinstance(tree[-1], list):
        return find_token_chain(tree[-1], depth=depth-1, chain = chain+[tree])
def find_token_chains_gen(tree, depth=MAX_DEPTH, chain=[]):
    '''
    Yield a Chain (root ... token) for each token node found in `tree`,
    visiting at most `depth` levels.

    `chain` is only ever read (`chain + [tree]` builds new lists), so its
    mutable default does not leak state between calls.
    '''
    if is_token(tree[0]):
        yield Chain(chain + [tree])
    elif depth < 0:
        # FIX: `raise StopIteration` inside a generator is deprecated and
        # raises RuntimeError under PEP 479; a plain `return` is the
        # portable equivalent.
        return
    for sub in tree[1:]:
        if isinstance(sub, list):
            for item in find_token_chains_gen(sub, depth=depth - 1,
                                              chain=chain + [tree]):
                if item:
                    yield item
def create_referrer(symbols):
    '''
    Build the referrer dictionary.

    For each nonterminal s occurring in the right-hand side of a rule r in
    `symbols`, referrer[s] is the set of all rules r referring to s.
    Tokens are skipped.
    '''
    referrer = {}
    for rule, rhs in symbols.items():
        for sym in rhs:
            if is_token(sym):
                continue
            referrer.setdefault(sym, set()).add(rule)
    return referrer
def create_referrer(symbols):
    '''
    The referrer is a dictionary mapping each nonterminal to the set of
    rules whose right-hand side mentions it; token entries are ignored.
    '''
    referrer = {}
    for r, sym in symbols.items():
        for s in sym:
            if not is_token(s):
                if s in referrer:
                    referrer[s].add(r)
                else:
                    referrer[s] = set([r])
    return referrer
def find_token_chain(tree, depth=MAX_DEPTH, chain=[]):
    '''
    Return the ancestor Chain leading to the first token node of `tree`,
    searching depth-first down to `depth` levels, or None if no token is
    found within the depth limit.
    '''
    path = chain + [tree]
    if is_token(tree[0]):
        return Chain(path)
    if depth < 0:
        return None
    # all children except the last are assumed to be subtree lists
    for child in tree[1:-1]:
        found = find_token_chain(child, depth=depth - 1, chain=path)
        if found:
            return found
    # the last element may be a non-list payload and gets a type check
    last = tree[-1]
    if isinstance(last, list):
        return find_token_chain(last, depth=depth - 1, chain=path)
def _create_translation_error(self, tree, nodes, node_stack):
    '''
    Raise a TranslationError describing why _substitute failed.

    Consumes `node_stack` (pops all remaining ancestors) to build the
    diagnostic trace.
    '''
    name_of = self.langlet.get_node_name
    repl_id = nodes[0][0]
    if is_token(repl_id):
        raise TranslationError(
            "Cannot substitute non-terminal %s by terminal %s" % (
                (tree[0], name_of(tree[0])),
                (repl_id, name_of(repl_id))))
    trace = [(tree[0], name_of(tree[0]))]
    while node_stack:
        parent = node_stack.pop()
        trace.append((parent[0], name_of(parent[0])))
    S = "Failed to substitute node %s by %s.\n Node %s must be one of the nodes or a projection in: \n\n%s" % (
        (tree[0], name_of(tree[0])),
        (repl_id, name_of(repl_id)),
        (repl_id, name_of(repl_id)),
        str(trace))
    raise TranslationError(S)
def _create_translation_error(self, tree, nodes, node_stack):
    '''
    Build and raise a TranslationError for a failed substitution.

    @param tree: CST node at which substitution was attempted.
    @param nodes: replacement nodes; nodes[0][0] is the target node id.
    @param node_stack: remaining ancestors of `tree`; it is drained (pop)
                       while building the diagnostic trace.
    @raise TranslationError: always.
    '''
    repl_id = nodes[0][0]
    if is_token(repl_id):
        # the replacement root is a terminal that could not be applied
        raise TranslationError("Cannot substitute non-terminal %s by terminal %s"%(
            (tree[0], self.langlet.get_node_name(tree[0])),
            (repl_id, self.langlet.get_node_name(repl_id)) ))
    else:
        # collect (node id, node name) pairs along the ancestor chain
        trace = [(tree[0], self.langlet.get_node_name(tree[0]))]
        while node_stack:
            P = node_stack.pop()
            trace.append((P[0], self.langlet.get_node_name(P[0])))
        trace = str(trace)
        S = "Failed to substitute node %s by %s.\n Node %s must be one of the nodes or a projection in: \n\n%s"%(
            (tree[0], self.langlet.get_node_name(tree[0])),
            (repl_id, self.langlet.get_node_name(repl_id)),
            (repl_id, self.langlet.get_node_name(repl_id)),
            trace)
        raise TranslationError( S )
def find_node(tree, nid, depth = MAX_DEPTH, exclude = ()):
    '''
    Finds one node of a given node id.

    ( Non-recursive depth first search implementation. )

    @param tree: CST to search in.
    @param nid: node id to look for.
    @param depth: maximal search depth below the root.
    @param exclude: node ids whose subtrees are not descended into.
    @return: the first matching subtree or None.
    '''
    if is_token(tree[0]):
        # a token is a leaf: either it matches itself or nothing does
        if nid == tree[0]:
            return tree
        else:
            return
    dq = deque()
    for sub in tree[1:]:
        dq.append((sub, depth-1))
    while dq:
        node, depth = dq.popleft()
        s = node[0]
        if s == nid:
            return node
        elif is_symbol(s) and s not in exclude and depth>0:
            # push the children in reverse order so that popleft()
            # visits them left-to-right (depth-first order)
            subnodes = zip(node[:0:-1], [depth-1]*(len(node)-1))
            dq.extendleft(subnodes)
def find_node(tree, nid, depth=MAX_DEPTH, exclude=()):
    '''
    Find one subtree of `tree` whose node id equals `nid`.

    Iterative depth-first search; subtrees rooted at ids in `exclude` are
    skipped and descent stops once `depth` levels have been consumed.
    Returns the matching subtree or None.
    '''
    root = tree[0]
    if is_token(root):
        # tokens have no children: only the root itself can match
        if nid == root:
            return tree
        return
    stack = deque()
    for child in tree[1:]:
        stack.append((child, depth - 1))
    while stack:
        node, d = stack.popleft()
        node_id = node[0]
        if node_id == nid:
            return node
        if is_symbol(node_id) and node_id not in exclude and d > 0:
            # reversed children + extendleft == left-to-right DFS order
            stack.extendleft(zip(node[:0:-1], [d - 1] * (len(node) - 1)))
def expand(self, rule=0, visited=None):
    '''
    Algorithm ::

        For each transition

            S -> {L1, ..., Ln}

        in NFA[rule] with at least two follow states L1, L2 and Li != None
        we determine the corresponding selection sel = {s1, ..., sk}.

        From sel we build Ri = s1.reach \/ s2.reach \/ ... \/ si.reach
        successively. If Ri intersects with s(i+1) find the first sj,
        j = 1, ..., i with sj.reach /\ s(i+1).reach. Now embedd the
        smaller of both NFAs into NFA[rule]. Repeat this procedure for
        each transition T until Rn-1 /\ sn.reach = {}.

    @param rule: rule (node id) whose NFA is expanded; 0 selects the
                 start symbol.
    @param visited: set of rules already expanded (recursion guard shared
                    with maybe_expand).

    FIX: `visited` previously defaulted to a shared mutable `set()` that
    is mutated below (visited.add), so expansion state leaked between
    independent top-level calls; it now defaults to None and a fresh set
    is created per call. The unused local `must_select` was removed.
    '''
    if visited is None:
        visited = set()
    if not rule:
        rule = self.start_symbol
    visited.add(rule)
    more = True
    while more:
        more = False
        selections = self._all_selections(rule)
        if len(selections)>TRAIL_MAX_ALLOWED_STATES:
            raise OverflowError("NFA size > TRAIL_MAX_ALLOWED_STATES. Cannot expand rule `%s : %s`"%(rule, self.node_name(rule)))
        if self.warn_cnt>10:
            raise OverflowError("More than ten expansion warnings issued. Expansion is terminated!")
        for follow in selections:
            # distinct selectable node ids (FIN pseudo-state removed)
            selectable = sorted(list(set(s[0] for s in follow if s and s[0]!=FIN)))
            if len(selectable)<=1:
                continue
            R = set()  # union of reach sets accepted so far
            C = {}     # s -> (is_token(s), reach set of s)
            for i,s in enumerate(selectable):
                tok_s = False
                if is_token(s):
                    tok_s = True
                    S = set([s])
                else:
                    S = self.nfadata.reachables[s]
                if R&S:
                    # conflict: pick k = the conflicting symbol whose NFA
                    # is cheaper to embed (tokens always win)
                    for u in selectable[:i]:
                        tok_u, U = C[u]
                        if U&S:
                            if tok_s:
                                k = u
                            elif tok_u:
                                k = s
                            else:
                                N_s = self.nfadata.nfas[s][2]
                                N_u = self.nfadata.nfas[u][2]
                                k = (s if len(N_s)<=len(N_u) else u)
                            break
                    # NOTE(review): if no u intersects S, `k` would be
                    # stale/unbound here -- presumably unreachable since
                    # R is the union of the C[u] sets.
                    for state in (state for state in follow if state[0] == k):
                        self.maybe_expand(state[0], visited)
                        self.embedd_nfa(state, rule)
                        more = True
                        break
                    break
                else:
                    R.update(S)
                    C[s] = (tok_s, S)
            else:
                # no conflict in this transition -- try the next one
                continue
            break
        else:
            # every transition is conflict-free: expansion is complete
            break
def maybe_expand(self, r, visited):
    '''
    Expand rule `r` unless it is empty (0/None), already contained in
    `visited`, or a token (tokens have no NFA to expand).
    '''
    if not r:
        return
    if r in visited or is_token(r):
        return
    self.expand(r, visited)