def compute_langlet_expr(langlet, start_symbol=None):
    """Generate a random source string for *langlet* from its parse NFAs.

    Walks the NFAs to build a "super trace" of grammar states, flattens it
    into a token-id sequence, synthesizes a token string for each id, and
    unparses the result back into source text.

    NOTE(review): `start_symbols` is read twice below but is never defined in
    this function and is not visible at module level in this chunk -- as
    written, calling this raises NameError.  The SourceGenerator class later
    in this file computes an equivalent "unused symbols" set; this function
    looks like an older, superseded version of that logic.  Confirm before
    use.

    :param langlet: project langlet object providing `parse_nfa`, `token`,
        `get_node_name` and `unparse` (semantics defined elsewhere).
    :param start_symbol: nonterminal id used as the root of the trace;
        must be a key of `langlet.parse_nfa.nfas`.
    :return: the unparsed source string produced by `langlet.unparse`.
    """
    running_cycle = set()
    state_traces = {}
    # Precompute state traces for every reachable rule.
    for s, nfa in langlet.parse_nfa.nfas.items():
        if s is start_symbol or s not in start_symbols:  # NOTE(review): undefined name
            state_traces[s] = compute_state_traces(nfa)
    # NFA entry is a triple; the middle component is the start state.
    _, start, _ = langlet.parse_nfa.nfas[start_symbol]
    segtree = SegmentTree(langlet)
    segtree.create()
    supertrace = compute_super_tr(langlet, start, state_traces, segtree, running_cycle, start_symbols)
    flat_traces = compute_flat_tr(langlet)
    langlet_trace = []
    # Expand nonterminals via their flat traces; keep terminals as-is.
    for t in supertrace:
        if is_symbol(t[0]):
            langlet_trace.extend(flat_traces[t[0]])
        else:
            langlet_trace.append(t[0])
    # Debug helper (left from development):
    # for item in langlet_trace:
    #     print item, langlet.get_node_name(item)
    tgen = TokenGenerator(langlet, stdlen=1)
    tokstream = []
    letters = "abcdefg"  # cycled pool of synthetic identifier names
    i = 0
    for tid in langlet_trace:
        if tid == langlet.token.NAME:
            # NAME tokens get generated variable names a, b, c, ...
            tokstream.append([tid, letters[i % len(letters)]])
            i += 1
        elif is_keyword(tid):
            # Node names of keywords are apparently prefixed with 4 chars
            # that must be stripped -- TODO confirm the naming scheme.
            tokstream.append([tid, langlet.get_node_name(tid)[4:]])
        else:
            tokstream.append([tid, tgen.gen_token_string(tid + SYMBOL_OFFSET)])
    # 1000 is presumably the root nonterminal id expected by unparse --
    # TODO confirm against the langlet's symbol numbering.
    return langlet.unparse([1000] + tokstream)
# NOTE(review): this `def __init__` sits at module level, not inside a class.
# It is a token-for-token duplicate of SourceGenerator.__init__ defined later
# in this file and is effectively dead code -- consider deleting it.
def __init__(self, langlet, start_symbol=None):
    """Initialize generator state for *langlet*.

    Statement order matters: `compute_unused_symbols` may assign
    `self.start_symbol` (when None was passed), which must happen before
    `expr_types` is seeded from it, and the segment tree must exist before
    `compute_expr_types` queries it.
    """
    self.langlet = langlet
    self.start_symbol = start_symbol
    self.state_traces = {}       # rule id -> list of NFA state traces
    self.unused_symbols = set()  # rules excluded from trace computation
    self.compute_unused_symbols()
    self.segtree = SegmentTree(langlet)
    self.segtree.create()
    self.token_traces = {}       # rule id -> list of token streams
    self.expr_types = set([self.start_symbol])
    self.compute_expr_types()
    self.tokgen = TokenGenerator(langlet, stdlen=1)
    self._cnt = 1                # round-robin counter for cached token traces
    self._id = 0                 # counter for generated variable names
    self._expressions = []       # memoized result of expressions()
class SourceGenerator(object):
    """Generate source-code expressions for a langlet from its parse NFAs.

    For each "expression type" (nonterminal reachable from the start symbol
    via the segment tree) the generator computes state traces through the
    grammar NFAs, converts them to concrete token streams, and untokenizes
    them into source text.

    NOTE(review): this class is redefined, token-for-token identically,
    later in this file; that second definition shadows this one at import
    time.  One of the two copies should be removed.
    """

    # Cycled pool of synthetic identifier names for generated NAME tokens.
    Varnames = "abcdestuxyz"

    def __init__(self, langlet, start_symbol=None):
        """Set up per-langlet state.

        Statement order matters: `compute_unused_symbols` may assign
        `self.start_symbol` (when None was passed), which must happen
        before `expr_types` is seeded, and the segment tree must exist
        before `compute_expr_types` queries it.
        """
        self.langlet = langlet
        self.start_symbol = start_symbol
        self.state_traces = {}       # rule id -> list of NFA state traces
        self.unused_symbols = set()  # rules excluded from trace computation
        self.compute_unused_symbols()
        self.segtree = SegmentTree(langlet)
        self.segtree.create()
        self.token_traces = {}       # rule id -> list of token streams
        self.expr_types = set([self.start_symbol])
        self.compute_expr_types()
        self.tokgen = TokenGenerator(langlet, stdlen=1)
        self._cnt = 1                # round-robin counter for cached token traces
        self._id = 0                 # counter for generated variable names
        self._expressions = []       # memoized result of expressions()

    def compute_expr_types(self):
        """Collect the nonterminals treated as expression types.

        A rule counts as an expression type when the segment tree yields a
        non-empty segment from the start symbol (or one of its immediate
        sub-symbols) to that rule.
        """
        symbols = [s for s in self.langlet.parse_nfa.symbols_of[self.start_symbol] if is_symbol(s)]
        symbols.insert(0, self.start_symbol)
        for s in self.langlet.parse_nfa.nfas:
            if s not in self.unused_symbols:
                for sym in symbols:
                    seg = self.segtree[sym:s]
                    if seg:
                        self.expr_types.add(s)

    def compute_unused_symbols(self):
        """Determine the rules that are irrelevant for the start symbol.

        With no explicit start symbol, the langlet's default start symbol
        is used and the remaining start symbols become unused.  Otherwise
        every rule not reachable from `start_symbol` (per `rule_ids`) is
        marked unused.
        """
        if self.start_symbol is None:
            # start_symbols is indexed: [0] default symbol, [1] a set of
            # all start symbols -- presumably; confirm against parse_nfa.
            self.start_symbol = self.langlet.parse_nfa.start_symbols[0]
            self.unused_symbols = self.langlet.parse_nfa.start_symbols[1]
            self.unused_symbols.remove(self.start_symbol)
        else:
            rules = rule_ids(self.langlet, self.start_symbol)
            self.unused_symbols = set()
            for r in self.langlet.parse_nfa.nfas:
                if r not in rules:
                    self.unused_symbols.add(r)

    def expressions(self):
        """Return (and memoize) all generated expressions.

        :return: list of (rule id, rule name, source text) triples, one per
            deduplicated token trace of each expression type.
        """
        if self._expressions:
            return self._expressions
        self._compute_all_state_traces()
        self._insert_non_expr_state_traces()
        self._compute_all_token_traces()
        self._remove_duplicates()
        for s in self.expr_types:
            name = self.langlet.get_node_name(s)
            for tr in self.token_traces[s]:
                self._expressions.append((s, name, self.langlet.untokenize(tr)))
        return self._expressions

    def _remove_duplicates(self):
        """Drop token traces whose token-id sequence was already seen.

        Deduplication is global across all expression types (one shared
        `S` set), keyed on the tuple of token ids only -- token string
        values are ignored.
        """
        S = set()
        for s in self.expr_types:
            traces = self.token_traces[s]
            new_traces = []
            # NOTE: the genexp variable `s` below shadows the outer `s`;
            # harmless, since genexps have their own scope.
            for i, trace in enumerate(traces[:]):
                tup = tuple(s[0] for s in trace)
                if tup not in S:
                    new_traces.append(trace)
                    S.add(tup)
            self.token_traces[s] = new_traces

    def _compute_all_state_traces(self):
        """Compute state traces for every rule that is in use.

        The trailing element of each raw trace is discarded -- presumably a
        terminating sentinel emitted by `compute_all_tr`; confirm there.
        """
        for s, nfa in self.langlet.parse_nfa.nfas.items():
            # NOTE(review): `is` on ints relies on identity, not equality;
            # the `or` clause is what actually admits the start symbol,
            # since it was removed from unused_symbols.
            if s is self.start_symbol or s not in self.unused_symbols:
                traces = compute_all_tr(1, nfa)
                for tr in traces:
                    del tr[-1]
                self.state_traces[s] = traces

    def _compute_all_token_traces(self):
        """Convert state traces to token traces with a retry worklist.

        Traces that cannot be completed yet (because a nonterminal they
        reference has no token trace so far) are parked in `rest` and
        retried.  The loop terminates when a full pass over `rest` makes
        no progress (its length stops shrinking).
        """
        Tr = []
        rest = []
        for s, traces in self.state_traces.items():
            for trace in traces:
                Tr.append((s, trace))
        n = 0
        while True:
            if Tr:
                s, trace = Tr.pop()
            else:
                if len(rest) == n:
                    break  # no progress in the last pass -> give up on rest
                else:
                    n = len(rest)
                    Tr = rest[::-1]
                    rest = []
                    # NOTE(review): s/trace keep their values from the
                    # previous iteration here, so that pair is processed
                    # once more; any duplicate token trace is removed
                    # later by _remove_duplicates.  Confirm intended.
            visited = set([s])
            tokentrace = self._compute_token_trace(trace, visited)
            if tokentrace:
                tt = self.token_traces.get(s, [])
                tt.append(tokentrace)
                self.token_traces[s] = tt
            else:
                rest.append((s, trace))

    def _insert_non_expr_state_traces(self):
        """Splice traces of non-expression rules into expression traces.

        Each non-expression rule's traces are substituted into the traces
        of a rule that references it, preferring expression types; chains
        through already-inserted non-expression rules are followed.
        """
        non_expr_types = set()
        for s in self.state_traces:
            if s not in self.expr_types:
                non_expr_types.add(s)

        def insert(s, nids):
            # Splice every trace of rule s into the first rule e in nids
            # whose traces reference s; returns e on success, None if no
            # referencing rule was found.
            for e in nids:
                if e == s:
                    continue
                for tr in self.state_traces[e][:]:
                    for i, state in enumerate(tr):
                        if state[0] == s:
                            for T in self.state_traces[s]:
                                self.state_traces[e].append(tr[:i] + T + tr[i + 1:])
                            return e

        inserted = set()
        for s in non_expr_types:
            e = s
            while True:
                if not insert(e, self.expr_types):
                    f = insert(e, non_expr_types)
                    if f and f in inserted:
                        # repeat insertion
                        e = f
                    else:
                        break
                else:
                    inserted.add(e)
                    break

    def _compute_token_trace(self, state_trace, visited):
        """Turn one state trace into a token stream.

        :param state_trace: sequence of NFA states; each state's first
            element is a node id (keyword, token, or nonterminal).
        :param visited: set of nonterminal ids on the current recursion
            path, used for cycle detection.
        :return: list of [token id, token string] pairs, or None when the
            trace cannot be completed yet (cycle or missing sub-trace).
        """
        tokstream = []
        for state in state_trace:
            nid = state[0]
            if is_keyword(nid):
                # Keyword node names carry a 4-char prefix that is
                # stripped -- TODO confirm the naming scheme.
                tokstream.append([nid, self.langlet.get_node_name(nid)[4:]])
            elif is_token(nid):
                if nid == self.langlet.token.NAME:
                    # Generated variable names cycle through Varnames.
                    name = self.Varnames[self._id % len(self.Varnames)]
                    tokstream.append([nid, name])
                    self._id += 1
                else:
                    tokstream.append([nid, self.tokgen.gen_token_string(nid + SYMBOL_OFFSET)])
            else:
                # Nonterminal: try a direct segment down to a NAME token.
                seg = self.segtree[nid:self.langlet.token.NAME]
                if seg:
                    S, P = proj_segment(seg)
                    for t in P:
                        if t == self.langlet.token.NAME:
                            # Use the segment's symbol name (or the
                            # nonterminal's own name when S == 0).
                            tokstream.append([t, self.langlet.get_node_name(S if S != 0 else nid)])
                        elif is_keyword(t):
                            tokstream.append([t, self.langlet.get_node_name(t)[4:]])
                        else:
                            tokstream.append([t, self.tokgen.gen_token_string(t + SYMBOL_OFFSET)])
                else:
                    nt_traces = self.token_traces.get(nid, [])
                    if nt_traces:
                        # Reuse cached sub-traces round-robin via _cnt.
                        idx = self._cnt % len(nt_traces)
                        self._cnt += 1
                        tokstream += nt_traces[idx]
                    else:
                        if nid in visited:
                            return  # cycle -> caller retries later
                        else:
                            visited.add(nid)
                            # Iterate over a copy: on success the used
                            # trace is deleted from the original list.
                            for i, st in enumerate(self.state_traces[nid][:]):
                                tr = self._compute_token_trace(st, visited)
                                if tr:
                                    tokstream += tr
                                    del self.state_traces[nid][i]
                                    tt = self.token_traces.get(nid, [])
                                    tt.append(tr)
                                    self.token_traces[nid] = tt
                                    break
                            else:
                                return  # no sub-trace worked out yet
                            visited.remove(nid)
        return tokstream
class SourceGenerator(object):
    """Generate source-code expressions for a langlet from its parse NFAs.

    For each "expression type" (nonterminal reachable from the start symbol
    via the segment tree) the generator computes state traces through the
    grammar NFAs, converts them to concrete token streams, and untokenizes
    them into source text.

    NOTE(review): this is a token-for-token duplicate of the
    SourceGenerator class defined earlier in this file; being the later
    definition, it is the one in effect at import time.  One of the two
    copies should be removed.
    """

    # Cycled pool of synthetic identifier names for generated NAME tokens.
    Varnames = "abcdestuxyz"

    def __init__(self, langlet, start_symbol=None):
        """Set up per-langlet state.

        Statement order matters: `compute_unused_symbols` may assign
        `self.start_symbol` (when None was passed), which must happen
        before `expr_types` is seeded, and the segment tree must exist
        before `compute_expr_types` queries it.
        """
        self.langlet = langlet
        self.start_symbol = start_symbol
        self.state_traces = {}       # rule id -> list of NFA state traces
        self.unused_symbols = set()  # rules excluded from trace computation
        self.compute_unused_symbols()
        self.segtree = SegmentTree(langlet)
        self.segtree.create()
        self.token_traces = {}       # rule id -> list of token streams
        self.expr_types = set([self.start_symbol])
        self.compute_expr_types()
        self.tokgen = TokenGenerator(langlet, stdlen=1)
        self._cnt = 1                # round-robin counter for cached token traces
        self._id = 0                 # counter for generated variable names
        self._expressions = []       # memoized result of expressions()

    def compute_expr_types(self):
        """Collect the nonterminals treated as expression types.

        A rule counts as an expression type when the segment tree yields a
        non-empty segment from the start symbol (or one of its immediate
        sub-symbols) to that rule.
        """
        symbols = [
            s for s in self.langlet.parse_nfa.symbols_of[self.start_symbol]
            if is_symbol(s)
        ]
        symbols.insert(0, self.start_symbol)
        for s in self.langlet.parse_nfa.nfas:
            if s not in self.unused_symbols:
                for sym in symbols:
                    seg = self.segtree[sym:s]
                    if seg:
                        self.expr_types.add(s)

    def compute_unused_symbols(self):
        """Determine the rules that are irrelevant for the start symbol.

        With no explicit start symbol, the langlet's default start symbol
        is used and the remaining start symbols become unused.  Otherwise
        every rule not reachable from `start_symbol` (per `rule_ids`) is
        marked unused.
        """
        if self.start_symbol is None:
            # start_symbols is indexed: [0] default symbol, [1] a set of
            # all start symbols -- presumably; confirm against parse_nfa.
            self.start_symbol = self.langlet.parse_nfa.start_symbols[0]
            self.unused_symbols = self.langlet.parse_nfa.start_symbols[1]
            self.unused_symbols.remove(self.start_symbol)
        else:
            rules = rule_ids(self.langlet, self.start_symbol)
            self.unused_symbols = set()
            for r in self.langlet.parse_nfa.nfas:
                if r not in rules:
                    self.unused_symbols.add(r)

    def expressions(self):
        """Return (and memoize) all generated expressions.

        :return: list of (rule id, rule name, source text) triples, one per
            deduplicated token trace of each expression type.
        """
        if self._expressions:
            return self._expressions
        self._compute_all_state_traces()
        self._insert_non_expr_state_traces()
        self._compute_all_token_traces()
        self._remove_duplicates()
        for s in self.expr_types:
            name = self.langlet.get_node_name(s)
            for tr in self.token_traces[s]:
                self._expressions.append(
                    (s, name, self.langlet.untokenize(tr)))
        return self._expressions

    def _remove_duplicates(self):
        """Drop token traces whose token-id sequence was already seen.

        Deduplication is global across all expression types (one shared
        `S` set), keyed on the tuple of token ids only -- token string
        values are ignored.
        """
        S = set()
        for s in self.expr_types:
            traces = self.token_traces[s]
            new_traces = []
            # NOTE: the genexp variable `s` below shadows the outer `s`;
            # harmless, since genexps have their own scope.
            for i, trace in enumerate(traces[:]):
                tup = tuple(s[0] for s in trace)
                if tup not in S:
                    new_traces.append(trace)
                    S.add(tup)
            self.token_traces[s] = new_traces

    def _compute_all_state_traces(self):
        """Compute state traces for every rule that is in use.

        The trailing element of each raw trace is discarded -- presumably a
        terminating sentinel emitted by `compute_all_tr`; confirm there.
        """
        for s, nfa in self.langlet.parse_nfa.nfas.items():
            # NOTE(review): `is` on ints relies on identity, not equality;
            # the `or` clause is what actually admits the start symbol,
            # since it was removed from unused_symbols.
            if s is self.start_symbol or s not in self.unused_symbols:
                traces = compute_all_tr(1, nfa)
                for tr in traces:
                    del tr[-1]
                self.state_traces[s] = traces

    def _compute_all_token_traces(self):
        """Convert state traces to token traces with a retry worklist.

        Traces that cannot be completed yet (because a nonterminal they
        reference has no token trace so far) are parked in `rest` and
        retried.  The loop terminates when a full pass over `rest` makes
        no progress (its length stops shrinking).
        """
        Tr = []
        rest = []
        for s, traces in self.state_traces.items():
            for trace in traces:
                Tr.append((s, trace))
        n = 0
        while True:
            if Tr:
                s, trace = Tr.pop()
            else:
                if len(rest) == n:
                    break  # no progress in the last pass -> give up on rest
                else:
                    n = len(rest)
                    Tr = rest[::-1]
                    rest = []
                    # NOTE(review): s/trace keep their values from the
                    # previous iteration here, so that pair is processed
                    # once more; any duplicate token trace is removed
                    # later by _remove_duplicates.  Confirm intended.
            visited = set([s])
            tokentrace = self._compute_token_trace(trace, visited)
            if tokentrace:
                tt = self.token_traces.get(s, [])
                tt.append(tokentrace)
                self.token_traces[s] = tt
            else:
                rest.append((s, trace))

    def _insert_non_expr_state_traces(self):
        """Splice traces of non-expression rules into expression traces.

        Each non-expression rule's traces are substituted into the traces
        of a rule that references it, preferring expression types; chains
        through already-inserted non-expression rules are followed.
        """
        non_expr_types = set()
        for s in self.state_traces:
            if s not in self.expr_types:
                non_expr_types.add(s)

        def insert(s, nids):
            # Splice every trace of rule s into the first rule e in nids
            # whose traces reference s; returns e on success, None if no
            # referencing rule was found.
            for e in nids:
                if e == s:
                    continue
                for tr in self.state_traces[e][:]:
                    for i, state in enumerate(tr):
                        if state[0] == s:
                            for T in self.state_traces[s]:
                                self.state_traces[e].append(tr[:i] + T + tr[i + 1:])
                            return e

        inserted = set()
        for s in non_expr_types:
            e = s
            while True:
                if not insert(e, self.expr_types):
                    f = insert(e, non_expr_types)
                    if f and f in inserted:
                        # repeat insertion
                        e = f
                    else:
                        break
                else:
                    inserted.add(e)
                    break

    def _compute_token_trace(self, state_trace, visited):
        """Turn one state trace into a token stream.

        :param state_trace: sequence of NFA states; each state's first
            element is a node id (keyword, token, or nonterminal).
        :param visited: set of nonterminal ids on the current recursion
            path, used for cycle detection.
        :return: list of [token id, token string] pairs, or None when the
            trace cannot be completed yet (cycle or missing sub-trace).
        """
        tokstream = []
        for state in state_trace:
            nid = state[0]
            if is_keyword(nid):
                # Keyword node names carry a 4-char prefix that is
                # stripped -- TODO confirm the naming scheme.
                tokstream.append([nid, self.langlet.get_node_name(nid)[4:]])
            elif is_token(nid):
                if nid == self.langlet.token.NAME:
                    # Generated variable names cycle through Varnames.
                    name = self.Varnames[self._id % len(self.Varnames)]
                    tokstream.append([nid, name])
                    self._id += 1
                else:
                    tokstream.append([
                        nid,
                        self.tokgen.gen_token_string(nid + SYMBOL_OFFSET)
                    ])
            else:
                # Nonterminal: try a direct segment down to a NAME token.
                seg = self.segtree[nid:self.langlet.token.NAME]
                if seg:
                    S, P = proj_segment(seg)
                    for t in P:
                        if t == self.langlet.token.NAME:
                            # Use the segment's symbol name (or the
                            # nonterminal's own name when S == 0).
                            tokstream.append([
                                t,
                                self.langlet.get_node_name(
                                    S if S != 0 else nid)
                            ])
                        elif is_keyword(t):
                            tokstream.append(
                                [t, self.langlet.get_node_name(t)[4:]])
                        else:
                            tokstream.append([
                                t,
                                self.tokgen.gen_token_string(t + SYMBOL_OFFSET)
                            ])
                else:
                    nt_traces = self.token_traces.get(nid, [])
                    if nt_traces:
                        # Reuse cached sub-traces round-robin via _cnt.
                        idx = self._cnt % len(nt_traces)
                        self._cnt += 1
                        tokstream += nt_traces[idx]
                    else:
                        if nid in visited:
                            return  # cycle -> caller retries later
                        else:
                            visited.add(nid)
                            # Iterate over a copy: on success the used
                            # trace is deleted from the original list.
                            for i, st in enumerate(self.state_traces[nid][:]):
                                tr = self._compute_token_trace(st, visited)
                                if tr:
                                    tokstream += tr
                                    del self.state_traces[nid][i]
                                    tt = self.token_traces.get(nid, [])
                                    tt.append(tr)
                                    self.token_traces[nid] = tt
                                    break
                            else:
                                return  # no sub-trace worked out yet
                            visited.remove(nid)
        return tokstream