Example #1
def compute_langlet_expr(langlet, start_symbol=None):
    # Helpers such as compute_state_traces, compute_super_tr, compute_flat_tr,
    # is_symbol, is_keyword, SegmentTree, TokenGenerator, SYMBOL_OFFSET and the
    # start_symbols collection are presumably defined or imported at module
    # level in the original source; they are not part of this snippet.
    running_cycle = set()
    # Compute state traces for every rule NFA that is in use.
    state_traces = {}
    for s, nfa in langlet.parse_nfa.nfas.items():
        if s is start_symbol or s not in start_symbols:
            state_traces[s] = compute_state_traces(nfa)
    _, start, _ = langlet.parse_nfa.nfas[start_symbol]

    segtree = SegmentTree(langlet)
    segtree.create()
    # Build the super trace and flatten it into a plain sequence of node ids.
    supertrace = compute_super_tr(langlet, start, state_traces, segtree, running_cycle, start_symbols)
    flat_traces = compute_flat_tr(langlet)
    langlet_trace = []
    for t in supertrace:
        if is_symbol(t[0]):
            langlet_trace.extend(flat_traces[t[0]])
        else:
            langlet_trace.append(t[0])
    # for item in langlet_trace:
    #    print item, langlet.get_node_name(item)
    # Turn the node-id trace into a token stream with generated token strings.
    tgen = TokenGenerator(langlet, stdlen=1)
    tokstream = []
    letters = "abcdefg"
    i = 0
    for tid in langlet_trace:
        if tid == langlet.token.NAME:
            tokstream.append([tid, letters[i % len(letters)]])
            i += 1
        elif is_keyword(tid):
            tokstream.append([tid, langlet.get_node_name(tid)[4:]])
        else:
            tokstream.append([tid, tgen.gen_token_string(tid + SYMBOL_OFFSET)])
    return langlet.unparse([1000] + tokstream)
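A minimal usage sketch, not part of the original source: it assumes a Langscape-style langlet object together with the module-level helpers referenced above, and reads parse_nfa.start_symbols the same way the SourceGenerator class below does.

# Hedged sketch: `langlet` stands for a Langscape-style langlet object; nothing
# below is taken from the original module beyond the names it already uses.
def demo_langlet_expr(langlet):
    # Use the langlet's default start symbol, as the class-based examples do.
    start = langlet.parse_nfa.start_symbols[0]
    return compute_langlet_expr(langlet, start_symbol=start)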
Example #2
# The same initializer as in the full SourceGenerator class shown below.
def __init__(self, langlet, start_symbol=None):
    self.langlet = langlet
    self.start_symbol = start_symbol
    self.state_traces = {}
    self.unused_symbols = set()
    self.compute_unused_symbols()
    self.segtree = SegmentTree(langlet)
    self.segtree.create()
    self.token_traces = {}
    self.expr_types = set([self.start_symbol])
    self.compute_expr_types()
    self.tokgen = TokenGenerator(langlet, stdlen=1)
    self._cnt = 1
    self._id = 0
    self._expressions = []
Example #3
class SourceGenerator(object):
    # Module-level helpers such as SegmentTree, TokenGenerator, compute_all_tr,
    # rule_ids, proj_segment, is_symbol, is_token, is_keyword and SYMBOL_OFFSET
    # are presumably imported in the original source; they are not shown here.
    Varnames = "abcdestuxyz"

    def __init__(self, langlet, start_symbol=None):
        self.langlet = langlet
        self.start_symbol = start_symbol
        self.state_traces = {}
        self.unused_symbols = set()
        self.compute_unused_symbols()
        self.segtree = SegmentTree(langlet)
        self.segtree.create()
        self.token_traces = {}
        self.expr_types = set([self.start_symbol])
        self.compute_expr_types()
        self.tokgen = TokenGenerator(langlet, stdlen=1)
        self._cnt = 1
        self._id = 0
        self._expressions = []

    def compute_expr_types(self):
        # Mark every used rule as an expression type if the segment tree holds
        # a path from the start symbol (or one of its direct sub-symbols) to it.
        symbols = [s for s in self.langlet.parse_nfa.symbols_of[self.start_symbol] if is_symbol(s)]
        symbols.insert(0, self.start_symbol)
        for s in self.langlet.parse_nfa.nfas:
            if s not in self.unused_symbols:
                for sym in symbols:
                    seg = self.segtree[sym:s]
                    if seg:
                        self.expr_types.add(s)

    def compute_unused_symbols(self):
        # Without an explicit start symbol, fall back to the langlet's default
        # start symbol and derive the unused symbols from parse_nfa.start_symbols;
        # otherwise every rule not reachable from start_symbol is unused.
        if self.start_symbol is None:
            self.start_symbol = self.langlet.parse_nfa.start_symbols[0]
            self.unused_symbols = self.langlet.parse_nfa.start_symbols[1]
            self.unused_symbols.remove(self.start_symbol)
        else:
            rules = rule_ids(self.langlet, self.start_symbol)
            self.unused_symbols = set()
            for r in self.langlet.parse_nfa.nfas:
                if r not in rules:
                    self.unused_symbols.add(r)

    def expressions(self):
        # Return (node id, rule name, source text) triples for all expression
        # types; traces are computed lazily on the first call and cached.
        if self._expressions:
            return self._expressions
        self._compute_all_state_traces()
        self._insert_non_expr_state_traces()
        self._compute_all_token_traces()
        self._remove_duplicates()
        for s in self.expr_types:
            name = self.langlet.get_node_name(s)
            for tr in self.token_traces[s]:
                self._expressions.append((s, name, self.langlet.untokenize(tr)))
        return self._expressions

    def _remove_duplicates(self):
        # Drop token traces whose sequence of token ids has already been seen.
        S = set()
        for s in self.expr_types:
            traces = self.token_traces[s]
            new_traces = []
            for i, trace in enumerate(traces[:]):
                tup = tuple(s[0] for s in trace)
                if tup not in S:
                    new_traces.append(trace)
                    S.add(tup)
            self.token_traces[s] = new_traces

    def _compute_all_state_traces(self):
        # Compute all traces through each used rule NFA, dropping the trailing
        # state of every trace.
        for s, nfa in self.langlet.parse_nfa.nfas.items():
            if s is self.start_symbol or s not in self.unused_symbols:
                traces = compute_all_tr(1, nfa)
                for tr in traces:
                    del tr[-1]
                self.state_traces[s] = traces

    def _compute_all_token_traces(self):
        # Turn state traces into token traces. Traces that cannot be resolved
        # yet are collected in `rest` and retried until a pass over them makes
        # no further progress.
        Tr = []
        rest = []
        for s, traces in self.state_traces.items():
            for trace in traces:
                Tr.append((s, trace))
        n = 0
        while True:
            if Tr:
                s, trace = Tr.pop()
            else:
                if len(rest) == n:
                    break
                else:
                    n = len(rest)
                    Tr = rest[::-1]
                    rest = []
            visited = set([s])
            tokentrace = self._compute_token_trace(trace, visited)
            if tokentrace:
                tt = self.token_traces.get(s, [])
                tt.append(tokentrace)
                self.token_traces[s] = tt
            else:
                rest.append((s, trace))

    def _insert_non_expr_state_traces(self):
        # Splice the state traces of non-expression rules into traces of rules
        # that reference them, so their content still shows up in the result.
        non_expr_types = set()
        for s in self.state_traces:
            if s not in self.expr_types:
                non_expr_types.add(s)

        def insert(s, nids):
            for e in nids:
                if e == s:
                    continue
                for tr in self.state_traces[e][:]:
                    for i, state in enumerate(tr):
                        if state[0] == s:
                            for T in self.state_traces[s]:
                                self.state_traces[e].append(tr[:i] + T + tr[i + 1 :])
                            return e

        inserted = set()
        for s in non_expr_types:
            e = s
            while True:
                if not insert(e, self.expr_types):
                    f = insert(e, non_expr_types)
                    if f and f in inserted:
                        # repeat insertion
                        e = f
                    else:
                        break
                else:
                    inserted.add(e)
                    break

    def _compute_token_trace(self, state_trace, visited):
        # Convert a state trace into a token stream: keywords and tokens are
        # emitted directly, nonterminals are expanded via the segment tree or
        # resolved recursively; `visited` guards against cycles. Returns None
        # if the trace cannot be resolved yet.
        tokstream = []
        for state in state_trace:
            nid = state[0]
            if is_keyword(nid):
                tokstream.append([nid, self.langlet.get_node_name(nid)[4:]])
            elif is_token(nid):
                if nid == self.langlet.token.NAME:
                    name = self.Varnames[self._id % len(self.Varnames)]
                    tokstream.append([nid, name])
                    self._id += 1
                else:
                    tokstream.append([nid, self.tokgen.gen_token_string(nid + SYMBOL_OFFSET)])
            else:
                seg = self.segtree[nid : self.langlet.token.NAME]
                if seg:
                    S, P = proj_segment(seg)
                    for t in P:
                        if t == self.langlet.token.NAME:
                            tokstream.append([t, self.langlet.get_node_name(S if S != 0 else nid)])
                        elif is_keyword(t):
                            tokstream.append([t, self.langlet.get_node_name(t)[4:]])
                        else:
                            tokstream.append([t, self.tokgen.gen_token_string(t + SYMBOL_OFFSET)])
                else:
                    nt_traces = self.token_traces.get(nid, [])
                    if nt_traces:
                        idx = self._cnt % len(nt_traces)
                        self._cnt += 1
                        tokstream += nt_traces[idx]
                    else:
                        if nid in visited:
                            return
                        else:
                            visited.add(nid)
                            for i, st in enumerate(self.state_traces[nid][:]):
                                tr = self._compute_token_trace(st, visited)
                                if tr:
                                    tokstream += tr
                                    del self.state_traces[nid][i]
                                    tt = self.token_traces.get(nid, [])
                                    tt.append(tr)
                                    self.token_traces[nid] = tt
                                    break
                                else:
                                    return
                            visited.remove(nid)
        return tokstream
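A minimal usage sketch for the class above, added here as an assumption rather than taken from the original source: it only relies on the SourceGenerator constructor and the expressions() method shown in this example.

# Hedged sketch: `langlet` is assumed to be a Langscape-style langlet object;
# SourceGenerator is the class defined above.
def list_expressions(langlet, start_symbol=None):
    gen = SourceGenerator(langlet, start_symbol)
    for nid, name, text in gen.expressions():
        # Each entry pairs a rule id and rule name with a generated source snippet.
        print("%s %s: %s" % (nid, name, text))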