def __init__(self, start_symbol, grammar, lr_type=0):
    """Prepare an empty state graph for ``grammar`` rooted at ``start_symbol``.

    ``lr_type`` chooses which closure/goto pair of the ``Helper`` is bound
    to ``self.closure`` / ``self.goto`` and how ``self.start_set`` is
    seeded.  For any value other than ``LR0``, ``LR1`` or ``LALR`` those
    three attributes are simply left unset.
    """
    self.grammar = grammar
    self.start_symbol = start_symbol

    # Graph storage.
    self.state_sets = []
    self.edges = {}
    self.ids = {}

    # Work-list bookkeeping.
    self.todo = []
    self.done = set()
    self.maybe_compatible = {}

    # Timers and counters, all starting at zero.
    self.goto_time = self.add_time = self.closure_time = 0
    self.closure_count = self.addcount = 0
    self.weakly = self.weakly_count = 0
    self.mergetime = 0

    self.helper = Helper(grammar)

    if lr_type == LR0:
        self.closure = self.helper.closure_0
        self.goto = self.helper.goto_0
        initial_item = LR0Element(Production(None, [self.start_symbol]), 0)
        self.start_set = StateSet([initial_item])
    elif lr_type == LR1 or lr_type == LALR:
        self.closure = self.helper.closure_1
        self.goto = self.helper.goto_1
        self.start_set = seed = StateSet()
        # The initial item carries the finish symbol as its only lookahead.
        seed.add(
            LR0Element(Production(None, [self.start_symbol]), 0),
            set([FinishSymbol()]),
        )
def goto_1(grammar, state_set, symbol):
    """Return the closure of all items of ``state_set`` advanced over ``symbol``.

    Every item whose next symbol equals ``symbol`` is cloned, its position
    ``d`` is moved one step forward, and the clones are collected in a new
    ``StateSet`` that is handed to ``closure_1``.
    """
    kernel = StateSet()
    for item in state_set:
        if item.next_symbol() == symbol:
            shifted = item.clone()
            shifted.d = shifted.d + 1
            kernel.add(shifted)
    return closure_1(grammar, kernel)
def goto_0(self, state_set, symbol):
    """Advance the matching items of ``state_set`` over ``symbol``.

    Items from ``state_set.elements`` whose next symbol equals ``symbol``
    are cloned with their position ``d`` incremented; the result of
    ``self.closure_0`` on the collected clones is returned.
    """
    advanced = StateSet()
    for element in state_set.elements:
        upcoming = element.next_symbol()
        if upcoming == symbol:
            moved = element.clone()
            moved.d = moved.d + 1
            advanced.add(moved)
    return self.closure_0(advanced)
def goto_1(self, state_set, symbol):
    """Instrumented goto: count the call, trace it, and return the closure.

    The call counter in ``self.goto_count`` is keyed by the identity of
    ``state_set`` together with ``symbol``.  Matching items are cloned with
    their position ``d`` incremented and passed to ``self.closure_1``.
    """
    call_key = (id(state_set), symbol)
    self.goto_count[call_key] = self.goto_count.get(call_key, 0) + 1
    print("goto", state_set, symbol, self.goto_count[call_key])

    advanced = StateSet()
    for item in state_set:
        if item.next_symbol() == symbol:
            shifted = item.clone()
            shifted.d = shifted.d + 1
            advanced.add(shifted)

    print("goto END")
    return self.closure_1(advanced)
def closure_1(grammar, state_set):
    """Legacy free-function LR(1) closure, switched off by its first statement.

    The leading ``assert False`` makes every call fail before any work is
    done (unless assertions are disabled); the remaining body is kept
    unchanged in behaviour.
    """
    assert False
    closure = StateSet()

    # Step 1: the given items belong to their own closure.
    for item in state_set.elements:
        closure.add(item)

    # Step 2: expand items whose next symbol is a nonterminal.
    for item in closure:
        upcoming = item.next_symbol()
        if not isinstance(upcoming, Nonterminal):
            continue

        # Lookahead for the new items: union of first(beta + [l]) over every
        # lookahead l of the current item.
        first_set = set()
        for la in item.lookahead:
            beta_la = list(item.remaining_symbols())
            beta_la.append(la)
            first_set |= old2_first(grammar, beta_la)

        for rhs in grammar[upcoming].alternatives:
            # create epsilon symbol if alternative is empty
            if rhs == []:
                rhs = [Epsilon()]
            new_item = LR1Element(Production(upcoming, rhs), 0, first_set)
            if rhs == [epsilon]:
                new_item.d = 1
            closure.add(new_item)

    # merge states that only differ in their lookahead
    closure.merge()
    return closure
def closure_0(grammar, state_set):
    """Return the LR(0) closure of ``state_set`` as a new ``StateSet``.

    The result starts as a copy of ``state_set.elements``.  For every item
    in the result whose next symbol is a ``Nonterminal``, one fresh item at
    position 0 is added per alternative of that nonterminal.
    """
    closure = StateSet()

    # 1) A state set is part of its own closure.
    for item in state_set.elements:
        closure.add(item)

    # 2) Expand every item that is followed by a nonterminal.
    for item in closure:
        upcoming = item.next_symbol()
        if not isinstance(upcoming, Nonterminal):
            continue
        for rhs in grammar[upcoming].alternatives:
            # An empty alternative is represented by the epsilon symbol.
            if rhs == []:
                rhs = [epsilon]
            fresh = State(Production(upcoming, rhs), 0)
            if rhs == [epsilon]:
                fresh.d = 1
            closure.add(fresh)
    return closure
class StateGraph(object):
    """Graph of LR state sets connected by symbol-labelled edges.

    ``state_sets`` is a list indexed by state id, ``edges`` maps
    ``(from_id, symbol)`` to the target state id and ``ids`` maps a state
    set to its id.
    """

    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty graph for ``grammar`` rooted at ``start_symbol``.

        ``lr_type`` selects the closure/goto pair taken from ``Helper`` and
        how ``start_set`` is seeded.  For values other than ``LR0``, ``LR1``
        or ``LALR`` those attributes are left unset.
        """
        self.grammar = grammar
        self.start_symbol = start_symbol
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []
        self.done = set()
        self.maybe_compatible = {}

        # Timers and counters reported by build().
        self.goto_time = 0
        self.add_time = 0
        self.closure_time = 0
        self.closure_count = 0
        self.addcount = 0
        self.weakly = 0
        self.weakly_count = 0
        self.mergetime = 0

        helper = Helper(grammar)
        self.helper = helper
        if lr_type == LR0:
            self.closure = helper.closure_0
            self.goto = helper.goto_0
            self.start_set = StateSet([LR0Element(Production(None, [self.start_symbol]), 0)])
        elif lr_type == LR1 or lr_type == LALR:
            self.closure = helper.closure_1
            self.goto = helper.goto_1
            self.start_set = StateSet()
            self.start_set.add(LR0Element(Production(None, [self.start_symbol]), 0), set([FinishSymbol()]))

    def build(self):
        """Construct all state sets and edges, starting from ``start_set``.

        States are stored un-closed while the work list is processed; each
        popped state is closed, its items are grouped by next symbol into
        successor sets, and every successor is handed to ``add``.  After the
        work list is empty the stored state sets are replaced by their
        closures and ``closure`` / ``goto`` are reset to ``None``.
        """
        # NOTE(review): the reset targets State._hashtime while the log line
        # below reads StateSet._hashtime -- confirm which one is intended.
        State._hashtime = 0
        start = time()

        start_set = self.start_set
        self.state_sets.append(start_set)
        self.ids[start_set] = 0
        self.todo.append(0)

        while self.todo:
            self.addcount += 1
            _id = self.todo.pop()
            self.done.add(_id)

            closure_start = time()
            state_set = self.closure(self.state_sets[_id])
            self.closure_count += 1
            self.closure_time += time() - closure_start

            # Create the successor sets first; their closure is computed
            # when they are taken from the work list.
            new_gotos = {}
            goto_start = time()
            for lrelement in state_set.elements:
                symbol = lrelement.next_symbol()
                if not symbol:
                    # item is final, nothing to advance over
                    continue
                new_element = lrelement.clone()
                new_element.d += 1
                new_element_la = state_set.get_lookahead(lrelement)
                stateset = new_gotos.setdefault(symbol, StateSet())
                stateset.add(new_element, new_element_la)
            self.goto_time += time() - goto_start

            for symbol in new_gotos:
                add_start = time()
                self.add(_id, symbol, new_gotos[symbol])
                self.add_time += time() - add_start

        end = time()
        logging.info("add time %s", self.add_time)
        logging.info("closure time %s", self.closure_time)
        logging.info("closure time helper %s", self.helper.closure_time)
        logging.info("goto time %s", self.goto_time)
        logging.info("hashtime %s", StateSet._hashtime)
        logging.info("addcount %s", self.addcount)
        logging.info("states %s", len(self.state_sets))
        logging.info("weakly %s", self.weakly)
        logging.info("weakly count %s", self.weakly_count)
        logging.info("mergetime %s", self.mergetime)

        # apply closure
        logging.info("Apply closure to states")
        clstart = time()
        new_state_sets = []
        new_ids = {}
        for state in self.state_sets:
            _id = self.ids[state]
            new_state = self.closure(state)
            new_state_sets.append(new_state)
            # Keep ``ids`` a state-set -> integer id mapping (it used to be
            # filled with the state set itself here).
            new_ids[new_state] = _id
        self.state_sets = new_state_sets
        logging.info("after closure %s", len(new_state_sets))
        logging.info("edges %s", len(set(self.edges.values())))
        self.ids = new_ids
        logging.info(time() - clstart)
        logging.info("Finished building Stategraph in %s", end-start)
        self.closure = None
        self.goto = None

    def weakly_compatible(self, s1, s2):
        """Return True if ``s1`` and ``s2`` may be merged.

        The sets must have equal ``elements``.  With a single element they
        are always compatible.  Otherwise they are rejected as soon as a
        pair of elements I, J is found whose lookaheads intersect across
        the two sets while being disjoint inside both ``s1`` and ``s2``.
        """
        self.weakly_count += 1
        core = s1.elements
        if core != s2.elements:
            return False
        if len(core) == 1:
            return True

        self.weakly -= time()
        try:
            core = list(core)
            la1 = s1.lookaheads
            la2 = s2.lookaheads
            for i in range(0, len(core) - 1):
                I = core[i]
                for j in range(i + 1, len(core)):
                    J = core[j]
                    if ((la1[I] & la2[J] or la1[J] & la2[I])
                            and not la1[I] & la1[J]
                            and not la2[I] & la2[J]):
                        return False
            return True
        finally:
            # Close the timing window on every exit path.
            self.weakly += time()

    def find_stateset_without_lookahead(self, state_set):
        """Return the first stored set for which ``state_set.equals(ss, True)`` holds, else None."""
        for ss in self.state_sets:
            if state_set.equals(ss, True):
                return ss
        return None

    def merge_lookahead(self, old, new):
        """Union the lookaheads of ``new`` into ``old``.

        Returns True if at least one element of ``new`` contributed a
        lookahead that ``old`` did not have yet.
        """
        self.mergetime -= time()
        changed = False
        for element in new.elements:
            la1 = new.get_lookahead(element)
            la2 = old.get_lookahead(element)
            if la1 - la2:
                changed = True
                old.lookaheads[element] = la2 | la1
        self.mergetime += time()
        return changed

    def add(self, from_id, symbol, state_set):
        """Connect ``from_id`` via ``symbol`` to ``state_set``, merging if possible.

        Only states previously reached via ``symbol`` are considered as
        merge candidates.  A state whose lookahead changed through a merge
        and which was already processed is put back on the work list.
        """
        merged = False
        # only check states that can be reached by symbol
        for _id in self.maybe_compatible.setdefault(symbol, set()):
            candidate = self.state_sets[_id]
            if self.weakly_compatible(state_set, candidate):
                merged = True
                changed = self.merge_lookahead(candidate, state_set)
                self.edges[(from_id, symbol)] = _id
                if changed and _id in self.done:
                    # state has to be processed again with its new lookahead
                    self.todo.append(_id)
                    self.done.remove(_id)
        if not merged:
            # add normally and put on todo list
            self.state_sets.append(state_set)
            _id = len(self.state_sets) - 1
            self.edges[(from_id, symbol)] = _id
            self.ids[state_set] = _id
            self.todo.append(_id)
            # remember the new state as a merge candidate for this symbol
            self.maybe_compatible.setdefault(symbol, set()).add(_id)

    def oldadd(self, from_id, symbol, state_set):
        """Canonical LR(1) variant of ``add``: reuse a state only if ``ids`` knows it."""
        add_start = time()
        _id = self.ids.get(state_set)
        if _id is None:
            # new state
            self.addcount += 1
            self.state_sets.append(state_set)
            _id = len(self.state_sets) - 1
            self.ids[state_set] = _id
            self.todo.append(_id)
        self.edges[(from_id, symbol)] = _id
        add_end = time()
        self.add_time += add_end - add_start

    def follow(self, from_id, symbol):
        """Return the id reached from ``from_id`` via ``symbol``, or None."""
        try:
            return self.edges[(from_id, symbol)]
        except KeyError:
            return None

    def get_symbols(self):
        """Return the set of all symbols that label an edge."""
        return set(symbol for _, symbol in self.edges)

    def get_state_set(self, i):
        """Return the state set stored under id ``i``."""
        return self.state_sets[i]

    def convert_lalr(self):
        """Merge state sets for which ``equals(other, False)`` holds.

        The elements of a duplicate are added to the surviving set, the
        duplicate is removed from ``state_sets`` and ``edges`` is rewritten
        so that every id refers to the surviving set at its position in the
        shrunken list.

        NOTE(review): ``self.ids`` is not touched here (it never was);
        confirm whether callers need it renumbered as well.
        """
        count = len(self.state_sets)
        merged_into = {}  # removed id -> id of the set it was merged into
        for i in range(count):
            if i in merged_into:
                continue
            for j in range(count):
                if j in merged_into:
                    continue
                s1 = self.state_sets[i]
                s2 = self.state_sets[j]
                if s1 is not s2 and s1.equals(s2, False):
                    for e in s2:
                        s1.add(e)
                    # this should automatically merge the lookahead of the states
                    s1.merge()
                    merged_into[j] = i

        if not merged_into:
            return

        # Position of every surviving state after the duplicates are removed.
        new_index = {}
        for old in range(count):
            if old not in merged_into:
                new_index[old] = len(new_index)

        def relocate(state_id):
            while state_id in merged_into:
                state_id = merged_into[state_id]
            return new_index.get(state_id, state_id)

        # Build the redirected edges from a snapshot; the dict must not be
        # modified while it is being iterated.
        remapped = {}
        for (fromid, symbol), to in list(self.edges.items()):
            remapped[(relocate(fromid), symbol)] = relocate(to)
        self.edges.clear()
        self.edges.update(remapped)

        # Pop from the back so that earlier indices stay valid.
        for j in sorted(merged_into, reverse=True):
            self.state_sets.pop(j)
class StateGraph(object):
    """Graph of LR state sets connected by symbol-labelled edges.

    ``state_sets`` is a list indexed by state id, ``edges`` maps
    ``(from_id, symbol)`` to the target state id and ``ids`` maps a state
    set to its id.
    """

    def __init__(self, start_symbol, grammar, lr_type=0):
        """Prepare an empty graph for ``grammar`` rooted at ``start_symbol``.

        ``lr_type`` selects the closure/goto pair taken from ``Helper`` and
        how ``start_set`` is seeded.  For values other than ``LR0``, ``LR1``
        or ``LALR`` those attributes are left unset.
        """
        self.grammar = grammar
        self.start_symbol = start_symbol
        self.state_sets = []
        self.edges = {}
        self.ids = {}
        self.todo = []
        self.done = set()
        self.maybe_compatible = {}

        # Timers and counters reported by build().
        self.goto_time = 0
        self.add_time = 0
        self.closure_time = 0
        self.closure_count = 0
        self.addcount = 0
        self.weakly = 0
        self.weakly_count = 0
        self.mergetime = 0

        helper = Helper(grammar)
        self.helper = helper
        if lr_type == LR0:
            self.closure = helper.closure_0
            self.goto = helper.goto_0
            self.start_set = StateSet([LR0Element(Production(None, [self.start_symbol]), 0)])
        elif lr_type == LR1 or lr_type == LALR:
            self.closure = helper.closure_1
            self.goto = helper.goto_1
            self.start_set = StateSet()
            self.start_set.add(LR0Element(Production(None, [self.start_symbol]), 0), set([FinishSymbol()]))

    def build(self):
        """Construct all state sets and edges, starting from ``start_set``.

        States are stored un-closed while the work list is processed; each
        popped state is closed, its items are grouped by next symbol into
        successor sets, and every successor is handed to ``add``.  After the
        work list is empty the stored state sets are replaced by their
        closures and ``closure`` / ``goto`` are reset to ``None``.
        """
        # NOTE(review): the reset targets State._hashtime while the log line
        # below reads StateSet._hashtime -- confirm which one is intended.
        State._hashtime = 0
        start = time()

        # The start set is stored without applying the closure first.
        start_set = self.start_set
        self.state_sets.append(start_set)
        self.ids[start_set] = 0
        self.todo.append(0)

        while self.todo:
            self.addcount += 1
            _id = self.todo.pop()
            self.done.add(_id)

            closure_start = time()
            state_set = self.closure(self.state_sets[_id])
            self.closure_count += 1
            self.closure_time += time() - closure_start

            # Create the successor sets first; their closure is computed
            # when they are taken from the work list.
            new_gotos = {}
            goto_start = time()
            for lrelement in state_set.elements:
                symbol = lrelement.next_symbol()
                if not symbol:
                    # item is final, nothing to advance over
                    continue
                new_element = lrelement.clone()
                new_element.d += 1
                new_element_la = state_set.get_lookahead(lrelement)
                stateset = new_gotos.setdefault(symbol, StateSet())
                stateset.add(new_element, new_element_la)
            self.goto_time += time() - goto_start

            for symbol in new_gotos:
                add_start = time()
                self.add(_id, symbol, new_gotos[symbol])
                self.add_time += time() - add_start

        end = time()
        logging.info("add time %s", self.add_time)
        logging.info("closure time %s", self.closure_time)
        logging.info("closure time helper %s", self.helper.closure_time)
        logging.info("goto time %s", self.goto_time)
        logging.info("hashtime %s", StateSet._hashtime)
        logging.info("addcount %s", self.addcount)
        logging.info("states %s", len(self.state_sets))
        logging.info("weakly %s", self.weakly)
        logging.info("weakly count %s", self.weakly_count)
        logging.info("mergetime %s", self.mergetime)

        # apply closure
        logging.info("Apply closure to states")
        clstart = time()
        new_state_sets = []
        new_ids = {}
        for state in self.state_sets:
            _id = self.ids[state]
            new_state = self.closure(state)
            new_state_sets.append(new_state)
            # Keep ``ids`` a state-set -> integer id mapping (it used to be
            # filled with the state set itself here).
            new_ids[new_state] = _id
        self.state_sets = new_state_sets
        logging.info("after closure %s", len(new_state_sets))
        logging.info("edges %s", len(set(self.edges.values())))
        self.ids = new_ids
        logging.info(time() - clstart)
        logging.info("Finished building Stategraph in %s", end-start)
        self.closure = None
        self.goto = None

    def weakly_compatible(self, s1, s2):
        """Return True if ``s1`` and ``s2`` may be merged.

        The sets must have equal ``elements``.  With a single element they
        are always compatible.  Otherwise they are rejected as soon as a
        pair of elements I, J is found whose lookaheads intersect across
        the two sets while being disjoint inside both ``s1`` and ``s2``.
        """
        self.weakly_count += 1
        core = s1.elements
        if core != s2.elements:
            return False
        if len(core) == 1:
            return True

        self.weakly -= time()
        try:
            core = list(core)
            la1 = s1.lookaheads
            la2 = s2.lookaheads
            for i in range(0, len(core) - 1):
                I = core[i]
                for j in range(i + 1, len(core)):
                    J = core[j]
                    if ((la1[I] & la2[J] or la1[J] & la2[I])
                            and not la1[I] & la1[J]
                            and not la2[I] & la2[J]):
                        return False
            return True
        finally:
            # Close the timing window on every exit path.
            self.weakly += time()

    def find_stateset_without_lookahead(self, state_set):
        """Return the first stored set for which ``state_set.equals(ss, True)`` holds, else None."""
        for ss in self.state_sets:
            if state_set.equals(ss, True):
                return ss
        return None

    def merge_lookahead(self, old, new):
        """Union the lookaheads of ``new`` into ``old``.

        Returns True if at least one element of ``new`` contributed a
        lookahead that ``old`` did not have yet.
        """
        self.mergetime -= time()
        changed = False
        for element in new.elements:
            la1 = new.get_lookahead(element)
            la2 = old.get_lookahead(element)
            if la1 - la2:
                changed = True
                old.lookaheads[element] = la2 | la1
        self.mergetime += time()
        return changed

    def add(self, from_id, symbol, state_set):
        """Connect ``from_id`` via ``symbol`` to ``state_set``, merging if possible.

        Only states previously reached via ``symbol`` are considered as
        merge candidates.  A state whose lookahead changed through a merge
        and which was already processed is put back on the work list.
        """
        merged = False
        # only check states that can be reached by symbol
        for _id in self.maybe_compatible.setdefault(symbol, set()):
            candidate = self.state_sets[_id]
            if self.weakly_compatible(state_set, candidate):
                merged = True
                changed = self.merge_lookahead(candidate, state_set)
                self.edges[(from_id, symbol)] = _id
                if changed and _id in self.done:
                    # state has to be processed again with its new lookahead
                    self.todo.append(_id)
                    self.done.remove(_id)
        if not merged:
            # add normally and put on todo list
            self.state_sets.append(state_set)
            _id = len(self.state_sets) - 1
            self.edges[(from_id, symbol)] = _id
            self.ids[state_set] = _id
            self.todo.append(_id)
            # remember the new state as a merge candidate for this symbol
            self.maybe_compatible.setdefault(symbol, set()).add(_id)

    def oldadd(self, from_id, symbol, state_set):
        """Canonical LR(1) variant of ``add``: reuse a state only if ``ids`` knows it."""
        add_start = time()
        _id = self.ids.get(state_set)
        if _id is None:
            # new state
            self.addcount += 1
            self.state_sets.append(state_set)
            _id = len(self.state_sets) - 1
            self.ids[state_set] = _id
            self.todo.append(_id)
        self.edges[(from_id, symbol)] = _id
        add_end = time()
        self.add_time += add_end - add_start

    def follow(self, from_id, symbol):
        """Return the id reached from ``from_id`` via ``symbol``, or None."""
        try:
            return self.edges[(from_id, symbol)]
        except KeyError:
            return None

    def get_symbols(self):
        """Return the set of all symbols that label an edge."""
        return set(symbol for _, symbol in self.edges)

    def get_state_set(self, i):
        """Return the state set stored under id ``i``."""
        return self.state_sets[i]

    def convert_lalr(self):
        """Merge state sets for which ``equals(other, False)`` holds.

        The elements of a duplicate are added to the surviving set, the
        duplicate is removed from ``state_sets`` and ``edges`` is rewritten
        so that every id refers to the surviving set at its position in the
        shrunken list.

        NOTE(review): ``self.ids`` is not touched here (it never was);
        confirm whether callers need it renumbered as well.
        """
        count = len(self.state_sets)
        merged_into = {}  # removed id -> id of the set it was merged into
        for i in range(count):
            if i in merged_into:
                continue
            for j in range(count):
                if j in merged_into:
                    continue
                s1 = self.state_sets[i]
                s2 = self.state_sets[j]
                if s1 is not s2 and s1.equals(s2, False):
                    for e in s2:
                        s1.add(e)
                    # this should automatically merge the lookahead of the states
                    s1.merge()
                    merged_into[j] = i

        if not merged_into:
            return

        # Position of every surviving state after the duplicates are removed.
        new_index = {}
        for old in range(count):
            if old not in merged_into:
                new_index[old] = len(new_index)

        def relocate(state_id):
            while state_id in merged_into:
                state_id = merged_into[state_id]
            return new_index.get(state_id, state_id)

        # Build the redirected edges from a snapshot; the dict must not be
        # modified while it is being iterated.
        remapped = {}
        for (fromid, symbol), to in list(self.edges.items()):
            remapped[(relocate(fromid), symbol)] = relocate(to)
        self.edges.clear()
        self.edges.update(remapped)

        # Pop from the back so that earlier indices stay valid.
        for j in sorted(merged_into, reverse=True):
            self.state_sets.pop(j)
def closure_1(self, state_set):
    """Return the LR(1) closure of ``state_set`` as a new ``StateSet``.

    Works on a plain set of elements plus a separate element -> lookahead
    dictionary.  Each round expands only the elements that were new, or
    whose lookahead grew, in the previous round; the loop ends when a round
    produces nothing new.
    """
    la_dict = {}
    result = set()
    pending = set()

    # Step 1: seed with the elements of the given state set.
    # NOTE(review): the lookahead sets are stored by reference and may be
    # extended in place by ``|=`` below -- confirm that get_lookahead()
    # hands out sets that are safe to mutate.
    for element in state_set.elements:
        la_dict[element] = state_set.get_lookahead(element)
        result.add(element)
        pending.add(element)

    # Step 2: expand until no element is new and no lookahead grows.
    while pending:
        newelements = set()
        for state in pending:
            if state.isfinal():
                continue
            symbol = state.next_symbol()
            if not isinstance(symbol, Nonterminal):
                continue

            # Lookahead of the derived elements: union of
            # first(remaining symbols + [l]) over every lookahead l.
            remaining = list(state.remaining_symbols())
            f = set()
            for l in la_dict[state]:
                f |= self.first(remaining + [l])

            rule = self.grammar[symbol]
            for i, a in enumerate(rule.alternatives):
                # create epsilon symbol if alternative is empty
                if a == []:
                    a = [Epsilon()]
                p = Production(symbol, a, rule.annotations[i], rule.precs[i])
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if i in rule.inserts:
                    insert = rule.inserts[i]
                    p.inserts[insert[0]] = insert[1]
                s = LR0Element(p, 0)
                if a == [epsilon]:
                    s.d = 1

                # NEW ELEMENT:
                # 1. completely new (+lookahead): add to result
                # 2. new lookahead: update lookahead in la_dict
                #    -> both go into the next working set
                # 3. already known: ignore
                if s in result:
                    if f.issubset(la_dict[s]):
                        continue
                    la_dict[s] |= f
                else:
                    la_dict[s] = set(f)
                    result.add(s)
                newelements.add(s)
        pending = newelements

    # add lookaheads
    final_result = StateSet()
    for element in result:
        final_result.add(element, la_dict[element])
    return final_result