def lr_is_legal(self, items):
    """Report whether every item set is free of shift/reduce conflicts.

    ``items`` is a sequence of item sets (states); every item is a sequence
    of symbols that contains ``self.dot``.  An item whose dot is its last
    element is a reduce item; an item whose dot is directly followed by a
    terminal is a shift item.

    :param items: the collection of item sets to inspect.
    :return: ``False`` as soon as one item set holds two reduce items, or a
        reduce item together with a shift item; ``True`` otherwise
        (including for an empty collection).
    """
    for item_set in items:
        # A conflict is a property of a single state, so the bookkeeping
        # restarts for every item set instead of leaking across states.
        reduce_count = 0
        has_shift = False
        for it in item_set:
            after_dot = it.index(self.dot) + 1
            if after_dot == len(it):
                reduce_count += 1
            elif is_terminal(it[after_dot]):
                has_shift = True
        # Decide after the whole set was scanned so the verdict does not
        # depend on whether the reduce item comes before or after the
        # shift item.
        if reduce_count > 1 or (reduce_count == 1 and has_shift):
            return False
    return True
def get_v(self):
    """Collect the grammar symbols and rebuild the symbol index maps.

    Reads ``self.grams``; the first element of each production is its
    left-hand side and the remaining elements are the right-hand side.
    The symbols are appended to the existing ``self.v_n`` / ``self.v_t`` /
    ``self.v_s`` lists, which are then replaced by sorted, de-duplicated
    lists:

    * ``self.v_n`` - non-terminals,
    * ``self.v_t`` - terminals, followed by the end marker ``'#'``,
    * ``self.v_s`` - every symbol that was seen.

    ``self.v_t_2_int`` and ``self.v_n_2_int`` map each symbol of ``v_t`` /
    ``v_n`` to its position in that list.
    """
    for production in self.grams:
        head, body = production[0], production[1:]
        self.v_n.append(head)
        self.v_s.append(head)
        for symbol in body:
            if is_terminal(symbol):
                self.v_t.append(symbol)
            else:
                self.v_n.append(symbol)
            self.v_s.append(symbol)

    self.v_s = sorted(set(self.v_s))
    self.v_n = sorted(set(self.v_n))

    # '#' is the symbol that signals acceptance during analysis.  It must
    # appear exactly once, as the last terminal: drop any '#' that is
    # already present (repeated call, or a grammar that mentions it) before
    # appending, otherwise v_t.index('#') and v_t_2_int['#'] would disagree.
    terminals = set(self.v_t)
    terminals.discard('#')
    self.v_t = sorted(terminals)
    self.v_t.append('#')

    # Symbol -> column index lookups.
    self.v_t_2_int = {symbol: pos for pos, symbol in enumerate(self.v_t)}
    self.v_n_2_int = {symbol: pos for pos, symbol in enumerate(self.v_n)}
def get_CLOSURE(self, tmp):
    """Build the closure of the item list ``tmp``.

    Every item of ``tmp`` belongs to the closure.  Whenever the symbol
    right after the dot is a non-terminal, the items returned by
    ``self.get_VN_gram`` for that symbol are added as well.

    :param tmp: list of items; it doubles as the work list and is extended
        in place with every item that is newly added to the closure.
    :return: list of the closure's items, without duplicates, in discovery
        order.
    """
    closure = []
    # ``tmp`` grows while it is being walked, so freshly added items are
    # visited by this very loop.
    for current in tmp:
        if current not in closure:
            closure.append(current)

        after_dot = current.index(self.dot) + 1
        if after_dot == len(current):
            # Dot at the very end: nothing left to expand.
            continue

        symbol = current[after_dot]
        if is_terminal(symbol):
            continue

        # Pull in the items for the non-terminal that follows the dot.
        for produced in self.get_VN_gram(symbol):
            if produced not in closure:
                closure.append(produced)
                tmp.append(produced)
    return closure
def get_lr_table(self, items):
    """Fill ``self.ACTION`` and ``self.GOTO`` from the item sets ``items``.

    The tables are first (re)initialised through ``self.init_lr_table`` and
    printed through ``self.print_lt_table`` once they are complete.

    For every item of item set ``i``:

    * dot at the end: if ``self.find_gram`` knows the production (result
      other than ``-1``), every still-empty cell of ``ACTION[i]`` becomes
      ``['r', production_index]``.  Cells that are already occupied are
      kept and a notice is printed.
    * otherwise the successor set is computed with ``self.go`` and looked
      up with ``self.is_inItems``.  For a terminal the cell
      ``ACTION[i][column]`` becomes ``['s', target]`` (an occupied cell is
      overwritten after printing a notice); for a non-terminal
      ``GOTO[i][column]`` becomes ``target``.

    This method itself does not write an accept entry.

    :param items: list of item sets; the position in the list is the state
        number.
    :raises Exception: when the successor of an item is not among ``items``.
    """
    self.init_lr_table(items)

    for state, item_set in enumerate(items):
        for it in item_set:
            after_dot = it.index(self.dot) + 1

            if after_dot == len(it):
                # Completed item: reduce on every column that is still free.
                production = self.find_gram(it)
                if production == -1:
                    continue
                for column, occupant in enumerate(self.ACTION[state]):
                    if occupant:
                        # Conflict: the earlier entry wins.
                        print('簇:{:>05d} 遇到 {:18s} 保留前者:{}'.format(
                            state, self.v_t[column], occupant))
                    else:
                        self.ACTION[state][column] = ['r', production]
                continue

            # Dot in front of a symbol: shift or goto on that symbol.
            successor = self.go(item_set, it[after_dot])
            target = self.is_inItems(successor, items)
            symbol = it[after_dot]
            if target == -1:
                raise Exception("go不来")

            if is_terminal(symbol):
                column = self.v_t.index(symbol)
                if self.ACTION[state][column]:
                    # Conflict: the shift replaces whatever was there.
                    print('发生冲突族:{:>06d} info:移进'.format(state))
                self.ACTION[state][column] = ['s', target]
            else:
                # Column of the non-terminal in the GOTO table.
                column = self.v_n.index(symbol)
                self.GOTO[state][column] = target

    self.print_lt_table()
def analysis(self, input_seq: list, t_unit, sema_analysis=False):
    """Parse ``input_seq`` top-down with the FIRST/FOLLOW data in ``self.g_dict``.

    :param input_seq: list of tokens.  It is consumed in place: the end
        marker ``['#']`` is appended and tokens are popped from the front.
        A token is read through ``token[-1]`` (its grammar symbol) and
        ``token[2]`` (the value stored in the matching tree node); for
        error reports its ``cur_line``, ``cur_col`` and ``val`` attributes
        are used.
    :param t_unit: value of the root tree node, i.e. the symbol the parse
        starts from.
    :param sema_analysis: when true, a ``SemanticAnalysis`` instance is fed
        every matched terminal and every expanded non-terminal, and its
        tables and errors are included in the result.
    :return: ``(root, tables, errors)`` where ``root`` is the root tree
        node, ``tables`` is ``{}`` or the ``'fun'`` / ``'con'`` / ``'var'``
        tables of the semantic analysis, and ``errors`` is the formatted
        error list after it was passed through ``self._fi``.
    """
    # Root of the parse tree; the stack starts as [bottom marker, root].
    start = TreeNode('0', t_unit)
    anal_seq = [TreeNode('-1', '#'), start]

    # Terminate the input with '#' and load the first token.
    input_seq.append(['#'])
    input_node = input_seq.pop(0)
    a = input_node[-1]

    error = []
    semantic_analysis = SemanticAnalysis() if sema_analysis else None
    self.br = False

    def record(token, symbol, missing):
        # One entry per syntax error; ``missing`` selects the message
        # template used when the entries are formatted below.
        error.append((len(error), token.cur_line, token.cur_col,
                      token.val, symbol, missing))

    while True:
        tree_node = anal_seq.pop()
        x = tree_node.value
        if x == '#':
            break

        if is_terminal(x):
            if x == a:
                # Matched: store the token's value in the tree node
                # (e.g. 'INT' -> 100) and advance the input.
                tree_node.value = input_node[2]
                if sema_analysis:
                    semantic_analysis.analysis_terminal(input_node)
                input_node = input_seq.pop(0)
                a = input_node[-1]
            elif a != '#':
                # The terminal on the stack is missing in front of the
                # current token; the stack symbol is simply dropped.
                record(input_node, x, True)
            continue

        if sema_analysis:
            semantic_analysis.analysis_non_terminal(x, len(anal_seq) - 1)

        entry = self.g_dict[x]
        if a in entry.first:
            # Expand with the selected candidate, pushed in reverse order
            # so that its first symbol ends up on top of the stack.
            anal_seq.extend(add_tree_node(tree_node, entry.first[a])[::-1])
            continue
        if 'EMPTY' in entry.first and a in entry.follow:
            # The non-terminal derives the empty string here.
            add_tree_node(tree_node, ['Σ'])
            continue

        # No table entry for (x, a).  The end marker is the plain list
        # ['#'] and carries no position attributes, so it must never reach
        # the error record; this guard now covers both error paths.
        if a == '#':
            continue
        record(input_node, a, False)
        # error_deal returns the token from which parsing resumes.
        input_node = self.error_deal(input_node, x, tree_node, anal_seq,
                                     input_seq)
        a = input_node[-1]

    fu_table = {}
    if sema_analysis:
        fu_table = {
            'fun': semantic_analysis.fun_table,
            'con': semantic_analysis.constant_table,
            'var': semantic_analysis.variable_table,
        }
        for sem_err in semantic_analysis.error:
            print(sem_err)
        error.extend(tuple(sem_err) for sem_err in semantic_analysis.error)

    # De-duplicate, order by the leading field and render the messages.
    error = sorted(set(error), key=lambda rec: rec[0])
    error_info_list = []
    for _, line, col, val, expected, missing in error:
        if missing:
            error_info_list.append(
                '第{:^3d}行第{:^3d}列 符号{:^7}前缺失符号{:^7}'.format(
                    int(line) + 1, int(col), val, expected))
        else:
            error_info_list.append('第{:^3d}行第{:^3d}列 符号{:^7}附近存在错误'.format(
                int(line) + 1, int(col), val))
    return start, fu_table, self._fi(error_info_list)
def _get_follow(self):
    """Run one propagation pass that extends the ``follow`` lists in ``self.g_dict``.

    Every candidate of every entry is scanned from right to left:

    * a non-terminal at the very end of a candidate receives the owner's
      ``follow``;
    * a non-terminal that stands directly before a terminal receives that
      terminal (a terminal starting with a single quote contributes the
      character after the quote);
    * a non-terminal that stands directly before another non-terminal
      receives that neighbour's ``first`` keys except ``'EMPTY'``, and also
      the owner's ``follow`` as long as every symbol to its right so far
      was a non-terminal whose ``first`` contains ``'EMPTY'``.

    The ``follow`` lists are only appended to; nothing is de-duplicated.
    """
    for owner in self.g_dict.values():
        for candidate in owner.candidate_list:
            if len(candidate) == 1:
                # Single symbol: A -> B gives follow(B) += follow(A).
                only = candidate[0]
                if not is_terminal(only):
                    self.g_dict[only].follow.extend(owner.follow)
                continue

            reversed_symbols = candidate[::-1]
            right = reversed_symbols[0]
            # True while everything to the right of the scanned symbol can
            # vanish; starts true only if the candidate ends in a
            # non-terminal (S -> A B D), never for S -> A B a.
            tail_can_vanish = not is_terminal(right)
            if tail_can_vanish:
                # S -> A B D: follow(D) += follow(S)
                self.g_dict[right].follow.extend(owner.follow)

            for symbol in reversed_symbols[1:]:
                if is_terminal(symbol):
                    # Terminals receive nothing and block the owner's
                    # follow from travelling further left.
                    tail_can_vanish = False
                elif is_terminal(right):
                    # S -> A a B: follow(A) += a (leading quote removed).
                    self.g_dict[symbol].follow.append(
                        right[1] if right[0] == "'" else right)
                else:
                    # S -> A B D: follow(A) += first(B) without EMPTY.
                    self.g_dict[symbol].follow.extend(
                        key for key in self.g_dict[right].first.keys()
                        if key != 'EMPTY')
                    if tail_can_vanish and ('EMPTY' in self.g_dict[right].first):
                        # Everything to the right may vanish:
                        # follow(A) += follow(S).
                        self.g_dict[symbol].follow.extend(owner.follow)
                    else:
                        tail_can_vanish = False
                right = symbol
def get_first(self):
    """Compute the ``first`` table of every entry in ``self.g_dict``.

    ``first`` maps a leading symbol to the candidate that is selected when
    that symbol is seen.  The work is done in three passes:

    1. A candidate that starts with a terminal or with ``'EMPTY'`` is
       stored in ``first`` under that symbol; every other candidate is
       registered as a key of ``first_relation``.
    2. For each registered candidate, ``first_relation[candidate]`` is set
       to the length of the prefix that can contribute leading symbols:
       the prefix ends at the first terminal, or at the first non-terminal
       that has no candidate starting with ``'EMPTY'``.  Only such a
       direct ``'EMPTY'`` candidate counts as "can vanish".
    3. For each registered candidate the symbols of that prefix are
       processed as a growing work list: a terminal is stored in ``first``
       directly, a non-terminal contributes its own ``first`` keys (except
       ``'EMPTY'``, and without replacing existing keys) and queues the
       prefixes of its own registered candidates.
    """
    # Pass 1: A -> a T   or   A -> EMPTY
    for sponser in self.g_dict.values():
        for candidate in sponser.candidate_list:
            word = candidate[0]
            if is_terminal(word) or word == 'EMPTY':
                sponser.first[word] = candidate
            else:
                sponser.first_relation[candidate] = None

    # Pass 2: A -> X1 X2 X3 a, find how many leading symbols matter.
    for sponser in self.g_dict.values():
        first_relation = sponser.first_relation
        for candidate in list(first_relation):
            count = 0
            for word in candidate:
                count += 1
                # The decision is taken afresh for every symbol: anything
                # behind a terminal or a non-vanishing non-terminal can
                # never start the candidate and must not be counted.
                if is_terminal(word):
                    break
                can_vanish = any(
                    can[0] == 'EMPTY'
                    for can in self.g_dict[word].candidate_list)
                if not can_vanish:
                    break
            first_relation[candidate] = count

    # Pass 3: merge the FIRST information along the recorded prefixes.
    for sponser in self.g_dict.values():
        for candidate, count in sponser.first_relation.items():
            pending = list(candidate[:count])
            # ``pending`` grows while it is walked, which makes the merge
            # transitive.
            for word in pending:
                if is_terminal(word):
                    sponser.first[word] = candidate
                    continue
                # Leading symbols already known for this non-terminal.
                for symbol in self.g_dict[word].first:
                    if symbol == "EMPTY":
                        continue
                    if symbol not in sponser.first:
                        sponser.first[symbol] = candidate
                # Queue the prefixes of its own registered candidates.
                relation = self.g_dict[word].first_relation
                for other, prefix_len in relation.items():
                    for queued in other[:prefix_len]:
                        if queued not in pending:
                            pending.append(queued)