def cal_firstVt(gram_list: list, Vn_list: list, Vt_list: list) -> dict:
    """Compute the FIRSTVT set of every non-terminal of an operator grammar.

    Two rules are applied to every production ``P -> X1 X2 ...`` until no set
    grows any more:

    * the first terminal on the right-hand side belongs to FIRSTVT(P);
    * if the right-hand side starts with a non-terminal Q, then FIRSTVT(Q)
      is a subset of FIRSTVT(P).

    Both rules are applied to the same production, so ``P -> Q a ...``
    contributes ``a`` as well as FIRSTVT(Q).

    Args:
        gram_list: productions as ``(left, right)`` pairs where ``right`` is a
            string of one-character symbols ('' is the empty production).
        Vn_list: the non-terminals; any symbol not listed here is treated as
            a terminal.
        Vt_list: not used; kept so existing callers keep working.

    Returns:
        Dict mapping each non-terminal to its FIRSTVT set,
        e.g. ``{'E': {'+', '*', '(', 'i'}}``.

    Raises:
        KeyError: if the left side of a production is not in ``Vn_list``.
    """
    firstVt_dict = {vn: set() for vn in Vn_list}

    changed = True
    while changed:
        changed = False
        for gene in gram_list:
            left, right = gene[0], gene[1]
            # An empty production contributes nothing.
            if right == '':
                continue

            target = firstVt_dict[left]
            size_before = len(target)

            # Rule 1: the first terminal of the right-hand side.
            first_vt = next(
                (sym for sym in right if sym not in firstVt_dict), None)
            if first_vt is not None:
                target.add(first_vt)

            # Rule 2: a leading non-terminal passes its FIRSTVT set upwards.
            if right[0] in firstVt_dict:
                target |= firstVt_dict[right[0]]

            # Sets only ever grow, so a size change means "not yet stable".
            if len(target) != size_before:
                changed = True

    print('firstVt集为:', firstVt_dict)
    return firstVt_dict
def cal_select(gene_list: list, first_dict: dict, follow_dict: dict) -> dict:
    """Compute the SELECT set of every production.

    For a production ``A -> alpha``::

        SELECT(A -> alpha) = FIRST(alpha) - {''}
                             (plus FOLLOW(A) when alpha can derive empty)

    FIRST(alpha) is built by walking ``alpha`` from the left and stopping at
    the first symbol that cannot derive the empty string.

    Args:
        gene_list: productions as ``(left, right)`` pairs of strings; ``right``
            is a string of one-character symbols ('' is the empty production).
        first_dict: FIRST set of every non-terminal; the empty string ``''``
            in a set marks the non-terminal as nullable. A symbol is treated
            as a non-terminal exactly when it is a key of this dict.
        follow_dict: FOLLOW set of every non-terminal.

    Returns:
        Dict mapping ``'A->alpha'`` to its SELECT set,
        e.g. ``{'A->abc': {'a'}}``.

    Raises:
        KeyError: if a nullable production's left side is missing from
            ``follow_dict``.
    """
    select_dict = {}
    for gene in gene_list:
        left, right = gene[0], gene[1]
        production = '->'.join(gene)

        select = set()
        derives_empty = True  # stays True only if every symbol is nullable
        for symbol in right:
            if symbol not in first_dict:
                # A terminal starts every string derived from here on.
                select.add(symbol)
                derives_empty = False
                break
            symbol_first = first_dict[symbol]
            select |= symbol_first - {''}
            if '' not in symbol_first:
                derives_empty = False
                break

        if derives_empty:
            select |= follow_dict[left]

        select_dict[production] = select

    print('select集为:', select_dict)
    return select_dict
def classify_status(gram_list: list, firstVt_dict, lastVt_dict) -> list:
    """Derive the precedence relations between terminals.

    Every production's right-hand side is scanned twice:

    * over adjacent pairs ``x y``:
        - terminal, terminal          -> ``[{x}, '=', {y}]``
        - terminal, non-terminal      -> ``[{x}, '<', firstVt_dict[y]]``
        - non-terminal, terminal      -> ``[lastVt_dict[x], '>', {y}]``
    * over adjacent triples ``x Y z`` (terminal, non-terminal, terminal)
      -> ``[{x}, '=', {z}]``

    A symbol counts as a non-terminal when ``isupper`` accepts it. The sets
    taken from ``firstVt_dict`` / ``lastVt_dict`` are stored by reference,
    not copied.

    Args:
        gram_list: productions as ``(left, right)`` pairs.
        firstVt_dict: FIRSTVT set of every non-terminal.
        lastVt_dict: LASTVT set of every non-terminal.

    Returns:
        List of ``[left_set, relation, right_set]`` triples, in production
        order (pair relations of a production before its triple relations).
    """
    relations = []

    for production in gram_list:
        rhs = production[1]

        # Relations between directly neighbouring symbols.
        for left_sym, right_sym in zip(rhs, rhs[1:]):
            left_is_vn = isupper(left_sym)
            right_is_vn = isupper(right_sym)
            if left_is_vn and right_is_vn:
                continue
            if left_is_vn:
                relations.append([lastVt_dict[left_sym], '>', {right_sym}])
            elif right_is_vn:
                relations.append([{left_sym}, '<', firstVt_dict[right_sym]])
            else:
                relations.append([{left_sym}, '=', {right_sym}])

        # Two terminals separated by exactly one non-terminal are equal.
        for before, middle, after in zip(rhs, rhs[1:], rhs[2:]):
            if isupper(before) or isupper(after):
                continue
            if isupper(middle):
                relations.append([{before}, '=', {after}])

    print('各终结符之间的优先关系:')
    for relation in relations:
        print(str(relation))
    return relations
def has_Vt(gene: tuple) -> bool:
    """Tell whether the right-hand side of a production contains a terminal.

    Args:
        gene: a production; ``gene[1]`` is its right-hand side, an iterable of
            symbols. A symbol is a terminal when ``isupper`` rejects it.

    Returns:
        True if at least one symbol of ``gene[1]`` is a terminal, else False
        (an empty right-hand side therefore yields False).
    """
    terminals = [symbol for symbol in gene[1] if not isupper(symbol)]
    return len(terminals) > 0
def find_V(grammer_list: list) -> tuple:
    """Collect all non-terminals and terminals that occur in a grammar.

    Every character of every production (left and right side) is inspected:
    characters accepted by ``isupper`` are non-terminals, all others are
    terminals.

    The symbols are returned in order of first appearance, so the result is
    the same on every run (a plain ``list(set(...))`` ordering changes with
    string hash randomisation).

    Args:
        grammer_list: productions as tuples of strings, e.g. ``('S', 'aB')``.

    Returns:
        ``(Vn_list, Vt_list)`` - the non-terminals and the terminals, each
        without duplicates.
    """
    # Dicts are used as insertion-ordered sets.
    seen_vn = {}
    seen_vt = {}
    for production in grammer_list:
        for side in production:
            for char in side:
                if isupper(char):
                    seen_vn.setdefault(char, None)
                else:
                    seen_vt.setdefault(char, None)

    Vn_list = list(seen_vn)
    Vt_list = list(seen_vt)
    print('Vn为:', Vn_list)
    print('Vt为:', Vt_list)
    return Vn_list, Vt_list
def cal_first(gene_list: list, Vn_list: list) -> dict:
    """Compute the FIRST set of every non-terminal.

    The productions are processed repeatedly until no FIRST set grows any
    more. For ``A -> X1 X2 ... Xn`` the right-hand side is walked from the
    left:

    * a terminal is added to FIRST(A) and ends the walk;
    * a non-terminal contributes ``FIRST(Xi) - {''}``; the walk continues
      only if ``Xi`` is nullable (``''`` is in its FIRST set);
    * if the walk runs off the end (including the empty production), ``''``
      is added to FIRST(A).

    A symbol is a non-terminal exactly when it is listed in ``Vn_list``;
    everything else is a terminal, so no module-level terminal list is
    needed.

    Args:
        gene_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of one-character symbols ('' is the empty production).
        Vn_list: the non-terminals of the grammar.

    Returns:
        Dict mapping each non-terminal to its FIRST set, with ``''`` standing
        for the empty string, e.g. ``{'A': {'a', ''}}``.

    Raises:
        KeyError: if the left side of a production is not in ``Vn_list``.
    """
    first_dict = {vn: set() for vn in Vn_list}

    changed = True
    while changed:
        changed = False
        for gene in gene_list:
            left, right = gene[0], gene[1]
            target = first_dict[left]
            size_before = len(target)

            all_nullable = True
            for symbol in right:
                if symbol not in first_dict:
                    # Terminal: it starts every string derived from here.
                    target.add(symbol)
                    all_nullable = False
                    break
                symbol_first = first_dict[symbol]
                # Build a new set so that A -> A... never mutates the set
                # being read.
                target |= symbol_first - {''}
                if '' not in symbol_first:
                    all_nullable = False
                    break

            if all_nullable:
                target.add('')

            # Sets only ever grow, so a size change means "not yet stable".
            if len(target) != size_before:
                changed = True

    print('first集为:', first_dict)
    return first_dict
def cal_lastVt(gram_list: list, Vn_list, Vt_list) -> dict:
    """Compute the LASTVT set of every non-terminal of an operator grammar.

    Two rules are applied to every production ``P -> X1 ... Xn`` until no set
    grows any more:

    * the last terminal on the right-hand side belongs to LASTVT(P);
    * if the right-hand side ends with a non-terminal Q, then LASTVT(Q) is a
      subset of LASTVT(P).

    Both rules are applied to the same production, so ``P -> ... a Q``
    contributes ``a`` as well as LASTVT(Q).

    Args:
        gram_list: productions as ``(left, right)`` pairs where ``right`` is a
            string of one-character symbols ('' is the empty production).
        Vn_list: the non-terminals; any symbol not listed here is treated as
            a terminal.
        Vt_list: not used; kept so existing callers keep working.

    Returns:
        Dict mapping each non-terminal to its LASTVT set,
        e.g. ``{'E': {'+', '*', ')', 'i'}}``.

    Raises:
        KeyError: if the left side of a production is not in ``Vn_list``.
    """
    lastVt_dict = {vn: set() for vn in Vn_list}

    changed = True
    while changed:
        changed = False
        for gene in gram_list:
            left, right = gene[0], gene[1]
            # An empty production contributes nothing.
            if right == '':
                continue

            target = lastVt_dict[left]
            size_before = len(target)

            # Rule 1: the last terminal of the right-hand side.
            last_vt = next(
                (sym for sym in reversed(right) if sym not in lastVt_dict),
                None)
            if last_vt is not None:
                target.add(last_vt)

            # Rule 2: a trailing non-terminal passes its LASTVT set upwards.
            if right[-1] in lastVt_dict:
                target |= lastVt_dict[right[-1]]

            # Sets only ever grow, so a size change means "not yet stable".
            if len(target) != size_before:
                changed = True

    print('lastVt集为:', lastVt_dict)
    return lastVt_dict
def checkUpper(input):
    """Report whether *input* is an upper-case character.

    The two letters "Đ" and "Ư" are always reported as upper case; every
    other value is handed to ``isupper`` and its result is returned as is.
    """
    always_upper = ("Đ", "Ư")
    if input in always_upper:
        return True
    return isupper(input)
def FMMBMM(self,sentence):
    """Segment *sentence* into words/phrases and return them as one string.

    What the code below does, step by step:

    1. The sentence goes through ``self.standardlizeString`` and is split on
       single spaces into tokens ``vw``.
    2. A cost table ``L`` is filled: every cell starts at ``self.maxval``;
       each single token gets cost ``1.0``; a run of tokens ``j..k`` whose
       lower-cased, space-joined text has a positive value in ``self.td``
       gets cost ``1.0 / (k - j + 1)``.
    3. For every end position ``i`` the split point ``j`` with the smallest
       ``a[j] + L[j][i]`` is selected and the tokens ``j..i-1`` are stored
       in ``q[i]`` as the segment ending there.
    4. The segments are read back from the end of the sentence; spaces
       inside a segment are replaced by ``_``.
    5. Each segment is written out followed by ``self.dau[0]``, or by
       ``self.dau[1]`` when it is a non-final member of a group returned by
       ``group_consecutives``.

    NOTE(review): the name suggests forward/backward maximum matching and
    ``self.td`` looks like a phrase dictionary - confirm against the class.
    NOTE(review): the nesting of this method was reconstructed from a
    flattened source; verify it against the original file.

    Returns:
        The joined segments with surrounding whitespace stripped.
    """
    # Helper defined elsewhere; presumably normalises the text - TODO confirm.
    sentence = self.standardlizeString(sentence)
    vw = sentence.strip().split(' ')
    n = len(vw)
    # q[i] holds the text of the segment that ends just before token i.
    q = []
    # Built during the read-back below but not part of the return value.
    strVWS = " |"
    for i in range(0, n+1):
        q.append('')
    w = ''
    vt1 = []
    i = 0
    j = 0
    k = 0
    # a[i] is the accumulated cost of the best segmentation of vw[0:i].
    a = []
    for i in range(0, n+1):
        a.append(0.0)
    # L[j][i] is the cost of taking vw[j:i] as one segment.
    L = [[0]*(n + 1) for x in range(n + 1)]
    for i in range(0,n+1):
        for j in range(0,n+1):
            L[i][j]=self.maxval
    # Walk the end token i from last to first and grow the candidate phrase
    # to the left one token at a time.
    i = n - 1
    while i >=0:
        w = vw[i]
        L[i][i+1] = 1.0
        q[i+1] = w
        if i >0:
            k = i
            for j in reversed(range(0, k)):
                w = vw[j]+" "+w
                # Longer phrases found in self.td get a lower cost; because j
                # decreases, q[k+1] ends up as the longest match ending at k.
                if self.td.get(w.lower(),0) > 0:
                    L[j][k+1] = 1.0 /(k-j+1)
                    q[k+1]=w
        i -= 1
    # a[1:] start at 0.0, so a position is only updated when a[j] + L[j][i]
    # is below that; presumably self.minval is negative enough for this to
    # always happen - TODO confirm.
    a[0]=self.minval
    for i in range(1, n+1):
        for j in range(0, i):
            w = vw[j]
            if a[i] > (a[j] + L[j][i]):
                a[i] = a[j]+ L[j][i]
                # Rebuild the text of tokens j..i-1 for the chosen segment.
                for k in range(j+1, i):
                    w = w + " "+ vw[k]
                q[i] = w
    # Read the chosen segments back from the end of the sentence.
    i = n
    tpstrVWS=''
    while i > 0:
        vt1 = q[i].strip().split(' ')
        strVWS = " | " + q[i] + strVWS
        tpstrVWS = ' '+q[i].replace(' ','_') + tpstrVWS+' '
        # Step back by the number of tokens in the segment just consumed.
        i = i - len(vt1)
    ttp = tpstrVWS.strip().split()
    # Indices of single-token segments (no '_') whose first character is
    # accepted by isupper.
    listUpper = []
    for index in range(0, len(ttp)):
        if isupper(ttp[index][0]) and ttp[index].find('_') <0:
            listUpper.append(index)
    # Collected below but not read again in this method.
    listreplace = []
    # Maps a segment index to 1 when it should be followed by self.dau[1].
    dicDau = {}
    # group_consecutives is defined elsewhere; presumably it yields runs of
    # consecutive indices - TODO confirm.
    for vv in group_consecutives(listUpper):
        if len(vv) >= 2:
            listreplace.append(vv)
            # Every member of the group except the last one is flagged.
            for v in vv[:-1]:
                dicDau[v]=1
    result = ""
    for i in range(0, len(ttp)):
        # self.dau is indexed with 0 or 1; presumably the two separators
        # (e.g. ' ' and '_') - TODO confirm.
        result += ttp[i]+self.dau[dicDau.get(i,0)]
    return result.strip()
def cal_follow(gene_list: list, Vn_list: list, first_dict: dict,
               start_symbol: str = 'S') -> dict:
    """Compute the FOLLOW set of every non-terminal.

    FOLLOW(start_symbol) is seeded with the end marker ``'#'``. The
    productions are then processed repeatedly until no FOLLOW set grows any
    more. For every non-terminal ``B`` in ``A -> alpha B beta`` the symbols
    of ``beta`` are walked from the left:

    * a terminal is added to FOLLOW(B) and ends the walk;
    * a non-terminal ``X`` contributes ``FIRST(X) - {''}``; the walk
      continues only if ``X`` is nullable;
    * if the walk runs off the end (``beta`` is empty or entirely nullable),
      FOLLOW(A) is added to FOLLOW(B).

    A symbol is a non-terminal exactly when it is listed in ``Vn_list``;
    everything else (including symbols such as ``+`` or ``(``) is a
    terminal.

    Args:
        gene_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of one-character symbols ('' is the empty production).
        Vn_list: the non-terminals of the grammar.
        first_dict: FIRST set of every non-terminal, with ``''`` marking a
            nullable non-terminal.
        start_symbol: the grammar's start symbol; defaults to ``'S'``.

    Returns:
        Dict mapping each non-terminal to its FOLLOW set,
        e.g. ``{'A': {'b', '#'}}``.

    Raises:
        KeyError: if ``start_symbol`` or the left side of a production is
            not in ``Vn_list``, or a non-terminal is missing from
            ``first_dict``.
    """
    follow_dict = {vn: set() for vn in Vn_list}
    follow_dict[start_symbol].add('#')

    changed = True
    while changed:
        changed = False
        for gene in gene_list:
            left, right = gene[0], gene[1]
            for position, symbol in enumerate(right):
                # Only non-terminals have a FOLLOW set.
                if symbol not in follow_dict:
                    continue

                target = follow_dict[symbol]
                size_before = len(target)

                rest_nullable = True
                for following in right[position + 1:]:
                    if following not in follow_dict:
                        target.add(following)
                        rest_nullable = False
                        break
                    following_first = first_dict[following]
                    target |= following_first - {''}
                    if '' not in following_first:
                        rest_nullable = False
                        break

                # Nothing non-empty is guaranteed after ``symbol``: whatever
                # can follow the left side can follow ``symbol`` too.
                if rest_nullable:
                    target |= follow_dict[left]

                # Sets only ever grow, so a size change means "not stable".
                if len(target) != size_before:
                    changed = True

    print('follow集为:', follow_dict)
    return follow_dict