def clickThey():
    global d_key
    global pa
    rep_talk()
    pa += 1
    act.configure(text='批刷' + str(pa) + '词')
    if platform.system() == 'Windows':
        f = open("d:/YYY/词林动物1.txt", 'r', encoding='UTF-8')
    else:
        f = open("/home/ubuntu/DaiMeng/data/xmlData/词林动物1.txt", 'r', encoding='UTF-8')
    words = f.readlines()
    f.close()
    for word in words:
        word = word.strip('\n').replace('\ufeff', '')  # drop the newline and any UTF-8 BOM
        trans_json(request_(word))  # analyze each word in turn
        phsen_ana = get_phsen()
        Ds_scan()
        fast_scan()
    ph_und = trans_ltp.get_thisund()
    for key in ph_und.Semdic:
        t1.insert(INSERT, key + '扫描::' + str(ph_und.Semdic[key]) + '\n')
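# The batch loop above normalizes each line of the word list before analysis
# (newline stripped, UTF-8 BOM removed). A minimal standalone sketch of that
# normalization step; the helper name load_word_list is an assumption, not
# part of the project API.
def load_word_list(path):
    """Read one word per line, stripping the newline and any UTF-8 BOM (U+FEFF)."""
    with open(path, 'r', encoding='UTF-8') as f:
        return [line.strip('\n').replace('\ufeff', '') for line in f]

# Hypothetical usage, mirroring the non-Windows path used above:
# words = load_word_list('/home/ubuntu/DaiMeng/data/xmlData/词林动物1.txt')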
def clickIt():
    global d_key
    global ra
    ra += 1
    act.configure(text='随机' + str(ra) + '词')
    if e.get() == '':
        ra_wo = test_random()
    else:
        ra_wo = [e.get().split(' ')[0], get_explans(e.get().split(' ')[0])[0]]
    t1.delete('1.0', 'end')
    t1.mark_set(INSERT, '2.1')
    wo_ex = (ra_wo[0] + ':' + ra_wo[1].strip(' ')).replace('.', '=').replace(',', '!').replace("|", '').replace(',', '!')
    trans_json(request_(wo_ex))
    t1.insert(INSERT, wo_ex + '\n')
    phsen_ana = get_phsen()
    Ds_scan()
    fast_scan()
    hownet_words_list = get_hownet_words(r'/home/ubuntu/DaiMeng/data/xmlData/整理.txt')
    b = [i for i in hownet_words_list if i.word == ra_wo[0]]
    for c in b:
        t_A.insert(INSERT, c.word + c.gc + c.DEF + '\n')
        print(c.word, c.gc, c.DEF)
    sn = 0  # after the dictionary returns an explanation, load sentence 0 / word 0 info as d_key
    wr_in = ''
    for sen in phsen_ana:
        print(sen.sen_mean)
        if sn == 0:
            set_d_key(wo=sen.w_anas[0].wo, pos=sen.w_anas[0].pos, syn=sen.w_anas[0], cla=sen.w_anas[0].cla)
        sen_result = sentence_match_result(sen.w_anas, js_cla='字典')
        if sen_result:
            t1.insert(INSERT, '内涵知识:' + str(sen_result[-1]) + '\n')
            if ra_wo[0].find(':') == -1:
                wr_in += ',' + str(sen_result[-1])
        sn += 1
    if wr_in != '':
        # check the node name: it must not contain special characters
        formatok = all('._!|/'.find(ch) == -1 for ch in ra_wo[0])
        if formatok:
            d_key = get_d_key()
            Gn_audit(d_key, wr_in.strip(','))
    ph_und = trans_ltp.get_thisund()
    for key in ph_und.Semdic:
        t1.insert(INSERT, key + '扫描::' + str(ph_und.Semdic[key]) + '\n')
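# clickIt builds wo_ex as '词:解释' and then rewrites punctuation so the string
# survives the '='/',' -delimited parsing downstream. A hedged sketch of that
# step as a testable helper; make_wo_ex is a hypothetical name.
def make_wo_ex(word, explan):
    wo_ex = word + ':' + explan.strip(' ')
    # '.' would collide with the key=value separator, ',' and ',' with the
    # field separator, and '|' is reserved, so they are rewritten or removed.
    return wo_ex.replace('.', '=').replace(',', '!').replace("|", '').replace(',', '!')

# e.g. make_wo_ex('橡皮', '文具,擦铅笔字用') == '橡皮:文具!擦铅笔字用'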
def rep_talk(input='我们出门买橡皮呗'):
    trans_json(request_(input))
    phsen_ana = get_phsen()
    for i, sen in enumerate(phsen_ana):
        sen_result = sentence_match_result(sen.w_anas)
        exp_result = experience_mean_result(sen)
        dsent_list = []
        lsent_list = []
        if sen_result:
            trans_ltp.thinking_str += '这句注意=' + sen.sen_in + '意味着=' + str(sen_result[-1])
            dsent_list.append(sen_result.pop(0))
            lsent_list.append(sen_result.pop(0))
        phsen_ana[i].l_form = lsent_list
        phsen_ana[i].d_form = dsent_list
        if exp_result:
            phsen_ana[i].exp_mean = exp_result
        break  # forgot what this was for; rather than delete it, run one round
    Ds_scan()
    fast_scan()
    test_dic = {}
    ss = 0
    ph_und = trans_ltp.get_thisund()
    Has_Qu = False  # interrogative-sentence indicator
    print(trans_ltp.thinking_str)
    for sen in phsen_ana:
        disp_wo = ''
        if_Q = False
        Q_if = False
        disp_wo += sen.sen_in
        for w in sen.w_anas:
            if len(disp_wo) < 500:
                disp_wo += (w.wo + '::' + w.yuf + '::' + w.pos + '::' + w.wgr + ' :: ' + w.rel + '\n')
            if w.pos == '疑问词':
                if_Q = True
                phsen_ana[ss].sen_mean = phsen_ana[ss].sen_mean.replace('=' + w.wo, '=疑问=' + w.wo)  # tag as interrogative; the '=' makes splitting easy
            elif w.wo == '?':
                Q_if = True
        test_dic['句子sn=' + str(ss)] = sen.sen_in + '\n' + sen.sen_mean + '\n'
        test_dic['词汇sn=' + str(ss)] = disp_wo
        if if_Q and Q_if:
            phsen_ana[ss].env_mean += ',句式=疑问'
            Has_Qu = True
        ss += 1
    if len(phsen_ana) > 1:
        phase_ana()  # paragraph-understanding demo; has no effect on a single sentence
        ph_und = trans_ltp.get_thisund()
        print('tttttttttt' + trans_ltp.thinking_str)
        if '回复' in test_dic:
            test_dic['回复'] += Thlog_ana(ph_und.rec[1])
        else:
            test_dic['回复'] = Thlog_ana(ph_und.rec[1])
    else:
        # search of the classified knowledge base; single sentences only
        Thlog_ana(myexp_ana())
        ph_und = trans_ltp.get_thisund()
        if '回复' in test_dic:
            test_dic['回复'] += Thlog_ana(myexp_ana())
        else:
            test_dic['回复'] = Thlog_ana(myexp_ana())
    if '回复' in test_dic:
        test_dic['回复'] += Thlog_ana(myexp_ana(no='我的日记'))
    else:
        test_dic['回复'] = Thlog_ana(myexp_ana(no='我的日记'))
    test_dic['扫描'] = str(ph_und.Semdic)
    if len(test_dic['回复']) < 3:
        ph_und = trans_ltp.get_thisund()
        for key in ph_und.Semdic:
            if key == 'V':
                for vv in ph_und.Semdic[key]:
                    if not vv.startswith('sn='):
                        continue
                    trans_ltp.thinking_str += '重要的' + vv + '\n'
        test_dic['回复'] += Thlog_ana()
    print(ph_und.Semdic)
    print('***************')
    print(test_dic['回复'])
    test_dic['回复'] = test_dic['回复'].replace('\n\n', '\n').replace('\n', '\nΨ').strip('Ψ')
    return test_dic
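# Before being returned, the reply is flattened: blank lines are collapsed and
# a visible 'Ψ' marker is placed after each remaining newline. A standalone
# sketch of that packing step (pack_reply is a hypothetical name):
def pack_reply(text):
    return text.replace('\n\n', '\n').replace('\n', '\nΨ').strip('Ψ')

# pack_reply('a\nb') == 'a\nΨb'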
def clickMe():
    global ti
    global ssi
    ti += 1
    inp = e.get()
    ssi += len(re.split(r'[,。!?;]|\.\.\.', inp)) - 1
    trans_json(request_(inp))
    t_nlu.member.append(inp)  # record the input / dialogue
    phsen_ana = get_phsen()
    act.configure(text=str(ti) + '段:' + str(len(phsen_ana)) + '句')
    t0.delete('1.0', 'end')
    t1.delete('1.0', 'end')
    t_A.delete('1.0', 'end')
    t0.mark_set(INSERT, '2.1')
    t1.mark_set(INSERT, '2.1')
    t_A.mark_set(INSERT, '2.1')
    i = 0
    Has_Qu = False
    sn = 0
    for sen in phsen_ana:
        if sn == 0:
            set_d_key(wo=sen.w_anas[0].wo, pos=sen.w_anas[0].pos, syn=sen.w_anas[0], cla=sen.w_anas[0].cla)
        sen_result = sentence_match_result(sen.w_anas)
        exp_result = experience_mean_result(sen)
        dsent_list = []
        lsent_list = []
        if sen_result:
            trans_ltp.thinking_str += '这句注意=' + sen.sen_in + '意味着=' + str(sen_result[-1])
            dsent_list.append(sen_result.pop(0))
            lsent_list.append(sen_result.pop(0))
        phsen_ana[sn].l_form = lsent_list
        phsen_ana[sn].d_form = dsent_list
        if exp_result:
            phsen_ana[sn].exp_mean = exp_result
        t0.insert(INSERT, sen.sen_mean + '\n')
        t0.insert(INSERT, str(lsent_list) + '\n')
        t0.insert(INSERT, str(dsent_list) + '\n')
        t0.insert(INSERT, str(exp_result) + '\n')
        sn += 1
    Ds_scan()
    fast_scan()
    for sen in phsen_ana:
        disp_wo = ''
        if_Q = False
        Q_if = False
        disp_wo += sen.sen_in
        for w in sen.w_anas:
            if len(disp_wo) < 1200:
                disp_wo += (w.wo + '::' + w.yuf + '::' + w.pos + '::' + w.wgr + ' :: ' + w.cla + '::' + w.rel + '\n')
            if w.pos == '疑问词':
                if_Q = True
                phsen_ana[i].sen_mean = phsen_ana[i].sen_mean.replace('=' + w.wo, '=疑问=' + w.wo)  # tag as interrogative; the '=' makes splitting easy
            elif w.wo == '?':
                Q_if = True
        t1.insert(INSERT, disp_wo)
        if if_Q and Q_if:
            phsen_ana[i].env_mean += ',句式=疑问'
            t0.insert(INSERT, '句式=疑问')
            Has_Qu = True
        i += 1
    ph_und = trans_ltp.get_thisund()
    if Has_Qu:
        Q_res = Query_ana(phsen_ana)
        t_A.insert(INSERT, str(Q_res) + '\n')
    for key in ph_und.Semdic:
        t1.insert(INSERT, key + '扫描::' + str(ph_und.Semdic[key]) + '\n')
    phase_ana()
    if len(phsen_ana) > 0:
        t_A.insert(INSERT, Thlog_ana())
        t_A.insert(INSERT, Thlog_ana(myexp_ana()))
        t_A.insert(INSERT, Thlog_ana(myexp_ana(no='我的日记')))
    if 'UND' not in t_nlu.features:
        t_nlu.features['UND'] = {}
    t_nlu.features['UND'][inp] = ph_und  # collected together once finished; note this is a 2-D queue, and further analyses can sit alongside ph_und
    print('ssi' * 5 + ' ' + str(ssi))
    if ssi > 4:
        t_A.insert(INSERT, Thlog_ana(rep_ph_ana(t_nlu)))  # re-analyze the dialogue paragraph every 5 sentences
        ssi = 0
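# clickMe marks a sentence as interrogative only when BOTH signals are present:
# a question word (POS '疑问词') and a full-width question mark. A minimal sketch
# of that double test, assuming word objects expose .pos and .wo as the w_anas
# entries above do; is_question is a hypothetical helper name.
def is_question(w_anas):
    has_q_word = any(w.pos == '疑问词' for w in w_anas)
    has_q_mark = any(w.wo == '?' for w in w_anas)
    return has_q_word and has_q_mark

# The running sentence counter ssi uses the same split as above: Chinese clause
# punctuation or an ASCII ellipsis, e.g.
# len(re.split(r'[,。!?;]|\.\.\.', '你好,吃了吗?')) - 1 == 2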
def fast_scan(js_cla='fast', sxml=None):
    # Operates on phsen_ana: fast, repeatable matching of short attributes across
    # all sentences. Supports @as, @C, @Y, and nested variables.
    global this_und
    root = sxml  # allows description patterns other than 表达2.xml; pass in the level above the pattern-set node
    if not root:
        root = ET.parse(r'/home/ubuntu/DaiMeng/data/xmlData/表达2.xml').getroot()
    this_und = trans_ltp.get_thisund()
    Yf_ele = ['主语', 'V', '状语']
    for ele in Yf_ele:
        this_und.Semdic[ele] = ''
    for js_no in root.iter(js_cla):  # the added first level is the category tag; usually only entry 0 is valid, so one pass suffices
        ss = 0
        for sen in this_und.phsen_ana:
            Yf_str = sen.sen_mean.replace('::', ',').split(',')
            for ele in Yf_ele:
                sen_ele = ''
                for ys in Yf_str:
                    if ys.find(ele + '=') > -1:
                        sen_ele += ',' + ys
                if sen_ele != '':
                    this_und.Semdic[ele] += 'sn=' + str(ss) + sen_ele + '\n'  # write one line after scanning each sentence
            matrec_dic = {}  # records how each word of this sentence matched under each topic
            for node in js_no.iter('句式'):
                logic_express = node.find('逻辑句式').text
                sem_t = None
                if '主题' in node.attrib:
                    sem_t = node.attrib['主题']
                sen_sem = ''
                for ch_no in node.iter('描述句式'):
                    if '逻辑' in ch_no.attrib:
                        logic_express = ch_no.attrib['逻辑']  # in special cases use a per-pattern logic expression that pre-empts the ones below
                    if sem_t is not None:
                        if sem_t not in this_und.Semdic:
                            this_und.Semdic[sem_t] = ''
                    des_str = ch_no.text.split(',')
                    as_pars = []  # list of @as parameter groups
                    as_par = ['wo', '']  # @as match-parameter tuple
                    if_as = False
                    if ch_no.text.find('@as') > -1:
                        dd = 0
                        for des in des_str:  # record the position and parameters of the @as variables in this description pattern
                            if des[:3] == '@as':
                                if_as = True
                                as_pai = des.split('.')
                                if int(as_pai[1]) == -1:  # -1 means the shared dicexpl_key; load it separately
                                    d_key = get_d_key()
                                    if as_pai[2] == 'wo':
                                        des_str[dd] = d_key.wo
                                    elif as_pai[2] == 'pos':
                                        des_str[dd] = '@P.' + d_key.pos
                                    elif as_pai[2] == 'syn':
                                        des_str[dd] = d_key.syn.split('+')[0]
                                    elif as_pai[2] == 'cla':
                                        cat_cla = d_key.cla.split('+')[0].split('.')[-1]
                                        cat_cla = re.sub(r'\d+', '', cat_cla)
                                        cat_cla = re.sub(r'[=#@]+', '', cat_cla)
                                        des_str[dd] = '@G.in(' + cat_cla[cat_cla.find('_') + 1:] + ')'  # take only the last segment of the target's 0th category
                                # position 1 of an @as variable is the index (from 0) of the pattern element it follows;
                                # position 2 is the content tracked (category, POS, or the word itself); dd is its own position.
                                else:
                                    as_par = [dd, int(as_pai[1]), as_pai[2]]
                                    as_pars.append(as_par)
                            dd += 1
                    j = 0
                    w = 0
                    dista = 0
                    par_C = ''
                    mat_dic = {}
                    B_C = -1  # @C variable switch
                    last_mat = -1  # position of the previously matched w_anas entry
                    while j < len(des_str):  # match with the description pattern as the baseline
                        if w > len(sen.w_anas) - 1:
                            break
                        mat_this = False
                        mat_des = False
                        s_mat = des_str[j].replace('opt', '')
                        if des_str[j].find('@C') > -1:  # the @C variable collects intervening words
                            B_C = w
                            if des_str[j].split('.')[1] == 'wo':
                                par_C = sen.w_anas[w].wo
                            if j == len(des_str) - 1:  # last item: @C collects everything remaining
                                ww = w + 1
                                while ww < len(sen.w_anas):
                                    par_C += '+' + sen.w_anas[ww].wo
                                    ww += 1
                                B_C = -1
                                last_mat = len(sen.w_anas) - 1
                            j += 1
                            w += 1
                            continue
                        elif des_str[j].find('@Y.') > -1:  # relate matching via dependency parsing
                            for ww in range(w, len(sen.w_anas)):  # when writing @Y variables, mind the before/after order
                                if sen.w_anas[ww].pos == '标点':
                                    break
                                # use the wgr word group to decide relatedness; directly linked first-level items pass, since only HED can precede them
                                if_rel = ('_' + sen.w_anas[ww].wgr + '_').find('_' + sen.w_anas[last_mat].wo + '_') > -1
                                if j == 0:
                                    if_rel = True
                                elif 'HEDCOV'.find(sen.w_anas[ww].yuf) > -1:
                                    if_rel = True
                                # the two items must share a syntactic dependency; the HED-linked layer does not count,
                                # and matching must not cross punctuation
                                if if_rel and sen.w_anas[ww].yuf == cat_str('@Y.', des_str[j] + '+', '+'):  # the '+' is appended so the last character is not dropped
                                    if des_str[j].find('+') == -1:
                                        this_und.phsen_ana[ss].w_anas[w].ele = '+' + des_str[j]
                                    elif element_match(sen.w_anas[ww], des_str[j].split('+')[-1]):
                                        this_und.phsen_ana[ss].w_anas[w].ele = '+' + des_str[j]
                                    else:
                                        continue
                                    matrec_dic[w] = des_str[j]
                                    mat_dic[des_str[j]] = sen.w_anas[ww].wo
                                    mat_this = True
                                    last_mat = ww
                                    break
                            if mat_this:  # @Y variables are emitted here
                                if j == len(des_str) - 1:
                                    t_sem = str(logic_sentence(mat_dic, logic_express)).replace('\'', '')
                                    this_und.phsen_ana[ss].w_anas[last_mat].ele = t_sem  # record the position of the matched word this way
                                    if sen_sem.find(t_sem.split('=')[-1].strip(']')) == -1:
                                        sen_sem += ',' + t_sem
                                    if len(sen.w_anas) - w > len(des_str):  # if the remainder is long enough, match again
                                        j = 0
                                        w = max(w, last_mat) + 1
                                        continue
                                    else:
                                        break
                                else:
                                    j += 1
                                    w = max(w, last_mat) + 1
                                    continue
                            else:
                                j = 0
                                w += 1
                                continue
                        if element_match(this_und.phsen_ana[ss].w_anas[w], s_mat):
                            last_mat = w
                            this_und.phsen_ana[ss].w_anas[w].ele = '+' + des_str[j]
                            matrec_dic[w] = des_str[j]
                            mat_dic[des_str[j]] = sen.w_anas[w].wo
                            if if_as:
                                for a_par in as_pars:  # after each successful match, check whether an @as item follows it and substitute per its parameters
                                    if int(a_par[1]) == j:
                                        if a_par[2] == 'v':
                                            des_str[int(a_par[0])] = des_sent_list[j]
                                        elif a_par[2] == 'wo':
                                            des_str[int(a_par[0])] = sen.w_anas[w].wo
                                        elif a_par[2] == 'pos':
                                            des_str[int(a_par[0])] = '@P.' + sen.w_anas[w].pos
                                        elif a_par[2] == 'cla':
                                            des_str[int(a_par[0])] = '@G.in' + sen.w_anas[w].cla  # restore .cla for if_as
                            if B_C > -1:
                                mat_dic[des_str[j - 1]] = par_C
                                B_C = -1
                            if j == len(des_str) - 1:
                                mat_des = True
                            else:
                                dista = 1
                                j += 1
                                w += 1
                                continue
                        elif B_C > -1:
                            par_C += '+' + sen.w_anas[w].wo
                            w += 1
                            continue
                        elif des_str[j].find('opt') > -1:
                            j += 1
                            continue
                        elif dista > 0 and '助词副词形容词数词'.find(sen.w_anas[w].pos) > -1:
                            dista -= 1
                            w += 1
                            continue
                        elif len(sen.w_anas) - w > len(des_str):  # the remainder is long enough, so matching can repeat here too
                            if if_as:
                                des_str = ch_no.text.split(',')  # restore des_str for if_as
                            mat_dic[des_str[j]] = ''
                            j = 0
                            w += 1
                            continue
                        elif not mat_des:
                            break
                        if mat_des:
                            if if_as:
                                des_str = ch_no.text.split(',')  # restore des_str for if_as
                            # append one line per sentence in the dictionary section
                            t_sem = str(logic_sentence(mat_dic, logic_express)).replace('\'', '')
                            if this_und.phsen_ana[ss].w_anas[last_mat].ele is None:
                                this_und.phsen_ana[ss].w_anas[last_mat].ele = t_sem
                            else:
                                this_und.phsen_ana[ss].w_anas[last_mat].ele += '\n' + t_sem
                            if sen_sem.find(t_sem.split('=')[-1]) == -1:
                                sen_sem += ',' + t_sem
                            if len(sen.w_anas) - w > len(des_str):  # if the remainder is long enough, match again
                                j = 0
                                w += 1
                                continue
                            else:
                                break
                if sen_sem != '':
                    this_und.Semdic[sem_t] += 'sn=' + str(ss) + sen_sem + '\n'
                    if sen_sem.find('发送信息=') > -1:
                        if sen.sen_in[-1] != '”':
                            sss = ss + 1
                            while sss < len(this_und.phsen_ana):
                                this_und.Semdic[sem_t] += 'sn=' + str(sss) + ',信息={' + this_und.phsen_ana[sss].sen_in.replace(' ', '+') + '\n'
                                sss += 1
                                if sss < len(this_und.phsen_ana) and this_und.phsen_ana[sss].sen_in[-1] == '”':
                                    break
            ss += 1
        ele_sts(this_und.Semdic)
        break
    this_und.rec[0] = trans_ltp.input_a
    this_und.rec[1] = trans_ltp.thinking_str
    trans_ltp.set_thisund(ph=this_und)
    return this_und
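# fast_scan walks an XML tree shaped like the sample below: a category tag
# (js_cla) wrapping '句式' nodes, each with an optional '主题' attribute, a
# '逻辑句式' child and one or more '描述句式' children. The element and attribute
# names are the ones the code reads; the sample topic, pattern text and
# variable syntax are illustrative assumptions only.
import xml.etree.ElementTree as ET

_SAMPLE_XML = '''
<表达>
  <fast>
    <句式 主题="购物">
      <逻辑句式>行为=[@Y.SBV,@Y.VOB]</逻辑句式>
      <描述句式>@Y.SBV,@Y.VOB</描述句式>
    </句式>
  </fast>
</表达>
'''

def show_patterns(js_cla='fast'):
    root = ET.fromstring(_SAMPLE_XML)
    for js_no in root.iter(js_cla):
        for node in js_no.iter('句式'):
            print(node.attrib.get('主题'), '->', node.find('逻辑句式').text)
        break  # as in fast_scan, only the first category entry is used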
def rep_talk(input='我们出门买橡皮呗'):
    trans_ltp.thinking_str = ''
    global ssi
    ssi += len(re.split(r'[,。!?;]|\.\.\.', input)) - 1
    trans_json(request_(input))
    t_nlu.member.append(input)  # record the input / dialogue
    if len(input) > 4:
        if input[:4] == '@古诗#':
            print(input[4:])
            phase_json.phase_json_ana(input[4:], idiom=True)
            print(trans_ltp.thinking_str)
    trans_json(request_(input))
    phsen_ana = get_phsen()
    for i, sen in enumerate(phsen_ana):
        sen_result = sentence_match_result(sen.w_anas)
        exp_result = experience_mean_result(sen)
        dsent_list = []
        lsent_list = []
        if sen_result:
            trans_ltp.thinking_str += '这句注意=' + sen.sen_in + '意味着=' + str(sen_result[-1])
            dsent_list.append(sen_result.pop(0))
            lsent_list.append(sen_result.pop(0))
        phsen_ana[i].l_form = lsent_list
        phsen_ana[i].d_form = dsent_list
        if exp_result:
            phsen_ana[i].exp_mean = exp_result
        break  # forgot what this was for; rather than delete it, run one round
    Ds_scan()
    fast_scan()
    test_dic = {}
    ss = 0
    ph_und = trans_ltp.get_thisund()
    Has_Qu = False  # interrogative-sentence indicator
    print(trans_ltp.thinking_str)
    for sen in phsen_ana:
        disp_wo = ''
        if_Q = False
        Q_if = False
        disp_wo += sen.sen_in
        for w in sen.w_anas:
            if len(disp_wo) < 500:
                disp_wo += (w.wo + '::' + w.yuf + '::' + w.pos + '::' + w.wgr + ' :: ' + w.rel + '\n')
            if w.pos == '疑问词':
                if_Q = True
                phsen_ana[ss].sen_mean = phsen_ana[ss].sen_mean.replace('=' + w.wo, '=疑问=' + w.wo)  # tag as interrogative; the '=' makes splitting easy
            elif w.wo == '?':
                Q_if = True
        test_dic['句子sn=' + str(ss)] = sen.sen_in + '\n' + sen.sen_mean + '\n'
        test_dic['词汇sn=' + str(ss)] = disp_wo
        if if_Q and Q_if:
            phsen_ana[ss].env_mean += ',句式=疑问'
            Has_Qu = True
        ss += 1
    if len(phsen_ana) > 1:
        print(321, phsen_ana)
        phase_ana()  # paragraph-understanding demo; has no effect on a single sentence
        ph_und = trans_ltp.get_thisund()
        # print('tttttttttt' + trans_ltp.thinking_str)
        if '回复' in test_dic:
            test_dic['回复'] += Thlog_ana(ph_und.rec[1])
        else:
            test_dic['回复'] = Thlog_ana(ph_und.rec[1])
        # print('***************')
        # print(test_dic['回复'])
    else:
        # search of the classified knowledge base; single sentences only
        Thlog_ana(myexp_ana())
        ph_und = trans_ltp.get_thisund()
        if '回复' in test_dic:
            test_dic['回复'] += Thlog_ana(myexp_ana())
        else:
            test_dic['回复'] = Thlog_ana(myexp_ana())
        # print('********2*******')
        # print(test_dic['回复'])
    if '回复' in test_dic:
        trans_ltp.thinking_str = ''
        test_dic['回复'] += Thlog_ana(myexp_ana(no='我的日记'))
    else:
        test_dic['回复'] = Thlog_ana(myexp_ana(no='我的日记'))
    print('********3*******')
    print(test_dic['回复'])
    test_dic['扫描'] = str(ph_und.Semdic)
    if len(test_dic['回复']) < 3:
        ph_und = trans_ltp.get_thisund()
        for key in ph_und.Semdic:
            if key == 'V':
                for vv in ph_und.Semdic[key]:
                    if not vv.startswith('sn='):
                        continue
                    trans_ltp.thinking_str += '重要的' + vv + '\n'
        test_dic['回复'] += Thlog_ana()
    print(ph_und.Semdic)
    if 'UND' not in t_nlu.features:
        t_nlu.features['UND'] = {}
    t_nlu.features['UND'][input] = ph_und  # collected together once finished; note this is a 2-D queue, and further analyses can sit alongside ph_und
    print('ssi' * 5 + ' ' + str(ssi))
    if ssi > 4:
        test_dic['回复'] += Thlog_ana(rep_ph_ana(t_nlu))  # re-analyze the dialogue paragraph every 5 sentences
        ssi = 0
    test_dic['回复'] = test_dic['回复'].replace('\n\n', '\n').replace('\n', '\nΨ').strip('Ψ')
    print('******4*****')
    print(test_dic['回复'])
    return test_dic
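# Hedged usage sketch for rep_talk: it returns a dict whose keys, as built
# above, include '句子sn=N' / '词汇sn=N' per sentence plus '回复' and '扫描'.
# Running it requires the full project runtime; guarded here as a sketch only.
if __name__ == '__main__':
    result = rep_talk('我们出门买橡皮呗')
    print(result.get('回复', ''))  # packed reply; lines are 'Ψ'-separated
    print(result.get('扫描', ''))  # stringified Semdic scan results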