def inputsentence_analysis(inputsentence): post_data = "sentence=" + inputsentence url = urllib2.urlopen("http://barbar.cs.lth.se:8081/parse", data=post_data) returncode = url.getcode() content = url.read() #if returncode != 200: # print >> log,'NLP server error (problem processing)' #print content #print type(content) content = content.split('\n') sent = "" #result={} #SemPar = 0 #NoSbj = 0 #print "data is", repr(content) #print content for row in content: table = row.split('\t') #sent.append([table[0], table[1], table[2], table[4], table[5], table[6], table[8], table[10], table[12], table[13], "\n"]) sent += table[0] + "\t" + table[1] + "\t" + table[2] + "\t" + table[ 4] + "\t" + table[5] + "\t" + table[6] + "\t" + table[ 8] + "\t" + table[10] + "\t" + table[12] + "\t" + table[ 13] + "\n" #sent+=table[0]+"\t"+table[1]+"\n" print sent dg = DependencyGraph(sent) tree = dg.tree() print tree.pprint() #print(dg) print(dg.to_conll(4))
def _parse(self, t):
    """Parse KNP-formatted analyzer output *t* into an NLTK dependency tree.

    Lines starting with ``*`` or ``+`` open a new bunsetsu/tag node whose
    second field encodes the parent index and dependency type (e.g.
    ``-1D``); lines starting with ``#`` are comments; every other line is
    a morph appended to the most recently opened node.

    :param t: raw KNP output for one sentence
    :return: the tree produced by ``DependencyGraph.tree()``
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if not line:
            # Guard: an empty line would otherwise raise IndexError on
            # line[0] below.
            continue
        if line[0] in "*+":
            # Start of bunsetsu or tag.
            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[1])
            assert m is not None
            node = dg.nodes[i]
            node.update({"address": i, "rel": m.group(2), "word": []})
            dep_parent = int(m.group(1))
            if dep_parent == -1:
                # Parent index -1 marks the sentence root.
                dg.root = node
            else:
                dg.nodes[dep_parent]["deps"].append(i)
            i += 1
        elif line[0] != "#":
            # Normal morph line: surface form, then its feature string.
            cells = line.strip().split(" ")
            # Convert cells to a morph tuple.
            morph = cells[0], " ".join(cells[1:])
            dg.nodes[i - 1]["word"].append(morph)
    if self.morphs2str:
        # Optionally collapse each node's morph list into a single string.
        for node in dg.nodes.values():
            node["word"] = self.morphs2str(node["word"])
    return dg.tree()
def _parse(self, t):
    """Convert one sentence of KNP analyzer output *t* into a dependency
    tree.

    '*'/'+' lines open a bunsetsu/tag node (field 1 holds
    "<parent-index><dep-type>"), '#' lines are ignored, and any other
    line is a morph attached to the last opened node.
    """
    graph = DependencyGraph()
    idx = 0
    for ln in t.splitlines():
        marker = ln[0]
        if marker == '*' or marker == '+':
            # New bunsetsu / tag segment.
            fields = ln.strip().split(" ", 3)
            match = re.match(r"([\-0-9]*)([ADIP])", fields[1])
            assert match is not None
            current = graph.nodes[idx]
            current['address'] = idx
            current['rel'] = match.group(2)
            current['word'] = []
            parent = int(match.group(1))
            if parent != -1:
                graph.nodes[parent]['deps'].append(idx)
            else:
                # -1 designates the root of the sentence.
                graph.root = current
            idx += 1
        elif marker != '#':
            # Morph line: surface form followed by its feature string.
            fields = ln.strip().split(" ")
            graph.nodes[idx - 1]['word'].append(
                (fields[0], ' '.join(fields[1:])))
    if self.morphs2str:
        # Flatten each morph list to a string when a converter is set.
        for current in graph.nodes.values():
            current['word'] = self.morphs2str(current['word'])
    return graph.tree()
def parse(sent):
    """Run the constituency and dependency parsers on *sent* and print
    the constituency tree, the dependency tree, and the dependency
    triples."""
    (con_tree,) = con_parser.raw_parse(sent)
    (dep_result,) = dep_parser.raw_parse(sent)
    print()
    print("Constituency Tree:")
    con_tree.pretty_print()
    graph = DependencyGraph(dep_result.to_conll(4))
    print()
    print("Dependency Tree:")
    graph.tree().pprint()
    print()
    print("Dependencies:")
    for gov, rel, dep in graph.triples():
        print(gov, rel, dep)
def _parse(self, t):
    """Parse KNP analyzer output *t* into a dependency tree (legacy
    ``nodelist``-based DependencyGraph API).

    Fix: the original indexed ``dg.nodelist[i]`` *before* the ``while``
    loop that pads ``nodelist``, raising IndexError whenever the current
    node had not already been created by an earlier parent link (e.g.
    when the root bunsetsu appears before later ones).  The padding now
    happens first.
    """
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if line.startswith("*") or line.startswith("+"):
            # Start of bunsetsu or tag; field 1 is "<parent><dep-type>".
            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[1])
            assert m is not None
            dep_parent = int(m.group(1))
            # Grow nodelist BEFORE indexing it, so both this node and its
            # parent are addressable.
            while len(dg.nodelist) < i + 1 or len(
                    dg.nodelist) < dep_parent + 1:
                dg.nodelist.append({'word': [], 'deps': []})
            node = dg.nodelist[i]
            node['address'] = i
            node['rel'] = m.group(2)  # dep_type
            node['word'] = []
            if dep_parent == -1:
                # -1 marks the sentence root.
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)
            i += 1
        elif not line.startswith("#"):
            # Normal morph line: surface form plus its feature string.
            morph_cells = line.strip().split(" ")
            morph = (morph_cells[0], ' '.join(morph_cells[1:]))
            dg.nodelist[i - 1]['word'].append(morph)
    if self.morphs2str:
        # Optionally flatten each node's morph list into a single string.
        for node in dg.nodelist:
            node['word'] = self.morphs2str(node['word'])
    return dg.tree()
def _parse(self, t):
    """Build a DependencyGraph from KNP-formatted output ``t`` and return
    it as a tree (legacy ``nodelist``-based DependencyGraph API).

    Lines beginning with '*' or '+' start a new bunsetsu/tag node; lines
    beginning with '#' are skipped; anything else is treated as a morph
    belonging to the most recently opened node.
    """
    dg = DependencyGraph()
    i = 0  # index of the next bunsetsu/tag node to create
    for line in t.splitlines():
        if line.startswith("*") or line.startswith("+"):
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            # cells[1] looks like "<parent-index><dep-type>", e.g. "-1D".
            m = re.match(r"([\-0-9]*)([ADIP])", cells[1])
            assert m is not None
            node = dg.nodelist[i]
            node['address'] = i
            node['rel'] = m.group(2)  # dep_type
            node['word'] = []
            dep_parent = int(m.group(1))
            # Pad nodelist so this node and its parent are addressable.
            while len(dg.nodelist) < i+1 or len(dg.nodelist) < dep_parent+1:
                dg.nodelist.append({'word':[], 'deps':[]})
            if dep_parent == -1:
                # Parent index -1 marks the sentence root.
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)
            i += 1
        elif not line.startswith("#"):
            # normal morph: surface form followed by its feature string
            cells = line.strip().split(" ")
            # convert cells to morph tuples
            morph = ( cells[0], ' '.join(cells[1:]) )
            dg.nodelist[i-1]['word'].append(morph)
    if self.morphs2str:
        # Optionally collapse each node's morph list into one string.
        for node in dg.nodelist:
            node['word'] = self.morphs2str(node['word'])
    return dg.tree()
def par(self, infilm, outfilm):
    """Segment, POS-tag and dependency-parse each line of *infilm* with
    LTP, write the analysis to *outfilm*, and draw each parse tree.

    Fixes: the output file was opened with ``encoding='utf=8'`` — a typo
    that raises LookupError on the first write — now ``'utf-8'``; both
    files are managed with ``with`` so they are closed even if parsing
    raises.

    :param infilm:  path of the input file, one sentence per line
    :param outfilm: path of the output file
    """
    with open(infilm, 'r', encoding='utf-8') as input_data, \
            open(outfilm, 'w+', encoding='utf-8') as output_data:
        for line in input_data:
            line = line.strip()
            # Word segmentation.
            words = self.segmentor.segment(line)
            print('分词:' + '\t'.join(words))
            # Part-of-speech tagging.
            postags = self.postagger.postag(words)
            print('词性标注:' + '\t'.join(postags))
            # Dependency parsing.
            arcs = self.parser.parse(words, postags)
            rely_id = [arc.head for arc in arcs]       # parent id per word
            relation = [arc.relation for arc in arcs]  # dependency labels
            # Resolve each parent id to its word ('Root' for head == 0).
            heads = ['Root' if head == 0 else words[head - 1]
                     for head in rely_id]
            output_data.write(line)
            output_data.write('\n')
            output_data.write('句法分析:')
            par_result = ''
            for i in range(len(words)):
                if arcs[i].head == 0:
                    arcs[i].relation = "ROOT"
                # CoNLL-ish row: word(rel)  tag  head  rel
                par_result += "\t" + words[i] + "(" + arcs[i].relation \
                    + ")" + "\t" + postags[i] + "\t" + str(arcs[i].head) \
                    + "\t" + arcs[i].relation + "\n"
                output_data.write(relation[i] + '(' + words[i] + ', '
                                  + heads[i] + ')' + '\n')
            print(par_result)
            conlltree = DependencyGraph(par_result)  # dependency graph
            tree = conlltree.tree()                  # build tree structure
            tree.draw()                              # display the tree
            output_data.write('\n')
\end{dependency} \end{CJK} \end{document} ''' else: print >>fout,r'''\begin{tikzpicture}''' i = -1 dep_str = '' wids = [] wlens,xpos = [0],[0] for s in open(parse_file): if len(s.strip()) == 0: i += 1 if i == line_num: dg = DependencyGraph(dep_str) tree = dg.tree() if flag == '0': h = tree.height() traverse(tree,h,0) for k,w in wids: print >>fout,'\\node(m{}) at({},{}) {{{}}};'.format(k,0.6*xpos[k],0,w) print >>fout,'\\node at({},{}) {{{}}};'.format(0.6*xpos[k],-0.5,k-1) print >>fout,'\\draw[dotted] (m{}) -- (n{});'.format(k,k) else: print >>fout,tree.pprint_latex_qtree() break dep_str = '' wids = [] wlens,xpos = [0],[0] else: s = s.split()