def inputsentence_analysis(inputsentence):
    """Parse a sentence with the remote NLP server and print its dependency graph.

    The sentence is POSTed to the parse service as ``sentence=<text>``.  The
    response body is treated as tab-separated rows, one per line; columns
    0, 1, 2, 4, 5, 6, 8, 10, 12 and 13 of each row are kept and the reduced
    table is handed to ``DependencyGraph``.  The reduced table, the tree and
    the 4-column CoNLL rendering are printed; nothing is returned.

    Rows with fewer than 14 columns (for example the empty string left after
    a trailing newline) are skipped instead of raising ``IndexError``.

    :param inputsentence: sentence text; it is appended to the POST body
        as-is, without URL encoding.
    """
    # Columns of the server output that are forwarded to DependencyGraph.
    keep = (0, 1, 2, 4, 5, 6, 8, 10, 12, 13)

    post_data = "sentence=" + inputsentence
    response = urllib2.urlopen("http://barbar.cs.lth.se:8081/parse",
                               data=post_data)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    lines = []
    for row in content.split('\n'):
        table = row.split('\t')
        if len(table) <= keep[-1]:
            # Blank or truncated row: it has no column 13 to select.
            continue
        lines.append("\t".join([table[k] for k in keep]) + "\n")
    sent = "".join(lines)

    # Single-argument print(...) behaves the same as the print statement.
    print(sent)
    dg = DependencyGraph(sent)
    tree = dg.tree()
    print(tree.pprint())
    print(dg.to_conll(4))
Example #2
0
    def _parse(self, t):
        """Turn one annotated text block into a dependency tree.

        Lines starting with ``*`` or ``+`` open a new unit (bunsetsu or tag).
        The second space-separated field of such a line must begin with the
        index of the unit it depends on followed by a relation letter (one of
        ``A``, ``D``, ``I``, ``P``); index ``-1`` marks the root.  Lines
        starting with ``#`` are ignored.  Every other line is a morph, stored
        as a ``(first_field, remaining_fields)`` tuple on the node whose
        address is one less than the number of units opened so far.

        If ``self.morphs2str`` is truthy it is applied to the morph list of
        every node before the tree is built.

        :param t: the annotated text, one item per line.
        :return: the value of ``tree()`` on the populated ``DependencyGraph``.
        """
        graph = DependencyGraph()
        next_address = 0
        for raw in t.splitlines():
            lead = raw[0]
            if lead == "#":
                # comment line
                continue

            if lead not in "*+":
                # ordinary morph line: first field, then everything else
                fields = raw.strip().split(" ")
                morph = (fields[0], " ".join(fields[1:]))
                graph.nodes[next_address - 1]["word"].append(morph)
                continue

            # header line opening a bunsetsu or tag unit
            header = raw.strip().split(" ", 3)
            match = re.match(r"([\-0-9]*)([ADIP])", header[1])

            assert match is not None

            unit = graph.nodes[next_address]
            unit.update(
                {"address": next_address, "rel": match.group(2), "word": []}
            )

            parent = int(match.group(1))
            if parent != -1:
                graph.nodes[parent]["deps"].append(next_address)
            else:
                graph.root = unit

            next_address += 1

        convert = self.morphs2str
        if convert:
            for entry in graph.nodes.values():
                entry["word"] = convert(entry["word"])

        return graph.tree()
Example #3
0
    def _parse(self, t):
        """Build a dependency tree from a block of annotated lines.

        Three kinds of line are distinguished by their first character:

        * ``*`` or ``+`` -- start of a bunsetsu or tag unit.  The second
          space-separated field is matched against
          ``<parent index><relation letter>``; a parent index of ``-1`` makes
          the unit the root, any other value registers the unit as a
          dependent of that parent.
        * ``#`` -- skipped.
        * anything else -- a morph, appended to node ``count - 1`` (where
          ``count`` is the number of units started so far) as a
          ``(first_field, rest_of_line)`` pair.

        When ``self.morphs2str`` is truthy, each node's collected morphs are
        replaced by ``self.morphs2str(morphs)``.

        :param t: annotated text to parse.
        :return: ``DependencyGraph.tree()`` of the assembled graph.
        """
        dep_graph = DependencyGraph()
        unit_count = 0
        for text_line in t.splitlines():
            first_char = text_line[0]
            if first_char in ('*', '+'):
                # unit header
                header_fields = text_line.strip().split(" ", 3)
                parsed = re.match(r"([\-0-9]*)([ADIP])", header_fields[1])

                assert parsed is not None
                parent_text, relation = parsed.groups()

                current = dep_graph.nodes[unit_count]
                current.update(
                    {'address': unit_count, 'rel': relation, 'word': []}
                )

                parent_index = int(parent_text)
                if parent_index == -1:
                    dep_graph.root = current
                else:
                    dep_graph.nodes[parent_index]['deps'].append(unit_count)

                unit_count += 1
            elif first_char != '#':
                # morph line
                pieces = text_line.strip().split(" ")
                surface, details = pieces[0], ' '.join(pieces[1:])
                dep_graph.nodes[unit_count - 1]['word'].append(
                    (surface, details)
                )

        if self.morphs2str:
            for graph_node in dep_graph.nodes.values():
                graph_node['word'] = self.morphs2str(graph_node['word'])

        return dep_graph.tree()
def parse(sent):
    """Print the constituency tree, dependency tree and dependency triples of a sentence.

    The sentence is parsed with the module-level ``con_parser`` and
    ``dep_parser``; each is expected to yield exactly one parse (the
    single-element unpacking raises ``ValueError`` otherwise).  The dependency
    parse is rebuilt as a ``DependencyGraph`` from its 4-column CoNLL output
    before its tree and triples are printed.

    :param sent: the raw sentence passed to both parsers' ``raw_parse``.
    """

    def announce(title):
        # Blank line followed by the section title.
        print()
        print(title)

    (constituency,) = con_parser.raw_parse(sent)
    (dependency_parse,) = dep_parser.raw_parse(sent)

    announce("Constituency Tree:")
    constituency.pretty_print()

    graph = DependencyGraph(dependency_parse.to_conll(4))
    announce("Dependency Tree:")
    graph.tree().pprint()

    announce("Dependencies:")
    for head, relation, dependent in graph.triples():
        print(head, relation, dependent)
Example #5
0
    def _parse(self, t):
        """Build a dependency tree from a block of annotated lines.

        Lines starting with ``*`` or ``+`` open a new unit (bunsetsu or tag);
        their second space-separated field must begin with the parent index
        followed by a relation letter (``A``, ``D``, ``I`` or ``P``), and a
        parent index of ``-1`` marks the root.  Lines starting with ``#`` are
        ignored.  Every other line is a morph, appended as a
        ``(first_field, remaining_fields)`` tuple to ``dg.nodelist[i - 1]``,
        where ``i`` is the number of units opened so far.

        If ``self.morphs2str`` is truthy it is applied to the morph list of
        every node before the tree is built.

        :param t: the annotated text, one item per line.
        :return: the value of ``tree()`` on the populated ``DependencyGraph``.
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line.startswith(("*", "+")):
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                dep_parent = int(m.group(1))

                # Make sure both this unit's slot and its parent's slot exist
                # *before* indexing.  Previously ``dg.nodelist[i]`` was read
                # first, which raised IndexError unless an earlier unit had
                # already named a parent index >= i.
                needed = max(i, dep_parent) + 1
                while len(dg.nodelist) < needed:
                    dg.nodelist.append({'word': [], 'deps': []})

                node = dg.nodelist[i]
                node['address'] = i
                node['rel'] = m.group(2)  # dep_type
                node['word'] = []

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodelist[dep_parent]['deps'].append(i)

                i += 1
            elif not line.startswith("#"):
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = (cells[0], ' '.join(cells[1:]))
                dg.nodelist[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodelist:
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Example #6
0
    def _parse(self, t):
        """Parse annotated text into a dependency tree.

        Each line is classified by how it starts:

        * ``*`` or ``+`` -- header of a bunsetsu or tag unit.  Its second
          space-separated field is matched against
          ``<parent index><relation letter in ADIP>``.  The unit becomes node
          ``i`` of ``dg.nodelist``; a parent index of ``-1`` makes it the
          root, otherwise ``i`` is appended to the parent's ``deps``.
        * ``#`` -- ignored.
        * anything else -- a morph, stored on ``dg.nodelist[i - 1]`` as a
          ``(first_field, rest_of_line)`` tuple.

        When ``self.morphs2str`` is truthy, every node's morph list is
        replaced by ``self.morphs2str(morphs)``.

        :param t: annotated text to parse.
        :return: ``DependencyGraph.tree()`` of the assembled graph.
        """
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line.startswith(("*", "+")):
                # start of bunsetsu or tag
                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                dep_parent = int(m.group(1))

                # Extend the node list up front so that both index i and
                # index dep_parent are valid.  The original looked up
                # dg.nodelist[i] before extending, so its "len < i + 1" check
                # could never fire and a missing slot surfaced as IndexError.
                while len(dg.nodelist) <= i or len(dg.nodelist) <= dep_parent:
                    dg.nodelist.append({'word': [], 'deps': []})

                node = dg.nodelist[i]
                node['address'] = i
                node['rel'] = m.group(2)  # dep_type
                node['word'] = []

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodelist[dep_parent]['deps'].append(i)

                i += 1
            elif not line.startswith("#"):
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = (cells[0], ' '.join(cells[1:]))
                dg.nodelist[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodelist:
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Example #7
0
    def par(self, infilm, outfilm):
        """Segment, tag and dependency-parse every line of a file.

        For each line of ``infilm`` (with surrounding whitespace stripped):

        * the segmentation and the part-of-speech tags are printed;
        * the line, a header and one ``relation(word, head_word)`` entry per
          word are written to ``outfilm``, followed by a blank line;
        * a tab-separated table (word with relation, tag, head index,
          relation) is printed, loaded into ``DependencyGraph`` and its tree
          is displayed with ``draw()``.

        In the table, words whose head index is 0 get the relation ``ROOT``;
        the entries written to ``outfilm`` keep the parser's own label.

        :param infilm: path of the UTF-8 input file, one sentence per line.
        :param outfilm: path of the UTF-8 output file; it is overwritten.
        """
        # Both files are closed even if segmentation/parsing/drawing raises.
        # The output encoding was spelled 'utf=8'; use the canonical name.
        with open(infilm, 'r', encoding='utf-8') as input_data, \
                open(outfilm, 'w+', encoding='utf-8') as output_data:
            for line in input_data:
                line = line.strip()
                # Word segmentation.
                words = self.segmentor.segment(line)
                # self.segmentor.load_with_lexicon('lexicon')  # use a custom dictionary; 'lexicon' is the path of the external dictionary file
                print('分词:' + '\t'.join(words))

                # Part-of-speech tagging.
                postags = self.postagger.postag(words)
                print('词性标注:' + '\t'.join(postags))

                # Dependency parsing.
                arcs = self.parser.parse(words, postags)
                # Head index of every word (0 means the root).
                rely_id = [arc.head for arc in arcs]
                # Dependency relation label of every word.
                relation = [arc.relation for arc in arcs]
                # Head word of every word; head indices are 1-based.
                heads = ['Root' if head_id == 0 else words[head_id - 1]
                         for head_id in rely_id]

                output_data.write(line)
                output_data.write('\n')
                output_data.write('句法分析:')
                rows = []
                for word, postag, arc, rel, head_word in zip(
                        words, postags, arcs, relation, heads):
                    # The graph table labels the root arc "ROOT"; the text
                    # written to the file keeps the parser's label (rel).
                    label = "ROOT" if arc.head == 0 else arc.relation
                    rows.append("\t{}({})\t{}\t{}\t{}\n".format(
                        word, label, postag, arc.head, label))
                    output_data.write(rel + '(' + word + ', ' +
                                      head_word + ')' + '\n')
                par_result = ''.join(rows)
                print(par_result)
                # Convert the table into a dependency graph.
                conlltree = DependencyGraph(par_result)
                # Build the tree structure.
                tree = conlltree.tree()
                # Display the resulting tree.
                tree.draw()
                output_data.write('\n')
Example #8
0
\end{dependency}
\end{CJK}
\end{document}
'''
else:
    print >>fout,r'''\begin{tikzpicture}'''
    i = -1
    dep_str = ''
    wids = []
    wlens,xpos = [0],[0]
    for s in open(parse_file):
        if len(s.strip()) == 0:
            i += 1
            if i == line_num:
                dg = DependencyGraph(dep_str)
                tree = dg.tree()
                if flag == '0':
                    h = tree.height()
                    traverse(tree,h,0)
                    for k,w in wids:
                        print >>fout,'\\node(m{}) at({},{}) {{{}}};'.format(k,0.6*xpos[k],0,w)
                        print >>fout,'\\node at({},{}) {{{}}};'.format(0.6*xpos[k],-0.5,k-1)
                        print >>fout,'\\draw[dotted] (m{}) -- (n{});'.format(k,k)
                else:
                    print >>fout,tree.pprint_latex_qtree()
                break
            dep_str = ''
            wids = []
            wlens,xpos = [0],[0]
        else:
            s = s.split()