def inputsentence_analysis(inputsentence):
    # POST the sentence to the NLP server and fetch the tab-separated parse
    post_data = "sentence=" + inputsentence
    response = urllib2.urlopen("http://barbar.cs.lth.se:8081/parse", data=post_data)
    if response.getcode() != 200:
        print 'NLP server error (problem processing)'
    content = response.read()

    # keep columns 0-2, 4-6, 8, 10 and 12-13 of each row
    sent = ""
    for row in content.split('\n'):
        table = row.split('\t')
        if len(table) < 14:
            continue  # skip blank or malformed rows
        sent += "\t".join(table[c] for c in (0, 1, 2, 4, 5, 6, 8, 10, 12, 13)) + "\n"

    print sent
    dg = DependencyGraph(sent)
    tree = dg.tree()
    print tree.pprint()
    print(dg.to_conll(4))
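
All of the examples on this page ultimately feed tab- or space-separated `word tag head rel` rows to `DependencyGraph`. A minimal self-contained sketch of that core API (assuming NLTK 3, whose default top relation label is 'ROOT'; the sentence is made up):

from nltk.parse import DependencyGraph

# two-token sentence in the 4-column format used above
conll = "John\tNNP\t2\tnsubj\nruns\tVBZ\t0\tROOT"
dg = DependencyGraph(conll)
dg.tree().pprint()               # (runs John)
print(dg.to_conll(4))            # round-trips the 4-column format
for gov, rel, dep in dg.triples():
    print(gov, rel, dep)         # ('runs', 'VBZ') nsubj ('John', 'NNP')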
Example No. 2
    def _parse(self, t):
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line[0] in "*+":
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                node = dg.nodes[i]
                node.update({"address": i, "rel": m.group(2), "word": []})

                dep_parent = int(m.group(1))

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodes[dep_parent]["deps"].append(i)

                i += 1
            elif line[0] != "#":
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = cells[0], " ".join(cells[1:])
                dg.nodes[i - 1]["word"].append(morph)

        if self.morphs2str:
            for node in dg.nodes.values():
                node["word"] = self.morphs2str(node["word"])

        return dg.tree()
Example No. 4
    def __init__(self, ldg=None):
        DependencyGraph.__init__(self)
        self.nodes = defaultdict(lambda: {'address': None,
                                          'ldg': 0,
                                          'gid': 1,  # same value as the gid of the nodes in ldg
                                          'lemma': None,
                                          'head': None,
                                          'deps': defaultdict(int),
                                          'remaining_ops': defaultdict(list), #list(LgGraph.operator_dic.keys()),
                                          'ctag': None,
                                          'tag': None,
                                          'feats': None,
                                          })
        self.git_list = [1]
        self.nodes[0].update(
                        {'address': 0,
                         'head': -1,
                         'ldg': 'TOP',
                         'gid': 1,  # same value as the gid of the nodes in ldg
                         'remaining_ops': defaultdict(list),
                         }
                    )
        if isinstance(ldg, LgGraph):
            self.nodes[0]['ldg'] = ldg

        if isinstance(ldg, GraphNet):
            self.nodes = ldg
            self.git_list = ldg.get_git_list()
Example No. 5
 def __init__(self, sent, top_relation_label):
     DependencyGraph.__init__(self,
                              sent,
                              top_relation_label=top_relation_label)
     self.words = [
         node['word'] for i, node in sorted(self.nodes.items())
         if node['tag'] != 'TOP'
     ]
Example No. 6
def evald(prd_list, trg_file_path, directed=True):
    """Compute UAS score."""
    with open(trg_file_path, 'r') as trg_file:
        trg_string = trg_file.read().strip()
        trg_string_list = trg_string.split('\n\n')
        try:
            trg_list = [
                DependencyGraph(t, top_relation_label='root')
                for t in trg_string_list
            ]
        except ValueError:

            def extract_10_cells(cells, index):
                line_index, word, lemma, tag, _, head, rel, _, _, _ = cells
                try:
                    index = int(line_index)
                except ValueError:
                    # index can't be parsed as an integer, use default
                    pass
                return index, word, lemma, tag, tag, '', head, rel

            trg_list = [
                DependencyGraph(t,
                                top_relation_label='root',
                                cell_extractor=extract_10_cells)
                for t in trg_string_list
            ]

    correct = 0.0
    total = 0.0
    for prd, trg in zip(prd_list, trg_list):
        # assert len(prd.nodes) == len(trg.nodes)
        prd_deps = get_dep(prd, directed)
        trg_deps = get_dep(trg, directed)
        if len(prd_deps) != len(trg_deps):
            print(prd)
            print(prd_deps, len(prd_deps))
            print(trg)
            print(trg_deps, len(trg_deps))
            raise Exception('mismatched number of dependencies between prediction and target')

        for d in prd_deps:
            if d in trg_deps:
                correct += 1
        total += len(prd_deps)
    acc = correct / total

    if directed:
        print('DDA: %.3f' % acc)
    else:
        print('UDA: %.3f' % acc)
    return acc
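
A hypothetical invocation of `evald` (both file paths are made up; `get_dep` is the project's own helper used above):

# sketch only: score predicted parses against a gold CoNLL file
with open('predicted.conll') as f:
    prd_list = [DependencyGraph(t, top_relation_label='root')
                for t in f.read().strip().split('\n\n')]
evald(prd_list, 'gold.conll', directed=True)    # prints 'DDA: 0.xxx'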
Example No. 7
def read_conllx_triples(corpus_file):
    # commented-out default: corpus_file='/tmp/autocorpus3/autocorpus3/static/Chinese_train_pos.xml.utf8.Chinese_medicine.segmented.conllx'
    triples = []
    sents = concat([
        DependencyCorpusView(corpus_file,
                             tagged=False,
                             group_by_sent=True,
                             dependencies=True)
    ])
    for sent in sents:
        dg = DependencyGraph(sent, top_relation_label='root')
        triples += dg.triples()
    return triples
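
A hypothetical call (the path is illustrative); each triple pairs a (word, tag) governor with its relation label and a (word, tag) dependent:

for gov, rel, dep in read_conllx_triples('train.conllx'):
    print(gov, rel, dep)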
Example No. 8
    def sent_to_dggraph(self, sent, rootrel='PRED'):
        """
        Creates a Dependency Graph object from an AGLDT sentence
        Parameters
        ----------
        sent : list(named tuple)
            the AGLDT sentence
        Returns
        -------
        nltk.parse.DependencyGraph
        """

        from nltk.parse import DependencyGraph

        strsent = "\n".join([
            "{}\t{}\t{}\t{}".format(w.form, w.postag, w.head, w.relation)
            for w in sent
        ])
        for w in sent:
            if w.head == '0':
                rootrel = w.relation
                break
        g = DependencyGraph(strsent,
                            cell_separator="\t",
                            top_relation_label=rootrel)

        return g
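
A minimal usage sketch, assuming an AGLDT token is a namedtuple exposing the form/postag/head/relation fields the method reads; `reader` stands in for whatever object defines `sent_to_dggraph`:

from collections import namedtuple

Word = namedtuple('Word', 'form postag head relation')
sent = [Word('Roma', 'n-s---fn-', '2', 'SBJ'),
        Word('est', 'v3spia---', '0', 'PRED')]
g = reader.sent_to_dggraph(sent)
g.tree().pprint()    # (est Roma)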
Example No. 9
def test_oracle():
    """Make sure that the oracle is able to build the correct arcs in order"""
    graph_data = """\
word_1 tag_1 0 ROOT
word_2 tag_2 3 deprel_2
word_3 tag_3 5 deprel_3
word_4 tag_4 3 deprel_4
word_5 tag_5 1 deprel_5
"""
    graph = DependencyGraph(graph_data)
    pp = PartialParse(get_sentence_from_graph(graph))
    transition_ids = []
    while not pp.complete:
        transition_id, deprel = pp.get_oracle(graph)
        transition_ids.append(transition_id)
        pp.parse_step(transition_id, deprel)
    _test_arcs("oracle", pp, [(0, 1, 'ROOT'), (3, 2, 'deprel_2'),
                              (5, 3, 'deprel_3'), (3, 4, 'deprel_4'),
                              (1, 5, 'deprel_5')])
    ex_tids = [
        pp.shift_id,
        pp.shift_id,
        pp.shift_id,  # 0 1 2 3
        pp.left_arc_id,
        pp.shift_id,  # 0 1 3 4
        pp.right_arc_id,
        pp.shift_id,  # 0 1 3 5
        pp.left_arc_id,  # 0 1 5
        pp.right_arc_id,
        pp.right_arc_id,  # 0
    ]
    assert transition_ids == ex_tids, \
        "oracle test resulted in transitions {}, expected {}".format(
            transition_ids, ex_tids)
    print('oracle test passed!')
Example No. 10
	def tagged_parse_sents(self, sentences, verbose=False):
		"""
		>>> parser.parse(['من', 'به', 'مدرسه', 'رفته بودم', '.']).tree().pprint()
		'(رفته_بودم من (به مدرسه) .)'
		"""

		input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll', dir=self.working_dir, delete=False)
		output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll', dir=self.working_dir, delete=False)

		try:
			for sentence in sentences:
				for i, (word, tag) in enumerate(sentence, start=1):
					word = word.strip()
					if not word: word = '_'
					input_file.write(('\t'.join([str(i), word.replace(' ', '_'), self.lemmatize(word, tag).replace(' ', '_'), tag, tag, '_', '0', 'ROOT', '_', '_', '\n'])).encode('utf8'))
				input_file.write('\n\n'.encode('utf8'))
			input_file.close()

			cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir, '-c', self.mco, '-i', input_file.name, '-o', output_file.name, '-m', 'parse']
			if self._execute(cmd, verbose) != 0:
				raise Exception("MaltParser parsing failed: %s" % (' '.join(cmd)))

			return [DependencyGraph(item) for item in codecs.open(output_file.name, encoding='utf8').read().split('\n\n')]

		finally:
			input_file.close()
			os.remove(input_file.name)
			output_file.close()
			os.remove(output_file.name)
Example No. 11
    def to_dependendency_graph(self):
        """
        Returns the sentence as an NLTK dependency graph. Useful if you want to use the DOT
        visualization of the dependency graph or `triples` that returns
        all the dependency triplets of a sentence as tuples: (form_head, dependency_label, form_dependent).
        Refer to the class documentation of nltk.parse.DependencyGraph for more details.
        """

        d = {
            "labels(n)": "type",
            "n.form": "form",
            "n.pos": "pos",
            "n.head": "head",
            "n.original_label": "label"
        }
        c = self._query_sent(include_artificial=True, **d)
        s = ""
        try:
            top_lab = [
                d["Rel"] for d in self.dependents if d["Rel"] != "AuxK"
            ][0]
        except IndexError:
            # default to pred
            top_lab = "PRED"

        for e in c:
            if e["type"][0] == "Artificial":
                form = "*" + e["form"]
            else:
                form = e["form"]
            s = s + "{}\t{}\t{}\t{}\n".format(form, e["pos"], e["head"],
                                              e["label"])

        return DependencyGraph(s, top_relation_label=top_lab)
Example No. 12
def test_oracle2():
    """Make sure that the oracle is able to build the correct arcs in order"""
    graph_data = """\
Nadia PROPN 2 nsubj
rode VERB 0 ROOT
the DET 5 det
old ADJ 5 amod
donkey NOUN 2 dobj
with ADP 2 prep
dexterity NOUN 6 pobj
"""
    graph = DependencyGraph(graph_data)
    pp = PartialParse(get_sentence_from_graph(graph))
    transition_ids = []
    while not pp.complete:
        transition_id, deprel = pp.get_oracle(graph)
        transition_ids.append(transition_id)
        pp.parse_step(transition_id, deprel)
    _test_arcs("oracle", pp, [(2, 1, 'nsubj'), (0, 2, 'ROOT'), (5, 3, 'det'),
                              (5, 4, 'amod'), (2, 5, 'dobj'), (2, 6, 'prep'),
                              (6, 7, 'pobj')])
    ex_tids = [
        pp.shift_id, pp.shift_id, pp.left_arc_id, pp.shift_id, pp.shift_id,
        pp.shift_id, pp.left_arc_id, pp.left_arc_id, pp.right_arc_id,
        pp.shift_id, pp.shift_id, pp.right_arc_id, pp.right_arc_id,
        pp.right_arc_id
    ]
    assert transition_ids == ex_tids, \
        "oracle2 test resulted in transitions {}, expected {}".format(
            transition_ids, ex_tids)
    print('oracle2 test passed!')
Example No. 13
def read_dep_parses(inputfile, depfile, filename=' ', make_graph=False):
    fh = unzip_corpus(inputfile, depfile, True).splitlines()

    # list to store the results
    graphs = []

    # Read the lines containing the first parse.
    dep, qID = read_dep(fh, filename)

    # print(qID)
    # While there are more lines:
    # 1) create the DependencyGraph
    # 2) add it to our list
    # 3) try again until we're done
    while dep is not None:
        #creates dependency graph
        if make_graph:
            graph = DependencyGraph(dep)
        else:
            graph = dep

        graphs.append((qID, graph))

        #removes lines that were present in dep
        n = len(dep.splitlines() + [qID]) + 1
        fh = fh[n:]

        dep, qID = read_dep(fh, filename)

    # returns a list of (questionID, DependencyGraph) tuples
    return graphs
Example No. 14
def parse(sent):
    con_parse, = con_parser.raw_parse(sent)
    dep_parse, = dep_parser.raw_parse(sent)

    print()
    print("Constituency Tree:")
    con_parse.pretty_print()

    dg = DependencyGraph(dep_parse.to_conll(4))
    print()
    print("Dependency Tree:")
    dg.tree().pprint()

    print()
    print("Dependencies:")
    for governor, dependency, dependent in dg.triples():
        print(governor, dependency, dependent)
Example No. 15
def flattened_node_list(graph):
    """
    Takes an instance of DependencyGraph corresponding to a parsed sentence.
    Flattens into a list of DependencyGraph instances, each with a different
    word from the sentence as its root node (and no children).
    """
    nodelist = copy.copy(graph.nodelist[1:])
    flattened = []
    for node in nodelist:
        node["deps"] = []
        node["head"] = 0
        node["address"] = 1
        new_graph = DependencyGraph()
        new_graph.nodelist.append(node)
        new_graph.root = node
        flattened.append(new_graph)
    return flattened
Example No. 16
 def parsed_sents(self, fileids=None):
     sents = concat(
         [
             DependencyCorpusView(fileid, False, True, True, encoding=enc)
             for fileid, enc in self.abspaths(fileids, include_encoding=True)
         ]
     )
     return [DependencyGraph(sent) for sent in sents]
Example No. 17
def prepare_deps(raw_deps):

    if isinstance(raw_deps, float) and math.isnan(raw_deps):
        return []
    return [
        DependencyGraph(dep, top_relation_label="root")
        for dep in raw_deps.split("\n\n") if len(dep) > 2
    ]
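
Illustrative input for `prepare_deps` (the CoNLL content is made up); a NaN cell, as pandas produces for missing values, comes through as a float and yields an empty list:

raw = ("Pierre\tNNP\t2\tnsubj\nsleeps\tVBZ\t0\troot"
       "\n\n"
       "It\tPRP\t2\tnsubj\nworks\tVBZ\t0\troot")
print(len(prepare_deps(raw)))        # 2
print(prepare_deps(float('nan')))    # []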
Example No. 19
    def _parse(self, t):
        dg = DependencyGraph()
        i = 0
        for line in t.splitlines():
            if line.startswith("*") or line.startswith("+"):
                # start of bunsetsu or tag

                cells = line.strip().split(" ", 3)
                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])

                assert m is not None

                node = dg.nodelist[i]
                node['address'] = i
                node['rel'] = m.group(2)  # dep_type

                node['word'] = []

                dep_parent = int(m.group(1))

                while len(dg.nodelist) < i + 1 or len(
                        dg.nodelist) < dep_parent + 1:
                    dg.nodelist.append({'word': [], 'deps': []})

                if dep_parent == -1:
                    dg.root = node
                else:
                    dg.nodelist[dep_parent]['deps'].append(i)

                i += 1
            elif not line.startswith("#"):
                # normal morph
                cells = line.strip().split(" ")
                # convert cells to morph tuples
                morph = (cells[0], ' '.join(cells[1:]))
                dg.nodelist[i - 1]['word'].append(morph)

        if self.morphs2str:
            for node in dg.nodelist:
                node['word'] = self.morphs2str(node['word'])

        return dg.tree()
Example No. 20
    def __init__(self, tree_str=None, cell_extractor=None, zero_based=False, cell_separator=None, top_relation_label='root'):
        DependencyGraph.__init__(self,tree_str, cell_extractor, zero_based, cell_separator, top_relation_label)

        self.nodes = defaultdict(lambda:  {'address': None,
                                   'word': None,
                                   'lemma': None,
                                   'ctag': None,    # upostag
                                   'tag': None,     # xpostag
                                   'feats': None,
                                   'head': None,
                                   'deps': defaultdict(list),
                                   'rel': None,
                                   })
        self.nodes[0].update(
            {
                'ctag': 'TOP',
                'tag': 'TOP',
                'address': 0,
            }
        )
Example No. 23
def Process(sentence):
    words = sentence.replace('|', '।').split()
    tags = test_fn(words)
    text = []
    i = 0
    for word, tag in zip(words, tags):
        i += 1
        fill = '_'
        text.append('\t'.join(
            [str(i), word, fill, fill, fill, fill, fill, fill, fill, fill]))
    dg = DependencyGraph('\n'.join(text))
    text = '\n'.join(text)
    text = text + '\n\n' + text
    with open('biaffine-parser-master/data/naive3.conllx', 'w') as f:
        f.write(text)
    os.chdir('biaffine-parser-master')
    os.system(
        'python run.py predict --feat=bert --fdata=data/naive3.conllx --fpred=data/naive3.conllx'
    )
    txt = ''
    os.chdir('..')
    with open('biaffine-parser-master/data/naive3.conllx', 'r') as f:
        txt = f.read().split('\n\n')[0]

    # parser = TransitionParser('arc-eager')
    # with open('models/parser.pkl','rb') as in_file:
    #     parser = pickle.load(in_file)
    # predictions = parser.parse([dg],'models/arc_eager.model')
    # txt = predictions[0].to_conll(4)
    err = False
    try:
        out = DependencyGraph(txt)
        out_dot = out.to_dot()
        G = pgv.AGraph(out_dot)
        G.layout(prog='dot')  # use dot
        G.draw('static/process.png')
    except Exception:
        err = True
        txt += 'Error generating graph.\n'
    return txt, err
Example No. 24
    def par(self, infilm, outfilm):
        input_data = open(infilm, 'r', encoding='utf-8')
        output_data = open(outfilm, 'w+', encoding='utf-8')
        for line in input_data.readlines():
            line = line.strip()
            # word segmentation
            words = self.segmentor.segment(line)
            # self.segmentor.load_with_lexicon('lexicon')  # to use a custom dictionary; 'lexicon' is the path to the external lexicon file
            print('segmentation: ' + '\t'.join(words))

            # POS tagging
            postags = self.postagger.postag(words)
            print('POS tags: ' + '\t'.join(postags))

            # dependency parsing
            arcs = self.parser.parse(words, postags)
            rely_id = [arc.head for arc in arcs]  # head (parent) indices
            relation = [arc.relation for arc in arcs]  # dependency relations
            heads = ['Root' if id == 0 else words[id - 1]
                     for id in rely_id]  # map head indices to head words

            output_data.write(line)
            output_data.write('\n')
            output_data.write('dependency parse:')
            par_result = ''
            for i in range(len(words)):
                if arcs[i].head == 0:
                    arcs[i].relation = "ROOT"
                par_result += "\t" + words[i] + "(" + arcs[
                    i].relation + ")" + "\t" + postags[i] + "\t" + str(
                        arcs[i].head) + "\t" + arcs[i].relation + "\n"
                output_data.write(relation[i] + '(' + words[i] + ', ' +
                                  heads[i] + ')' + '\n')
            print(par_result)
            conlltree = DependencyGraph(par_result)  # convert to a dependency graph
            tree = conlltree.tree()  # build the tree structure
            tree.draw()  # draw the tree
            output_data.write('\n')
        input_data.close()
        output_data.close()
Example No. 25
    def trees(self):
        for sentence in self._sentences():
            tree = DependencyGraph(sentence)

            for node in word_nodes(tree):
                node['mtag'] = [node['ctag'], node['tag']]

                if 'ezafe' in node['feats']:
                    node['mtag'].append('EZ')

                node['mtag'] = self._pos_map(node['mtag'])

            yield tree
Example No. 26
 def parsed_sents_3cells(self, tagset=None, fileids=None):
     sentences = []
     if tagset == 'opt':
         sentences = self.extract(opt=True, head=True, fileids=fileids)
     else:
         sentences = self.extract(tags=True, head=True, fileids=fileids)
     restruct = []
     for j in sentences:
         s = ""
         for i in j:
             s += '{0}\t{1}\t{2}\n'.format(i[0], i[1], i[2])
         restruct.append(s)
     return [DependencyGraph(s) for s in restruct]
Example No. 27
 def _to_dep_graph(self, graph):
     cur = self._get_nodes(graph)
     s = ""
     top_rels = []
     for c in cur:
         form = c["n"]["form"]
         pos = c["n"]["pos"].title() if c["n"]["pos"] else "X"
         head = c["n"]["head"]
         lab = c["n"]["original_label"]
         s += "{} {} {} {}\n".format(form, pos, head, lab)
         if head == 0:
             top_rels.append(lab)
     return DependencyGraph(s, top_relation_label=top_rels[0])
Example No. 28
def clean_corpus(path, parser_std):
    arquivo = open(path)
    a = arquivo.read()
    arquivo.close()
    new_training_data = open(path + "1", "w")
    graphs = [DependencyGraph(entry) for entry in a.split('\n\n') if entry]
    for depgraph in graphs:
        if parser_std._is_projective(depgraph):
            new_training_data.write(depgraph.to_conll(style=10) + "\n")
    new_training_data.close()

    os.remove(arquivo.name)
    os.rename(new_training_data.name,
              (new_training_data.name).replace("1", ""))
Example No. 29
def cabocha2depgraph(t):
    dg = DependencyGraph()
    i = 0
    for line in t.splitlines():
        if line.startswith("*"):
            # start of bunsetsu

            cells = line.strip().split(" ", 3)
            m = re.match(r"([\-0-9]*)([ADIP])", cells[2])

            node = dg.nodelist[i]
            node.update(
                {'address': i,
                 'rel': m.group(2),  # dep_type
                 'word': [],
                 'tag': []
                 })
            dep_parent = int(m.group(1))

            while len(dg.nodelist) < i + 1 or len(dg.nodelist) < dep_parent + 1:
                dg.nodelist.append({'word': [], 'deps': [], 'tag': []})

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("EOS"):
            # normal morph
            cells = line.strip().split("\t")

            morph = (cells[0], tuple(cells[1].split(',')))
            dg.nodelist[i - 1]['word'].append(morph[0])
            dg.nodelist[i - 1]['tag'].append(morph[1])

    return dg
Example No. 30
	def _parse(self, input):
		lines = [DependencyGraph._normalize(line) for line in input.split('\n') if line.strip()]
		temp = []
		for index, line in enumerate(lines):
			cells = line.split('\t')
			_, word, _, tag, _, _, head, rel, _, _ = cells
			head = int(head)
			self.nodelist.append({'address': index+1, 'word': word, 'tag': tag, 'head': head, 'rel': rel, 'deps': [d for (d,h) in temp if h == index+1]})
			try:
				self.nodelist[head]['deps'].append(index+1)
			except IndexError:
				temp.append((index+1, head))

		root_address = self.nodelist[0]['deps'][0]
		self.root = self.nodelist[root_address]
Example No. 31
    def trees(self):
        for sentence in self._sentences():
            tree = DependencyGraph(sentence)

            for node in tree.nodelist[1:]:
                node['mtag'] = [node['ctag'], node['tag']]

            for node in tree.nodelist[1:]:
                if node['rel'] in ('MOZ', 'NPOSTMOD'):
                    tree.nodelist[node['head']]['mtag'].append('EZ')

            for node in tree.nodelist[1:]:
                node['mtag'] = self._pos_map(node['mtag'])

            yield tree
Example No. 32
def read_dep_parses(depfile):
    fh = open(depfile, 'r')
    # list to store the results
    graphs = []
    # Read the lines containing the first parse.
    dep = read_dep(fh)
    # While there are more lines:
    # 1) create the DependencyGraph
    # 2) add it to our list
    # 3) try again until we're done
    while dep is not None:
        graph = DependencyGraph(dep)
        graphs.append(graph)
        dep = read_dep(fh)
    fh.close()
    return graphs
Example No. 33
    def test_minimal_connected_graph(self):
        lst = [(
            "1 Nero _ NNP NNP _ 2 nsubj _ _ *2 played _ VBD VBD _ 0 root _ _ *3 his _ PRP$ PRP$ _ 4 nmod:poss _ _ *4 flute _ NN NN _ 2 dobj _ _ *5 while _ IN IN _ 7 mark _ _ *6 Rome _ NNP NNP _ 7 nsubj _ _ *7 burned _ VBD VBD _ 2 advcl _ _ *",
            ["while"], [2], [7], [5, 7, 2]),
               ("1	Weder	_	NNP	NNP	_	6	compound	_	_*\
2	war	_	NN	NN	_	6	compound	_	_*\
3	der	_	NN	NN	_	6	compound	_	_*\
4	Stern	_	NNP	NNP	_	6	compound	_	_*\
5	von	_	NNP	NNP	_	6	compound	_	_*\
6	Bethlehem	_	NNP	NNP	_	7	nsubj	_	_*\
7	eine	_	VBP	VBP	_	0	root	_	_*\
8	Supernova	_	NNP	NNP	_	12	compound	_	_*\
10	noch	_	NNP	NNP	_	12	appos	_	_*\
11	ein	_	NNPS	NNPS	_	12	compound	_	_*\
12	Komet	_	NNP	NNP	_	7	dobj	_	_*", ["Weder",
                                    "noch"], [7], [6, 12], [1, 6, 10, 12, 7])]
        for record in lst:
            cnll = record[0]
            cnll = cnll.replace('*', '\n')
            g = DependencyGraph(cnll)
            root = snt2ldg.get_root_address_from_nltkg(g)
            print('root', root)
            assert root == record[2]

            newg = deepcopy(g)
            addressLst = []
            rootAddress = snt2ldg.get_root_address_from_nltkg(newg)
            wordAddress = []
            for word in record[1]:
                wordAddress += snt2ldg.get_address_of_word(newg, word)
            for address in wordAddress:
                addressBetween = snt2ldg.get_addresses_from_child_to_ancestor(
                    newg, address, rootAddress[0])
                addressLst += [address] + addressBetween

            addressLst += rootAddress
            print(addressLst)
            assert addressLst == record[4]
            for i in list(newg.nodes.keys()):
                if i not in addressLst:
                    newg.remove_by_address(i)
            pprint(newg.nodes)
            assert len(newg.nodes) == len(record[4])
Example No. 34
def DepGraphList(sentenceList):
    dgList = []
    i = 0
    j = 0
    for sentence in sentenceList:
        text = []
        for token in sentence:
            text.append(' '.join([
                token['form'], token['upostag'],
                str(token['head']), token['deprel'].upper()
            ]))
        try:
            dg = DependencyGraph('\n'.join(text))
        except Exception:
            j += 1
            continue
        i += 1
        dgList.append(dg)
    print(i, j)
    return dgList
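
A hypothetical input for `DepGraphList`: each sentence is a list of token dicts with the form/upostag/head/deprel keys the function reads (the shape the conllu library produces):

sentences = [[
    {'form': 'Nadia', 'upostag': 'PROPN', 'head': 2, 'deprel': 'nsubj'},
    {'form': 'rode', 'upostag': 'VERB', 'head': 0, 'deprel': 'root'},
]]
dgs = DepGraphList(sentences)    # prints '1 0': one graph built, none skipped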
Example No. 35
def read_dep_parses_questions(depfile):
    fh = open(depfile, 'r')
    # list to store the results
    graphs = {}
    # Read the lines containing the first parse.
    qid, dep = read_dep_questions(fh)
    # While there are more lines:
    # 1) create the DependencyGraph
    # 2) add it to our list
    # 3) try again until we're done
    while dep is not None:
        graph = DependencyGraph(dep)
        graphs[qid] = graph
        qid, dep = read_dep_questions(fh)
    fh.close()
    return graphs
Example No. 36
def dependency_parse_to_graph(filename):
    """
    Read dependency parser output from file and construct graph
    """
    data = ''
    dtree = []
    with open(filename, 'r') as f:
        for line in f:
            if line[0] != '#':
                if 'root' in line:
                    elements = line.split('\t')
                    if elements[7] == 'root':
                        elements[7] = 'ROOT'
                        line = '\t'.join(elements)
                data += line
                if line == '\n':
                    dg = DependencyGraph(data.decode('utf8'))
                    dtree.append(dg)
                    data = ''
    return dtree
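
A hypothetical call (the file name is made up; the snippet is Python 2, and each sentence in the file must end with a blank line for a graph to be emitted):

for dg in dependency_parse_to_graph('parsed.conll'):
    print dg.tree()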
Example No. 37
def parse(f_item):
    dg_list = []
    sent = ''
    cnt = 0
    with open(f_item, 'rt', encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                sent += line
            else:
                dg = DependencyGraph(sent, zero_based=True)
                dg_list.append(dg)
                sent = ''
                cnt += 1
                print('sentence index: ' + str(cnt))
            if cnt == 3:
                pass

    # only use these to predict, no training
    parser_std = TransitionParser('arc-standard')

    # load the trained features
    folder_dir = 'C:/Users/Haoran Zhang/Desktop/nlp/'
    parser_std._dictionary = pickle.load(
        open(folder_dir + 'self._dictionary', 'rb'))
    parser_std._transition = pickle.load(
        open(folder_dir + 'self._transition', 'rb'))
    parser_std._match_transition = pickle.load(
        open(folder_dir + 'self.match_transition', 'rb'))

    result = parser_std.parse(
        dg_list,
        folder_dir + 'temp.arcstd.model')  #result here is a list of dg
    #print(result[0].to_conll(4))
    try:
        f = open(to_dir + f_item, 'w')
        for item in result:
            f.writelines(item.to_conll(4))
            f.writelines('\n')
    finally:
        f.close()
Example No. 38
    print >>fout,r'''
\end{dependency}
\end{CJK}
\end{document}
'''
else:
    print >>fout,r'''\begin{tikzpicture}'''
    i = -1
    dep_str = ''
    wids = []
    wlens,xpos = [0],[0]
    for s in open(parse_file):
        if len(s.strip()) == 0:
            i += 1
            if i == line_num:
                dg = DependencyGraph(dep_str)
                tree = dg.tree()
                if flag == '0':
                    h = tree.height()
                    traverse(tree,h,0)
                    for k,w in wids:
                        print >>fout,'\\node(m{}) at({},{}) {{{}}};'.format(k,0.6*xpos[k],0,w)
                        print >>fout,'\\node at({},{}) {{{}}};'.format(0.6*xpos[k],-0.5,k-1)
                        print >>fout,'\\draw[dotted] (m{}) -- (n{});'.format(k,k)
                else:
                    print >>fout,tree.pprint_latex_qtree()
                break
            dep_str = ''
            wids = []
            wlens,xpos = [0],[0]
        else: