Example #1
0
def conll_file_demo(wf, c, lst, i):
    """Write the shortest dependency path between two tokens of each sentence.

    Annotation rows are read from ``./test/testNN.txt`` (``NN`` is ``i``
    zero-padded to two digits).  Each row is tab-separated: column 0 is used
    as the sentence id, columns 1 and 4 as the two node indices and column 6
    as the relation type.  Rows are paired positionally with the non-empty
    entries of ``lst``.

    :param wf: Writable text stream; one line is written per processed pair.
    :param c: Lookup indexed as ``c[sentence_id][node_index]``; it supplies
        the value written for each node on the path.
    :param lst: Dependency-parse strings accepted by ``DependencyGraph``;
        empty entries are ignored.
    :param i: Number selecting the annotation file.
    """
    print('Mass conll_read demo...')
    # Filter once so each parse string stays paired with the graph built from
    # it; zipping against the unfiltered list would shift the pairing after
    # any empty entry.
    entries = [entry for entry in lst if entry]
    graphs = [DependencyGraph(entry) for entry in entries]
    ars = []
    sent_ids = []
    reltypes = []
    test_path = "./test/test{:0>2d}.txt".format(i)
    # The file is only read, so it must not require write permission.
    with open(test_path, 'r') as f1:
        for line in f1:
            if not line.strip():
                # A blank line has no columns and would raise IndexError.
                continue
            cols = line.split("\t")
            ars.append((cols[1], cols[4]))
            sent_ids.append(cols[0])
            reltypes.append(cols[6])
    print(len(graphs), len(ars))
    for entry, graph, ar, sent_id_tmp, reltype in zip(
            entries, graphs, ars, sent_ids, reltypes):
        reltype = reltype.strip()
        depgraph = DependencyGraph(entry.strip())
        depgraph.tree = graph.tree()
        # Direction is dropped so the path may climb towards the head and
        # descend again.
        dep_nx = nxGraphWroot(depgraph).to_undirected()
        shortest_path = nx.shortest_path(dep_nx,
                                         source=int(ar[0]),
                                         target=int(ar[1]))
        # A dedicated loop name keeps the ``i`` parameter from being rebound.
        shortest_path_word = [c[sent_id_tmp][node] for node in shortest_path]
        write_line = sent_id_tmp + '\t' + "reltype=" + reltype + '\t' + str(
            shortest_path_word) + '\n'
        wf.write(write_line)
 def parse(self, tokens):
     """
     Parses the tokens subject to the projectivity constraint and the tags
     in the parser's grammar, using a chart-based span-concatenation method
     similar to Eisner (1996), and keeps the best-scoring candidate.

     :param tokens: The input tokens.  Any iterable is accepted: it is
         copied into a list once and only that list is indexed.
     :return: ``[tree, score]`` for the candidate with the highest
         ``compute_prob`` score; ``[None, 0]`` when no candidate scores
         above zero; ``[]`` when a token has no tag in the grammar.
     """
     self._tokens = list(tokens)
     words = self._tokens
     size = len(words)
     chart = []
     for i in range(size + 1):
         chart.append([])
         for j in range(size + 1):
             chart[i].append(ChartCell(i, j))
             if i == j + 1:
                 # Seed the single-token cell with one span per known tag.
                 word = words[i - 1]
                 if word in self._grammar._tags:
                     for tag in self._grammar._tags[word]:
                         chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], [tag]))
                 else:
                     # print() with one argument is valid on Python 2 and 3.
                     print("No tag found for input token '%s', parse is impossible." % word)
                     return []
     # For every cell (i, j) at least two tokens wide, try each split point
     # k strictly between j and i and keep whatever concatenate() yields.
     for i in range(1, size + 1):
         for j in range(i - 2, -1, -1):
             for k in range(i - 1, j, -1):
                 for span1 in chart[k][j]._entries:
                     for span2 in chart[i][k]._entries:
                         for newspan in self.concatenate(span1, span2):
                             chart[i][j].add(newspan)
     row_format = "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"
     max_parse = None
     max_score = 0
     for span in chart[size][0]._entries:
         # One CoNLL row per token; arcs are shifted by one for the rows.
         conll_format = "".join(
             row_format % (
                 idx + 1,
                 word,
                 word,
                 span._tags[idx],
                 span._tags[idx],
                 "null",
                 span._arcs[idx] + 1,
                 "null",
                 "-",
                 "-",
             )
             for idx, word in enumerate(words)
         )
         dg = DependencyGraph(conll_format)
         score = self.compute_prob(dg)
         if score > max_score:
             max_parse = dg.tree()
             max_score = score
     return [max_parse, max_score]
Example #3
0
    def parse(self, tokens):
        """
        Parses the tokens subject to the projectivity constraint using a
        chart-based span-concatenation method similar to Eisner (1996) and
        scores every complete candidate with ``compute_prob``.

        :param tokens: The input tokens.  Any iterable is accepted: it is
            copied into a list once and only that list is indexed.
        :return: A generator of ``(score, tree)`` pairs ordered from the
            highest score to the lowest.  When the chart holds no complete
            candidate, a single ``(0.0, Tree(first_token, other_tokens))``
            pair is produced instead.
        """
        self._tokens = list(tokens)
        words = self._tokens
        size = len(words)
        chart = []
        for i in range(size + 1):
            chart.append([])
            for j in range(size + 1):
                chart[i].append(ChartCell(i, j))
                if i == j + 1:
                    word = words[i - 1]
                    if word in self._grammar._tags:
                        for tag in self._grammar._tags[word]:
                            chart[i][j].add(
                                DependencySpan(i - 1, i, i - 1, [-1], [tag]))
                    else:
                        # Tokens missing from the grammar get a placeholder
                        # tag so parsing can continue.
                        chart[i][j].add(
                            DependencySpan(i - 1, i, i - 1, [-1], [u'NULL']))

        # For every cell (i, j) at least two tokens wide, try each split
        # point k strictly between j and i and keep what concatenate() yields.
        for i in range(1, size + 1):
            for j in range(i - 2, -1, -1):
                for k in range(i - 1, j, -1):
                    for span1 in chart[k][j]._entries:
                        for span2 in chart[i][k]._entries:
                            for newspan in self.concatenate(span1, span2):
                                chart[i][j].add(newspan)

        row_format = '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n'
        trees = []
        for span in chart[size][0]._entries:
            # Every row is labelled 'ROOT' because the dependency graph
            # requires a ROOT element to be present.
            conll_format = "".join(
                row_format % (
                    idx + 1, word, word, span._tags[idx], span._tags[idx],
                    'null', span._arcs[idx] + 1, 'ROOT', '-', '-')
                for idx, word in enumerate(words))
            dg = DependencyGraph(conll_format)
            score = self.compute_prob(dg)
            trees.append((score, dg.tree()))
        # Stable sort, highest score first.
        trees.sort(key=lambda e: -e[0])
        if trees == []:
            trees = [(0.0, Tree(words[0], words[1:]))]
        return ((score, tree) for (score, tree) in trees)
 def parse(self, tokens):
     """
     Parses the tokens subject to the projectivity constraint and the tags
     in the parser's grammar, using a chart-based span-concatenation method
     similar to Eisner (1996), and keeps the best-scoring candidate.

     :param tokens: The input tokens.  Any iterable is accepted: it is
         copied into a list once and only that list is indexed.
     :return: ``[tree, score]`` for the candidate with the highest
         ``compute_prob`` score; ``[None, 0]`` when no candidate scores
         above zero; ``[]`` when a token has no tag in the grammar.
     """
     self._tokens = list(tokens)
     words = self._tokens
     size = len(words)
     chart = []
     for i in range(size + 1):
         chart.append([])
         for j in range(size + 1):
             chart[i].append(ChartCell(i, j))
             if i == j + 1:
                 # Seed the single-token cell with one span per known tag.
                 word = words[i - 1]
                 if word in self._grammar._tags:
                     for tag in self._grammar._tags[word]:
                         chart[i][j].add(
                             DependencySpan(i - 1, i, i - 1, [-1], [tag]))
                 else:
                     print(
                         'No tag found for input token \'%s\', parse is impossible.'
                         % word)
                     return []
     # For every cell (i, j) at least two tokens wide, try each split point
     # k strictly between j and i and keep whatever concatenate() yields.
     for i in range(1, size + 1):
         for j in range(i - 2, -1, -1):
             for k in range(i - 1, j, -1):
                 for span1 in chart[k][j]._entries:
                     for span2 in chart[i][k]._entries:
                         for newspan in self.concatenate(span1, span2):
                             chart[i][j].add(newspan)
     row_format = '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n'
     max_parse = None
     max_score = 0
     for span in chart[size][0]._entries:
         # One CoNLL row per token; arcs are shifted by one for the rows.
         conll_format = "".join(
             row_format % (
                 idx + 1, word, word, span._tags[idx], span._tags[idx],
                 'null', span._arcs[idx] + 1, 'null', '-', '-')
             for idx, word in enumerate(words))
         dg = DependencyGraph(conll_format)
         score = self.compute_prob(dg)
         if score > max_score:
             max_parse = dg.tree()
             max_score = score
     return [max_parse, max_score]
Example #5
0
    def parse(self, tokens):
        """
        Performs a projective dependency parse on the list of tokens using
        a chart-based, span-concatenation algorithm similar to Eisner (1996).

        Because this is a generator, no work (including the assignment to
        ``self._tokens``) happens until the first tree is requested.

        :param tokens: The input tokens.  Any iterable is accepted: it is
            copied into a list once and only that list is indexed.
        :type tokens: list(str)
        :return: An iterator over parse trees, one per complete chart entry.
        :rtype: iter(Tree)
        """
        self._tokens = list(tokens)
        words = self._tokens
        size = len(words)
        chart = []
        for i in range(size + 1):
            row = []
            chart.append(row)
            for j in range(size + 1):
                cell = ChartCell(i, j)
                row.append(cell)
                if i == j + 1:
                    # Single-token span carrying the placeholder tag.
                    cell.add(DependencySpan(i - 1, i, i - 1, [-1], ["null"]))

        # For every cell (i, j) at least two tokens wide, try each split
        # point k strictly between j and i and keep what concatenate() yields.
        for i in range(1, size + 1):
            for j in range(i - 2, -1, -1):
                for k in range(i - 1, j, -1):
                    for span1 in chart[k][j]._entries:
                        for span2 in chart[i][k]._entries:
                            for newspan in self.concatenate(span1, span2):
                                chart[i][j].add(newspan)

        row_format = "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"
        for span in chart[size][0]._entries:
            # Every row is labelled "ROOT" because the dependency graph
            # requires at least one root element.
            conll_format = "".join(
                row_format % (
                    idx + 1,
                    word,
                    word,
                    "null",
                    "null",
                    "null",
                    span._arcs[idx] + 1,
                    "ROOT",
                    "-",
                    "-",
                )
                for idx, word in enumerate(words)
            )
            dg = DependencyGraph(conll_format)
            yield dg.tree()
    def parse(self, tokens):
        """
        Performs a projective dependency parse on the list of tokens using
        a chart-based, span-concatenation algorithm similar to Eisner (1996).

        :param tokens: The input tokens.  Any iterable is accepted: it is
            copied into a list once and only that list is indexed.
        :type tokens: list(str)
        :return: A list of parse trees, one per complete chart entry.
        :rtype: list(Tree)
        """
        self._tokens = list(tokens)
        words = self._tokens
        size = len(words)
        chart = []
        for i in range(size + 1):
            row = []
            chart.append(row)
            for j in range(size + 1):
                cell = ChartCell(i, j)
                row.append(cell)
                if i == j + 1:
                    # Single-token span carrying the placeholder tag.
                    cell.add(DependencySpan(i - 1, i, i - 1, [-1], ["null"]))
        # For every cell (i, j) at least two tokens wide, try each split
        # point k strictly between j and i and keep what concatenate() yields.
        for i in range(1, size + 1):
            for j in range(i - 2, -1, -1):
                for k in range(i - 1, j, -1):
                    for span1 in chart[k][j]._entries:
                        for span2 in chart[i][k]._entries:
                            for newspan in self.concatenate(span1, span2):
                                chart[i][j].add(newspan)
        row_format = "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n"
        trees = []
        for span in chart[size][0]._entries:
            # One CoNLL row per token; arcs are shifted by one for the rows.
            conll_format = "".join(
                row_format % (
                    idx + 1,
                    word,
                    word,
                    "null",
                    "null",
                    "null",
                    span._arcs[idx] + 1,
                    "null",
                    "-",
                    "-",
                )
                for idx, word in enumerate(words)
            )
            dg = DependencyGraph(conll_format)
            trees.append(dg.tree())
        return trees
    def parse(self, tokens):
        """
        Performs a projective dependency parse on the list of tokens using
        a chart-based, span-concatenation algorithm similar to Eisner (1996).

        Because this is a generator, no work (including the assignment to
        ``self._tokens``) happens until the first tree is requested.

        :param tokens: The input tokens.  Any iterable is accepted: it is
            copied into a list once and only that list is indexed.
        :type tokens: list(str)
        :return: An iterator over parse trees, one per complete chart entry.
        :rtype: iter(Tree)
        """
        self._tokens = list(tokens)
        words = self._tokens
        size = len(words)
        chart = []
        for i in range(size + 1):
            row = []
            chart.append(row)
            for j in range(size + 1):
                cell = ChartCell(i, j)
                row.append(cell)
                if i == j + 1:
                    # Single-token span carrying the placeholder tag.
                    cell.add(DependencySpan(i - 1, i, i - 1, [-1], ['null']))

        # For every cell (i, j) at least two tokens wide, try each split
        # point k strictly between j and i and keep what concatenate() yields.
        for i in range(1, size + 1):
            for j in range(i - 2, -1, -1):
                for k in range(i - 1, j, -1):
                    for span1 in chart[k][j]._entries:
                        for span2 in chart[i][k]._entries:
                            for newspan in self.concatenate(span1, span2):
                                chart[i][j].add(newspan)

        row_format = '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n'
        for span in chart[size][0]._entries:
            # Every row is labelled 'ROOT' because the dependency graph
            # requires at least one root element.
            conll_format = "".join(
                row_format % (
                    idx + 1,
                    word,
                    word,
                    'null',
                    'null',
                    'null',
                    span._arcs[idx] + 1,
                    'ROOT',
                    '-',
                    '-',
                )
                for idx, word in enumerate(words)
            )
            dg = DependencyGraph(conll_format)
            yield dg.tree()
Example #8
0
 def parse(self, tokens):
     """
     Parses the tokens subject to the projectivity constraint using a
     chart-based span-concatenation method similar to Eisner (1996) and
     scores every complete candidate with ``compute_prob``.

     :param tokens: The input tokens.  Any iterable is accepted: it is
         copied into a list once and only that list is indexed.
     :return: A generator of ``(score, tree)`` pairs ordered from the
         highest score to the lowest.  When the chart holds no complete
         candidate, a single ``(0.0, Tree(first_token, other_tokens))``
         pair is produced instead.
     """
     self._tokens = list(tokens)
     words = self._tokens
     size = len(words)
     chart = []
     for i in range(size + 1):
         chart.append([])
         for j in range(size + 1):
             chart[i].append(ChartCell(i, j))
             if i == j + 1:
                 word = words[i - 1]
                 if word in self._grammar._tags:
                     for tag in self._grammar._tags[word]:
                         chart[i][j].add(
                             DependencySpan(i - 1, i, i - 1, [-1], [tag]))
                 else:
                     # Tokens missing from the grammar get a placeholder
                     # tag so parsing can continue.
                     chart[i][j].add(
                         DependencySpan(i - 1, i, i - 1, [-1], [u'NULL']))

     # For every cell (i, j) at least two tokens wide, try each split point
     # k strictly between j and i and keep whatever concatenate() yields.
     for i in range(1, size + 1):
         for j in range(i - 2, -1, -1):
             for k in range(i - 1, j, -1):
                 for span1 in chart[k][j]._entries:
                     for span2 in chart[i][k]._entries:
                         for newspan in self.concatenate(span1, span2):
                             chart[i][j].add(newspan)

     row_format = '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n'
     trees = []
     for span in chart[size][0]._entries:
         # Every row is labelled 'ROOT' because the dependency graph
         # requires a ROOT element to be present.
         conll_format = "".join(
             row_format % (
                 idx + 1, word, word, span._tags[idx], span._tags[idx],
                 'null', span._arcs[idx] + 1, 'ROOT', '-', '-')
             for idx, word in enumerate(words))
         dg = DependencyGraph(conll_format)
         score = self.compute_prob(dg)
         trees.append((score, dg.tree()))
     # Stable sort, highest score first.
     trees.sort(key=lambda e: -e[0])
     if trees == []:
         trees = [(0.0, Tree(words[0], words[1:]))]
     return ((score, tree) for (score, tree) in trees)
def conll_file_demo(wf, c, lst, i):
    """Write the shortest dependency path between two tokens of each sentence.

    Annotation rows are read from ``MAT.txt``.  Each row is tab-separated.
    A row is used only when column 0 differs from column 7 and characters
    1-3 of column 0, read as an integer, equal ``i``.  For the rows kept,
    column 0 is the sentence id, columns 1 and 4 are the two node indices
    and column 14 is the relation type.  Kept rows are paired positionally
    with the non-empty entries of ``lst``.

    :param wf: Writable text stream; one line is written per processed pair.
    :param c: Lookup indexed as ``c[sentence_id][node_index]``; it supplies
        the value written for each node on the path.
    :param lst: Dependency-parse strings accepted by ``DependencyGraph``;
        empty entries are ignored.
    :param i: Number compared against the digits embedded in column 0.
    """
    print('Mass conll_read demo...')
    # Filter once so each parse string stays paired with the graph built from
    # it; zipping against the unfiltered list would shift the pairing after
    # any empty entry.
    entries = [entry for entry in lst if entry]
    graphs = [DependencyGraph(entry) for entry in entries]
    ars = []
    sent_ids = []
    reltypes = []
    with open("MAT.txt", 'r') as f1:
        for line in f1:
            if not line.strip():
                # A blank line has no columns and would raise IndexError.
                continue
            cols = line.split("\t")
            if cols[0] != cols[7] and int(cols[0][1:4]) == i:
                ars.append((cols[1], cols[4]))
                sent_ids.append(cols[0])
                reltypes.append(cols[14])

    for entry, graph, ar, sent_id_tmp, reltype in zip(
            entries, graphs, ars, sent_ids, reltypes):
        reltype = reltype.strip()
        depgraph = DependencyGraph(entry.strip())
        depgraph.tree = graph.tree()
        # Direction is dropped so the path may climb towards the head and
        # descend again.
        dep_nx = nxGraphWroot(depgraph).to_undirected()
        shortest_path = nx.shortest_path(dep_nx,
                                         source=int(ar[0]),
                                         target=int(ar[1]))
        shortest_path_word = []
        # A dedicated loop name keeps the ``i`` parameter from being rebound.
        for node in shortest_path:
            print(sent_id_tmp)
            shortest_path_word.append(c[sent_id_tmp][node])
        write_line = sent_id_tmp + '\t' + str(
            shortest_path_word) + '\t' + "reltype=" + reltype + '\n'
        wf.write(write_line)
Example #10
0
    def parse(self, tokens):
        """
        Performs a projective dependency parse on the list of tokens using
        a chart-based, span-concatenation algorithm similar to Eisner (1996).

        :param tokens: The input tokens.  Any iterable is accepted: it is
            copied into a list once and only that list is indexed.
        :type tokens: list(str)
        :return: A list of parse trees, one per complete chart entry.
        :rtype: list(Tree)
        """
        self._tokens = list(tokens)
        words = self._tokens
        size = len(words)
        chart = []
        for i in range(size + 1):
            row = []
            chart.append(row)
            for j in range(size + 1):
                cell = ChartCell(i, j)
                row.append(cell)
                if i == j + 1:
                    # Single-token span carrying the placeholder tag.
                    cell.add(
                        DependencySpan(i - 1, i, i - 1, [-1], ['null']))
        # For every cell (i, j) at least two tokens wide, try each split
        # point k strictly between j and i and keep what concatenate() yields.
        for i in range(1, size + 1):
            for j in range(i - 2, -1, -1):
                for k in range(i - 1, j, -1):
                    for span1 in chart[k][j]._entries:
                        for span2 in chart[i][k]._entries:
                            for newspan in self.concatenate(span1, span2):
                                chart[i][j].add(newspan)
        row_format = '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n'
        trees = []
        for span in chart[size][0]._entries:
            # One CoNLL row per token; arcs are shifted by one for the rows.
            conll_format = "".join(
                row_format % (
                    idx + 1, word, word, 'null', 'null', 'null',
                    span._arcs[idx] + 1, 'null', '-', '-')
                for idx, word in enumerate(words))
            dg = DependencyGraph(conll_format)
            trees.append(dg.tree())
        return trees
Example #11
0
# Two sample inputs: a raw tweet and the same sentence with the leading
# punctuation and user mention removed.
text_data = [
    '!!!!!!""@__BrighterDays: I can not just sit up and HATE on another bitch .. I got too much shit going on!""',
    'I can not just sit up and HATE on another bitch .. I got too much shit going on!'
]
try:
    # Parse the raw strings into two different language representation
    # formats.
    result_stanford = tweebo_api.parse_stanford(text_data)
    result_conll = tweebo_api.parse_conll(text_data)

    # Pass the CoNLL output through add_root_node() before building one
    # NLTK dependency graph per input string.
    nltk_result = add_root_node(result_conll)
    nltk_dep_tree_0 = DependencyGraph(nltk_result[0])
    nltk_dep_tree_1 = DependencyGraph(nltk_result[1])

    # Both trees are built before anything is printed.
    tree_0 = nltk_dep_tree_0.tree()
    tree_1 = nltk_dep_tree_1.tree()

    # Print each tree, followed by every subtree it contains.
    for parsed_tree in (tree_0, tree_1):
        print(parsed_tree)
        for subtree in parsed_tree.subtrees():
            print(subtree)

    # TODO: test a multi-sentence string.
except ServerError as e:
    print(f'{e}\n{e.message}')