Exemple #1
0
    def test_equals(self):
        """Check Passage.equals() through a sequence of incremental edits.

        Two passages are built side by side. After each change made to only one
        of them, equality must fail in both directions; once the other passage
        receives the matching change, equality must hold again in both directions.
        """
        first = core.Passage("1")
        second = core.Passage("2")
        first_l0, second_l0 = layer0.Layer0(first), layer0.Layer0(second)
        first_l1, second_l1 = layer1.Layer1(first), layer1.Layer1(second)

        def expect_equal():
            self.assertTrue(first.equals(second) and second.equals(first))

        def expect_different():
            self.assertFalse(first.equals(second) or second.equals(first))

        # Passages holding only empty layers 0 and 1 compare equal
        expect_equal()

        # Terminals: identical terminals keep the passages equal; a missing
        # terminal or a different punctuation flag breaks equality
        for terminal_layer in (first_l0, second_l0):
            for text in ("0", "1", "2"):
                terminal_layer.add_terminal(text, False)
        expect_equal()
        punct2 = second_l0.add_terminal("3", True)
        expect_different()
        non_punct = first_l0.add_terminal("3", False)
        expect_different()
        non_punct.destroy()
        punct1 = first_l0.add_terminal("3", True)
        expect_equal()

        # Foundational nodes and edges
        scene1 = first_l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
        expect_different()
        scene2 = second_l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
        expect_equal()
        first_l1.add_fnode(scene1, layer1.EdgeTags.Participant)
        expect_different()
        # Non-recursive comparison ignores the extra child
        self.assertTrue(scene1.equals(scene2, recursive=False))
        second_l1.add_fnode(scene2, layer1.EdgeTags.Process)
        expect_different()
        second_l1.add_fnode(scene2, layer1.EdgeTags.Participant)
        expect_different()
        first_l1.add_fnode(scene1, layer1.EdgeTags.Process)
        # Same children, but added in a different order in each passage:
        # equal by default, different when ordered=True
        expect_equal()
        self.assertFalse(
            first.equals(second, ordered=True) or second.equals(first, ordered=True))

        # Node attribute ("implicit") and edge attribute ("remote") differences
        first_l1.add_fnode(scene1, layer1.EdgeTags.Adverbial, implicit=True)
        adverbial2 = second_l1.add_fnode(scene2, layer1.EdgeTags.Adverbial)
        expect_different()
        adverbial2.attrib["implicit"] = True
        expect_equal()
        scene2[2].attrib["remote"] = True
        expect_different()
        scene1[2].attrib["remote"] = True
        expect_equal()

        # Punctuation units
        first_l1.add_punct(None, punct1)
        expect_different()
        second_l1.add_punct(None, punct2)
        expect_equal()

        # An extra layer in only one passage breaks equality
        core.Layer("2", first)
        expect_different()
Exemple #2
0
def function2():
    """Build passage "2": five terminals, two parallel scenes and a remote edge.

    Layout as constructed below (terminals are numbered 1-5, #5 is punctuation):
        Scene #1: [H [S 1] [D 2] [F 3]]
        Scene #2: [H [S 4]] plus a remote D edge to the D unit of scene #1
        Terminal 5: punctuation added with parent None (not under a scene)
    """
    passage = core.Passage("2")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    terminals = [l0.add_terminal(text=str(number), punct=(number == 5))
                 for number in range(1, 6)]

    # Scene #1: State over terminal 1, Adverbial over terminal 2, Function over terminal 3
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    state1 = l1.add_fnode(scene1, tags.State)
    adverbial = l1.add_fnode(scene1, tags.Adverbial)
    state1.add(tags.Terminal, terminals[0])
    adverbial.add(tags.Terminal, terminals[1])
    function = l1.add_fnode(scene1, tags.Function)
    function.add(tags.Terminal, terminals[2])

    # Scene #2: State over terminal 4
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    state2 = l1.add_fnode(scene2, tags.State)
    state2.add(tags.Terminal, terminals[3])

    # Punctuation terminal 5, added with parent None
    l1.add_punct(None, terminals[4])

    # Scene #2 takes the Adverbial of scene #1 as a remote Adverbial
    l1.add_remote(scene2, tags.Adverbial, adverbial)

    return passage
Exemple #3
0
 def __init__(self, passage):
     """
     Initialize the state from a passage: terminals go to the buffer, the root goes on the stack.

     :param passage: passage to initialize from; must contain layer 0. If it has no layer 1,
                     an empty one is created on it.
     :raises IOError: if the passage has no layer 0
     """
     self.args = Config().args
     # Constraints class is selected by the passage's "format" entry in extra, falling back to Constraints
     self.constraints = CONSTRAINTS.get(passage.extra.get("format"), Constraints)(implicit=self.args.implicit)
     self.log = []
     self.finished = False
     self.passage = passage
     try:
         l0 = passage.layer(layer0.LAYER_ID)
     except KeyError as e:
         raise IOError("Passage %s is missing layer %s" % (passage.ID, layer0.LAYER_ID)) from e
     try:
         l1 = passage.layer(layer1.LAYER_ID)
     except KeyError:
         # No layer 1 yet: create an empty one so that l1.heads[0] below exists
         l1 = layer1.Layer1(passage)
     # True if any layer 1 node has outgoing edges or a label attribute
     self.labeled = any(n.outgoing or n.attrib.get(LABEL_ATTRIB) for n in l1.all)
     # One state Node per layer 0 terminal, indexed from 1
     self.terminals = [Node(i, orig_node=t, root=passage, text=t.text, paragraph=t.paragraph, tag=t.tag)
                       for i, t in enumerate(l0.all, start=1)]
     self.stack = []
     self.buffer = deque()
     self.nodes = []
     self.heads = set()
     self.need_label = {}  # If we are waiting for label_node() to be called, which node is to be labeled by it
     # NOTE(review): add_node() is defined elsewhere and presumably relies on the
     # attributes initialized above - keep this call after them
     self.root = self.add_node(orig_node=l1.heads[0], is_root=True)  # Root is not in the buffer
     self.stack.append(self.root)
     self.buffer += self.terminals
     self.nodes += self.terminals
     self.actions = []  # History of applied actions
     self.type_validity_cache = {}
Exemple #4
0
def discontiguous():
    """Build passage "1" with three parallel scenes containing discontiguous units.

    Twenty terminals are created (texts "1" to "20"); the 10th and the 20th are
    punctuation. The bracket notation in the comments below uses 0-based
    terminal indices, and dashed tags (e.g. -C, C-) mark discontiguous units.
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    terminals = [l0.add_terminal(text=str(number), punct=(number % 10 == 0))
                 for number in range(1, 21)]

    # Scene 1, over terminal indices 0-9:
    # [PS [D [E 0] [C- 1] [E 2] [-C 3]]
    #     [A- 4] [P- 5 6] [-A 7] [F 8] [-P [U 9]]]
    # In addition, D takes P as a remote G
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    adverbial1 = l1.add_fnode(scene1, tags.Adverbial)
    elaborator_a = l1.add_fnode(adverbial1, tags.Elaborator)
    center = l1.add_fnode(adverbial1, tags.Center)
    elaborator_b = l1.add_fnode(adverbial1, tags.Elaborator)
    participant1 = l1.add_fnode(scene1, tags.Participant)
    process1 = l1.add_fnode(scene1, tags.Process)
    function1 = l1.add_fnode(scene1, tags.Function)
    l1.add_remote(adverbial1, tags.Ground, process1)
    scene1_layout = (
        (elaborator_a, 0),
        (center, 1),
        (elaborator_b, 2),
        (center, 3),
        (participant1, 4),
        (process1, 5),
        (process1, 6),
        (participant1, 7),
        (function1, 8),
    )
    for unit, index in scene1_layout:
        unit.add(tags.Terminal, terminals[index])
    l1.add_punct(process1, terminals[9])

    # Scene 2, over terminal indices 10-13 and 17-19:
    # [PS- [D IMPLICIT] [G IMPLICIT] [P 10 11 12 13]]
    # [-PS [A 17 18 [U 19]]]
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    l1.add_fnode(scene2, tags.Adverbial, implicit=True)
    l1.add_fnode(scene2, tags.Ground, implicit=True)
    process2 = l1.add_fnode(scene2, tags.Process)
    participant2 = l1.add_fnode(scene2, tags.Participant)
    for index in (10, 11, 12, 13):
        process2.add(tags.Terminal, terminals[index])
    for index in (17, 18):
        participant2.add(tags.Terminal, terminals[index])
    l1.add_punct(participant2, terminals[19])

    # Scene 3, over terminal indices 14-16:
    # [PS [P IMPLICIT] 14 [A 15 16]]
    scene3 = l1.add_fnode(None, tags.ParallelScene)
    scene3.add(tags.Terminal, terminals[14])
    l1.add_fnode(scene3, tags.Process, implicit=True)
    participant3 = l1.add_fnode(scene3, tags.Participant)
    for index in (15, 16):
        participant3.add(tags.Terminal, terminals[index])

    return passage
Exemple #5
0
def crossing():
    """Creates a :class:`Passage` with two paragraphs and a remote edge between its scenes.

    Passage: [1 2 [3 P(remote)] H] .
             [[3 P] . 4 . H]

    The first three terminals use the default paragraph; the last four are
    created with paragraph=2.
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    default_paragraph = (("1", False), ("2", False), (".", True))
    second_paragraph = (("3", False), (".", True), ("4", False), (".", True))
    terminals = [l0.add_terminal(text, is_punct) for text, is_punct in default_paragraph]
    terminals += [l0.add_terminal(text, is_punct, paragraph=2)
                  for text, is_punct in second_paragraph]

    scene1 = l1.add_fnode(None, tags.ParallelScene)
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    process = l1.add_fnode(scene2, tags.Process)
    # The Process of the second scene is also a remote Process of the first one
    l1.add_remote(scene1, tags.Process, process)

    scene1.add(tags.Terminal, terminals[0])
    scene1.add(tags.Terminal, terminals[1])
    l1.add_punct(None, terminals[2])
    process.add(tags.Terminal, terminals[3])
    l1.add_punct(scene2, terminals[4])
    scene2.add(tags.Terminal, terminals[5])
    l1.add_punct(scene2, terminals[6])
    return passage
Exemple #6
0
def function1():
    """Build passage "1": five terminals, two parallel scenes, a top-level Function unit.

    Layout as constructed below (terminals are numbered 1-5, #5 is punctuation):
        Scene #1: [H [P 1] [A 2]]
        Function: [F 3], added with parent None
        Scene #2: [H [S 4] [A IMPLICIT] [U 5]] plus a remote A edge to the A of scene #1
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    terminals = [l0.add_terminal(text=str(number), punct=(number == 5))
                 for number in range(1, 6)]

    # Scene #1: Process over terminal 1, Participant over terminal 2
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    process = l1.add_fnode(scene1, tags.Process)
    participant = l1.add_fnode(scene1, tags.Participant)
    process.add(tags.Terminal, terminals[0])
    participant.add(tags.Terminal, terminals[1])

    # Function over terminal 3 - its location should not affect evaluation
    function = l1.add_fnode(None, tags.Function)
    function.add(tags.Terminal, terminals[2])

    # Scene #2: State over terminal 4 and an implicit Participant
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    state = l1.add_fnode(scene2, tags.State)
    state.add(tags.Terminal, terminals[3])
    l1.add_fnode(scene2, tags.Participant, implicit=True)  # implicit should not affect evaluation

    # Punctuation terminal 5, attached under scene #2
    l1.add_punct(scene2, terminals[4])  # punctuation should not affect evaluation

    # Scene #2 takes the Participant of scene #1 as a remote Participant
    l1.add_remote(scene2, tags.Participant, participant)

    return passage
Exemple #7
0
def create_passage(num_terms=3, *punct):
    """Create passage "1" with layers 0 and 1 and `num_terms` terminals.

    Terminals are numbered from 1 and use their number as text.

    :param num_terms: number of terminals to create
    :param punct: 1-based numbers of the terminals to create as punctuation
    :return: tuple of (passage, layer 1 object, list of created terminals)
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    terminals = []
    for number in range(1, num_terms + 1):
        terminals.append(l0.add_terminal(text=str(number), punct=(number in punct)))
    return passage, l1, terminals
Exemple #8
0
    def _build_passage(self):
        """Build a core.Passage from the intermediate state collected on self.

        Reads self.pending_nodes (triples of parent ID, edge tag, node ID),
        self.node_by_id, self.node_ids_with_children, self.remotes,
        self.linkages and self.terminals. self.pending_nodes is emptied and
        self.node_by_id is filled with the created nodes as a side effect.

        :return: the created passage, with ID self.sentence_id or self.passage_id
        :raises ValueError: if some pending nodes refer to parent IDs that are
                            never defined, or if a terminal refers to an unknown parent ID
        """
        p = core.Passage(self.sentence_id or self.passage_id)
        l0 = layer0.Layer0(p)
        l1 = layer1.Layer1(p)
        paragraph = 1

        # add normal nodes: a node can only be created once its parent exists,
        # so keep sweeping the pending list until it is empty
        while self.pending_nodes:
            attached_any = False
            for i in reversed(range(len(self.pending_nodes))):
                parent_id, edge_tag, node_id = self.pending_nodes[i]
                # Membership test rather than .get(): None is a legitimate
                # stored value (handled below as "child of the root")
                if parent_id not in self.node_by_id:
                    continue
                parent = self.node_by_id[parent_id]
                del self.pending_nodes[i]
                implicit = node_id not in self.node_ids_with_children
                node = l1.add_fnode(parent, edge_tag, implicit=implicit)
                if edge_tag == EdgeTags.Punctuation:
                    node.tag = layer1.NodeTags.Punctuation
                self.node_by_id[node_id] = node
                attached_any = True
            if not attached_any:
                # A full sweep attached nothing, so the remaining nodes can
                # never be attached; fail instead of looping forever
                raise ValueError(
                    "Nodes with undefined parents in passage %s: %s" %
                    (p.ID, ", ".join(str(node_id) for _, _, node_id
                                     in self.pending_nodes)))

        # add remotes
        for parent_id, edge_tag, node_id in self.remotes:
            l1.add_remote(self.node_by_id[parent_id], edge_tag,
                          self.node_by_id[node_id])

        # add linkages
        for node_id, children in self.linkages.items():
            link_relation = next(self.node_by_id[i] for i, t in children
                                 if t == EdgeTags.LinkRelation)
            link_arguments = [
                self.node_by_id[i] for i, t in children
                if t == EdgeTags.LinkArgument
            ]
            l1.add_linkage(link_relation, *link_arguments)

        # add terminals
        for text, tag, edge_tag, parent_id in self.terminals:
            punctuation = (tag == layer0.NodeTags.Punct)
            terminal = l0.add_terminal(text=text,
                                       punct=punctuation,
                                       paragraph=paragraph)
            try:
                parent = self.node_by_id[parent_id]
            except KeyError as e:
                raise ValueError(
                    "Terminal ('%s') with bad parent (%s) in passage %s" %
                    (text, parent_id, p.ID)) from e
            if parent is None:
                # Wrap the terminal in a new unit instead of attaching it to the root
                print("Terminal is a child of the root: '%s'" % text,
                      file=sys.stderr)
                parent = l1.add_fnode(parent, edge_tag)
            if edge_tag != EdgeTags.Terminal:
                print("Terminal with incoming %s edge: '%s'" %
                      (edge_tag, text),
                      file=sys.stderr)
            parent.add(EdgeTags.Terminal, terminal)

        return p
Exemple #9
0
 def build_passage(self, graph, terminals_only=False):
     """
     Create a core.Passage from a graph
     :param graph: source graph; its id becomes the passage ID
     :param terminals_only: if true, create only layer 0 and skip the non-terminal units
     :return: the created passage
     """
     passage = core.Passage(graph.id)
     graph_format = graph.format
     self.is_ucca = (graph_format == "ucca")
     # Record the format only when the graph has none or already uses ours
     if graph_format is None or graph_format == self.format:
         passage.extra["format"] = self.format
     self.create_terminals(graph, layer0.Layer0(passage))
     if terminals_only:
         return passage
     self.create_non_terminals(graph, layer1.Layer1(passage))
     graph.link_pre_terminals()
     return passage
Exemple #10
0
def main(args):
    """Write one passage XML file per expression unit of a STREUSLE file.

    For every unit a passage is built whose single scene has three children,
    in this order: a 'gov' unit, a unit tagged with the unit's 'ss' value that
    holds the unit's own tokens, and an 'obj' unit.

    :param args: sequence whose first item is the STREUSLE file path and whose
                 second item is the output directory
    """
    streusle_file = args[0]
    outpath = args[1]

    def attach_related_token(sent, unit, role, l0, preterminal):
        # Create a terminal for the unit's heuristic `role` token ('gov' or
        # 'obj') and attach it under `preterminal`
        rel = sent['toks'][unit['heuristic_relation']['local_' + role] - 1]
        rel_unit = sent['swes'].get(str(rel['#']))
        if rel_unit is None:
            rel_unit = sent['smwes'].get(str(rel.get('smwe', [-1, -1])[0]), None)
        preterminal.add(ul1.EdgeTags.Terminal,
                        create_terminal(rel, rel_unit, l0, False))

    for doc_id, doc in get_streusle_docs(streusle_file).items():
        for unit in list(doc['exprs'].values()):
            local_toknums = unit["local_toknums"]
            passage_id = f'{doc_id}_{unit["sent_offs"]}_{local_toknums[0]}-{local_toknums[-1]}'
            sent = doc['sents'][int(unit['sent_offs']) - 1]
            relation = unit['heuristic_relation']

            passage = ucore.Passage(passage_id)
            l0 = ul0.Layer0(passage)
            l1 = ul1.Layer1(passage)

            root = l1.add_fnode(l1._head_fnode, ul1.EdgeTags.ParallelScene)

            # gov: the unit is always created, the terminal only if a governor exists
            gov_unit = l1.add_fnode(root, 'gov')
            if relation['gov'] is not None:
                attach_related_token(sent, unit, 'gov', l0, gov_unit)

            # P unit: the tokens of the expression itself
            expr_unit = l1.add_fnode(root, unit['ss'])
            for toknum in unit["toknums"]:
                token = doc['toks'][toknum - 1]
                expr_unit.add(ul1.EdgeTags.Terminal,
                              create_terminal(token, unit, l0, True))

            # obj: the unit is always created, the terminal is skipped for PP units
            obj_unit = l1.add_fnode(root, 'obj')
            if relation['obj'] is not None and unit['lexcat'] != 'PP':
                attach_related_token(sent, unit, 'obj', l0, obj_unit)

            uconv.passage2file(passage, f'{outpath}/{passage_id}.xml')
Exemple #11
0
def main(args):
    """Create and write one passage per input line, with one terminal per token.

    Lines come from gen_lines(args.filenames). Passage IDs are produced by
    formatting the 1-based line number with args.format. Each passage gets
    layer 0 with the whitespace-separated tokens and an empty layer 1.
    """
    lines = tqdm(gen_lines(args.filenames), unit=" lines", desc="Creating passages")
    for line_number, line in enumerate(lines, start=1):
        passage = core.Passage(args.format % line_number)
        terminal_layer = layer0.Layer0(passage)
        layer1.Layer1(passage)
        for token in line.split():
            # A token counts as punctuation when all its characters are in PUNCTUATION
            is_punct = PUNCTUATION.issuperset(token)
            terminal_layer.add_terminal(text=token, punct=is_punct)
        write_passage(passage, outdir=args.out_dir, binary=args.binary, verbose=False)
Exemple #12
0
def _from_site_annotation(elem, passage, elem2node):
    """Parses site XML annotation.

    Creates layer1 on the passage and parses the whole annotation, given that
    the terminals are already processed and converted and appear in elem2node.

    Args:
        elem: root XML element
        passage: the passage to create, with layer0, w/o layer1
        elem2node: mapping from site ID to Nodes, should contain the Terminals

    Raises:
        SiteXMLUnknownElement: if an unknown, unhandled element is found

    """
    l1 = layer1.Layer1(passage)
    l1head = l1.heads[0]
    groups_root = elem.find(SiteCfg.Paths.Discontiguous)

    # First pass: the hierarchical annotation. Each call returns the
    # (parent, element) pairs that could not be handled yet.
    deferred = [
        pair
        for subelem in elem.iterfind(SiteCfg.Paths.Annotation)
        for pair in _parse_site_units(subelem, l1head, passage, groups_root,
                                      elem2node)
    ]

    # Second pass: remotes and linkages, which usually contain IDs from all
    # over the annotation, hence must be taken care of after all elements are
    # converted
    for parent, pending in deferred:
        if pending.tag == SiteCfg.Tags.Remote:
            edge_tag = SiteCfg.TagConversion[pending.get(SiteCfg.Attr.ElemTag)]
            child = SiteUtil.get_node(pending, elem2node)
            if child is None:  # bug in XML, points to an invalid ID
                sys.stderr.write(
                    "Warning: remoteUnit with ID {} is invalid - skipping\n".
                    format(pending.get(SiteCfg.Attr.SiteID)))
                continue
            l1.add_remote(parent, edge_tag, child)
            continue
        if pending.tag != SiteCfg.Tags.Linkage:
            raise SiteXMLUnknownElement
        link_args = [
            elem2node[site_id]
            for site_id in pending.get(SiteCfg.Attr.LinkageArgs).split(',')
        ]
        l1.add_linkage(parent, *link_args)
Exemple #13
0
    def _build_passage(self, stream):
        """Parse `stream` and build a core.Passage from the resulting state.

        The generator returned by self.parse(stream) is advanced once; the
        passage is then built from self.pending_nodes (triples of parent ID,
        edge tag, node ID), self.node_by_id, self.node_ids_with_children and
        self.terminals. self.pending_nodes is emptied and self.node_by_id is
        filled with the created nodes as a side effect.

        :param stream: input handed to self.parse()
        :return: the created passage, with ID self.passage_id
        :raises ValueError: if some pending nodes refer to parent IDs that are
                            never defined, or if a terminal refers to an unknown parent ID
        """
        p = core.Passage(self.passage_id)
        l0 = layer0.Layer0(p)
        l1 = layer1.Layer1(p)
        paragraph = 1

        # NOTE(review): presumably the first step of the generator fills the
        # pending state read below - confirm against parse()
        next(self.parse(stream))

        # add normal nodes: a node can only be created once its parent exists,
        # so keep sweeping the pending list until it is empty
        self.pending_nodes = list(reversed(self.pending_nodes))
        while self.pending_nodes:
            attached_any = False
            for i in reversed(range(len(self.pending_nodes))):
                parent_id, edge_tag, node_id = self.pending_nodes[i]
                # Membership test rather than .get(): None is a legitimate
                # stored value (handled below as "child of the root")
                if parent_id not in self.node_by_id:
                    continue
                parent = self.node_by_id[parent_id]
                del self.pending_nodes[i]
                implicit = node_id not in self.node_ids_with_children
                node = l1.add_fnode(parent, edge_tag, implicit=implicit)
                if edge_tag == EdgeTags.Punctuation:
                    node.tag = layer1.NodeTags.Punctuation
                self.node_by_id[node_id] = node
                attached_any = True
            if not attached_any:
                # A full sweep attached nothing, so the remaining nodes can
                # never be attached; fail instead of looping forever
                raise ValueError(
                    "Nodes with undefined parents in passage %s: %s" %
                    (p.ID, ", ".join(str(node_id) for _, _, node_id
                                     in self.pending_nodes)))

        # add terminals
        for text, tag, edge_tag, parent_id in self.terminals:
            punctuation = (tag == layer0.NodeTags.Punct)
            terminal = l0.add_terminal(text=text,
                                       punct=punctuation,
                                       paragraph=paragraph)
            try:
                parent = self.node_by_id[parent_id]
            except KeyError as e:
                raise ValueError(
                    "Terminal ('%s') with bad parent (%s) in passage %s" %
                    (text, parent_id, p.ID)) from e
            if parent is None:
                # Wrap the terminal in a new unit instead of attaching it to the root
                print("Terminal is a child of the root: '%s'" % text,
                      file=sys.stderr)
                parent = l1.add_fnode(parent, edge_tag)
            if edge_tag != EdgeTags.Terminal:
                print("Terminal with incoming %s edge: '%s'" %
                      (edge_tag, text),
                      file=sys.stderr)
            parent.add(EdgeTags.Terminal, terminal)
        return p
Exemple #14
0
def multi_sent_with_quotes():
    """Creates a :class:`Passage` with multiple sentences and paragraphs, with quotes in them.

    Passage: [1 2 [" U] [3 P] H] . [" U] [[5 6 . P] H]
             [[8 P] . 10 . H]

    The first nine terminals use the default paragraph; the last four are
    created with paragraph=2.
    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags

    default_paragraph = (
        ("1", False), ("2", False), ('"', True), ("3", False), (".", True),
        ('"', True), ("5", False), ("6", False), (".", True),
    )
    second_paragraph = (("8", False), (".", True), ("10", False), (".", True))
    terminals = [l0.add_terminal(text, is_punct) for text, is_punct in default_paragraph]
    terminals += [l0.add_terminal(text, is_punct, paragraph=2)
                  for text, is_punct in second_paragraph]

    scene1 = l1.add_fnode(None, tags.ParallelScene)
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    scene3 = l1.add_fnode(None, tags.ParallelScene)
    process1 = l1.add_fnode(scene1, tags.Process)
    process2 = l1.add_fnode(scene2, tags.Process)
    process3 = l1.add_fnode(scene3, tags.Process)

    # (parent, is punctuation) for each terminal, in terminal order
    layout = (
        (scene1, False),
        (scene1, False),
        (None, True),
        (process1, False),
        (None, True),
        (None, True),
        (process2, False),
        (process2, False),
        (process2, True),
        (process3, False),
        (scene3, True),
        (scene3, False),
        (scene3, True),
    )
    for terminal, (parent, as_punct) in zip(terminals, layout):
        if as_punct:
            l1.add_punct(parent, terminal)
        else:
            parent.add(tags.Terminal, terminal)
    return passage
Exemple #15
0
def graph2passage(graph, input):
    """Convert a graph with text anchors into a core.Passage.

    :param graph: source graph; the code reads its id, nodes (with anchors,
                  outgoing_edges, is_top, id) and find_node()
    :param input: text that the anchors' "from"/"to" offsets slice into
    :return: the created passage, whose ID is graph.id
    """
    passage = core.Passage(graph.id)
    l0 = layer0.Layer0(passage)
    # Collect the distinct anchor spans of all nodes; the punctuation flag is taken from the anchoring node
    anchors = {(anchor["from"], anchor["to"], is_punct(node))
               for node in graph.nodes for anchor in node.anchors or ()}
    # One terminal per span, created in sorted span order and keyed by (from, to)
    terminals = {(i, j): l0.add_terminal(text=input[i:j], punct=punct)
                 for i, j, punct in sorted(anchors)}

    l1 = layer1.Layer1(passage)
    # Seed the traversal with the primary roots: a top node is paired with None as its unit,
    # any other primary root gets a new FoundationalNode of its own
    queue = [(node, None if node.is_top else layer1.FoundationalNode(
        root=l1.root, tag=layer1.NodeTags.Foundational, ID=l1.next_id()))
             for node in graph.nodes if is_primary_root(node)]

    id_to_unit = {node.id: unit for (node, unit) in queue}
    remotes = []  # (parent unit, labels, target node id), resolved after the traversal
    while queue:
        parent, parent_unit = queue.pop(0)
        # Group the outgoing edges by target so parallel edges become one multi-label edge;
        # sorting by the same key first is required by groupby
        for tgt, edges in groupby(sorted(parent.outgoing_edges,
                                         key=attrgetter("tgt")),
                                  key=attrgetter("tgt")):
            edges = list(edges)
            labels = [edge.lab for edge in edges]
            # Only the first edge of the group decides whether the whole group is remote
            if is_remote(edges[0]):
                remotes.append((parent_unit, labels, tgt))
            else:
                child = graph.find_node(tgt)
                child_unit = id_to_unit[tgt] = l1.add_fnode_multiple(
                    parent_unit, labels, implicit=is_implicit(child))
                queue.append((child, child_unit))
        for anchor in parent.anchors or ():
            if parent_unit is None:  # Terminal children of the root are not valid in UCCA, so warn but be faithful
                print(
                    "graph2passage(): anchors of the root node converted to Terminal children in ‘{}’."
                    "".format(graph.id),
                    file=sys.stderr)
                # Rebinding parent_unit means the warning is printed at most once per node
                parent_unit = l1.heads[0]
            parent_unit.add(layer1.EdgeTags.Terminal, terminals[anchor["from"],
                                                                anchor["to"]])
    # NOTE(review): id_to_unit[tgt] raises KeyError if a remote target was never reached
    # through a non-remote edge and is not a primary root - confirm this cannot happen
    for parent, labels, tgt in remotes:
        l1.add_remote_multiple(parent, labels, id_to_unit[tgt])
    return passage
Exemple #16
0
 def create_passage(self, verify=True, **kwargs):
     """
     Build the final passage from the nodes held in this state
     :param verify: passed on to Node.attach_nodes
     :param kwargs: an optional "format" entry overrides the format stored in the source passage's extra
     :return: new core.Passage with the same ID as self.passage
     """
     source = self.passage
     Config().print("Creating passage %s from state..." % source.ID, level=2)
     created = core.Passage(source.ID)
     output_format = kwargs.get("format") or source.extra.get("format")
     if output_format:
         created.extra["format"] = output_format
     # Layer 0 is copied from the source passage; layer 1 is created fresh
     source.layer(layer0.LAYER_ID).copy(created)
     l0 = created.layer(layer0.LAYER_ID)
     l1 = layer1.Layer1(created)
     self.root.node = l1.heads[0]
     use_node_labels = self.args.node_labels
     if use_node_labels:
         self.root.set_node_label()
     if self.labeled:  # We have a reference passage
         self.root.set_node_id()
     Node.attach_nodes(l0, l1, self.nodes, self.labeled, use_node_labels, verify)
     return created
Exemple #17
0
    def create_multi_passage():
        """Creates a :class:`Passage` with multiple sentences and paragraphs.

        Passage: [1 2 [3 P] H] . [[5 6 . P] H]
                 [[8 P] . 10 . H]

        The first seven terminals use the default paragraph; the last four are
        created with paragraph=2.
        """
        passage = core.Passage('1')
        l0 = layer0.Layer0(passage)
        l1 = layer1.Layer1(passage)
        tags = layer1.EdgeTags

        default_paragraph = (
            ('1', False), ('2', False), ('3', False), ('.', True),
            ('5', False), ('6', False), ('.', True),
        )
        second_paragraph = (('8', False), ('.', True), ('10', False), ('.', True))
        terminals = [l0.add_terminal(text, is_punct)
                     for text, is_punct in default_paragraph]
        terminals += [l0.add_terminal(text, is_punct, paragraph=2)
                      for text, is_punct in second_paragraph]

        scene1 = l1.add_fnode(None, tags.ParallelScene)
        scene2 = l1.add_fnode(None, tags.ParallelScene)
        scene3 = l1.add_fnode(None, tags.ParallelScene)
        process1 = l1.add_fnode(scene1, tags.Process)
        process2 = l1.add_fnode(scene2, tags.Process)
        process3 = l1.add_fnode(scene3, tags.Process)

        # (parent, is punctuation) for each terminal, in terminal order
        layout = (
            (scene1, False),
            (scene1, False),
            (process1, False),
            (None, True),
            (process2, False),
            (process2, False),
            (process2, True),
            (process3, False),
            (scene3, True),
            (scene3, False),
            (scene3, True),
        )
        for terminal, (parent, as_punct) in zip(terminals, layout):
            if as_punct:
                l1.add_punct(parent, terminal)
            else:
                parent.add(tags.Terminal, terminal)
        return passage
Exemple #18
0
def empty():
    """Create passage "1" with layers 0 and 1 but no terminals or units added."""
    passage = core.Passage(ID="1")
    for layer_class in (layer0.Layer0, layer1.Layer1):
        layer_class(passage)
    return passage
Exemple #19
0
def l1_passage():
    """Creates a Passage to work with using layer1 objects.

    Annotation layout as constructed below (terminals are numbered 1-20,
    #10 and #20 are punctuation):
        1: Linker #1
        2-10: Parallel scene #1, 2-5 ==> Process, 6-9 ==> Participant,
            10 ==> Punctuation; its remote Participant is the Adverbial
            of scene #2
        11-15: Parallel scene #2, 11-14 ==> Participant, 15 ==> Adverbial;
            its remote Process is the Process of scene #1
        16: Linker #2
        17-19: Parallel scene #3, 17-18 ==> State, 19 ==> Participant,
            plus an implicit Participant
        20: Punctuation, added with parent None
    Linkages: Linker #1 -> scene #1; Linker #2 -> scenes #2 and #3.
    All linkers and scenes are added with parent None.

    """
    passage = core.Passage("1")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    terminals = [l0.add_terminal(text=str(number), punct=(number % 10 == 0))
                 for number in range(1, 21)]

    def attach(unit, first, last):
        # Attach terminals number first..last (1-based, inclusive) to unit, in order
        for terminal in terminals[first - 1:last]:
            unit.add(tags.Terminal, terminal)

    # Linker #1: [1 L]
    linker1 = l1.add_fnode(None, tags.Linker)
    attach(linker1, 1, 1)

    # Scene #1: [[2 3 4 5 P] [6 7 8 9 A] [10 U] H]
    scene1 = l1.add_fnode(None, tags.ParallelScene)
    process1 = l1.add_fnode(scene1, tags.Process)
    participant1 = l1.add_fnode(scene1, tags.Participant)
    attach(process1, 2, 5)
    attach(participant1, 6, 9)
    l1.add_punct(scene1, terminals[9])

    # Scene #2: [[11 12 13 14 A] [15 D] H]
    scene2 = l1.add_fnode(None, tags.ParallelScene)
    participant2 = l1.add_fnode(scene2, tags.Participant)
    attach(participant2, 11, 14)
    adverbial2 = l1.add_fnode(scene2, tags.Adverbial)
    attach(adverbial2, 15, 15)

    # Linker #2: [16 L]
    linker2 = l1.add_fnode(None, tags.Linker)
    attach(linker2, 16, 16)

    # Scene #3: [[17 18 S] [19 A] (implicit participant) H]
    scene3 = l1.add_fnode(None, tags.ParallelScene)
    state3 = l1.add_fnode(scene3, tags.State)
    attach(state3, 17, 18)
    participant3 = l1.add_fnode(scene3, tags.Participant)
    attach(participant3, 19, 19)
    l1.add_fnode(scene3, tags.Participant, implicit=True)

    # Punctuation #20 - not under a scene
    l1.add_punct(None, terminals[19])

    # adding remote argument to scene #1, remote process to scene #2
    # creating linkages L1->H1, H2<-L2->H3
    l1.add_remote(scene1, tags.Participant, adverbial2)
    l1.add_remote(scene2, tags.Process, process1)
    l1.add_linkage(linker1, scene1)
    l1.add_linkage(linker2, scene2, scene3)

    return passage
Exemple #20
0
def passage2():
    """Build passage "2": twenty terminals, nested parallel scenes, remotes and linkages.

    Layout as constructed below (terminals are numbered 1-20, #10 and #20 are
    punctuation):
        1: Linker #1 (parent None)
        2-10: Scene #1 (parent None), 2-5 ==> Process, 6-9 ==> Participant,
            10 ==> Punctuation; it also holds an Elaborator over terminal 15
        Scene #23 (parent None) groups scene #2, Linker #2 and scene #3
        11-14: Scene #2, holding one child with a ParallelScene edge
        16: Linker #2
        17-19: Scene #3, 17-18 ==> Process, 19 ==> Participant, plus an
            implicit Participant
        20: Punctuation, added with parent None

    NOTE(review): the trailing "# true" / "# false" markers are carried over
    from the original code; presumably they record whether the unit is
    expected to match a reference passage - confirm.
    """
    passage = core.Passage("2")
    l0 = layer0.Layer0(passage)
    l1 = layer1.Layer1(passage)
    tags = layer1.EdgeTags
    terminals = [l0.add_terminal(text=str(number), punct=(number % 10 == 0))
                 for number in range(1, 21)]

    def attach(unit, first, last):
        # Attach terminals number first..last (1-based, inclusive) to unit, in order
        for terminal in terminals[first - 1:last]:
            unit.add(tags.Terminal, terminal)

    # Linker #1: [1 L]
    linker1 = l1.add_fnode(None, tags.Linker)  # true
    attach(linker1, 1, 1)

    # Scene #1: [[2 3 4 5 P] [6 7 8 9 A] [10 U] H]
    scene1 = l1.add_fnode(None, tags.ParallelScene)  # true
    process1 = l1.add_fnode(scene1, tags.Process)  # true
    participant1 = l1.add_fnode(scene1, tags.Participant)  # true
    attach(process1, 2, 5)
    attach(participant1, 6, 9)
    l1.add_punct(scene1, terminals[9])

    # Scene #23 groups scene #2, linker #2 and scene #3
    # Scene #2: [[11 12 13 14 H] H]; the Elaborator over terminal 15 is
    # attached under scene #1, not under scene #2
    scene23 = l1.add_fnode(None, tags.ParallelScene)  # true
    scene2 = l1.add_fnode(scene23, tags.ParallelScene)  # true
    inner2 = l1.add_fnode(scene2, tags.ParallelScene)  # false
    attach(inner2, 11, 14)
    elaborator = l1.add_fnode(scene1, tags.Elaborator)  # false
    attach(elaborator, 15, 15)

    # Linker #2: [16 L]
    linker2 = l1.add_fnode(scene23, tags.Linker)  # true
    attach(linker2, 16, 16)

    # Scene #3: [[17 18 P] [19 A] (implicit participant) H]
    scene3 = l1.add_fnode(scene23, tags.ParallelScene)  # true
    process3 = l1.add_fnode(scene3, tags.Process)  # false
    attach(process3, 17, 18)
    participant3 = l1.add_fnode(scene3, tags.Participant)  # true
    attach(participant3, 19, 19)
    l1.add_fnode(scene3, tags.Participant, implicit=True)

    # Punctuation #20 - not under a scene
    l1.add_punct(None, terminals[19])

    # adding two remote participants to scene #1, a remote state to scene #2
    # creating linkages L1->H1, H2<-L2->H3
    l1.add_remote(scene1, tags.Participant, elaborator)
    l1.add_remote(scene1, tags.Participant, participant3)
    l1.add_remote(scene2, tags.State, process1)
    l1.add_linkage(linker1, scene1)
    l1.add_linkage(linker2, scene2, scene3)

    return passage
Exemple #21
0
def evaluate_with_label(sent_tensor, model, a_model, label_model, s_model, rm_model, rm_lstm_model,
                        ori_sent, dev_passage, pos,
                        pos_tensor, labels, label2index, ent, ent_tensor, case_tensor, unroll):
    """
    Predict a passage for one sentence, labelling every grouping edge.

    Tokens are processed left to right. Each token gets a layer-0 terminal wrapped in its
    own layer-1 node. ``a_model`` then scores positions; when the best position lies before
    the current token, the nodes back to that position are either merged into a single unit
    or grouped under a new node whose outgoing edges are labelled through ``label_model``.
    Afterwards up to ``max_recur`` further grouping rounds are attempted for the span that
    ends at the current token.

    The function relies on names that are not defined inside it: ``predict_l1``, ``punc``,
    ``terminal_tag``, ``FoundationalNode``, ``get_parent_node``, ``get_left_most_id``,
    ``get_right_most_id`` and ``get_primary_parent``.

    :param sent_tensor: sentence input forwarded to ``model`` (and to ``rm_lstm_model``)
    :param model: encoder called as ``model(sent_tensor, pos_tensor, ent_tensor, case_tensor, unroll)``;
        must return ``(output, hidden)``, both indexed by token position here
    :param a_model: attention scorer called as ``a_model(encoding, output_2d, i)``
    :param label_model: edge-label scorer called as ``label_model(parent_encoding, child_encoding)``
    :param s_model: span encoder; passing a ``str`` marks it as "not supplied"
        (see the review note in the body: one call site uses it unconditionally)
    :param rm_model: remote-edge scorer; passing a ``str`` disables remote-edge prediction
    :param rm_lstm_model: encoder used only when ``rm_model`` is enabled
    :param ori_sent: tokens of the sentence
    :param dev_passage: source passage; only its ``ID`` is read
    :param pos: POS tag per token (read per token, not otherwise used by the active code)
    :param pos_tensor: POS input forwarded to the encoders
    :param labels: indexable mapping from a predicted label index to an edge label
    :param label2index: referenced only by commented-out code in this function
    :param ent: entity type per token (read per token, not otherwise used by the active code)
    :param ent_tensor: entity input forwarded to the encoders
    :param case_tensor: case input forwarded to the encoders
    :param unroll: forwarded to the encoders; when truthy, some ``s_model`` calls also
        receive the hidden state that precedes the span
    :return: the ``core.Passage`` built during prediction
    """

    # print("original sent")
    # print(ori_sent)

    # NOTE(review): assigned but never read in this function.
    create_by_leftmost = True

    # A str placeholder for s_model / rm_model means that the model was not supplied.
    using_s_model = False
    if not isinstance(s_model, str):
        using_s_model = True

    using_rm_model = False
    if not isinstance(rm_model, str):
        using_rm_model = True
        # Separate encoding of the sentence used only for remote-edge prediction.
        output_rm, hidden_rm = rm_lstm_model(sent_tensor, pos_tensor, ent_tensor, case_tensor, unroll)
        output_2d_rm = output_rm.squeeze(1)

    max_recur = 7
    i = 0
    sent_length = len(ori_sent)

    # l1_node_list is used as a stack: grouping pops nodes off its end and pushes the new parent.
    l1_node_list = []
    # l0_node_list holds the layer-0 terminals in token order.
    l0_node_list = []
    # node -> encoding used as the child input of label_model.
    node_encoding = {}
    # node -> [left index, right index] of the span recorded when the node was created.
    ck_node_encoding = {}

    output, hidden = model(sent_tensor, pos_tensor, ent_tensor, case_tensor, unroll)

    output_2d = output.squeeze(1)

    # initialize passage
    passageID = dev_passage.ID
    passage = core.Passage(passageID)
    l0 = layer0.Layer0(root=passage)
    l1 = layer1.Layer1(passage)

    # NOTE(review): only referenced by commented-out code below.
    predicted_scene = False

    # Token indices whose terminals were already merged into a combined unit.
    already_in_propn = []
    # NOTE(review): filled below, but the only consumers are commented out.
    rm_to_add = defaultdict(list)

    while i < sent_length:
        terminal_token = ori_sent[i]
        pos_tag = pos[i]
        ent_type = ent[i]

        # NOTE(review): predict_l1 is not defined in this function. If it is falsy nothing is
        # assigned here, so the grouping loop below reads left_most_idx from an earlier
        # iteration (or fails on the first token).
        if not predict_l1:
            # moved to l0_l1_rule.py
            pass
        # predict l0 to l1
        else:
            # create terminal node in l0
            is_punc = terminal_token in punc
            terminal_node = l0.add_terminal(terminal_token, is_punc)
            l0_node_list.append(terminal_node)

            # Wrap the terminal in its own layer-1 node; the ID is built from the layer size.
            l1_position = len(l1._all) + 1
            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
            terminal_node_in_l1 = FoundationalNode(ID, passage, tag=layer1.NodeTags.Punctuation if
                                                   is_punc else layer1.NodeTags.Foundational)
            terminal_node_in_l1.add(terminal_tag, terminal_node)
            l1_node_list.append(terminal_node_in_l1)
            node_encoding[terminal_node_in_l1] = output[i]
            ck_node_encoding[terminal_node_in_l1] = [i, i]

            # Pick the single best-scoring position for the current token.
            output_i = output[i]
            attn_i = a_model(output_i, output_2d, i)
            top_k_value, top_k_ind = torch.topk(attn_i, 1)

            # for debugging
            tki = top_k_ind.data[0][0]

            # attend to the current terminal itself (or to a later position): nothing to group,
            # and the grouping loop at the end of the iteration is skipped as well
            if top_k_ind.data[0] >= i:

                # # remote node to a node to the right of the parent
                # if i in rm_to_add:
                #     for remote_pred in rm_to_add[i]:
                #         rm_parent, rm_label = remote_pred
                #         rm_parent.add(rm_label, terminal_node_in_l1, edge_attrib={'remote': True})

                i += 1
                continue
            else:
                top_k_node = l0_node_list[top_k_ind]
                parent_node = get_parent_node(top_k_node)
                # new_node_position = len(l1._all) + 1
                # new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                # new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                """TODO: check this. not sure if it should be the left most child or top_k_ind"""
                debug_left_most_id = get_left_most_id(parent_node)
                # debug_left_most_id = top_k_ind

                # if using_s_model:
                #     output_boundary = output[debug_left_most_id: i + 1]
                #     if unroll and debug_left_most_id > 0:
                #         new_node_enc, combine_l0 = s_model(output_boundary, inp_hidden=hidden[debug_left_most_id - 1],
                #                                            layer0=True)
                #     else:
                # NOTE(review): s_model is called here even when using_s_model is False
                # (i.e. when a str placeholder was passed) -- confirm callers never do that.
                output_boundary = output[debug_left_most_id: i + 1]
                new_node_enc, combine_l0, is_dis = s_model(output_boundary, layer0=True, dis=True)
                if using_rm_model:
                    output_boundary_rm = output_rm[debug_left_most_id: i + 1]
                    # NOTE(review): unpacks two values, unlike the three-value call just above
                    # that passes layer0/dis; presumably the return arity depends on those flags.
                    new_node_enc_rm, _ = s_model(output_boundary_rm)
                # else:
                #     new_node_enc = output[i] - output[debug_left_most_id]

                propn_topk_value, propn_topk_ind = torch.topk(combine_l0, 1)
                dis_topk_value, dis_topk_ind = torch.topk(is_dis, 1)
                # need to combine nodes in l0

                # Both predictions are 1: detach the attended terminal from its current layer-1
                # parent by clearing the edge lists directly, then attach it to the current
                # token's layer-1 node. Execution continues below (the early exit is disabled).
                if dis_topk_ind.data[0] == 1 and propn_topk_ind.data[0] == 1:
                    dis_left_node_l0 = l0_node_list[top_k_ind]
                    dis_left_node_l1 = dis_left_node_l0.parents[0]
                    dis_left_node_l0._incoming = []
                    dis_left_node_l1._outgoing = []
                    terminal_node_in_l1.add(terminal_tag, dis_left_node_l0)

                    # i += 1
                    # continue

                combined = False
                if propn_topk_ind.data[0] == 1 and dis_topk_ind.data[0] == 0 and \
                        debug_left_most_id not in already_in_propn:
                    # check if within the left and right boundary if there is already a node in propn
                    valid_attention = True
                    for j in range(debug_left_most_id, i + 1):
                        if j in already_in_propn:
                            valid_attention = False

                    if valid_attention:
                        # Pop nodes off the stack until the one starting at the left boundary is reached.
                        combine_list = []
                        while True:
                            item_node = l1_node_list.pop()
                            l1_node_to_l0_idx = get_left_most_id(item_node)
                            itemid = item_node.ID
                            pid = parent_node.ID
                            combine_list.append(item_node)
                            if l1_node_to_l0_idx == debug_left_most_id:
                                break

                        # make sure not to attend to a node with parents
                        for ck_node in combine_list:
                            # ck_node can be a combined node
                            ck_node_l0 = l0_node_list[get_left_most_id(ck_node)]
                            ck_node_l1 = ck_node_l0.parents[0]
                            if len(ck_node_l1.parents) > 0:
                                valid_attention = False
                                break
                        # push back without change
                        if not valid_attention:
                            combined = False
                            # to be consistent with popping, we loop in the reverse order
                            for ck_node in reversed(combine_list):
                                l1_node_list.append(ck_node)
                        else:
                            combined = True
                            # Create the combined unit and move every popped node's terminal under it.
                            l1_position = len(l1._all) + 1
                            ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, l1_position)
                            terminal_node_in_l1 = FoundationalNode(ID, passage, tag=layer1.NodeTags.Foundational)
                            for l1_node in combine_list:
                                assert len(l1_node.children) == 1, "l1_node has more than 1 children"
                                terminal_node = l1_node.children[0]
                                # remove node_in_l1
                                # cannot use "remove" function
                                # l1_node.remove(terminal_node)
                                terminal_node._incoming = []
                                l1_node._outgoing = []
                                # if remove node from l1 then ID will be a problem
                                # try:
                                #     l1._remove_node(l1_node)
                                # except:
                                #     pass
                                # combine nodes
                                terminal_node_in_l1.add(terminal_tag, terminal_node)
                                already_in_propn.append(get_left_most_id(terminal_node))
                            l1_node_list.append(terminal_node_in_l1)
                            left_most_idx = get_left_most_id(terminal_node_in_l1)
                            node_encoding[terminal_node_in_l1] = new_node_enc
                            ck_node_encoding[terminal_node_in_l1] = [debug_left_most_id, i]

                # # remote node to a node to the right of the parent
                # if i in rm_to_add:
                #     for remote_pred in rm_to_add[i]:
                #         rm_parent, rm_label = remote_pred
                #         rm_parent.add(rm_label, terminal_node_in_l1, edge_attrib={'remote': True})

                if not combined:
                    # Regular grouping: pop nodes until the attended node's parent is reached and
                    # attach all of them, with predicted labels, to a freshly created node.
                    children = []
                    new_node_position = len(l1._all) + 1
                    new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                    new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                    while True:
                        item_node = l1_node_list.pop()
                        itemid = item_node.ID
                        pid = parent_node.ID
                        children.append(item_node)
                        if item_node.ID == parent_node.ID:
                            for child in children:
                                child_enc = node_encoding[child]
                                ck_child_enc = ck_node_encoding[child]
                                label_weight = label_model(new_node_enc, child_enc)

                                # restrict predicting "H" label
                                label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
                                # label_top_k_values, label_top_k_inds = torch.topk(label_weight, 2)
                                # label_top_k_ind = label_top_k_inds[0][0]
                                # if label_top_k_ind == label2index["H"]:
                                #     if not (debug_left_most_id == 0 and i == len(ori_sent) - 1):
                                #         label_top_k_ind = label_top_k_inds[0][1]
                                #     else:
                                #         predicted_scene = True

                                pred_label = labels[label_top_k_ind]
                                new_node.add(pred_label, child)

                            # predict remote edge
                            if using_rm_model:
                                rm_weight = rm_model(new_node_enc_rm, output_2d_rm, sent_length)
                                rm_top_k_value, rm_top_k_ind = torch.topk(rm_weight, 1)
                                # A target left of the new node is attached immediately as a remote "A" edge.
                                if rm_top_k_ind < get_left_most_id(new_node):
                                    rm_pred_label = "A"
                                    new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                                 edge_attrib={'remote': True})
                                # A target right of the new node is only recorded in rm_to_add.
                                elif rm_top_k_ind > get_right_most_id(new_node):
                                    rm_pred_label = "A"
                                    # new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                    #              edge_attrib={'remote': True})
                                    rm_to_add[rm_top_k_ind.data.cpu().numpy()[0][0]].append((new_node, rm_pred_label))

                            l1_node_list.append(new_node)
                            node_encoding[new_node] = new_node_enc
                            ck_node_encoding[new_node] = [debug_left_most_id, i]
                            break
                    left_most_idx = get_left_most_id(new_node)

                    # Never let the span start to the right of the attended position.
                    if left_most_idx > top_k_ind:
                        left_most_idx = top_k_ind

        # recursive call to see if need to create new node
        for r in range(1, max_recur + 1):
            if using_s_model:
                output_boundary = output[left_most_idx: i + 1]
                # Diagnostic output only: an empty span is reported but processing continues.
                if left_most_idx >= i + 1:
                    print("ERROR:")
                    print("Combined?")
                    print(combined)
                    print("left_most_idx")
                    print(left_most_idx)
                    print("i")
                    print(i)
                if unroll and left_most_idx > 0:
                    new_node_output, combine_l0 = s_model(output_boundary, inp_hidden=hidden[left_most_idx - 1])
                else:
                    new_node_output, combine_l0 = s_model(output_boundary)
            else:
                # Without a span encoder the span is represented by a difference of encodings.
                new_node_output = output[i] - output[left_most_idx]

            new_node_attn_weight = a_model(new_node_output, output_2d, i)
            r_top_k_value, r_top_k_ind = torch.topk(new_node_attn_weight, 1)

            # predict out of boundary
            if r_top_k_ind > i:
                break
            # attend to the new node itself
            elif left_most_idx <= r_top_k_ind <= i:
                break
            # create new node
            else:
                r_top_k_node = l0_node_list[r_top_k_ind]
                r_parent_node = get_parent_node(r_top_k_node)
                new_node_position = len(l1._all) + 1
                new_node_ID = "{}{}{}".format("1", core.Node.ID_SEPARATOR, new_node_position)
                new_node = FoundationalNode(new_node_ID, passage, tag=layer1.NodeTags.Foundational)
                """TODO: same as before. check this. not sure if it should be the left most child or top_k_ind"""
                debug_left_most_id = get_left_most_id(r_parent_node)

                if debug_left_most_id > r_top_k_ind:
                    debug_left_most_id = r_top_k_ind

                if using_s_model:
                    output_boundary = output[debug_left_most_id: i + 1]
                    if unroll and debug_left_most_id > 0:
                        r_new_node_enc, combine_l0 = s_model(output_boundary, inp_hidden=hidden[debug_left_most_id - 1])
                    else:
                        r_new_node_enc, combine_l0 = s_model(output_boundary)

                        # NOTE(review): this block sits inside the non-unroll branch, so on the
                        # unroll path r_new_node_enc_rm is not refreshed before it is read by
                        # the remote-edge prediction below -- confirm whether that is intended.
                        if using_rm_model:
                            output_boundary_rm = output_rm[debug_left_most_id: i + 1]
                            r_new_node_enc_rm, _ = s_model(output_boundary_rm)
                else:
                    r_new_node_enc = output[i] - output[debug_left_most_id]

                # r_new_node_enc = output[i] - output[get_left_most_id(r_parent_node)]
                # Same pop-and-attach grouping as above, using the encoding of the wider span.
                children = []
                while True:
                    item_node = l1_node_list.pop()
                    children.append(item_node)
                    if item_node.ID == r_parent_node.ID:
                        for child in children:
                            child_enc = node_encoding[child]
                            ck_child_enc = ck_node_encoding[child]
                            label_weight = label_model(r_new_node_enc, child_enc)

                            # restrict predicting "H" label
                            label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
                            # label_top_k_values, label_top_k_inds = torch.topk(label_weight, 2)
                            # label_top_k_ind = label_top_k_inds[0][0]
                            # if label_top_k_ind == label2index["H"]:
                            #     if not (debug_left_most_id == 0 and i == len(ori_sent) - 1):
                            #         label_top_k_ind = label_top_k_inds[0][1]
                            #     else:
                            #         predicted_scene = True

                            pred_label = labels[label_top_k_ind]
                            new_node.add(pred_label, child)

                        # predict remote edge
                        if using_rm_model:
                            rm_weight = rm_model(r_new_node_enc_rm, output_2d_rm, sent_length)
                            rm_top_k_value, rm_top_k_ind = torch.topk(rm_weight, 1)
                            if rm_top_k_ind < get_left_most_id(new_node):
                                rm_pred_label = "A"
                                new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                             edge_attrib={'remote': True})
                            elif rm_top_k_ind > get_right_most_id(new_node):
                                rm_pred_label = "A"
                                # new_node.add(rm_pred_label, get_primary_parent(l0_node_list[rm_top_k_ind]),
                                #              edge_attrib={'remote': True})
                                rm_to_add[rm_top_k_ind.data.cpu().numpy()[0][0]].append((new_node, rm_pred_label))

                        l1_node_list.append(new_node)
                        """WARNING: seems this is wrong. changed"""
                        # node_encoding[new_node] = output[i] - r_new_node_enc
                        node_encoding[new_node] = r_new_node_enc
                        ck_node_encoding[new_node] = [debug_left_most_id, i]
                        break
                left_most_idx = get_left_most_id(new_node)

        i += 1

    # # check if Node(1.1) is empty
    # if not predicted_scene:
    #     head_node = l1.heads[0]
    #     head_node_enc = output[-1] - output[0]
    #     for node in l1_node_list:
    #         # print(node.get_terminals())
    #         current_node_encoding = node_encoding[node]
    #         label_weight = label_model(head_node_enc, current_node_encoding)
    #         label_top_k_value, label_top_k_ind = torch.topk(label_weight, 1)
    #         pred_label = labels[label_top_k_ind]
    #         head_node.add(pred_label, node)

    # passage = clean_nodes(passage)

    # print(passage.ID)
    # ioutil.write_passage(passage, outdir="pred_test/")

    return passage
Exemple #22
0
def n_evaluate(sent_tensor, model, attn, ori_sent, dev_passage, pos,
               pos_tensor):
    """
    Predict an unlabelled passage for one sentence.

    Tokens are consumed left to right. A run of proper nouns (optionally introduced by a
    determiner, or forming a ``PROPN NUM PUNCT NUM`` date) becomes a single layer-1 unit.
    Any other token gets its own layer-1 node, and ``attn`` decides whether the nodes back
    to an earlier position are grouped under a new node. Up to five further grouping
    rounds are then attempted for the span ending at the current token. Grouping edges
    are tagged with a running counter rendered as a string.

    :param sent_tensor: sentence input forwarded to ``model``
    :param model: encoder called as ``model(sent_tensor, pos_tensor)``; the first element
        of its result is indexed by token position
    :param attn: scorer applied to an encoding; ``torch.topk`` of its result selects the
        attended position
    :param ori_sent: tokens of the sentence
    :param dev_passage: source passage; only its ``ID`` is read
    :param pos: POS tag per token
    :param pos_tensor: POS input forwarded to ``model``
    :return: the newly built ``core.Passage``
    """
    recursion_limit = 5
    n_tokens = len(ori_sent)
    k = 0  # running counter; its string form is the tag of each grouping edge
    l1_node_list = []  # stack of layer-1 nodes not yet grouped under a new node
    l0_node_list = []  # layer-0 terminals in token order

    output, _ = model(sent_tensor, pos_tensor)

    # initialize passage
    passage = core.Passage(dev_passage.ID)
    l0 = layer0.Layer0(root=passage)
    l1 = layer1.Layer1(passage)

    def make_terminal(token, is_punct):
        """Add a layer-0 terminal for ``token`` and remember it by position."""
        node = l0.add_terminal(token, is_punct)
        l0_node_list.append(node)
        return node

    def make_l1_node(tag):
        """Create a layer-1 node whose ID follows the current size of the layer."""
        node_id = "{}{}{}".format("1", core.Node.ID_SEPARATOR, len(l1._all) + 1)
        return FoundationalNode(node_id, passage, tag=tag)

    def group_back_to(anchor):
        """Pop stacked nodes down to ``anchor`` and attach them all to a new node."""
        nonlocal k
        parent = make_l1_node(layer1.NodeTags.Foundational)
        members = []
        while True:
            popped = l1_node_list.pop()
            members.append(popped)
            if popped.ID == anchor.ID:
                break
        for member in members:
            parent.add(str(k), member)
            k += 1
        l1_node_list.append(parent)
        return parent

    i = 0
    while i < n_tokens:
        token = ori_sent[i]
        pos_tag = pos[i]
        has_next = i + 1 < n_tokens

        # Merging only applies when at least two consecutive tokens take part.
        name_start = pos_tag == "PROPN" and has_next and pos[i + 1] in ("PROPN", "NUM")
        det_before_name = pos_tag == "DET" and has_next and pos[i + 1] == "PROPN"

        if name_start or det_before_name:
            left_most_idx = i
            output_i = output[i]
            merged = []

            if (i + 3 < n_tokens and pos[i + 1] == "NUM"
                    and pos[i + 2] == "PUNCT" and pos[i + 3] == "NUM"):
                # Dates such as "April(PROPN) 30(NUM) ,(PUNCT) 2008(NUM)": take four tokens.
                for _ in range(4):
                    piece = ori_sent[i]
                    merged.append(make_terminal(piece, piece in punc))
                    i += 1
            else:
                # Names, including ones like "The Bahamas": extend while PROPN continues.
                while True:
                    piece = ori_sent[i]
                    merged.append(make_terminal(piece, piece in punc))
                    i += 1

                    if i >= n_tokens:
                        break
                    # A colon followed by PROPN stays inside the name ("Lara Croft: Tomb Raider").
                    colon_in_name = (ori_sent[i] == ":" and i + 1 < len(pos)
                                     and pos[i + 1] == "PROPN")
                    if not colon_in_name and pos[i] != "PROPN":
                        break

            # All collected terminals hang off one layer-1 unit.
            unit = make_l1_node(layer1.NodeTags.Foundational)
            for terminal in merged:
                unit.add(terminal_tag, terminal)
            l1_node_list.append(unit)

            # Step back onto the last consumed token; the shared increment below moves on.
            i -= 1

        else:
            is_punct = token in punc
            terminal = make_terminal(token, is_punct)
            wrapper = make_l1_node(
                layer1.NodeTags.Punctuation if is_punct else layer1.NodeTags.Foundational)
            wrapper.add(terminal_tag, terminal)
            l1_node_list.append(wrapper)

            output_i = output[i]
            _, top_k_ind = torch.topk(attn(output_i), 1)

            # retained from the original, where it served debugger inspection
            debug_top = top_k_ind.data[0][0]

            # Attending to the token itself (or beyond): no grouping for this token.
            if top_k_ind.data[0] >= i:
                i += 1
                continue

            anchor = get_parent_node(l0_node_list[top_k_ind])
            left_most_idx = get_left_most_id(group_back_to(anchor))

        # Repeatedly try to widen the span that ends at the current token.
        for _ in range(recursion_limit):
            span_scores = attn(output_i - output[left_most_idx])
            _, r_top_k_ind = torch.topk(span_scores, 1)

            # Stop when the prediction is out of bounds or falls inside the current span.
            if r_top_k_ind > i or left_most_idx <= r_top_k_ind <= i:
                break

            anchor = get_parent_node(l0_node_list[r_top_k_ind])
            left_most_idx = get_left_most_id(group_back_to(anchor))

        i += 1

    # If the layer's head node covers no terminals, attach every remaining node to it.
    root = l1.heads[0]
    if len(root.get_terminals()) == 0:
        for edge_no, orphan in enumerate(l1_node_list, start=k):
            root.add(str(edge_no), orphan)

    return passage
Exemple #23
0
    def create_passage(self, verify=True):
        """
        Create final passage from temporary representation

        Layer-0 terminals are recreated from ``self.terminals``; the layer-1 structure is
        rebuilt from ``self.nodes`` after a topological sort. Primary edges are added first,
        remote edges second and linkage nodes last.

        Consistency checks raise ``AssertionError`` explicitly instead of relying on
        ``assert`` statements, so they are still enforced when Python runs with ``-O``.

        :param verify: fail if this results in an improper passage; when false, an
                       offending remote edge or linkage is skipped instead
        :return: core.Passage created from self.nodes
        :raises AssertionError: if ``verify`` is true and the structure is improper
        """
        passage = core.Passage(self.passage.ID)
        l0 = layer0.Layer0(passage)
        terminals = [
            l0.add_terminal(text=terminal.text,
                            punct=terminal.tag == layer0.NodeTags.Punct,
                            paragraph=terminal.paragraph)
            for terminal in self.terminals
        ]
        l1 = layer1.Layer1(passage)
        self.root.node = l1.heads[0]
        self.root.set_node_label()
        if self.labeled:  # We have a reference passage
            self.root.set_node_id()
            self.fix_terminal_tags(terminals)
        remotes = []  # To be handled after all nodes are created
        linkages = []  # To be handled after all non-linkage nodes are created
        self.topological_sort()  # Sort self.nodes
        for node in self.nodes:
            if self.labeled and verify and not (node.text or node.outgoing or node.implicit):
                raise AssertionError("Non-terminal leaf node: %s" % node)
            if node.is_linkage:
                linkages.append(node)
            else:
                for edge in node.outgoing:
                    if edge.remote:
                        remotes.append((node, edge))
                    else:
                        edge.child.add_to_l1(l1, node, edge.tag, terminals,
                                             self.labeled)

        for node, edge in remotes:  # Add remote edges
            try:
                if node.node is None:
                    raise AssertionError("Remote edge from nonexistent node")
                if edge.child.node is None:
                    raise AssertionError("Remote edge to nonexistent node")
                l1.add_remote(node.node, edge.tag, edge.child.node)
            except AssertionError:
                # Also covers assertion failures raised inside add_remote itself.
                if verify:
                    raise

        for node in linkages:  # Add linkage nodes and edges
            try:
                link_relation = None
                link_args = []
                for edge in node.outgoing:
                    if edge.child.node is None:
                        raise AssertionError("Linkage edge to nonexistent node")
                    if edge.tag == EdgeTags.LinkRelation:
                        if link_relation is not None:
                            raise AssertionError(
                                "Multiple link relations: %s, %s" % (link_relation, edge.child.node))
                        link_relation = edge.child.node
                    elif edge.tag == EdgeTags.LinkArgument:
                        link_args.append(edge.child.node)
                if link_relation is None:
                    raise AssertionError("No link relations: %s" % node)
                # if len(link_args) < 2:
                #     print("Less than two link arguments for linkage %s" % node, file=sys.stderr)
                node.node = l1.add_linkage(link_relation, *link_args)
                if node.node_id:  # We are in training and we have a gold passage
                    node.node.extra["remarks"] = node.node_id  # For reference
            except AssertionError:
                # Also covers assertion failures raised inside add_linkage itself.
                if verify:
                    raise

        return passage