def root2tree(self, start_node=None):
    """Build the tree for the top-level node(s) found below ``start_node``.

    The children of ``start_node`` are looked up in ``self.child_dict``:

    - no child: an empty tree (``t('')``) is returned
    - exactly one child: the tree that ``self.dt`` builds for it is returned
    - several children: each child is converted with ``self.dt`` and
      wrapped with ``n_wrap``; the wrapped subtrees are ordered with
      ``self.sort_subtrees`` and attached to a new ``VIRTUAL_ROOT`` node
    """
    top_level_ids = self.child_dict[start_node]
    root_count = len(top_level_ids)

    if root_count < 1:
        return t('')
    if root_count == 1:
        return self.dt(start_node=top_level_ids[0])

    # Several roots are undesired, but common (at least in the PCC corpus).
    # This happens if there is an EDU that is not connected to the rest of
    # the tree (e.g. a headline). All 'root' nodes are simply made part of
    # a multinuc relation called VIRTUAL_ROOT.
    message = "File '{}' has {} roots!".format(
        os.path.basename(self.filepath), root_count)
    logging.log(logging.INFO, message)

    wrapped_subtrees = []
    for top_id in top_level_ids:
        wrapped_subtrees.append(
            n_wrap(self.dt(start_node=top_id),
                   debug=self.debug, root_id=top_id))
    ordered_subtrees = self.sort_subtrees(*wrapped_subtrees)

    # The virtual root gets the root_id of the highest subtree. The
    # (height, root_id) tuples are compared as a whole, so equal heights
    # are decided by the root_id.
    _, virtual_root_id = max(
        [(subtree.height(), subtree.root_id) for subtree in ordered_subtrees])
    return t(VIRTUAL_ROOT, ordered_subtrees,
             debug=self.debug, root_id=virtual_root_id)
def convert_schema(self, nuc_tuple, inner_sat_tuples, outer_sat_tuples):
    """Fold a nucleus and all of its satellites into one nested tree.

    Every subtree is given as a ``(tree, linear tree position)`` tuple.
    The satellites are attached one after another, inner ones first and
    outer ones afterwards. In each step the satellite is placed before
    the tree built so far if its position is smaller than the position
    of the nucleus, and after it otherwise. Intermediate results are
    wrapped in an ``N`` node; the last step produces the relation node
    that is returned. Without any satellites the nucleus tree itself is
    returned.
    """
    current_tree, nuc_pos = nuc_tuple
    satellites = inner_sat_tuples + outer_sat_tuples
    remaining = len(satellites)

    for sat_tree, sat_pos in satellites:
        remaining -= 1
        relname = self.get_relname(sat_tree.root_id)

        if sat_pos < nuc_pos:
            pair = [sat_tree, current_tree]
        else:
            pair = [current_tree, sat_tree]

        if remaining:
            # more satellites follow: keep the result usable as a nucleus
            current_tree = t('N', [(relname, pair)],
                             debug=self.debug, root_id=current_tree.root_id)
        else:
            # last satellite: the relation becomes the root of the result
            current_tree = t(relname, pair,
                             debug=self.debug, root_id=current_tree.root_id)
    return current_tree
def make_span(parented_tree):
    """Create the 'span' or 'leaf' subtree that describes which EDUs the
    given (sub)tree covers in dis/lisp/RST-DT-formatted trees.

    EDU numbers are 1-based positions in the list of all leaf positions.

    Examples:

        span (a subtree that covers the leaves 1 to 7)

              span
            ___|____
            1      7

        leaf (a subtree that only covers leaf 7)

            leaf
             |
             7

    Raises NotImplementedError if a non-root subtree has no leaves.
    """
    every_leaf = all_leaf_positions(parented_tree)
    if is_root(parented_tree):
        # the root always spans from the first to the last EDU
        return t('span', ['1', str(len(every_leaf))])

    own_leaves = subtree_leaf_positions(parented_tree)
    leaf_count = len(own_leaves)
    if leaf_count < 1:
        raise NotImplementedError('Subtree has no leaves')

    first_edu_id = every_leaf.index(own_leaves[0]) + 1
    if leaf_count == 1:
        return t('leaf', [str(first_edu_id)])

    last_edu_id = every_leaf.index(own_leaves[-1]) + 1
    return t('span', [str(first_edu_id), str(last_edu_id)])
def test_rs3filewriter_nucsat():
    """A DGParentedTree with one nuc-sat relation is correctly converted into
    an RS3 file and back.

    Both linear orders are checked: satellite before nucleus and nucleus
    before satellite.
    """
    cases = [
        (t("circumstance", [("S", ["foo"]), ("N", ["bar"])]),
         "foo-bar-circ-foo-to-bar.rs3"),
        (t("circumstance", [("N", ["foo"]), ("S", ["bar"])]),
         "foo-bar-circ-bar-to-foo.rs3"),
    ]

    for input_tree, example_filename in cases:
        expected_output_tree = example2tree(example_filename)

        # The context manager closes the temporary file deterministically,
        # even if one of the assertions below fails.
        with NamedTemporaryFile() as temp_file:
            RS3FileWriter(input_tree, output_filepath=temp_file.name)
            produced_output_tree = RSTTree(temp_file.name)

            assert produced_output_tree.edu_strings == \
                produced_output_tree.tree.leaves() == ['foo', 'bar']
            assert input_tree == expected_output_tree.tree == \
                produced_output_tree.tree
def root2tree(self, start_node=None):
    """Build the tree for the top-level node(s) found below ``start_node``.

    The children of ``start_node`` are looked up in ``self.child_dict``:

    - no child: an empty tree (``t('')``) is returned
    - exactly one child: the tree that ``self.dt`` builds for it is returned
    - several children: each child is converted with ``self.dt`` and
      wrapped with ``n_wrap``; the wrapped subtrees are ordered with
      ``self.sort_subtrees`` and attached to a new ``VIRTUAL_ROOT`` node
    """
    top_level_ids = self.child_dict[start_node]
    root_count = len(top_level_ids)

    if root_count < 1:
        return t('')
    if root_count == 1:
        return self.dt(start_node=top_level_ids[0])

    # Several roots are undesired, but common (at least in the PCC corpus).
    # This happens if there is an EDU that is not connected to the rest of
    # the tree (e.g. a headline). All 'root' nodes are simply made part of
    # a multinuc relation called VIRTUAL_ROOT.
    message = "File '{}' has {} roots!".format(
        os.path.basename(self.filepath), root_count)
    logging.log(logging.INFO, message)

    wrapped_subtrees = []
    for top_id in top_level_ids:
        wrapped_subtrees.append(
            n_wrap(self.dt(start_node=top_id),
                   debug=self.debug, root_id=top_id))
    ordered_subtrees = self.sort_subtrees(*wrapped_subtrees)

    # The virtual root gets the root_id of the highest subtree. The
    # (height, root_id) tuples are compared as a whole, so equal heights
    # are decided by the root_id.
    _, virtual_root_id = max(
        [(subtree.height(), subtree.root_id) for subtree in ordered_subtrees])
    return t(VIRTUAL_ROOT, ordered_subtrees,
             debug=self.debug, root_id=virtual_root_id)
def gen_numbered_nucsat(first_element, number):
    """Return a numbered nucleus-satellite tree for the multi-satellite tests.

    ``first_element`` must be 'N' or 'S' and decides which child comes
    first. The relation is named ``nuc-sat-<number>`` if the nucleus comes
    first and ``sat-nuc-<number>`` otherwise; the satellite leaf is
    ``sat-<number>`` and the nucleus leaf is always ``nuc``.
    """
    assert first_element in ('N', 'S')

    nucleus = ('N', ['nuc'])
    satellite = ('S', ['sat-{}'.format(number)])

    nucleus_first = first_element == 'N'
    label_pattern = 'nuc-sat-{}' if nucleus_first else 'sat-nuc-{}'
    children = [nucleus, satellite] if nucleus_first else [satellite, nucleus]
    return t(label_pattern.format(number), children)
def sorted_nucsat_tree(self, nuc_tree, sat_tree):
    """Combine a nucleus tree and a satellite tree into one relation tree.

    The two subtrees are ordered by ``self.sort_subtrees``. The relation
    name is looked up for the satellite's ``root_id``, while the resulting
    tree takes over the ``root_id`` of the nucleus.
    """
    ordered_children = self.sort_subtrees(nuc_tree, sat_tree)
    relation_label = self.get_relname(sat_tree.root_id)
    tree_options = {'debug': self.debug, 'root_id': nuc_tree.root_id}
    return t(relation_label, ordered_children, **tree_options)
def test_nucsat():
    """A single nucleus-satellite relation is converted into rst.sty format.

    Covers both orders: satellite before nucleus and nucleus before
    satellite.
    """
    cases = [
        # satellite precedes the nucleus
        ([('S', ['sat first']), ('N', ['nuc second'])],
         u'\\dirrel\n\t{circumstance}{\\rstsegment{sat first}}\n\t{}{\\rstsegment{nuc second}}'),
        # nucleus precedes the satellite
        ([('N', ['nuc first']), ('S', ['sat second'])],
         u'\\dirrel\n\t{}{\\rstsegment{nuc first}}\n\t{circumstance}{\\rstsegment{sat second}}'),
    ]

    for children, expected_latex in cases:
        relation_tree = t('circumstance', children)
        result = dg.write_rstlatex(relation_tree)
        assert result.rstlatextree == expected_latex
def convert(parented_tree):
    """Recursively convert a (sub)tree into its dis-style counterpart.

    - a non-root leaf is turned into an EDU via ``make_edu``
    - the root becomes a 'Root' node whose children are its span
      description followed by the converted nuc/sat subtrees
    - any other node gets the label produced by ``convert_label`` and has
      a span description, a rel2par description and the converted nuc/sat
      subtrees as children

    All children are passed through ``orphanize`` before they are attached
    to the new node.
    """
    tree_is_root = is_root(parented_tree)
    if not tree_is_root and is_leaf(parented_tree):
        return make_edu(parented_tree)

    # descriptive children come first: span, then (non-root only) rel2par
    children = [make_span(parented_tree)]
    if not tree_is_root:
        children.append(make_rel2par(parented_tree))

    children.extend(
        [convert(subtree) for subtree in get_nucsat_subtrees(parented_tree)])

    if tree_is_root:
        new_label = 'Root'
    else:
        new_label = convert_label(parented_tree.label())

    return t(new_label, [orphanize(child) for child in children])
def test_rs3filewriter_emptytree():
    """An empty DGParentedTree is converted into an empty RS3 file and back."""
    input_tree = t("", [])
    expected_output_tree = example2tree("empty.rs3")

    # The context manager closes the temporary file deterministically,
    # even if one of the assertions below fails.
    with NamedTemporaryFile() as temp_file:
        RS3FileWriter(input_tree, output_filepath=temp_file.name)
        produced_output_tree = RSTTree(temp_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == []
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_onesegmenttree():
    """A DGParentedTree with only one segment is correctly converted into an
    RS3 file and back."""
    input_tree = t("N", ["foo"])
    expected_output_tree = example2tree('only-one-segment.rs3')

    # The context manager closes the temporary file deterministically,
    # even if one of the assertions below fails.
    with NamedTemporaryFile() as temp_file:
        RS3FileWriter(input_tree, output_filepath=temp_file.name)
        produced_output_tree = RSTTree(temp_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == ['foo']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_multinuc():
    """A multinuclear relation is converted into rst.sty format.

    Checked with a two-nuclei 'contrast' and a three-nuclei 'joint'
    relation.
    """
    cases = [
        ('contrast',
         [('N', ['nuc-1']), ('N', ['nuc-2'])],
         u'\\multirel{contrast}\n\t{\\rstsegment{nuc-1}}\n\t{\\rstsegment{nuc-2}}'),
        ('joint',
         [('N', ['nuc-1']), ('N', ['nuc-2']), ('N', ['nuc-3'])],
         u'\\multirel{joint}\n\t{\\rstsegment{nuc-1}}\n\t{\\rstsegment{nuc-2}}\n\t{\\rstsegment{nuc-3}}'),
    ]

    for relname, nuclei, expected_latex in cases:
        multinuc_tree = t(relname, nuclei)
        result = dg.write_rstlatex(multinuc_tree)
        assert result.rstlatextree == expected_latex
def convert_schema(self, nuc_tuple, inner_sat_tuples, outer_sat_tuples):
    """Fold a nucleus and all of its satellites into one nested tree.

    Every subtree is given as a ``(tree, linear tree position)`` tuple.
    The satellites are attached one after another, inner ones first and
    outer ones afterwards. In each step the satellite is placed before
    the tree built so far if its position is smaller than the position
    of the nucleus, and after it otherwise. Intermediate results are
    wrapped in an ``N`` node; the last step produces the relation node
    that is returned. Without any satellites the nucleus tree itself is
    returned.
    """
    current_tree, nuc_pos = nuc_tuple
    satellites = inner_sat_tuples + outer_sat_tuples
    remaining = len(satellites)

    for sat_tree, sat_pos in satellites:
        remaining -= 1
        relname = self.get_relname(sat_tree.root_id)

        if sat_pos < nuc_pos:
            pair = [sat_tree, current_tree]
        else:
            pair = [current_tree, sat_tree]

        if remaining:
            # more satellites follow: keep the result usable as a nucleus
            current_tree = t('N', [(relname, pair)],
                             debug=self.debug, root_id=current_tree.root_id)
        else:
            # last satellite: the relation becomes the root of the result
            current_tree = t(relname, pair,
                             debug=self.debug, root_id=current_tree.root_id)
    return current_tree
def make_rel2par(nuc_or_sat_subtree):
    """Create the 'rel2par' subtree for a nucleus ('N') or satellite ('S').

    - a satellite carries the label of its parent node
    - a nucleus whose only sibling is a satellite carries 'span'
    - a nucleus whose siblings are all nuclei carries the label of its
      parent node (this also applies if it has no siblings at all)

    Raises ValueError for the root node, for a nucleus with any other mix
    of siblings and for a subtree labelled neither 'N' nor 'S'.
    """
    if is_root(nuc_or_sat_subtree):
        raise ValueError("Root node can't have a relation.")

    nuclearity = nuc_or_sat_subtree.label()
    relation_name = nuc_or_sat_subtree.parent().label()

    if nuclearity == 'S':
        return t('rel2par', [relation_name])

    if nuclearity != 'N':
        raise ValueError(
            "Unknown nuclearity. Expected 'N' or 'S', got: {}".format(nuclearity))

    # siblings are looked up through the root of the whole tree
    sibling_positions = get_siblings(nuc_or_sat_subtree)
    whole_tree = nuc_or_sat_subtree.root()
    sibling_labels = [whole_tree[position].label()
                      for position in sibling_positions]

    if len(sibling_positions) == 1 and sibling_labels[0] == 'S':
        return t('rel2par', ['span'])

    if any(label != 'N' for label in sibling_labels):
        raise ValueError(
            "Can't mix sibling types. Expected 'N' or 'S', got: {}".format(sibling_labels))

    return t('rel2par', [relation_name])
def dis2tree(dis_tree, wrap_tree=False):
    """Recursively convert a dis (sub)tree into a relation tree.

    Leaf nodes are delegated to ``leaf2tree``. For all other nodes the
    children are grouped by type (``get_child_types``):

    - only nuclei: a multinuc relation; its name is read from the first
      child
    - exactly one nucleus and one satellite: a nucleus-satellite relation;
      its name is read from the satellite and the original order of the
      two children is kept

    The result is passed through ``get_wrapped_tree`` together with
    ``wrap_tree``; recursive calls always use ``wrap_tree=True``.
    Unexpected structures trigger an AssertionError.
    """
    assert get_tree_type(dis_tree) in SUBTREE_TYPES, \
        "tree_type: {}".format(get_tree_type(dis_tree))

    if get_node_type(dis_tree) == 'leaf':
        return leaf2tree(dis_tree)

    # the root node skips one leading element, all other nodes skip two
    first_child_index = 1 if is_root(dis_tree) else 2
    children = dis_tree[first_child_index:]
    child_types = get_child_types(children)

    if len(child_types) == 1:
        # multinuc relation: there must be several nuclei and nothing else
        assert NUC in child_types, "child_types: {}".format(child_types)
        assert len(child_types[NUC]) > 1, \
            "len: {}".format(len(child_types[NUC]))

        subtrees = []
        for child_id in child_types[NUC]:
            subtrees.append(dis2tree(children[child_id], wrap_tree=True))

        # all nuclei of a multinuc share the relation, so the first will do
        reltype = get_relation_type(children[0])
    else:
        # nucleus-satellite relation: exactly one child of each kind
        assert len(child_types) == 2, "child_types: {}".format(child_types)
        assert NUC in child_types and SAT in child_types, \
            "child_types: {}".format(child_types)
        assert len(child_types[NUC]) == 1 and len(child_types[SAT]) == 1, \
            "child_types: {}".format(child_types)

        nuc_child_id = child_types[NUC][0]
        nuc_subtree = dis2tree(children[nuc_child_id], wrap_tree=True)

        sat_child_id = child_types[SAT][0]
        sat_child = children[sat_child_id]
        sat_subtree = dis2tree(sat_child, wrap_tree=True)

        # keep the children in their original linear order
        subtrees = [nuc_subtree, sat_subtree]
        if not nuc_child_id < sat_child_id:
            subtrees.reverse()

        # the relation type is only stored in the satellite
        reltype = get_relation_type(sat_child)

    return get_wrapped_tree(dis_tree, t(reltype, subtrees), wrap_tree=wrap_tree)
def test_t():
    """``t`` builds the same tree as calling ``DGParentedTree`` directly,
    with a missing children argument treated like an empty list."""
    cases = [
        # (arguments for t, arguments for DGParentedTree)
        (("", []), ("", [])),
        (("",), ("", [])),
        (("foo", []), ("foo", [])),
        (("foo",), ("foo", [])),
        (("foo", ["bar"]), ("foo", ["bar"])),
        (("foo", ["bar", "baz"]), ("foo", ["bar", "baz"])),
    ]
    for t_args, tree_args in cases:
        assert t(*t_args) == DGParentedTree(*tree_args)
def test_writetofile():
    """A single nucleus-satellite relation is converted into rst.sty format
    and written to a file.
    """
    sat_before_nuc = \
        t('circumstance', [
            ('S', ['sat first']),
            ('N', ['nuc second'])
        ])

    # The context manager closes the temporary file deterministically,
    # even if the assertion below fails.
    with NamedTemporaryFile() as temp_file:
        dg.write_rstlatex(sat_before_nuc, temp_file.name)

        with open(temp_file.name, 'r') as rstlatex_file:
            assert rstlatex_file.read() == u'\\dirrel\n\t{circumstance}{\\rstsegment{sat first}}\n\t{}{\\rstsegment{nuc second}}\n'
def test_rs3filewriter_onesegmenttree_umlauts():
    """A DGParentedTree with only one segment with umlauts is correctly
    converted into an RS3 file and back.
    """
    edu_string = u"Über sein östliches Äußeres"
    input_tree = t("N", [edu_string])
    expected_output_tree = example2tree('only-one-segment-with-umlauts.rs3')

    # The context manager closes the temporary file deterministically,
    # even if one of the assertions below fails.
    with NamedTemporaryFile() as temp_file:
        RS3FileWriter(input_tree, output_filepath=temp_file.name)
        produced_output_tree = RSTTree(temp_file.name)

        assert expected_output_tree.edu_strings == \
            produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == [edu_string]
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_nested():
    """A DGParentedTree with a multinuc relation nested in a nuc-sat relation
    is correctly converted into an RS3 file and back."""
    input_tree = t('elaboration', [
        ('N', ['eins']),
        ('S', [
            ('joint', [
                ('N', ['zwei']),
                ('N', ['drei'])])])])
    expected_output_tree = example2tree(
        'eins-zwei-drei-(elab-eins-from-(joint-zwei-and-drei).rs3')

    # The context manager closes the temporary file deterministically,
    # even if one of the assertions below fails.
    with NamedTemporaryFile() as temp_file:
        RS3FileWriter(input_tree, output_filepath=temp_file.name)
        produced_output_tree = RSTTree(temp_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == ['eins', 'zwei', 'drei']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def segment2tree(self, elem_id, elem, elem_type, start_node=None):
    """Convert a segment element (and the satellites attached to it) into
    a tree.

    The segment itself becomes an 'S' leaf tree if its reltype is 'rst'
    and an 'N' leaf tree otherwise.

    - segment without an entry in ``self.child_dict``: the leaf tree is
      returned; for reltype 'span', '' or None a non-root nucleus is
      logged and a relname of 'span' is replaced (in ``elem``) by the
      relname of the parent element, if there is one
    - one child: the child is the satellite of this segment, both are
      combined via ``self.sorted_nucsat_tree``
    - two or more children: all of them must be satellites; they are
      combined with this segment via ``self.order_schema``

    If the segment has an empty child list, None is returned.
    ``elem_type`` and ``start_node`` are not used here.
    """
    # an 'rst' reltype marks this element as the S in an N-S relation
    root_label = 'S' if elem['reltype'] == 'rst' else 'N'
    segment_tree = t(root_label, [elem['text']],
                     debug=self.debug, root_id=elem_id)

    if elem_id not in self.child_dict:
        # this might be a root segment without any children
        # (e.g. a headline in PCC) or the only segment in a span
        # (which makes no sense in RST)
        if elem.get('reltype') in ('span', '', None):
            if elem['nuclearity'] != 'root':
                logging.log(
                    logging.INFO,
                    "Segment '{}' in file '{}' is a non-root nucleus without children".format(
                        elem_id, os.path.basename(self.filepath)))

            if elem.get('relname') == 'span':
                parent_elem = self.elem_dict.get(elem.get('parent'))
                if parent_elem:
                    elem['relname'] = parent_elem.get('relname')
        return segment_tree

    child_ids = self.child_dict[elem_id]
    child_count = len(child_ids)

    if child_count == 1:
        # this segment is (also) the N in an N-S relation
        sat_subtree = self.dt(start_node=child_ids[0])
        return self.sorted_nucsat_tree(segment_tree, sat_subtree)

    if child_count >= 2:
        # this segment is (also) the N in an RST schema,
        # as such it must only have satellites as children
        assert all(self.elem_dict[child_id]['nuclearity'] == 'satellite'
                   for child_id in child_ids)

        sat_subtrees = []
        for child_id in child_ids:
            sat_subtrees.append(self.dt(start_node=child_id))
        return self.order_schema(segment_tree, sat_subtrees)

    # an empty child list is not handled explicitly
    return None
def dis2tree(dis_tree, wrap_tree=False):
    """Recursively convert a dis (sub)tree into a relation tree.

    Leaf nodes are delegated to ``leaf2tree``. For all other nodes the
    children are grouped by type (``get_child_types``):

    - only nuclei: a multinuc relation; its name is read from the first
      child
    - exactly one nucleus and one satellite: a nucleus-satellite relation;
      its name is read from the satellite and the original order of the
      two children is kept

    The result is passed through ``get_wrapped_tree`` together with
    ``wrap_tree``; recursive calls always use ``wrap_tree=True``.
    Unexpected structures trigger an AssertionError.
    """
    assert get_tree_type(dis_tree) in SUBTREE_TYPES, \
        "tree_type: {}".format(get_tree_type(dis_tree))

    if get_node_type(dis_tree) == 'leaf':
        return leaf2tree(dis_tree)

    # the root node skips one leading element, all other nodes skip two
    first_child_index = 1 if is_root(dis_tree) else 2
    children = dis_tree[first_child_index:]
    child_types = get_child_types(children)

    if len(child_types) == 1:
        # multinuc relation: there must be several nuclei and nothing else
        assert NUC in child_types, "child_types: {}".format(child_types)
        assert len(child_types[NUC]) > 1, \
            "len: {}".format(len(child_types[NUC]))

        subtrees = []
        for child_id in child_types[NUC]:
            subtrees.append(dis2tree(children[child_id], wrap_tree=True))

        # all nuclei of a multinuc share the relation, so the first will do
        reltype = get_relation_type(children[0])
    else:
        # nucleus-satellite relation: exactly one child of each kind
        assert len(child_types) == 2, "child_types: {}".format(child_types)
        assert NUC in child_types and SAT in child_types, \
            "child_types: {}".format(child_types)
        assert len(child_types[NUC]) == 1 and len(child_types[SAT]) == 1, \
            "child_types: {}".format(child_types)

        nuc_child_id = child_types[NUC][0]
        nuc_subtree = dis2tree(children[nuc_child_id], wrap_tree=True)

        sat_child_id = child_types[SAT][0]
        sat_child = children[sat_child_id]
        sat_subtree = dis2tree(sat_child, wrap_tree=True)

        # keep the children in their original linear order
        subtrees = [nuc_subtree, sat_subtree]
        if not nuc_child_id < sat_child_id:
            subtrees.reverse()

        # the relation type is only stored in the satellite
        reltype = get_relation_type(sat_child)

    return get_wrapped_tree(dis_tree, t(reltype, subtrees), wrap_tree=wrap_tree)
def segment2tree(self, elem_id, elem, elem_type, start_node=None):
    """Convert a segment element (and the satellites attached to it) into
    a tree.

    The segment itself becomes an 'S' leaf tree if its reltype is 'rst'
    and an 'N' leaf tree otherwise.

    - segment without an entry in ``self.child_dict``: the leaf tree is
      returned; for reltype 'span', '' or None a non-root nucleus is
      logged and a relname of 'span' is replaced (in ``elem``) by the
      relname of the parent element, if there is one
    - one child: the child is the satellite of this segment, both are
      combined via ``self.sorted_nucsat_tree``
    - two or more children: all of them must be satellites; they are
      combined with this segment via ``self.order_schema``

    If the segment has an empty child list, None is returned.
    ``elem_type`` and ``start_node`` are not used here.
    """
    # an 'rst' reltype marks this element as the S in an N-S relation
    root_label = 'S' if elem['reltype'] == 'rst' else 'N'
    segment_tree = t(root_label, [elem['text']],
                     debug=self.debug, root_id=elem_id)

    if elem_id not in self.child_dict:
        # this might be a root segment without any children
        # (e.g. a headline in PCC) or the only segment in a span
        # (which makes no sense in RST)
        if elem.get('reltype') in ('span', '', None):
            if elem['nuclearity'] != 'root':
                logging.log(
                    logging.INFO,
                    "Segment '{}' in file '{}' is a non-root nucleus without children".format(
                        elem_id, os.path.basename(self.filepath)))

            if elem.get('relname') == 'span':
                parent_elem = self.elem_dict.get(elem.get('parent'))
                if parent_elem:
                    elem['relname'] = parent_elem.get('relname')
        return segment_tree

    child_ids = self.child_dict[elem_id]
    child_count = len(child_ids)

    if child_count == 1:
        # this segment is (also) the N in an N-S relation
        sat_subtree = self.dt(start_node=child_ids[0])
        return self.sorted_nucsat_tree(segment_tree, sat_subtree)

    if child_count >= 2:
        # this segment is (also) the N in an RST schema,
        # as such it must only have satellites as children
        assert all(self.elem_dict[child_id]['nuclearity'] == 'satellite'
                   for child_id in child_ids)

        sat_subtrees = []
        for child_id in child_ids:
            sat_subtrees.append(self.dt(start_node=child_id))
        return self.order_schema(segment_tree, sat_subtrees)

    # an empty child list is not handled explicitly
    return None
def s_wrap(tree, debug=False, root_id=None):
    """Ensure the given tree has a satellite as its root.

    - the root already is a satellite: the tree is returned unchanged
    - the root is a nucleus: its label is replaced by the satellite label
      (the tree is modified in place) and the same tree is returned
    - the root is anything else (e.g. a relation): a new tree with an 'S'
      root on top of the given tree is returned

    The labels to compare against are produced by ``debug_root_label``
    from ``debug`` and the ``root_id`` of the tree; the ``root_id``
    parameter is only used for a newly created 'S' root.
    """
    current_label = tree.label()
    nucleus_label = debug_root_label('N', debug, tree.root_id)
    satellite_label = debug_root_label('S', debug, tree.root_id)

    if current_label == satellite_label:
        return tree

    if current_label != nucleus_label:
        return t('S', [tree], debug=debug, root_id=root_id)

    tree.set_label(satellite_label)
    return tree
def test_rs3filewriter_pcc_10575():
    """PCC rs3 file 10575 can be converted rs3 -> dgtree -> rs3' -> dgtree',
    without information loss between dgtree and dgtree'.
    """
    input_tree = t('interpretation', [
        ('N', [
            ('circumstance', [
                ('S', ['eins']),
                ('N', [
                    ('contrast', [
                        ('N', ['zwei']),
                        ('N', [
                            ('cause', [
                                ('N', ['drei']),
                                ('S', ['vier'])])])])])])]),
        ('S', ['fuenf'])])
    expected_output_tree = example2tree('maz-10575-excerpt.rs3')

    # The context manager closes the temporary file deterministically,
    # even if one of the assertions below fails.
    with NamedTemporaryFile() as temp_file:
        RS3FileWriter(input_tree, output_filepath=temp_file.name)
        produced_output_tree = RSTTree(temp_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == \
            ['eins', 'zwei', 'drei', 'vier', 'fuenf']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def make_edu(edu_string):
    """Convert the text of an EDU into a 'text' subtree.

    The string is split on whitespace; the first token is prefixed and the
    last token is suffixed with the ``_!`` delimiter (a single token gets
    both). An empty or whitespace-only string has no tokens to attach the
    delimiters to, so it yields the bare delimiter pair ``_!_!`` as its
    only token instead of failing with an IndexError.
    """
    tokens = edu_string.split()
    if not tokens:
        # str.split() returns [] for empty/whitespace-only input
        return t('text', [u'_!_!'])

    tokens[0] = u'_!' + tokens[0]
    tokens[-1] = tokens[-1] + u'_!'
    return t('text', tokens)
def group2tree(self, elem_id, elem, elem_type, start_node=None):
    """Convert a group element and everything below it into a tree.

    The handling depends on the element's 'reltype':

    - 'rst' / 'multinuc': the subtree below the group is built and then
      wrapped with ``s_wrap`` ('rst') or ``n_wrap`` ('multinuc')
    - '', None or 'span': the group acts as a nucleus; a group with
      group_type 'multinuc' is treated as the head of a multinuc relation
      (optionally with additional satellites), any other group is handled
      according to its number of children

    ``start_node`` is not used here; ``elem_type`` only appears in an
    assertion message.

    Raises TooFewChildrenError if a non-multinuc group has no children and
    AssertionError if the element has an unexpected reltype or if children
    that have to be satellites are not.
    """
    reltype = elem.get('reltype')
    root_wrap = s_wrap if reltype == 'rst' else n_wrap

    # rst: this elem is the S in an N-S relation
    # multinuc: this elem is one of several Ns in a multinuc relation
    if reltype in ('rst', 'multinuc'):
        if len(self.child_dict[elem_id]) == 1:
            # this group is the root of another N-S relation
            subtree_id = self.child_dict[elem_id][0]
            subtree = self.dt(start_node=subtree_id)

        else:
            # several children: convert and wrap each of them, then put
            # them (sorted) under a node named after the first child's
            # relation
            subtrees = [self.elem_wrap(self.dt(start_node=c), debug=self.debug, root_id=c)
                        for c in self.child_dict[elem_id]]
            sorted_subtrees = self.sort_subtrees(*subtrees)
            first_child_id = self.child_dict[elem_id][0]
            # the relname is looked up only after the children were converted
            subtrees_relname = self.get_relname(first_child_id)

            subtree = t(subtrees_relname, sorted_subtrees, debug=self.debug, root_id=elem_id)
        return root_wrap(subtree, debug=self.debug, root_id=elem_id)

    else:
        assert reltype in ('', None, 'span'), \
            "Unexpected combination: elem_type '%s' and reltype '%s'" \
            % (elem_type, elem['reltype'])

        # this elem is the N in an N-S relation
        if elem['group_type'] == 'multinuc':
            # this elem is also the 'root node' of a multinuc relation
            child_ids = self.child_dict[elem_id]
            multinuc_child_ids = [c for c in child_ids
                                  if self.elem_dict[c]['reltype'] == 'multinuc']
            # here the relname is looked up before the children are converted
            multinuc_relname = self.get_relname(multinuc_child_ids[0])

            multinuc_elements = [self.dt(start_node=mc)
                                 for mc in multinuc_child_ids]
            sorted_subtrees = self.sort_subtrees(*multinuc_elements)

            multinuc_subtree = t(
                multinuc_relname, [sorted_subtrees], debug=self.debug,
                root_id=elem_id)

            # all children that do not belong to the multinuc relation itself
            other_child_ids = [c for c in child_ids
                               if c not in multinuc_child_ids]

            if other_child_ids:
                # this element is the N in an S-N-S schema
                nuc_tree = t('N', multinuc_subtree, debug=self.debug, root_id=elem_id)

                assert all([self.elem_dict[child_id]['nuclearity'] == 'satellite'
                            for child_id in other_child_ids])

                sat_subtrees = [self.dt(start_node=child_id)
                                for child_id in other_child_ids]
                return self.order_schema(nuc_tree, sat_subtrees)

            else:
                # this elem is only the head of a multinuc relation
                # TODO: does this make sense / is this ever reached?
                return multinuc_subtree

        else:
            #~ assert elem['group_type'] == 'span', \
                #~ "Unexpected group_type '%s'" % elem['group_type']
            if len(self.child_dict[elem_id]) == 1:
                # this span at the top of a tree was only added for visual purposes
                child_id = self.child_dict[elem_id][0]
                return self.dt(start_node=child_id)

            elif len(self.child_dict[elem_id]) == 2:
                # this elem is the N of an N-S relation (child: S), but is also
                # a span over another relation (child: N)
                # map nuclearity -> child id; a KeyError below means that
                # the two children are not one 'satellite' and one 'nucleus'
                children = {}
                for child_id in self.child_dict[elem_id]:
                    children[self.elem_dict[child_id]['nuclearity']] = child_id

                sat_id = children['satellite']
                sat_subtree = self.dt(start_node=sat_id)

                nuc_subtree = self.dt(start_node=children['nucleus'])
                nuc_tree = n_wrap(nuc_subtree, debug=self.debug, root_id=elem_id)

                return self.sorted_nucsat_tree(nuc_tree, sat_subtree)

            elif len(self.child_dict[elem_id]) > 2:
                # one nucleus with several satellites: group the child ids
                # by their nuclearity
                children = defaultdict(list)
                for child_id in self.child_dict[elem_id]:
                    children[self.elem_dict[child_id]['nuclearity']].append(child_id)

                assert len(children['nucleus']) == 1

                nuc_subtree = self.dt(start_node=children['nucleus'][0])
                nuc_tree = t('N', nuc_subtree, debug=self.debug, root_id=elem_id)

                sat_subtrees = [self.dt(start_node=sat_child_id)
                                for sat_child_id in children['satellite']]

                return self.order_schema(nuc_tree, sat_subtrees)

            else: #len(child_dict[elem_id]) == 0
                # NOTE(review): the closing quote in "('%s)'" looks
                # misplaced (probably meant "('%s')") -- confirm before
                # changing the message
                raise TooFewChildrenError(
                    "A span group ('%s)' should have at least 1 child: %s" \
                    % (elem_id, self.child_dict[elem_id]))
def test_multisat():
    """A set of relations sharing the same nucleus is converted into rst.sty
    format.

    Each case lists the nucleus-satellite trees that are put below the
    MULTISAT_RELNAME node as ``(first_element, number)`` arguments for
    ``gen_numbered_nucsat`` together with the expected rst.sty string.
    """
    cases = [
        # S-N-S
        ([('S', 1), ('N', 1)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}'),
        # S-S-N
        ([('S', 1), ('S', 2)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{sat-nuc-2}{\\rstsegment{sat-2}}\n\t{}{\\rstsegment{nuc}}'),
        # N-S-S
        ([('N', 1), ('N', 2)],
         u'\\dirrel\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}\n\t{nuc-sat-2}{\\rstsegment{sat-2}}'),
        # S-N-S-S
        ([('S', 1), ('N', 1), ('N', 2)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}\n\t{nuc-sat-2}{\\rstsegment{sat-2}}'),
        # S-S-N-S
        ([('S', 1), ('S', 2), ('N', 1)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{sat-nuc-2}{\\rstsegment{sat-2}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}'),
        # S-S-S-N-S
        ([('S', 1), ('S', 2), ('S', 3), ('N', 1)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{sat-nuc-2}{\\rstsegment{sat-2}}\n\t{sat-nuc-3}{\\rstsegment{sat-3}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}'),
        # S-N-S-S-S
        ([('S', 1), ('N', 1), ('N', 2), ('N', 3)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}\n\t{nuc-sat-2}{\\rstsegment{sat-2}}\n\t{nuc-sat-3}{\\rstsegment{sat-3}}'),
        # S-S-S-N-S-S-S
        ([('S', 1), ('S', 2), ('S', 3), ('N', 1), ('N', 2), ('N', 3)],
         u'\\dirrel\n\t{sat-nuc-1}{\\rstsegment{sat-1}}\n\t{sat-nuc-2}{\\rstsegment{sat-2}}\n\t{sat-nuc-3}{\\rstsegment{sat-3}}\n\t{}{\\rstsegment{nuc}}\n\t{nuc-sat-1}{\\rstsegment{sat-1}}\n\t{nuc-sat-2}{\\rstsegment{sat-2}}\n\t{nuc-sat-3}{\\rstsegment{sat-3}}'),
    ]

    for nucsat_specs, expected_latex in cases:
        # every nucleus-satellite tree is attached below an 'S' node
        children = [('S', gen_numbered_nucsat(first_element, number))
                    for first_element, number in nucsat_specs]
        multisat_tree = t(MULTISAT_RELNAME, children)

        result = dg.write_rstlatex(multisat_tree)
        assert result.rstlatextree == expected_latex
def n_wrap(tree):
    """Return a new tree with an 'N' root whose only child is ``tree``."""
    only_child = [tree]
    return t('N', only_child)
def s_wrap(tree):
    """Return a new tree with an 'S' root whose only child is ``tree``."""
    only_child = [tree]
    return t('S', only_child)
def group2tree(self, elem_id, elem, elem_type, start_node=None):
    """Convert a group element and everything below it into a tree.

    The handling depends on the element's 'reltype':

    - 'rst' / 'multinuc': the subtree below the group is built and then
      wrapped with ``s_wrap`` ('rst') or ``n_wrap`` ('multinuc')
    - '', None or 'span': the group acts as a nucleus; a group with
      group_type 'multinuc' is treated as the head of a multinuc relation
      (optionally with additional satellites), any other group is handled
      according to its number of children

    ``start_node`` is not used here; ``elem_type`` only appears in an
    assertion message.

    Raises TooFewChildrenError if a non-multinuc group has no children and
    AssertionError if the element has an unexpected reltype or if children
    that have to be satellites are not.
    """
    reltype = elem.get('reltype')
    root_wrap = s_wrap if reltype == 'rst' else n_wrap

    # rst: this elem is the S in an N-S relation
    # multinuc: this elem is one of several Ns in a multinuc relation
    if reltype in ('rst', 'multinuc'):
        if len(self.child_dict[elem_id]) == 1:
            # this group is the root of another N-S relation
            subtree_id = self.child_dict[elem_id][0]
            subtree = self.dt(start_node=subtree_id)

        else:
            # several children: convert and wrap each of them, then put
            # them (sorted) under a node named after the first child's
            # relation
            subtrees = [self.elem_wrap(self.dt(start_node=c), debug=self.debug, root_id=c)
                        for c in self.child_dict[elem_id]]
            sorted_subtrees = self.sort_subtrees(*subtrees)
            first_child_id = self.child_dict[elem_id][0]
            # the relname is looked up only after the children were converted
            subtrees_relname = self.get_relname(first_child_id)

            subtree = t(subtrees_relname, sorted_subtrees, debug=self.debug, root_id=elem_id)
        return root_wrap(subtree, debug=self.debug, root_id=elem_id)

    else:
        assert reltype in ('', None, 'span'), \
            "Unexpected combination: elem_type '%s' and reltype '%s'" \
            % (elem_type, elem['reltype'])

        # this elem is the N in an N-S relation
        if elem['group_type'] == 'multinuc':
            # this elem is also the 'root node' of a multinuc relation
            child_ids = self.child_dict[elem_id]
            multinuc_child_ids = [c for c in child_ids
                                  if self.elem_dict[c]['reltype'] == 'multinuc']
            # here the relname is looked up before the children are converted
            multinuc_relname = self.get_relname(multinuc_child_ids[0])

            multinuc_elements = [self.dt(start_node=mc)
                                 for mc in multinuc_child_ids]
            sorted_subtrees = self.sort_subtrees(*multinuc_elements)

            multinuc_subtree = t(
                multinuc_relname, [sorted_subtrees], debug=self.debug,
                root_id=elem_id)

            # all children that do not belong to the multinuc relation itself
            other_child_ids = [c for c in child_ids
                               if c not in multinuc_child_ids]

            if other_child_ids:
                # this element is the N in an S-N-S schema
                nuc_tree = t('N', multinuc_subtree, debug=self.debug, root_id=elem_id)

                assert all([self.elem_dict[child_id]['nuclearity'] == 'satellite'
                            for child_id in other_child_ids])

                sat_subtrees = [self.dt(start_node=child_id)
                                for child_id in other_child_ids]
                return self.order_schema(nuc_tree, sat_subtrees)

            else:
                # this elem is only the head of a multinuc relation
                # TODO: does this make sense / is this ever reached?
                return multinuc_subtree

        else:
            #~ assert elem['group_type'] == 'span', \
                #~ "Unexpected group_type '%s'" % elem['group_type']
            if len(self.child_dict[elem_id]) == 1:
                # this span at the top of a tree was only added for visual purposes
                child_id = self.child_dict[elem_id][0]
                return self.dt(start_node=child_id)

            elif len(self.child_dict[elem_id]) == 2:
                # this elem is the N of an N-S relation (child: S), but is also
                # a span over another relation (child: N)
                # map nuclearity -> child id; a KeyError below means that
                # the two children are not one 'satellite' and one 'nucleus'
                children = {}
                for child_id in self.child_dict[elem_id]:
                    children[self.elem_dict[child_id]['nuclearity']] = child_id

                sat_id = children['satellite']
                sat_subtree = self.dt(start_node=sat_id)

                nuc_subtree = self.dt(start_node=children['nucleus'])
                nuc_tree = n_wrap(nuc_subtree, debug=self.debug, root_id=elem_id)

                return self.sorted_nucsat_tree(nuc_tree, sat_subtree)

            elif len(self.child_dict[elem_id]) > 2:
                # one nucleus with several satellites: group the child ids
                # by their nuclearity
                children = defaultdict(list)
                for child_id in self.child_dict[elem_id]:
                    children[self.elem_dict[child_id]['nuclearity']].append(child_id)

                assert len(children['nucleus']) == 1

                nuc_subtree = self.dt(start_node=children['nucleus'][0])
                nuc_tree = t('N', nuc_subtree, debug=self.debug, root_id=elem_id)

                sat_subtrees = [self.dt(start_node=sat_child_id)
                                for sat_child_id in children['satellite']]

                return self.order_schema(nuc_tree, sat_subtrees)

            else: #len(child_dict[elem_id]) == 0
                # NOTE(review): the closing quote in "('%s)'" looks
                # misplaced (probably meant "('%s')") -- confirm before
                # changing the message
                raise TooFewChildrenError(
                    "A span group ('%s)' should have at least 1 child: %s" \
                    % (elem_id, self.child_dict[elem_id]))