def test_fix_rs3filewriter_newlines_in_edus():
    """An RST tree that contains newlines in its EDUs can be converted into
    an rs3 file just like a tree without newlines.

    Both trees are written with RS3FileWriter and read back with RSTTree;
    the EDU strings and the trees read back must be equal.
    """
    t_good = t('Temporal', [
        ('N', ['Szeryng subsequently focused on teaching']),
        ('S', ['before resuming his concert career in 1954.'])])

    t_bad = t('Temporal', [
        ('N', ['Szeryng\nsubsequently\nfocused on\nteaching']),
        ('S', ['before resuming\nhis concert\ncareer in 1954.'])])

    # Each temporary file is closed (and thereby removed) deterministically
    # once its tree has been written and read back.
    with NamedTemporaryFile() as good_file:
        RS3FileWriter(t_good, output_filepath=good_file.name)
        produced_good_output_tree = RSTTree(good_file.name)

    with NamedTemporaryFile() as bad_file:
        RS3FileWriter(t_bad, output_filepath=bad_file.name)
        produced_bad_output_tree = RSTTree(bad_file.name)

    assert produced_good_output_tree.edu_strings == \
        produced_bad_output_tree.edu_strings
    assert produced_good_output_tree.tree == \
        produced_bad_output_tree.tree
def test_read_stagedp_long(fixtures_input_dir):
    """The tree read from 'long.stagedp' is unchanged after being written
    to an rs3 file and read back."""
    input_tree = rstc.read_stagedp(
        os.path.join(fixtures_input_dir, 'long.stagedp'))
    assert isinstance(input_tree, StageDPRSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)
        assert input_tree.tree == produced_output_tree.tree
def test_read_dis2_tree(fixtures_input_dir):
    """The tree read from 'rst-example2.dis' is unchanged after being
    written to an rs3 file and read back."""
    input_tree = rstc.read_distree(
        os.path.join(fixtures_input_dir, 'rst-example2.dis'))
    assert isinstance(input_tree, DisRSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)
        assert input_tree.tree == produced_output_tree.tree
def test_read_hilda2(fixtures_input_dir):
    """The tree read from 'long.hilda' is unchanged after being written
    to an rs3 file and read back."""
    input_tree = rstc.read_hilda(
        os.path.join(fixtures_input_dir, 'long.hilda'))
    assert isinstance(input_tree, HILDARSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)
        assert input_tree.tree == produced_output_tree.tree
def test_read_hs2015a(fixtures_input_dir):
    """The tree read from 'short.hs2015' is unchanged after being written
    to an rs3 file and read back."""
    input_tree = rstc.read_hs2015tree(
        os.path.join(fixtures_input_dir, 'short.hs2015'))
    assert isinstance(input_tree, HS2015RSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)
        assert input_tree.tree == produced_output_tree.tree
def test_read_stagedp_one_edu(fixtures_input_dir):
    """The converter must not crash if the input only consists of one EDU.

    The tree read from 'one-edu.stagedp' is written to an rs3 file and read
    back; both trees must be equal.
    """
    input_tree = rstc.read_stagedp(
        os.path.join(fixtures_input_dir, 'one-edu.stagedp'))
    assert isinstance(input_tree, StageDPRSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)
        assert input_tree.tree == produced_output_tree.tree
def test_read_codra_tree():
    """The tree read from 'long.codra' has the same pretty-print
    representation after being written to an rs3 file and read back."""
    input_tree = rstc.read_codra(os.path.join(IN_DIR, 'long.codra'))
    assert isinstance(input_tree, CodraRSTTree)

    # The context manager closes and removes the temporary rs3 file even
    # if the assertion below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        # there is a difference in the tree objects but I don't know what
        # it is, as their pretty-print representations are identical
        assert input_tree.tree.pprint() == produced_output_tree.tree.pprint()
def test_rs3filewriter_onesegmenttree():
    """A DGParentedTree with only one segment is correctly converted into an
    RS3 file and back."""
    input_tree = t("N", ["foo"])
    expected_output_tree = example2tree('only-one-segment.rs3')

    # The context manager closes and removes the temporary rs3 file even
    # if one of the assertions below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == ['foo']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_emptytree():
    """An empty DGParentedTree is converted into an empty RS3 file and back."""
    input_tree = t("", [])
    expected_output_tree = example2tree("empty.rs3")

    # The context manager closes and removes the temporary rs3 file even
    # if one of the assertions below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == []
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_nucsat():
    """A DGParentedTree with one nuc-sat relation is correctly converted into
    an RS3 file and back.

    Both orders are covered: satellite before nucleus and nucleus before
    satellite.
    """
    # (input tree, filename of the example rs3 file it must be equal to)
    cases = [
        (t("circumstance", [("S", ["foo"]), ("N", ["bar"])]),
         "foo-bar-circ-foo-to-bar.rs3"),
        (t("circumstance", [("N", ["foo"]), ("S", ["bar"])]),
         "foo-bar-circ-bar-to-foo.rs3"),
    ]

    for input_tree, example_filename in cases:
        expected_output_tree = example2tree(example_filename)

        # The context manager closes and removes the temporary rs3 file
        # even if one of the assertions below fails.
        with NamedTemporaryFile() as rs3_file:
            RS3FileWriter(input_tree, output_filepath=rs3_file.name)
            produced_output_tree = RSTTree(rs3_file.name)

            assert produced_output_tree.edu_strings == \
                produced_output_tree.tree.leaves() == ['foo', 'bar']
            assert input_tree == expected_output_tree.tree == \
                produced_output_tree.tree
def test_rs3filewriter_nested():
    """A DGParentedTree with a multinuc relation nested in a nuc-sat relation
    is correctly converted into an RS3 file and back."""
    input_tree = t('elaboration', [
        ('N', ['eins']),
        ('S', [
            ('joint', [
                ('N', ['zwei']),
                ('N', ['drei'])])])])
    expected_output_tree = example2tree(
        'eins-zwei-drei-(elab-eins-from-(joint-zwei-and-drei).rs3')

    # The context manager closes and removes the temporary rs3 file even
    # if one of the assertions below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == ['eins', 'zwei', 'drei']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_onesegmenttree_umlauts():
    """A DGParentedTree with only one segment with umlauts is correctly
    converted into an RS3 file and back.
    """
    edu_string = "Über sein östliches Äußeres"
    input_tree = t("N", [edu_string])
    expected_output_tree = example2tree('only-one-segment-with-umlauts.rs3')

    # The context manager closes and removes the temporary rs3 file even
    # if one of the assertions below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        assert expected_output_tree.edu_strings == \
            produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == [edu_string]
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def test_rs3filewriter_pcc_10575():
    """PCC rs3 file 10575 can be converted rs3 -> dgtree -> rs3' -> dgtree',
    without information loss between dgtree and dgtree'.
    """
    input_tree = t('interpretation', [
        ('N', [
            ('circumstance', [
                ('S', ['eins']),
                ('N', [
                    ('contrast', [
                        ('N', ['zwei']),
                        ('N', [
                            ('cause', [
                                ('N', ['drei']),
                                ('S', ['vier'])])])])])])]),
        ('S', ['fuenf'])])
    expected_output_tree = example2tree('maz-10575-excerpt.rs3')

    # The context manager closes and removes the temporary rs3 file even
    # if one of the assertions below fails.
    with NamedTemporaryFile() as rs3_file:
        RS3FileWriter(input_tree, output_filepath=rs3_file.name)
        produced_output_tree = RSTTree(rs3_file.name)

        assert produced_output_tree.edu_strings == \
            produced_output_tree.tree.leaves() == \
            ['eins', 'zwei', 'drei', 'vier', 'fuenf']
        assert input_tree == expected_output_tree.tree == \
            produced_output_tree.tree
def example2tree(rs3tree_example_filename, rs3tree_dir=RS3TREE_DIR,
                 debug=False):
    """Load an example rs3 file as an RSTTree.

    The filename is joined onto ``rs3tree_dir`` and the resulting path is
    passed to ``RSTTree`` together with the ``debug`` flag.
    """
    return RSTTree(
        os.path.join(rs3tree_dir, rs3tree_example_filename),
        debug=debug)