def setUp(self):
    """
    Prepare the fixtures used by the generator tests.

    Parses "data/corpus-1.pgc" into self.pg_corpus, creates a new
    PGCGenerator as self.generator and stores the path of a temporary
    output file in self.tmpfn. The temporary file is removed again when
    the test has finished.
    """
    import os

    parser = PGCParser()
    self.pg_corpus = parser.parse("data/corpus-1.pgc")
    self.generator = PGCGenerator()

    # NamedTemporaryFile().name would delete the file as soon as the
    # discarded file object is collected, leaving only a stale name (the
    # same race as tempfile.mktemp) and never removing what is written to
    # it later. mkstemp() creates the file atomically and leaves its
    # removal to us.
    handle, self.tmpfn = tempfile.mkstemp()
    os.close(handle)

    def remove_tmp_file(path=self.tmpfn):
        # a test may already have removed the file itself
        if os.path.exists(path):
            os.remove(path)

    self.addCleanup(remove_tmp_file)
class Test_PGCGenerator(unittest.TestCase):
    """
    Tests that a corpus written by PGCGenerator contains the expected
    graphbank file references and can be parsed again by PGCParser.
    """

    def setUp(self):
        """
        Parse "data/corpus-1.pgc" and reserve a temporary output file.

        The path of the output file is stored in self.tmpfn; the file is
        removed again when the test has finished.
        """
        parser = PGCParser()
        self.pg_corpus = parser.parse("data/corpus-1.pgc")
        self.generator = PGCGenerator()

        # NamedTemporaryFile().name would delete the file as soon as the
        # discarded file object is collected, leaving only a stale name
        # (the same race as tempfile.mktemp) and never removing what the
        # generator writes to it. mkstemp() creates the file atomically
        # and leaves its removal to us.
        handle, self.tmpfn = tempfile.mkstemp()
        os.close(handle)
        self.addCleanup(self._remove_file, self.tmpfn)

    @staticmethod
    def _remove_file(path):
        """
        remove the file at path, if it is still there
        """
        if os.path.exists(path):
            os.remove(path)

    def _can_be_parsed(self, corpus_filename):
        """
        parse the corpus file and return the length of the resulting
        corpus, which is true for any non-empty corpus
        """
        parser = PGCParser()
        corpus = parser.parse(corpus_filename)
        return len(corpus)

    def test_rel_path(self):
        """
        test if relative graphbank file paths are correct after saving
        the corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True)
        out_dir = os.path.dirname(self.tmpfn)

        # ".//file" selects the same elements as the absolute "//file",
        # which ElementTree only accepts on a tree object and, in
        # xml.etree, with a FutureWarning
        for file_elem in tree.findall(".//file"):
            rel_path = file_elem.text
            self.assertFalse(os.path.isabs(rel_path))
            # relative paths are resolved against the output directory
            abs_path = os.path.join(out_dir, rel_path)
            self.assertTrue(os.path.exists(abs_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_abs_path(self):
        """
        test if absolute graphbank file paths are correct after saving
        the corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        for file_elem in tree.findall(".//file"):
            gb_path = file_elem.text
            self.assertTrue(os.path.isabs(gb_path))
            self.assertTrue(os.path.exists(gb_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_1(self):
        """
        merging a corpus with a copy of itself should not change the
        number of graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-1.pgc")
        self.pg_corpus.extend(pg_corpus2)
        self.assertEqual(len(self.pg_corpus), 2 * len(pg_corpus2))

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        # the length of the <graphbanks> element is its number of children
        gb_elem = tree.find(".//graphbanks")
        self.assertEqual(len(gb_elem), 2)

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_2(self):
        """
        merging a corpus with another corpus should change the number
        of graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-2.pgc")
        self.pg_corpus.extend(pg_corpus2)
        self.assertEqual(len(self.pg_corpus), 6)

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        # the length of the <graphbanks> element is its number of children
        gb_elem = tree.find(".//graphbanks")
        self.assertEqual(len(gb_elem), 4)

        self.assertTrue(self._can_be_parsed(self.tmpfn))
def setUp(self):
    """
    Create an element tree which the tests can mutilate and save.

    Loads "data/corpus-1.pgc" as a ParallelGraphCorpus, generates it to a
    temporary file whose path is stored in self.tmpfn, and keeps the
    result of the generator in self.tree. The temporary file is removed
    again when the test has finished.
    """
    import os

    pg_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")

    # NamedTemporaryFile().name would delete the file as soon as the
    # discarded file object is collected, leaving only a stale name (the
    # same race as tempfile.mktemp) and never removing what the generator
    # writes to it. mkstemp() creates the file atomically and leaves its
    # removal to us.
    handle, self.tmpfn = tempfile.mkstemp()
    os.close(handle)

    def remove_tmp_file(path=self.tmpfn):
        # a test may already have removed the file itself
        if os.path.exists(path):
            os.remove(path)

    # registered before generating, so the file is removed even when
    # generate() fails
    self.addCleanup(remove_tmp_file)

    generator = PGCGenerator()
    self.tree = generator.generate(pg_corpus, outf=self.tmpfn)
class Test_PGCGenerator(unittest.TestCase):
    """
    Tests that a corpus written by PGCGenerator contains the expected
    graphbank file references and can be parsed again by PGCParser.
    """

    def setUp(self):
        """
        Parse "data/corpus-1.pgc" and reserve a temporary output file.

        The path of the output file is stored in self.tmpfn; the file is
        removed again when the test has finished.
        """
        parser = PGCParser()
        self.pg_corpus = parser.parse("data/corpus-1.pgc")
        self.generator = PGCGenerator()

        # NamedTemporaryFile().name would delete the file as soon as the
        # discarded file object is collected, leaving only a stale name
        # (the same race as tempfile.mktemp) and never removing what the
        # generator writes to it. mkstemp() creates the file atomically
        # and leaves its removal to us.
        handle, self.tmpfn = tempfile.mkstemp()
        os.close(handle)
        self.addCleanup(self._remove_file, self.tmpfn)

    @staticmethod
    def _remove_file(path):
        """
        remove the file at path, if it is still there
        """
        if os.path.exists(path):
            os.remove(path)

    def _can_be_parsed(self, corpus_filename):
        """
        parse the corpus file and return the length of the resulting
        corpus, which is true for any non-empty corpus
        """
        parser = PGCParser()
        corpus = parser.parse(corpus_filename)
        return len(corpus)

    def test_rel_path(self):
        """
        test if relative graphbank file paths are correct after saving
        the corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True)
        out_dir = os.path.dirname(self.tmpfn)

        # ".//file" selects the same elements as the absolute "//file",
        # which ElementTree only accepts on a tree object and, in
        # xml.etree, with a FutureWarning
        for file_elem in tree.findall(".//file"):
            rel_path = file_elem.text
            self.assertFalse(os.path.isabs(rel_path))
            # relative paths are resolved against the output directory
            abs_path = os.path.join(out_dir, rel_path)
            self.assertTrue(os.path.exists(abs_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_abs_path(self):
        """
        test if absolute graphbank file paths are correct after saving
        the corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        for file_elem in tree.findall(".//file"):
            gb_path = file_elem.text
            self.assertTrue(os.path.isabs(gb_path))
            self.assertTrue(os.path.exists(gb_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_1(self):
        """
        merging a corpus with a copy of itself should not change the
        number of graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-1.pgc")
        self.pg_corpus.extend(pg_corpus2)
        self.assertEqual(len(self.pg_corpus), 2 * len(pg_corpus2))

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        # the length of the <graphbanks> element is its number of children
        gb_elem = tree.find(".//graphbanks")
        self.assertEqual(len(gb_elem), 2)

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_2(self):
        """
        merging a corpus with another corpus should change the number
        of graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-2.pgc")
        self.pg_corpus.extend(pg_corpus2)
        self.assertEqual(len(self.pg_corpus), 6)

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        # the length of the <graphbanks> element is its number of children
        gb_elem = tree.find(".//graphbanks")
        self.assertEqual(len(gb_elem), 4)

        self.assertTrue(self._can_be_parsed(self.tmpfn))