Beispiel #1
0
 def setUp(self):
     """
     Parse the reference corpus, create a generator and reserve a temporary
     output file whose name is stored in self.tmpfn.
     """
     import os

     parser = PGCParser()
     self.pg_corpus = parser.parse("data/corpus-1.pgc")
     self.generator = PGCGenerator()

     # mkstemp creates the file and leaves it in place, so the name stays
     # reserved for this test. tempfile.NamedTemporaryFile().name dropped
     # the file object immediately; the file was then deleted whenever the
     # object got finalised, which frees the name for reuse and, on
     # interpreters without reference counting, may happen after output
     # has been written to it.
     fd, self.tmpfn = tempfile.mkstemp()
     os.close(fd)

     def discard_tmpfile(path=self.tmpfn):
         try:
             os.remove(path)
         except OSError:
             # best effort: the file may already have been removed
             pass

     # NOTE(review): assumes the enclosing class is a unittest.TestCase
     # (addCleanup) -- confirm against the class header.
     self.addCleanup(discard_tmpfile)
Beispiel #2
0
class Test_PGCGenerator(unittest.TestCase):
    """
    Tests for PGCGenerator: a corpus parsed from "data/corpus-1.pgc" is
    written to a temporary file, after which the returned element tree and
    the written file are checked.
    """

    def setUp(self):
        """
        Parse the reference corpus, create a generator and reserve a
        temporary output file whose name is stored in self.tmpfn.
        """
        parser = PGCParser()
        self.pg_corpus = parser.parse("data/corpus-1.pgc")
        self.generator = PGCGenerator()

        # mkstemp creates the file and leaves it in place, so the name stays
        # reserved for this test. tempfile.NamedTemporaryFile().name dropped
        # the file object immediately; the file was then deleted whenever
        # the object got finalised, which frees the name for reuse and, on
        # interpreters without reference counting, may happen after output
        # has been written to it.
        fd, self.tmpfn = tempfile.mkstemp()
        os.close(fd)
        self.addCleanup(self._discard_tmpfile)

    def _discard_tmpfile(self):
        """
        Remove the temporary output file; a file that is already gone is
        not treated as an error.
        """
        try:
            os.remove(self.tmpfn)
        except OSError:
            # best effort: the file may already have been removed
            pass

    def _can_be_parsed(self, corpus_filename):
        """
        Parse corpus_filename with a fresh PGCParser and return the length
        of the resulting corpus (so an empty corpus is falsy).
        """
        parser = PGCParser()
        corpus = parser.parse(corpus_filename)
        return len(corpus)

    def test_rel_path(self):
        """
        test if relative graphbank file paths are correct after saving the
        corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True)
        out_dir = os.path.dirname(self.tmpfn)

        # ".//file" instead of "//file": a leading "/" on an ElementTree
        # path is deprecated and merely rewritten to the "./" form
        for file_elem in tree.findall(".//file"):
            rel_path = file_elem.text
            self.assertFalse(os.path.isabs(rel_path))

            # relative paths must resolve against the output directory
            abs_path = os.path.join(out_dir, rel_path)
            self.assertTrue(os.path.exists(abs_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_abs_path(self):
        """
        test if absolute graphbank file paths are correct after saving the
        corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        for file_elem in tree.findall(".//file"):
            gb_path = file_elem.text
            self.assertTrue(os.path.isabs(gb_path))
            self.assertTrue(os.path.exists(gb_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_1(self):
        """
        merging a corpus with copy of itself should not change the number of
        graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-1.pgc")
        self.pg_corpus.extend(pg_corpus2)

        # the corpus itself doubles in length ...
        self.assertEqual(len(self.pg_corpus), 2 * len(pg_corpus2))

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        gb_elem = tree.find(".//graphbanks")

        # ... but the <graphbanks> element keeps its two children
        self.assertEqual(len(gb_elem), 2)

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_2(self):
        """
        merging a corpus with another corpus should change the number of
        graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-2.pgc")
        self.pg_corpus.extend(pg_corpus2)

        self.assertEqual(len(self.pg_corpus), 6)

        tree = self.generator.generate(self.pg_corpus,
                                       outf=self.tmpfn,
                                       pprint=True,
                                       abs_path=True)

        gb_elem = tree.find(".//graphbanks")

        self.assertEqual(len(gb_elem), 4)

        self.assertTrue(self._can_be_parsed(self.tmpfn))
Beispiel #3
0
 def setUp(self):
     """
     Load the reference corpus and generate an element tree from it, writing
     the output to a temporary file whose name is stored in self.tmpfn.
     """
     import os

     # create an element tree which we can mutilate and save
     pg_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")

     # mkstemp creates the file and leaves it in place, so the name stays
     # reserved for this test. tempfile.NamedTemporaryFile().name dropped
     # the file object immediately; the file was then deleted whenever the
     # object got finalised, which frees the name for reuse and, on
     # interpreters without reference counting, may happen after the
     # generator has written to it.
     fd, self.tmpfn = tempfile.mkstemp()
     os.close(fd)

     def discard_tmpfile(path=self.tmpfn):
         try:
             os.remove(path)
         except OSError:
             # best effort: the file may already have been removed
             pass

     # registered before generating so the file is removed even if
     # generate() raises
     # NOTE(review): assumes the enclosing class is a unittest.TestCase
     # (addCleanup) -- confirm against the class header.
     self.addCleanup(discard_tmpfile)

     generator = PGCGenerator()
     self.tree = generator.generate(pg_corpus, outf=self.tmpfn)
Beispiel #4
0
 def setUp(self):
     """
     Parse the reference corpus, create a generator and reserve a temporary
     output file whose name is stored in self.tmpfn.
     """
     import os

     parser = PGCParser()
     self.pg_corpus = parser.parse("data/corpus-1.pgc")
     self.generator = PGCGenerator()

     # mkstemp creates the file and leaves it in place, so the name stays
     # reserved for this test. tempfile.NamedTemporaryFile().name dropped
     # the file object immediately; the file was then deleted whenever the
     # object got finalised, which frees the name for reuse and, on
     # interpreters without reference counting, may happen after output
     # has been written to it.
     fd, self.tmpfn = tempfile.mkstemp()
     os.close(fd)

     def discard_tmpfile(path=self.tmpfn):
         try:
             os.remove(path)
         except OSError:
             # best effort: the file may already have been removed
             pass

     # NOTE(review): assumes the enclosing class is a unittest.TestCase
     # (addCleanup) -- confirm against the class header.
     self.addCleanup(discard_tmpfile)
Beispiel #5
0
class Test_PGCGenerator(unittest.TestCase):
    """
    Tests for PGCGenerator: a corpus parsed from "data/corpus-1.pgc" is
    written to a temporary file, after which the returned element tree and
    the written file are checked.
    """

    def setUp(self):
        """
        Parse the reference corpus, create a generator and reserve a
        temporary output file whose name is stored in self.tmpfn.
        """
        parser = PGCParser()
        self.pg_corpus = parser.parse("data/corpus-1.pgc")
        self.generator = PGCGenerator()

        # mkstemp creates the file and leaves it in place, so the name stays
        # reserved for this test. tempfile.NamedTemporaryFile().name dropped
        # the file object immediately; the file was then deleted whenever
        # the object got finalised, which frees the name for reuse and, on
        # interpreters without reference counting, may happen after output
        # has been written to it.
        fd, self.tmpfn = tempfile.mkstemp()
        os.close(fd)
        self.addCleanup(self._discard_tmpfile)

    def _discard_tmpfile(self):
        """
        Remove the temporary output file; a file that is already gone is
        not treated as an error.
        """
        try:
            os.remove(self.tmpfn)
        except OSError:
            # best effort: the file may already have been removed
            pass

    def _can_be_parsed(self, corpus_filename):
        """
        Parse corpus_filename with a fresh PGCParser and return the length
        of the resulting corpus (so an empty corpus is falsy).
        """
        parser = PGCParser()
        corpus = parser.parse(corpus_filename)
        return len(corpus)

    def test_rel_path(self):
        """
        test if relative graphbank file paths are correct after saving the
        corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus, outf=self.tmpfn,
                                       pprint=True)
        out_dir = os.path.dirname(self.tmpfn)

        # ".//file" instead of "//file": a leading "/" on an ElementTree
        # path is deprecated and merely rewritten to the "./" form
        for file_elem in tree.findall(".//file"):
            rel_path = file_elem.text
            self.assertFalse(os.path.isabs(rel_path))

            # relative paths must resolve against the output directory
            abs_path = os.path.join(out_dir, rel_path)
            self.assertTrue(os.path.exists(abs_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_abs_path(self):
        """
        test if absolute graphbank file paths are correct after saving the
        corpus in a different directory
        """
        tree = self.generator.generate(self.pg_corpus, outf=self.tmpfn,
                                       pprint=True, abs_path=True)

        for file_elem in tree.findall(".//file"):
            gb_path = file_elem.text
            self.assertTrue(os.path.isabs(gb_path))
            self.assertTrue(os.path.exists(gb_path))

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_1(self):
        """
        merging a corpus with copy of itself should not change the number of
        graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-1.pgc")
        self.pg_corpus.extend(pg_corpus2)

        # the corpus itself doubles in length ...
        self.assertEqual(len(self.pg_corpus),
                         2 * len(pg_corpus2))

        tree = self.generator.generate(self.pg_corpus, outf=self.tmpfn,
                                       pprint=True, abs_path=True)

        gb_elem = tree.find(".//graphbanks")

        # ... but the <graphbanks> element keeps its two children
        self.assertEqual(len(gb_elem), 2)

        self.assertTrue(self._can_be_parsed(self.tmpfn))

    def test_merge_2(self):
        """
        merging a corpus with another corpus should change the number of
        graphbanks
        """
        parser = PGCParser()
        pg_corpus2 = parser.parse("data/corpus-2.pgc")
        self.pg_corpus.extend(pg_corpus2)

        self.assertEqual(len(self.pg_corpus), 6)

        tree = self.generator.generate(self.pg_corpus, outf=self.tmpfn,
                                       pprint=True, abs_path=True)

        gb_elem = tree.find(".//graphbanks")

        self.assertEqual(len(gb_elem), 4)

        self.assertTrue(self._can_be_parsed(self.tmpfn))
Beispiel #6
0
 def setUp(self):
     """
     Load the reference corpus and generate an element tree from it, writing
     the output to a temporary file whose name is stored in self.tmpfn.
     """
     import os

     # create an element tree which we can mutilate and save
     pg_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")

     # mkstemp creates the file and leaves it in place, so the name stays
     # reserved for this test. tempfile.NamedTemporaryFile().name dropped
     # the file object immediately; the file was then deleted whenever the
     # object got finalised, which frees the name for reuse and, on
     # interpreters without reference counting, may happen after the
     # generator has written to it.
     fd, self.tmpfn = tempfile.mkstemp()
     os.close(fd)

     def discard_tmpfile(path=self.tmpfn):
         try:
             os.remove(path)
         except OSError:
             # best effort: the file may already have been removed
             pass

     # registered before generating so the file is removed even if
     # generate() raises
     # NOTE(review): assumes the enclosing class is a unittest.TestCase
     # (addCleanup) -- confirm against the class header.
     self.addCleanup(discard_tmpfile)

     generator = PGCGenerator()
     self.tree = generator.generate(pg_corpus, outf=self.tmpfn)