Beispiel #1
0
    def test_read_gold_standard(self):
        """Check ``read_gold_standard`` output for fixtures in several encodings.

        For each key of the expectation table, ``read_gold_standard`` is
        called with ``FIXTURES`` and that key. Element 0 of the result must
        equal the shared content lines followed by the key's expected
        character(s); element 1 must equal the fixed comments string.
        """
        # Keys are passed straight to read_gold_standard as its second
        # argument (presumably fixture names matching their encoding --
        # confirm against the fixture directory).
        expected_chars = {
            'ascii': u'ascii yo!',
            'iso-8859-1': u'\xd3',
            'utf-8': u'\xae',
            'utf-16': u'\xae',
        }

        for name, extra in expected_chars.items():
            content_comments = data_processing.read_gold_standard(
                FIXTURES, name)
            expected_content = u"Content here\nmore content\n" + extra + u"\n"
            # The msg identifies the failing fixture, since every iteration
            # shares the same assertion lines.
            self.assertEqual(content_comments[0], expected_content,
                             msg='content mismatch for fixture %r' % name)
            self.assertEqual(content_comments[1], '\nsome comments\n',
                             msg='comments mismatch for fixture %r' % name)
Beispiel #2
0
 def test_utf8(self):
     """Compare the space-joined 'utf-8_chinese' gold standard to the reference.

     The pieces returned by ``read_gold_standard`` are joined with a single
     space and must equal ``Testread_gold_standard.actual_chinese_content``.
     """
     pieces = data_processing.read_gold_standard(FIXTURES, 'utf-8_chinese')
     joined = ' '.join(pieces)
     expected = Testread_gold_standard.actual_chinese_content
     self.assertEqual(joined, expected)