Example #1
0
 def test_greek_betacode_to_unicode(self):
     """Check that Replacer.beta_code renders a Beta Code sample as Greek.

     Note carried over from the original author: assertEqual appears to
     not be correctly comparing certain characters (ά and ί, at least).
     """
     expected_greek = 'ὅπως οὖν μὴ ταὐτὸ '
     beta_sample = r"""O(/PWS OU)=N MH\ TAU)TO\ """
     # Convert with a fresh Replacer and compare against the known rendering.
     converted = Replacer().beta_code(beta_sample)
     self.assertEqual(converted, expected_greek)
Example #2
0
 def test_greek_betacode_to_unicode(self):
     """Verify the Beta Code to Unicode conversion on a short Greek phrase.

     The original author observed that assertEqual appears to not be
     correctly comparing certain characters (ά and ί, at least).
     """
     converter = Replacer()
     greek_result = converter.beta_code(r"""O(/PWS OU)=N MH\ TAU)TO\ """)
     # The sample ends with a space, and so does the expected rendering.
     self.assertEqual(greek_result, 'ὅπως οὖν μὴ ταὐτὸ ')
Example #3
0
 def compile_tlg_txt(self):
     """Read the original TLG Beta Code files and write Unicode versions.

     For every entry in ``tlg_index`` the file ``<name>.TXT`` under
     ``<orig_files_dir>/tlg`` is read as latin-1, passed through
     ``remove_non_ascii`` and ``Replacer.beta_code``, and written as
     UTF-8 to ``<compiled_files_dir>/tlg/<name>.txt``.  Read and write
     failures (``IOError``) are logged and the loop moves on to the next
     file.  The author index is (re)built before the loop and the meta
     and author/works indices are built after it.
     """
     logging.info('Starting TLG corpus compilation into files.')
     compiled_files_dir_tlg = os.path.join(self.compiled_files_dir, 'tlg')
     # Create the target directory if needed, without a check-then-create
     # race and without failing when the parent is missing.
     os.makedirs(compiled_files_dir_tlg, exist_ok=True)
     self.make_tlg_index_file_author()
     self.read_tlg_index_file_author()
     # The source directory is the same for every file; compute it once.
     orig_files_dir_tlg = os.path.join(self.orig_files_dir, 'tlg')
     # NOTE(review): ``tlg_index`` is not defined in this method; it is
     # presumably populated by read_tlg_index_file_author() -- confirm.
     for file_name in tlg_index:
         abbrev = tlg_index[file_name]
         files_path = os.path.join(orig_files_dir_tlg, file_name + '.TXT')
         try:
             # Keep the input handle open only for the read itself.
             with open(files_path, 'rb') as index_opened:
                 txt_read = index_opened.read().decode('latin-1')
         except IOError:
             logging.error('Failed to open TLG file %s of author %s',
                           file_name, abbrev)
             continue
         txt_ascii = remove_non_ascii(txt_read)
         local_replacer = Replacer()
         new_uni = local_replacer.beta_code(txt_ascii)
         file_path = os.path.join(compiled_files_dir_tlg,
                                  file_name + '.txt')
         try:
             # Explicit UTF-8: the converted Greek text must not depend on
             # the platform's default locale encoding.
             with open(file_path, 'w', encoding='utf-8') as new_file:
                 new_file.write(new_uni)
         except IOError:
             logging.error('Failed to write to new file %s of '
                           'author %s', file_name, abbrev)
         else:
             # Report success only when the file was actually written.
             logging.info('Finished TLG corpus compilation to %s',
                          file_path)
     self.make_tlg_meta_index()
     self.make_tlg_index_auth_works()