def test_find_transliteration(self): """ Tests calling transliteration in a file. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = [ cdli.catalog[text]['transliteration'] for text in cdli.catalog ] goal = [ [ 'a-na ia-ah-du-li-[im]', 'qi2-bi2-[ma]', 'um-ma a-bi-sa-mar#-[ma]', 'sa-li-ma-am e-pu-[usz]', 'asz-szum mu-sze-zi-ba-am# [la i-szu]', '[sa]-li#-ma-am sza e-[pu-szu]', '[u2-ul] e-pu-usz sa#-[li-mu-um]', '[u2-ul] sa-[li-mu-um-ma]', 'isz#-tu mu#-[sze-zi-ba-am la i-szu]', 'a-la-nu-ia sza la is,-s,a-ab#-[tu]', 'i-na-an-na is,-s,a-ab-[tu]', 'i-na ne2-kur-ti _lu2_ ha-szi-[im{ki}]', 'ur-si-im{ki} _lu2_ ka-ar-ka#-[mi-is{ki}]', 'u3 ia-am-ha-ad[{ki}]', 'a-la-nu an-nu-tum u2-ul ih-li-qu2#', 'i-na ne2-kur-ti {disz}sa-am-si-{d}iszkur#-ma', 'ih-ta-al-qu2', 'u3 a-la-nu sza ki-ma u2-hu-ru u2-sze-zi-ib#', 'u3 na-pa-asz2-ti u2-ba-li-it,', 'pi2-qa-at ha-s,e-ra#-at', 'asz-szum a-la-nu-ka', 'u3 ma-ru-ka sza-al#-[mu]', '[a-na na-pa]-asz2#-ti-ia i-tu-ur' ], [ 'a-na ia-ah-du-[li-im]', 'qi2-bi2-[ma]', 'um-ma a-bi-sa-mar-[ma]', 'asz-szum sza a-qa-bi-kum la ta-ha-asz2#', 'a-na ma-ni-im lu-ud-bu-ub', 'szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub]', 'szum-ma a-bi-sa-mar te-zi-ir#', 'u3 a-la#-ni#-ka te-zi-ir-ma#', 'i-na an-ni-a-tim sza a-da-bu-[bu]', 'a-na-ku mi-im-ma u2-ul e-le#-[i]', 'sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz]', 'u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i]', 'u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz]', 'u3 lu ma-at ia-ma-ha-ad#{ki}', 'u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na]', 'um#-[...]', 'lu#-[...]', 'a-[...]', 'szum#-[...]', 'a-na# [...]', 'ma-li# [...]', 'u3 u2-hu-ur# [...]', 'a-su2-ur-ri [...]', 'szu-zi-ba-an#-[ni ...]', 'a-na [...]', 'pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma]', '{disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na la bi-tu#-[tu-ur2-ma]', 'bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma]' ] ] self.assertEqual(output, goal)
def test_print_catalog(self): """ Tests _chunk_text. """ path = os.path.join('..', 'texts', 'single_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.print_catalog(catalog_filter=['transliteration']) goal = print(output) self.assertEqual(output, goal)
def test_find_cdli_number(self): """ Tests list_pnums. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.list_pnums() goal = ['P254202', 'P254203'] self.assertEqual(output, goal)
def test_find_edition(self): """ Tests list_editions. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.list_editions() goal = ['ARM 01, 001', 'ARM 01, 002'] self.assertEqual(output, goal)
def test_markdown_single_text(self): """ Tests markdown_single_text. """ path = os.path.join('..', 'texts', 'cdli_corpus.txt') f_i = FileImport(path) f_i.read_file() text_file = f_i.file_lines cdli = CDLICorpus() cdli.parse_file(text_file) p_p = PrettyPrint() p_p.markdown_single_text(cdli.catalog, 'P254203') output = p_p.markdown_text goal = """ARM 01, 002 P254203 --- ### metadata ### transliteration a-na ia-ah-du-[li-im] qi2-bi2-[ma] um-ma a-bi-sa-mar-[ma] asz-szum sza a-qa-bi-kum la ta-ha-asz2# a-na ma-ni-im lu-ud-bu-ub szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub] szum-ma a-bi-sa-mar te-zi-ir# u3 a-la#-ni#-ka te-zi-ir-ma# i-na an-ni-a-tim sza a-da-bu-[bu] a-na-ku mi-im-ma u2-ul e-le#-[i] sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz] u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i] u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz] u3 lu ma-at ia-ma-ha-ad#{ki} u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na] um#-[...] lu#-[...] a-[...] szum#-[...] a-na# [...] ma-li# [...] u3 u2-hu-ur# [...] a-su2-ur-ri [...] szu-zi-ba-an#-[ni ...] a-na [...] pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma] {disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na la bi-tu#-[tu-ur2-ma] bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma] ### normalization ### translation """ self.assertEqual(output, goal)
def test_table_of_contents(self): """ Tests toc. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.toc() goal = [ 'Pnum: P254202, Edition: ARM 01, 001, length: 23 line(s)', 'Pnum: P254203, Edition: ARM 01, 002, length: 28 line(s)' ] self.assertEqual(output, goal)
def test_find_metadata(self): """ Tests calling metadata in a file. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = [cdli.catalog[text]['metadata'] for text in cdli.catalog] goal = [ [ 'Primary publication: ARM 01, 001', 'Author(s): Dossin, Georges', 'Publication date: 1946', 'Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0305', 'Collection: National Museum of Syria, Damascus, Syria', 'Museum no.: NMSD —', 'Accession no.:', 'Provenience: Mari (mod. Tell Hariri)', 'Excavation no.:', 'Period: Old Babylonian (ca. 1900-1600 BC)', 'Dates referenced:', 'Object type: tablet', 'Remarks:', 'Material: clay', 'Language: Akkadian', 'Genre: Letter', 'Sub-genre:', 'CDLI comments:', 'Catalogue source: 20050104 cdliadmin', 'ATF source: cdlistaff', 'Translation: Durand, Jean-Marie (fr); Guerra, Dylan M. (en)', 'UCLA Library ARK: 21198/zz001rsp8x', 'Composite no.:', 'Seal no.:', 'CDLI no.: P254202' ], [ 'Primary publication: ARM 01, 002', 'Author(s): Dossin, Georges', 'Publication date: 1946', 'Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0306', 'Collection: National Museum of Syria, Damascus, Syria', 'Museum no.: NMSD —', 'Accession no.:', 'Provenience: Mari (mod. Tell Hariri)', 'Excavation no.:', 'Period: Old Babylonian (ca. 1900-1600 BC)', 'Dates referenced:', 'Object type: tablet', 'Remarks:', 'Material: clay', 'Language: Akkadian', 'Genre: Letter', 'Sub-genre:', 'CDLI comments:', 'Catalogue source: 20050104 cdliadmin', 'ATF source: cdlistaff', 'Translation:', 'UCLA Library ARK: 21198/zz001rsp9f', 'Composite no.:', 'Seal no.:', 'CDLI no.: P254203' ] ] self.assertEqual(output, goal)
def test_call_text(self): """ Tests calling a text. """ path = os.path.join('..', 'texts', 'ARM1Akkadian.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.catalog['P254226']['raw_text'] goal = [ '@tablet', '@obverse', '@column 1', '1. a-na ia-as2-ma-ah-{d}iszkur', '2. qi2-bi2-ma', '3. um-ma {d}utu-szi-{d}iszkur', '4. a-bu-ka-a-ma', '5. asz-szum _{lu2}nagar mesz_ sza tu-ut-tu-ul{ki}', '6. sza i-na szu-ba-at-{d}utu{ki} wa-asz-bu', '7. a-na tu-ut-tu-ul{ki}', '8. tu-ur-ri-im', '9. sza ta-asz-pu-ra-am', '10. a-na {d!}iszkur-lu2-ti', '11. asz3-ta-pa-ar _{lu2}nagar mesz_ szu-nu-ti', '12. a-na tu-ut-tu-ul{ki}', '13. u2-ta-ar', '14. u3 qa-as-su2-nu li-isz-ku-nu-ma', '15. {gesz}ma2-tu{hi-a} li-pu-szu' ] self.assertEqual(output, goal)
from collections import Counter from Importer.file_importer import FileImport from Importer.cdli_corpus import CDLICorpus from ATFConverter.tokenizer import Tokenizer from ATFConverter.atf_converter import ATFConverter fi = FileImport('texts/Akkadian.txt') fi.read_file() cc = CDLICorpus() cc.parse_file(fi.file_lines) tk = Tokenizer() atf = ATFConverter() stopwords = ['a-na', 'u3', 'sza', '[...]', 'i-na', '=', 'ARM', '01,', 'lang', 'akk', 'um-ma', 'la', 'u2-ul', 'mesz_', 'asz-szum', '0.1', 'broken', 'isz-tu', '_lu2_', 'ki-a-am', '1(disz)', 'ki-ma', 'x', 'sza-a-ti', 'the', '_lu2', '...]', 'lu-u2', 'sza#', 'a-na#', '_u4', 'beginning', 'of', '2(disz)', '[a-na', 'szum-ma', 'hi-a_', 'ana', 'a-di'] bag_of_words = [] for lines in cc.catalog['P249253']['transliteration']: for word in tk.word_tokenizer(lines): if word[0] not in stopwords: bag_of_words.append('-'.join(atf.process(word[0].split('-')))) frequency_analysis = Counter(bag_of_words).most_common(11) print(frequency_analysis)
def test_abnormalities(self): """Tests lines 83, 102, 121-2""" path = os.path.join('..', 'texts', 'two_text_abnormalities.txt') f_i = FileImport(path) f_i.read_file() text_file = f_i.file_lines cdli = CDLICorpus() cdli.parse_file(text_file) goal = { 'P254202': { 'edition': 'ARM 01, 001', 'metadata': [], 'normalization': [], 'pnum': 'P254202', 'raw_text': [ '@obverse', '1. a-na ia-ah-du-li-[im]', '2. qi2-bi2-[ma]', '3. um-ma a-bi-sa-mar#-[ma]', '4. sa-li-ma-am e-pu-[usz]', '5. asz-szum mu-sze-zi-ba-am# [la i-szu]', '6. [sa]-li#-ma-am sza e-[pu-szu]', '7. [u2-ul] e-pu-usz sa#-[li-mu-um]', '8. [u2-ul] sa-[li-mu-um-ma]', '$ rest broken', '@reverse', '$ beginning broken', "1'. isz#-tu mu#-[sze-zi-ba-am la i-szu]", "2'. a-la-nu-ia sza la is,-s,a-ab#-[tu]", "3'. i-na-an-na is,-s,a-ab-[tu]", "4'. i-na ne2-kur-ti _lu2_ ha-szi-[im{ki}]", "5'. ur-si-im{ki} _lu2_ ka-ar-ka#-[mi-is{ki}]", "6'. u3 ia-am-ha-ad[{ki}]", "7'. a-la-nu an-nu-tum u2-ul ih-li-qu2#", "8'. i-na ne2-kur-ti {disz}sa-am-si-{d}iszkur#-ma", "9'. ih-ta-al-qu2", "10'. u3 a-la-nu sza ki-ma u2-hu-ru u2-sze-zi-ib#", "11'. u3 na-pa-asz2-ti u2-ba-li-it,", "12'. pi2-qa-at ha-s,e-ra#-at", "13'. asz-szum a-la-nu-ka", "14'. u3 ma-ru-ka sza-al#-[mu]", "15'. [a-na na-pa]-asz2#-ti-ia i-tu-ur" ], 'translation': [], 'transliteration': [ 'a-na ia-ah-du-li-[im]', 'qi2-bi2-[ma]', 'um-ma a-bi-sa-mar#-[ma]', 'sa-li-ma-am e-pu-[usz]', 'asz-szum mu-sze-zi-ba-am# [la i-szu]', '[sa]-li#-ma-am sza e-[pu-szu]', '[u2-ul] e-pu-usz sa#-[li-mu-um]', '[u2-ul] sa-[li-mu-um-ma]', 'isz#-tu mu#-[sze-zi-ba-am la i-szu]', 'a-la-nu-ia sza la is,-s,a-ab#-[tu]', 'i-na-an-na is,-s,a-ab-[tu]', 'i-na ne2-kur-ti _lu2_ ha-szi-[im{ki}]', 'ur-si-im{ki} _lu2_ ka-ar-ka#-[mi-is{ki}]', 'u3 ia-am-ha-ad[{ki}]', 'a-la-nu an-nu-tum u2-ul ih-li-qu2#', 'i-na ne2-kur-ti {disz}sa-am-si-{d}iszkur#-ma', 'ih-ta-al-qu2', 'u3 a-la-nu sza ki-ma u2-hu-ru u2-sze-zi-ib#', 'u3 na-pa-asz2-ti u2-ba-li-it,', 'pi2-qa-at ha-s,e-ra#-at', 'asz-szum a-la-nu-ka', 'u3 ma-ru-ka sza-al#-[mu]', '[a-na na-pa]-asz2#-ti-ia i-tu-ur' ] }, 'P254203': { 'edition': '', 'metadata': [], 'normalization': [], 'pnum': 'P254203', 'raw_text': [ '@obverse', '1. a-na ia-ah-du-[li-im]', '2. qi2-bi2-[ma]', '3. um-ma a-bi-sa-mar-[ma]', '4. asz-szum sza a-qa-bi-kum la ta-ha-asz2#', '5. a-na ma-ni-im lu-ud-bu-ub', '6. szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub]', '7. szum-ma a-bi-sa-mar te-zi-ir#', '8. u3 a-la#-ni#-ka te-zi-ir-ma#', '9. i-na an-ni-a-tim sza a-da-bu-[bu]', '10. a-na-ku mi-im-ma u2-ul e-le#-[i]', '11. sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz]', '12. u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i]', '13. u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz]', '14. u3 lu ma-at ia-ma-ha-ad#{ki}', '15. u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na]', '$ rest broken', '@reverse', '$ beginning broken', "1'. um#-[...]", "2'. lu#-[...]", "3'. a-[...]", "4'. szum#-[...]", "5'. a-na# [...]", "6'. ma-li# [...]", "7'. u3 u2-hu-ur# [...]", "8'. a-su2-ur-ri [...]", "9'. szu-zi-ba-an#-[ni ...]", "10'. a-na [...]", "11'. pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma]", '@left', '1. {disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na la bi-tu#-[tu-ur2-ma]', '2. bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma]' ], 'translation': [], 'transliteration': [ 'a-na ia-ah-du-[li-im]', 'qi2-bi2-[ma]', 'um-ma a-bi-sa-mar-[ma]', 'asz-szum sza a-qa-bi-kum la ta-ha-asz2#', 'a-na ma-ni-im lu-ud-bu-ub', 'szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub]', 'szum-ma a-bi-sa-mar te-zi-ir#', 'u3 a-la#-ni#-ka te-zi-ir-ma#', 'i-na an-ni-a-tim sza a-da-bu-[bu]', 'a-na-ku mi-im-ma u2-ul e-le#-[i]', 'sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz]', 'u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i]', 'u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz]', 'u3 lu ma-at ia-ma-ha-ad#{ki}', 'u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na]', 'um#-[...]', 'lu#-[...]', 'a-[...]', 'szum#-[...]', 'a-na# [...]', 'ma-li# [...]', 'u3 u2-hu-ur# [...]', 'a-su2-ur-ri [...]', 'szu-zi-ba-an#-[ni ...]', 'a-na [...]', 'pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma]', '{disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na la bi-tu#-[tu-ur2-ma]', 'bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma]' ] } } self.assertEqual(cdli.catalog, goal)
def test_parse_file(self): """ Tests parse_file. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() cdli = CDLICorpus() cdli.parse_file(f_i.file_lines) output = cdli.chunks goal = [ [ 'Primary publication: ARM 01, 001', 'Author(s): Dossin, Georges', 'Publication date: 1946', 'Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0305', 'Collection: National Museum of Syria, Damascus, Syria', 'Museum no.: NMSD —', 'Accession no.:', 'Provenience: Mari (mod. Tell Hariri)', 'Excavation no.:', 'Period: Old Babylonian (ca. 1900-1600 BC)', 'Dates referenced:', 'Object type: tablet', 'Remarks:', 'Material: clay', 'Language: Akkadian', 'Genre: Letter', 'Sub-genre:', 'CDLI comments:', 'Catalogue source: 20050104 cdliadmin', 'ATF source: cdlistaff', 'Translation: Durand, Jean-Marie (fr); Guerra, Dylan M. (en)', 'UCLA Library ARK: 21198/zz001rsp8x', 'Composite no.:', 'Seal no.:', 'CDLI no.: P254202', 'Transliteration:', '&P254202 = ARM 01, 001', '#atf: lang akk', '@tablet', '@obverse', '1. a-na ia-ah-du-li-[im]', '2. qi2-bi2-[ma]', '3. um-ma a-bi-sa-mar#-[ma]', '4. sa-li-ma-am e-pu-[usz]', '5. asz-szum mu-sze-zi-ba-am# [la i-szu]', '6. [sa]-li#-ma-am sza e-[pu-szu]', '7. [u2-ul] e-pu-usz sa#-[li-mu-um]', '8. [u2-ul] sa-[li-mu-um-ma]', '$ rest broken', '@reverse', '$ beginning broken', "1'. isz#-tu mu#-[sze-zi-ba-am la i-szu]", "2'. a-la-nu-ia sza la is,-s,a-ab#-[tu]", "3'. i-na-an-na is,-s,a-ab-[tu]", "4'. i-na ne2-kur-ti _lu2_ ha-szi-[im{ki}]", "5'. ur-si-im{ki} _lu2_ ka-ar-ka#-[mi-is{ki}]", "6'. u3 ia-am-ha-ad[{ki}]", "7'. a-la-nu an-nu-tum u2-ul ih-li-qu2#", "8'. i-na ne2-kur-ti {disz}sa-am-si-{d}iszkur#-ma", "9'. ih-ta-al-qu2", "10'. u3 a-la-nu sza ki-ma u2-hu-ru u2-sze-zi-ib#", "11'. u3 na-pa-asz2-ti u2-ba-li-it,", "12'. pi2-qa-at ha-s,e-ra#-at", "13'. asz-szum a-la-nu-ka", "14'. u3 ma-ru-ka sza-al#-[mu]", "15'. [a-na na-pa]-asz2#-ti-ia i-tu-ur" ], [ 'Primary publication: ARM 01, 002', 'Author(s): Dossin, Georges', 'Publication date: 1946', 'Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0306', 'Collection: National Museum of Syria, Damascus, Syria', 'Museum no.: NMSD —', 'Accession no.:', 'Provenience: Mari (mod. Tell Hariri)', 'Excavation no.:', 'Period: Old Babylonian (ca. 1900-1600 BC)', 'Dates referenced:', 'Object type: tablet', 'Remarks:', 'Material: clay', 'Language: Akkadian', 'Genre: Letter', 'Sub-genre:', 'CDLI comments:', 'Catalogue source: 20050104 cdliadmin', 'ATF source: cdlistaff', 'Translation:', 'UCLA Library ARK: 21198/zz001rsp9f', 'Composite no.:', 'Seal no.:', 'CDLI no.: P254203', 'Transliteration:', '&P254203 = ARM 01, 002', '#atf: lang akk', '@tablet', '@obverse', '1. a-na ia-ah-du-[li-im]', '2. qi2-bi2-[ma]', '3. um-ma a-bi-sa-mar-[ma]', '4. asz-szum sza a-qa-bi-kum la ta-ha-asz2#', '5. a-na ma-ni-im lu-ud-bu-ub', '6. szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub]', '7. szum-ma a-bi-sa-mar te-zi-ir#', '8. u3 a-la#-ni#-ka te-zi-ir-ma#', '9. i-na an-ni-a-tim sza a-da-bu-[bu]', '10. a-na-ku mi-im-ma u2-ul e-le#-[i]', '11. sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz]', '12. u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i]', '13. u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz]', '14. u3 lu ma-at ia-ma-ha-ad#{ki}', '15. u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na]', '$ rest broken', '@reverse', '$ beginning broken', "1'. um#-[...]", "2'. lu#-[...]", "3'. a-[...]", "4'. szum#-[...]", "5'. a-na# [...]", "6'. ma-li# [...]", "7'. u3 u2-hu-ur# [...]", "8'. a-su2-ur-ri [...]", "9'. szu-zi-ba-an#-[ni ...]", "10'. a-na [...]", "11'. pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma]", '@left', '1. {disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na ' 'la bi-tu#-[tu-ur2-ma]', '2. bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma]' ] ] self.assertEqual(output, goal)
from Importer.file_importer import FileImport from Importer.cdli_corpus import CDLICorpus f1 = FileImport('./texts/ARM1Akkadian.txt') f1.read_file() c = CDLICorpus() c.parse_file(f1.file_lines) text = c.catalog['P254316'] lines = zip(text['transliteration'], text['normalization'], text['translation']) for i, line in enumerate(lines): for x in range(3): print(f"{i}: {line[x]}") print()
def test_html_print_file(self): """ Tests html_print_file. """ path = os.path.join('..', 'texts', 'two_text.txt') f_i = FileImport(path) f_i.read_file() text_file = f_i.file_lines cdli = CDLICorpus() cdli.parse_file(text_file) destination = os.path.join('..', 'tests', 'html_file.html') p_p = PrettyPrint() p_p.html_print_file(cdli.catalog, destination) f_o = FileImport(destination) f_o.read_file() output = f_o.raw_file goal = \ """<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>ARM 01, 001</title> </head> <body><table cellpadding="10"; border="1"> <tr><th> <h2>ARM 01, 001<br>P254202</h2> </th><th> <h3>transliteration</h3> </th><th> <h3>normalization</h3> </th><th> <h3>translation</h3> </tr><tr><td> Primary publication: ARM 01, 001<br> Author(s): Dossin, Georges<br> Publication date: 1946<br> Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0305<br> Collection: National Museum of Syria, Damascus, Syria<br> Museum no.: NMSD —<br> Accession no.:<br> Provenience: Mari (mod. Tell Hariri)<br> Excavation no.:<br> Period: Old Babylonian (ca. 1900-1600 BC)<br> Dates referenced:<br> Object type: tablet<br> Remarks:<br> Material: clay<br> Language: Akkadian<br> Genre: Letter<br> Sub-genre:<br> CDLI comments:<br> Catalogue source: 20050104 cdliadmin<br> ATF source: cdlistaff<br> Translation: Durand, Jean-Marie (fr); Guerra, Dylan M. (en)<br> UCLA Library ARK: 21198/zz001rsp8x<br> Composite no.:<br> Seal no.:<br> CDLI no.: P254202</td><td> <p>a-na ia-ah-du-li-[im]<br> qi2-bi2-[ma]<br> um-ma a-bi-sa-mar#-[ma]<br> sa-li-ma-am e-pu-[usz]<br> asz-szum mu-sze-zi-ba-am# [la i-szu]<br> [sa]-li#-ma-am sza e-[pu-szu]<br> [u2-ul] e-pu-usz sa#-[li-mu-um]<br> [u2-ul] sa-[li-mu-um-ma]<br> isz#-tu mu#-[sze-zi-ba-am la i-szu]<br> a-la-nu-ia sza la is,-s,a-ab#-[tu]<br> i-na-an-na is,-s,a-ab-[tu]<br> i-na ne2-kur-ti _lu2_ ha-szi-[im{ki}]<br> ur-si-im{ki} _lu2_ ka-ar-ka#-[mi-is{ki}]<br> u3 ia-am-ha-ad[{ki}]<br> a-la-nu an-nu-tum u2-ul ih-li-qu2#<br> i-na ne2-kur-ti {disz}sa-am-si-{d}iszkur#-ma<br> ih-ta-al-qu2<br> u3 a-la-nu sza ki-ma u2-hu-ru u2-sze-zi-ib#<br> u3 na-pa-asz2-ti u2-ba-li-it,<br> pi2-qa-at ha-s,e-ra#-at<br> asz-szum a-la-nu-ka<br> u3 ma-ru-ka sza-al#-[mu]<br> [a-na na-pa]-asz2#-ti-ia i-tu-ur </td><td> <p> </td><td> <font size='2'> </font></td></tr> </table> <br> </body> </html><!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>ARM 01, 002</title> </head> <body><table cellpadding="10"; border="1"> <tr><th> <h2>ARM 01, 002<br>P254203</h2> </th><th> <h3>transliteration</h3> </th><th> <h3>normalization</h3> </th><th> <h3>translation</h3> </tr><tr><td> Primary publication: ARM 01, 002<br> Author(s): Dossin, Georges<br> Publication date: 1946<br> Secondary publication(s): Durand, Jean-Marie, LAPO 16, 0306<br> Collection: National Museum of Syria, Damascus, Syria<br> Museum no.: NMSD —<br> Accession no.:<br> Provenience: Mari (mod. Tell Hariri)<br> Excavation no.:<br> Period: Old Babylonian (ca. 1900-1600 BC)<br> Dates referenced:<br> Object type: tablet<br> Remarks:<br> Material: clay<br> Language: Akkadian<br> Genre: Letter<br> Sub-genre:<br> CDLI comments:<br> Catalogue source: 20050104 cdliadmin<br> ATF source: cdlistaff<br> Translation:<br> UCLA Library ARK: 21198/zz001rsp9f<br> Composite no.:<br> Seal no.:<br> CDLI no.: P254203</td><td> <p>a-na ia-ah-du-[li-im]<br> qi2-bi2-[ma]<br> um-ma a-bi-sa-mar-[ma]<br> asz-szum sza a-qa-bi-kum la ta-ha-asz2#<br> a-na ma-ni-im lu-ud-bu-ub<br> szum-ma a-na?-<ku> a-na a-bi-ia la ad#-[bu-ub]<br> szum-ma a-bi-sa-mar te-zi-ir#<br> u3 a-la#-ni#-ka te-zi-ir-ma#<br> i-na an-ni-a-tim sza a-da-bu-[bu]<br> a-na-ku mi-im-ma u2-ul e-le#-[i]<br> sza sza-ru-ti-ka u3 sza ra-pa#-[szi-ka e-pu-usz]<br> u3 lu-u2 sza sza-ru-ut-ka u2-ul te-le#-[i]<br> u3 lu-u2 sza ra-pa-szi-ka [te-ep-pe2-esz]<br> u3 lu ma-at ia-ma-ha-ad#{ki}<br> u3# lu# _u4 8(disz)-kam_ isz-tu [i-na-an-na]<br> um#-[...]<br> lu#-[...]<br> a-[...]<br> szum#-[...]<br> a-na# [...]<br> ma-li# [...]<br> u3 u2-hu-ur# [...]<br> a-su2-ur-ri [...]<br> szu-zi-ba-an#-[ni ...]<br> a-na [...]<br> pi2-qa-at ta-qa-ab#-[bi um-ma at-ta-a-ma]<br> {disz}a-bi-sa-mar u2-ul ma-ri u3 bi-ti a-na la bi-tu#-[tu-ur2-ma]<br> bi-tum bi-it-ka u3 {disz}a-bi#-[sa]-mar# ma-ru-ka-[ma] </td><td> <p> </td><td> <font size='2'> </font></td></tr> </table> <br> </body> </html>""" self.assertEqual(output, goal)
def test_html_print_single_text(self): """ Tests html_print_single_text. """ path = os.path.join('..', 'texts', 'cdli_corpus.txt') destination = os.path.join('..', 'tests', 'html_single_text.html') f_i = FileImport(path) f_i.read_file() text_file = f_i.file_lines cdli = CDLICorpus() cdli.parse_file(text_file) p_p = PrettyPrint() p_p.html_print_single_text(cdli.catalog, 'P500444', destination) f_o = FileImport(destination) f_o.read_file() output = f_o.raw_file goal = \ """<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>NABU 2017/015</title> </head> <body><table cellpadding="10"; border="1"> <tr><th> <h2>NABU 2017/015<br>P500444</h2> </th><th> <h3>transliteration</h3> </th><th> <h3>normalization</h3> </th><th> <h3>translation</h3> </tr><tr><td> </td><td> <p>a-na {d}nin-urta<br> be-li2 ra-bi-i<br> be-li2-szu<br> ka-dasz2-man-{d}en-lil2<br> _lugal_ babila2{ki}<br> _dumu_ ka-dasz2-man-tur2-gu _lugal_<br> a-na szu-ru-uk _bala_-szu<br> i-qi2-isz </td><td> <p>ana ninurta<br> bēli rabî<br> bēlišu<br> kadašman-enlil<br> šar bābili<br> mār kadašman-turgu šarri<br> ana šūruk palîšu<br> iqīš </td><td> <font size='2'> For Ninurta,<br> the great lord,<br> his lord,<br> did Kadašman-Enlil,<br> king of Babylon,<br> son of Kadašman-Turgu, the king,<br> for the lengthening of his reign<br> offer (this seal). </font></td></tr> </table> <br> </body> </html>""" self.assertEqual(output, goal)