def test_old_swedish(self):
    """Check the transcription produced for a sample Old Swedish sentence.

    Builds a ``ut.Transcriber`` from the tables and rules exposed by the
    ``old_swedish`` module, runs ``main`` on the sentence and compares the
    returned string with the expected bracketed transcription.
    """
    expected = "[far man kunu ok dør han før ɛn hun far barn ok siɣɛr hun ok hɛnːɛ frɛndɛr]"
    transcriber = ut.Transcriber(
        old_swedish.DIPHTHONGS_IPA,
        old_swedish.DIPHTHONGS_IPA_class,
        old_swedish.IPA_class,
        old_swedish.old_swedish_rules,
    )
    actual = transcriber.main(
        "Far man kunu oc dör han för en hun far barn. oc sigher hun oc hænnæ frændær."
    )
    self.assertEqual(expected, actual)
def test_gothic_transcriber(self):
    """Check the transcription produced for a sample Gothic sentence.

    Builds a ``ut.Transcriber`` from the tables and rules exposed by the
    ``gothic`` module, runs ``main`` on the sentence and compares the
    returned string with the expected bracketed transcription.
    """
    expected = "[anastoːðiːns ɛwaŋgeːljoːns jeːsuis kristɔs sunɔs guðis]"
    transcriber = ut.Transcriber(
        gothic.DIPHTHONGS_IPA,
        gothic.DIPHTHONGS_IPA_class,
        gothic.IPA_class,
        gothic.gothic_rules,
    )
    actual = transcriber.main(
        "Anastodeins aiwaggeljons Iesuis Xristaus sunaus gudis."
    )
    self.assertEqual(expected, actual)
def test_old_norse_transcriber(self):
    """Check the transcription produced for a sample Old Norse passage.

    Builds a ``ut.Transcriber`` from the tables and rules exposed by the
    ``ont`` module, runs ``main`` on the passage and compares the returned
    string with the expected bracketed transcription.
    """
    # Adjacent literals are joined at compile time; the pieces are kept
    # exactly as written so the resulting strings are unchanged.
    passage = (
        "Almáttigr guð skapaði í upphafi himin ok jörð ok alla þá hluti, er þeim fylgja, og "
        "síðast menn tvá, er ættir eru frá komnar, Adam ok Evu, ok fjölgaðist þeira kynslóð ok "
        "dreifðist um heim allan."
    )
    expected = (
        "[almaːtːiɣr guð skapaði iː upːhavi himin ɔk jœrð ɔk alːa θaː hluti ɛr θɛim fylɣja ɔɣ siːðast mɛnː "
        "tvaː ɛr ɛːtːir ɛru fraː kɔmnar adam ɔk ɛvu ɔk fjœlɣaðist θɛira kynsloːð ɔk drɛivðist um hɛim alːan]"
    )
    transcriber = ut.Transcriber(
        ont.DIPHTHONGS_IPA,
        ont.DIPHTHONGS_IPA_class,
        ont.IPA_class,
        ont.old_norse_rules,
    )
    actual = transcriber.main(passage)
    self.assertEqual(expected, actual)
from xml.etree import ElementTree from xml.etree.ElementTree import XMLParser from cltk.phonology import utils as phu from cltk.phonology.old_norse import transcription as ont from cltk.phonology.syllabify import Syllabifier from cltk.tokenize.word import WordTokenizer from cltk.corpus.old_norse.syllabifier import hierarchy, invalid_onsets from cltk.text_reuse.levenshtein import Levenshtein from zoegas.constants import postags, dictionary_name, pos_verbose # phonetic transcriber phonetic_transcriber = phu.Transcriber(ont.DIPHTHONGS_IPA, ont.DIPHTHONGS_IPA_class, ont.IPA_class, ont.old_norse_rules) # Old Norse syllabifier s = Syllabifier(language="old_norse", break_geminants=True) s.set_invalid_onsets(invalid_onsets) s.set_hierarchy(hierarchy) old_norse_word_tokenizer = WordTokenizer("old_norse") def clean(text: str) -> Optional[str]: """ :param text: :return: