Esempio n. 1
0
 def test_old_swedish(self):
     """Transcribe a sample sentence with the `old_swedish` tables and compare it to the expected string."""
     # Build the transcriber first; it does not depend on the sample text.
     transcriber = ut.Transcriber(
         old_swedish.DIPHTHONGS_IPA,
         old_swedish.DIPHTHONGS_IPA_class,
         old_swedish.IPA_class,
         old_swedish.old_swedish_rules,
     )
     source_text = "Far man kunu oc dör han för en hun far barn. oc sigher hun oc hænnæ frændær."
     expected = "[far man kunu ok dør han før ɛn hun far barn ok siɣɛr hun ok hɛnːɛ frɛndɛr]"
     actual = transcriber.main(source_text)
     self.assertEqual(expected, actual)
Esempio n. 2
0
    def test_gothic_transcriber(self):
        """Transcribe a sample sentence with the `gothic` tables and compare it to the expected string."""
        expected = "[anastoːðiːns ɛwaŋgeːljoːns jeːsuis kristɔs sunɔs guðis]"
        source_text = "Anastodeins aiwaggeljons Iesuis Xristaus sunaus gudis."

        transcriber = ut.Transcriber(
            gothic.DIPHTHONGS_IPA,
            gothic.DIPHTHONGS_IPA_class,
            gothic.IPA_class,
            gothic.gothic_rules,
        )
        # Expected value goes first, matching the argument order used by the sibling tests.
        self.assertEqual(expected, transcriber.main(source_text))
Esempio n. 3
0
    def test_old_norse_transcriber(self):
        """Transcribe a multi-clause sample text with the `ont` tables and compare it to the expected string."""
        # Adjacent literals inside parentheses are concatenated at compile time,
        # yielding the same single string as backslash continuations would.
        source_text = (
            "Almáttigr guð skapaði í upphafi himin ok jörð ok alla þá hluti, er þeim fylgja, og "
            "síðast menn tvá, er ættir eru frá komnar, Adam ok Evu, ok fjölgaðist þeira kynslóð ok "
            "dreifðist um heim allan."
        )
        expected = (
            "[almaːtːiɣr guð skapaði iː upːhavi himin ɔk jœrð ɔk alːa θaː hluti ɛr θɛim fylɣja ɔɣ siːðast mɛnː "
            "tvaː ɛr ɛːtːir ɛru fraː kɔmnar adam ɔk ɛvu ɔk fjœlɣaðist θɛira kynsloːð ɔk drɛivðist um hɛim alːan]"
        )

        transcriber = ut.Transcriber(
            ont.DIPHTHONGS_IPA,
            ont.DIPHTHONGS_IPA_class,
            ont.IPA_class,
            ont.old_norse_rules,
        )
        actual = transcriber.main(source_text)
        self.assertEqual(expected, actual)
Esempio n. 4
0
from xml.etree import ElementTree
from xml.etree.ElementTree import XMLParser

from cltk.phonology import utils as phu
from cltk.phonology.old_norse import transcription as ont
from cltk.phonology.syllabify import Syllabifier
from cltk.tokenize.word import WordTokenizer
from cltk.corpus.old_norse.syllabifier import hierarchy, invalid_onsets
from cltk.text_reuse.levenshtein import Levenshtein

from zoegas.constants import postags, dictionary_name, pos_verbose

# Phonetic transcriber, created once at import time from the four Old Norse
# objects exported by `ont` (cltk.phonology.old_norse.transcription).
phonetic_transcriber = phu.Transcriber(ont.DIPHTHONGS_IPA,
                                       ont.DIPHTHONGS_IPA_class, ont.IPA_class,
                                       ont.old_norse_rules)

# Old Norse syllabifier: constructed with break_geminants=True, then given the
# invalid onsets and the hierarchy imported from
# cltk.corpus.old_norse.syllabifier through its setter methods.
s = Syllabifier(language="old_norse", break_geminants=True)
s.set_invalid_onsets(invalid_onsets)
s.set_hierarchy(hierarchy)

# Word tokenizer constructed with the string "old_norse"
# (presumably the language name -- confirm against the WordTokenizer signature).
old_norse_word_tokenizer = WordTokenizer("old_norse")


def clean(text: str) -> Optional[str]:
    """

    :param text:
    :return: