Example #1
0
    def test_complex_data(self):
        """
                class TestIPA2IPA.test_complex_data

                Round-trip test : each non-empty line of
                'tests/ipa2ipa.data.complex.txt' is given to PhoSegObject(ipa=...)
                and the string returned by
                .get_ipa_representation(insert_dot=False) must be equal to the
                source line; the object must also report a successful
                initialization.
        """

        # The file contains IPA symbols : the encoding is given explicitly so
        # that the test does not depend on the locale's default encoding.
        # (the data file is assumed to be UTF-8 encoded.)
        with open('tests/ipa2ipa.data.complex.txt', 'r',
                  encoding='utf-8') as sourcefile:
            for line in sourcefile:

                src = line.strip()

                # empty lines are ignored :
                if src == "":
                    continue

                phoseg = PhoSegObject( ipa = src )

                dest = phoseg.get_ipa_representation(insert_dot = False)

                # diagnostic output, written for every line (even when
                # <src> and <dest> are equal) :
                print("[TestIPA2IPA.test_complex_data]",
                      phoseg.error_msg,
                      src,
                      return_an_analysis_of_a_string(src),
                      " != ",
                      dest,
                      return_an_analysis_of_a_string(dest))

                self.assertEqual( phoseg.initialization_ok, True )
                self.assertEqual( src, dest )
Example #2
0
    def init(self, _sipa_str):
        """
                SIPA.init

                Initialization from the SIPA string <_sipa_str> : for each
                syllable matched by NAMED_SIPA_SYLLABLE_PATTERN, a dict with the
                keys "onset", "nucleus" and "coda" (IPA objects) is appended
                to <self>.

                ENTRY VALUE :
                * _sipa_str : (str) SIPA string

                EXCEPTION :
                * PhoSegError if a character of the string is unknown, or if the
                  number of '(' in the string differs from len(self) once the
                  syllables have been appended.
        """

        sipa_str = IPATonalCharsToInternalChar(_sipa_str)

        # we check that every character in <sipa_str> is a known character :
        for char in sipa_str:

            if char not in IPA_PREART_KEYS and \
               char not in IPA_MAINART_KEYS and \
               char not in IPA_POSTART_KEYS and \
               char not in SIPA_CHARACTERS:

                error_msg = "SIPA.init : unknown character '{0}'({1}) in '{2}'."
                raise PhoSegError(error_msg.format(char,
                                                   return_an_analysis_of_a_string(char),
                                                   sipa_str))

        for syllable in re.finditer(NAMED_SIPA_SYLLABLE_PATTERN,
                                    sipa_str):

            # a named group which did not take part in the match is None :
            # it is replaced by an empty string.
            onset = syllable.group("onset") or ""
            nucleus = syllable.group("nucleus") or ""
            coda = syllable.group("coda") or ""

            self.append( { "onset" : IPA(onset),
                           "nucleus" : IPA(nucleus),
                           "coda" : IPA(coda)
                         }
                       )

        # We count the number of opening parentheses in <sipa_str> and we
        # compare this number to the number of syllables stored in <self>.
        nbr_of_parentheses = sipa_str.count('(')
        if nbr_of_parentheses != len(self):
            error_msg = "(SIPA.init) Wrong initialization : " \
                        "the number of parenthesis differs from the number of syllables;" \
                        "nbr of parenthesis = {0}; " \
                        "sipa_str='{1}'; " \
                        "len(self)={2}"
            raise PhoSegError(error_msg.format(nbr_of_parentheses,
                                               sipa_str,
                                               len(self)))
Example #3
0
    def test(self):
        """
                class TestSIPA2SIPA.test

                Round-trip test : each non-empty line of
                'tests/sipa2sipa.data.txt' is given to PhoSegObject(sipa=...)
                and the string returned by .get_sipa_representation() must be
                equal to the source line; the object must also report a
                successful initialization.
        """

        # The encoding is given explicitly so that the test does not depend on
        # the locale's default encoding.
        # (the data file is assumed to be UTF-8 encoded.)
        with open('tests/sipa2sipa.data.txt', 'r',
                  encoding='utf-8') as sourcefile:
            for line in sourcefile:

                src = line.strip()

                # empty lines are ignored :
                if src == "":
                    continue

                phoseg = PhoSegObject( sipa = src )

                dest = phoseg.get_sipa_representation()

                # diagnostic output, written for every line (even when
                # <src> and <dest> are equal) :
                print("[TestSIPA2SIPA.test]",
                      src,
                      return_an_analysis_of_a_string(src),
                      " != ",
                      dest,
                      return_an_analysis_of_a_string(dest))

                self.assertEqual( phoseg.initialization_ok, True )
                self.assertEqual( src, dest )
Example #4
0
    def init_the_names(self, _ipa_str):
        """
                IPA.init_the_names

                Initialization with the keywords found in the IPA string <_ipa_str>

                Steps :
                (1) the ligature 0x0361 is removed and 0x032F is inserted after
                    the character following it;
                (2) every character is checked against the known IPA keys;
                (3) each phoneme matched by NAMED_PHONEME_PATTERN is converted
                    and given to self.add_ipanames_as_keywords();
                (4) tone keywords stored in a "notindependant" element are moved
                    to the last independant element preceding it.

                ENTRY VALUE :
                * _ipa_str : (str) IPA string

                EXCEPTION :
                * PhoSegError if a character of the string is unknown.
        """

        #.......................................................................
        # replacements : 0x0361 (ligature in "t͡s" is replaced by 0x032F applied
        # on the next character) : t͡s -> ts̯
        #.......................................................................
        ipa_str = []
        nextchar_willbe_notindependant = False

        for char in IPATonalCharsToInternalChar(_ipa_str):

            if nextchar_willbe_notindependant:
                # <char> follows a ligature : it is marked with 0x032F.
                ipa_str.append( char )
                ipa_str.append( chr(0x032F) )
                nextchar_willbe_notindependant = False

            elif char == chr(0x0361):
                # the ligature itself is not kept in <ipa_str>.
                nextchar_willbe_notindependant = True

            else:
                ipa_str.append( char )

        #.......................................................................
        # (list)ipa_str -> (string)ipa_str
        #.......................................................................
        ipa_str = "".join(ipa_str)

        #.......................................................................
        # we check if every character in <ipa_str> is a known character :
        #.......................................................................
        for char in ipa_str:
            if char not in IPA_PREART_KEYS and \
               char not in IPA_MAINART_KEYS and \
               char not in IPA_POSTART_KEYS:

                error_msg = "IPA.init_the_names : unknown character '{0}' (={1}) in '{2}'."
                raise PhoSegError(error_msg.format(char,
                                                   return_an_analysis_of_a_string(char),
                                                   ipa_str))

        #.......................................................................
        # main loop
        #.......................................................................
        for phoneme in re.finditer(NAMED_PHONEME_PATTERN, ipa_str):

            #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
            # new_element : a dictionary with three keys :
            # * predata : [ list of strings ]
            # * maindata : string
            # * postdata : [ list of string ]
            #
            # a group which did not take part in the match is None : it gives
            # an empty list.
            #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
            predata = phoneme.group("predata") or ""
            postdata = phoneme.group("postdata") or ""

            new_element = {
                "predata" : [IPA_PREART[char] for char in predata],
                "maindata" : IPA_MAINART[ phoneme.group("maindata") ],
                "postdata" : [IPA_POSTART[char] for char in postdata],
            }

            #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
            # we add to <self> the dict by converting it to keywords :
            #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
            self.add_ipanames_as_keywords(new_element)

        #.......................................................................
        # special case : if we find in self[x] (x>0) a keyword relative
        # to tones AND the keyword "notindependant" , we move these keywords to
        # the last index being independant.
        #
        # E.g. : "ai̯˧˥" = (a)     + ("notindependant" i, ˧˥ )
        #        becomes  (a, ˧˥) + ("notindependant" i)
        #
        # self[x] are strings of characters separated by spaces.
        #
        # The scan starts at index 0 so that self[0] can be recorded as the
        # last independant index : otherwise, in the example above, the tone
        # would be added to self[-1], i.e. to the last element of <self>,
        # and not to (a).
        #.......................................................................
        last_independant_index = -1     # -1 : no independant index found yet
        for i in range(len(self)):

            if "notindependant" not in self[i]:
                # independant index :
                last_independant_index = i
                continue

            if last_independant_index < 0:
                # dependant index without any independant index before it :
                # there is no place where the tone keywords could be moved.
                continue

            # dependant index, with maybe a tone keyword to be moved backward :
            for keyword in self[i].split(" "):

                # if we have digits, we have a tone keyword (like '123') :
                if keyword.isdigit():
                    # removing the tone keyword from self[i] :
                    self[i] = self[i].replace(keyword, "")
                    # adding the tone keyword to self[last_independant_index]
                    self[last_independant_index] += " " + keyword