Ejemplo n.º 1
0
class Syllabification:
    """
    SPPAS automatic syllabification annotation.
    """

    def __init__(self, rulesfilename, logfile=None):
        """
        Create a new syllabification instance.

        Load rules from a text file, depending on the language and phonemes
        encoding. See documentation for details about this file.

        @param rulesfilename is a file with rules to syllabify.

        """
        # Load a set of initial rules from a file:
        self.load_rules(rulesfilename)

        # Create each instance:
        self.phonemes  = None
        self.syllables = None
        self.logfile   = logfile

        # Initializations
        self.vow1 = 0
        self.vow2 = 1

    # End __init__
    # ------------------------------------------------------------------


    def load_rules(self, rulesfilename):
        """
        Load the list of rules.

        @param rulesfilename is a file with rules to syllabify.

         """
        try:
            self.rules = Rules(rulesfilename)
        except Exception as e:
            raise IOError("Syll::sppasSyll. Failed in loading rules: %s\n"%str(e))


    # End load_rules
    # ------------------------------------------------------------------


    def get_syllables(self):
        """
        Return the syllables.

        @return A Transcription() with syllables

        """
        return self.syllables

    # End get_syllables
    # ------------------------------------------------------------------


    def add_syllable(self, limit):
        """
        Add a syllable to the object "syllables".

        Syllables is a list of phonemes between two limits, the one of the
        previous syllable and the one in parameter

        @param limit is the index of the last phoneme of the previous syllable

        """
        #the phoneme at the beginning of the syllable to add is the one which follow
        #the last phoneme of the previous syllable
        if self.syll.IsEmpty():
            starttime   = self.phonemes.GetBegin().GetMidpoint()
            startradius = self.phonemes.GetBegin().GetRadius()
        else:
            starttime = self.syll.GetEndValue()
            startradius = self.syll.GetEnd().GetRadius()

        #the end of the syllable is the end of the phoneme pointed by "limit"
        e = self.phonemes[limit].GetLocation().GetEnd().GetMidpoint()
        er = self.phonemes[limit].GetLocation().GetEnd().GetRadius()
        p = "" #phonemes
        c = "" #classes
        s = "" #structures

        for i in range(self.prevlimit, limit + 1):
            #print "infor%d"%i#c%
            strphone = self.phonemes[i].GetLabel().GetValue()
            #print "phon=%s\n"%strphone#c%
            strclass = self.rules.get_class( strphone )
            strtype = strclass
            if self.is_consonant(strtype):
                strtype = "C"
            p += strphone
            if strtype == "#":
                c += strphone
                s += strphone
            else:
                c += strclass
                s += strtype

        if len(p)>15:
            # MUST BE CHANGED : DO NOT RAISE AN EXCEPTION !!!!!!!!!!!!!!
            # JUST IGNORE THIS SEGMENT AND GO TO WORK FOR THE NEXT ONE !!!!!!
            raise Exception("Syll::sppasSyll. Failed when syllabifying (more than 15 phonemes in a syllable!)\n")

        time = TimeInterval(TimePoint(starttime,startradius), TimePoint(e,er))
        self.syll.Append(Annotation(time, Label(p)))

        time = TimeInterval(TimePoint(starttime,startradius), TimePoint(e,er))
        self.cls.Append(Annotation(time, Label(c)))

        time = TimeInterval(TimePoint(starttime,startradius), TimePoint(e,er))
        self.struct.Append(Annotation(time, Label(s)))

        self.prevlimit = limit + 1

    # End add_syllable
    # ------------------------------------------------------------------


    def find_next_break (self, start):
        """
        Find the index of the next vowel or silence.

        @param start is the position of the phoneme where the search will begin
        @return the position of the next vowel or break or the last phone

        """
        for i in range (start, self.phonemes.GetSize()):
            if 'error' in self.phonemes[i].GetLabel().GetValue().lower():
                self.phonemes[i].GetLabel().SetValue( '#' ) # Convert to silence
            strclass = self.rules.get_class(self.phonemes[i].GetLabel().GetValue())
            if not self.is_consonant(strclass):
                return i
        return self.phonemes.GetSize()-1

    # End find_next_break
    # ------------------------------------------------------------------


    def shift(self, limit):
        """
        Add a syllable that ends at the phoneme pointed by "limit".
        There can be a difference between the effective limit and the limit
        given in parameter if it is between two indivisible phonemes.

        @param limit is the index of the phoneme where the segmentation will take place
        @return effective limit

        """
        # if the limit is between two indivisible phonemes,
        # it will be moved except if the move reach the previous syllable
        if (self.vow2-self.vow1) > 2 and self.rules.get_class(self.phonemes[self.vow2].GetLabel().GetValue()) != "#" and self.vow2 != self.phonemes.GetSize()-1:
            _str = ""
            if (limit - 2) > 0 and (limit + 2) < self.phonemes.GetSize():
                _str = self.phonemes[limit - 2].GetLabel().GetValue()\
                        + " " + self.phonemes[limit - 1].GetLabel().GetValue()\
                        + " " + self.phonemes[limit].GetLabel().GetValue()\
                        + " " + self.phonemes[limit + 1].GetLabel().GetValue()\
                        + " " + self.phonemes[limit + 2].GetLabel().GetValue()
            elif (limit - 1) > 0:
                _str = "ANY " + self.phonemes[limit - 1].GetLabel().GetValue()\
                        + " " + self.phonemes[limit].GetLabel().GetValue()\
                        + " " + self.phonemes[limit + 1].GetLabel().GetValue()\
                        + " " + self.phonemes[limit + 2].GetLabel().GetValue()

            d = self.rules.get_gap( _str )
            if d!=0:
                if limit+d >= self.vow1:
                    limit += d

        # Adding the syllable
        self.add_syllable( limit )

        # Beginning of the new syllable
        self.vow1 = self.vow2
        self.vow2 = self.find_next_break( self.vow1+1 )
        return limit

    # End shift
    # ------------------------------------------------------------------


    def is_consonant(self, string):
        """
        Return true if string is not a vowel nor a silence.

        """
        return string not in ("V", "W", "#")


    def analyze_breaks(self):
        """
        Deal with the cases where syllabification is systematic (##, #V, V#).
        Edit the values of global variables vow1 and vow2

        """
        vbreak = True
        while vbreak == True:
            v1 = self.rules.get_class( self.phonemes[self.vow1].GetLabel().GetValue() )
            v2 = self.rules.get_class( self.phonemes[self.vow2].GetLabel().GetValue() )

            # the last phoneme is a consonant!
            if self.is_consonant(v2) and self.vow2 == self.phonemes.GetSize()-1:
                self.shift(self.vow2)
            # vow1=V and vow2 = #
            elif v1 in ("V", "W") and v2 == "#":
                self.shift(self.vow2-1)
            # vow1=# and vow2 = V
            elif v1 == "#" and v2 in ("V", "W"):
                self.shift(self.vow1)
            # vow1=# and vow2 = #
            elif v1 == "#" and v2 == "#":
                if self.vow2 == (self.vow1+1):
                    self.shift(self.vow1)
                else:
                    #Sometimes, there can be consonants, without vowel, between two breaks
                    self.add_syllable( self.vow1)
                    self.shift(self.vow2-1)
            else:
                vbreak = False

            if self.vow1 >= self.vow2:
                vbreak = False

    # End analyze_breaks
    # ------------------------------------------------------------------


    def syllabificationVV(self):
        """
        Break down into syllables: continue until positioning itself between
        two vowels (others cases are systematics), apply the suited rule.

        """
        # Call the rules only if we are between two vowels
        self.analyze_breaks()
        if self.vow1 >= self.vow2:
            return

        # Build two strings, one for the classes and one for the phonemes
        classes = "V"
        phones  = self.phonemes[self.vow1].GetLabel().GetValue()
        for i in range(self.vow1+1, self.vow2+1):
            classes += self.rules.get_class( self.phonemes[i].GetLabel().GetValue() )
            phones  += " "+self.phonemes[i].GetLabel().GetValue()

        # Apply the rule, add the syllable
        d = self.rules.get_boundary( phones )
        if d ==-1:
            if self.logfile:
                self.logfile.print_message("No rule found for" +classes,status=3)
            else:
                sys.stderr.write("INFO: no rule found for" +classes+"\n")
            d = 0

        self.shift( self.vow1 + d)

    # End syllabificationVV
    # ------------------------------------------------------------------


    def syllabify(self, phonemes):
        """
        Syllabify (after loading the rules).

        @param phonemes (Tier) is the tier to syllabify

        """
        # Init
        self.phonemes  = phonemes
        self.syllables = None
        self.prevlimit = 0

        # Verifications: is there any data to syllabify?
        if self.phonemes.IsEmpty() is True:
            raise IOError("Syll::sppasSyll. Empty phoneme tier.\n")

        # Create output Transcription
        self.syllables = Transcription("Syllabification")
        self.syll      = self.syllables.NewTier(name="Syllables")
        self.cls       = self.syllables.NewTier(name="Classes")
        self.struct    = self.syllables.NewTier(name="Structures")

        if self.phonemes.GetSize() == 1:
            self.add_syllable(0)
            return self.syllables

        # Initialization of vow1 and vow2
        if "dummy" in self.phonemes[0].GetLabel().GetValue():
            self.vow1 = self.find_next_break(1)
            self.add_syllable(0)
            if self.vow1 == 0:
                return self.syllables
        else:
            self.vow1 = self.find_next_break(0)
        self.vow2 = self.find_next_break( self.vow1+1 )

        # Syllabification is here:
        while self.vow1 < self.vow2:
            self.syllabificationVV()

        # Add the last set of phonemes as a new syllable
        # ----------------------------------------------
        lasti = self.phonemes.GetSize() -1
        classe  = self.rules.get_class( self.phonemes[lasti].GetLabel().GetValue() )

        if self.vow2 <= lasti and ( classe in ("V", "W", "#")):
            self.add_syllable( lasti )

        return self.syllables

    # End syllabify
    # ------------------------------------------------------------------


    def syllabify2(self, phonemesTier, intervalsTier):
        """
        Syllabify inside specific intervals.

        @param phonemesTier (Tier) is the tier to syllabify
        @param intervalsTier (Tier) is the reference tier

        @return:  syllables (Tier)

        """

        if intervalsTier.IsEmpty() is True:
            raise IOError("Syll::sppasSyll. Empty interval tier.\n")

        # Quick and dirty solution to allows the "Find" method to work properly
        # on manually annotated files. We should suppose the Radius value to be
        # already fixed properly... which is never the case, because we mostly
        # read data from textgrid files!
        phonemes = phonemesTier.Copy()    # do not damage the given tier
        phonemes.SetRadius(0.005)         # 10ms vagueness seems a reasonable value
        intervals = intervalsTier.Copy()
        intervals.SetRadius(0.005)
        # consequently, all our syllables tiers will have our default radius
        # instead of the original one fixed in the phonemes tier

        # Create output Transcription
        syllables = Transcription("Syllabification")
        syll      = syllables.NewTier(name="Syllables-seg")
        cls       = syllables.NewTier(name="Classes-seg")
        struct    = syllables.NewTier(name="Structures-seg")

        # Extract phonemes between start and end for each interval
        for interval in intervals:
            start = interval.GetLocation().GetBegin()
            end   = interval.GetLocation().GetEnd()
            phons = phonemes.Find(start, end, overlaps=False)

            if not phons or not len(phons):
                continue

            phon_tier = Tier()
            for phon in phons:
                phon_tier.Append(phon)

            # Debordement !
            if not syllables.IsEmpty() and syllables.GetEnd() > phon_tier.GetBeginValue():
                phon_tier.Pop(0)

            try:
                trs_syll = self.syllabify(phon_tier)
                syllable = trs_syll.Find("Syllables")
                classes  = trs_syll.Find("Classes")
                structs  = trs_syll.Find("Structures")

                for s, c, st in zip(syllable, classes, structs):
                    syll.Append(s)
                    cls.Append(c)
                    struct.Append(st)
            except Exception:
                pass # if overlaps

        return syllables