Esempio n. 1
0
    def split(self, delta, subdelta):
        """Break this point's composite mark into individual landmark points.

        The mark is cut at commas first; a piece containing a slash is cut
        again at the slashes.  Every label accepted by ``LMref.is_std``
        becomes an ``LMPoint`` at a running time that starts at
        ``self.time``.  After an accepted label the running time is advanced
        by ``subdelta`` when the label came from a slash group and by
        ``delta`` otherwise.  A rejected label is reported with ``print`` and
        skipped without advancing the time.

        Returns the created ``LMPoint`` instances as a list, in order.
        """
        pieces = self.mark.split(',')
        points = []
        when = self.time

        for piece in pieces:
            # Labels of one slash group result from the same phoneme
            # transition (i.e. lm.phns), hence the separate step for them.
            if '/' in piece:
                labels, step = piece.split('/'), subdelta
            else:
                labels, step = [piece], delta

            for raw in labels:
                label = raw.strip()
                if not LMref.is_std(label):
                    print(label, 'is not a recognized standard landmark')
                    continue
                points.append(LMPoint(when, label))
                when += step

        return points
Esempio n. 2
0
    def predictLM(self):
        """Predict a landmark tier from the 'phones' tier.

        Each phoneme is paired with its predecessor (the first one with a
        default ``Phoneme(0, 0)``).  The classes that ``LMref.phoneme_class``
        gives for the two phoneme texts index ``LMref.predict_table``; an
        entry other than the empty string is inserted as an ``LMPoint`` at
        the current phoneme's ``xmin``.  The collected tier is passed through
        ``LMTier.lmTier(...).splitLMs()`` and appended to this object.

        Returns ``self.tiers[-1]`` after the append.
        """
        phones = self.get_tier('phones')
        predicted = LMTier(name="predicted", xmin=self.xmin, xmax=self.xmax)

        # NOTE: separate glottalization (+g/-g) and velopharyngeal (+n/-n)
        # point tiers were drafted here at some point; that draft is disabled.

        previous = Phoneme(0, 0)
        for current in phones:
            # Look up the landmark for the (previous, current) phoneme pair.
            row = LMref.predict_table[LMref.phoneme_class(previous.text)]
            landmark = row[LMref.phoneme_class(current.text)]
            if landmark != '':
                predicted.insert(LMPoint(current.xmin, landmark))
            previous = current

        split_tier = LMTier.lmTier(predicted).splitLMs()
        self.append(split_tier)
        return self.tiers[-1]
Esempio n. 3
0
    def split(self, delta, subdelta):
        """Expand this point's composite mark into single-landmark points.

        The mark is divided at commas, and any comma-separated part holding a
        slash is divided again at the slashes.  A label that
        ``LMref.is_std`` accepts yields an ``LMPoint`` at the running time
        (initially ``self.time``), after which the running time grows by
        ``subdelta`` for labels from a slash group or by ``delta`` for the
        others.  A label that is not accepted is skipped; it is printed only
        when ``self.verbose`` is true.

        Returns the list of ``LMPoint`` instances in creation order.
        """
        parts = self.mark.split(',')
        result = []
        clock = self.time

        for part in parts:
            # Members of a slash group result from the same phoneme
            # transition (i.e. lm.phns) and use their own time step.
            grouped = '/' in part
            candidates = part.split('/') if grouped else [part]
            increment = subdelta if grouped else delta

            for candidate in candidates:
                name = candidate.strip()
                if LMref.is_std(name):
                    result.append(LMPoint(clock, name))
                    clock += increment
                elif self.verbose:
                    print(name, 'is not a recognized standard landmark')

        return result
Esempio n. 4
0
    def predictLM(self):
        """Predict a landmark tier from the 'phones' tier.

        Walks the phonemes pairwise (the first phoneme is paired with a
        default ``Phoneme(0, 0)``), looks the pair's classes up in
        ``LMref.predict_table`` and inserts every non-empty entry as an
        ``LMPoint`` at the current phoneme's ``xmin``.  The resulting tier is
        run through ``LMTier.lmTier(...).splitLMs()`` and appended to this
        object.

        Returns ``self.tiers[-1]`` after the append.

        Raises:
            RuntimeError: when the table lookup raises ``KeyError``; the
                arguments are the two phoneme classes, the two phoneme texts
                and the current phoneme's ``xmax``.
        """
        phones = self.get_tier('phones')
        predicted = LMTier(name="predicted", xmin=self.xmin, xmax=self.xmax)

        # NOTE: separate glottalization (+g/-g) and velopharyngeal (+n/-n)
        # point tiers were drafted here at some point; that draft is disabled.

        previous = Phoneme(0, 0)
        for current in phones:
            # Look up the landmark for the (previous, current) phoneme pair.
            try:
                row = LMref.predict_table[LMref.phoneme_class(previous.text)]
                landmark = row[LMref.phoneme_class(current.text)]
            except KeyError:
                # Re-raise with enough context to locate the offending pair.
                details = (LMref.phoneme_class(previous.text),
                           LMref.phoneme_class(current.text),
                           previous.text,
                           current.text,
                           current.xmax)
                raise RuntimeError(*details)
            if landmark != '':
                predicted.insert(LMPoint(current.xmin, landmark))
            previous = current

        split_tier = LMTier.lmTier(predicted).splitLMs()
        self.append(split_tier)
        return self.tiers[-1]
Esempio n. 5
0
    def convertLM(self, verbose=False):
        """Convert hand-labeled landmarks into the standard format.

        The landmark tier is the first of 'landmarks', 'LM' for which
        ``self.get_tier`` returns a truthy value; the comment tier is the
        first of 'LMmod', 'LMmods', 'comments'.  Both are passed through
        ``LMTier.lmTier(...).splitLMs()`` and merged into a tier named
        'observed'.  Each point's mark is then replaced by
        ``LMref.stdLM(mark)``; when that call raises, the exception is
        printed and the mark is left unchanged.  The tier is split once more
        and appended to this object.
        (See 'Relating manual landmark labels with predicted landmark labels'
        in reference folder.)

        Args:
            verbose: print a progress message before converting.  The
                message is also printed when ``self.verbose`` is true.

        Returns:
            The appended tier, i.e. ``self.tiers[-1]``.

        Raises:
            ValueError: if no landmark tier or no comment tier is found
                (previously this surfaced as an ``UnboundLocalError``).
        """
        def first_tier(names):
            # Return the first tier get_tier reports as truthy, else None.
            for name in names:
                tier = self.get_tier(name)
                if tier:
                    return tier
            return None

        raw_lms = first_tier(('landmarks', 'LM'))
        if raw_lms is None:
            raise ValueError(
                "no landmark tier found; expected 'landmarks' or 'LM'")
        raw_comments = first_tier(('LMmod', 'LMmods', 'comments'))
        if raw_comments is None:
            raise ValueError(
                "no comment tier found; expected 'LMmod', 'LMmods' or "
                "'comments'")

        old_lms = LMTier.lmTier(raw_lms).splitLMs()
        old_comments = LMTier.lmTier(raw_comments).splitLMs()
        new_lms = old_lms.merge(old_comments)
        new_lms.name = 'observed'

        if verbose or self.verbose:
            print('Converting hand-labeled landmarks into standard representation....')
        for point in new_lms:
            try:
                point.mark = LMref.stdLM(point.mark)
            except Exception as e:
                # Best effort: report the failure and keep the original mark.
                print(e)
        self.append(new_lms.splitLMs())
        return self.tiers[-1]
Esempio n. 6
0
    def convertLM(self, verbose=False):
        """Convert hand-labeled landmarks into the standard format.

        Picks the landmark tier (first of 'landmarks', 'LM' for which
        ``self.get_tier`` returns a truthy value) and the comment tier (first
        of 'LMmod', 'LMmods', 'comments'), runs both through
        ``LMTier.lmTier(...).splitLMs()`` and merges them into a tier named
        'observed'.  Every point's mark is replaced by
        ``LMref.stdLM(mark)``; if that call raises, the exception is printed
        and the mark stays as it was.  The tier is split again and appended
        to this object.
        (See 'Relating manual landmark labels with predicted landmark labels'
        in reference folder.)

        Args:
            verbose: print a progress message before converting.  The
                message is also printed when ``self.verbose`` is true.

        Returns:
            The appended tier, i.e. ``self.tiers[-1]``.

        Raises:
            ValueError: if no landmark tier or no comment tier is found
                (previously this surfaced as an ``UnboundLocalError``).
        """
        landmark_names = ('landmarks', 'LM')
        comment_names = ('LMmod', 'LMmods', 'comments')

        def lookup(candidates):
            # First tier that get_tier reports as truthy, or None.
            for candidate in candidates:
                found = self.get_tier(candidate)
                if found:
                    return found
            return None

        landmark_tier = lookup(landmark_names)
        if landmark_tier is None:
            raise ValueError(
                "no landmark tier found; expected 'landmarks' or 'LM'")
        comment_tier = lookup(comment_names)
        if comment_tier is None:
            raise ValueError(
                "no comment tier found; expected 'LMmod', 'LMmods' or "
                "'comments'")

        old_lms = LMTier.lmTier(landmark_tier).splitLMs()
        old_comments = LMTier.lmTier(comment_tier).splitLMs()
        new_lms = old_lms.merge(old_comments)
        new_lms.name = 'observed'

        if verbose or self.verbose:
            print(
                'Converting hand-labeled landmarks into standard representation....'
            )
        for point in new_lms:
            try:
                point.mark = LMref.stdLM(point.mark)
            except Exception as e:
                # Best effort: report the failure and keep the original mark.
                print(e)
        self.append(new_lms.splitLMs())
        return self.tiers[-1]
Esempio n. 7
0
    def __init__(self, tmin, tmax, phn='#', t='', n=0, sn=0):
        """Create a phoneme interval; the defaults describe a silence interval.

        Args:
            tmin: passed to ``Interval.__init__`` as its first argument.
            tmax: passed to ``Interval.__init__`` as its second argument.
            phn: phoneme label, also passed to ``Interval.__init__``.  Its
                last character is read as the lexical stress when it parses
                as an integer.
            t: syllabic position type, stored as ``self.type``.
            n: syllabic position number, stored as ``self.number``.
            sn: syllabic position subnumber, stored as ``self.subnumber``.
        """
        Interval.__init__(self, tmin, tmax, phn)

        # manner class of the phoneme (string)
        self.manner = LMref.phoneme_class(phn)

        # Lexical stress (int); -1 when the label carries no stress digit.
        try:
            self.stress = int(phn[-1])
        except (LookupError, TypeError, ValueError):
            # Empty label, label that cannot be indexed, or a last character
            # that is not a digit.  Narrower than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.stress = -1

        # Syllabic position of phoneme
        # Type (string)
        self.type = t
        # Number (int)
        self.number = n
        # Subnumber (int)
        self.subnumber = sn

        self.links = {}
Esempio n. 8
0
    def __init__(self, tmin, tmax, phn='#', t='', n=0, sn=0):
        """Create a phoneme interval; the defaults describe a silence interval.

        Args:
            tmin: passed to ``Interval.__init__`` as its first argument.
            tmax: passed to ``Interval.__init__`` as its second argument.
            phn: phoneme label, also passed to ``Interval.__init__``.  Its
                last character is read as the lexical stress when it parses
                as an integer.
            t: syllabic position type, stored as ``self.type``.
            n: syllabic position number, stored as ``self.number``.
            sn: syllabic position subnumber, stored as ``self.subnumber``.
        """
        Interval.__init__(self, tmin, tmax, phn)

        # manner class of the phoneme (string)
        self.manner = LMref.phoneme_class(phn)

        # Lexical stress (int); falls back to -1 when the last character of
        # the label cannot be read as an integer.
        try:
            self.stress = int(phn[-1])
        except (LookupError, TypeError, ValueError):
            # Only the failures the stress parse itself can produce are
            # handled; a bare ``except`` would also hide KeyboardInterrupt
            # and SystemExit.
            self.stress = -1

        # Syllabic position of phoneme
        # Type (string)
        self.type = t
        # Number (int)
        self.number = n
        # Subnumber (int)
        self.subnumber = sn

        self.links = {}
Esempio n. 9
0
 def checkFormat(self):
     """Check that every item in ``self.items`` has a standard label.

     Raises:
         Exception: with "Cannot recognize label" and the offending item as
             arguments, for the first item whose ``mark`` is rejected by
             ``LMref.is_std``.
     """
     for item in self.items:
         if LMref.is_std(item.mark):
             continue
         raise Exception("Cannot recognize label", item)
Esempio n. 10
0
    def extractBreaks(self):
        """Build phrase and subphrase context from the 'breaks' and 'words' tiers.

        Per the original description, this is meant to construct phrase and
        subphrase context tiers according to the break labels and to link
        each word with its corresponding phrase and subphrase.  The method
        takes no arguments: the breaks and words are read with
        ``self.get_tier``.  Nothing is returned; the last two statements
        append ``sphrs`` and ``phrs`` to this object.

        NOTE(review): this method looks unfinished.  ``breaks4``,
        ``breaks3``, ``word``, ``sphrs`` and ``phrs`` are never assigned
        inside it, and ``w`` is used before the first loop that binds it, so
        unless those names are provided elsewhere the body cannot run as
        written -- confirm before relying on it.
        """
        # NOTE(review): `breaks` is fetched but not referenced again below.
        breaks = self.get_tier("breaks")
        words = self.get_tier("words")
        phrases = PointTier("phrases", self.xmin, self.xmax)
        subphrases = PointTier("subphrases", self.xmin, self.xmax)

##        for w in words:
##            w.break3=None
##            w.break4=None
        
        # First pass: put phrasing information in words
        # (disabled draft kept below for reference)
##        o = 0
##        for w in words:
##            bs =  breaks.findBetween(w.xmin, w.xmax, offset=o)
##            for b in bs:
##                if '3' in b.mark:
##                    w.break3 = True
##                if '4' in b.mark:
##                    w.break4 = True
##                o = b.index
##        t = b3[0]       
##        for w in words:
##            w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
##            t = w.links[b3.name][-1]
##        t = b4[0]
##        for w in words:
##            w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
##            t = w.links[b4.name][-1]
            
        # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)
        
        # Walk consecutive pairs (b1, b2) of `breaks4` and build one
        # Subphrase per pair.
        # NOTE(review): `breaks4` is not assigned in this method.
        b1 = breaks4[0]
        for b2 in breaks4[1:]:
            # NOTE(review): `w` is not bound yet at this point, and `words`
            # is rebound here from the tier to the search result.
            words = w.findBetween(b1.time, b2.time, words[-1].index+1)
            w1 = words[words[0].index-1]
            w2 = words[words[-1].index+1]
            # NOTE(review): the comparisons below subtract the break objects
            # b1/b2 themselves, while b1.time/b2.time are used elsewhere --
            # presumably the times were intended; confirm.
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1]+words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)            
            sph = Subphrase(b1.time, b2.time, ' '.join([w.text for w in words]))         
            # NOTE(review): the loop index `i` is unused and `word` is not
            # assigned in this method.
            for i in range(len(words)):
                word.links[breaks4.name]=(b1, b2)
                word.links[subphrases.name]=sph
            # NOTE(review): `apend` is presumably a typo for `append`.
            subphrases.apend(sph)
            b1=b2
        
        # Same walk over `breaks3`, building one Phrase per pair.
        # NOTE(review): `breaks3` is not assigned in this method.
        b1 = breaks3[0]
        for b2 in breaks3[1:]:
            words = w.findBetween(b1.time, b2.time, words[-1].index+1)
            w1 = words[words[0].index-1]
            w2 = words[words[-1].index+1]
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1]+words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)            
            ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))         
            for i in range(len(words)):
                word.links[breaks3.name]=(b1, b2)
                word.links[Phrase.name]=ph
            # NOTE(review): this passes `sph`, not the `ph` built above, and
            # uses `apend` -- presumably `phrases.append(ph)` was intended.
            phrases.apend(sph)
            b1=b2

        # Group the words accepted by LMref.is_word into Subphrase objects:
        # each accepted word is added to `text` and gets its 1-based position
        # in the running group as `w.ip`; a word with a truthy `break4`
        # closes the group.
        # NOTE(review): `sphrs` is not assigned in this method.
        text = []       
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text+=[w]
                w.ip=len(text)
                if w.break4:
                    sphrs.append(Subphrase(tprev,w.xmax,text))
                    text=[]
                    tprev = w.xmax
        # Flush the trailing group when the last word did not close it.
        if not w.break4:    # last word
                sphrs.append(Subphrase(tprev,w.xmax,text))
            
        # Third pass: word position in phrases
        # Same grouping keyed on `break3`; the position is stored as `w.IP`.
        # NOTE(review): `phrs` is not assigned in this method.
        text = []       
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text+=[w]
                w.IP=len(text)
                if w.break3:
                    phrs.append(Phrase(tprev,w.xmax,text))
                    text=[]
                    tprev = w.xmax
        if not w.break3:
                phrs.append(Phrase(tprev,w.xmax,text))

        self.append(sphrs)
        self.append(phrs)        
Esempio n. 11
0
 def checkFormat(self):
     """Ensure all items in ``self.items`` carry a standard label.

     Raises:
         Exception: with "Cannot recognize label" and the offending item as
             arguments, for the first item whose ``mark`` fails
             ``LMref.is_std``.
     """
     unknown = next(
         (pt for pt in self.items if not LMref.is_std(pt.mark)), None)
     if unknown is not None:
         raise Exception("Cannot recognize label", unknown)
Esempio n. 12
0
    def extractBreaks(self):
        """Build phrase and subphrase context from the 'breaks' and 'words' tiers.

        Per the original description, this is meant to construct phrase and
        subphrase context tiers according to the break labels and to link
        each word with its corresponding phrase and subphrase.  The method
        takes no arguments: the breaks and words are read with
        ``self.get_tier``.  Nothing is returned; the last two statements
        append ``sphrs`` and ``phrs`` to this object.

        NOTE(review): this method looks unfinished.  ``breaks4``,
        ``breaks3``, ``word``, ``sphrs`` and ``phrs`` are never assigned
        inside it, and ``w`` is used before the first loop that binds it, so
        unless those names are provided elsewhere the body cannot run as
        written -- confirm before relying on it.
        """
        # NOTE(review): `breaks` is fetched but not referenced again below.
        breaks = self.get_tier("breaks")
        words = self.get_tier("words")
        phrases = PointTier("phrases", self.xmin, self.xmax)
        subphrases = PointTier("subphrases", self.xmin, self.xmax)

        ##        for w in words:
        ##            w.break3=None
        ##            w.break4=None

        # First pass: put phrasing information in words
        # (disabled draft kept below for reference)
        ##        o = 0
        ##        for w in words:
        ##            bs =  breaks.findBetween(w.xmin, w.xmax, offset=o)
        ##            for b in bs:
        ##                if '3' in b.mark:
        ##                    w.break3 = True
        ##                if '4' in b.mark:
        ##                    w.break4 = True
        ##                o = b.index
        ##        t = b3[0]
        ##        for w in words:
        ##            w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
        ##            t = w.links[b3.name][-1]
        ##        t = b4[0]
        ##        for w in words:
        ##            w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
        ##            t = w.links[b4.name][-1]

        # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)

        # Walk consecutive pairs (b1, b2) of `breaks4` and build one
        # Subphrase per pair.
        # NOTE(review): `breaks4` is not assigned in this method.
        b1 = breaks4[0]
        for b2 in breaks4[1:]:
            # NOTE(review): `w` is not bound yet at this point, and `words`
            # is rebound here from the tier to the search result.
            words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
            w1 = words[words[0].index - 1]
            w2 = words[words[-1].index + 1]
            # NOTE(review): the comparisons below subtract the break objects
            # b1/b2 themselves, while b1.time/b2.time are used elsewhere --
            # presumably the times were intended; confirm.
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1] + words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            sph = Subphrase(b1.time, b2.time,
                            ' '.join([w.text for w in words]))
            # NOTE(review): the loop index `i` is unused and `word` is not
            # assigned in this method.
            for i in range(len(words)):
                word.links[breaks4.name] = (b1, b2)
                word.links[subphrases.name] = sph
            # NOTE(review): `apend` is presumably a typo for `append`.
            subphrases.apend(sph)
            b1 = b2

        # Same walk over `breaks3`, building one Phrase per pair.
        # NOTE(review): `breaks3` is not assigned in this method.
        b1 = breaks3[0]
        for b2 in breaks3[1:]:
            words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
            w1 = words[words[0].index - 1]
            w2 = words[words[-1].index + 1]
            if w1.xmax - b1 < b1 - w1.xmin:
                words = [w1] + words
            if w2.xmax - b2 > b2 - w2.xmin:
                words.append(w2)
            ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))
            for i in range(len(words)):
                word.links[breaks3.name] = (b1, b2)
                word.links[Phrase.name] = ph
            # NOTE(review): this passes `sph`, not the `ph` built above, and
            # uses `apend` -- presumably `phrases.append(ph)` was intended.
            phrases.apend(sph)
            b1 = b2

        # Group the words accepted by LMref.is_word into Subphrase objects:
        # each accepted word is added to `text` and gets its 1-based position
        # in the running group as `w.ip`; a word with a truthy `break4`
        # closes the group.
        # NOTE(review): `sphrs` is not assigned in this method.
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text += [w]
                w.ip = len(text)
                if w.break4:
                    sphrs.append(Subphrase(tprev, w.xmax, text))
                    text = []
                    tprev = w.xmax
        # Flush the trailing group when the last word did not close it.
        if not w.break4:  # last word
            sphrs.append(Subphrase(tprev, w.xmax, text))

        # Third pass: word position in phrases
        # Same grouping keyed on `break3`; the position is stored as `w.IP`.
        # NOTE(review): `phrs` is not assigned in this method.
        text = []
        tprev = self.xmin
        for w in words:
            if LMref.is_word(w.text):
                text += [w]
                w.IP = len(text)
                if w.break3:
                    phrs.append(Phrase(tprev, w.xmax, text))
                    text = []
                    tprev = w.xmax
        if not w.break3:
            phrs.append(Phrase(tprev, w.xmax, text))

        self.append(sphrs)
        self.append(phrs)