def split(self, delta, subdelta):
    """Expand this point's compound mark into single-landmark points.

    The mark is first cut at commas.  A comma-separated group containing
    '/' is cut again at the slashes (such landmarks result from the same
    phoneme transition).  Every piece is stripped of surrounding
    whitespace and, when ``LMref.is_std`` accepts it, emitted as an
    ``LMPoint`` at a running time that starts at ``self.time``.

    delta: time added after each accepted landmark of a group without '/'.
    subdelta: time added after each accepted landmark of a '/' group.

    Pieces rejected by ``LMref.is_std`` are reported with ``print`` and
    do not advance the running time.

    Return the list of LMPoint instances in order of appearance.
    """
    points = []
    when = self.time
    for group in self.mark.split(','):
        # Splitting a group without '/' yields the group itself, so both
        # kinds of group share one loop; only the time step differs.
        step = subdelta if '/' in group else delta
        for piece in group.split('/'):
            label = piece.strip()
            if not LMref.is_std(label):
                print(label, 'is not a recognized standard landmark')
                continue
            points.append(LMPoint(when, label))
            when += step
    return points
def predictLM(self):
    """Predict landmarks from the phonemes of the 'phones' tier.

    Each phoneme is paired with its predecessor and the pair of
    ``LMref.phoneme_class`` values indexes ``LMref.predict_table``.  A
    non-empty table entry becomes an ``LMPoint`` at the phoneme's
    ``xmin`` in a tier named "predicted".  That tier is wrapped with
    ``LMTier.lmTier``, split with ``splitLMs`` and appended to ``self``.

    Return ``self.tiers[-1]`` after the append.
    """
    phones = self.get_tier('phones')
    predicted = LMTier(name="predicted", xmin=self.xmin, xmax=self.xmax)
    # Predecessor of the first phoneme: a Phoneme built with default label
    # (presumably a silence placeholder -- TODO confirm).
    previous = Phoneme(0, 0)
    for current in phones:
        # Landmark is looked up by (class of previous, class of current).
        row = LMref.predict_table[LMref.phoneme_class(previous.text)]
        landmark = row[LMref.phoneme_class(current.text)]
        if landmark != '':
            predicted.insert(LMPoint(current.xmin, landmark))
        # Glottal (+g/-g) and velopharyngeal (+n/-n) point tiers were only
        # sketched here in commented-out code; they are not generated.
        previous = current
    self.append(LMTier.lmTier(predicted).splitLMs())
    return self.tiers[-1]
def split(self, delta, subdelta):
    """Expand this point's compound mark into single-landmark points.

    The mark is first cut at commas.  A comma-separated group containing
    '/' is cut again at the slashes (such landmarks result from the same
    phoneme transition).  Every piece is stripped of surrounding
    whitespace and, when ``LMref.is_std`` accepts it, emitted as an
    ``LMPoint`` at a running time that starts at ``self.time``.

    delta: time added after each accepted landmark of a group without '/'.
    subdelta: time added after each accepted landmark of a '/' group.

    Pieces rejected by ``LMref.is_std`` do not advance the running time;
    they are reported with ``print`` only when ``self.verbose`` is truthy.

    Return the list of LMPoint instances in order of appearance.
    """
    points = []
    when = self.time
    for group in self.mark.split(','):
        # Splitting a group without '/' yields the group itself, so both
        # kinds of group share one loop; only the time step differs.
        step = subdelta if '/' in group else delta
        for piece in group.split('/'):
            label = piece.strip()
            if LMref.is_std(label):
                points.append(LMPoint(when, label))
                when += step
            elif self.verbose:
                print(label, 'is not a recognized standard landmark')
    return points
def predictLM(self):
    """Predict landmarks from the phonemes of the 'phones' tier.

    Each phoneme is paired with its predecessor and the pair of
    ``LMref.phoneme_class`` values indexes ``LMref.predict_table``.  A
    non-empty table entry becomes an ``LMPoint`` at the phoneme's
    ``xmin`` in a tier named "predicted".  That tier is wrapped with
    ``LMTier.lmTier``, split with ``splitLMs`` and appended to ``self``.

    Return ``self.tiers[-1]`` after the append.

    Raise RuntimeError when the table lookup raises KeyError; its
    arguments are the two phoneme classes, the two phoneme texts and the
    current phoneme's ``xmax``.
    """
    phones = self.get_tier('phones')
    predicted = LMTier(name="predicted", xmin=self.xmin, xmax=self.xmax)
    # Predecessor of the first phoneme: a Phoneme built with default label
    # (presumably a silence placeholder -- TODO confirm).
    previous = Phoneme(0, 0)
    for current in phones:
        # Landmark is looked up by (class of previous, class of current).
        try:
            row = LMref.predict_table[LMref.phoneme_class(previous.text)]
            landmark = row[LMref.phoneme_class(current.text)]
        except KeyError:
            # Re-raise with enough context to locate the offending pair.
            raise RuntimeError(LMref.phoneme_class(previous.text),
                               LMref.phoneme_class(current.text),
                               previous.text, current.text, current.xmax)
        if landmark != '':
            predicted.insert(LMPoint(current.xmin, landmark))
        # Glottal (+g/-g) and velopharyngeal (+n/-n) point tiers were only
        # sketched here in commented-out code; they are not generated.
        previous = current
    self.append(LMTier.lmTier(predicted).splitLMs())
    return self.tiers[-1]
def convertLM(self, verbose=False):
    """Convert hand-labeled landmarks into the standard format.

    The landmark tier (first truthy of 'landmarks', 'LM') and the comment
    tier (first truthy of 'LMmod', 'LMmods', 'comments') are each wrapped
    with ``LMTier.lmTier`` and split with ``splitLMs``, then merged into a
    tier renamed 'observed'.  Every point's mark is passed through
    ``LMref.stdLM``; a point whose conversion raises keeps its original
    mark and the exception is printed.  The result of ``splitLMs`` on the
    merged tier is appended to ``self``.
    (See 'Relating manual landmark labels with predicted landmark labels'
    in reference folder.)

    verbose: print a progress message; the message is also printed when
        ``self.verbose`` is truthy.

    Return ``self.tiers[-1]`` after the append.

    Raise ValueError when no landmark tier or no comment tier is found.
    """
    def first_present(names, role):
        # Return the first tier among `names` that get_tier reports as
        # truthy, failing with a readable message instead of leaving the
        # caller's variable unbound.
        for name in names:
            tier = self.get_tier(name)
            if tier:
                return tier
        raise ValueError('No %s tier found; expected one of: %s'
                         % (role, ', '.join(names)))

    old_lms = LMTier.lmTier(
        first_present(('landmarks', 'LM'), 'landmark')).splitLMs()
    old_comments = LMTier.lmTier(
        first_present(('LMmod', 'LMmods', 'comments'), 'comment')).splitLMs()
    new_lms = old_lms.merge(old_comments)
    new_lms.name = 'observed'
    # Honour the explicit argument as well as the instance-wide setting.
    if verbose or self.verbose:
        print('Converting hand-labeled landmarks into standard representation....')
    for point in new_lms:
        try:
            point.mark = LMref.stdLM(point.mark)
        except Exception as e:
            # Best effort: report the failure and leave the mark unchanged.
            print(e)
    self.append(new_lms.splitLMs())
    return self.tiers[-1]
def convertLM(self, verbose=False):
    """Convert hand-labeled landmarks into the standard format.

    The landmark tier (first truthy of 'landmarks', 'LM') and the comment
    tier (first truthy of 'LMmod', 'LMmods', 'comments') are each wrapped
    with ``LMTier.lmTier`` and split with ``splitLMs``, then merged into a
    tier renamed 'observed'.  Every point's mark is passed through
    ``LMref.stdLM``; a point whose conversion raises keeps its original
    mark and the exception is printed.  The result of ``splitLMs`` on the
    merged tier is appended to ``self``.
    (See 'Relating manual landmark labels with predicted landmark labels'
    in reference folder.)

    verbose: print a progress message; the message is also printed when
        ``self.verbose`` is truthy.

    Return ``self.tiers[-1]`` after the append.

    Raise ValueError when no landmark tier or no comment tier is found.
    """
    def first_present(names, role):
        # Return the first tier among `names` that get_tier reports as
        # truthy, failing with a readable message instead of leaving the
        # caller's variable unbound.
        for name in names:
            tier = self.get_tier(name)
            if tier:
                return tier
        raise ValueError('No %s tier found; expected one of: %s'
                         % (role, ', '.join(names)))

    old_lms = LMTier.lmTier(
        first_present(('landmarks', 'LM'), 'landmark')).splitLMs()
    old_comments = LMTier.lmTier(
        first_present(('LMmod', 'LMmods', 'comments'), 'comment')).splitLMs()
    new_lms = old_lms.merge(old_comments)
    new_lms.name = 'observed'
    # Honour the explicit argument as well as the instance-wide setting.
    if verbose or self.verbose:
        print(
            'Converting hand-labeled landmarks into standard representation....'
        )
    for point in new_lms:
        try:
            point.mark = LMref.stdLM(point.mark)
        except Exception as e:
            # Best effort: report the failure and leave the mark unchanged.
            print(e)
    self.append(new_lms.splitLMs())
    return self.tiers[-1]
def __init__(self, tmin, tmax, phn='#', t='', n=0, sn=0):
    """Initialise the interval and its phoneme attributes.

    Default values correspond to a silence interval.

    tmin, tmax, phn: forwarded unchanged to ``Interval.__init__``.
    phn: phoneme label; its last character is read as the lexical stress
        digit when it parses as an integer.
    t: syllabic position type (string), stored as ``self.type``.
    n: syllabic position number (int), stored as ``self.number``.
    sn: syllabic position subnumber (int), stored as ``self.subnumber``.
    """
    Interval.__init__(self, tmin, tmax, phn)
    # Manner class of the phoneme (string).
    self.manner = LMref.phoneme_class(phn)
    # Lexical stress (int); -1 when the label does not end in a digit.
    try:
        self.stress = int(phn[-1])
    except (IndexError, TypeError, ValueError):
        # Empty label, non-indexable label, or non-numeric last character.
        # Narrower than a bare except so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        self.stress = -1
    # Syllabic position of the phoneme: type, number and subnumber.
    self.type = t
    self.number = n
    self.subnumber = sn
    self.links = {}
def checkFormat(self):
    """Verify that every item's mark is a standard landmark label.

    Raise Exception, carrying the offending item, at the first item in
    ``self.items`` whose mark ``LMref.is_std`` rejects.
    """
    for point in self.items:
        if LMref.is_std(point.mark):
            continue
        raise Exception("Cannot recognize label", point)
def extractBreaks(self):
    """Construct phrase and subphrase context tiers from the break labels.

    Also meant to link each word with its corresponding phrase and
    subphrase.  Break labels are read from the 'breaks' tier (a PointTier
    containing break labels) and words from the 'words' tier.

    NOTE(review): this method looks unfinished.  `breaks4`, `breaks3`,
    `word`, `sphrs` and `phrs` are never assigned inside it, and `w` is
    used before the first `for w in words` loop binds it.  Unless those
    names exist at module level (TODO confirm), the call fails at
    `breaks4[0]`.  The local `breaks` is only used by commented-out code.
    """
    breaks = self.get_tier("breaks")
    words = self.get_tier("words")
    phrases = PointTier("phrases", self.xmin, self.xmax)
    subphrases = PointTier("subphrases", self.xmin, self.xmax)
    ## for w in words:
    ##     w.break3=None
    ##     w.break4=None
    # First pass: put phrasing information in words
    # (disabled; the later passes still read w.break3 / w.break4, which
    # only this commented-out code would have set -- TODO confirm)
    ## o = 0
    ## for w in words:
    ##     bs = breaks.findBetween(w.xmin, w.xmax, offset=o)
    ##     for b in bs:
    ##         if '3' in b.mark:
    ##             w.break3 = True
    ##         if '4' in b.mark:
    ##             w.break4 = True
    ##         o = b.index
    ## t = b3[0]
    ## for w in words:
    ##     w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
    ##     t = w.links[b3.name][-1]
    ## t = b4[0]
    ## for w in words:
    ##     w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
    ##     t = w.links[b4.name][-1]
    # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)
    # Walk consecutive pairs (b1, b2) of `breaks4`.
    b1 = breaks4[0]
    for b2 in breaks4[1:]:
        # NOTE(review): `w` is not bound yet here and `words` is rebound
        # to the search result; presumably the words tier was meant.
        words = w.findBetween(b1.time, b2.time, words[-1].index+1)
        w1 = words[words[0].index-1]
        w2 = words[words[-1].index+1]
        # Decide whether the neighbouring words w1 / w2 belong to this span.
        # NOTE(review): b1 / b2 are used directly in arithmetic here, while
        # `.time` is used elsewhere -- confirm the points support that.
        if w1.xmax - b1 < b1 - w1.xmin:
            words = [w1]+words
        if w2.xmax - b2 > b2 - w2.xmin:
            words.append(w2)
        sph = Subphrase(b1.time, b2.time, ' '.join([w.text for w in words]))
        # NOTE(review): the loop index `i` is unused and `word` is never
        # assigned; presumably words[i] was intended.
        for i in range(len(words)):
            word.links[breaks4.name]=(b1, b2)
            word.links[subphrases.name]=sph
        # NOTE(review): `apend` is presumably a typo for `append`.
        subphrases.apend(sph)
        b1=b2
    # Same walk over consecutive pairs of `breaks3`, building Phrase objects.
    b1 = breaks3[0]
    for b2 in breaks3[1:]:
        words = w.findBetween(b1.time, b2.time, words[-1].index+1)
        w1 = words[words[0].index-1]
        w2 = words[words[-1].index+1]
        if w1.xmax - b1 < b1 - w1.xmin:
            words = [w1]+words
        if w2.xmax - b2 > b2 - w2.xmin:
            words.append(w2)
        ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))
        for i in range(len(words)):
            word.links[breaks3.name]=(b1, b2)
            word.links[Phrase.name]=ph
        # NOTE(review): passes `sph`, not the `ph` built above, and uses
        # the same `apend` spelling -- presumably both are mistakes.
        phrases.apend(sph)
        b1=b2
    # Accumulate words into `text`; each word whose break4 flag is set
    # closes a Subphrase spanning tprev..w.xmax.
    text = []
    tprev = self.xmin
    for w in words:
        if LMref.is_word(w.text):
            text+=[w]
            # 1-based position of the word within the current subphrase.
            w.ip=len(text)
        if w.break4:
            sphrs.append(Subphrase(tprev,w.xmax,text))
            text=[]
            tprev = w.xmax
    if not w.break4: # last word
        sphrs.append(Subphrase(tprev,w.xmax,text))
    # Third pass: word position in phrases
    # Same accumulation keyed on break3, producing Phrase objects.
    text = []
    tprev = self.xmin
    for w in words:
        if LMref.is_word(w.text):
            text+=[w]
            # 1-based position of the word within the current phrase.
            w.IP=len(text)
        if w.break3:
            phrs.append(Phrase(tprev,w.xmax,text))
            text=[]
            tprev = w.xmax
    if not w.break3:
        phrs.append(Phrase(tprev,w.xmax,text))
    # NOTE(review): appends `sphrs` / `phrs`; the `subphrases` / `phrases`
    # tiers created at the top are not appended.
    self.append(sphrs)
    self.append(phrs)
def extractBreaks(self):
    """Construct phrase and subphrase context tiers from the break labels.

    Also meant to link each word with its corresponding phrase and
    subphrase.  Break labels are read from the 'breaks' tier (a PointTier
    containing break labels) and words from the 'words' tier.

    NOTE(review): this method looks unfinished.  `breaks4`, `breaks3`,
    `word`, `sphrs` and `phrs` are never assigned inside it, and `w` is
    used before the first `for w in words` loop binds it.  Unless those
    names exist at module level (TODO confirm), the call fails at
    `breaks4[0]`.  The local `breaks` is only used by commented-out code.
    """
    breaks = self.get_tier("breaks")
    words = self.get_tier("words")
    phrases = PointTier("phrases", self.xmin, self.xmax)
    subphrases = PointTier("subphrases", self.xmin, self.xmax)
    ## for w in words:
    ##     w.break3=None
    ##     w.break4=None
    # First pass: put phrasing information in words
    # (disabled; the later passes still read w.break3 / w.break4, which
    # only this commented-out code would have set -- TODO confirm)
    ## o = 0
    ## for w in words:
    ##     bs = breaks.findBetween(w.xmin, w.xmax, offset=o)
    ##     for b in bs:
    ##         if '3' in b.mark:
    ##             w.break3 = True
    ##         if '4' in b.mark:
    ##             w.break4 = True
    ##         o = b.index
    ## t = b3[0]
    ## for w in words:
    ##     w.links[b3.name]=b3.findBetween(w.xmin, w.xmax, t)
    ##     t = w.links[b3.name][-1]
    ## t = b4[0]
    ## for w in words:
    ##     w.links[b4.name]=b4.findBetween(w.xmin, w.xmax, t)
    ##     t = w.links[b4.name][-1]
    # Second pass: word position in subphrases: words.findBetween(sph.xmin, sph.xmax)
    # Walk consecutive pairs (b1, b2) of `breaks4`.
    b1 = breaks4[0]
    for b2 in breaks4[1:]:
        # NOTE(review): `w` is not bound yet here and `words` is rebound
        # to the search result; presumably the words tier was meant.
        words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
        w1 = words[words[0].index - 1]
        w2 = words[words[-1].index + 1]
        # Decide whether the neighbouring words w1 / w2 belong to this span.
        # NOTE(review): b1 / b2 are used directly in arithmetic here, while
        # `.time` is used elsewhere -- confirm the points support that.
        if w1.xmax - b1 < b1 - w1.xmin:
            words = [w1] + words
        if w2.xmax - b2 > b2 - w2.xmin:
            words.append(w2)
        sph = Subphrase(b1.time, b2.time, ' '.join([w.text for w in words]))
        # NOTE(review): the loop index `i` is unused and `word` is never
        # assigned; presumably words[i] was intended.
        for i in range(len(words)):
            word.links[breaks4.name] = (b1, b2)
            word.links[subphrases.name] = sph
        # NOTE(review): `apend` is presumably a typo for `append`.
        subphrases.apend(sph)
        b1 = b2
    # Same walk over consecutive pairs of `breaks3`, building Phrase objects.
    b1 = breaks3[0]
    for b2 in breaks3[1:]:
        words = w.findBetween(b1.time, b2.time, words[-1].index + 1)
        w1 = words[words[0].index - 1]
        w2 = words[words[-1].index + 1]
        if w1.xmax - b1 < b1 - w1.xmin:
            words = [w1] + words
        if w2.xmax - b2 > b2 - w2.xmin:
            words.append(w2)
        ph = Phrase(b1.time, b2.time, ' '.join([w.text for w in words]))
        for i in range(len(words)):
            word.links[breaks3.name] = (b1, b2)
            word.links[Phrase.name] = ph
        # NOTE(review): passes `sph`, not the `ph` built above, and uses
        # the same `apend` spelling -- presumably both are mistakes.
        phrases.apend(sph)
        b1 = b2
    # Accumulate words into `text`; each word whose break4 flag is set
    # closes a Subphrase spanning tprev..w.xmax.
    text = []
    tprev = self.xmin
    for w in words:
        if LMref.is_word(w.text):
            text += [w]
            # 1-based position of the word within the current subphrase.
            w.ip = len(text)
        if w.break4:
            sphrs.append(Subphrase(tprev, w.xmax, text))
            text = []
            tprev = w.xmax
    if not w.break4:  # last word
        sphrs.append(Subphrase(tprev, w.xmax, text))
    # Third pass: word position in phrases
    # Same accumulation keyed on break3, producing Phrase objects.
    text = []
    tprev = self.xmin
    for w in words:
        if LMref.is_word(w.text):
            text += [w]
            # 1-based position of the word within the current phrase.
            w.IP = len(text)
        if w.break3:
            phrs.append(Phrase(tprev, w.xmax, text))
            text = []
            tprev = w.xmax
    if not w.break3:
        phrs.append(Phrase(tprev, w.xmax, text))
    # NOTE(review): appends `sphrs` / `phrs`; the `subphrases` / `phrases`
    # tiers created at the top are not appended.
    self.append(sphrs)
    self.append(phrs)