def clean_tier(old_tier):
    """
    Build a new IntervalTier with the same name as old_tier whose intervals
    are the results of running clean_interval over old_tier's intervals.

    The input tier is only iterated; the new tier is returned.
    """
    cleaned = IntervalTier(name=old_tier.name)
    # map() is lazy, so each interval is cleaned immediately before it is
    # added, exactly as in an explicit loop.
    for cleaned_interval in map(clean_interval, old_tier):
        cleaned.addInterval(cleaned_interval)
    return cleaned
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object. Times are rounded to the specified precision.

    Adapted from TextGrid.read()

    The ``short`` flag returned by parse_header selects between the two
    layouts handled here: when it is false, extra label/header lines are
    skipped before each tier and before each interval or point.

    :param tg_str: full text of a TextGrid file.
    :param round_digits: precision forwarded to parse_line for every value.
    :return: a TextGrid holding one tier per tier found in tg_str.
    :raises ValueError: if the header does not declare a "TextGrid".
    """
    source = StringIO(tg_str)
    tg = TextGrid()
    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")
    tg.minTime = parse_line(source.readline(), short, round_digits)
    tg.maxTime = parse_line(source.readline(), short, round_digits)
    source.readline()  # More header junk
    if short:
        m = int(source.readline().strip())  # Will be tg.n
    else:
        m = int(source.readline().strip().split()[2])  # Will be tg.n
    if not short:
        source.readline()
    for _ in range(m):  # Loop over grids
        if not short:
            source.readline()
        if parse_line(source.readline(), short, round_digits) == "IntervalTier":
            inam = parse_line(source.readline(), short, round_digits)
            imin = parse_line(source.readline(), short, round_digits)
            imax = parse_line(source.readline(), short, round_digits)
            itie = IntervalTier(inam, imin, imax)
            itie.strict = tg.strict
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                if not short:
                    source.readline()  # Header junk
                jmin = parse_line(source.readline(), short, round_digits)
                jmax = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                if jmin < jmax:  # Non-null
                    itie.addInterval(Interval(jmin, jmax, jmrk))
            tg.append(itie)
        else:  # PointTier
            inam = parse_line(source.readline(), short, round_digits)
            # The tier bounds are parsed to advance the stream (and to fail
            # on malformed input) but the PointTier is built from its name
            # only, as before.
            parse_line(source.readline(), short, round_digits)
            parse_line(source.readline(), short, round_digits)
            itie = PointTier(inam)
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                # Only the long layout has a per-point header line; skipping
                # a line unconditionally would swallow the point's time in
                # the short layout (the interval loop above already guards
                # this the same way).
                if not short:
                    source.readline()  # Header junk
                jtim = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                itie.addPoint(Point(jtim, jmrk))
            tg.append(itie)
    return tg
def saveSyllableInTextGrid(sylIntervals, gridobj, f):
    """
    Add a 'Syllable' tier built from sylIntervals to gridobj and write the
    grid to f.

    The tier spans from the minTime of the first interval to the maxTime of
    the last one. When sylIntervals is empty nothing is appended and nothing
    is written.
    """
    if not len(sylIntervals):
        # No interval to save
        return

    first, last = sylIntervals[0], sylIntervals[-1]
    tier = IntervalTier('Syllable', first.minTime, last.maxTime)
    for syllable in sylIntervals:
        tier.addInterval(syllable)

    # Attach the new tier, then persist the updated grid.
    gridobj.append(tier)
    gridobj.write(f)
def read(self, f):
    """
    Read the tiers contained in the Praat-formatted TextGrid file
    indicated by string f

    All times are rounded to 5 decimal places. Tier names are passed
    through self.name_filter. Intervals whose start is not strictly before
    their end are dropped. The source returned by readFile is closed even
    when parsing fails.
    """
    source = readFile(f)
    # try/finally so a malformed file (IndexError / ValueError while
    # parsing) does not leak the open source.
    try:
        # NOTE(review): parsing starts at the xmin line, so readFile
        # presumably consumes the leading file-type header - confirm.
        self.minTime = round(float(source.readline().split()[2]), 5)
        self.maxTime = round(float(source.readline().split()[2]), 5)
        source.readline()  # more header junk
        m = int(source.readline().rstrip().split()[2])  # will be self.n
        source.readline()
        for i in range(m):  # loop over grids
            source.readline()
            if source.readline().rstrip().split()[2] == '"IntervalTier"':
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                inam = self.name_filter(inam)
                # Tier bounds are parsed (and validated as floats) but the
                # tier is constructed from its name only.
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = IntervalTier(inam)
                for j in range(int(source.readline().rstrip().split()[3])):
                    source.readline()  # header junk
                    jmin = round(float(source.readline().rstrip().split()[2]), 5)
                    jmax = round(float(source.readline().rstrip().split()[2]), 5)
                    jmrk = _getMark(source)
                    if jmin < jmax:  # non-null
                        itie.addInterval(Interval(jmin, jmax, jmrk))
                self.append(itie)
            else:  # pointTier
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                inam = self.name_filter(inam)
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = PointTier(inam)
                n = int(source.readline().rstrip().split()[3])
                # NOTE(review): the points are parsed only to advance the
                # stream; neither the points nor the PointTier are stored on
                # self, so point tiers are silently dropped. Confirm whether
                # that is intentional before relying on it.
                for j in range(n):
                    source.readline()  # header junk
                    jtim = round(float(source.readline().rstrip().split()[2]), 5)
                    jmrk = source.readline().rstrip().split()[2][1:-1]
    finally:
        source.close()
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Creates a new TextGrid file with two added IntervalTiers.

    Every interval with a non-empty mark from the tiers named in ``tiers``
    is copied into two new tiers:

    * "Marked" (inserted at index 1) - the mark is the source tier's name;
    * "Merged" (inserted at index 2) - the mark is the original mark.

    Each pair of named tiers is first passed to validate_overlapping_tiers.
    The new tiers span from the earliest minTime to the latest maxTime of
    the named tiers, so intervals from every source tier fit inside them.

    :param tg_file: path of the TextGrid file to read.
    :param output_file: path the resulting TextGrid is written to.
    :param tiers: names of the tiers to merge; must not be empty.
    :raises ValueError: if ``tiers`` is empty.
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)

    for t1_name, t2_name in combinations(tiers, 2):
        validate_overlapping_tiers(tg.getFirst(t1_name), tg.getFirst(t2_name))

    # Resolve each tier name once instead of on every lookup.
    source_tiers = [tg.getFirst(tier_name) for tier_name in tiers]
    if not source_tiers:
        raise ValueError("at least one tier name is required")

    min_time = min(tier.minTime for tier in source_tiers)
    # max (not min): the combined tiers must reach the end of the longest
    # source tier, otherwise its trailing intervals fall outside the bounds.
    max_time = max(tier.maxTime for tier in source_tiers)

    merged_tier = IntervalTier(name="Merged", minTime=min_time, maxTime=max_time)
    marked_tier = IntervalTier(name="Marked", minTime=min_time, maxTime=max_time)

    for tier in source_tiers:
        tier_name = tier.name
        for interval in tier:
            if not interval.mark:
                continue  # skip unlabeled intervals
            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=tier_name))
            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)

    with open(output_file, "w") as f:
        tg.write(f)
def read(self, f):
    """
    Read the tiers contained in the Praat-formatted TextGrid file
    indicated by string f

    All times are rounded to 5 decimal places. Intervals whose start is not
    strictly before their end are dropped. The source returned by readFile
    is closed even when parsing fails.
    """
    source = readFile(f)
    # try/finally so a malformed file (IndexError / ValueError while
    # parsing) does not leak the open source.
    try:
        # NOTE(review): parsing starts at the xmin line, so readFile
        # presumably consumes the leading file-type header - confirm.
        self.minTime = round(float(source.readline().split()[2]), 5)
        self.maxTime = round(float(source.readline().split()[2]), 5)
        source.readline()  # more header junk
        m = int(source.readline().rstrip().split()[2])  # will be self.n
        source.readline()
        for i in range(m):  # loop over grids
            source.readline()
            if source.readline().rstrip().split()[2] == '"IntervalTier"':
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                # Tier bounds are parsed (and validated as floats) but the
                # tier is constructed from its name only.
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = IntervalTier(inam)
                for j in range(int(source.readline().rstrip().split()[3])):
                    source.readline()  # header junk
                    jmin = round(float(source.readline().rstrip().split()[2]), 5)
                    jmax = round(float(source.readline().rstrip().split()[2]), 5)
                    jmrk = self._getMark(source)
                    if jmin < jmax:  # non-null
                        itie.addInterval(Interval(jmin, jmax, jmrk))
                self.append(itie)
            else:  # pointTier
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = PointTier(inam)
                n = int(source.readline().rstrip().split()[3])
                # NOTE(review): the points are parsed only to advance the
                # stream; neither the points nor the PointTier are stored on
                # self, so point tiers are silently dropped. Confirm whether
                # that is intentional before relying on it.
                for j in range(n):
                    source.readline()  # header junk
                    jtim = round(float(source.readline().rstrip().split()[2]), 5)
                    jmrk = source.readline().rstrip().split()[2][1:-1]
    finally:
        source.close()
if i == 0: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur wordintervals.append(x) elif i == 1: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur phoneintervals.append(x) cur_dur += maxtime words = IntervalTier(name='words') for i in wordintervals: words.addInterval(i) phones = IntervalTier(name='phones') for i in phoneintervals: phones.addInterval(i) tg1 = TextGrid(maxTime = cur_dur) tg1.append(words) tg1.append(phones) tg1.write(chapteroutpath1, null = '') speaker_tier = IntervalTier(name=speaker) for i in range(len(groupedwavfiles)): if i == 1: speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0]) else: speaker_tier.add(wavfiletimes[i-2], wavfiletimes[i-1], groupedlabtext[i-1]) tg2 = TextGrid(maxTime = duration)
if i == 0: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur wordintervals.append(x) elif i == 1: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur phoneintervals.append(x) cur_dur += maxtime words = IntervalTier(name='words') for i in wordintervals: words.addInterval(i) phones = IntervalTier(name='phones') for i in phoneintervals: phones.addInterval(i) tg1 = TextGrid(maxTime=cur_dur) tg1.append(words) tg1.append(phones) tg1.write(chapteroutpath1, null='') speaker_tier = IntervalTier(name=speaker) for i in range(len(groupedwavfiles)): if i == 1: speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0]) else: speaker_tier.add(wavfiletimes[i - 2], wavfiletimes[i - 1], groupedlabtext[i - 1])
for s in sorted(speaker_word_tiers.keys()): w_tier = IntervalTier('{} - word'.format(s), 0, duration) p_tier = IntervalTier('{} - phone'.format(s), 0, duration) for w in sorted(speaker_word_tiers[s]): if len(w_tier) and w_tier[-1].mark in [ 'sp', '{OOV}' ] and w_tier[-1].maxTime > w.minTime: w_tier[-1].maxTime = w.minTime if len(w_tier) and w.mark in [ 'sp', '{OOV}' ] and w_tier[-1].maxTime > w.minTime: w.minTime = w_tier[-1].maxTime #print(w) if w.maxTime > duration: w.maxTime = duration w_tier.addInterval(w) for p in sorted(speaker_phone_tiers[s]): if len(p_tier) and p_tier[ -1].mark == 'sil' and p_tier[-1].maxTime > p.minTime: p_tier[-1].maxTime = p.minTime if len(p_tier) and p.mark == 'sil' and p_tier[ -1].maxTime > p.minTime: p.minTime = p_tier[-1].maxTime #print(p) if p.maxTime > duration: p.maxTime = duration try: p_tier.addInterval(p) except ValueError: pass new_tg.append(w_tier)