def ctm_to_textgrid(phone_ctm, out_directory, utt2dur, frameshift=0.01):
    """Write one single-tier ('phones') TextGrid per utterance.

    Args:
        phone_ctm: Mapping of utterance name to a list of mutable
            ``[begin, end, label]`` intervals. The intervals are modified in
            place: the end may be snapped to the utterance duration and the
            label is cleaned.
        out_directory: Directory receiving ``<utterance>.TextGrid`` files;
            created when it does not exist.
        utt2dur: Passed to ``generate_utt2dur`` to obtain the mapping of
            utterance name to duration.
        frameshift: An interval end closer than this to the utterance
            duration is replaced by the duration itself.

    Exceptions raised while building or writing a TextGrid do not abort the
    export; their tracebacks are collected and written to
    ``output_errors.txt`` inside ``out_directory``.
    """
    textgrid_write_errors = {}
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory)
    utt2dur_mapping = generate_utt2dur(utt2dur)
    for k, v in sorted(phone_ctm.items()):
        maxtime = Decimal(str(utt2dur_mapping[k]))
        try:
            tg = TextGrid(maxTime=maxtime)
            phonetier = IntervalTier(name='phones', maxTime=maxtime)
            for interval in v:
                if maxtime - interval[1] < frameshift:
                    interval[1] = maxtime
                # remove B E I and stress (0,1) information from phoneme:
                # keep the part before the first '_' and drop all digits.
                # Raw string: "\d" in a plain literal is an invalid escape.
                interval[2] = re.sub(r"\d+", "", interval[2].split('_')[0])
                phonetier.add(*interval)
            tg.append(phonetier)
            outpath = os.path.join(out_directory, k + '.TextGrid')
            tg.write(outpath)
        except Exception:
            # Best effort: remember the failure and continue with the next
            # utterance.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            textgrid_write_errors[k] = '\n'.join(
                traceback.format_exception(exc_type, exc_value, exc_traceback))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with io_open(error_log, 'w', encoding='utf-8') as f:
            f.write(
                u'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for k, v in textgrid_write_errors.items():
                f.write(u'{}:\n'.format(k))
                f.write(u'{}\n\n'.format(v))
def reorg_noncollapsed(f):
    """Rewrite TextGrid ``f`` from ``noncollapsed_dir`` into ``data_dir``.

    Every tier is rebuilt: marks are cleaned with ``sub_pattern`` and
    stripped, empty marks are dropped, and each remaining interval is padded
    by 0.1 on both sides (clamped to ``[0, tg.maxTime]``). When a padded
    interval cannot be added (``ValueError``), it is merged into the last
    interval of the new tier instead.
    """
    padding = 0.1
    print(f)
    source_path = os.path.join(noncollapsed_dir, f)
    tg = TextGrid()
    tg.read(source_path)
    new_tg = TextGrid(maxTime=tg.maxTime)
    for tier in tg.tiers:
        rebuilt = IntervalTier(name=tier.name, maxTime=tg.maxTime)
        for interval in tier:
            mark = sub_pattern.sub(' ', interval.mark).strip()
            if not mark:
                continue
            begin = max(interval.minTime - padding, 0)
            end = min(interval.maxTime + padding, tg.maxTime)
            try:
                rebuilt.add(begin, end, mark)
            except ValueError:
                # Could not insert: extend the previous interval and append
                # the text to it.
                previous = rebuilt[-1]
                previous.maxTime = end
                previous.mark += ' ' + mark
        print(len(rebuilt))
        new_tg.append(rebuilt)
    new_tg.write(source_path.replace(noncollapsed_dir, data_dir))
def export_segments(self, output_directory):
    """Export the corpus VAD segments as one TextGrid per sound file.

    Each file gets a single tier named 'segments' whose intervals are marked
    'speech'. Files are written below ``output_directory``, inside the
    relative directory given by ``corpus.file_directory_mapping`` when the
    file has an entry there.
    """
    from decimal import Decimal
    from textgrid import TextGrid, IntervalTier

    tier_name = 'segments'
    label = 'speech'
    per_file = {}
    for segment in self.corpus.vad_segments.values():
        filename, utt_begin, utt_end = segment
        entry = [Decimal(utt_begin), Decimal(utt_end), label]
        per_file.setdefault(filename, {}).setdefault(tier_name, []).append(entry)

    for filename, tiers in per_file.items():
        try:
            relative = self.corpus.file_directory_mapping[filename]
        except KeyError:
            target_directory = output_directory
        else:
            target_directory = os.path.join(output_directory, relative)
        os.makedirs(target_directory, exist_ok=True)
        max_time = self.corpus.get_wav_duration(filename)
        tg = TextGrid(maxTime=max_time)
        for name in sorted(tiers.keys()):
            tier = IntervalTier(name=name, maxTime=max_time)
            for entry in tiers[name]:
                # Clamp segment ends that run past the audio duration.
                if entry[1] > max_time:
                    entry[1] = max_time
                tier.add(*entry)
            tg.append(tier)
        tg.write(os.path.join(target_directory, filename + '.TextGrid'))
def loadOrGenerate(self):
    """Load the current file's TextGrid, or create a default one.

    When no '.TextGrid' file level is registered, a new TextGridFile is
    created with a single "text" interval spanning the audio duration and
    its path is registered. Afterwards, if a non-empty alignment tier
    (a name in ALIGNMENT_TIER_NAMES) exists, nothing more is done; an empty
    one is removed, and in every other case a frames tier is generated.
    """
    existing = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
    if existing:
        self.TextGrid = self.fromFile(existing)
    else:
        start = 0.
        if not hasattr(self.app.Audio, 'duration'):
            self.app.Audio.reset()
        end = self.app.Audio.duration
        self.TextGrid = TextGridFile(maxTime=end)
        text_tier = IntervalTier("text")
        text_tier.add(start, end, "text")
        self.TextGrid.tiers.append(text_tier)
        new_path = self.app.Data.unrelativize(
            self.app.Data.getCurrentFilename() + '.TextGrid')
        self.app.Data.setFileLevel('.TextGrid', new_path)

    for index, tier_name in enumerate(self.TextGrid.getNames()):
        if tier_name not in ALIGNMENT_TIER_NAMES:
            continue
        if len(self.TextGrid[index]) != 0:
            # A populated alignment tier already exists.
            return
        self.TextGrid.pop(index)
        break
    self.genFramesTier()
def loadOrGenerate(self):
    """Load the current file's TextGrid, or create a default one.

    When no '.TextGrid' file level is registered, a new TextGridFile is
    created and its path is registered. Unless both '.ult' and '.txt' file
    levels are present, the new grid gets a single "text" interval spanning
    the audio duration (1 second when the duration cannot be read).

    Afterwards the tier names are scanned for an alignment tier (a name in
    ALIGNMENT_TIER_NAMES): a non-empty one is recorded in
    ``self.frameTierName`` and the method returns; an empty one is removed.
    In every other case ``genFramesTier`` is called.
    """
    fname = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
    if fname:
        self.TextGrid = self.fromFile(fname)
    else:
        minTime = 0.
        if not hasattr(self.app.Audio, 'duration'):
            self.app.Audio.reset()
        try:
            maxTime = self.app.Audio.duration
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit. Ordinary errors still fall back to 1 second.
            warn(
                'Audio has no duration attribute after calling reset(), defaulting to 1 second'
            )
            maxTime = 1.
        self.TextGrid = TextGridFile(maxTime=maxTime)
        keys = self.app.Data.getFileLevel('all')
        if not ('.ult' in keys and '.txt' in keys):
            sentenceTier = IntervalTier("text")
            sentenceTier.add(minTime, maxTime, "text")
            self.TextGrid.append(sentenceTier)
        fname = self.app.Data.unrelativize(
            self.app.Data.getCurrentFilename() + '.TextGrid')
        self.app.Data.setFileLevel('.TextGrid', fname)
    names = self.TextGrid.getNames()
    for i, n in enumerate(names):
        if n in ALIGNMENT_TIER_NAMES:
            if len(self.TextGrid[i]) == 0:
                self.TextGrid.pop(i)
                break
            else:
                self.frameTierName = n
                return
    self.genFramesTier()
def clean_tier(old_tier):
    """Return a new IntervalTier, named like ``old_tier``, that holds the
    result of ``clean_interval`` for each interval of ``old_tier``."""
    cleaned_tier = IntervalTier(name=old_tier.name)
    for cleaned in map(clean_interval, old_tier):
        cleaned_tier.addInterval(cleaned)
    return cleaned_tier
def setUp(self):
    """Create the fixtures: an empty utterance, empty matrix and segment
    messages, and a two-interval tier ("a" on 0-1, "b" on 1-2)."""
    self.utt = utterance.Utterance()
    self.float_mat = FloatMatrix()
    self.int_mat = Int32Matrix()
    self.bool_mat = BinaryMatrix()
    self.seg = Segment()
    self.tier = IntervalTier('test', 0, 2)
    for begin, end, label in ((0, 1, "a"), (1, 2, "b")):
        self.tier.add(begin, end, label)
def createTextGrid(data, tierName = "words"):
    """Build a TextGrid with one tier of back-to-back intervals.

    ``data`` yields ``(name, time, dur, words)`` tuples; only ``dur`` and
    ``words`` are used. Intervals start at 0 and each one begins where the
    previous one ended. The mark is ``makeSentence(words)``.
    """
    tier = IntervalTier(tierName)
    txtgrid = TextGrid()
    begin = 0
    for _name, _time, dur, words in data:
        end = begin + dur
        tier.add(begin, end, makeSentence(words))
        begin = end
    txtgrid.append(tier)
    return txtgrid
def export_classification(self, output_directory):
    """Export the speaker classification results.

    First runs ``cluster_utterances`` (when ``self.cluster`` is set) or
    ``get_classification_stats``.

    With a segmented corpus, one TextGrid per sound file is written with one
    tier per classified speaker (read from the 'utt2spk' file in
    ``classify_directory``). Otherwise one directory per speaker is created
    containing 'utterances.txt', listing the utterances from 'spk2utt'.

    Args:
        output_directory: Root directory for the exported files.
    """
    if self.cluster:
        self.cluster_utterances()
    else:
        self.get_classification_stats()
    from decimal import Decimal
    from textgrid import TextGrid, IntervalTier
    spk2utt_path = os.path.join(self.classify_directory, 'spk2utt')
    utt2spk_path = os.path.join(self.classify_directory, 'utt2spk')
    if self.corpus.segments:
        utt2spk = load_scp(utt2spk_path)
        file_dict = {}
        for utt, segment in self.corpus.segments.items():
            filename, utt_begin, utt_end = segment.split(' ')
            utt_begin = Decimal(utt_begin)
            utt_end = Decimal(utt_end)
            if filename not in file_dict:
                file_dict[filename] = {}
            speaker = utt2spk[utt]
            text = self.corpus.text_mapping[utt]
            if speaker not in file_dict[filename]:
                file_dict[filename][speaker] = []
            file_dict[filename][speaker].append([utt_begin, utt_end, text])
        for filename, speaker_dict in file_dict.items():
            try:
                speaker_directory = os.path.join(
                    output_directory,
                    self.corpus.file_directory_mapping[filename])
            except KeyError:
                speaker_directory = output_directory
            # The target directory may not exist yet; without this the write
            # below fails for nested output directories.
            os.makedirs(speaker_directory, exist_ok=True)
            max_time = self.corpus.get_wav_duration(filename)
            tg = TextGrid(maxTime=max_time)
            for speaker in sorted(speaker_dict.keys()):
                words = speaker_dict[speaker]
                tier = IntervalTier(name=speaker, maxTime=max_time)
                for w in words:
                    # Clamp utterance ends that run past the audio duration.
                    if w[1] > max_time:
                        w[1] = max_time
                    tier.add(*w)
                tg.append(tier)
            tg.write(
                os.path.join(speaker_directory, filename + '.TextGrid'))
    else:
        spk2utt = load_scp(spk2utt_path)
        for speaker, utts in spk2utt.items():
            speaker_dir = os.path.join(output_directory, speaker)
            os.makedirs(speaker_dir, exist_ok=True)
            with open(os.path.join(speaker_dir, 'utterances.txt'),
                      'w',
                      encoding='utf8') as f:
                for u in utts:
                    f.write('{}\n'.format(u))
def convert_ctm_to_textgrid(ctm, textgrid):
    """Convert a CTM file into a TextGrid with 'words' and 'phonemes' tiers.

    Each CTM line is split on whitespace; field 2 is the begin time, field 3
    the duration and field 4 the label. Lines whose first field starts with
    '@' go to the phoneme tier (with the last two characters removed when
    ``besi`` matches the label); all other lines go to the word tier. Times
    are rounded to two decimals. Segments that the tier rejects with
    ``ValueError`` are reported on stdout and skipped.
    """
    word_segs, phoneme_segs = [], []
    with open(ctm, encoding='utf-8') as handle:
        for raw in handle:
            fields = raw.strip().split()
            label = fields[4]
            start = float(fields[2])
            length = float(fields[3])
            if fields[0][0] != '@':
                word_segs.append((label, start, length))
                continue
            if besi.match(label):
                label = label[:-2]
            phoneme_segs.append((label, start, length))

    word_tier = IntervalTier(name='words')
    phoneme_tier = IntervalTier(name='phonemes')
    jobs = (
        (word_tier, word_segs, "Error in word seg: "),
        (phoneme_tier, phoneme_segs, "Error in phoneme seg: "),
    )
    for tier, segments, error_prefix in jobs:
        for label, start, length in segments:
            try:
                tier.add(round(start, 2), round(start + length, 2), label)
            except ValueError:
                print(error_prefix + label)

    tg = TextGrid()
    tg.append(word_tier)
    tg.append(phoneme_tier)
    tg.write(textgrid)
def saveSyllableInTextGrid(sylIntervals, gridobj, f):
    """Append a 'Syllable' tier holding ``sylIntervals`` to ``gridobj`` and
    write the grid to ``f``.

    The tier spans from the first interval's minTime to the last interval's
    maxTime. Nothing is appended or written when ``sylIntervals`` is empty.
    """
    if len(sylIntervals) == 0:
        return  # nothing to save
    first, last = sylIntervals[0], sylIntervals[-1]
    tier = IntervalTier('Syllable', first.minTime, last.maxTime)
    for syllable in sylIntervals:
        tier.addInterval(syllable)
    gridobj.append(tier)
    gridobj.write(f)
def test_tier_duplication():
    """Run ``check()`` on a document built from a grid holding one empty
    tier named "A" (0-10)."""
    error_log.flush()
    grid = TextGrid()
    grid.tiers = [IntervalTier("A", minTime=0, maxTime=10)]
    document = SingleAnnotatorTextGrid.from_textgrid(grid, [], None)
    document.check()
def generator_textgrid(maxtime, lines, output):
    """Write a TextGrid with a single 'line' tier built from ``lines``.

    Args:
        maxtime: Maximum time of the grid and of its tier.
        lines: Iterable of strings, each holding exactly three
            whitespace-separated fields: start, end and mark.
        output: Destination passed to ``TextGrid.write``.

    Raises:
        ValueError: If a line does not have exactly three fields or its
            times are not numeric (tier insertion errors propagate too).
    """
    # Download Praat: https://www.fon.hum.uva.nl/praat/
    # Each interval start is nudged forward by this margin.
    margin = 0.0001
    tg = TextGrid(maxTime=maxtime)
    linetier = IntervalTier(name="line", maxTime=maxtime)
    for l in lines:
        s, e, w = l.split()
        linetier.add(minTime=float(s) + margin, maxTime=float(e), mark=w)
    tg.append(linetier)
    tg.write(output)
    # Report success only once the file has actually been written.
    print("successfully generator {}".format(output))
def createNew(textgrid, tier_name, VERBOSE=False):
    """Return a new TextGrid holding a cleaned copy of one tier.

    The first tier named ``tier_name`` is copied to a tier called
    ``tier_name + '_clean'``: intervals for which ``isPause`` returns True
    get an empty mark, all others get ``fixString(mark)``. With
    ``VERBOSE`` equal to True the old and new tiers are printed.
    """
    source_tier = textgrid.getList(tier_name)[0]
    cleaned_tier = IntervalTier(tier_name+'_clean')
    result = TextGrid()
    verbose = VERBOSE == True
    if verbose:
        print ("Old tier: %s" % source_tier)
    for interval in source_tier:
        if isPause(interval.mark) == True:
            mark = ''
        else:
            mark = fixString(interval.mark)
        cleaned_tier.add(interval.minTime, interval.maxTime, mark)
    result.append(cleaned_tier)
    if verbose:
        print ("New tier: %s" % cleaned_tier)
    return result
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Parse the Praat-formatted string tg_str into a TextGrid object.

    Both interval tiers and point tiers are read. Values are decoded with
    parse_line using the given rounding precision. Intervals whose start is
    not strictly before their end are dropped.
    Adapted from TextGrid.read()

    Raises ValueError when the header does not announce a "TextGrid".
    """
    source = StringIO(tg_str)
    tg = TextGrid()
    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")

    def next_value():
        # Consume exactly one line and decode it for the detected format.
        return parse_line(source.readline(), short, round_digits)

    tg.minTime = next_value()
    tg.maxTime = next_value()
    source.readline()  # More header junk
    count_line = source.readline().strip()
    if short:
        tier_count = int(count_line)  # Will be tg.n
    else:
        tier_count = int(count_line.split()[2])  # Will be tg.n
    if not short:
        source.readline()
    for _ in range(tier_count):  # Loop over grids
        if not short:
            source.readline()
        if next_value() == "IntervalTier":
            tier_name = next_value()
            tier_min = next_value()
            tier_max = next_value()
            tier = IntervalTier(tier_name, tier_min, tier_max)
            tier.strict = tg.strict
            for _ in range(int(next_value())):
                if not short:
                    source.readline()  # Header junk
                begin = next_value()
                end = next_value()
                mark = get_mark(source, short)
                if begin < end:  # Non-null
                    tier.addInterval(Interval(begin, end, mark))
        else:  # PointTier
            tier_name = next_value()
            # The tier bounds are read to advance the stream; the values
            # themselves are not used.
            next_value()
            next_value()
            tier = PointTier(tier_name)
            for _ in range(int(next_value())):
                source.readline()  # Header junk
                time = next_value()
                mark = get_mark(source, short)
                tier.addPoint(Point(time, mark))
        tg.append(tier)
    return tg
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, frameshift=0.01):
    """Write one TextGrid per file with a word and a phone tier per speaker.

    Args:
        word_ctm: Mapping ``filename -> speaker -> list of word intervals``
            (each interval is unpacked into ``IntervalTier.add``).
        phone_ctm: Mapping with the same nesting holding phone intervals as
            ``(begin, end, label)``.
        out_directory: Root output directory; files go into the relative
            directory given by ``corpus.file_directory_mapping``.
        corpus: Provides ``get_wav_duration``, ``file_directory_mapping``
            and ``speaker_ordering``.
        dictionary: Provides the two silence labels.
        frameshift: Accepted for interface compatibility; not used here.

    Consecutive silence phones are merged into one interval, and overlaps
    between a silence and a neighbouring phone are resolved by trimming the
    silence. Exceptions for a file are collected and written to
    ``output_errors.txt`` in ``out_directory`` instead of aborting.
    """
    textgrid_write_errors = {}
    os.makedirs(out_directory, exist_ok=True)
    silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
    for filename, speaker_dict in sorted(word_ctm.items()):
        maxtime = corpus.get_wav_duration(filename)
        try:
            speaker_directory = os.path.join(
                out_directory, corpus.file_directory_mapping[filename])
            # The per-file directory was never created, so writing into a
            # nested directory failed and only surfaced in the error log.
            os.makedirs(speaker_directory, exist_ok=True)
            tg = TextGrid(maxTime=maxtime)
            for speaker in corpus.speaker_ordering[filename]:
                words = speaker_dict[speaker]
                word_tier_name = '{} - words'.format(speaker)
                phone_tier_name = '{} - phones'.format(speaker)
                word_tier = IntervalTier(name=word_tier_name, maxTime=maxtime)
                phone_tier = IntervalTier(name=phone_tier_name,
                                          maxTime=maxtime)
                for w in words:
                    word_tier.add(*w)
                for p in phone_ctm[filename][speaker]:
                    if len(phone_tier) > 0 and phone_tier[
                            -1].mark in silences and p[2] in silences:
                        # Silence following silence: extend the previous one.
                        phone_tier[-1].maxTime = p[1]
                    else:
                        if len(phone_tier) > 0 and p[2] in silences and p[
                                0] < phone_tier[-1].maxTime:
                            # Silence overlapping the previous phone: start
                            # it where that phone ends.
                            p = phone_tier[-1].maxTime, p[1], p[2]
                        elif len(phone_tier) > 0 and p[2] not in silences and p[0] < phone_tier[-1].maxTime and \
                                phone_tier[-1].mark in silences:
                            # Phone overlapping the previous silence: cut
                            # the silence short.
                            phone_tier[-1].maxTime = p[0]
                        phone_tier.add(*p)
                tg.append(word_tier)
                tg.append(phone_tier)
            tg.write(os.path.join(speaker_directory, filename + '.TextGrid'))
        except Exception:
            # Best effort: record the failure and continue with other files.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            textgrid_write_errors[filename] = '\n'.join(
                traceback.format_exception(exc_type, exc_value, exc_traceback))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write(
                'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
def gen_template_tg(self, duration: float, filename: str):
    """Return a TextGrid named ``filename`` spanning 0.0 to ``duration``
    with one empty interval tier per key of ``self.tiers_specs``."""
    template = TextGrid(name=filename, minTime=0.0, maxTime=duration)
    for tier_name in self.tiers_specs.keys():
        template.append(
            IntervalTier(name=tier_name, minTime=0.0, maxTime=duration))
    return template
def read(self, f):
    """
    Read the tiers contained in the Praat-formatted TextGrid file
    indicated by string f.

    Sets ``self.minTime`` and ``self.maxTime`` (rounded to 5 decimals) and
    appends every interval tier found; tier names go through
    ``self.name_filter``. Intervals whose start is not strictly before
    their end are dropped. The file opened through ``readFile`` is always
    closed, including when parsing fails.
    """
    source = readFile(f)
    try:
        self.minTime = round(float(source.readline().split()[2]), 5)
        self.maxTime = round(float(source.readline().split()[2]), 5)
        source.readline()  # more header junk
        m = int(source.readline().rstrip().split()[2])  # will be self.n
        source.readline()
        for i in range(m):  # loop over grids
            source.readline()
            if source.readline().rstrip().split()[2] == '"IntervalTier"':
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                inam = self.name_filter(inam)
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = IntervalTier(inam)
                for j in range(int(source.readline().rstrip().split()[3])):
                    source.readline().rstrip().split()  # header junk
                    jmin = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmax = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmrk = _getMark(source)
                    if jmin < jmax:  # non-null
                        itie.addInterval(Interval(jmin, jmax, jmrk))
                self.append(itie)
            else:  # pointTier
                # NOTE(review): the points are parsed to advance the stream
                # but are neither added to the tier nor is the tier appended
                # - point tiers are dropped. Confirm this is intentional.
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                inam = self.name_filter(inam)
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = PointTier(inam)
                n = int(source.readline().rstrip().split()[3])
                for j in range(n):
                    source.readline().rstrip()  # header junk
                    jtim = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmrk = source.readline().rstrip().split()[2][1:-1]
    finally:
        # Previously the handle leaked whenever a malformed file raised.
        source.close()
def convert_ctm_to_textgrid(ctms, textgrid):
    """Combine several CTM files into one TextGrid, one tier per CTM.

    Args:
        ctms: Iterable of path objects (``.stem`` names the tier). Each line
            is split on whitespace; field 2 is the begin time, field 3 the
            duration and field 4 the label.
        textgrid: Destination passed to ``TextGrid.write``.

    Times are rounded to two decimals. Segments that a tier rejects with
    ``ValueError`` are reported on stdout and skipped. Nothing is written
    when ``ctms`` is empty.

    Previously the tier was built from whatever the loop variables held
    last, so only the final CTM reached the output file.
    """
    tiers = []
    for ctm in ctms:
        segments = []
        with open(ctm, encoding='utf-8') as f:
            for l in f:
                tok = l.strip().split()
                word = tok[4]
                beg = float(tok[2])
                dur = float(tok[3])
                segments.append((word, beg, dur))
        tier = IntervalTier(name=ctm.stem)
        for word, beg, dur in segments:
            try:
                tier.add(round(beg, 2), round(beg + dur, 2), word)
            except ValueError:
                print("Error in seg: " + word)
        tiers.append(tier)
    if not tiers:
        return
    tg = TextGrid()
    for tier in tiers:
        tg.append(tier)
    tg.write(textgrid)
def create_grid(wav_file: Path, text: str, tier_name: str, n_digits: int) -> TextGrid:
    """Create a TextGrid for ``wav_file`` with one tier built from ``text``.

    The grid and the tier span from 0 to the wav duration rounded to
    ``n_digits``. The tier's intervals come from ``get_intervals``, called
    with the individual characters of ``text``.
    """
    assert wav_file.is_file()
    assert len(text) > 0
    total_s = round(get_wav_duration_s(wav_file), n_digits)
    grid = TextGrid(None, 0, total_s)
    tier = IntervalTier(tier_name, 0, total_s)
    characters = list(text)
    new_intervals = get_intervals(characters, total_s, n_digits)
    tier.intervals.extend(new_intervals)
    grid.append(tier)
    return grid
def read_segment(val: Segment) -> IntervalTier:
    """Convert a Segment message into an IntervalTier.

    Args:
        val: A Segment message as defined in data_utterance.pb.

    Returns:
        An IntervalTier spanning from the first start time to the last end
        time, with one interval per symbol.

    Raises:
        ValueError: If the symbol, start-time and end-time counts and
            ``num_item`` do not all agree.
    """
    labels = val.symbol
    starts = mat_to_numpy(val.start_time)
    ends = mat_to_numpy(val.end_time)
    expected = val.num_item
    sizes = (len(labels), len(starts), len(ends))
    if any(size != expected for size in sizes):
        raise ValueError("Interval item number is not consistent!")
    tier = IntervalTier(minTime=starts[0], maxTime=ends[-1])
    for label, begin, end in zip(labels, starts, ends):
        tier.add(begin, end, label)
    return tier
def parseFile(path, fn, out_dir="/Users/elias/Desktop/TextGrids"):
    """Convert one annotation file into a words/phones TextGrid.

    Args:
        path: Path of the annotation file to read.
        fn: File name; the part before the first '.' names the output file.
        out_dir: Directory receiving ``<name>.TextGrid``. Defaults to the
            location that used to be hard-coded, so existing callers are
            unaffected.

    Nothing is written when ``getMAU`` returns None or ``getMaxTime``
    returns -1.
    """
    filename = fn.split(".")[0]  # just name of file
    with open(path, "r") as f1:
        lines = f1.readlines()
    SAM = getSAM(lines)
    allSegs = getMAU(lines, SAM, filename)
    if allSegs is None:
        return
    segs = [getSegInfo(seg) for seg in allSegs]
    words = getWords(lines, allSegs, filename)
    maxtime = getMaxTime(allSegs)
    if maxtime == -1:
        return
    tg = TextGrid(maxTime=maxtime)
    wordtier = IntervalTier(name='words', maxTime=maxtime)
    phonetier = IntervalTier(name='phones', maxTime=maxtime)
    for interval in words:
        wordtier.add(*interval)
    for interval in segs:
        phonetier.add(*interval)
    tg.append(wordtier)
    tg.append(phonetier)
    outpath = "%s/%s.TextGrid" % (out_dir, filename)
    tg.write(outpath)
def read(self, f):
    """
    Read the tiers contained in the Praat-formatted TextGrid file
    indicated by string f.

    Sets ``self.minTime`` and ``self.maxTime`` (rounded to 5 decimals) and
    appends every interval tier found. Intervals whose start is not
    strictly before their end are dropped. The file opened through
    ``readFile`` is always closed, including when parsing fails.
    """
    source = readFile(f)
    try:
        self.minTime = round(float(source.readline().split()[2]), 5)
        self.maxTime = round(float(source.readline().split()[2]), 5)
        source.readline()  # more header junk
        m = int(source.readline().rstrip().split()[2])  # will be self.n
        source.readline()
        for i in range(m):  # loop over grids
            source.readline()
            if source.readline().rstrip().split()[2] == '"IntervalTier"':
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = IntervalTier(inam)
                for j in range(int(source.readline().rstrip().split()[3])):
                    source.readline().rstrip().split()  # header junk
                    jmin = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmax = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmrk = self._getMark(source)
                    if jmin < jmax:  # non-null
                        itie.addInterval(Interval(jmin, jmax, jmrk))
                self.append(itie)
            else:  # pointTier
                # NOTE(review): the points are parsed to advance the stream
                # but are neither added to the tier nor is the tier appended
                # - point tiers are dropped. Confirm this is intentional.
                inam = source.readline().rstrip().split(' = ')[1].strip('"')
                imin = round(float(source.readline().rstrip().split()[2]), 5)
                imax = round(float(source.readline().rstrip().split()[2]), 5)
                itie = PointTier(inam)
                n = int(source.readline().rstrip().split()[3])
                for j in range(n):
                    source.readline().rstrip()  # header junk
                    jtim = round(
                        float(source.readline().rstrip().split()[2]), 5)
                    jmrk = source.readline().rstrip().split()[2][1:-1]
    finally:
        # Previously the handle leaked whenever a malformed file raised.
        source.close()
def time_to_frame_interval_tier(time_tier: IntervalTier, shift) -> IntervalTier:
    """Re-express an IntervalTier in frame indices.

    Args:
        time_tier: IntervalTier represented in seconds.
        shift: Window shift in ms.

    Returns:
        A tier with the same name and marks whose boundaries are the values
        returned by ``time_to_frame``.

    Raises:
        ValueError: If stretching short segments pushes an interval past
            the last frame of the tier.
    """
    max_frame = time_to_frame(time_tier.maxTime, shift)
    frame_tier = IntervalTier(time_tier.name, 0, max_frame)

    # A segment shorter than one frame shift is stretched to one frame; the
    # frames it borrows are then removed from the start of the next segment.
    # Several such segments in a row can run past the tier end -> ValueError.
    borrowed = 0
    for segment in time_tier.intervals:
        first_frame = time_to_frame(segment.minTime, shift)
        if borrowed > 0:
            logging.warning("Last segment is too short, have to cut the %d "
                            "frame(s) from the beginning of the current "
                            "segment.", borrowed)
            first_frame += borrowed
            borrowed = 0
        last_frame = time_to_frame(segment.maxTime, shift)
        if last_frame <= first_frame:
            last_frame = first_frame + 1
            borrowed = last_frame - first_frame
            logging.warning("The current segment is too short, extend it for "
                            "%d frame(s).", borrowed)
        if last_frame > frame_tier.maxTime:
            raise ValueError("Extreme short segments in the tier, please fix "
                             "these.")
        frame_tier.add(first_frame, last_frame, segment.mark)
    return frame_tier
def genFramesTier(self):
    """Generate the 'frames' point tier and persist the TextGrid.

    One point per frame time reported by ``self.app.Dicom.getFrameTimes()``
    is added, marked with the frame index. The grid's maxTime is raised when
    the frames extend past it. When both '.ult' and '.txt' file levels
    exist, the first line of the '.txt' file becomes a "sentence" tier that
    is moved to the front. Finally the grid is written to the registered
    '.TextGrid' path and reloaded from it.
    """
    debug('generating frames tier for %s' %
          self.app.Data.getCurrentFilename())
    self.frameTierName = 'frames'
    times = self.app.Dicom.getFrameTimes()
    self.app.Data.setFileLevel("NumberOfFrames", len(times))
    try:
        maxTime = max(self.app.Audio.duration, times[-1])
    except AttributeError:
        # Audio has no duration: the last frame time bounds the tier.
        maxTime = times[-1]
    tier = PointTier('frames', maxTime=maxTime)
    for f, t in enumerate(times):
        tier.addPoint(Point(t, str(f)))
    if not self.TextGrid.maxTime or maxTime > self.TextGrid.maxTime:
        self.TextGrid.maxTime = maxTime
    self.TextGrid.append(tier)

    keys = self.app.Data.getFileLevel('all')
    if '.ult' in keys and '.txt' in keys:
        fname = self.app.Data.unrelativize(
            self.app.Data.getFileLevel('.txt'))
        # ``with`` closes the handle even if reading or decoding fails.
        with open(fname, 'rb') as txt_file:
            s = util.decode_bytes(txt_file.read())
        if s:
            line = s.splitlines()[0]
            sentenceTier = IntervalTier("sentence")
            # Same fallback as above: a missing Audio.duration must not
            # crash here after having been tolerated for the frames tier.
            sentence_end = getattr(self.app.Audio, 'duration', maxTime)
            sentenceTier.add(0, sentence_end, line)
            self.TextGrid.append(sentenceTier)
            # Move the sentence tier (just appended) to the front.
            self.TextGrid.tiers = [self.TextGrid.tiers[-1]
                                   ] + self.TextGrid.tiers[:-1]

    path = self.app.Data.unrelativize(
        self.app.Data.getFileLevel('.TextGrid'))
    self.TextGrid.write(path)
    self.TextGrid = TextGridFile.fromFile(path)
def test_time_to_frame_interval_tier_short_seg(self):
    """A 3 ms segment converted with a shift of 5 must still occupy one
    full frame, pushing the following segment to start at frame 1."""
    tier = IntervalTier('test', 0, 0.01)
    for begin, end, label in ((0, 0.003, "a"), (0.003, 0.01, "b")):
        tier.add(begin, end, label)
    frame_tier = utterance.time_to_frame_interval_tier(tier, 5)
    self.assertEqual(frame_tier.minTime, 0)
    self.assertEqual(frame_tier.maxTime, 2)
    expected_bounds = ((0, 1), (1, 2))
    for index, (first_frame, last_frame) in enumerate(expected_bounds):
        self.assertEqual(frame_tier.intervals[index].minTime, first_frame)
        self.assertEqual(frame_tier.intervals[index].maxTime, last_frame)
def export_transcriptions(self, output_directory, source=None):
    """Export transcripts as '.lab' files or as per-file TextGrids.

    Args:
        output_directory: Root directory for the exported files.
        source: Passed through to ``self._load_transcripts``.

    Without corpus segments, each utterance's transcript is written to
    ``<utt>.lab``. With segments, one TextGrid per sound file is written
    with one tier per speaker holding the utterance transcripts at their
    segment times.
    """
    transcripts = self._load_transcripts(source)
    print(self.corpus.file_directory_mapping)
    if not self.corpus.segments:
        for utt, t in transcripts.items():
            relative = self.corpus.file_directory_mapping[utt]
            if relative:
                speaker_directory = os.path.join(output_directory, relative)
            else:
                speaker_directory = output_directory
            os.makedirs(speaker_directory, exist_ok=True)
            outpath = os.path.join(speaker_directory, utt + '.lab')
            with open(outpath, 'w', encoding='utf8') as f:
                f.write(t)
    else:
        for filename in self.corpus.speaker_ordering.keys():
            maxtime = self.corpus.get_wav_duration(filename)
            try:
                speaker_directory = os.path.join(
                    output_directory,
                    self.corpus.file_directory_mapping[filename])
            except KeyError:
                speaker_directory = output_directory
            # Mirror the '.lab' branch: the directory may not exist yet.
            os.makedirs(speaker_directory, exist_ok=True)
            tiers = {}
            for speaker in self.corpus.speaker_ordering[filename]:
                tiers[speaker] = IntervalTier(name=speaker, maxTime=maxtime)
            tg = TextGrid(maxTime=maxtime)
            for utt_name, text in transcripts.items():
                utt_filename, begin, end = self.corpus.segments[
                    utt_name].split(' ')
                # Only utterances belonging to the current file.
                if utt_filename != filename:
                    continue
                speaker = self.corpus.utt_speak_mapping[utt_name]
                begin = float(begin)
                end = float(end)
                tiers[speaker].add(begin, end, text)
            for t in tiers.values():
                tg.append(t)
            tg.write(
                os.path.join(speaker_directory, filename + '.TextGrid'))
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Creates a new TextGrid file with two added IntervalTiers.

    Args:
        tg_file: TextGrid file to read.
        output_file: Path of the TextGrid file to write.
        tiers: Names of the tiers to merge; every pair is first checked
            with ``validate_overlapping_tiers``.

    Every interval with a non-empty mark in the named tiers is copied into
    a "Merged" tier (keeping its mark) and into a "Marked" tier (marked with
    the name of the tier it came from). The "Marked" tier is inserted at
    index 1 and the "Merged" tier at index 2.

    Raises:
        ValueError: If ``tiers`` is empty (no bounds can be computed).
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)

    for t1_name, t2_name in combinations(tiers, 2):
        validate_overlapping_tiers(tg.getFirst(t1_name), tg.getFirst(t2_name))

    source_tiers = [tg.getFirst(name) for name in tiers]
    min_time = min(tier.minTime for tier in source_tiers)
    # The combined tiers must reach the latest end among the sources. The
    # previous ``min`` made them too short whenever the sources differed,
    # so intervals from the longer tier no longer fit.
    max_time = max(tier.maxTime for tier in source_tiers)

    merged_tier = IntervalTier(name="Merged",
                               minTime=min_time,
                               maxTime=max_time)
    marked_tier = IntervalTier(name="Marked",
                               minTime=min_time,
                               maxTime=max_time)

    for tier in source_tiers:
        for interval in tier:
            if not interval.mark:
                continue
            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=tier.name))
            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)

    with open(output_file, "w") as f:
        tg.write(f)
def reorg_original(f):
    """Split the 'Sentences' tier of TextGrid ``f`` into one tier per speaker.

    Speakers come from the 'Speakers' tier (empty marks, marks containing a
    comma and the mark 'Tân.' do not create a tier). Each non-blank sentence
    is assigned to the speaker whose interval contains the sentence's
    midpoint; a sentence starting exactly where the speaker's previous one
    ended is merged into it. The result is written next to the input with
    '_original.TextGrid' replaced by '.TextGrid'.
    """
    print(f)
    tg_path = os.path.join(noncollapsed_dir, f)
    tg = TextGrid()
    tg.read(tg_path)
    new_tg = TextGrid(maxTime=tg.maxTime)
    new_tg_path = tg_path.replace('_original.TextGrid', '.TextGrid')
    sentence_tier = tg.getFirst('Sentences')
    speaker_tier = tg.getFirst('Speakers')

    speaker_tiers = {}
    for entry in speaker_tier:
        name = entry.mark
        if name == '' or ',' in name or name == 'Tân.':
            continue
        speaker_tiers[name] = IntervalTier(name, maxTime=tg.maxTime)

    for sentence in sentence_tier:
        if not sentence.mark.strip():
            continue
        half = (sentence.maxTime - sentence.minTime) / 2
        owner = speaker_tier.intervalContaining(sentence.minTime + half)
        speaker = owner.mark
        if speaker == 'Tân.':
            speaker = 'Tan'
        if speaker == '':
            continue
        target = speaker_tiers[speaker]
        if len(target) > 0 and target[-1].maxTime == sentence.minTime:
            # Directly adjacent to the previous sentence: merge.
            target[-1].maxTime = sentence.maxTime
            target[-1].mark = target[-1].mark + ' ' + sentence.mark
        else:
            target.addInterval(sentence)

    for name in sorted(speaker_tiers):
        new_tg.append(speaker_tiers[name])
    print(speaker_tiers.keys())
    new_tg.write(new_tg_path)
def save_text_file(self, file_name):
    """Rewrite the TextGrid behind ``file_name`` from the corpus state.

    Only acts when the corpus has segments. The TextGrid of the file's
    first utterance is read, its tiers are replaced by one tier per speaker
    built from the current segments (times rounded to 4 decimals) and
    texts, and it is written back to the same path.
    """
    if not self.segments:
        return
    utterances = self.file_utt_mapping[file_name]
    text_file_path = self.utt_text_file_mapping[utterances[0]]
    tg = TextGrid()
    tg.read(text_file_path)

    per_speaker = {}
    for utt in self.file_utt_mapping[file_name]:
        _fn, raw_begin, raw_end = self.segments[utt].split()
        begin = round(float(raw_begin), 4)
        end = round(float(raw_end), 4)
        text = self.text_mapping[utt]
        speaker = self.utt_speak_mapping[utt]
        if speaker not in per_speaker:
            per_speaker[speaker] = IntervalTier(name=speaker,
                                                maxTime=tg.maxTime)
        per_speaker[speaker].add(begin, end, text)

    tg.tiers = list(per_speaker.values())
    tg.write(text_file_path)
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, frameshift=0.01):
    """Write word/phone TextGrids for aligned files.

    Args:
        word_ctm: Mapping ``name -> speaker -> list of word intervals``.
        phone_ctm: Mapping with the same nesting holding phone intervals as
            ``(begin, end, label)``.
        out_directory: Root output directory; files go into the relative
            directory given by ``corpus.file_directory_mapping``.
        corpus: Provides ``segments``, ``get_wav_duration``,
            ``file_directory_mapping`` and ``speaker_ordering``.
        dictionary: Provides the two silence labels.
        frameshift: Interval ends closer than this to the file duration are
            snapped to the duration (non-segmented corpora only).

    Without segments each file gets a 'words' and a 'phones' tier taken
    from its first speaker entry. With segments each file gets one word and
    one phone tier per speaker; consecutive silences are merged and overlaps
    between a silence and a neighbouring phone are resolved by trimming the
    silence. Exceptions for a file are collected and written to
    ``output_errors.txt`` in ``out_directory`` instead of aborting.
    """
    textgrid_write_errors = {}
    frameshift = Decimal(str(frameshift))
    os.makedirs(out_directory, exist_ok=True)
    if not corpus.segments:
        for k, v in sorted(word_ctm.items()):
            maxtime = Decimal(str(corpus.get_wav_duration(k)))
            speaker = list(v.keys())[0]
            v = list(v.values())[0]
            try:
                tg = TextGrid(maxTime=maxtime)
                wordtier = IntervalTier(name='words', maxTime=maxtime)
                phonetier = IntervalTier(name='phones', maxTime=maxtime)
                for interval in v:
                    if maxtime - interval[1] < frameshift:  # Fix rounding issues
                        interval[1] = maxtime
                    wordtier.add(*interval)
                for interval in phone_ctm[k][speaker]:
                    if maxtime - interval[1] < frameshift:
                        interval[1] = maxtime
                    phonetier.add(*interval)
                tg.append(wordtier)
                tg.append(phonetier)
                relative = corpus.file_directory_mapping[k]
                if relative:
                    speaker_directory = os.path.join(out_directory, relative)
                else:
                    speaker_directory = out_directory
                os.makedirs(speaker_directory, exist_ok=True)
                outpath = os.path.join(speaker_directory, k + '.TextGrid')
                tg.write(outpath)
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                textgrid_write_errors[k] = '\n'.join(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback))
    else:
        silences = {dictionary.optional_silence,
                    dictionary.nonoptional_silence}
        for filename, speaker_dict in sorted(word_ctm.items()):
            maxtime = corpus.get_wav_duration(filename)
            try:
                speaker_directory = os.path.join(
                    out_directory, corpus.file_directory_mapping[filename])
                # Mirror the non-segmented branch: without this, writes into
                # a missing nested directory only surface in the error log.
                os.makedirs(speaker_directory, exist_ok=True)
                tg = TextGrid(maxTime=maxtime)
                for speaker in corpus.speaker_ordering[filename]:
                    words = speaker_dict[speaker]
                    word_tier_name = '{} - words'.format(speaker)
                    phone_tier_name = '{} - phones'.format(speaker)
                    word_tier = IntervalTier(name=word_tier_name,
                                             maxTime=maxtime)
                    phone_tier = IntervalTier(name=phone_tier_name,
                                              maxTime=maxtime)
                    for w in words:
                        word_tier.add(*w)
                    for p in phone_ctm[filename][speaker]:
                        if len(phone_tier) > 0 and phone_tier[-1].mark in silences and p[2] in silences:
                            # Silence after silence: extend the previous one.
                            phone_tier[-1].maxTime = p[1]
                        else:
                            if len(phone_tier) > 0 and p[2] in silences and p[0] < phone_tier[-1].maxTime:
                                # Silence overlapping the previous phone:
                                # start it where that phone ends.
                                p = phone_tier[-1].maxTime, p[1], p[2]
                            elif len(phone_tier) > 0 and p[2] not in silences and p[0] < phone_tier[-1].maxTime and \
                                    phone_tier[-1].mark in silences:
                                # Phone overlapping the previous silence:
                                # cut the silence short.
                                phone_tier[-1].maxTime = p[0]
                            phone_tier.add(*p)
                    tg.append(word_tier)
                    tg.append(phone_tier)
                tg.write(
                    os.path.join(speaker_directory, filename + '.TextGrid'))
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                textgrid_write_errors[filename] = '\n'.join(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write('The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n')
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
maxtime = tg.maxTime for i, ti in enumerate(tg.tiers): if i == 0: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur wordintervals.append(x) elif i == 1: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur phoneintervals.append(x) cur_dur += maxtime words = IntervalTier(name='words') for i in wordintervals: words.addInterval(i) phones = IntervalTier(name='phones') for i in phoneintervals: phones.addInterval(i) tg1 = TextGrid(maxTime = cur_dur) tg1.append(words) tg1.append(phones) tg1.write(chapteroutpath1, null = '') speaker_tier = IntervalTier(name=speaker) for i in range(len(groupedwavfiles)): if i == 1: speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0]) else:
def parse_transcript(path):
    """Convert one transcript text file into a TextGrid with a tier per speaker.

    The output path is derived from ``path`` by swapping the
    ``<orig_dir>/txt`` prefix for ``output_dir`` and the ``.txt`` extension
    for ``.TextGrid``. Each parsed utterance is added to (or merged into) the
    IntervalTier of its speaker, and all tiers are written out at the end.
    """
    file_name = os.path.splitext(os.path.basename(path))[0]
    tg_path = path.replace(os.path.join(orig_dir, 'txt'), output_dir).replace('.txt', '.TextGrid')
    tg = TextGrid()
    tiers = {}  # speaker name -> IntervalTier
    continuation = False
    prev_speaker = None
    with open(path, 'r', encoding='utf8') as f:
        for i, line in enumerate(f):
            line = line.strip()
            # The first line is always skipped (presumably a header -- confirm).
            if i == 0:
                continue
            if not line:
                continue
            # Skip the <I> / </I> marker lines and any line starting with '&'.
            if line in ['<I>', '</I>']:
                continue
            if line.startswith('&'):
                continue
            # Groups: speaker code, start time, end time, utterance text.
            # The pattern also accepts variants such as "end6=" and stray
            # ';', 'l', '>' or space characters in the time fields
            # (presumably transcription typos -- confirm).
            m = re.match(
                r'^<\$(\w)>.*<start=?([0-9:.;l ]+) end6?=([0-9>:.;l ]*)>?[?]?\s+<#>(.+)$', line)
            if m is None:
                # No utterance header: treat the line as more text for the
                # last interval of the most recently seen speaker.
                text = parse_text(line)
                try:
                    tiers[speaker][-1].mark += ' ' + text
                except UnboundLocalError:
                    # No utterance header has been parsed yet, so `speaker`
                    # is still unbound; drop the line.
                    # NOTE(review): an empty tier for `speaker` looks like it
                    # would raise IndexError here, which is not caught -- confirm.
                    continue  # error
            else:
                speaker_code, start, end, text = m.groups()
                if speaker_code == 'Z':
                    continue
                try:
                    speaker = file_code_to_speaker[(file_name, speaker_code)]
                except KeyError:
                    # Unmapped code: fall back to a synthetic per-file name.
                    speaker = 'unknown_{}_{}'.format(file_name, speaker_code)
                if speaker not in tiers:
                    tiers[speaker] = IntervalTier(speaker)
                start = parse_time(start)
                end = parse_time(end)
                text = parse_text(text)
                if text == "Again he's quoting":
                    continue
                if not text:
                    continue
                if end is None:
                    continue
                if start is None:
                    # Without a start time the utterance can only continue the
                    # previous one, and only if the speaker did not change.
                    if prev_speaker != speaker:
                        continue
                    continuation = True
                    # NOTE(review): the text is appended here and again in the
                    # `continuation` branch below -- confirm the duplicate is
                    # intended.
                    tiers[speaker][-1].mark += ' ' + text
                # Report text that still contains '<' after removing the two
                # expected tags.
                if '<' in text.replace('<beep_sound>', '').replace('<unk>', ''):
                    print(file_name, start, end, text)
                    print(line)
                # Merge into the previous interval when continuing, or when
                # this utterance starts less than 0.1 after that interval ends.
                if continuation or (len(tiers[speaker]) > 0 and start - tiers[speaker][-1].maxTime < 0.1):
                    tiers[speaker][-1].mark += ' ' + text
                    if not continuation:
                        # Only a merged (non-continuation) utterance extends
                        # the interval's end time.
                        tiers[speaker][-1].maxTime = end
                    continuation = False
                else:
                    tiers[speaker].add(start, end, text)
                # print(speaker)
                # print(start, end)
                # print(text)
                prev_speaker = speaker
    # Summary: speaker names and number of intervals per tier.
    print(tiers.keys(), [len(x) for x in tiers.values()])
    for v in tiers.values():
        tg.append(v)
    tg.write(tg_path)
return norm_value * (max - min) + min seg_ind = 0 for f in sorted(os.listdir(data_dir)): if not f.endswith('.TextGrid'): continue print(f) wav_file = f.replace('.TextGrid', '.adc.wav') textgrid_path = os.path.join(data_dir, f) wav_path = os.path.join(data_dir, wav_file) tg = TextGrid() tg.read(textgrid_path) word_tier = tg.getFirst('words') segmentation_tier = IntervalTier('segments', 0, word_tier.maxTime) durations = [] for interval in word_tier: if interval.mark == '': continue durations.append(interval.maxTime - interval.minTime) max_duration = max(durations) min_duration = min(durations) min_thresh = 0.01 max_thresh = 0.05 segs = [] for interval in word_tier: if interval.mark == '': continue
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, frameshift=0.01):
    """Export aligned words and phones as TextGrid files.

    ``word_ctm`` and ``phone_ctm`` map a file name to ``{speaker: intervals}``.
    For corpora without segments one ``words``/``phones`` tier pair is written
    per file (into a per-speaker sub-directory when the corpus uses speaker
    directories); interval ends within ``frameshift`` of the file duration are
    snapped to the duration in place. For segmented corpora one tier pair per
    speaker is written and overlapping silences/phones are reconciled.

    Exceptions raised while building or writing a file are collected and
    dumped to ``output_errors.txt`` in ``out_directory``.
    """
    failures = {}
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)

    if corpus.segments:
        silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
        for filename, speaker_dict in sorted(word_ctm.items()):
            duration = corpus.get_wav_duration(filename)
            try:
                grid = TextGrid(maxTime=duration)
                for speaker, words in speaker_dict.items():
                    word_tier = IntervalTier(name='{} - words'.format(speaker), maxTime=duration)
                    phone_tier = IntervalTier(name='{} - phones'.format(speaker), maxTime=duration)
                    for word in words:
                        word_tier.add(*word)
                    for phone in phone_ctm[filename][speaker]:
                        last = phone_tier[-1] if len(phone_tier) > 0 else None
                        if last is not None and last.mark in silences and phone[2] in silences:
                            # Consecutive silences collapse into one interval.
                            last.maxTime = phone[1]
                            continue
                        if last is not None:
                            if phone[2] in silences:
                                if phone[0] < last.maxTime:
                                    # Overlapping silence starts where the
                                    # previous phone ends.
                                    phone = last.maxTime, phone[1], phone[2]
                            elif phone[0] < last.maxTime and last.mark in silences:
                                # Previous silence is trimmed to make room.
                                last.maxTime = phone[0]
                        phone_tier.add(*phone)
                    grid.append(word_tier)
                    grid.append(phone_tier)
                grid.write(os.path.join(out_directory, filename + '.TextGrid'))
            except Exception:
                failures[filename] = '\n'.join(traceback.format_exception(*sys.exc_info()))
    else:
        for utterance, by_speaker in sorted(word_ctm.items()):
            duration = Decimal(str(corpus.get_wav_duration(utterance)))
            speaker, words = list(by_speaker.items())[0]
            try:
                grid = TextGrid(maxTime=duration)
                word_tier = IntervalTier(name='words', maxTime=duration)
                phone_tier = IntervalTier(name='phones', maxTime=duration)
                for tier, entries in ((word_tier, words), (phone_tier, None)):
                    if entries is None:
                        # Looked up lazily so the word tier is filled first.
                        entries = phone_ctm[utterance][speaker]
                    for entry in entries:
                        if duration - entry[1] < frameshift:
                            # Fix rounding issues
                            entry[1] = duration
                        tier.add(*entry)
                grid.append(word_tier)
                grid.append(phone_tier)
                target_directory = out_directory
                if corpus.speaker_directories:
                    target_directory = os.path.join(out_directory, corpus.utt_speak_mapping[utterance])
                os.makedirs(target_directory, exist_ok=True)
                grid.write(os.path.join(target_directory, utterance + '.TextGrid'))
            except Exception:
                failures[utterance] = '\n'.join(traceback.format_exception(*sys.exc_info()))

    if not failures:
        return
    with open(os.path.join(out_directory, 'output_errors.txt'), 'w', encoding='utf8') as log:
        log.write('The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n')
        for name, trace in failures.items():
            log.write('{}:\n'.format(name))
            log.write('{}\n\n'.format(trace))
label = ss[4] result.append([begin, end, label]) return result if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('words_ctm') parser.add_argument('phones_ctm') parser.add_argument('output_textgrid') args = parser.parse_args() words = read_ctm(args.words_ctm) phones = read_ctm(args.phones_ctm) max_time = phones[-1][1] tg = TextGrid(maxTime=max_time) word_tier = IntervalTier(name="words", maxTime=max_time) phone_tier = IntervalTier(name="phones", maxTime=max_time) for w in words: word_tier.add(*w) for p in phones: phone_tier.add(*p) tg.append(word_tier) tg.append(phone_tier) tg.write(args.output_textgrid)
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus):
    """Export aligned words and phones as TextGrid files.

    ``word_ctm`` and ``phone_ctm`` map an utterance name to its list of
    ``(begin, end, label)`` intervals. Without segments, each utterance gets
    its own TextGrid (in a per-speaker sub-directory when the corpus uses
    speaker directories). With segments, utterances are shifted by their
    segment start and gathered into one TextGrid per recording, with a
    words/phones tier pair per speaker.

    A ``ValueError`` raised while building/writing an unsegmented file, or
    while writing a recording's TextGrid, is reported on stdout and skipped.
    """
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)

    if corpus.segments:
        def fetch_tier(grid, tier_name, max_time):
            # Return the named tier of `grid`, creating and attaching it
            # on first use.
            tier = grid.getFirst(tier_name)
            if tier is None:
                tier = IntervalTier(name=tier_name, maxTime=max_time)
                grid.append(tier)
            return tier

        grids = {}
        for utterance, words in word_ctm.items():
            # Segment entries read "<recording> <begin> <end>".
            recording, offset, _ = corpus.segments[utterance].split(' ')
            duration = corpus.get_wav_duration(utterance)
            if recording not in grids:
                grids[recording] = TextGrid(maxTime=duration)
            grid = grids[recording]
            offset = float(offset)
            speaker = corpus.utt_speak_mapping[utterance]
            word_tier = fetch_tier(grid, '{} - words'.format(speaker), duration)
            phone_tier = fetch_tier(grid, '{} - phones'.format(speaker), duration)
            # Utterance-relative times become recording-relative times.
            for entry in words:
                word_tier.add(entry[0] + offset, entry[1] + offset, entry[2])
            for entry in phone_ctm[utterance]:
                phone_tier.add(entry[0] + offset, entry[1] + offset, entry[2])

        for recording, grid in grids.items():
            destination = os.path.join(out_directory, recording + '.TextGrid')
            try:
                grid.write(destination)
            except ValueError as err:
                print('Could not write textgrid for {}'.format(recording))
                print(err)
        return

    for utterance, words in word_ctm.items():
        duration = corpus.get_wav_duration(utterance)
        try:
            grid = TextGrid(maxTime=duration)
            word_tier = IntervalTier(name='words', maxTime=duration)
            phone_tier = IntervalTier(name='phones', maxTime=duration)
            for entry in words:
                word_tier.add(*entry)
            for entry in phone_ctm[utterance]:
                phone_tier.add(*entry)
            grid.append(word_tier)
            grid.append(phone_tier)
            target_directory = out_directory
            if corpus.speaker_directories:
                target_directory = os.path.join(out_directory, corpus.utt_speak_mapping[utterance])
            os.makedirs(target_directory, exist_ok=True)
            grid.write(os.path.join(target_directory, utterance + '.TextGrid'))
        except ValueError as err:
            print('Could not write textgrid for {}'.format(utterance))
            print(err)