def parse_maus_par(parfilename, sample_rate):
    """Read word and segment intervals from a tab-separated MAUS ``.par`` file.

    ``ORT:`` rows (``ORT:  <word index>  <text>``) create one word interval
    each; ``MAU:`` rows (``MAU:  <start sample>  <duration>  <word index>
    <label>``) create one segment interval each and stretch the word they are
    linked to.  Sample positions are converted to seconds with *sample_rate*.

    Args:
        parfilename: Path of the ``.par`` file.
        sample_rate: Sampling rate used to convert samples to seconds.

    Returns:
        ``(ort_ivs, mau_ivs)`` -- two lists of ``tgt.Interval``.  Both lists
        are empty when the file contains no ``MAU:`` row.

    Raises:
        AssertionError: if the ``ORT:`` indices are not consecutive from 0, or
            if a word never received a ``MAU:`` segment.
        IOError: if the file cannot be opened.
    """
    ort_ivs = []
    mau_ivs = []
    with open(parfilename, 'r') as parfile:
        parreader = csv.reader(parfile, delimiter='\t', quotechar=None)
        for row in parreader:
            if not row:
                # csv yields [] for a blank line; row[0] would raise.
                continue
            if row[0] == "ORT:":
                oiv = tgt.Interval(0, 0, row[2])
                oiv.has_begin_set = False
                ort_ivs.append(oiv)
                assert len(ort_ivs) == int(row[1]) + 1
            elif row[0] == "MAU:":
                ivbegin = float(row[1]) / sample_rate
                ivend = (float(row[1]) + float(row[2]) + 1) / sample_rate
                wnum = int(row[3])
                # A negative word index means the segment belongs to no word
                # (presumably a pause -- see the BAS Partitur format).  It
                # must not touch ort_ivs: indexing with -1 would silently
                # rewrite the end time of the *last* word.
                if wnum >= 0:
                    word_iv = ort_ivs[wnum]
                    word_iv.end_time = ivend
                    if not word_iv.has_begin_set:
                        word_iv.start_time = ivbegin
                        word_iv.has_begin_set = True
                mau_ivs.append(tgt.Interval(ivbegin, ivend, row[4]))
    if not mau_ivs:
        return [], []
    for iv in ort_ivs:
        assert iv.has_begin_set, "Incomplete MAU tier in %s" % parfilename
    return ort_ivs, mau_ivs
def find_cycles(self, win_len=10, delta=1, lookahead=1, include_holds=True,
                **kwargs):
    """Segment the signal into inhalations ('in') and exhalations ('out').

    Peaks and troughs are detected on the moving z-score of the signal
    (window of ``win_len * samp_freq`` samples).  Each trough-to-peak span
    becomes an 'in' interval and each peak-to-trough span an 'out' interval
    in a new ``resp`` tier stored in ``self.segments``.

    When *include_holds* is true, ``find_holds`` is called afterwards with
    the extra keyword arguments.
    """
    normalised = self._move_zscore(win_len * self.samp_freq)
    peaks, troughs = peakdetect(normalised, delta=delta, lookahead=lookahead)

    # The sequence has to open and close with a trough, so peaks lying
    # outside the trough range are dropped.
    if peaks[0] < troughs[0]:
        peaks = peaks[1:]
    if peaks[-1] > troughs[-1]:
        peaks = peaks[:-1]
    expected = len(troughs) - 1
    assert len(peaks) == expected, \
        'Expected {} peaks, got {}'.format(expected, len(peaks))

    resp_tier = tgt.IntervalTier(name='resp')
    # One cycle = trough (onset) -> peak -> next trough (offset).
    for onset, peak, offset in zip(troughs[:-1], peaks, troughs[1:]):
        inh_start = onset / self.samp_freq
        inh_end = peak / self.samp_freq
        exh_end = offset / self.samp_freq
        resp_tier.add_interval(tgt.Interval(inh_start, inh_end, 'in'))
        resp_tier.add_interval(tgt.Interval(inh_end, exh_end, 'out'))
    self.segments = resp_tier

    if include_holds:
        # Remaining keyword arguments are meant for find_holds.
        self.find_holds(**kwargs)
def add_onsets_rhymes(title, input_path, output_path):
    """Add an onset/rhyme tier to ``<title>.TextGrid`` and write the result.

    Each syllable of the tier whose name contains 'sylls' is split at the
    phone index returned by ``calculate_nucleus_index``: phones before it
    form an 'O' interval, the rest an 'R' interval.  A nucleus index of 0
    yields a single 'R' interval.  The new tier is named after the 'words'
    tier with 'words' replaced by 'OR'; the syllable tier is moved behind it.
    The grid is read from *input_path* and written to *output_path* in short
    format.
    """
    grid = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))
    names = grid.get_tier_names()

    def first_matching(fragment):
        # First tier name containing *fragment*; IndexError if there is none.
        return [name for name in names if fragment in name][0]

    sylls_name = first_matching('sylls')
    sylls = grid.get_tier_by_name(sylls_name)
    phones = grid.get_tier_by_name(first_matching('phones'))

    or_tier = tgt.IntervalTier()
    or_tier.name = first_matching('words').replace('words', 'OR')

    pieces = []
    for syll in sylls._get_annotations():
        phs = phones.get_annotations_between_timepoints(
            syll.start_time, syll.end_time)
        nucleus = calculate_nucleus_index(phs)
        if nucleus != 0:
            # Onset present: it ends with the phone preceding the nucleus.
            pieces.append(
                tgt.Interval(syll.start_time, phs[nucleus - 1].end_time, 'O'))
            rhyme_start = phs[nucleus].start_time
        else:
            # No onset: the rhyme covers the whole syllable.
            rhyme_start = syll.start_time
        pieces.append(tgt.Interval(rhyme_start, syll.end_time, 'R'))

    or_tier.add_annotations(pieces)
    grid.add_tier(or_tier)

    # Re-append the syllable tier so that it follows the new tier.
    grid.delete_tier(sylls_name)
    grid.add_tier(sylls)

    tgt.write_to_file(grid, os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def read_maus_alignments(tmpdir, offsets, orttier, mautier, sample_rate):
    """Import the per-utterance MAUS results into two tiers.

    For the n-th entry of *offsets* (counting from 1) the file
    ``<tmpdir>/iv<n>.par`` is parsed; its word intervals are added to
    *orttier* and its segment intervals to *mautier*, both shifted by the
    start time of the offset interval.  A missing file or an empty result
    only produces a warning (and only if the offset is flagged
    ``transcription_valid``); any other error is logged and re-raised.
    """
    logging.info("Reading MAUS alignments")
    for intervalcnt, foffset in enumerate(offsets, 1):
        parfile = "%s/iv%s.par" % (tmpdir, intervalcnt)
        try:
            ort_ivs, mau_ivs = parse_maus_par(parfile, sample_rate)
            if not ort_ivs and foffset.transcription_valid:
                logging.warning("No alignment imported for interval %s: %s" %
                                (intervalcnt, foffset))
            for target, parsed in ((orttier, ort_ivs), (mautier, mau_ivs)):
                for iv in parsed:
                    shifted = tgt.Interval(
                        iv.start_time + foffset.start_time,
                        iv.end_time + foffset.start_time,
                        iv.text)
                    target.add_annotation(shifted)
        except IOError:
            if foffset.transcription_valid:
                logging.warning("No alignment imported for interval %s: %s" %
                                (intervalcnt, foffset))
        except BaseException:
            # Same reach as a bare ``except``: log, then propagate unchanged.
            logging.error("Exception while parsing TextGrid %s" % parfile)
            raise
def get_textgrid(self):
    """Return the word and phoneme segments as a long-format TextGrid string.

    A 'Word' tier is built from ``self.words.segments`` and a 'Phoneme' tier
    from ``self.phonemes.segments``; every segment contributes one interval
    made of its ``start``, ``end`` and ``text``.
    """
    grid = tgt.TextGrid()
    for tier_name, attribute in (('Word', 'words'), ('Phoneme', 'phonemes')):
        tier = tgt.IntervalTier(name=tier_name)
        for segment in getattr(self, attribute).segments:
            tier.add_interval(
                tgt.Interval(segment.start, segment.end, segment.text))
        grid.add_tier(tier)
    return tgt.io.export_to_long_textgrid(grid)
def annotate(textGrid, annotatedTextGrid):
    """Copy the utterance and annotation labels onto *textGrid*.

    The text of the first interval of tiers 0 and 2 of *annotatedTextGrid*
    is placed in two new single-interval tiers ("Utterance" and
    "Annotation") spanning the time range of the first tier of *textGrid*.
    Returns *textGrid*, which is modified in place.
    """
    utterance = annotatedTextGrid.tiers[0][0].text
    annotation = annotatedTextGrid.tiers[2][0].text

    reference = textGrid.tiers[0]
    st = reference.start_time
    et = reference.end_time

    # Build both tiers completely before touching textGrid.
    new_tiers = []
    for tier_name, label in (("Utterance", utterance),
                             ("Annotation", annotation)):
        interval = tgt.Interval(start_time=st, end_time=et, text=label)
        tier = tgt.IntervalTier(start_time=st, end_time=et, name=tier_name)
        new_tiers.append((tier, interval))
    for tier, interval in new_tiers:
        tier.add_interval(interval)
    for tier, _ in new_tiers:
        textGrid.add_tier(tier)
    return textGrid
def find_holds(self, min_hold_dur=0.25, min_hold_gap=0.15,
               peak_prominence=0.05, bins=100):
    """Locate respiratory holds.

    The method is based on the original MATLAB implementation in
    Breathmetrics (https://github.com/zelanolab/breathmetrics),
    adapted to the RIP signal. See also:

    Noto T, Zhou G, Schuele S, Templer J, & Zelano C (2018)
    Automated analysis of breathing waveforms using BreathMetrics: a
    respiratory signal processing toolbox. Chemical Senses (in press).

    Args:
        min_hold_dur: Minimum duration (seconds) of a reported hold.
        min_hold_gap: Holds closer together than this (seconds) are merged.
        peak_prominence: Passed to ``_find_holds_within_interval``.
        bins: Passed to ``_find_holds_within_interval``.

    Side effects:
        Sets ``self._filt`` to the low-pass filtered signal and
        ``self.holds`` to an interval tier named 'holds'.  The tier is empty
        when no hold is found.
    """
    self._filt = self.filter_lowpass(cutoff=3, order=8, inplace=False)

    # Identify inhalations and exhalations if not present.
    if self.segments is None:
        self.find_cycles()

    # Collect hold candidates (in samples) inside every respiratory segment.
    hold_cand = []
    for intr in self.segments:
        lo = round(intr.start_time * self.samp_freq)
        hi = round(intr.end_time * self.samp_freq)
        intr_holds = self._find_holds_within_interval(
            lo, hi, peak_prominence, bins)
        if intr_holds is not None:
            # Candidates are relative to the segment start.
            hold_cand += [(lo + h[0], lo + h[1]) for h in intr_holds]

    # Merge holds which lie closer than min_hold_gap ...
    max_gap = min_hold_gap * self.samp_freq
    merged = []
    for cand in hold_cand:
        if merged and cand[0] - merged[-1][1] < max_gap:
            merged[-1] = (merged[-1][0], cand[1])
        else:
            merged.append(cand)
    # ... and exclude holds shorter than min_hold_dur.  With no candidates
    # this is simply an empty list; the previous version indexed None here
    # and raised TypeError.
    min_len = min_hold_dur * self.samp_freq
    holds = [h for h in merged if h[1] - h[0] >= min_len]

    # Build a holds tier.
    holds_tier = tgt.IntervalTier(name='holds')
    for lo, hi in holds:
        start = lo / self.samp_freq
        end = hi / self.samp_freq
        # Filter out holds overlapping with speech or inhalation.
        if (self.overlaps_speech(start, end)
                or self.overlaps_inhalation(start, end)):
            continue
        holds_tier.add_interval(tgt.Interval(start, end, 'hold'))
    self.holds = holds_tier
def segment_speech_praat(wavfile, channel=1, denoise=False, trainbegin=0,
                         trainwindow=1, threshold=50, min_sil_duration=0.02,
                         min_snd_duration=0.02):
    """Run the ``vad.praat`` script on *wavfile* and parse its output.

    The script is looked up next to the running program (``sys.argv[0]``)
    and is called through ``util.call_check``.  Its tab-separated output is
    interpreted line by line:

    * ``chunk <start> <end> <dB>`` -> a ``tgt.Interval`` whose text is the
      dB field and which also carries it as float attribute ``as_db``;
    * ``itn <a> <b>`` -> ``IntensityVal(float(a), float(b))``;
    * lines starting with ``silence threshold`` are logged.

    Returns:
        ``(speech_chunks, intensities)`` -- two lists.
    """
    vadscript = os.path.join(os.path.dirname(sys.argv[0]), "vad.praat")
    result = util.call_check([
        "praat", "--run", vadscript,
        os.path.realpath(wavfile),
        str(channel),
        str(int(denoise)),
        str(trainbegin),
        str(trainwindow),
        str(threshold),
        str(min_sil_duration),
        str(min_snd_duration)
    ], True)

    speech_chunks = []
    intensities = []
    for line in result.decode().split("\n"):
        items = line.split("\t")
        if line.startswith("silence threshold"):
            logging.info(line)
        elif items[0] == "chunk":
            # A chunk needs four fields.  The old guard (len > 2) let a
            # three-field line through and then failed on items[3].
            if len(items) < 4:
                logging.warning("Ignoring malformed chunk line: %r", line)
                continue
            iv = tgt.Interval(float(items[1]), float(items[2]), items[3])
            iv.as_db = float(items[3])
            speech_chunks.append(iv)
        elif items[0] == "itn" and len(items) > 2:
            intensities.append(
                IntensityVal(float(items[1]), float(items[2])))
    return speech_chunks, intensities
def split_utterances(tmpdir, speechtiername, infile, wavfile, channel,
                     denoise):
    """Cut the audio into utterance files with ``splitaudio.praat``.

    The script (looked up next to ``sys.argv[0]``) receives the wav file,
    the TextGrid *infile*, the output prefix ``<tmpdir>/iv``, the channel,
    the tier name and the denoise flag.  Every output line made of exactly
    three tab-separated fields is turned into a
    ``tgt.Interval(start, end, text)``.

    Returns:
        The list of those intervals, in output order.
    """
    logging.info("Splitting audio into utterance segments")
    script_dir = os.path.dirname(sys.argv[0])
    splitaudioscript = os.path.join(script_dir, "splitaudio.praat")
    if denoise:
        logging.warning("Assuming 1-4 seconds are non-speech for denoising")

    command = [
        "praat", "--run", splitaudioscript,
        os.path.realpath(wavfile),
        os.path.realpath(infile),
        "%s/iv" % tmpdir,
        str(channel),
        speechtiername,
        str(int(denoise)),
    ]
    result = util.call_check(command, True)

    offsets = []
    for line in result.decode().split("\n"):
        fields = line.split("\t")
        if len(fields) != 3:
            continue
        begin, end, label = fields
        offsets.append(tgt.Interval(start_time=float(begin),
                                    end_time=float(end),
                                    text=label))
    logging.info("Split completed: %s segments" % len(offsets))
    return offsets
def _merge_holds(cycles, holds):
    """Merge respiratory holds with the inhalation and exhalation boundaries.

    Both arguments are indexable, time-ordered collections of intervals
    (e.g. ``tgt.IntervalTier``).  The result is a new ``tgt.IntervalTier``
    in which every hold appears as is and every cycle is cut so that it does
    not overlap a hold: a cycle interrupted by a hold is split into the part
    before and the part after it.

    Fixes over the previous version:

    * the output tier no longer overwrites the ``cycles`` argument (the loop
      never ran and the result was always empty);
    * ``c_end`` is a number, not a one-element tuple (stray trailing comma);
    * cycles following the last hold are kept instead of being dropped;
    * no interval with a non-positive duration is created, and the loop
      always advances.
    """
    merged = tgt.IntervalTier()
    i, j = 0, 0

    def remaining_start(cycle):
        # Part of the cycle may already be covered by the last merged item.
        if len(merged) > 0:
            return max(merged[-1].end_time, cycle.start_time)
        return cycle.start_time

    while i < len(cycles) and j < len(holds):
        cycle = cycles[i]
        hold = holds[j]
        c_start = remaining_start(cycle)
        c_end = min(cycle.end_time, hold.start_time)
        if c_start < c_end:
            merged.add_interval(tgt.Interval(c_start, c_end, cycle.text))
        if cycle.end_time > hold.start_time:
            # The hold begins inside this cycle: emit it and move on.
            merged.add_interval(hold)
            j += 1
            if cycle.end_time <= hold.end_time:
                # The hold also swallows the rest of the cycle.
                i += 1
        else:
            # The cycle ends before the next hold: it is fully consumed.
            i += 1

    # No holds left: append whatever remains of the cycles.
    while i < len(cycles):
        cycle = cycles[i]
        c_start = remaining_start(cycle)
        if c_start < cycle.end_time:
            merged.add_interval(
                tgt.Interval(c_start, cycle.end_time, cycle.text))
        i += 1
    return merged
def add_lemmas(title, input1_path, output_path):
    """Add a lemma tier to ``<title>.TextGrid`` and write the result.

    Each interval of the tier whose name contains 'words' is lemmatised with
    ``lemmatize_word(word, pos)``, where the POS tag is the text of the
    interval at the same position in the tier whose name contains 'pos'.
    The new tier takes the words tier name with 'words' replaced by
    'lemmas'.  The grid is written to *output_path* in short format.
    """
    grid = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))
    names = grid.get_tier_names()

    pos_name = [name for name in names if 'pos' in name][0]
    pos_tier = grid.get_tier_by_name(pos_name)

    words_name = [name for name in names if 'words' in name][0]
    words_tier = grid.get_tier_by_name(words_name)

    lemmas_tier = tgt.IntervalTier()
    lemmas_tier.name = words_name.replace('words', 'lemmas')

    lemma_intervals = []
    for position, word in enumerate(words_tier):
        # The POS tier is addressed by position, so it must be aligned 1:1
        # with the words tier.
        lemma = lemmatize_word(word.text, pos_tier[position].text)
        lemma_intervals.append(
            tgt.Interval(word.start_time, word.end_time, lemma))

    lemmas_tier.add_annotations(lemma_intervals)
    grid.add_tier(lemmas_tier)
    tgt.write_to_file(grid, os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def save_intervalframe_to_textgrid(framedict, filepath, encoding='utf-8'):
    """Write a dict of IntervalFrames in a textgrid-File.

    Arguments:
    framedict --    Dictionary of dataframes. The keys become tier names in
                    the textgrid file.  A frame with three columns becomes an
                    interval tier (start, end, text), a frame with two
                    columns a point tier (time, text).  Frames with any other
                    number of columns are skipped with a message.
    filepath --     Path + filename of the file to be written.

    Keyword arguments:
    encoding: character encoding to save textgrid file

    Nothing is written (and None is returned) when framedict is empty.
    """
    if not framedict:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a syntax error on Python 3.
        print("invalid data!")
        return

    mytextgrid = tgt.TextGrid()
    for tier_name, newtier in framedict.items():
        columns = newtier.columns
        if len(columns) == 3:
            mytier = tgt.IntervalTier(name=tier_name)
            for row in newtier.index:
                mytier.add_interval(tgt.Interval(newtier[columns[0]][row],
                                                 newtier[columns[1]][row],
                                                 newtier[columns[2]][row]))
        elif len(columns) == 2:
            mytier = tgt.PointTier(name=tier_name)
            for row in newtier.index:
                mytier.add_point(tgt.Point(newtier[columns[0]][row],
                                           newtier[columns[1]][row]))
        else:
            # Previously this fell through and re-added the tier of the
            # preceding iteration (or raised NameError on the first one).
            print("skipping tier %r: expected 2 or 3 columns, got %d"
                  % (tier_name, len(columns)))
            continue
        mytextgrid.add_tier(mytier)

    tgt.write_to_file(mytextgrid, filepath, encoding=encoding, format="long")
def prepare_textgrid(self, df, offset):
    """Build a TextGrid with one "Context" tier from the rows of *df*.

    Every row contributes one interval running from
    ``coq_word_starttime_1 - offset`` to ``coq_word_endtime_1 - offset`` and
    labelled with ``coq_word_label_1``.  Returns the new grid.
    """
    grid = tgt.TextGrid()
    context_tier = tgt.IntervalTier()
    context_tier.name = "Context"
    grid.add_tier(context_tier)

    for idx in df.index:
        row = df.loc[idx]
        begin = row["coq_word_starttime_1"]
        finish = row["coq_word_endtime_1"]
        label = row["coq_word_label_1"]
        # The label is assigned after construction, as before.
        interval = tgt.Interval(begin - offset, finish - offset)
        interval.text = label
        context_tier.add_interval(interval)
    return grid
def writeTextGrid(outfile, word_alignments):
    """Write word/phone alignments to *outfile* as a long-format TextGrid.

    Each item of *word_alignments* is a sequence whose first element is the
    word and whose remaining elements are ``(phone, start, end)`` triples.
    A word spans from the start of its first phone to the end of its last
    one; items without phones are ignored.  The 'phone' tier is added to the
    grid before the 'word' tier.
    """
    grid = tgt.TextGrid()
    phone_tier = tgt.IntervalTier(name='phone')
    word_tier = tgt.IntervalTier(name='word')

    for entry in word_alignments:
        word, phones = entry[0], entry[1:]
        if len(phones) == 0:
            continue
        word_start = phones[0][1]
        word_end = phones[-1][2]
        word_tier.add_interval(tgt.Interval(word_start, word_end, text=word))
        for (label, begin, finish) in phones:
            phone_tier.add_interval(tgt.Interval(begin, finish, text=label))

    grid.add_tier(phone_tier)
    grid.add_tier(word_tier)
    tgt.io.write_to_file(grid, outfile, format='long')
def find_holds(self, min_hold_dur=0.25, min_hold_gap=0.15,
               peak_prominence=0.05, bins=100):
    """Locate respiratory holds and store them in ``self.holds``.

    Hold candidates are collected per respiratory segment with
    ``_find_holds_within_interval``, candidates closer together than
    *min_hold_gap* seconds are merged, and merged holds shorter than
    *min_hold_dur* seconds are discarded.  The survivors are stored as
    'hold' intervals in a tier named 'holds'.

    If no candidate is found at all the method returns early and leaves
    ``self.holds`` untouched.
    """
    # Inhalations and exhalations are needed first.
    if self.segments is None:
        self.find_cycles()

    candidates = []
    for segment in self.segments:
        first = round(segment.start_time * self.samp_freq)
        last = round(segment.end_time * self.samp_freq)
        found = self._find_holds_within_interval(
            first, last, peak_prominence, bins)
        if found is None:
            continue
        # Candidate positions are relative to the segment start.
        candidates.extend((first + h[0], first + h[1]) for h in found)

    if not candidates:
        return

    # Pass 1: fuse neighbours separated by less than the minimum gap.
    max_gap = min_hold_gap * self.samp_freq
    fused = [candidates[0]]
    for cand in candidates[1:]:
        previous = fused[-1]
        if cand[0] - previous[1] < max_gap:
            fused[-1] = (previous[0], cand[1])
        else:
            fused.append(cand)

    # Pass 2: keep only holds that are long enough.
    min_len = min_hold_dur * self.samp_freq
    holds_tier = tgt.IntervalTier(name='holds')
    for first, last in fused:
        if last - first >= min_len:
            holds_tier.add_interval(tgt.Interval(first / self.samp_freq,
                                                 last / self.samp_freq,
                                                 'hold'))
    self.holds = holds_tier
def format(self, syncmap):
    """Serialise the fragments of *syncmap* as a Praat TextGrid string.

    Each fragment becomes one interval of a tier named "Token"; fragments
    with empty text are labelled "SIL".  The long TextGrid format is used
    when ``self.variant`` equals ``self.DEFAULT``, the short one otherwise.
    A missing ``tgt`` module is reported through ``self.log_exc``.
    """
    try:
        import tgt
    except ImportError as exc:
        self.log_exc(u"Python module tgt is not installed", exc, True,
                     ImportError)

    # from https://github.com/hbuschme/TextGridTools/blob/master/tgt/io.py
    token_tier = tgt.IntervalTier(name="Token")
    for fragment in syncmap.fragments:
        begin = float(fragment.begin)
        end = float(fragment.end)
        label = fragment.text_fragment.text
        if label == u"":
            label = u"SIL"
        token_tier.add_interval(tgt.Interval(begin, end, text=label))

    textgrid = tgt.TextGrid()
    textgrid.add_tier(token_tier)

    if self.variant == self.DEFAULT:
        exporter = tgt.io.export_to_long_textgrid
    else:
        exporter = tgt.io.export_to_short_textgrid
    return gf.safe_unicode(exporter(textgrid))
def add_pos(title, input1_path, input2_path, output_path):
    """Add a part-of-speech tier to ``<title>.TextGrid`` and write the result.

    The transcript ``<title>.txt`` (from *input2_path*) is tagged with
    ``my_tagger``.  Its tags are used only if they match the words tier
    one-to-one (same length and same tokens); otherwise the words of the
    tier are tagged with ``nltk.pos_tag``.  When the lengths differ the
    title is printed.  The new tier is named after the words tier with
    'words' replaced by 'pos'.
    """
    grid = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))
    names = grid.get_tier_names()

    words_name = [name for name in names if 'words' in name][0]
    words_tier = grid.get_tier_by_name(words_name)

    pos_tier = tgt.IntervalTier()
    pos_tier.name = words_name.replace('words', 'pos')

    word_intervals = list(words_tier._get_annotations())
    words = [interval.text for interval in word_intervals]

    # Whitespace inside each line is collapsed to single blanks; lines are
    # concatenated without a separator between them.
    with open(os.path.join(input2_path, title + '.txt'), 'r',
              encoding='utf-8') as f:
        txt = ''.join(' '.join(line.split()) for line in f)

    tagged = my_tagger(txt)
    if len(tagged) != len(words):
        print(title)
        pos_tags = nltk.pos_tag(words)
    elif all(tag[0] == word for tag, word in zip(tagged, words)):
        # The custom tagger saw exactly the tokens of the words tier.
        pos_tags = tagged
    else:
        # Same length but different tokens: fall back to plain nltk
        # (less accurate, but aligned with the tier by construction).
        pos_tags = nltk.pos_tag(words)

    pos_intervals = [
        tgt.Interval(interval.start_time, interval.end_time, tag[1])
        for interval, tag in zip(word_intervals, pos_tags)
    ]
    pos_tier.add_annotations(pos_intervals)
    grid.add_tier(pos_tier)
    tgt.write_to_file(grid, os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def process_dataframe(self, df, grid, offset, end_time, left_padding,
                      right_padding, remember_time, grid_id):
    """
    Fill the grid by using the content of the data frame.

    Every column of *df* whose name contains neither "_starttime_" nor
    "_endtime_" is mapped to a tier of *grid* (looked up by name) and
    intervals are added to that tier:

    * corpus features that are not tokenized get one interval covering
      0 .. end_time + left_padding + right_padding;
    * other non-segment ("lexical") columns get one interval per row, timed
      by the columns named in ``self.feature_timing`` if available;
    * "segment" columns get one interval per row, timed by the ``..._1``
      timing columns.

    Row times are shifted by ``left_padding - offset``.  If *remember_time*
    is set, two points holding the original start and end time are added to
    the "Original timing" tier.  *grid_id* is only used in log messages.

    Returns the (modified) grid.

    NOTE(review): the block structure below was reconstructed from a source
    in which indentation was lost; the placement of ``number = 1`` and of
    the ``min(...)`` clamp should be checked against the upstream file.
    """
    corpus_features = [x for x, _ in self.resource.get_corpus_features()]
    # timing columns are consumed indirectly, never rendered as tiers
    data_columns = [
        x for x in df.columns
        if ("_starttime_" not in x and "_endtime_" not in x)
    ]
    # right edge of the whole grid, including both paddings
    max_stop = end_time + left_padding + right_padding
    for col in data_columns:
        interval = None
        # add the corpus IDs if no real feature is selected:
        if col == "coquery_invisible_corpus_id":
            if self._artificial_corpus_id:
                tier_name = "corpus_id"
            else:
                continue
            number = 1
        elif col.startswith("coquery_invisible"):
            continue
        elif col.startswith(("func", "coquery", "db")):
            # NOTE(review): `number` is not assigned on this path; it keeps
            # the value of an earlier column (or is unbound) -- confirm that
            # these tiers never appear in self.feature_timing.
            tier_name = self.session.translate_header(col)
        else:
            # column names look like "coq_<resource feature>_<number>"
            s = col.partition("coq_")[-1]
            rc_feature, _, number = s.rpartition("_")
            _, tab, feature = (
                self.resource.split_resource_feature(rc_feature))
            tier_name = "{}_{}".format(tab, feature)
        tier = grid.get_tier_by_name(tier_name)
        if (not tier_name.startswith("segment")
                and tier_name in corpus_features
                and not self.resource.is_tokenized(tier_name)):
            # corpus feature -- add one interval that
            # covers the whole text grid
            content = utf8(df[col].values[0])
            stop = max_stop
            interval = tgt.Interval(0, stop, content)
            # only the first such interval is stored in the tier
            if len(tier.intervals) == 0:
                tier.add_interval(interval)
        else:
            if not tier_name.startswith("segment"):
                # lexical feature -- add one interval per entry
                for i in df.index:
                    row = df.loc[i]
                    dtype = df.dtypes[col]
                    # cast back to the column dtype where the value supports
                    # it; plain Python objects have no astype()
                    try:
                        val = utf8(row[col].astype(dtype))
                    except AttributeError:
                        val = utf8(row[col])
                    try:
                        label_s, label_e = self.feature_timing[tier_name]
                        start_col = "coq_{}_{}".format(label_s, number)
                        end_col = "coq_{}_{}".format(label_e, number)
                        start = left_padding - offset + row[start_col]
                        stop = left_padding - offset + row[end_col]
                    except KeyError:
                        # no timing information: span the whole grid
                        start = 0
                        stop = max_stop
                    interval = tgt.Interval(start, stop, val)
                    try:
                        tier.add_interval(interval)
                    except ValueError as e:
                        # ValueErrors occur if the new interval overlaps
                        # with a previous interval.
                        # This can happen if no word boundaries are
                        # selected in a multi-word query.
                        pass
                        #logger.warn("{}: {} ({})".format(
                            #self.session.translate_header(tier.name),
                            #e, grid_id))
            else:
                # segment features
                start_label, end_label = self.feature_timing[tier_name]
                start_col = "coq_{}_1".format(start_label)
                end_col = "coq_{}_1".format(end_label)
                for i in df.index:
                    row = df.loc[i]
                    val = utf8(row[col])
                    try:
                        start = row[start_col]
                        end = row[end_col]
                    except KeyError:
                        # no timing columns: span 0 .. end_time
                        start = 0
                        end = end_time
                    interval = tgt.Interval(left_padding - offset + start,
                                            left_padding - offset + end,
                                            val)
                    try:
                        tier.add_interval(interval)
                    except ValueError as e:
                        # overlapping segment: report it and carry on
                        logger.warn("{}: {} ({})".format(
                            self.session.translate_header(tier.name),
                            e, grid_id))
        if interval:
            # make sure that the tier is always correctly padded to the
            # right:
            tier.end_time = max(tier.end_time,
                                interval.end_time + right_padding)
            tier.end_time = min(tier.end_time, max_stop)

    if remember_time:
        # store the unshifted start/end as point labels
        tier = grid.get_tier_by_name("Original timing")
        str_start = utf8(offset - left_padding)
        str_end = utf8(offset + grid.end_time - left_padding)
        tier.add_point(tgt.Point(0, str_start))
        tier.add_point(tgt.Point(grid.end_time, str_end))

    return grid
def duration(path, C_list, V_list, cid):
    """Compute syllable-duration rhythm metrics for ``<path>/sent/*.TextGrid``.

    For every TextGrid the annotation tier is selected by corpus id
    (``'jp'`` and ``'cn'``: first tier, ``'ru'``: second tier).  Every
    interval whose label is not ``'sil'`` counts as a syllable ('S').  Per
    file the following values are computed from the syllable durations and
    then averaged over all files (rounded to 9 decimals):

    * mean syllable duration,
    * ``deltaS``,
    * ``deltaS`` divided by the mean duration, times 100 (rounded to 4
      decimals per file),
    * ``rPVI_s`` and ``nPVI_S``.

    Args:
        path: Directory containing the ``sent`` sub-directory.
        C_list: Unused (kept for interface compatibility).
        V_list: Unused (kept for interface compatibility).
        cid: Corpus id, one of ``'jp'``, ``'cn'``, ``'ru'``.

    Returns:
        ``(ms, deltS, vs, rpvis, npvis)`` -- the averaged values in the
        order listed above.  Earlier versions computed these values and
        discarded them (the function returned None).

    Raises:
        ValueError: if TextGrid files are found but *cid* is unknown
            (previously an unbound-variable error).
    """
    # Local import: the module-level import block is not visible here.
    import os

    # Position of the annotation tier for each supported corpus.
    tier_index = {'jp': 0, 'cn': 0, 'ru': 1}.get(cid)

    # os.path.join instead of a hard-coded backslash so that the pattern
    # also matches on non-Windows systems.
    file_list = glob.glob(os.path.join(path, "sent", "*.TextGrid"))
    if file_list and tier_index is None:
        raise ValueError("unknown corpus id: %r" % (cid,))

    # One value per TextGrid for each metric.
    all_delta_s = []
    all_vs = []
    all_rpvis = []
    all_npvis = []
    all_ms = []

    for textgrid_path in file_list:
        textgrid = tgt.read_textgrid(textgrid_path,
                                     include_empty_intervals=True)
        tier = textgrid.get_tier_by_name(
            textgrid.get_tier_names()[tier_index])

        # Mirror the annotation tier as a 'CV' tier in the in-memory grid.
        # NOTE(review): the grid is never written back, so this tier has no
        # effect on the returned metrics; kept to preserve behaviour.
        textgrid.insert_tier(
            tgt.IntervalTier(tier.start_time, tier.end_time, name='CV'), 3)
        cv_tier = textgrid.get_tier_by_name('CV')

        syllable_durations = []
        for annotation in tier.intervals:
            # Everything except silence is treated as a syllable.
            label = 'S' if annotation.text != 'sil' else 'none'
            cv_tier.add_interval(tgt.Interval(annotation.start_time,
                                              annotation.end_time,
                                              text=label))
            if label == 'S':
                syllable_durations.append(
                    annotation.end_time - annotation.start_time)

        if not syllable_durations:
            # Nothing but silence: the mean would divide by zero, so the
            # file contributes no data point.
            continue

        mean_syl = sum(syllable_durations) / len(syllable_durations)
        delta_s = deltaS(syllable_durations)

        all_delta_s.append(delta_s)
        all_vs.append(round(delta_s / mean_syl * 100, 4))
        all_rpvis.append(rPVI_s(syllable_durations))
        all_npvis.append(nPVI_S(syllable_durations))
        all_ms.append(mean_syl)

    deltS = round(np.mean(all_delta_s), 9)
    vs = round(np.mean(all_vs), 9)
    rpvis = round(np.mean(all_rpvis), 9)
    npvis = round(np.mean(all_npvis), 9)
    ms = round(np.mean(all_ms), 9)
    return ms, deltS, vs, rpvis, npvis
def add_punctuation(title, textgrid_path, txt_path, output_path):
    """Add 'bp' and 'fp' punctuation tiers to ``<title>.TextGrid``.

    The lower-cased transcript ``<title>.txt`` is split into words
    (``detect_non_words`` / ``clean_word``).  If the number of words matches
    the words tier, the text found between consecutive words (blanks
    removed, '_' when empty) is used: the 'bp' tier labels each word with
    what precedes it (prefixed with 'start' for the first word), the 'fp'
    tier with what follows it (suffixed with 'end' for the last word).
    If the counts differ, every label is '<unk>' except the leading 'start'
    and the trailing 'end'.  The grid is written to *output_path* in short
    format.
    """
    # Collapse whitespace per line and lower-case; lines are concatenated
    # without a separator between them.
    with open(os.path.join(txt_path, title + '.txt'), 'r',
              encoding='utf-8') as f:
        txt = ''.join(' '.join(line.split()).lower() for line in f)

    candidates = [detect_non_words(w) for w in txt.split()]
    # Drop pure punctuation tokens, then strip punctuation around each word.
    txt_words = [clean_word(w, title, txt)
                 for w in candidates if w != '<punct>']

    tg = tgt.read_textgrid(os.path.join(textgrid_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()
    word_tier_name = [name for name in tier_names if 'words' in name][0]
    word_tier = tg.get_tier_by_name(word_tier_name)
    word_list = [w.text for w in word_tier._get_annotations()]
    word_durations = list(word_tier._get_annotations())

    if len(word_list) == len(txt_words):
        # boundaries = [0, w1_start, w1_end, w2_start, ..., len(txt)];
        # each consecutive pair (0,1), (2,3), ... delimits a stretch of
        # text lying between words.
        boundaries = [0]
        cursor = 0
        for word in txt_words:
            found = txt.find(word, cursor, len(txt))
            cursor = found + len(word)
            boundaries.extend((found, cursor))
        boundaries.append(len(txt))

        punctuation = [
            txt[boundaries[k]:boundaries[k + 1]].replace(' ', '')
            for k in range(0, len(boundaries) - 1, 2)
        ]
        punctuation[0] = 'start' + punctuation[0]
        punctuation[-1] = punctuation[-1] + 'end'
        punctuation = [p if p != '' else '_' for p in punctuation]
        bp = punctuation[0:-1]
        fp = punctuation[1:]
    else:
        unknown = ['<unk>'] * (len(word_durations) - 1)
        bp = ['start'] + unknown
        fp = unknown + ['end']

    def labelled_tier(suffix, labels):
        # One interval per word, carrying the label at the same position.
        tier = tgt.IntervalTier()
        tier.name = word_tier_name.replace('words', suffix)
        tier.add_annotations([
            tgt.Interval(word.start_time, word.end_time, labels[position])
            for position, word in enumerate(word_durations)
        ])
        return tier

    tg.add_tier(labelled_tier('bp', bp))
    tg.add_tier(labelled_tier('fp', fp))

    tgt.write_to_file(tg, os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def annotate(title, xml_path, textgrid_path, annotations_path):
    """Append syllable stress marks to the vowels of ``<title>.TextGrid``.

    Stress values and phone sequences are read per syllable from
    ``<title>.xml``; the phones of the 'phones' tier (without 'sil' and
    without phones lying under a '-' word) are grouped into syllables in the
    same order.  Phone labels are then normalised ('Q' -> '@@', 'ts' -> 't',
    'sp' -> 'sil') and every vowel gets the stress value of the syllable
    containing its midpoint appended.  The result is written to
    *annotations_path* in short format.

    The function is best-effort: if anything goes wrong for a title, the
    error is logged (with traceback) and no file is written.  Earlier
    versions swallowed the error silently, and also swallowed
    KeyboardInterrupt / SystemExit.
    """
    import logging  # local import: the module import block is not visible

    # Tokens of the xml that have no phonemic counterpart in the TextGrid.
    skipped_tokens = ('!', ',', '-', '.', '..', '...', ':', '?')
    vowels = frozenset([
        '@', '@@', 'a', 'aa', 'ai', 'au', 'e', 'e@', 'ei', 'i', 'i@', 'ii',
        'o', 'oi', 'oo', 'ou', 'u', 'u@', 'uh', 'uu'
    ])

    try:
        root = ET.parse(os.path.join(xml_path, title + '.xml')).getroot()

        # One entry per syllable, in document order.
        syllable_stress = []   # stress attribute of the syllable
        syllable_sizes = []    # number of phones in the syllable
        for paragraph in root[0]:
            for sentence in paragraph:
                for phrase in sentence:
                    for word in phrase:
                        if word.text in skipped_tokens:
                            continue
                        for syllable in word:
                            syllable_stress.append(syllable.attrib['stress'])
                            syllable_sizes.append(
                                len([ph.attrib['p'] for ph in syllable]))

        tg = tgt.read_textgrid(
            os.path.join(textgrid_path, title + '.TextGrid'))
        phones_tier = tg.get_tier_by_name('phones')
        word_tier = tg.get_tier_by_name('words')

        # '-' is read out as a word ("minus") in the audio but is skipped in
        # the xml, so its phones are removed to keep both sides aligned.
        for word in word_tier._get_annotations():
            if word.text == '-':
                phones_tier.delete_annotations_between_timepoints(
                    word.start_time, word.end_time,
                    left_overlap=False, right_overlap=False)

        phone_durations = [
            p for p in phones_tier._get_annotations() if p.text != 'sil'
        ]

        # Walk through the phones syllable by syllable; an IndexError here
        # means xml and TextGrid disagree on the number of phones.
        syllable_intervals = []
        cursor = 0
        for stress, size in zip(syllable_stress, syllable_sizes):
            group = [phone_durations[cursor + k] for k in range(size)]
            cursor += size
            syllable_intervals.append(
                tgt.Interval(group[0].start_time, group[-1].end_time,
                             str(stress)))

        # Helper tier used for time lookups only; it is not added to tg.
        syllable_tier = tgt.IntervalTier()
        syllable_tier.name = 'syllables'
        syllable_tier.start_time = phones_tier.start_time
        syllable_tier.end_time = phones_tier.end_time
        syllable_tier.add_annotations(syllable_intervals)

        for phone in phones_tier:
            phone.text = phone.text.replace('Q', '@@').replace(
                'ts', 't').replace('sp', 'sil')

        for phone in phones_tier:
            if phone.text in vowels:
                phone_centre = phone.start_time + (
                    phone.end_time - phone.start_time) / 2
                phone.text = phone.text + \
                    syllable_tier.get_annotations_by_time(
                        phone_centre)[0].text

        newTitle = os.path.join(annotations_path, title + '.TextGrid')
        tgt.write_to_file(tg, newTitle, format='short')
    except Exception:
        # Keep processing other titles, but leave a trace of what failed.
        logging.getLogger(__name__).exception(
            "Could not annotate %s; no output written", title)
def segment_speech(infile, outfile, wavfile, channel, filtertiername,
                   shiftonset, shiftoffset, denoise, trainbegin, trainwindow,
                   speechthresh, snradd):
    """Detect speech in *wavfile* and write a "seg.speech" tier to *outfile*.

    Steps, as implemented below:

    1. A first praat run provides intensity values from which the silence
       level is estimated (``find_silence_level``) and raised by *snradd*.
    2. A second praat run with that threshold yields the speech chunks.
    3. An energy threshold is derived from the mean distance of the chunks
       to the silence level, scaled by *speechthresh*.
    4. For every "speech" interval of the filter tier (or one interval
       spanning everything if *filtertiername* is None) the chunks above the
       energy threshold are merged into one result interval, shifted by
       *shiftonset* / *shiftoffset*.

    Raises AssertionError if no chunk could be assigned to any speech
    segment.

    NOTE(review): the block structure was reconstructed from a source in
    which indentation was lost -- check the nesting of the "Shortening..."
    branch against the upstream file.
    """
    logging.info("Segmenting speech in %s" % wavfile)
    duration, _ = util.get_wav_duration(wavfile)
    tg, tier = util.init_textgrid(infile, duration, "seg.speech")

    logging.info("Floor estimation...")
    # first pass: only the intensities are of interest
    _, intensities = segment_speech_praat(wavfile,
                                          channel,
                                          denoise=denoise,
                                          trainbegin=trainbegin,
                                          trainwindow=trainwindow)
    silencelevel = find_silence_level(intensities, trainwindow) + snradd
    logging.info("estimated floor noise level: %s" % silencelevel)

    logging.info("Segmentation...")
    # second pass: segment with the estimated silence level as threshold
    speech_chunks, intensities = segment_speech_praat(wavfile,
                                                      channel,
                                                      threshold=silencelevel,
                                                      denoise=denoise,
                                                      trainbegin=trainbegin,
                                                      trainwindow=trainwindow)
    for iv in speech_chunks:
        tier.add_annotation(iv)

    # NOTE(review): if the tier holds no chunk, len(dbvalues) is 0 and the
    # division below raises ZeroDivisionError before the assert further down
    # can report the problem.
    dbvalues = [x.as_db - silencelevel for x in tier]
    dbfilterthreshold = silencelevel + (sum(dbvalues) / len(dbvalues) *
                                        speechthresh)
    logging.info("speech filtering threshold: %s" % dbfilterthreshold)
    logging.info("vad segments: %s" % len(tier))

    if filtertiername is None:
        # no filter tier given: treat the whole recording as one segment
        filtertier = tgt.IntervalTier()
        filtertier.add_annotation(
            tgt.Annotation(tier.start_time, tier.end_time, "speech"))
        filtertiername = "<all>"
    else:
        filtertier = tg.get_tier_by_name(filtertiername)

    resulttier = tgt.IntervalTier(name="seg.speech")
    speechsegments = [s for s in filtertier if s.text == "speech"]
    logging.info("expected speech segments: %s" % len(speechsegments))

    stats_filtered = 0  # chunks dropped by the energy filter
    stats_all = 0       # chunks considered in total
    for speechseg in speechsegments:
        # chunks lying completely inside the segment
        speechivs = tier.get_annotations_between_timepoints(
            speechseg.start_time, speechseg.end_time)
        if len(speechivs) == 0:
            # retry, this time accepting chunks that cross the boundaries
            speechivs = tier.get_annotations_between_timepoints(
                speechseg.start_time,
                speechseg.end_time,
                left_overlap=True,
                right_overlap=True)
            if len(speechivs) > 0:
                logging.warning(
                    "Speech segments overlap with the boundaries of %s in %s. "
                    "VAD problem? Shortening..." %
                    (speechseg, filtertiername))
                # clip the chunks to the segment (modifies them in place)
                for siv in speechivs:
                    siv.start_time = max(speechseg.start_time, siv.start_time)
                    siv.end_time = min(speechseg.end_time, siv.end_time)
        if len(speechivs) == 0:
            logging.warning("No speech segments in %s overlap with %s" %
                            (filtertiername, speechseg))
            continue

        # keep only the chunks above the energy threshold
        dbfilteredivs = tgt.IntervalTier()
        for siv in [x for x in speechivs if x.as_db > dbfilterthreshold]:
            dbfilteredivs.add_annotation(siv)
        stats_filtered += len(speechivs) - len(dbfilteredivs)
        stats_all += len(speechivs)
        if len(dbfilteredivs) == 0:
            logging.warning(
                "All speech segments in %s dropped since their energy is below %.2f"
                % (speechseg, dbfilterthreshold))
            continue

        # one result interval from the first to the last surviving chunk
        start_time = min([x.start_time for x in dbfilteredivs])
        end_time = max([x.end_time for x in dbfilteredivs])
        resulttier.add_annotation(
            tgt.Interval(start_time + shiftonset, end_time + shiftoffset,
                         "speech"))

    assert stats_all > 0, "VAD was unable segment speech. Check silence region: calculated threshold %.2f db. " \
                          "Speech threshold: %.2f db." % (silencelevel, dbfilterthreshold)

    tier = resulttier
    logging.info(
        "Dropped %d of %d speech segments (%.2f%%) with energy below %.2f db" %
        (stats_filtered, stats_all,
         (stats_filtered / stats_all * 100), dbfilterthreshold))
    tier.name = "seg.speech"
    tg.add_tier(tier)
    logging.info("Writing %s" % outfile)
    tgt.io.write_to_file(textgrid=tg, filename=outfile, format="long")
# Usage: python segment_laughter.py <input_audio_file> <stored_model_path> <output_folder> <save_to_textgrid>
# NOTE(review): parse_inputs() also yields a threshold and a minimum length,
# which the usage line above does not mention -- confirm the actual CLI.
if __name__ == '__main__':
    # Parse the command line once.  The previous version called
    # parse_inputs() twice (once as a check, once for the values), repeating
    # whatever work or output that function performs.
    inputs = parse_inputs()
    if inputs:
        (input_path, model_path, output_path, threshold, min_length,
         save_to_textgrid) = inputs
        min_length = seconds_to_frames(min_length)

        laughs = laugh_segmenter.segment_laughs(input_path, model_path,
                                                output_path, threshold,
                                                min_length, save_to_textgrid)
        print("found %d laughs." % (len(laughs)))

        if not save_to_textgrid:
            for laugh in laughs:
                print(laugh)
        else:
            # One 'laugh' interval per detected segment.
            tg = tgt.TextGrid()
            laughs_tier = tgt.IntervalTier(
                name='laughter',
                objects=[
                    tgt.Interval(l['start'], l['end'], 'laugh')
                    for l in laughs
                ])
            tg.add_tier(laughs_tier)

            # <output_folder>/<input file stem>_laughter.TextGrid
            fname = os.path.splitext(os.path.basename(input_path))[0]
            textgrid_path = os.path.join(output_path,
                                         fname + '_laughter.TextGrid')
            tgt.write_to_file(tg, textgrid_path)
            print('Saved laughter segments in {}'.format(textgrid_path))
def add_syllables(title, input_path, syllabification_file_path, output_path):
    """Add a syllable tier to ``<title>.TextGrid`` and write the result.

    The TextGrid is read from ``input_path``.  The first tier whose name
    contains 'words' and the first whose name contains 'phones' are used.
    The phones of each word are syllabified with ``syllabify`` /
    ``stringify`` using the JSON data loaded from
    ``syllabification_file_path``.  Every syllable becomes one interval that
    runs from the start of its first phone to the end of its last phone and is
    labelled with the first digit found in its phone labels, or '0' when
    there is none.  The new tier is named after the words tier with 'words'
    replaced by 'sylls'.  The TextGrid is written in short format to
    ``output_path`` under the same file name.

    Args:
        title: File name of the TextGrid without the '.TextGrid' extension.
        input_path: Directory that contains the input TextGrid.
        syllabification_file_path: Path to the JSON file passed on to
            ``syllabify``.
        output_path: Directory the updated TextGrid is written to.

    Raises:
        IndexError: If no tier name contains 'words' or 'phones', or if the
            syllabified string holds more phones than the word has.
    """
    # Load language syllable structure for the syllabifier
    with open(syllabification_file_path) as f:
        language_syllables = json.load(f)

    # Load the textgrid and the names of all its tiers
    tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    # Select the first tier whose name contains 'words'
    words_tier_name = [name for name in tier_names if 'words' in name][0]
    words_tier = tg.get_tier_by_name(words_tier_name)

    # Select the first tier whose name contains 'phones'
    phones_tier_name = [name for name in tier_names if 'phones' in name][0]
    phones_tier = tg.get_tier_by_name(phones_tier_name)

    # Start an empty tier for syllables, named after the words tier
    syllable_tier = tgt.IntervalTier()
    syllable_tier.name = words_tier_name.replace('words', 'sylls')

    # Syllabify one word at a time
    for w in words_tier._get_annotations():
        # For the current word, get all of its phones
        phs = phones_tier.get_annotations_between_timepoints(
            w.start_time, w.end_time)

        # 'spn' phones are relabelled 'aa1' before syllabification.
        # NOTE(review): this edits the phone annotation objects themselves, so
        # the relabelling presumably ends up in the written TextGrid as well
        # -- confirm that this is intended.
        for ph in phs:
            if ph.text == 'spn':
                ph.text = 'aa1'

        # Transform the string of phones into a string of syllables
        # Format: ph1 ph2 . ph3 ph4 ph5 . ph6 etc.
        s = stringify(syllabify(' '.join([ph.text for ph in phs]),
                                language_syllables))

        # One list of phone labels per syllable.  Empty groups (such as the
        # single empty group obtained from splitting an empty string) are
        # dropped: they have no phone to take boundaries from and used to
        # raise an IndexError further down.
        sylls = [syll.split() for syll in s.split('.')]
        sylls = [syll for syll in sylls if syll]

        # Phones are consumed in order, so a running offset maps each syllable
        # onto its first and last phone interval.
        syllable_intervals = []
        first = 0
        for syll in sylls:
            last = first + len(syll) - 1
            # Stress is the first digit in the syllable's phone labels,
            # '0' when no phone carries a digit.
            digits = [char for char in ''.join(syll) if char.isdigit()]
            stress = digits[0] if digits else '0'
            syllable_intervals.append(
                tgt.Interval(phs[first].start_time, phs[last].end_time,
                             stress))
            first = last + 1

        syllable_tier.add_annotations(syllable_intervals)

    tg.add_tier(syllable_tier)
    tgt.write_to_file(tg, os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def duration(path, C_list, V_list, mode, cid):
    """Print rhythm metrics averaged over all TextGrids found under ``path``.

    For every matching TextGrid the intervals of one tier are classified as
    consonantal ('C'), vocalic ('V') or 'none' according to ``C_list`` and
    ``V_list``.  Per-file values are computed from the C and V durations
    with the module's ``deltaC``, ``deltaV``, ``rPVI_c``, ``rPVI_V``,
    ``nPVI_C`` and ``nPVI_V`` helpers, together with %V and the two
    variation coefficients.  The per-file values are averaged over all files,
    rounded to 9 decimals and printed as one comma-separated line that starts
    with ``path``.  Nothing is returned.

    Args:
        path: Root directory of the corpus (or of one speaker).
        C_list: Interval labels that count as consonants.
        V_list: Interval labels that count as vowels.
        mode: 'country' to search ``path\\*\\sent\\*.TextGrid`` or 'spker' to
            search ``path\\sent\\*.TextGrid``.
        cid: Corpus id that selects the tier by position: 'cn' and 'ru' use
            the third tier, 'jp' the second.

    Raises:
        ValueError: If ``mode`` is not supported, or if ``cid`` is not
            supported and at least one file has to be processed.
        ZeroDivisionError: If a file contains no 'C' or no 'V' interval.
    """
    # The directory layout differs slightly between a per-country and a
    # per-speaker run.  The patterns are kept with Windows separators.
    if mode == 'country':
        file_list = glob.glob(path + r"\*\sent\*.TextGrid")
    elif mode == 'spker':
        file_list = glob.glob(path + r"\sent\*.TextGrid")
    else:
        # Previously this fell through and failed later with an unbound
        # local variable.
        raise ValueError(
            "Unsupported mode %r; expected 'country' or 'spker'" % (mode,))

    # Position of the tier to analyse for each supported corpus id.
    tier_index_by_cid = {'cn': 2, 'jp': 1, 'ru': 2}

    # One entry per TextGrid; averaged after the loop.
    all_delta_c = []
    all_delta_v = []
    all_varco_c = []
    all_varco_v = []
    all_rpvi_c = []
    all_rpvi_v = []
    all_npvi_c = []
    all_npvi_v = []
    all_pct_v = []

    for file in file_list:
        if cid not in tier_index_by_cid:
            # Previously the tier stayed unbound for an unknown corpus id.
            raise ValueError(
                "Unsupported cid %r; expected one of %s"
                % (cid, sorted(tier_index_by_cid)))

        textgrid = tgt.read_textgrid(file, include_empty_intervals=True)
        tier_names = textgrid.get_tier_names()
        tier = textgrid.get_tier_by_name(tier_names[tier_index_by_cid[cid]])

        # Build a 'CV' tier over the same time span inside the in-memory
        # TextGrid.  The TextGrid is not written back by this function.
        cv_template = tgt.IntervalTier(tier.start_time, tier.end_time,
                                       name='CV')
        textgrid.insert_tier(cv_template, 3)
        cv_tier = textgrid.get_tier_by_name('CV')

        c_durations = []  # duration of every consonantal interval
        v_durations = []  # duration of every vocalic interval
        total_c = 0  # summed durations, used for %V and the means
        total_v = 0

        for interval in tier.intervals:
            seg_duration = interval.end_time - interval.start_time

            # Classify the label as consonant, vowel or neither.
            if interval.text in C_list:
                label = 'C'
            elif interval.text in V_list:
                label = 'V'
            else:
                label = 'none'

            if label == 'C':
                c_durations.append(seg_duration)
                total_c = total_c + seg_duration
            elif label == 'V':
                v_durations.append(seg_duration)
                total_v += seg_duration

            # Mirror the interval on the CV tier with its class label.
            cv_tier.add_interval(
                tgt.Interval(interval.start_time, interval.end_time,
                             text=label))

        # Total duration of C and V material ('none' intervals excluded).
        cv_total = total_v + total_c
        pct_v = total_v / cv_total

        mean_c = total_c / len(c_durations)
        mean_v = total_v / len(v_durations)
        varco_c = deltaC(c_durations) / mean_c * 100
        # NOTE(review): the vocalic coefficient is computed with deltaC, not
        # deltaV -- confirm that the two helpers are interchangeable.
        varco_v = deltaC(v_durations) / mean_v * 100

        all_delta_c.append(deltaC(c_durations))
        all_delta_v.append(deltaV(v_durations))
        all_varco_c.append(varco_c)
        all_varco_v.append(varco_v)
        all_rpvi_c.append(rPVI_c(c_durations))
        all_rpvi_v.append(rPVI_V(v_durations))
        all_npvi_c.append(nPVI_C(c_durations))
        all_npvi_v.append(nPVI_V(v_durations))
        all_pct_v.append(pct_v)

    # Average every metric over all files.
    deltC = round(np.mean(all_delta_c), 9)
    deltV = round(np.mean(all_delta_v), 9)
    vc = round(np.mean(all_varco_c), 9)
    vv = round(np.mean(all_varco_v), 9)
    rpvic = round(np.mean(all_rpvi_c), 9)
    rpviv = round(np.mean(all_rpvi_v), 9)
    npvic = round(np.mean(all_npvi_c), 9)
    npviv = round(np.mean(all_npvi_v), 9)
    perctV = round(np.mean(all_pct_v), 9)
    print(path, ',', perctV, ',', deltC, ',', deltV, ',', vc, ',', vv, ',',
          rpvic, ',', rpviv, ',', npvic, ',', npviv, ',')
def metircs(path):
    """Print the mean deltaC and deltaV over the TextGrids of one speaker.

    Consonant and vowel labels are read from two fixed text files, one label
    per line.  For every ``path\\sent\\*.TextGrid`` file the intervals of the
    second tier are classified as 'C', 'V' or 'none'.  The sample standard
    deviation of the C durations (deltaC) and of the V durations (deltaV) is
    computed per file.  The per-file values are averaged over all files and
    printed, followed by 'all set!'.  Nothing is returned.

    Args:
        path: Directory that contains the ``sent`` folder with TextGrids.

    Raises:
        OSError: If one of the two classification files cannot be opened.
        ZeroDivisionError: If a file contains neither 'C' nor 'V' intervals.
        statistics.StatisticsError: If a file has fewer than two 'C' or fewer
            than two 'V' intervals.
    """
    # Each classification file is read once, inside a context manager, so no
    # file handle is left open.
    with open(r'C:\Users\GIGABYTE\Desktop\VC_classification.txt') as c_file:
        C_list = [line.rstrip('\n\t') for line in c_file]
    with open(r'C:\Users\GIGABYTE\Desktop\All_V.txt') as v_file:
        V_list = [line.rstrip('\n\t') for line in v_file]
    print(V_list, C_list)

    # Only the per-speaker layout is searched.  An earlier per-country
    # pattern was overwritten before use and has been dropped.
    file_list = glob.glob(path + r"\sent\*.TextGrid")
    print('filename\t', '%V\t', 'deltaC\t', 'deltaV\t')

    # One entry per TextGrid.  These are created before the loop so that the
    # final mean covers all files instead of only the last one.
    all_delta_c = []
    all_delta_v = []

    for file in file_list:
        textgrid = tgt.read_textgrid(file, include_empty_intervals=True)
        tier = textgrid.get_tier_by_name(textgrid.get_tier_names()[1])

        # Build a 'CV' tier over the same time span inside the in-memory
        # TextGrid.  The TextGrid is not written back by this function.
        cv_template = tgt.IntervalTier(tier.start_time, tier.end_time,
                                       name='CV')
        textgrid.insert_tier(cv_template, 3)
        cv_tier = textgrid.get_tier_by_name('CV')

        c_durations = []
        v_durations = []
        total_c = 0
        total_v = 0

        for interval in tier.intervals:
            seg_duration = interval.end_time - interval.start_time

            # Classify the label as consonant, vowel or neither.
            if interval.text in C_list:
                label = 'C'
            elif interval.text in V_list:
                label = 'V'
            else:
                label = 'none'

            if label == 'C':
                c_durations.append(seg_duration)
                total_c = total_c + seg_duration
            elif label == 'V':
                v_durations.append(seg_duration)
                total_v += seg_duration

            # Mirror the interval on the CV tier with its class label.
            cv_tier.add_interval(
                tgt.Interval(interval.start_time, interval.end_time,
                             text=label))

        # %V belongs to the per-file report named in the header row.  That
        # report is currently not printed, so the value is not used further.
        pct_v = total_v / (total_v + total_c)

        # Sample standard deviation of the C and V durations of this file.
        all_delta_c.append(statistics.stdev(c_durations))
        all_delta_v.append(statistics.stdev(v_durations))

    # Mean over all files.  With no matching file NumPy yields NaN.
    print(np.mean(all_delta_c), np.mean(all_delta_v))
    print('all set!')