def main():
    """Write the part of a TextGrid that lies between two time offsets.

    The paths and offsets come from ``parse_arguments()`` (keys ``tg_path``,
    ``offset_start``, ``offset_end`` and ``outpath``).  If only one offset is
    given, the other one defaults to the matching boundary of the input
    TextGrid.  If ``outpath`` is not given, the result is written to
    ``<input basename>_part.TextGrid`` relative to the current working
    directory.

    Raises:
        ValueError: if neither ``offset_start`` nor ``offset_end`` is given.
    """
    # Parse the command-line arguments.
    args = parse_arguments()
    tg_path = args['tg_path']
    offset_start = args['offset_start']
    offset_end = args['offset_end']
    outpath = args['outpath']

    # Reject unusable arguments before doing any file I/O.
    if offset_start is None and offset_end is None:
        raise ValueError(
            'At least one of offset_start and offset_end must be specified.')

    # Read the TextGrid
    tg = tgt.read_textgrid(tg_path)
    tg_part = tgt.TextGrid()

    # At most one offset is missing at this point; fill it from the input.
    if offset_start is None:
        offset_start = tg.start_time
    elif offset_end is None:
        offset_end = tg.end_time

    for tr in tg:
        intr_part = tr.get_annotations_between_timepoints(
            offset_start, offset_end)
        # NOTE(review): every tier is rebuilt as an IntervalTier, whatever the
        # type of the source tier was -- confirm point tiers are not expected.
        tier_part = tgt.IntervalTier(name=tr.name,
                                     start_time=tr.start_time,
                                     end_time=tr.end_time,
                                     objects=intr_part)
        tg_part.add_tier(tier_part)

    if outpath is None:
        # Only the file name is kept, so the default output does not land next
        # to the input file but in the working directory.
        tg_filename = os.path.basename(tg_path)
        outpath = os.path.splitext(tg_filename)[0] + '_part.TextGrid'

    tgt.write_to_file(tg_part, outpath)
def parse_grid(grid_path):
    '''
    note : parse a textgrid and split the wav file that matches it into one
           clip (plus one text file) per interval of the "comma" tier
    arg  : grid_path : path of the textgrid to parse; the wav file is expected
                       next to it, with the same base name
    '''
    dir_path, grid_filename = os.path.split(grid_path)
    name = os.path.splitext(grid_filename)[0]
    wav_path = os.path.join(dir_path, name + ".wav")
    target_tier = "comma"

    # Load the tier and the audio that belong to this textgrid.
    tg_obj = tgt.read_textgrid(grid_path)
    tier_obj = tg_obj.get_tier_by_name(target_tier)
    wav_obj, sr = librosa.load(wav_path, sr=None)  # sr=None: keep native rate

    for idx, part in enumerate(tier_obj):
        first_sample = librosa.time_to_samples(part.start_time, sr=sr)
        last_sample = librosa.time_to_samples(part.end_time, sr=sr)

        # Outputs are named after the textgrid and the interval index and are
        # written relative to the current working directory.
        # NOTE(review): `librosa.output` is believed to have been removed in
        # newer librosa releases -- confirm the pinned version.
        clip_path = '{}_{}.wav'.format(name, idx)
        librosa.output.write_wav(clip_path, wav_obj[first_sample:last_sample],
                                 sr)

        text_path = "{}_{}.txt".format(name, idx)
        with open(text_path, "w") as f:
            f.write(part.text)
def add_lemmas(title, input1_path, output_path):
    """Add a lemmas tier to ``<title>.TextGrid`` and save the result.

    The TextGrid is read from ``input1_path``.  One lemma interval is created
    per interval of the words tier, using the text of the pos-tier interval at
    the same index, and the extended TextGrid is written to ``output_path`` in
    'short' format.

    Tiers are selected by substring: the first tier whose name contains 'pos'
    and the first whose name contains 'words'.  An ``IndexError`` is raised if
    either is missing, or if the pos tier has fewer intervals than the words
    tier.
    """
    tg = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    def first_name_containing(fragment):
        return [name for name in tier_names if fragment in name][0]

    pos_tier = tg.get_tier_by_name(first_name_containing('pos'))
    words_tier_name = first_name_containing('words')
    words_tier = tg.get_tier_by_name(words_tier_name)

    # The new tier is named after the words tier ('words' -> 'lemmas').
    lemmas_tier = tgt.IntervalTier()
    lemmas_tier.name = words_tier_name.replace('words', 'lemmas')

    # Each lemma spans exactly the same time range as its word.
    lemmas_intervals = []
    for position, word in enumerate(words_tier):
        lemmas_intervals.append(
            tgt.Interval(word.start_time, word.end_time,
                         lemmatize_word(word.text, pos_tier[position].text)))

    lemmas_tier.add_annotations(lemmas_intervals)
    tg.add_tier(lemmas_tier)

    destination = os.path.join(output_path, title + '.TextGrid')
    tgt.write_to_file(tg, destination, format='short')
def get_textgrid_sa(mfa_file, merge_shorter=0.15, pause_tokens=("",)):
    """Group aligned words into speech segments separated by long pauses.

    Args:
        mfa_file: path of the alignment TextGrid (read without empty
            intervals).
        merge_shorter: a word is merged into the previous segment unless the
            gap between the end of that segment and the start of the word is
            greater than this value (same time unit as the alignment).
        pause_tokens: words that are treated as pauses and skipped.  Any
            container supporting ``in`` works; the default is an immutable
            tuple so it cannot be shared and mutated between calls.

    Returns:
        A pair ``(stack, out_words)`` of equal length: ``stack[k]`` is the
        ``[start, end]`` of segment ``k`` and ``out_words[k]`` the list of
        words inside it.
    """
    read_textgrid = tgt.read_textgrid(mfa_file, include_empty_intervals=False)
    words, start_time, end_time = read_word_alignment(read_textgrid)
    assert len(words) == len(start_time) == len(end_time)

    stack = []
    out_words = []
    for word, start, end in zip(words, start_time, end_time):
        if word in pause_tokens:
            # Pauses carry no word; the gap they leave is measured below.
            continue
        if not stack or start - stack[-1][-1] > merge_shorter:
            # First word, or the pause before it is long enough: new segment.
            stack.append([start, end])
            out_words.append([word])
        else:
            # Short pause: extend the current segment up to this word's end.
            stack[-1][-1] = end
            out_words[-1].append(word)
    return stack, out_words
def generate_punctuation_dictionary(titles, input_path, output_path):
    """Build a one-hot dictionary of punctuation labels and save it as JSON.

    For every title, ``<title>.TextGrid`` is read from ``input_path`` and the
    interval texts of the first tier whose name contains 'bp' and of the first
    tier whose name contains 'fp' are tallied.  Labels whose frequency is among
    the highest 80% of the distinct frequencies are kept, '<unk>' is put in
    front, and each label is mapped to its one-hot vector (a list of ints).
    The mapping is written to ``punctuation_dictionary.json`` in
    ``output_path``.

    Raises:
        IndexError: if a TextGrid has no tier whose name contains 'bp' or 'fp'.
    """
    punctuation_dict = {}
    for title in titles:
        tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))
        tier_names = tg.get_tier_names()

        # Same tally for both punctuation tiers, 'bp' first and then 'fp'.
        for fragment in ('bp', 'fp'):
            tier_name = [name for name in tier_names if fragment in name][0]
            for interval in tg.get_tier_by_name(tier_name):
                punctuation_dict[interval.text] = (
                    punctuation_dict.get(interval.text, 0) + 1)

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    punct_tuples = sorted(punctuation_dict.items(),
                          key=lambda x: x[1],
                          reverse=True)

    # 0.8 means that of all the distinct frequencies we keep the highest 80%
    # (for stimuli this was 100%); excluding the lowest-frequency labels might
    # help make the model more robust.
    punct_freqs = sorted({count for _, count in punct_tuples}, reverse=True)
    kept_freqs = set(punct_freqs[:math.floor(len(punct_freqs) * 0.8)])

    # Labels found in the corpus, preceded by the unknown tag.
    punct_list = ['<unk>'] + [
        label for label, count in punct_tuples if count in kept_freqs
    ]

    # One identity matrix serves every label: row j is the one-hot of label j.
    identity = np.eye(len(punct_list), dtype=int)
    hv_dict = {
        label: [int(i) for i in identity[j]]
        for j, label in enumerate(punct_list)
    }

    with open(os.path.join(output_path, 'punctuation_dictionary.json'),
              'w') as f:
        json.dump(hv_dict, f)
def segment_phonemes(whichFold):
    '''
    For each annotation file of the given fold: read the matching wav file,
    collect the audio of every phoneme with concat_phoneme_audio() and hand the
    result to write_audio_to_file().
    Features can then be extracted from that audio (see extractFeatures
    script).

    whichFold: passed to get_list_anno_files() together with AUDIO_REC_IDS to
               select the annotation files
    all_audio_seg: the concatenated audio, one entry per item of PHONEMELIST
    '''
    annoFiles = get_list_anno_files(AUDIO_REC_IDS, whichFold)
    for annoFile in annoFiles:  # loop in all annotation files
        # One slot per phoneme.  List repetition makes every slot refer to the
        # same empty array object until concat_phoneme_audio() returns a new
        # list.
        all_audio_seg = [np.empty([0, 0], dtype='int16')] * len(
            PHONEMELIST
        )  # chunks of concatenated curr_file_audio for each phonemes
        # The wav file has the same name as the annotation file.
        audioFile = annoFile.replace('.TextGrid', '.wav')
        fs, curr_file_audio = scipy.io.wavfile.read(
            os.path.join(audioPath, audioFile))
        # get 1 channel: when the last axis has length 2, keep column 0 only
        if np.shape(curr_file_audio)[-1] == 2:
            curr_file_audio = curr_file_audio[:, 0]
        # NOTE(review): Python 2 print statement; this line is a syntax error
        # under Python 3.
        print audioFile, fs, curr_file_audio.shape
        ######### read phoneme annotations
        tgfile = tgt.read_textgrid(os.path.join(annoPath, annoFile))
        tgfile.get_tier_names()  # result is discarded
        tier_phonemes = tgfile.get_tier_by_name("phonemes")
        all_audio_seg = concat_phoneme_audio(tier_phonemes, curr_file_audio,
                                             all_audio_seg, fs)
        ## write the all_audio_seg for each phoneme to file
        # NOTE(review): the nesting of this call was reconstructed from a
        # whitespace-collapsed source; confirm it belongs inside the loop.
        write_audio_to_file(all_audio_seg)
def _get_label_line(self, path):
    '''
    Return the frame-aligned phone labels of one audio file.

    The TextGrid is looked up under self.root, at `path` with its trailing
    'flac' replaced by 'TextGrid'.  The result holds one label index (taken
    from self.phone2ix) per 0.01 sec timestep, which is the paper setting,
    read from the second tier of the TextGrid.
    '''
    textgrid_path = path[:-len('flac')] + 'TextGrid'
    textgrid = tgt.read_textgrid(self.root + textgrid_path)

    # Number of 0.01 sec steps covered by the TextGrid.
    n_steps = int((textgrid.end_time - textgrid.start_time) * 100)

    phone_tier = textgrid.tiers[1]
    position = 0
    interval = phone_tier[position]
    clock = textgrid.start_time

    labels = []
    for _ in range(n_steps):
        labels.append(self.phone2ix[interval.text])
        clock += 0.01
        # Move forward by at most one interval per step, and never past the
        # last interval of the tier.
        if interval.end_time < clock and position + 1 < len(phone_tier):
            position += 1
            interval = phone_tier[position]

    return tuple(labels)  # tuple is more memory efficient
def add_onsets_rhymes(title, input_path, output_path):
    """Add an onset/rhyme ('O'/'R') tier to ``<title>.TextGrid``.

    The TextGrid is read from ``input_path``.  Every syllable is split at the
    phone returned by ``calculate_nucleus_index``: the phones before it form
    the onset, the rest of the syllable the rhyme.  A syllable whose nucleus
    index is 0 only gets a rhyme.  The new tier is named after the words tier
    ('words' -> 'OR'), the syllable tier is moved behind it, and the TextGrid
    is written to ``output_path`` in 'short' format.
    """
    tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    def first_name_containing(fragment):
        return [name for name in tier_names if fragment in name][0]

    sylls_tier_name = first_name_containing('sylls')
    sylls_tier = tg.get_tier_by_name(sylls_tier_name)
    phones_tier = tg.get_tier_by_name(first_name_containing('phones'))

    onset_rhyme_tier = tgt.IntervalTier()
    onset_rhyme_tier.name = first_name_containing('words').replace(
        'words', 'OR')

    onset_rhyme_intervals = []
    for syll in sylls_tier._get_annotations():
        phs = phones_tier.get_annotations_between_timepoints(
            syll.start_time, syll.end_time)
        nucleus_index = calculate_nucleus_index(phs)

        rhyme_start = syll.start_time
        if nucleus_index != 0:
            # There is an onset: it ends where the phone before the nucleus
            # ends, and the rhyme starts at the nucleus.
            onset_rhyme_intervals.append(
                tgt.Interval(syll.start_time,
                             phs[nucleus_index - 1].end_time, 'O'))
            rhyme_start = phs[nucleus_index].start_time
        # With no onset the rhyme covers the whole syllable.
        onset_rhyme_intervals.append(
            tgt.Interval(rhyme_start, syll.end_time, 'R'))

    onset_rhyme_tier.add_annotations(onset_rhyme_intervals)
    tg.add_tier(onset_rhyme_tier)

    # Re-adding the syllable tier moves it after the onset/rhyme tier.
    tg.delete_tier(sylls_tier_name)
    tg.add_tier(sylls_tier)

    tgt.write_to_file(tg,
                      os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def read_in_single_textgrid(file_name):
    """Read a Praat TextGrid file and return it as a ``tgt`` TextGrid object.

    Args:
        file_name: path of the TextGrid file.

    Returns:
        The object returned by ``tgt.read_textgrid``.
    """
    return tgt.read_textgrid(file_name)
def extract_f0_timepoints(title, input_path, output_path, f0_sr):
    """Compute the timepoints at which f0 is sampled and save them as JSON.

    Sampling is syllable based.  For every interval of the first tier whose
    name contains 'sylls', the start time is always a timepoint; the rest of
    the interval is divided into ``round(duration / f0_sr)`` equal steps, so
    the spacing is close to ``f0_sr`` while syllable boundaries are hit
    exactly.  The end time of the last syllable closes the list.

    Args:
        title: base name of the TextGrid (input) and JSON (output) files.
        input_path: directory containing ``<title>.TextGrid``.
        output_path: directory that receives ``<title>.json``.
        f0_sr: target spacing between timepoints, in the time unit of the
            TextGrid.

    Raises:
        IndexError: if there is no 'sylls' tier or the tier is empty.
    """
    tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))

    tier_names = tg.get_tier_names()
    syllables_tier_name = [name for name in tier_names if 'sylls' in name][0]
    syllables_tier = tg.get_tier_by_name(syllables_tier_name)

    f0_timepoints = []
    for interval in syllables_tier:
        # The syllable boundary itself is always sampled.
        timepoint = interval.start_time
        f0_timepoints.append(float(timepoint))

        # How many times the target spacing approximately fits this interval.
        n_extractions = round(interval.duration() / f0_sr)
        if n_extractions == 0:
            # Interval shorter than the spacing: keep only its first point.
            continue

        # Spacing specific to this interval, so the steps divide it evenly.
        sampling_step = interval.duration() / n_extractions
        # One step fewer than n_extractions, because the last point is the
        # first point of the next interval.  The points are accumulated by
        # repeated addition, which keeps the values identical to earlier
        # outputs.
        for _ in range(n_extractions - 1):
            timepoint += sampling_step
            f0_timepoints.append(timepoint)

    # Add the very last time of the last syllable.
    f0_timepoints.append(float(syllables_tier[-1].end_time))

    with open(os.path.join(output_path, title + '.json'), 'w') as f:
        json.dump(f0_timepoints, f)
def get_textgrid_intervals(tgfilepath):
    """Return the intervals of the "words" tier of a TextGrid file.

    Args:
        tgfilepath: path of the TextGrid file.

    Returns:
        The ``intervals`` of the tier named "words", or ``False`` when the
        file could not be read (a message is printed in that case).
    """
    try:
        tg = tgt.read_textgrid(tgfilepath)
    except Exception:
        # Best effort on purpose: any read or parse failure is reported the
        # same way.  `Exception` (instead of a bare except) lets Ctrl-C and
        # SystemExit through.
        print('Missing textgrid file')
        return False
    words_tier = tg.get_tier_by_name("words")
    return words_tier.intervals
def child_speech_detector(mom_puzzle_textgrid, v):
    """
    Detects child speech segments in the gaps between annotated mom speech.

    For every interval of the 'Mother' tier, the audio between the end of that
    interval and the start of the next one (or the end of the data for the last
    interval) is assigned to ``v.data`` and passed through
    ``v.detect_speech()``.  Windows flagged as speech are converted to start
    times rounded to two decimals, and a start time that lies no more than 1
    after the latest one recorded so far is merged into that run.
    NOTE(review): the original description spoke of "1 min", but the threshold
    in the code is 1 in the unit of ``sample / v.rate`` -- presumably seconds;
    confirm.

    Args:
        mom_puzzle_textgrid: path of the manually annotated TextGrid; it must
            contain tiers named 'Mother' and 'Child'
        v: VoiceActivityDetector object; its ``data`` attribute is overwritten
            by this function and is left set to the last analysed slice

    Returns:
        speech_time: list of ``(latest_start, first_start)`` tuples, one per
            merged run of detected speech, sorted by ``first_start``
        mom_tier: manually annotated mom speech textgrid tier
    """
    # Maps the start time of the latest region of a run to the start time of
    # the first region of that run.
    speech_time = {}
    # Keep the full signal; v.data is replaced by slices of it below.
    data = v.data
    tg = tgt.read_textgrid(mom_puzzle_textgrid)
    mom_tier = tg.get_tier_by_name('Mother')
    # Not used afterwards; the lookup only has an effect if it fails.
    child_tier = tg.get_tier_by_name('Child')
    for i in range(len(mom_tier)):
        # The analysed gap starts where this mom interval ends ...
        end_sample = int(round(mom_tier[i].end_time * v.rate))
        # ... and stops at the next mom interval, or at the end of the data.
        if i == len(mom_tier) - 1:
            start_sample = len(data)
        else:
            start_sample = int(round(mom_tier[i + 1].start_time * v.rate))
        v.data = data[end_sample:start_sample]
        detected_windows = v.detect_speech()
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; this loop uses end_sample, so it is placed inside the loop
        # over mom intervals.
        for sample_start, speech in detected_windows:
            sample_start, speech = int(sample_start), int(speech)
            # Window positions are relative to the slice; make them absolute.
            sample_start += end_sample
            if speech:
                # merge with the previous run if the two starts are at most 1
                # apart
                region_start_time = sample_start * 1.0 / v.rate
                region_start_time = float(
                    "{0:.2f}".format(region_start_time))  # round to 2 decimals
                if len(speech_time):
                    largest_time_prev = max(speech_time.keys())
                    if region_start_time - 1 <= largest_time_prev:
                        # Re-key the run under the new start time.  The order
                        # of these two statements matters: if both keys are
                        # equal, the entry ends up deleted.
                        speech_time[region_start_time] = speech_time[
                            largest_time_prev]
                        del speech_time[largest_time_prev]
                    else:
                        # Too far from the previous run: start a new one.
                        speech_time[region_start_time] = region_start_time
                else:
                    # Very first detected region.
                    speech_time[region_start_time] = region_start_time
    # Turn the dict into a list of (key, value) tuples ordered by value.
    speech_time = sorted(speech_time.items(), key=operator.itemgetter(1))
    return speech_time, mom_tier
def extract_linguistic_labels(title, input1_path, input2_path, output_path):
    """Build one row of linguistic labels per f0 timepoint and save as JSON.

    Each row is ``[syllable_boundary, word_boundary, label, ...]``: the two
    flags are '1' when the timepoint equals a boundary of the syllable / word
    tier (start of the first interval or end of any interval) and '0'
    otherwise; they are followed by the text found at that timepoint in every
    tier whose name contains neither 'phones' nor 'words', in tier order.

    Args:
        title: base name of the input and output files.
        input1_path: directory containing ``<title>.json`` (f0 timepoints).
        input2_path: directory containing ``<title>.TextGrid``.
        output_path: directory that receives ``<title>.json``.

    Raises:
        IndexError: if the 'sylls' or 'words' tier is missing or empty, or if
            a label tier has no annotation at one of the timepoints.
    """
    with open(os.path.join(input1_path, title + '.json')) as f:
        f0_timepoints = json.load(f)

    tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    # Resolve the label tiers once instead of once per timepoint.
    label_tiers = [
        tg.get_tier_by_name(name) for name in tier_names
        if 'phones' not in name and 'words' not in name
    ]
    labels = [[tier.get_annotations_by_time(t)[0].text for tier in label_tiers]
              for t in f0_timepoints]

    def boundary_times(fragment):
        """Return the set of boundaries of the first tier matching fragment."""
        tier_name = [name for name in tier_names if fragment in name][0]
        tier = tg.get_tier_by_name(tier_name)
        times = {tier[0].start_time}
        times.update(interval.end_time for interval in tier)
        return times

    # Sets make the per-timepoint membership tests constant time.  Matching
    # is by exact equality, as the timepoints were generated from these very
    # boundaries.
    syllable_boundary_times = boundary_times('sylls')
    word_boundary_times = boundary_times('words')

    labels = [[
        '1' if t in syllable_boundary_times else '0',
        '1' if t in word_boundary_times else '0',
    ] + tier_labels for t, tier_labels in zip(f0_timepoints, labels)]

    with open(os.path.join(output_path, title + '.json'), 'w') as f:
        json.dump(labels, f)
def main():
    """Shift all boundaries of a TextGrid file by a fixed offset.

    Command line: ``shift file [-e ENCODING] [-f FORMAT] [-o OUTFILE]``.
    Without ``-o`` the result goes to ``<input base>.shifted<extension>``.
    Exits with status 1 when the input file cannot be read.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'shift',
        type=float,
        help='offset by which to shift the boundaries (positive or negative)')
    cli.add_argument('file', type=str, help='the textgrid file')
    cli.add_argument('-e',
                     '--encoding',
                     type=str,
                     default='utf-8',
                     help='file encoding (default "utf-8")')
    cli.add_argument('-f',
                     '--format',
                     type=str,
                     default='short',
                     help='the output format (default "short")')
    cli.add_argument(
        '-o',
        '--outfile',
        type=str,
        help='the output file (defaults to inputfile.shifted.Extension)')
    options = cli.parse_args()

    # Read file
    try:
        source = tgt.read_textgrid(filename=options.file,
                                   encoding=options.encoding)
    except IOError:
        print('An error occurred reading file {file}'.format(
            file=options.file))
        sys.exit(1)

    # Create new textgrid
    if options.outfile is not None:
        target_name = options.outfile
    else:
        stem, suffix = os.path.splitext(options.file)
        target_name = stem + '.shifted' + suffix
    shifted = tgt.TextGrid(filename=target_name)

    # Shift boundaries, tier by tier
    for tier in source:
        shifted.add_tier(tgt.util.shift_boundaries(tier, options.shift, 0))

    # Write file
    tgt.write_to_file(textgrid=shifted,
                      filename=shifted.filename,
                      format=options.format,
                      encoding=options.encoding)
def generate_lemmas_dictionary(titles, input1_path, input2_path, output_path):
    """Build a one-hot dictionary of frequent lemmas and save it as JSON.

    For every title, ``<title>.TextGrid`` is read from ``input1_path`` and the
    interval texts of the first tier whose name contains 'lemmas' are tallied.
    A lemma is kept when its frequency is among the highest 80% of the distinct
    frequencies AND it occurs in the control wordlist loaded from
    ``input2_path``; the kept lemmas appear in wordlist order after '<unk>'.
    Each entry is mapped to its one-hot vector (a list of ints) and the mapping
    is written to ``lemmas_dictionary.json`` in ``output_path``.

    Raises:
        IndexError: if a TextGrid has no tier whose name contains 'lemmas'.
    """
    # Control wordlist of the most frequent words of the language.  It is used
    # because we don't want to learn frequent but biased words such as
    # character names or words that are genre-related.
    wl = load_wordlist(input2_path)

    lemmas_dict = {}
    for title in titles:
        tg = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))
        tier_names = tg.get_tier_names()
        lemmas_tier_name = [name for name in tier_names if 'lemmas' in name][0]
        # Tally up all the lemmas in the textgrid.
        for interval in tg.get_tier_by_name(lemmas_tier_name):
            lemmas_dict[interval.text] = lemmas_dict.get(interval.text, 0) + 1

    # 0.8 means that of all the distinct frequencies we keep the highest 80%
    # (for stimuli this was 100%); excluding the lowest-frequency words might
    # help make the model more robust.
    lemmas_freqs = sorted(set(lemmas_dict.values()), reverse=True)
    kept_freqs = set(lemmas_freqs[:math.floor(len(lemmas_freqs) * 0.8)])
    kept_lemmas = {
        lemma for lemma, count in lemmas_dict.items() if count in kept_freqs
    }

    # The wordlist decides both membership and order of the final list.
    lemmas_list = ['<unk>'] + [w for w in wl if w in kept_lemmas]

    # One identity matrix serves every lemma: row j is the one-hot of lemma j.
    identity = np.eye(len(lemmas_list), dtype=int)
    hv_dict = {
        lemma: [int(i) for i in identity[j]]
        for j, lemma in enumerate(lemmas_list)
    }

    with open(os.path.join(output_path, 'lemmas_dictionary.json'), 'w') as f:
        json.dump(hv_dict, f)
def load_textgrid(file_name, tier_name='phone'):
    '''Load one tier of a textgrid & return its times and labels.

    Returns a float32 array with one [start, end] row per annotation (both
    rounded to 4 decimals) and the list of annotation texts.  Fails an
    assertion when the tier has no annotations.
    '''
    tier = tgt.read_textgrid(file_name).get_tier_by_name(tier_name)

    times = [[round(entry.start_time, 4), round(entry.end_time, 4)]
             for entry in tier]
    labels = [entry.text for entry in tier]

    assert len(times) > 0, f'"times" is empty: len={len(times)}'
    assert len(labels) > 0, f'"{tier_name}" is empty: len={len(labels)}'

    return np.array(times, dtype='float32'), labels
def f1(predictions, grid_file, error=(0, 0)):
    """Print precision, recall and F1 of pickled predictions against a grid.

    Args:
        predictions: path of a pickle file holding the predictions.
        grid_file: path of the reference TextGrid.
        error: tolerance passed to ``find`` for the positive pass; with any
            value other than ``(0, 0)`` the merged positives are removed from
            the negative-pass values before counting.

    Returns:
        ``merged_pos`` as returned by ``find`` for the positive pass.

    Raises:
        ZeroDivisionError: if a precision/recall denominator, or their sum, is
            zero.
    """

    def load_predictions():
        # NOTE(security): pickle.load can execute arbitrary code; only use it
        # on prediction files from a trusted source.
        with open(predictions, 'rb') as handle:
            return pickle.load(handle)

    # The grid and the predictions are loaded afresh for each pass, so the two
    # passes never share objects.
    vals = Testing(tgt.read_textgrid(grid_file),
                   4,
                   32000 * 0.256,
                   load_predictions(),
                   32000,
                   for_pos=False)
    false_pos, true_pos, merged_pos = find(
        Testing(tgt.read_textgrid(grid_file), 4, 32000 * 0.256,
                load_predictions(), 32000, True), error)

    if error != (0, 0):
        vals = (delete_vals(vals[0], merged_pos), vals[1])
    false_neg, true_neg = find(vals, (0, 0), for_pos=False)
    print(false_pos, true_pos, false_neg, true_neg)

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)
    print(precision, recall)

    score = 2 * (precision * recall / (precision + recall))
    print(score)
    return merged_pos
def main():
    """Command-line entry point: shift every boundary of a TextGrid.

    Positional arguments are the shift (float, may be negative) and the input
    file; ``-e``, ``-f`` and ``-o`` select encoding, output format and output
    file.  The default output name inserts '.shifted' before the extension of
    the input name.  Terminates with exit status 1 if reading fails.
    """
    argument_parser = argparse.ArgumentParser()
    add = argument_parser.add_argument
    add('shift',
        help='offset by which to shift the boundaries (positive or negative)',
        type=float)
    add('file', help='the textgrid file', type=str)
    add('-e',
        '--encoding',
        help='file encoding (default "utf-8")',
        default='utf-8',
        type=str)
    add('-f',
        '--format',
        help='the output format (default "short")',
        default='short',
        type=str)
    add('-o',
        '--outfile',
        help='the output file (defaults to inputfile.shifted.Extension)',
        type=str)
    parsed = argument_parser.parse_args()

    # Read file
    try:
        original = tgt.read_textgrid(filename=parsed.file,
                                     encoding=parsed.encoding)
    except IOError:
        print('An error occurred reading file {file}'.format(file=parsed.file))
        sys.exit(1)

    # Create new textgrid
    output_filename = parsed.outfile
    if output_filename is None:
        root, ext = os.path.splitext(parsed.file)
        output_filename = root + '.shifted' + ext
    result = tgt.TextGrid(filename=output_filename)

    # Shift boundaries
    for source_tier in original:
        shifted_tier = tgt.util.shift_boundaries(source_tier, parsed.shift, 0)
        result.add_tier(shifted_tier)

    # Write file
    tgt.write_to_file(textgrid=result,
                      filename=result.filename,
                      format=parsed.format,
                      encoding=parsed.encoding)
def open_intervalframe_from_textgrid(filepath,
                                     encoding='utf-8',
                                     asobjects=False,
                                     include_empty_intervals=False):
    """Import a textgrid and return a dict of IntervalFrames.

    Every non-empty tier of the textgrid becomes one IntervalFrame (a Pandas
    DataFrame), stored under the tier's name.  By default interval tiers are
    tokenized into start_time, end_time and text columns, and point tiers
    into time and mark columns.

    Arguments:
    filepath -- Path + filename of the TextGrid file to be imported.

    Keyword Arguments:
    encoding -- character encoding used to read the textgrid file
    asobjects -- If True, the frame keeps a single column holding the
                 interval/point objects (as defined in package tgt) instead
                 of the tokenized columns.
    include_empty_intervals -- If enabled, empty intervals between
                               annotations are also returned
    """
    textgrid = tgt.read_textgrid(filepath, encoding, include_empty_intervals)

    frames = {}
    for tier in textgrid.tiers:
        if not len(tier) > 0:
            # Empty tiers do not produce a frame.
            continue

        if isinstance(tier, tgt.IntervalTier):
            frame = pd.DataFrame(tier.intervals, columns=['intervals'])
            if asobjects == False:
                # Replace the object column by one column per attribute.
                objects = frame['intervals']
                frame['start_time'] = objects.map(lambda item: item.start_time)
                frame['end_time'] = objects.map(lambda item: item.end_time)
                frame['text'] = objects.map(lambda item: item.text)
                del frame['intervals']
        elif isinstance(tier, tgt.PointTier):
            frame = pd.DataFrame(tier.points, columns=['points'])
            if asobjects == False:
                objects = frame['points']
                frame['time'] = objects.map(lambda item: item.time)
                frame['mark'] = objects.map(lambda item: item.text)
                del frame['points']

        frames[tier.name] = frame
    return frames
def concatenate_textgrids(input_files, encoding):
    """Concatenate Tiers with matching names.

    TextGrids are concatenated in the order in which their files are given.
    All TextGrids must have the same number of tiers and the same tier names;
    otherwise an assertion fails.  The intervals of every TextGrid after the
    first are shifted, in place, by the summed end times of the TextGrids
    before it.  The resulting tiers are ordered as in the first TextGrid."""
    # Read all TextGrids into a list.
    textgrids = [tgt.read_textgrid(path, encoding) for path in input_files]

    # All TextGrids must have as many tiers as the first one.
    tier_counts = [len(textgrid) for textgrid in textgrids]
    assert all(count == tier_counts[0] for count in tier_counts[1:]),\
        'TextGrids have different numbers of tiers.'

    # ... and the same tier names, regardless of order.
    name_lists = [sorted(textgrid.get_tier_names()) for textgrid in textgrids]
    assert all(names == name_lists[0] for names in name_lists[1:]),\
        'Names of tiers do not match.'

    offset = 0
    merged = {}  # tier_name : tgt.Tier()
    for textgrid in textgrids:
        for tier in textgrid.tiers:
            if tier.name not in merged:
                # First occurrence of this tier: keep a copy of it as it is.
                merged[tier.name] = copy.deepcopy(tier)
                continue

            # Later occurrence: move its intervals behind everything seen so
            # far and append them to the first part.
            shifted = []
            for interval in tier.intervals:
                interval.left_bound += offset
                interval.right_bound += offset
                shifted.append(interval)
            merged[tier.name].add_intervals(shifted)
        offset += textgrid.end_time()

    # Create a new TextGrid with the tiers in the order of the first input.
    textgrid_concatenated = tgt.TextGrid()
    ordered_tiers = [merged[name] for name in textgrids[0].get_tier_names()]
    textgrid_concatenated.add_tiers(ordered_tiers)
    return textgrid_concatenated
def segment_textgrid_audio(textgrid_path, tier):
    """Cut the audio of every TextGrid in a directory into labelled segments.

    For each file in ``textgrid_path`` whose name contains '.TextGrid', the
    matching wav file (same name with 'TextGrid' replaced by 'wav') is sliced
    at every interval of the selected tier that has non-empty text.  Each
    slice is exported to ``<textgrid_path>/segments`` as
    ``<name>__<interval number>.wav``, where the number is the 1-based
    position of the interval in the tier (empty intervals are counted too).

    Args:
        textgrid_path: directory containing the TextGrid and wav files.
        tier: index of the tier, used to subscript ``text_grid.tiers``.
    """
    # List the directory first so the output folder is not part of the listing.
    directory = os.listdir(textgrid_path)
    try:
        os.mkdir(textgrid_path + '/segments')
    except OSError:
        # Typically the folder exists already; any other cause will surface
        # when the first segment is exported.
        print("Path already exists")

    for f in directory:
        if '.TextGrid' not in f:
            continue
        text_grid = tgt.read_textgrid(os.path.join(textgrid_path, f))
        wav = os.path.join(textgrid_path, f.replace('TextGrid', 'wav'))

        # Decode the recording once per TextGrid, and only if some interval
        # actually needs it (a missing wav file stays harmless otherwise).
        recording = None
        intervals = text_grid.tiers[tier].intervals
        for interval_num, interval in enumerate(intervals, start=1):
            if interval.text == '':
                continue
            if recording is None:
                recording = AudioSegment.from_file(wav)

            # Slice positions are the interval times multiplied by 1000.
            start_audio = interval.start_time * 1000
            end_audio = interval.end_time * 1000
            audio_segment = recording[start_audio:end_audio]
            audio_segment.export(textgrid_path + '/segments/' +
                                 f.replace('.TextGrid', '_') + '_' +
                                 str(interval_num) + '.wav',
                                 format="wav")
def read_textgrid(filename, sample_rate=200):
    """Read the interval tiers of a TextGrid into lists of labels.

    Args:
        filename: path of the TextGrid file.
        sample_rate: factor applied to the start and end times of every
            annotation.

    Returns:
        A dict mapping the lower-cased name of every interval tier to a list
        of ``[start * sample_rate, end * sample_rate, text]`` entries, or
        ``None`` when the file cannot be read (a message is printed).
        An annotation whose text is one of "p1".."p7" does not get an entry of
        its own: the text is appended, after "_", to the text of the previous
        entry.  In a tier named 'prosody', every text that does not end in
        one of these marks gets "_p0" appended.
    """
    import tgt

    try:
        tg = tgt.read_textgrid(filename)  #, include_empty_intervals=True)
    except Exception:
        # Best effort: report and signal failure with None.  `Exception`
        # instead of a bare except keeps Ctrl-C and SystemExit working.
        print("reading " + filename + " failed")
        return None

    prosody_marks = ["p1", "p2", "p3", "p4", "p5", "p6", "p7"]

    labs = {}
    for tier in tg.get_tier_names():
        tier_obj = tg.get_tier_by_name(tier)
        if tier_obj.tier_type() != 'IntervalTier':
            continue

        lab = []
        for a in tier_obj.annotations:
            try:
                # this was for some past experiment
                if a.text in prosody_marks:
                    lab[-1][-1] = lab[-1][-1] + "_" + a.text
                else:
                    lab.append([
                        a.start_time * sample_rate, a.end_time * sample_rate,
                        a.text
                    ])
            except Exception:
                # Deliberately skipped, e.g. a mark with no preceding entry.
                pass
        labs[tier.lower()] = lab

    try:
        for entry in labs['prosody']:
            if entry[2][-2:] not in prosody_marks:
                entry[2] += "_p0"
    except Exception:
        # No 'prosody' tier, or an entry that cannot be tagged: keep what has
        # been built so far.
        pass
    return labs
def getAmatrix(TextGrid_Directory, CsvDirectory, N):
    """Count label transitions in the 'Key-Child' tier of all TextGrids.

    Every file found by walking ``TextGrid_Directory`` (except '.DS_Store') is
    read, and for each pair of consecutive intervals of its 'Key-Child' tier
    the count ``A[previous_label, current_label]`` is incremented.  Scanning
    of a file stops at the first label index that is ``>= N``.

    Args:
        TextGrid_Directory: directory that is walked for TextGrid files.
        CsvDirectory: unused; kept for interface compatibility.
        N: number of labels; the result has shape (N, N).

    Returns:
        The (N, N) array of transition counts (not normalised).
    """
    A = np.zeros((N, N))
    for root, _dirs, filenames in os.walk(TextGrid_Directory):
        for f in filenames:
            if f == '.DS_Store':
                continue
            # Join with `root`, the directory currently being walked, so that
            # files in subdirectories are found and a missing trailing
            # separator on TextGrid_Directory does not matter.
            tg = tgt.read_textgrid(os.path.join(root, f))
            ipu_tier = tg.get_tier_by_name('Key-Child')

            # NOTE(review): `map` is called with a single argument, so it must
            # be a module-level label -> index function that shadows the
            # builtin -- confirm.
            prev = map(ipu_tier[0].text)
            for i in range(1, len(ipu_tier)):
                cur = map(ipu_tier[i].text)
                if cur >= N:
                    break
                A[prev, cur] += 1
                prev = cur
    return A
def getBmatrix_multi(num_label, TextGrid_Directory, TextGridname,
                     CsvDirectory):
    """Collect per-segment class probabilities for one TextGrid and score them.

    The CSV file at ``CsvDirectory`` is expected to hold, per row, a segment
    file name followed by comma-separated fields whose last 4 (when
    ``num_label == 4``) or last 5 values are the class probabilities.  For
    every interval of the 'Key-Child' tier whose segment file name
    (``<grid name>-<start>-<end>-<label>``) occurs in the CSV, the
    probabilities are collected in tier order; with 4 labels, intervals
    annotated 'HIC' are skipped.

    Returns:
        res: array of probabilities, one column per collected segment
        accuracy: fraction of segments whose argmax equals the annotation
        y: list of annotated label indices
        FSCORE: macro-averaged F1 of the argmax predictions
    """
    tg = tgt.read_textgrid(TextGrid_Directory + TextGridname)
    ipu_tier = tg.get_tier_by_name('Key-Child')
    filename = TextGridname.split('.')[0]

    # Probabilities are the trailing fields of each row.
    n_probs = 4 if num_label == 4 else 5

    # segment file name -> list of softmax probabilities
    prob = {}
    with open(CsvDirectory) as csvfile:
        for row in csv.reader(csvfile, delimiter=' ', quotechar='|'):
            fields = row[0].split(',')
            if fields[0] == 'total_filename':
                continue  # header row
            prob[fields[0]] = [
                float(fields[offset]) for offset in range(-n_probs, 0)
            ]

    y = []
    prob_order = []
    # Append the probabilities of the segments in tier order.
    for seg in ipu_tier:
        annotation = seg.text
        segment_filename = (filename + '-' + str(seg.start_time) + '-' +
                            str(seg.end_time) + '-' + annotation)
        if num_label == 4 and annotation == 'HIC':
            continue
        if segment_filename not in prob:
            continue
        prob_order.append(prob[segment_filename])
        # NOTE(review): `map` with one argument must be a module-level
        # label -> index function shadowing the builtin -- confirm.
        y.append(map(annotation))

    res = np.array(prob_order).T
    prediction_orignal = np.argmax(res, 0)
    accuracy = sum(y == prediction_orignal) * 1.0 / len(y)
    FSCORE = f1_score(y, prediction_orignal, average='macro')
    return res, accuracy, y, FSCORE
def get_tiernames_from_tgfile(read_file, print_encoding_info=False):
    """Read a TextGrid, trying several encodings, and list its tier names.

    The encodings 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16-le' and 'utf-16-be'
    are tried in this order; the first one that yields a TextGrid wins.

    Args:
        read_file: path of the TextGrid file.
        print_encoding_info: if true, print which encoding succeeded.

    Returns:
        A pair ``(tg, tier_names)``: the TextGrid object and the names of its
        tiers, in tier order.

    Raises:
        ValueError: if the file cannot be read with any of the encodings (an
            ERROR line is printed first); the last read error is chained as
            the cause.
    """
    encoding_types = [
        'utf-8', 'utf-8-sig', 'utf-16', 'utf-16-le', 'utf-16-be'
    ]

    tg = None
    last_error = None
    for encoding_type in encoding_types:
        try:
            candidate = tgt.read_textgrid(read_file, encoding=encoding_type)
        except Exception as err:
            # Wrong encoding (or unreadable file): remember why and move on.
            last_error = err
            continue
        if isinstance(candidate, tgt.core.TextGrid):
            tg = candidate
            if print_encoding_info:
                print(
                    f'\t\'{basename(read_file)}\' encoding in {encoding_type}.'
                )
            break

    if tg is None:
        print(f'ERROR: {read_file} not in {encoding_types}')
        raise ValueError(
            f'could not read {read_file} with any of {encoding_types}'
        ) from last_error

    tier_names = [tier.name for tier in tg.tiers]
    return tg, tier_names
def __init__(self, path):
    """Load all TextGrids below ``path`` and train the Markov generator.

    Every '*.TextGrid' file in ``<path>/textGrids/`` is read.  Each annotation
    is stored in ``self.annotations`` under its text, as a
    ``(file name without extension, annotation)`` pair, and each pair of
    consecutive annotations of a file (all tiers flattened, in tier order) is
    passed to ``self.feedMarkov``.
    """
    self.generator = MarkovGenerator(n=1, max=3)
    self.path = path + '/textGrids/'

    # annotation text -> (file name, annotation); later duplicates overwrite
    self.annotations = dict()

    extension = '.TextGrid'
    for tgFile in os.listdir(self.path):
        if not tgFile.endswith(extension):
            continue

        tg = tgt.read_textgrid(self.path + tgFile)
        filename = tgFile[:-len(extension)]
        file_annotations = [
            annotation for tier in tg.tiers for annotation in tier
        ]

        last_index = len(file_annotations) - 1
        for index, current in enumerate(file_annotations):
            self.annotations[current.text] = (filename, current)
            # The last annotation of a file has no successor to pair with.
            if index != last_index:
                self.feedMarkov(current, file_annotations[index + 1])
def main():
    """Export the annotations matching a regex from several TextGrids.

    ``args.textgrid`` and ``args.identifier`` are comma-separated lists of
    equal length.  For every TextGrid, the annotations of tier ``args.tier``
    whose text matches ``args.regex`` are collected, and all of them are
    written to ``args.output_path`` as a tab-separated table with the columns
    start, end, duration, label and identifier.
    """
    args = parser()
    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    assert len(tgfiles) == len(identifiers), "number of textgrids must match number of identifiers"

    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        tier = tgt.read_textgrid(tgfile).get_tier_by_name(args.tier)
        for match in tier.get_annotations_with_text(args.regex, regex=True):
            start = str(match.start_time)
            end = str(match.end_time)
            duration = str(match.duration())
            rows.append((start, end, duration, match.text, identifier))

    # Nothing is written until every TextGrid has been processed.
    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        out.writelines('\t'.join(row) + '\n' for row in rows)
def calculate_durations(textgrid, phone2idx):
    """Return the phone tokens of a TextGrid and their durations in frames.

    The "phones" tier is read including empty intervals.  Interval bounds are
    converted to frames with ``args.sr / args.hop_length`` frames per second
    and rounded; 'sil', 'sp', '' and '<unk>' become the blank token " ", and
    consecutive blanks are merged into one token.  The difference between the
    total frame count of the tier and the summed durations is added to the
    last token.

    Args:
        textgrid: path of the TextGrid file.
        phone2idx: mapping from token to integer id.

    Returns:
        ``(tokens, tokens_enc, durations)``: the token list and two
        LongTensors, or ``(None, None, None)`` if the tier contains 'spn'.
    """
    frames_per_second = args.sr / args.hop_length
    tg = tgt.read_textgrid(textgrid, include_empty_intervals=True)
    data_tier = tg.get_tier_by_name("phones")

    # Total number of frames covered by the tier.
    tier_length = data_tier.end_time - data_tier.start_time
    total_frames = ceil(tier_length * frames_per_second)

    # Start and end frame of each interval, rounded to whole frames.
    bounds = [(frames_per_second * d.start_time,
               frames_per_second * d.end_time) for d in data_tier]
    se_in_frames = np.round(np.array(bounds))
    durs = (se_in_frames[:, 1] - se_in_frames[:, 0]).astype(int)

    blank_set = ('sil', 'sp', 'spn', '', '<unk>')
    blank_token = " "

    # Map blanks and merge repeated blank tokens.
    tokens, durations = [], []
    for interval, frames in zip(data_tier, durs):
        x = interval.text
        if x == 'spn':
            # 'spn' is checked before the blank mapping: the file is rejected.
            return None, None, None
        if x in blank_set:
            x = blank_token
        if tokens and tokens[-1] == blank_token and x == blank_token:
            durations[-1] += frames
        else:
            tokens.append(x)
            durations.append(frames)

    tokens_enc = torch.LongTensor([phone2idx[token] for token in tokens])
    durations = torch.LongTensor(durations)

    # Add rounding error to final token
    durations[-1] += total_frames - durations.sum()
    return tokens, tokens_enc, durations
def get_textgrid_sa(mfa_file):
    """Return the speech segments of an alignment TextGrid.

    Words with empty text are skipped.  A word that starts exactly where the
    current segment ends (or earlier) extends that segment; a word that starts
    later opens a new one.

    Returns:
        A list of ``[start, end]`` pairs, one per segment.
    """
    read_textgrid = tgt.read_textgrid(mfa_file)
    words, start_time, end_time = read_word_alignment(read_textgrid)
    assert len(words) == len(start_time) == len(end_time)

    stack = []
    for word, start, end in zip(words, start_time, end_time):
        if word == "":
            continue
        if not stack or start > stack[-1][-1]:
            # First word, or a gap before it: non-contiguous, new segment.
            stack.append([start, end])
        else:
            stack[-1][-1] = end
    return stack
def main():
    """Write a TSV table of regex-matching annotations from several TextGrids.

    The comma-separated lists ``args.textgrid`` and ``args.identifier`` are
    paired element by element (they must have the same length).  Each
    annotation of tier ``args.tier`` whose text matches ``args.regex`` yields
    one line ``start, end, duration, label, identifier`` in
    ``args.output_path``, below a header line.
    """
    args = parser()
    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    assert len(tgfiles) == len(
        identifiers), "number of textgrids must match number of identifiers"

    def matching_rows(tgfile, identifier):
        """Return one tuple of strings per matching annotation of a file."""
        tg = tgt.read_textgrid(tgfile)
        tier = tg.get_tier_by_name(args.tier)
        matches = tier.get_annotations_with_text(args.regex, regex=True)
        return [(str(m.start_time), str(m.end_time), str(m.duration()),
                 m.text, identifier) for m in matches]

    # Collect everything first; the output file is only opened afterwards.
    rows = []
    for tgfile, identifier in zip(tgfiles, identifiers):
        rows += matching_rows(tgfile, identifier)

    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for fields in rows:
            out.write('\t'.join(fields) + '\n')
def textgrid2rttm(textgrid):
    '''
        Take in input the path to a text grid,
        and output a dictionary of lists *{spkr: [ (onset, duration) ]}*
        that can easily be written in rttm format.

        Every tier name is treated as a speaker.  All annotations of all
        tiers carrying that name are kept, whatever their label, as
        (start time, end time - start time) pairs.
    '''
    tg = tgt.read_textgrid(textgrid)

    rttm_out = dict()
    # loop over all speakers in this text grid
    for spkr in tg.get_tier_names():
        # onsets and durations of all annotations for this speaker
        rttm_out[spkr] = [
            (interval.start_time, interval.end_time - interval.start_time)
            for tier in tg.get_tiers_by_name(spkr)
            for interval in tier
        ]
    return rttm_out
def main():
    """Extract the part of a TextGrid lying between two time offsets.

    Command-line arguments are obtained from ``parse_arguments()`` as a
    mapping with the keys ``tg_path``, ``offset_start``, ``offset_end`` and
    ``outpath``. A missing start offset defaults to the start of the
    TextGrid and a missing end offset to its end; at least one of the two
    must be given. When no output path is given, the result is written to
    ``<input stem>_part.TextGrid`` (a relative path).

    Raises:
        Exception: if neither offset is specified.
    """
    args = parse_arguments()
    tg_path = args['tg_path']
    offset_start, offset_end = args['offset_start'], args['offset_end']
    outpath = args['outpath']

    tg = tgt.read_textgrid(tg_path)
    tg_part = tgt.TextGrid()

    # Fill in whichever offset is missing from the TextGrid's own bounds.
    if offset_start is None:
        if offset_end is None:
            raise Exception('At least one of offset_start and offset_end must be specified.')
        offset_start = tg.start_time
    elif offset_end is None:
        offset_end = tg.end_time

    for tr in tg:
        selected = tr.get_annotations_between_timepoints(offset_start, offset_end)
        # The new tier keeps the original tier's name and time bounds.
        tg_part.add_tier(
            tgt.IntervalTier(name=tr.name,
                             start_time=tr.start_time,
                             end_time=tr.end_time,
                             objects=selected))

    if outpath is None:
        stem = os.path.splitext(os.path.basename(tg_path))[0]
        outpath = stem + '_part.TextGrid'
    tgt.write_to_file(tg_part, outpath)
def stitch_textgrid(batch_title, sequenced_title, input2b_path, input2_path, output3_path):
    """Concatenate the word and phone tiers of several TextGrids into one file.

    For every title in ``sequenced_title`` (in order) the duration of
    ``<input2b_path>/<title>.wav`` is measured, and the annotations of the
    first tier whose name contains 'words' and of the first tier whose name
    contains 'phones' in ``<input2_path>/<title>.TextGrid`` are shifted by
    the summed duration of all preceding wav files. Phones labelled 'sil' or
    'sp' are dropped, except for the very last phone of the last file. The
    gaps this leaves in the phone tier are filled again and labelled 'sil'.
    The stitched tiers take the tier names found in the last TextGrid and
    are written in 'short' format to ``<output3_path>/<batch_title>.TextGrid``.

    Args:
        batch_title: base name (without extension) of the output TextGrid.
        sequenced_title: non-empty sequence of base names, in stitch order.
        input2b_path: directory holding the ``.wav`` files.
        input2_path: directory holding the input ``.TextGrid`` files.
        output3_path: directory the stitched TextGrid is written to.

    Raises:
        ValueError: if ``sequenced_title`` is empty.
        IndexError: if a TextGrid has no tier name containing 'words' or
            'phones'.
    """
    if len(sequenced_title) == 0:
        raise ValueError('sequenced_title must contain at least one title')

    new_tg = tgt.TextGrid()
    new_phone_tier = tgt.IntervalTier()
    final_phone_tier = tgt.IntervalTier()
    new_word_tier = tgt.IntervalTier()
    last_title_index = len(sequenced_title) - 1
    last_dur = 0.0

    for i, title in enumerate(sequenced_title):
        # Always release the wav handle, even if reading its header fails.
        wave_file = wave.open(os.path.join(input2b_path, title + '.wav'), 'rb')
        try:
            frame_rate = wave_file.getframerate()
            n_frames = wave_file.getnframes()
        finally:
            wave_file.close()
        # float() keeps the duration fractional under Python 2 as well.
        dur = n_frames / float(frame_rate)
        f0_start_time = 0.0
        f0_end_time = dur
        clip_end = last_dur + f0_end_time

        tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))
        tier_names = tg.get_tier_names()
        words_tier_name = [name for name in tier_names if 'words' in name][0]
        words_tier = tg.get_tier_by_name(words_tier_name)
        phones_tier_name = [name for name in tier_names if 'phones' in name][0]
        phones_tier = tg.get_tier_by_name(phones_tier_name)

        word_annotations = words_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)
        phone_annotations = phones_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)

        word_intervals = []
        for interval in word_annotations:
            # Shift the end first so the interval never has start > end.
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            word_intervals.append(interval)
        # Clamp the last word to the end of this audio file; a file may
        # legitimately contribute no words at all.
        if word_intervals and word_intervals[-1].end_time > clip_end:
            word_intervals[-1].end_time = clip_end

        phone_intervals = []
        last_phone_index = len(phone_annotations) - 1
        for j, interval in enumerate(phone_annotations):
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            is_final_phone = i == last_title_index and j == last_phone_index
            # Silences are dropped here and re-created by the gap filling
            # below; only the very last phone of the batch is always kept.
            if interval.text not in ('sil', 'sp') or is_final_phone:
                phone_intervals.append(interval)
        # A non-final file made up of silence only yields no phones.
        if phone_intervals and phone_intervals[-1].end_time > clip_end:
            phone_intervals[-1].end_time = clip_end

        new_word_tier.add_annotations(word_intervals)
        new_phone_tier.add_annotations(phone_intervals)
        last_dur += dur

    phones_tier_copy = new_phone_tier.get_copy_with_gaps_filled(
        start_time=None, end_time=None, empty_string='')

    # Label every filled gap as 'sil' and collect those intervals separately
    # from the real phones.
    sil_intervals = []
    phone_intervals = []
    for interval in phones_tier_copy:
        if interval.text == '':
            interval.text = 'sil'
            sil_intervals.append(interval)
        else:
            phone_intervals.append(interval)

    final_phone_tier.add_annotations(phone_intervals)
    final_phone_tier.add_annotations(sil_intervals)
    final_phone_tier.name = phones_tier_name
    new_word_tier.name = words_tier_name

    new_tg.add_tier(new_word_tier)
    new_tg.add_tier(final_phone_tier)
    tgt.write_to_file(new_tg,
                      os.path.join(output3_path, batch_title + '.TextGrid'),
                      format='short')
for j in intervals: if i == j.text: intervals.remove(j) break # intervals = [interval for interval in intervals if interval.text not in string.ascii_uppercase[:4]] return intervals def standardize(word): word = re.sub(r'\[.+?\]\s*', '', word) return word #handle the textgrid file files = [file for file in os.listdir(folder) if file.endswith(".TextGrid")] for f in files: tg = tgt.read_textgrid(f) tier = tg.get_tier_by_name('words') #get intervals intervals = [i for i in tier.intervals if i.text != 'sp' and i.text != 'sil'] intervals = remove_redundant(intervals) #match the file index_json = match_file(f[:len(f)-9]) #update json json_type = json.loads(json_list[index_json]) count = 0 print f for i in json_type["imgs"]: for j in i["texts"]: words = j["content"].split(" ")
def setUp(self):
    """Create the stream-frame, XIO, TextGrid and inc_reco fixtures.

    Stream frames are loaded from ``data/fseeksmaller.xio.gz``, written back
    to two new XIO files, and those files are re-opened with indexing. Two
    TextGrids are loaded as interval frames; the second one is saved to
    ``data/testif.TextGrid`` and read back with ``tgt``.
    """
    xio_source = "data/fseeksmaller.xio.gz"
    kinect = "lab-labtop/irioKinect"
    kinect2 = "lab-labtop/irioKinect 2"

    def load_window(sensor, **extra):
        # Each call gets its own fresh list objects for the field filters.
        return open_streamframe_from_xiofile(
            xio_source,
            sensor,
            window_size=5,
            with_fields=[],
            without_fields=[],
            discard_duplicates=True,
            start_time=0,
            end_time=13,
            relative=True,
            **extra
        )

    self.f = load_window(kinect2, timestamp_offset=10)
    self.f2 = load_window(kinect, timestamp_offset=10)
    self.fraw = load_window(kinect2, timestamp_offset="raw")
    self.ff = load_window(kinect2)

    # Round-trip: one file with a single sensor, one with both sensors.
    save_streamframe_to_xiofile({kinect2: self.f}, "data/sf_to_xio.xio.gz")
    save_streamframe_to_xiofile({kinect2: self.f, kinect: self.f2},
                                "data/sf_to_xio2.xio.gz")

    self.rsn = open_streamframe_from_xiofile(xio_source, "wrong/sensor/name")
    self.outtake_from_xio = XIOFile("data/sf_to_xio.xio.gz", indexing=True)
    self.outtake_from_xio_2 = XIOFile("data/sf_to_xio2.xio.gz", indexing=True)

    line_gen = self.outtake_from_xio.xio_quicklinegen(0, 13, True, True)
    quantized = list(quantize(line_gen, kinect2))
    self.q = quantized[0]["soundAngle"]

    textgrid_options = dict(encoding="utf-8",
                            asobjects=False,
                            include_empty_intervals=False)
    self.ivf = open_intervalframe_from_textgrid(
        "data/r1_12_15withPoint.TextGrid", **textgrid_options)["P"]
    self.cv = convert_pointtier_to_streamframe(self.ivf)
    self.pf = convert_streamframe_to_pointtier(self.f)
    sound_angle_points = convert_streamframe_to_pointtier(self.f)["soundAngle"]
    self.outtake_from_pf = sound_angle_points.ix[0]["time"]

    self.if_from_tg = open_intervalframe_from_textgrid(
        "data/r1-20120704-cam1-head-zm.TextGrid", **textgrid_options)
    self.if_from_tg_tier = self.if_from_tg.values()[0]
    save_intervalframe_to_textgrid(self.if_from_tg, "data/testif.TextGrid",
                                   encoding="utf-8")
    self.tg = tgt.read_textgrid("data/testif.TextGrid", encoding="utf-8",
                                include_empty_intervals=False)

    self.ic1 = open_intervalframe_from_increco("data/test.inc_reco")
    self.ic2 = open_intervalframe_from_increco("data/test.inc_reco", lastonly=True)
def main(wav_path, outfile_path, speech_path=None, speech_tier_name=None):
    """Extract per-segment respiratory features from a wav file into a CSV.

    The respiratory signal in ``wav_path`` is loaded with ``rip.RIP``, its
    baseline is removed, cycles and holds are located, and the detected
    annotations are saved to ``breath.TextGrid``. One CSV row is then written
    to ``outfile_path`` for every entry of ``resp.segments``.

    Args:
        wav_path: path of the wav file holding the respiratory signal.
        outfile_path: path of the CSV file to write.
        speech_path: optional TextGrid with speech annotations.
        speech_tier_name: name of the speech tier inside ``speech_path``.
            The speech tier is only loaded when both ``speech_path`` and
            ``speech_tier_name`` are given.

    The ``duty_cycle`` column is left empty for a segment whose partner
    segment (the previous one for 'out', the next one otherwise) does not
    exist, i.e. at the edges of the recording.
    """
    # Fixed column order; also lets an empty result produce a header-only
    # file instead of failing.
    fieldnames = ['file', 'start', 'end', 'segment', 'duty_cycle',
                  'duration', 'slope', 'amplitude', 'vol_start', 'vol_end',
                  'nholds']

    if speech_path is not None and speech_tier_name is not None:
        tg = tgt.read_textgrid(speech_path)
        speech = tg.get_tier_by_name(speech_tier_name)
    else:
        speech = None

    # Read the respiratory signal, detrend it, identify cycles and
    # holds.
    resp = rip.RIP.from_wav(wav_path, speech=speech)
    resp.remove_baseline()
    resp.find_cycles(include_holds=True)
    resp.find_holds()
    resp.estimate_range()
    resp.estimate_rel(30)
    resp.save_annotations('breath.TextGrid')

    fname = os.path.splitext(os.path.basename(wav_path))[0]

    # For each inhalation and exhalation, extract the respiratory
    # features.
    features = []
    for i, seg in enumerate(resp.segments):
        start = seg.start_time
        end = seg.end_time
        label = seg.text

        # The duty cycle relates a segment to the full cycle it belongs to:
        # an 'out' segment is paired with the segment before it, any other
        # segment with the one after it.
        duty_cycle = None
        if label == 'out':
            # Index -1 would silently wrap around to the last segment.
            if i > 0:
                cycle_start = resp.segments[i - 1].start_time
                duty_cycle = (end - start) / (end - cycle_start)
        else:
            try:
                cycle_end = resp.segments[i + 1].end_time
            except IndexError:
                # Trailing segment without a following partner.
                cycle_end = None
            if cycle_end is not None:
                duty_cycle = (end - start) / (cycle_end - start)

        holds = resp.holds.get_annotations_between_timepoints(
            start, end, left_overlap=True, right_overlap=True)

        # NOTE(review): the hold duration (trimmed to the segment bounds) is
        # computed but not written to the output; confirm whether a
        # 'holds_dur' column is wanted before exporting it.
        if len(holds):
            holds_dur = sum(h.end_time - h.start_time for h in holds)
            holds_dur -= max(0, start - holds[0].start_time)
            holds_dur -= max(0, holds[-1].end_time - end)
        else:
            holds_dur = 0

        # TODO: when a speech tier is available, extract time lag and the
        # duration, start/end level and slope of the surrounding intervals.

        features.append({
            'file': fname,
            'start': start,
            'end': end,
            'segment': label,
            'duty_cycle': duty_cycle,
            'duration': end - start,
            'slope': resp.extract_slope(start, end),
            'amplitude': resp.extract_amplitude(start, end),
            'vol_start': resp.extract_level(start),
            'vol_end': resp.extract_level(end),
            'nholds': len(holds),
        })

    with open(outfile_path, 'w') as fout:
        csv_out = csv.DictWriter(fout, fieldnames=fieldnames)
        csv_out.writeheader()
        csv_out.writerows(features)
def duration(path, C_list, V_list, cid):
    """Compute duration-based measures over all TextGrids under ``path``.

    Every file matching ``<path>\\sent\\*.TextGrid`` is read. One tier is
    selected according to ``cid`` ('jp' and 'cn' use the first tier name,
    'ru' the second), every interval whose text is not 'sil' is counted as a
    unit 'S', and the durations of these units are passed to ``deltaS``,
    ``rPVI_s`` and ``nPVI_S``. The per-file values are then averaged with
    ``np.mean`` and rounded to 9 decimals.

    ``C_list`` and ``V_list`` are not used by the active code (only by
    commented-out branches).

    NOTE(review): as visible here the function ends after computing the
    averaged values without returning or printing them -- confirm whether a
    trailing ``return``/``print`` is missing.
    NOTE(review): for any other ``cid`` value ``tier`` is never assigned (or
    keeps the value from the previous file) -- confirm the allowed values.
    NOTE(review): a file whose selected tier holds only 'sil' intervals makes
    ``len(S_duration)`` zero and the divisions below fail.
    """
    # glob returns every file matching the pattern as a list.
    file_list = glob.glob(path + r"\sent\*.TextGrid")
    # Per-file results are collected in these overall lists.
    AlldeltS = []
    all_vs = []
    all_rpvis = []
    all_npvis = []
    all_ms = []
    for file in file_list:
        # Read each TextGrid file in turn, keeping empty intervals.
        TextGrid = tgt.read_textgrid(file, include_empty_intervals=True)
        # Pick the tier to analyse by its position in the tier-name list.
        if cid == 'jp':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[0])
        if cid == 'cn':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[0])
        elif cid == 'ru':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[1])
        # All tier names of this TextGrid (not used further below).
        tier_name = TextGrid.get_tier_names()
        start = tier.start_time
        end = tier.end_time
        start_syl = tier.start_time
        end_syl = tier.end_time
        # Create an interval tier named 'CV' spanning the selected tier and
        # insert it into the TextGrid at position 3.
        tier2insert = tgt.IntervalTier(start, end, name='CV')
        TextGrid.insert_tier(tier2insert, 3)
        CV = TextGrid.get_tier_by_name('CV')
        annotation = tier.intervals
        num = []
        S_duration = []  # durations of the intervals labelled 'S'
        duration_all_S = 0  # summed duration of all 'S' intervals
        # Relabel every interval and accumulate the durations.
        for i in range(len(annotation)):
            old_name = annotation[i].text
            old_start_time = annotation[i].start_time
            old_end_time = annotation[i].end_time
            duration = old_end_time - old_start_time
            # Everything except 'sil' counts as a unit 'S'; the earlier
            # C_list / V_list membership tests are disabled.
            if old_name != 'sil':
                new_name = 'S'
            else:
                new_name = 'none'
            # Build the relabelled interval with the original boundaries.
            Interval = tgt.Interval(old_start_time, old_end_time, text=new_name)
            if new_name == 'S':
                S_duration.append(duration)
                duration_all_S = duration_all_S + duration
            # Add the relabelled interval to the 'CV' tier.
            CV.add_interval(Interval)
        # Mean duration of the 'S' intervals of this file.
        mean_syl = duration_all_S/len(S_duration)
        # Same value as mean_syl; used as the normaliser below.
        vacS = duration_all_S / len(S_duration)
        # deltaS result relative to the mean duration, as a percentage.
        # Presumably deltaS is a dispersion measure of the durations --
        # verify against its definition.
        vacroS = round(deltaS(S_duration) / vacS * 100, 4)
        AlldeltS.append(deltaS(S_duration))
        all_vs.append(vacroS)
        all_rpvis.append(rPVI_s(S_duration))
        all_npvis.append(nPVI_S(S_duration))
        all_ms.append(mean_syl)
    # Average the per-file values over all files.
    deltS = round(np.mean(AlldeltS), 9)
    vs = round(np.mean(all_vs), 9)
    rpvis = round(np.mean(all_rpvis), 9)
    npvis = round(np.mean(all_npvis), 9)
    ms = round(np.mean(all_ms), 9)
    # print(path, ',', ms, ',', deltS, ',', vs, ',', rpvis, ',', npvis, ',', )