def main():

    # Parse the command-line arguments.
    args = parse_arguments()
    tg_path = args['tg_path']
    offset_start = args['offset_start']
    offset_end = args['offset_end']
    outpath = args['outpath']

    # Read the TextGrid
    tg = tgt.read_textgrid(tg_path)
    tg_part = tgt.TextGrid()

    if offset_start is None and offset_end is None:
        raise Exception(
            'At least one of offset_start and offset_end must be specified.')
    elif offset_start is None:
        offset_start = tg.start_time
    elif offset_end is None:
        offset_end = tg.end_time

    for tr in tg:
        intr_part = tr.get_annotations_between_timepoints(
            offset_start, offset_end)
        tier_part = tgt.IntervalTier(name=tr.name,
                                     start_time=tr.start_time,
                                     end_time=tr.end_time,
                                     objects=intr_part)
        tg_part.add_tier(tier_part)

    if outpath is None:
        tg_dirname, tg_filename = os.path.split(tg_path)
        outpath = os.path.splitext(tg_filename)[0] + '_part.TextGrid'

    tgt.write_to_file(tg_part, outpath)
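The example above assumes a parse_arguments() helper that returns a dict of options; it is not shown on this page. A minimal argparse-based sketch that would supply the keys used above (argument names and flags are assumptions) could be:

import argparse

def parse_arguments():
    # Hypothetical sketch: returns a dict with the keys the example expects.
    ap = argparse.ArgumentParser(description='Extract part of a TextGrid.')
    ap.add_argument('tg_path', help='path to the input TextGrid')
    ap.add_argument('--offset-start', dest='offset_start', type=float, default=None)
    ap.add_argument('--offset-end', dest='offset_end', type=float, default=None)
    ap.add_argument('--outpath', default=None)
    return vars(ap.parse_args())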
Example #2
def parse_grid(grid_path):
    '''
    Parse a TextGrid and split the matching wav file into one segment per interval.

    Args:
        grid_path: path of the TextGrid file to parse
    '''

    dir_path = os.path.dirname(grid_path)
    grid_filename = os.path.basename(grid_path)
    name = os.path.splitext(grid_filename)[0]
    wav_path = os.path.join(dir_path, name + ".wav")
    target_tier = "comma"

    tg_obj = tgt.read_textgrid(grid_path)
    # get the objects (TextGrid tier and wav) that match the grid path
    tier_obj = tg_obj.get_tier_by_name(target_tier)
    wav_obj, sr = librosa.load(wav_path, sr=None)

    for idx in range(len(tier_obj)):
        part = tier_obj[idx]
        time_s = librosa.time_to_samples(part.start_time, sr=sr)
        time_e = librosa.time_to_samples(part.end_time, sr=sr)
        # librosa.output.write_wav was removed in librosa 0.8; use soundfile.write on newer versions
        librosa.output.write_wav('{}_{}.wav'.format(name, idx),
                                 wav_obj[time_s:time_e], sr)
        with open("{}_{}.txt".format(name, idx), "w") as f:
            f.write(part.text)
Example #3
def add_lemmas(title, input1_path, output_path):

    # Load textgrid
    tg = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    # Load pos tier
    pos_tier_name = [name for name in tier_names if 'pos' in name][0]
    pos_tier = tg.get_tier_by_name(pos_tier_name)

    # Load words tier
    words_tier_name = [name for name in tier_names if 'words' in name][0]
    words_tier = tg.get_tier_by_name(words_tier_name)

    # Start empty lemmas tier
    lemmas_tier = tgt.IntervalTier()
    lemmas_tier_name = [name for name in tier_names
                        if 'words' in name][0].replace('words', 'lemmas')
    lemmas_tier.name = lemmas_tier_name

    # Generate lemma intervals
    lemmas_intervals = [
        tgt.Interval(w_interval.start_time, w_interval.end_time,
                     lemmatize_word(w_interval.text, pos_tier[i].text))
        for i, w_interval in enumerate(words_tier)
    ]

    # Add lemmas to tier
    lemmas_tier.add_annotations(lemmas_intervals)
    tg.add_tier(lemmas_tier)

    tgt.write_to_file(tg,
                      os.path.join(output_path, title + '.TextGrid'),
                      format='short')
def get_textgrid_sa(mfa_file, merge_shorter=0.15, pause_tokens=[""]):
    read_textgrid = tgt.read_textgrid(mfa_file, include_empty_intervals=False)
    [words, start_time, end_time] = read_word_alignment(read_textgrid)
    assert len(words) == len(start_time) == len(end_time)
    stack = []
    out_words = []
    for i in range(len(words)):

        if words[i] in pause_tokens:  # pause skip
            continue

        if stack:
            if start_time[i] - stack[-1][-1] > merge_shorter:
                # the pause is longer than merge_shorter, so start a new segment

                out_words.append([words[i]])
                stack.append([start_time[i], end_time[i]])
            else:
                stack[-1][-1] = end_time[i]
                out_words[-1].append(words[i])

        else:

            stack.append([start_time[i], end_time[i]])
            out_words.append([words[i]])

    return stack, out_words
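This example (and Example #30 further down) calls a read_word_alignment() helper that is not shown. A plausible sketch, assuming it simply flattens a word tier of the MFA TextGrid into parallel lists (the tier name 'words' is an assumption), might be:

def read_word_alignment(textgrid, tier_name='words'):
    # Sketch only: collect word texts and their boundaries from one interval tier.
    tier = textgrid.get_tier_by_name(tier_name)
    words = [interval.text for interval in tier]
    start_time = [interval.start_time for interval in tier]
    end_time = [interval.end_time for interval in tier]
    return [words, start_time, end_time]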
def generate_punctuation_dictionary(titles, input_path, output_path):

    punctuation_dict = {}

    # Gather bp intervals first
    for title in titles:

        # Load the textgrid
        tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))

        # Load name of all tiers
        tier_names = tg.get_tier_names()

        # Select a tier whose name contains 'bp'
        bp_tier_name = [name for name in tier_names if 'bp' in name][0]
        bp_tier = tg.get_tier_by_name(bp_tier_name)

        # Tally up all the pos in the textgrids
        for interval in bp_tier:
            if interval.text not in punctuation_dict.keys():
                punctuation_dict[interval.text] = 1
            else:
                punctuation_dict[interval.text] += 1

        # Select a tier whose name contains 'fp'
        fp_tier_name = [name for name in tier_names if 'fp' in name][0]
        fp_tier = tg.get_tier_by_name(fp_tier_name)

        # Tally up all the pos in the textgrids
        for interval in fp_tier:
            if interval.text not in punctuation_dict.keys():
                punctuation_dict[interval.text] = 1
            else:
                punctuation_dict[interval.text] += 1

    punct_tuples = sorted(punctuation_dict.items(),
                          key=lambda x: x[1],
                          reverse=True)

    # 0.8 means that of all the frequencies we keep the highest 80% (for stimuli this was 100%)
    # excluding the lowest frequency words might help make the model more robust
    punct_freqs = sorted(set([punct_tuple[1] for punct_tuple in punct_tuples]),
                         reverse=True)
    punct_freqs = punct_freqs[:math.floor(len(punct_freqs) * 0.8)]
    punct_list = [
        punct_tuple[0] for punct_tuple in punct_tuples
        if punct_tuple[1] in punct_freqs
    ]

    # Extract the pos found in the corpus add the unk tag
    punct_list = ['<unk>'] + punct_list

    # Convert the list into hot vector dictionary
    hv_dict = {}
    for j, v in enumerate(punct_list):
        hv_dict[v] = [int(i) for i in np.eye(len(punct_list), dtype=int)[j]]

    with open(os.path.join(output_path, 'punctuation_dictionary.json'),
              'w') as f:
        json.dump(hv_dict, f)
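For illustration, the np.eye step above turns a token list into a one-hot dictionary; a tiny standalone example (the tokens are made up) behaves like this:

import numpy as np

punct_list = ['<unk>', ',', '.']
hv_dict = {v: [int(i) for i in np.eye(len(punct_list), dtype=int)[j]]
           for j, v in enumerate(punct_list)}
# hv_dict == {'<unk>': [1, 0, 0], ',': [0, 1, 0], '.': [0, 0, 1]}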
def segment_phonemes(whichFold):
    '''
    For each phoneme: extract the audio segments from many audio files and concatenate them into one wav file.
    Features can then be extracted from that audio (see the extractFeatures script).

    all_audio_seg: the concatenated audio
    '''

    annoFiles = get_list_anno_files(AUDIO_REC_IDS, whichFold)

    for annoFile in annoFiles:  # loop in all annotation files
        # chunks of concatenated curr_file_audio for each phoneme
        all_audio_seg = [np.empty([0, 0], dtype='int16')] * len(PHONEMELIST)

        audioFile = annoFile.replace('.TextGrid', '.wav')
        fs, curr_file_audio = scipy.io.wavfile.read(
            os.path.join(audioPath, audioFile))
        # get 1 channel
        if np.shape(curr_file_audio)[-1] == 2:
            curr_file_audio = curr_file_audio[:, 0]

        print(audioFile, fs, curr_file_audio.shape)

        ######### read phoneme annotations
        tgfile = tgt.read_textgrid(os.path.join(annoPath, annoFile))
        tgfile.get_tier_names()
        tier_phonemes = tgfile.get_tier_by_name("phonemes")

        all_audio_seg = concat_phoneme_audio(tier_phonemes, curr_file_audio,
                                             all_audio_seg, fs)

        ## write the all_audio_seg for each phoneme to file
        write_audio_to_file(all_audio_seg)
Example #7
    def _get_label_line(self, path):
        '''
        Returns an aligned label list where the i-th element is the phone
        active at the i-th timestep of the audio. Per the paper setting,
        the timestep is 0.01 sec.
        '''
        get_textgrid_path = lambda p: p[:-len('flac')] + 'TextGrid'
        textgrid_path = get_textgrid_path(path)

        textgrid = tgt.read_textgrid(self.root + textgrid_path)
        tg_len = textgrid.end_time - textgrid.start_time
        labels = [0 for x in range(int(tg_len * 100))]

        time_cur = textgrid.start_time
        tiers = textgrid.tiers[1]
        cur_tier = tiers[0]
        cur_tier_i = 0
        for i, lab in enumerate(labels):
            labels[i] = self.phone2ix[cur_tier.text]

            time_cur += 0.01
            if cur_tier.end_time < time_cur and cur_tier_i+1 < len(tiers):
                cur_tier_i += 1
                cur_tier = tiers[cur_tier_i]

        return tuple(labels) # tuple is more memory efficient
Example #8
def add_onsets_rhymes(title, input_path, output_path):

    # Load the textgrid
    tg = tgt.read_textgrid(os.path.join(input_path, title + '.TextGrid'))

    # Load name of all tiers
    tier_names = tg.get_tier_names()

    # Select a tier whose name contains 'syllables'
    sylls_tier_name = [name for name in tier_names if 'sylls' in name][0]
    sylls_tier = tg.get_tier_by_name(sylls_tier_name)

    # Select a tier whose name contains 'phones'
    phones_tier_name = [name for name in tier_names if 'phones' in name][0]
    phones_tier = tg.get_tier_by_name(phones_tier_name)

    # Start an empty tier for onset-rhymes
    onset_rhyme_tier = tgt.IntervalTier()
    onset_rhyme_tier_name = [name for name in tier_names
                             if 'words' in name][0].replace('words', 'OR')
    onset_rhyme_tier.name = onset_rhyme_tier_name

    onset_rhyme_intervals = []

    for syll in sylls_tier._get_annotations():

        #print(syll)
        phs = phones_tier.get_annotations_between_timepoints(
            syll.start_time, syll.end_time)

        nucleus_index = calculate_nucleus_index(phs)

        # If the first phone is the nucleus (it carries the stress digit), the syllable has no onset, so we only add a rhyme
        if nucleus_index == 0:
            onset_rhyme_intervals.append(
                tgt.Interval(syll.start_time, syll.end_time, 'R'))

        # If the onset is present add onset and rhyme intervals
        else:
            onset_rhyme_intervals.append(
                tgt.Interval(syll.start_time, phs[nucleus_index - 1].end_time,
                             'O'))

            onset_rhyme_intervals.append(
                tgt.Interval(phs[nucleus_index].start_time, syll.end_time,
                             'R'))

    # Add all the intervals to the onset rhyme tier
    onset_rhyme_tier.add_annotations(onset_rhyme_intervals)

    # Add the onset rhyme tier to the TextGrid
    tg.add_tier(onset_rhyme_tier)

    # Move syll tier after the onset_rhyme_tier
    tg.delete_tier(sylls_tier_name)
    tg.add_tier(sylls_tier)

    tgt.write_to_file(tg,
                      os.path.join(output_path, title + '.TextGrid'),
                      format='short')
Example #9
def read_in_single_textgrid(file_name):
    #file_name = os.path.join('/home/zx/Dolphin/Data/textgrid/test/20191011/zcz/1616893', '37429442330632_2019_09_18_01_45_11.TextGrid')
    tg_data = tgt.read_textgrid(
        file_name)  # read a Praat TextGrid file and return a TextGrid object
    #    print(tg_data)
    tier_names = tg_data.get_tier_names()  # get names of all tiers
    #    print (tier_names)
    return tg_data
def extract_f0_timepoints(title, input_path, output_path, f0_sr):

	tg = tgt.read_textgrid(os.path.join(input_path,title+'.TextGrid'))

	# Frame period (in seconds) of the original f0 analysis; not used below
	audio_sr = 0.005

	# Load name of all tiers
	tier_names = tg.get_tier_names()

	# Select a tier whose name contains 'sylls'
	syllables_tier_name = [name for name in tier_names if 'sylls' in name][0]
	syllables_tier = tg.get_tier_by_name(syllables_tier_name)

	# In this list we collect the timepoints where we sample the f0
	f0_timepoints = [] 

	# Sampling is syllable based, so we loop over each syllable
	for interval in syllables_tier:

		# add the first time of the syllable to the list
		interval_start = interval.start_time
		f0_timepoints.append(float(interval_start))

		# plot the syllable boundary
		#plt.axvline(x=interval_start, color='orange', linewidth=1)

		# How many times the default sampling rate approx. fits into this interval
		n_extractions = round(interval.duration()/f0_sr)

		# This happens if the interval is shorter than the sampling rate, so we just keep the first value for this interval
		if n_extractions == 0:
			pass

		else:
			# Based on the N of extractions calculate a new sampling rate, which is specific to this interval
			sampling_step = interval.duration()/(n_extractions)

			# Extract based on the new sampling rate 
			# The loop is for n_extractions-1 times because the last point is the first point of the next interval
			for j in range(0, n_extractions-1):
				interval_start += sampling_step
				f0_timepoints.append(interval_start)
				
				# Plot extraction points between boundaries
				#plt.axvline(x=interval_start, color='gray', linewidth=0.5)

	# Add the very last time of the last syllable
	interval_start = float(syllables_tier[-1].end_time)
	f0_timepoints.append(interval_start)

	# Plot the last point of the last syllable
	#plt.axvline(x=interval_start, color='orange', linewidth=1)

	#plt.show()

	with open(os.path.join(output_path,title+'.json'), 'w') as f:
		json.dump(f0_timepoints, f)
Example #11
def get_textgrid_intervals(tgfilepath):
    #print(tgfilepath)
    try:
        tg = tgt.read_textgrid(tgfilepath)
    except:
        print('Missing textgrid file')
        return False
    tiers = tg.get_tier_by_name("words")
    return tiers.intervals
def child_speech_detector(mom_puzzle_textgrid, v):
    """ Detects child speech segments by:
		filter out manually annotated mom speech segments
		compute child speech band energy per frame, set threhold of being voiced or not
		apply median filter to the results of being voiced or not per frame
		if two voiced frames are seperated by only 1 min, merge 
		return time intervals of detected child speech 

	Args:
		mom_puzzle_textgrid: manually annotated mom speech textgrids 
		v: VoiceActivityDetector object 
	
	Returns:
		speech_time: the time intervals of detected child speech segments 
		mom_tier: manually annotated mom speech textgrid tier 
	"""
    speech_time = {}
    data = v.data
    tg = tgt.read_textgrid(mom_puzzle_textgrid)
    mom_tier = tg.get_tier_by_name('Mother')
    child_tier = tg.get_tier_by_name('Child')
    for i in range(len(mom_tier)):
        end_sample = int(round(mom_tier[i].end_time * v.rate))
        if i == len(mom_tier) - 1:
            start_sample = len(data)
        else:
            start_sample = int(round(mom_tier[i + 1].start_time * v.rate))

        v.data = data[end_sample:start_sample]
        detected_windows = v.detect_speech()
        for sample_start, speech in detected_windows:
            sample_start, speech = int(sample_start), int(speech)
            sample_start += end_sample
            if speech:
                # extend the previous region if the two detections are separated by no more than 1 second
                region_start_time = sample_start * 1.0 / v.rate
                region_start_time = float(
                    "{0:.2f}".format(region_start_time))  # round to 2 floats

                if len(speech_time):
                    largest_time_prev = max(speech_time.keys())
                    if region_start_time - 1 <= largest_time_prev:
                        speech_time[region_start_time] = speech_time[
                            largest_time_prev]
                        del speech_time[largest_time_prev]
                    else:
                        speech_time[region_start_time] = region_start_time
                else:
                    speech_time[region_start_time] = region_start_time

    speech_time = sorted(speech_time.items(), key=operator.itemgetter(1))

    return speech_time, mom_tier
def extract_linguistic_labels(title, input1_path, input2_path, output_path):

    # Load the f0_timepoints
    with open(os.path.join(input1_path, title + '.json')) as f:
        f0_timepoints = json.load(f)

    # Load textgrid
    tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))
    tier_names = tg.get_tier_names()

    pos_tier_name = [name for name in tier_names if 'pos' in name][0]

    # Extract features from TextGrid, except for phones
    labels = []
    for t in f0_timepoints:
        labels_sublist = []
        for tier in tier_names:
            if 'phones' not in tier and 'words' not in tier:
                label = tg.get_tier_by_name(tier).get_annotations_by_time(
                    t)[0].text
                labels_sublist.append(label)
        labels.append(labels_sublist)

    # Extract syllable boundaries
    syllables_tier_name = [name for name in tier_names if 'sylls' in name][0]
    syllables_tier = tg.get_tier_by_name(syllables_tier_name)

    syllable_boundary_times = [syllables_tier[0].start_time] + [
        interval.end_time for interval in syllables_tier
    ]
    syllable_boundaries = [['1'] if t in syllable_boundary_times else ['0']
                           for t in f0_timepoints]

    # Extract word boundaries
    words_tier_name = [name for name in tier_names if 'words' in name][0]
    words_tier = tg.get_tier_by_name(words_tier_name)

    word_boundary_times = [words_tier[0].start_time
                           ] + [interval.end_time for interval in words_tier]
    word_boundaries = [['1'] if t in word_boundary_times else ['0']
                       for t in f0_timepoints]

    syllable_word_boundaries = [
        syllable_boundaries[i] + word_boundaries[i]
        for i in range(len(syllable_boundaries))
    ]

    labels = [
        syllable_word_boundaries[i] + labels[i] for i in range(len(labels))
    ]

    with open(os.path.join(output_path, title + '.json'), 'w') as f:
        json.dump(labels, f)
def main():
	ap = argparse.ArgumentParser()
	ap.add_argument(
		'shift',
		help='offset by which to shift the boundaries (positive or negative)',
		type=float)
	ap.add_argument(
		'file',
		help='the textgrid file',
		type=str)
	ap.add_argument(
		'-e', '--encoding',
		help='file encoding (default "utf-8")',
		default='utf-8',
		type=str)
	ap.add_argument(
		'-f', '--format',
		help='the output format (default "short")',
		default='short',
		type=str)
	ap.add_argument(
		'-o', '--outfile',
		help='the output file (defaults to inputfile.shifted.Extension)',
		type=str)
	arguments = ap.parse_args()

	# Read file
	try:
		tg = tgt.read_textgrid(
				filename=arguments.file,
				encoding=arguments.encoding)
	except IOError:
		print('An error occurred reading file {file}'.
				format(file=arguments.file))
		sys.exit(1)
	# Create new textgrid
	if arguments.outfile is None:
		basename, extension = os.path.splitext(arguments.file)
		output_filename = basename + '.shifted' + extension
	else:
		output_filename = arguments.outfile
	tg_shifted = tgt.TextGrid(filename=output_filename)
	# Shift boundaries
	for tier in tg:
		ts = tgt.util.shift_boundaries(tier, arguments.shift, 0)
		tg_shifted.add_tier(ts)
	# Write file
	tgt.write_to_file(
		textgrid=tg_shifted,
		filename=tg_shifted.filename,
		format=arguments.format,
		encoding=arguments.encoding)
Example #15
def generate_lemmas_dictionary(titles, input1_path, input2_path, output_path):

    # Load a control wordlist of the most frequent words in the eng language
    wl = load_wordlist(input2_path)

    # Collect all of the lemmas from the textgrids in only keep the ones in the control wordlist
    # We use the list because we don't want to learn frequent but biased words such as character names or words that are genre-related
    lemmas_dict = {}

    for title in titles:

        # Load the textgrid
        tg = tgt.read_textgrid(os.path.join(input1_path, title + '.TextGrid'))

        # Load name of all tiers
        tier_names = tg.get_tier_names()

        # Select a tier whose name contains 'lemmas'
        lemmas_tier_name = [name for name in tier_names if 'lemmas' in name][0]
        lemmas_tier = tg.get_tier_by_name(lemmas_tier_name)

        # Tally up all the lemmas in the textgrids
        for interval in lemmas_tier:
            if interval.text not in lemmas_dict.keys():
                lemmas_dict[interval.text] = 1
            else:
                lemmas_dict[interval.text] += 1

    lemmas_tuples = sorted(lemmas_dict.items(),
                           key=lambda x: x[1],
                           reverse=True)

    # 0.8 means that of all the frequencies we keep the highest 80% (for stimuli this was 100%)
    # excluding the lowest frequency words might help make the model more robust
    lemmas_freqs = sorted(set(
        [lemmas_tuple[1] for lemmas_tuple in lemmas_tuples]),
                          reverse=True)
    lemmas_freqs = lemmas_freqs[:math.floor(len(lemmas_freqs) * 0.8)]
    lemmas_list = [
        lemmas_tuple[0] for lemmas_tuple in lemmas_tuples
        if lemmas_tuple[1] in lemmas_freqs
    ]

    lemmas_list = ['<unk>'] + [w for w in wl if w in lemmas_list]

    # Convert the list into hot vector dictionary
    hv_dict = {}
    for j, v in enumerate(lemmas_list):
        hv_dict[v] = [int(i) for i in np.eye(len(lemmas_list), dtype=int)[j]]

    with open(os.path.join(output_path, 'lemmas_dictionary.json'), 'w') as f:
        json.dump(hv_dict, f)
Example #16
def load_textgrid(file_name, tier_name='phone'):
    '''Load textgrid & return times and labels'''
    tg = tgt.read_textgrid(file_name)
    tier = tg.get_tier_by_name(tier_name)

    times = []
    labels = []
    for t in tier:
        times.append([round(t.start_time, 4), round(t.end_time, 4)])
        labels.append(t.text)
    assert len(times) > 0, f'"times" is empty: len={len(times)}'
    assert len(labels) > 0, f'"{tier_name}" is empty: len={len(labels)}'
    return np.array(times, dtype='float32'), labels
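A short usage sketch (the file name and tier name are assumptions):

times, labels = load_textgrid('utt0001.TextGrid', tier_name='phone')
for (start, end), lab in zip(times, labels):
    print(f'{lab}: {start:.3f}-{end:.3f} s')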
Example #17
def f1(predictions, grid_file, error=(0, 0)):
    #Testing(grid_file,cre4,3200*0.256,pred,3200,for_pos)
    vals = Testing(tgt.read_textgrid(grid_file),
                   4,
                   32000 * 0.256,
                   pickle.load(open(predictions, 'rb')),
                   32000,
                   for_pos=False)
    false_pos, true_pos, merged_pos = find(
        Testing(tgt.read_textgrid(grid_file), 4, 32000 * 0.256,
                pickle.load(open(predictions, 'rb')), 32000, True), error)
    if error != (0, 0):
        vals = (delete_vals(vals[0], merged_pos), vals[1])

    false_neg, true_neg = find(vals, (0, 0), for_pos=False)
    #false_neg = true_neg = 1
    print(false_pos, true_pos, false_neg, true_neg)
    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)
    print(precision, recall)

    score = 2 * (precision * recall / (precision + recall))
    print(score)
    return merged_pos
Example #19
def open_intervalframe_from_textgrid(filepath,
                                     encoding='utf-8',
                                     asobjects=False,
                                     include_empty_intervals=False):
    """Import a textgrid and return a dict of IntervalFrames.

    Each tier in the textgrid becomes an IntervalFrame (Pandas DataFrame)
    The Intervals by default are tokenized into start_time, end_time and
    text columns.
    The points (for point tiers) are tokenized into time and mark columns.

    Arguments:
    filepath  -- Path + filename of the TextGrid file to be imported.

    Keyword Arguments:
    asobjects -- If True, then values are intervalobjects (as defined in
                 package tgt, instead of tokenizing into start_time etc.
                 IntervalFrame has only one column with these objects.
    include_empty_intervals -- If enabled, empty intervals between
                               annotations are also returned
    encoding -- character encoding to read the textgrid file

    """

    textgrid = tgt.read_textgrid(filepath, encoding, include_empty_intervals)
    result = {}
    for tier in textgrid.tiers:
        if len(tier) > 0:
            if isinstance(tier, tgt.IntervalTier):
                frame = pd.DataFrame(tier.intervals, columns=['intervals'])
                if asobjects == False:
                    frame['start_time'] = frame['intervals'].map(lambda x:\
                                                             x.start_time)
                    frame['end_time'] = frame['intervals'].map(lambda x:\
                                                               x.end_time)
                    frame['text'] = frame['intervals'].map(lambda x: x.text)
                    del frame['intervals']
            elif isinstance(tier, tgt.PointTier):
                frame = pd.DataFrame(tier.points, columns=['points'])
                if asobjects == False:
                    frame['time'] = frame['points'].map(lambda x: x.time)
                    frame['mark'] = frame['points'].map(lambda x: x.text)
                    del frame['points']
            result[tier.name] = frame
    return result
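A usage sketch, assuming a TextGrid file with a non-empty 'words' interval tier:

frames = open_intervalframe_from_textgrid('recording.TextGrid')
words = frames['words']        # one DataFrame per non-empty tier
print(words.columns.tolist())  # ['start_time', 'end_time', 'text']
print(words.head())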
def concatenate_textgrids(input_files, encoding):
    """Concatenate Tiers with matching names. TextGrids are concatenated
    in the order they are specified. The number and the names of tiers
    must be the same in each TextGrid."""
    
    # Read all TextGrids into a list.
    textgrids = [tgt.read_textgrid(path, encoding) for path in input_files]

    # Check whether the TextGrids have the same number of tiers.
    ntiers = [len(x) for x in textgrids]
    assert all([ntiers[0] == x for x in ntiers[1:]]),\
            'TextGrids have different numbers of tiers.'

    # Check whether tiers in the TextGrids have the same names.
    tier_names = [sorted(x.get_tier_names()) for x in textgrids]
    assert all([tier_names[0] == x for x in tier_names[1:]]),\
           'Names of tiers do not match.' 

    tot_duration = 0
    tiers = {} # tier_name : tgt.Tier()

    for textgrid in textgrids:
        for tier in textgrid.tiers:
            intervals = []

            # If this is the first we see this tier, we just make a copy
            # of it as it is.
            if tier.name not in tiers.keys():
                tiers[tier.name] = copy.deepcopy(tier)
            # Otherwise we update the start and end times of intervals
            # and append them to the first part.
            else:
                for interval in tier.intervals:
                    interval.left_bound += tot_duration
                    interval.right_bound += tot_duration
                    intervals.append(interval)
                tiers[tier.name].add_intervals(intervals)
        tot_duration += textgrid.end_time()

    # Create a new TextGrid
    textgrid_concatenated = tgt.TextGrid()
    # Add tiers in the order they're found in the first TextGrid.
    textgrid_concatenated.add_tiers([tiers[x] for x in textgrids[0].get_tier_names()])
    return textgrid_concatenated
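A usage sketch (file names assumed), writing the concatenated result with the same tgt call used elsewhere on this page:

tg_cat = concatenate_textgrids(['part1.TextGrid', 'part2.TextGrid'], 'utf-8')
tgt.write_to_file(tg_cat, 'concatenated.TextGrid', format='short')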
Example #22
def segment_textgrid_audio(textgrid_path, tier):
    directory = os.listdir(textgrid_path)
    try:
        os.mkdir(textgrid_path + '/segments')
    except:
        print("Path already exists")
    for f in directory:
        if '.TextGrid' in f:
            text_grid = tgt.read_textgrid(os.path.join(textgrid_path, f))
            wav = os.path.join(textgrid_path, f.replace('TextGrid', 'wav'))
            interval_num = 0
            for interval in text_grid.tiers[tier].intervals:
                interval_num +=1
                if interval.text != '':
                    start_audio = interval.start_time * 1000
                    end_audio = interval.end_time * 1000
                    audio_segment = AudioSegment.from_file(wav)
                    audio_segment = audio_segment[start_audio:end_audio]
                    audio_segment.export(textgrid_path + '/segments/'+f.replace('.TextGrid', '_')+'_'+str(interval_num)+'.wav', format="wav")
Example #23
def read_textgrid(filename, sample_rate=200):
    import tgt
    try:
        tg = tgt.read_textgrid(filename)  #, include_empty_intervals=True)
    except:
        print("reading " + filename + " failed")

        return
    tiers = []
    labs = {}

    for tier in tg.get_tier_names():
        if (tg.get_tier_by_name(tier)).tier_type() != 'IntervalTier':
            continue
        tiers.append(tg.get_tier_by_name(tier))

        lab = []
        for a in tiers[-1].annotations:

            try:
                # this was for some past experiment
                if a.text in ["p1", "p2", "p3", "p4", "p5", "p6", "p7"]:
                    lab[-1][-1] = lab[-1][-1] + "_" + a.text
                else:
                    #lab.append([a.start_time*sample_rate,a.end_time*sample_rate,a.text.encode('utf-8')])
                    lab.append([
                        a.start_time * sample_rate, a.end_time * sample_rate,
                        a.text
                    ])
            except:
                pass
            #print tiers[-1].encode('latin-1')
        labs[tier.lower()] = lab
    try:
        for i in range(len(labs['prosody'])):
            if labs['prosody'][i][2][-2:] not in [
                    "p1", "p2", "p3", "p4", "p5", "p6", "p7"
            ]:
                labs['prosody'][i][2] += "_p0"
    except:
        pass

    return labs
Example #24
def getAmatrix(TextGrid_Directory, CsvDirectory, N):
    A = np.zeros((N, N))
    # pdb.set_trace()
    for root, dirs, filenames in os.walk(TextGrid_Directory):
        # pdb.set_trace()
        for f in filenames:
            if f == '.DS_Store':
                continue
            tg = tgt.read_textgrid(TextGrid_Directory+f)
            ipu_tier = tg.get_tier_by_name('Key-Child')
            # pdb.set_trace()
            prev = map(ipu_tier[0].text)
            for i in range(1, len(ipu_tier)):
                cur = map(ipu_tier[i].text)
                if cur >= N:
                    break
                A[prev, cur] += 1
                prev = cur
    return A
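This example and Example #25 below call a project-specific map() function (shadowing the builtin) that converts an annotation text into an integer class index; it is not shown here. A hypothetical stand-in could be a plain dictionary lookup (only the 'HIC' label actually appears in these snippets; the other labels are assumptions):

LABEL_TO_IDX = {'CDS': 0, 'ADS': 1, 'OHS': 2, 'HIC': 3}  # assumed label set

def map(annotation):  # shadows the builtin, as in the original code
    # Map an IPU annotation text to an integer class index.
    return LABEL_TO_IDX.get(annotation, len(LABEL_TO_IDX))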
Example #25
def getBmatrix_multi(num_label, TextGrid_Directory, TextGridname, CsvDirectory):
    tg = tgt.read_textgrid(TextGrid_Directory+TextGridname)
    ipu_tier = tg.get_tier_by_name('Key-Child')
    filename = TextGridname.split('.')[0]

    # dictionaries with key of filename, values of softmax probability predictions being that label
    prob = {}
    with open(CsvDirectory) as csvfile:
        reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        # prob_filenames = [row[0].split(',')[0] for row in reader]
        for row in reader:
            if row[0].split(',')[0] == 'total_filename':
                continue
            if num_label == 4:
                prob[row[0].split(',')[0]] = [float(row[0].split(',')[-4]), float(row[0].split(',')[-3]), float(row[0].split(',')[-2]), float(row[0].split(',')[-1])]
            else:
                prob[row[0].split(',')[0]] = [float(row[0].split(',')[-5]), float(row[0].split(',')[-4]), float(row[0].split(',')[-3]), float(row[0].split(',')[-2]), float(row[0].split(',')[-1])]

    y = []
    prob_order = []
    # append the probabilities of that segment being different classes in order
    for seg in ipu_tier:
        start_time = seg.start_time
        end_time = seg.end_time
        annotation = seg.text
        segment_filename = filename + '-' + str(start_time) + '-' + str(end_time) + '-' + annotation
        if num_label == 4:
            if annotation == 'HIC':
                continue
        if not segment_filename in prob:
            continue
        if segment_filename in prob:
            prob_order.append(prob[segment_filename])
        y.append(map(annotation))

    prob_order = np.array(prob_order)
    res = prob_order.T
    prediction_original = np.argmax(res, 0)
    accuracy = sum(y == prediction_original) * 1.0/len(y)
    FSCORE = f1_score(y, prediction_original, average='macro')
    # print('original accuracy:',accuracy)
    return res, accuracy, y, FSCORE
Example #26
def get_tiernames_from_tgfile(read_file, print_encoding_info=False):
    encoding_types = [
        'utf-8', 'utf-8-sig', 'utf-16', 'utf-16-le', 'utf-16-be'
    ]
    for encoding_type in encoding_types:
        try:
            tg = tgt.read_textgrid(read_file, encoding=encoding_type)
            if type(tg) is tgt.core.TextGrid:
                if print_encoding_info:
                    print(
                        f'\t\'{basename(read_file)}\' encoding in {encoding_type}.'
                    )
                break
        except:
            # print(f'WARNING: failed with reading as {encoding_type}')
            continue
    try:
        tier_names = [tier.name for tier in tg.tiers]
    except:
        print(f'ERROR: {read_file} not in {encoding_types}')
        raise
    return tg, tier_names
Example #27
	def __init__(self, path):
		# pp = pprint.PrettyPrinter(indent=4)
		self.generator = MarkovGenerator(n=1, max=3)

		self.path = path + '/textGrids/'
		#let's get the text from the textGrids, save the annotations in a dict, key=filename
		self.annotations = dict()

		for tgFile in os.listdir(self.path):
			if tgFile[-9:] == '.TextGrid':
				#print tgFile
				tg = tgt.read_textgrid(self.path + tgFile)
				file_annotations = [i for t in tg.tiers for i in t]
				for i in range(len(file_annotations)):
					a1 = file_annotations[i]
					filename = tgFile[:-9]
					self.annotations[a1.text] = (filename, a1)
					if i == len(file_annotations)-1:
						continue
					else:
						a2 = file_annotations[i+1]
						self.feedMarkov(a1,a2)
def main():
    args = parser()

    tgfiles = args.textgrid.split(',')
    identifiers = args.identifier.split(',')
    assert len(tgfiles) == len(identifiers), "number of textgrids must match number of identifiers"

    pairs = zip(tgfiles, identifiers)
    
    rows = []
    for tgfile, identifier in pairs:
        tg = tgt.read_textgrid(tgfile)
        tier = tg.get_tier_by_name(args.tier)
        matches = tier.get_annotations_with_text(args.regex, regex=True)

        for m in matches:
            rows.append((str(m.start_time), str(m.end_time), str(m.duration()), m.text, identifier))

    with open(args.output_path, 'w') as out:
        out.write("start\tend\tduration\tlabel\tidentifier\n")
        for row in rows:
            out.write('\t'.join(row) + '\n')
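The parser() helper used here is not shown; a compatible argparse sketch (whether the arguments are flags or positionals is an assumption, only the attribute names come from the code above) could be:

import argparse

def parser():
    ap = argparse.ArgumentParser(description='Extract matching annotations into a TSV file.')
    ap.add_argument('--textgrid', help='comma-separated list of TextGrid files')
    ap.add_argument('--identifier', help='comma-separated list of identifiers, one per file')
    ap.add_argument('--tier', help='name of the tier to search')
    ap.add_argument('--regex', help='regular expression matched against annotation text')
    ap.add_argument('--output_path', help='path of the output TSV file')
    return ap.parse_args()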
Example #29
def calculate_durations(textgrid, phone2idx):
    tokens = []
    durs = []

    frames_per_second = args.sr / args.hop_length
    tg = tgt.read_textgrid(textgrid, include_empty_intervals=True)
    data_tier = tg.get_tier_by_name("phones")

    # Get total frames
    total_frames = ceil((data_tier.end_time - data_tier.start_time) * frames_per_second)

    # Find start and end frames of each token
    se_in_frames = np.array([(frames_per_second * d.start_time, frames_per_second * d.end_time) for d in data_tier])
    se_in_frames = np.round(se_in_frames)
    durs = (se_in_frames[:, 1] - se_in_frames[:, 0]).astype(int)
    blank_set = ('sil', 'sp', 'spn', '', '<unk>')
    blank_token = " "

    # merge repeated blank tokens
    tokens, durations = [], []
    for i in range(len(data_tier)):
        x = data_tier[i].text
        if x == 'spn':
            return None, None, None
        x = blank_token if x in blank_set else x

        if len(tokens) and tokens[-1] == blank_token and x == blank_token:
            durations[-1] += durs[i]
        else:
            tokens.append(x)
            durations.append(durs[i])

    tokens_enc = [phone2idx[token] for token in tokens]
    tokens_enc, durations = torch.LongTensor(tokens_enc), torch.LongTensor(durations)

    # Add rounding error to final token
    durations[-1] += total_frames - durations.sum()

    return tokens, tokens_enc, durations
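This function reads the module-level args (with sr and hop_length) and a phone2idx mapping defined elsewhere; a minimal calling sketch under assumed values could be:

import argparse

args = argparse.Namespace(sr=22050, hop_length=256)  # assumed audio settings
phone2idx = {' ': 0, 'AH0': 1, 'B': 2, 'K': 3}       # assumed phone inventory; ' ' is the blank token

tokens, tokens_enc, durations = calculate_durations('utt0001.TextGrid', phone2idx)
if tokens is not None:  # None is returned when an unknown ('spn') phone is found
    print(tokens, int(durations.sum()), 'frames')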
Example #30
def get_textgrid_sa(mfa_file):

    read_textgrid = tgt.read_textgrid(mfa_file)
    [words, start_time, end_time] = read_word_alignment(read_textgrid)
    assert len(words) == len(start_time) == len(end_time)
    stack = []
    for i in range(len(words)):

        if words[i] == "":
            continue

        if stack:
            if start_time[i] > stack[-1][-1]:
                stack.append([start_time[i], end_time[i]])
                #print("non contiguos word")
            else:
                stack[-1][-1] = end_time[i]

        else:
            stack.append([start_time[i], end_time[i]])

    return stack
Example #32
def textgrid2rttm(textgrid):
    '''
        Take as input the path to a TextGrid and output a dictionary of
        lists *{spkr: [ (onset, duration) ]}* that can easily be written
        in RTTM format.
    '''
    # init output
    rttm_out = dict()

    # open textgrid
    #tg = tgio.openTextgrid(textgrid)
    tg = tgt.read_textgrid(textgrid)

    # loop over all speakers in this text grid
    #for spkr in tg.tierNameList:
    for spkr in tg.get_tier_names():

        spkr_timestamps = []
        # loop over all annotations for this speaker
        #for interval in tg.tierDict[spkr].entryList:
        for _interval in tg.get_tiers_by_name(spkr):
            for interval in _interval:

                bg, ed, label = interval.start_time,\
                              interval.end_time,\
                              interval.text

                #if label == "x":
                #    continue
                #elif label == "1" or label == "2":
                #    spkr_timestamps.append((bg, ed-bg))
                spkr_timestamps.append((bg, ed-bg))

        # add list of onsets, durations for each speakers
        rttm_out[spkr] = spkr_timestamps
    return rttm_out
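The docstring notes that the returned dict can easily be written in RTTM format; a small sketch of that final step (the file id and output path are assumptions):

def write_rttm(rttm_out, file_id, rttm_path):
    # One standard 10-field RTTM SPEAKER line per (onset, duration) pair.
    with open(rttm_path, 'w') as fout:
        for spkr, timestamps in rttm_out.items():
            for onset, dur in timestamps:
                fout.write('SPEAKER {} 1 {:.3f} {:.3f} <NA> <NA> {} <NA> <NA>\n'.format(
                    file_id, onset, dur, spkr))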
Example #34
def stitch_textgrid(batch_title, sequenced_title, input2b_path, input2_path,
                    output3_path):
    combined_intervals = []

    new_tg = tgt.TextGrid()

    new_phone_tier = tgt.IntervalTier()
    final_phone_tier = tgt.IntervalTier()
    new_word_tier = tgt.IntervalTier()

    last_dur = 0.0

    for i, title in enumerate(sequenced_title):

        wave_file = wave.open(os.path.join(input2b_path, title + '.wav'), 'rb')
        frameRate = wave_file.getframerate()
        n_frames = wave_file.getnframes()
        dur = n_frames / frameRate

        f0_start_time = 0.0
        f0_end_time = dur

        tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))

        # Load name of all tiers
        tier_names = tg.get_tier_names()

        words_tier_name = [name for name in tier_names if 'words' in name][0]
        words_tier = tg.get_tier_by_name(words_tier_name)

        phones_tier_name = [name for name in tier_names if 'phones' in name][0]
        phones_tier = tg.get_tier_by_name(phones_tier_name)

        word_annotations = words_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)
        phone_annotations = phones_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)

        word_intervals = []
        for interval in word_annotations:
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            word_intervals.append(interval)
        if word_intervals[-1].end_time > last_dur + f0_end_time:
            word_intervals[-1].end_time = last_dur + f0_end_time

        phone_intervals = []
        for j, interval in enumerate(phone_annotations):
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur

            if interval.text != 'sil' and interval.text != 'sp':
                phone_intervals.append(interval)

            elif i == len(sequenced_title) - 1 and j == len(
                    phone_annotations) - 1:
                phone_intervals.append(interval)
        if phone_intervals[-1].end_time > last_dur + f0_end_time:
            phone_intervals[-1].end_time = last_dur + f0_end_time

        new_word_tier.add_annotations(word_intervals)
        new_phone_tier.add_annotations(phone_intervals)

        last_dur += dur

    phones_tier_copy = new_phone_tier.get_copy_with_gaps_filled(
        start_time=None, end_time=None, empty_string='')

    # Replace all sil and sp intervals with <sil> tag
    #store these intervals to a list so that we can add them to the other tiers
    sil_intervals = []
    phone_intervals = []
    for interval in phones_tier_copy:
        if interval.text == '':
            interval.text = 'sil'
            sil_intervals.append(interval)
        else:
            phone_intervals.append(interval)

    final_phone_tier.add_annotations(phone_intervals)
    final_phone_tier.add_annotations(sil_intervals)

    final_phone_tier.name = phones_tier_name
    new_word_tier.name = words_tier_name

    new_tg.add_tier(new_word_tier)
    new_tg.add_tier(final_phone_tier)

    tgt.write_to_file(new_tg,
                      os.path.join(output3_path, batch_title + '.TextGrid'),
                      format='short')
        for j in intervals:
            if i == j.text:
                intervals.remove(j)
                break
    # intervals = [interval for interval in intervals if interval.text not in string.ascii_uppercase[:4]]

    return intervals

def standardize(word):
    word = re.sub(r'\[.+?\]\s*', '', word)
    return word

#handle the textgrid file
files = [file for file in os.listdir(folder) if file.endswith(".TextGrid")]
for f in files:
    tg = tgt.read_textgrid(f)
    tier = tg.get_tier_by_name('words')
    #get intervals
    intervals = [i for i in tier.intervals if i.text != 'sp' and i.text != 'sil']
    intervals = remove_redundant(intervals)

    #match the file
    index_json = match_file(f[:len(f)-9])

    #update json
    json_type = json.loads(json_list[index_json])
    count = 0
    print(f)
    for i in json_type["imgs"]:
        for j in i["texts"]:
            words = j["content"].split(" ")
Example #36
    def setUp(self):
        self.f = open_streamframe_from_xiofile(
            "data/fseeksmaller.xio.gz",
            "lab-labtop/irioKinect 2",
            window_size=5,
            with_fields=[],
            without_fields=[],
            discard_duplicates=True,
            start_time=0,
            end_time=13,
            relative=True,
            timestamp_offset=10,
        )

        self.f2 = open_streamframe_from_xiofile(
            "data/fseeksmaller.xio.gz",
            "lab-labtop/irioKinect",
            window_size=5,
            with_fields=[],
            without_fields=[],
            discard_duplicates=True,
            start_time=0,
            end_time=13,
            relative=True,
            timestamp_offset=10,
        )
        # self.outtake_from_stream = self.f.ix[1341393414826]['framenumber']
        self.fraw = open_streamframe_from_xiofile(
            "data/fseeksmaller.xio.gz",
            "lab-labtop/irioKinect 2",
            window_size=5,
            with_fields=[],
            without_fields=[],
            discard_duplicates=True,
            start_time=0,
            end_time=13,
            relative=True,
            timestamp_offset="raw",
        )
        self.ff = open_streamframe_from_xiofile(
            "data/fseeksmaller.xio.gz",
            "lab-labtop/irioKinect 2",
            window_size=5,
            with_fields=[],
            without_fields=[],
            discard_duplicates=True,
            start_time=0,
            end_time=13,
            relative=True,
        )

        save_streamframe_to_xiofile({"lab-labtop/irioKinect 2": self.f}, "data/sf_to_xio.xio.gz")

        save_streamframe_to_xiofile(
            {"lab-labtop/irioKinect 2": self.f, "lab-labtop/irioKinect": self.f2}, "data/sf_to_xio2.xio.gz"
        )

        self.rsn = open_streamframe_from_xiofile("data/fseeksmaller.xio.gz", "wrong/sensor/name")

        self.outtake_from_xio = XIOFile("data/sf_to_xio.xio.gz", indexing=True)

        self.outtake_from_xio_2 = XIOFile("data/sf_to_xio2.xio.gz", indexing=True)

        self.q = [
            ex for ex in quantize(self.outtake_from_xio.xio_quicklinegen(0, 13, True, True), "lab-labtop/irioKinect 2")
        ][0]["soundAngle"]

        self.ivf = open_intervalframe_from_textgrid(
            "data/r1_12_15with" "Point.TextGrid", encoding="utf-8", asobjects=False, include_empty_intervals=False
        )["P"]
        self.cv = convert_pointtier_to_streamframe(self.ivf)
        self.pf = convert_streamframe_to_pointtier(self.f)
        self.outtake_from_pf = convert_streamframe_to_pointtier(self.f)["soundAngle"].ix[0]["time"]

        self.if_from_tg = open_intervalframe_from_textgrid(
            "data/r1-20120704-" "cam1-head-zm.TextGrid",
            encoding="utf-8",
            asobjects=False,
            include_empty_intervals=False,
        )

        self.if_from_tg_tier = self.if_from_tg.values()[0]

        save_intervalframe_to_textgrid(self.if_from_tg, "data/testif.TextGrid", encoding="utf-8")
        self.tg = tgt.read_textgrid("data/testif.TextGrid", encoding="utf-8", include_empty_intervals=False)

        self.ic1 = open_intervalframe_from_increco("data/test.inc_reco")

        self.ic2 = open_intervalframe_from_increco("data/test.inc_reco", lastonly=True)
Example #37
def main(wav_path, outfile_path, speech_path=None, speech_tier_name=None):

    if speech_path is not None and speech_tier_name is not None:
        tg = tgt.read_textgrid(speech_path)
        speech = tg.get_tier_by_name(speech_tier_name)
    else:
        speech = None

    # Read the respiratory signal, detrend it, identify cycles and
    # holds.
    resp = rip.RIP.from_wav(wav_path, speech=speech)
    resp.remove_baseline()
    resp.find_cycles(include_holds=True)
    resp.find_holds()
    resp.estimate_range()
    resp.estimate_rel(30)
    resp.save_annotations('breath.TextGrid')

    fname = os.path.splitext(os.path.basename(wav_path))[0]

    # For each inhalation and exhalation, extract the respiratory
    # cycles.

    features = []
    for i, seg in enumerate(resp.segments):

        start = seg.start_time
        end = seg.end_time
        label = seg.text

        features_seg = {
            'file': fname,
            'start': start,
            'end': end,
            'segment': label
        }
        # Odd-numbered rows correspond to inhalations and even-numbered
        # rows correspond to exhalations.
        if label == 'out':
            cycle_start = resp.segments[i - 1].start_time
            features_seg['duty_cycle'] = (end - start) / (end - cycle_start)
        else:
            cycle_end = resp.segments[i + 1].end_time
            features_seg['duty_cycle'] = (end - start) / (cycle_end - start)
        features_seg['duration'] = end - start
        features_seg['slope'] = resp.extract_slope(start, end)
        features_seg['amplitude'] = resp.extract_amplitude(start, end)
        features_seg['vol_start'] = resp.extract_level(start)
        features_seg['vol_end'] = resp.extract_level(end)
        holds = resp.holds.get_annotations_between_timepoints(
            start, end, left_overlap=True, right_overlap=True)
        features_seg['nholds'] = len(holds)

        if len(holds):
            holds_dur = sum(h.end_time - h.start_time for h in holds)
            holds_dur -= max(0, start - holds[0].start_time)
            holds_dur -= max(0, holds[-1].end_time - end)
        else:
            holds_dur = 0

        if speech is not None:

            # Extract: time lag, interval before and after: duration,
            # start, end level, slope
            pass

        features.append(features_seg)

    with open(outfile_path, 'w') as fout:
        csv_out = csv.DictWriter(fout, fieldnames=features[0].keys())
        csv_out.writeheader()
        csv_out.writerows(features)
Example #38
def duration(path, C_list, V_list, cid):
    #file_list = glob.glob(path + r"\*\sent\*.TextGrid")  # glob matches all qualifying files and returns them as a list
    file_list = glob.glob(path + r"\sent\*.TextGrid")  # glob matches all qualifying files and returns them as a list
    #print(file_list)
    AlldeltS = []  # compute the result for each TextGrid in turn and store it in this overall list
    all_vs = []
    all_rpvis = []  # possibly problematic: a value is accumulated every iteration but the list is never cleared
    all_npvis = []
    all_ms = []
    for file in file_list:
        TextGrid = tgt.read_textgrid(file, include_empty_intervals=True)  # read each TextGrid file in turn
        if cid == 'jp':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[0])
            #print(tier)
        if cid == 'cn':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[0])
            #print(tier)
        elif cid == 'ru':
            tier = TextGrid.get_tier_by_name(TextGrid.get_tier_names()[1])  # read the intervals by the tier's name/position
            #print(tier)
        # tier = TextGrid.get_tier_by_name('SY')
        tier_name = TextGrid.get_tier_names()  # get the names of all tiers
        start = tier.start_time
        end = tier.end_time
        start_syl = tier.start_time
        end_syl = tier.end_time
        tier2insert = tgt.IntervalTier(start, end, name='CV')  # take the start and end points and insert a 'CV' interval tier
        TextGrid.insert_tier(tier2insert, 3)
        CV = TextGrid.get_tier_by_name('CV')
        annotation = tier.intervals  # the intervals of this tier
        #syllable = tier_syll.intervals
        num = []
        S_duration = []  # syllable_duration
        duration_all_S = 0  # total duration (used to compute %V and other related parameters)

        for i in range(len(annotation)):  # loop: relabel and compute durations
            old_name = annotation[i].text
            old_start_time = annotation[i].start_time
            old_end_time = annotation[i].end_time
            duration = old_end_time - old_start_time
            #if old_name in C_list:  # decide whether it belongs to C / V
            if old_name != 'sil':
                new_name = 'S'
            # elif old_name in V_list:  # decide whether it belongs to C / V
            #     new_name = 'S'
            else:
                new_name = 'none'

            # print(old_name, new_name)
            Interval = tgt.Interval(old_start_time, old_end_time, text=new_name)  # Interval format: filled in order
            # print(old_name, new_name, 'duration=', duration)
            if new_name == 'S':
                S_duration.append(duration)  # append to the duration list
                duration_all_S = duration_all_S + duration

            CV.add_interval(Interval)  # write the interval annotation into the TextGrid
        #print(file, S_duration)
        mean_syl = duration_all_S/len(S_duration)
        #print(mean_syl)
        vacS = duration_all_S / len(S_duration)
        # print(num)
        # if num > 0:
        # mean_syl = a / (len(C_duration) + len(V_duration)) # compute a mean syllable duration, used for speech rate
        # print(mean_syl)
        # mean_syl = a/(len(C_duration)+len(V_duration))
        # print(mean_syl)
        #       vacroC = round(deltaC(C_duration) / mean_syl * 100, 4)
        #       vacroV = round(deltaC(V_duration) / mean_syl * 100, 4)
        vacroS = round(deltaS(S_duration) / vacS * 100, 4)
        # print(file, ',',
        #
        #       deltaS(S_duration), ',',
        #
        #       vacroS, ',',
        #
        #       rPVI_s(S_duration), ',',
        #
        #       nPVI_S(S_duration), ',',
        #       )
        #print(nPVI_S(S_duration))
        AlldeltS.append(deltaS(S_duration))
        all_vs.append(vacroS)
        all_rpvis.append(rPVI_s(S_duration))
        all_npvis.append(nPVI_S(S_duration))
        all_ms.append(mean_syl)
    deltS = round(np.mean(AlldeltS), 9)
    vs = round(np.mean(all_vs), 9)
    rpvis = round(np.mean(all_rpvis), 9)
    npvis = round(np.mean(all_npvis), 9)
    ms = round(np.mean(all_ms), 9)

    #
    print(path, ',',
          ms, ',',
          deltS, ',',
          vs, ',',
          rpvis, ',',
          npvis, ',',
          )
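The rhythm-metric helpers used above (deltaS, rPVI_s and nPVI_S) are not defined in the snippet; sketches following the usual definitions of these measures (standard deviation of durations, raw and normalized Pairwise Variability Index) might look like this:

import numpy as np

def deltaS(durations):
    # Standard deviation of the syllable durations.
    return np.std(durations)

def rPVI_s(durations):
    # Raw Pairwise Variability Index: mean absolute difference of successive durations.
    return np.mean([abs(durations[k] - durations[k + 1])
                    for k in range(len(durations) - 1)])

def nPVI_S(durations):
    # Normalized PVI: each difference is scaled by the mean of the pair, result times 100.
    return 100 * np.mean([abs(durations[k] - durations[k + 1]) /
                          ((durations[k] + durations[k + 1]) / 2)
                          for k in range(len(durations) - 1)])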