def test_multiple_medial_pauses(self):
    # Keep the first five fixture words, then append two back-to-back pauses
    # (0.91-1.0 and 1.0-1.42) and a closing word.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.0),
        Pause('<SIL>', 1.0, 1.42),
        Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words))
    assert_equal(len(utterances), 2)

    compared = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

    # Items of the first utterance are matched against the start of the input.
    for idx, item in enumerate(utterances[0]):
        for attr in compared:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # Items of the second utterance are matched against the input from
    # index 7 onwards, i.e. the word that follows both pauses.
    for idx, item in enumerate(utterances[1], 7):
        for attr in compared:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_sep_arg(self):
    # A 0.09 s pause (0.91-1.0) sits between the fifth fixture word and a
    # closing word; with sep=0.08 the test expects it to split the input.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.0),
        Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words, sep=0.08))
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 5)
    assert_equal(len(utterances[1]), 1)

    compared = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

    # First utterance: the words before the pause.
    for idx, item in enumerate(utterances[0]):
        for attr in compared:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # Second utterance: only the word after the pause (input index 6).
    for attr in compared:
        assert_equal(getattr(words[6], attr), getattr(utterances[1][0], attr))
def prep_BUCKEYE(speaker_list):
    """Gather durations and transcriptions from a list of Buckeye speakers.

    Each entry of ``speaker_list`` is handed to ``buckeye.Speaker.from_zip``.
    Every track of the resulting speaker is split with
    ``words_to_utterances``, and each utterance item that has a ``phonemic``
    attribute contributes to the collected lists.

    Returns:
        An 8-tuple ``(track_durations, word_durations, utterance_durations,
        phone_durations, all_words, all_phonemic, all_phonetic, all_pos)``.
        The four duration lists are flat across all speakers.  The other
        four contain one list per speaker, each holding one list per
        utterance.
    """
    track_lengths, word_lengths = [], []
    utterance_lengths, phone_lengths = [], []
    corpus_words, corpus_phonemic = [], []
    corpus_phonetic, corpus_pos = [], []

    for archive in tqdm(speaker_list):
        spk_words, spk_phonemic = [], []
        spk_phonetic, spk_pos = [], []

        speaker = buckeye.Speaker.from_zip(archive, load_wavs=True)

        # The tracks are materialised into a list before being wrapped,
        # presumably so the nested progress bar can report a total.
        for track in tqdm(list(speaker), leave=False):
            # Track duration: end of the last entry minus start of the first.
            track_lengths.append(track.words[-1].end - track.words[0].beg)

            for utterance in words_to_utterances(track.words):
                utterance_lengths.append(utterance.dur)

                utt_words, utt_phonemic = [], []
                utt_phonetic, utt_pos = [], []

                for item in utterance:
                    # Skip anything without a phonemic attribute
                    # (i.e. pauses and the like).
                    if not hasattr(item, "phonemic"):
                        continue

                    utt_words.append(item.orthography)
                    word_lengths.append(item.dur)

                    has_phonemic = item.phonemic is not None
                    if has_phonemic:
                        utt_phonemic.append(item.phonemic)
                    if item.phonetic is not None:
                        utt_phonetic.append(item.phonetic)
                    # Phone durations are only counted for entries that
                    # carry a phonemic transcription.
                    if has_phonemic:
                        phone_lengths.extend(
                            phone.dur for phone in item.phones
                        )
                    utt_pos.append(item.pos)

                spk_words.append(utt_words)
                spk_phonemic.append(utt_phonemic)
                spk_phonetic.append(utt_phonetic)
                spk_pos.append(utt_pos)

        corpus_words.append(spk_words)
        corpus_phonemic.append(spk_phonemic)
        corpus_phonetic.append(spk_phonetic)
        corpus_pos.append(spk_pos)

    return (
        track_lengths,
        word_lengths,
        utterance_lengths,
        phone_lengths,
        corpus_words,
        corpus_phonemic,
        corpus_phonetic,
        corpus_pos,
    )
def test_final_pause_no_strip(self):
    # Append a trailing pause to the fixture; with strip_pauses=False the
    # test expects it to remain in the single resulting utterance.
    words = self.words + [Pause('<SIL>', 1.19, 1.25)]

    utterances = list(words_to_utterances(words, strip_pauses=False))
    assert_equal(len(utterances), 1)

    only = utterances[0]
    assert_equal(len(only), 7)

    # Every item must keep the timing of its input counterpart.
    for idx, item in enumerate(only):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_initial_pause_no_strip(self):
    # Prepend a leading pause to the fixture; with strip_pauses=False the
    # test expects it to remain in the single resulting utterance.
    words = [Pause('<SIL>', 0.0, 0.05)] + self.words

    utterances = list(words_to_utterances(words, strip_pauses=False))
    assert_equal(len(utterances), 1)

    only = utterances[0]
    assert_equal(len(only), 7)

    # Every item must keep the timing of its input counterpart.
    for idx, item in enumerate(only):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_short_medial_pause(self):
    # A 0.09 s pause (0.91-1.0) between the fifth fixture word and a closing
    # word; the test expects everything to stay in one utterance.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.0),
        Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words))
    assert_equal(len(utterances), 1)

    # The pause itself is still present at position 5.
    assert_equal(words[5].entry, utterances[0][5].entry)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_short_medial_pause(self):
    # Build the input: first five fixture words, a 0.09 s pause (0.91-1.0),
    # then a closing word.
    short_pause = Pause('<SIL>', 0.91, 1.0)
    closing = Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't'])
    words = self.words[:5] + [short_pause, closing]

    utterances = list(words_to_utterances(words))

    # The pause is expected not to split the input ...
    assert_equal(len(utterances), 1)
    # ... and to be kept at its original position inside the utterance.
    assert_equal(words[5].entry, utterances[0][5].entry)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_multiple_short_medial_pauses(self):
    # Keep the first three fixture words, then interleave two pauses
    # (0.59-0.65 and 0.91-1.35) with words; index 3 and index 6 of the
    # resulting input are the pauses.
    words = self.words[:3] + [
        Pause('<VOCNOISE>', 0.59, 0.65),
        Word('on', 0.65, 0.77, ['aa', 'n'], ['aa', 'n']),
        self.words[4],
        Pause('<SIL>', 0.91, 1.35),
        Word('mat', 1.35, 1.63, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words))
    assert_equal(len(utterances), 1)

    # Both pauses are expected to survive inside the single utterance.
    for pause_idx in (3, 6):
        assert_equal(words[pause_idx].entry, utterances[0][pause_idx].entry)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_medial_pause_no_strip(self):
    # Work on a fresh list: the first five fixture words followed by a
    # 0.51 s pause (0.91-1.42) and a closing word.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.42),
        Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words, strip_pauses=False))

    # Two utterances are expected; with strip_pauses=False the first one
    # has six items and the second has one.
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 6)
    assert_equal(len(utterances[1]), 1)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_multiple_short_medial_pauses(self):
    # Two pauses are woven into the tail of the fixture: one at 0.59-0.65
    # and one at 0.91-1.35.  They end up at input positions 3 and 6.
    noise = Pause('<VOCNOISE>', 0.59, 0.65)
    silence = Pause('<SIL>', 0.91, 1.35)
    words = self.words[:3] + [
        noise,
        Word('on', 0.65, 0.77, ['aa', 'n'], ['aa', 'n']),
        self.words[4],
        silence,
        Word('mat', 1.35, 1.63, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words))

    # Neither pause is expected to split the input.
    assert_equal(len(utterances), 1)

    # Both pauses must still sit at their original positions.
    for pause_idx in (3, 6):
        assert_equal(words[pause_idx].entry, utterances[0][pause_idx].entry)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_multiple_medial_pauses_no_strip(self):
    # First five fixture words, then two consecutive pauses (0.91-1.0 and
    # 1.0-1.42) and a closing word.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.0),
        Pause('<SIL>', 1.0, 1.42),
        Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words, strip_pauses=False))

    # With strip_pauses=False the first utterance is expected to hold seven
    # items and the second utterance one.
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 7)
    assert_equal(len(utterances[1]), 1)

    timing = ('beg', 'end')

    for idx, item in enumerate(utterances[0]):
        for attr in timing:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # The second utterance is compared against the input from index 7 on.
    for idx, item in enumerate(utterances[1], 7):
        for attr in timing:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_medial_pause(self):
    # Build a fresh list from the first five fixture words, a 0.51 s pause
    # (0.91-1.42) and a closing word.
    words = self.words[:5] + [
        Pause('<SIL>', 0.91, 1.42),
        Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't']),
    ]

    utterances = list(words_to_utterances(words))

    # The pause is expected to split the input into 5 + 1 items, with the
    # pause itself in neither utterance.
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 5)
    assert_equal(len(utterances[1]), 1)

    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # The lone item of the second utterance must match the word after the
    # pause (input index 6) on every compared attribute.
    for attr in ('beg', 'end', 'orthography', 'phonemic', 'phonetic'):
        assert_equal(getattr(words[6], attr), getattr(utterances[1][0], attr))
def test_multiple_medial_pauses(self):
    # Two adjacent pauses (0.91-1.0, 1.0-1.42) separate the first five
    # fixture words from a closing word.
    first_pause = Pause('<SIL>', 0.91, 1.0)
    second_pause = Pause('<SIL>', 1.0, 1.42)
    closing = Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't'])
    words = self.words[:5] + [first_pause, second_pause, closing]

    utterances = list(words_to_utterances(words))
    assert_equal(len(utterances), 2)

    compared = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

    # Walk both utterances; the second is aligned with the input starting
    # at index 7 (the word after the two pauses), the first at index 0.
    for utterance, offset in ((utterances[0], 0), (utterances[1], 7)):
        for idx, item in enumerate(utterance, offset):
            for attr in compared:
                assert_equal(getattr(words[idx], attr), getattr(item, attr))
def test_medial_pause(self):
    # The input is a new list: five fixture words, one 0.51 s pause
    # (0.91-1.42), then a closing word.
    long_pause = Pause('<SIL>', 0.91, 1.42)
    closing = Word('mat', 1.42, 1.7, ['m', 'ae', 't'], ['m', 'ae', 't'])
    words = self.words[:5] + [long_pause, closing]

    utterances = list(words_to_utterances(words))

    # Expect a split into two utterances of five items and one item.
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 5)
    assert_equal(len(utterances[1]), 1)

    # Timing of the first utterance follows the start of the input.
    for idx, item in enumerate(utterances[0]):
        for attr in ('beg', 'end'):
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # The second utterance's only item is the word at input index 6.
    for attr in ('beg', 'end', 'orthography', 'phonemic', 'phonetic'):
        assert_equal(getattr(words[6], attr), getattr(utterances[1][0], attr))
def test_sep_arg(self):
    # Five fixture words, a 0.09 s pause (0.91-1.0) and a closing word.
    # The call below passes sep=0.08 and the test expects a split.
    brief_pause = Pause('<SIL>', 0.91, 1.0)
    closing = Word('mat', 1.0, 1.28, ['m', 'ae', 't'], ['m', 'ae', 't'])
    words = self.words[:5] + [brief_pause, closing]

    utterances = list(words_to_utterances(words, sep=0.08))
    assert_equal(len(utterances), 2)
    assert_equal(len(utterances[0]), 5)
    assert_equal(len(utterances[1]), 1)

    compared = ('beg', 'end', 'orthography', 'phonemic', 'phonetic')

    for idx, item in enumerate(utterances[0]):
        for attr in compared:
            assert_equal(getattr(words[idx], attr), getattr(item, attr))

    # The word after the pause (input index 6) is the sole item of the
    # second utterance.
    for attr in compared:
        assert_equal(getattr(words[6], attr), getattr(utterances[1][0], attr))
def test_words_to_utterances(self):
    # Generator-style test: the checker and its argument are yielded as a
    # pair instead of the checker being called here.
    result = list(words_to_utterances(self.words))
    yield (self.check_expected, result)
def test_final_pause(self):
    # Same check as for the plain fixture, but with a trailing pause
    # (1.19-1.25) appended to the input.  Generator-style test: the checker
    # and its argument are yielded as a pair.
    trailing = Pause('<SIL>', 1.19, 1.25)
    padded = self.words + [trailing]
    result = list(words_to_utterances(padded))
    yield (self.check_expected, result)
def test_initial_pause(self):
    # Same check as for the plain fixture, but with a leading pause
    # (0.0-0.05) placed in front of the input.  Generator-style test: the
    # checker and its argument are yielded as a pair.
    leading = Pause('<SIL>', 0.0, 0.05)
    padded = [leading] + self.words
    result = list(words_to_utterances(padded))
    yield (self.check_expected, result)