def tga_to_tier_reglin(tga_result, timegroups, intercept=True):
    """Build a tier with either the intercept or the slope of a TGA result.

    For each time group annotation, its serialized labels are used as the
    key into ``tga_result``. The matching entry is indexed with 0 when the
    intercept is requested, or with 1 otherwise. The value is rounded to
    5 decimals and stored as a "float" tag at a copy of the time group
    location.

    :param tga_result: One of the results of TGA, indexed by the serialized
        labels of a time group
    :param timegroups: (sppasTier) Time groups
    :param intercept: (boolean) Export the intercept. If False, export Slope.
    :returns: (sppasTier) Tier named 'TGA-Intercept' or 'TGA-Slope'

    """
    # Only the literal True selects the intercept; anything else means slope.
    want_intercept = intercept is True
    value_index = 0 if want_intercept else 1
    tier = sppasTier('TGA-Intercept' if want_intercept else 'TGA-Slope')

    for tg_ann in timegroups:
        key = tg_ann.serialize_labels()
        location = tg_ann.get_location().copy()
        value = round(tga_result[key][value_index], 5)
        tier.create_annotation(location, sppasLabel(sppasTag(value, "float")))

    return tier
def test_write_text_tracks(self):
    """Write tokenization and phonetization into separated track files.

    Checks that writing with an empty tokens tier raises SizeInputsError,
    then that 4 one-line files are written both when no tokens tier is
    given and when a matching tokens tier is given.

    """
    def read_track_lines(dir_tracks):
        """Return the single line of each of the 4 files in dir_tracks."""
        created_files = os.listdir(dir_tracks)
        self.assertEqual(4, len(created_files))
        lines = list()
        for fn in created_files:
            # The context manager closes the file: no explicit close().
            with codecs.open(os.path.join(dir_tracks, fn),
                             "r", sg.__encoding__) as fp:
                new_lines = fp.readlines()
            self.assertEqual(1, len(new_lines))
            lines.append(new_lines[0])
        return lines

    l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
    l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
    tier_phn = sppasTier("phonemes")
    tier_phn.create_annotation(sppasLocation(sppasPoint(1)),
                               [l1, l2])
    tier_phn.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag("j-e s-H-i")))
    tier_tok = sppasTier("tokens")
    tier_tok.create_annotation(sppasLocation(sppasPoint(1)),
                               sppasLabel(sppasTag("j' ai")))
    tier_tok.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag('je suis')))

    # A tokens tier whose size differs from the phonemes tier is rejected
    with self.assertRaises(SizeInputsError):
        TracksWriter._write_text_tracks(tier_phn, sppasTier('toto'), TEMP)

    # Without a tokens tier
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_1")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, None, dir_tracks)
    lines = read_track_lines(dir_tracks)
    self.assertIn("w_1 w_2", lines)
    self.assertIn("{j|S} {e|E}", lines)
    self.assertIn("j-e s-H-i", lines)

    # With a tokens tier
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_2")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, tier_tok, dir_tracks)
    lines = read_track_lines(dir_tracks)
    self.assertIn("j' ai", lines)
    self.assertIn("je suis", lines)
    self.assertIn("{j|S} {e|E}", lines)
    self.assertIn("j-e s-H-i", lines)
def create_time_tier(self, begin, end, tier_name="MetaInformation"):
    """Create a tier with the enabled meta information as annotations.

    The span between begin and end is divided into as many intervals as
    there are enabled keys. Each interval is labelled "key=value". The end
    of the last interval is then forced to ``end``.

    :param begin: (float) Begin midpoint value
    :param end: (float) End midpoint value
    :param tier_name: (str) Name of the tier to create
    :returns: sppasTier, or None if no key is enabled

    """
    active_keys = self.keys_enabled()
    if len(active_keys) == 0:
        return None

    # Duration of each annotation, at millisecond precision
    step = round((float(end) - float(begin)) / float(len(active_keys)), 3)

    tier = sppasTier(tier_name)
    left = round(begin, 3)
    right = begin + step
    for key in active_keys:
        text = key + "=" + self.get_metainfo(key)
        interval = sppasInterval(sppasPoint(left), sppasPoint(right))
        tier.create_annotation(sppasLocation(interval),
                               sppasLabel(sppasTag(text)))
        left, right = right, right + step

    # The last annotation must stop exactly at the requested end
    tier[-1].get_location().get_best().set_end(sppasPoint(end))

    return tier
def read_aligned_tracks(dir_name):
    """Read a set of alignment files and set as tiers.

    Each track listed for the directory is loaded from its alignment
    file. A track whose file cannot be read (IOError) is appended as an
    empty alignment.

    :param dir_name: (str) input directory containing a set of units
    :returns: (sppasTier, sppasTier, sppasTier) The tiers named PhonAlign,
        TokensAlign and PronTokAlign
    :raises: NoDirectoryError if dir_name does not exist

    """
    # Check that the directory exists BEFORE trying to read anything from it
    if os.path.exists(dir_name) is False:
        raise NoDirectoryError(dirname=dir_name)

    # Read the time values of each track from a file
    units = ListOfTracks.read(dir_name)

    # Create new tiers
    tier_phn = sppasTier("PhonAlign")
    tier_tok = sppasTier("TokensAlign")
    tier_pron = sppasTier("PronTokAlign")

    # Explore each unit to get alignments; track numbers start at 1
    for track_number, (unit_start, unit_end) in enumerate(units, start=1):

        # Fix filename to read, and load the content
        basename = \
            TrackNamesGenerator.align_filename(dir_name, track_number)
        try:
            _phons, _words, _prons = AlignerIO.read_aligned(basename)
        except IOError:
            # Best effort: an unreadable track is an empty alignment
            _phons, _words, _prons = [], [], []

        # Append alignments in tiers
        TracksReader._add_aligned_track_into_tier(
            tier_phn, _phons, unit_start, unit_end)
        TracksReader._add_aligned_track_into_tier(
            tier_tok, _words, unit_start, unit_end)
        TracksReader._add_aligned_track_into_tier(
            tier_pron, _prons, unit_start, unit_end)

    return tier_phn, tier_tok, tier_pron
def test_syllabify_interval(self):
    """... Perform the syllabification of one interval."""
    expected = sppasTier('Expected')
    syllables = sppasTier('SyllAlign')

    def expect(begin, end, text):
        """Append an expected syllable to the reference tier."""
        expected.create_annotation(
            sppasLocation(sppasInterval(sppasPoint(begin), sppasPoint(end))),
            sppasLabel(sppasTag(text)))

    def check_syllables():
        """Compare the syllables found so far with the expected ones."""
        self.assertEqual(len(expected), len(syllables))
        for exp_ann, syll_ann in zip(expected, syllables):
            self.assertEqual(exp_ann, syll_ann)

    # Phonemes 0 to 1: one syllable
    expect(1, 3, 'l-@')
    self.syll.syllabify_interval(self.tier, 0, 1, syllables)
    check_syllables()

    # Phonemes 13 to 15: two more syllables appended to the same tier
    expect(17, 18, 'E')
    expect(18, 19, 'o')
    self.syll.syllabify_interval(self.tier, 13, 15, syllables)
    check_syllables()
def setUp(self):
    """Create a point tier, an interval tier and a mapping table."""
    # Create tiers
    self.tierP = sppasTier("PointTier")
    self.tierI = sppasTier("IntervalTier")
    for i in range(8):
        point_loc = sppasLocation(sppasPoint(i))
        self.tierP.create_annotation(
            point_loc, sppasLabel(sppasTag(str(i))))

        interval_loc = sppasLocation(
            sppasInterval(sppasPoint(i), sppasPoint(i+1)))
        self.tierI.create_annotation(
            interval_loc, sppasLabel(sppasTag(str(i*10))))

    # One more interval, whose tag holds two alternatives
    last_loc = sppasLocation(sppasInterval(sppasPoint(9), sppasPoint(10)))
    self.tierI.create_annotation(
        last_loc, sppasLabel(sppasTag("{quatre-vingts-dix|nonante}")))

    # Create TierMapping; "70" and "80" are each added with two values
    self.map = sppasMappingTier()
    entries = (
        ("1", "un"),
        ("2", "deux"),
        ("3", "trois"),
        ("4", "quatre"),
        ("5", "cinq"),
        ("6", "six"),
        ("7", "sept"),
        ("8", "huit"),
        ("9", "neuf"),
        ("10", "dix"),
        ("20", "vingt"),
        ("30", "trente"),
        ("40", "quarante"),
        ("50", "cinquante"),
        ("60", "soixante"),
        ("70", "septante"),
        ("70", "soixante-dix"),
        ("80", "octante"),
        ("80", "quatre-vingts"),
    )
    for digits, words in entries:
        self.map.add(digits, words)
    self.map.set_delimiters((";", ",", " ", ".", "|"))
def test_tier_tga(self):
    """Check time groups, time segments, durations and TGA statistics."""
    # (begin, begin radius, end, end radius, tag content or None)
    syllables = (
        (0., 0., 1., 0.0, '#'),
        (1., 0., 2., 0.01, 'toto'),
        (3., 0.01, 4., 0.01, 'titi'),
        (4., 0.01, 5., 0.01, None),
        (5., 0.01, 6.5, 0.005, 'toto'),
        (6.5, 0.005, 9.5, 0., 'toto'),
    )
    tier = sppasTier("tier")
    for begin, begin_radius, end, end_radius, text in syllables:
        location = sppasLocation(
            sppasInterval(sppasPoint(begin, begin_radius),
                          sppasPoint(end, end_radius)))
        if text is None:
            # annotation without any label
            tier.create_annotation(location)
        else:
            tier.create_annotation(location, sppasLabel(sppasTag(text)))

    # test the timegroups tier
    tg = sppasTGA().syllables_to_timegroups(tier)
    self.assertEqual(3, len(tg))
    # to be tested:
    #  [1., 2.] tg_1
    #  [3.; 4.] tg_2
    #  [5.; 9.5] tg_3

    ts = sppasTGA().syllables_to_timesegments(tier)
    self.assertEqual(3, len(ts))
    # to be tested:
    #  [1., 2.] toto
    #  [3.; 4.] titi
    #  [5.; 9.5] toto toto

    tg_dur = sppasTGA().timegroups_to_durations(tier, tg)
    self.assertEqual(3, len(tg_dur))
    names = ('tg_1', 'tg_2', 'tg_3')
    for name, durations in zip(names, ([1.], [1.], [1.5, 3.0])):
        self.assertEqual(durations, tg_dur[name])

    tga = TimeGroupAnalysis(tg_dur)
    # Each statistic is computed then checked, in this order
    expectations = (
        (tga.len, (1, 1, 2)),
        (tga.total, (1.0, 1.0, 4.5)),
        (tga.mean, (1.0, 1.0, 2.25)),
    )
    for compute, values in expectations:
        result = compute()
        for name, value in zip(names, values):
            self.assertEqual(value, result[name])
def tracks_to_tier(tracks, end_time, vagueness):
    """Create a sppasTier object from tracks.

    Each track becomes an interval labelled "ipu_N", with N starting at 1.
    Gaps before a track, and after the last one up to end_time, are filled
    with an interval labelled with SIL_ORTHO.

    :param tracks: (List of tuple) with (from, to) values in seconds
    :param end_time: (float) End-time of the tier
    :param vagueness: (float) vagueness used for silence search
    :returns: (sppasTier) Tier named "IPUs"
    :raises: IOError if there is no track

    """
    if len(tracks) == 0:
        raise IOError('No IPUs to write.\n')

    tier = sppasTier("IPUs")
    tier.set_meta('number_of_ipus', str(len(tracks)))

    previous_end = 0.
    for number, (from_time, to_time) in enumerate(tracks, start=1):
        # No radius when the track touches the beginning or the end
        at_boundary = from_time == 0. or to_time == end_time
        radius = 0. if at_boundary else vagueness / 2.

        # From the previous track to the current track: silence
        if previous_end < from_time:
            silence = sppasInterval(sppasPoint(previous_end, radius),
                                    sppasPoint(from_time, radius))
            tier.create_annotation(sppasLocation(silence),
                                   sppasLabel(sppasTag(SIL_ORTHO)))

        # New track with speech
        speech = sppasInterval(sppasPoint(from_time, radius),
                               sppasPoint(to_time, radius))
        tier.create_annotation(sppasLocation(speech),
                               sppasLabel(sppasTag("ipu_%d" % number)))

        previous_end = to_time

    # The end is a silence? Fill...
    begin = sppasPoint(previous_end, vagueness / 2.)
    if begin < end_time:
        closing = sppasInterval(begin, sppasPoint(end_time))
        tier.create_annotation(sppasLocation(closing),
                               sppasLabel(sppasTag(SIL_ORTHO)))

    return tier
def setUp(self):
    """Load the syllabification resources and build a PhonAlign tier.

    Each phoneme lasts one time unit. The tier has three holes:
    [6,7], [10,11] and [13,14].

    """
    self.syll = sppasSyll()
    self.syll.load_resources(FRA_SYLL)

    # (begin, phoneme); the end of each interval is begin + 1
    phonemes = (
        (1, 'l'),
        (2, '@'),
        (3, '#'),
        (4, 'S'),
        (5, 'A/'),
        # hole [6,7]
        (7, '#'),
        (8, 'e'),
        (9, '#'),
        # hole [10,11]
        (11, 'k'),
        (12, '2'),
        # hole [13,14]
        (14, 'p'),
        (15, 'U~/'),
        (16, '#'),
        (17, 'E'),
        (18, 'o'),
        (19, '#'),
        (20, 'g'),
        (21, 'j'),
        (22, 'i'),
        (23, 't'),
    )
    tier = sppasTier('PhonAlign')
    for begin, phoneme in phonemes:
        interval = sppasInterval(sppasPoint(begin), sppasPoint(begin + 1))
        tier.create_annotation(sppasLocation(interval),
                               sppasLabel(sppasTag(phoneme)))
    self.tier = tier
def test_create_tok_tier(self):
    """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes."""
    # Two labels with two alternative tags each
    variants = [sppasLabel([sppasTag(first), sppasTag(second)])
                for first, second in (("j", "S"), ("e", "E"))]

    tier = sppasTier("phonemes")
    tier.create_annotation(sppasLocation(sppasPoint(1)), variants)
    tier.create_annotation(sppasLocation(sppasPoint(2)),
                           sppasLabel(sppasTag("{j|S} {e|E}")))

    tok_tier = TracksWriter._create_tok_tier(tier)
    self.assertEqual(2, len(tok_tier))

    # Both annotations are expected to hold two generated tokens
    for index in (0, 1):
        content = tok_tier[index].get_best_tag().get_content()
        self.assertEqual("w_1 w_2", content)
def test_phon_to_intervals(self):
    """... Create the intervals to be syllabified."""
    def unlabelled(begin, end):
        """Return the location of the interval [begin, end]."""
        return sppasLocation(
            sppasInterval(sppasPoint(begin), sppasPoint(end)))

    def check_intervals():
        """Recompute the intervals and compare them with the expected."""
        intervals = sppasSyll._phon_to_intervals(test_tier)
        self.assertEqual(len(expected), len(intervals))
        for exp_ann, found_ann in zip(expected, intervals):
            self.assertEqual(exp_ann, found_ann)

    test_tier = self.tier.copy()

    expected = sppasTier('Expected')
    bounds = ((1, 3), (4, 6), (8, 9), (11, 13),
              (14, 16), (17, 19), (20, 24))
    for begin, end in bounds:
        expected.create_annotation(unlabelled(begin, end))
    check_intervals()

    # add an empty interval at start
    test_tier.create_annotation(unlabelled(0, 1))
    check_intervals()

    # add an empty interval at end
    test_tier.create_annotation(unlabelled(24, 25))
    check_intervals()

    # silence at start
    test_tier[0].append_label(sppasLabel(sppasTag('#')))
    check_intervals()

    # silence at end
    test_tier[-1].append_label(sppasLabel(sppasTag('#')))
    check_intervals()
def make_classes(self, syllables):
    """Create the tier with syllable classes.

    Each syllable gives one annotation at a copy of its location. The label
    is the result of the syllabifier ``classes_phonetized`` applied to the
    typed content of the best tag of the syllable.

    :param syllables: (sppasTier) Tier of syllables
    :returns: (sppasTier) Tier named "SyllClassAlign"

    """
    classes = sppasTier("SyllClassAlign")
    classes.set_meta('syllabification_classes_of_tier',
                     syllables.get_name())

    for syll in syllables:
        location = syll.get_location().copy()
        content = syll.get_best_tag().get_typed_content()
        syll_class = self.syllabifier.classes_phonetized(content)
        classes.create_annotation(location,
                                  sppasLabel(sppasTag(syll_class)))

    return classes
def convert(self, tier):
    """Phonetize annotations of a tokenized tier.

    Each label of each annotation is turned into one label. Its tags are
    the distinct phonetization variants, in their order of first
    appearance, so that the result is the same from one run to another.

    :param tier: (Tier) the orthographic transcription previously tokenized.
    :returns: (Tier) phonetized tier with name "Phones"
    :raises: EmptyInputError if the given tier is empty

    """
    if tier.is_empty() is True:
        raise EmptyInputError(name=tier.get_name())

    phones_tier = sppasTier("Phones")
    for i, ann in enumerate(tier):
        self.print_message(MSG_TRACK.format(number=i + 1), indent=2)

        location = ann.get_location().copy()
        labels = list()

        # Phonetize all labels of the orthographic transcription
        for label in ann.get_labels():

            phonetizations = list()
            for text, _score in label:
                if text.is_pause() or text.is_silence():
                    # It's in case the pronunciation dictionary
                    # were not properly fixed.
                    phonetizations.append(SIL)

                elif text.is_empty() is False:
                    phones = self.phonetize(text.get_content(), i)
                    for p in phones:
                        phonetizations.extend(p.split(VARIANTS_SEPARATOR))

            # New in SPPAS 1.9.6.
            #  - The result is a sequence of labels.
            #  - Variants are alternative tags.
            # Remove duplicates but keep the order of first appearance:
            # iterating over a set would give an arbitrary order of tags.
            seen = set()
            tags = list()
            for p in phonetizations:
                if p not in seen:
                    seen.add(p)
                    tags.append(sppasTag(p))
            labels.append(sppasLabel(tags))

        phones_tier.create_annotation(location, labels)

    return phones_tier
def tones_to_tier(tones, anchors_tier):
    """Convert the INTSINT result into a tier.

    The i-th tone is the tag of an annotation located at a copy of the
    location of the i-th anchor.

    :param tones: (list) One tone for each anchor
    :param anchors_tier: (sppasTier) Tier of anchors
    :returns: (sppasTier) Tier named "INTSINT"
    :raises: AnnDataEqError if there are not as many tones as anchors

    """
    nb_tones = len(tones)
    nb_anchors = len(anchors_tier)
    if nb_tones != nb_anchors:
        raise AnnDataEqError("tones:" + str(nb_tones),
                             "anchors:" + str(nb_anchors))

    tier = sppasTier("INTSINT")
    for tone, anchor_ann in zip(tones, anchors_tier):
        # The tag is created first, then the location is copied
        label = sppasLabel(sppasTag(tone))
        tier.create_annotation(anchor_ann.get_location().copy(), label)

    return tier
def setUp(self):
    """Create four labelled interval annotations and a tier with them."""
    def annotation(begin, begin_radius, end, end_radius, text):
        """Return an interval annotation with a single tag."""
        interval = sppasInterval(sppasPoint(begin, begin_radius),
                                 sppasPoint(end, end_radius))
        return sppasAnnotation(sppasLocation(interval),
                               sppasLabel(sppasTag(text)))

    self.x = annotation(1., 0., 2., 0.01, 'toto')
    self.y = annotation(3., 0.01, 4., 0.01, 'titi')
    self.a = annotation(5., 0.01, 6.5, 0.005, 'toto')
    self.b = annotation(6.5, 0.005, 9.5, 0., 'toto')

    self.tier = sppasTier()
    for ann in (self.x, self.y, self.a, self.b):
        self.tier.append(ann)
def convert(self, phonemes, intervals=None):
    """Syllabify labels of a time-aligned phones tier.

    :param phonemes: (sppasTier) time-aligned phonemes tier
    :param intervals: (sppasTier) Intervals to syllabify. If None, they
        are obtained from the phonemes tier with _phon_to_intervals().
    :returns: (sppasTier) Tier named "SyllAlign"

    """
    if intervals is None:
        intervals = sppasSyll._phon_to_intervals(phonemes)

    syllables = sppasTier("SyllAlign")
    syllables.set_meta('syllabification_of_tier', phonemes.get_name())

    for interval in intervals:

        # index of the phoneme containing the begin of the interval
        lowest = interval.get_lowest_localization()
        first_idx = phonemes.lindex(lowest)
        if first_idx == -1:
            first_idx = phonemes.mindex(lowest, bound=-1)

        # index of the phoneme containing the end of the interval
        highest = interval.get_highest_localization()
        last_idx = phonemes.rindex(highest)
        if last_idx == -1:
            last_idx = phonemes.mindex(highest, bound=1)

        # an interval without matching phonemes is reported, not syllabified
        if first_idx == -1 or last_idx == -1:
            self.print_message(MSG_INVALID.format(interval),
                               indent=2, status=WARNING_ID)
            continue

        # syllabify within the interval
        self.syllabify_interval(phonemes, first_idx, last_idx, syllables)

    return syllables
def tga_to_tier(tga_result, timegroups, tier_name, tag_type="float"):
    """Create a tier from one of the TGA result.

    The serialized labels of each time group are the key used to get its
    value in ``tga_result``. Values of type "float" are rounded to 5
    decimals.

    :param tga_result: One of the results of TGA
    :param timegroups: (sppasTier) Time groups
    :param tier_name: (str) Name of the output tier
    :param tag_type: (str) Type of the sppasTag to be included
    :returns: (sppasTier)

    """
    round_values = tag_type == "float"
    tier = sppasTier(tier_name)

    for tg_ann in timegroups:
        value = tga_result[tg_ann.serialize_labels()]
        if round_values:
            value = round(value, 5)
        location = tg_ann.get_location().copy()
        tier.create_annotation(location,
                               sppasLabel(sppasTag(value, tag_type)))

    return tier
def map_tier(self, tier):
    """Run the mapping process on an input tier.

    The new tier is named after the given one with the suffix "-map". It
    shares its media and gets all its metadata except 'id', plus a
    'tier_was_mapped_from' entry holding the name of the given tier.

    :param tier: (sppasTier) The tier instance to map label symbols.
    :returns: a new tier

    """
    source_name = tier.get_name()

    # Create the output tier
    new_tier = sppasTier(source_name + "-map")
    new_tier.set_media(tier.get_media())
    for key in tier.get_meta_keys():
        if key == 'id':
            continue
        new_tier.set_meta(key, tier.get_meta(key))
    new_tier.set_meta('tier_was_mapped_from', source_name)

    # always map, even if empty mapping table: it will copy annotations
    if len(tier) > 0:
        for ann in tier:
            new_tier.add(self.map_annotation(ann))

    return new_tier
def get_tier(self, trs):
    """Create and return the activity tier.

    Consecutive tokens with the same activity are merged into a single
    interval labelled with this activity.

    :param trs: (sppasTranscription) a Transcription containing a tier
        with exactly the name 'TokensAlign'.
    :returns: sppasTier
    :raises: NoInputError (presumably from sppasFindTier.aligned_tokens
        when there is no such tier -- to be confirmed)

    """
    new_tier = sppasTier('Activity')

    tokens_tier = sppasFindTier.aligned_tokens(trs)
    if tokens_tier.is_empty():
        return new_tier

    tokens = fill_gaps(tokens_tier, trs.get_min_loc(), trs.get_max_loc())
    nb_tokens = len(tokens)
    if nb_tokens == 0:
        return new_tier
    if nb_tokens == 1:
        # a single token: its activity covers its own location
        only = tokens[0]
        new_tier.create_annotation(
            only.get_location().copy(),
            sppasLabel(sppasTag(self.fix_activity(only))))
        return new_tier

    initial = "<INIT>"   # marker: no activity observed yet
    activity = initial
    for ann in tokens:
        new_activity = self.fix_activity(ann)

        # The activity has changed: register the one which just ended
        if activity != initial and activity != new_activity:
            if len(new_tier) == 0:
                start_point = tokens.get_first_point()
            else:
                start_point = new_tier.get_last_point()
            begin = start_point.copy()
            new_tier.create_annotation(
                sppasLocation(
                    sppasInterval(begin, ann.get_lowest_localization())),
                sppasLabel(sppasTag(activity)))

        # In any case, update current activity
        activity = new_activity

    # last registered activity (we ignored it)
    if len(new_tier) == 0:
        # we observed only one activity...
        closing_begin = tokens.get_first_point()
    elif new_tier.get_last_point() < tokens.get_last_point():
        closing_begin = new_tier.get_last_point()
    else:
        # the tier already reaches the end of the tokens
        closing_begin = None

    if closing_begin is not None:
        new_tier.create_annotation(
            sppasLocation(
                sppasInterval(closing_begin, tokens.get_last_point())),
            sppasLabel(sppasTag(activity)))

    new_tier = unfill_gaps(new_tier)
    new_tier.set_name('Activity')
    return new_tier