def test_write_phonemes(self):
    """Write the phonetization of a track in a file.

    Two cases are checked: an annotation made of several labels with
    alternative tags, and an annotation whose single tag already holds
    the serialized labels.

    """
    def read_lines(path):
        """Return the lines of the file `path`."""
        # The context manager closes the file: no explicit close() needed.
        with codecs.open(path, "r", sg.__encoding__) as fp:
            return fp.readlines()

    # test to write an annotation with complex labels
    l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
    l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
    ann = sppasAnnotation(sppasLocation(sppasPoint(1)), [l1, l2])
    TracksWriter._write_phonemes(ann, TEMP, 1)
    fn = os.path.join(TEMP, "track_000001.phn")
    self.assertTrue(os.path.exists(fn))
    lines = read_lines(fn)
    self.assertEqual(1, len(lines))
    self.assertEqual("{j|S} {e|E}", lines[0])

    # test to write an annotation with already serialized labels
    sentence = "A serialized list of {labels|tags}"
    ann = sppasAnnotation(
        sppasLocation(sppasPoint(1)),
        sppasLabel(sppasTag(sentence)))
    TracksWriter._write_phonemes(ann, TEMP, 2)
    fn = os.path.join(TEMP, "track_000002.phn")
    self.assertTrue(os.path.exists(fn))
    lines = read_lines(fn)
    self.assertEqual(1, len(lines))
    self.assertEqual(sentence, lines[0])
def create_time_tier(self, begin, end, tier_name="MetaInformation"):
    """Create a tier with activated information as annotations.

    The range [begin, end] is shared between the enabled keys: one
    annotation of equal duration (rounded to 3 digits) is created for
    each key, labelled "key=value". The end of the last annotation is
    then forced to `end`.

    :param begin: (float) Begin midpoint value
    :param end: (float) End midpoint value
    :param tier_name: (str) Name of the tier to create
    :returns: sppasTier, or None if no key is enabled

    """
    enabled = self.keys_enabled()
    if len(enabled) == 0:
        return None

    # Duration of each annotation
    step = round((float(end) - float(begin)) / float(len(enabled)), 3)

    meta_tier = sppasTier(tier_name)
    left = round(begin, 3)
    right = begin + step
    for name in enabled:
        tag = sppasTag(name + "=" + self.get_metainfo(name))
        bounds = sppasInterval(sppasPoint(left), sppasPoint(right))
        meta_tier.create_annotation(sppasLocation(bounds), sppasLabel(tag))
        # the next annotation starts where this one ends
        left, right = right, right + step

    # Rounding can shift the last boundary: set it to the requested end
    meta_tier[-1].get_location().get_best().set_end(sppasPoint(end))

    return meta_tier
def test_write_text_tracks(self):
    """Write tokenization and phonetization into separated track files.

    Checks that tiers of different sizes are rejected, then that the
    tracks are written either without or with a tier of tokens.

    """
    def read_tracks(directory, nb_expected):
        """Return the single line of each file found in `directory`."""
        created_files = os.listdir(directory)
        self.assertEqual(nb_expected, len(created_files))
        collected = list()
        for fn in created_files:
            # The context manager closes the file: no explicit close().
            with codecs.open(os.path.join(directory, fn), "r", sg.__encoding__) as fp:
                new_lines = fp.readlines()
            self.assertEqual(1, len(new_lines))
            collected.append(new_lines[0])
        return collected

    l1 = sppasLabel([sppasTag("j"), sppasTag("S")])
    l2 = sppasLabel([sppasTag("e"), sppasTag("E")])
    tier_phn = sppasTier("phonemes")
    tier_phn.create_annotation(sppasLocation(sppasPoint(1)),
                               [l1, l2])
    tier_phn.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag("j-e s-H-i")))
    tier_tok = sppasTier("tokens")
    tier_tok.create_annotation(sppasLocation(sppasPoint(1)),
                               sppasLabel(sppasTag("j' ai")))
    tier_tok.create_annotation(sppasLocation(sppasPoint(2)),
                               sppasLabel(sppasTag('je suis')))

    # tiers of phonemes and tokens must have the same size
    with self.assertRaises(SizeInputsError):
        TracksWriter._write_text_tracks(tier_phn, sppasTier('toto'), TEMP)

    # no tier of tokens given
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_1")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, None, dir_tracks)
    lines = read_tracks(dir_tracks, 4)
    self.assertIn("w_1 w_2", lines)
    self.assertIn("{j|S} {e|E}", lines)
    self.assertIn("j-e s-H-i", lines)

    # both tiers of phonemes and tokens given
    dir_tracks = os.path.join(TEMP, "test_write_text_tracks_2")
    os.mkdir(dir_tracks)
    TracksWriter._write_text_tracks(tier_phn, tier_tok, dir_tracks)
    lines = read_tracks(dir_tracks, 4)
    self.assertIn("j' ai", lines)
    self.assertIn("je suis", lines)
    self.assertIn("{j|S} {e|E}", lines)
    self.assertIn("j-e s-H-i", lines)
def test_create_tok_tier(self):
    """Create a tier with tokens like 'w_1 w_2...w_n' from phonemes.

    The first annotation holds two labels with alternative tags, the
    second one holds the same content already serialized in one tag.

    """
    first = sppasLabel([sppasTag("j"), sppasTag("S")])
    second = sppasLabel([sppasTag("e"), sppasTag("E")])

    phonemes = sppasTier("phonemes")
    phonemes.create_annotation(sppasLocation(sppasPoint(1)), [first, second])
    phonemes.create_annotation(
        sppasLocation(sppasPoint(2)),
        sppasLabel(sppasTag("{j|S} {e|E}")))

    tokens = TracksWriter._create_tok_tier(phonemes)
    self.assertEqual(2, len(tokens))

    # both annotations are expected to get the same two tokens
    for index in (0, 1):
        content = tokens[index].get_best_tag().get_content()
        self.assertEqual("w_1 w_2", content)
def test_map_annotation(self):
    """Map a single annotation.

    Cases: no label, one label, several labels, and several labels
    serialized into a single tag.

    """
    def twice(content):
        """Return a list of two labels, each with the tag `content`."""
        return [sppasLabel(sppasTag(content)), sppasLabel(sppasTag(content))]

    source = sppasAnnotation(sppasLocation(sppasPoint(1)))
    target = source.copy()

    # annotation without label
    self.assertEqual(source, self.map.map_annotation(source))

    # annotation with one label and no alternatives
    source.set_labels(sppasLabel(sppasTag("1")))
    target.set_labels(sppasLabel(sppasTag("un")))
    self.assertEqual(target, self.map.map_annotation(source))

    # annotation with several labels
    source.set_labels(twice("1"))
    target.set_labels(twice("un"))
    self.assertEqual(target, self.map.map_annotation(source))

    # annotation with several labels serialized
    source.set_labels(twice("1"))
    serialized = source.serialize_labels()
    source.set_labels(sppasLabel(sppasTag(serialized)))

    target.set_labels(twice("un"))
    serialized = target.serialize_labels()
    target.set_labels(sppasLabel(sppasTag(serialized)))

    self.assertEqual(target, self.map.map_annotation(source))
def setUp(self):
    """Create a tier of points, a tier of intervals and a mapping.

    The mapping associates numbers written with digits to their written
    form; two values are added for the keys "70" and "80".

    """
    # Create tiers
    self.tierP = sppasTier("PointTier")
    self.tierI = sppasTier("IntervalTier")
    for i in range(8):
        point_loc = sppasLocation(sppasPoint(i))
        self.tierP.create_annotation(point_loc, sppasLabel(sppasTag(str(i))))

        interval_loc = sppasLocation(
            sppasInterval(sppasPoint(i), sppasPoint(i+1)))
        self.tierI.create_annotation(
            interval_loc, sppasLabel(sppasTag(str(i*10))))

    # One more interval, with a tag made of alternatives
    last_loc = sppasLocation(sppasInterval(sppasPoint(9), sppasPoint(10)))
    self.tierI.create_annotation(
        last_loc, sppasLabel(sppasTag("{quatre-vingts-dix|nonante}")))

    # Create TierMapping
    self.map = sppasMappingTier()
    entries = (
        ("1", "un"),
        ("2", "deux"),
        ("3", "trois"),
        ("4", "quatre"),
        ("5", "cinq"),
        ("6", "six"),
        ("7", "sept"),
        ("8", "huit"),
        ("9", "neuf"),
        ("10", "dix"),
        ("20", "vingt"),
        ("30", "trente"),
        ("40", "quarante"),
        ("50", "cinquante"),
        ("60", "soixante"),
        ("70", "septante"),
        ("70", "soixante-dix"),
        ("80", "octante"),
        ("80", "quatre-vingts"),
    )
    for key, value in entries:
        self.map.add(key, value)
    self.map.set_delimiters((";", ",", " ", ".", "|"))
def _add_aligned_track_into_tier(tier, tdata, delta, unitend): """Append a list of (start, end, text, score) into the tier. Shift start/end of a delta value and set the last end value. """ try: for i, t in enumerate(tdata): # fix the location - an interval (loc_s, loc_e, contents, scores) = t loc_s += delta loc_e += delta if i == (len(tdata)-1): loc_e = unitend location = sppasLocation( sppasInterval( sppasPoint(loc_s, TracksReader.RADIUS), sppasPoint(loc_e, TracksReader.RADIUS) )) # fix the label # allow to work with alternative tags tags = [sppasTag(c) for c in contents.split('|')] if scores is not None: tag_scores = [float(s) for s in scores.split('|')] else: tag_scores = None label = sppasLabel(tags, tag_scores) tier.create_annotation(location, label) except: logging.error('The following data were not added to the tier ' '{:s} at position {:f}: {:s}' ''.format(tier.get_name(), delta, str(tdata))) logging.error(traceback.format_exc())
def tracks_to_tier(tracks, end_time, vagueness):
    """Create a sppasTier object from tracks.

    Each track becomes an annotation "ipu_N" (N starting at 1). A silence
    annotation is inserted before a track which starts after the end of
    the previous one, and after the last track if it ends before
    `end_time`.

    :param tracks: (List of tuple) with (from, to) values in seconds
    :param end_time: (float) End-time of the tier
    :param vagueness: (float) vagueness used for silence search
    :returns: (sppasTier) Tier named "IPUs"
    :raises: IOError if tracks is empty

    """
    if len(tracks) == 0:
        raise IOError('No IPUs to write.\n')

    ipus = sppasTier("IPUs")
    ipus.set_meta('number_of_ipus', str(len(tracks)))

    previous_end = 0.
    for number, (from_time, to_time) in enumerate(tracks, 1):
        # No radius if the track touches 0. or the end time
        at_bound = from_time == 0. or to_time == end_time
        radius = 0. if at_bound else vagueness / 2.

        # From the previous track to the current track: silence
        if previous_end < from_time:
            gap = sppasInterval(sppasPoint(previous_end, radius),
                                sppasPoint(from_time, radius))
            ipus.create_annotation(sppasLocation(gap),
                                   sppasLabel(sppasTag(SIL_ORTHO)))

        # New track with speech
        speech = sppasInterval(sppasPoint(from_time, radius),
                               sppasPoint(to_time, radius))
        ipus.create_annotation(sppasLocation(speech),
                               sppasLabel(sppasTag("ipu_%d" % number)))

        # Go to the next
        previous_end = to_time

    # The end is a silence? Fill...
    begin = sppasPoint(previous_end, vagueness / 2.)
    if begin < end_time:
        tail = sppasInterval(begin, sppasPoint(end_time))
        ipus.create_annotation(sppasLocation(tail),
                               sppasLabel(sppasTag(SIL_ORTHO)))

    return ipus
def setUp(self):
    """Create four interval annotations and append them to a tier."""
    def build(begin, end, content):
        """Return an annotation from (value, radius) bounds and a tag."""
        interval = sppasInterval(sppasPoint(*begin), sppasPoint(*end))
        return sppasAnnotation(sppasLocation(interval),
                               sppasLabel(sppasTag(content)))

    self.x = build((1., 0.), (2., 0.01), 'toto')
    self.y = build((3., 0.01), (4., 0.01), 'titi')
    self.a = build((5., 0.01), (6.5, 0.005), 'toto')
    self.b = build((6.5, 0.005), (9.5, 0.), 'toto')

    self.tier = sppasTier()
    for annotation in (self.x, self.y, self.a, self.b):
        self.tier.append(annotation)
def test_syllabify_interval(self):
    """... Perform the syllabification of one interval."""
    expected = sppasTier('Expected')
    syllables = sppasTier('SyllAlign')

    def expect(begin, end, content):
        """Add an expected syllable into the tier of reference."""
        expected.create_annotation(
            sppasLocation(sppasInterval(sppasPoint(begin), sppasPoint(end))),
            sppasLabel(sppasTag(content)))

    def compare():
        """Compare the syllables to the expected ones, pairwise."""
        self.assertEqual(len(expected), len(syllables))
        for wanted, got in zip(expected, syllables):
            self.assertEqual(wanted, got)

    # phonemes from index 0 to 1
    expect(1, 3, 'l-@')
    self.syll.syllabify_interval(self.tier, 0, 1, syllables)
    compare()

    # phonemes from index 13 to 15: results are added to the same tier
    expect(17, 18, 'E')
    expect(18, 19, 'o')
    self.syll.syllabify_interval(self.tier, 13, 15, syllables)
    compare()
def ExportToAnnData(self):
    """Export this transcription to anndata.sppasTranscription().

    Metadata (only the values which are not None), controlled
    vocabularies, media, tiers with their annotations and hierarchy
    links are copied into a newly created anndata.sppasTranscription.

    A radius equal to 0. is exported as None, for a point as well as for
    both the begin and the end of an interval.

    :returns: anndata.sppasTranscription

    """
    def export_point(point):
        """Return the anndata.sppasPoint matching the given point."""
        radius = point.GetRadius()
        if radius == 0.:
            radius = None
        return anndata.sppasPoint(point.GetValue(), radius)

    trs = anndata.sppasTranscription(self.__name)

    # Metadata
    for meta_key in self.metadata:
        if self.metadata[meta_key] is not None:
            trs.set_meta(meta_key, self.metadata[meta_key])

    # Controlled vocabularies
    for ctrl_vocab in self.GetCtrlVocab():
        other_cv = anndata.sppasCtrlVocab(ctrl_vocab.id,
                                          ctrl_vocab.GetDescription())
        for entry in ctrl_vocab:
            entry_text = entry.Text
            entry_desc = entry.GetDescription()
            other_cv.add(anndata.sppasTag(entry_text), entry_desc)
        trs.add_ctrl_vocab(other_cv)

    # Media
    for media in self.GetMedia():
        other_m = anndata.sppasMedia(media.url, media.id, media.mime)
        trs.add_media(other_m)

    # Tiers and their annotations
    for tier in self:
        c = tier.GetCtrlVocab()
        if c is not None:
            ctrl_vocab = trs.get_ctrl_vocab_from_name(c.GetName())
        else:
            ctrl_vocab = None

        m = tier.GetMedia()
        if m is not None:
            media = trs.get_media_from_id(m.id)
        else:
            media = None

        other_t = trs.create_tier(tier.GetName(), ctrl_vocab, media)
        is_point = tier.IsPoint()
        for ann in tier:
            text = ann.GetLabel().GetLabel()
            if is_point is True:
                localization = export_point(ann.GetLocation().GetPoint())
            else:
                # The end radius is normalized like the begin one (the
                # former code tested the begin radius twice).
                localization = anndata.sppasInterval(
                    export_point(ann.GetLocation().GetBegin()),
                    export_point(ann.GetLocation().GetEnd()))
            other_t.create_annotation(
                anndata.sppasLocation(localization),
                anndata.sppasLabel(anndata.sppasTag(text)))

    # Hierarchy: links are added once all the tiers are created
    for tier in self:
        parent_tier = self._hierarchy.get_parent(tier)
        if parent_tier is not None:
            link_type = self._hierarchy.get_hierarchy_type(tier)
            new_tier = trs.find(tier.GetName())
            new_parent_tier = trs.find(parent_tier.GetName())
            trs.add_hierarchy_link(link_type, new_parent_tier, new_tier)

    return trs
def test_phon_to_intervals(self):
    """... Create the intervals to be syllabified."""
    def location(begin, end):
        """Return the location of the interval [begin, end]."""
        return sppasLocation(sppasInterval(sppasPoint(begin), sppasPoint(end)))

    expected = sppasTier('Expected')
    for begin, end in ((1, 3), (4, 6), (8, 9), (11, 13),
                       (14, 16), (17, 19), (20, 24)):
        expected.create_annotation(location(begin, end))

    test_tier = self.tier.copy()

    def verify():
        """Compare the intervals of test_tier to the expected ones."""
        intervals = sppasSyll._phon_to_intervals(test_tier)
        self.assertEqual(len(expected), len(intervals))
        for wanted, got in zip(expected, intervals):
            self.assertEqual(wanted, got)

    verify()

    # add an empty interval at start
    test_tier.create_annotation(location(0, 1))
    verify()

    # add an empty interval at end
    test_tier.create_annotation(location(24, 25))
    verify()

    # silence at start
    test_tier[0].append_label(sppasLabel(sppasTag('#')))
    verify()

    # silence at end
    test_tier[-1].append_label(sppasLabel(sppasTag('#')))
    verify()
def setUp(self):
    """Load the syllabification resources and create a tier of phonemes.

    The tier is made of one-unit-long intervals from 1 to 24, with no
    annotation in [6,7], [10,11] and [13,14].

    """
    self.syll = sppasSyll()
    self.syll.load_resources(FRA_SYLL)

    # (begin, phoneme) of each interval; the end is begin+1
    phonemes = (
        (1, 'l'),
        (2, '@'),
        (3, '#'),
        (4, 'S'),
        (5, 'A/'),
        # hole [6,7]
        (7, '#'),
        (8, 'e'),
        (9, '#'),
        # hole [10,11]
        (11, 'k'),
        (12, '2'),
        # hole [13,14]
        (14, 'p'),
        (15, 'U~/'),
        (16, '#'),
        (17, 'E'),
        (18, 'o'),
        (19, '#'),
        (20, 'g'),
        (21, 'j'),
        (22, 'i'),
        (23, 't'),
    )

    phon_tier = sppasTier('PhonAlign')
    for begin, content in phonemes:
        interval = sppasInterval(sppasPoint(begin), sppasPoint(begin + 1))
        phon_tier.create_annotation(sppasLocation(interval),
                                    sppasLabel(sppasTag(content)))
    self.tier = phon_tier
# Fragment: builds one annotation of behavior_tier for the time window
# number (i + start), each window being `delta` long.
# NOTE(review): `i`, `start`, `delta`, `texts`, `tiers_numbers`, `trs_input`
# and `behavior_tier` are defined outside this excerpt - presumably by an
# enclosing loop over the windows; confirm that `texts` is reset for each
# window.
b = (i + start) * delta
e = b + delta
for t in tiers_numbers:
    # tiers_numbers are 1-based, hence the "t - 1" index
    tier = trs_input[t - 1]
    # get only ONE annotation in our range
    anns = tier.find(b, e, overlaps=True)
    if len(anns) > 1:
        # NOTE(review): int() truncates, so the offset is 0 as soon as
        # delta is lower than 2 - confirm this is intended.
        anni = tier.near(b + int(delta / 2.), direction=0)
        ann = tier[anni]
    else:
        # NOTE(review): this indexing fails if find() returned nothing -
        # confirm that at least one annotation is always found.
        ann = anns[0]
    texts.append(ann.serialize_labels())

# Append in new tier
ti = anndata.sppasInterval(anndata.sppasPoint(b, 0.0001),
                           anndata.sppasPoint(e, 0.0001))
if len(texts) > 1:
    # the label is empty as soon as one of the collected texts is empty,
    # otherwise the texts are joined with ";"
    missing = False
    for t in texts:
        if len(t.strip()) == 0:
            # missing annotation label...
            missing = True
    if missing is True:
        text = ""
    else:
        text = ";".join(texts)
else:
    text = str(texts[0])
behavior_tier.create_annotation(anndata.sppasLocation(ti),
                                anndata.sppasLabel(anndata.sppasTag(text)))
# create audio output extracter = channel.extract_fragment(int(begin * framerate), int(end * framerate)) audio_out = sppasAudioPCM() audio_out.append_channel(extracter) if not args.quiet: print(" - audio: " + fn + ".wav") sppas.src.audiodata.aio.save(fn + ".wav", audio_out) # create text output (copy original label as it!) trs_output = sppasTranscription("TrackSegment") tracks_tier = trs_output.create_tier(tier_name + "-" + idx) tracks_tier.create_annotation( sppasLocation( sppasInterval(sppasPoint(0.), sppasPoint(float(end - begin)))), [l.copy() for l in ann.get_labels()]) parser.set_filename(fn + args.e) if not args.quiet: print(" - text: " + fn + args.e) parser.write(trs_output) nb += 1 # just to do things... properly! if nb == 0: os.remove(output_dir) print("Done. No track extracted!\n") else: if not args.quiet: print("Done. {:d} tracks were extracted.\n".format(nb))
def test_tier_tga(self):
    """Estimate time groups of a tier and their TGA statistics."""
    # ((begin, radius), (end, radius), tag content or None for no label)
    rows = (
        ((0., 0.), (1., 0.0), '#'),
        ((1., 0.), (2., 0.01), 'toto'),
        ((3., 0.01), (4., 0.01), 'titi'),
        ((4., 0.01), (5., 0.01), None),
        ((5., 0.01), (6.5, 0.005), 'toto'),
        ((6.5, 0.005), (9.5, 0.), 'toto'),
    )
    tier = sppasTier("tier")
    for begin, end, content in rows:
        where = sppasLocation(
            sppasInterval(sppasPoint(*begin), sppasPoint(*end)))
        if content is None:
            tier.create_annotation(where)
        else:
            tier.create_annotation(where, sppasLabel(sppasTag(content)))

    # test the timegroups tier
    tg = sppasTGA().syllables_to_timegroups(tier)
    self.assertEqual(3, len(tg))
    # to be tested:
    #  [1., 2.] tg_1
    #  [3.; 4.] tg_2
    #  [5.; 9.5] tg_3

    ts = sppasTGA().syllables_to_timesegments(tier)
    self.assertEqual(3, len(ts))
    # to be tested:
    #  [1., 2.] toto
    #  [3.; 4.] titi
    #  [5.; 9.5] toto toto

    tg_dur = sppasTGA().timegroups_to_durations(tier, tg)
    self.assertEqual(3, len(tg_dur))
    for name, durations in (('tg_1', [1.]),
                            ('tg_2', [1.]),
                            ('tg_3', [1.5, 3.0])):
        self.assertEqual(durations, tg_dur[name])

    tga = TimeGroupAnalysis(tg_dur)

    occurrences = tga.len()
    for name, count in (('tg_1', 1), ('tg_2', 1), ('tg_3', 2)):
        self.assertEqual(count, occurrences[name])

    total = tga.total()
    for name, value in (('tg_1', 1.0), ('tg_2', 1.0), ('tg_3', 4.5)):
        self.assertEqual(value, total[name])

    mean = tga.mean()
    for name, value in (('tg_1', 1.0), ('tg_2', 1.0), ('tg_3', 2.25)):
        self.assertEqual(value, mean[name])