def __init__(self, config, logfile=None):
    """
    Create a new sppasSyll instance.

    All options start with their default value: merge and usesintervals
    are disabled, usesphons is enabled and the interval tier name is
    "TokensAlign".

    @param config is the configuration (rules) file name,
    @param logfile is a file descriptor of the log file.

    Any exception raised while creating the Syllabification instance
    propagates unchanged to the caller.

    """
    self._merge = False
    self._usesintervals = False
    self._usesphons = True
    self._tiername = "TokensAlign"

    # No try/except here: catching an exception only to re-raise it
    # adds nothing and hides where the error really comes from.
    self.syllabifier = Syllabification(config, logfile)
def setUp(self):
    """
    Create the two syllabifiers used by the tests, one from POL_SYLL
    and one from FRA_SYLL. No log file is given to either of them.

    """
    no_logfile = None
    self.syllabifierPOL = Syllabification(POL_SYLL, no_logfile)
    self.syllabifierFRA = Syllabification(FRA_SYLL, no_logfile)
class TestSyll(unittest.TestCase):
    """
    Tests of the rule-based syllabification.

    Each test builds a tier from a sequence of phoneme labels (vowels
    separated by 0 to 5 consonants), syllabifies it and compares the
    result, as returned by get_syll(), with the expected string in which
    "|" separates the syllables.

    """

    def setUp(self):
        """ Create the syllabifiers from POL_SYLL and FRA_SYLL (no log file). """
        self.syllabifierPOL = Syllabification(POL_SYLL, None)
        self.syllabifierFRA = Syllabification(FRA_SYLL, None)

    def _check(self, syllabifier, phonemes, expected):
        """
        Syllabify a sequence of phonemes and check the result.

        Not named test*, so unittest does not collect it as a test.

        @param syllabifier is the Syllabification instance to use,
        @param phonemes (list) is the list of phoneme labels,
        @param expected (string) is the expected result of get_syll().

        """
        tierP = labels2tier(phonemes)
        self.assertIsNotNone(tierP)
        trsS = syllabifier.syllabify(tierP)
        self.assertEqual(expected, get_syll(trsS))

    def testVV(self):
        self._check(self.syllabifierPOL, ['a', 'a'], "a|a")

    def testVCV(self):
        self._check(self.syllabifierPOL, ['a', 'b', 'a'], "a|ba")

    def testVCCV(self):
        pol = self.syllabifierPOL
        # general rule
        self._check(pol, ['a', 'n', 'c', 'a'], "an|ca")
        # exception rule
        self._check(pol, ['a', 'g', 'j', 'a'], "a|gja")
        # specific (shift to left)
        self._check(pol, ['a', 'd', 'g', 'a'], "a|dga")
        # do not apply the previous specific rule if not VdgV
        self._check(pol, ['a', 'x', 'd', 'g', 'a'], "ax|dga")
        # specific (shift to right)
        self._check(pol, ['a', 'z', 'Z', 'a'], "az|Za")

    def testVCCCV(self):
        pol = self.syllabifierPOL
        # general rule
        self._check(pol, ['a', 'm', 'm', 'n', 'a'], "am|mna")
        # exception rule
        self._check(pol, ['a', 'dz', 'v', 'j', 'a'], "a|dzvja")
        # specific (shift to left)
        self._check(pol, ['a', 'b', 'z', 'n', 'a'], "a|bzna")
        # specific (shift to right)
        self._check(pol, ['a', 'r', 'w', 'S', 'a'], "arw|Sa")

    def testVCCCCV(self):
        self._check(self.syllabifierPOL,
                    ['a', 'b', 'r', 'v', 'j', 'a'], "a|brvja")

    def testVCCCCCV(self):
        # This is the only test using the FRA_SYLL syllabifier.
        self._check(self.syllabifierFRA,
                    ['a', 'p', 's', 'k', 'm', 'w', 'a'], "apsk|mwa")
class sppasSyll:
    """
    SPPAS automatic syllabification annotation.

    For details, see:
    B. Bigi, C. Meunier, I. Nesterenko, R. Bertrand (2010).
    Automatic detection of syllable boundaries in spontaneous speech
    Language Resource and Evaluation Conference, pp 3285-3292,
    La Valetta, Malte.

    The syllabification of phonemes is performed with a rule-based system
    from aligned phonemes. This RBS phoneme-to-syllable segmentation system
    is based on 2 main principles:
        - a syllable contains a vowel, and only one;
        - a pause is a syllable boundary.
    These two principles focus the problem of the task of finding a
    syllabic boundary between two vowels.
    As in state-of-the-art systems, phonemes were grouped into classes and
    rules established to deal with these classes. The rules we propose
    follow usual phonological statements for most of the corpus.
    A configuration file indicates phonemes, classes and rules.
    This file can be edited and modified to adapt the syllabification.
    The syllable configuration file is a simple ASCII text file that the
    user can change as needed.

    How to use sppasSyll?

    >>> s = sppasSyll( configfilename )
    >>> s.run(inputfilename, outputfilename)

    """

    def __init__(self, config, logfile=None):
        """
        Create a new sppasSyll instance.

        All options start with their default value: merge and usesintervals
        are disabled, usesphons is enabled and the interval tier name is
        "TokensAlign".

        @param config is the configuration (rules) file name,
        @param logfile is a file descriptor of the log file.

        Any exception raised while creating the Syllabification instance
        propagates unchanged to the caller.

        """
        self._merge = False
        self._usesintervals = False
        self._usesphons = True
        self._tiername = "TokensAlign"

        # No try/except here: catching an exception only to re-raise it
        # adds nothing and hides where the error really comes from.
        self.syllabifier = Syllabification(config, logfile)

    # End __init__
    # ------------------------------------------------------------------------

    def fix_options(self, options):
        """
        Fix all options.

        Known keys are "merge", "usesintervals", "usesphons" and "tiername".
        An option with any other key is ignored.

        @param options is an iterable of option objects, each one giving
        its name with get_key() and its value with get_value().

        """
        for opt in options:
            key = opt.get_key()

            if "merge" == key:
                self.set_merge(opt.get_value())

            elif "usesintervals" == key:
                self.set_usesintervals(opt.get_value())

            elif "usesphons" == key:
                self.set_usesphons(opt.get_value())

            elif "tiername" == key:
                self.set_tiername(opt.get_value())

    # End fix_options
    # ------------------------------------------------------------------------

    def set_merge(self, merge):
        """
        Fix the merge option.
        If merge is set to True, sppasSyll() will save the input tiers
        in the output file.

        @param merge is a Boolean

        """
        self._merge = merge

    # End set_merge
    # ----------------------------------------------------------------------

    def set_usesintervals(self, mode):
        """
        Fix the usesintervals option.
        If usesintervals is set to True, the syllabification operates
        inside specific (given) intervals.

        @param mode is a Boolean

        """
        self._usesintervals = mode

    # End set_usesintervals
    # ----------------------------------------------------------------------

    def set_usesphons(self, mode):
        """
        Fix the usesphons option.
        If usesphons is set to True, the syllabification operates by
        using only tier with phonemes.

        @param mode is a Boolean

        """
        self._usesphons = mode

    # End set_usesphons
    # ----------------------------------------------------------------------

    def set_tiername(self, tiername):
        """
        Fix the tiername option.
        This is the name of the tier that run() looks for when the
        usesintervals option is enabled.

        @param tiername is a string

        """
        self._tiername = tiername

    # End set_tiername
    # ----------------------------------------------------------------------

    def save(self, trsinput, inputfilename, syllables, outputfilename):
        """
        Save the syllabification into a file (end of the input or output).

        If an output file name is given, the syllable tiers are written in
        that file, preceded... followed by the input tiers only if the merge
        option is True. If outputfilename is None, the syllable tiers are
        added to the input transcription and inputfilename is overwritten.

        @param trsinput is the transcription read from inputfilename,
        @param inputfilename (string) is the name of the input file,
        @param syllables is the transcription with the syllable tiers,
        @param outputfilename (string) is the name of the output file,
        or None.

        @raise IOError if the output cannot be written.

        """
        # An output file name is given
        if outputfilename is not None:
            if self._merge is True:
                for tier in trsinput:
                    syllables.Add(tier)
            trsoutput = syllables

        # The syllable tiers are added to the input transcription
        else:
            for tier in syllables:
                trsinput.Add(tier)
            trsoutput = trsinput
            outputfilename = inputfilename

        # Save
        try:
            annotationdata.io.write(outputfilename, trsoutput)
        except Exception as e:
            raise IOError('Syll::syll.py. An error occurred when writing output.\n %s' % e)

    # End save
    # ------------------------------------------------------------------------

    def _link_tiers(self, syllables, suffix=""):
        """
        Link the classes and the structures tiers to the syllables tier.

        No "TimeAlignment" link is added with the phonemes tier nor with
        the intervals tier: they are not in this transcription.

        @param syllables is the transcription that contains the tiers,
        @param suffix (string) is appended to the names of the tiers to
        find: "" for "Syllables", "Classes" and "Structures", or "-seg".

        """
        syll = syllables.Find("Syllables" + suffix)
        cls = syllables.Find("Classes" + suffix)
        struct = syllables.Find("Structures" + suffix)

        syllables._hierarchy.addLink("TimeAssociation", syll, cls)
        syllables._hierarchy.addLink("TimeAssociation", syll, struct)

    # End _link_tiers
    # ------------------------------------------------------------------------

    def run(self, inputfilename, outputfilename=None):
        """
        Perform the Syllabification process.

        @param inputfilename (string) annotated file including
        time-aligned phonemes
        @param outputfilename (string) file to write the result in;
        if None, the result is saved into inputfilename.

        @raise IOError if the phoneme tier is missing or empty, or if the
        result cannot be written.
        @raise IndexError if the usesintervals option is enabled and the
        interval tier is not found.

        """
        trsinput = annotationdata.io.read(inputfilename)

        # Find the phoneme tier: the first one with both 'align' and
        # 'phon' in its name (case-insensitive).
        phonemes = None
        for tier in trsinput:
            tiername = tier.GetName().lower()
            if "align" in tiername and "phon" in tiername:
                phonemes = tier
                break

        if phonemes is None:
            raise IOError("Phoneme tier not found."
                          " The name of a tier must contain both 'align' and 'phon'.")
        if phonemes.IsEmpty() is True:
            raise IOError("Syll::sppasSyll. Empty phoneme tier.\n")

        # The option is tested exactly like in the branches below, so
        # 'intervals' is always defined when one of them uses it, even
        # if the option value is truthy without being the True object.
        intervals = None
        if self._usesintervals:
            intervals = trsinput.Find(self._tiername)
            if not intervals:
                raise IndexError("Interval tier not found: %s" % self._tiername)

        if self._usesintervals and self._usesphons:
            # Both syllabifications, gathered in the same transcription
            syllables = self.syllabifier.syllabify(phonemes)
            for tier in self.syllabifier.syllabify2(phonemes, intervals):
                syllables.Add(tier)
            self._link_tiers(syllables)
            self._link_tiers(syllables, "-seg")

        elif self._usesintervals:
            # Only the syllabification inside the intervals
            syllables = self.syllabifier.syllabify2(phonemes, intervals)
            self._link_tiers(syllables, "-seg")

        else:
            # Only the syllabification of the phonemes tier
            syllables = self.syllabifier.syllabify(phonemes)
            self._link_tiers(syllables)

        # Manage results
        self.save(trsinput, inputfilename, syllables, outputfilename)

    # End run
    # ------------------------------------------------------------------------