def test_add_corpus(self):
    # Exercise TrainingCorpus.add_corpus() on the sample directories and
    # check both its return value and the file lists it fills in.
    training_corpus = TrainingCorpus()

    # The English samples are expected to yield a return value of 0.
    eng_dir = os.path.join(SAMPLES_PATH, "samples-eng")
    added = training_corpus.add_corpus(eng_dir)
    self.assertEqual(added, 0)

    # The Italian samples are expected to yield a return value of 4.
    ita_dir = os.path.join(SAMPLES_PATH, "samples-ita")
    added = training_corpus.add_corpus(ita_dir)
    self.assertEqual(added, 4)

    # Adding the whole samples tree must register more than 10
    # transcription files, but no phonetization or alignment files.
    training_corpus.add_corpus(SAMPLES_PATH)
    self.assertGreater(len(training_corpus.transfiles), 10)
    self.assertEqual(len(training_corpus.phonfiles), 0)
    self.assertEqual(len(training_corpus.alignfiles), 0)
def test_trainer_with_data(self):
    # Run the whole HTK training recipe on a small French data set.
    # No assertion is made on the result: the test only checks that the
    # recipe runs to completion without raising.
    # Debugging hint: call setup_logging(1, None) here to trace the run.
    corpus = TrainingCorpus()

    # Resources: French pronunciation dictionary and phoneme mapping table.
    dict_path = os.path.join(RESOURCES_PATH, "dict", "fra.dict")
    mapping_path = os.path.join(
        RESOURCES_PATH, "models", "models-fra", "monophones.repl"
    )
    corpus.fix_resources(dictfile=dict_path, mappingfile=mapping_path)
    corpus.lang = "fra"
    corpus.datatrainer.protodir = os.path.join(HERE, "protos")

    # Individually added files: (annotation file, matching audio file).
    local_pairs = (
        ("F_F_B003-P8-palign.TextGrid", "F_F_B003-P8.wav"),
        ("track_0001-phon.xra", "track_0001.wav"),
    )
    for annotation_name, audio_name in local_pairs:
        corpus.add_file(
            os.path.join(HERE, annotation_name),
            os.path.join(HERE, audio_name),
        )

    # Plus every usable file of the French samples directory.
    corpus.add_corpus(os.path.join(SAMPLES_PATH, "samples-fra"))

    model_trainer = HTKModelTrainer(corpus)
    # The returned model is kept in a local but not inspected by this test.
    acmodel = model_trainer.training_recipe(delete=True)
# ---------------------------------
# 2. Create a Corpus Manager
# It manages the set of training data:
#   - establishes the list of phonemes (from the dict);
#   - converts the input annotated data into the HTK-specific data format;
#   - codes the audio data.

corpus = TrainingCorpus(datatrainer, lang=args.l)
corpus.fix_resources(dictfile=args.r, mappingfile=args.m)

# Only directories are accepted as input entries; anything else is reported
# and skipped. "args.i or ()" covers the case where no input was given.
for entry in args.i or ():
    if os.path.isdir(entry):
        corpus.add_corpus(entry)
    else:
        # Pass the argument to logging instead of pre-formatting the message,
        # so the string is only built when the record is actually emitted.
        logging.info('[ WARNING ] Ignore the given entry: %s', entry)

# ---------------------------------
# 3. Acoustic Model Training

trainer = HTKModelTrainer(corpus)

# Deletion is requested only when args.t was not given.
# NOTE(review): presumably args.t is a user-chosen working directory that
# must be kept after training -- confirm against the argument parser.
DELETE = args.t is None
trainer.training_recipe(outdir=args.o, delete=DELETE)

# ---------------------------------------------------------------------------