def test_trainer_without_data(self):
    """Run the training recipe with no corpus, then with an empty corpus.

    With no corpus attached to the trainer, the returned model is expected
    to hold no HMM. Once an empty ``TrainingCorpus`` is attached, the
    returned model is expected to hold 4 HMMs.
    """
    model_trainer = HTKModelTrainer()

    # Case 1: the trainer has no corpus at all.
    model_without_corpus = model_trainer.training_recipe()
    self.assertEqual(len(model_without_corpus.hmms), 0)

    # Case 2: the trainer has a corpus, but nothing was added to it.
    model_trainer.corpus = TrainingCorpus()
    model_with_empty_corpus = model_trainer.training_recipe()
    self.assertEqual(len(model_with_empty_corpus.hmms), 4)
def test_trainer_with_data(self):
    """Run the training recipe on a corpus filled with real data.

    The corpus is configured with the "fra" dictionary and monophones
    mapping, then fed with two (annotation, audio) file pairs located next
    to this test and with the "samples-fra" directory. The test makes no
    assertion on the resulting model: it passes as long as the recipe runs
    without raising.
    """
    # Debug aid, left disabled:
    # setup_logging(1, None)
    training_corpus = TrainingCorpus()
    training_corpus.fix_resources(
        dictfile=os.path.join(RESOURCES_PATH, "dict", "fra.dict"),
        mappingfile=os.path.join(
            RESOURCES_PATH, "models", "models-fra", "monophones.repl"
        ),
    )
    training_corpus.lang = "fra"
    training_corpus.datatrainer.protodir = os.path.join(HERE, "protos")

    # Each pair is (annotation file, matching audio file); order is kept.
    local_pairs = (
        ("F_F_B003-P8-palign.TextGrid", "F_F_B003-P8.wav"),
        ("track_0001-phon.xra", "track_0001.wav"),
    )
    for annotation_name, audio_name in local_pairs:
        training_corpus.add_file(
            os.path.join(HERE, annotation_name),
            os.path.join(HERE, audio_name),
        )
    training_corpus.add_corpus(os.path.join(SAMPLES_PATH, "samples-fra"))

    model_trainer = HTKModelTrainer(training_corpus)
    trained_model = model_trainer.training_recipe(delete=True)
# ---------------------------------
# 2. Create a Corpus Manager
# It manages the set of training data:
#   - establishes the list of phonemes (from the dict);
#   - converts the input annotated data into the HTK-specific data format;
#   - codes the audio data.

corpus = TrainingCorpus(datatrainer, lang=args.l)
corpus.fix_resources(dictfile=args.r, mappingfile=args.m)

# Only directories are added to the corpus; any other entry is reported
# in the log and skipped. Nothing is done when no input entry was given.
for entry in args.i or ():
    if not os.path.isdir(entry):
        logging.info('[ WARNING ] Ignore the given entry: %s' % entry)
        continue
    corpus.add_corpus(entry)

# ---------------------------------
# 3. Acoustic Model Training

trainer = HTKModelTrainer(corpus)

# Deletion is requested only when args.t was not provided (is None).
DELETE = args.t is None
trainer.training_recipe(outdir=args.o, delete=DELETE)

# ---------------------------------------------------------------------------