Example #1
0
 def test_add_corpus(self):
     """Check the counts returned by add_corpus() and the resulting file lists.

     The test expects add_corpus() to return 0 for the "samples-eng"
     directory and 4 for "samples-ita". After the whole samples directory
     has been added as well, more than 10 transcription files must be
     registered, while the phonetization and alignment lists stay empty.
     """
     english_dir = os.path.join(SAMPLES_PATH, "samples-eng")
     italian_dir = os.path.join(SAMPLES_PATH, "samples-ita")

     training_set = TrainingCorpus()

     # Per-language directories: check the value returned by each call.
     added = training_set.add_corpus(english_dir)
     self.assertEqual(added, 0)
     added = training_set.add_corpus(italian_dir)
     self.assertEqual(added, 4)

     # Whole samples directory: only the resulting file lists are checked.
     training_set.add_corpus(SAMPLES_PATH)
     nb_trans = len(training_set.transfiles)
     nb_phon = len(training_set.phonfiles)
     nb_align = len(training_set.alignfiles)
     self.assertGreater(nb_trans, 10)
     self.assertEqual(nb_phon, 0)
     self.assertEqual(nb_align, 0)
Example #2
0
    def test_trainer_with_data(self):
        """Run the HTK training recipe on a French corpus built from local data.

        The corpus is configured with the French dictionary and phoneme
        mapping, then fed with two annotated files located next to this
        test and with the "samples-fra" directory. The test makes no
        assertion on the result: it only requires that training_recipe()
        runs to completion.
        """
        # Uncomment to get log output while debugging:
        #setup_logging(1,None)
        french_dict = os.path.join(RESOURCES_PATH, "dict", "fra.dict")
        french_mapping = os.path.join(
            RESOURCES_PATH, "models", "models-fra", "monophones.repl"
        )
        # (annotation file, audio file) pairs stored in the test directory.
        local_pairs = (
            ("F_F_B003-P8-palign.TextGrid", "F_F_B003-P8.wav"),
            ("track_0001-phon.xra", "track_0001.wav"),
        )

        training_set = TrainingCorpus()
        training_set.fix_resources(dictfile=french_dict, mappingfile=french_mapping)
        training_set.lang = "fra"
        training_set.datatrainer.protodir = os.path.join(HERE, "protos")

        for annotation_name, audio_name in local_pairs:
            training_set.add_file(
                os.path.join(HERE, annotation_name),
                os.path.join(HERE, audio_name),
            )
        training_set.add_corpus(os.path.join(SAMPLES_PATH, "samples-fra"))

        model_trainer = HTKModelTrainer(training_set)
        acmodel = model_trainer.training_recipe(delete=True)
Example #3
0
    def test_trainingcorpus(self):
        """Check resource loading and single-file addition on a TrainingCorpus.

        Verified in sequence:
          - a new corpus maps '#' to "#";
          - with the "nan" dictionary alone, 44 monophones are expected;
          - with the "nan" mapping file too, '#' is mapped to "sil";
          - add_file() is falsy for non-existent names and truthy for the
            annotated file and audio stored next to this test.
        """
        nan_dict = os.path.join(RESOURCES_PATH, "dict", "nan.dict")
        nan_mapping = os.path.join(
            RESOURCES_PATH, "models", "models-nan", "monophones.repl"
        )
        annotation_path = os.path.join(HERE, "F_F_B003-P8-palign.TextGrid")
        audio_path = os.path.join(HERE, "F_F_B003-P8.wav")

        training_set = TrainingCorpus()

        # Before any resource is given.
        self.assertEqual(training_set.phonemap.map_entry('#'), "#")

        # Dictionary only.
        training_set.fix_resources(dictfile=nan_dict)
        self.assertEqual(training_set.monophones.get_size(), 44)

        # Dictionary and phoneme mapping.
        training_set.fix_resources(dictfile=nan_dict, mappingfile=nan_mapping)
        self.assertEqual(training_set.phonemap.map_entry('#'), "sil")

        # Adding files: an invalid pair first, then a valid one.
        rejected = training_set.add_file("toto", "toto")
        self.assertFalse(rejected)
        accepted = training_set.add_file(annotation_path, audio_path)
        self.assertTrue(accepted)

        # Final clean-up of the data trainer attached to the corpus.
        training_set.datatrainer.delete()
Example #4
0
#  - logdir=DEFAULT_LOG_DIR (in)
#  - protodir=None (in)
#  - protofilename=DEFAULT_PROTO_FILENAME (out)

# Create the data trainer with the working and prototype directories
# read from the parsed arguments.
datatrainer = DataTrainer()
datatrainer.create(workdir=args.t, protodir=args.p)


# ---------------------------------
# 2. Create a Corpus Manager
# It is in charge of the set of training data:
#   - it establishes the list of phonemes (from the dict);
#   - it converts the input annotated data into the HTK-specific data format;
#   - it codes the audio data.

corpus = TrainingCorpus(datatrainer, lang=args.l)
corpus.fix_resources(dictfile=args.r, mappingfile=args.m)

# Only directories are added to the corpus; any other entry is reported
# in the log and skipped.
for entry in args.i or []:
    if not os.path.isdir(entry):
        logging.info('[ WARNING ] Ignore the given entry: %s'%entry)
        continue
    corpus.add_corpus(entry)


# ---------------------------------
# 3. Acoustic Model Training

trainer = HTKModelTrainer(corpus)
# NOTE(review): presumably consumed by the training step further down
# the script (not visible in this section) -- confirm.
DELETE = False