Example #1
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # get required directories from dependent modules
    # examine general settings and set as appropriate
    # process data
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
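
All of the examples below share this skeleton: build_configuration.get_config() returns both a configuration object and an option parser, the command line is parsed, a speaker configuration is required, and logging, idlak-scratch and dependency checks are set up before the module-specific work. As a minimal, self-contained sketch of the same parse-then-validate pattern (the --bldconf/--spkconf flag names are assumptions for illustration; the real flags are defined inside get_config()):

# Minimal sketch of the option handling pattern used above, with optparse only.
# The flag names (--bldconf/--spkconf) are assumptions; the real parser is
# built inside build_configuration.get_config().
from optparse import OptionParser

def parse_build_options(argv=None):
    parser = OptionParser(description='example Idlak build module')
    parser.add_option('-b', '--bldconf', help='general build configuration XML')
    parser.add_option('-s', '--spkconf', help='speaker configuration XML')
    opts, args = parser.parse_args(argv)
    # a speaker configuration is mandatory, exactly as in the modules above
    if not opts.spkconf:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    return opts, args

if __name__ == '__main__':
    opts, args = parse_build_options()
    print('build conf: %s, speaker conf: %s' % (opts.bldconf, opts.spkconf))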
Example #2
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # get required directories from dependent modules
    aligndir = build_conf.get_input_dir('align_def')
    outdir = os.path.join(build_conf.outdir, 'output')
    # examine general settings and set as appropriate
    # process data
    wrds_dir = os.path.join(aligndir, 'wrds')

    file_list = glob.glob('%s/*.wrd' % (wrds_dir))

    for f in file_list:
        wrd_file = open(f, 'r')
        # Get the input file's name stem so we can use it for the output filename
        filename_stem = os.path.split(os.path.splitext(f)[0])[1]
        output_filename = os.path.join(outdir, '%s.dur' % (filename_stem))
        output_file = open(output_filename, 'w')

        for line in wrd_file:
            columns = line.split()
            # column #0 is the allotted time before the given phone (its start time).
            # column #1 is the allotted time after the given phone (its end time).
            phone_dur = float(columns[1]) - float(columns[0])
            output_file.write(str(phone_dur) + '\n')
        wrd_file.close()
        output_file.close()
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
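
The module-specific part above turns each .wrd alignment file into a .dur file with one duration per line, computed as end time minus start time. A hedged, self-contained variant of the same conversion, using context managers so files are always closed and creating the output directory if needed (the directory arguments are placeholders):

# Sketch of the .wrd -> .dur conversion above; wrds_dir/outdir are placeholder
# paths, and each .wrd line is assumed to hold "start end ..." times.
import glob
import os

def wrd_to_dur(wrds_dir, outdir):
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    for wrd_path in sorted(glob.glob(os.path.join(wrds_dir, '*.wrd'))):
        stem = os.path.splitext(os.path.basename(wrd_path))[0]
        dur_path = os.path.join(outdir, stem + '.dur')
        with open(wrd_path) as wrd_file, open(dur_path, 'w') as dur_file:
            for line in wrd_file:
                columns = line.split()
                # duration = end time (column 1) minus start time (column 0)
                dur_file.write('%f\n' % (float(columns[1]) - float(columns[0])))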
Example #3
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    #print 'SEQ', build_conf.dataseq
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    logger = build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    kaldisrcdir = os.path.join(build_conf.kaldidir, 'src')
    accdir = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc)
    spkdir = os.path.join(accdir, build_conf.spk)
    outdir = build_conf.outdir
    # get required directories from dependent modules
    # NONE
    # examine module settings and set as appropriate
    # NO MODULE OPTIONS
    # process data
    # run text through the idlak text processing module
    com = '%s/idlaktxpbin/idlaktxp --pretty --tpdb=%s %s %s\n' % (kaldisrcdir,
                                                                  accdir,
                                                                  os.path.join(spkdir, "text.xml"),
                                                                  os.path.join(outdir, "output", "text_norm.xml"))
    logger.log('Info', 'Running normalisation on input xml text: %s' % (com))
    os.system(com)
    # create kaldi required input files (modified from egs/arctic/s1/run.py)
    logger.log('Info', 'Creating kaldi input files and train dir')
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang, build_conf.acc, build_conf.spk, build_conf.srate)
    # use of relative and absolute paths here appears broken MA140305
    kaldidata(os.path.join(outdir, "output"), wavdir, build_conf.spk, build_conf.flist, True)
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
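
The normalisation step assembles an idlaktxp command line as a string and hands it to os.system(), which discards the exit status. A sketch of the same call via subprocess.check_call, which raises if the binary fails, is below; the argument values are placeholders for the paths computed above:

# Sketch of running the idlaktxp normalisation step via subprocess instead of
# os.system(); all paths here stand in for the values computed above.
import os
import subprocess

def run_idlaktxp(kaldisrcdir, accdir, text_in, text_out):
    com = [os.path.join(kaldisrcdir, 'idlaktxpbin', 'idlaktxp'),
           '--pretty', '--tpdb=' + accdir, text_in, text_out]
    # check_call raises CalledProcessError on a non-zero exit status,
    # so a failed normalisation run cannot go unnoticed in the build log
    subprocess.check_call(com)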
Example #4
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    outdir = build_conf.outdir
    # get required directories from dependent modules
    kaldisrcdir = os.path.join(build_conf.kaldidir, 'src')
    # examine general settings and set as appropriate
    getf0_path = build_conf.getval('pitch_def', 'getf0')
    pplain_path = build_conf.getval('pitch_def', 'pplain')

    # if not os.path.isfile(getf0_path):
    #     build_conf.logger.log('error', 'Supplied get_f0 location %s does not exist!' % (getf0_path))
    #     raise IOError('Supplied get_f0 location %s does not exist!' % (getf0_path))
    # if not os.path.isfile(pplain_path):
    #     build_conf.logger.log('error', 'Supplied pplain location %s does not exist!' % (pplain_path))
    #     raise IOError('Supplied pplain location %s does not exist!' % (pplain_path))

    # process data
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang, build_conf.acc, build_conf.spk, build_conf.srate)
    
    outdir_data = os.path.join(outdir, "data")
    if not os.path.isdir(outdir_data):
        os.mkdir(outdir_data)

    process_data(outdir, wavdir, getf0_path, pplain_path, build_conf.flist, kaldisrcdir)
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
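
The commented-out block above validates the configured get_f0 and pplain locations before any audio is processed. If that check were reinstated, it could be factored into one helper used for both tools; a sketch under that assumption (logger is assumed to expose the same log(level, message) interface as build_conf.logger):

# Sketch of the path validation that is commented out above, factored into a
# helper; 'logger' is assumed to behave like build_conf.logger.
import os

def require_tool(logger, name, path):
    if not os.path.isfile(path):
        msg = 'Supplied %s location %s does not exist!' % (name, path)
        logger.log('error', msg)
        raise IOError(msg)
    return path

# usage, mirroring the module above:
# require_tool(build_conf.logger, 'get_f0', getf0_path)
# require_tool(build_conf.logger, 'pplain', pplain_path)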
Example #5
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # question file
    
    # get required directories from dependent modules
    aligndir = build_conf.get_input_dir('align_def')
    cexdir = build_conf.get_input_dir('cex_def')
    pitchdir = build_conf.get_input_dir('pitch_def')
    outdir_data = os.path.join(build_conf.outdir, 'data')
    if not os.path.isdir(outdir_data):
        os.mkdir(outdir_data)

    # examine general settings and set as appropriate
    # process data
    # merge full context alignment with quinphone alignment
    build_conf.logger.log('info', 'Merging full context information with quinphone alignment')
    makefullctx = os.path.join(build_conf.kaldidir, 'src', 'bin',
                               'make-fullctx-ali')
    quinphonemodel = os.path.join(aligndir, 'kaldidelta_quin_output',
                                  'final.mdl')
    quinphonealign = os.path.join(aligndir, 'kaldidelta_quin_output',
                                  'ali.1.gz')
    contextdata = os.path.join(cexdir, 'cex.ark')
    fullctxali = os.path.join(build_conf.outdir, 'output', 'ali')
    com = '%s %s "ark:gunzip -c %s|" ark,t:%s ark,t:%s' % (makefullctx,
                                                         quinphonemodel,
                                                         quinphonealign,
                                                         contextdata,
                                                         fullctxali)
    os.system(com)
    # compile context question sets from cex_def
    compilequestions = os.path.join(build_conf.kaldidir, 'src', 'bin',
                                    'compile-questions')
    ctxqset = os.path.join(cexdir, 'qset.dat')
    # dummy questions.int
    dummyqset = os.path.join(build_conf.outdir, 'output', 'questions.int')
    os.system("touch %s" % (dummyqset))
    topo =  os.path.join(aligndir, 'data', 'lang', 'topo')
    ctxqsetbin = os.path.join(build_conf.outdir, 'output', 'qset_binary.dat')
    # unclear how the topology affects the pdf generation here
    com = "%s --central-position=2 --binary=false --context-width=5 --keyed-questions=%s %s %s %s" % (compilequestions,
                                                                                       ctxqset,
                                                                                       topo,
                                                                                       dummyqset,
                                                                                       ctxqsetbin)
    os.system(com)
    
    # accumulate statistics for pitch
    fullctxacc = os.path.join(build_conf.kaldidir, 'src', 'bin',
                               'acc-fullctx-stats')
    pitchfeatures = os.path.join(pitchdir, 'lf0.ark')
    pitchacc = os.path.join(build_conf.outdir, 'output', 'pitch_acc.dat')
    com = '%s --binary=false 2 ark:%s ark:%s %s' % (fullctxacc,
                                                    pitchfeatures,
                                                    fullctxali,
                                                    pitchacc)
    os.system(com)

    # build a tree
    buildtree = os.path.join(build_conf.kaldidir, 'src', 'bin',
                             'build-tree')
    roots = os.path.join(aligndir, 'data', 'lang', 'phones', 'roots.int')
    rootsdummy = '/afs/inf.ed.ac.uk/user/m/matthewa/kaldi/matthewa/kaldi-idlak/idlak-voice-build/dummy.int'
    treeout = os.path.join(build_conf.outdir, 'output', 'pitch.tree')
    com = "%s --binary=false --verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s" % (buildtree,
                                                                                    pitchacc,
                                                                                    roots,
                                                                                    ctxqsetbin,
                                                                                    topo,
                                                                                    treeout)
    os.system(com)

    # make a model from the tree and the statistics
    gmminitmodel = os.path.join(build_conf.kaldidir, 'src', 'gmmbin',
                                                 'gmm-init-model')
    modelout = os.path.join(build_conf.outdir, 'output', 'pitch.mdl')
    com = "%s --binary=false %s %s %s %s" % (gmminitmodel, treeout, pitchacc, topo, modelout)
    os.system(com)


    ######################################################################
    #                  DURATION MODELLING
    ######################################################################                  
    # modify full context alignment to have a single line for each phone
    # and generate duration parameters for state durations
    convert_ali_durations_data(NOSTATES, fullctxali, outdir_data)

    # build context question set
    ctxqsetbin = os.path.join(build_conf.outdir, 'output', 'qset_binary_dur.dat')
    com = "%s --central-position=2 --binary=false --context-width=5 --keyed-questions=%s %s %s %s" % (compilequestions,
                                                                                       ctxqset,
                                                                                       topo + '2',
                                                                                       dummyqset,
                                                                                       ctxqsetbin)
    os.system(com)
    
    # accumulate statistics for state duration
    stateduracc = os.path.join(build_conf.outdir, 'output', 'statedur_acc.dat')
    com = '%s --binary=false --var-floor=20.0 2 ark:%s ark:%s %s' % (fullctxacc,
                                                    os.path.join(outdir_data, 'durations_states.ark'),
                                                    os.path.join(outdir_data, 'durationali.ark'),
                                                    stateduracc)
    os.system(com)
    # accumulate statistics for phone durations
    phoneduracc = os.path.join(build_conf.outdir, 'output', 'phonedur_acc.dat')
    com = '%s --binary=false --var-floor=20.0 2 ark:%s ark:%s %s' % (fullctxacc,
                                                    os.path.join(outdir_data, 'durations_phones.ark'),
                                                    os.path.join(outdir_data, 'durationali.ark'),
                                                    phoneduracc)
    os.system(com)
    
    # build a tree
    # For Interspeech 15 work we have the following duration trees and models
    # 1. Kaldi out of the box
    # 2. Kaldi with same number of leaves and no initial roots questions
    # 3. As 2 but using 5 dim state duration data
    # 4. As 3 but with no post processing
    treeout1 = os.path.join(build_conf.outdir, 'output', 'dur_1.tree')
    treeout2 = os.path.join(build_conf.outdir, 'output', 'dur_2.tree')
    treeout3 = os.path.join(build_conf.outdir, 'output', 'dur_3.tree')
    treeout4 = os.path.join(build_conf.outdir, 'output', 'dur_4.tree')
    #11.3 thresh for statedur stats -> 511 leaves
    #8.3 thresh for phonedur stats -> 518 leaves
    com = "%s --binary=false --verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s" % (buildtree,
                                                                                    phoneduracc,
                                                                                    roots,
                                                                                    ctxqsetbin,
                                                                                    topo + '2',
                                                                                    treeout1)
    os.system(com)
    com = "%s --binary=false --max-leaves=513 --thresh=0 --verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s" % (buildtree,
                                                                                    phoneduracc,
                                                                                    rootsdummy,
                                                                                    ctxqsetbin,
                                                                                    topo + '2',
                                                                                    treeout2)
    os.system(com)
    com = "%s --binary=false --max-leaves=513  --thresh=0 --verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s" % (buildtree,
                                                                                    stateduracc,
                                                                                    rootsdummy,
                                                                                    ctxqsetbin,
                                                                                    topo,
                                                                                    treeout3)
    os.system(com)
    com = "%s --binary=false --max-leaves=513  --cluster-thresh=0 --thresh=0 --verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s" % (buildtree,
                                                                                    stateduracc,
                                                                                    rootsdummy,
                                                                                    ctxqsetbin,
                                                                                    topo,
                                                                                    treeout4)
    os.system(com)
    
    # make a model from the tree and the state statistics
    modelout1 = os.path.join(build_conf.outdir, 'output', 'dur_1.mdl')
    modelout2 = os.path.join(build_conf.outdir, 'output', 'dur_2.mdl')
    modelout3 = os.path.join(build_conf.outdir, 'output', 'dur_3.mdl')
    modelout4 = os.path.join(build_conf.outdir, 'output', 'dur_4.mdl')
    com = "%s --binary=false %s %s %s %s" % (gmminitmodel, treeout1, stateduracc, topo, modelout1)
    os.system(com)
    com = "%s --binary=false %s %s %s %s" % (gmminitmodel, treeout2, stateduracc, topo, modelout2)
    os.system(com)
    com = "%s --binary=false %s %s %s %s" % (gmminitmodel, treeout3, stateduracc, topo, modelout3)
    os.system(com)
    com = "%s --binary=false %s %s %s %s" % (gmminitmodel, treeout4, stateduracc, topo, modelout4)
    os.system(com)
    
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
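
Every tree and model build above is launched with os.system(com) and the return value is ignored, so a failed Kaldi binary can silently corrupt the following steps. A small hedged wrapper that logs each command and stops on a non-zero exit status might look like this (again assuming the build_conf.logger log(level, message) interface):

# Sketch of a checked replacement for the bare os.system(com) calls above.
import subprocess

def run_checked(logger, com):
    logger.log('info', 'Running: %s' % (com))
    # shell=True keeps the existing "ark:gunzip -c ...|" style pipelines working
    retcode = subprocess.call(com, shell=True)
    if retcode != 0:
        logger.log('error', 'Command failed with status %d: %s' % (retcode, com))
        raise RuntimeError('Command failed: %s' % (com))
    return retcode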
Example #6
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    logger = build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # text for testing the voice after build (same as the original HTSDEMO test text)
    alicetxtfile = os.path.join(build_conf.idlakdata, build_conf.lang, 'testdata', 'alice.xml')
    # Mapping from original arctic corpus id to idlak corpus id
    corpusid2idlakidfile = os.path.join(build_conf.idlakdata, build_conf.lang,
                                        build_conf.acc, build_conf.spk, 'corpusid2idlakid.txt')
    # get required directories from dependent modules
    cexdir = build_conf.get_input_dir('cex_def')
    # examine general settings and set as appropriate
    htsdemodir = build_conf.getval('hts_test', 'htsdemodir')
    if not os.path.isdir(htsdemodir):
        logger.log('critical', '[%s] does not exist' % (htsdemodir))
    if not os.path.isdir(os.path.join(htsdemodir, 'HTS-demo_CMU-ARCTIC-SLT')):
        logger.log('critical', '[%s] does not contain an HTS demo' % (htsdemodir))
    if not build_conf.spk == 'slt':
        logger.log('critical', 'This test currently only setup to work with en/ga/slt')
    # get info to cut slt raw audio in HTSDEMO into spurts
    spttimesfile = os.path.join(cexdir, 'spt_times.dat')
    # get directory for full model files
    htsmodeldir = os.path.join(cexdir, 'htslab')
    # new question set for data
    qsetfile = os.path.join(cexdir, 'questions-kaldi-en-ga.hed')
    # process data

    # create or replace label file directories
    htsdatadir = os.path.join(htsdemodir, 'HTS-demo_CMU-ARCTIC-SLT', 'data')
    # full models
    if os.path.isdir(os.path.join(htsdatadir, 'labels', 'full')):
        if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'full_orig')):
            os.system('mv %s %s' % (os.path.join(htsdatadir, 'labels', 'full'),
                                    os.path.join(htsdatadir, 'labels', 'full_orig')))
            os.mkdir(os.path.join(htsdatadir, 'labels', 'full'))
    else:
        os.mkdir(os.path.join(htsdatadir, 'labels', 'full'))
    # mono models
    if os.path.isdir(os.path.join(htsdatadir, 'labels', 'mono')):
        if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'mono_orig')):
            os.system('mv %s %s' % (os.path.join(htsdatadir, 'labels', 'mono'),
                                    os.path.join(htsdatadir, 'labels', 'mono_orig')))
            os.mkdir(os.path.join(htsdatadir, 'labels', 'mono'))
    else:
        os.mkdir(os.path.join(htsdatadir, 'labels', 'mono'))
    # label files compatible with wavesurfer
    if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'wsurf')):
        os.mkdir(os.path.join(htsdatadir, 'labels', 'wsurf'))
    # create full, mono and wavesurfer label files
    labfiles = glob.glob(htsmodeldir + "/*.lab")
    labfiles.sort()
    for f in labfiles:
        stem = os.path.split(f)[1]
        fp1 = open(os.path.join(htsdatadir, 'labels', 'full', 'cmu_us_arctic_' + stem), 'w')
        fp2 = open(os.path.join(htsdatadir, 'labels', 'mono', 'cmu_us_arctic_' + stem), 'w')
        fp3 = open(os.path.join(htsdatadir, 'labels', 'wsurf', 'cmu_us_arctic_' + stem), 'w')
        for l in open(f).readlines():
            fp1.write(l)
            pat = re.match('^([0-9]+)\s+([0-9]+)\s\S+\-(.*?)\+.*$', l)
            fp2.write("%s %s %s\n" % pat.groups())
            fp3.write("%.3f %.3f %s\n" % (float(pat.group(1))/10000000.0,
                                         float(pat.group(2))/10000000.0,
                                         pat.group(3)))
        fp1.close()
        fp2.close()
        fp3.close()
    # copy question file
    oldqset = os.path.join(htsdatadir, 'questions', 'questions_qst001.hed')
    olduttqset = os.path.join(htsdatadir, 'questions', 'questions_utt_qst001.hed')
    if not os.path.isfile(oldqset + '.orig'):
        os.system('mv %s %s.orig' % (oldqset, oldqset))
    if not os.path.isfile(olduttqset + '.orig'):
        os.system('mv %s %s.orig' % (olduttqset, olduttqset))
    os.system('cp %s %s' % (qsetfile, oldqset))
    # construct utterance qset from qset
    lines = open(qsetfile).readlines()
    fp = open(olduttqset, 'w')
    for l in lines:
        uttqs = False
        for name in UTTQSET:
            if l.find(name) > -1:
                uttqs = True
                break
        if uttqs:
            fp.write(l)
    fp.close()
    # cut up audio to correct spt sized chunks
    if not os.path.isdir(os.path.join(htsdatadir, 'kaldiraw')):
        os.system('mv %s %s.orig' % (os.path.join(htsdatadir, 'raw'),
                                     os.path.join(htsdatadir, 'raw')))
        
        os.mkdir(os.path.join(htsdatadir, 'kaldiraw'))
        os.system('ln -s %s %s' % (os.path.join(htsdatadir, 'kaldiraw'),
                                   os.path.join(htsdatadir, 'raw')))
    # load lookup between arctic ids and kaldi ids
    idlak2corpus = {}
    lines = open(corpusid2idlakidfile).readlines()
    for l in lines:
        ll = l.split()
        idlak2corpus[ll[1]] = ll[0]
    # open spt times
    lines = open(spttimesfile).readlines()
    for l in lines:
        ll = l.split()
        origwav = 'cmu_us_arctic_slt_' +  idlak2corpus[ll[0][4:-8]].split('_')[1]
        # currently use ch_wave change to kaldi style MA070314
        cmd = '%s -o %s/cmu_us_arctic_%s.raw -f 48000 -itype raw -otype raw -start %s -end %s %s/%s.raw' % (
            os.path.join(htsdemodir, 'speech_tools/bin/ch_wave'),
            os.path.join(htsdatadir, 'kaldiraw'),
            ll[0][:-4],
            ll[1], ll[2], os.path.join(htsdatadir, 'raw.orig'), origwav)
        print cmd
        os.system(cmd)
    #TODO create gen labels using script in utils
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
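
The full/mono/wavesurfer label split above hinges on one regular expression that pulls the start time, end time and central phone out of each full-context HTS label line, with times in 100 ns units. A self-contained sketch of just that parse, using a raw-string pattern and guarding against non-matching lines (the sample label in the usage comment is made up):

# Sketch of the HTS full-context label parse used above; the example label
# line is illustrative but follows the "start end a-phone+b..." layout the
# regex expects.
import re

LABEL_PAT = re.compile(r'^([0-9]+)\s+([0-9]+)\s\S+\-(.*?)\+.*$')

def parse_label_line(line):
    pat = LABEL_PAT.match(line)
    if not pat:
        return None
    start, end, phone = pat.groups()
    # HTS label times are in 100 ns units, hence the 1e7 divisor for seconds
    return float(start) / 10000000.0, float(end) / 10000000.0, phone

# e.g. parse_label_line('0 2050000 x^x-sil+hh=iy...') -> (0.0, 0.205, 'sil')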
Example #7
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # print 'SEQ', build_conf.dataseq
    # parse command line
    if __name__ == "__main__":
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")

        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    if opts.flist:
        build_conf.logger.log("warn", "flist does NOT currently work in align_def.py")

    # ADD MODULE SPECIFIC CODE HERE
    # get required input files from idlak-data
    spkdir = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc, build_conf.spk)
    # get required directories from dependent modules
    alignsetupdir = build_conf.get_input_dir("alignsetup_def")
    # examine module specific settings and set as appropriate
    breaktype = build_conf.getval("align_def", "break")
    breakdef = build_conf.getval("align_def", "breakdef")
    # process data
    # remove old setup data
    com = "rm -rf %s" % (os.path.join(build_conf.outdir, "output", "data"))
    build_conf.logger.log("info", "Removing old alignsetup information: %s" % (com))
    os.system(com)
    # copy setup data
    com = "cp -R %s %s" % (alignsetupdir, os.path.join(build_conf.outdir, "output", "data"))
    build_conf.logger.log("info", "Copying alignsetup information: %s" % (com))
    os.system(com)
    # link conf, steps and utils directories from egs/wsj/s5
    com = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "conf"),
        os.path.join(build_conf.outdir, "output", "conf"),
    )
    build_conf.logger.log("info", "Linking wsj s5 conf: %s" % (com))
    os.system(com)
    com = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "utils"),
        os.path.join(build_conf.outdir, "output", "utils"),
    )
    build_conf.logger.log("info", "Linking wsj s5 utils: %s" % (com))
    os.system(com)
    com = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "steps"),
        os.path.join(build_conf.outdir, "output", "steps"),
    )
    build_conf.logger.log("info", "Linking wsj s5 steps: %s" % (com))
    os.system(com)
    # update path for kaldi scripts
    pathlist = [
        os.path.join(build_conf.outdir, "output", "utils"),
        os.path.join(build_conf.kaldidir, "src", "featbin"),
        os.path.join(build_conf.kaldidir, "src", "bin"),
        os.path.join(build_conf.kaldidir, "src", "fstbin"),
        os.path.join(build_conf.kaldidir, "tools", "openfst", "bin"),
        os.path.join(build_conf.kaldidir, "src", "latbin"),
        os.path.join(build_conf.kaldidir, "src", "lm"),
        os.path.join(build_conf.kaldidir, "src", "sgmmbin"),
        os.path.join(build_conf.kaldidir, "src", "sgmm2bin"),
        os.path.join(build_conf.kaldidir, "src", "fgmmbin"),
        os.path.join(build_conf.kaldidir, "src", "nnetbin"),
        os.path.join(build_conf.kaldidir, "src", "nnet-cpubin"),
        os.path.join(build_conf.kaldidir, "src", "kwsbin"),
        os.path.join(build_conf.kaldidir, "src", "gmmbin"),
    ]
    os.environ["PATH"] += os.pathsep + os.pathsep.join(pathlist)
    datadir = os.path.join(build_conf.outdir, "output", "data")
    # create lang directory using kaldi script
    com = "cd %s/output; utils/prepare_lang.sh --num-nonsil-states %d data '<OOV>' data/lang data/lang" % (
        build_conf.outdir,
        NOSTATES,
    )
    build_conf.logger.log("info", "running kaldi script to build lang subdir")
    os.system(com)
    # extract mfccs
    # com = "cd %s/output; steps/make_mfcc.sh --nj 1 data/train data/mfcc_log data/mfcc" % (build_conf.outdir)
    # build_conf.logger.log('info', 'running kaldi script to extract mfccs')
    build_conf.logger.log("info", "making mfcc directory")
    mfccdir = os.path.join(build_conf.outdir, "output", "data", "mfcc")
    if not os.path.isdir(mfccdir):
        os.mkdir(mfccdir)
    build_conf.logger.log("info", "extracting mfccs")
    com = (
        "cd %s/output; compute-mfcc-feats --frame-shift=%d --verbose=0 --config=%s scp:%s ark:- | copy-feats --compress=false ark:- ark,scp:%s,%s"
        % (
            build_conf.outdir,
            int(FRAMESHIFT * 1000),
            "conf/mfcc.conf",
            "data/train/wav.scp",
            "data/mfcc/raw_mfcc_train.1.ark",
            "data/mfcc/raw_mfcc_train.1.scp",
        )
    )
    os.system(com)
    # build dummy spk to utt file
    com = "cd %s/output; utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt" % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute dummy spk2utt file")
    os.system(com)
    # compute feature stats
    # copy scp file to train/feats.scp
    build_conf.logger.log("info", "copying mfcc scp to feats scp")
    com = "cd %s/output; cp data/mfcc/raw_mfcc_train.1.scp data/train/feats.scp" % (build_conf.outdir)
    os.system(com)
    com = "cd %s/output; steps/compute_cmvn_stats.sh data/train data/mfcc data/mfcc" % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute feature statistics")
    os.system(com)
    # mono train
    com = "cd %s/output; steps/train_mono.sh --nj 1 data/train data/lang kaldimono_output" % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute flat start monophone models")
    os.system(com)
    # delta train (triphone)
    com = (
        "cd %s/output; steps/train_deltas.sh 2000 10000 3 data/train data/lang kaldimono_output kaldidelta_tri_output"
        % (build_conf.outdir)
    )
    build_conf.logger.log("info", "running kaldi script to compute flat start triphone models")
    os.system(com)
    # delta train (quinphone)
    com = (
        "cd %s/output; steps/train_deltas.sh 2000 10000 5 data/train data/lang kaldidelta_tri_output kaldidelta_quin_output"
        % (build_conf.outdir)
    )
    build_conf.logger.log("info", "running kaldi script to compute flat start quinphone models")
    os.system(com)
    # extract the phone alignment
    com = (
        'cd %s/output; ali-to-phones --per-frame kaldidelta_quin_output/35.mdl "ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" ark,t:- |  utils/int2sym.pl -f 2- data/lang/phones.txt > align.dat'
        % (build_conf.outdir)
    )
    # com = 'cd %s/output; show-alignments data/lang/phones.txt kaldidelta_quin_output/35.mdl "ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" > align.dat' % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to extract alignment")
    os.system(com)
    # extract the state alignment
    com = (
        'cd %s/output; ali-to-hmmstate kaldidelta_quin_output/35.mdl "ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" ark,t:- > sttalign.dat'
        % (build_conf.outdir)
    )
    build_conf.logger.log("info", "running kaldi script to extract state alignment")
    os.system(com)
    # extract the word alignment
    com = (
        "cd %s/output; linear-to-nbest \"ark:gunzip -c kaldidelta_quin_output/ali.1.gz|\" \"ark:utils/sym2int.pl --map-oov 1669 -f 2- data/lang/words.txt < data/train/text |\" '' '' ark:- | lattice-align-words data/lang/phones/word_boundary.int kaldidelta_quin_output/35.mdl ark:- ark:- | nbest-to-ctm --frame-shift=%f --precision=3 ark:- - | utils/int2sym.pl -f 5 data/lang/words.txt > wrdalign.dat"
        % (build_conf.outdir, FRAMESHIFT)
    )
    build_conf.logger.log("info", "running kaldi scripts to extract word alignment")
    os.system(com)
    # get actual duration times of all wav files
    build_conf.logger.log("info", "Collecting wav file durations")
    wavdurations = get_wav_durations(
        build_conf.kaldidir, os.path.join(build_conf.outdir, "output", "data", "train", "wav.scp")
    )
    # write alignment as files that are readable by wavesurfer etc. for checking
    build_conf.logger.log("info", "Writing lab and wrd files")
    labdir = os.path.join(build_conf.outdir, "output", "labs")
    if not os.path.isdir(labdir):
        os.mkdir(labdir)
    write_as_labs(os.path.join(build_conf.outdir, "output", "align.dat"), FRAMESHIFT, wavdurations, labdir)
    wrddir = os.path.join(build_conf.outdir, "output", "wrds")
    if not os.path.isdir(wrddir):
        os.mkdir(wrddir)
    write_as_wrdlabs(os.path.join(build_conf.outdir, "output", "wrdalign.dat"), wavdurations, labdir, wrddir)
    statedir = os.path.join(build_conf.outdir, "output", "stts")
    if not os.path.isdir(statedir):
        os.mkdir(statedir)
    write_as_statelabs(
        os.path.join(build_conf.outdir, "output", "sttalign.dat"), FRAMESHIFT, NOSTATES, wavdurations, labdir, statedir
    )
    # write alignment based xml text file
    write_xml_textalign(breaktype, breakdef, labdir, wrddir, os.path.join(build_conf.outdir, "output", "text.xml"))
    # END OF MODULE SPECIFIC CODE

    build_conf.end_processing(SCRIPT_NAME)
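
The alignment module extends PATH with every Kaldi binary directory it may need so that steps/train_mono.sh, compute-mfcc-feats and friends resolve without absolute paths. A compact sketch of the same idea that skips missing directories and avoids duplicate entries (kaldidir and the subdir tuples in the usage comment mirror the list above):

# Sketch of the PATH extension above: add Kaldi tool directories once each,
# skipping any that are missing from this checkout. kaldidir is a placeholder.
import os

def extend_path_with_kaldi(kaldidir, subdirs):
    current = os.environ.get('PATH', '').split(os.pathsep)
    for sub in subdirs:
        tooldir = os.path.join(kaldidir, *sub)
        if os.path.isdir(tooldir) and tooldir not in current:
            current.append(tooldir)
    os.environ['PATH'] = os.pathsep.join(current)

# usage, mirroring part of the list above:
# extend_path_with_kaldi(build_conf.kaldidir,
#                        [('src', 'featbin'), ('src', 'bin'), ('src', 'gmmbin'),
#                         ('tools', 'openfst', 'bin')])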
Example #8
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    # parse command line
    if __name__ == "__main__":
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")

        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # get required directories from dependent modules
    kaldisrcdir = os.path.join(build_conf.kaldidir, "src")
    # examine general settings and set as appropriate
    sptk_root = build_conf.getval("mcep_def", "sptk_root")

    if not os.path.isdir(sptk_root):
        build_conf.logger.log("error", "Supplied sptk_root location %s does not exist!" % (sptk_root))
        raise IOError("Supplied sptk_root location %s does not exist!" % (sptk_root))

    # process data
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang, build_conf.acc, build_conf.spk, build_conf.srate)

    valid_ids = load_input_wavs(wavdir, build_conf.flist)

    sptk_bin_root = os.path.join(sptk_root, "bin")

    for wavfile in valid_ids:
        window_length = 400
        frame_shift = 80
        # all-pass constant
        alpha = 0.42
        # order of mel-generalised cepstrum
        order = 12

        # Strips headers from RIFF wav file.
        wavdata_com = "%s/featbin/wav-data %s/%s.wav" % (kaldisrcdir, wavdir, wavfile)
        # Converts data from short to float (+sf).
        x2x_com = "%s/x2x/x2x +sf" % (sptk_bin_root)
        frame_com = "%s/frame/frame -l %s -p %s" % (sptk_bin_root, window_length, frame_shift)
        # '-L 512' is the output frame length.
        # '-w 1' refers to the usage of a Hamming window.
        # '-n 1' is sigma(n=0,L-1)(w2(n)=1) normalisation.
        window_com = "%s/window/window -l %s -L 512 -w 1 -n 1" % (sptk_bin_root, window_length)
        # '-e 0.001' is a small value added to periodogram
        # '-l 512' is frame length.
        mcep_com = "%s/mcep/mcep -a %s -e 0.001 -m %s -l 512" % (sptk_bin_root, alpha, order)

        com = "%s | %s | %s | %s | %s | %s/x2x/x2x +fa" % (
            wavdata_com,
            x2x_com,
            frame_com,
            window_com,
            mcep_com,
            sptk_bin_root,
        )
        build_conf.logger.log("info", com)
        com_output = os.system(com)

    # END OF MODULE SPECIFIC CODE

    build_conf.end_processing(SCRIPT_NAME)
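
The mel-cepstral analysis is one shell pipeline through the SPTK tools (x2x, frame, window, mcep), but the ASCII output of the final x2x +fa stage only goes to stdout and the os.system() return value is all that is kept. A hedged sketch that runs the same pipeline string and captures its output into a per-utterance file (the .mcep extension and the outdir argument are assumptions):

# Sketch of capturing the SPTK pipeline output above into a file per utterance
# instead of letting it scroll past on stdout. The .mcep extension and the
# outdir argument are illustrative assumptions.
import os
import subprocess

def run_mcep_pipeline(com, outdir, wavfile):
    out_path = os.path.join(outdir, wavfile + '.mcep')
    with open(out_path, 'w') as out_fp:
        # the pipeline string is exactly the one assembled above
        retcode = subprocess.call(com, shell=True, stdout=out_fp)
    if retcode != 0:
        raise RuntimeError('SPTK pipeline failed for %s' % (wavfile))
    return out_path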
Example #9
def main():
    # process the options based on the default build configuration
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME)
    #print 'SEQ', build_conf.dataseq
    # parse command line
    if __name__ == '__main__':
        opts, args = parser.parse_args()
        # and load custom configurations
        if opts.bldconf:
            build_conf.parse(opts.bldconf)
        if opts.spkconf:
            build_conf.parse(opts.spkconf)
        else:
            parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
            
        build_conf.updatefromopts(opts)
    # set up logging, check idlak-scratch, check dependencies and build as required
    build_conf.set_build_environment(SCRIPT_NAME)

    # ADD MODULE SPECIFIC CODE HERE
    # get required input files from idlak-data
    tpdbdir = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc)
    qset =  os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc, "qset-default.xml")
    outdir = build_conf.outdir
    # get audio directory
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang, build_conf.acc,
                          build_conf.spk, build_conf.srate)
    if not os.path.isabs(wavdir):
        wavdir = os.path.realpath(os.path.join(os.path.curdir, wavdir))
    # get required directories from dependent modules
    aligndir = build_conf.get_input_dir('align_def')
    # Check to see if we generate HTS style context models as well
    hts = build_conf.getval('cex_def', 'hts')
    # examine module-specific settings and set as appropriate
    # process data
    # get path to txpbin
    pathlist = [os.path.join(build_conf.kaldidir, 'src', 'idlaktxpbin')]
    os.environ["PATH"] += os.pathsep + os.pathsep.join(pathlist)
    # Process script through txp and cex
    output_filename = os.path.join(outdir, 'output', 'cex.xml')
    cmd = "idlaktxp --pretty --tpdb=%s %s - | " % (tpdbdir, os.path.join(aligndir, "text.xml")) + \
        "idlakcex --pretty --tpdb=%s - %s" % (tpdbdir, output_filename)
    os.system(cmd)
    # read in the cex xml output and generate kaldi files for tree building
    dom = parse(output_filename)
    cexs, output_contexts, freqtables, cexheader = output_kaldicex(build_conf.logger, dom, outdir)
    # write out script to split original wavs into spts if required (i.e for HTS test)
    phon_labs = write_spt_times(build_conf.logger,
                                dom,
                                os.path.join(aligndir, 'labs'),
                                os.path.join(outdir, 'output', 'spt_times.dat'))    
    # generate HTS style context model names
    cexheaderhts = None
    if hts == "True":
        output_filename = os.path.join(outdir, 'output', 'cex_hts.xml')
        cmd = "idlaktxp --pretty --tpdb=%s %s - | " % (tpdbdir, os.path.join(aligndir, "text.xml")) + \
            "idlakcex --pretty --cex-arch=hts --tpdb=%s - %s" % (tpdbdir, output_filename)
        os.system(cmd)
        dom = parse(output_filename)
        filecontexts, cexheaderhts = output_htscex(build_conf.logger, dom, outdir, phon_labs)
        htsqset = os.path.join(outdir, 'output', 'questions-kaldi-%s-%s.hed' % (build_conf.lang, build_conf.acc))
        write_htsqset(build_conf.logger, qset, htsqset, cexheaderhts)

    # # write frequency tables of contexts for audit purposes
    # for ftable in freqtables.keys():
    #     fp = open(os.path.join(outdir, 'output', ftable + '_freq.txt'), 'w')
    #     vals = freqtables[ftable].keys()
    #     vals.sort()
    #     for v in vals:
    #         fp.write("%s %d\n" % (v, freqtables[ftable][v]))
    #     fp.close()
        
    # create lookup tables if required
    lookuptables = {}
    for i in range(len(cexs)):
        key = 'cex' + ('000' + str(i))[-3:]
        vals = freqtables[key].keys()
        vals.sort()
        for v in vals:
            if not re.match('[0-9]+', v):
                # found a non integer value create a lookup table
                lookuptables[key] = {}
                mapping = 1
                for v in vals:
                    if v == '0':
                        lookuptables[key][v] = 0
                    else:
                        lookuptables[key][v] = mapping
                        mapping += 1
                break
            
    # # output lookup tables
    # for table in lookuptables.keys():
    #     fp = open(os.path.join(outdir, 'output', table + '_lkp.txt'), 'w')
    #     vals = lookuptables[table].keys()
    #     vals.sort()
    #     for v in vals:
    #         fp.write("%s %d\n" % (v, lookuptables[table][v]))
    #     fp.close()

    # Output the context information used in an XML-readable form.
    # includes frequency/lookup tables, field names, etc.
    write_kaldi_context_setup(cexheader, cexheaderhts, freqtables, lookuptables, outdir)
            
    # write kaldi style archive replacing symbols with lookup
    output_filename = os.path.join(outdir, 'output', 'cex.ark')
    fp = open(output_filename, 'w')
    for f in output_contexts:
        key = f[0]
        fp.write(key + ' ')
        for p in f[1]:
            for i, v in enumerate(p):
                # replace symbols with integers
                table = 'cex' + ('000' + str(i))[-3:]
                if lookuptables.has_key(table):
                    v = str(lookuptables[table][v])
                fp.write(v + ' ')
            fp.write('; ')
        fp.write('\n')
    fp.close()
    
    kaldiqset = os.path.join(outdir, 'output', 'qset.dat')
    write_kaldiqset(build_conf.logger, qset, kaldiqset, cexheader, lookuptables)
    # filecontexts is only defined when the HTS output was generated above
    if hts == "True":
        print filecontexts
    # END OF MODULE SPECIFIC CODE
    
    build_conf.end_processing(SCRIPT_NAME)
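
The loop above creates a symbol-to-integer lookup table for every context feature whose frequency table contains a non-numeric value, keeping 0 reserved for the '0' padding symbol. A self-contained sketch of that mapping for a single frequency table (the sample data in the usage comment is illustrative):

# Sketch of building one symbol -> integer lookup table as in the loop above:
# the '0' padding symbol keeps index 0 and every other value gets the next
# free positive integer in sorted order.
def build_lookup(freqtable):
    lookup = {}
    mapping = 1
    for v in sorted(freqtable.keys()):
        if v == '0':
            lookup[v] = 0
        else:
            lookup[v] = mapping
            mapping += 1
    return lookup

# e.g. build_lookup({'0': 120, 'aa': 30, 'ae': 12}) -> {'0': 0, 'aa': 1, 'ae': 2}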