Example #1
def train_language_model(target, source, env):
    """Train an n-gram language model using a plain text transcript.

    Uses IBM's compiled LM tools that ship with Attila.  This can also be used on a segmented transcript,
    in which case the n-grams are over morphs rather than words.

    Sources: transcript file, n
    Targets: language model file
    """
    text_file = source[0].rstr()
    n = source[1].read()
    with temp_dir() as prefix_dir, temp_file() as vocab_file, temp_file(suffix=".txt") as sentence_file, meta_open(text_file) as text_fd:
        sentences = ["<s> %s </s>" % (l) for l in text_fd]
        words =  set(sum([s.split() for s in sentences], []) + ["<s>", "</s>", "<UNK>"])
        with meta_open(vocab_file, "w") as ofd:
            ofd.write("\n".join(words))
        with meta_open(sentence_file, "w") as ofd:
            ofd.write("\n".join(sentences))
        prefix = os.path.join(prefix_dir, "counts")
        cmd = "${ATTILA_PATH}/tools/lm_64/CountNGram -n %d %s %s %s" % (n, sentence_file, vocab_file, prefix)
        out, err, success = run_command(env.subst(cmd))
        if not success:
            return err
        
        lm = ".".join(target[0].rstr().split(".")[0:-2])
        cmd = "${ATTILA_PATH}/tools/lm_64/BuildNGram.sh -n %d -arpabo %s %s" % (n, prefix, lm)
        out, err, success = run_command(env.subst(cmd), env={"SFCLMTOOLS" : env.subst("${ATTILA_PATH}/tools/lm_64")})
        if not success:
            return err
        
    return None
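Several of these builders call a run_command helper that isn't shown in these excerpts; the sketch below is only an assumption about its shape (a command string in, stdout/stderr/success out), not the project's actual implementation.

# Hypothetical sketch of the run_command helper assumed by these builders;
# the real version may differ (e.g. in how it handles extra environment variables).
import os
import subprocess

def run_command(cmd, env=None, data=None):
    """Run a command string and return (stdout, stderr, success)."""
    full_env = dict(os.environ)
    if env:
        full_env.update(env)
    proc = subprocess.Popen(cmd.split(),
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env=full_env)
    out, err = proc.communicate(data)
    return out, err, proc.returncode == 0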
Example #2
def _run_emma(target, source, env):
    with temp_file() as gold, temp_file() as guess, meta_open(source[0].rstr()) as _guess, meta_open(source[1].rstr()) as _gold:
        guesses = [x.strip() for x in _guess]
        words = [x.split()[0] for x in guesses]
        # only score words that are purely alphabetic (no digits, no punctuation)
        keep = set([w for w in words if re.match(r"^\w+$", w) and not re.match(r".*\d.*", w)])
        with meta_open(gold, "w") as gold_fd:
            gold_fd.write("\n".join([x.strip() for x in _gold if x.split()[0] in keep]))
        with meta_open(guess, "w") as guess_fd:
            guess_fd.write("\n".join([x for x in guesses if x.split()[0] in keep]))
        cmd = env.subst("python ${EMMA} -g %s -p %s -L ${LPSOLVE_PATH}" % (guess, gold), source=source, target=target)
        pid = Popen(cmd.split(), stdout=PIPE)
        #out, err = pid.communicate()
        #prec, rec, fscore = [float(x.strip().split()[-1]) for x in out.strip().split("\n")[-3:]]
    with meta_open(target[0].rstr(), "w") as ofd:
        pass
        #ofd.write("\t".join(["MorphP", "MorphR", "MorphF"]) + "\n")
        #ofd.write("\t".join(["%.3f" % x for x in [prec, rec, fscore]]) + "\n")
    return None
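The temp_file, temp_dir, and meta_open context managers used throughout are also not shown; the following are rough sketches of plausible implementations (the gzip handling and encoding are guesses, not the project's actual code).

# Assumed sketches of the helpers used above; the real versions may differ.
import codecs
import contextlib
import gzip
import os
import shutil
import tempfile

@contextlib.contextmanager
def temp_file(suffix=""):
    # yield the name of a temporary file and delete it afterwards
    fd, name = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        yield name
    finally:
        os.remove(name)

@contextlib.contextmanager
def temp_dir():
    # yield the name of a temporary directory and delete it afterwards
    name = tempfile.mkdtemp()
    try:
        yield name
    finally:
        shutil.rmtree(name)

def meta_open(fname, mode="r"):
    # open a file, transparently handling gzip compression by extension
    if fname.endswith(".gz"):
        return gzip.open(fname, mode)
    return codecs.open(fname, mode, encoding="utf-8")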
Example #3
def run_g2p(target, source, env):
    with temp_file() as tfname, meta_open(source[0].rstr()) as pl_fd:
        words = set([x.split()[0].split("(")[0] for x in pl_fd])
        with meta_open(tfname, "w") as t_fd:
            t_fd.write("\n".join(words))
        out, err, success = run_command(env.subst("%s %s/bin/g2p.py --model %s --encoding=%s --apply %s --variants-mass=%f  --variants-number=%d" % (env["PYTHON"], env["OVERLAY"], source[1].rstr(), "utf-8", tfname, .9, 4)),
                                        env={"PYTHONPATH" : env.subst("${OVERLAY}/lib/python2.7/site-packages")},
                                        )
        if not success:
            return err
        else:
            with meta_open(target[0].rstr(), "w") as out_fd:
                out_fd.write(out)
    return None
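These functions follow the SCons builder-action signature (target, source, env); below is a hedged sketch of how they might be attached to an environment in an SConstruct. The paths, variable values, and n-gram order are purely illustrative.

# Illustrative only: wiring the builder actions above into an SCons environment.
# Inside an SConstruct the import is usually unnecessary.
from SCons.Script import Builder, Environment

env = Environment(ATTILA_PATH="/opt/attila",      # hypothetical install locations
                  PYTHON="python",
                  OVERLAY="/opt/overlay")
env.Append(BUILDERS={"TrainLanguageModel": Builder(action=train_language_model),
                     "RunG2P": Builder(action=run_g2p)})

# sources for train_language_model are the transcript file and the n-gram order
lm = env.TrainLanguageModel("work/lm.arpabo.gz",
                            ["data/transcript.txt", env.Value(3)])
# sources for run_g2p are a word list and a trained g2p model
prons = env.RunG2P("work/pronunciations.txt",
                   ["data/wordlist.txt", "data/g2p.model"])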
Example #4
def decode(target, source, env):
    """Decode some audio using a decoding network and some models (based on the example pipelines from IBM).

    This is the heart, and by far the most complicated and error-prone part, of the pipeline.  Basically,
    the models IBM sent us are similar but have small variations, so some need to be run
    differently.  This builder tries to figure out what to do based on which model files exist, and
    then runs the appropriate code.  If it can't figure out what to run, it throws an error.  It is also
    aware of how many jobs the experiment has been split into, and only runs the job it was told to.  Most
    of the code was only slightly adapted from the cfg.py, construct.py, and test.py files in the acoustic
    models IBM sent us.

    Sources: decoding network file, vocabulary file, pronunciation file, language model file
    Targets: ctm transcript file, consensus network file
    """
    dnet, vocabulary, pronunciations, language_model = source
    out_path, tail = os.path.split(os.path.dirname(target[0].rstr()))
    env.Replace(VOCABULARY_FILE=vocabulary.rstr(),
                PRONUNCIATIONS_FILE=pronunciations.rstr(),
                LANGUAGE_MODEL_FILE=language_model.rstr(),
                NETWORK_FILE=dnet.rstr(),
    )
        
    cfg = CFG(env)
    postThresh = 1e-04

    mlpFile = env.maybe(env.subst("${MLP_FILE}"))
    melFile = env.maybe(env.subst("${MEL_FILE}"))
    warpFile = env.maybe(env.subst("${WARP_FILE}"))
    ldaFile = env.maybe(env.subst("${LDA_FILE}"))
    priorsFile = env.maybe(env.subst("${PRIORS_FILE}"))

    mlp = os.path.exists(cfg.mlpFile) and "weights.mlp" in mlpFile
    nmlp = os.path.exists(cfg.mlpFile) and "weights.mlp" not in mlpFile
    layer = os.path.exists(env.subst("${MODEL_PATH}/layer0"))
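    # The three flags above try to infer how the acoustic model was packaged:
    #   mlp   -> an MLP shipped as a "weights.mlp" file (rectified-linear hidden
    #            layers in the corresponding branch below),
    #   nmlp  -> an MLP shipped under some other file name (sigmoid hidden layers),
    #   layer -> raw per-layer weight files (layer0, layer1, ...) under MODEL_PATH.
    # Each case wires up the neural-network scorer differently further down.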


    db = dbase.DB(dirFn=dbase.getFlatDir)

    fe = FeCombo(db, int(env["SAMPLING_RATE"]), env["FEATURE_TYPE"])
    fe.end            = fe.fmllr
    fe.pcm.pcmDir     = cfg.pcmDir
    fe.pcm.readMode   = 'speaker'
    fe.norm.normMode  = 1
    fe.norm.normDir   = env.subst("${CMS_PATH}")
    fe.fmllr.fmllrDir = env.subst("${FMLLR_PATH}")
    
    #
    # from test.py
    #
    jid    = int(env["JOB_ID"])
    jnr    = int(env["JOB_COUNT"])
    genLat = True
    genCons = True
    writeLat = False
    writeCons = True
    cfg.useDispatcher = False
    if nmlp:
        chunkSize = 10
    else:
        chunkSize = 5
    acweight = float(env.subst("${ACOUSTIC_WEIGHT}"))
    db.init(cfg.dbFile, 'utterance', False, jid, jnr, chunkSize=chunkSize)
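    # db.init above restricts this process to its own chunk of utterances
    # (job jid of jnr); the calls below then load the mel filterbank, warp
    # factors, and LDA transform the front end needs before the decoder is built.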

    fe.mel.readFilter(melFile)
    fe.mel.readWarp(warpFile)
    fe.lda.readLDA(ldaFile)    

    se = dsearch.Decoder(speed=12, scale=acweight, lmType=32, genLat=genLat)
        
    se.initGraph(cfg)
    se.latBeam  = 7
    se.linkMax  = 700

    if mlp:
        fe.ctx2           = frontend.FeCTX([fe.fmllr])
        fe.ctx2.spliceN   = 4
        fe.ctx2.db        = db
        fe.mlp.depL       = [fe.ctx2]
        fe.mlp.db         = db
        fe.end            = fe.mlp
        fe.mlp.mlp.read(mlpFile)

        fe.mlp.mlp.layerL[0].afct = Act_Rectified()
        fe.mlp.mlp.layerL[1].afct = Act_Rectified()
        fe.mlp.mlp.layerL[2].afct = Act_Rectified()
        fe.mlp.mlp.layerL[3].afct = Act_Sigmoid()
        fe.mlp.mlp.layerL[4].afct = Act_ID()

        se.sc = NNScorer()
        se.dnet.scorer = se.sc
        se.sc.scale    = acweight
        se.sc.feat     = fe.end.feat
        se.sc.logInput = True
        se.sc.readPriors(priorsFile)
        
    elif layer:
        fe.ctx2           = frontend.FeCTX([fe.fmllr])
        fe.ctx2.spliceN   = 4
        fe.ctx2.db        = db
        fe.end            = fe.ctx2
        layerL = []
        for i in range(6):
            l = nnet.LayerWeights()
            l.name = 'layer%d'%i
            l.isTrainable = False
            l.initWeightFile = env.subst('${MODEL_PATH}/layer%d') % i
            layerL.append(l)
            if i < 5:
                l = nnet.LayerSigmoid()
                l.name = 'layer%d-nonl' % i
                layerL.append(l)
        layerL[-1].matrixOut = True
        nn = nnet.NeuralNet(layerL=layerL, depL=[fe.end])
        nn.db = db
        nn.configure()

        se.sc = NNScorer()
        se.dnet.scorer = se.sc
        se.sc.scale    = acweight
        se.sc.feat     = nn.feat
        se.sc.logInput = True
        se.sc.readPriors(priorsFile)
    elif nmlp:
        se.initAM(cfg)    
        mlp      = fe.mlp.mlp
        mlp.feat = MatrixCU()
        sigmoid  = Act_Sigmoid()
        tanh     = Act_Tanh()
        actid    = Act_ID()
        softmax  = Act_Softmax()
        softmax.logOutput = True

        mlp.read(mlpFile)
        for layerX in range(mlp.layerL.size()):
            mlp.layerL[layerX].afct = sigmoid
        mlp.layerL[-1].afct = actid

        se.sc = NNScorer()
        se.dnet.scorer = se.sc
        se.sc.scale    = acweight
        se.sc.logInput = True
        se.sc.feat     = mlp.layerL[-1].Y.mat
        se.sc.readPriors(priorsFile)

        se.latBeam  = 6.5
        se.linkMax  = 700

        binThresh         = 1.0e-10
        writeSIL          = 0

        totUtt    = 0
        totArc    = 0
        totNonSil = 0
        totDur    = 0.0
        totDens   = 0.0
    else:
        return "Don't know how to run ASR with these models!"

    with meta_open(target[0].rstr(), "w") as ctm_ofd, tarfile.open(target[1].rstr(), "w|gz") as tf_ofd, temp_file() as temp_fname:
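        # Main decoding loop: for each utterance in this job's slice of the
        # database, run the front end, search the decoding network, append the
        # hypothesis to the CTM transcript, rescore, and then add either the raw
        # lattice (.fsm) or the consensus network (.cons) to the gzipped tar
        # file that is the second target.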
        for utt in db:
            key    = utt + ' ' + os.path.splitext(db.getFile(utt))[0]
            if mlp or nmlp:
                fe.end.eval(utt)
            else:
                nn.eval(utt)
            se.search()
            txt    = se.getHyp().strip()
            hyp    = se.getCTM(key, db.getFrom(utt))
            tscore = se.getScore()
            for c in hyp:
                ctm_ofd.write("%s\n" % (c))
            se.rescore(env["RESCORE_BEAM"])
            with meta_open(temp_fname, "w") as ofd:
                pass
            if writeLat:
                fname = "%s.fsm" % (utt)
                se.lat.write(temp_fname, db.getFrom(utt))
            elif writeCons:
                fname = "%s.cons" % (utt)
                arcN = len(se.lat.arcs)
                durS = db.getTo(utt) - db.getFrom(utt)
                dens = arcN / durS
                se.consensus(postThresh)
                binThresh         = 1.0e-10
                writeSIL          = 0
                se.cons.write(temp_fname, db.getFrom(utt), binThresh, writeSIL)
            tf_ofd.add(temp_fname, arcname=fname)
    return None
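The second target written by decode is a gzipped tar of per-utterance consensus networks (or lattices); below is a hedged sketch of how downstream code might walk it. The path is invented and any parsing is left out.

# Illustrative only: iterating over the consensus networks produced by decode().
import tarfile

with tarfile.open("work/decode/consensus.tgz", "r:gz") as tf:   # hypothetical path
    for member in tf.getmembers():
        if member.name.endswith(".cons"):
            utterance = member.name[:-len(".cons")]
            data = tf.extractfile(member).read()
            # ... parse the consensus network for this utterance ...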