Esempio n. 1
0
    # Paths of the hard/soft majority-vote tables inside sMajorVoteDir.
    sHardVoteFile = os.path.join(sMajorVoteDir, 'hard.csv')
    sSoftVoteFile = os.path.join(sMajorVoteDir, 'soft.csv')
    # Load both tables from disk only when both CSV files exist; otherwise
    # obtain them from computeMajorVotes.
    # NOTE(review): presumably computeMajorVotes also writes the two CSV
    # files it is given -- confirm against its definition.
    if os.path.exists(sHardVoteFile) and os.path.exists(sSoftVoteFile):
        majorVotesHard = np.genfromtxt(sHardVoteFile, delimiter=',')
        majorVotesSoft = np.genfromtxt(sSoftVoteFile, delimiter=',')
    else:
        (majorVotesHard,
         majorVotesSoft) = computeMajorVotes(sHardVoteFile, sSoftVoteFile,
                                             sResultDir)

    # Drop the first column of the soft votes and work on a copy, so the
    # in-place normalisation below leaves majorVotesSoft untouched.
    # NOTE(review): the first column is presumably an index/label column --
    # confirm against the CSV layout.
    mProb = majorVotesSoft[:, 1:].copy()
    # Debug output: per-row sums before normalisation.
    print mProb.sum(1)
    # Sanity check: every row must sum to config.CLASSIFIERS (within 1e-2).
    assert np.all(abs(mProb.sum(1) - config.CLASSIFIERS) < 1E-2)
    # Scale each row so that it sums to 1.
    mProb /= mProb.sum(1)[:, None]

    # Write the normalised rows out per utterance: utterance i receives the
    # next uttLens[i] consecutive rows of mProb, tracked by the offset rIdx.
    kaldiOut = KaldiWriteOut(output_scp, output_ark)
    kaldiOut.open()
    rIdx = 0
    for i, uId in enumerate(uttIDs):
        kaldiOut.write(uId, mProb[rIdx:(rIdx + uttLens[i]), :])
        rIdx += uttLens[i]
    kaldiOut.close()

    # write dummy weights (identity matrix)
    # The header carries the dimensions as "sz2 sz1" and every text line
    # holds one column of w; for the identity matrix the transposition has
    # no effect on the written values.
    with open(sWeightFile, 'wb') as fOut:
        w = np.eye(mProb.shape[1])
        (sz1, sz2) = w.shape
        fOut.write('<affinetransform> %d %d\n[\n' % (sz2, sz1))
        for c in xrange(0, sz2):
            fOut.write(' '.join([str(w[r, c]) for r in xrange(0, sz1)]) + '\n')
        fOut.write(']\n')
    
    # run majorityVote, compute "probabilities"
    # Paths of the hard/soft majority-vote tables inside sMajorVoteDir.
    sHardVoteFile = os.path.join(sMajorVoteDir, 'hard.csv')
    sSoftVoteFile = os.path.join(sMajorVoteDir, 'soft.csv')
    # Load both tables from disk only when both CSV files exist; otherwise
    # obtain them from computeMajorVotes.
    # NOTE(review): presumably computeMajorVotes also writes the two CSV
    # files it is given -- confirm against its definition.
    if os.path.exists(sHardVoteFile) and os.path.exists(sSoftVoteFile):
        majorVotesHard = np.genfromtxt(sHardVoteFile, delimiter=',')
        majorVotesSoft = np.genfromtxt(sSoftVoteFile, delimiter=',')
    else:
        (majorVotesHard, majorVotesSoft) = computeMajorVotes(sHardVoteFile, sSoftVoteFile, sResultDir)
    
    # Drop the first column of the soft votes and work on a copy, so the
    # in-place normalisation below leaves majorVotesSoft untouched.
    # NOTE(review): the first column is presumably an index/label column --
    # confirm against the CSV layout.
    mProb = majorVotesSoft[:, 1:].copy()
    # Debug output: per-row sums before normalisation.
    print mProb.sum(1)
    # Sanity check: every row must sum to config.CLASSIFIERS (within 1e-2).
    assert np.all(abs(mProb.sum(1) - config.CLASSIFIERS) < 1E-2)
    # Scale each row so that it sums to 1.
    mProb /= mProb.sum(1)[:, None]

    # Write the normalised rows out per utterance: utterance i receives the
    # next uttLens[i] consecutive rows of mProb, tracked by the offset rIdx.
    kaldiOut = KaldiWriteOut(output_scp, output_ark)
    kaldiOut.open()
    rIdx = 0
    for i, uId in enumerate(uttIDs):
        kaldiOut.write(uId, mProb[rIdx:(rIdx + uttLens[i]), :])
        rIdx += uttLens[i]
    kaldiOut.close()
    
    # write dummy weights (identity matrix)
    # The header carries the dimensions as "sz2 sz1" and every text line
    # holds one column of w; for the identity matrix the transposition has
    # no effect on the written values.
    with open(sWeightFile, 'wb') as fOut:
        w = np.eye(mProb.shape[1])
        (sz1, sz2) = w.shape
        fOut.write('<affinetransform> %d %d\n[\n' % (sz2, sz1))
        for c in xrange(0, sz2):
            fOut.write(' '.join([str(w[r, c]) for r in xrange(0, sz1)]) + '\n')
        fOut.write(']\n')
Esempio n. 3
0
 # Model file and deeplearn location as supplied in the arguments mapping.
 sModelFile = arguments['model_file']
 sDeeplearnPath = arguments['deeplearn_path']
 
 # paths for output files
 # Both paths share output_file_prefix; removeFile is called on each before
 # the writer is opened (presumably to clear stale output -- confirm
 # against removeFile's definition).
 output_scp = output_file_prefix + '.scp'
 output_ark = output_file_prefix + '.ark'
 removeFile(output_scp)
 removeFile(output_ark)
 
 # Make sure the 'data' directory under wdir exists.
 sDataDir = os.path.join(wdir, 'data')
 if not os.path.exists(sDataDir):
     os.mkdir(sDataDir)
 
 # Open the reader on ark_file and the writer for the scp/ark output pair.
 kaldiIn = KaldiReadIn(ark_file)
 kaldiIn.open()
 kaldiOut = KaldiWriteOut(output_scp,output_ark)
 kaldiOut.open()
 # Batch accumulators: utterance ids, their lengths and the stacked
 # feature matrix (None until the first utterance is added).
 uttIDBatch = []
 uttIDLength = []
 featMatBatch = None
 # Negative value marks the batch size as not computed yet; the loop that
 # follows derives it from the first feature matrix's column count.
 batchSz = -1
 # Fetch the first utterance; the following loop runs until featMat is None.
 uttID, featMat = kaldiIn.next()
 while featMat is not None:
     if batchSz < 0:
         batchSz = 300*1024*1024 / (4*featMat.shape[1])
         
     if featMatBatch is None:
         featMatBatch = featMat
     else:
         featMatBatch = np.vstack([featMatBatch, featMat])
     uttIDBatch.append(uttID)
Esempio n. 4
0
    # Model file and deeplearn location as supplied in the arguments mapping.
    sModelFile = arguments['model_file']
    sDeeplearnPath = arguments['deeplearn_path']

    # paths for output files
    # Both paths share output_file_prefix; removeFile is called on each
    # before the writer is opened (presumably to clear stale output --
    # confirm against removeFile's definition).
    output_scp = output_file_prefix + '.scp'
    output_ark = output_file_prefix + '.ark'
    removeFile(output_scp)
    removeFile(output_ark)

    # Make sure the 'data' directory under wdir exists.
    sDataDir = os.path.join(wdir, 'data')
    if not os.path.exists(sDataDir):
        os.mkdir(sDataDir)

    # Open the reader on ark_file and the writer for the scp/ark output pair.
    kaldiIn = KaldiReadIn(ark_file)
    kaldiIn.open()
    kaldiOut = KaldiWriteOut(output_scp, output_ark)
    kaldiOut.open()
    # Batch accumulators: utterance ids, their lengths and the stacked
    # feature matrix (None until the first utterance is added).
    uttIDBatch = []
    uttIDLength = []
    featMatBatch = None
    # Negative value marks the batch size as not computed yet; the loop that
    # follows derives it from the first feature matrix's column count.
    batchSz = -1
    # Fetch the first utterance; the following loop runs until featMat is
    # None.
    uttID, featMat = kaldiIn.next()
    while featMat is not None:
        if batchSz < 0:
            batchSz = 300 * 1024 * 1024 / (4 * featMat.shape[1])

        if featMatBatch is None:
            featMatBatch = featMat
        else:
            featMatBatch = np.vstack([featMatBatch, featMat])
        uttIDBatch.append(uttID)