# run majorityVote, compute "probabilities"
sHardVoteFile = os.path.join(sMajorVoteDir, 'hard.csv')
sSoftVoteFile = os.path.join(sMajorVoteDir, 'soft.csv')
if os.path.exists(sHardVoteFile) and os.path.exists(sSoftVoteFile):
    majorVotesHard = np.genfromtxt(sHardVoteFile, delimiter=',')
    majorVotesSoft = np.genfromtxt(sSoftVoteFile, delimiter=',')
else:
    (majorVotesHard, majorVotesSoft) = computeMajorVotes(sHardVoteFile, sSoftVoteFile, sResultDir)

# drop the leading index column; each remaining row holds per-class vote
# counts and should sum to the number of classifiers
mProb = majorVotesSoft[:, 1:].copy()
print mProb.sum(1)
assert np.all(abs(mProb.sum(1) - config.CLASSIFIERS) < 1E-2)
# normalize vote counts into per-frame class probabilities
mProb /= mProb.sum(1)[:, None]

# write the probabilities back out per utterance in Kaldi ark/scp form
kaldiOut = KaldiWriteOut(output_scp, output_ark)
kaldiOut.open()
rIdx = 0
for i, uId in enumerate(uttIDs):
    kaldiOut.write(uId, mProb[rIdx:(rIdx + uttLens[i]), :])
    rIdx += uttLens[i]
kaldiOut.close()

# write dummy weights (identity matrix)
with open(sWeightFile, 'wb') as fOut:
    w = np.eye(mProb.shape[1])
    (sz1, sz2) = w.shape
    fOut.write('<affinetransform> %d %d\n[\n' % (sz2, sz1))
    for c in xrange(0, sz2):
        fOut.write(' '.join([str(w[r, c]) for r in xrange(0, sz1)]) + '\n')
    fOut.write(']\n')
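# For reference, the dummy weight file written above looks like this for a
# hypothetical 3-class case (an identity matrix, so whatever consumes it
# passes the voted probabilities through unchanged):
#
#   <affinetransform> 3 3
#   [
#   1.0 0.0 0.0
#   0.0 1.0 0.0
#   0.0 0.0 1.0
#   ]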
sModelFile = arguments['model_file']
sDeeplearnPath = arguments['deeplearn_path']

# paths for output files
output_scp = output_file_prefix + '.scp'
output_ark = output_file_prefix + '.ark'
removeFile(output_scp)
removeFile(output_ark)

sDataDir = os.path.join(wdir, 'data')
if not os.path.exists(sDataDir):
    os.mkdir(sDataDir)

kaldiIn = KaldiReadIn(ark_file)
kaldiIn.open()
kaldiOut = KaldiWriteOut(output_scp, output_ark)
kaldiOut.open()

# accumulate utterances into large batches before processing
uttIDBatch = []
uttIDLength = []
featMatBatch = None
batchSz = -1
uttID, featMat = kaldiIn.next()
while featMat is not None:
    if batchSz < 0:
        # frames per batch: ~300 MB budget over 4-byte float32 features
        batchSz = 300 * 1024 * 1024 / (4 * featMat.shape[1])
    if featMatBatch is None:
        featMatBatch = featMat
    else:
        featMatBatch = np.vstack([featMatBatch, featMat])
    uttIDBatch.append(uttID)
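    # note: np.vstack copies the accumulated batch on every iteration; cheap
    # enough at this scale, but collecting matrices in a list and
    # concatenating once per flush would avoid the repeated copying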