def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors (and F0 correlation) of two files.

    Args:
        file1: path of the first raw feature file.
        file2: path of the second raw feature file.
        dim: feature dimension handed to ``funcs.read_raw_mat``; ``dim == 1``
            selects the F0 branch.

    Returns:
        Tuple ``(rmse, frame_num, corr)``.

        * F0 branch: ``rmse`` has three columns: the squared transformed-F0
          difference on frames that are voiced (> 0) in both files, 1 where
          the voiced/unvoiced decisions differ, and 1 on frames voiced in
          both. ``corr`` is the ``scipy.stats.spearmanr`` result. When no
          frame is voiced in both files ``rmse`` is all-NaN and ``corr`` is
          ``[nan, 0]``.
        * Otherwise: ``rmse`` is the element-wise squared difference and
          ``corr`` is the placeholder ``-10``.

        ``frame_num`` is ``data1.shape[0]`` at return time.
        NOTE(review): in the voiced F0 case ``data1`` has been replaced by
        the output of ``F0Transform`` on the voiced frames, so this is
        presumably the number of commonly voiced frames, not the number of
        rows of ``rmse`` -- confirm against the caller.
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # check the data length (relative difference above 20% is only warned)
    if np.abs(data1.shape[0] - data2.shape[0]) * 2.0 / \
       (data1.shape[0] + data2.shape[0]) > 0.2:
        # print() with one argument behaves the same on Python 2 and 3
        print("Warning: length mis-match: %s %d, %s %d" % (
            file1, data1.shape[0], file2, data2.shape[0]))

    # slightly change the length of data: truncate the longer one
    if data1.shape[0] > data2.shape[0]:
        if dim == 1:
            data1 = data1[0:data2.shape[0]]
        else:
            data1 = data1[0:data2.shape[0], :]
    elif data1.shape[0] < data2.shape[0]:
        if dim == 1:
            data2 = data2[0:data1.shape[0]]
        else:
            data2 = data2[0:data1.shape[0], :]

    if dim == 1:
        # This is F0
        diff = np.zeros([data1.shape[0], 3])
        temp1 = data1 > 0
        temp2 = data2 > 0
        # all voiced time steps
        indp = (temp1 * temp2)
        # u/v different time steps
        indn = np.bitwise_xor(temp1, temp2)
        # number of voiced frames
        voiceFrame = sum(indp)
        if voiceFrame > 0:
            data1 = F0Transform(data1[indp])
            data2 = F0Transform(data2[indp])
            diff[indp, 0] = data1 - data2
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            rmse = diff * diff
            corr = scipy.stats.spearmanr(data1, data2)
        else:
            corr = [np.nan, 0]
            rmse = diff * np.nan
    else:
        diff = data1 - data2
        rmse = diff * diff
        corr = -10
    return rmse, data1.shape[0], corr
def RMSECalcore(file1, file2, dim):
    """Return squared errors, a frame count and a correlation for two files.

    Both files are read with ``funcs.read_raw_mat(path, dim)`` and the longer
    one is truncated to the length of the shorter one (a warning is printed
    when the relative length difference exceeds 0.2).

    For ``dim == 1`` (F0) the first return value has three columns: squared
    transformed-F0 difference on commonly voiced frames, a flag for frames
    whose voiced/unvoiced decision differs, and a flag for commonly voiced
    frames. The third return value is the ``scipy.stats.spearmanr`` result,
    or ``[nan, 0]`` (with an all-NaN first value) if no frame is voiced in
    both files. For any other ``dim`` the element-wise squared difference and
    the placeholder correlation ``-10`` are returned.

    The second return value is ``shape[0]`` of the first stream as it stands
    at return time (after ``F0Transform`` in the voiced F0 case).
    """
    feat_a = funcs.read_raw_mat(file1, dim)
    feat_b = funcs.read_raw_mat(file2, dim)
    len_a, len_b = feat_a.shape[0], feat_b.shape[0]

    # check the data length
    mismatch = np.abs(len_a - len_b) * 2.0 / (len_a + len_b)
    if mismatch > 0.2:
        print("Warning: length mis-match: %s %d, %s %d" % (file1, len_a, file2, len_b))

    # slightly change the length of data
    if len_a > len_b:
        feat_a = feat_a[0:len_b] if dim == 1 else feat_a[0:len_b, :]
    elif len_a < len_b:
        feat_b = feat_b[0:len_a] if dim == 1 else feat_b[0:len_a, :]

    if dim != 1:
        # generic features: plain squared difference, no correlation
        delta = feat_a - feat_b
        return delta * delta, feat_a.shape[0], -10

    # This is F0
    stats = np.zeros([feat_a.shape[0], 3])
    voiced_a = feat_a > 0
    voiced_b = feat_b > 0
    # all voiced time steps
    both_voiced = (voiced_a * voiced_b)
    # u/v different time steps
    uv_differs = np.bitwise_xor(voiced_a, voiced_b)

    # number of voiced frames
    if sum(both_voiced) > 0:
        feat_a = F0Transform(feat_a[both_voiced])
        feat_b = F0Transform(feat_b[both_voiced])
        stats[both_voiced, 0] = feat_a - feat_b
        stats[uv_differs, 1] = 1
        stats[both_voiced, 2] = 1
        squared = stats * stats
        #corr = scipy.stats.pearsonr(data1,data2)
        return squared, feat_a.shape[0], scipy.stats.spearmanr(feat_a, feat_b)

    return stats * np.nan, feat_a.shape[0], [np.nan, 0]
def showRMSE(dim, rmseFile):
    """Write the summary found in the last row of an RMSE file to stdout.

    Args:
        dim: feature dimension; ``1`` means F0.
        rmseFile: path of the result file, read with ``funcs.read_raw_mat``
            using 3 columns for F0 and ``dim + 1`` columns otherwise.

    Nothing is returned. The text is written without a trailing newline so
    the caller can continue the same output line.
    """
    # Local import keeps the block self-contained. sys.stdout.write emits
    # exactly what the former Python 2 ``print "...\t",`` statement did and
    # also runs on Python 3.
    import sys

    if dim == 1:
        # F0: last row columns are RMSE, VU and Cor (printed as RMSE, Cor, VU)
        data = funcs.read_raw_mat(rmseFile, 3)
        sys.stdout.write("RMSE: %f\tCor: %f\t VU:%f\t" % (
            data[-1, 0], data[-1, 2], data[-1, 1]))
    else:
        # MGC: the value is in the last column of the last row
        data = funcs.read_raw_mat(rmseFile, dim + 1)
        sys.stdout.write("RMSE: %f\t" % (data[-1, -1]))
def showRMSE(dim, rmseFile):
    """Write the summary found in the last row of an RMSE file to stdout.

    Args:
        dim: feature dimension; ``1`` means F0.
        rmseFile: path of the result file, read with ``funcs.read_raw_mat``
            using 3 columns for F0 and ``dim + 1`` columns otherwise.

    Nothing is returned. No newline is appended, so the caller can keep
    writing on the same output line.
    """
    # The Python 2 statement ``print "...\t",`` is a SyntaxError on
    # Python 3; sys.stdout.write produces the same bytes on both versions.
    import sys

    if dim == 1:
        # F0
        data = funcs.read_raw_mat(rmseFile, 3)
        summary = "RMSE: %f\tCor: %f\t VU:%f\t" % (
            data[-1, 0], data[-1, 2], data[-1, 1])
    else:
        # MGC
        data = funcs.read_raw_mat(rmseFile, dim + 1)
        summary = "RMSE: %f\t" % (data[-1, -1])
    sys.stdout.write(summary)
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors (and F0 correlation) of two files.

    Args:
        file1: path of the first raw feature file.
        file2: path of the second raw feature file.
        dim: feature dimension handed to ``funcs.read_raw_mat``; ``dim == 1``
            selects the F0 branch.

    Returns:
        Tuple ``(pow2, frame_num, corr)``.

        * F0 branch: ``pow2`` has three columns; only column 0 is filled
          (squared transformed-F0 difference on frames voiced in both
          files), columns 1 and 2 stay zero. ``corr`` is the
          ``scipy.stats.pearsonr`` result. When no frame is voiced in both
          files ``pow2`` is all-NaN and ``corr`` is ``[nan, 0]``.
        * Otherwise: ``pow2`` is the element-wise squared difference and
          ``corr`` is the placeholder ``-10``.

        ``frame_num`` is ``data1.shape[0]`` at return time (after
        ``F0Transform`` in the voiced F0 case).
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # truncate the longer stream to the length of the shorter one
    if data1.shape[0] > data2.shape[0]:
        if dim == 1:
            data1 = data1[0:data2.shape[0]]
        else:
            data1 = data1[0:data2.shape[0], :]
    elif data1.shape[0] < data2.shape[0]:
        if dim == 1:
            data2 = data2[0:data1.shape[0]]
        else:
            data2 = data2[0:data1.shape[0], :]

    if dim == 1:
        # This is F0
        diff = np.zeros([data1.shape[0], 3])
        temp1 = data1 > 0
        temp2 = data2 > 0
        indp = (temp1 * temp2)   # all voiced
        # The former ``indn = temp1 - temp2`` was never used and raises
        # TypeError on NumPy >= 1.13 (boolean subtract), so it is removed.
        voiceFrame = sum(indp)
        if voiceFrame > 0:
            data1 = F0Transform(data1[indp])
            data2 = F0Transform(data2[indp])
            diff[indp, 0] = data1 - data2
            pow2 = diff * diff
            corr = scipy.stats.pearsonr(data1, data2)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan
    else:
        diff = data1 - data2
        pow2 = diff * diff
        corr = -10
    return pow2, data1.shape[0], corr
def RMSECalcore(file1, file2, dim):
    """Compute frame-wise squared errors (and F0 correlation) of two files.

    Args:
        file1: path of the first raw feature file.
        file2: path of the second raw feature file.
        dim: feature dimension handed to ``funcs.read_raw_mat``; ``dim == 1``
            selects the F0 branch.

    Returns:
        Tuple ``(pow2, frame_num, corr)``.

        * F0 branch: ``pow2`` has three columns; only column 0 is filled
          (squared transformed-F0 difference on frames voiced in both
          files), columns 1 and 2 stay zero. ``corr`` is the
          ``scipy.stats.pearsonr`` result. When no frame is voiced in both
          files ``pow2`` is all-NaN and ``corr`` is ``[nan, 0]``.
        * Otherwise: ``pow2`` is the element-wise squared difference and
          ``corr`` is the placeholder ``-10``.

        ``frame_num`` is ``data1.shape[0]`` at return time (after
        ``F0Transform`` in the voiced F0 case).
    """
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # truncate the longer stream to the length of the shorter one
    if data1.shape[0] > data2.shape[0]:
        if dim == 1:
            data1 = data1[0:data2.shape[0]]
        else:
            data1 = data1[0:data2.shape[0], :]
    elif data1.shape[0] < data2.shape[0]:
        if dim == 1:
            data2 = data2[0:data1.shape[0]]
        else:
            data2 = data2[0:data1.shape[0], :]

    if dim == 1:
        # This is F0
        diff = np.zeros([data1.shape[0], 3])
        temp1 = data1 > 0
        temp2 = data2 > 0
        indp = (temp1 * temp2)   # all voiced
        # ``temp1 - temp2`` (u/v different) is rejected by NumPy >= 1.13 for
        # boolean arrays and its result was unused, so it is not computed.
        voiceFrame = sum(indp)
        if voiceFrame > 0:
            data1 = F0Transform(data1[indp])
            data2 = F0Transform(data2[indp])
            diff[indp, 0] = data1 - data2
            pow2 = diff * diff
            corr = scipy.stats.pearsonr(data1, data2)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan
    else:
        diff = data1 - data2
        pow2 = diff * diff
        corr = -10
    return pow2, data1.shape[0], corr
def f0Conversion(dataOut, outname):
    """Write generated data; F0 streams are converted from discrete to continuous.

    Args:
        dataOut: generated data for one output stream.
        outname: target path; its extension decides the handling.

    For ``.qf0`` / ``.lf0`` targets the raw input is written as ``.qf0``,
    converted with ``f0funcs.f0Conversion(..., 'd2c', ...)`` using the
    settings in ``cfg.f0Info``, and the resulting U/V flags and F0 are
    written as ``.vuv`` and ``.lf0`` next to ``outname``. When
    ``cfg.f0Info`` marks the F0 as interpolated, an existing ``.vuv`` file
    is read and frames with a value below 0.5 are set to 0.0 in the F0.
    Any other extension is written unchanged to ``outname``.
    """
    fileDir = os.path.dirname(outname)
    fileName = os.path.basename(outname)
    fileBase, fileExt = os.path.splitext(fileName)

    if fileExt == '.qf0' or fileExt == '.lf0':
        # for F0
        f0Max, f0Min, f0Levels, f0Interpolated = cfg.f0Info

        # os.path.join keeps a directory-less outname relative; plain
        # ``fileDir + os.path.sep + ...`` would point at the filesystem root.
        defaultOutput(dataOut, os.path.join(fileDir, fileBase + '.qf0'))
        dataOut, vuv = f0funcs.f0Conversion(dataOut, f0Max, f0Min, f0Levels,
                                            'd2c', f0Interpolated)
        if f0Interpolated:
            # interpolated F0 carries no U/V information, take it from .vuv
            vuvFile = os.path.join(fileDir, fileBase + '.vuv')
            if os.path.isfile(vuvFile):
                vuv = funcs.read_raw_mat(vuvFile, 1)
                dataOut[vuv < 0.5] = 0.0
            else:
                # print() with one argument works on Python 2 and 3
                print("Can't find %s for interpolated F0" % (vuvFile))

        # if the extension is .qf0 (quantized F0)
        defaultOutput(vuv, os.path.join(fileDir, fileBase + '.vuv'))
        defaultOutput(dataOut, os.path.join(fileDir, fileBase + '.lf0'))
    else:
        # for other data
        defaultOutput(dataOut, outname)
def f0Conversion(dataOut, outname):
    """Store generated data, turning discrete F0 into continuous F0.

    Targets whose extension is neither ``.qf0`` nor ``.lf0`` are written to
    ``outname`` unchanged. For F0 targets the input is saved as ``.qf0``,
    converted by ``f0funcs.f0Conversion`` in ``'d2c'`` mode with the values
    of ``cfg.f0Info``, and the U/V flags and converted F0 are saved as
    ``.vuv`` and ``.lf0`` beside ``outname``.
    """
    fileDir = os.path.dirname(outname)
    fileBase, fileExt = os.path.splitext(os.path.basename(outname))

    if fileExt not in ('.qf0', '.lf0'):
        # for other data
        defaultOutput(dataOut, outname)
        return

    # for F0
    stem = fileDir + os.path.sep + fileBase
    f0Max, f0Min, f0Levels, f0Interpolated = cfg.f0Info
    defaultOutput(dataOut, stem + '.qf0')
    dataOut, vuv = f0funcs.f0Conversion(dataOut, f0Max, f0Min, f0Levels,
                                        'd2c', f0Interpolated)

    if f0Interpolated:
        vuvFile = stem + '.vuv'
        if not os.path.isfile(vuvFile):
            print("Can't find %s for interpolated F0" % (vuvFile))
        else:
            # zero the F0 wherever the stored U/V flag is below 0.5
            vuv = funcs.read_raw_mat(vuvFile, 1)
            dataOut[vuv < 0.5] = 0.0

    # if the extension is .qf0 (quantized F0)
    defaultOutput(vuv, stem + '.vuv')
    defaultOutput(dataOut, stem + '.lf0')
def generateLabIndex(labfile, outfile, featDim):
    """Write the frame indices 0..N-1 of a label file to ``outfile``.

    ``labfile`` is read with ``py_rw.read_raw_mat(labfile, featDim)``; N is
    the number of rows it returns. If ``labfile`` does not exist a message
    is printed and nothing is written.
    """
    if not os.path.isfile(labfile):
        print("Not found %s" % (labfile))
        return

    frames = py_rw.read_raw_mat(labfile, featDim)
    frame_index = np.arange(frames.shape[0])
    py_rw.write_raw_mat(frame_index, outfile)
def getMeanStd(fileScp, fileDim, stdFloor=0.00001, f0Feature=0):
    """Calculate the mean and std over all files listed in ``fileScp``.

    Args:
        fileScp: text file with one data file path per line.
        fileDim: feature dimension passed to ``py_rw.read_raw_mat``.
        stdFloor: standard deviations below this value are replaced by 1.0.
        f0Feature: if true and ``fileDim == 1``, only values > 0 (voiced
            frames) contribute to the statistics.

    Returns:
        ``(meanBuf, stdBuf)`` as float32 arrays of length ``fileDim``.

    Progress (``index/total``) is written to stdout while the files are read.
    """
    meanBuf = np.zeros([fileDim], dtype=np.float64)
    stdBuf = np.zeros([fileDim], dtype=np.float64)   # holds the variance until the end
    timeStep = 0

    # count the entries with a context manager so the handle is closed
    with open(fileScp, 'r') as filePtr:
        fileNum = sum(1 for _ in filePtr)

    with open(fileScp, 'r') as filePtr:
        for idx, fileName in enumerate(filePtr):
            fileName = fileName.rstrip('\n')
            data = py_rw.read_raw_mat(fileName, fileDim)

            sys.stdout.write('\r')
            sys.stdout.write("%d/%d" % (idx, fileNum))

            if f0Feature and fileDim == 1:
                # if this is F0 feature, remove unvoiced region
                data = data[np.where(data > 0)]

            dataCount = data.shape[0]
            if dataCount == 0:
                # Nothing to accumulate (e.g. a fully unvoiced F0 file).
                # Going on would divide by zero in the merge formulas below.
                continue

            # parallel algorithm
            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
            if len(data.shape) == 1:
                meanNew = data.mean()
                stdNew = data.var()
            else:
                meanNew = data.mean(axis=0)
                stdNew = data.var(axis=0)

            total = timeStep + dataCount
            deltaMean = meanNew - meanBuf
            meanBuf = meanBuf + deltaMean * (float(dataCount) / total)

            if timeStep == 0:
                # first contributing file: its variance is the running variance
                if len(data.shape) == 1:
                    stdBuf[0] = stdNew
                else:
                    stdBuf = stdNew
            else:
                # merge the running variance with the variance of this file
                stdBuf = (stdBuf * (float(timeStep) / total) +
                          stdNew * (float(dataCount) / total) +
                          deltaMean * deltaMean /
                          (float(dataCount) / timeStep +
                           float(timeStep) / dataCount + 2.0))

            timeStep += dataCount
    sys.stdout.write('\n')

    stdBuf = np.sqrt(stdBuf)

    # floor tiny standard deviations to 1.0
    floorIdx = stdBuf < stdFloor
    stdBuf[floorIdx] = 1.0

    meanBuf = np.asarray(meanBuf, dtype=np.float32)
    stdBuf = np.asarray(stdBuf, dtype=np.float32)
    return meanBuf, stdBuf
def compennom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
              addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Add the mean/std back to some dimensions.

    Selects the element-wise operation and delegates to ``ncFileManipulate``.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ``ncFileManipulate``.
        mask: optional path of a mask file read with
            ``py_rw.read_raw_mat(mask, 1)``:
            1: compensate this dimension
            0: do not compensate
        reverse: if true, the mask is inverted (``1 - mask``).

    The operation receives ``(x, y, z)``, or ``(x, y, z, m)`` when a mask is
    given, and computes ``x + y / z`` (masked: ``x + y / z * m``).
    """
    # print() with one argument behaves the same on Python 2 and 3
    print("compensente %s" % (ncFile))
    if mask is not None:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1 - maskData
        operation = lambda x, y, z, m: x + y / z * m
    else:
        maskData = None
        operation = lambda x, y, z: x + y / z
    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri, addMV,
                     flushT, waitT, stdT, maskData)
def compennom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
              addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Add the mean/std back to some dimensions via ``ncFileManipulate``.

    mask: optional path of a mask file (read with one column)
          1: compensate it
          0: do not compensate
    reverse: when true the mask is replaced by ``1 - mask``.

    All remaining arguments are passed straight to ``ncFileManipulate``
    together with the chosen operation and the mask data (``None`` when no
    mask is given).
    """
    print("compensente %s" % (ncFile))

    if mask is None:
        # unmasked: every dimension gets x + y / z
        maskData = None
        operation = lambda x, y, z: x+y/z
    else:
        loaded = py_rw.read_raw_mat(mask, 1)
        maskData = 1-loaded if reverse else loaded
        operation = lambda x, y, z, m: x+y/z*m

    ncFileManipulate(ncFile, mvFile, operation, ncTarget,
                     flagKeepOri, addMV, flushT, waitT, stdT, maskData)
def normnom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
            addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Normalize without shifting the mean of certain dimensions.

    Chooses the operation and hands everything to ``ncFileManipulate``.

    mask: optional path of a mask file (read with one column)
          1: normalize it without shifting mean
          0: normalize it
    reverse: when true the mask is replaced by ``1 - mask``.

    Without a mask the operation is ``x / z`` for every dimension; with a
    mask it is ``(x - y * (1 - m)) / z``.
    """
    print("normlizing without mean shift %s" % (ncFile))

    if mask is None:
        maskData = None
        operation = lambda x, y, z: (x)/z
    else:
        loaded = py_rw.read_raw_mat(mask, 1)
        maskData = 1-loaded if reverse else loaded
        operation = lambda x, y, z, m: (x-y*(1-m))/z

    ncFileManipulate(ncFile, mvFile, operation, ncTarget,
                     flagKeepOri, addMV, flushT, waitT, stdT, maskData)
def norm(ncFile, mvFile, ncTarget=None, mask=None, flagKeepOri=1, addMV=1,
         flushT=400000, waitT=5, stdT=0.000001, reverse=0):
    """Normalize the data: pick the operation and call ``ncFileManipulate``.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ``ncFileManipulate``.
        mask: optional path of a mask file read with
            ``py_rw.read_raw_mat(mask, 1)``:
            1: normalize this dimension
            0: do not normalize it
        reverse: if true, the mask is inverted (``1 - mask``).

    The operation receives ``(x, y, z)``, or ``(x, y, z, m)`` when a mask is
    given, and computes ``(x - y) / z`` (masked: ``(x - y*m) / z**m``, which
    leaves ``x`` untouched where ``m`` is 0).
    """
    # print() with one argument behaves the same on Python 2 and 3
    print("norm %s " % (ncFile))
    if mask is not None:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1 - maskData
        operation = lambda x, y, z, m: (x - y * m) / (z ** m)
    else:
        maskData = None
        operation = lambda x, y, z: (x - y) / z
    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri, addMV,
                     flushT, waitT, stdT, maskData)
def norm(ncFile, mvFile, ncTarget=None, mask=None, flagKeepOri=1, addMV=1,
         flushT=400000, waitT=5, stdT=0.000001, reverse=0):
    """Normalize the data through ``ncFileManipulate``.

    mask: optional path of a mask file (read with one column)
          1: normalize it
          0: not normalize it
    reverse: when true the mask is replaced by ``1 - mask``.

    Without a mask the operation is ``(x - y) / z``; with a mask it is
    ``(x - y*m) / z**m``. Every other argument is forwarded unchanged.
    """
    print("norm %s " % (ncFile))

    if mask is None:
        maskData = None
        operation = lambda x, y, z: (x-y)/z
    else:
        loaded = py_rw.read_raw_mat(mask, 1)
        maskData = 1-loaded if reverse else loaded
        operation = lambda x, y, z, m: (x-y*m)/(z**m)

    ncFileManipulate(ncFile, mvFile, operation, ncTarget,
                     flagKeepOri, addMV, flushT, waitT, stdT, maskData)
def normnom(ncFile, mvFile, ncTarget=None, mask=None, reverse=0, flagKeepOri=1,
            addMV=1, flushT=400000, waitT=5, stdT=0.000001):
    """Normalize without shifting the mean of certain dimensions.

    Selects the element-wise operation and delegates to ``ncFileManipulate``.

    Args:
        ncFile, mvFile, ncTarget, flagKeepOri, addMV, flushT, waitT, stdT:
            forwarded unchanged to ``ncFileManipulate``.
        mask: optional path of a mask file read with
            ``py_rw.read_raw_mat(mask, 1)``:
            1: normalize it without shifting mean
            0: normalize it
        reverse: if true, the mask is inverted (``1 - mask``).

    The operation receives ``(x, y, z)``, or ``(x, y, z, m)`` when a mask is
    given, and computes ``x / z`` (masked: ``(x - y*(1 - m)) / z``).
    """
    # print() with one argument behaves the same on Python 2 and 3
    print("normlizing without mean shift %s" % (ncFile))
    if mask is not None:
        maskData = py_rw.read_raw_mat(mask, 1)
        if reverse:
            maskData = 1 - maskData
        operation = lambda x, y, z, m: (x - y * (1 - m)) / z
    else:
        maskData = None
        operation = lambda x, y, z: (x) / z
    ncFileManipulate(ncFile, mvFile, operation, ncTarget, flagKeepOri, addMV,
                     flushT, waitT, stdT, maskData)
def meanStd(ncScp, mvFile, normMethod=None):
    """Calculate mean and standard deviation over all .nc files in ``ncScp``.

    The statistics of the ``inputs`` and ``targetPatterns`` variables are
    accumulated frame by frame with Welford's running update and written to
    ``mvFile`` as ``inputMeans``, ``inputStdevs``, ``outputMeans`` and
    ``outputStdevs``. The accumulated squared deviations are divided by
    ``timeStep - 1`` before the square root.

    Args:
        ncScp: text file with one .nc path per line.
        mvFile: path of the NetCDF file to create.
        normMethod: optional path of an int32 file with one entry per input
            and output dimension (inputs first). When given, the per-dimension
            max/min are tracked as well, the original mean/std and max/min
            are stored in ``*_ori`` variables, and for every dimension whose
            entry is negative the stored "mean" becomes a (widened) minimum
            and the stored "std" the (widened) range, using ``g_MinMaxRange``.

    Raises:
        ValueError: if ``ncScp`` lists no file.
    """
    timeStep = 0
    inputSize = None
    outSize = None
    with open(ncScp, 'r') as filePtr:
        for idx, ncFile in enumerate(filePtr):
            ncFile = ncFile.rstrip('\n')
            data = io.netcdf_file(ncFile)
            print("Processing %s" % (ncFile))
            if idx == 0:
                # for the first file, get the dimension of data
                # create the buffer
                inputSize = data.dimensions['inputPattSize']
                outSize = data.dimensions['targetPattSize']
                meanInBuf = np.zeros([inputSize], dtype=np.float64)
                stdInBuf = np.zeros([inputSize], dtype=np.float64)
                meanOutBuf = np.zeros([outSize], dtype=np.float64)
                stdOutBuf = np.zeros([outSize], dtype=np.float64)

                # if max min normalization is used, get the max and min value
                if normMethod is not None:
                    maxminInBuf = np.zeros([inputSize, 2], dtype=np.float64)
                    maxminInBuf[:, 0] = data.variables['inputs'][:].max(axis=0)
                    maxminInBuf[:, 1] = data.variables['inputs'][:].min(axis=0)
                    print("Input max %f\tmin %f" % (maxminInBuf[:, 0].max(),
                                                    maxminInBuf[:, 1].min()))
                    maxminOutBuf = np.zeros([outSize, 2], dtype=np.float64)
                    maxminOutBuf[:, 0] = data.variables['targetPatterns'][:].max(axis=0)
                    maxminOutBuf[:, 1] = data.variables['targetPatterns'][:].min(axis=0)
                    print("Output max %f\tmin %f" % (maxminOutBuf[:, 0].max(),
                                                     maxminOutBuf[:, 1].min()))
            else:
                # for the remaining data files
                if normMethod is not None:
                    tmp = data.variables['inputs'][:].max(axis=0)
                    maxminInBuf[:, 0] = np.maximum(tmp, maxminInBuf[:, 0])
                    tmp = data.variables['inputs'][:].min(axis=0)
                    maxminInBuf[:, 1] = np.minimum(tmp, maxminInBuf[:, 1])
                    print("Input max %f\tmin %f" % (maxminInBuf[:, 0].max(),
                                                    maxminInBuf[:, 1].min()))
                    tmp = data.variables['targetPatterns'][:].max(axis=0)
                    maxminOutBuf[:, 0] = np.maximum(tmp, maxminOutBuf[:, 0])
                    tmp = data.variables['targetPatterns'][:].min(axis=0)
                    maxminOutBuf[:, 1] = np.minimum(tmp, maxminOutBuf[:, 1])
                    print("Output max %f\tmin %f" % (maxminOutBuf[:, 0].max(),
                                                     maxminOutBuf[:, 1].min()))
                    # drop the last view into the file before it is closed
                    del tmp

            numTimes = data.dimensions['numTimesteps']
            print("Processing %s of %s frames" % (ncFile, numTimes))
            print("Input max %f\tmin %f" % (data.variables['inputs'][:].max(),
                                            data.variables['inputs'][:].min()))
            print("Output max %f\tmin %f" % (data.variables['targetPatterns'][:].max(),
                                             data.variables['targetPatterns'][:].min()))

            # Welford update, one frame at a time. The file variables are
            # only used inside expressions so that no reference to the file
            # data is left when data.close() is called.
            for t in range(numTimes):
                tmpIn = (data.variables['inputs'][t, :] - meanInBuf)
                meanInBuf = meanInBuf + tmpIn * 1.0 / (timeStep + t + 1)
                tmpOut = (data.variables['targetPatterns'][t, :] - meanOutBuf)
                meanOutBuf = meanOutBuf + tmpOut * 1.0 / (timeStep + t + 1)
                stdInBuf = stdInBuf + tmpIn * (data.variables['inputs'][t, :] - meanInBuf)
                stdOutBuf = stdOutBuf + tmpOut * (data.variables['targetPatterns'][t, :] - meanOutBuf)
            timeStep += numTimes
            data.close()

    if inputSize is None:
        # formerly an obscure NameError further down
        raise ValueError("No .nc file listed in %s" % (ncScp))

    stdOutBuf = np.sqrt(stdOutBuf / (timeStep - 1))
    stdInBuf = np.sqrt(stdInBuf / (timeStep - 1))

    # create MV and save (same class as used for reading above)
    f = io.netcdf_file(mvFile, 'w')
    f.createDimension('inputPattSize', inputSize)
    f.createDimension('targetPattSize', outSize)
    f.createVariable('inputMeans', 'f', ('inputPattSize',))
    f.createVariable('inputStdevs', 'f', ('inputPattSize',))
    f.createVariable('outputMeans', 'f', ('targetPattSize',))
    f.createVariable('outputStdevs', 'f', ('targetPattSize',))

    if normMethod is not None:
        normIdx = py_rw.read_raw_mat(normMethod, 1, 'i4', 'l')
        assert normIdx.shape[0] == (inputSize + outSize), errorMes([normMethod], 2)
        inNormIdx = normIdx[0:inputSize]
        outNormIdx = normIdx[inputSize:(inputSize + outSize)]

        # keep the untouched mean/std
        f.createVariable('inputMeans_ori', 'f', ('inputPattSize',))
        f.createVariable('inputStdevs_ori', 'f', ('inputPattSize',))
        f.createVariable('outputMeans_ori', 'f', ('targetPattSize',))
        f.createVariable('outputStdevs_ori', 'f', ('targetPattSize',))
        f.variables['inputMeans_ori'][:] = np.asarray(meanInBuf.copy(), np.float32)
        f.variables['inputStdevs_ori'][:] = np.asarray(stdInBuf.copy(), np.float32)
        f.variables['outputMeans_ori'][:] = np.asarray(meanOutBuf.copy(), np.float32)
        f.variables['outputStdevs_ori'][:] = np.asarray(stdOutBuf.copy(), np.float32)

        # keep the observed max/min
        f.createVariable('inputMax_ori', 'f', ('inputPattSize',))
        f.createVariable('inputMin_ori', 'f', ('inputPattSize',))
        f.createVariable('outputMax_ori', 'f', ('targetPattSize',))
        f.createVariable('outputMin_ori', 'f', ('targetPattSize',))
        maxInBuf, minInBuf = maxminInBuf[:, 0].copy(), maxminInBuf[:, 1].copy()
        maxOutBuf, minOutBuf = maxminOutBuf[:, 0].copy(), maxminOutBuf[:, 1].copy()
        f.variables['inputMax_ori'][:] = np.asarray(maxminInBuf[:, 0], np.float32)
        f.variables['inputMin_ori'][:] = np.asarray(maxminInBuf[:, 1], np.float32)
        f.variables['outputMax_ori'][:] = np.asarray(maxminOutBuf[:, 0], np.float32)
        f.variables['outputMin_ori'][:] = np.asarray(maxminOutBuf[:, 1], np.float32)

        # dimensions flagged with a negative entry use max/min normalization:
        # "mean" <- widened minimum, "std" <- widened range
        maxminIndex = inNormIdx < 0
        if np.any(maxminIndex):
            tmpMin = ((1 - g_MinMaxRange) * minInBuf - g_MinMaxRange * maxInBuf) / (1 - 2 * g_MinMaxRange)
            tmpMax = ((1 - g_MinMaxRange) * maxInBuf - g_MinMaxRange * minInBuf) / (1 - 2 * g_MinMaxRange)
            meanInBuf[maxminIndex] = tmpMin[maxminIndex]
            stdInBuf[maxminIndex] = tmpMax[maxminIndex] - tmpMin[maxminIndex]

        maxminIndex = outNormIdx < 0
        if np.any(maxminIndex):
            tmpMin = ((1 - g_MinMaxRange) * minOutBuf - g_MinMaxRange * maxOutBuf) / (1 - 2 * g_MinMaxRange)
            tmpMax = ((1 - g_MinMaxRange) * maxOutBuf - g_MinMaxRange * minOutBuf) / (1 - 2 * g_MinMaxRange)
            meanOutBuf[maxminIndex] = tmpMin[maxminIndex]
            stdOutBuf[maxminIndex] = tmpMax[maxminIndex] - tmpMin[maxminIndex]
        print("Combing maxmin done")

    f.variables['inputMeans'][:] = np.asarray(meanInBuf, np.float32)
    f.variables['inputStdevs'][:] = np.asarray(stdInBuf, np.float32)
    f.variables['outputMeans'][:] = np.asarray(meanOutBuf, np.float32)
    f.variables['outputStdevs'][:] = np.asarray(stdOutBuf, np.float32)

    f.flush()
    f.close()
    print("*** please check max/min above\n")
    print("*** writing done %s\n" % (mvFile))
import pickle

# np is used below; imported here so the script is self-contained
# (harmless if the file already imports it above this point).
import numpy as np

# Binary I/O helpers: try the compiled readers first, then the pure-Python
# fallback. Only when all three imports fail is the user told to fix
# PYTHONPATH. The former ``finally:`` clause raised even after a successful
# import of ioTools.readwrite.
try:
    from binaryTools import readwriteC2 as funcs
except ImportError:
    try:
        from binaryTools import readwriteC2_220 as funcs
    except ImportError:
        try:
            from ioTools import readwrite as funcs
        except ImportError:
            print("Please add path of pyTools to PYTHONPATH")
            raise Exception(
                "Can't not import binaryTools/readwriteC2 or funcs")

if __name__ == "__main__":
    outDim = 256               # dimension of the output
    outFile = './mseWeight'    # where to write the output vector?

    # write the vector
    data = np.ones([outDim], dtype=np.float32)   # prepare the weight vector
    data[100:180] = 0.5        # I'd like the 100th-179th dimension with weight 0.5
    funcs.write_raw_mat(data, outFile)

    # test: read the vector back and show it
    data = funcs.read_raw_mat(outFile, outDim)
    print(data)
def bmat2nc_sub2(fileScp, outputfile, shiftInput, shiftOutput, maskFile=None,
                 flushT=300, waitT=30):
    """Package the data into .nc file, one row per frame of data.

    Each line of ``fileScp`` is split on whitespace and read as: sequence
    tag, number of input files, number of output files, number of frames,
    then a ``dim path`` pair for every input file followed by every output
    file. When ``pre_process`` reports text data, the line additionally ends
    with text length, text dimension and the text file path.

    Args:
        fileScp: list file described above.
        outputfile: path of the NetCDF file to create (overwritten if it
            exists).
        shiftInput: not used by this function.
        shiftOutput: if non-zero, output data is rolled by this many frames
            along the time axis (``np.roll``) before it is stored.
        maskFile: to discard certain dimension of data; forwarded to
            ``pre_process``. Text file, each line specifies the start and end
            column of a single input or output file, e.g.
                0 180   # read the 0-180th column of data1
                0 3     # read the 0-3th column of data2
                10 12   # read the 10-12th column of data3
        flushT: after reading this number of utterances, the nc block will be
            flushed to the disk.
        waitT: the number of seconds to wait for the flush process (to avoid
            reading and writing the disk at the same time).
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize = pre_process(fileScp, maskFile)

    print("Data format input: %s, output: %s" % (str(inputPattSize), str(outputPattSize)))
    print("Creating nc file %s" % (outputfile))
    useTxt = txtPatSize > 0 and maxTxtLength > 0
    if useTxt:
        print("Using txt data, maxlength and dimension %d %d" % (maxTxtLength, txtPatSize))

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))

    f = io.netcdf_file(outputfile, mode='w', version=2)
    f.createDimension('numSeqs', numSeqs)
    f.createDimension('numTimesteps', timeSteps)
    f.createDimension('inputPattSize', inputPattSize)
    f.createDimension('targetPattSize', outputPattSize)
    f.createDimension('maxSeqTagLength', maxSeqLength + 1)
    if useTxt:
        f.createDimension('txtLength', allTxtLength)
        f.createDimension('txtPattSize', txtPatSize)

    tagsVar = f.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
    seqLVar = f.createVariable('seqLengths', 'i', ('numSeqs',))
    inputVar = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
    outVar = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))
    if useTxt:
        txtVar = f.createVariable('txtData', 'i', ('txtLength', 'txtPattSize'))
        txtLVar = f.createVariable('txtLengths', 'i', ('numSeqs',))

    seqLVar[:] = 0
    tagsVar[:] = ''

    timeStart = 0    # first row of the current utterance in inputs/targets
    count = 0        # utterances read since the last flush
    txtStart = 0     # first row of the current utterance in txtData
    with open(fileScp, 'r') as filePtr:
        for idx1, line in enumerate(filePtr):
            temp = line.split()
            print("Reading %s" % (temp[0]))
            seqFrame = int(temp[3])
            seqLVar[idx1] = seqFrame
            tagsVar[idx1, 0:len(temp[0])] = list(temp[0])

            inputFileNum = int(temp[1])
            outFileNum = int(temp[2])

            slotBias = 4
            if useTxt:
                # the text entry follows all (dim, path) pairs
                txtSlot = slotBias + 2 * (inputFileNum + outFileNum)
                txtLength = int(temp[txtSlot])
                txtDim = int(temp[txtSlot + 1])
                txtFile = temp[txtSlot + 2]
                data = py_rw.read_raw_mat(txtFile, txtDim)
                if txtDim == 1:
                    txtVar[txtStart:(txtStart + txtLength), 0] = data.copy()
                else:
                    txtVar[txtStart:(txtStart + txtLength), :] = data.copy()
                txtStart = txtStart + txtLength
                txtLVar[idx1] = txtLength

            tS, tE = timeStart, (timeStart + seqFrame)

            for idx2 in range(inputFileNum):
                [sDim, eDim] = inputDim[idx2, 0:2]
                dim = int(temp[slotBias + (idx2) * 2])
                datafile = temp[slotBias + (idx2) * 2 + 1]

                # store the data
                dS, dE = inputDimSE[idx2][0], inputDimSE[idx2][1]
                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                # The message used to be '"..." % (datafile)' without a
                # placeholder, which raised TypeError instead of reporting.
                assert (data.shape[0] - seqFrame) < seqFrame * 0.1, \
                    errorMes([datafile], 3) + \
                    "This data has less number of frames: %s" % (datafile)

                if dim == 1 and data.ndim == 1:
                    data = data[0:seqFrame]
                    inputVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    data = data[0:seqFrame, sDim:eDim]
                    inputVar[tS:tE, dS:dE] = data[0:seqFrame,
                                                  inputDimSE[idx2][2]:inputDimSE[idx2][3]].copy()

            slotBias = 4 + inputFileNum * 2
            for idx2 in range(outFileNum):
                [sDim, eDim] = outputDim[idx2, 0:2]
                dim = int(temp[slotBias + (idx2) * 2])
                datafile = temp[slotBias + (idx2) * 2 + 1]

                # read and store the output data
                dS, dE = outDimSE[idx2][0], outDimSE[idx2][1]
                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                assert (data.shape[0] - seqFrame) < seqFrame * 0.1, \
                    errorMes([datafile], 3) + \
                    "This data has less number of frames: %s" % (datafile)

                if dim == 1 and data.ndim == 1:
                    data = data[0:seqFrame]
                    outVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    data = data[0:seqFrame, sDim:eDim]
                    if shiftOutput != 0:
                        outVar[tS:tE, dS:dE] = np.roll(data, shiftOutput, axis=0)[
                            0:seqFrame, outDimSE[idx2][2]:outDimSE[idx2][3]].copy()
                    else:
                        outVar[tS:tE, dS:dE] = data[0:seqFrame,
                                                    outDimSE[idx2][2]:outDimSE[idx2][3]].copy()

            del data
            if count > flushT:
                count = 0
                _write(f)
                print("Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT))
                for x in range(waitT):
                    # one star per second, all on one line
                    sys.stdout.write("* ")
                    sys.stdout.flush()
                    time.sleep(1)
            count += 1
            timeStart += seqFrame

    print("Reading and writing done ")
    f.close()
def bmat2nc_sub1(fileScp, outputfile, maskFile=None, flushT=300, waitT=30):
    """Package the data into .nc file, one row per frame of data.

    Each line of ``fileScp`` is split on whitespace and read as: sequence
    tag, number of input files, number of output files, number of frames,
    then a ``dim path`` pair for every input file followed by every output
    file.

    Args:
        fileScp: list file described above.
        outputfile: path of the NetCDF file to create (overwritten if it
            exists).
        maskFile: to discard certain dimension of data; forwarded to
            ``pre_process``. Text file, each line specifies the start and end
            column of a single input or output file, e.g.
                0 180   # read the 0-180th column of data1
                0 3     # read the 0-3th column of data2
                10 12   # read the 10-12th column of data3
        flushT: after reading this number of utterances, the nc block will be
            flushed to the disk.
        waitT: the number of seconds to wait for the flush process (to avoid
            reading and writing the disk at the same time).
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize = pre_process(fileScp, maskFile)

    print("Creating nc file %s" % (outputfile))
    print("Input dimension: %s\n output dimension: %s" % (str(inputPattSize),
                                                          str(outputPattSize)))

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))

    f = io.netcdf_file(outputfile, mode='w', version=2)
    f.createDimension('numSeqs', numSeqs)
    f.createDimension('numTimesteps', timeSteps)
    f.createDimension('inputPattSize', inputPattSize)
    f.createDimension('targetPattSize', outputPattSize)
    f.createDimension('maxSeqTagLength', maxSeqLength + 1)

    tagsVar = f.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
    seqLVar = f.createVariable('seqLengths', 'i', ('numSeqs',))
    inputVar = f.createVariable('inputs', 'f', ('numTimesteps', 'inputPattSize'))
    outVar = f.createVariable('targetPatterns', 'f', ('numTimesteps', 'targetPattSize'))

    seqLVar[:] = 0
    tagsVar[:] = ''

    timeStart = 0    # first row of the current utterance in inputs/targets
    count = 0        # utterances read since the last flush
    with open(fileScp, 'r') as filePtr:
        for idx1, line in enumerate(filePtr):
            temp = line.split()
            print("Reading %s" % (temp[0]))
            seqFrame = int(temp[3])
            seqLVar[idx1] = seqFrame
            tagsVar[idx1, 0:len(temp[0])] = list(temp[0])

            inputFileNum = int(temp[1])
            outFileNum = int(temp[2])
            tS, tE = timeStart, (timeStart + seqFrame)

            slotBias = 4
            for idx2 in range(inputFileNum):
                [sDim, eDim] = inputDimSE[idx2, 2:4]       # start, end dimension in raw data
                dim = int(temp[slotBias + (idx2) * 2])     # raw data dim
                datafile = temp[slotBias + (idx2) * 2 + 1]  # path to raw data
                [dS, dE] = inputDimSE[idx2, 0:2]           # start, end dimension in package data

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                # The message used to be '"..." % (datafile)' without a
                # placeholder, which raised TypeError instead of reporting.
                assert (data.shape[0] - seqFrame) < seqFrame * 0.3, \
                    errorMes([datafile], 3) + \
                    "This data has less number of frames: %s" % (datafile)

                if dim == 1 and data.ndim == 1:
                    inputVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    inputVar[tS:tE, dS:dE] = data[0:seqFrame, sDim:eDim].copy()

            slotBias = 4 + inputFileNum * 2
            for idx2 in range(outFileNum):
                [sDim, eDim] = outDimSE[idx2, 2:4]
                dim = int(temp[slotBias + (idx2) * 2])
                datafile = temp[slotBias + (idx2) * 2 + 1]
                [dS, dE] = outDimSE[idx2, 0:2]

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                assert (data.shape[0] - seqFrame) < seqFrame * 0.1, \
                    errorMes([datafile], 3) + \
                    "This data has less number of frames: %s" % (datafile)

                if dim == 1 and data.ndim == 1:
                    outVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    outVar[tS:tE, dS:dE] = data[0:seqFrame, sDim:eDim].copy()

            del data
            if count > flushT:
                count = 0
                _write(f)
                print("Have read %d. Let's wait netCDF for %d(s)" % (idx1, waitT))
                for x in range(waitT):
                    # one star per second, all on one line
                    sys.stdout.write("* ")
                    sys.stdout.flush()
                    time.sleep(1)
            count += 1
            timeStart += seqFrame

    print("Read and write done\n")
    f.close()
def SplitData(fileScp, fileDir2, fileDir, outputName, outDim, outputDelta,
              flagUseDelta, datamv, normMask, outputMethod, stdT=0.000001):
    """Split the generated HTK files into acoustic features.

    Args:
        fileScp: iterable of file names (looked up inside ``fileDir2``).
        fileDir2: directory holding the generated ``.htk`` files.
        fileDir: output directory; ``gen.scp`` listing the processed files
            is written here.
        outputName: name (used as extension) of every output stream.
        outDim: dimension of every output stream in the ``.htk`` data.
        outputDelta: per stream, number of blocks the stream consists of
            (code comment: [static, delta, delta-delta]).
        flagUseDelta: ``0`` / ``'0'`` means only the static block is kept for
            streams with ``outputDelta > 1`` and a trailing ``_delta`` is
            removed from the stream name.
        datamv: path of a mean/std file used to de-normalize the data; either
            NetCDF with ``outputMeans`` / ``outputStdevs`` or a raw float
            vector ``[means, stds]``. Empty or missing file: no
            de-normalization.
        normMask: optional array; means are multiplied by it and stds are
            raised to its power.
        outputMethod: callable ``outputMethod(data, path)`` that stores one
            stream.
        stdT: stds below this value are replaced by 1.0.
    """
    with open(fileDir + os.path.sep + 'gen.scp', 'w') as filePtr:
        if len(datamv) > 0 and os.path.isfile(datamv):
            print("External Mean Variance file will be used to de-normalize the data")
            try:
                ncData = io.netcdf_file(datamv)
                try:
                    m = ncData.variables['outputMeans'][:].copy()
                    v = ncData.variables['outputStdevs'][:].copy()
                finally:
                    # m and v are copies, the file is no longer needed
                    ncData.close()
                assert m.shape[0] == sum(outDim), "Incompatible dimension"
            except TypeError:
                # not a NetCDF file: fall back to the raw [means, stds] layout
                rawMV = funcs.read_raw_mat(datamv, 1)
                assert rawMV.shape[0] == sum(outDim) * 2, 'Dim of datamv is invalid'
                m = rawMV[0:sum(outDim)]
                v = rawMV[sum(outDim):sum(outDim) * 2]
            v[v < stdT] = 1.0
            if normMask is not None:
                assert normMask.shape[0] == m.shape[0], 'normMask dimension invalid'
                m = m * normMask
                v = v ** normMask
        else:
            m = np.zeros([sum(outDim)])
            v = np.ones([sum(outDim)])

        for fileName in fileScp:
            fileBaseName, fileExt = os.path.splitext(fileName)
            if os.path.isfile(fileDir2 + os.path.sep + fileName) and fileExt == '.htk':
                filePtr.write(fileDir2 + os.path.sep + fileName + '\n')

                # the output of CURRENNT is big-endian
                data = funcs.read_htk(fileDir2 + os.path.sep + fileName, end='b')
                # str() is needed: str + int raised TypeError in the message
                assert data.shape[1] == sum(outDim), \
                    "Dimension of " + fileName + " is not" + str(sum(outDim))
                data = data * v + m

                # extract the data from the htk output of CURRENNT
                for index, outname in enumerate(outputName):
                    sIndex = sum(outDim[:index])
                    eIndex = sum(outDim[:index + 1])
                    if (flagUseDelta == '0' or flagUseDelta == 0) and outputDelta[index] > 1:
                        # if mlpgFlag is off, only generate the static components
                        # assume *.htk has [static, delta, delta-delta]
                        # (floor division keeps the slice index an integer)
                        eIndex = sIndex + (eIndex - sIndex) // outputDelta[index]
                        # remove the '_delta' extension if it has
                        # (str.rstrip would strip a character set, not a suffix)
                        if outname.endswith('_delta'):
                            outname = outname[:-len('_delta')]
                    dataOut = data[:, sIndex:eIndex]
                    outname = fileDir + os.path.sep + fileBaseName + os.path.extsep + outname
                    outputMethod(dataOut, outname)
                print("Writing acoustic data: " + fileBaseName)
            elif fileExt == '.htk':
                print("Cannot find file %s" % (fileName))
def RMSECalcore(file1, file2, dim):
    """Calculate the squared error and correlation of two F0 files.

    When the two files differ in length, the longer trajectory is shifted
    frame by frame against the shorter one and the alignment with the
    highest Spearman correlation is returned.

    Args:
        file1: path to the input feature file 1
        file2: path to the input feature file 2
        dim: dimension of the feature; only dim == 1 (F0) is supported

    Returns:
        (pow2, valid_length, corr)
        pow2: array [valid_length, 3] of squared values; column 0 is the
            F0 difference on frames voiced in both files, column 1 flags
            frames where only one file is voiced, column 2 flags frames
            voiced in both files. Filled with NaN when the selected
            alignment has no commonly voiced frame.
        valid_length: number of frames of the shorter file
        corr: result of scipy.stats.spearmanr, or [nan, 0]

    Raises:
        ValueError: if dim != 1.
    """
    if dim != 1:
        # the original printed this message and then failed on an
        # unassigned variable; fail with an explicit error instead
        raise ValueError('Only for F0 data')

    # load the data
    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    # if the number of frames is different,
    # get the number of frames that can shift
    shift_max = np.abs(data2.shape[0] - data1.shape[0])
    if data1.shape[0] > data2.shape[0]:
        # the shorter one is fixed, the longer one is shifted
        valid_length = data2.shape[0]
        fixed_data = data2
        shift_data = data1
    else:
        valid_length = data1.shape[0]
        fixed_data = data1
        shift_data = data2

    max_corr = -1.0
    min_rmse_buf = []
    max_corr_buf = []

    # do RMSE calculation by shifting [0, ..., shift_max] frames
    # and keep the best alignment
    for shift_t in range(shift_max + 1):
        shift_data_temp = shift_data[shift_t:shift_t + valid_length].copy()
        fixed_data_temp = fixed_data.copy()

        diff = np.zeros([shift_data_temp.shape[0], 3])
        temp1 = shift_data_temp > 0
        temp2 = fixed_data_temp > 0
        # frames voiced in both / voiced in only one
        indp = (temp1 * temp2)
        indn = np.bitwise_xor(temp1, temp2)
        voiceFrame = np.sum(indp)

        if voiceFrame > 0:
            shift_data_temp = F0Transform(shift_data_temp[indp])
            fixed_data_temp = F0Transform(fixed_data_temp[indp])
            diff[indp, 0] = shift_data_temp - fixed_data_temp
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            pow2 = diff * diff
            corr = scipy.stats.spearmanr(shift_data_temp, fixed_data_temp)
        else:
            print("%s %s" % (file1, file2))
            # no commonly voiced frame, no result
            corr = [np.nan, 0]
            pow2 = diff * np.nan

        if shift_t == 0:
            # fallback: a NaN correlation never compares greater than
            # max_corr, so without this the buffers would stay empty
            min_rmse_buf = pow2
            max_corr_buf = corr

        # select the shift point by max Corr
        if corr[0] > max_corr:
            max_corr = corr[0]
            max_corr_buf = corr
            min_rmse_buf = pow2

    return min_rmse_buf, valid_length, max_corr_buf
def RMSECalcore(file1, file2, dim):
    """Calculate the squared error and correlation of two F0 files.

    When the two files differ in length, the longer trajectory is shifted
    frame by frame against the shorter one and the alignment that covers
    the largest ratio of commonly voiced frames is returned.

    Args:
        file1: path to the input feature file 1
        file2: path to the input feature file 2
        dim: dimension of the feature; only dim == 1 (F0) is supported

    Returns:
        (pow2, valid_length, corr)
        pow2: array [valid_length, 3] of squared values; column 0 is the
            F0 difference on frames voiced in both files, column 1 flags
            frames where only one file is voiced, column 2 flags frames
            voiced in both files. Filled with NaN when no alignment has a
            commonly voiced frame.
        valid_length: number of frames of the shorter file
        corr: result of scipy.stats.spearmanr, or [nan, 0]

    Raises:
        ValueError: if dim != 1.
    """
    if dim != 1:
        # the original printed this message and then failed on an
        # unassigned variable; fail with an explicit error instead
        raise ValueError('Only for F0 data')

    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    shift_max = np.abs(data2.shape[0] - data1.shape[0])
    if data1.shape[0] > data2.shape[0]:
        # the shorter one is fixed, the longer one is shifted
        valid_length = data2.shape[0]
        fixed_data = data2
        shift_data = data1
    else:
        valid_length = data1.shape[0]
        fixed_data = data1
        shift_data = data2

    max_v_cover = 0
    min_rmse_buf = []
    max_corr_buf = []

    # shifts 0 .. shift_max are all valid alignments of the longer file;
    # range(shift_max) would skip the last one
    for shift_t in range(shift_max + 1):
        shift_data_temp = shift_data[shift_t:shift_t + valid_length].copy()
        fixed_data_temp = fixed_data.copy()

        # This is F0
        diff = np.zeros([shift_data_temp.shape[0], 3])
        temp1 = shift_data_temp > 0
        temp2 = fixed_data_temp > 0
        # frames voiced in both / voiced in only one
        indp = (temp1 * temp2)
        indn = np.bitwise_xor(temp1, temp2)
        voiceFrame = np.sum(indp)

        if voiceFrame > 0:
            shift_data_temp = F0Transform(shift_data_temp[indp])
            fixed_data_temp = F0Transform(fixed_data_temp[indp])
            diff[indp, 0] = shift_data_temp - fixed_data_temp
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            pow2 = diff * diff
            corr = scipy.stats.spearmanr(shift_data_temp, fixed_data_temp)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan

        # ratio of frames voiced in both files
        v_cover = voiceFrame * 1.0 / valid_length

        if shift_t == 0:
            # fallback so the buffers are not left empty when no
            # alignment has a commonly voiced frame
            min_rmse_buf = pow2
            max_corr_buf = corr

        if v_cover > max_v_cover:
            max_corr_buf = corr
            min_rmse_buf = pow2
            max_v_cover = v_cover

    return min_rmse_buf, valid_length, max_corr_buf
def meanStd(ncScp, mvFile, normMethod=None):
    """Calculate the mean and std over all .nc files listed in ncScp.

    Welford's one-pass algorithm is used for the mean and the variance;
    the variance is divided by (number of frames - 1).

    Args:
        ncScp: text file with one path to a .nc data file per line. Each
            file must provide the dimensions 'inputPattSize',
            'targetPattSize', 'numTimesteps' and the variables 'inputs'
            and 'targetPatterns'.
        mvFile: path of the NetCDF file to write. It receives
            'inputMeans', 'inputStdevs', 'outputMeans', 'outputStdevs'.
        normMethod: None, or path to a raw file read by
            py_rw.read_raw_mat(normMethod, 1, 'i4', 'l') with one flag per
            input and output dimension. For dimensions with a negative
            flag, the stored mean/std are replaced by min-max
            normalisation values based on g_MinMaxRange; the plain
            statistics and the max/min are stored as '*_ori' variables.
    """
    timeStep = 0
    useMaxMin = normMethod is not None

    with open(ncScp, 'r') as filePtr:
        for idx, ncFile in enumerate(filePtr):
            ncFile = ncFile.rstrip('\n')
            data = io.netcdf_file(ncFile)
            try:
                print("Processing %s" % (ncFile))
                if idx == 0:
                    # for the first file, get the dimension of data
                    # and create the buffers
                    inputSize = data.dimensions['inputPattSize']
                    outSize = data.dimensions['targetPattSize']
                    meanInBuf = np.zeros([inputSize], dtype=np.float64)
                    stdInBuf = np.zeros([inputSize], dtype=np.float64)
                    meanOutBuf = np.zeros([outSize], dtype=np.float64)
                    stdOutBuf = np.zeros([outSize], dtype=np.float64)

                    # if max min normalization is used,
                    # get the max and min value
                    if useMaxMin:
                        maxminInBuf = np.zeros([inputSize, 2],
                                               dtype=np.float64)
                        maxminInBuf[:, 0] = \
                            data.variables['inputs'][:].max(axis=0)
                        maxminInBuf[:, 1] = \
                            data.variables['inputs'][:].min(axis=0)
                        print("Input max %f\tmin %f" % (
                            maxminInBuf[:, 0].max(),
                            maxminInBuf[:, 1].min()))
                        maxminOutBuf = np.zeros([outSize, 2],
                                                dtype=np.float64)
                        maxminOutBuf[:, 0] = \
                            data.variables['targetPatterns'][:].max(axis=0)
                        maxminOutBuf[:, 1] = \
                            data.variables['targetPatterns'][:].min(axis=0)
                        print("Output max %f\tmin %f" % (
                            maxminOutBuf[:, 0].max(),
                            maxminOutBuf[:, 1].min()))

                # update the running max/min (also runs for the first file)
                if useMaxMin:
                    tmp = data.variables['inputs'][:].max(axis=0)
                    maxminInBuf[:, 0] = np.maximum(tmp, maxminInBuf[:, 0])
                    tmp = data.variables['inputs'][:].min(axis=0)
                    maxminInBuf[:, 1] = np.minimum(tmp, maxminInBuf[:, 1])
                    print("Input max %f\tmin %f" % (
                        maxminInBuf[:, 0].max(), maxminInBuf[:, 1].min()))
                    tmp = data.variables['targetPatterns'][:].max(axis=0)
                    maxminOutBuf[:, 0] = np.maximum(tmp, maxminOutBuf[:, 0])
                    tmp = data.variables['targetPatterns'][:].min(axis=0)
                    maxminOutBuf[:, 1] = np.minimum(tmp, maxminOutBuf[:, 1])
                    print("Output max %f\tmin %f" % (
                        maxminOutBuf[:, 0].max(), maxminOutBuf[:, 1].min()))
                    # drop the last reference to file data
                    del tmp

                numTimes = data.dimensions['numTimesteps']
                print("Processing %s of %s frames" % (ncFile, numTimes))
                print("Input max %f\tmin %f" % (
                    data.variables['inputs'][:].max(),
                    data.variables['inputs'][:].min()))
                print("Output max %f\tmin %f" % (
                    data.variables['targetPatterns'][:].max(),
                    data.variables['targetPatterns'][:].min()))

                for t in range(numTimes):
                    # read each frame once, as a float64 copy, so that no
                    # reference to the memory-mapped file outlives the loop
                    frameIn = np.array(data.variables['inputs'][t, :],
                                       dtype=np.float64)
                    frameOut = np.array(
                        data.variables['targetPatterns'][t, :],
                        dtype=np.float64)
                    tmpIn = frameIn - meanInBuf
                    meanInBuf = meanInBuf + tmpIn * 1.0 / (timeStep + t + 1)
                    tmpOut = frameOut - meanOutBuf
                    meanOutBuf = meanOutBuf + tmpOut * 1.0 / (timeStep + t + 1)
                    stdInBuf = stdInBuf + tmpIn * (frameIn - meanInBuf)
                    stdOutBuf = stdOutBuf + tmpOut * (frameOut - meanOutBuf)
                timeStep += numTimes
            finally:
                # close the data file even if it turned out to be invalid
                data.close()

    stdOutBuf = np.sqrt(stdOutBuf / (timeStep - 1))
    stdInBuf = np.sqrt(stdInBuf / (timeStep - 1))

    # create MV and save; io.netcdf_file is the public name of the class
    # (the io.netcdf sub-namespace is deprecated in recent SciPy)
    f = io.netcdf_file(mvFile, 'w')
    f.createDimension('inputPattSize', inputSize)
    f.createDimension('targetPattSize', outSize)
    f.createVariable('inputMeans', 'f', ('inputPattSize',))
    f.createVariable('inputStdevs', 'f', ('inputPattSize',))
    f.createVariable('outputMeans', 'f', ('targetPattSize',))
    f.createVariable('outputStdevs', 'f', ('targetPattSize',))

    if useMaxMin:
        normIdx = py_rw.read_raw_mat(normMethod, 1, 'i4', 'l')
        assert normIdx.shape[0] == (inputSize + outSize), \
            errorMes([normMethod], 2)
        inNormIdx = normIdx[0:inputSize]
        outNormIdx = normIdx[inputSize:(inputSize + outSize)]

        # keep the plain mean/std before they are overwritten below
        f.createVariable('inputMeans_ori', 'f', ('inputPattSize',))
        f.createVariable('inputStdevs_ori', 'f', ('inputPattSize',))
        f.createVariable('outputMeans_ori', 'f', ('targetPattSize',))
        f.createVariable('outputStdevs_ori', 'f', ('targetPattSize',))
        meanInBuf_ori, stdInBuf_ori = meanInBuf.copy(), stdInBuf.copy()
        meanOutBuf_ori, stdOutBuf_ori = meanOutBuf.copy(), stdOutBuf.copy()
        f.variables['inputMeans_ori'][:] = np.asarray(meanInBuf_ori,
                                                      np.float32)
        f.variables['inputStdevs_ori'][:] = np.asarray(stdInBuf_ori,
                                                       np.float32)
        f.variables['outputMeans_ori'][:] = np.asarray(meanOutBuf_ori,
                                                       np.float32)
        f.variables['outputStdevs_ori'][:] = np.asarray(stdOutBuf_ori,
                                                        np.float32)

        f.createVariable('inputMax_ori', 'f', ('inputPattSize',))
        f.createVariable('inputMin_ori', 'f', ('inputPattSize',))
        f.createVariable('outputMax_ori', 'f', ('targetPattSize',))
        f.createVariable('outputMin_ori', 'f', ('targetPattSize',))
        maxInBuf, minInBuf = maxminInBuf[:, 0].copy(), maxminInBuf[:, 1].copy()
        maxOutBuf, minOutBuf = (maxminOutBuf[:, 0].copy(),
                                maxminOutBuf[:, 1].copy())
        f.variables['inputMax_ori'][:] = np.asarray(maxminInBuf[:, 0],
                                                    np.float32)
        f.variables['inputMin_ori'][:] = np.asarray(maxminInBuf[:, 1],
                                                    np.float32)
        f.variables['outputMax_ori'][:] = np.asarray(maxminOutBuf[:, 0],
                                                     np.float32)
        f.variables['outputMin_ori'][:] = np.asarray(maxminOutBuf[:, 1],
                                                     np.float32)

        # dimensions flagged with a negative index use min-max
        # normalisation: "mean" becomes the shifted minimum and "std"
        # the widened range
        if min(inNormIdx) < 0:
            tmpMin = (((1 - g_MinMaxRange) * minInBuf
                       - g_MinMaxRange * maxInBuf)
                      / (1 - 2 * g_MinMaxRange))
            tmpMax = (((1 - g_MinMaxRange) * maxInBuf
                       - g_MinMaxRange * minInBuf)
                      / (1 - 2 * g_MinMaxRange))
            maxminIndex = inNormIdx < 0
            meanInBuf[maxminIndex] = tmpMin[maxminIndex]
            stdInBuf[maxminIndex] = tmpMax[maxminIndex] - tmpMin[maxminIndex]

        if min(outNormIdx) < 0:
            tmpMin = (((1 - g_MinMaxRange) * minOutBuf
                       - g_MinMaxRange * maxOutBuf)
                      / (1 - 2 * g_MinMaxRange))
            tmpMax = (((1 - g_MinMaxRange) * maxOutBuf
                       - g_MinMaxRange * minOutBuf)
                      / (1 - 2 * g_MinMaxRange))
            maxminIndex = outNormIdx < 0
            meanOutBuf[maxminIndex] = tmpMin[maxminIndex]
            stdOutBuf[maxminIndex] = tmpMax[maxminIndex] - tmpMin[maxminIndex]
        print("Combing maxmin done")

    f.variables['inputMeans'][:] = np.asarray(meanInBuf, np.float32)
    f.variables['inputStdevs'][:] = np.asarray(stdInBuf, np.float32)
    f.variables['outputMeans'][:] = np.asarray(meanOutBuf, np.float32)
    f.variables['outputStdevs'][:] = np.asarray(stdOutBuf, np.float32)

    f.flush()
    f.close()
    print("*** please check max/min above\n")
    print("*** writing done %s\n" % (mvFile))
def SplitData(fileScp, fileDir2, fileDir, outputName, outDim, outputDelta,
              datamv, normMask, outputMethod, stdT=0.000001):
    """Split the generated HTK files into separate acoustic feature files.

    For a stream with outputDelta > 1 two files are written: the static
    component under the stream name and all components under the stream
    name plus '_delta'. Other streams are written once.

    Args:
        fileScp: iterable of file names. Only names with the '.htk'
            extension that exist in fileDir2 are processed.
        fileDir2: directory holding the generated HTK files.
        fileDir: output directory. 'gen.scp' (the list of processed HTK
            paths) and all feature files are written here.
        outputName: name of each output stream, used as file extension;
            anything from the first '_delta' on is removed.
        outDim: number of columns of each stream in the HTK matrix.
        outputDelta: per stream, the number of equally sized components;
            the static component is taken from the first columns.
        datamv: path to the mean/std file used for de-normalisation, either
            NetCDF (variables 'outputMeans', 'outputStdevs') or a raw
            vector [means, stds]. An empty or missing path means mean 0
            and std 1.
        normMask: None, or an array with one entry per output dimension;
            the mean is multiplied by it and the std is raised to its power.
        outputMethod: callable(data, path) that stores one feature matrix.
        stdT: std values below this threshold are replaced by 1.0.
    """
    totalDim = sum(outDim)

    # 'with' guarantees gen.scp is closed even when a file fails to load
    with open(fileDir + os.path.sep + 'gen.scp', 'w') as filePtr:
        if len(datamv) > 0 and os.path.isfile(datamv):
            print("External Mean Variance file will be used to "
                  "de-normalize the data")
            try:
                with io.netcdf_file(datamv) as mvData:
                    m = mvData.variables['outputMeans'][:].copy()
                    v = mvData.variables['outputStdevs'][:].copy()
                assert m.shape[0] == totalDim, "Incompatible dimension"
            except TypeError:
                # not a NetCDF file: fall back to the raw [mean, std] vector
                mvRaw = funcs.read_raw_mat(datamv, 1)
                assert mvRaw.shape[0] == totalDim * 2, \
                    'Dim of datamv is invalid'
                m = mvRaw[0:totalDim]
                v = mvRaw[totalDim:totalDim * 2]
            v[v < stdT] = 1.0
            if normMask is not None:
                assert normMask.shape[0] == m.shape[0], \
                    'normMask dimension invalid'
                m = m * normMask
                v = v ** normMask
        else:
            m = np.zeros([totalDim])
            v = np.ones([totalDim])

        for fileName in fileScp:
            fileBaseName, fileExt = os.path.splitext(fileName)
            if fileExt != '.htk':
                continue
            htkPath = fileDir2 + os.path.sep + fileName
            if not os.path.isfile(htkPath):
                print("Cannot find file %s" % fileName)
                continue

            filePtr.write(htkPath + '\n')
            # the output of CURRENNT is big-endian
            data = funcs.read_htk(htkPath, end='b')
            assert data.shape[1] == totalDim, \
                "Dimension of " + fileName + " != " + str(totalDim)
            data = data * v + m

            # extract the data from the htk output of CURRENNT
            for index, outname in enumerate(outputName):
                sIndex = sum(outDim[:index])
                eIndex = sum(outDim[:index + 1])
                outname = outname.split('_delta')[0]
                outname = (fileDir + os.path.sep + fileBaseName
                           + os.path.extsep + outname)
                if outputDelta[index] > 1:
                    # output the static component; integer division keeps
                    # the slice bound an int
                    staticIndex = (sIndex
                                   + (eIndex - sIndex) // outputDelta[index])
                    outputMethod(data[:, sIndex:staticIndex], outname)
                    # output all components
                    outputMethod(data[:, sIndex:eIndex], outname + '_delta')
                else:
                    # feature doesn't have delta components
                    outputMethod(data[:, sIndex:eIndex], outname)
            print("Writing acoustic data: " + fileBaseName)
def bmat2nc_sub1(fileScp, outputfile, maskFile=None, flushT=300, waitT=30):
    """Package the data into .nc file
    one row, one frame of data

    fileScp:  text file, one utterance per line. As parsed here, a line is
              tag  inputFileNum  outFileNum  frameNum
              followed by a (dim, path) pair for every input file and then
              for every output file.
    outputfile: path of the .nc file to create (overwritten if it exists)
    maskFile: to discard certain dimension of data. Text file, each line
              specify the start and end column of the single input of
              output data
              e.g.  0 180  # read the 0-180th column of data1
                    0 3    # read the 0-3th column of data2
                    10 12  # read the 10-12th column of data3
    flushT:   after reading this number of utterances, nc block will be
              flushed to the disk
    waitT:    the number of seconds to wait for the flush process
              (to avoid read and write the disk at the same time)

    Raises AssertionError when an utterance has no frame or when a data
    file is more than 30% longer than the frame number given in fileScp.
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize = pre_process(fileScp, maskFile)

    print("Creating nc file %s" % (outputfile))
    print("Input dimension: %s\n output dimension: %s" % (
        str(inputPattSize), str(outputPattSize)))

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))

    # io.netcdf_file is the public name of the class (the io.netcdf
    # sub-namespace is deprecated in recent SciPy)
    f = io.netcdf_file(outputfile, mode='w', version=2)
    f.createDimension('numSeqs', numSeqs)
    f.createDimension('numTimesteps', timeSteps)
    f.createDimension('inputPattSize', inputPattSize)
    f.createDimension('targetPattSize', outputPattSize)
    f.createDimension('maxSeqTagLength', maxSeqLength + 1)

    tagsVar = f.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
    seqLVar = f.createVariable('seqLengths', 'i', ('numSeqs',))
    inputVar = f.createVariable('inputs', 'f',
                                ('numTimesteps', 'inputPattSize'))
    outVar = f.createVariable('targetPatterns', 'f',
                              ('numTimesteps', 'targetPattSize'))
    seqLVar[:] = 0
    tagsVar[:] = ''

    timeStart = 0
    count = 0
    with open(fileScp, 'r') as filePtr:
        for idx1, line in enumerate(filePtr):
            temp = line.split()
            print("Reading %s" % (temp[0]))
            seqFrame = int(temp[3])
            if seqFrame < 1:
                print("Error: data is empty. Please check %s in %s"
                      % (temp[0], fileScp))
                # no data file has been read at this point, so report the
                # scp file; raise explicitly so that the check also works
                # under 'python -O'
                raise AssertionError(errorMes([fileScp], 3)
                                     + "Error in preparing data")

            seqLVar[idx1] = seqFrame
            tagsVar[idx1, 0:len(temp[0])] = list(temp[0])
            inputFileNum = int(temp[1])
            outFileNum = int(temp[2])
            tS, tE = timeStart, (timeStart + seqFrame)
            data = None

            # input streams
            slotBias = 4
            for idx2 in range(inputFileNum):
                [sDim, eDim] = inputDimSE[idx2, 2:4]   # columns in raw data
                dim = int(temp[slotBias + idx2 * 2])   # raw data dim
                datafile = temp[slotBias + idx2 * 2 + 1]  # path to raw data
                [dS, dE] = inputDimSE[idx2, 0:2]       # columns in nc data

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                if (data.shape[0] - seqFrame) > seqFrame * 0.3:
                    print("Error: please check the data named by %s"
                          % (temp[0]))
                    raise AssertionError(
                        errorMes([datafile], 3)
                        + "Error in preparing data: %s" % (datafile))

                if dim == 1 and data.ndim == 1:
                    inputVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    inputVar[tS:tE, dS:dE] = \
                        data[0:seqFrame, sDim:eDim].copy()

            # output streams
            slotBias = 4 + inputFileNum * 2
            for idx2 in range(outFileNum):
                [sDim, eDim] = outDimSE[idx2, 2:4]
                dim = int(temp[slotBias + idx2 * 2])
                datafile = temp[slotBias + idx2 * 2 + 1]
                [dS, dE] = outDimSE[idx2, 0:2]

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                if (data.shape[0] - seqFrame) > seqFrame * 0.3:
                    print("Error: please check the data named by %s"
                          % (temp[0]))
                    raise AssertionError(
                        errorMes([datafile], 3)
                        + "Error in preparing data: %s" % (datafile))

                if dim == 1 and data.ndim == 1:
                    outVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    outVar[tS:tE, dS:dE] = data[0:seqFrame, sDim:eDim].copy()

            # release the last matrix before (possibly) waiting for a flush
            del data

            if count > flushT:
                count = 0
                _write(f)
                print("Have read %d. Let's wait netCDF for %d(s)"
                      % (idx1, waitT))
                for x in range(waitT):
                    print("*", end=' ')
                    sys.stdout.flush()
                    time.sleep(1)
            count += 1
            timeStart += seqFrame

    print("Read and write done\n")
    f.close()
def bmat2nc_sub2(fileScp, outputfile, shiftInput, shiftOutput,
                 maskFile=None, flushT=300, waitT=30):
    """Package the data into .nc file
    one row, one frame of data

    fileScp:  text file, one utterance per line. As parsed here, a line is
              tag  inputFileNum  outFileNum  frameNum
              followed by a (dim, path) pair for every input file and then
              for every output file, and (when text data is used) by
              txtLength  txtDim  txtFile.
    outputfile: path of the .nc file to create (overwritten if it exists)
    shiftInput: accepted for interface compatibility; it is not used by
              this function.
    shiftOutput: if not 0, multi-dimensional output data is rolled by this
              number of frames along the time axis (np.roll) before it
              is stored.
    maskFile: to discard certain dimension of data. Text file, each line
              specify the start and end column of the single input of
              output data
              e.g.  0 180  # read the 0-180th column of data1
                    0 3    # read the 0-3th column of data2
                    10 12  # read the 10-12th column of data3
    flushT:   after reading this number of utterances, nc block will be
              flushed to the disk
    waitT:    the number of seconds to wait for the flush process
              (to avoid read and write the disk at the same time)

    Raises AssertionError when a data file has at least 10% more frames
    than the frame number given in fileScp.
    """
    numSeqs, timeSteps, maxSeqLength, inputPattSize, outputPattSize, \
        inputDim, outputDim, inputDimSE, outDimSE, \
        allTxtLength, maxTxtLength, txtPatSize = pre_process(fileScp, maskFile)

    print("Data format input: %s, output: %s" % (
        str(inputPattSize), str(outputPattSize)))
    print("Creating nc file %s" % (outputfile))
    useTxt = txtPatSize > 0 and maxTxtLength > 0
    if useTxt:
        print("Using txt data, maxlength and dimension %d %d"
              % (maxTxtLength, txtPatSize))

    # create the dimension
    if os.path.exists(outputfile):
        print("*** %s exists. It will be overwritten" % (outputfile))

    # io.netcdf_file is the public name of the class (the io.netcdf
    # sub-namespace is deprecated in recent SciPy)
    f = io.netcdf_file(outputfile, mode='w', version=2)
    f.createDimension('numSeqs', numSeqs)
    f.createDimension('numTimesteps', timeSteps)
    f.createDimension('inputPattSize', inputPattSize)
    f.createDimension('targetPattSize', outputPattSize)
    f.createDimension('maxSeqTagLength', maxSeqLength + 1)
    if useTxt:
        f.createDimension('txtLength', allTxtLength)
        f.createDimension('txtPattSize', txtPatSize)

    tagsVar = f.createVariable('seqTags', 'S1', ('numSeqs', 'maxSeqTagLength'))
    seqLVar = f.createVariable('seqLengths', 'i', ('numSeqs',))
    inputVar = f.createVariable('inputs', 'f',
                                ('numTimesteps', 'inputPattSize'))
    outVar = f.createVariable('targetPatterns', 'f',
                              ('numTimesteps', 'targetPattSize'))
    if useTxt:
        txtVar = f.createVariable('txtData', 'i', ('txtLength', 'txtPattSize'))
        txtLVar = f.createVariable('txtLengths', 'i', ('numSeqs',))

    seqLVar[:] = 0
    tagsVar[:] = ''

    timeStart = 0
    count = 0
    txtStart = 0
    with open(fileScp, 'r') as filePtr:
        for idx1, line in enumerate(filePtr):
            temp = line.split()
            print("Reading %s" % (temp[0]))
            seqFrame = int(temp[3])
            seqLVar[idx1] = seqFrame
            tagsVar[idx1, 0:len(temp[0])] = list(temp[0])
            inputFileNum = int(temp[1])
            outFileNum = int(temp[2])
            tS, tE = timeStart, (timeStart + seqFrame)
            data = None

            slotBias = 4
            if useTxt:
                # the text entry follows all input/output (dim, path) pairs
                txtSlot = slotBias + 2 * (inputFileNum + outFileNum)
                txtLength = int(temp[txtSlot])
                txtDim = int(temp[txtSlot + 1])
                txtFile = temp[txtSlot + 2]
                data = py_rw.read_raw_mat(txtFile, txtDim)
                if txtDim == 1:
                    txtVar[txtStart:(txtStart + txtLength), 0] = data.copy()
                else:
                    txtVar[txtStart:(txtStart + txtLength), :] = data.copy()
                txtStart = txtStart + txtLength
                txtLVar[idx1] = txtLength

            # input streams
            for idx2 in range(inputFileNum):
                [sDim, eDim] = inputDim[idx2, 0:2]
                dim = int(temp[slotBias + idx2 * 2])
                datafile = temp[slotBias + idx2 * 2 + 1]
                dS, dE = inputDimSE[idx2][0], inputDimSE[idx2][1]

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                if not (data.shape[0] - seqFrame) < seqFrame * 0.1:
                    # raise explicitly: the check also works under
                    # 'python -O', and the message formats correctly
                    raise AssertionError(
                        errorMes([datafile], 3)
                        + "This data has less number of frames: %s"
                        % (datafile))

                if dim == 1 and data.ndim == 1:
                    data = data[0:seqFrame]
                    inputVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    # NOTE(review): columns are selected twice, first by
                    # inputDim and then by inputDimSE[.][2:4]; kept as in
                    # the original - confirm against pre_process
                    data = data[0:seqFrame, sDim:eDim]
                    inputVar[tS:tE, dS:dE] = data[
                        0:seqFrame,
                        inputDimSE[idx2][2]:inputDimSE[idx2][3]].copy()

            # output streams
            slotBias = 4 + inputFileNum * 2
            for idx2 in range(outFileNum):
                [sDim, eDim] = outputDim[idx2, 0:2]
                dim = int(temp[slotBias + idx2 * 2])
                datafile = temp[slotBias + idx2 * 2 + 1]
                dS, dE = outDimSE[idx2][0], outDimSE[idx2][1]

                if datafile == g_VOIDFILE:
                    data = np.zeros([seqFrame, dim])
                else:
                    data = py_rw.read_raw_mat(datafile, dim)
                if not (data.shape[0] - seqFrame) < seqFrame * 0.1:
                    raise AssertionError(
                        errorMes([datafile], 3)
                        + "This data has less number of frames: %s"
                        % (datafile))

                if dim == 1 and data.ndim == 1:
                    data = data[0:seqFrame]
                    outVar[tS:tE, dS] = data[0:seqFrame].copy()
                else:
                    # NOTE(review): same double column selection as for
                    # the input streams
                    data = data[0:seqFrame, sDim:eDim]
                    if shiftOutput != 0:
                        outVar[tS:tE, dS:dE] = np.roll(
                            data, shiftOutput, axis=0)[
                                0:seqFrame,
                                outDimSE[idx2][2]:outDimSE[idx2][3]].copy()
                    else:
                        outVar[tS:tE, dS:dE] = data[
                            0:seqFrame,
                            outDimSE[idx2][2]:outDimSE[idx2][3]].copy()

            # release the last matrix before (possibly) waiting for a flush
            del data

            if count > flushT:
                count = 0
                _write(f)
                print("Have read %d. Let's wait netCDF for %d(s)"
                      % (idx1, waitT))
                for x in range(waitT):
                    print("*", end=' ')
                    sys.stdout.flush()
                    time.sleep(1)
            count += 1
            timeStart += seqFrame

    print("Reading and writing done ")
    f.close()
def getMeanStd(fileScp, fileDim, stdFloor=0.00001, f0Feature=0):
    """Calculate the mean and std from a list of files.

    fileScp:   text file with one path to a raw data file per line
    fileDim:   dimension of the data in each file
    stdFloor:  std values below this floor are replaced by 1.0
    f0Feature: if true and fileDim == 1, only values > 0 are used

    Returns (mean, std) as float32 arrays. The statistics of each file are
    merged into the running ones with the parallel algorithm of
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    """
    runMean = np.zeros([fileDim], dtype=np.float64)
    runVar = np.zeros([fileDim], dtype=np.float64)
    seenFrames = 0

    # first pass: number of entries, only needed for the progress display
    with open(fileScp, 'r') as scpPtr:
        totalFiles = sum(1 for _ in scpPtr)

    with open(fileScp, 'r') as scpPtr:
        for fileIdx, entry in enumerate(scpPtr):
            path = entry.rstrip('\n')
            data = py_rw.read_raw_mat(path, fileDim)

            sys.stdout.write('\r')
            sys.stdout.write("%d/%d" % (fileIdx, totalFiles))

            if f0Feature and fileDim == 1:
                # F0 feature: drop the unvoiced region; a file without
                # voiced frames is skipped silently
                data = data[data > 0]
                if data.shape[0] < 1:
                    continue
            if data.shape[0] < 1:
                print('%s no data\n' % (path))
                continue

            newFrames = data.shape[0]
            isVector = len(data.shape) == 1
            try:
                if isVector:
                    fileMean, fileVar = data.mean(), data.var()
                else:
                    fileMean, fileVar = data.mean(axis=0), data.var(axis=0)

                gap = fileMean - runMean
                runMean = runMean + gap * (
                    float(newFrames) / (seenFrames + newFrames))

                if seenFrames != 0:
                    # merge the variance of this file into the running one
                    runVar = (runVar * (float(seenFrames)
                                        / (seenFrames + newFrames))
                              + fileVar * (float(newFrames)
                                           / (seenFrames + newFrames))
                              + gap * gap / (float(newFrames) / seenFrames
                                             + float(seenFrames) / newFrames
                                             + 2.0))
                elif isVector:
                    runVar[0] = fileVar
                else:
                    runVar = fileVar

                seenFrames += data.shape[0]
            except RuntimeWarning:
                print("\t%s has ill data. Please consider remove it" % (path))
    sys.stdout.write('\n')

    # variance -> std, then replace too small values
    runVar = np.sqrt(runVar)
    runVar[runVar < stdFloor] = 1.0

    return (np.asarray(runMean, dtype=np.float32),
            np.asarray(runVar, dtype=np.float32))
def RMSECalcore(file1, file2, dim):
    """Calculate the squared error and correlation of two F0 files.

    When the two files differ in length, the longer trajectory is shifted
    frame by frame against the shorter one and the alignment with the
    highest Spearman correlation is returned.

    Args:
        file1: path to the input feature file 1
        file2: path to the input feature file 2
        dim: dimension of the feature; only dim == 1 (F0) is supported

    Returns:
        (pow2, valid_length, corr)
        pow2: array [valid_length, 3] of squared values; column 0 is the
            F0 difference on frames voiced in both files, column 1 flags
            frames where only one file is voiced, column 2 flags frames
            voiced in both files. Filled with NaN when the selected
            alignment has no commonly voiced frame.
        valid_length: number of frames of the shorter file
        corr: result of scipy.stats.spearmanr, or [nan, 0]

    Raises:
        ValueError: if dim != 1.
    """
    if dim != 1:
        # the original printed this message and then failed on an
        # unassigned variable; fail with an explicit error instead
        raise ValueError('Only for F0 data')

    data1 = funcs.read_raw_mat(file1, dim)
    data2 = funcs.read_raw_mat(file2, dim)

    shift_max = np.abs(data2.shape[0] - data1.shape[0])
    if data1.shape[0] > data2.shape[0]:
        # the shorter one is fixed, the longer one is shifted
        valid_length = data2.shape[0]
        fixed_data = data2
        shift_data = data1
    else:
        valid_length = data1.shape[0]
        fixed_data = data1
        shift_data = data2

    max_corr = -1.0
    min_rmse_buf = []
    max_corr_buf = []

    # shifts 0 .. shift_max are all valid alignments of the longer file;
    # range(shift_max) would skip the last one
    for shift_t in range(shift_max + 1):
        shift_data_temp = shift_data[shift_t:shift_t + valid_length].copy()
        fixed_data_temp = fixed_data.copy()

        # This is F0
        diff = np.zeros([shift_data_temp.shape[0], 3])
        temp1 = shift_data_temp > 0
        temp2 = fixed_data_temp > 0
        # frames voiced in both / voiced in only one
        indp = (temp1 * temp2)
        indn = np.bitwise_xor(temp1, temp2)
        voiceFrame = np.sum(indp)

        if voiceFrame > 0:
            shift_data_temp = F0Transform(shift_data_temp[indp])
            fixed_data_temp = F0Transform(fixed_data_temp[indp])
            diff[indp, 0] = shift_data_temp - fixed_data_temp
            diff[indn, 1] = 1
            diff[indp, 2] = 1
            pow2 = diff * diff
            corr = scipy.stats.spearmanr(shift_data_temp, fixed_data_temp)
        else:
            corr = [np.nan, 0]
            pow2 = diff * np.nan

        if shift_t == 0:
            # fallback: a NaN correlation never compares greater than
            # max_corr, so without this the buffers would stay empty
            min_rmse_buf = pow2
            max_corr_buf = corr

        # select the shift point by max Corr
        if corr[0] > max_corr:
            max_corr = corr[0]
            max_corr_buf = corr
            min_rmse_buf = pow2

    return min_rmse_buf, valid_length, max_corr_buf