def generateData(inFile, outFile, vuvInFile, vuvOutFile, featDim, resolution, thre=0.5):
    """Write a per-sample frame-index track and a randomly thinned V/UV track.

    The label matrix (``featDim`` columns) and the one-column V/UV file are
    read, trimmed to the shorter of the two, and every frame is expanded to
    ``resolution`` consecutive samples.  In the V/UV track each sample is
    reset to 0 when a uniform random draw falls below ``thre``.

    If either input file is missing its path is printed and nothing is
    written.
    """
    # Report the first missing input by printing its path, then give up.
    for path in (inFile, vuvInFile):
        if not os.path.isfile(path):
            print(path)
            return

    labData = py_rw.read_raw_mat(inFile, featDim)
    vuvData = py_rw.read_raw_mat(vuvInFile, 1)

    # Trim whichever stream is longer so both cover the same frames.
    if labData.shape[0] > vuvData.shape[0]:
        labData = labData[0:vuvData.shape[0], :]
    else:
        vuvData = vuvData[0:labData.shape[0]]
    assert labData.shape[0] == vuvData.shape[0], 'Unequal length vuv and lab'

    maxTime = labData.shape[0] * resolution
    labIdxBuf = np.zeros([int(maxTime)])
    vuvBinBuf = np.zeros([int(maxTime)])

    for idx in np.arange(labData.shape[0]):
        st = idx * resolution
        et = (idx + 1) * resolution
        labIdxBuf[st:et] = idx
        vuvBinBuf[st:et] = vuvData[idx]
        randU = np.random.rand(et - st)
        # The basic slice is a view, so the masked assignment edits
        # vuvBinBuf in place; no temporary copy/write-back is needed.
        vuvBinBuf[st:et][randU < thre] = 0

    py_rw.write_raw_mat(labIdxBuf, outFile)
    py_rw.write_raw_mat(vuvBinBuf, vuvOutFile)
def generate(inFile1, inFile2, outfile):
    """Write a 0/1 flag per entry of ``inFile1``.

    ``inFile1`` holds integer indices into the one-column track stored in
    ``inFile2``; the output is 1 where the indexed track value is positive
    and 0 elsewhere.
    """
    frameIdx = np.asarray(py_rw.read_raw_mat(inFile1, 1), dtype=np.int32)
    track = py_rw.read_raw_mat(inFile2, 1)

    # The conversion result is not used below; the call is retained so the
    # function keeps exercising discreteF0 exactly as before.
    _unused, _ = discreteF0.f0Conversion(track.copy(), F0Max, F0Min, F0Inter,
                                         'c2d', F0Conti)

    positive = track[frameIdx] > 0
    flags = np.zeros(frameIdx.shape)
    flags[positive] = 1
    py_rw.write_raw_mat(flags, outfile)
def tempWarpper(fileIn, outFile, outWav):
    """Dump a 16-bit wav file as float32 samples scaled by 1 / 2**15.

    Only ``int16`` input is handled; for any other sample type a message is
    printed and nothing is written.  ``outWav`` is accepted for interface
    compatibility but is not used.
    """
    sr, wavdata = scipy.io.wavfile.read(fileIn)
    if wavdata.dtype == np.int16:
        transData = np.array(wavdata, dtype=np.float32) / np.power(2.0, 16 - 1)
        py_rw.write_raw_mat(transData, outFile)
    else:
        print("Unsupported data type")
def tempWarpper(fileIn, outFile, outWav):
    """Convert an int16 wav file into a raw float32 matrix.

    Samples are divided by 2**15 before being written to ``outFile``.
    Input with any other sample type is reported and skipped.  ``outWav``
    is not used by this function.
    """
    sr, wavdata = scipy.io.wavfile.read(fileIn)

    # Guard clause: only 16-bit PCM is supported.
    if wavdata.dtype != np.int16:
        print("Unsupported data type")
        return

    fullScale = np.power(2.0, 16 - 1)
    samples = np.array(wavdata, dtype=np.float32)
    py_rw.write_raw_mat(samples / fullScale, outFile)
def f0ip2f0(f0_ip_file, vuv_file, output_file, vu_threshold=0.5, unvoiced_value=-1.0e+10):
    """Overwrite frames of an F0 track with ``unvoiced_value`` where V/UV is low.

    The track is loaded with ``py_rw.read_raw_lf0`` and the one-column V/UV
    file with ``py_rw.read_raw_mat``; both must have the same number of
    frames.  Frames whose V/UV value is below ``vu_threshold`` are replaced
    and the result is written to ``output_file``.
    """
    contour = py_rw.read_raw_lf0(f0_ip_file, 1)
    voicing = py_rw.read_raw_mat(vuv_file, 1)

    sameLength = contour.shape[0] == voicing.shape[0]
    assert sameLength, "Error: unequal length %s %s" % (f0_ip_file, vuv_file)

    unvoiced = voicing < vu_threshold
    contour[unvoiced] = unvoiced_value
    py_rw.write_raw_mat(contour, output_file)
def generateLabIndex(labfile, outfile, resolution = 80, res=50000):
    """Write a track that maps every output sample to its frame index.

    The end time of the last entry returned by ``htslab.read_full_lab`` is
    taken as the frame count; each frame index is repeated ``resolution``
    times in the output buffer.
    """
    sTime, eTime, labEntry = htslab.read_full_lab(labfile, res)
    lastFrame = eTime[-1]

    indexTrack = np.zeros([int(lastFrame * resolution)])
    for frame in np.arange(int(lastFrame)):
        begin = frame * resolution
        finish = (frame + 1) * resolution
        indexTrack[begin:finish] = frame

    py_rw.write_raw_mat(indexTrack, outfile)
def generateLabIndex(labfile, outfile, resolution=80, res=50000):
    """Expand frame indices of a label file to ``resolution`` samples each.

    The number of frames is the end time of the final label entry as
    returned by ``htslab.read_full_lab``.  The resulting index track is
    written to ``outfile``.
    """
    sTime, eTime, labEntry = htslab.read_full_lab(labfile, res)
    frameNum = int(eTime[-1])
    outBuf = np.zeros([int(eTime[-1] * resolution)])

    # bounds[k] is the first output sample belonging to frame k.
    bounds = np.arange(frameNum + 1) * resolution
    for frame, (begin, finish) in enumerate(zip(bounds[:-1], bounds[1:])):
        outBuf[begin:finish] = frame

    py_rw.write_raw_mat(outBuf, outfile)
def CreateTimeMatrix(DataFile):
    """Build the per-frame boundary bit mask of one utterance and save it.

    Phoneme, syllable and word alignments of ``DataFile`` are loaded from
    the ``PhoAlign``/``SylAlign``/``WorAlign`` directories.  Every frame
    carries ``bitInfo['fra']``; the first frame of each alignment entry
    additionally gets the 'pho' bit, plus the 'syl', 'wor' and 'phr' bits
    when the corresponding label changes.  The mask is written to
    ``DataDir/<DataFile>.bin`` as unsigned bytes.

    Returns the number of frames, or 0 when the word and syllable
    alignments disagree in length or end time.
    """
    phofile = PhoAlign + os.path.sep + DataFile + '.lab'
    sylfile = SylAlign + os.path.sep + DataFile + '.lab'
    worfile = WorAlign + os.path.sep + DataFile + '.lab'

    phodata = htslab.read_full_lab(phofile)
    syldata = htslab.read_full_lab(sylfile)
    wordata = htslab.read_full_lab(worfile)

    if len(wordata[1]) != len(syldata[1]) or wordata[1][-1] != syldata[1][-1]:
        print("\t Unequal Length %s" % (DataFile))
        return 0

    # The builtin int replaces the np.int alias removed from recent NumPy.
    DataTime = int(syldata[1][-1])

    # default, update at every frame level
    dataMat = np.bitwise_or(np.zeros([DataTime], dtype=np.int8), bitInfo['fra'])

    preSyl = ''
    preWor = ''
    for idx1 in range(len(syldata[0])):
        # Coerce the bounds once so they are valid array indices.
        frameStart = int(syldata[0][idx1])
        frameEnd = int(syldata[1][idx1])

        # update the phoneme state
        dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['pho'])

        syllabel = syldata[2][idx1]
        worlabel = wordata[2][idx1]

        # NOTE(review): the nesting of the three checks below was rebuilt
        # from a whitespace-collapsed listing - confirm against the original.
        if syllabel != preSyl:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['syl'])
        if worlabel != preWor:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['wor'])
            if len(preWor) == 0 or preWor == phraseSym or worlabel == phraseSym:
                dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['phr'])

        preSyl = syllabel
        preWor = worlabel

        if CheckBinary:
            # Debug dump: one line per frame with the bit pattern and labels.
            pholabel = phodata[2][idx1]
            for t in range(frameStart, frameEnd):
                print("%d, %s [%s %s %s]" %
                      (t, np.binary_repr(dataMat[t], len(bitInfo)),
                       pholabel[0:6], syllabel[0:6], worlabel[0:6]))

    py_rw.write_raw_mat(dataMat, DataDir + os.path.sep + DataFile + '.bin', 'u1')
    return DataTime
def generateLabIndex(labfile, outfile, labDim, resolution):
    """Write a sample-level frame-index track for a raw label matrix.

    ``labfile`` is read as a matrix with ``labDim`` columns; its row count
    is the number of frames.  Each frame index is repeated ``resolution``
    times and the track is written to ``outfile``.
    """
    frames = py_rw.read_raw_mat(labfile, labDim)
    frameNum = frames.shape[0]

    indexTrack = np.zeros([int(frameNum * resolution)])
    for frame in np.arange(int(frameNum)):
        indexTrack[frame * resolution:(frame + 1) * resolution] = frame

    py_rw.write_raw_mat(indexTrack, outfile)
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Mark low-V/UV frames of an F0 file as unvoiced, in place.

    Frames whose V/UV value is below ``uvT`` are set to -1.0e+10 and the
    F0 file is overwritten.  Progress is printed on one line: the file
    name followed by ': done' or ': not found'.
    """
    f0Path = os.path.sep.join([inDir, fileName + f0Ext])
    vuPath = os.path.sep.join([uvDir, fileName + vuExt])

    print(fileName, end=' ')
    if not (os.path.isfile(f0Path) and os.path.isfile(vuPath)):
        print(': not found')
        return

    contour = py_rw.read_raw_mat(f0Path, 1)
    voicing = py_rw.read_raw_mat(vuPath, 1)
    assert contour.shape[0] == voicing.shape[0], ': lf0 uv unequal length'

    unvoiced = voicing < uvT
    contour[unvoiced] = -1.0e+10
    py_rw.write_raw_mat(contour, f0Path)
    print(': done')
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Mark low-V/UV frames of an F0 file as unvoiced, in place.

    The F0 file ``inDir/<fileName><f0Ext>`` and the V/UV file
    ``uvDir/<fileName><vuExt>`` are read as one-column matrices.  Frames
    with a V/UV value below ``uvT`` are set to -1.0e+10 and the F0 file is
    overwritten.  The file name is printed followed, on the same line, by
    ': done' or ': not found'.
    """
    conlf0Name = inDir + os.path.sep + fileName + f0Ext
    vuName = uvDir + os.path.sep + fileName + vuExt

    # print() calls replace the Python 2 print statements; end=' ' keeps
    # the status on the same line as the file name.
    print(fileName, end=' ')
    if os.path.isfile(conlf0Name) and os.path.isfile(vuName):
        conlf0 = py_rw.read_raw_mat(conlf0Name, 1)
        vu = py_rw.read_raw_mat(vuName, 1)
        assert conlf0.shape[0] == vu.shape[0], ': lf0 uv unequal length'
        conlf0[vu < uvT] = -1.0e+10
        py_rw.write_raw_mat(conlf0, conlf0Name)
        print(': done')
    else:
        print(': not found')
def f0convert(f0File, qF0Output, vuvOutputFile, f0Zero, f0Max, f0Min, f0Inter, f0Conti, f0Type):
    """Quantize an F0 track and write the class indices and V/UV flags.

    Args:
        f0File: input track.
        qF0Output: output path for the quantized classes.
        vuvOutputFile: output path for the V/UV flags.
        f0Zero: kept for interface compatibility; not used.
        f0Max, f0Min, f0Inter, f0Conti: forwarded to
            ``discreteF0.f0Conversion``.
        f0Type: 0 - the file is read with ``py_rw.read_raw_mat`` and its
            positive values are mapped through 1127 * ln(1 + f / 700);
            1 - the file is read with ``py_rw.read_raw_lf0`` and used as is.

    Raises:
        ValueError: if ``f0Type`` is neither 0 nor 1 (previously this
            surfaced as an unbound-variable error further down).
    """
    if f0Type == 0:
        data = py_rw.read_raw_mat(f0File, 1)
        positive = data > 0
        data[positive] = 1127.0 * np.log(data[positive] / 700.0 + 1)
    elif f0Type == 1:
        data = py_rw.read_raw_lf0(f0File, 1)
    else:
        raise ValueError("Unsupported f0Type %s (expected 0 or 1)" % (str(f0Type)))

    dataClas, vuv = discreteF0.f0Conversion(data.copy(), f0Max, f0Min, f0Inter,
                                            'c2d', f0Conti)
    # Frames flagged as unvoiced are forced into class 0.
    dataClas[vuv < 1] = 0.0

    py_rw.write_raw_mat(dataClas, qF0Output)
    py_rw.write_raw_mat(vuv, vuvOutputFile)
def tempWarpper(fileIn, outFile, outWav, quanLevel):
    """Convert a wav file with ``wavTool.wavformConvert`` and dump the result.

    Args:
        fileIn: input wav path (int16 or int32 samples).
        outFile: path of the raw matrix that receives the converted data.
        outWav: if not None, the converted data is passed through
            ``wavTool.wavformDeconvert`` and saved there as a wav file.
        quanLevel: forwarded to the wavTool conversion routines.

    For any other sample type a message is printed and nothing is written.
    """
    # Read the path given as argument (the original referenced an
    # undefined name ``inFile`` here).
    sr, wavdata = scipy.io.wavfile.read(fileIn)

    if wavdata.dtype == np.int16:
        bitNum = 16
    elif wavdata.dtype == np.int32:
        bitNum = 32
    else:
        # Stop here: there is no converted data to write.
        print("Unsupported data type")
        return

    transData = wavTool.wavformConvert(wavdata, bitNum, True, quanLevel)
    py_rw.write_raw_mat(transData, outFile)

    if outWav is not None:
        recoData = wavTool.wavformDeconvert(transData, quanLevel)
        wavTool.waveSaveFromFloat(recoData, outWav, sr=sr)
def createMdnConfig(mdnConfigFile, MDNType, MDNTargetDim, ARDynamic=None, tieVariance=0):
    """Create the mdn.config for MDN CURRENNT

    For every entry of ``MDNType`` a record of five numbers is stored:
    [output start, output end, target start, target end, type], where the
    output range width comes from ``distParaNum``.  The first value of the
    written vector is the number of records.

    Returns the summed width of all output ranges.
    """
    if ARDynamic is None:
        # default, no AR dynamic
        ARDynamic = np.ones([len(MDNType)]) * -1.0

    # First pass: lay the output ranges out back to back.
    outRanges = []
    offset = 0
    for idx, mixType in enumerate(MDNType):
        width = distParaNum(MDNTargetDim[idx][1] - MDNTargetDim[idx][0],
                            mixType, tieVariance, ARDynamic[idx])
        outRanges.append([offset, offset + width])
        offset = width + offset

    # Second pass: validate each unit and fill in its record.
    mdnconfigdata = np.zeros([1 + len(MDNType) * 5], dtype=np.float32)
    mdnconfigdata[0] = len(MDNType)

    totalDim = 0
    for idx, mixType in enumerate(MDNType):
        tarRange = MDNTargetDim[idx]
        outRange = outRanges[idx]
        expected = distParaNum(tarRange[1] - tarRange[0], mixType,
                               tieVariance, ARDynamic[idx])
        allocated = outRange[1] - outRange[0]

        if mixType > 0:
            assert expected == allocated, "Error in MDN mixture configuraiton"
        elif mixType < 0:
            assert tarRange[1] - tarRange[0] == 1, "Softmax to 1 dimension targert"
            # change it back to -1
            mixType = -1
        totalDim = totalDim + allocated

        first = idx * 5 + 1
        mdnconfigdata[first:first + 5] = [
            outRange[0], outRange[1], tarRange[0], tarRange[1], mixType
        ]

    py_rw.write_raw_mat(mdnconfigdata, mdnConfigFile)
    return totalDim
def data_normalize(dataFile, dim, mask=None, ran=False):
    """Normalize a raw matrix column-wise and write ``.norm`` / ``.mv`` files.

    Args:
        dataFile: raw matrix with ``dim`` columns.
        dim: number of columns.
        mask: optional array; columns where ``mask > 0`` get their std set
            to 0 and are therefore left un-normalized.
        ran: when True the rows are permuted first.  The permutation is
            kept in ``random_index`` next to ``dataFile`` and reused as
            long as its length matches the data.

    Columns whose std does not exceed 1e-6 are left untouched.  The
    normalized matrix goes to ``dataFile + '.norm'`` and the concatenated
    mean and std vectors go to ``dataFile + '.mv'``.
    """
    data = drh.read_raw_mat(dataFile, dim)

    if ran == True:  # equality test kept to preserve the original semantics
        ranfile = os.path.dirname(dataFile) + os.path.sep + 'random_index'

        randidx = None
        if os.path.isfile(ranfile):
            print("Found random index %s" % (ranfile))
            randidx = np.asarray(drh.read_raw_mat(ranfile, 1), dtype=np.int32)
            if randidx.shape[0] != data.shape[0]:
                print("But it unmatches the data. New random_index will be generated")
                randidx = None

        if randidx is None:
            # No usable permutation on disk: draw one and store it.
            randidx = np.array(range(data.shape[0]))
            random.shuffle(randidx)
            drh.write_raw_mat(randidx, ranfile)

        data = data[randidx, :]

    meanData = data.mean(axis=0)
    stdData = data.std(axis=0)
    if mask is not None:
        stdData[mask > 0] = 0.0  # set to zero

    idx = stdData > 0.000001  # a threshold to normalize
    data[:, idx] = (data[:, idx] - meanData[idx]) / stdData[idx]

    drh.write_raw_mat(data, dataFile + '.norm')
    drh.write_raw_mat(np.concatenate((meanData, stdData)), dataFile + '.mv')
def CreateTimeMatrix(DataFile):
    """Build the per-frame boundary bit mask of one utterance and save it.

    Phoneme, syllable and word alignments of ``DataFile`` are loaded from
    the ``PhoAlign``/``SylAlign``/``WorAlign`` directories.  Every frame
    carries ``bitInfo['fra']``; the first frame of each alignment entry
    additionally gets the 'pho' bit, plus the 'syl', 'wor' and 'phr' bits
    when the corresponding label changes.  The mask is written to
    ``DataDir/<DataFile>.bin`` as unsigned bytes.

    Returns the number of frames, or 0 when the word and syllable
    alignments disagree in length or end time.
    """
    phofile = PhoAlign + os.path.sep + DataFile + '.lab'
    sylfile = SylAlign + os.path.sep + DataFile + '.lab'
    worfile = WorAlign + os.path.sep + DataFile + '.lab'

    phodata = htslab.read_full_lab(phofile)
    syldata = htslab.read_full_lab(sylfile)
    wordata = htslab.read_full_lab(worfile)

    if len(wordata[1]) != len(syldata[1]) or wordata[1][-1] != syldata[1][-1]:
        print("\t Unequal Length %s" % (DataFile))
        return 0

    # The builtin int replaces the np.int alias removed from recent NumPy.
    DataTime = int(syldata[1][-1])

    # default, update at every frame level
    dataMat = np.bitwise_or(np.zeros([DataTime], dtype=np.int8), bitInfo['fra'])

    preSyl = ''
    preWor = ''
    # range() works on both Python 2 and 3 (xrange does not).
    for idx1 in range(len(syldata[0])):
        # Coerce the bounds once so they are valid array indices.
        frameStart = int(syldata[0][idx1])
        frameEnd = int(syldata[1][idx1])

        # update the phoneme state
        dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['pho'])

        syllabel = syldata[2][idx1]
        worlabel = wordata[2][idx1]

        # NOTE(review): the nesting of the three checks below was rebuilt
        # from a whitespace-collapsed listing - confirm against the original.
        if syllabel != preSyl:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['syl'])
        if worlabel != preWor:
            dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['wor'])
            if len(preWor) == 0 or preWor == phraseSym or worlabel == phraseSym:
                dataMat[frameStart] = np.bitwise_or(dataMat[frameStart], bitInfo['phr'])

        preSyl = syllabel
        preWor = worlabel

        if CheckBinary:
            # Debug dump: one line per frame with the bit pattern and labels.
            pholabel = phodata[2][idx1]
            for t in range(frameStart, frameEnd):
                print("%d, %s [%s %s %s]" %
                      (t, np.binary_repr(dataMat[t], len(bitInfo)),
                       pholabel[0:6], syllabel[0:6], worlabel[0:6]))

    py_rw.write_raw_mat(dataMat, DataDir + os.path.sep + DataFile + '.bin', 'u1')
    return DataTime
def meanStdNormMask(fileScps, fileDims, fileNormMask, meanStdOutPath, f0Dim=-1):
    """Compute merged mean/std statistics and neutralize masked dimensions.

    ``stats.getMeanStd_merge`` writes the raw statistics to
    ``meanStdOutPath + '.unmasked'``; that vector must hold
    ``2 * sum(fileDims)`` values.  For masked dimensions the entry in the
    first half is set to 0 and the matching entry in the second half to 1
    (presumably mean and std, so the dimension passes through unchanged).

    Each item of ``fileNormMask`` is one of:
        []               - normalize every dimension of that feature
        [0] / ['not_norm'] - mask every dimension of that feature
        [start, end]     - mask this range (absolute indices; must lie
                           inside the feature's own range)

    The masked vector is written to ``meanStdOutPath``.
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"

    unmaskedPath = meanStdOutPath + '.unmasked'
    totalDim = sum(fileDims)

    # calculate the mean/std
    stats.getMeanStd_merge(fileScps, fileDims, unmaskedPath, f0Dim)
    meanStdData = py_rw.read_raw_mat(unmaskedPath, 1)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (unmaskedPath, totalDim * 2)

    # [start, end) range of every feature inside the merged vector
    featDims = []
    startDim = 0
    for dim in fileDims:
        featDims.append([startDim, startDim + dim])
        startDim = startDim + dim

    for dimRange, normMask in zip(featDims, fileNormMask):
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and (normMask[0] == 0 or normMask[0] == 'not_norm'):
            meanStdData[dimRange[0]:dimRange[1]] = 0.0
            meanStdData[dimRange[0] + totalDim:dimRange[1] + totalDim] = 1.0
        elif len(normMask) == 2:
            # The messages now contain a %s placeholder; without it a failed
            # check raised TypeError while formatting the message.
            assert dimRange[0] <= normMask[0], \
                'normMask range error %s' % (str(normMask))
            assert dimRange[1] >= normMask[1], \
                'normMask range error %s' % (str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0] + totalDim:normMask[1] + totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))

    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
def createMdnConfig(mdnConfigFile, MDNType, MDNTargetDim, ARDynamic=None, tieVariance=0):
    """Create the mdn.config for MDN CURRENNT

    For every entry of ``MDNType`` a record of five numbers is stored:
    [output start, output end, target start, target end, type], where the
    output range width comes from ``distParaNum``.  The first value of the
    written vector is the number of records.

    Returns the summed width of all output ranges, i.e. the end of the
    last range.  A softmax unit (negative type) contributes its full
    allocated width rather than 1, so the total agrees with the ranges
    written to the file.
    """
    if ARDynamic is None:
        # default, no AR dynamic
        ARDynamic = np.ones([len(MDNType)]) * -1.0

    # Lay the output ranges of all units out back to back.
    bias = 0
    MDNNNOutDim = []
    for idx, mdnConfig in enumerate(MDNType):
        temp = distParaNum(MDNTargetDim[idx][1] - MDNTargetDim[idx][0],
                           mdnConfig, tieVariance, ARDynamic[idx])
        MDNNNOutDim.append([bias, bias + temp])
        bias = temp + bias

    # check and generating the MDN configuration
    mdnconfigdata = np.zeros([1 + len(MDNType) * 5], dtype=np.float32)
    mdnconfigdata[0] = len(MDNType)

    tmp = 0
    for idx, mdnConfig in enumerate(MDNType):
        mdntarDim = MDNTargetDim[idx]
        mdnoutDim = MDNNNOutDim[idx]
        tmp1 = distParaNum(mdntarDim[1] - mdntarDim[0], mdnConfig,
                           tieVariance, ARDynamic[idx])
        tmp2 = (mdnoutDim[1] - mdnoutDim[0])

        if mdnConfig > 0:
            assert tmp1 == tmp2, "Error in MDN mixture configuraiton"
        elif mdnConfig < 0:
            assert mdntarDim[1] - mdntarDim[0] == 1, "Softmax to 1 dimension targert"
            mdnConfig = -1  # change it back to -1

        # Every unit type adds the width that was actually allocated to it.
        tmp = tmp + tmp2

        mdnconfigdata[(idx * 5 + 1):((idx + 1) * 5 + 1)] = [
            mdnoutDim[0], mdnoutDim[1], mdntarDim[0], mdntarDim[1], mdnConfig
        ]

    py_rw.write_raw_mat(mdnconfigdata, mdnConfigFile)
    return tmp
def meanStdNormMask(fileScps, fileDims, fileNormMask, meanStdOutPath, f0Dim=-1):
    """Compute merged mean/std statistics and neutralize masked dimensions.

    ``stats.getMeanStd_merge`` writes the raw statistics to
    ``meanStdOutPath + '.unmasked'``; that vector must hold
    ``2 * sum(fileDims)`` values.  For masked dimensions the entry in the
    first half is set to 0 and the matching entry in the second half to 1
    (presumably mean and std, so the dimension passes through unchanged).

    Each item of ``fileNormMask`` is one of:
        []               - normalize every dimension of that feature
        [0] / ['not_norm'] - mask every dimension of that feature
        [start, end]     - mask this range (absolute indices; must lie
                           inside the feature's own range)

    The masked vector is written to ``meanStdOutPath``.
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"

    unmaskedPath = meanStdOutPath + '.unmasked'
    totalDim = sum(fileDims)

    # calculate the mean/std
    stats.getMeanStd_merge(fileScps, fileDims, unmaskedPath, f0Dim)
    meanStdData = py_rw.read_raw_mat(unmaskedPath, 1)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (unmaskedPath, totalDim * 2)

    # [start, end) range of every feature inside the merged vector
    featDims = []
    startDim = 0
    for dim in fileDims:
        featDims.append([startDim, startDim + dim])
        startDim = startDim + dim

    for dimRange, normMask in zip(featDims, fileNormMask):
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and (normMask[0] == 0 or normMask[0] == 'not_norm'):
            meanStdData[dimRange[0]:dimRange[1]] = 0.0
            meanStdData[dimRange[0] + totalDim:dimRange[1] + totalDim] = 1.0
        elif len(normMask) == 2:
            # The messages now contain a %s placeholder; without it a failed
            # check raised TypeError while formatting the message.
            assert dimRange[0] <= normMask[0], \
                'normMask range error %s' % (str(normMask))
            assert dimRange[1] >= normMask[1], \
                'normMask range error %s' % (str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0] + totalDim:normMask[1] + totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))

    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
#!/usr/bin/python
# Convert every HTK file listed in <dir>/gen.scp into a wav file.
# usage: <script> DIR QUANTI_BIT_NUM SAMPLING_RATE
from speechTools import wavTool
from scipy.io import wavfile
from ioTools import readwrite as py_rw
import os
import sys
import numpy as np

dirPath = sys.argv[1]
quantiBitNum = int(sys.argv[2])
samplingRate = int(sys.argv[3])

fileList = py_rw.read_txt_list(dirPath + '/gen.scp')
for fileName in fileList:
    fileName = fileName.rstrip('\n')

    # Drop a trailing '.htk' suffix only.  str.rstrip('.htk') strips any
    # run of the characters '.', 'h', 't', 'k' and therefore mangles names
    # that happen to end in those letters.
    baseName = os.path.basename(fileName)
    if baseName.endswith('.htk'):
        baseName = baseName[:-len('.htk')]

    namePrefix = dirPath + os.path.sep + baseName
    nameHtk = namePrefix + '.htk'
    nameRaw = namePrefix + '.raw'
    nameWav = namePrefix + '.wav'
    print("%s %s" % (nameRaw, nameWav))

    data = py_rw.read_htk(nameHtk, 'f4', 'b')
    if quantiBitNum > 0:
        # Quantized data: write the raw matrix and let wavTool decode it.
        quantiLevel = np.power(2, quantiBitNum) - 1
        py_rw.write_raw_mat(data, nameRaw)
        wavTool.raw2wav(nameRaw, nameWav, quantiLevel, samplingRate=samplingRate)
    else:
        wavfile.write(nameWav, samplingRate, data)
# Export the second column of every matrix in <data_dir>/pitch.scp to
# <out_dir>/f0/<key>.f0.
# usage: <script> DATA_DIR OUT_DIR
#
# sys, join and basename are used below; they are imported here so the
# script does not depend on names it never brings into scope.
import sys
from os.path import join, basename

from ioTools import readwrite
import kaldi_io
import numpy as np

args = sys.argv
data_dir = args[1]
out_dir = args[2]

dataname = basename(data_dir)
xvector_file = "exp/0007_voxceleb_v2_1a/exp/xvector_nnet_1a/am_nsf/xvectors_" + dataname + "/xvector.scp"

xvec_out_dir = join(out_dir, "xvector")
pitch_out_dir = join(out_dir, "f0")

# Write pitch features
pitch_file = join(data_dir, 'pitch.scp')
pitch2shape = {}
for key, mat in kaldi_io.read_mat_scp(pitch_file):
    #pitch2shape[key] = mat.shape
    readwrite.write_raw_mat(mat[:, 1], join(pitch_out_dir, key + '.f0'))

# The x-vector export below is disabled (kept as an inert string literal).
'''
# Write xvector features
with open(xvector_file) as f:
    for key, mat in kaldi_io.read_vec_flt_scp(f):
        #print key, mat.shape
        plen = pitch2shape[key][0]
        mat = mat[np.newaxis]
        xvec = np.repeat(mat, plen, axis=0)
        readwrite.write_raw_mat(xvec, join(xvec_out_dir, key+'.xvector'))
'''
# Floor for the standard deviation: any std below this value is replaced
# by 1.0, i.e. that dimension is not scaled during normalization.
stdThresh = 0.0000001

# When statistics cover several feature types, fileDirs, fileExts and
# dataDims hold one entry per type.  The output concatenates the mean/std
# vectors of all types along the feature axis.
dataMvBuffer = []
for idx, fileDir in enumerate(fileDirs):
    fileExt, dataDim = fileExts[idx], dataDims[idx]

    # Accumulators handed to temprapper for every file in the list.
    dataMv = np.zeros([2, dataDim], dtype=np.float64)
    dataCounter = 0
    with open(fileList, 'r') as filePtr:
        for fileName in filePtr:
            fileName = fileName.rstrip('\n')
            print(fileName)
            fileName = fileDir + '/' + fileName + fileExt
            dataMv, dataCounter = temprapper(fileName, dataDim, dataMv,
                                             dataCounter)

    # Second row: accumulated value -> sqrt(value / (count - 1)).
    dataMv[1] = np.sqrt(dataMv[1] / (dataCounter - 1))
    dataMv = np.asarray(dataMv, dtype=np.float32)
    dataMvBuffer.append(dataMv)

dataMvBuffer = np.concatenate(dataMvBuffer, axis=1)
lowStd = dataMvBuffer[1, :] < stdThresh
dataMvBuffer[1, lowStd] = 1.0
py_rw.write_raw_mat(dataMvBuffer, dataOut)
def lf02f0_file(data_file, out_file, thres=10.0):
    """Convert a track with ``lf02f0`` and zero the frames at or below ``thres``.

    The track is loaded with ``py_rw.read_raw_lf0``.  Values above
    ``thres`` are passed through ``lf02f0``; values at or below it become
    0.0.  The result is written to ``out_file``.
    """
    data = py_rw.read_raw_lf0(data_file, 1)

    # Classify the frames once, on the untouched input.  Evaluating the
    # second mask after the conversion could re-classify converted values
    # that happen to fall at or below the threshold.
    above = data > thres
    below = data <= thres

    data[above] = lf02f0(data[above])
    data[below] = 0.0
    py_rw.write_raw_mat(data, out_file)
frameSum = frameSum + data.shape[0] F0Idx = data>10 dataClas, vuv = discreteF0.f0Conversion(data, F0Max, F0Min, F0Inter, 'c2d', F0Conti) dataClas[vuv<1] = 0.0 #dataClas = np.zeros([data.shape[0]]) #if F0Conti: # # Continous F0 # pass # dataClas[F0Idx] = np.round((data[F0Idx] - F0Min)/(F0Max - F0Min) * (F0Inter - 1)) #else: # # Discontinuous F0, leave one dimension for unvoiced # dataClas[F0Idx] = np.round((data[F0Idx] - F0Min)/(F0Max - F0Min) * (F0Inter - 2)) + 1 tmpmax = np.max(data[F0Idx]) tmpmin = np.min(data[F0Idx]) tmpmax2 = np.max(dataClas[F0Idx]) tmpmin2 = np.min(dataClas[F0Idx]) print tmpmax, tmpmin, tmpmax2, tmpmin2 frameMax = np.max([frameMax, tmpmax]) frameMin = np.min([frameMin, tmpmin]) filePath = fileOut + os.path.sep + fileName + fileExt + '_class' py_rw.write_raw_mat(dataClas, filePath) print "\nmax F0 event, min F0 event, #frame" print frameMax, frameMin, frameSum
# Compute gv(...) for every file named in fileList and store one row per
# file in <fileOut>/gv.data.bin.
fileDir = sys.argv[2]
fileList = sys.argv[3]
fileExt = sys.argv[4]
fileDim = int(sys.argv[5])
fileOut = sys.argv[6]

# First pass: count the entries to size the output matrix.
with open(fileList, 'r') as filePtr:
    cnt = sum(1 for _ in filePtr)

gvData = np.zeros([cnt, fileDim])
with open(fileList, 'r') as filePtr:
    for idx, fileName in enumerate(filePtr):
        fileName = fileName.rstrip('\n')
        data = py_rw.read_raw_mat(fileDir + os.path.sep + fileName + fileExt,
                                  fileDim)
        # One-dimensional F0 tracks: keep only the positive values.
        if (fileExt == '.lf0' or fileExt == '.f0') and fileDim == 1:
            data = data[data > 0]
        gvData[idx, :] = gv(data)

py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
# Single-argument print() gives the same text on Python 2 and Python 3.
print("%s \t %s" % (fileOut, np.median(gvData, axis=0)))
# Layer sizes of the network, input layer first.
netTopo = [382, 512, 256, 259]

# Type of every layer:
#   [0]: input layer
#   [1]: feedforward layer
#   [2]: blstm layer
layerType = [0, 1, 1, 1]

# Connections between consecutive layers (1st-2nd, 2nd-3rd, 3rd-4th), each
# block given as
#   [input_start_dimension, input_end_dimension,
#    output_start_dimension, output_end_dimension]
#
# Example used here: components 1-256 of the 2nd layer feed elements 1-128
# of the 3rd layer -> [0, 256, 0, 128]; components 257-512 feed elements
# 129-256 -> [256, 512, 128, 256].
# NOTE: python style of index [0, 5] => [0, 1, 2, 3, 4]
secondToThird = [[0, 256, 0, 128],
                 [256, 512, 128, 256]]
netconfig = [
    [],             # void for the 1st-2nd layer
    secondToThird,  # 2nd-3rd layer
    [],             # void for the 3rd-4th layer
]

#
netTopo = np.array(netTopo)
netTopoNoInput = netTopo[1:]
layerNM, TotalWNum = getNetStruct(netTopo, netTopoNoInput, layerType)
weight = genWeightMast(TotalWNum, netconfig, layerNM, netTopoNoInput,
                       netTopo, layerType, 1)
py_rw.write_raw_mat(weight, './weightMask')
#os.system("cat ./" + __file__ + " > ./log")
if __name__ == "__main__":
    if sys.argv[1] == 'gv':
        # 'gv' mode: one gv(...) row per listed file, stored in
        # <fileOut>/gv.data.bin; the column-wise median is printed.
        fileDir = sys.argv[2]
        fileList = sys.argv[3]
        fileExt = sys.argv[4]
        fileDim = int(sys.argv[5])
        fileOut = sys.argv[6]

        # Count the entries first so the matrix can be allocated up front.
        with open(fileList, 'r') as filePtr:
            cnt = sum(1 for _ in filePtr)
        gvData = np.zeros([cnt, fileDim])

        isF0Track = fileExt in ('.lf0', '.f0') and fileDim == 1
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                fileName = fileName.rstrip('\n')
                dataPath = fileDir + os.path.sep + fileName + fileExt
                data = py_rw.read_raw_mat(dataPath, fileDim)
                if isF0Track:
                    # keep only the positive values
                    data = data[data > 0]
                gvData[idx, :] = gv(data)

        py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
        print(fileOut, '\t', np.median(gvData, axis=0))
def f02lf0_file(data_file, out_file, thres=10.0, unvoiced_value=-1.0e+10):
    """Convert a track with ``f02lf0`` and mark the remaining frames unvoiced.

    The track is loaded with ``py_rw.read_raw_lf0``.  Values above
    ``thres`` are passed through ``f02lf0``; values at or below it are set
    to ``unvoiced_value``.  The result is written to ``out_file``.
    """
    data = py_rw.read_raw_lf0(data_file, 1)

    # Classify the frames once, on the untouched input.  Evaluating the
    # second mask after the conversion would overwrite every converted
    # value that ends up at or below the threshold.
    above = data > thres
    below = data <= thres

    data[above] = f02lf0(data[above])
    data[below] = unvoiced_value
    py_rw.write_raw_mat(data, out_file)
# Convert every HTK file listed in <dir>/gen.scp into a wav file.
# usage: <script> DIR QUANTI_BIT_NUM SAMPLING_RATE
from ioTools import readwrite as py_rw
import os
import sys
import numpy as np

dirPath = sys.argv[1]
quantiBitNum = int(sys.argv[2])
samplingRate = int(sys.argv[3])

fileList = py_rw.read_txt_list(dirPath + '/gen.scp')
for fileName in fileList:
    fileName = fileName.rstrip('\n')

    # Drop a trailing '.htk' suffix only.  str.rstrip('.htk') strips any
    # run of the characters '.', 'h', 't', 'k' and therefore mangles names
    # that happen to end in those letters.
    baseName = os.path.basename(fileName)
    if baseName.endswith('.htk'):
        baseName = baseName[:-len('.htk')]

    namePrefix = dirPath + os.path.sep + baseName
    nameHtk = namePrefix + '.htk'
    nameRaw = namePrefix + '.raw'
    nameWav = namePrefix + '.wav'
    print("%s %s" % (nameRaw, nameWav))

    data = py_rw.read_htk(nameHtk, 'f4', 'b')
    if quantiBitNum > 0:
        # Quantized data: write the raw matrix and let wavTool decode it.
        quantiLevel = np.power(2, quantiBitNum) - 1
        py_rw.write_raw_mat(data, nameRaw)
        wavTool.raw2wav(nameRaw, nameWav, quantiLevel, samplingRate=samplingRate)
    else:
        wavfile.write(nameWav, samplingRate, data)
bias = temp+bias print MDNNNOutDim # check and generating the MDN configuration mdnconfigdata = np.zeros([1+len(MDNType)*5], dtype = np.float32) mdnconfigdata[0] = len(MDNType) tmp = 0 for idx, mdntype in enumerate(MDNType): mdntarDim = MDNTargetDim[idx] mdnoutDim = MDNNNOutDim[idx] tmp1 = kmixPara(mdntarDim[1]-mdntarDim[0], mdntype, tieVariance) tmp2 = (mdnoutDim[1]-mdnoutDim[0]) if mdntype > 0: assert tmp1 == tmp2, "Error in MDN mixture configuraiton" tmp = tmp + tmp2 elif mdntype < 0: assert mdntarDim[1]-mdntarDim[0]==1, "Softmax to 1 dimension targert" tmp = tmp + (mdnoutDim[1]-mdnoutDim[0]) mdntype = -1 # change it back to -1 else: tmp = tmp + tmp2 mdnconfigdata[(idx*5+1):((idx+1)*5+1)] = [mdnoutDim[0],mdnoutDim[1], mdntarDim[0],mdntarDim[1], mdntype] print "Dimension of output of NN should be %d" % (tmp) py_rw.write_raw_mat(mdnconfigdata, mdnconfig)
# Export every matrix of a Kaldi scp file to <out_dir>/ppg/<key>.ppg.
# usage: <script> PPG_SCP OUT_DIR
import sys
from os.path import join, basename

from ioTools import readwrite
from kaldiio import ReadHelper

args = sys.argv
ppg_file, out_dir = args[1], args[2]

ppg_out_dir = join(out_dir, "ppg")

print("Writing PPG feats.....")
# Write ppg features
with ReadHelper('scp:' + ppg_file) as reader:
    for key, mat in reader:
        target = join(ppg_out_dir, key + '.ppg')
        readwrite.write_raw_mat(mat, target)
print("Finished writing PPG feats.")
source_stats = {} with open(statsdir + dataname + "/" + key.split("-")[0].split("_")[0]) as f: source_stats = json.load(f) selected_target_speaker_list = [target_spk] pseudo_speaker_f0_stats = {"mu_s": 0, "var_s": 0, "std_s": 0} for selected_target_speaker in selected_target_speaker_list: target_speaker_stats = {} with open(statsdir + dataset_of_target + "/" + selected_target_speaker) as f: target_speaker_stats = json.load(f) mu = target_speaker_stats["mu_s"] var = target_speaker_stats["var_s"] pseudo_speaker_f0_stats["mu_s"] += mu pseudo_speaker_f0_stats["var_s"] += var pseudo_speaker_f0_stats["var_s"] /= len(selected_target_speaker_list) pseudo_speaker_f0_stats["mu_s"] /= len(selected_target_speaker_list) pseudo_speaker_f0_stats["std_s"] = math.sqrt( pseudo_speaker_f0_stats["var_s"]) transfomation = { **source_stats, "mu_t": pseudo_speaker_f0_stats["mu_s"], "std_t": pseudo_speaker_f0_stats["std_s"] } f0t = log_linear_transformation(f0.copy(), transfomation) readwrite.write_raw_mat(f0t, join(pitch_out_dir, key + '.f0'))
# Split every matching netCDF package into one raw file per utterance and
# per feature stream.
if not os.path.isdir(dataOut):
    os.mkdir(dataOut)

for dataFile in dataList:
    if not re.search(dataPattern, dataFile):
        continue

    data = io.netcdf_file(dataDir + os.path.sep + dataFile)
    uttNum = data.dimensions['numSeqs']

    # Cumulative lengths: utterance i spans rows seqLengths[i]:seqLengths[i+1].
    seqLengths = data.variables['seqLengths'][:].copy()
    seqLengths = np.concatenate((np.array([0]), seqLengths)).cumsum()
    seqTags = data.variables['seqTags'][:]

    srcName = 'inputs' if inOutData == 1 else 'targetPatterns'
    dataAll = data.variables[srcName][:]

    for i in range(uttNum):
        outName = dataOut + os.path.sep + ''.join(seqTags[i])
        rowStart, rowEnd = seqLengths[i], seqLengths[i + 1]
        for j, suf in enumerate(name):
            colStart, colEnd = dim[j][0], dim[j][1]
            tmpdata = dataAll[rowStart:rowEnd, colStart:colEnd].copy()
            if dataMV is not None:
                # de-normalize with the stored scale and offset vectors
                tmpdata = tmpdata * varVec[colStart:colEnd] + meanVec[colStart:colEnd]
            py_rw.write_raw_mat(tmpdata, outName + suf)
        print("%s Utt %d" % (dataFile, i))

    del dataAll, seqTags, seqLengths, uttNum
    data.close()
# Split every matching netCDF package into one raw file per utterance and
# per feature stream.
if not os.path.isdir(dataOut):
    os.mkdir(dataOut)

for dataFile in dataList:
    if re.search(dataPattern, dataFile):
        data = io.netcdf_file(dataDir + os.path.sep + dataFile)
        uttNum = data.dimensions['numSeqs']

        # Cumulative lengths: utterance i spans rows
        # seqLengths[i]:seqLengths[i+1].
        seqLengths = data.variables['seqLengths'][:].copy()
        seqLengths = np.concatenate((np.array([0]), seqLengths)).cumsum()
        seqTags = data.variables['seqTags'][:]

        if inOutData == 1:
            dataAll = data.variables['inputs'][:]
        else:
            dataAll = data.variables['targetPatterns'][:]

        # range() and print() work on Python 2 and 3 alike (xrange and the
        # print statement do not).
        for i in range(uttNum):
            outName = dataOut + os.path.sep + ''.join(seqTags[i])
            for j, suf in enumerate(name):
                outFile = outName + suf
                tmpdata = dataAll[seqLengths[i]:seqLengths[i + 1],
                                  dim[j][0]:dim[j][1]].copy()
                if dataMV is not None:
                    # de-normalize with the stored scale and offset vectors
                    tmpdata = (tmpdata * varVec[dim[j][0]:dim[j][1]] +
                               meanVec[dim[j][0]:dim[j][1]])
                py_rw.write_raw_mat(tmpdata, outFile)
            print("%s Utt %d" % (dataFile, i))

        del dataAll, seqTags, seqLengths, uttNum
        data.close()
yaap_pitch_dir = join(data_dir, 'yaapt_pitch')

xvec_out_dir = join(out_dir, "xvector")
pitch_out_dir = join(out_dir, "f0")

# Write pitch features
pitch_file = join(data_dir, 'pitch.scp')
pitch2shape = {}
with ReadHelper('scp:' + pitch_file) as reader:
    for key, mat in reader:
        # Remember the frame count; the x-vector is repeated to this length.
        pitch2shape[key] = mat.shape[0]
        kaldi_f0 = mat[:, 1].squeeze().copy()
        yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key + '.f0'), 1)

        # The exported track has the length of the Kaldi track and carries
        # the YAAPT values: zero-padded when YAAPT is shorter, truncated
        # when it is longer (the plain slice assignment failed in that case).
        f0 = np.zeros(kaldi_f0.shape)
        frameNum = min(f0.shape[0], yaapt_f0.shape[0])
        f0[:frameNum] = yaapt_f0[:frameNum]
        readwrite.write_raw_mat(f0, join(pitch_out_dir, key + '.f0'))

# Write xvector features
with ReadHelper('scp:' + xvector_file) as reader:
    for key, mat in reader:
        plen = pitch2shape[key]
        # One copy of the utterance-level vector per pitch frame.
        mat = mat[np.newaxis]
        xvec = np.repeat(mat, plen, axis=0)
        readwrite.write_raw_mat(xvec, join(xvec_out_dir, key + '.xvector'))
#
# skippara, feedforward can be specified using
# [row_start, row_end, col_start, col_end] for each block
# blstm: netconfig = [[], [[0, 5, 0, 4], [5, 10, 4, 8]], []]
netTopo = [382, 768, 768, 768, 768, 768, 768, 259]
layerType = dupVec(1, len(netTopo))
layerType[0] = 0

# Hidden-to-hidden connections: three block-diagonal groups.
netconfig = dupVec([[0, 382, 0, 382],
                    [382, 382 + 256, 382, 382 + 256],
                    [382 + 256, 768, 382 + 256, 768]], len(netTopo) - 1)
# No block specification for the first two connections.
netconfig[0] = []
netconfig[1] = []
# Last connection: the same three input groups feed separate output ranges.
netconfig[len(netTopo) - 2] = [[0, 382, 0, 180],
                               [382, 382 + 256, 180, 184],
                               [382 + 256, 768, 184, 259]]

netTopo = np.array(netTopo)
netTopoNoInput = netTopo[1:]
layerNM, TotalWNum = getNetStruct(netTopo, netTopoNoInput, layerType)
weight = genWeightMast(TotalWNum, netconfig, layerNM, netTopoNoInput,
                       netTopo, layerType, 1)
py_rw.write_raw_mat(weight, './weightMask')

# Keep a copy of this configuration script plus the parameter counts in ./log.
os.system("cat ./" + __file__ + " > ./log")
with open('./log', 'a') as fileptr:
    fileptr.write('Number of parameter: %d \n' % (TotalWNum))
    fileptr.write('Number of non-zero parameter: %d \n' % (np.sum(weight)))
print(np.sum(weight))
# Export every matrix of a Kaldi scp file to <out_dir>/mel/<key>.mel.
# usage: <script> MSPEC_SCP OUT_DIR
import sys
from os.path import join, basename

from ioTools import readwrite
import kaldi_io

args = sys.argv
mspec_file = args[1]
out_dir = args[2]

mspec_out_dir = join(out_dir, "mel")

# print() with a single string behaves the same on Python 2 and Python 3.
print("Writing MEL feats.....")
# Write mspec features
for key, mat in kaldi_io.read_mat_scp(mspec_file):
    readwrite.write_raw_mat(mat, join(mspec_out_dir, key + '.mel'))
print("Finished writing MEL feats.")