def generateData(inFile, outFile, vuvInFile, vuvOutFile, featDim, resolution, thre=0.5):
    """Expand frame-level lab/vuv streams to sample level.

    For each frame covering `resolution` samples, writes the frame index
    (to outFile) and the frame's voiced/unvoiced flag (to vuvOutFile).
    The V/U flag is randomly zeroed at samples where a uniform draw falls
    below `thre` (a dropout-style thinning of the voicing flag).
    """
    # skip silently (printing only the missing path) when inputs are absent
    if not os.path.isfile(inFile):
        print(inFile)
        return
    if not os.path.isfile(vuvInFile):
        print(vuvInFile)
        return

    labData = py_rw.read_raw_mat(inFile, featDim)
    vuvData = py_rw.read_raw_mat(vuvInFile, 1)
    # trim both streams to the shorter one
    commonLen = min(labData.shape[0], vuvData.shape[0])
    labData = labData[0:commonLen, :]
    vuvData = vuvData[0:commonLen]
    assert labData.shape[0] == vuvData.shape[0], 'Unequal length vuv and lab'

    maxTime = labData.shape[0] * resolution
    labIdxBuf = np.zeros([int(maxTime)])
    vuvBinBuf = np.zeros([int(maxTime)])
    for frameIdx in np.arange(labData.shape[0]):
        startT = frameIdx * resolution
        endT = (frameIdx + 1) * resolution
        labIdxBuf[startT:endT] = frameIdx

        # copy the frame's V/U flag, then randomly zero samples
        flags = np.ones(endT - startT) * vuvData[frameIdx]
        flags[np.random.rand(endT - startT) < thre] = 0
        vuvBinBuf[startT:endT] = flags

    py_rw.write_raw_mat(labIdxBuf, outFile)
    py_rw.write_raw_mat(vuvBinBuf, vuvOutFile)
Ejemplo n.º 2
0
def data_normalize(dataFile, dim, mask=None, ran=False):
    """Mean/variance-normalize a raw-matrix file.

    Writes dataFile+'.norm' (normalized data) and dataFile+'.mv' (mean
    vector followed by std vector).  When ran is True the rows are
    shuffled first, using (and caching) a 'random_index' file stored
    next to the data.  Dimensions flagged by mask (>0) get std forced to
    zero and are therefore left un-normalized.
    """
    data = drh.read_raw_mat(dataFile, dim)

    if ran == True:
        ranfile = os.path.dirname(dataFile) + os.path.sep + 'random_index'
        needNewIndex = True
        if os.path.isfile(ranfile):
            print("Found random index %s" % (ranfile))
            randidx = np.asarray(drh.read_raw_mat(ranfile, 1), dtype=np.int32)
            if randidx.shape[0] == data.shape[0]:
                needNewIndex = False
            else:
                print("But it unmatches the data. New random_index will be generated")
        if needNewIndex:
            # generate, shuffle and cache a fresh permutation
            randidx = np.array(range(data.shape[0]))
            random.shuffle(randidx)
            drh.write_raw_mat(randidx, ranfile)
        data = data[randidx, :]

    meanData = data.mean(axis=0)
    stdData = data.std(axis=0)
    if mask is not None:
        stdData[mask > 0] = 0.0  # set to zero

    # normalize only the dims whose std is above a small threshold
    activeDims = stdData > 0.000001
    data[:, activeDims] = (data[:, activeDims] - meanData[activeDims]) / stdData[activeDims]

    drh.write_raw_mat(data, dataFile + '.norm')
    drh.write_raw_mat(np.concatenate((meanData, stdData)), dataFile + '.mv')
Ejemplo n.º 3
0
def generate(inFile1, inFile2, outfile):
    # Build a binary flag per entry of inFile1 (integer indices): flag=1
    # where the F0 value those indices point at (in inFile2) is positive.
    data1 = np.asarray(py_rw.read_raw_mat(inFile1, 1), dtype=np.int32)
    data2 = py_rw.read_raw_mat(inFile2, 1)
    # NOTE(review): 'temp' (the discretized F0) is computed but never used
    # below -- looks like leftover code; confirm before removing.  Uses
    # module-level F0Max/F0Min/F0Inter/F0Conti constants.
    temp, _ = discreteF0.f0Conversion(data2.copy(), F0Max, F0Min, F0Inter,
                                      'c2d', F0Conti)
    data3 = np.zeros(data1.shape)
    data3[data2[data1] > 0] = 1
    py_rw.write_raw_mat(data3, outfile)
Ejemplo n.º 4
0
def prepare_data_s_b(data_dir, data_in_name, data_out_name, batch_num, dim_in, 
                     dim_out, buffer_size, out_data_in_name, out_data_out_name,
                     p_format='<f4', val_set=0.2):
    '''prepare_data_s_b: prepare the data in Split and Batch_mode

    Load batch_num paired input/output raw-matrix batches, shuffle the
    frames, re-split them into chunks of buffer_size frames, and dump each
    chunk as a cPickled (input, output) tuple named 1, 2, ... under
    data_dir/out_data_in_name.  The last val_set fraction of the shuffled
    frames is saved as the chunk named 'val'.
    '''
    # dummy first row; it is dropped later by the permutation index (+1)
    buffer_in = np.zeros([1, dim_in], dtype=np.float32)
    buffer_out= np.zeros([1, dim_out], dtype=np.float32)

    for i in range(batch_num):
        # the batch data is assumed to start from data1
        data_tmp_in = os.path.join(data_dir, data_in_name, str(i+1)) 
        data_tmp_out= os.path.join(data_dir, data_out_name, str(i+1)) 
        assert os.path.isfile(data_tmp_in) and os.path.isfile(data_tmp_out), \
        'not found %s and %s' % (data_tmp_in, data_tmp_out)
        data_tmp_in = drh.read_raw_mat(data_tmp_in, dim_in, p_format=p_format)
        data_tmp_out= drh.read_raw_mat(data_tmp_out, dim_out, p_format=p_format)
        # bug fix: append along the frame axis; np.append without axis
        # flattens the buffers to 1-D and destroys the frame structure
        buffer_in = np.append(buffer_in, data_tmp_in, axis=0)
        buffer_out= np.append(buffer_out, data_tmp_out, axis=0)
        print('processing %d/%d batch' % (i, batch_num))

    # randomize the frame order (the +1 skips the dummy first row)
    data_idx = np.random.permutation(buffer_in.shape[0]-1) + 1
    buffer_in = buffer_in[data_idx]
    buffer_out= buffer_out[data_idx]

    # output as cPickled files; training size is rounded down to a
    # multiple of 100 frames, the rest becomes the validation set
    train_size = int(math.floor((1-val_set)*buffer_in.shape[0]/100))*100
    tmp1 = int(math.floor(train_size/buffer_size))
    index1 = np.arange(tmp1)*buffer_size
    index2 = index1 + buffer_size
    # bug fix: np.ndarray has no .append(); extend with np.append to add
    # the final (possibly partial) chunk [tmp1*buffer_size, train_size)
    if tmp1 > 0:
        index1 = np.append(index1, index2[-1])
        index2 = np.append(index2, train_size)
    else:
        index1 = np.array([0])
        index2 = np.array([train_size])

    file_counter = 1
    for s_idx, e_idx in zip(index1, index2):
        buffer_data = (buffer_in[s_idx:e_idx], buffer_out[s_idx:e_idx])
        # bug fix: write mode 'wb' (was 'rb'), cPickle.dump(obj, file)
        # argument order (was reversed), and fid.close() (was fid.cloe())
        fid = open(os.path.join(data_dir, out_data_in_name, str(file_counter)), 'wb')
        cPickle.dump(buffer_data, fid)
        fid.close()
        file_counter += 1

    if val_set > 0:
        buffer_data = (buffer_in[train_size:], buffer_out[train_size:])
        fid = open(os.path.join(data_dir, out_data_in_name, 'val'), 'wb')
        cPickle.dump(buffer_data, fid)
        fid.close()

    print('Processing Done')
Ejemplo n.º 5
0
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Mask continuous lf0 with a U/V flag file, overwriting the lf0 file.

    Frames whose V/U value is below uvT are replaced by the unvoiced
    marker -1.0e+10.
    """
    lf0Path = inDir + os.path.sep + fileName + f0Ext
    vuPath = uvDir + os.path.sep + fileName + vuExt
    print(fileName, end=' ')
    if not (os.path.isfile(lf0Path) and os.path.isfile(vuPath)):
        print(': not found')
        return
    lf0Data = py_rw.read_raw_mat(lf0Path, 1)
    vuData = py_rw.read_raw_mat(vuPath, 1)
    assert lf0Data.shape[0] == vuData.shape[0], ': lf0 uv unequal length'
    lf0Data[vuData < uvT] = -1.0e+10
    py_rw.write_raw_mat(lf0Data, lf0Path)
    print(': done')
Ejemplo n.º 6
0
def split(fileName, inDir, uvDir, uvT, f0Ext, vuExt):
    """Mask continuous lf0 with a U/V flag file, overwriting the lf0 file.

    Frames whose V/U value is below uvT are replaced by the unvoiced
    marker -1.0e+10.  (Python-2 print-statement variant.)
    """
    conlf0Name = inDir + os.path.sep + fileName + f0Ext
    vuName     = uvDir + os.path.sep + fileName + vuExt
    print fileName,
    if os.path.isfile(conlf0Name) and os.path.isfile(vuName):
        conlf0 = py_rw.read_raw_mat(conlf0Name, 1)
        vu     = py_rw.read_raw_mat(vuName, 1)
        assert conlf0.shape[0] == vu.shape[0], ': lf0 uv unequal length'
        conlf0[vu < uvT] = -1.0e+10
        # overwrite the input lf0 file in place
        py_rw.write_raw_mat(conlf0, conlf0Name)
        print ': done'
    else:
        print ': not found'
Ejemplo n.º 7
0
def ReadCURRENNTWeight(fileName, numFormat = 'f4', swap = 'l'):
    """Parse a CURRENNT binary weight file into per-layer [W, b, Inter] lists.

    File layout: [layerNum, layerNum*5 layer-size entries, flat weights].
    The five size entries per layer are: total weight count, layer size,
    previous-layer size, inputs per block, internal-weight flag.
    Returns a list with one [W, b, Inter] triple per layer.
    """
    networkFile = py_rw.read_raw_mat(fileName, 1, numFormat, swap)
    layerNum    = int(networkFile[0])
    layerSize   = networkFile[1:1+layerNum*5]
    weights     = networkFile[1+layerNum*5:]

    weightMats  = []
    startPos    = 0
    for layerId in np.arange(0, layerNum):
        weightNum = int(layerSize[layerId * 5])
        tSize     = int(layerSize[layerId * 5 + 1])
        pSize     = int(layerSize[layerId * 5 + 2])
        inPerBlock= int(layerSize[layerId * 5 + 3])
        interW    = int(layerSize[layerId * 5 + 4])
        # getWeightNum (defined elsewhere in this file) returns the
        # expected counts: total, weight-matrix, bias, internal weights
        [weightNumCheck, wNum, bNum, iNum]  = getWeightNum(tSize, pSize, inPerBlock, interW)

        assert weightNumCheck == weightNum, "incompatible of the weight format. Please check CURRENNT version"
        weightMat = weights[startPos:startPos+weightNum]
        startPos  = startPos + weightNum
        if pSize * tSize == wNum:
            
            # normal feed-forward matrix
            W = weightMat[0:wNum].reshape([tSize, pSize]);
            b = weightMat[wNum:(wNum + bNum)].reshape([1, tSize]);
            Inter = weightMat[(wNum+bNum):];
        else:
            # blstm or weights for other architecture
            # need to re-write for blstm matrix
            W = weightMat[0 : wNum].reshape([inPerBlock * tSize, pSize]);
            b = weightMat[wNum:(wNum + bNum)].reshape([inPerBlock, tSize]);
            Inter = weightMat[wNum + bNum:];
        
        weightMats.append([W,b,Inter])

    return weightMats
Ejemplo n.º 8
0
def f0ip2f0(f0_ip_file, vuv_file, output_file, vu_threshold=0.5, unvoiced_value=-1.0e+10):
    """Apply a V/U decision to interpolated F0 and save the result.

    Frames whose V/U value falls below vu_threshold are replaced with
    unvoiced_value before writing to output_file.
    """
    f0_data = py_rw.read_raw_lf0(f0_ip_file, 1)
    vuv_flags = py_rw.read_raw_mat(vuv_file, 1)
    assert f0_data.shape[0] == vuv_flags.shape[0], "Error: unequal length %s %s" % (f0_ip_file, vuv_file)
    unvoiced = vuv_flags < vu_threshold
    f0_data[unvoiced] = unvoiced_value
    py_rw.write_raw_mat(f0_data, output_file)
Ejemplo n.º 9
0
def generateLabIndex(labfile, outfile, labDim, resolution):
    """Write, for each output sample, the index of its source lab frame.

    Each of the input frames covers `resolution` samples; the output is a
    1-D vector of frame indices of length frames*resolution.
    """
    data = py_rw.read_raw_mat(labfile, labDim)
    numFrames = int(data.shape[0])
    indexBuf = np.zeros([int(numFrames * resolution)])
    for frame in range(numFrames):
        indexBuf[frame * resolution:(frame + 1) * resolution] = frame
    py_rw.write_raw_mat(indexBuf, outfile)
Ejemplo n.º 10
0
def generateLabIndex(labfile, outfile, labDim, resolution):
    """Write, for each output sample, the index of its source lab frame.

    Each of the input frames covers `resolution` samples; the output is a
    1-D vector of frame indices of length frames*resolution.
    """
    data = py_rw.read_raw_mat(labfile, labDim)

    maxTime = data.shape[0] * resolution

    outBuf = np.zeros([int(maxTime)])

    for idx in np.arange(int(data.shape[0])):
        st = idx * resolution
        et = (idx + 1) * resolution
        outBuf[st:et] = idx
    py_rw.write_raw_mat(outBuf, outfile)
Ejemplo n.º 11
0
def f0convert(f0File, qF0Output, vuvOutputFile, f0Zero, f0Max, f0Min, f0Inter, f0Conti, f0Type):
    """Convert an F0/lf0 file into quantized (discrete) F0 plus a U/V flag file.

    f0Type == 0: input is linear F0 (Hz); voiced frames (>0) are mapped to
    the Mel scale first.  f0Type == 1: input is raw lf0, used as-is.
    The quantized classes are written to qF0Output (unvoiced frames set to
    class 0) and the U/V flags to vuvOutputFile.
    """
    if f0Type == 0:
        data = py_rw.read_raw_mat(f0File, 1)
        idx = data > 0
        # Hz -> Mel, voiced frames only
        data[idx] = 1127.0 * np.log(data[idx]/700.0 + 1)
    elif f0Type == 1:
        data = py_rw.read_raw_lf0(f0File, 1)
    else:
        # bug fix: the original fell through with 'data' undefined and
        # crashed with a NameError for any other f0Type
        raise ValueError("unknown f0Type %s" % str(f0Type))
    # (f0Zero kept for interface compatibility; the dead F0Idx computation
    #  that used it has been removed)
    dataClas, vuv = discreteF0.f0Conversion(data.copy(), f0Max, f0Min, f0Inter, 'c2d', f0Conti)
    dataClas[vuv < 1] = 0.0
    py_rw.write_raw_mat(dataClas, qF0Output)
    py_rw.write_raw_mat(vuv, vuvOutputFile)
Ejemplo n.º 12
0
def meanStdNormMask(fileScps,
                    fileDims,
                    fileNormMask,
                    meanStdOutPath,
                    f0Dim=-1):
    """Compute mean/std over feature files and mask selected dimensions.

    Dimensions covered by a norm mask get mean=0.0 and std=1.0 so that a
    later normalization step leaves them untouched.

    fileScps:     list of file-list (scp) paths, one per feature stream
    fileDims:     feature dimension of each stream
    fileNormMask: per-stream mask; [] = normalize everything,
                  [0] / ['not_norm'] = skip the whole stream,
                  [s, e] = skip absolute dims s..e within the stream
    meanStdOutPath: output path (means followed by stds)
    f0Dim:        F0 dimension index passed to the stats tool, -1 if none
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"

    # calcualte the mean/std
    stats.getMeanStd_merge(fileScps, fileDims, meanStdOutPath + '.unmasked',
                           f0Dim)

    meanStdData = py_rw.read_raw_mat(meanStdOutPath + '.unmasked', 1)

    totalDim = sum(fileDims)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (meanStdOutPath + '.unmasked', totalDim * 2)

    # absolute [start, end) dim range of each feature stream
    featDims = []
    startDim = 0
    for dim in fileDims:
        featDims.append([startDim, startDim + dim])
        startDim = startDim + dim

    for dimRange, normMask in zip(featDims, fileNormMask):
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and (normMask[0] == 0
                                     or normMask[0] == 'not_norm'):
            # disable normalization for the whole stream
            meanStdData[dimRange[0]:dimRange[1]] = 0.0
            meanStdData[dimRange[0] + totalDim:dimRange[1] + totalDim] = 1.0
        elif len(normMask) == 2:
            # bug fix: the assert messages lacked the '%s' placeholder, so
            # a failing assert raised TypeError instead of showing the mask
            assert dimRange[0] <= normMask[0], 'normMask range error %s' % (
                str(normMask))
            assert dimRange[1] >= normMask[1], 'normMask range error %s' % (
                str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0] + totalDim:normMask[1] + totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))

    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
Ejemplo n.º 13
0
def raw2wav(rawFile, wavFile, quanLevel = 255.0, bit=16, samplingRate = 16000):
    """Convert quantized waveform data in [0, quanLevel] into a PCM wav file.

    The de-quantized signal is scaled to the signed integer range of the
    requested bit depth, clipped, and written via scipy.io.wavfile.
    Only 16- and 32-bit output is supported; other values fall back to 16.
    """
    transData = py_rw.read_raw_mat(rawFile, 1)
    recoData = wavformDeconvert(transData, quanLevel)
    # scale to the signed full-scale range (e.g. [-32768, 32767]) and clip
    fullScale = np.power(2.0, bit - 1)
    recoData = recoData * fullScale
    recoData[recoData >= fullScale] = fullScale - 1
    recoData[recoData < -fullScale] = -fullScale
    if bit == 16:
        outData = np.asarray(recoData, dtype=np.int16)
    elif bit == 32:
        outData = np.asarray(recoData, dtype=np.int32)
    else:
        print("Only be able to save wav in int16 and int32 type")
        print("Save to int16")
        outData = np.asarray(recoData, dtype=np.int16)
    scipy.io.wavfile.write(wavFile, samplingRate, outData)
Ejemplo n.º 14
0
def ReadCURRENNTWeight(fileName, numFormat='f4', swap='l'):
    """Parse a CURRENNT binary weight file into per-layer [W, b, Inter] lists.

    File layout: [layerNum, layerNum*5 size entries, flat weights].  The
    five size entries per layer are: total weight count, layer size,
    previous-layer size, inputs per block, internal-weight flag.
    """
    raw = py_rw.read_raw_mat(fileName, 1, numFormat, swap)
    nLayers = int(raw[0])
    sizeInfo = raw[1:1 + nLayers * 5]
    flatWeights = raw[1 + nLayers * 5:]

    result = []
    offset = 0
    for layer in np.arange(0, nLayers):
        info = sizeInfo[layer * 5:layer * 5 + 5]
        nWeights, curSize, prevSize = int(info[0]), int(info[1]), int(info[2])
        blockIn, internalFlag = int(info[3]), int(info[4])
        checkTotal, wCount, bCount, iCount = getWeightNum(
            curSize, prevSize, blockIn, internalFlag)

        assert checkTotal == nWeights, "incompatible of the weight format. Please check CURRENNT version"
        chunk = flatWeights[offset:offset + nWeights]
        offset += nWeights
        if prevSize * curSize == wCount:
            # plain feed-forward layer: one weight matrix, one bias row
            W = chunk[0:wCount].reshape([curSize, prevSize])
            b = chunk[wCount:wCount + bCount].reshape([blockIn * 0 + 1, curSize])
        else:
            # blstm / other architectures: blockIn weight blocks per unit
            W = chunk[0:wCount].reshape([blockIn * curSize, prevSize])
            b = chunk[wCount:wCount + bCount].reshape([blockIn, curSize])
        Inter = chunk[wCount + bCount:]
        result.append([W, b, Inter])

    return result
Ejemplo n.º 15
0
def meanStdNormMask(fileScps, fileDims, fileNormMask, meanStdOutPath, f0Dim=-1):
    """Compute mean/std over feature files and mask selected dimensions.

    Masked dimensions get mean=0.0 / std=1.0 so normalization leaves them
    untouched.  fileNormMask entries: [] = normalize the stream,
    [0] = skip the whole stream, [s, e] = skip absolute dims s..e.
    """
    assert len(fileDims) == len(fileNormMask), \
        "Unequal length feature dim & norm mask"
    
    # calcualte the mean/std
    stats.getMeanStd_merge(fileScps, fileDims, meanStdOutPath + '.unmasked', f0Dim)

    meanStdData = py_rw.read_raw_mat(meanStdOutPath + '.unmasked', 1)
    
    totalDim = sum(fileDims)
    assert meanStdData.shape[0] == totalDim * 2, \
        "%s dimension not %d" % (meanStdOutPath + '.unmasked', totalDim * 2)

    # absolute [start, end) dim range of each feature stream
    featDims = []
    startDim = 0
    for dim in fileDims:
        featDims.append([startDim, startDim + dim])
        startDim = startDim + dim
    
    for dimRange, normMask in zip(featDims, fileNormMask):
        if len(normMask) == 0:
            pass
        elif len(normMask) == 1 and normMask[0] == 0:
            meanStdData[dimRange[0]:dimRange[1]] = 0.0
            meanStdData[dimRange[0]+totalDim:dimRange[1]+totalDim] = 1.0
        elif len(normMask) == 2:
            # bug fix: the assert messages lacked the '%s' placeholder, so
            # a failing assert raised TypeError instead of showing the mask
            assert dimRange[0] <= normMask[0], 'normMask range error %s' % (str(normMask))
            assert dimRange[1] >= normMask[1], 'normMask range error %s' % (str(normMask))
            meanStdData[normMask[0]:normMask[1]] = 0.0
            meanStdData[normMask[0]+totalDim:normMask[1]+totalDim] = 1.0
        else:
            print("Wrong format of NormMask %s" % (str(normMask)))
        print('normmask %s' % (str(normMask)))
        
    py_rw.write_raw_mat(meanStdData, meanStdOutPath)
Ejemplo n.º 16
0
        # (fragment of a larger CLI dispatch; the enclosing 'if' is outside
        #  this chunk.)  Compute the global variance (gv) of each file in
        # fileList and save the per-file results as one matrix.
        fileDir  = sys.argv[2]
        fileList = sys.argv[3]
        fileExt  = sys.argv[4]
        fileDim  = int(sys.argv[5])
        fileOut  = sys.argv[6]

        # first pass: count the files
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                cnt = cnt + 1

        gvData = np.zeros([cnt, fileDim])

        # second pass: compute gv per file (voiced frames only for F0 data)
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                fileName = fileName.rstrip('\n')
                data = py_rw.read_raw_mat(fileDir + os.path.sep + fileName + fileExt, fileDim)
                if (fileExt == '.lf0' or fileExt =='.f0') and fileDim == 1:
                    data = data[data>0]
                gvData[cnt, :] = gv(data)
                cnt = cnt + 1
                #print fileName
        py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
        print fileOut, '\t', np.median(gvData, axis=0)
                
                
        
    

Ejemplo n.º 17
0
def prepareData():
    """ prepreData: 
        1. create the file list
        2. create the symbolic link to the feature data
        3. create the index file (used by CURRENNT)
        4. create package data of index file (data.nc)
        5. calculate the mean and std for a specific data set
    """
    # create directories
    # (mkdir calls are wrapped in try/except OSError so that re-running
    #  against an existing directory tree is harmless)
    dataDir = cfg.nnDataDirName
    try:
        os.mkdir(dataDir)
    except OSError:
        pass

    dataListPath = dataDir + os.path.sep + 'lists'
    try:
        os.mkdir(dataListPath)
    except OSError:
        pass

    dataRawDir = dataDir + os.path.sep + cfg.idxDirName
    try:
        os.mkdir(dataRawDir)
    except OSError:
        pass

    # decide whether create the symbolic link to each file
    if len(cfg.inputDirs) == 1 and len(cfg.outputDirs) == 1:
        # no validation set
        flagFileUseSymbolLink = False
    elif listSameContent(cfg.inputDirs) and listSameContent(
            cfg.outputDirs) and listSameContent(
                cfg.inputDirs[0]) and listSameContent(cfg.outputDirs[0]):
        # all data have been in the same directory
        flagFileUseSymbolLink = False
    else:
        flagFileUseSymbolLink = True

    #dataLinkDir = dataDir + os.path.sep + cfg.linkDirname
    dataLinkDirInput = dataDir + os.path.sep + cfg.linkDirname_input
    dataLinkDirOutput = dataDir + os.path.sep + cfg.linkDirname_output
    # prepare for data link
    if flagFileUseSymbolLink:
        try:
            os.mkdir(dataLinkDirInput)
            os.mkdir(dataLinkDirOutput)
        except OSError:
            pass
    else:
        # single source directory: replace any stale links with links to
        # the first input/output directories
        if os.path.islink(dataLinkDirInput):
            os.system("rm %s" % (dataLinkDirInput))
        if os.path.islink(dataLinkDirOutput):
            os.system("rm %s" % (dataLinkDirOutput))
        os.system("ln -s %s %s" % (cfg.inputDirs[0][0], dataLinkDirInput))
        os.system("ln -s %s %s" % (cfg.outputDirs[0][0], dataLinkDirOutput))

    # create file list
    # (one pass per data division, e.g. train / validation)
    for dataList, inputDirSet, outputDirSet, dataPart in zip(
            cfg.dataLists, cfg.inputDirs, cfg.outputDirs, cfg.dataDivision):

        display.self_print('Processing ' + dataPart + ' data', 'highlight')

        if dataList is None:
            # get the cross-set of file list
            listInput = readwrite.list_file_name_in_dir(inputDirSet[0])
            listOutput = readwrite.list_file_name_in_dir(outputDirSet[0])
            fileList = listInput
            if inputDirSet:
                for inputDir in inputDirSet:
                    listInput2 = readwrite.list_file_name_in_dir(inputDir)
                    fileList, diffSet = crossSet(fileList, listInput2)
                    # files missing from this directory go to *.dif.lst
                    tmpName = os.path.join(
                        dataListPath,
                        dataPart + os.path.basename(inputDir) + '.dif.lst')
                    readwrite.write_txt_list(diffSet, tmpName)

            if outputDirSet:
                for outputDir in outputDirSet:
                    listOutput2 = readwrite.list_file_name_in_dir(outputDir)
                    fileList, diffSet = crossSet(fileList, listOutput2)
                    tmpName = os.path.join(
                        dataListPath,
                        dataPart + os.path.basename(outputDir) + '.dif.lst')
                    readwrite.write_txt_list(diffSet, tmpName)

            # writing the list of file name
            random.shuffle(fileList)
            fileListFilePath = dataListPath + os.path.sep + dataPart + '.lst'
            readwrite.write_txt_list(fileList, fileListFilePath)
        else:
            # a user-provided list: copy it over and read it back
            fileListFilePath = dataListPath + os.path.sep + dataPart + '.lst'
            os.system("cp %s %s" % (dataList, fileListFilePath))
            fileList = readwrite.read_txt_list(fileListFilePath)

        # before start, take a simple test on the configuration of feature dimension
        # (frame counts of the first file must agree across streams within 10%)
        frameNum = None
        for inputDir, featDim, featName in zip(inputDirSet, cfg.inputDim,
                                               cfg.inputExt):
            inputFile = os.path.join(inputDir, fileList[0]) + '.' + featName
            if os.path.isfile(inputFile):
                tmpframeNum = readwrite.read_raw_mat(inputFile,
                                                     featDim).shape[0]
                if frameNum is None:
                    frameNum = tmpframeNum
                elif np.abs(frameNum - tmpframeNum) * 1.0 / frameNum > 0.1:
                    display.self_print("Large mismatch of frame numbers %s" %
                                       (fileList[0]))
                    display.self_print(
                        "Please check whether inputDim are correct", 'error')
                    display.self_print("Or check input features are corrupted",
                                       'error')
                    raise Exception("Error: mismatch of frame numbers")

        for outputDir, featDim, featName in zip(outputDirSet, cfg.outputDim,
                                                cfg.outputExt):
            outputFile = os.path.join(outputDir, fileList[0]) + '.' + featName
            if os.path.isfile(outputFile):
                tmpframeNum = readwrite.read_raw_mat(outputFile,
                                                     featDim).shape[0]
                if np.abs(frameNum - tmpframeNum) * 1.0 / frameNum > 0.1:
                    display.self_print("Large mismatch of frame numbers %s" %
                                       (fileList[0]))
                    display.self_print(
                        "Please check whether inputDim are correct", 'error')
                    display.self_print("Or check input features are corrupted",
                                       'error')
                    raise Exception("Error: mismatch of frame numbers")

        # create file directories
        dataSaveDir = dataDir + os.path.sep + dataPart
        try:
            os.mkdir(dataSaveDir)
        except OSError:
            pass

        inputScpList = []
        outputScpList = []

        # create the fileName + fileExt lists
        # create symbolic link
        for inputDir, featDim, featName in zip(inputDirSet, cfg.inputDim,
                                               cfg.inputExt):
            tmpFileScp = dataSaveDir + os.path.sep + featName + '.scp'
            inputScpList.append(tmpFileScp)
            filePtr = open(tmpFileScp, 'w')
            for fileName in fileList:
                # write full path to the feature
                filePtr.write('%s%s%s.%s\n' %
                              (inputDir, os.path.sep, fileName, featName))
                if cfg.step01Prepare_LINK is True and flagFileUseSymbolLink:
                    os.system("ln -f -s %s%s%s.%s %s%s%s.%s" % \
                              (inputDir, os.path.sep, fileName, featName,
                               dataLinkDirInput, os.path.sep, fileName, featName))
            filePtr.close()

        for outputDir, featDim, featName in zip(outputDirSet, cfg.outputDim,
                                                cfg.outputExt):
            tmpFileScp = dataSaveDir + os.path.sep + featName + '.scp'
            outputScpList.append(tmpFileScp)
            filePtr = open(tmpFileScp, 'w')
            for fileName in fileList:
                filePtr.write('%s%s%s.%s\n' %
                              (outputDir, os.path.sep, fileName, featName))
                if cfg.step01Prepare_LINK is True and flagFileUseSymbolLink:
                    os.system("ln -f -s %s%s%s.%s %s%s%s.%s" % \
                              (outputDir, os.path.sep, fileName, featName,
                               dataLinkDirOutput, os.path.sep, fileName, featName))
            filePtr.close()

        # create index file list
        filePtr = open(dataSaveDir + os.path.sep + cfg.idxFileName + '.scp',
                       'w')
        for fileName in fileList:
            filePtr.write('%s%s%s.%s\n' %
                          (dataRawDir, os.path.sep, fileName, cfg.idxFileName))
        filePtr.close()

        # create index files
        if cfg.step01Prepare_IDX is True or cfg.step01Prepare_PACK is True:
            # create the lab index lists
            cmd = 'python %s/dataPrepare/getLabIdx5ms.py' % (
                cfg.path_pyTools_scripts)
            cmd = '%s %s %s %s %s %s %s' % (
                cmd, inputDirSet[0], cfg.inputExt[0], cfg.inputDim[0],
                dataRawDir, cfg.idxFileName, fileListFilePath)
            display.self_print('Creating time index files', 'highlight')
            exe_cmd(cmd, cfg.debug)
        else:
            display.self_print('skip creating time index', 'highlight')

        # package the data
        if cfg.step01Prepare_IDX is True or cfg.step01Prepare_PACK is True:
            # write data_config.cfg
            writeDataConfig(dataSaveDir + os.path.sep + 'data_config.py',
                            cfg.idxFileName + '.scp', cfg.fileNumInEachNCPack)
            # pack data
            packDataCmd = 'sh %s/sub_05_package_datanc.sh %s %s' % (
                cfg.path_scripts, dataSaveDir, cfg.path_pyTools_scripts)

            display.self_print('Packing data', 'highlight')
            exe_cmd(packDataCmd, cfg.debug)
        else:
            display.self_print('skip packing data', 'highlight')

    # create file list
    # (second pass: rebuild the scp paths and compute mean/std on the
    #  division named by cfg.computMeanStdOn)
    for inputDirSet, outputDirSet, dataPart in zip(cfg.inputDirs,
                                                   cfg.outputDirs,
                                                   cfg.dataDivision):

        dataSaveDir = dataDir + os.path.sep + dataPart
        inputScpList = []
        outputScpList = []

        for inputDir, featDim, featName in zip(inputDirSet, cfg.inputDim,
                                               cfg.inputExt):
            inputScpList.append(dataSaveDir + os.path.sep + featName + '.scp')

        for outputDir, featDim, featName in zip(outputDirSet, cfg.outputDim,
                                                cfg.outputExt):
            outputScpList.append(dataSaveDir + os.path.sep + featName + '.scp')

        # calculate mean and std
        if dataPart == cfg.computMeanStdOn and cfg.step01Prepare_MV is True:
            display.self_print('Calculating mean and std', 'highlight')

            meanStdTool.meanStdNormMask(
                inputScpList, cfg.inputDim, cfg.inputNormMask,
                dataSaveDir + os.path.sep + cfg.nnDataInputMV)
            display.self_print(
                "\nSave input mean-std as %s" %
                (os.path.join(dataSaveDir, cfg.nnDataInputMV)), 'highlight')

            meanStdTool.meanStdNormMask(
                outputScpList, cfg.outputDim, cfg.outputNormMask,
                dataSaveDir + os.path.sep + cfg.nnDataOutputMV)
            display.self_print(
                "\nSave output mean-std as %s" %
                (os.path.join(dataSaveDir, cfg.nnDataOutputMV)), 'highlight')
        else:
            display.self_print('skip calculating mean and std', 'highlight')
Ejemplo n.º 18
0
def read_fft_data_currennt(path, fft_length):
    """Read CURRENNT FFT output with interleaved real/imag columns.

    Returns (data_re, data_im), each of shape (frames, fft_length/2 + 1).
    """
    # bug fix: under Python 3, fft_length / 2 + 1 is a float; cast to int
    # (matches the read_fft_to_log_mag_currennt variant in this file;
    #  no-op under Python 2 integer division)
    data = readwrite.read_raw_mat(path, int(fft_length / 2 + 1) * 2)
    data_re = data[:, 0::2]
    data_im = data[:, 1::2]
    return data_re, data_im
Ejemplo n.º 19
0
def temprapper(fileName, dim, dataMv, dataCounter):
    """Load one raw-matrix file and fold it into the running WelFord stats."""
    return WelFord(py_rw.read_raw_mat(fileName, dim), dataMv, dataCounter)
Ejemplo n.º 20
0
# Dimension of the binary raw F0 data
fileDim  = 1
# U/V threshold
F0Zero   = 10.0


###
# Scan every F0 file listed in fileScp (defined earlier, outside this
# chunk) and accumulate the total frame count plus the global min/max of
# voiced F0 values (frames with F0 > F0Zero).
frameSum = 0
frameMin = 1000
frameMax = 0

with open(fileScp, 'r') as filePtr:
    for idx, fileName in enumerate(filePtr):
        fileName = fileName.rstrip('\n')
        print idx, fileName,
        filePath = fileDir + os.path.sep + fileName + fileExt
        data = py_rw.read_raw_mat(filePath, fileDim)

        if fileDim > 1:
            # only the first column is used for multi-dimensional files
            temp = np.zeros([data.shape[0]])
            temp = data[:,0]
            data = temp

        frameSum = frameSum + data.shape[0]
        data = data[data>F0Zero]
        tmpmax = np.max(data)
        tmpmin = np.min(data)
        print tmpmax, tmpmin
        frameMax = np.max([frameMax, tmpmax])
        frameMin = np.min([frameMin, tmpmin])

Ejemplo n.º 21
0
import os
import sys
from ioTools import readwrite as py_rw

if __name__ == "__main__":
    # Print the mean and std of the F0 dimension from a packed mean/std
    # vector.  argv: meanstd_path  exts(comma-sep)  dims('_'-sep)  f0_ext
    meanstd_data = sys.argv[1]
    acousExts = sys.argv[2]
    acousDims = sys.argv[3]
    f0Ext = sys.argv[4]

    extList = acousExts.split(',')
    dimList = [int(x) for x in acousDims.split('_')]

    assert len(extList) == len(
        dimList), "Error: unequal length of acousDims, acousExts"

    # locate the starting dimension of the F0 stream
    dimCnt = 0
    f0Dim = -1
    for ext, dim in zip(extList, dimList):
        if ext == f0Ext:
            f0Dim = dimCnt
        dimCnt = dimCnt + dim

    meanstd_data = py_rw.read_raw_mat(meanstd_data, 1)

    # the packed vector stores all means first, then all stds
    if f0Dim >= 0:
        print("%f %f" % (meanstd_data[f0Dim], meanstd_data[dimCnt + f0Dim]))
Ejemplo n.º 22
0
# Dimension of the binary raw F0 data
fileDim = 1
# U/V threshold
F0Zero = 10.0

###
# Scan every F0 file listed in fileScp (defined earlier, outside this
# chunk) and accumulate the total frame count plus the global min/max of
# voiced F0 values (frames with F0 > F0Zero).
frameSum = 0
frameMin = 1000
frameMax = 0

with open(fileScp, 'r') as filePtr:
    for idx, fileName in enumerate(filePtr):
        fileName = fileName.rstrip('\n')
        print idx, fileName,
        filePath = fileDir + os.path.sep + fileName + fileExt
        data = py_rw.read_raw_mat(filePath, fileDim)

        if fileDim > 1:
            # only the first column is used for multi-dimensional files
            temp = np.zeros([data.shape[0]])
            temp = data[:, 0]
            data = temp

        frameSum = frameSum + data.shape[0]
        data = data[data > F0Zero]
        tmpmax = np.max(data)
        tmpmin = np.min(data)
        print tmpmax, tmpmin
        frameMax = np.max([frameMax, tmpmax])
        frameMin = np.min([frameMin, tmpmin])

print "\nPlease use the information below for F0 quantization"
Ejemplo n.º 23
0
            # (fragment: the enclosing function and 'if' branch start
            #  outside this chunk; tmp_* variables are defined there)
            display.self_print(
                'Error: not found test data.nc in %s' % (tmp_sub_nc_dir),
                'error')
            quit()
        tmp_test_data_nc_args = ','.join(tmp_test_data_nc_list)
    else:
        display.self_print('Error: not found %s' % (tmp_test_nc_scp), 'error')
        quit()

    # No need to get F0 mean and std
    # (dead branch kept for reference: it located the F0 stream inside
    #  the packed mean/std vector)
    if False:
        # get F0 mean and std
        dimCnt = 0
        f0Dim = -1

        meanstd_data = readwrite.read_raw_mat(tmp_mv_data, 1)
        for acousExt, acousDim in zip(cfg.ext_acous_feats,
                                      cfg.dim_acous_feats):
            if acousExt == cfg.f0_ext:
                f0Dim = dimCnt
            dimCnt = dimCnt + acousDim

        if f0Dim >= 0:
            f0mean = meanstd_data[f0Dim]
            f0std = meanstd_data[f0Dim + dimCnt]
        else:
            f0mean = -1
            f0std = -1

    # command line of CURRENNT
Ejemplo n.º 24
0
def read_fft_to_log_mag_currennt(path, fft_length):
    """Load a CURRENNT binary FFT file and return its log-magnitude (dB).

    The file stores, per frame, fft_length/2 + 1 complex bins with real
    and imaginary parts interleaved (re, im, re, im, ...).
    """
    num_bins = int(fft_length / 2 + 1)
    raw = readwrite.read_raw_mat(path, num_bins * 2)
    # de-interleave: even columns are real parts, odd columns imaginary
    real_part = raw[:, 0::2]
    imag_part = raw[:, 1::2]
    magnitude = amplitude_re_im(real_part, imag_part)
    return amplitude_to_db(magnitude)
Ejemplo n.º 25
0
# command-line argument (parsed earlier, outside this view)
dataset_of_target = args[4]

dataname = basename(data_dir)
# directory holding YAAPT-extracted per-utterance F0 files (<utt>.f0)
yaap_pitch_dir = join(data_dir, 'yaapt_pitch')
pitch_out_dir = join(out_dir, "f0")

# per-speaker F0 statistics (JSON files), laid out as <statsdir>/<dataname>/<spk>
statsdir = "exp/vc_toolkit_exp_voice_privacy/feats/f0/"

# Write pitch features
pitch_file = join(data_dir, 'pitch.scp')
pitch2shape = {}
with ReadHelper('scp:' + pitch_file) as reader:
    for key, mat in reader:
        # remember the Kaldi frame count of each utterance
        pitch2shape[key] = mat.shape[0]
        # column 1 of the Kaldi pitch matrix is taken as F0
        # -- assumes Kaldi pitch format (nccf, pitch); TODO confirm
        kaldi_f0 = mat[:, 1].squeeze().copy()
        yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key + '.f0'), 1)
        #unvoiced = np.where(yaapt_f0 == 0)[0]
        #kaldi_f0[unvoiced] = 0
        #readwrite.write_raw_mat(kaldi_f0, join(pitch_out_dir, key+'.f0'))
        # NOTE(review): both arrays are 1-D, so comparing the shape tuples
        # is equivalent to comparing their lengths here
        if kaldi_f0.shape < yaapt_f0.shape:
            print("Warning yaapt_f0 > kaldi_f0 for utt:", key)
            yaapt_f0 = yaapt_f0[:kaldi_f0.shape[0]]
        # pad/trim the YAAPT F0 to the Kaldi frame count (zeros = unvoiced)
        f0 = np.zeros(kaldi_f0.shape)
        f0[:yaapt_f0.shape[0]] = yaapt_f0

        # load the source-speaker F0 stats; the speaker id is presumably the
        # prefix of the utterance key before '-'/'_' -- verify against caller
        source_stats = {}
        with open(statsdir + dataname + "/" +
                  key.split("-")[0].split("_")[0]) as f:
            source_stats = json.load(f)

        selected_target_speaker_list = [target_spk]
if __name__ == "__main__":

    # mode 'gv': compute the global variance (GV) of each utterance's
    # features and save them all as one matrix (one row per utterance)
    if sys.argv[1] == 'gv':
        fileDir = sys.argv[2]       # directory of the feature files
        fileList = sys.argv[3]      # text file listing utterance names
        fileExt = sys.argv[4]       # feature file extension (e.g. '.lf0')
        fileDim = int(sys.argv[5])  # feature dimension
        fileOut = sys.argv[6]       # output directory

        # first pass: count the utterances to pre-allocate the GV matrix
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                cnt = cnt + 1

        gvData = np.zeros([cnt, fileDim])

        # second pass: compute the per-utterance GV
        cnt = 0
        with open(fileList, 'r') as filePtr:
            for idx, fileName in enumerate(filePtr):
                fileName = fileName.rstrip('\n')
                data = py_rw.read_raw_mat(
                    fileDir + os.path.sep + fileName + fileExt, fileDim)
                # for 1-D F0 data, only voiced frames (> 0) enter the GV
                if (fileExt == '.lf0' or fileExt == '.f0') and fileDim == 1:
                    data = data[data > 0]
                # gv() is defined elsewhere in this file -- presumably the
                # per-dimension variance of the utterance; verify
                gvData[cnt, :] = gv(data)
                cnt = cnt + 1
                #print fileName
        py_rw.write_raw_mat(gvData, fileOut + os.path.sep + 'gv.data.bin')
        print(fileOut, '\t', np.median(gvData, axis=0))
        # remember the offset of F0 within the concatenated feature
        # vector (used below to print its mean/std)
        if acousExt == f0Ext:
            f0Dim = dimCnt

        # clean the extension
        if acousExt.startswith('.'):
            acousExt = acousExt[1:]

        # write the file script (one feature path per utterance)
        fileOutput = dataLstDir + os.path.sep + acousExt + '.scp'
        fileListsBuff.append(fileOutput)
        writePtr = open(fileOutput, 'w')
        with open(dataLst, 'r') as readfilePtr:
            for line in readfilePtr:
                filename = line.rstrip('\n')
                writePtr.write('%s/%s.%s\n' % (acousDir, filename, acousExt))
        writePtr.close()

        # advance the offset by this feature's dimension
        dimCnt = dimCnt + acousDim
    
    
    # compute mean/std over all features -- meanStdNormMask is defined
    # elsewhere; presumably writes [means, stds] to mvoutputPath
    meanStdTool.meanStdNormMask(fileListsBuff, acousDimList, normMaskList, mvoutputPath,
                                f0Dim = f0Dim)
    
    # mean/std vector layout: first dimCnt entries are means, next
    # dimCnt entries are stds -- hence meanstd_data[dimCnt + f0Dim]
    meanstd_data = py_rw.read_raw_mat(mvoutputPath, 1)
    if f0Dim >= 0:
        print("Please note:")
        print("F0 mean: %f" % (meanstd_data[f0Dim]))
        print("F0 std: %f" % (meanstd_data[dimCnt+f0Dim]))


def createFileLst(dataDirs, dataExts, dataDim, dataListDirs, trnList, valList):
    """Create train/val data lists from pre-defined utterance lists.

    Args:
        dataDirs: ','-separated feature directories
        dataExts: ','-separated feature extensions (parallel to dataDirs)
        dataDim: '_'-separated feature dimensions (ints)
        dataListDirs: output directory for train.lst / val.lst
        trnList, valList: text files with the pre-defined train/val utterances

    Output *.scp / *.lst will be in dataListDirs.

    Raises:
        Exception: when no data is found, when a pre-defined utterance is
            missing from the data directories, or when feature frame counts
            are grossly inconsistent with the configured dimensions.
    """
    dataDirs = dataDirs.split(',')
    dataExts = dataExts.split(',')
    dataDims = [int(x) for x in dataDim.split('_')]
    assert len(dataDirs) == len(
        dataExts), 'Error: sub_1_prepare_list.py dataDirs and dataExts wrong'

    # intersect the file lists of all feature directories
    dataList = lstdirNoExt(dataDirs[0], dataExts[0])
    for dataDir, dataExt in zip(dataDirs[1:], dataExts[1:]):
        listTmp = lstdirNoExt(dataDir, dataExt)
        dataList = crossSet(dataList, listTmp)

    # check if file exists
    if len(dataList) < 1:
        display.self_print("Error: fail to found data. Please check:", 'error')
        display.self_print(
            "path_acous_feats, ext_acous_feats, path_waveform in config.py;",
            'error')
        display.self_print("Please also check the names of input data files.",
                           'error')
        raise Exception("Error: fail to generate file list.")

    # every pre-defined train/val utterance must exist in the data
    pre_defined_trn_list = readwrite.read_txt_list(trnList)
    pre_defined_val_list = readwrite.read_txt_list(valList)
    diff_trn_list = diff_list(pre_defined_trn_list, dataList)
    diff_val_list = diff_list(pre_defined_val_list, dataList)
    if len(diff_trn_list):
        display.self_print("Error: training data missing. Please check:",
                           'error')
        print(diff_trn_list)
        raise Exception("Error: fail to prepare file list.")

    if len(diff_val_list):
        display.self_print("Error: validation data missing. Please check:",
                           'error')
        print(diff_val_list)
        raise Exception("Error: fail to prepare file list.")

    # simple sanity check on the configured feature dimensions:
    # read the first utterance from every feature dir and take the
    # largest frame count as the reference
    frameNum = None
    for inputDir, featDim, featName in zip(dataDirs[0:-1], dataDims[0:-1],
                                           dataExts[0:-1]):
        inputFile = os.path.join(inputDir,
                                 dataList[0]) + '.' + featName.lstrip('.')
        if os.path.isfile(inputFile):
            tmpframeNum = readwrite.read_raw_mat(inputFile, featDim).shape[0]
            if frameNum is None or frameNum < tmpframeNum:
                frameNum = tmpframeNum

    # guard: if no feature file was readable, frameNum stays None and the
    # relative-difference check below would raise TypeError -- skip it
    if frameNum is not None:
        for inputDir, featDim, featName in zip(dataDirs[0:-1], dataDims[0:-1],
                                               dataExts[0:-1]):
            inputFile = os.path.join(inputDir,
                                     dataList[0]) + '.' + featName.lstrip('.')
            if not os.path.isfile(inputFile):
                continue
            tmpframeNum = readwrite.read_raw_mat(inputFile, featDim).shape[0]
            if np.abs(frameNum - tmpframeNum) * 1.0 / frameNum > 0.1:
                # an utterance-level feature (single vector of featDim
                # values) is not a frame-count mismatch
                if featDim == readwrite.read_raw_mat(inputFile, 1).shape[0]:
                    pass
                else:
                    display.self_print("Large mismatch of frame numbers %s" %
                                       (inputFile))
                    display.self_print(
                        "Please check whether inputDim are correct", 'error')
                    display.self_print("Or check input features are corrupted",
                                       'error')
                    raise Exception("Error: mismatch of frame numbers")

    display.self_print('Generating data lists in to %s' % (dataListDirs),
                       'highlight')

    trainSet = pre_defined_trn_list
    valSet = pre_defined_val_list

    if len(valSet) > len(trainSet):
        display.self_print(
            "Warning: validation set is larger than training set",
            'warning')
        display.self_print("It's better to change train_utts in config.py",
                           'warning')

    # write train.lst / val.lst; 'with' guarantees the handles are closed
    trainFileOut = dataListDirs + os.path.sep + 'train.lst'
    with open(trainFileOut, 'w') as trainFilePtr:
        for fileName in trainSet:
            trainFilePtr.write('%s\n' % (fileName))

    if len(valSet):
        valFileOut = dataListDirs + os.path.sep + 'val.lst'
        with open(valFileOut, 'w') as valFilePtr:
            for fileName in valSet:
                valFilePtr.write('%s\n' % (fileName))
    display.self_print(
        '\ttrain/val sizes: %d, %d' % (len(trainSet), len(valSet)),
        'warning')

    # done
    return
def createFileLst(dataDirs,
                  dataExts,
                  dataDim,
                  dataListDirs,
                  trainortest,
                  trainSetRatio=0.8,
                  random_seed=12345):
    """Create randomized train/val (or test) data lists.

    Args:
        dataDirs: ','-separated feature directories
        dataExts: ','-separated feature extensions (parallel to dataDirs)
        dataDim: '_'-separated feature dimensions (ints)
        dataListDirs: output directory for the *.lst files
        trainortest: 'test' writes test.lst; anything else writes
            train.lst (and val.lst when a train/val split is made)
        trainSetRatio: <1.0 -> fraction of utterances for training;
            1.0 <= value < #utterances -> absolute training-set size;
            otherwise a default ratio of 0.8 is used
        random_seed: seed for the shuffle, for reproducible splits

    Output *.scp will be in dataListDirs.

    Raises:
        Exception: when no data is found or feature frame counts are
            grossly inconsistent with the configured dimensions.
    """
    dataDirs = dataDirs.split(',')
    dataExts = dataExts.split(',')
    dataDims = [int(x) for x in dataDim.split('_')]
    assert len(dataDirs) == len(
        dataExts), 'Error: sub_1_prepare_list.py dataDirs and dataExts wrong'

    # intersect the file lists of all feature directories
    dataList = lstdirNoExt(dataDirs[0], dataExts[0])
    for dataDir, dataExt in zip(dataDirs[1:], dataExts[1:]):
        listTmp = lstdirNoExt(dataDir, dataExt)
        dataList = crossSet(dataList, listTmp)

    # check if file exists
    if len(dataList) < 1:
        display.self_print("Error: fail to generate file list. Please check:",
                           'error')
        display.self_print(
            "path_acous_feats, ext_acous_feats, path_waveform in config.py;",
            'error')
        display.self_print("Please also check the names of input data files.",
                           'error')
        raise Exception("Error: fail to generate file list.")

    # sort first so the seeded shuffle is reproducible regardless of the
    # directory-listing order
    dataList.sort()
    random.seed(random_seed)
    random_shuffle(dataList)

    # simple sanity check on the configured feature dimensions:
    # read the first utterance from every feature dir and take the
    # largest frame count as the reference
    frameNum = None
    for inputDir, featDim, featName in zip(dataDirs[0:-1], dataDims[0:-1],
                                           dataExts[0:-1]):
        inputFile = os.path.join(inputDir,
                                 dataList[0]) + '.' + featName.lstrip('.')
        if os.path.isfile(inputFile):
            tmpframeNum = readwrite.read_raw_mat(inputFile, featDim).shape[0]
            if frameNum is None or frameNum < tmpframeNum:
                frameNum = tmpframeNum

    # guard: if no feature file was readable, frameNum stays None and the
    # relative-difference check below would raise TypeError -- skip it
    if frameNum is not None:
        for inputDir, featDim, featName in zip(dataDirs[0:-1], dataDims[0:-1],
                                               dataExts[0:-1]):
            inputFile = os.path.join(inputDir,
                                     dataList[0]) + '.' + featName.lstrip('.')
            if not os.path.isfile(inputFile):
                continue
            tmpframeNum = readwrite.read_raw_mat(inputFile, featDim).shape[0]
            if np.abs(frameNum - tmpframeNum) * 1.0 / frameNum > 0.1:
                # an utterance-level feature (single vector of featDim
                # values) is not a frame-count mismatch
                if featDim == readwrite.read_raw_mat(inputFile, 1).shape[0]:
                    pass
                else:
                    display.self_print("Large mismatch of frame numbers %s" %
                                       (inputFile))
                    display.self_print(
                        "Please check whether inputDim are correct", 'error')
                    display.self_print("Or check input features are corrupted",
                                       'error')
                    raise Exception("Error: mismatch of frame numbers")

    display.self_print('Generating data lists in to %s' % (dataListDirs),
                       'highlight')

    if trainortest == 'test':
        # generating for test set
        display.self_print('\ttest size: %d' % (len(dataList)), 'highlight')
        testFileOut = dataListDirs + os.path.sep + 'test.lst'
        with open(testFileOut, 'w') as testFilePtr:
            for fileName in dataList:
                testFilePtr.write('%s\n' % (fileName))
    else:
        # determine the train and validation set if necessary
        if trainSetRatio is not None and trainSetRatio > 0.0:
            if trainSetRatio < 1.0:
                # a ratio
                trainSetDivide = int(np.round(len(dataList) * trainSetRatio))
            elif trainSetRatio < len(dataList):
                # absolute value of the number of training utterances
                # NOTE(review): trainSetRatio == 1.0 lands here and yields a
                # single training utterance, not a 100% split -- confirm
                trainSetDivide = int(trainSetRatio)
            else:
                # a default ratio 0.8
                display.self_print(
                    'Warning: train_utts = 0.8 is used to divide train/val',
                    'warning')
                trainSetDivide = int(np.round(len(dataList) * 0.8))

            trainSet = dataList[0:trainSetDivide]
            valSet = dataList[trainSetDivide:]

            if len(valSet) > len(trainSet):
                display.self_print(
                    "Warning: validation set is larger than training set",
                    'warning')
                display.self_print(
                    "It's better to change train_utts in config.py", 'warning')

            # write train.lst / val.lst; 'with' guarantees closed handles
            trainFileOut = dataListDirs + os.path.sep + 'train.lst'
            with open(trainFileOut, 'w') as trainFilePtr:
                for fileName in trainSet:
                    trainFilePtr.write('%s\n' % (fileName))

            if len(valSet):
                valFileOut = dataListDirs + os.path.sep + 'val.lst'
                with open(valFileOut, 'w') as valFilePtr:
                    for fileName in valSet:
                        valFilePtr.write('%s\n' % (fileName))

            display.self_print(
                '\ttrain/val sizes: %d, %d' % (len(trainSet), len(valSet)),
                'warning')
        else:
            # no split requested: everything goes into train.lst
            display.self_print('\ttrain/val sizes: %d, 0' % (len(dataList)),
                               'warning')
            trainFileOut = dataListDirs + os.path.sep + 'train.lst'
            with open(trainFileOut, 'w') as trainFilePtr:
                for fileName in dataList:
                    trainFilePtr.write('%s\n' % (fileName))
    # done
    return