def pitchtrackGetter(fname, outputPitchtrack):
    # pitchContours object
    print 'extracting pitch contours ... ...'
    pitchMakam = pc.PitchExtractMakam()
    pitchMakam.setup()
    
    contours_bins, contours_contourSaliences, contours_start_times, duration = pitchMakam.run(fname)

    contours_start_times = contours_start_times.tolist()

    # filter short pitch contours
    print 'filtering short pitch contours ... ...'
    contours_bins, contours_contourSaliences, contours_start_times, toRmIndexCF = cr.contourFilter(contours_bins, contours_contourSaliences, contours_start_times, framesize = 2048, hopsize = 128)

    # feature extraction
    print 'extracting features ... ...'
    lengthContour, meanPitchContour, sdPitchContour, totalSalience, meanSalience, sdSalience, mfccs = fc.featureExtract(contours_bins, contours_contourSaliences)

    # load mean sd for feature normalization
    meansdpath = './model/meansdJson.json'
    with open(meansdpath) as data_file:
        data = json.load(data_file)
        meansd = data['meansd']

    # normalize the feature vector
    featureVec = []
    for ii in range(len(lengthContour)):
        f1 = [(lengthContour[ii]/meansd[0][0])/meansd[0][1], (meanPitchContour[ii]/meansd[1][0])/meansd[1][1], (sdPitchContour[ii]/meansd[2][0])/meansd[2][1], (totalSalience[ii]/meansd[3][0])/meansd[3][1], (meanSalience[ii]/meansd[4][0])/meansd[4][1], (sdSalience[ii]/meansd[5][0])/meansd[5][1]]
        for kk in range(len(mfccs[ii])):
            f2.append((mfccs[ii][kk]-meansd[6+kk][0])/meansd[6+kk][1])
        feature = f1 + f2
        featureVec.append(feature)

    # classifying pitch contours
    print 'classifying pitch contours ... ...'
    knn = joblib.load('./model/jingju_pitchContoursClassificationModel.pkl') 
    
    classification = knn.predict(featureVec) # 0: voice 1: nonvoice
    classification = classification.tolist()

    contours_bins_out = []
    contours_contourSaliences_out = []
    contours_start_times_out = []
    
    for ii in range(len(classification)):
        if classification[ii] == 0:
           contours_bins_out.append(contours_bins[ii])
           contours_contourSaliences_out.append(contours_contourSaliences[ii])
           contours_start_times_out.append(contours_start_times[ii])
    
    # dumping contours into pitch track
    out = pitchMakam.pitchtrackDumper(contours_bins_out, contours_contourSaliences_out, contours_start_times_out, duration)

    np.savetxt(outputPitchtrack, out)

    return
def voiceContoursGetter(pitchContourJson, classificationResult):
    contours_bins, contours_contourSaliences, contours_start_times, duration = cr.contourReader(pitchContourJson)
    contours_bins, contours_contourSaliences, contours_start_times, toRmIndex_cal = cr.contourFilter(contours_bins, contours_contourSaliences, contours_start_times, framesize = 2048, hopsize = 128)

    toRmIndex_json, classification = classificationResultReader(classificationResult)
            
    if len(toRmIndex_cal) == len(toRmIndex_json):
        contours_bins_out = []
        contours_contourSaliences_out = []
        contours_start_times_out = []
    
        for ii in range(len(classification)):
            if classification[ii] == 0:
                contours_bins_out.append(contours_bins[ii])
                contours_contourSaliences_out.append(contours_contourSaliences[ii])
                contours_start_times_out.append(contours_start_times[ii])
        return contours_bins_out, contours_contourSaliences_out, contours_start_times_out, duration
    else:
        print 'problem with toRmIndex length, not equal.'
        return
def featureNormalization(featureList):
    '''
    the structure of normalized feature list is [(feature0, mean0, sd0), (feature1, mean1, sd1), ...]
    '''
    # scale every feature except the last entry, which is passed through as-is
    scaled = []
    for feat in featureList[:-1]:
        normalizedFeature, mean, sd = fc.scaleFeatures(feat)
        scaled.append((normalizedFeature, mean, sd))
    scaled.append(featureList[-1])
    return scaled

# Running file counter shared with the non-voice loop below; printed as a
# crude progress indicator against the total file count.
jj = 1
for filename in filenamesvoicePath:
    print jj, len(allfilenames)
    # read the stored contours, then drop short ones (same framesize/hopsize
    # as used everywhere else in this script)
    contours_bins, contours_contourSaliences, contours_start_times, duration = cr.contourReader(filename)
    contours_bins, contours_contourSaliences, contours_start_times, toRmIndexCF = cr.contourFilter(contours_bins, contours_contourSaliences, contours_start_times, framesize = 2048, hopsize = 128)
    #contours_bins, contours_contourSaliences, contours_start_times, toRmIndexRF = registerFilter(contours_bins, contours_contourSaliences, contours_start_times, tonicHz = 350.0) # need to give tonic firstly

    # per-contour features for classifier training
    lengthContour, meanPitchContour, sdPitchContour, totalSalience, meanSalience, sdSalience, mfccs = fc.featureExtract(contours_bins, contours_contourSaliences)

    # accumulate features with class label 0 (voice); presumably the last
    # argument is the class label -- TODO confirm in featureListCreate
    featureList = featureListCreate(featureList, lengthContour, meanPitchContour, sdPitchContour, totalSalience, meanSalience, sdSalience, mfccs, 0)
    jj += 1

for filename in filenamesnonvoicePath:
    print jj, len(allfilenames)
    contours_bins, contours_contourSaliences, contours_start_times, duration = cr.contourReader(filename)
    contours_bins, contours_contourSaliences, contours_start_times, toRmIndexCF = cr.contourFilter(contours_bins, contours_contourSaliences, contours_start_times, framesize = 2048, hopsize = 128)
    #contours_bins, contours_contourSaliences, contours_start_times, toRmIndexRF = registerFilter(contours_bins, contours_contourSaliences, contours_start_times, tonicHz = 350.0) # need to give tonic firstly

    lengthContour, meanPitchContour, sdPitchContour, totalSalience, meanSalience, sdSalience, mfccs = fc.featureExtract(contours_bins, contours_contourSaliences)