def main_eval_all_files_summary(argv):
    '''
    Evaluate every detected ``*.lab`` file of one algorithm and write a
    one-row summary to a CSV file.

    argv is expected to be
    ``[progName, algorithm_name, refs_dir, detected_dir, output_path]``.

    For each ``<name>.lab`` in detected_dir the reference file
    ``<name>.wordonset.tsv`` in refs_dir is evaluated with
    main_eval_one_file, which yields an (error, percentage) pair. Both series
    are reduced with getMeanAndStDevError to (mean, st. dev., median).

    If output_path does not exist yet, a header row is written first;
    otherwise the summary row is appended to the existing file.

    Exits via sys.exit with a usage message when argv does not have exactly
    five entries.
    '''
    if len(argv) != 5:
        # report the program name that was actually passed in; fall back to
        # sys.argv[0] only when argv is empty
        prog_name = argv[0] if argv else sys.argv[0]
        sys.exit(
            'usage: {} <algorithm name> <path to dir with reference word boundaries> '
            '<path to dir with detected word boundaries> <path_output>'.format(prog_name))

    algorithm_name, refs_dir_URI, detected_dir_URI, output_URI = argv[1:5]
    lab_files = glob.glob(os.path.join(detected_dir_URI, "*.lab"))

    errors = []
    percentages = []
    for lab_file in lab_files:
        # reference file shares the base name, with a different suffix
        stem = os.path.splitext(os.path.basename(lab_file))[0]
        ref_file = os.path.join(refs_dir_URI, stem + '.wordonset.tsv')
        error, percentage = main_eval_one_file(["dummy", ref_file, lab_file])
        errors.append(error)
        percentages.append(percentage)

    meanE, stdevE, medianE = getMeanAndStDevError(errors)
    meanP, stdevP, medianP = getMeanAndStDevError(percentages)

    # column order must match the header below: mean, median, st. dev.
    summary_row = [algorithm_name] + [
        '{:.2f}'.format(value)
        for value in (meanE, medianE, stdevE, meanP, medianP, stdevP)
    ]

    if not os.path.exists(output_URI):
        header = ['Submission', 'Mean error', 'Median error', 'St. dev. error',
                  'Mean percentage', 'Median percentage', 'St. dev. percentage']
        writeCsv(output_URI, [header, summary_row])
    else:
        writeCsv(output_URI, [summary_row], append=1)
def evalOneFile(argv):
    '''
    Evaluate the alignment error of one detected file against its annotation.

    argv is expected to be
    ``[progName, URI_annotation, URI_detected, evalLevel]`` optionally followed
    by ``URI_audio``. The audio URI is only needed by the optional Praat
    visualisation (commented out below), so it may be omitted.

    Prints mean and standard deviation, and returns
    ``(mean, stDev, median, alignmentErrors)`` where the statistics come from
    getMeanAndStDevError and alignmentErrors from evalAlignmentError.

    Prints a usage line and exits when the argument count is wrong.
    '''
    if len(argv) not in (4, 5):
        print("usage: {} <URI_annotation> <URI_detected> <evalLevel> [<URI_audio>]".format(argv[0]))
        sys.exit()

    annoURI = argv[1]
    detectedURI = argv[2]
    evalLevel = int(argv[3])
    # only consumed by the optional Praat step below
    audio_URI = argv[4] if len(argv) == 5 else None

    alignmentErrors = evalAlignmentError(annoURI, detectedURI, evalLevel)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    # same text as the Python 2 statement `print mean, " ", stDev`
    print(str(mean) + "   " + str(stDev))

    ### OPTIONAL : open detection and annotation in praat. can be provided on request
#     wordAlignedSuffix = '"wordsAligned"'
#     phonemeAlignedSuffix =  '"phonemesAligned"'
#     alignedResultPath, fileNameWordAnno = addAlignmentResultToTextGridFIle( detectedURI, annoURI,   wordAlignedSuffix, phonemeAlignedSuffix)
#
#     openTextGridInPraat(alignedResultPath, fileNameWordAnno, audio_URI)

    return mean, stDev, median, alignmentErrors
Exemple #3
0
def writeResultToFile(resultSet, totalErrors, totalCorrectDurReference,
                      totalCorrectDur, totalDurations, ALPHA):
    '''
    Append an evaluation report to a time-stamped file in the current
    working directory (``alignError_<timestamp>.out``).

    Written in order: ALPHA, the second element of every pair in resultSet
    (the first element is ignored), then a summary with the two accuracy
    ratios (each total divided by totalDurations) and the mean of totalErrors
    as returned by getMeanAndStDevError. The summary is also logged.

    Raises ZeroDivisionError when totalDurations is zero.
    '''
    currTime = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
    filename = os.path.join(os.getcwdu(), 'alignError_' + currTime + '.out')

    logger.info("\n Output file is: " + filename)

    # a single handle for all writes; closed even if something below raises
    with open(filename, 'a') as outputFileHandle:
        outputFileHandle.write('\n' + str(ALPHA))

        for _, listLine in resultSet:
            outputFileHandle.write(listLine)

        # total mean
        mean, stDev, median = getMeanAndStDevError(totalErrors)
        # float() guards against integer truncation when totals are ints
        durationTotal = float(totalDurations)
        result = 'tatal scoreDev accuracy {:.2f} \n total accuracy: {:.2f} \n total mean: {} \n'.format(
            totalCorrectDurReference / durationTotal,
            totalCorrectDur / durationTotal, mean)

        logger.info(result)
        outputFileHandle.write(result)

    print('written to file ' + filename)
def writeResultToFile(resultSet,  totalErrors, ALPHA):
    '''
    Append an evaluation report to a time-stamped file in the current
    working directory (``alignError_<timestamp>.out``).

    Written in order: ALPHA, the second element of every pair in resultSet
    (the first element is ignored), then the mean of totalErrors as returned
    by getMeanAndStDevError. The final line is also logged.
    '''
    currTime = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
    filename = os.path.join(os.getcwdu(), 'alignError_' + currTime + '.out')

    logger.info("\n Output file is: " + filename )

    # a single handle for all writes; closed even if something below raises
    with open(filename, 'a') as outputFileHandle:
        outputFileHandle.write('\n' + str(ALPHA))

        for _, listLine in resultSet:
            outputFileHandle.write(listLine)

        # total mean
        mean, stDev, median = getMeanAndStDevError(totalErrors)
        result = '\n' + 'total mean: ' + str(mean) + '\n'

        logger.info(result)
        outputFileHandle.write(result)

    print('written to file ' + filename)
def writeResultToFile(resultSet, totalErrors, ALPHA):
    '''
    Append an evaluation report to a time-stamped file in the current
    working directory (``alignError_<timestamp>.out``).

    Written in order: ALPHA, the second element of every pair in resultSet
    (the first element is ignored), then the mean of totalErrors as returned
    by getMeanAndStDevError. The final line is also logged.
    '''
    currTime = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
    filename = os.path.join(os.getcwdu(), 'alignError_' + currTime + '.out')

    logger.info("\n Output file is: " + filename)

    # a single handle for all writes; closed even if something below raises
    with open(filename, 'a') as outputFileHandle:
        outputFileHandle.write('\n' + str(ALPHA))

        for _, listLine in resultSet:
            outputFileHandle.write(listLine)

        # total mean
        mean, stDev, median = getMeanAndStDevError(totalErrors)
        result = '\n' + 'total mean: ' + str(mean) + '\n'

        logger.info(result)
        outputFileHandle.write(result)

    print('written to file ' + filename)
def main_eval_all_files_summary(argv):
    '''
    Evaluate every detected ``*.lab`` file of one algorithm and write a
    one-row summary to a CSV file.

    argv is expected to be
    ``[progName, algorithm_name, refs_dir, detected_dir, output_path]``.

    For each ``<name>.lab`` in detected_dir the reference file
    ``<name>.wordonset.tsv`` in refs_dir is evaluated with
    main_eval_one_file, which yields an (error, percentage) pair. Both series
    are reduced with getMeanAndStDevError to (mean, st. dev., median).

    If output_path does not exist yet, a header row is written first;
    otherwise the summary row is appended to the existing file.

    Exits via sys.exit with a usage message when argv does not have exactly
    five entries.
    '''
    if len(argv) != 5:
        # report the program name that was actually passed in; fall back to
        # sys.argv[0] only when argv is empty
        prog_name = argv[0] if argv else sys.argv[0]
        sys.exit(
            'usage: {} <algorithm name> <path to dir with reference word boundaries> '
            '<path to dir with detected word boundaries> <path_output>'.format(prog_name))

    algorithm_name, refs_dir_URI, detected_dir_URI, output_URI = argv[1:5]
    lab_files = glob.glob(os.path.join(detected_dir_URI, "*.lab"))

    errors = []
    percentages = []
    for lab_file in lab_files:
        # reference file shares the base name, with a different suffix
        stem = os.path.splitext(os.path.basename(lab_file))[0]
        ref_file = os.path.join(refs_dir_URI, stem + '.wordonset.tsv')
        error, percentage = main_eval_one_file(["dummy", ref_file, lab_file])
        errors.append(error)
        percentages.append(percentage)

    meanE, stdevE, medianE = getMeanAndStDevError(errors)
    meanP, stdevP, medianP = getMeanAndStDevError(percentages)

    # column order must match the header below: mean, median, st. dev.
    summary_row = [algorithm_name] + [
        '{:.2f}'.format(value)
        for value in (meanE, medianE, stdevE, meanP, medianP, stdevP)
    ]

    if not os.path.exists(output_URI):
        header = ['Submission', 'Mean error', 'Median error', 'St. dev. error',
                  'Mean percentage', 'Median percentage', 'St. dev. percentage']
        writeCsv(output_URI, [header, summary_row])
    else:
        writeCsv(output_URI, [summary_row], append=1)
Exemple #7
0
def doitOneChunk(argv):
    '''
    Align one recording chunk and log the mean and standard deviation of its
    alignment errors.

    argv is expected to be
    ``[progName, pathToComposition, URI_recording_no_ext, withDuration,
    withSynthesis, ALPHA, ONLY_MIDDLE_STATE, evalLevel]`` optionally followed
    by ``usePersistentFiles`` (defaults to the string 'True').

    withDuration and withSynthesis must be the literal strings 'True' or
    'False'; anything else terminates via sys.exit. A wrong argument count
    prints a usage line and exits. Returns nothing.
    '''
    if len(argv) not in (8, 9):
        print("usage: {}  <pathToComposition> <URI_recording_no_ext> <withDuration=True> <withSynthesis> <ALPHA> <ONLY_MIDDLE_STATE> <evalLevel> <usePersistentFiles=True>".format(argv[0]))
        sys.exit()

    def parseBoolFlag(flagName, rawValue):
        # accept only the literal strings 'True' / 'False'
        if rawValue == 'True':
            return True
        if rawValue == 'False':
            return False
        sys.exit("{} can be only True or False".format(flagName))

    URIrecordingNoExt = argv[2]
    whichSection = getSectionNumberFromName(URIrecordingNoExt)

    pathToComposition = argv[1]
    withDuration = parseBoolFlag('withDuration', argv[3])
    withSynthesis = parseBoolFlag('withSynthesis', argv[4])

    ALPHA = float(argv[5])
    # NOTE(review): forwarded as the raw string from argv - confirm that
    # Parameters does not treat it as a boolean (the string 'False' is truthy)
    ONLY_MIDDLE_STATE = argv[6]
    evalLevel = int(argv[7])

    params = Parameters(ALPHA, ONLY_MIDDLE_STATE)

    # kept as a string; passed on unchanged to the aligner
    usePersistentFiles = argv[8] if len(argv) == 9 else 'True'

    # NOTE(review): newer NumPy releases may reject a non-numeric threshold -
    # confirm against the installed version
    set_printoptions(threshold='nan')

    ################## load lyrics and models
    htkParser = None
    if withDuration:
        htkParser = HtkConverter()
        htkParser.load(MODEL_URI, HMM_LIST_URI)

    alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName = alignDependingOnWithDuration(
        URIrecordingNoExt, whichSection, pathToComposition, withDuration,
        withSynthesis, evalLevel, params, usePersistentFiles, htkParser)

    mean, stDev, median = getMeanAndStDevError(alignmentErrors)
#     writeListOfListToTextFile(detectedWordList, None, '/Users/joro/Downloads/test.txt')
    logger.info("mean : {} st dev: {} ".format(mean, stDev))
Exemple #8
0
def evalDtw(argv):
    '''
    For the recordings in a directory whose names start with a pattern,
    evaluate every decoded file against its annotation and print the total
    error statistics as ``( median , mean , stDev )``.

    argv is expected to be ``[progName, pathToRecordings, pattern]``
    optionally followed by the extension of the decoded files (defaults to
    '.dtwDurationsAligned').

    Side effect: changes the process working directory to pathToRecordings.
    Prints a message and exits when the argument count is wrong or when the
    number of decoded files differs from the number of annotation files.
    '''
    if len(argv) not in (3, 4):
        print("usage: {}  <pathToRecordings> <pattern> <decodedExtension>".format(argv[0]))
        sys.exit()

    DETECTED_EXT = argv[3] if len(argv) == 4 else '.dtwDurationsAligned'

    # resolve to an absolute path BEFORE chdir: joining a relative directory
    # onto file names after chdir would point at <dir>/<dir>/<file>
    recordingsDir = os.path.abspath(argv[1])
    os.chdir(recordingsDir)

    # glob gives no ordering guarantee; sort both lists so that zip() below
    # pairs each decoded file with the annotation of the same recording
    # get detected files with starting pattern
    listDecodedFiles = [
        os.path.join(recordingsDir, name)
        for name in sorted(glob.glob(argv[2] + '*' + DETECTED_EXT))
    ]
    # get annot files with starting pattern
    listAnnoFiles = [
        os.path.join(recordingsDir, name)
        for name in sorted(glob.glob(argv[2] + '*' + ANNOTATION_EXT))
    ]

    for annoFile in listAnnoFiles:
        print(annoFile)

    # check matching decoded
    if len(listDecodedFiles) != len(listAnnoFiles):
        print("{} decoded and {} annotations. they should be equal".format(
            len(listDecodedFiles), len(listAnnoFiles)))
        sys.exit()

    totalErrors = []
    for URI_decoded, URI_annotation in zip(listDecodedFiles, listAnnoFiles):
        # NOTE(review): evalOneFile must accept this 4-element argv - confirm
        # against its definition
        mean, stDev, median, currAlignmentErrors = evalOneFile(
            ['blah', URI_annotation, URI_decoded, '1'])
        totalErrors.extend(currAlignmentErrors)

    mean, stDev, median = getMeanAndStDevError(totalErrors)
    # same text as the Python 2 statement: print "(", median, ",", mean, ",", stDev, ")"
    print(' '.join(["(", str(median), ",", str(mean), ",", str(stDev), ")"]))
def evalDtw(argv):
    '''
    For the recordings in a directory whose names start with a pattern,
    evaluate every decoded file against its annotation and print the total
    error statistics as ``( median , mean , stDev )``.

    argv is expected to be ``[progName, pathToRecordings, pattern]``
    optionally followed by the extension of the decoded files (defaults to
    '.dtwDurationsAligned').

    Side effect: changes the process working directory to pathToRecordings.
    Prints a message and exits when the argument count is wrong or when the
    number of decoded files differs from the number of annotation files.
    '''
    if len(argv) not in (3, 4):
        print("usage: {}  <pathToRecordings> <pattern> <decodedExtension>".format(argv[0]))
        sys.exit()

    DETECTED_EXT = argv[3] if len(argv) == 4 else '.dtwDurationsAligned'

    # resolve to an absolute path BEFORE chdir: joining a relative directory
    # onto file names after chdir would point at <dir>/<dir>/<file>
    recordingsDir = os.path.abspath(argv[1])
    os.chdir(recordingsDir)

    # glob gives no ordering guarantee; sort both lists so that zip() below
    # pairs each decoded file with the annotation of the same recording
    # get detected files with starting pattern
    listDecodedFiles = [
        os.path.join(recordingsDir, name)
        for name in sorted(glob.glob(argv[2] + '*' + DETECTED_EXT))
    ]
    # get annot files with starting pattern
    listAnnoFiles = [
        os.path.join(recordingsDir, name)
        for name in sorted(glob.glob(argv[2] + '*' + ANNOTATION_EXT))
    ]

    for annoFile in listAnnoFiles:
        print(annoFile)

    # check matching decoded
    if len(listDecodedFiles) != len(listAnnoFiles):
        print("{} decoded and {} annotations. they should be equal".format(
            len(listDecodedFiles), len(listAnnoFiles)))
        sys.exit()

    totalErrors = []
    for URI_decoded, URI_annotation in zip(listDecodedFiles, listAnnoFiles):
        # NOTE(review): evalOneFile must accept this 4-element argv - confirm
        # against its definition
        mean, stDev, median, currAlignmentErrors = evalOneFile(
            ['blah', URI_annotation, URI_decoded, '1'])
        totalErrors.extend(currAlignmentErrors)

    mean, stDev, median = getMeanAndStDevError(totalErrors)
    # same text as the Python 2 statement: print "(", median, ",", mean, ",", stDev, ")"
    print(' '.join(["(", str(median), ",", str(mean), ",", str(stDev), ")"]))
Exemple #10
0
def main(argv):
    '''
    Align one recording chunk to the lyrics of one composition section,
    evaluate the alignment, print ``( mean , stDev )`` and open the result in
    Praat.

    argv is expected to be
    ``[progName, pathToComposition, whichSection, URI_recording_no_ext]``;
    whichSection is converted with int(). A wrong argument count prints a
    usage line and exits.

    Returns ``(mean, stDev, alignmentErrors)``.
    '''
    if len(argv) != 4:
        print("usage: {}  <pathToComposition> <whichSection> <URI_recording_no_ext>".format(argv[0]))
        sys.exit()

    # the hard-coded developer-local defaults that used to precede these
    # assignments were always overwritten by argv and have been dropped
    URIrecordingNOExt = argv[3]
    URIrecordingWav = URIrecordingNOExt + AUDIO_EXTENSION
    pathToComposition = argv[1]
    whichSection = int(argv[2])

    lyrics = loadLyrics(pathToComposition, whichSection)

    withSynthesis = 1

    URIrecordingAnno = URIrecordingNOExt + PHRASE_ANNOTATION_EXT

    outputHTKPhoneAlignedURI = Aligner.alignOnechunk(MODEL_URI,
                                                     URIrecordingWav, lyrics,
                                                     URIrecordingAnno, '/tmp/',
                                                     withSynthesis)
    EVALLEVEL = 2

    alignmentErrors = evalAlignmentError(URIrecordingAnno,
                                         outputHTKPhoneAlignedURI, EVALLEVEL)

    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    # same text as the Python 2 statement: print "(", mean, ",", stDev, ")"
    print(' '.join(["(", str(mean), ",", str(stDev), ")"]))

    ### OPTIONAL : open in praat
    withDuration = False
    visualiseInPraat(URIrecordingNOExt, withDuration, outputHTKPhoneAlignedURI,
                     [])

    return mean, stDev, alignmentErrors

    # NOTE(review): everything below is unreachable because of the return
    # above. It looks like the body of a separate phrase-level test routine
    # whose `def` line is missing; kept until its origin is confirmed.
    audioName = '01_Bakmiyor_0_zemin'
    annotationURI = os.path.join(PATH_TEST_DATASET,  audioName + ANNOTATION_EXT)

    # TODO: load from file
#     detectedURI = os.path.join(PATH_TEST_DATASET,  audioName +  '.phrasesDurationAligned')

    detectedList =    [ [0.386834650351, 0.996834650351,    '_SAZ_'],
                     [0.996834650351,3.17683465035,    u'Bakmıyor'],
                      [3.17683465035,4.44683465035,  u'çeşmi'],
                      [4.44683465035,6.02683465035,    'siyah'],
                      [6.02683465035,11.5068346504,    u'feryâde']]

    alignmentErrors = _evalAlignmentError(annotationURI, detectedList, tierAliases.phraseLevel)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    print(str(mean) + "   " + str(stDev))
    
    ############# FROM HERE ON: old testing code for word-level eval 
#     tmpMLF= '/Users/joro/Documents/Phd/UPF/turkish-makam-lyrics-2-audio-test-data/muhayyerkurdi--sarki--duyek--ruzgar_soyluyor--sekip_ayhan_ozisik/1-05_Ruzgar_Soyluyor_Simdi_O_Yerlerde/1-05_Ruzgar_Soyluyor_Simdi_O_Yerlerde_nakarat2_from_192.962376_to_225.170507.phone-level.output'
#     listWordsAndTs = mlf2WordAndTsList(tmpMLF)
#   
#     
#     
#   
# # TODO: error in parsing of sertan's textGrid
#     textGridFile = '/Users/joro/Documents/Phd/UPF/turkish-makam-lyrics-2-audio-test-data/muhayyerkurdi--sarki--duyek--ruzgar_soyluyor--sekip_ayhan_ozisik/1-05_Ruzgar_Soyluyor_Simdi_O_Yerlerde/1-05_Ruzgar_Soyluyor_Simdi_O_Yerlerde.TextGrid'
# #     textGridFile='/Volumes/IZOTOPE/adaptation_data/kani_karaca-cargah_tevsih.TextGrid'
# #     textGridFile = '/Users/joro/Documents/Phd/UPF/Example_words_phonemes.TextGrid'
#     textGridFile = '/Users/joro/Documents/Phd/UPF/adaptation_data_soloVoice/04_Hamiyet_Yuceses_-_Bakmiyor_Cesm-i_Siyah_Feryade/04_Hamiyet_Yuceses_-_Bakmiyor_Cesm-i_Siyah_Feryade_gazel.wordAnnotation.TextGrid'
def doitOneRecording(argv):
    '''
    For the audio files in a directory whose names start with a pattern,
    align each one and accumulate the alignment errors over all of them.

    argv is expected to be
    ``[progName, pathToComposition, pathToRecordings, pattern, withDuration,
    withSynthesis, ALPHA, ONLY_MIDDLE_STATE, evalLevel]`` optionally followed
    by ``usePersistentFiles`` (defaults to the string 'True').

    withDuration and withSynthesis must be the literal strings 'True' or
    'False'; anything else terminates via sys.exit. A wrong argument count
    prints a usage line and exits.

    Side effect: changes the process working directory to pathToRecordings.

    Returns ``(mean, stDev, totalErrors)``. When no errors were collected,
    mean and stDev are empty lists.
    '''
    if len(argv) not in (9, 10):
        print("usage: {}  <pathToComposition>  <pathToRecordings> <pattern> <withDuration=True/False> <withSynthesis> <ALPHA>  <ONLY_MIDDLE_STATE> <evalLevel> <usePersistentFiles=True> ".format(argv[0]))
        sys.exit()

    def parseBoolFlag(flagName, rawValue):
        # accept only the literal strings 'True' / 'False'
        if rawValue == 'True':
            return True
        if rawValue == 'False':
            return False
        sys.exit("{} can be only True or False".format(flagName))

    # resolve to an absolute path BEFORE chdir: joining a relative directory
    # onto file names after chdir would point at <dir>/<dir>/<file>
    recordingsDir = os.path.abspath(argv[2])
    os.chdir(recordingsDir)

    # get audio files with starting pattern
    pattern = argv[3] + '*' + AUDIO_EXT
    listAudioFiles = [
        os.path.join(recordingsDir, name) for name in glob.glob(pattern)
    ]

    for audioFile in listAudioFiles:
        logger.debug(audioFile)

    pathToComposition = argv[1]
    withDuration = parseBoolFlag('withDuration', argv[4])
    withSynthesis = parseBoolFlag('withSynthesis', argv[5])

    ALPHA = float(argv[6])
    # NOTE(review): forwarded as the raw string from argv - confirm that
    # Parameters does not treat it as a boolean (the string 'False' is truthy)
    ONLY_MIDDLE_STATE = argv[7]

    params = Parameters(ALPHA, ONLY_MIDDLE_STATE)

    evalLevel = int(argv[8])

    # kept as a string; passed on unchanged to the aligner
    usePersistentFiles = argv[9] if len(argv) == 10 else 'True'

    totalErrors = []

    htkParser = None
    if withDuration:
        htkParser = HtkConverter()
        htkParser.load(MODEL_URI, HMM_LIST_URI)

    for audioURI in listAudioFiles:
        URIrecordingNoExt = os.path.splitext(audioURI)[0]
        logger.debug("PROCESSING {}".format(URIrecordingNoExt))
        whichSection = getSectionNumberFromName(URIrecordingNoExt)

        currAlignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName = alignDependingOnWithDuration(
            URIrecordingNoExt, whichSection, pathToComposition, withDuration,
            withSynthesis, evalLevel, params, usePersistentFiles, htkParser)

        totalErrors.extend(currAlignmentErrors)

#         visualiseInPraat(URIrecordingNoExt, withDuration, detectedWordList, grTruthDurationWordList)

    # defaults returned when nothing was aligned
    mean = []
    stDev = []
    if len(totalErrors) != 0:
        mean, stDev, median = getMeanAndStDevError(totalErrors)
        # one string: a stray comma used to turn this into a tuple, so the
        # log showed a tuple repr instead of the message
        infoA = "Total mean: {}, st dev: {} ALPHA: {}".format(mean, stDev, ALPHA)

        logger.info(infoA)
    return mean, stDev, totalErrors