def evalAccuracyTest():

######### for test logic see WordLevelEvaluator instead

    PATH_TEST_DATASET = '../example/'
      
    annotationURI = os.path.join(PATH_TEST_DATASET,  'grTruth.TextGrid')
    
    #  load from file
#     detectedURI = os.path.join(PATH_TEST_DATASET,  audioName +  '.phrasesDurationAligned')
    detectedTokenList = readListOfListTextFile(os.path.join(PATH_TEST_DATASET,  'detected.aligned'))
    
    
    ###############
    annotationURI = '/Users/joro/Documents/Phd/UPF/arias_dev_01_t_70//laosheng-erhuang_04.TextGrid'
    
    detectedTokenList = readListOfListTextFile('/Users/joro/Documents/Phd/UPF/arias_dev_01_t_70/laosheng-erhuang_04_49.8541936425_108.574785469.syllables')
    startIdx=1; endIdx=13
    
    #################
    annotationURI = '/Users/joro/Documents/Phd/UPF/arias_dev_01_t_70/laosheng-erhuang_04.TextGrid'
    detectedTokenList = readListOfListTextFile('/Users/joro/Documents/Phd/UPF/arias_dev_01_t_70/laosheng-erhuang_04_134.647686205_168.77679257.syllables')
    startIdx=15; endIdx=26

    
    whichTier=3
    durationCorrect, totalLength  = _evalAccuracy(annotationURI, detectedTokenList,whichTier , startIdx, endIdx)
    print durationCorrect
    print totalLength
    print durationCorrect/totalLength
# ---- Example #2 (snippet separator from scrape) ----
def test_oracle_jingju(URIrecordingNoExt,  whichSentence, fromPhonemeIdx, toPhonemeIdx):
    '''
    read phoneme-level ground truth and test with dan-xipi_02
    '''
    
    ANNOTATION_EXT = '.TextGrid'
    listSentences = divideIntoSentencesFromAnno(URIrecordingNoExt + ANNOTATION_EXT) #uses TextGrid annotation to derive structure. TODO: instead of annotation, uses score
    
    withSynthesis = False
    currSentence = listSentences[whichSentence]
    
    # consider only part of audio
  
    fromTs = currSentence[0]
    toTs = currSentence[1]

    
    lyrics = loadLyricsFromTextGridSentence(currSentence)
    
    tokenLevelAlignedSuffix = '.syllables_oracle'
    detectedAlignedfileName = URIrecordingNoExt + '_' + str(fromTs) + '_' + str(toTs) + '_'  + tokenLevelAlignedSuffix
    
    if os.path.isfile(detectedAlignedfileName):
        print "{} already exists. No decoding".format(detectedAlignedfileName)
        
        from Utilz import readListOfListTextFile
        detectedTokenList  = readListOfListTextFile(detectedAlignedfileName)
        
    else:
        detectedTokenList = decodeWithOracle(lyrics, URIrecordingNoExt, fromTs, toTs, fromPhonemeIdx, toPhonemeIdx)
          
        if not os.path.isfile(detectedAlignedfileName):
            from PraatVisualiser import tokenList2TabFile
            detectedAlignedfileName =  tokenList2TabFile(detectedTokenList, URIrecordingNoExt, tokenLevelAlignedSuffix)
          
    # eval on phrase level
    evalLevel = 2
    
    fromSyllable = currSentence[2]
    toSyllable = currSentence[3]
    

    correctDuration, totalDuration = _evalAccuracy(URIrecordingNoExt + ANNOTATION_EXT, detectedTokenList, evalLevel, fromSyllable, toSyllable )
    print "accuracy= {}".format(correctDuration / totalDuration)
    
    return detectedTokenList
# ---- Example #3 (snippet separator from scrape) ----
def getReferenceDurations(URI_recording_noExt, lyricsWithModels, evalLevel):
    '''
    Timestamps of words according to reference durations read from score.
    Used to obtain the so-called 'score-deviation' metric; not used in decoding.

    @param URI_recording_noExt: recording URI without extension; module-level
        ANNOTATION_EXT is appended to locate the TextGrid annotation
    @param lyricsWithModels: lyrics with attached duration models/states network
    @param evalLevel: annotation tier/level to evaluate on
    @return: (correctDuration, totalDuration) as computed by _evalAccuracy
    '''
    annotationURI = URI_recording_noExt + ANNOTATION_EXT

    ##### get duration of initial silence
    try:
        annotationTokenListA = TextGrid2WordList(annotationURI, evalLevel)

        # just copy duration of silence in ground truth
        annoTsAndToken = annotationTokenListA[0]
        if annoTsAndToken[2] != "" and not(annoTsAndToken[2].isspace()):  # skip empty phrases
            logger.warn("annotation {} starts with non-sil token ".format(annotationURI))
            finalSilFram = float(annoTsAndToken[0]) * NUM_FRAMES_PERSECOND
        else:
            finalSilFram = float(annoTsAndToken[1]) * NUM_FRAMES_PERSECOND

    # was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt propagate
    except Exception:
        # if no ground-truth annotation file (or needed layer) present - take from model
        finalSilFram = 0
        countFirstStateFirstWord = lyricsWithModels.listWords[0].syllables[0].phonemes[0].numFirstState

        for i in range(countFirstStateFirstWord):
            finalSilFram += lyricsWithModels.statesNetwork[i].getDurationInFrames()

    refTokenList = expandlyrics2WordList (lyricsWithModels, lyricsWithModels.statesNetwork, finalSilFram,  _constructTimeStampsForToken)
    grTruthDurationfileExtension = '.scoreDeviation'
    writeListOfListToTextFile(refTokenList, None , URI_recording_noExt + grTruthDurationfileExtension )

    # TODO: could be done easier with this code, and check last method in Word
    #     refTokenList = testT(lyricsWithModels)

    correctDuration, totalDuration = _evalAccuracy(annotationURI, refTokenList, evalLevel )

    return correctDuration, totalDuration
# ---- Example #4 (snippet separator from scrape) ----
def doitOneChunkAlign(URIrecordingNoExt, musicXMLParser,  whichSentence, currSentence, withScores, withVocalPrediction):
    '''
    align one chunk only.
    @param musicXMLParser: parsed  score for whole recording
    @param whichSentence: sentence number to process  
    '''

    fromTs = currSentence[0]
    toTs = currSentence[1]
    
    listNonVocalFragments = []
    if withVocalPrediction:
        listNonVocalFragments = getListNonVocalFragments(URIrecordingNoExt, fromTs, toTs)
    
    URIRecordingChunkNoExt = URIrecordingNoExt + "_" + str(fromTs) + '_' + str(toTs)
    if (withScores):
        tokenLevelAlignedSuffix = '.syllables_dur'
    else:
        tokenLevelAlignedSuffix = '.syllables'

    detectedAlignedfileName = URIRecordingChunkNoExt + tokenLevelAlignedSuffix

    fromSyllable = currSentence[2]
    toSyllable = currSentence[3]
    
    # already decoded
    if os.path.isfile(detectedAlignedfileName):
        print "{} already exists. No decoding".format(detectedAlignedfileName)
        detectedTokenList  = readListOfListTextFile(detectedAlignedfileName)
        correctDuration, totalDuration = _evalAccuracy(URIrecordingNoExt + ANNOTATION_EXT, detectedTokenList, evalLevel, fromSyllable, toSyllable  )
#         correctDuration= 0; totalDuration=1 
        return correctDuration, totalDuration 
    
    
    
    ###### 1) load Lyrics
    lyrics = loadLyricsFromTextGridSentence(currSentence)
#     if logger.level == logging.DEBUG:
#     lyrics.printSyllables()
    
    if withScores: # load from score instead
        lyrics = musicXMLParser.getLyricsForSection(whichSentence) # indexing in python

    withSynthesis = True
#     2) load features
    lyricsWithModels, obsFeatures, dummyChunkURI  = loadSmallAudioFragment(lyrics,  URIrecordingNoExt, withSynthesis, fromTs, toTs)
#     lyricsWithModels.printWordsAndStates()
    
    ##### align
    usePersistentFiles = 'False'
    alpha = 0.97
    from hmm.Parameters import Parameters
    ONLY_MIDDLE_STATE = False
    params  = Parameters(alpha, ONLY_MIDDLE_STATE)
    
    alignmentErrors, detectedTokenList, detectedPath = alignOneChunk(obsFeatures, lyricsWithModels, listNonVocalFragments, alpha, evalLevel, usePersistentFiles, tokenLevelAlignedSuffix, URIRecordingChunkNoExt)
    


    correctDuration, totalDuration = _evalAccuracy(URIrecordingNoExt + ANNOTATION_EXT, detectedTokenList, evalLevel, fromSyllable, toSyllable  )
    acc = correctDuration / totalDuration
    print "result is: " + str(acc)
    
    
    
    return correctDuration, totalDuration
# ---- Example #5 (snippet separator from scrape) ----
    def evalAccuracy(self, eval_level):
        '''
        Evaluate alignment accuracy over all lyrics sections of this recording.

        Divides the recording's TextGrid into sentences, maps each section
        link to its begin/end token indices, evaluates every section that has
        a detectedTokenList, and aggregates the correct/total durations.

        @param eval_level: passed to divideIntoSentencesFromAnnoWithSil to
            pick the annotation tier/level (the accuracy call itself uses
            ParametersAlgo.EVAL_LEVEL)
        @return: (totalCorrectDurations, totalDurations) summed over sections
        '''

        # make the external evaluator importable, then import it lazily
        pathEvaluation = os.path.join(parentDir,
                                      'AlignmentEvaluation/align_eval')
        if pathEvaluation not in sys.path:
            sys.path.append(pathEvaluation)
        from AccuracyEvaluator import _evalAccuracy

        totalCorrectDurations = 0
        totalDurations = 0

        # pick manually-annotated sections when available, detected ones otherwise
        if self.WITH_SECTION_ANNOTATIONS:
            sectionLinks = self.recording.sectionAnnos
        else:
            sectionLinks = self.recording.sectionLinks

##     might be needed for jingju
        URI_TextGrid = os.path.join(self.recording.recordingNoExtURI +
                                    ANNOTATION_EXT)

        ##### add index of begin token and end token

        high_level_tier_name = tierAliases.line

        list_start_end_indices, annotationLinesList = divideIntoSentencesFromAnnoWithSil(URI_TextGrid, \
        high_level_tier_name, eval_level)
        # hard requirement: one (begin, end) index pair per section link
        if len(list_start_end_indices) != len(sectionLinks):
            sys.exit(
                'TextGrid has {} lines, whereas section Links are {}'.format(
                    len(list_start_end_indices), len(sectionLinks)))
        for idx, currSectionLink in enumerate(
                sectionLinks
        ):  # assign syllable/word/phrase start- and end-indices in TextGrid
            currSectionLink.set_begin_end_indices(
                list_start_end_indices[idx][0], list_start_end_indices[idx][1])

        for currSectionLink in sectionLinks:

            if not hasattr(
                    currSectionLink, 'detectedTokenList'
            ):  # use the existence of a detected token list as indicator of lyrics-sections
                continue

            ###################### eval phoneme level
#                         self.eval_percentage_phonemes(URI_TextGrid, currSectionLink)

############################# eval accuracy Annotation level
            # defaults (overwritten just below; kept from original code)
            correctDuration = 0
            totalDuration = 1

            correctDuration, totalDuration = _evalAccuracy(
                URI_TextGrid, currSectionLink.detectedTokenList,
                ParametersAlgo.EVAL_LEVEL, currSectionLink.token_begin_idx,
                currSectionLink.token_end_idx)
            logger.debug('current section {} accuracy: {}'.format(
                currSectionLink, correctDuration / totalDuration))

            totalCorrectDurations += correctDuration
            totalDurations += totalDuration

        # NOTE(review): raises ZeroDivisionError if no section had a
        # detectedTokenList — confirm whether that is intended
        accuracy = totalCorrectDurations / totalDurations
        logger.warning("recording {} accuracy: {:.2f}".format(
            self.recording.recordingNoExtURI, accuracy))
        return totalCorrectDurations, totalDurations