def alignDependingOnWithDuration(URIrecordingNoExt, whichSection, pathToComposition, withDuration, withSynthesis, evalLevel, params, usePersistentFiles, htkParser):
    '''
    Align one section of a recording, either with the duration-aware aligner
    or with the HTK aligner, and store the detected tokens in a tab file.

    URIrecordingNoExt: recording URI without extension; the annotation and
        audio extensions are appended to it in the HTK branch.
    whichSection, pathToComposition: identify the lyrics to align.
    withDuration: truthy -> alignOneChunk(); falsy -> Aligner.alignOnechunk().
    withSynthesis, evalLevel: forwarded to the lookup table, the suffix
        selection, the aligner and the evaluation.
    params, usePersistentFiles, htkParser: forwarded to alignOneChunk() only.

    Returns (alignmentErrors, detectedWordList, grTruthDurationWordList,
    detectedAlignedfileName), or four empty lists when the HTK branch finds
    that the section has no lyrics.
    '''
    Phonetizer.initLookupTable(withSynthesis)

    # only the token-level suffix is needed here
    tokenSuffix, _phonemeSuffix = determineSuffix(withDuration, withSynthesis, evalLevel)

    if not withDuration:
        annotationURI = URIrecordingNoExt + ANNOTATION_EXT
        audioURI = URIrecordingNoExt + AUDIO_EXTENSION

        # new makamScore used
        sectionLyrics = loadLyrics(pathToComposition, whichSection).__str__()

        # in case we are at no-lyrics section
        hasLyrics = bool(sectionLyrics) and sectionLyrics != '_SAZ_'
        if not hasLyrics:
            logger.warn("skipping section {} with no lyrics ...".format(whichSection))
            return [], [], [], []

        # in this branch the aligner output doubles as the detected word list
        detectedWordList = Aligner.alignOnechunk(MODEL_URI, audioURI, sectionLyrics.__str__(), annotationURI, '/tmp/', withSynthesis)
        alignmentErrors = evalAlignmentError(annotationURI, detectedWordList, evalLevel)
        grTruthDurationWordList = []
    else:
        alignmentErrors, detectedWordList, grTruthDurationWordList = alignOneChunk(
            URIrecordingNoExt, pathToComposition, whichSection, htkParser,
            params, evalLevel, usePersistentFiles)

    # store decoding results in a file FIXME: if with duration it is not mlf
    detectedAlignedfileName = tokenList2TabFile(detectedWordList, URIrecordingNoExt, tokenSuffix)

    return alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName
def alignDependingOnWithDuration(URIrecordingNoExt, sectionLink, pathToComposition, withDuration, withSynthesis, evalLevel, params, usePersistentFiles, htkParser):
    '''
    Call the alignment method depending on whether duration or htk is selected.

    URIrecordingNoExt: recording URI without extension; the annotation and
        audio extensions are appended to it in the htk branch.
    sectionLink: object whose ``melodicStructure`` attribute selects the
        lyrics of the section inside the score.
    pathToComposition: passed to loadMakamScore() to load the score.
    withDuration: truthy -> alignOneChunk(); falsy -> Aligner.alignOnechunk()
        followed by evalAlignmentError().
    withSynthesis, evalLevel: forwarded to the suffix selection, the aligners
        and the evaluation.
    params: object whose ``ALPHA`` attribute is forwarded to alignOneChunk().
    usePersistentFiles, htkParser: forwarded to alignOneChunk() only.

    Returns (alignmentErrors, correctDuration, totalDuration,
    correctDurationScoreDev, maxPhiScore). For a section without lyrics the
    placeholder tuple ([], 'dummy', 0, 0, 0) is returned instead.
    '''
    #### 1) load lyrics
    makamScore = loadMakamScore(pathToComposition)
    lyrics = makamScore.getLyricsForSection(sectionLink.melodicStructure)
    lyricsStr = lyrics.__str__()

    if not lyricsStr or lyricsStr == 'None' or lyricsStr == '_SAZ_':
        logger.warn("skipping sectionLink {} with no lyrics ...".format(sectionLink.melodicStructure))
        return [], 'dummy', 0, 0, 0

    # Accuracy evaluation is currently switched off in both branches, so the
    # duration figures are fixed placeholders. They are set before branching
    # so that the htk branch no longer reaches the return statement with
    # correctDuration / totalDuration / maxPhiScore unbound.
    correctDurationScoreDev = 0
    correctDuration = 0
    totalDuration = 1
    # the htk branch produces no phi score; 0 matches the skip-case placeholder
    maxPhiScore = 0

    tokenLevelAlignedSuffix, phonemesAlignedSuffix = determineSuffix(withDuration, withSynthesis, evalLevel)

    alignmentErrors = []

    if withDuration:
        withOracle = 0
        oracleLyrics = 'dummy'
        detectedTokenList, detectedPath, maxPhiScore = alignOneChunk(
            lyrics, withSynthesis, withOracle, oracleLyrics, [], params.ALPHA,
            usePersistentFiles, tokenLevelAlignedSuffix, URIrecordingNoExt,
            sectionLink, htkParser)
        logger.debug('maxPhiScore: ' + str(maxPhiScore))
    else:
        URIrecordingAnno = URIrecordingNoExt + ANNOTATION_EXT
        URIrecordingWav = URIrecordingNoExt + AUDIO_EXTENSION

        outputHTKPhoneAlignedURI = Aligner.alignOnechunk(MODEL_URI, URIrecordingWav, lyricsStr, URIrecordingAnno, '/tmp/', withSynthesis)
        alignmentErrors = evalAlignmentError(URIrecordingAnno, outputHTKPhoneAlignedURI, evalLevel)

    return alignmentErrors, correctDuration, totalDuration, correctDurationScoreDev, maxPhiScore
def main(argv):
    '''
    Align one section of a recording against its lyrics and report the error.

    argv: [program, pathToComposition, whichSection, URI_recording_no_ext];
        whichSection is converted with int(). With any other number of
        entries a usage line is printed and sys.exit() is called.

    Prints "( mean , stDev )" of the alignment errors, hands the result to
    visualiseInPraat() and returns (mean, stDev, alignmentErrors).
    '''
    if len(argv) != 4:
        print("usage: {} <pathToComposition> <whichSection> <URI_recording_no_ext>".format(argv[0]))
        sys.exit()

    pathToComposition = argv[1]
    whichSection = int(argv[2])
    URIrecordingNOExt = argv[3]

    URIrecordingWav = URIrecordingNOExt + AUDIO_EXTENSION
    URIrecordingAnno = URIrecordingNOExt + PHRASE_ANNOTATION_EXT

    lyrics = loadLyrics(pathToComposition, whichSection)

    withSynthesis = 1
    outputHTKPhoneAlignedURI = Aligner.alignOnechunk(MODEL_URI, URIrecordingWav, lyrics, URIrecordingAnno, '/tmp/', withSynthesis)

    EVALLEVEL = 2
    alignmentErrors = evalAlignmentError(URIrecordingAnno, outputHTKPhoneAlignedURI, EVALLEVEL)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    # One pre-formatted argument gives the same text as the old
    # `print "(", mean, ",", stDev, ")"` statement and is valid on Python 2 and 3.
    print("( {} , {} )".format(mean, stDev))

    ### open in praat
    withDuration = False
    visualiseInPraat(URIrecordingNOExt, withDuration, outputHTKPhoneAlignedURI, [])

    return mean, stDev, alignmentErrors
# HMM model definitions; the relative path is made absolute when this module
# is loaded.
MODEL_URI = os.path.abspath('model/hmmdefs9gmm9iter')

import sys

# Put the 'JingjuAlignment' directory (located in the parent of this file's
# directory) on sys.path. This must run before the lyricsParser import below,
# which presumably lives in that directory -- TODO confirm.
parentDir = os.path.dirname(os.path.realpath(__file__))
parentDir = os.path.abspath(os.path.join(parentDir, os.path.pardir))
pathHMMDuration = os.path.join(parentDir, 'JingjuAlignment')
if pathHMMDuration not in sys.path:
    sys.path.append(pathHMMDuration)

from lyricsParser import divideIntoSectionsFromAnno, loadLyricsFromTextGridSentence

if __name__ == '__main__':
    # input files of the demo run
    lyricsTextGrid = 'dan-xipi_01.TextGrid'
    URIrecordingWav = 'dan-xipi_01_32.511032007_51.9222930007.wav'
    # TODO: generate this TextGrid
    lyricsTextGridSentence = 'dan-xipi_01_32.511032007_51.9222930007.TextGrid'
    withSynthesis = 0

    # LOAD LYRICS: only the first sentence of the annotation is aligned
    listSentences = divideIntoSectionsFromAnno(lyricsTextGrid)
    firstSentence = listSentences[0]
    lyrics = loadLyricsFromTextGridSentence(firstSentence)

    # align
    outputHTKPhoneAlignedURI = Aligner.alignOnechunk(
        MODEL_URI, URIrecordingWav, lyrics, lyricsTextGridSentence,
        '/tmp/', withSynthesis)