def alignDependingOnWithDuration(URIrecordingNoExt, whichSection, pathToComposition, withDuration, withSynthesis, evalLevel, params, usePersistentFiles, htkParser):
    '''
    Align one section of a recording, either with the duration-aware
    aligner (withDuration is true) or with the plain HTK aligner.

    The detected tokens are afterwards written out with tokenList2TabFile.

    Returns a 4-tuple
        (alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName).
    In the HTK branch grTruthDurationWordList is always an empty list and
    detectedWordList is whatever Aligner.alignOnechunk returned.
    When the section has no lyrics (empty, or the '_SAZ_' marker) in the HTK
    branch, four empty lists are returned and nothing is written.
    '''
    Phonetizer.initLookupTable(withSynthesis)

    # only the token-level suffix is used below; the phoneme-level one is ignored
    tokenSuffix, _phonemeSuffix = determineSuffix(withDuration, withSynthesis, evalLevel)

    if not withDuration:
        annotationURI = URIrecordingNoExt + ANNOTATION_EXT
        audioURI = URIrecordingNoExt + AUDIO_EXTENSION

        # new makamScore used
        sectionLyrics = loadLyrics(pathToComposition, whichSection).__str__()

        # in case we are at no-lyrics section
        hasLyrics = sectionLyrics and sectionLyrics != '_SAZ_'
        if not hasLyrics:
            logger.warn("skipping section {} with no lyrics ...".format(whichSection))
            return [], [], [], []

        detectedWordList = Aligner.alignOnechunk(
            MODEL_URI,
            audioURI,
            sectionLyrics.__str__(),
            annotationURI,
            '/tmp/',
            withSynthesis)
        alignmentErrors = evalAlignmentError(annotationURI, detectedWordList, evalLevel)
        grTruthDurationWordList = []
    else:
        alignmentErrors, detectedWordList, grTruthDurationWordList = alignOneChunk(
            URIrecordingNoExt,
            pathToComposition,
            whichSection,
            htkParser,
            params,
            evalLevel,
            usePersistentFiles)

    # store decoding results in a file FIXME: if with duration it is not mlf
    detectedAlignedfileName = tokenList2TabFile(detectedWordList, URIrecordingNoExt, tokenSuffix)

    return alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName
def test_oracle_jingju(URIrecordingNoExt, whichSentence, fromPhonemeIdx, toPhonemeIdx):
    '''
    Decode one sentence of a recording with the oracle (phoneme-level
    ground truth) and print the accuracy; tested with dan-xipi_02.

    The sentence with index whichSentence is taken from the TextGrid
    annotation next to the recording. If a file with the expected result
    name already exists, the token list is read from it instead of decoding.

    Returns the detected token list.
    '''
    annotationURI = URIrecordingNoExt + '.TextGrid'

    # uses TextGrid annotation to derive structure. TODO: instead of annotation, use score
    sentence = divideIntoSentencesFromAnno(annotationURI)[whichSentence]

    # consider only part of audio
    startTs = sentence[0]
    endTs = sentence[1]

    sentenceLyrics = loadLyricsFromTextGridSentence(sentence)

    oracleSuffix = '.syllables_oracle'
    cachedResultURI = '_'.join([URIrecordingNoExt, str(startTs), str(endTs), oracleSuffix])

    if not os.path.isfile(cachedResultURI):
        tokens = decodeWithOracle(sentenceLyrics, URIrecordingNoExt, startTs, endTs, fromPhonemeIdx, toPhonemeIdx)

        # NOTE(review): the path checked above contains the timestamps, while
        # tokenList2TabFile only receives the base name and the suffix --
        # presumably it writes to a different path; verify the cache is ever hit.
        if not os.path.isfile(cachedResultURI):
            from PraatVisualiser import tokenList2TabFile
            tokenList2TabFile(tokens, URIrecordingNoExt, oracleSuffix)
    else:
        print("{} already exists. No decoding".format(cachedResultURI))

        from Utilz import readListOfListTextFile
        tokens = readListOfListTextFile(cachedResultURI)

    # eval on phrase level (evalLevel = 2), restricted to the syllable range of the sentence
    phraseLevel = 2
    firstSyllable = sentence[2]
    lastSyllable = sentence[3]

    correct, total = _evalAccuracy(annotationURI, tokens, phraseLevel, firstSyllable, lastSyllable)
    print("accuracy= {}".format(correct / total))

    return tokens
def mlfResult2TextGrid(argv):
    '''
    open mlf in praat.

    argv is a command-line style list:
        argv[0]  program name (only used in the usage message)
        argv[1]  <detectedHTK_URI>  URI of the mlf file with the detected alignment
        argv[2]  <whichLevel>       evaluation level, converted with int()

    The detected tokens are loaded from the mlf, written to a tab-separated
    file next to it (same base name, level-specific suffix) and that file is
    handed to tab2PraatAndOpenWithPRaat.

    Raises SystemExit carrying the usage message (printed to stderr, exit
    status 1) when argv does not have exactly three entries.
    '''
    if len(argv) != 3:
        # argv may be completely empty; do not crash while building the message
        progName = argv[0] if argv else 'mlfResult2TextGrid'
        # a string argument makes sys.exit report the error on stderr with a
        # non-zero status instead of signalling success
        sys.exit(" usage: {} <detectedHTK_URI> <whichLevel>".format(progName))

    detectedHTK_URI = argv[1]
    whichEvalLevel = int(argv[2])

    # only the token-level suffix is needed; withSynthesis is a placeholder value
    tokenAlignedSuffix, _unused = determineSuffixOld(withDuration=False, withSynthesis='dummy', evalLevel=whichEvalLevel)

    # load result from file into python list
    detectedTokenList = loadDetectedTokenListFromMlf(detectedHTK_URI, whichEvalLevel)

    # write to tsv file. praat can open only tsv-s
    baseNameAudioFile = os.path.splitext(detectedHTK_URI)[0]
    tokenAlignedfileName = tokenList2TabFile(detectedTokenList, baseNameAudioFile, tokenAlignedSuffix)

    # first list entry is a placeholder for the program-name slot
    tab2PraatAndOpenWithPRaat(['dummy', tokenAlignedfileName])