def readNonEmptyTokensTextGrid(annotationURI, whichLevel, initialTimeOffset=0):
    '''
    prepare list of phrases from annotation. remove empty annotation tokens
    '''
    try:
        annotationTokenListA = TextGrid2WordList(annotationURI, whichLevel)
    except Exception as errorMsg:
        sys.exit(str(errorMsg))

    for currAnnoTsAndToken in annotationTokenListA:
        currAnnoTsAndToken[0] = float(currAnnoTsAndToken[0]) + initialTimeOffset
        currAnnoTsAndToken[1] = float(currAnnoTsAndToken[1]) + initialTimeOffset

    # store to file .anno
    baseN = os.path.basename(annotationURI)
    dirName = os.path.dirname(annotationURI)
    annotationURI_anno = os.path.join(dirName, baseN + '.anno')
    writeListOfListToTextFile(annotationTokenListA, None, annotationURI_anno)

    # remove empty phrases
    annotationTokenListNoPauses = []
    for currAnnoTsAndToken in annotationTokenListA:
        if currAnnoTsAndToken[2] != "" and not currAnnoTsAndToken[2].isspace():  # skip empty phrases
            annotationTokenListNoPauses.append(currAnnoTsAndToken)

    return annotationTokenListA, annotationTokenListNoPauses
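# Usage sketch (not part of the original module; the TextGrid path, tier name and offset
# below are hypothetical): the function returns both the full token list and the list with
# empty/pause intervals filtered out, each entry being [startTs, endTs, token].
def _exampleReadNonEmptyTokens():
    allTokens, nonEmptyTokens = readNonEmptyTokensTextGrid('/tmp/recording.TextGrid', 'words', initialTimeOffset=0.0)
    for startTs, endTs, token in nonEmptyTokens:
        print startTs, endTs, token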
def _constructLogLiksTable(self, usePersistentProbs):
    PATH_LOOKUP_DUR_TABLE = PATH_LOGS + '/lookupTable'
    logger.info("path lookup table: " + PATH_LOOKUP_DUR_TABLE)

    if usePersistentProbs and os.path.exists(PATH_LOOKUP_DUR_TABLE):
        self.lookupTableLogLiks = numpy.loadtxt(PATH_LOOKUP_DUR_TABLE)
        logger.info("reading lookup table from {}".format(PATH_LOOKUP_DUR_TABLE))

        # if table covers max dur, reuse it
        if self.lookupTableLogLiks.shape[0] >= self.R_MAX:
            return
    else:
        self.lookupTableLogLiks = numpy.empty((self.R_MAX, self.MAX_ALLOWED_DURATION_RATIO * self.R_MAX + 1))
        self.lookupTableLogLiks.fill(-Infinity)

    # otherwise construct
    logging.info("constructing duration probability lookup table...")
    quantileVals = linspace(self.minVal, self.maxVal, self.numDurs)
    liks = numpy.zeros((self.numDurs, 1))
    for d in range(0, self.numDurs):
        liks[d] = norm.pdf(quantileVals[d])

    for currMaxDur in range(1, int(self.R_MAX) + 1):
        self._constructLogLikDistrib(currMaxDur, liks)

    writeListOfListToTextFile(self.lookupTableLogLiks, None, PATH_LOOKUP_DUR_TABLE)
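# Minimal standalone sketch of the lookup-table idea above (illustration only; the value
# range 0..3 and numDurs below are assumptions, not the class defaults): sample a normal
# pdf at evenly spaced quantile values and keep the result in the log domain, which is
# what each row of self.lookupTableLogLiks later stores per reference duration.
def _exampleDurationLikSketch(minVal=0.0, maxVal=3.0, numDurs=30):
    from numpy import linspace, log
    from scipy.stats import norm
    quantileVals = linspace(minVal, maxVal, numDurs)
    liks = norm.pdf(quantileVals)  # likelihood for each quantised duration value
    return log(liks)               # log-likelihoods, safer against underflow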
def _createWordMLFandDict(self):
    # txtTur to METU. txtMETU as persistent file not really needed. Kept only for reference
    baseNameAudioFile = os.path.splitext(self.pathToAudioFile)[0]
    METUBETfileName = baseNameAudioFile + LYRICS_TXT_METUBET_EXT

    if self.loadLyricsFromFile == 1:
        METULyrics = PhonetizerOld.turkishScriptLyrics2METUScriptLyricsFile(baseNameAudioFile + LYRICS_TXT_EXT, METUBETfileName)
    else:
        # TODO: change this step
        METULyrics = PhonetizerOld.turkishScriptLyrics2METUScriptLyrics(self.lyrics, METUBETfileName)

    # create word-level MLF:
    baseN = os.path.basename(self.pathToAudioFile)
    baseN = os.path.splitext(baseN)[0]
    headerLine = baseN + ' ' + METULyrics
    writeListOfListToTextFile([], headerLine, '/tmp/prompts')

    # prompts2mlf
    mlfName = '/tmp/tmp' + HTK_MLF_WORD_ANNO_SUFFIX
    prompts2mlf = os.path.abspath('prompts2mlf')
    pipe = subprocess.Popen(['/usr/bin/perl', prompts2mlf, mlfName, '/tmp/prompts'])
    pipe.wait()

    # phonetize
    dictName = '/tmp/lexicon2'
    PhonetizerOld.METULyrics2phoneticDict(METUBETfileName, dictName, self.withSynthesis)

    return (dictName, mlfName, METULyrics)
def _mapB(self, observations):
    '''
    Required implementation for _mapB. Refer to _BaseHMM for more details.
    This method highly optimizes the running time, since all PDF calculations
    are done here once in each training iteration.

    - self.Bmix_map - computes and maps Bjm(Ot) to Bjm(t).
    '''
    self.B_map = numpy.zeros((self.n, len(observations)), dtype=self.precision)

    if self.usePersistentFiles and os.path.exists(self.PATH_BMAP):
        self.B_map = numpy.loadtxt(self.PATH_BMAP)
        if self.B_map.shape[1] == len(observations):
            return
        # loaded table does not match the current observations; recompute from scratch
        self.B_map = numpy.zeros((self.n, len(observations)), dtype=self.precision)

    self.Bmix_map = numpy.zeros((self.n, self.m, len(observations)), dtype=self.precision)
    for j in xrange(self.n):
        for t in xrange(len(observations)):
            lik = self._calcbjt(j, t, observations[t])
            if lik == 0:
                logging.warning("obs likelihood at time {} for state {} = 0. Repair by adding {}".format(t, j, MINIMAL_PROB))
                lik = MINIMAL_PROB
            self.B_map[j][t] = lik

    self._normalizeBByMax()

    # normalize over states
    for t in xrange(len(observations)):
        self.B_map[:, t] = _normalize(self.B_map[:, t])
        logging.debug("sum={} at time {}".format(sum(self.B_map[:, t]), t))

    if self.usePersistentFiles:
        writeListOfListToTextFile(self.B_map, None, self.PATH_BMAP)
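# Sketch (assumption, a standalone restatement of the per-frame normalization used above):
# each column of B_map holds the observation likelihoods of all states at one time frame
# and is scaled to sum to one, so states stay comparable across frames.
def _exampleNormalizePerFrame(B_map):
    for t in xrange(B_map.shape[1]):
        colSum = B_map[:, t].sum()
        if colSum > 0:
            B_map[:, t] = B_map[:, t] / colSum
    return B_map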
def _initKappas(self, lenObservations):
    '''
    kappas[t][s] - starting and staying at time t in same currState s. With log-lik.
    '''
    if lenObservations <= self.R_MAX:
        sys.exit("observations are only {}, R_max = {}. Not able to run initialization. Increase size of observations".format(lenObservations, self.R_MAX))

    print 'init kappas...'
    self.kappas = numpy.empty((self.R_MAX, self.n), dtype=self.precision)
    # if some kappa[t, state] = -INFINITY and phi[t, state] = -INFINITY, no initialization is possible
    # (e.g. not possible to choose max between kappa and phi)
    self.kappas.fill(numpy.log(MINIMAL_PROB))

    for currState in range(self.n):
        sumObsProb = 0
        currRefMax = self.durationPdf.getMaxRefDur(self.durationMap[currState])
        currLogPi = numpy.log(self.pi[currState])

        for t in range(1, int(currRefMax) + 1):
            updateQuantity, sumObsProb = self._calcUpdateQuantity(t - 1, t, currState, 0, sumObsProb)
            self.kappas[t - 1, currState] = currLogPi + updateQuantity

            # sanity check. for debug
            if self.kappas[t - 1, currState] == 0:
                print "underflow error at time {}, currState {}".format(t - 1, currState)

    writeListOfListToTextFile(self.kappas, None, PATH_LOGS + '/kappas')
def _mapB_OLD(self, observations):
    '''
    Required implementation for _mapB. Refer to _BaseHMM for more details.
    This method highly optimizes the running time, since all PDF calculations
    are done here once in each training iteration.

    - self.Bmix_map - computes and maps Bjm(Ot) to Bjm(t). Log precomputed.
    '''
    if self.usePersistentFiles and os.path.exists(self.PATH_BMAP):
        self.logger.info("loading probs of all observations from {}".format(self.PATH_BMAP))
        self.B_map = numpy.loadtxt(self.PATH_BMAP)

        # check length
        if self.B_map.shape[1] == len(observations) and self.B_map.shape[0] == self.n:
            self.B_map = numpy.log(self.B_map)
            return
        else:
            self.logger.info("file {} found, but does not have the expected num of states {} or observations {}".format(self.PATH_BMAP, self.n, len(observations)))

    self.B_map = numpy.zeros((self.n, len(observations)), dtype=self.precision)
    self.Bmix_map = numpy.zeros((self.n, self.m, len(observations)), dtype=self.precision)

    for j in xrange(self.n):
        for t in xrange(len(observations)):
            self.logger.debug("at calcbjt at state {} and time {}...\n".format(j, t))
            lik = self._calcbjt(j, t, observations[t])
            if lik == 0:
                self.logger.debug("obs likelihood at time {} for state {} = 0. Repair by adding {}".format(t, j, MINIMAL_PROB))
                lik = MINIMAL_PROB
            self.B_map[j, t] = lik

    # normalize over states
    for t in xrange(len(observations)):
        self.B_map[:, t] = _normalize(self.B_map[:, t])
        self.logger.debug("sum={} at time {}".format(sum(self.B_map[:, t]), t))

    if self.usePersistentFiles:
        writeListOfListToTextFile(self.B_map, None, self.PATH_BMAP)

    self.B_map = numpy.log(self.B_map)
def tokenList2TabFile(listTsAndPhonemes, baseNameAudioFile, whichSuffix):
    '''
    convenience method.
    '''
    # timeshift
    # for index in range(len(listTsAndPhonemes)):
    #     listTsAndPhonemes[index][0] = listTsAndPhonemes[index][0] + timeShift
    #     if len(listTsAndPhonemes[index]) == 3:
    #         del listTsAndPhonemes[index][1]

    phonemeAlignedfileName = baseNameAudioFile + whichSuffix
    writeListOfListToTextFile(listTsAndPhonemes, 'startTs endTs phonemeOrWord\n', phonemeAlignedfileName)
    logging.debug('phoneme level alignment written to file: %s', phonemeAlignedfileName)

    return phonemeAlignedfileName
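# Usage sketch (hypothetical token list and suffix, not from the original code): writes a
# tab-style file with a 'startTs endTs phonemeOrWord' header next to the audio file and
# returns its name.
def _exampleTokenList2Tab():
    listTsAndPhonemes = [[0.0, 0.35, 'sil'], [0.35, 0.62, 'a'], [0.62, 0.97, 'n']]
    return tokenList2TabFile(listTsAndPhonemes, '/tmp/recording', '.phonemesAligned')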
def _viterbiForcedDur(self, observations):
    # sanity check: make sure durations are init'ed from score
    print "decoding..."

    for t in range(self.R_MAX, len(observations)):
        for currState in xrange(1, self.n):
            self.computePhi(t, currState)  # get max duration quantities

    # for t in range(self.R_MAX + 19, self.R_MAX + 20):
    #     for currState in xrange(57, 59):
    #         self.computePhi(t, currState)  # get max duration quantities

    writeListOfListToTextFile(self.phi, None, PATH_LOGS + '/phi')

    # return for backtracking
    return self.chi, self.psi
def _initBeginingPhis(self, lenObservations):
    '''
    init phis when t < self.R_MAX
    '''
    self._initKappas(lenObservations)

    # for convenience put as class vars
    self.phi = numpy.empty((lenObservations, self.n), dtype=self.precision)
    self.phi.fill(-Infinity)

    # self.phi = numpy.loadtxt(PATH_LOGS + '/phi_init', dtype=self.precision)
    # return

    # init t=0
    # for currState in range(self.n):
    #     self.phi[0, currState] = self.kappas[currState, 0]
    self.phi[0, :] = self.kappas[0, :]

    # init first state = kappa (done to allow self.getMaxPhi_slow to access prev. currState)
    self.phi[:len(self.kappas[:, 0]), 0] = self.kappas[:, 0]

    # select the bigger of kappa and phi_star
    for t in range(1, int(self.R_MAX)):
        self.logger.debug("at time t={}".format(t))
        # phi star makes sense only from the second state on
        for currState in range(1, self.n):
            phiStar, fromState, maxDurIndex = self.computePhiStar(t, currState)

            # take the bigger: eq:deltaStarOrKappa
            if phiStar > self.kappas[t, currState]:
                self.phi[t, currState] = phiStar
                self.psi[t, currState] = fromState
                self.chi[t, currState] = maxDurIndex
            else:
                self.logger.debug("kappa more than phi at time {} and state {}".format(t, currState))
                self.phi[t, currState] = self.kappas[t, currState]  # kappa means still at beginning state
                self.psi[t, currState] = currState
                self.chi[t, currState] = t

    writeListOfListToTextFile(self.phi, None, PATH_LOGS + '/phi_init')
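# Sketch (assumption, distilled from the selection rule above): per (t, currState) keep the
# larger of kappa (still in the starting state) and phi_star (best transition from a previous
# state), together with the matching back-pointers psi (from-state) and chi (duration index).
def _examplePickKappaOrPhiStar(kappaVal, phiStar, fromState, maxDurIndex, currState, t):
    if phiStar > kappaVal:
        return phiStar, fromState, maxDurIndex
    return kappaVal, currState, t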
def _mlf2PraatFormat(listTsAndPhonemes, timeShift, baseNameAudioFile, whichSuffix):
    # timeshift
    for index in range(len(listTsAndPhonemes)):
        listTsAndPhonemes[index][0] = listTsAndPhonemes[index][0] + timeShift
        if len(listTsAndPhonemes[index]) == 3:
            del listTsAndPhonemes[index][1]

    phonemeAlignedfileName = baseNameAudioFile + whichSuffix
    writeListOfListToTextFile(listTsAndPhonemes, 'startTs phonemeOrWord\n', phonemeAlignedfileName)
    logger.debug('phoneme level alignment written to file: %s', phonemeAlignedfileName)

    return phonemeAlignedfileName
def getReferenceDurations(URI_recording_noExt, decoder, evalLevel):
    '''
    timestamps of words according to reference durations read from score.
    Used to obtain the so-called 'score-deviation' metric. Not used in decoding.
    '''
    annotationURI = URI_recording_noExt + ANNOTATION_EXT

    # get duration of initial silence
    try:
        annotationTokenListA = TextGrid2WordList(annotationURI, evalLevel)

        # just copy duration of silence from ground truth
        annoTsAndToken = annotationTokenListA[0]
        if annoTsAndToken[2] != "" and not annoTsAndToken[2].isspace():  # non-empty first token
            logger.warn("annotation {} starts with non-sil token".format(annotationURI))
            finalSilFram = float(annoTsAndToken[0]) * NUM_FRAMES_PERSECOND
        else:
            finalSilFram = float(annoTsAndToken[1]) * NUM_FRAMES_PERSECOND

    except:
        # if no ground-truth annotation file (or needed layer) present - take from model
        finalSilFram = 0
        countFirstStateFirstWord = decoder.lyricsWithModels.listWords[0].syllables[0].phonemes[0].numFirstState
        for i in range(countFirstStateFirstWord):
            finalSilFram += decoder.lyricsWithModels.statesNetwork[i].getDurationInFrames()

    grTruthWordList = expandlyrics2Words(decoder.lyricsWithModels, decoder.lyricsWithModels.statesNetwork, finalSilFram, _constructTimeStampsForWord)

    grTruthDurationfileExtension = '.grTruthDuration'
    writeListOfListToTextFile(grTruthWordList, None, URI_recording_noExt + grTruthDurationfileExtension)

    # TODO: could be done easier with this code, and check last method in Word
    # grTruthWordList = testT(decoder.lyricsWithModels)

    return grTruthWordList
def testReadListOfListTextFile_gen():
    URIfile = '/Users/joro/Downloads/kimseye-annotation-score-to-audio.txt'
    shiftedNakarat = []
    inNakarat = 0
    detectedTokenList = readListOfListTextFile_gen(URIfile)

    # get TS
    for entry in detectedTokenList:
        if entry[3] == 'D5-NAKARAT-n1':
            beginTs = entry[0]
        if entry[3] == 'D5-NAKARAT*-n1':
            endTs = entry[0]
            break

    endTs = 111.687981859

    for entry in detectedTokenList:
        if entry[3] == 'D5-NAKARAT-n1':
            inNakarat = 1
        if entry[3] == 'D5-NAKARAT*-n1':
            inNakarat = 0
            break
        if inNakarat:
            entry[0] += (endTs - beginTs)
            shiftedNakarat.append(entry)

    writeListOfListToTextFile(shiftedNakarat, None, 'shiftedNakarat.txt', toFlip=False)
    print detectedTokenList