def readNonEmptyTokensTextGrid(annotationURI, whichLevel, initialTimeOffset=0):
    '''
    Prepare list of phrases from the ANNOTATION tier; remove empty annotation tokens.
    '''
    try:
        annotationTokenListA = TextGrid2WordList(annotationURI, whichLevel)
    except Exception as errorMsg:
        sys.exit(str(errorMsg))
    
    for currAnnoTsAndToken in annotationTokenListA:
        currAnnoTsAndToken[0] = float(currAnnoTsAndToken[0])
        currAnnoTsAndToken[0] += initialTimeOffset
        currAnnoTsAndToken[1] = float(currAnnoTsAndToken[1])
        currAnnoTsAndToken[1] += initialTimeOffset

    
    # store to file .anno
    baseN = os.path.basename(annotationURI)
    dirN = os.path.dirname(annotationURI)
    annotationURI_anno = os.path.join(dirN, baseN + '.anno')

    writeListOfListToTextFile(annotationTokenListA, None, annotationURI_anno)

    annotationTokenListNoPauses = []
    
    # remove empty phrases
    for currAnnoTsAndToken in annotationTokenListA:
        if currAnnoTsAndToken[2] != "" and not currAnnoTsAndToken[2].isspace():  # skip empty phrases
            annotationTokenListNoPauses.append(currAnnoTsAndToken)

    
    return annotationTokenListA, annotationTokenListNoPauses
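# Standalone sketch (not from the original source) of the same two steps:
# shift (start, end, label) rows by an offset and drop rows whose label is
# empty or whitespace-only. All names here are illustrative.
def shift_and_filter_tokens(rows, offset=0.0):
    shifted = [[float(start) + offset, float(end) + offset, label]
               for start, end, label in rows]
    non_empty = [row for row in shifted if row[2].strip() != ""]
    return shifted, non_empty

# shift_and_filter_tokens([["0.0", "1.2", ""], ["1.2", "2.5", "word"]], 0.5)
# -> the second returned list keeps only [1.7, 3.0, 'word']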
Example #2
    def _constructLogLiksTable(self, usePersistentProbs):

        PATH_LOOKUP_DUR_TABLE = PATH_LOGS + '/lookupTable'
        logger.info("path lookup table: " + PATH_LOOKUP_DUR_TABLE)
        if usePersistentProbs and os.path.exists(PATH_LOOKUP_DUR_TABLE):
            self.lookupTableLogLiks = numpy.loadtxt(PATH_LOOKUP_DUR_TABLE)
            logger.info("reading lookup table from {}".format(PATH_LOOKUP_DUR_TABLE))

            # if the persisted table already covers the max duration, reuse it
            if self.lookupTableLogLiks.shape[0] >= self.R_MAX:
                return

        # otherwise construct: allocate the table and fill it with -Infinity
        self.lookupTableLogLiks = numpy.empty((int(self.R_MAX), int(self.MAX_ALLOWED_DURATION_RATIO * self.R_MAX) + 1))
        self.lookupTableLogLiks.fill(-Infinity)

        logger.info("constructing duration probability lookup table...")

        quantileVals = linspace(self.minVal, self.maxVal, self.numDurs)
        liks = numpy.zeros((self.numDurs, 1))
        for d in range(0, self.numDurs):
            liks[d] = norm.pdf(quantileVals[d])

        for currMaxDur in range(1, int(self.R_MAX) + 1):
            self._constructLogLikDistrib(currMaxDur, liks)

        writeListOfListToTextFile(self.lookupTableLogLiks, None, PATH_LOOKUP_DUR_TABLE)
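# Minimal standalone sketch of the PDF sampling above, assuming numpy and
# scipy are available; min_val/max_val/num_durs mirror the attributes used by
# _constructLogLiksTable, but the default values below are illustrative.
import numpy
from scipy.stats import norm

def sample_duration_log_liks(min_val=-3.0, max_val=3.0, num_durs=30):
    quantile_vals = numpy.linspace(min_val, max_val, num_durs)
    liks = norm.pdf(quantile_vals)   # normal PDF evaluated on an evenly spaced grid
    return numpy.log(liks)           # log domain, matching the lookupTableLogLiks name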
Example #3
    def _createWordMLFandDict(self):
        # txtTur to METU. The persistent txtMETU file is not really needed; kept only for reference.
        
        baseNameAudioFile = os.path.splitext(self.pathToAudioFile)[0]
        
        METUBETfileName = baseNameAudioFile + LYRICS_TXT_METUBET_EXT
        
        if (self.loadLyricsFromFile == 1):
            METULyrics = PhonetizerOld.turkishScriptLyrics2METUScriptLyricsFile(baseNameAudioFile + LYRICS_TXT_EXT, METUBETfileName)
        else:
            # TODO: change this step
            METULyrics = PhonetizerOld.turkishScriptLyrics2METUScriptLyrics(self.lyrics, METUBETfileName)
        # create word-level MLF:
        baseN = os.path.basename(self.pathToAudioFile)
        baseN = os.path.splitext(baseN)[0]
        headerLine = baseN + ' ' + METULyrics
        
        writeListOfListToTextFile([], headerLine, '/tmp/prompts')
        
        # prompts2mlf
        mlfName = '/tmp/tmp' + HTK_MLF_WORD_ANNO_SUFFIX
        prompts2mlf =  os.path.abspath('prompts2mlf')
        
        pipe = subprocess.Popen(['/usr/bin/perl', prompts2mlf,  mlfName, '/tmp/prompts'])
        pipe.wait()

        # phonetize
        dictName = '/tmp/lexicon2'
        
        PhonetizerOld.METULyrics2phoneticDict(METUBETfileName, dictName, self.withSynthesis)
        return (dictName, mlfName, METULyrics )
Example #4
    def _mapB(self,observations):
        '''
        Required implementation for _mapB. Refer to _BaseHMM for more details.
        This method highly optimizes the running time, since all PDF calculations
        are done here once in each training iteration.

        - self.Bmix_map - computes and maps Bjm(Ot) to Bjm(t).
        '''
        if self.usePersistentFiles and os.path.exists(self.PATH_BMAP):
            self.B_map = numpy.loadtxt(self.PATH_BMAP)
            # reuse the persisted probabilities only if they cover all observations
            if self.B_map.shape[1] == len(observations):
                return

        self.B_map = numpy.zeros((self.n, len(observations)), dtype=self.precision)
        self.Bmix_map = numpy.zeros((self.n, self.m, len(observations)), dtype=self.precision)
        for j in xrange(self.n):
            for t in xrange(len(observations)):
                lik = self._calcbjt(j, t, observations[t])
                if lik == 0: 
                    logging.warning("obs likelihood at time {} for state {} = 0. Repair by adding {}".format(t,j, MINIMAL_PROB))
                    lik = MINIMAL_PROB
                self.B_map[j][t] = lik
        self._normalizeBByMax()
        
        # normalize over states
        for t in xrange(len(observations)):
            self.B_map[:, t] = _normalize(self.B_map[:, t])
            logging.debug("sum={} at time {}".format(sum(self.B_map[:, t]), t))
             
        if self.usePersistentFiles:        
            writeListOfListToTextFile(self.B_map, None , self.PATH_BMAP)                 
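# Standalone sketch of the repair-and-normalize step above (_normalize itself
# is not shown in this listing, so its behavior is assumed): floor zero
# likelihoods at a minimal probability, then scale each time column to sum to 1.
import numpy

MINIMAL_PROB_SKETCH = 1e-10  # illustrative stand-in for MINIMAL_PROB

def normalize_b_map(b_map):
    b_map = numpy.where(b_map == 0, MINIMAL_PROB_SKETCH, b_map)
    return b_map / b_map.sum(axis=0, keepdims=True)  # each column sums to 1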
Example #5
    def _initKappas(self, lenObservations):
        '''
        kappas[t][s] - starting and staying at time t in the same currState s.
        WITH LogLik
        '''
        if lenObservations <= self.R_MAX:
            sys.exit("observations are only {}, R_max = {}. Not able to run initialization. Increase size of observations".format(lenObservations, self.R_MAX))

        print 'init kappas...'
        self.kappas = numpy.empty((int(self.R_MAX), self.n), dtype=self.precision)
        # if some kappa[t, state] = -INFINITY and phi[t, state] = -INFINITY, no initialization is possible (e.g. not possible to choose max between kappa and phi)
        self.kappas.fill(numpy.log(MINIMAL_PROB))

        for currState in range(self.n):
            sumObsProb = 0
            currRefMax = self.durationPdf.getMaxRefDur(self.durationMap[currState])
            currLogPi = numpy.log(self.pi[currState])

            for t in range(1, int(currRefMax) + 1):

                updateQuantity, sumObsProb = self._calcUpdateQuantity(t - 1, t, currState, 0, sumObsProb)

                self.kappas[t - 1, currState] = currLogPi + updateQuantity

                # sanity check, for debugging
                if self.kappas[t - 1, currState] == 0:
                    print "underflow error at time {}, currState {}".format(t - 1, currState)

        writeListOfListToTextFile(self.kappas, None, PATH_LOGS + '/kappas')
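# Why the kappas are filled with log(MINIMAL_PROB) rather than -Infinity: a
# tiny demo (values illustrative) showing that a finite floor keeps the later
# max-comparison between kappa and phi well defined.
import numpy
floor = numpy.log(1e-10)                      # finite stand-in for log(MINIMAL_PROB)
candidates = numpy.array([-numpy.inf, floor])
print(candidates.argmax())                    # 1: the floored entry is still selectable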
Example #6
    def _mapB_OLD(self, observations):
        '''
        Required implementation for _mapB. Refer to _BaseHMM for more details.
        This method highly optimizes the running time, since all PDF calculations
        are done here once in each training iteration.

        - self.Bmix_map - computes and maps Bjm(Ot) to Bjm(t).
        log precomputed
        '''

        if self.usePersistentFiles and os.path.exists(self.PATH_BMAP):

            self.logger.info("loading probs all observations from {}".format(self.PATH_BMAP))

            self.B_map = numpy.loadtxt(self.PATH_BMAP)
            # reuse only if the persisted table has the expected dimensions
            if self.B_map.shape[1] == len(observations) and self.B_map.shape[0] == self.n:
                self.B_map = numpy.log(self.B_map)
                return
            else:
                self.logger.info("file {} found, but it does not have the expected number of states {} or observations {}".format(self.PATH_BMAP, self.n, len(observations)))
        self.B_map = numpy.zeros( (self.n,len(observations)), dtype=self.precision)
        self.Bmix_map = numpy.zeros( (self.n,self.m,len(observations)), dtype=self.precision)
        
        for j in xrange(self.n):
            for t in xrange(len(observations)):
                self.logger.debug("at calcbjt at state {} and time {}...\n".format(j, t))
                lik = self._calcbjt(j, t, observations[t])

                if lik == 0:
                    self.logger.debug("obs likelihood at time {} for state {} = 0. Repair by adding {}".format(t, j, MINIMAL_PROB))
                    lik = MINIMAL_PROB

                self.B_map[j, t] = lik

        # normalize over states
        for t in xrange(len(observations)):
            self.B_map[:, t] = _normalize(self.B_map[:, t])
            self.logger.debug("sum={} at time {}".format(sum(self.B_map[:, t]), t))
             
        if self.usePersistentFiles:        
            writeListOfListToTextFile(self.B_map, None , self.PATH_BMAP)                 

        self.B_map = numpy.log( self.B_map)
Example #7
def tokenList2TabFile(listTsAndPhonemes, baseNameAudioFile, whichSuffix):
    '''
    Convenience method.
    '''

    phonemeAlignedfileName = baseNameAudioFile + whichSuffix
    
    writeListOfListToTextFile(listTsAndPhonemes, 'startTs endTs phonemeOrWord\n', phonemeAlignedfileName)
    logging.debug('phoneme level alignment written to file: %s', phonemeAlignedfileName)
    return phonemeAlignedfileName
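# Hypothetical stand-in for writeListOfListToTextFile, whose implementation is
# not shown in this listing; the exact formatting is an assumption based on how
# it is called here: an optional header line, then one whitespace-joined row
# per list entry.
def write_list_of_list_sketch(rows, header_line, uri):
    with open(uri, 'w') as f:
        if header_line is not None:
            f.write(header_line)
        for row in rows:
            f.write(' '.join(str(x) for x in row) + '\n')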
Example #8
    def _viterbiForcedDur(self, observations):
        # sanity check: make sure durations are initialized from the score

        print "decoding..."
        for t in range(int(self.R_MAX), len(observations)):
            for currState in xrange(1, self.n):
                self.computePhi(t, currState)  # get max duration quantities
                  

        writeListOfListToTextFile(self.phi, None , PATH_LOGS + '/phi') 
            
        # return for backtracking
        return self.chi, self.psi
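# Generic log-space Viterbi sketch (the standard algorithm, not the
# duration-explicit decoder above) illustrating what the psi backpointers
# returned for backtracking are used for. All names are illustrative.
import numpy

def viterbi_backtrack(log_pi, log_A, log_B):
    # log_B: (n_states, T) observation log-likelihoods
    n, T = log_B.shape
    phi = numpy.full((T, n), -numpy.inf)
    psi = numpy.zeros((T, n), dtype=int)
    phi[0] = log_pi + log_B[:, 0]
    for t in range(1, T):
        for j in range(n):
            scores = phi[t - 1] + log_A[:, j]   # best predecessor for state j
            psi[t, j] = scores.argmax()
            phi[t, j] = scores[psi[t, j]] + log_B[j, t]
    path = [int(phi[T - 1].argmax())]
    for t in range(T - 1, 0, -1):               # follow backpointers
        path.append(int(psi[t, path[-1]]))
    return path[::-1]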
Example #9
    def _initBeginingPhis(self, lenObservations):
        '''
        init phis when t < self.R_MAX
        '''
        
        self._initKappas(lenObservations)
        
        # for convenience put as class vars
        self.phi = numpy.empty((lenObservations, self.n), dtype=self.precision)
        self.phi.fill(-Infinity)

        # init t=0
        self.phi[0, :] = self.kappas[0, :]

        # init first state = kappa (done to allow self.getMaxPhi_slow to access the previous currState)
        self.phi[:len(self.kappas[:, 0]), 0] = self.kappas[:, 0]

        # select the bigger of kappa and phi_star
        for t in range(1, int(self.R_MAX)):
            self.logger.debug("at time t={}".format(t))
            # phi star makes sense only from the second state on
            for currState in range(1, self.n):

                phiStar, fromState, maxDurIndex = self.computePhiStar(t, currState)
                
                # take the bigger one: eq:deltaStarOrKappa
                if phiStar > self.kappas[t, currState]:
                    self.phi[t,currState] = phiStar
                    self.psi[t,currState] = fromState 
                    self.chi[t,currState] = maxDurIndex
                else:
                    self.logger.debug("kappa more than phi at time {} and state {}".format(t, currState))
                    self.phi[t, currState] = self.kappas[t, currState]
                    # kappas mean still at beginning state
                    self.psi[t,currState] = currState
                    self.chi[t,currState] = t
                    
        
        writeListOfListToTextFile(self.phi, None , PATH_LOGS + '/phi_init') 
Example #10
def _mlf2PraatFormat(listTsAndPhonemes, timeShift, baseNameAudioFile, whichSuffix):
    
    # timeshift
    for index in range(len(listTsAndPhonemes)):
        listTsAndPhonemes[index][0] = listTsAndPhonemes[index][0] + timeShift
        if (len(listTsAndPhonemes[index]) == 3): 
            del listTsAndPhonemes[index][1]
        
    phonemeAlignedfileName = baseNameAudioFile + whichSuffix
    
    writeListOfListToTextFile(listTsAndPhonemes, 'startTs phonemeOrWord\n', phonemeAlignedfileName)
    logger.debug('phoneme level alignment written to file: %s', phonemeAlignedfileName)
    return phonemeAlignedfileName
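# Standalone sketch of the transform above (names illustrative): shift the
# start timestamps and, for 3-column [startTs, endTs, token] rows, drop the
# end timestamp so only [startTs, token] remains, as in the Praat-format file
# written here.
def shift_and_drop_end_ts(rows, time_shift):
    out = []
    for row in rows:
        row = [row[0] + time_shift] + list(row[1:])
        if len(row) == 3:
            del row[1]
        out.append(row)
    return out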
Example #11
def getReferenceDurations(URI_recording_noExt, decoder, evalLevel):
        '''
        Timestamps of words according to reference durations read from the score. Used to obtain the so-called 'score-deviation' metric.
        Not used in decoding.
        '''
        
        annotationURI = URI_recording_noExt + ANNOTATION_EXT

        ##### get duration of initial silence 

        try:
            annotationTokenListA = TextGrid2WordList(annotationURI, evalLevel)

            # just copy the duration of silence from the ground truth
            annoTsAndToken = annotationTokenListA[0]
            if annoTsAndToken[2] != "" and not annoTsAndToken[2].isspace():  # first token is not silence
                logger.warn("annotation {} starts with non-sil token".format(annotationURI))
                finalSilFram = float(annoTsAndToken[0]) * NUM_FRAMES_PERSECOND
            else:
                finalSilFram = float(annoTsAndToken[1]) * NUM_FRAMES_PERSECOND

        except Exception:
            # if no ground-truth annotation file (or the needed layer) is present, take the duration from the model
            finalSilFram = 0
            countFirstStateFirstWord = decoder.lyricsWithModels.listWords[0].syllables[0].phonemes[0].numFirstState

            for i in range(countFirstStateFirstWord):
                finalSilFram += decoder.lyricsWithModels.statesNetwork[i].getDurationInFrames()
        
            
        grTruthWordList = expandlyrics2Words(decoder.lyricsWithModels, decoder.lyricsWithModels.statesNetwork, finalSilFram, _constructTimeStampsForWord)
        grTruthDurationfileExtension = '.grTruthDuration'
        writeListOfListToTextFile(grTruthWordList, None, URI_recording_noExt + grTruthDurationfileExtension)

        # TODO: could be done more easily with this code; also check the last method in Word
        # grTruthWordList = testT(decoder.lyricsWithModels)
        return grTruthWordList
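# Minimal sketch of the seconds-to-frames conversion used above; the frame
# rate below is illustrative (the real NUM_FRAMES_PERSECOND is defined
# elsewhere in the project).
NUM_FRAMES_PERSECOND_SKETCH = 100  # e.g. a 10 ms hop size

def seconds_to_frames(ts_seconds):
    return float(ts_seconds) * NUM_FRAMES_PERSECOND_SKETCH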
Example #12
def testReadListOfListTextFile_gen():
        URIfile = '/Users/joro/Downloads/kimseye-annotation-score-to-audio.txt'
        
        shiftedNakarat = []
        
        inNakarat = 0
        detectedTokenList = readListOfListTextFile_gen(URIfile)
        
        
        # get timestamps of the NAKARAT section
        for entry in detectedTokenList:
            if entry[3] == 'D5-NAKARAT-n1':
                beginTs = entry[0]
            if entry[3] == 'D5-NAKARAT*-n1':
                endTs = entry[0]
                break

        endTs = 111.687981859  # hardcoded override of the detected end timestamp
        for entry in detectedTokenList:
            if entry[3] == 'D5-NAKARAT-n1':
                inNakarat = 1
            if entry[3] == 'D5-NAKARAT*-n1':
                inNakarat = 0
                break
            
            if inNakarat:
                entry[0] += (endTs - beginTs)
                shiftedNakarat.append(entry)
        
        writeListOfListToTextFile(shiftedNakarat, None, 'shiftedNakarat.txt', toFlip=False)

        print detectedTokenList