Example #1
0
def divideIntoSentencesFromAnno(annotationURI):
    '''
    Divide an annotation TextGrid into sentences using its 'lines' tier.

    Sentence boundaries are read from tier 5 (lines/sentences) and syllables
    from tier 3 (pinyin). Each sentence collects the syllables whose
    timestamps fall inside it. Both the start and the end of every sentence
    must coincide exactly with a syllable boundary, otherwise the program is
    terminated through sys.exit.

    @param annotationURI: location of the TextGrid, handed unchanged to readNonEmptyTokensTextGrid

    @return: list of tuples
        (sentenceBegin, sentenceEnd, fromSyllableIdx, toSyllableIdx, syllables)
        where fromSyllableIdx / toSyllableIdx are field 3 of the first / last
        syllable token of the sentence and syllables is a list of
        SyllableJingju objects, each with duration 1 min unit.

    @raise SystemExit: if no syllable starts at a sentence start or none ends at a sentence end
    '''

    linesTier = 5      # read lines (sentences) tier
    syllablesTier = 3  # read syllables as pinyin

    # only the second return value (tokens without pauses) is used
    _, annotationLinesListNoPauses = readNonEmptyTokensTextGrid(annotationURI, linesTier, 0, -1)
    _, syllablesList = readNonEmptyTokensTextGrid(annotationURI, syllablesTier, 0, -1)

    numSyllables = len(syllablesList)
    # the pointer only moves forward: sentences are consumed in the order given
    syllablePointer = 0

    listSentences = []
    for currSentence in annotationLinesListNoPauses:
        currSentenceBegin = currSentence[0]
        currSentenceEnd = currSentence[1]

        # search for beginning; bounded so that running past the last syllable
        # yields the alignment error below instead of an IndexError
        while syllablePointer < numSyllables and syllablesList[syllablePointer][0] < currSentenceBegin:
            syllablePointer += 1
        # start has to be aligned
        if syllablePointer >= numSyllables or syllablesList[syllablePointer][0] != currSentenceBegin:
            sys.exit("no syllable starting at sentence start at {}  ".format(currSentenceBegin) )

        fromSyllableIdx = syllablesList[syllablePointer][3]

        # syllables in currSentence
        # NOTE(review): the end time is cast to float here but compared uncast
        # in the alignment check below -- confirm the token field types.
        currSectionSyllables = []
        while syllablePointer < numSyllables and float(syllablesList[syllablePointer][1]) <= currSentenceEnd:
            # the end-of-sentence flag is not needed here, boundaries come from the lines tier
            _, syllableTxt = stripPunctuationSings(syllablesList[syllablePointer][2])
            currSyllable = SyllableJingju(syllableTxt, -1)
            currSyllable.setDurationInMinUnit(1)
            currSectionSyllables.append(currSyllable)
            syllablePointer += 1

        # end has to be aligned; an empty sentence is rejected explicitly so that
        # syllablePointer-1 never refers to a syllable outside this sentence
        if not currSectionSyllables or syllablesList[syllablePointer-1][1] != currSentenceEnd:
            sys.exit("no syllable ending at sentence end at {}  ".format(currSentenceEnd) )
        toSyllableIdx = syllablesList[syllablePointer-1][3]

        listSentences.append(( currSentenceBegin, currSentenceEnd, fromSyllableIdx, toSyllableIdx, currSectionSyllables))

    return listSentences
Example #2
0
def divideIntoSentencesFromAnnoOld(annotationURI):
    '''
    Divide an annotation TextGrid into sentences, using punctuation in the
    syllable tier (tier 3, pinyin) as marker for sentence ends.

    A sentence starts at the first token and after every token that
    stripPunctuationSings reports as end of sentence. Syllables following the
    last sentence-ending token are not part of any returned sentence.
    An annotation without tokens yields an empty list.

    @param annotationURI: location of the TextGrid, handed unchanged to readNonEmptyTokensTextGrid

    @return: list of tuples
        (sectionStartTime, sectionEndTime, fromSyllable, toSyllable, syllables)
        where fromSyllable / toSyllable are field 3 of the first / last token
        of the sentence and syllables is a list of SyllableJingju objects,
        each with duration 1 min unit.

    @deprecated
    '''
    whichLevel = 3 # pinyin
    # only the second return value (tokens without pauses) is used
    _, annotationTokenListNoPauses = readNonEmptyTokensTextGrid(annotationURI, whichLevel, 0, -1)

    listSentences = []
    currSectionSyllables = []
    currSectionStartTime = None
    fromSyllable = None

    # True whenever the next token opens a new sentence
    startsNewSection = True

    for token in annotationTokenListNoPauses:
        if startsNewSection:
            currSectionStartTime = token[0]
            fromSyllable = token[3]
            startsNewSection = False

        isEndOfSentence, syllableTxt = stripPunctuationSings(token[2])

        # every token contributes one syllable, whether or not it ends the sentence
        currSyllable = SyllableJingju(syllableTxt, -1)
        currSyllable.setDurationInMinUnit(1)
        currSectionSyllables.append(currSyllable)

        if isEndOfSentence:
            currSectionEndTime = token[1]
            toSyllable = token[3]
            listSentences.append(( currSectionStartTime, currSectionEndTime, fromSyllable, toSyllable, currSectionSyllables))

            # start next section
            currSectionSyllables = []
            startsNewSection = True

    return listSentences
Example #3
0
def createSyllables(annotationURI, fromSyllable, toSyllable):
    '''
    Create one SyllableJingju per non-pause token of tier 3 of the annotation.

    All syllables are assigned the same duration (1 min unit), so only their
    relative durations carry meaning.

    @param annotationURI: location of the TextGrid, handed unchanged to readNonEmptyTokensTextGrid
    @param fromSyllable: start of the token range, handed unchanged to readNonEmptyTokensTextGrid
    @param toSyllable: end of the token range, handed unchanged to readNonEmptyTokensTextGrid

    @return: list of the created SyllableJingju objects, in token order
    '''
    syllableTier = 3

    # only the second return value (tokens without pauses) is used
    _, tokensNoPauses = readNonEmptyTokensTextGrid(annotationURI, syllableTier, fromSyllable, toSyllable)

    def _toSyllable(token):
        # field 2 of a token holds the syllable text
        syllable = SyllableJingju(token[2], -1)
        syllable.setDurationInMinUnit(1)
        return syllable

    return [_toSyllable(token) for token in tokensNoPauses]