def txt2label(txt, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    Args:
        txt: raw text like "向香港特别行政区同胞澳门台湾同胞", or text with
             prosody marks like "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞".
             Punctuation is allowed in txt; every non-word character other
             than '#' is removed before processing.
        sfsfile: path of the sfs file (alignment file). Each non-blank line
            holds a time and a phone type separated by whitespace. Times are
            measured in units of 1e-7 second (12345678 means 1.2345678
            second). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style, currently only the default HTS format is
            supported

    Return:
        A generator of phone labels for the txt, convenient to save as a
        label file

    Raises:
        AssertionError: if style is not 'default' or a non-blank line of the
            sfs file does not consist of exactly two fields.
    '''
    assert style == 'default', 'Currently only default style is support in txt2label'

    # Delete every non-word character (anything that is not a letter, digit,
    # underscore or Chinese character) except '#', which marks prosody.
    txt = re.sub(r'(?!#)\W', '', txt)

    # If txt carries prosody marks, use them;
    # otherwise fall back to jieba part-of-speech segmentation.
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        # NOTE(review): the punctuation listed here has already been removed
        # by the \W substitution above, so this replacement currently never
        # matches. Kept as-is until the intended order is confirmed.
        txt = re.sub('[,.,。]', '#4', txt)
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Without prosody marks every word boundary is '#0' and the
        # utterance ends with '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # Start time of the first segment; an int like every parsed time.
        times = [0]
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. at the end of the file).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                stamp, ph = fields
                times.append(int(float(stamp)))
                # append, not extend: extend would split a multi-character
                # field into separate one-character entries.
                phs_type.append(ph)
    else:
        # No alignment available: mark every phone as 'a', surround the
        # utterance with silence and insert a silence after each internal
        # '#4' boundary. All times are zero.
        phs_type = ['s']
        last_index = len(rhythms) - 1
        for i, rhythm in enumerate(rhythms):
            # Each syllable is a sequence of phones, like ('b', 'a3').
            word_syllables = txt2pinyin(words[i])
            word_phone_num = sum(len(syllable) for syllable in word_syllables)
            phs_type.extend(['a'] * word_phone_num)
            if i != last_index and rhythm == '#4':
                phs_type.append('s')
        phs_type.append('s')
        times = [0] * (len(phs_type) + 1)

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)
Esempio n. 2
0
def txt2label(txt, wavfile=None, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    If txt is given without prosody marks and without an alignment file, a
    label is still returned (but without prosody and timing information).

    Args:
        txt: raw text like "向香港特别行政区同胞澳门台湾同胞", or text with
             prosody marks like "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞".
             Punctuation is also allowed in txt.
        wavfile: path of the wav file for txt; not supported yet, must be
            None
        sfsfile: path of the sfs file (alignment file). Each non-blank line
            holds a time and a phone type separated by whitespace. Times are
            measured in units of 1e-7 second (12345678 means 1.2345678
            second). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style, currently only the default HTS format is
            supported

    Return:
        A generator of phone labels for the txt, convenient to save as a
        label file

    Raises:
        AssertionError: if wavfile is given, style is not 'default', or a
            non-blank line of the sfs file does not consist of exactly two
            fields.
    '''
    assert wavfile is None, 'wavfile currently is not supported'
    assert style == 'default', 'Currently only default style is support in txt2label'

    # If txt carries prosody marks, use them;
    # otherwise fall back to jieba part-of-speech segmentation.
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            # Keep text as str: encoding to UTF-8 bytes here makes the
            # ''.join(words) below raise TypeError on Python 3.
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Without prosody marks every word boundary is '#0' and the
        # utterance ends with '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # Start time of the first segment; an int like every parsed time.
        times = [0]
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. at the end of the file).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                stamp, ph = fields
                # Go through float so values such as "239100.0" are accepted.
                times.append(int(float(stamp)))
                # append, not extend: extend would split a multi-character
                # field into separate one-character entries.
                phs_type.append(ph)
    else:
        # No alignment available: mark every phone as 'a', surround the
        # utterance with silence and use zero for all times.
        length = sum(len(syllable) for syllable in syllables)
        phs_type = ['s'] + ['a'] * length + ['s']
        times = [0] * (length + 3)  # one more entry than phs_type

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)
Esempio n. 3
0
def txt2label(txt, sfsfile=None, style='default'):
    '''Return a generator of HTS format labels for txt.

    Args:
        txt: raw text like "向香港特别行政区同胞澳门台湾同胞", or text with
             prosody marks like "向#1香港#2特别行政区#1同胞#3澳门台湾#1同胞".
             Punctuation is allowed in txt; every non-word character other
             than '#' is removed before processing.
        sfsfile: path of the sfs file (alignment file). Each non-blank line
            holds a time and a phone type separated by whitespace. Times are
            measured in units of 1e-7 second (12345678 means 1.2345678
            second). Example:
            --------
            239100 s
            313000 a
            323000 d
            400000 b
            480000 s
            ---------
            a stands for consonant
            b stands for vowel
            d stands for silence that is shorter than 100ms
            s stands for silence that is longer than 100ms
        style: label style, currently only the default HTS format is
            supported

    Return:
        A generator of phone labels for the txt, convenient to save as a
        label file

    Raises:
        AssertionError: if style is not 'default' or a non-blank line of the
            sfs file does not consist of exactly two fields.
    '''
    assert style == 'default', 'Currently only default style is support in txt2label'

    # Delete every non-word character (anything that is not a letter, digit,
    # underscore or Chinese character) except '#'. Stripping '#' as well
    # would make the prosody branch below unreachable and leave the mark
    # digits behind in the text.
    txt = re.sub(r'(?!#)\W', '', txt)

    # If txt carries prosody marks, use them;
    # otherwise fall back to jieba part-of-speech segmentation.
    if '#' in txt:
        words, poses, rhythms = _adjust(txt)
    else:
        words = []
        poses = []
        for word, pos in posseg.cut(txt):
            words.append(word)
            poses.append(pos[0])  # keep only the first character of the tag
        # Without prosody marks every word boundary is '#0' and the
        # utterance ends with '#4'.
        rhythms = ['#0'] * (len(words) - 1)
        rhythms.append('#4')

    syllables = txt2pinyin(''.join(words))

    if sfsfile:
        phs_type = []
        # Start time of the first segment; an int like every parsed time.
        times = [0]
        with open(sfsfile) as fid:
            for line in fid:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. at the end of the file).
                    continue
                assert len(fields) == 2, 'check format of sfs file'
                stamp, ph = fields
                times.append(int(float(stamp)))
                # append, not extend: extend would split a multi-character
                # field into separate one-character entries.
                phs_type.append(ph)
    else:
        # No alignment available: mark every phone as 'a', surround the
        # utterance with silence and use zero for all times.
        phone_num = sum(len(syllable) for syllable in syllables)
        phs_type = ['s'] + ['a'] * phone_num + ['s']
        times = [0] * (phone_num + 3)  # one more entry than phs_type

    phone = tree(words, rhythms, syllables, poses, phs_type)
    return LabGenerator(phone, rhythms, times)