Ejemplo n.º 1
0
def create_lab_input(txt_file, speaker_ident):
    """Build a character-level ``PhoneInfo`` sequence from a text file.

    Only the first line of ``txt_file`` is read. It is stripped, tabs are
    turned into spaces and runs of spaces are collapsed to a single space.
    Every remaining character becomes one ``PhoneInfo`` entry holding the
    lower-cased character plus a speaker tag and a case tag.

    Args:
        txt_file: Path of the text file to read.
        speaker_ident: Speaker name; emitted as ``'SPEAKER:' + speaker_ident``.

    Returns:
        A list of ``PhoneInfo`` objects that starts with a ``'START'`` entry
        and ends with a ``'STOP'`` entry (both with an empty feature list).
    """
    from io_modules.dataset import PhoneInfo

    # The context manager guarantees the handle is released even when
    # reading fails part-way through.
    with open(txt_file, 'r') as fin:
        line = fin.readline().strip().replace('\t', ' ')

    # Collapse any run of spaces down to exactly one space.
    while '  ' in line:
        line = line.replace('  ', ' ')

    speaker = 'SPEAKER:' + speaker_ident
    seq = [PhoneInfo('START', [], 0, 0)]
    for char in line:
        l_char = char.lower()
        if l_char == l_char.upper():
            # Characters without distinct cases (digits, punctuation,
            # spaces, ...) are tagged as symbols.
            style = 'CASE:symb'
        elif l_char != char:
            style = 'CASE:upper'
        else:
            style = 'CASE:lower'
        seq.append(PhoneInfo(l_char, [speaker, style], 0, 0))

    seq.append(PhoneInfo('STOP', [], 0, 0))
    return seq
Ejemplo n.º 2
0
def get_phone_input_from_text(text, speaker_identity):
    """Turn ``text`` into a character-level list of ``PhoneInfo`` entries.

    The result is framed by a ``'START'`` and a ``'STOP'`` entry. Each
    character of ``text`` contributes one entry made of the lower-cased
    character and the features ``['SPEAKER:<speaker_identity>', <case tag>]``
    where the case tag is ``'CASE:symb'``, ``'CASE:upper'`` or ``'CASE:lower'``.
    """
    from io_modules.dataset import PhoneInfo

    def _case_tag(original, lowered):
        # A character whose lower and upper forms coincide has no case.
        if lowered == lowered.upper():
            return 'CASE:symb'
        return 'CASE:upper' if lowered != original else 'CASE:lower'

    phones = [PhoneInfo('START', [], 0, 0)]
    for original in text:
        lowered = original.lower()
        features = ['SPEAKER:' + speaker_identity, _case_tag(original, lowered)]
        phones.append(PhoneInfo(lowered, features, 0, 0))
    phones.append(PhoneInfo('STOP', [], 0, 0))
    return phones
Ejemplo n.º 3
0
def get_phone_input_from_text(text, speaker_identity, g2p=None):
    """Turn ``text`` into a list of ``PhoneInfo`` entries.

    The result is framed by a ``'START'`` and a ``'STOP'`` entry (both with
    an empty feature list). Every other entry carries the features
    ``['SPEAKER:<speaker_identity>', <case tag>]``.

    Without ``g2p`` each character of ``text`` yields one entry holding the
    lower-cased character, tagged ``'CASE:symb'``, ``'CASE:upper'`` or
    ``'CASE:lower'``.

    With ``g2p`` the text is split into words at every character that has no
    distinct upper/lower form (spaces, digits, punctuation, ...). Each
    lower-cased word is passed to ``g2p.transcribe`` and every returned
    phoneme yields one entry; the separating characters are emitted as
    ``'CASE:symb'`` entries. The first phoneme of a word whose original first
    letter was upper case is tagged ``'CASE:upper'``; all other phonemes are
    tagged ``'CASE:lower'``.

    Args:
        text: Input string.
        speaker_identity: Speaker name used to build the speaker feature.
        g2p: Optional object exposing ``transcribe(word)`` that returns an
            iterable of phonemes. When ``None`` the text is processed
            character by character.

    Returns:
        The list of ``PhoneInfo`` objects described above.
    """
    from io_modules.dataset import PhoneInfo

    speaker = 'SPEAKER:' + speaker_identity
    seq = [PhoneInfo('START', [], 0, 0)]

    def append_word(word, capitalized):
        # Transcribe one lower-cased word and append its phonemes to seq.
        for index, phon in enumerate(g2p.transcribe(word)):
            if capitalized and index == 0:
                style = 'CASE:upper'
            else:
                style = 'CASE:lower'
            seq.append(PhoneInfo(phon, [speaker, style], 0, 0))

    if g2p is not None:
        word = ''
        capitalized = False
        for char in text:
            l_char = char.lower()
            if l_char == l_char.upper():  # symbol
                # Flush the pending word first, then emit the symbol itself.
                if word:
                    append_word(word, capitalized)
                    word = ''
                seq.append(PhoneInfo(l_char, [speaker, 'CASE:symb'], 0, 0))
            else:
                if not word:
                    # The buffer only stores lower-cased characters, so the
                    # capitalisation must be taken from the original
                    # character at the moment the word starts.
                    capitalized = l_char != char
                word += l_char
        # Flush the last word when the text does not end with a symbol.
        if word:
            append_word(word, capitalized)
    else:
        for char in text:
            l_char = char.lower()
            if l_char == l_char.upper():
                style = 'CASE:symb'
            elif l_char != char:
                style = 'CASE:upper'
            else:
                style = 'CASE:lower'
            seq.append(PhoneInfo(l_char, [speaker, style], 0, 0))

    seq.append(PhoneInfo('STOP', [], 0, 0))

    return seq