Ejemplo n.º 1
0
def text2symbols(text, dtype):
    """Convert raw text to a cleaned character string or a phoneme list.

    Args:
        text: Raw input text.
        dtype: ``'char'`` to return the cleaned character string; any other
            value returns the phoneme symbol list.

    Returns:
        str if ``dtype == 'char'``, otherwise a list of phoneme symbols
        (symbols known to ``symbol_to_id`` are prefixed with ``'@'``,
        punctuation tokens are kept as-is).
    """
    clean_char = custom_english_cleaners(text.rstrip()).rstrip()
    # Normalize trailing punctuation so every utterance ends with '.', '!'
    # or '?'.  Guard against an empty cleaned string (e.g. input that was
    # only whitespace/punctuation) to avoid IndexError on clean_char[-1].
    if clean_char and clean_char[-1] in ('.', ','):
        # Collapse a run of trailing '.'/',' into a single '.'.
        while clean_char and clean_char[-1] in ('.', ','):
            clean_char = clean_char[:-1]
        clean_char = clean_char + '.'
    elif clean_char and clean_char[-1] in ('!', '?'):
        pass  # already a valid sentence terminator
    else:
        clean_char = clean_char + '.'

    if dtype == 'char':
        return clean_char

    clean_phone = []
    for s in g2p(clean_char.lower()):
        # Attach punctuation to the previous symbol by dropping the
        # separating space token.  The `clean_phone` truthiness guard
        # prevents IndexError when the very first token is punctuation.
        if s in (',', '!', '.', '?', "'") and clean_phone and clean_phone[-1] == ' ':
            clean_phone.pop()
            clean_phone.append(s)
        elif '@' + s in symbol_to_id:
            clean_phone.append('@' + s)
        else:
            clean_phone.append(s)

    return clean_phone
Ejemplo n.º 2
0
    def frontend(self, text):
        """Clean text and convert it to an id sequence tensor.

        Args:
            text: Raw input text.

        Returns:
            1-D ``torch.LongTensor`` of character ids terminated by the
            <eos> id (``self.idim - 1``), moved to ``self.device``.
        """
        text = custom_english_cleaners(text)

        # Hoist the special ids out of the loop; dict.get avoids the
        # redundant `c not in d.keys()` membership test + second lookup.
        space_id = self.char_to_id["<space>"]
        unk_id = self.char_to_id["<unk>"]
        idseq = []
        for c in text:
            if c.isspace():
                idseq.append(space_id)
            else:
                # Unknown characters fall back to <unk>.
                idseq.append(self.char_to_id.get(c, unk_id))
        idseq.append(self.idim - 1)  # <eos>
        return torch.LongTensor(idseq).view(-1).to(self.device)
Ejemplo n.º 3
0
def main():
    """Write a kaldi-style text file built from *_mls.json transcriptions."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--lang_tag", type=str, default=None, nargs="?",
                        help="language tag (can be used for multi lingual case)")
    parser.add_argument("--spk_tag", type=str,
                        help="speaker tag")
    parser.add_argument("jsons", nargs="+", type=str,
                        help="*_mls.json filenames")
    parser.add_argument("out", type=str,
                        help="output filename")
    parser.add_argument("trans_type", type=str, default="phn",
                        choices=["char", "phn"],
                        help="Input transcription type")
    args = parser.parse_args()

    # Make sure the output directory exists before opening the file.
    out_dir = os.path.dirname(args.out)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with codecs.open(args.out, "w", encoding="utf-8") as out:
        for json_path in sorted(args.jsons):
            with codecs.open(json_path, "r", encoding="utf-8") as f:
                entries = json.load(f)
            for key in sorted(entries):
                uid = args.spk_tag + "_" + key.replace(".wav", "")

                # Clean the transcription; optionally convert to phonemes.
                text = custom_english_cleaners(entries[key]["clean"].rstrip())
                if args.trans_type == "phn":
                    text = g2p(text.lower())

                if args.lang_tag is None:
                    out.write("%s %s \n" % (uid, text))
                else:
                    out.write("%s <%s> %s\n" % (uid, args.lang_tag, text))
Ejemplo n.º 4
0
    f_g2p("")
except ImportError:
    raise ImportError("g2p_en is not installed. please run `. ./path.sh && pip install g2p_en`.")
except LookupError:
    # NOTE: we need to download dict in initial running
    nltk.download("punkt")


def g2p(text):
    """Convert grapheme text to a space-joined phoneme string."""
    phonemes = [token for token in f_g2p(text) if token != " "]
    return ' '.join(phonemes)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('text', type=str, help='text to be cleaned')
    # NOTE: for a required positional argparse ignores `default`, but the
    # previous default "kana" was not even in `choices` -- keep it
    # consistent with the declared choices.
    parser.add_argument("trans_type", type=str, default="phn",
                        choices=["char", "phn"],
                        help="Input transcription type")
    args = parser.parse_args()
    with codecs.open(args.text, 'r', 'utf-8') as fid:
        for line in fid.readlines():
            # Expected metadata format: "<utt_id>|<raw>|<content>".
            # maxsplit=2 keeps any '|' inside the content field intact,
            # and `utt_id` avoids shadowing the builtin `id`.
            utt_id, _, content = line.split("|", 2)
            clean_content = custom_english_cleaners(content.rstrip())
            if args.trans_type == "phn":
                clean_content = g2p(clean_content.lower())

            print("%s %s" % (utt_id, clean_content))
Ejemplo n.º 5
0
root_dir = '/hd0/dataset/VCTK/VCTK-Corpus/wav48'
data_dir = '/hd0/speech-aligner/preprocessed/VCTK20_engspks'

# Output layout: character sequences, phoneme sequences, mel features.
os.makedirs(data_dir, exist_ok=True)
for subdir in ('char_seq', 'phone_seq', 'melspectrogram'):
    os.makedirs(os.path.join(data_dir, subdir), exist_ok=True)

g2p = G2p()
metadata = {}
with codecs.open(csv_file, 'r', 'utf-8') as fid:
    for line in fid.readlines():
        utt_id, utt_text, _speaker = line.split("|")
        utt_id = os.path.splitext(utt_id)[0]

        clean_char = custom_english_cleaners(utt_text.rstrip())
        # Mark symbols known to the table with '@'; keep the rest as-is.
        clean_phone = ['@' + s if '@' + s in symbol_to_id else s
                       for s in g2p(clean_char.lower())]

        metadata[utt_id] = {'char': clean_char, 'phone': clean_phone}

stft = TacotronSTFT(filter_length=1024,
                    hop_length=256,
                    win_length=1024,
                    n_mel_channels=80,
                    sampling_rate=16000,
                    mel_fmin=55.0,
Ejemplo n.º 6
0
                        choices=["char", "phn"],
                        help="Input transcription type")
    parser.add_argument("--lowercase",
                        # NOTE: `type=bool` is an argparse pitfall --
                        # bool("False") is True.  Parse truthy strings
                        # explicitly so "--lowercase False" works.
                        type=lambda s: s.lower() in ("true", "t", "yes", "1"),
                        default=False,
                        help="Lower case the result or not")
    args = parser.parse_args()

    # clean every line in transcription file first
    with codecs.open(args.transcription_path, 'r', 'utf-8') as fid:
        for line in fid.read().splitlines():
            segments = line.split(" ")

            # clean contents (everything before the trailing "(id)" token)
            content = ' '.join(segments[:-1])
            clean_content = custom_english_cleaners(content)

            # get id by taking off the parentheses
            # (name kept as `id`: later lines outside this span print it)
            id = segments[-1][1:-1]

            if args.trans_type == "phn":
                # Phoneme transcription works on lower-cased text.
                clean_content = g2p(clean_content.lower())

            if args.lowercase:
                clean_content = clean_content.lower()

            if args.lang_tag == "":
                print("{} {}".format(id, clean_content))
            else:
                print("{} {}".format(