Example #1
def prepare_dataloaders(hparams, distributed_run=False):
    # Get data, data loaders and collate function ready
    if hparams.use_basic_handler:
        text_handler = Handler(hparams.charset)
    else:
        text_handler = Handler.from_charset(hparams.charset,
                                            data_dir="data",
                                            silent=True)

    trainset = TextMelLoader(text_handler, hparams.training_files, hparams)
    valset = TextMelLoader(text_handler, hparams.validation_files, hparams)
    collate_fn = TextMelCollate(hparams.n_frames_per_step)

    if distributed_run:
        train_sampler = DistributedSampler(trainset)
    else:
        train_sampler = CustomSampler(trainset, hparams.batch_size,
                                      hparams.shuffle, hparams.optimize,
                                      hparams.len_diff)

    train_loader = DataLoader(trainset,
                              num_workers=1,
                              sampler=train_sampler,
                              batch_size=hparams.batch_size,
                              pin_memory=False,
                              drop_last=False,
                              collate_fn=collate_fn)
    return train_loader, valset, collate_fn
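
Note: prepare_dataloaders returns the raw valset rather than a ready-made loader. Below is a minimal sketch of how a validation loader could be built from the returned pieces; the helper name prepare_val_loader and its signature are illustrative, not part of the original code.

from torch.utils.data import DataLoader

def prepare_val_loader(valset, collate_fn, batch_size):
    # Validation is typically evaluated in order, without shuffling,
    # and can reuse the same collate_fn as the training loader.
    return DataLoader(valset,
                      num_workers=1,
                      shuffle=False,
                      batch_size=batch_size,
                      pin_memory=False,
                      drop_last=False,
                      collate_fn=collate_fn)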
Example #2
def english():
    handler = Handler("en")
    text = "Peter Piper picked a peck of pickled peppers;      A peck of pickled peppers Peter Piper picked"
    target = "peter piper picked a peck of pickled peppers; a peck of pickled peppers peter piper picked"

    result = handler.process_text(text,
                                  cleaners="light_punctuation_cleaners",
                                  keep_delimiters=False)

    assert result == target
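
The same Handler API also supports a symbol-to-id round trip (exercised for Russian in Example #4 below). A hedged sketch for English, assuming text2vec/vec2text behave the same way for the "en" charset:

handler = Handler("en")
processed = handler.process_text("Hello, world!",
                                 cleaners="light_punctuation_cleaners",
                                 keep_delimiters=False)
vector = handler.text2vec(processed)           # symbols -> integer ids
assert handler.vec2text(vector) == processed   # round trip should be lossless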
Example #3
def prepare_dataloaders(hparams, distributed_run=False):
    # Get data, data loaders and collate function ready
    assert isinstance(hparams.text_handler_cfg, str)
    text_handler = Handler.from_config(hparams.text_handler_cfg)
    text_handler.out_max_length = None
    assert text_handler.charset.value == hparams.charset

    trainset = TextMelLoader(text_handler, hparams.training_files, hparams)
    valset = TextMelLoader(text_handler, hparams.validation_files, hparams)
    collate_fn = TextMelCollate(hparams.n_frames_per_step)

    if distributed_run:
        train_sampler = DistributedSampler(trainset)
    else:
        train_sampler = CustomSampler(trainset, hparams.batch_size,
                                      hparams.shuffle, hparams.optimize,
                                      hparams.len_diff)

    train_loader = DataLoader(trainset,
                              num_workers=1,
                              sampler=train_sampler,
                              batch_size=hparams.batch_size,
                              pin_memory=False,
                              drop_last=False,
                              collate_fn=collate_fn)
    return train_loader, valset, collate_fn
Example #4
from time import time  # needed for the timing loop at the end of this test


def russian():
    # `data_dir` is assumed to be provided by the surrounding test module.
    handler = Handler.from_charset("ru", data_dir=data_dir, silent=True)
    text = "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! " \
           "Ежик испытвал стресс, потому что под елкой не было грибочка. " \
           "Но детектив нашел его для ежика. Это столило ежику триста руб. " \
           "Синтез речи     -    это     увлекательно."
    target = "в ч+ащах +юга ж+ил бы ц+итрус? да, но фальш+ивый экземпл+яр! " \
             "+ёжик испытвал стр+эсс, потому чт+о п+од +ёлкой не было гриб+очка. " \
             "но дэтэкт+ив наш+ёл ег+о дл+я +ёжика. +это столило +ёжику тр+иста рубл+ей. " \
             "с+интэз р+ечи — +это увлек+ательно."

    user_dict = {"руб": "рублей"}

    result = handler.process_text(text,
                                  cleaners="light_punctuation_cleaners",
                                  user_dict=user_dict,
                                  keep_delimiters=False)
    assert result == target

    vector = handler.text2vec(result)
    assert handler.vec2text(vector) == result

    n = 100
    t1 = time()
    for _ in range(n):
        result = handler.process_text(text,
                                      cleaners="light_punctuation_cleaners",
                                      user_dict=user_dict,
                                      keep_delimiters=False)
        vector = handler.text2vec(result)
        handler.vec2text(vector)

    print("Handler processing time (text length is {} symbols): {}".format(
        len(text), (time() - t1) / n))
Example #5
def _load_text_handler(config_dict):
    logger.info("Loading text handler")

    out_max_length = config_dict["out_max_length"]

    config = config_dict["config"]
    assert config is not None

    if config in Charset._member_names_:
        # `config` names a built-in charset (e.g. "ru"): build the handler directly.
        handler = Handler.from_charset(config, out_max_length, silent=True)
    else:
        # Otherwise `config` is treated as a path to a handler config file.
        handler_config = Synthesizer.load_config(config)
        handler_config["handler"]["out_max_length"] = out_max_length

        handler = Handler.from_config(handler_config)

    return handler
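
For context, here is a sketch of the config_dict this loader expects; the concrete values are hypothetical and only illustrate the two branches.

# Branch 1: "config" is a charset name known to the Charset enum (e.g. "ru"),
# so the handler is built directly via Handler.from_charset.
config_dict = {"out_max_length": 200, "config": "ru"}
handler = _load_text_handler(config_dict)

# Branch 2: "config" points to a handler config file instead; it is loaded
# through Synthesizer.load_config and patched with out_max_length.
config_dict = {"out_max_length": 200, "config": "path/to/handler_config.yaml"}
handler = _load_text_handler(config_dict)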
Example #6
def _load_text_handler(config_dict):
    logger.info("Loading text handler")

    out_max_length = config_dict["out_max_length"]

    config = config_dict["config"]
    assert config is not None

    handler = Handler.from_charset(config, out_max_length, silent=True)

    return handler
Example #7
def _load_text_handler(config_dict):
    logger.info("Loading text handler")

    out_max_length = config_dict["out_max_length"]

    config_path = config_dict["config_path"]
    assert config_path is not None

    handler_config = Synthesizer.load_config(config_path)
    handler_config["handler"]["out_max_length"] = out_max_length

    return Handler.from_config(handler_config)