Code Example #1
File: eval.py Project: geneing/TTS
 def load_model(self, model_path, model_config, wavernn_path, use_cuda):
     
     self.model_file = model_path
     print(" > Loading model ...")
     print(" | > model config: ", model_config)
     print(" | > model file: ", self.model_file)
     config = load_config(model_config)
     self.config = config
     self.use_cuda = use_cuda
     self.use_phonemes = config.use_phonemes
     self.ap = AudioProcessor(**config.audio)
     
     if self.use_phonemes:
         self.input_size = len(phonemes)
         self.input_adapter = lambda sen: phoneme_to_sequence(sen, [self.config.text_cleaner], self.config.phoneme_language)
     else:
         self.input_size = len(symbols)
         self.input_adapter = lambda sen: text_to_sequence(sen, [self.config.text_cleaner])
     
     self.model = Tacotron(self.input_size, config.embedding_size, self.ap.num_freq, self.ap.num_mels, config.r, attn_windowing=True)
     self.model.decoder.max_decoder_steps = 8000
     # load model state
     if use_cuda:
         cp = torch.load(self.model_file)
     else:
         cp = torch.load(self.model_file, map_location=lambda storage, loc: storage)
     # load the model
     self.model.load_state_dict(cp['model'])
     if use_cuda:
         self.model.cuda()
     self.model.eval()
     self.vocoder = WaveRNNVocoder.Vocoder()
     self.vocoder.loadWeights(wavernn_path)
     self.firwin = signal.firwin(1025, [65, 7600], pass_zero=False, fs=16000)
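Note that the `firwin` call above only designs band-pass filter taps (65–7600 Hz at a 16 kHz sample rate); applying them happens elsewhere in the class. A minimal sketch of how such taps are typically applied to vocoder output with scipy (the `wav` array here is a placeholder, not the class's actual vocoder call):

import numpy as np
from scipy import signal

# same band-pass design as in load_model(): 65-7600 Hz at fs=16 kHz
firwin = signal.firwin(1025, [65, 7600], pass_zero=False, fs=16000)

# placeholder vocoder output; in the class this would be whatever
# self.vocoder produces
wav = np.random.randn(16000).astype(np.float32)

# filtfilt applies the FIR taps forward and backward (zero phase),
# avoiding the group delay a plain lfilter pass would introduce
filtered = signal.filtfilt(firwin, [1.0], wav)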
Code Example #2
File: synthesizer.py Project: tuanad121/TTS
 def load_tts(self, model_path, model_file, model_config, use_cuda):
     tts_config = os.path.join(model_path, model_config)
     self.model_file = os.path.join(model_path, model_file)
     print(" > Loading TTS model ...")
     print(" | > model config: ", tts_config)
     print(" | > model file: ", model_file)
     self.tts_config = load_config(tts_config)
     self.use_phonemes = self.tts_config.use_phonemes
     self.ap = AudioProcessor(**self.tts_config.audio)
     if self.use_phonemes:
         self.input_size = len(phonemes)
         self.input_adapter = lambda sen: phoneme_to_sequence(sen, [self.tts_config.text_cleaner], self.tts_config.phoneme_language, self.tts_config.enable_eos_bos_chars)
     else:
         self.input_size = len(symbols)
         self.input_adapter = lambda sen: text_to_sequence(sen, [self.tts_config.text_cleaner])
     self.tts_model = setup_model(self.input_size, self.tts_config)
     # load model state
     if use_cuda:
         cp = torch.load(self.model_file)
     else:
         cp = torch.load(self.model_file, map_location=lambda storage, loc: storage)
     # load the model
     self.tts_model.load_state_dict(cp['model'])
     if use_cuda:
         self.tts_model.cuda()
     self.tts_model.eval()
     self.tts_model.decoder.max_decoder_steps = 3000
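Both loaders above use the same idiom for checkpoints written on a GPU: `map_location=lambda storage, loc: storage` remaps every CUDA storage to CPU so the file can be opened on a machine without a GPU. A self-contained sketch of the idea (the file name is a placeholder):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
torch.save({'model': model.state_dict()}, 'checkpoint.pth')  # placeholder path

# remap any CUDA storages to CPU so the file opens on CPU-only machines
cp = torch.load('checkpoint.pth', map_location=lambda storage, loc: storage)
model.load_state_dict(cp['model'])
model.eval()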
Code Example #3
File: LJSpeech.py Project: unoffices/TTS
    def __init__(self,
                 csv_file,
                 root_dir,
                 outputs_per_step,
                 sample_rate,
                 text_cleaner,
                 num_mels,
                 min_level_db,
                 frame_shift_ms,
                 frame_length_ms,
                 preemphasis,
                 ref_level_db,
                 num_freq,
                 power,
                 min_seq_len=0):

        with open(csv_file, "r", encoding="utf8") as f:
            self.frames = [line.split('|') for line in f]
        self.root_dir = root_dir
        self.outputs_per_step = outputs_per_step
        self.sample_rate = sample_rate
        self.cleaners = text_cleaner
        self.min_seq_len = min_seq_len
        self.ap = AudioProcessor(sample_rate, num_mels, min_level_db,
                                 frame_shift_ms, frame_length_ms, preemphasis,
                                 ref_level_db, num_freq, power)
        print(" > Reading LJSpeech from - {}".format(root_dir))
        print(" | > Number of instances : {}".format(len(self.frames)))
        self._sort_frames()
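`_sort_frames` itself is not shown in this snippet. In datasets of this kind it typically sorts the metadata rows by text length and drops rows shorter than `min_seq_len`, so padded batches stay compact. A hypothetical sketch, not the project's actual method:

    def _sort_frames(self):
        # hypothetical: keep rows whose text (column 1) meets min_seq_len,
        # sorted by text length so similar-length items batch together
        self.frames = sorted(
            (f for f in self.frames if len(f[1]) >= self.min_seq_len),
            key=lambda f: len(f[1]))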
Code Example #4
    def load_model(self, model_path, model_name, model_config, use_cuda):
        model_config = os.path.join(model_path, model_config)
        self.model_file = os.path.join(model_path, model_name)
        print(" > Loading model ...")
        print(" | > model config: ", model_config)
        print(" | > model file: ", self.model_file)
        config = load_config(model_config)
        self.config = config
        self.use_cuda = use_cuda
        self.use_phonemes = config.use_phonemes
        self.ap = AudioProcessor(**config.audio)

        if self.use_phonemes:
            self.input_size = len(phonemes)
            self.input_adapter = lambda sen: phoneme_to_sequence(
                sen, [self.config.text_cleaner], self.config.phoneme_language)
        else:
            self.input_size = len(symbols)
            self.input_adapter = lambda sen: text_to_sequence(
                sen, [self.config.text_cleaner])

        self.model = Tacotron(self.input_size, config.embedding_size,
                              self.ap.num_freq, self.ap.num_mels, config.r)
        # load model state
        if use_cuda:
            cp = torch.load(self.model_file)
        else:
            cp = torch.load(self.model_file,
                            map_location=lambda storage, loc: storage)
        # load the model
        self.model.load_state_dict(cp['model'])
        if use_cuda:
            self.model.cuda()
        self.model.eval()
Code Example #5
 def load_tts(self, model_path, model_file, model_config, use_cuda):
     tts_config = os.path.join(model_path, model_config)
     self.model_file = os.path.join(model_path, model_file)
     print(" > Loading TTS model ...")
     print(" | > model config: ", tts_config)
     print(" | > model file: ", model_file)
     self.tts_config = load_config(tts_config)
     self.use_phonemes = self.tts_config.use_phonemes
     self.ap = AudioProcessor(**self.tts_config.audio)
     if self.use_phonemes:
         self.input_size = len(phonemes)
     else:
         self.input_size = len(symbols)
     # load speakers
     if self.config.tts_speakers is not None:
         self.tts_speakers = load_speaker_mapping(os.path.join(model_path, self.config.tts_speakers))
         num_speakers = len(self.tts_speakers)
     else:
         num_speakers = 0
     self.tts_model = setup_model(self.input_size, num_speakers=num_speakers, c=self.tts_config) 
     # load model state
     cp = torch.load(self.model_file)
     # load the model
     self.tts_model.load_state_dict(cp['model'])
     if use_cuda:
         self.tts_model.cuda()
     self.tts_model.eval()
     self.tts_model.decoder.max_decoder_steps = 3000
     if 'r' in cp and self.tts_config.model in ["Tacotron", "TacotronGST"]:
         self.tts_model.decoder.set_r(cp['r'])
Code Example #6
def main():
    ap = AudioProcessor()

    # load model
    num_chars = len(phonemes)
    model = Tacotron(num_chars).to(device)
    cp = torch.load(args.model_path)
    model.load_state_dict(cp['model'])
    model.eval()

    print('Text: {}'.format(args.text))
    wav = tts(model, args.text, ap)

    file_name = args.text.replace(' ', '_') + '.wav'
    out_path = os.path.join(args.out_path, file_name)
    ap.save_wav(wav, out_path)
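This `main` relies on module-level globals (`args`, `device`, `phonemes`) that the snippet does not show. A plausible setup, with flag names chosen to match the attributes used above (they are assumptions, not the project's actual CLI):

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, required=True)
parser.add_argument('--text', type=str, required=True)
parser.add_argument('--out_path', type=str, default='.')
args = parser.parse_args()

# `device` as consumed by Tacotron(...).to(device) above
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')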
Code Example #7
    def load_model(self, model_path, model_name, model_config, use_cuda):

        # build the config's path
        model_config = os.path.join(model_path, model_config)

        # build the model's path
        model_file = os.path.join(model_path, model_name)
        print(" > Loading model ...")
        print(" | > Model config path: ", model_config)
        print(" | > Model file path: ", model_file)

        config = load_config(model_config)
        self.use_cuda = use_cuda
        self.use_phonemes = config.use_phonemes
        self.ap = AudioProcessor(**config.audio)

        if self.use_phonemes:
            self.input_size = len(phonemes)
            self.input_adapter = lambda sen: phoneme_to_sequence(
                sen, [config.text_cleaner], config.phoneme_language)
        else:
            self.input_size = len(symbols)
            self.input_adapter = lambda sen: text_to_sequence(
                sen, [config.text_cleaner])

        self.model = Tacotron(num_chars=config['num_chars'],
                              embedding_dim=config['embedding_size'],
                              linear_dim=self.ap.num_freq,
                              mel_dim=self.ap.num_mels,
                              r=config['r'])

        # load model state
        if use_cuda:
            cp = torch.load(model_file)
        else:
            cp = torch.load(model_file,
                            map_location=lambda storage, loc: storage)

        # load the model
        self.model.load_state_dict(cp['model'])

        # if CUDA is enabled and available, move the model to the GPU
        if use_cuda:
            self.model.cuda()

        # put the model in eval mode (disables dropout, uses stored batch-norm stats)
        self.model.eval()
Code Example #8
 def load_model(self, model_path, model_name, model_config, use_cuda):
     model_config = os.path.join(model_path, model_config)
     self.model_file = os.path.join(model_path, model_name)
     print(" > Loading model ...")
     print(" | > model config: ", model_config)
     print(" | > model file: ", self.model_file)
     config = load_config(model_config)
     self.config = config
     self.use_cuda = use_cuda
     self.ap = AudioProcessor(**config.audio)
     # NOTE: 61 here is a hard-coded input symbol count
     self.model = Tacotron(61, config.embedding_size, self.ap.num_freq,
                           self.ap.num_mels, config.r)
     # load model state
     if use_cuda:
         cp = torch.load(self.model_file)
     else:
         cp = torch.load(self.model_file,
                         map_location=lambda storage, loc: storage)
     # load the model
     self.model.load_state_dict(cp['model'])
     if use_cuda:
         self.model.cuda()
     self.model.eval()
Code Example #9
        raise FileNotFoundError('{} not found'.format(metadata_file))

    melspec_dir = os.path.join(args.data_root, 'melspec')
    if not os.path.exists(melspec_dir):
        os.makedirs(melspec_dir, exist_ok=True)

    spec_dir = os.path.join(args.data_root, 'spec')
    if not os.path.exists(spec_dir):
        os.makedirs(spec_dir, exist_ok=True)

    phoneme_dir = os.path.join(args.data_root, 'phoneme')
    if not os.path.exists(phoneme_dir):
        os.makedirs(phoneme_dir, exist_ok=True)

    items = load_metadata(metadata_file)
    ap = AudioProcessor()

    for text, wav_file in tqdm(items):
        prefix = wav_file.replace('.wav', '')

        # generate the phoneme sequence
        generate_phoneme_sequence(text,
                                  os.path.join(phoneme_dir, prefix + '.npy'))

        wav = np.array(ap.load_wav(os.path.join(wav_dir, wav_file)),
                       dtype=np.float32)

        # generate the mel spectrogram
        melspec = ap.melspectrogram(wav).astype('float32')
        np.save(os.path.join(melspec_dir, prefix + '.npy'), melspec)
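The snippet creates `spec_dir` but is cut off before the linear spectrogram is saved. Assuming the AudioProcessor exposes `spectrogram()` alongside `melspectrogram()` (as in mozilla/TTS), the missing step in the loop would plausibly be:

        # generate the linear spectrogram (reconstructed continuation)
        spec = ap.spectrogram(wav).astype('float32')
        np.save(os.path.join(spec_dir, prefix + '.npy'), spec)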
Code Example #10
File: loader_tests.py Project: twerkmeister/oldtts
 def __init__(self, *args, **kwargs):
     super(TestTTSDataset, self).__init__(*args, **kwargs)
     self.max_loader_iter = 4
     self.ap = AudioProcessor(**c.audio)
Code Example #11
File: train.py Project: marzus555/WaveRNN
    if args.data_path != "":
        CONFIG.data_path = args.data_path
    DATA_PATH = CONFIG.data_path

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(
            args.rank,
            num_gpus,
            args.group_id,
            CONFIG.distributed["backend"],
            CONFIG.distributed["url"],
        )

    global ap
    ap = AudioProcessor(**CONFIG.audio)
    mode = CONFIG.mode

    # setup output paths and read configs
    _ = os.path.dirname(os.path.realpath(__file__))
    if args.data_path != "":
        CONFIG.data_path = args.data_path

    if args.output_path == "":
        OUT_PATH = os.path.join(_, CONFIG.output_path)
    else:
        OUT_PATH = args.output_path

    if args.group_id == "":
        OUT_PATH = create_experiment_folder(OUT_PATH, CONFIG.model_name)
Code Example #12
File: train_gstnet.py Project: geneing/TTS
def main(args): #pylint: disable=redefined-outer-name
    # Audio processor
    ap = AudioProcessor(**c.audio)

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
Code Example #13
def main(args):
    dataset = importlib.import_module('datasets.' + c.dataset)
    Dataset = getattr(dataset, 'MyDataset')
    audio = importlib.import_module('utils.' + c.audio_processor)
    AudioProcessor = getattr(audio, 'AudioProcessor')

    ap = AudioProcessor(sample_rate=c.sample_rate,
                        num_mels=c.num_mels,
                        min_level_db=c.min_level_db,
                        frame_shift_ms=c.frame_shift_ms,
                        frame_length_ms=c.frame_length_ms,
                        ref_level_db=c.ref_level_db,
                        num_freq=c.num_freq,
                        power=c.power,
                        preemphasis=c.preemphasis)

    # Setup the dataset
    train_dataset = Dataset(c.data_path,
                            c.meta_file_train,
                            c.r,
                            c.text_cleaner,
                            ap=ap,
                            min_seq_len=c.min_seq_len)

    train_loader = DataLoader(train_dataset,
                              batch_size=c.batch_size,
                              shuffle=False,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=False,
                              num_workers=c.num_loader_workers,
                              pin_memory=True)

    if c.run_eval:
        val_dataset = Dataset(c.data_path,
                              c.meta_file_val,
                              c.r,
                              c.text_cleaner,
                              ap=ap)

        val_loader = DataLoader(val_dataset,
                                batch_size=c.eval_batch_size,
                                shuffle=False,
                                collate_fn=val_dataset.collate_fn,
                                drop_last=False,
                                num_workers=4,
                                pin_memory=True)
    else:
        val_loader = None

    model = Tacotron(c.embedding_size, ap.num_freq, c.num_mels, c.r)
    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    optimizer = optim.Adam(model.parameters(), lr=c.lr)
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(), lr=c.lr)

    criterion = L1LossMasked()
    criterion_st = nn.BCELoss()

    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
        if use_cuda:
            model = model.cuda()
            criterion.cuda()
            criterion_st.cuda()
        optimizer.load_state_dict(checkpoint['optimizer'])
        # optimizer_st.load_state_dict(checkpoint['optimizer_st'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.cuda()
        print(" > Model restored from step %d" % checkpoint['step'],
              flush=True)
        start_epoch = checkpoint['step'] // len(train_loader)
        best_loss = checkpoint['linear_loss']
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0
        print("\n > Starting a new training", flush=True)
        if use_cuda:
            model = model.cuda()
            criterion.cuda()
            criterion_st.cuda()

    scheduler = AnnealLR(optimizer, warmup_steps=c.warmup_steps)
    num_params = count_parameters(model)
    print(" | > Model has {} parameters".format(num_params), flush=True)

    if not os.path.exists(CHECKPOINT_PATH):
        os.mkdir(CHECKPOINT_PATH)

    if 'best_loss' not in locals():
        best_loss = float('inf')

    for epoch in range(0, c.epochs):
        train_loss, current_step = train(model, criterion, criterion_st,
                                         train_loader, optimizer, optimizer_st,
                                         scheduler, ap, epoch)
        val_loss = evaluate(model, criterion, criterion_st, val_loader, ap,
                            current_step)
        print(" | > Train Loss: {:.5f}   Validation Loss: {:.5f}".format(
            train_loss, val_loss),
              flush=True)
        best_loss = save_best_model(model, optimizer, train_loss, best_loss,
                                    OUT_PATH, current_step, epoch)
Code Example #14
File: audio_tests.py Project: BlueTaurus/moz2
 def __init__(self, *args, **kwargs):
     super(TestAudio, self).__init__(*args, **kwargs)
     self.ap = AudioProcessor(**c.audio)
Code Example #15
File: audiobook_reader.py Project: geneing/TTS
        type=str,
        help='Path to save final wav file.',
    )

    args = parser.parse_args()

    try:
        path = os.path.realpath(os.path.dirname(__file__))
    except NameError as e:
        path = './'

    C = load_config(os.path.join(path, 'pretrained_models/TTS/config.json'))
    C.forward_attn_mask = False
    C.windowing = True
    # load the audio processor
    ap = AudioProcessor(**C.audio)
    num_speakers = 0

    # load the model
    num_chars = len(phonemes) if C.use_phonemes else len(symbols)
    model = setup_model(num_chars, num_speakers, C)
    cp = torch.load(os.path.join(path,
                                 'pretrained_models/TTS/best_model.pth.tar'),
                    map_location='cpu')
    model.load_state_dict(cp['model'], strict=False)
    model.r = cp['r']
    model.decoder.r = cp['r']
    model.eval()
    if use_cuda:
        model.cuda()
Code Example #16
File: train.py Project: aidiary/tacotron-pytorch
def main():
    ap = AudioProcessor()

    train_dataset = TTSDataset('data/LJSpeech-1.1',
                               'train.list',
                               outputs_per_step=r)
    valid_dataset = TTSDataset('data/LJSpeech-1.1',
                               'valid.list',
                               outputs_per_step=r)

    print('train data:', len(train_dataset))
    print('valid data:', len(valid_dataset))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        drop_last=False,
        num_workers=0,
        pin_memory=False)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=valid_dataset.collate_fn,
        drop_last=False,
        num_workers=0,
        pin_memory=False)

    # Create models
    num_chars = len(phonemes)
    model = Tacotron(num_chars, r=r).to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0)
    # StopNet is a binary classification task, so it is trained separately
    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(),
                              lr=lr,
                              weight_decay=0.0)

    criterion = L1LossMasked()
    criterion_st = nn.BCEWithLogitsLoss()

    num_params = count_parameters(model)
    print('Model has {} parameters'.format(num_params))

    # Training
    best_loss = float('inf')
    global_step = 0
    for epoch in range(0, epochs + 1):
        train_loss, global_step = train(train_loader, model, criterion,
                                        criterion_st, optimizer, optimizer_st,
                                        ap, global_step, epoch)

        valid_loss = evaluate(valid_loader, model, criterion, criterion_st, ap,
                              global_step, epoch)

        print('Epoch [{}/{}] train_loss: {:.5f} valid_loss: {:.5f}'.format(
            epoch, epochs, train_loss, valid_loss))

        if valid_loss < best_loss:
            print('  => valid_loss improved from {:.5f} to {:.5f}!'.format(
                best_loss, valid_loss))
            new_state_dict = model.state_dict()
            state = {
                'model': new_state_dict,
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'linear_loss': valid_loss
            }
            best_loss = valid_loss
            best_model_path = os.path.join(writer.logdir, 'best_model.pth')
            torch.save(state, best_model_path)
Code Example #17
File: train.py Project: geneing/TTS
def main(args): #pylint: disable=redefined-outer-name
    # Audio processor
    ap = AudioProcessor(**c.audio)

    # DISTRIBUTED
    if num_gpus > 1:
        init_distributed(args.rank, num_gpus, args.group_id,
                         c.distributed["backend"], c.distributed["url"])
    num_chars = len(phonemes) if c.use_phonemes else len(symbols)

    if c.use_speaker_embedding:
        speakers = get_speakers(c.data_path, c.meta_file_train, c.dataset)
        if args.restore_path:
            prev_out_path = os.path.dirname(args.restore_path)
            speaker_mapping = load_speaker_mapping(prev_out_path)
            assert all([speaker in speaker_mapping
                        for speaker in speakers]), \
                "As of now, you cannot introduce new speakers to " \
                "a previously trained model."
        else:
            speaker_mapping = {name: i
                               for i, name in enumerate(speakers)}
        save_speaker_mapping(OUT_PATH, speaker_mapping)
        num_speakers = len(speaker_mapping)
        print("Training with {} speakers: {}".format(num_speakers,
                                                     ", ".join(speakers)))
    else:
        num_speakers = 0

    model = setup_model(num_chars, num_speakers, c)

    print(" | > Num output units : {}".format(ap.num_freq), flush=True)

    #optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
    optimizer = Ranger(model.parameters(), lr=c.lr, weight_decay=c.wd)
    optimizer_gst = Ranger(model.textgst.parameters(), lr=c.lr, weight_decay=c.wd) if c.text_gst else None

    if c.stopnet and c.separate_stopnet:
        optimizer_st = Ranger(model.decoder.stopnet.parameters(), lr=c.lr)
    else:
        optimizer_st = None

    if c.loss_masking:
        criterion = L1LossMasked() if c.model in ["Tacotron", "TacotronGST"] else MSELossMasked()
    else:
        criterion = nn.L1Loss() if c.model in ["Tacotron", "TacotronGST"] else nn.MSELoss()
    criterion_st = nn.BCEWithLogitsLoss() if c.stopnet else None
    criterion_gst = nn.L1Loss() if c.text_gst else None

    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            # optimizer.load_state_dict(checkpoint['optimizer'])
            if c.reinit_layers:
                raise RuntimeError
            model.load_state_dict(checkpoint['model'])
        except Exception:
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint, c)
            model.load_state_dict(model_dict)
            del model_dict
        for group in optimizer.param_groups:
            group['lr'] = c.lr
        print(
            " > Model restored from step %d" % checkpoint['step'], flush=True)
        args.restore_step = checkpoint['step']
    else:
        args.restore_step = 0

    if use_cuda:
        model = model.cuda()
        criterion.cuda()
        if criterion_st:
            criterion_st.cuda()

    # DISTRIBUTED
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)

    if c.lr_decay:
        scheduler = NoamLR(
            optimizer,
            warmup_steps=c.warmup_steps,
            last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    if 'best_loss' not in locals():
        best_loss = float('inf')

    global_step = args.restore_step
    for epoch in range(0, c.epochs):
        # set gradual training
        if c.gradual_training is not None:
            r, c.batch_size = gradual_training_scheduler(global_step, c)
            c.r = r
            model.decoder.set_r(r)
        print(" > Number of outputs per iteration:", model.decoder.r)

        train_loss, global_step = train(model, criterion, criterion_st,
                                        optimizer, optimizer_st, scheduler,
                                        ap, global_step, epoch, criterion_gst=criterion_gst, optimizer_gst=optimizer_gst)
        
        if epoch % 5 == 0:
            val_loss = evaluate(model, criterion, criterion_st, criterion_gst, ap, global_step, epoch)
            print(
                " | > Training Loss: {:.5f}   Validation Loss: {:.5f}".format(
                    train_loss, val_loss),
                flush=True)
            target_loss = train_loss
            if c.run_eval:
                target_loss = val_loss
            best_loss = save_best_model(model, optimizer, optimizer_st, optimizer_gst, target_loss, best_loss,
                                        OUT_PATH, global_step, epoch)
Code Example #18
File: loader_tests.py Project: BlueTaurus/moz2
 def __init__(self, *args, **kwargs):
     super(TestTTSDatasetCached, self).__init__(*args, **kwargs)
     self.max_loader_iter = 4
     self.c = load_config(os.path.join(c.data_path_cache, 'config.json'))
     self.ap = AudioProcessor(**self.c.audio)
Code Example #19
File: train.py Project: JackInTaiwan/TTS
        if args.restore_path:
            new_fields["restore_path"] = args.restore_path
        new_fields["github_branch"] = get_git_branch()
        copy_config_file(args.config_path,
                         os.path.join(OUT_PATH, 'config.json'), new_fields)
        os.chmod(AUDIO_PATH, 0o775)
        os.chmod(OUT_PATH, 0o775)

    if args.rank == 0:
        LOG_DIR = OUT_PATH
        tb_logger = Logger(LOG_DIR)

    # Conditional imports
    preprocessor = importlib.import_module('datasets.preprocess')
    preprocessor = getattr(preprocessor, c.dataset.lower())

    # Audio processor
    ap = AudioProcessor(**c.audio)

    try:
        main(args)
    except KeyboardInterrupt:
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
    except Exception:
        remove_experiment_folder(OUT_PATH)
        traceback.print_exc()
        sys.exit(1)
Code Example #20
# Set constants
ROOT_PATH = ''
MODEL_PATH = ROOT_PATH + 'best_model.pth.tar'
CONFIG_PATH = ROOT_PATH + 'config.json'
OUT_FOLDER = ROOT_PATH + ''
CONFIG = load_config(CONFIG_PATH)
use_cuda = False

# load the model
# NOTE: 61 here is a hard-coded input symbol count
model = Tacotron(61, CONFIG.embedding_size, CONFIG.audio['num_freq'], CONFIG.audio['num_mels'], CONFIG.r)

# load the audio processor

ap = AudioProcessor(CONFIG.audio['sample_rate'], CONFIG.audio['num_mels'], CONFIG.audio['min_level_db'],
                    CONFIG.audio['frame_shift_ms'], CONFIG.audio['frame_length_ms'], CONFIG.audio['preemphasis'],
                    CONFIG.audio['ref_level_db'], CONFIG.audio['num_freq'], CONFIG.audio['power'], griffin_lim_iters=30)         


# load model state
if use_cuda:
    cp = torch.load(MODEL_PATH)
else:
    cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)

new = list(cp['model'].items())

my_model_kvpair = model.state_dict()
count = 0
for key, value in my_model_kvpair.items():
    layer_name, weights = new[count]
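The listing breaks off inside the loop. The usual completion of this manual key-by-key weight copy is sketched below (the standard pattern, not necessarily the original file's exact lines):

    # inside the loop: overwrite each entry with the checkpoint weights,
    # relying on both state dicts listing layers in the same order
    my_model_kvpair[key] = weights
    count += 1

model.load_state_dict(my_model_kvpair)
model.eval()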