Esempi in Python per Denoiser.Denoiser, esempi in Python per denoiser.Denoiser.Denoiser

Esempio n. 1

0

Mostra file

File: tacotron.py Progetto: SongArtish/ARI-Storyteller

    def load_model(self):
        """Load Tacotron, WaveGlow and the WaveGlow denoiser onto the CPU.

        Populates ``self.model``, ``self.waveglow`` and ``self.denoiser``.
        Both checkpoint locations are hard-coded absolute paths.
        """
        cpu = torch.device("cpu")

        # 1. Load the trained model: build the network from the stored
        # hyper-parameters, then overwrite its weights with the checkpoint's
        # ``state_dict`` entry.
        tacotron_ckpt = "/home/ubuntu/test/TTS/checkpoint_28000"
        self.model = train.load_model(self.hparams)
        tacotron_state = torch.load(tacotron_ckpt, map_location=cpu)
        self.model.load_state_dict(tacotron_state['state_dict'])

        # Inference mode on the CPU; no half-precision conversion is applied.
        self.model.cpu().eval()

        # Load WaveGlow: the checkpoint's ``model`` entry is used directly as
        # the vocoder object.
        vocoder_ckpt = "/home/ubuntu/test/TTS/waveglow.pt"
        self.waveglow = torch.load(vocoder_ckpt, map_location=cpu)['model']
        self.waveglow.cpu().eval()
        for inv_conv in self.waveglow.convinv:
            inv_conv.float()

        self.denoiser = Denoiser(self.waveglow)

Esempio n. 2

0

Mostra file

File: inference.py Progetto: zwjgit/VocGAN

def main(args):
    """Vocode one mel-spectrogram ``.npy`` file into a WAV file.

    Reads ``args.checkpoint_path``, ``args.config``, ``args.input`` and
    ``args.d``.  The output path is ``args.input`` with ``.npy`` replaced by a
    suffix carrying the checkpoint's epoch number.
    """
    checkpoint = torch.load(args.checkpoint_path)

    # An explicit config file takes precedence over the hyper-parameter string
    # stored in the checkpoint.
    hp = (HParam(args.config) if args.config is not None
          else load_hparam_str(checkpoint['hp_str']))

    model = ModifiedGenerator(
        hp.audio.n_mel_channels,
        hp.model.n_residual_layers,
        ratios=hp.model.generator_ratio,
        mult=hp.model.mult,
        out_band=hp.model.out_channels).cuda()
    model.load_state_dict(checkpoint['model_g'])
    model.eval(inference=True)

    with torch.no_grad():
        mel = torch.from_numpy(np.load(args.input))
        if mel.dim() == 2:
            # Give a two-dimensional input a leading dimension.
            mel = mel.unsqueeze(0)
        audio = model.inference(mel.cuda()).squeeze(0)

        if args.d:
            denoiser = Denoiser(model).cuda()
            audio = denoiser(audio, 0.01)

        # Drop the last ten hops of samples, scale by MAX_WAV_VALUE, clamp and
        # convert to 16-bit integers.
        trimmed = audio.squeeze()[:-(hp.audio.hop_length * 10)]
        scaled = (MAX_WAV_VALUE * trimmed).clamp(min=-MAX_WAV_VALUE,
                                                 max=MAX_WAV_VALUE - 1)
        samples = scaled.short().cpu().detach().numpy()

        suffix = '_reconstructed_epoch%04d.wav' % checkpoint['epoch']
        write(args.input.replace('.npy', suffix), hp.audio.sampling_rate,
              samples)

Esempio n. 3

0

Mostra file

def main(waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Vocode every ``*.npy`` mel file in the current directory with WaveGlow.

    One ``waveglow_<name>.wav`` file is written into ``output_dir`` per input.
    The denoiser is built and applied only when ``denoiser_strength`` is
    greater than zero; ``is_fp16`` switches model and inputs to half precision
    through apex.
    """
    use_denoiser = denoiser_strength > 0

    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if use_denoiser:
        denoiser = Denoiser(waveglow).cuda()

    for mel_path in glob.glob('*.npy'):
        stem, _ext = os.path.splitext(os.path.basename(mel_path))

        mel = torch.unsqueeze(torch.from_numpy(np.load(mel_path)), 0).cuda()
        if is_fp16:
            mel = mel.half()

        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if use_denoiser:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE

        samples = audio.squeeze().cpu().numpy().astype('int16')
        write(os.path.join(output_dir, f'waveglow_{stem}.wav'),
              sampling_rate, samples)

Esempio n. 4

0

Mostra file

def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Vocode each listed mel file with WaveGlow and save it as a WAV file.

    ``mel_files`` is expanded through ``files_to_list``.  Each mel is loaded
    with ``torch.load`` and its audio written to
    ``<output_dir>/<name>_synthesis.wav``; the written path is printed.
    The denoiser is built and applied only when ``denoiser_strength`` is
    greater than zero.
    """
    use_denoiser = denoiser_strength > 0
    mel_files = files_to_list(mel_files)

    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if use_denoiser:
        denoiser = Denoiser(waveglow).cuda()

    for mel_path in mel_files:
        stem = os.path.splitext(os.path.basename(mel_path))[0]

        # Move to the GPU and add a leading dimension before inference.
        mel = torch.autograd.Variable(torch.load(mel_path).cuda())
        mel = torch.unsqueeze(mel, 0)
        if is_fp16:
            mel = mel.half()

        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if use_denoiser:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE

        samples = audio.squeeze().cpu().numpy().astype('int16')
        audio_path = os.path.join(output_dir,
                                  "{}_synthesis.wav".format(stem))
        write(audio_path, sampling_rate, samples)
        print(audio_path)

Esempio n. 5

0

Mostra file

def main(args):
    """Vocode one mel-spectrogram ``.npy`` file into a WAV file.

    Reads ``args.checkpoint_path``, ``args.config``, ``args.input`` and
    ``args.d``.  The output path is ``args.input`` with ``.npy`` replaced by a
    suffix carrying the checkpoint's epoch number.
    """
    checkpoint = torch.load(args.checkpoint_path)

    # An explicit config file takes precedence over the hyper-parameter string
    # stored in the checkpoint.
    hp = (HParam(args.config) if args.config is not None
          else load_hparam_str(checkpoint['hp_str']))

    model = Generator(hp.audio.n_mel_channels).cuda()
    model.load_state_dict(checkpoint['model_g'])
    model.eval()

    with torch.no_grad():
        mel = torch.from_numpy(np.load(args.input))
        if mel.dim() == 2:
            # Give a two-dimensional input a leading dimension.
            mel = mel.unsqueeze(0)
        audio = model(mel.cuda())

        # Shape of the raw generator output (left in for multi-band
        # inference).
        print(audio.shape)
        audio = audio.squeeze(0)

        if args.d:
            denoiser = Denoiser(model).cuda()
            audio = denoiser(audio, 0.1)

        # Drop the last ten hops of samples, scale by MAX_WAV_VALUE, clamp and
        # convert to 16-bit integers.
        trimmed = audio.squeeze()[:-(hp.audio.hop_length * 10)]
        scaled = (MAX_WAV_VALUE * trimmed).clamp(min=-MAX_WAV_VALUE,
                                                 max=MAX_WAV_VALUE - 1)
        samples = scaled.short().cpu().detach().numpy()

        suffix = '_hifi_GAN_epoch%04d.wav' % checkpoint['epoch']
        write(args.input.replace('.npy', suffix), hp.audio.sampling_rate,
              samples)

Esempio n. 6

0

Mostra file

File: tts_server_utils.py Progetto: kb-rahul/tacotron2

def load_waveglow(chk_pt_path):
    """Load a WaveGlow model from ``chk_pt_path`` and build its denoiser.

    The checkpoint's ``model`` entry is moved to the GPU, put in eval mode and
    converted to half precision; the modules in ``convinv`` are then converted
    back to float.

    Returns:
        tuple: ``(waveglow, denoiser)``.
    """
    vocoder = torch.load(chk_pt_path)['model']
    vocoder.cuda().eval().half()
    for inv_conv in vocoder.convinv:
        inv_conv.float()
    return vocoder, Denoiser(vocoder)

Esempio n. 7

0

Mostra file

File: ui.py Progetto: williammo2016/ocr-pipeline

def create_models(dataset_dir):
    """Generate the inline models and train the classifier (localhost only)

    Exits with status 1 when not running locally and with status 2 when
    ``dataset_dir`` is not an existing directory.

    Parameters:
        dataset_dir (:func:`str`): Path to the training set
    """
    logger.debug("Creating models...")

    if not local_exec:
        logger.error("Models can only be generated locally")
        exit(1)

    # Local execution takes the application root from the environment.
    app_config['root'] = os.environ['ROOT']

    denoiser = Denoiser(app_config)

    if not (exists(dataset_dir) and isdir(dataset_dir)):
        logger.error(dataset_dir + " is not a valid directory")
        exit(2)

    # Every directory entry, joined with the directory path, is a sample.
    samples = [join(dataset_dir, entry) for entry in listdir(dataset_dir)]

    denoiser.generate_models(samples)
    logger.info("Inline models generated")

    denoiser.train(samples)
    logger.info("Classifier trained")

Esempio n. 8

0

Mostra file

File: generate_from_file.py Progetto: scripples/tacotron2

def generate_from_file(tacotron2_path, waveglow_path, text_file, output_directory):
    """Synthesize one WAV file per non-blank line of ``text_file``.

    Loads Tacotron2 from ``tacotron2_path`` and WaveGlow from
    ``waveglow_path`` (both on the GPU in half precision), then for each line
    runs text preprocessing, Tacotron2 inference, WaveGlow inference and
    denoising.  Each result is written into ``output_directory`` under a name
    built from the first four space-separated words of the line, lower-cased.

    Args:
        tacotron2_path: Tacotron2 checkpoint containing a ``state_dict`` entry.
        waveglow_path: WaveGlow checkpoint containing a ``model`` entry.
        text_file: Text file with one utterance per line.
        output_directory: Destination directory; created when missing.
    """
    # Make synthesis paths
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print("Creating directory " + output_directory + "...")

    hparams = create_hparams()
    hparams.sampling_rate = 22050

    print("Loading models...")
    model = load_model(hparams)
    model.load_state_dict(torch.load(tacotron2_path)['state_dict'])
    _ = model.cuda().eval().half()

    waveglow = torch.load(waveglow_path)['model']
    waveglow.cuda().eval().half()
    for k in waveglow.convinv:
        k.float()
    denoiser = Denoiser(waveglow)

    with open(text_file) as file:
        genlist = [line.strip() for line in file]

    # The transliterator does not depend on the entry and is only used by the
    # "ipa" branch, so build it once up front instead of once per line.
    epi = None
    if hparams.preprocessing == "ipa":
        epi = epitran.Epitran('eng-Latn', ligatures=True)

    for entry in genlist:
        # A blank line has no text to synthesize and would be saved as ".wav".
        if not entry:
            continue

        wav_name = "_".join(entry.split(" ")[:4]).lower() + ".wav"

        if hparams.preprocessing == "ipa":
            entry = ipa.convert(english_cleaners(entry))
            # Words ending in "*" are transliterated without the marker.
            foreign_words = re.findall(r"[^ ]{0,}\*", entry)
            for word in foreign_words:
                entry = entry.replace(word, epi.transliterate(word[:-1]))
        if hparams.preprocessing == "arpabet":
            entry = make_arpabet(entry)

        # Text sequencer: preprocessed text is passed through without
        # cleaners, raw text goes through the English cleaners.
        if hparams.preprocessing is not None:
            sequence = np.array(text_to_sequence(entry, None))[None, :]
        else:
            sequence = np.array(
                text_to_sequence(entry, ['english_cleaners']))[None, :]
        sequence = torch.autograd.Variable(
            torch.from_numpy(sequence)).cuda().long()

        # Synthesis
        mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
        with torch.no_grad():
            audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
        audio_denoised = denoiser(audio, strength=0.01)[:, 0]

        # Save audio
        print("Saving " + wav_name)
        write(os.path.join(output_directory, wav_name), hparams.sampling_rate,
              audio_denoised[0].data.cpu().numpy())

Esempio n. 9

0

Mostra file

    def __init__(self,
                 ds_name,
                 ds_path,
                 lr,
                 iterations,
                 batch_size,
                 print_freq,
                 k,
                 eps,
                 is_normalized,
                 adv_momentum,
                 store_adv=None,
                 load_adv_dir=None,
                 load_adv_name=None,
                 load_dir=None,
                 load_name=None,
                 save_dir=None):
        """Set up the data loaders, attack generator, target model and denoiser.

        Args:
            ds_name, ds_path, is_normalized: Forwarded to ``Preprocessor``.
            lr: Stored as ``self.learning_rate``.
            iterations: Stored as ``self.iterations``.
            batch_size: Batch size of the train and test ``DataLoader``.
            print_freq: Stored as ``self.print_freq``.
            k: Accepted but not used by this constructor.
            eps, adv_momentum: Forwarded to ``Attacks``.
            store_adv: Stored on the instance and forwarded to ``Attacks``.
            load_adv_dir, load_adv_name: Location of the model used to
                generate adversarial examples.
            load_dir, load_name: Location of the target model.
            save_dir: Stored as ``self.save_dir``.
        """
        self.data_processor = Preprocessor(ds_name, ds_path, is_normalized)

        # Load Data
        self.train_data, self.test_data, self.N_train, self.N_test = self.data_processor.datasets(
        )
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=batch_size,
                                       shuffle=True)
        self.test_loader = DataLoader(self.test_data, batch_size=batch_size)

        # Other Variables
        self.save_dir = save_dir
        self.store_adv = store_adv

        # Set Model Hyperparameters
        self.learning_rate = lr
        self.iterations = iterations
        self.print_freq = print_freq
        self.cuda = torch.cuda.is_available()

        # Load Model to Conduct Adversarial Training
        adversarial_model = self.load_model(self.cuda, load_adv_dir,
                                            load_adv_name, TEST)
        self.adversarial_generator = Attacks(adversarial_model, eps,
                                             self.N_train, self.N_test,
                                             self.data_processor.get_const(),
                                             adv_momentum, is_normalized,
                                             store_adv)

        # Load Target Model
        self.target_model = self.load_model(self.cuda, load_dir, load_name,
                                            TEST)

        # Load Denoiser.  Only move it to the GPU when one is available, in
        # line with the flag handed to load_model above; an unconditional
        # .cuda() fails on CPU-only hosts.
        self.denoiser = Denoiser(x_h=32, x_w=32)
        if self.cuda:
            self.denoiser = self.denoiser.cuda()

Esempio n. 10

0

Mostra file

 def __init__(self):
     """Load Tacotron2, WaveGlow and the denoiser onto the GPU in half precision.

     Checkpoint locations come from ``constants.TACOTRON_PT`` and
     ``constants.WAVEGLOW_PT``.
     """
     hparams = create_hparams()
     hparams.sampling_rate = 22050

     # Tacotron2: build from the hyper-parameters, then load the weights.
     tacotron_ckpt = constants.TACOTRON_PT
     self.model = load_model(hparams)
     tacotron_state = torch.load(tacotron_ckpt)['state_dict']
     self.model.load_state_dict(tacotron_state)
     self.model.cuda().eval().half()

     # WaveGlow: the modules in ``convinv`` are converted back to float
     # after the rest of the model is converted to half precision.
     vocoder_ckpt = constants.WAVEGLOW_PT
     self.waveglow = torch.load(vocoder_ckpt)['model']
     self.waveglow.cuda().eval().half()
     for inv_conv in self.waveglow.convinv:
         inv_conv.float()

     self.denoiser = Denoiser(self.waveglow)

Esempio n. 11

0

Mostra file

File: text2speech.py Progetto: nare-ua/DeepLearningExamples

    def __init__(self, lang):
        """Load Tacotron2 and WaveGlow, build the denoiser and record ``lang``.

        Model settings come from the module-level ``parser`` and ``args``.

        NOTE(review): in the visible code only ``self.language`` is stored on
        the instance; the models, the denoiser and the scripted Tacotron2 stay
        local - confirm whether the rest of this method is out of view.
        """
        def _load(model_name, checkpoint):
            # Both models share every setting except name and checkpoint.
            return load_and_setup_model(model_name,
                                        parser,
                                        checkpoint,
                                        args.amp_run,
                                        args.cpu_run,
                                        forward_is_infer=True)

        tacotron2 = _load('Tacotron2', args.tacotron2)
        waveglow = _load('WaveGlow', args.waveglow)

        # The denoiser is moved to the GPU unless a CPU run was requested.
        denoiser = Denoiser(waveglow, args.cpu_run)
        if not args.cpu_run:
            denoiser = denoiser.cuda()

        jitted_tacotron2 = torch.jit.script(tacotron2)

        self.language = lang

Esempio n. 12

0

Mostra file

def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Vocode each listed mel file with WaveGlow and save it as a WAV file.

    ``mel_files`` is expanded through ``files_to_list``.  For every file the
    mel is loaded with ``torch.load``, its min/max are printed, and the
    synthesized audio is written to
    ``<output_dir>/<name>_synthesis_sig0.7_d_0.1.wav``.  The denoiser is built
    and applied only when ``denoiser_strength`` is greater than zero.

    NOTE(review): ``k`` is appended to below but is not defined in this
    function - presumably a module-level list; confirm it exists.
    NOTE(review): the output name hard-codes "sig0.7_d_0.1" regardless of the
    ``sigma`` and ``denoiser_strength`` actually passed in.
    """
    mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    # Set padding_mode on every module whose type name contains "Conv".
    # Presumably needed for checkpoints pickled before the attribute existed
    # - TODO confirm.
    for m in waveglow.modules():
        if 'Conv' in str(type(m)):
            setattr(m, 'padding_mode', 'zeros')
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    for i, file_path in enumerate(mel_files):

        file_name = os.path.splitext(os.path.basename(file_path))[0]
        mel = torch.load(file_path)
        mel = torch.autograd.Variable(mel.cuda())
        # Add a leading dimension before inference.
        mel = torch.unsqueeze(mel, 0)
        mel = mel.half() if is_fp16 else mel

        # Debug output: value range of the input mel.
        print(torch.min(mel),torch.max(mel))
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)

            # Record the peak absolute amplitude before scaling.
            k.append(abs(audio).max().item())
            audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        audio = audio.cpu().numpy()
        audio = audio.astype('int16')
        audio_path = os.path.join(
            output_dir, "{}_synthesis_sig0.7_d_0.1.wav".format(file_name))
        write(audio_path, sampling_rate, audio)
        print(audio_path)

Esempio n. 13

0

Mostra file

File: inference.py Progetto: SomeUserName1/tacotron2

def main(text):
    """Synthesize ``text`` and append the audio to ``./out.wav``.

    Loads Tacotron2 and WaveGlow from hard-coded checkpoint names, plots the
    mel outputs and alignments, then writes the WaveGlow output to
    ``./out.wav``.  When ``out.wav`` already exists the new samples are
    appended to its current content.

    NOTE(review): the denoised audio is computed but the file is written from
    the raw WaveGlow output - confirm this is intended.
    """
    def _first_as_numpy(tensor):
        # First batch item as a float numpy array.
        return tensor.float().data.cpu().numpy()[0]

    hparams = create_hparams()
    hparams.sampling_rate = 22050
    hparams.gate_threshold = 0.1
    hparams.max_decoder_steps = 5000

    # Tacotron2: build from the hyper-parameters, then load the weights.
    tacotron_ckpt = "tacotron2_statedict.pt"
    model = load_model(hparams)
    tacotron_state = torch.load(tacotron_ckpt)['state_dict']
    model.load_state_dict(tacotron_state)
    model.cuda().eval().half()

    # WaveGlow for mel-to-audio synthesis, plus its denoiser.
    vocoder_ckpt = 'waveglow_256channels.pt'
    waveglow = torch.load(vocoder_ckpt)['model']
    waveglow.cuda().eval().half()

    # Set padding_mode on every module whose type name contains "Conv".
    for module in waveglow.modules():
        if 'Conv' in str(type(module)):
            setattr(module, 'padding_mode', 'zeros')

    for inv_conv in waveglow.convinv:
        inv_conv.float()
    denoiser = Denoiser(waveglow)

    # Encode the text as a single-item batch of symbol ids on the GPU.
    symbol_ids = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
    sequence = torch.autograd.Variable(
        torch.from_numpy(symbol_ids)).cuda().long()

    # Decode the text and plot the results.
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    plot_data((_first_as_numpy(mel_outputs),
               _first_as_numpy(mel_outputs_postnet),
               _first_as_numpy(alignments).T))

    # Synthesize audio from the spectrogram using WaveGlow.
    with torch.no_grad():
        audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)

    audio_denoised = denoiser(audio, strength=0.01)[:, 0]

    # Save: append to an existing out.wav, otherwise start a new one.
    new_samples = audio[0].data.cpu().numpy().astype(np.float32)
    if os.path.isfile("out.wav"):
        previous, _sr = librosa.load("out.wav")
        out = np.append(previous, new_samples)
    else:
        out = new_samples

    librosa.output.write_wav('./out.wav', out, 22050)

Esempio n. 14

0

Mostra file

File: text2speech.py Progetto: Harishgeth/cookietts

    def load_waveglow(self, vocoder_path, config_fpath):
        """Build WaveGlow from a JSON config, load its checkpoint, add a denoiser.

        Args:
            vocoder_path: Checkpoint with ``model``, ``iteration`` and
                ``speaker_lookup`` entries.
            config_fpath: JSON file with ``train_config``, ``data_config``,
                ``dist_config`` and ``waveglow_config`` sections.

        Returns:
            tuple: ``(waveglow, denoiser, training_sigma, speaker_lookup)``.
        """
        # Load config file
        with open(config_fpath) as config_file:
            config = json.load(config_file)
        train_config = config["train_config"]
        data_config = config["data_config"]
        dist_config = config["dist_config"]

        # The model settings are the waveglow section plus the window and hop
        # lengths taken from the data section.
        vocoder_config = dict(config["waveglow_config"])
        vocoder_config['win_length'] = data_config['win_length']
        vocoder_config['hop_length'] = data_config['hop_length']
        print(vocoder_config)
        print(f"Config File from '{config_fpath}' successfully loaded.")

        # Import the model implementation that matches the config.
        if self.is_ax(vocoder_config):
            from efficient_model_ax import WaveGlow
        elif vocoder_config["yoyo"]:
            from efficient_model import WaveGlow
        else:
            from glow import WaveGlow

        print("intializing WaveGlow model... ", end="")
        waveglow = WaveGlow(**vocoder_config).cuda()
        print("Done!")

        # Overwrite the freshly initialised weights with the checkpointed
        # ones, then move to the GPU in eval mode and half precision.
        print("loading WaveGlow checkpoint... ", end="")
        checkpoint = torch.load(vocoder_path)
        waveglow.load_state_dict(checkpoint['model'])
        waveglow.cuda().eval().half()
        print("Done!")

        print("initializing Denoiser... ", end="")
        denoiser = Denoiser(waveglow)
        print("Done!")

        vocoder_iters = checkpoint['iteration']
        print(f"WaveGlow trained for {vocoder_iters} iterations")
        speaker_lookup = checkpoint['speaker_lookup']
        training_sigma = train_config['sigma']

        return waveglow, denoiser, training_sigma, speaker_lookup

Esempio n. 15

0

Mostra file

File: web_app.py Progetto: jireh-father/tacotron2

def init_model():
    """Load Tacotron2 and WaveGlow into the module-level globals.

    Sets ``tacotron2_model`` and ``waveglow_model``.  ``denoiser`` is assigned
    only when the local ``denoiser_strength`` is greater than zero, which the
    hard-coded value of 0.0 never satisfies.
    """
    global tacotron2_model, waveglow_model, denoiser

    print("init model!!!!")

    tacotron_ckpt = "outdir_finetune/checkpoint_62500"
    vocoder_ckpt = "../waveglow-fix/checkpoints_finetune/waveglow_478000"
    sampling_rate = 22050
    denoiser_strength = 0.0

    hparams = create_hparams()
    hparams.sampling_rate = sampling_rate
    hparams.training = False

    # Tacotron2: build from the hyper-parameters, then load the weights.
    tacotron2_model = load_model(hparams)
    tacotron_state = torch.load(tacotron_ckpt)['state_dict']
    tacotron2_model.load_state_dict(tacotron_state)
    tacotron2_model.cuda().eval().half()

    # WaveGlow: the checkpoint's ``model`` entry is used directly.
    waveglow_model = torch.load(vocoder_ckpt)['model']
    waveglow_model = waveglow_model.remove_weightnorm(waveglow_model)
    waveglow_model.cuda().eval().half()
    for inv_conv in waveglow_model.convinv:
        inv_conv.float()

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow_model)

Esempio n. 16

0

Mostra file

File: tacotron2_node.py Progetto: nathantsoi/tacotron2-ros

 def tacotron2_init(self):
     """Load Tacotron2, WaveGlow and the denoiser onto the GPU in half precision.

     Checkpoint locations come from ``TACOTRON_CHECKPOINT_FILE`` and
     ``WAVEGLOW_CHECKPOINT_FILE``.  Also sets ``self.plot_wav_data`` to False.
     """
     self.plot_wav_data = False

     # Hyper-parameters
     self.hparams = create_hparams()
     self.hparams.sampling_rate = 22050

     # Tacotron2: build from the hyper-parameters, then load the weights.
     self.model = load_model(self.hparams)
     tacotron_state = torch.load(TACOTRON_CHECKPOINT_FILE)['state_dict']
     self.model.load_state_dict(tacotron_state)
     self.model.cuda().eval().half()

     # WaveGlow: the modules in ``convinv`` are converted back to float
     # after the rest of the model is converted to half precision.
     self.waveglow = torch.load(WAVEGLOW_CHECKPOINT_FILE)['model']
     self.waveglow.cuda().eval().half()
     for inv_conv in self.waveglow.convinv:
         inv_conv.float()

     self.denoiser = Denoiser(self.waveglow)

Esempio n. 17

0

Mostra file

File: inference.py Progetto: Welsun/chinese_tacotron

def text2audio(waveglow_path, sigma, output_dir, sampling_rate, mel):
    """Vocode ``mel`` with WaveGlow and write it to ``<output_dir>/pred2.wav``.

    The audio is written without denoising and without scaling by
    ``MAX_WAV_VALUE``.

    NOTE(review): the denoiser is constructed but never applied - confirm
    whether it can be dropped.
    """
    vocoder = torch.load(waveglow_path)['model']
    vocoder = vocoder.remove_weightnorm(vocoder)
    vocoder.cuda().eval()

    denoiser = Denoiser(vocoder).cuda()

    with torch.no_grad():
        audio = vocoder.infer(mel.cuda(), sigma=sigma)

    samples = audio.squeeze().cpu().numpy()
    out_path = os.path.join(output_dir, "pred2.wav")
    sf.write(out_path, samples, sampling_rate)

Esempio n. 18

0

Mostra file

File: inference.py Progetto: dodohow1011/waveglow_2

def main(text_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Run Tacotron2 and WaveGlow over a text/mel dataset and save the results.

    For every batch produced from ``text_files`` the pretrained Tacotron2
    (loaded from ``tacotron2.pt``) is run, its first output is passed to
    ``waveglow.infer`` and the squeezed result is saved with ``torch.save`` to
    ``<output_dir>/<index>_synthesis.pt``.

    NOTE(review): ``sampling_rate`` is unused, and the denoiser is constructed
    but never applied (its use sits inside a disabled string block below).
    NOTE(review): ``waveglow.infer`` is called with an extra ``input_lengths``
    argument - presumably a project-specific WaveGlow variant; confirm.
    """
    hparams = create_hparams()
    Taco2 = load_pretrained_taco('tacotron2.pt', hparams)

    testset = TextMelLoader(text_files, hparams)
    collate_fn = TextMelCollate()

    # One sample per batch, in dataset order.
    test_loader = DataLoader(testset,
                             num_workers=0,
                             shuffle=False,
                             sampler=None,
                             batch_size=1,
                             pin_memory=False,
                             drop_last=True,
                             collate_fn=collate_fn)
    waveglow = torch.load(waveglow_path)['model']
    # Weight normalisation is left in place here (removal is disabled).
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    for i, batch in enumerate(test_loader):
        text_padded, input_lengths, mel_padded, max_len, output_lengths = parse_batch(
            batch)
        enc_outputs, _ = Taco2(
            (text_padded, input_lengths, mel_padded, max_len, output_lengths))
        with torch.no_grad():
            mel = waveglow.infer(enc_outputs, input_lengths, sigma=sigma)
            '''if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE'''
        # Debug output: the raw result, then its size after squeezing.
        print(mel)
        mel = mel.squeeze()
        print(mel.size())
        mel_path = os.path.join(output_dir, "{}_synthesis.pt".format(i))
        torch.save(mel, mel_path)
        print(mel_path)

Esempio n. 19

0

Mostra file

File: inference.py Progetto: yhgon/SqueezeWave

def main(mel_files, squeezewave_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Vocode each listed mel file with SqueezeWave and report timing.

    ``mel_files`` is expanded through ``files_to_list``.  Each result is
    written to ``<output_dir>/<name>_s<sigma>.wav`` and a line with the
    inference duration, audio length and throughput is printed per file.
    The denoiser is built and applied only when ``denoiser_strength`` is
    greater than zero.
    """
    setup_started = time.time()
    use_denoiser = denoiser_strength > 0

    mel_files = files_to_list(mel_files)
    squeezewave = torch.load(squeezewave_path)['model']
    squeezewave = squeezewave.remove_weightnorm(squeezewave)
    squeezewave.cuda().eval()
    if is_fp16:
        from apex import amp
        squeezewave, _ = amp.initialize(squeezewave, [], opt_level="O3")

    if use_denoiser:
        denoiser = Denoiser(squeezewave).cuda()

    dur_prepare = time.time() - setup_started
    print("prepare model {:3.2}sec".format(dur_prepare))

    for mel_path in mel_files:
        stem = os.path.splitext(os.path.basename(mel_path))[0]

        # Move to the GPU and add a leading dimension before inference.
        mel = torch.autograd.Variable(torch.load(mel_path).cuda())
        mel = torch.unsqueeze(mel, 0)
        if is_fp16:
            mel = mel.half()

        # Only inference, denoising and scaling are timed.
        started = time.time()
        with torch.no_grad():
            audio = squeezewave.infer(mel, sigma=sigma).float()
            if use_denoiser:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE
        dur = time.time() - started

        samples = audio.squeeze().cpu().numpy()

        len_wav = len(samples)
        sec_wav = len_wav/sampling_rate
        samples_sec = len_wav / dur

        audio_path = os.path.join(
            output_dir, "{}_s{}.wav".format(stem, sigma))
        write(audio_path, sampling_rate, samples.astype('int16'))
        print("{} it took {:4.3f}sec  for  {:4.3f}sec {:4.2f}K sample 22Khz Audio files :   RTF {:4.3f} {:4.3f}X  {:4.2f}Ksamples/sec  "
              .format(audio_path, dur, sec_wav, len_wav/1000, dur/sec_wav,
                      sec_wav/dur, samples_sec/1000))

Esempio n. 20

0

Mostra file

File: inference.py Progetto: ruaruaruabick/waveglow

def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Vocode each listed mel file with WaveGlow and save it as a WAV file.

    ``mel_files`` is expanded through ``files_to_list``.  Each result is
    written to ``<output_dir>/<name>_synthesis.wav`` and the written path is
    printed.  The denoiser is built and applied only when
    ``denoiser_strength`` is greater than zero.
    """
    use_denoiser = denoiser_strength > 0

    # List of test-set mel-spectrogram files.
    mel_files = files_to_list(mel_files)

    # Load the model, remove weight normalisation, then move it to the GPU
    # and switch it to evaluation mode.
    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()

    # Half-precision acceleration through apex.
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if use_denoiser:
        denoiser = Denoiser(waveglow).cuda()

    for mel_path in mel_files:
        # Base name shared by the mel file and the WAV written for it.
        stem = os.path.splitext(os.path.basename(mel_path))[0]

        # Load the features, move them to the GPU and add a leading
        # dimension (the original notes 80 x 375 -> 1 x 80 x 375 as an example).
        mel = torch.autograd.Variable(torch.load(mel_path).cuda())
        mel = torch.unsqueeze(mel, 0)
        if is_fp16:
            # Match the half-precision model.
            mel = mel.half()

        # No gradients are needed for inference.
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if use_denoiser:
                audio = denoiser(audio, denoiser_strength)
            # Scale before the int16 conversion below.
            audio = audio * MAX_WAV_VALUE

        # Squeeze, move to the CPU as a numpy array, convert to int16.
        samples = audio.squeeze().cpu().numpy().astype('int16')

        audio_path = os.path.join(output_dir,
                                  "{}_synthesis.wav".format(stem))
        write(audio_path, sampling_rate, samples)
        print(audio_path)

Esempio n. 21

0

Mostra file

    def __init__(self):
        """Load the WaveGlow vocoder and its denoiser onto the GPU.

        Makes ``./waveglow/`` and ``./waveglow/tacotron2`` importable, then
        loads ``waveglow_256channels_ljs_v2.pt`` with warnings silenced.
        Populates ``self.waveglow`` and ``self.denoiser``.
        """
        import warnings

        for module_path in './waveglow/', './waveglow/tacotron2':
            if module_path not in sys.path:
                sys.path.insert(0, module_path)

        # Silence warnings only while loading.  catch_warnings() restores the
        # caller's filters on exit, including when loading raises, whereas
        # resetwarnings() would discard every filter, defaults included.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            self.waveglow = torch.load('waveglow_256channels_ljs_v2.pt')['model']
            self.waveglow = self.waveglow.remove_weightnorm(self.waveglow)
            self.waveglow.cuda().eval()

            # Imported here because it relies on the sys.path entries above.
            from denoiser import Denoiser
            self.denoiser = Denoiser(self.waveglow).cuda()

Esempio n. 22

0

Mostra file

File: plc_exam.py Progetto: AugggRush/newPLC

def inference_plc(mel, waveglow, sigma, is_fp16, denoiser_strength):
    """Vocode ``mel`` with the given WaveGlow model and return the audio.

    The result is scaled by ``MAX_WAV_VALUE`` and squeezed.  The denoiser is
    built and applied only when ``denoiser_strength`` is greater than zero;
    ``is_fp16`` switches model and input to half precision through apex.
    """
    use_denoiser = denoiser_strength > 0

    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if use_denoiser:
        denoiser = Denoiser(waveglow).cuda()

    mel = torch.autograd.Variable(mel.cuda())
    if is_fp16:
        mel = mel.half()

    with torch.no_grad():
        audio = waveglow.infer(mel, sigma=sigma)
        if use_denoiser:
            audio = denoiser(audio, denoiser_strength)
        audio = audio * MAX_WAV_VALUE

    return audio.squeeze()

Esempio n. 23

0

Mostra file

File: mode_collapse_test.py Progetto: jireh-father/tacotron2

def main(tacotron2_path, waveglow_path, sigma, output_dir, sampling_rate,
         denoiser_strength, text, file_idx, inference_name, zip_file, hparams):
    """Run Tacotron2 twice on ``text`` and report whether the outputs match.

    Seeds the torch, CUDA and ``random`` generators from ``hparams.seed``,
    loads both models, runs ``model.inference`` twice on the same sequence and
    prints "same!!" or "different!!" after comparing the post-net mels.

    NOTE(review): ``sigma``, ``output_dir``, ``file_idx``, ``inference_name``
    and ``zip_file`` are unused in the visible code, as are the denoiser and
    the local ``MAX_WAV_VALUE``.
    """
    hparams.sampling_rate = sampling_rate

    seed = hparams.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    random.seed(seed)

    # Tacotron2: build from the hyper-parameters, then load the weights.
    model = load_model(hparams)
    tacotron_state = torch.load(tacotron2_path)['state_dict']
    model.load_state_dict(tacotron_state)
    model.cuda().eval().half()

    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval().half()
    for inv_conv in waveglow.convinv:
        inv_conv.float()
    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow)

    # Encode the text as a single-item batch of symbol ids.
    sequence = np.array(text_to_sequence(
        text, ['transliteration_cleaners']))[None, :]
    print(sequence)
    sequence = torch.autograd.Variable(
        torch.from_numpy(sequence)).cuda().long()

    # Two runs over the same input; only the post-net mels are compared.
    _, postnet_first, _, _ = model.inference(sequence)
    _, postnet_second, _, _ = model.inference(sequence)

    MAX_WAV_VALUE = 32768.0

    first_np = postnet_first.cpu().data.numpy()
    second_np = postnet_second.cpu().data.numpy()
    print(first_np[0][0][:30])
    print(second_np[0][0][:30])
    print("same!!" if np.array_equal(first_np, second_np) else "different!!")

Esempio n. 24

0

Mostra file

File: text2speech.py Progetto: yondu22/flask-tacotron2-tts-web-app

    def __init__(self, lang):
        """Load WaveGlow and its denoiser, then the model for ``lang``.

        The WaveGlow checkpoint path is read from the ``model.waveglow`` entry
        of ``config.json`` in the working directory.  The language-specific
        model is loaded last, through ``self.update_model``.
        """
        self.language = lang

        self.hparams = create_hparams()
        self.hparams.sampling_rate = 22050

        with open('config.json', 'r') as config_file:
            self.config = json.load(config_file)

        model_section = self.config.get('model')
        self.waveglow_path = model_section.get('waveglow')
        self.waveglow = torch.load(self.waveglow_path)['model']
        self.waveglow.cuda().eval().half()

        # Set padding_mode on every module whose type name contains "Conv".
        for module in self.waveglow.modules():
            if 'Conv' in str(type(module)):
                setattr(module, 'padding_mode', 'zeros')

        # The modules in ``convinv`` are converted back to float.
        for inv_conv in self.waveglow.convinv:
            inv_conv.float()

        self.denoiser = Denoiser(self.waveglow)
        self.update_model(lang)

Esempio n. 25

0

Mostra file

def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Synthesize one wav file per mel spectrogram with a WaveGlow checkpoint.

    Args:
        mel_files: Passed to ``files_to_list`` to obtain the mel file paths.
        waveglow_path: Checkpoint path; its ``'model'`` entry is the vocoder.
        sigma: Forwarded to ``waveglow.infer``.
        output_dir: Directory that receives ``<name>_synthesis.wav`` files.
            It is created (including parents) when missing.
        sampling_rate: Sample rate handed to ``write``.
        is_fp16: When true, the model is initialised through apex AMP
            (``opt_level="O3"``) and each mel is converted to half precision.
        denoiser_strength: The denoiser is built and applied only when this
            is greater than zero.

    Files whose path contains neither ``.pt`` nor ``.npy`` are reported and
    skipped instead of reusing the previous file's mel (or failing with a
    ``NameError`` on the first file).
    """
    mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    # Loop-invariant: make sure the destination exists once, up front.
    os.makedirs(output_dir, exist_ok=True)

    for file_path in mel_files:
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        print('Loading file: ', file_path)
        if '.pt' in file_path:
            print('load by torch')
            mel = torch.load(file_path)
        elif '.npy' in file_path:
            print('load by numpy')
            mel = torch.from_numpy(np.load(file_path))
        else:
            # Without this branch ``mel`` would be unbound or stale.
            print('Skipping unsupported mel file: ', file_path)
            continue
        print(f"original mel shape: {mel.shape}")
        # Add the leading batch dimension expected by ``infer``.
        mel = torch.unsqueeze(mel.cuda(), 0)
        mel = mel.half() if is_fp16 else mel
        print(f"mel shape right before using waveglow: {mel.shape}")
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE
        # NOTE(review): values outside the int16 range are not clamped before
        # the cast below -- confirm whether clipping is wanted.
        audio = audio.squeeze().cpu().numpy().astype('int16')
        audio_path = os.path.join(output_dir,
                                  "{}_synthesis.wav".format(file_name))
        write(audio_path, sampling_rate, audio)
        print(audio_path)

Esempio n. 26

0

Mostra file

    def __init__(self, model_path, device, sigma=0.666, strength=0.1):
        """Load a vocoder checkpoint and prepare it together with a denoiser.

        Args:
            model_path: Checkpoint path; its ``"model"`` entry is the vocoder.
            device: Requested device; ignored in favour of ``"cpu"`` when
                CUDA is not available.
            sigma: Stored as ``self.sigma``.
            strength: Stored as ``self.strength``.
        """
        if torch.cuda.is_available():
            self.device = torch.device(device)
        else:
            self.device = torch.device("cpu")

        # Full precision on CPU, half precision on any other device type.
        if self.device.type == "cpu":
            self.dtype = torch.float
        else:
            self.dtype = torch.half

        self.model = torch.load(model_path, map_location=self.device)["model"]
        self.model.device = self.device

        # Force ``padding_mode`` on every module whose type name mentions
        # "Conv". NOTE(review): presumably a compatibility patch for
        # checkpoints pickled before that attribute existed -- confirm.
        for module in self.model.modules():
            if "Conv" not in str(type(module)):
                continue
            module.padding_mode = "zeros"

        self.model.eval().to(device=self.device, dtype=self.dtype)

        # The entries of ``convinv`` are converted to float regardless of
        # the dtype chosen above.
        for inv_conv in self.model.convinv:
            inv_conv.float()

        self.denoiser = Denoiser(self.model, device=self.device)

        self.sigma = sigma
        self.strength = strength

Esempio n. 27

0

Mostra file

def waveglow_gen(waveglow_path,
                 mel,
                 sigma=0.666,
                 denoiser_strength=0.1,
                 fp16=False):
    """Run a WaveGlow checkpoint on ``mel`` and return the denoised audio.

    The vocoder is loaded from ``waveglow_path`` on every call and released
    again before returning; the CUDA cache is emptied before and after.

    Args:
        waveglow_path: Checkpoint path; its ``'model'`` entry is the vocoder.
        mel: Input passed to ``infer`` (converted with ``half()`` if
            ``fp16`` is set).
        sigma: Second positional argument of ``infer``.
        denoiser_strength: Second argument of the denoiser call.
        fp16: Convert both the vocoder and ``mel`` to half precision.

    Returns:
        The denoised audio moved to the CPU and reshaped to ``(1, -1)``.
    """
    torch.cuda.empty_cache()

    vocoder = torch.load(waveglow_path)['model']
    vocoder.cuda().eval()
    if fp16:
        vocoder, mel = vocoder.half(), mel.half()

    # ``convinv`` entries are always converted to float, fp16 or not.
    for inv_conv in vocoder.convinv:
        inv_conv.float()

    cleaner = Denoiser(vocoder)
    with torch.no_grad():
        audio = cleaner(vocoder.infer(mel, sigma), denoiser_strength)

    # Drop the only references to the model objects before emptying the
    # cache.
    del vocoder, cleaner
    torch.cuda.empty_cache()
    return audio.cpu().view(1, -1)

Esempio n. 28

0

Mostra file

def main(mel_files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength):
    """Synthesize one wav file per raw binary mel file with WaveGlow.

    Each input file is read as two ``int32`` values giving the array shape,
    followed (from byte offset 8) by ``float32`` data of that shape; the
    array is transposed before being fed to the vocoder.

    Args:
        mel_files: Passed to ``files_to_list`` to obtain the mel file paths.
        waveglow_path: Checkpoint path; its ``'model'`` entry is the vocoder.
        sigma: Forwarded to ``waveglow.infer``.
        output_dir: Directory that receives ``<name>.wav`` files. It is
            created (including parents) when missing.
        sampling_rate: Sample rate handed to ``write``.
        is_fp16: When true, the model is initialised through apex AMP
            (``opt_level="O3"``) and each mel is converted to half precision.
        denoiser_strength: The denoiser is built and applied only when this
            is greater than zero.
    """
    mel_files = files_to_list(mel_files)
    waveglow = torch.load(waveglow_path)['model']
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow.cuda().eval()
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O3")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    # ``write`` does not create missing directories; do it once up front.
    os.makedirs(output_dir, exist_ok=True)

    for file_path in mel_files:
        file_name = os.path.splitext(os.path.basename(file_path))[0]

        # Generic mel files: 8-byte shape header, then the float32 payload.
        # (The former ``torch.load`` alternative sat behind ``if True`` and
        # could never run, so it was removed.)
        shape = tuple(np.fromfile(file_path, count=2, dtype=np.int32))
        mel = np.memmap(file_path, offset=8, dtype=np.float32, shape=shape)
        mel = torch.from_numpy(mel.transpose())

        # Add the leading batch dimension expected by ``infer``.
        mel = torch.unsqueeze(mel.cuda(), 0)
        mel = mel.half() if is_fp16 else mel
        with torch.no_grad():
            audio = waveglow.infer(mel, sigma=sigma)
            if denoiser_strength > 0:
                audio = denoiser(audio, denoiser_strength)
            audio = audio * MAX_WAV_VALUE
        # NOTE(review): values outside the int16 range are not clamped before
        # the cast below -- confirm whether clipping is wanted.
        audio = audio.squeeze().cpu().numpy().astype('int16')
        audio_path = os.path.join(output_dir, "{}.wav".format(file_name))
        write(audio_path, sampling_rate, audio)
        print(audio_path)

Esempio n. 29

0

Mostra file

File: plc_inference.py Progetto: BaiYunLiu/newPLC

def inference_plc(mel, waveglow_path, sigma, sampling_rate, is_fp16,
                  denoiser_strength):
    """Vocode ``mel`` with a WaveGlow checkpoint and return scaled audio.

    Args:
        mel: Tensor moved to CUDA and passed to ``infer`` (as half precision
            when ``is_fp16`` is set).
        waveglow_path: Checkpoint path; its ``'model'`` entry is the vocoder.
        sigma: Forwarded to ``infer``.
        sampling_rate: Not used inside this function.
        is_fp16: When true, the model is initialised through apex AMP
            (``opt_level="O3"``).
        denoiser_strength: The denoiser is built and applied only when this
            is greater than zero.

    Returns:
        The squeezed audio tensor multiplied by ``MAX_WAV_VALUE``.
    """
    vocoder = torch.load(waveglow_path)['model']
    vocoder = vocoder.remove_weightnorm(vocoder)
    vocoder.cuda().eval()
    if is_fp16:
        from apex import amp
        vocoder, _ = amp.initialize(vocoder, [], opt_level="O3")

    use_denoiser = denoiser_strength > 0
    if use_denoiser:
        denoiser = Denoiser(vocoder).cuda()

    mel = torch.autograd.Variable(mel.cuda())
    if is_fp16:
        mel = mel.half()

    with torch.no_grad():
        audio = vocoder.infer(mel, sigma=sigma)
        if use_denoiser:
            audio = denoiser(audio, denoiser_strength)
        audio = audio * MAX_WAV_VALUE

    return audio.squeeze()

Esempio n. 30

0

Mostra file

File: melgan_infer_api.py Progetto: seantempesta/melgan-1

def predict(hp, model, mel, denoise=False, device="cuda"):
    """Run ``model.inference`` on ``mel`` and return 16-bit PCM samples.

    Args:
        hp: Hyper-parameters; ``hp.model.out_channels`` and
            ``hp.audio.hop_length`` are read.
        model: Generator exposing ``inference(mel)``.
        mel: Mel tensor; a 2-D input gets a leading dimension added.
        denoise: When true, a ``Denoiser`` built from ``model`` is applied
            with strength 0.1 and its output averaged over dimension 0.
        device: Device used for the input, ``PQMF`` and the denoiser.

    Returns:
        A NumPy array obtained from the ``short()`` audio tensor, with the
        last ``hp.audio.hop_length * 10`` elements removed.
    """
    with torch.no_grad():
        batch = mel.unsqueeze(0) if mel.dim() == 2 else mel
        audio = model.inference(batch.to(device))

        # Multi-band output is merged through PQMF synthesis.
        if hp.model.out_channels > 1:
            audio = PQMF(device=device).synthesis(audio).squeeze(0)

        if denoise:
            noise_filter = Denoiser(model, device=device).to(device)
            audio = noise_filter(audio, 0.1).mean(0)

        trim = hp.audio.hop_length * 10
        pcm = MAX_WAV_VALUE * audio.squeeze()[:-trim]
        # Clamp into the signed 16-bit range before converting to short.
        pcm = pcm.clamp(min=-MAX_WAV_VALUE, max=MAX_WAV_VALUE - 1).short()
        return pcm.cpu().detach().numpy()