Example #1
    def initialize(self, ctx):
        """First try to load torchscript else load eager mode state_dict based model"""

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        if not torch.cuda.is_available() or properties.get("gpu_id") is None:
            raise RuntimeError("This model is not supported on CPU machines.")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id")))

        with zipfile.ZipFile(model_dir + '/tacotron.zip', 'r') as zip_ref:
            zip_ref.extractall(model_dir)

        waveglow_checkpoint = torch.load(os.path.join(model_dir, "nvidia_waveglowpyt_fp32_20190427.pth"))
        waveglow_state_dict = self._unwrap_distributed(waveglow_checkpoint['state_dict'])
        waveglow_config = waveglow_checkpoint['config']
        self.waveglow_model = WaveGlow(**waveglow_config)
        self.waveglow_model.load_state_dict(waveglow_state_dict)
        self.waveglow_model = self.waveglow_model.remove_weightnorm(self.waveglow_model)
        self.waveglow_model.to(self.device)
        self.waveglow_model.eval()

        self._load_tacotron2_model(model_dir)

        logger.debug('WaveGlow model file loaded successfully')
        self.initialized = True
Example #2
validation_dataset = utils.load_single_file_tfrecords(
    record_file=os.path.join(hparams['tfrecords_dir'], hparams['eval_file']))
validation_dataset = validation_dataset.batch(hparams['train_batch_size'])

# In[8]:

training_dataset = utils.load_training_files_tfrecords(
    record_pattern=os.path.join(hparams['tfrecords_dir'],
                                hparams['train_files'] + '*'))

# ## Instantiate model and optimizer

# In[9]:

myWaveGlow = WaveGlow(hparams=hparams, name='myWaveGlow')

optimizer = utils.get_optimizer(hparams=hparams)

# ## Model Checkpoints : Initialise or Restore

# In[10]:

checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                 optimizer=optimizer,
                                 net=myWaveGlow)

manager_checkpoint = tf.train.CheckpointManager(
    checkpoint,
    directory=hparams['checkpoint_dir'],
    max_to_keep=hparams['max_to_keep'])
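# A minimal sketch (not part of the original notebook) of the "Initialise or
# Restore" step the heading above refers to, using the standard tf.train pattern:
if manager_checkpoint.latest_checkpoint:
    checkpoint.restore(manager_checkpoint.latest_checkpoint)
    print("Restored from", manager_checkpoint.latest_checkpoint)
else:
    print("No checkpoint found, initialising from scratch.")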
Example #3
class WaveGlowSpeechSynthesizer(BaseHandler):

    def __init__(self):
        self.waveglow_model = None
        self.tacotron2_model = None
        self.mapping = None
        self.device = None
        self.initialized = False
        self.metrics = None

    # From https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechSynthesis/Tacotron2/inference.py
    def _unwrap_distributed(self, state_dict):
        """
        Unwraps model from DistributedDataParallel.
        DDP wraps the model parameters with an additional "module." prefix,
        which needs to be stripped for single-GPU inference.
        :param state_dict: model's state dict
        """
        new_state_dict = {}
        for key, value in state_dict.items():
            new_key = key.replace('module.', '')
            new_state_dict[new_key] = value
        return new_state_dict

    def _load_tacotron2_model(self, model_dir):
        from PyTorch.SpeechSynthesis.Tacotron2.tacotron2 import model as tacotron2
        from PyTorch.SpeechSynthesis.Tacotron2.tacotron2.text import text_to_sequence
        tacotron2_checkpoint = torch.load(os.path.join(model_dir, 'nvidia_tacotron2pyt_fp32_20190427.pth'))
        tacotron2_state_dict = self._unwrap_distributed(tacotron2_checkpoint['state_dict'])
        tacotron2_config = tacotron2_checkpoint['config']
        self.tacotron2_model = tacotron2.Tacotron2(**tacotron2_config)
        self.tacotron2_model.load_state_dict(tacotron2_state_dict)
        self.tacotron2_model.text_to_sequence = text_to_sequence
        self.tacotron2_model.to(self.device)

    def initialize(self, ctx):
        """First try to load torchscript else load eager mode state_dict based model"""

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        if not torch.cuda.is_available() or properties.get("gpu_id") is None:
            raise RuntimeError("This model is not supported on CPU machines.")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id")))

        with zipfile.ZipFile(model_dir + '/tacotron.zip', 'r') as zip_ref:
            zip_ref.extractall(model_dir)

        waveglow_checkpoint = torch.load(os.path.join(model_dir, "nvidia_waveglowpyt_fp32_20190427.pth"))
        waveglow_state_dict = self._unwrap_distributed(waveglow_checkpoint['state_dict'])
        waveglow_config = waveglow_checkpoint['config']
        self.waveglow_model = WaveGlow(**waveglow_config)
        self.waveglow_model.load_state_dict(waveglow_state_dict)
        self.waveglow_model = self.waveglow_model.remove_weightnorm(self.waveglow_model)
        self.waveglow_model.to(self.device)
        self.waveglow_model.eval()

        self._load_tacotron2_model(model_dir)

        logger.debug('WaveGlow model file loaded successfully')
        self.initialized = True

    def preprocess(self, data):
        """
         converts text to sequence of IDs using tacatron2 text_to_sequence
         with english cleaners to transform text and standardize input
         (ex: lowercasing, expanding abbreviations and numbers, etc.)
         returns an Numpy array
        """
        text = data[0].get("data")
        if text is None:
            text = data[0].get("body")
        text = text.decode('utf-8')

        sequence = np.array(self.tacotron2_model.text_to_sequence(text, ['english_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).to(device=self.device, dtype=torch.int64)

        return sequence

    def inference(self, data):
        with torch.no_grad():
            _, mel, _, _ = self.tacotron2_model.infer(data)
            audio = self.waveglow_model.infer(mel)

            return audio

    def postprocess(self, inference_output):
        audio_numpy = inference_output[0].data.cpu().numpy()
        path = "/tmp/{}.wav".format(uuid.uuid4().hex)
        write(path, 22050, audio_numpy)
        with open(path, 'rb') as output:
            data = output.read()
        os.remove(path)
        return [data]
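    # A hedged sketch (not part of the original handler): TorchServe's
    # BaseHandler.handle() chains the stages above roughly in this order.
    def handle(self, data, context):
        if not self.initialized:
            self.initialize(context)
        sequence = self.preprocess(data)
        audio = self.inference(sequence)
        return self.postprocess(audio)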
Example #4
def training_procedure(dataset=None, num_gpus=0, output_directory='./train', epochs=1000,
                       learning_rate=1e-4, batch_size=12, checkpointing=True,
                       checkpoint_path="./checkpoints", seed=2019, params=None, use_gpu=True,
                       gen_tests=False, mname='model', validation_patience=10):
	# Default WaveGlow hyperparameters, kept out of the signature to avoid a
	# shared mutable default argument that the append below would mutate.
	if params is None:
		params = [96, 6, 24, 3, 8, 2, [1, 2], 96, 3]
	params = params + [use_gpu]
	torch.manual_seed(seed)
	if use_gpu:
		torch.cuda.manual_seed(seed)

	if checkpointing and not os.path.isdir(checkpoint_path[2:]): os.mkdir(checkpoint_path[2:])
	criterion = WaveGlowLoss()
	model = WaveGlow(*params)
	if use_gpu:
		model.cuda()

	valid_context, valid_forecast = dataset.valid_data()
	valid_forecast = set_gpu_tensor(valid_forecast, use_gpu)
	valid_context = set_gpu_tensor(valid_context, use_gpu)

	optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
	model.train()
	loss_iteration = []
	end_training = False
	best_validation = np.inf; validation_streak = 0
	for epoch in range(epochs):
		if end_training: break
		iteration = 0
		print("Epoch: %d/%d" % (epoch+1, epochs))
		avg_loss = []
		while(dataset.epoch_end):
			context, forecast = dataset.sample(batch_size)
			forecast = set_gpu_train_tensor(forecast, use_gpu)
			context = set_gpu_train_tensor(context, use_gpu)
			z, log_s_list, log_det_w_list, early_out_shapes = model(forecast, context)

			loss = criterion((z, log_s_list, log_det_w_list))
			reduced_loss = loss.item()
			loss_iteration.append(reduced_loss)
			optimizer.zero_grad()
			loss.backward()
			avg_loss.append(reduced_loss)
			optimizer.step()
			print("Epoch [%d/%d] on iteration %d with loss %.4f" % (epoch+1, epochs, iteration, reduced_loss))
			iteration += 1

		epoch_loss = sum(avg_loss)/len(avg_loss)
		validation_loss = get_validation_loss(model, criterion, valid_context, valid_forecast)
		print("Epoch [%d/%d] had training loss: %.4f and validation_loss: %.4f" % (epoch+1, epochs, epoch_loss, validation_loss))
		
		if best_validation > validation_loss:
			print("Validation loss improved to %.5f" % validation_loss)
			best_validation = validation_loss
			if gen_tests: generate_tests(dataset, model, 5, 96, use_gpu, str(epoch+1), mname=mname)
			if checkpointing:
				checkpoint_path = "%s/%s/epoch-%d_loss-%.4f" % (output_directory, mname, epoch, validation_loss)
				save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)

			validation_streak = 0
		else:
			validation_streak += 1
		dataset.epoch_end = True

		if validation_streak == validation_patience: end_training = True

	if checkpointing:
		model, iteration = load_checkpoint(checkpoint_path, model)
		
	test_context, test_forecast = dataset.test_data()
	test_loss, test_mse = get_test_loss_and_mse(model, criterion, test_context, test_forecast, use_gpu)

	if not checkpointing:
		checkpoint_path = "%s/%s/finalmodel_epoch-%d_testloss-%.4f_testmse_%.4f" % (output_directory, mname, epoch, test_loss, test_mse)
		save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)
	
	print("Test loss for this model is %.5f, mse loss: %.5f" % (test_loss, test_mse))

	plt.figure()
	plt.plot(range(len(loss_iteration)), np.log10(np.array(loss_iteration)+1.0))
	plt.xlabel('iteration')
	plt.ylabel('log10 of loss')
	plt.savefig('%s/%s/total_loss_graph.png' % (output_directory, mname))
	plt.close()
	return test_loss, model
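# Hypothetical usage sketch (not in the original source): ForecastDataset is a
# stand-in name for any dataset object exposing sample(), valid_data(),
# test_data() and an epoch_end flag, as used above.
if __name__ == "__main__":
	dataset = ForecastDataset()
	test_loss, trained_model = training_procedure(
		dataset=dataset, epochs=200, use_gpu=torch.cuda.is_available(), mname="waveglow_forecast")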
Example #5
def training(dataset=None,
             num_gpus=0,
             output_directory='./train',
             epochs=1000,
             learning_rate=1e-4,
             batch_size=12,
             checkpointing=True,
             checkpoint_path="./checkpoints",
             seed=2019,
             params=None,
             use_gpu=True,
             gen_tests=True):
    print("#############")
    print(use_gpu)
    if params is None:
        # Default WaveGlow hyperparameters, moved out of the signature to avoid
        # a shared mutable default argument.
        params = [96, 6, 24, 3, 8, 2, [1, 2], 96, 3]
    params = params + [use_gpu]
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed(seed)

    if not os.path.isdir(output_directory[2:]): os.mkdir(output_directory[2:])
    if checkpointing and not os.path.isdir(checkpoint_path[2:]):
        os.mkdir(checkpoint_path[2:])
    criterion = WaveGlowLoss()
    model = WaveGlow(*params)
    if use_gpu:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # iteration = 0
    # if checkpoint_path != "":
    # model, optimizer, iteration = load_checkpoint(checkpoint_path, model, optimizer)

    # iteration += 1

    model.train()
    loss_iteration = []
    for epoch in range(epochs):
        iteration = 0
        print("Epoch: %d/%d" % (epoch + 1, epochs))
        avg_loss = []
        while (dataset.epoch_end):
            # model.zero_grad()
            context, forecast = dataset.sample(batch_size)

            # torch.autograd.Variable is a no-op in modern PyTorch; plain
            # float tensors moved to the target device behave identically.
            forecast = torch.FloatTensor(forecast)
            context = torch.FloatTensor(context)
            if use_gpu:
                forecast = forecast.cuda()
                context = context.cuda()

            z, log_s_list, log_det_w_list, early_out_shapes = model(
                forecast, context)

            loss = criterion((z, log_s_list, log_det_w_list))
            reduced_loss = loss.item()
            loss_iteration.append(reduced_loss)
            optimizer.zero_grad()
            loss.backward()
            avg_loss.append(reduced_loss)
            optimizer.step()
            # print("On iteration %d with loss %.4f" % (iteration, reduced_loss))
            iteration += 1
            # if (checkpointing and (iteration % iters_per_checkpoint == 0)):

        if gen_tests:
            generate_tests(dataset, model, 5, 96, use_gpu, str(epoch + 1))
        epoch_loss = sum(avg_loss) / len(avg_loss)
        if checkpointing:
            checkpoint_path = "%s/waveglow_epoch-%d_%.4f" % (output_directory,
                                                             epoch, epoch_loss)
            save_checkpoint(model, optimizer, learning_rate, iteration,
                            checkpoint_path, use_gpu)

        print("\tLoss: %.3f" % loss)
        dataset.epoch_end = True
    plt.figure()
    plt.semilogy(range(len(loss_iteration)), np.array(loss_iteration))
    # plt.plot(range(len(loss_iteration)), np.log10(np.array(loss_iteration)+1.0))
    plt.xlabel('iteration')
    plt.ylabel('loss')
    # plt.savefig('total_loss_graph.png')
    # plt.close()
    return model
Example #6
random.shuffle(all_sound_paths)

# ## Load preprocessed long audio split mel spectrograms

# In[8]:

long_audio_record_file = os.path.join(hparams['tfrecords_dir'],
                                      hparams['long_audio_file'])
long_audio_dataset = utils.load_long_audio_tfrecords(
    long_audio_record_file).batch(hparams['train_batch_size'])

# ## Instantiate model

# In[9]:

myWaveGlow = WaveGlow(hparams=hparams, name='myWaveGlow')
optimizer = utils.get_optimizer(hparams=hparams)

# ## Model Checkpoints : Initialise or Restore

# In[10]:

checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                 optimizer=optimizer,
                                 net=myWaveGlow)

manager_checkpoint = tf.train.CheckpointManager(
    checkpoint,
    directory=hparams['checkpoint_dir'],
    max_to_keep=hparams['max_to_keep'])
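# A minimal sketch (not in the original notebook) of how the manager above could
# write checkpoints during training; 'save_step' is an assumed hparams key, not
# one taken from the original configuration.
checkpoint.step.assign_add(1)
if int(checkpoint.step) % hparams['save_step'] == 0:
    save_path = manager_checkpoint.save(checkpoint_number=int(checkpoint.step))
    print("Saved checkpoint for step {}: {}".format(int(checkpoint.step), save_path))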