validation_dataset = utils.load_single_file_tfrecords(
    record_file=os.path.join(hparams['tfrecords_dir'], hparams['eval_file']))
validation_dataset = validation_dataset.batch(hparams['train_batch_size'])


# In[8]:


training_dataset = utils.load_training_files_tfrecords(
    record_pattern=os.path.join(hparams['tfrecords_dir'], hparams['train_files'] + '*'))


# ## Instantiate model and optimizer

# In[9]:


myWaveGlow = WaveGlow(hparams=hparams, name='myWaveGlow')
optimizer = utils.get_optimizer(hparams=hparams)


# ## Model Checkpoints : Initialise or Restore

# In[10]:


checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                 optimizer=optimizer,
                                 net=myWaveGlow)

manager_checkpoint = tf.train.CheckpointManager(
    checkpoint,
    directory=hparams['checkpoint_dir'],
    max_to_keep=hparams['max_to_keep'])
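# A minimal sketch (assumed, not from the original notebook) of the
# restore-or-initialise step this cell prepares: load the latest checkpoint
# from hparams['checkpoint_dir'] through the manager if one exists, otherwise
# start training from a fresh initialisation.
checkpoint.restore(manager_checkpoint.latest_checkpoint)
if manager_checkpoint.latest_checkpoint:
    print("Restored from {}".format(manager_checkpoint.latest_checkpoint))
else:
    print("Initializing model from scratch.")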
class WaveGlowSpeechSynthesizer(BaseHandler):

    def __init__(self):
        self.waveglow_model = None
        self.tacotron2_model = None
        self.mapping = None
        self.device = None
        self.initialized = False
        self.metrics = None

    # From https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechSynthesis/Tacotron2/inference.py
    def _unwrap_distributed(self, state_dict):
        """
        Unwraps a model from DistributedDataParallel.
        DDP wraps the model in an additional "module." prefix, which needs to be
        removed for single-GPU inference.
        :param state_dict: model's state dict
        """
        new_state_dict = {}
        for key, value in state_dict.items():
            new_key = key.replace('module.', '')
            new_state_dict[new_key] = value
        return new_state_dict

    def _load_tacotron2_model(self, model_dir):
        from PyTorch.SpeechSynthesis.Tacotron2.tacotron2 import model as tacotron2
        from PyTorch.SpeechSynthesis.Tacotron2.tacotron2.text import text_to_sequence
        tacotron2_checkpoint = torch.load(os.path.join(model_dir, 'nvidia_tacotron2pyt_fp32_20190427.pth'))
        tacotron2_state_dict = self._unwrap_distributed(tacotron2_checkpoint['state_dict'])
        tacotron2_config = tacotron2_checkpoint['config']
        self.tacotron2_model = tacotron2.Tacotron2(**tacotron2_config)
        self.tacotron2_model.load_state_dict(tacotron2_state_dict)
        self.tacotron2_model.text_to_sequence = text_to_sequence
        self.tacotron2_model.to(self.device)

    def initialize(self, ctx):
        """First try to load TorchScript, else load the eager-mode state_dict based model."""
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        if not torch.cuda.is_available() or properties.get("gpu_id") is None:
            raise RuntimeError("This model is not supported on CPU machines.")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id")))

        with zipfile.ZipFile(model_dir + '/tacotron.zip', 'r') as zip_ref:
            zip_ref.extractall(model_dir)

        waveglow_checkpoint = torch.load(os.path.join(model_dir, "nvidia_waveglowpyt_fp32_20190427.pth"))
        waveglow_state_dict = self._unwrap_distributed(waveglow_checkpoint['state_dict'])
        waveglow_config = waveglow_checkpoint['config']
        self.waveglow_model = WaveGlow(**waveglow_config)
        self.waveglow_model.load_state_dict(waveglow_state_dict)
        self.waveglow_model = self.waveglow_model.remove_weightnorm(self.waveglow_model)
        self.waveglow_model.to(self.device)
        self.waveglow_model.eval()

        self._load_tacotron2_model(model_dir)

        logger.debug('WaveGlow model file loaded successfully')
        self.initialized = True

    def preprocess(self, data):
        """
        Converts text to a sequence of IDs using the Tacotron2 text_to_sequence
        with english_cleaners to transform and standardize the input
        (e.g. lowercasing, expanding abbreviations and numbers).
        Returns a tensor of token IDs on the target device.
        """
        text = data[0].get("data")
        if text is None:
            text = data[0].get("body")
        text = text.decode('utf-8')

        sequence = np.array(self.tacotron2_model.text_to_sequence(text, ['english_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).to(device=self.device, dtype=torch.int64)

        return sequence

    def inference(self, data):
        with torch.no_grad():
            _, mel, _, _ = self.tacotron2_model.infer(data)
            audio = self.waveglow_model.infer(mel)
        return audio

    def postprocess(self, inference_output):
        audio_numpy = inference_output[0].data.cpu().numpy()
        path = "/tmp/{}.wav".format(uuid.uuid4().hex)
        write(path, 22050, audio_numpy)
        with open(path, 'rb') as output:
            data = output.read()
        os.remove(path)
        return [data]
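# Hypothetical local smoke test for the handler above (not part of the TorchServe
# handler itself). MockContext is an illustrative stand-in for the context object
# TorchServe normally passes to initialize(); the checkpoint files are assumed to
# sit in ./model_store and a CUDA device is assumed to be available.
class MockContext:
    def __init__(self, model_dir, gpu_id=0):
        self.system_properties = {"model_dir": model_dir, "gpu_id": gpu_id}


if __name__ == "__main__":
    handler = WaveGlowSpeechSynthesizer()
    handler.initialize(MockContext("./model_store"))
    # Chain preprocess -> inference -> postprocess the way BaseHandler.handle would.
    sequence = handler.preprocess([{"data": b"Hello world, this is WaveGlow speaking."}])
    audio = handler.inference(sequence)
    wav_bytes = handler.postprocess(audio)
    with open("sample.wav", "wb") as f:
        f.write(wav_bytes[0])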
def training_procedure(dataset=None, num_gpus=0, output_directory='./train', epochs=1000,
                       learning_rate=1e-4, batch_size=12, checkpointing=True,
                       checkpoint_path="./checkpoints", seed=2019,
                       params=[96, 6, 24, 3, 8, 2, [1, 2], 96, 3], use_gpu=True,
                       gen_tests=False, mname='model', validation_patience=10):
    params.append(use_gpu)
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed(seed)
    if checkpointing and not os.path.isdir(checkpoint_path[2:]):
        os.mkdir(checkpoint_path[2:])

    criterion = WaveGlowLoss()
    model = WaveGlow(*params)
    if use_gpu:
        model.cuda()

    valid_context, valid_forecast = dataset.valid_data()
    valid_forecast = set_gpu_tensor(valid_forecast, use_gpu)
    valid_context = set_gpu_tensor(valid_context, use_gpu)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    loss_iteration = []
    end_training = False
    best_validation = np.inf
    validation_streak = 0

    for epoch in range(epochs):
        if end_training:
            break
        iteration = 0
        print("Epoch: %d/%d" % (epoch + 1, epochs))
        avg_loss = []
        while dataset.epoch_end:
            context, forecast = dataset.sample(batch_size)
            forecast = set_gpu_train_tensor(forecast, use_gpu)
            context = set_gpu_train_tensor(context, use_gpu)
            z, log_s_list, log_det_w_list, early_out_shapes = model(forecast, context)

            loss = criterion((z, log_s_list, log_det_w_list))
            reduced_loss = loss.item()
            loss_iteration.append(reduced_loss)
            optimizer.zero_grad()
            loss.backward()
            avg_loss.append(reduced_loss)
            optimizer.step()

            print("Epoch [%d/%d] on iteration %d with loss %.4f" % (epoch + 1, epochs, iteration, reduced_loss))
            iteration += 1

        epoch_loss = sum(avg_loss) / len(avg_loss)
        validation_loss = get_validation_loss(model, criterion, valid_context, valid_forecast)
        print("Epoch [%d/%d] had training loss: %.4f and validation_loss: %.4f" % (epoch + 1, epochs, epoch_loss, validation_loss))

        if best_validation > validation_loss:
            print("Validation loss improved to %.5f" % validation_loss)
            best_validation = validation_loss
            if gen_tests:
                generate_tests(dataset, model, 5, 96, use_gpu, str(epoch + 1), mname=mname)
            if checkpointing:
                checkpoint_path = "%s/%s/epoch-%d_loss-%.4f" % (output_directory, mname, epoch, validation_loss)
                save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)
            validation_streak = 0
        else:
            validation_streak += 1

        dataset.epoch_end = True
        if validation_streak == validation_patience:
            end_training = True

    if checkpointing:
        # Reload the best checkpoint saved during training before evaluating on the test set.
        model, iteration = load_checkpoint(checkpoint_path, model)

    test_context, test_forecast = dataset.test_data()
    test_loss, test_mse = get_test_loss_and_mse(model, criterion, test_context, test_forecast, use_gpu)
    if not checkpointing:
        checkpoint_path = "%s/%s/finalmodel_epoch-%d_testloss-%.4f_testmse_%.4f" % (output_directory, mname, epoch, test_loss, test_mse)
        save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)
    print("Test loss for this model is %.5f, mse loss: %.5f" % (test_loss, test_mse))

    plt.figure()
    plt.plot(range(len(loss_iteration)), np.log10(np.array(loss_iteration) + 1.0))
    plt.xlabel('iteration')
    plt.ylabel('log10 of loss')
    plt.savefig('%s/%s/total_loss_graph.png' % (output_directory, mname))
    plt.close()
    return test_loss, model
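# Illustrative driver for training_procedure (not from the original code). Any
# dataset object exposing sample(batch_size) -> (context, forecast), valid_data(),
# test_data() and an epoch_end flag matching the loop above should work;
# DummyForecastDataset below is a toy stand-in and its shapes are made up.
class DummyForecastDataset:
    def __init__(self, n_series=64, context_len=96, forecast_len=96):
        self.context = np.random.randn(n_series, 1, context_len).astype(np.float32)
        self.forecast = np.random.randn(n_series, 1, forecast_len).astype(np.float32)
        self.epoch_end = True  # the training loop resets this to True after each epoch

    def sample(self, batch_size):
        idx = np.random.choice(len(self.context), batch_size, replace=False)
        self.epoch_end = False  # toy behaviour: one batch per epoch
        return self.context[idx], self.forecast[idx]

    def valid_data(self):
        return self.context[:8], self.forecast[:8]

    def test_data(self):
        return self.context[8:16], self.forecast[8:16]


if __name__ == "__main__":
    test_loss, trained_model = training_procedure(
        dataset=DummyForecastDataset(),
        epochs=5,
        batch_size=4,
        checkpointing=False,
        use_gpu=torch.cuda.is_available(),
        mname='waveglow_demo')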
def training(dataset=None, num_gpus=0, output_directory='./train', epochs=1000,
             learning_rate=1e-4, batch_size=12, checkpointing=True,
             checkpoint_path="./checkpoints", seed=2019,
             params=[96, 6, 24, 3, 8, 2, [1, 2], 96, 3], use_gpu=True, gen_tests=True):
    print("#############")
    print(use_gpu)
    params.append(use_gpu)
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed(seed)
    if not os.path.isdir(output_directory[2:]):
        os.mkdir(output_directory[2:])
    if checkpointing and not os.path.isdir(checkpoint_path[2:]):
        os.mkdir(checkpoint_path[2:])

    criterion = WaveGlowLoss()
    model = WaveGlow(*params)
    if use_gpu:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # iteration = 0
    # if checkpoint_path != "":
    #     model, optimizer, iteration = load_checkpoint(checkpoint_path, model, optimizer)
    #     iteration += 1

    model.train()
    loss_iteration = []
    for epoch in range(epochs):
        iteration = 0
        print("Epoch: %d/%d" % (epoch + 1, epochs))
        avg_loss = []
        while dataset.epoch_end:
            # model.zero_grad()
            context, forecast = dataset.sample(batch_size)
            if use_gpu:
                forecast = torch.autograd.Variable(torch.cuda.FloatTensor(forecast))
                context = torch.autograd.Variable(torch.cuda.FloatTensor(context))
            else:
                forecast = torch.autograd.Variable(torch.FloatTensor(forecast))
                context = torch.autograd.Variable(torch.FloatTensor(context))
            z, log_s_list, log_det_w_list, early_out_shapes = model(forecast, context)

            loss = criterion((z, log_s_list, log_det_w_list))
            reduced_loss = loss.item()
            loss_iteration.append(reduced_loss)
            optimizer.zero_grad()
            loss.backward()
            avg_loss.append(reduced_loss)
            optimizer.step()

            # print("On iteration %d with loss %.4f" % (iteration, reduced_loss))
            iteration += 1
            # if (checkpointing and (iteration % iters_per_checkpoint == 0)):

        if gen_tests:
            generate_tests(dataset, model, 5, 96, use_gpu, str(epoch + 1))

        epoch_loss = sum(avg_loss) / len(avg_loss)
        if checkpointing:
            checkpoint_path = "%s/waveglow_epoch-%d_%.4f" % (output_directory, epoch, epoch_loss)
            save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)

        print("\tLoss: %.3f" % loss)
        dataset.epoch_end = True

    plt.figure()
    plt.semilogy(range(len(loss_iteration)), np.array(loss_iteration))
    # plt.plot(range(len(loss_iteration)), np.log10(np.array(loss_iteration) + 1.0))
    plt.xlabel('iteration')
    plt.ylabel('loss')
    # plt.savefig('total_loss_graph.png')
    # plt.close()
    return model
random.shuffle(all_sound_paths)


# ## Load preprocessed long audio split mel spectrograms

# In[8]:


long_audio_record_file = os.path.join(hparams['tfrecords_dir'],
                                      hparams['long_audio_file'])
long_audio_dataset = utils.load_long_audio_tfrecords(
    long_audio_record_file).batch(hparams['train_batch_size'])


# ## Instantiate model

# In[9]:


myWaveGlow = WaveGlow(hparams=hparams, name='myWaveGlow')
optimizer = utils.get_optimizer(hparams=hparams)


# ## Model Checkpoints : Initialise or Restore

# In[10]:


checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                 optimizer=optimizer,
                                 net=myWaveGlow)

manager_checkpoint = tf.train.CheckpointManager(
    checkpoint,
    directory=hparams['checkpoint_dir'],
    max_to_keep=hparams['max_to_keep'])