def parse_batch(self, batch):
    ppg_padded, input_lengths, acoustic_padded, gate_padded, \
        output_lengths = batch
    ppg_padded = to_gpu(ppg_padded).float()
    input_lengths = to_gpu(input_lengths).long()
    max_len = torch.max(input_lengths.data).item()
    acoustic_padded = to_gpu(acoustic_padded).float()
    gate_padded = to_gpu(gate_padded).float()
    output_lengths = to_gpu(output_lengths).long()

    return ((ppg_padded, input_lengths, acoustic_padded, max_len, output_lengths),
            (acoustic_padded, gate_padded))
def parse_batch(self, batch):
    text_padded, input_lengths, mel_padded, gate_padded, \
        output_lengths = batch
    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    max_len = torch.max(input_lengths.data).item()
    mel_padded = to_gpu(mel_padded).float()
    gate_padded = to_gpu(gate_padded).float()
    output_lengths = to_gpu(output_lengths).long()

    return ((text_padded, input_lengths, mel_padded, max_len, output_lengths),
            (mel_padded, gate_padded))
def batch_to_gpu(batch):
    text_padded, input_lengths, mel_padded, gate_padded, \
        output_lengths, len_x = batch
    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    max_len = torch.max(input_lengths.data).item()
    mel_padded = to_gpu(mel_padded).float()
    gate_padded = to_gpu(gate_padded).float()
    output_lengths = to_gpu(output_lengths).long()

    x = (text_padded, input_lengths, mel_padded, max_len, output_lengths)
    y = (mel_padded, gate_padded)
    len_x = torch.sum(output_lengths)
    return (x, y, len_x)
def batch_to_gpu(batch: tuple):
    """Move `batch` to the GPU element by element."""
    text_padded, input_lengths, mel_padded, gate_padded, \
        output_lengths, len_x = batch
    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    max_len = torch.max(input_lengths.data).item()
    mel_padded = to_gpu(mel_padded).float()
    gate_padded = to_gpu(gate_padded).float()
    output_lengths = to_gpu(output_lengths).long()

    x = (text_padded, input_lengths, mel_padded, max_len, output_lengths)
    y = (mel_padded, gate_padded)
    len_x = torch.sum(output_lengths)
    return (x, y, len_x)
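# The PyTorch snippets here all call a `to_gpu` helper that is not shown.
# A minimal sketch of the common pattern follows; the actual helper in the
# source repositories may differ (and the NumPy-based CBOW code below would
# use a CuPy conversion instead).
import torch

def to_gpu(x):
    # Make the tensor contiguous, then move it to the GPU asynchronously
    # when CUDA is available; otherwise return it unchanged.
    x = x.contiguous()
    if torch.cuda.is_available():
        x = x.cuda(non_blocking=True)
    return x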
def main():
    """Train the CBOW model."""
    # Hyperparameter settings
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 1

    # Load the data
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)

    contexts, target = create_contexts_target(corpus, window_size)
    if config.GPU:
        contexts, target = to_gpu(contexts), to_gpu(target)

    # Build the model, optimizer, and trainer
    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    # Start training
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    # Save the data needed for later use
    word_vecs = model.word_vecs
    if config.GPU:
        word_vecs = to_cpu(word_vecs)
    params = {}
    params['word_vecs'] = word_vecs.astype(np.float16)
    params['word_to_id'] = word_to_id
    params['id_to_word'] = id_to_word
    pkl_file = 'cbow_params.pkl'
    with open(pkl_file, 'wb') as f:
        pickle.dump(params, f, -1)
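# A minimal sketch of reading back the parameters that main() pickles above;
# the key layout matches the `params` dict written to cbow_params.pkl.
import pickle

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)

word_vecs = params['word_vecs']      # (vocab_size, hidden_size) float16 matrix
word_to_id = params['word_to_id']
id_to_word = params['id_to_word']
# Look up the distributed representation of a word (assuming it is in the vocabulary).
print(word_vecs[word_to_id['you']])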
def run(self, iterator, eval_interval=20, max_grad=None):
    self.eval_interval = eval_interval
    model, optimizer = self.model, self.optimizer
    total_loss = 0
    loss_count = 0
    avg_loss = 0.0
    bleu_score = 0.0
    start_time = time.time()

    for batch_src, batch_tgt in iterator:
        batch_src = to_gpu(batch_src)
        batch_tgt = to_gpu(batch_tgt)

        loss = model.forward(batch_src, batch_tgt)
        model.backward()
        # Shared weights produce duplicate parameters; merge them before the update
        params, grads = remove_duplicate(model.params, model.grads)
        if max_grad is not None:
            clip_grads(grads, max_grad)
        optimizer.update(params, grads)
        total_loss += loss
        loss_count += 1

        if (iterator.iteration % eval_interval) == 0:
            avg_loss = total_loss / loss_count
            elapsed_time = time.time() - start_time
            print('| epoch %d \t| iter %d / %d \t| time %d[s] \t| loss %.2f'
                  % (iterator.epoch + 1, iterator.iteration,
                     iterator.max_iter, elapsed_time, avg_loss))
            total_loss, loss_count = 0, 0

        if iterator.is_new_epoch and self.do_report_bleu:
            bleu_score = self.culc_bleu(model)
            print('bleu: %.4f' % bleu_score)

        if iterator.is_new_epoch:
            self.save_score(iterator.epoch + 1, float(avg_loss), bleu_score)
            self.save_model(model, iterator.epoch + 1)
            print('Saved model.')
def load_params(self, file_name):
    if '/' in file_name:
        file_name = file_name.replace('/', os.sep)

    if not os.path.exists(file_name):
        raise IOError('No file: ' + file_name)

    with open(file_name, 'rb') as f:
        params = pickle.load(f)

    # Cast to float32; move to the GPU if one is in use
    params = [p.astype('f') for p in params]
    if GPU:
        params = [to_gpu(p) for p in params]

    for i, param in enumerate(self.params):
        param[...] = params[i]
def batch_to_gpu(batch):
    text_padded, input_lengths, mel_padded, \
        output_lengths, len_x, dur_padded, dur_lens, pitch_padded = batch
    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    mel_padded = to_gpu(mel_padded).float()
    output_lengths = to_gpu(output_lengths).long()
    dur_padded = to_gpu(dur_padded).long()
    dur_lens = to_gpu(dur_lens).long()
    pitch_padded = to_gpu(pitch_padded).float()

    # Alignments act as both inputs and targets - pass shallow copies
    x = [text_padded, input_lengths, mel_padded, output_lengths,
         dur_padded, dur_lens, pitch_padded]
    y = [mel_padded, dur_padded, dur_lens, pitch_padded]
    len_x = torch.sum(output_lengths)
    return (x, y, len_x)
def batch_to_gpu(batch):
    (text_padded, input_lengths, mel_padded, output_lengths, len_x,
     pitch_padded, energy_padded, speaker, attn_prior, audiopaths) = batch

    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    mel_padded = to_gpu(mel_padded).float()
    output_lengths = to_gpu(output_lengths).long()
    pitch_padded = to_gpu(pitch_padded).float()
    energy_padded = to_gpu(energy_padded).float()
    attn_prior = to_gpu(attn_prior).float()
    if speaker is not None:
        speaker = to_gpu(speaker).long()

    # Alignments act as both inputs and targets - pass shallow copies
    x = [text_padded, input_lengths, mel_padded, output_lengths,
         pitch_padded, energy_padded, speaker, attn_prior, audiopaths]
    y = [mel_padded, input_lengths, output_lengths]
    len_x = torch.sum(output_lengths)
    return (x, y, len_x)
def get_inference(seq, model, is_clip=False):
    """Tacotron inference.

    Args:
        seq: T*D numpy array.
        model: Tacotron model.
        is_clip: Set to True to avoid the artifacts at the end.

    Returns:
        Synthesized mels.
    """
    # (T, D) numpy -> (1, D, T) cpu tensor
    seq = torch.from_numpy(seq).float().transpose(0, 1).unsqueeze(0)
    # cpu tensor -> gpu tensor
    seq = to_gpu(seq)

    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(seq)
    if is_clip:
        return mel_outputs_postnet[:, :, 10:(seq.size(2) - 10)]
    else:
        return mel_outputs_postnet
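# A hypothetical call site for get_inference; `load_model` and the file names
# are placeholders, not functions defined in the snippets above.
import numpy as np
import torch

model = load_model('tacotron2_checkpoint.pt')  # assumed helper, not shown here
model.eval()
seq = np.load('features.npy')                  # (T, D) input feature matrix
with torch.no_grad():
    mel = get_inference(seq, model, is_clip=True)
print(mel.shape)                               # e.g. (1, n_mel_channels, T_out)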
def batch_to_gpu(batch):
    text_padded, input_lengths, mel_padded, gate_padded, \
        output_lengths, len_x, speaker_ids, style_img = batch
    text_padded = to_gpu(text_padded).long()
    input_lengths = to_gpu(input_lengths).long()
    max_len = torch.max(input_lengths.data).item()
    mel_padded = to_gpu(mel_padded).float()
    gate_padded = to_gpu(gate_padded).float()
    output_lengths = to_gpu(output_lengths).long()
    speaker_ids = to_gpu(speaker_ids).long()
    style_img = to_gpu(style_img).float()

    x = (text_padded, input_lengths, mel_padded, max_len,
         output_lengths, speaker_ids, style_img)
    y = (mel_padded, gate_padded)
    len_x = torch.sum(output_lengths)
    return (x, y, len_x)
def batch_to_gpu(batch):
    x, y, len_y = batch
    x = to_gpu(x).float()
    y = to_gpu(y).float()
    len_y = to_gpu(torch.sum(len_y))
    # x and y together form the model input; y alone is the target
    return ((x, y), y, len_y)
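# Usage sketch: the batch_to_gpu variants above sit between the DataLoader and
# the model in a training loop. `model`, `criterion`, `optimizer`, and
# `train_loader` are placeholders for whatever the surrounding code defines.
for batch in train_loader:
    x, y, num_frames = batch_to_gpu(batch)
    y_pred = model(x)
    loss = criterion(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()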