Ejemplo n.º 1
0
    def parse_batch(self, batch):
        """Unpack a collated batch and pass each tensor through ``to_gpu``.

        Args:
            batch: Five items ordered as (ppg_padded, input_lengths,
                acoustic_padded, gate_padded, output_lengths).

        Returns:
            A pair ``(inputs, targets)``. ``inputs`` is
            ``(ppg, input_lengths, acoustic, max_len, output_lengths)`` and
            ``targets`` is ``(acoustic, gate)``. ``max_len`` is the largest
            value in ``input_lengths`` as a plain Python number.
        """
        ppg, in_lens, acoustic, gate, out_lens = batch

        # Features and gate values are cast to float, lengths to long.
        ppg = to_gpu(ppg).float()
        in_lens = to_gpu(in_lens).long()
        acoustic = to_gpu(acoustic).float()
        gate = to_gpu(gate).float()
        out_lens = to_gpu(out_lens).long()

        longest_input = in_lens.data.max().item()

        inputs = (ppg, in_lens, acoustic, longest_input, out_lens)
        targets = (acoustic, gate)
        return (inputs, targets)
Ejemplo n.º 2
0
    def parse_batch(self, batch):
        """Split a collated text/mel batch into model inputs and targets.

        Every tensor is passed through ``to_gpu``; text and length tensors
        are cast to long, mel and gate tensors to float.

        Args:
            batch: Five items ordered as (text_padded, input_lengths,
                mel_padded, gate_padded, output_lengths).

        Returns:
            ``((text, input_lengths, mel, max_len, output_lengths),
            (mel, gate))`` where ``max_len`` is the maximum of
            ``input_lengths`` converted with ``.item()``.
        """
        tokens, token_lens, mels, gates, mel_lens = batch

        tokens = to_gpu(tokens).long()
        token_lens = to_gpu(token_lens).long()
        longest = token_lens.data.max().item()

        mels = to_gpu(mels).float()
        gates = to_gpu(gates).float()
        mel_lens = to_gpu(mel_lens).long()

        model_inputs = (tokens, token_lens, mels, longest, mel_lens)
        return (model_inputs, (mels, gates))
Ejemplo n.º 3
0
def batch_to_gpu(batch):
    """Pass a six-item batch through ``to_gpu`` and regroup it as (x, y, len_x).

    Args:
        batch: Six items ordered as (text_padded, input_lengths, mel_padded,
            gate_padded, output_lengths, len_x). The incoming ``len_x`` is
            ignored; it is recomputed from ``output_lengths``.

    Returns:
        ``(x, y, len_x)`` where ``x`` is
        ``(text, input_lengths, mel, max_len, output_lengths)``, ``y`` is
        ``(mel, gate)`` and ``len_x`` is the sum of ``output_lengths``.
    """
    text, in_lens, mel, gate, out_lens, _ = batch

    text = to_gpu(text).long()
    in_lens = to_gpu(in_lens).long()
    mel = to_gpu(mel).float()
    gate = to_gpu(gate).float()
    out_lens = to_gpu(out_lens).long()

    # Largest input length, extracted as a Python scalar.
    longest_input = in_lens.data.max().item()

    inputs = (text, in_lens, mel, longest_input, out_lens)
    targets = (mel, gate)
    return (inputs, targets, out_lens.sum())
Ejemplo n.º 4
0
def batch_to_gpu(batch: tuple):
    """Run every tensor of `batch` through `to_gpu` and build (x, y, len_x).

    `batch` holds, in order: text_padded, input_lengths, mel_padded,
    gate_padded, output_lengths and len_x. The last entry is discarded and
    replaced by the sum of the output lengths.

    Returns a 3-tuple:
        x     -- (text, input_lengths, mel, max_len, output_lengths)
        y     -- (mel, gate)
        len_x -- `torch.sum` of the output lengths
    """
    (raw_text, raw_in_lens, raw_mel, raw_gate, raw_out_lens,
     _unused_len) = batch

    in_lens = to_gpu(raw_in_lens).long()
    out_lens = to_gpu(raw_out_lens).long()
    text = to_gpu(raw_text).long()
    mel = to_gpu(raw_mel).float()
    gate = to_gpu(raw_gate).float()

    max_in_len = torch.max(in_lens.data).item()
    total_out_len = torch.sum(out_lens)

    return (
        (text, in_lens, mel, max_in_len, out_lens),
        (mel, gate),
        total_out_len,
    )
Ejemplo n.º 5
0
def main():
    """Train a CBOW model on the PTB 'train' split and pickle the result.

    The saved file ``cbow_params.pkl`` contains a dict with the learned word
    vectors (cast to float16) plus the ``word_to_id`` / ``id_to_word``
    mappings.
    """
    # Hyperparameters
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 1

    # Load the corpus and build (contexts, target) training pairs
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    contexts, target = create_contexts_target(corpus, window_size)
    if config.GPU:
        contexts = to_gpu(contexts)
        target = to_gpu(target)

    # Build the model and its trainer
    model = CBOW(len(word_to_id), hidden_size, window_size, corpus)
    trainer = Trainer(model, Adam())

    # Train and plot the trainer's results
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    # Persist what is needed to reuse the embeddings later
    word_vecs = to_cpu(model.word_vecs) if config.GPU else model.word_vecs
    params = {
        'word_vecs': word_vecs.astype(np.float16),
        'word_to_id': word_to_id,
        'id_to_word': id_to_word,
    }
    with open('cbow_params.pkl', 'wb') as f:
        pickle.dump(params, f, -1)
Ejemplo n.º 6
0
    def run(self, iterator, eval_interval=20, max_grad=None):
        """Train ``self.model`` over ``iterator``, logging and checkpointing.

        For every ``(batch_src, batch_tgt)`` pair the model is run forward
        and backward, gradients are de-duplicated (and optionally clipped),
        and the optimizer is stepped. Progress is printed every
        ``eval_interval`` iterations. When the iterator signals a new epoch,
        the score and model are saved, and BLEU is printed first if
        ``self.do_report_bleu`` is set.

        Args:
            iterator: Iterable of ``(batch_src, batch_tgt)`` pairs that also
                exposes ``iteration``, ``epoch``, ``max_iter`` and
                ``is_new_epoch``.
            eval_interval: Number of iterations between progress reports.
            max_grad: If not ``None``, passed to ``clip_grads`` as the
                clipping threshold.
        """
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0
        bleu_score = 0.0
        # Most recently reported mean loss. Stays None until the first
        # report so that an epoch ending earlier can be detected below.
        avg_loss = None

        start_time = time.time()
        for batch_src, batch_tgt in iterator:
            batch_src = to_gpu(batch_src)
            batch_tgt = to_gpu(batch_tgt)

            loss = model.forward(batch_src, batch_tgt)
            model.backward()
            params, grads = remove_duplicate(model.params, model.grads)
            if max_grad is not None:
                clip_grads(grads, max_grad)
            optimizer.update(params, grads)
            total_loss += loss
            loss_count += 1

            if (iterator.iteration % eval_interval) == 0:
                avg_loss = total_loss / loss_count
                elapsed_time = time.time() - start_time
                print(
                    '| epoch %d \t| iter %d / %d \t| time %d[s] \t| loss %.2f'
                    % (iterator.epoch + 1, iterator.iteration,
                       iterator.max_iter, elapsed_time, avg_loss))
                total_loss, loss_count = 0, 0

            if iterator.is_new_epoch and self.do_report_bleu:
                bleu_score = self.culc_bleu(model)
                print('bleu: %.4f' % bleu_score)

            if iterator.is_new_epoch:
                if avg_loss is None:
                    # The epoch ended before any eval-interval report, so no
                    # mean has been computed yet. Use the running mean
                    # instead of failing on an unbound name; loss_count is
                    # at least 1 here because it has never been reset.
                    avg_loss = total_loss / loss_count
                self.save_score(iterator.epoch + 1, float(avg_loss),
                                bleu_score)
                self.save_model(model, iterator.epoch + 1)
                print('Saved model.')
Ejemplo n.º 7
0
    def load_params(self, file_name):
        """Load pickled parameter arrays from ``file_name`` into ``self.params``.

        The loaded arrays are cast with ``astype('f')``, passed through
        ``to_gpu`` when the module-level ``GPU`` flag is set, and copied
        in place into the existing entries of ``self.params``.

        Args:
            file_name: Path to the pickle file. Forward slashes are replaced
                with ``os.sep`` before the file is opened.

        Raises:
            IOError: If the file does not exist.
        """
        file_name = file_name.replace('/', os.sep)
        if not os.path.exists(file_name):
            # Build one message string. Passing two positional arguments
            # makes OSError treat them as (errno, strerror) and prints
            # "[Errno No file: ] <path>".
            raise IOError('No file: ' + file_name)

        # NOTE(security): pickle.load can execute arbitrary code; only load
        # parameter files from trusted sources.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        params = [p.astype('f') for p in params]

        if GPU:
            params = [to_gpu(p) for p in params]

        # Copy in place so existing references to these arrays stay valid.
        for i, param in enumerate(self.params):
            param[...] = params[i]
Ejemplo n.º 8
0
def batch_to_gpu(batch):
    """Pass an eight-item batch through ``to_gpu`` and regroup it.

    Args:
        batch: Eight items ordered as (text_padded, input_lengths,
            mel_padded, output_lengths, len_x, dur_padded, dur_lens,
            pitch_padded). The incoming ``len_x`` is ignored and recomputed.

    Returns:
        ``(x, y, len_x)`` where ``x`` is the list
        ``[text, input_lengths, mel, output_lengths, dur, dur_lens, pitch]``,
        ``y`` is the list ``[mel, dur, dur_lens, pitch]`` and ``len_x`` is
        the sum of ``output_lengths``.
    """
    text, in_lens, mel, out_lens, _, dur, dur_lens, pitch = batch

    # Integer-valued tensors are cast to long.
    text = to_gpu(text).long()
    in_lens = to_gpu(in_lens).long()
    out_lens = to_gpu(out_lens).long()
    dur = to_gpu(dur).long()
    dur_lens = to_gpu(dur_lens).long()

    # The remaining tensors are cast to float.
    mel = to_gpu(mel).float()
    pitch = to_gpu(pitch).float()

    # The same tensor objects appear in both lists; nothing is copied.
    inputs = [text, in_lens, mel, out_lens, dur, dur_lens, pitch]
    targets = [mel, dur, dur_lens, pitch]
    return (inputs, targets, out_lens.sum())
Ejemplo n.º 9
0
def batch_to_gpu(batch):
    """Pass a ten-item batch through ``to_gpu`` and regroup it as (x, y, len_x).

    Args:
        batch: Ten items ordered as (text_padded, input_lengths, mel_padded,
            output_lengths, len_x, pitch_padded, energy_padded, speaker,
            attn_prior, audiopaths). ``speaker`` may be ``None``;
            ``audiopaths`` is forwarded untouched; the incoming ``len_x`` is
            ignored and recomputed.

    Returns:
        ``(x, y, len_x)`` where ``x`` is the list ``[text, input_lengths,
        mel, output_lengths, pitch, energy, speaker, attn_prior,
        audiopaths]``, ``y`` is ``[mel, input_lengths, output_lengths]`` and
        ``len_x`` is the sum of ``output_lengths``.
    """
    (text, in_lens, mel, out_lens, _,
     pitch, energy, speaker, attn_prior, audiopaths) = batch

    text = to_gpu(text).long()
    in_lens = to_gpu(in_lens).long()
    mel = to_gpu(mel).float()
    out_lens = to_gpu(out_lens).long()
    pitch = to_gpu(pitch).float()
    energy = to_gpu(energy).float()
    attn_prior = to_gpu(attn_prior).float()
    # A missing speaker stays None.
    speaker = to_gpu(speaker).long() if speaker is not None else None

    # x and y reference the same tensor objects; nothing is copied.
    inputs = [text, in_lens, mel, out_lens, pitch, energy, speaker,
              attn_prior, audiopaths]
    targets = [mel, in_lens, out_lens]
    total_out_len = out_lens.sum()
    return (inputs, targets, total_out_len)
Ejemplo n.º 10
0
def get_inference(seq, model, is_clip=False):
    """Run ``model.inference`` on one sequence and return the post-net mels.

    Args:
        seq: T*D numpy array.
        model: Object whose ``inference`` method returns four values; the
            second is used as the result.
        is_clip: When true, drop the first and last 10 frames along the
            last axis of the output.

    Returns:
        The post-net mel output, optionally clipped.
    """
    # (T, D) numpy -> (D, T) -> (1, D, T) tensor, then through to_gpu.
    model_input = to_gpu(
        torch.from_numpy(seq).float().transpose(0, 1).unsqueeze(0))

    _, postnet_mels, _, _ = model.inference(model_input)

    if not is_clip:
        return postnet_mels
    # The clip end is derived from the input length (size of axis 2).
    return postnet_mels[:, :, 10:(model_input.size(2) - 10)]
Ejemplo n.º 11
0
def batch_to_gpu(batch):
    """Pass an eight-item batch through ``to_gpu`` and regroup it.

    Args:
        batch: Eight items ordered as (text_padded, input_lengths,
            mel_padded, gate_padded, output_lengths, len_x, speaker_ids,
            style_img). The incoming ``len_x`` is ignored and recomputed.

    Returns:
        ``(x, y, len_x)`` where ``x`` is ``(text, input_lengths, mel,
        max_len, output_lengths, speaker_ids, style_img)``, ``y`` is
        ``(mel, gate)`` and ``len_x`` is the sum of ``output_lengths``.
    """
    text, in_lens, mel, gate, out_lens, _, spk_ids, style = batch

    text = to_gpu(text).long()
    in_lens = to_gpu(in_lens).long()
    longest_input = in_lens.data.max().item()
    mel = to_gpu(mel).float()
    gate = to_gpu(gate).float()
    out_lens = to_gpu(out_lens).long()
    spk_ids = to_gpu(spk_ids).long()
    style = to_gpu(style).float()

    inputs = (text, in_lens, mel, longest_input, out_lens, spk_ids, style)
    targets = (mel, gate)
    return (inputs, targets, out_lens.sum())
Ejemplo n.º 12
0
def batch_to_gpu(batch):
    """Pass an (x, y, len_y) batch through ``to_gpu``.

    Args:
        batch: Three items ``(x, y, len_y)``.

    Returns:
        ``((x, y), y, total_len)`` where ``x`` and ``y`` are cast to float
        and ``total_len`` is ``torch.sum(len_y)`` passed through ``to_gpu``.
    """
    inputs, targets, target_lens = batch
    inputs = to_gpu(inputs).float()
    targets = to_gpu(targets).float()
    total_len = to_gpu(torch.sum(target_lens))
    # The target tensor is also supplied as part of the model input pair.
    return ((inputs, targets), targets, total_len)