Example 1
    def forward(self, state, x):
        # update state with input label x
        if state is None:  # make initial states and log-prob vectors
            self.var_word_eos = to_cuda(self, self.var_word_eos)
            self.var_word_unk = to_cuda(self, self.var_word_unk)
            wlm_state, z_wlm = self.wordlm(None, self.var_word_eos)
            wlm_logprobs = F.log_softmax(z_wlm, dim=1)
            clm_state, z_clm = self.subwordlm(None, x)
            log_y = F.log_softmax(z_clm, dim=1) * self.subwordlm_weight
            new_node = self.lexroot
            clm_logprob = 0.
            xi = self.space
        else:
            clm_state, wlm_state, wlm_logprobs, node, log_y, clm_logprob = state
            xi = int(x)
            if xi == self.space:  # inter-word transition
                if node is not None and node[1] >= 0:  # check if the node is word end
                    w = to_cuda(self, torch.LongTensor([node[1]]))
                else:  # this node is not a word end, which means <unk>
                    w = self.var_word_unk
                # update wordlm state and log-prob vector
                wlm_state, z_wlm = self.wordlm(wlm_state, w)
                wlm_logprobs = F.log_softmax(z_wlm, dim=1)
                new_node = self.lexroot  # move to the tree root
                clm_logprob = 0.
            elif node is not None and xi in node[0]:  # intra-word transition
                new_node = node[0][xi]
                clm_logprob += log_y[0, xi]
            elif self.open_vocab:  # if no path in the tree, enter open-vocabulary mode
                new_node = None
                clm_logprob += log_y[0, xi]
            else:  # if open_vocab flag is disabled, return 0 probabilities
                log_y = to_cuda(
                    self, torch.full((1, self.subword_dict_size),
                                     self.logzero))
                return (clm_state, wlm_state, wlm_logprobs, None, log_y,
                        0.), log_y

            clm_state, z_clm = self.subwordlm(clm_state, x)
            log_y = F.log_softmax(z_clm, dim=1) * self.subwordlm_weight

        # apply word-level probabilities for <space> and <eos> labels
        if xi != self.space:
            if new_node is not None and new_node[1] >= 0:  # if new node is word end
                wlm_logprob = wlm_logprobs[:, new_node[1]] - clm_logprob
            else:
                wlm_logprob = wlm_logprobs[:, self.word_unk] + self.log_oov_penalty
            log_y[:, self.space] = wlm_logprob
            log_y[:, self.eos] = wlm_logprob
        else:
            log_y[:, self.space] = self.logzero
            log_y[:, self.eos] = self.logzero

        return (clm_state, wlm_state, wlm_logprobs, new_node, log_y,
                float(clm_logprob)), log_y
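
The traversal above relies on a lexical prefix tree whose nodes pack a children dictionary and a word id (node[0] and node[1]; node[1] >= 0 only at word ends). The following standalone sketch is not the ESPnet implementation; all subword and word ids are hypothetical, and the third node field (used for the cumulative-sum ranges in Example 5) is left as None.

# Minimal sketch of the lexical-tree node layout assumed by Examples 1 and 5:
# node = [children, word_id, extra]; children maps subword id -> child node,
# and word_id >= 0 marks a word end. All ids below are made up.
lexroot = [{}, -1, None]

def add_word(root, subword_ids, word_id):
    node = root
    for sid in subword_ids:
        node = node[0].setdefault(sid, [{}, -1, None])
    node[1] = word_id  # mark the word end

add_word(lexroot, [1, 2], 10)     # word 10 spelled as subwords 1, 2
add_word(lexroot, [1, 2, 3], 11)  # word 11 shares the prefix 1, 2

# intra-word transitions: follow child links while the subword is in the tree
node = lexroot
for xi in (1, 2):
    node = node[0][xi]
print(node[1] >= 0)  # True -> reaching <space> here emits word id 10
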
Example 2
    def final(self, state):
        wlm_state, cumsum_probs, node = state
        if node is not None and node[1] >= 0:  # check if the node is word end
            w = to_cuda(self, torch.LongTensor([node[1]]))
        else:  # this node is not a word end, which means <unk>
            w = self.var_word_unk
        wlm_state, z_wlm = self.wordlm(wlm_state, w)
        return float(F.log_softmax(z_wlm, dim=1)[:, self.word_eos])
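
As a rough usage note (an assumption, not stated in the source): the value returned by final is typically added to a hypothesis score when decoding terminates, and it is simply the <eos> column of the word LM's log-softmax output. A self-contained illustration with made-up logits and a made-up word_eos index:

import torch
import torch.nn.functional as F

word_eos = 2                # hypothetical <eos> index in the word dictionary
z_wlm = torch.randn(1, 5)   # stand-in for the word LM output (1, word_dict_size)
final_score = float(F.log_softmax(z_wlm, dim=1)[:, word_eos])
print(final_score)          # scalar log-probability of ending the sentence
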
Example 3
    def forward(self, state, x):
        if state is None:
            c = [
                to_cuda(self, self.zero_state(x.size(0)))
                for n in six.moves.range(self.n_layers)
            ]
            h = [
                to_cuda(self, self.zero_state(x.size(0)))
                for n in six.moves.range(self.n_layers)
            ]
            state = {'c': c, 'h': h}

        h = [None] * self.n_layers
        c = [None] * self.n_layers
        emb = self.embed(x)
        h[0], c[0] = self.lstm[0](self.dropout[0](emb),
                                  (state['h'][0], state['c'][0]))
        for n in six.moves.range(1, self.n_layers):
            h[n], c[n] = self.lstm[n](self.dropout[n](h[n - 1]),
                                      (state['h'][n], state['c'][n]))
        y = self.lo(self.dropout[-1](h[-1]))
        state = {'c': c, 'h': h}
        return state, y
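
A simplified, self-contained sketch of the same pattern (not the ESPnet class): a stacked LSTMCell language model whose forward accepts and returns an explicit {'h': [...], 'c': [...]} state, so the caller can thread it through a token-by-token loop. All names and sizes below are arbitrary.

import torch
import torch.nn as nn

class TinyRNNLM(nn.Module):
    def __init__(self, n_vocab=20, n_units=8, n_layers=2):
        super().__init__()
        self.n_layers = n_layers
        self.n_units = n_units
        self.embed = nn.Embedding(n_vocab, n_units)
        self.lstm = nn.ModuleList(
            [nn.LSTMCell(n_units, n_units) for _ in range(n_layers)])
        self.lo = nn.Linear(n_units, n_vocab)

    def forward(self, state, x):
        if state is None:  # zero initial states, one tensor per layer
            state = {k: [torch.zeros(x.size(0), self.n_units)
                         for _ in range(self.n_layers)] for k in ('h', 'c')}
        h = [None] * self.n_layers
        c = [None] * self.n_layers
        h[0], c[0] = self.lstm[0](self.embed(x), (state['h'][0], state['c'][0]))
        for n in range(1, self.n_layers):
            h[n], c[n] = self.lstm[n](h[n - 1], (state['h'][n], state['c'][n]))
        return {'h': h, 'c': c}, self.lo(h[-1])

lm = TinyRNNLM()
state, x = None, torch.tensor([3])
for _ in range(4):            # reuse the returned state across steps
    state, y = lm(state, x)
    x = y.argmax(dim=1)       # greedy next token, just for the demo
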
Example 4
    def forward(self, xs, ilens, ys, labels, olens, spembs=None, spcs=None):
        """TACOTRON2 LOSS FORWARD CALCULATION

        :param torch.Tensor xs: batch of padded character ids (B, Tmax)
        :param list ilens: list of lengths of each input batch (B)
        :param torch.Tensor ys: batch of padded target features (B, Lmax, odim)
        :param torch.Tensor labels: batch of the sequences of stop token labels (B, Lmax)
        :param list olens: batch of the lengths of each target (B)
        :param torch.Tensor spembs: batch of speaker embedding vector (B, spk_embed_dim)
        :param torch.Tensor spcs: batch of padded target features (B, Lmax, spc_dim)
        :return: loss value
        :rtype: torch.Tensor
        """
        # calculate outputs
        if self.use_cbhg:
            cbhg_outs, after_outs, before_outs, logits = self.model(
                xs, ilens, ys, olens, spembs)
        else:
            after_outs, before_outs, logits = self.model(
                xs, ilens, ys, olens, spembs)

        # remove mod part
        if self.reduction_factor > 1:
            olens = [olen - olen % self.reduction_factor for olen in olens]
            ys = ys[:, :max(olens)]
            labels = labels[:, :max(olens)]
            spcs = spcs[:, :max(olens)] if spcs is not None else None

        # prepare weight of positive samples in cross entropy
        if self.bce_pos_weight != 1.0:
            weights = ys.new(*labels.size()).fill_(1)
            weights.masked_fill_(labels.eq(1), self.bce_pos_weight)
        else:
            weights = None

        # perform masking for padded values
        if self.use_masking:
            mask = to_cuda(self, make_non_pad_mask(olens).unsqueeze(-1))
            ys = ys.masked_select(mask)
            after_outs = after_outs.masked_select(mask)
            before_outs = before_outs.masked_select(mask)
            labels = labels.masked_select(mask[:, :, 0])
            logits = logits.masked_select(mask[:, :, 0])
            weights = weights.masked_select(
                mask[:, :, 0]) if weights is not None else None
            if self.use_cbhg:
                spcs = spcs.masked_select(mask)
                cbhg_outs = cbhg_outs.masked_select(mask)

        # calculate loss
        l1_loss = F.l1_loss(after_outs, ys) + F.l1_loss(before_outs, ys)
        mse_loss = F.mse_loss(after_outs, ys) + F.mse_loss(before_outs, ys)
        bce_loss = F.binary_cross_entropy_with_logits(logits, labels, weights)
        if self.use_cbhg:
            # calculate cbhg losses and then integrate them
            cbhg_l1_loss = F.l1_loss(cbhg_outs, spcs)
            cbhg_mse_loss = F.mse_loss(cbhg_outs, spcs)
            loss = l1_loss + mse_loss + bce_loss + cbhg_l1_loss + cbhg_mse_loss
            # report loss values for logging
            self.reporter.report([
                {'l1_loss': l1_loss.item()},
                {'mse_loss': mse_loss.item()},
                {'bce_loss': bce_loss.item()},
                {'cbhg_l1_loss': cbhg_l1_loss.item()},
                {'cbhg_mse_loss': cbhg_mse_loss.item()},
                {'loss': loss.item()},
            ])
        else:
            # integrate loss
            loss = l1_loss + mse_loss + bce_loss
            # report loss values for logging
            self.reporter.report([
                {'l1_loss': l1_loss.item()},
                {'mse_loss': mse_loss.item()},
                {'bce_loss': bce_loss.item()},
                {'loss': loss.item()},
            ])

        return loss
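
A minimal sketch of the masking step above, assuming make_non_pad_mask(olens) behaves like the plain-torch construction below (True for frames before each target length, False for padding); masked_select then flattens only the valid frames before the losses are taken. The tensors and lengths are made up.

import torch
import torch.nn.functional as F

olens = [3, 2]                         # hypothetical target lengths
ys = torch.randn(2, 4, 5)              # (B, Lmax, odim), padded to Lmax=4
after_outs = torch.randn(2, 4, 5)

lengths = torch.tensor(olens)
mask = torch.arange(ys.size(1)).unsqueeze(0) < lengths.unsqueeze(1)  # (B, Lmax)
mask = mask.unsqueeze(-1)              # (B, Lmax, 1), broadcasts over odim

l1_loss = F.l1_loss(after_outs.masked_select(mask), ys.masked_select(mask))
print(l1_loss.item())                  # averaged over the (3 + 2) * 5 unpadded values
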
Example 5
    def forward(self, state, x):
        # update state with input label x
        if state is None:  # make initial states and cumulative probability vector
            self.var_word_eos = to_cuda(self, self.var_word_eos)
            self.var_word_unk = to_cuda(self, self.var_word_unk)
            self.zero_tensor = to_cuda(self, self.zero_tensor)
            wlm_state, z_wlm = self.wordlm(None, self.var_word_eos)
            cumsum_probs = torch.cumsum(F.softmax(z_wlm, dim=1), dim=1)
            new_node = self.lexroot
            xi = self.space
        else:
            wlm_state, cumsum_probs, node = state
            xi = int(x)
            if xi == self.space:  # inter-word transition
                if node is not None and node[1] >= 0:  # check if the node is word end
                    w = to_cuda(self, torch.LongTensor([node[1]]))
                else:  # this node is not a word end, which means <unk>
                    w = self.var_word_unk
                # update wordlm state and cumulative probability vector
                wlm_state, z_wlm = self.wordlm(wlm_state, w)
                cumsum_probs = torch.cumsum(F.softmax(z_wlm, dim=1), dim=1)
                new_node = self.lexroot  # move to the tree root
            elif node is not None and xi in node[0]:  # intra-word transition
                new_node = node[0][xi]
            elif self.open_vocab:  # if no path in the tree, enter open-vocabulary mode
                new_node = None
            else:  # if open_vocab flag is disabled, return 0 probabilities
                log_y = to_cuda(
                    self, torch.full((1, self.subword_dict_size),
                                     self.logzero))
                return (wlm_state, None, None), log_y

        if new_node is not None:
            succ, wid, wids = new_node
            # compute parent node probability
            sum_prob = (cumsum_probs[:, wids[1]] -
                        cumsum_probs[:, wids[0]]) if wids is not None else 1.0
            if sum_prob < self.zero:
                log_y = to_cuda(
                    self, torch.full((1, self.subword_dict_size),
                                     self.logzero))
                return (wlm_state, cumsum_probs, new_node), log_y
            # set <unk> probability as a default value
            unk_prob = (cumsum_probs[:, self.word_unk] -
                        cumsum_probs[:, self.word_unk - 1])
            y = to_cuda(
                self,
                torch.full((1, self.subword_dict_size),
                           float(unk_prob) * self.oov_penalty))
            # compute transition probabilities to child nodes
            for cid, nd in succ.items():
                y[:, cid] = (cumsum_probs[:, nd[2][1]] -
                             cumsum_probs[:, nd[2][0]]) / sum_prob
            # apply word-level probabilities for <space> and <eos> labels
            if wid >= 0:
                wlm_prob = (cumsum_probs[:, wid] -
                            cumsum_probs[:, wid - 1]) / sum_prob
                y[:, self.space] = wlm_prob
                y[:, self.eos] = wlm_prob
            elif xi == self.space:
                y[:, self.space] = self.zero
                y[:, self.eos] = self.zero
            log_y = torch.log(torch.max(y, self.zero_tensor))  # clip to avoid log(0)
        else:  # if no path in the tree, transition probability is one
            log_y = to_cuda(self, torch.zeros(1, self.subword_dict_size))
        return (wlm_state, cumsum_probs, new_node), log_y
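
A small numeric check of the look-ahead trick above, under the assumption that the words reachable from a tree node occupy a contiguous id range in the word dictionary, stored as wids = (lo, hi): their total mass is then cumsum_probs[:, hi] - cumsum_probs[:, lo], exactly the sum_prob expression in the code. The logits and the range below are made up.

import torch
import torch.nn.functional as F

z_wlm = torch.randn(1, 6)                 # stand-in word LM logits
probs = F.softmax(z_wlm, dim=1)
cumsum_probs = torch.cumsum(probs, dim=1)

lo, hi = 2, 5                             # hypothetical node range: word ids 3..5
subtree_mass = cumsum_probs[:, hi] - cumsum_probs[:, lo]
direct_sum = probs[:, lo + 1:hi + 1].sum()
print(float(subtree_mass), float(direct_sum))  # equal up to float rounding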