예제 #1
0
    def __call__(self, xs, ilens):
        """Run the stacked RNN-with-projection encoder over a batch.

        For each of the ``self.elayers`` layers, the recurrent link stored
        under ``self.rnn_label + str(layer)`` is applied, its output is passed
        through ``_subsamplex`` with ``self.subsample[layer + 1]``, and the
        stacked result is fed to the link stored under ``'bt' + str(layer)``
        before being split back into per-utterance pieces. A ``tanh`` is
        applied once after the last layer.

        :param xs: per-utterance inputs handed to the first recurrent link
            (presumably a list of frame x dim arrays, one per utterance --
            confirm against the caller)
        :param ilens: length of each utterance in ``xs``; only logged here
            before being replaced by the lengths returned from
            ``_subsamplex``
        :return: ``(xs, ilens)`` -- the per-utterance encoder outputs (a tuple
            from ``F.split_axis``, or a one-element list if a bare array was
            returned) and the lengths produced by the last ``_subsamplex``
            call
        """
        message = self.__class__.__name__ + ' input lengths: ' + str(ilens)
        logging.info(message)

        for idx in six.moves.range(self.elayers):
            suffix = str(idx)
            # LSTM-type links take (hx, cx, xs) and return three values; the
            # other types take (hx, xs) and return two. Only the last
            # returned value (the per-utterance outputs) is used.
            if "lstm" not in self.typ:
                _, hs = self[self.rnn_label + suffix](None, xs)
            else:
                _, _, hs = self[self.rnn_label + suffix](None, None, xs)
            # hs: utt list of frame x cdim x 2 (2: means bidirectional)
            # TODO(watanabe) replace subsample and FC layer with CNN
            hs, ilens = _subsamplex(hs, self.subsample[idx + 1])
            # Stack all utterances into one (sum_utt frame_utt) x dim matrix
            # so the projection link is applied in a single call.
            projected = self['bt' + suffix](F.vstack(hs))
            # Cut the stacked matrix back at the cumulative utterance lengths.
            boundaries = np.cumsum(ilens[:-1])
            xs = F.split_axis(projected, boundaries, axis=0)

        # Final tanh, applied on the stacked batch and then split again.
        activated = F.tanh(F.vstack(xs))
        xs = F.split_axis(activated, np.cumsum(ilens[:-1]), axis=0)

        # With a single utterance the split may yield a bare array rather
        # than a tuple; wrap it so callers always get a sequence.
        if isinstance(xs, tuple):
            return xs, ilens  # xs: utt list of frame x dim
        return [xs], ilens
예제 #2
0
    def __call__(self, xs, ilens):
        """Apply every RNN + projection layer of the encoder to a batch.

        Each layer runs the recurrent link ``self[self.rnn_label + str(i)]``,
        passes its output through ``_subsamplex`` with
        ``self.subsample[i + 1]``, and applies the link ``self["bt" + str(i)]``
        to the vertically stacked result, which is then split back per
        utterance. One ``tanh`` is applied after the final layer.

        Args:
            xs: Per-utterance inputs given to the first recurrent link
                (presumably a sequence of frame x dim arrays -- confirm
                against the caller).
            ilens: Length of each utterance in ``xs``. It is logged, then
                overwritten by the lengths returned from ``_subsamplex``.

        Returns:
            tuple: ``(xs, ilens)`` where ``xs`` holds the per-utterance
            outputs (the tuple returned by ``F.split_axis``, or a one-element
            list when a bare array came back) and ``ilens`` is the value
            returned by the last ``_subsamplex`` call.

        """
        banner = self.__class__.__name__ + " input lengths: " + str(ilens)
        logging.info(banner)

        for depth in six.moves.range(self.elayers):
            tag = str(depth)
            # LSTM-type links are called with (hx, cx, xs) and return three
            # values; other types are called with (hx, xs) and return two.
            # In both cases only the last returned value is kept.
            if "lstm" not in self.typ:
                _, outs = self[self.rnn_label + tag](None, xs)
            else:
                _, _, outs = self[self.rnn_label + tag](None, None, xs)
            # outs: utt list of frame x cdim x 2 (2: means bidirectional)
            # TODO(watanabe) replace subsample and FC layer with CNN
            outs, ilens = _subsamplex(outs, self.subsample[depth + 1])
            # Projection runs once on the (sum_utt frame_utt) x dim stack.
            flat = self["bt" + tag](F.vstack(outs))
            # Recover per-utterance chunks at the cumulative length offsets.
            offsets = np.cumsum(ilens[:-1])
            xs = F.split_axis(flat, offsets, axis=0)

        # Final tanh over the stacked batch, followed by the same split.
        squashed = F.tanh(F.vstack(xs))
        xs = F.split_axis(squashed, np.cumsum(ilens[:-1]), axis=0)

        # A single utterance may come back as a bare array instead of a
        # tuple; wrap it so the return value is always a sequence.
        if isinstance(xs, tuple):
            return xs, ilens  # xs: utt list of frame x dim
        return [xs], ilens