Code example #1
File: train.py  Project: IshikuraGaku/persona2
    def __call__(self, trainer):
        with chainer.no_backprop_mode():
            references = []
            hypotheses = []

            use_data = random.sample(self.test_data, self.batch)

            if self.device >= 0:
                sources = [cuda.cupy.asarray(x[1]) for x, _ in use_data]
                targets = [cuda.cupy.asarray(y[1]) for _, y in use_data]
                #sourcePersona = [cuda.cupy.asarray(x[0]) for x, _ in use_data]  # not used for now; might be needed later
                targetPersona = [cuda.cupy.asarray(y[0]) for _, y in use_data]
            else:
                sources = [x[1] for x, _ in use_data]
                targets = [y[1] for _, y in use_data]
                sourcePersona = [x[0] for x, _ in use_data]
                targetPersona = [y[0] for _, y in use_data]
            
            sources = F.pad_sequence(sources, loadData.LoadData.maxlen, -1)
            targets = F.pad_sequence(targets, loadData.LoadData.maxlen, -1)

            references.extend([[t.tolist()] for t in targets.data])
            ys = [y.tolist()
                for y in self.model.predict(sources.data, targetPersona)]#batch_size
            hypotheses.extend(ys)

        bleu = bleu_score.corpus_bleu(
            references, hypotheses, smoothing_function=bleu_score.SmoothingFunction().method1)
        chainer.report({self.key:bleu})
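The evaluator above relies on F.pad_sequence to turn a ragged list of token-id arrays into one fixed-size (batch, maxlen) array before building the reference lists. A minimal, hedged sketch of that padding step (the maxlen of 5 and the token ids are made up for illustration):

import numpy as np
import chainer.functions as F

# Hypothetical variable-length token-id sequences.
targets = [np.array([3, 1, 4], dtype=np.int32),
           np.array([1, 5], dtype=np.int32)]

# Pad to a fixed length with -1, as the evaluator does above.
padded = F.pad_sequence(targets, 5, -1)
print(padded.array)
# [[ 3  1  4 -1 -1]
#  [ 1  5 -1 -1 -1]]

# corpus_bleu references are then built from the padded rows.
references = [[t.tolist()] for t in padded.array]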
Code example #2
    def __call__(self, enc_hs, dec_z, att_prev):
        """Compute NoAtt forward layer.

        Args:
            enc_hs (chainer.Variable | N-dimensional array):
                Input variable from encoders.
            dec_z: Dummy.
            att_prev (chainer.Variable | None): Attention weight.

        Returns:
            chainer.Variable: Sum over frames.
            chainer.Variable: Attention weight.

        """
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [
                self.xp.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32)
                for hh in enc_hs
            ]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)
            self.c = F.sum(
                self.enc_h
                * F.broadcast_to(F.expand_dims(att_prev, 2), self.enc_h.shape),
                axis=1,
            )

        return self.c, att_prev
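For reference, here is a self-contained sketch of the uniform-attention context computed above, assuming two encoder outputs of different lengths (all values are random and purely illustrative):

import numpy as np
import chainer
import chainer.functions as F

# Two hypothetical encoder outputs: 3 and 2 frames, hidden size 4.
enc_hs = [np.random.rand(3, 4).astype(np.float32),
          np.random.rand(2, 4).astype(np.float32)]

enc_h = F.pad_sequence(enc_hs)  # (2, 3, 4); the shorter utterance is zero-padded
att_prev = F.pad_sequence([
    chainer.Variable(np.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32))
    for hh in enc_hs
])  # (2, 3) uniform weights, zero-padded

# Weighted sum over frames; padded frames contribute nothing because both
# the weights and the encoder outputs are zero there.
c = F.sum(enc_h * F.broadcast_to(F.expand_dims(att_prev, 2), enc_h.shape), axis=1)
print(c.shape)  # (2, 4)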
Code example #3
File: ima.py  Project: nuric/softuni
def vectorise_stories(encoded_stories):
    """Given a list of encoded stories, vectorise them with padding."""
    # Vectorise stories
    vctx = np.zeros(
        (len(encoded_stories), max([
            len(s['context']) for s in encoded_stories
        ]), max([len(rule) for s in encoded_stories for rule in s['context']]),
         max([
             len(pred) for s in encoded_stories for rule in s['context']
             for pred in rule
         ])),
        dtype=np.int32)  # (B, R, P, C)
    vq = F.pad_sequence([
        np.array(s['query'], dtype=np.int32) for s in encoded_stories
    ]).array  # (B, Q)
    vas = np.array([s['answers'] for s in encoded_stories],
                   dtype=np.int32)  # (B,)
    supps = F.pad_sequence(
        [np.array(s['supps'], dtype=np.int32) for s in encoded_stories],
        padding=-1).array  # (B, I)
    for i, s in enumerate(encoded_stories):
        for j, rule in enumerate(s['context']):
            for k, pred in enumerate(rule):
                vctx[i, j, k, :len(pred)] = np.array(pred, dtype=np.int32)
        if DEEPLOGIC:
            perm = np.random.permutation(len(s['context']))
            vctx[i, :len(s['context'])] = vctx[i, perm]
            for j, supp in enumerate(supps[i]):
                if supp != -1:
                    supps[i, j] = np.argmax(perm == supp)
    return vctx, vq, vas, supps
Code example #4
File: ctc.py  Project: zhuanaa/espnet
    def __call__(self, hs, ys):
        """CTC forward.

        Args:
            hs (list of chainer.Variable | N-dimensional array): Input variable from encoder.
            ys (list of chainer.Variable | N-dimensional array): Input variable of decoder.

        Returns:
            chainer.Variable: A variable holding a scalar value of the CTC loss.

        """
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.separate(y_hat, axis=1)  # ilen list of batch x hdim

        # zero padding for ys
        y_true = F.pad_sequence(ys, padding=-1)  # batch x olen

        # get length info
        input_length = chainer.Variable(self.xp.array(ilens, dtype=np.int32))
        label_length = chainer.Variable(self.xp.array(olens, dtype=np.int32))
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(input_length.data))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(label_length.data))

        # get ctc loss
        self.loss = F.connectionist_temporal_classification(
            y_hat, y_true, 0, input_length, label_length)
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
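For orientation, a compact, hedged sketch of the padding layout that F.connectionist_temporal_classification expects, using random data (the vocabulary size, frame counts, and labels are made up):

import numpy as np
import chainer
import chainer.functions as F

n_vocab = 5  # including the blank symbol at index 0
hs = [np.random.rand(t, n_vocab).astype(np.float32) for t in (4, 3)]    # encoder outputs
ys = [np.array([1, 2], dtype=np.int32), np.array([3], dtype=np.int32)]  # label sequences

# (B, T_max, V) -> T_max-length list of (B, V) arrays, as CTC expects.
y_hat = F.separate(F.pad_sequence(hs), axis=1)
y_true = F.pad_sequence(ys, padding=-1)  # (B, L_max); -1 marks padding

input_length = chainer.Variable(np.array([4, 3], dtype=np.int32))
label_length = chainer.Variable(np.array([2, 1], dtype=np.int32))
loss = F.connectionist_temporal_classification(y_hat, y_true, 0, input_length, label_length)
print(loss.shape)  # () -- a scalar loss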
Code example #5
File: test_loss.py  Project: zpppy/espnet
def test_ctc_loss():
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    import warpctc_pytorch

    from espnet.nets.e2e_asr_th import pad_list

    n_out = 7
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32)
        for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0,
                                                      input_length,
                                                      label_length).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred],
                       0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = warpctc_pytorch.CTCLoss(size_average=True)(
        th_pred, th_target, th_ilen, th_olen).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Code example #6
File: model_reg.py  Project: machi-da/multi_task
    def __call__(self, hx, cx, xs, enc_hs):
        xs_embed = [self.embed(x) for x in xs]
        hy, cy, ys = self.Nlstm(hx, cx, xs_embed)

        ys_pad = F.pad_sequence(ys, length=None, padding=0.0)
        enc_hs = F.pad_sequence(enc_hs, length=None, padding=0.0)

        mask = self.xp.all(enc_hs.data == 0, axis=2, keepdims=True)
        mask_num = self.xp.full(mask.shape, -1024.0, dtype=self.xp.float32)

        alignment = []
        decode = []

        ys_pad = F.transpose(ys_pad, (1, 0, 2))
        for y in ys_pad:
            y = F.reshape(y, (*y.shape, 1))
            score = F.matmul(enc_hs, y)
            score = F.where(mask, mask_num, score)
            align = F.softmax(score, axis=1)
            context_vector = F.matmul(enc_hs, align, True, False)
            t = self.W_c(
                F.dropout(F.concat((y, context_vector), axis=1), self.dropout))
            ys_proj = self.proj(F.dropout(t, self.dropout))
            alignment.append(F.reshape(align, (len(xs), -1)))
            decode.append(ys_proj)

        decode = F.stack(decode, axis=1)
        alignment = F.stack(alignment, axis=1)
        return hy, cy, decode, alignment.data
Code example #7
def test_ctc_loss():
    pytest.importorskip("torch")
    pytest.importorskip("warpctc_pytorch")
    import torch
    from warpctc_pytorch import CTCLoss

    from e2e_asr_attctc_th import pad_list

    n_out = 7
    n_batch = 3
    input_length = numpy.array([11, 17, 15], dtype=numpy.int32)
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = [numpy.random.rand(il, n_out).astype(
        numpy.float32) for il in input_length]
    np_target = [numpy.random.randint(
        0, n_out, size=ol, dtype=numpy.int32) for ol in label_length]

    # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr_attctc.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length).data

    th_pred = pad_list([torch.autograd.Variable(torch.from_numpy(x))
                        for x in np_pred]).transpose(0, 1)
    th_target = torch.autograd.Variable(
        torch.from_numpy(numpy.concatenate(np_target)))
    th_ilen = torch.autograd.Variable(torch.from_numpy(input_length))
    th_olen = torch.autograd.Variable(torch.from_numpy(label_length))
    # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does
    th_loss = (CTCLoss()(th_pred, th_target, th_ilen,
                         th_olen) / n_batch).data.numpy()[0]
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05)
Code example #8
    def forward(self, xs1, xs2):
        # padding inputs
        x1 = F.pad_sequence(xs1, padding=-1)
        x2 = F.pad_sequence(xs2, padding=-1)

        # word idx -> word vector
        ex1 = F.dropout(self.embed(x1), self.dropout)
        ex2 = F.dropout(self.embed(x2), self.dropout)

        # this mini batch parameters definition
        batch_size = len(xs1)
        row, column = ex1.shape[1], ex2.shape[1]
        utils = Utils(self.out_units, batch_size, column, row, self.xp, self.minute_num)

        m1, m2 = utils.masking(ex1), utils.masking(ex2)

        mean = utils.kernel_shaping(self.means)
        variance = utils.kernel_shaping(self.variances)

        # cross match and kernel pooling
        h, mask = utils.cross_match(ex1, ex2, m1, m2)
        h = utils.kernel_pooling(h, mask, mean, variance)

        # calculate ranking score
        h = self.liner(h)
        h = F.leaky_relu(h)

        return h
Code example #9
    def __call__(self, enc_hs, dec_z, att_prev):
        '''NoAtt forward

        :param enc_hs:
        :param dec_z: dummy
        :param att_prev:
        :return:
        '''
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [
                self.xp.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32)
                for hh in enc_hs
            ]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)
            self.c = F.sum(
                self.enc_h *
                F.broadcast_to(F.expand_dims(att_prev, 2), self.enc_h.shape),
                axis=1)

        return self.c, att_prev
Code example #10
File: e2e_asr.py  Project: zane678/espnet
    def __call__(self, hs, ys):
        '''CTC forward

        :param hs:
        :param ys:
        :return:
        '''
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.separate(y_hat, axis=1)  # ilen list of batch x hdim

        # zero padding for ys
        y_true = F.pad_sequence(ys, padding=-1)  # batch x olen

        # get length info
        input_length = chainer.Variable(self.xp.array(ilens, dtype=np.int32))
        label_length = chainer.Variable(self.xp.array(olens, dtype=np.int32))
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(input_length.data))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(label_length.data))

        # get ctc loss
        self.loss = F.connectionist_temporal_classification(
            y_hat, y_true, 0, input_length, label_length)
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
Code example #11
    def forward(self, enc_hs, dec_z, att_prev):
        '''AttLoc forward

        :param enc_hs:
        :param dec_z:
        :param att_prev:
        :param scaling:
        :return:
        '''
        # EDIT(hamaji): scaling is now a local variable.
        scaling = 2.0
        batch = len(enc_hs)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = linear_tensor_3d(self.mlp_enc, self.enc_h)

        if dec_z is None:
            dec_z = chainer.Variable(self.xp.zeros(
                (batch, self.dunits), dtype=np.float32))
        else:
            dec_z = F.reshape(dec_z, (batch, self.dunits))

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [self.xp.full(
                hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)

        # TODO(watanabe) use <chainer variable>.reshape(), instead of F.reshape()
        # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(
            F.reshape(att_prev, (batch, 1, 1, self.h_length)))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = linear_tensor_3d(self.mlp_att, att_conv)

        # dec_z_tiled: utt x frame x att_dim
        dec_z_tiled = F.broadcast_to(
            F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        # TODO(watanabe) use batch_matmul
        e = F.squeeze(linear_tensor_3d(self.gvec, F.tanh(
            att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
        # Masking the padded area with a large negative value (to force its probability to zero)
        # simply degraded performance, so that implementation was abandoned.
        # Apply scaling to sharpen the attention distribution.
        w = F.softmax(scaling * e)

        # weighted sum over frames
        # utt x hdim
        c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

        return c, w
Code example #12
File: BiLSTM.py  Project: nananaman/BiLSTM
    def __call__(self, hs, ht, x_list):
        '''
        Compute attention.
        :param hs : list of the encoder's intermediate (hidden) vectors
        :param ht : list of the decoder's intermediate (hidden) vectors
        :return : intermediate vector ht
        '''
        batch_size = len(x_list)
        ht = F.reshape(ht, (batch_size, 1, self.hidden_size))
        h = []
        for i in range(batch_size):
            h.append(Variable((hs[i].data * ht[i].data)))
        concat_h = F.concat(h, axis=0)
        attn = self.eh(concat_h)
        sections = np.cumsum([len(x) for x in x_list])
        split_attention = F.split_axis(attn, sections[:-1], axis=0)
        split_attention_pad = F.pad_sequence(split_attention, padding=-1024.)
        attn_softmax = F.softmax(split_attention_pad, axis=1)
        hs_pad = F.pad_sequence(hs)
        hs_pad_reshape = F.reshape(hs_pad, (-1, hs_pad.shape[-1]))

        r = F.reshape(attn_softmax, (-1, attn_softmax.shape[-1]))
        attn_softmax_reshape = F.broadcast_to(
            F.reshape(attn_softmax, (-1, attn_softmax.shape[-1])),
            hs_pad_reshape.shape)
        attention_hidden = hs_pad_reshape * attn_softmax_reshape

        attention_hidden_reshape = F.reshape(
            attention_hidden, (batch_size, -1, attention_hidden.shape[-1]))
        result = F.sum(attention_hidden_reshape, axis=1)
        '''
        # drop the first dimension of h
        h = Variable(h.data[0])
        batch_size = h.data.shape[0]
        # list for recording the weights
        ws = []
        # initialize the sum of the weights
        sum_w = Variable(self.ARR.zeros((batch_size, 1), dtype='float32'))
        # compute the weights from the encoder and decoder hidden vectors
        for e in es:
            print(e.shape)
            print(h.shape)
            # compute the weight
            w = F.tanh(self.eh(e) + self.dh(h))
            # normalize
            w = F.exp(self.hw(w))
            # record it
            ws.append(w)
            sum_w += w
        # initialize the weighted-average vector
        att = Variable(self.ARR.zeros(
            (1, batch_size, self.hidden_size), dtype='float32'))
        for e, w in zip(es, ws):
            # normalize the weight
            w /= sum_w
            # add weight * encoder hidden vector to the output vector
            att += F.reshape(F.batch_matmul(e, w),
                             (1, batch_size, self.hidden_size))
                             '''
        return F.reshape(result, (1, result.shape[0], result.shape[1]))
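The -1024 padding above is a masking trick: after the softmax along the padded axis, padded positions receive (numerically) zero weight. A small, hedged illustration of just that step, with made-up scores:

import numpy as np
import chainer.functions as F

# Hypothetical attention scores for two sequences of length 3 and 1.
scores = [np.array([[0.2], [1.0], [0.5]], dtype=np.float32),
          np.array([[0.7]], dtype=np.float32)]

padded = F.pad_sequence(scores, padding=-1024.)  # (2, 3, 1)
weights = F.softmax(padded, axis=1)
print(weights.array[1, :, 0])  # ~[1. 0. 0.]: the padded frames get ~zero weight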
Code example #13
def batch_pit_loss_faster(ys, ts, label_delay=0):
    """
    PIT loss over mini-batch.
    Args:
      ys: B-length list of predictions
      ts: B-length list of labels
    Returns:
      loss: (1,)-shape mean cross entropy over mini-batch
      labels: B-length list of permuted labels
    """

    n_speakers = ts[0].shape[1]
    xp = chainer.backend.get_array_module(ys[0])
    # (B, T, C)
    ys = F.pad_sequence(ys, padding=-1)

    losses = []
    for shift in range(n_speakers):
        # rolled along with speaker-axis
        ts_roll = [xp.roll(t, -shift, axis=1) for t in ts]
        ts_roll = F.pad_sequence(ts_roll, padding=-1)
        # loss: (B, T, C)
        loss = F.sigmoid_cross_entropy(ys, ts_roll, reduce='no')
        # sum over time: (B, C)
        loss = F.sum(loss, axis=1)
        losses.append(loss)
    # losses: (B, C, C)
    losses = F.stack(losses, axis=2)
    # losses[b, i, j] is a loss between
    # `i`-th speaker in y and `(i+j)%C`-th speaker in t

    perms = xp.array(
        list(permutations(range(n_speakers))),
        dtype='i',
    )
    # y_inds: [0,1,2,3]
    y_ind = xp.arange(n_speakers, dtype='i')
    #  perms  -> relation to t_inds      -> t_inds
    # 0,1,2,3 -> 0+j=0,1+j=1,2+j=2,3+j=3 -> 0,0,0,0
    # 0,1,3,2 -> 0+j=0,1+j=1,2+j=3,3+j=2 -> 0,0,1,3
    t_inds = xp.mod(perms - y_ind, n_speakers)

    losses_perm = []
    for t_ind in t_inds:
        losses_perm.append(F.mean(losses[:, y_ind, t_ind], axis=1))
    # losses_perm: (B, Perm)
    losses_perm = F.stack(losses_perm, axis=1)

    min_loss = F.sum(F.min(losses_perm, axis=1))
    n_frames = np.sum([t.shape[0] for t in ts])
    min_loss = min_loss / n_frames

    min_indices = xp.argmin(losses_perm.array, axis=1)
    labels_perm = [t[:, perms[idx]] for t, idx in zip(ts, min_indices)]

    return min_loss, labels_perm
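The rolled-loss trick above avoids computing a separate sigmoid cross-entropy for every speaker pairing; only C shifted versions are needed, and the per-permutation losses are then assembled by indexing. A tiny numeric check of the perms/t_inds arithmetic (4 speakers, purely illustrative):

import numpy as np
from itertools import permutations

n_speakers = 4
perms = np.array(list(permutations(range(n_speakers))), dtype='i')
y_ind = np.arange(n_speakers, dtype='i')

# t_inds[p, i] is the shift j such that prediction i is matched with
# speaker (i + j) % C under permutation p, mirroring losses[b, i, j].
t_inds = np.mod(perms - y_ind, n_speakers)

print(perms[0], t_inds[0])  # [0 1 2 3] [0 0 0 0] -> identity permutation
print(perms[1], t_inds[1])  # [0 1 3 2] [0 0 1 3] -> matches the comment above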
Code example #14
File: dataloader.py  Project: ssethia2/ast
    def get_batch(self, batch_size, set_key, train, labels=False):
        xp = cuda.cupy if self.gpuid >= 0 else np

        batches = []

        num_b = self.buckets[set_key]["num_b"]
        width_b = self.buckets[set_key]["width_b"]
        max_sp = (num_b + 1) * width_b

        if labels:
            dec_key = self.data_cfg["dec_key"]
            max_pred = self.data_cfg["max_pred"]

        for b, bucket in enumerate(self.buckets[set_key]["buckets"]):
            # Shuffle utterances in a bucket
            random.shuffle(bucket)
            for i in range(0, len(bucket), batch_size):
                # append utterances, and the width of the current batch
                # a width of 10 implies 10 speech frames = 10 * 10ms = 100ms
                batches.append((bucket[i:i + batch_size], (b + 1) * width_b))
        # end for

        # Shuffle all the batches
        random.shuffle(batches)

        # Generator for batches
        for (utts, b) in batches:
            batch_data = {"X": [], "utts": []}

            if labels:
                batch_data["y"] = []

            for u in utts:
                batch_data["X"].append(self._load_speech(u, set_key, max_sp))
                if labels:
                    en_ids = [
                        self.vocab[dec_key]['w2i'].get(w, SYMBOLS.UNK_ID)
                        for w in self.map[set_key][u][dec_key]
                    ]

                    y_ids = [SYMBOLS.GO_ID
                             ] + en_ids[:max_pred - 2] + [SYMBOLS.EOS_ID]
                    batch_data["y"].append(xp.asarray(y_ids, dtype=xp.int32))

            # end for utts
            # include the utt ids
            batch_data['utts'].extend(utts)
            batch_data['X'] = F.pad_sequence(batch_data['X'],
                                             padding=SYMBOLS.PAD_ID)
            batch_data['X'].to_gpu(self.gpuid)
            if labels:
                batch_data['y'] = F.pad_sequence(batch_data['y'],
                                                 padding=SYMBOLS.PAD_ID)
                batch_data['y'].to_gpu(self.gpuid)

            yield batch_data
Code example #15
def _compute_metrics(parsed,
                     gold_batch,
                     lengths,
                     use_predicted_arcs_for_rels=True):
    logits_arc, logits_rel = parsed
    true_arcs, true_rels = zip(*gold_batch)

    # exclude attachment from the root
    logits_arc, logits_rel = logits_arc[:, 1:], logits_rel[:, 1:]
    true_arcs = F.pad_sequence(true_arcs, padding=-1)[:, 1:]
    true_rels = F.pad_sequence(true_rels, padding=-1)[:, 1:]
    lengths = np.array(lengths, dtype=np.int32) - 1
    xp = chainer.cuda.get_array_module(logits_arc)
    if xp is not np:
        true_arcs.to_gpu()
        true_rels.to_gpu()

    b, n_deps, n_heads = logits_arc.shape
    logits_arc_flatten = F.reshape(logits_arc, (b * n_deps, n_heads))
    true_arcs_flatten = F.reshape(true_arcs, (b * n_deps, ))
    arc_loss = F.softmax_cross_entropy(logits_arc_flatten,
                                       true_arcs_flatten,
                                       ignore_label=-1)
    arc_accuracy = F.accuracy(logits_arc_flatten,
                              true_arcs_flatten,
                              ignore_label=-1)
    arc_accuracy.to_cpu()

    if use_predicted_arcs_for_rels:
        parsed_arcs = xp.argmax(logits_arc.data, axis=2)
    else:
        parsed_arcs = true_arcs.data
    logits_rel = [
        logits[np.arange(length), arcs[:length]]
        for logits, arcs, length in zip(logits_rel, parsed_arcs, lengths)
    ]
    logits_rel = F.pad_sequence(logits_rel)
    b, n_deps, n_rels = logits_rel.shape
    logits_rel_flatten = F.reshape(logits_rel, (b * n_deps, n_rels))
    true_rels_flatten = F.reshape(true_rels, (b * n_deps, ))
    rel_loss = F.softmax_cross_entropy(logits_rel_flatten,
                                       true_rels_flatten,
                                       ignore_label=-1)
    rel_accuracy = F.accuracy(logits_rel_flatten,
                              true_rels_flatten,
                              ignore_label=-1)
    rel_accuracy.to_cpu()

    return {
        'arc_loss': arc_loss,
        'arc_accuracy': arc_accuracy,
        'rel_loss': rel_loss,
        'rel_accuracy': rel_accuracy
    }
Code example #16
File: decoders.py  Project: yuekaizhang/espnet
    def calculate_all_attentions(self, hs, ys):
        """Calculate all of attentions.

        Args:
            hs (list of chainer.Variable | N-dimensional array):
                Input variable from encoder.
            ys (list of chainer.Variable | N-dimensional array):
                Input variable of decoder.

        Returns:
            chainer.Variable: List of attention weights.

        """
        # prepare input and output word sequences with sos/eos IDs
        eos = self.xp.array([self.eos], "i")
        sos = self.xp.array([self.sos], "i")
        ys_in = [F.concat([sos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # padding for ys with -1
        # pys: utt x olen
        pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
        pad_ys_out = F.pad_sequence(ys_out, padding=-1)

        # get length info
        olength = pad_ys_out.shape[1]

        # initialization
        c_list = [None]  # list of cell state of each layer
        z_list = [None]  # list of hidden state of each layer
        for _ in six.moves.range(1, self.dlayers):
            c_list.append(None)
            z_list.append(None)
        att_w = None
        att_ws = []
        self.att.reset()  # reset pre-computation of h

        # pre-computation of embedding
        eys = self.embed(pad_ys_in)  # utt x olen x zdim
        eys = F.separate(eys, axis=1)

        # loop for an output sequence
        for i in six.moves.range(olength):
            att_c, att_w = self.att(hs, z_list[0], att_w)
            ey = F.hstack((eys[i], att_c))  # utt x (zdim + hdim)
            z_list, c_list = self.rnn_forward(ey, z_list, c_list, z_list,
                                              c_list)
            att_ws.append(att_w)  # for debugging

        att_ws = F.stack(att_ws, axis=1)
        att_ws.to_cpu()

        return att_ws.data
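A hedged sketch of the sos/eos framing and padding used above (the token ids are made up; eos doubles as the input padding value so padded steps feed a harmless symbol to the decoder, while -1 in the targets is the conventional ignore index):

import numpy as np
import chainer.functions as F

sos, eos = np.array([7], "i"), np.array([8], "i")
ys = [np.array([1, 2, 3], "i"), np.array([4], "i")]

ys_in = [F.concat([sos, y], axis=0) for y in ys]
ys_out = [F.concat([y, eos], axis=0) for y in ys]

pad_ys_in = F.pad_sequence(ys_in, padding=8)     # decoder inputs, padded with eos
pad_ys_out = F.pad_sequence(ys_out, padding=-1)  # targets, padded with -1
print(pad_ys_in.array)
# [[7 1 2 3]
#  [7 4 8 8]]
print(pad_ys_out.array)
# [[ 1  2  3  8]
#  [ 4  8 -1 -1]]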
Code example #17
def _compute_metrics(parsed,
                     gold_batch,
                     lengths,
                     use_predicted_arcs_for_rels=True):
    logits_arc, logits_rel, *_ = parsed
    true_arcs, true_rels, *_ = zip(*gold_batch)

    # exclude attachment from the root
    logits_arc, logits_rel = logits_arc[:, 1:], logits_rel[:, 1:]
    true_arcs = F.pad_sequence(true_arcs, padding=-1)[:, 1:]
    true_rels = F.pad_sequence(true_rels, padding=-1)[:, 1:]
    lengths = np.array(lengths, dtype=np.int32) - 1
    xp = chainer.cuda.get_array_module(logits_arc)
    if xp is not np:
        true_arcs.to_gpu()
        true_rels.to_gpu()

    b, n_deps, n_heads = logits_arc.shape
    logits_arc_flatten = F.reshape(logits_arc, (b * n_deps, n_heads))
    true_arcs_flatten = F.reshape(true_arcs, (b * n_deps, ))
    arc_loss = F.softmax_cross_entropy(logits_arc_flatten,
                                       true_arcs_flatten,
                                       ignore_label=-1)
    arc_accuracy = _accuracy(logits_arc_flatten,
                             true_arcs_flatten,
                             ignore_label=-1)

    if use_predicted_arcs_for_rels:
        parsed_arcs = xp.argmax(logits_arc.data, axis=2)
    else:
        parsed_arcs = true_arcs.data
    parsed_arcs = chainer.cuda.to_cpu(parsed_arcs)
    b, n_deps, n_heads, n_rels = logits_rel.shape
    base1, base2 = n_deps * n_heads, np.arange(n_deps) * n_heads
    parsed_arcs_flatten = np.concatenate(
        [base1 * i + base2 + arcs for i, arcs in enumerate(parsed_arcs)])
    logits_rel_flatten = F.embed_id(xp.asarray(parsed_arcs_flatten),
                                    F.reshape(logits_rel, (b * base1, n_rels)))
    true_rels_flatten = F.reshape(true_rels, (b * n_deps, ))
    rel_loss = F.softmax_cross_entropy(logits_rel_flatten,
                                       true_rels_flatten,
                                       ignore_label=-1)
    rel_accuracy = _accuracy(logits_rel_flatten,
                             true_rels_flatten,
                             ignore_label=-1)

    return {
        'arc_loss': arc_loss,
        'arc_accuracy': arc_accuracy,
        'rel_loss': rel_loss,
        'rel_accuracy': rel_accuracy
    }
Code example #18
    def _encode(self, xs, x_lens, is_multi_task=False):
        """Encode acoustic features.
        Args:
            xs (list of chainer.Variable(float)):
                A list of tensors of size `[T_in, input_size]`
            x_lens (np.ndarray): A tensor of size `[B]`
            is_multi_task (bool, optional):
        Returns:
            logits (chainer.Variable, float): A tensor of size
                `[B, T, num_classes (including the blank class)]`
            x_lens (np.ndarray): A tensor of size `[B]`
            OPTION:
                logits_sub (chainer.Variable, float): A tensor of size
                    `[B, T, num_classes_sub (including the blank class)]`
                x_lens_sub (np.ndarray): A tensor of size `[B]`
        """
        if is_multi_task:
            if self.encoder_type == 'cnn':
                xs, x_lens = self.encoder(xs, x_lens)
                xs_sub = xs
                x_lens_sub = x_lens
            else:
                xs, x_lens, xs_sub, x_lens_sub = self.encoder(xs, x_lens)
        else:
            xs, x_lens = self.encoder(xs, x_lens)

        # Concatenate
        xs = F.pad_sequence(xs, padding=0)

        # Pass through fully-connected layers
        if len(self.fc_list) > 0:
            for i in range(len(self.fc_list)):
                # if self.batch_norm:
                #     xs = self['bn_fc_' + str(i)](xs)
                xs = self['fc_' + str(i)](xs)
        logits = self.fc_out(xs)

        if is_multi_task:
            # Concatenate
            xs_sub = F.pad_sequence(xs_sub, padding=0)

            # Pass through fully-connected layers
            if len(self.fc_list_sub) > 0:
                for i in range(len(self.fc_list_sub)):
                    # if self.batch_norm:
                    #     xs_sub = self['bn_fc_sub_' + str(i)](xs_sub)
                    xs_sub = self['fc_sub_' + str(i)](xs_sub)
            logits_sub = self.fc_out_sub(xs_sub)

            return logits, x_lens, logits_sub, x_lens_sub
        else:
            return logits, x_lens
Code example #19
File: Model.py  Project: yslab2018/Recommendation
    def __call__(self, x):
        x_len = [len(d) for d in x]
        x_section = np.cumsum(x_len[:-1])

        ex = np.copy(self.embed[F.concat(x, axis=0).data])
        if self.gpu_flag >= 0:
            ex = cuda.to_gpu(ex)

        ex = F.dropout(ex, ratio=self.dr_input)
        exs = F.split_axis(ex, x_section, 0)

        _, y_seq_list = self.gru(None, exs)

        #==================
        #  Self-Attention
        #==================
        # Pad each step's output within the batch with zeros
        pd_y_seq = F.pad_sequence(y_seq_list,
                                  padding=0)  # [Batch, max_len, Unit]

        n_B, n_ML, n_U = pd_y_seq.shape
        n_AH = self.n_att_head

        pd_y_seq = F.reshape(pd_y_seq,
                             (n_B, n_ML, n_U, 1))  # [Batch, max_len, Unit, 1]
        pd_y_seq = F.broadcast_to(
            pd_y_seq, (n_B, n_ML, n_U, n_AH))  # [Batch, max_len, Unit, Head]

        # concatenate
        att_in = F.concat(y_seq_list, axis=0)  # [All element, Unit]
        att_h1 = F.tanh(self.att_w1(att_in))  # [All element, Att unit]
        att_h2 = self.att_w2(att_h1)  # [All element, Head]

        att_h2_seq = F.split_axis(att_h2, x_section, 0, 0)
        att_h2_pad = F.pad_sequence(att_h2_seq,
                                    padding=-1024.0)  # [Batch, max_len, Head]

        # Softmax
        weight = F.softmax(att_h2_pad, axis=1)  # [Batch, max_len, Head]
        self.tmp_weight = weight.data
        weight = F.reshape(weight,
                           (n_B, n_ML, 1, n_AH))  # [Batch, max_len, 1, Head]
        weight = F.broadcast_to(
            weight, (n_B, n_ML, n_U, n_AH))  # [Batch, max_len, Unit, Head]

        # Compute the weighted sum
        att_out = F.sum(pd_y_seq * weight, axis=1)  # [Batch, Unit, Head]
        out = F.tanh(self.decode(att_out))

        return out
Code example #20
 def forward(self, xs, h, c, mask):
     batch_size = len(xs)
     lens = [x.shape[0] for x in xs]
     #max_len = max(lens)
     max_len = self.sequence_length
     #mask = (np.expand_dims(np.arange(max_len), 0) <
     #        np.expand_dims(lens, 1)).astype(np.float)
     #h = np.zeros((batch_size, self.num_hidden), dtype=np.float32)
     #c = np.zeros((batch_size, self.num_hidden), dtype=np.float32)
     #h = self.initial_h
     #c = self.initial_c
     inputs = F.pad_sequence(xs)
     for time in range(max_len):
         x = inputs[:, time]
         input = F.concat((x, h), axis=1)
         gate = self.l(input)
         i = gate[:, 0:self.num_hidden]
         o = gate[:, self.num_hidden:self.num_hidden * 2]
         f = gate[:, self.num_hidden * 2:self.num_hidden * 3]
         nc = gate[:, self.num_hidden * 3:self.num_hidden * 4]
         #i, o, f, nc = F.split_axis(gate, 4, axis=1)
         i = F.sigmoid(i)
         o = F.sigmoid(o)
         f = F.sigmoid(f)
         nc = F.tanh(nc)
         nc = f * c + i * nc
         nh = o * F.tanh(nc)
         m = mask[:, time]
         pmask = F.reshape(m, (self.batch_size, ))
         pmask = F.broadcast_to(F.expand_dims(pmask, axis=1),
                                (self.batch_size, self.num_hidden))
         nmask = 1.0 - pmask
         h = nh * pmask + h * nmask
     return h
Code example #21
    def __call__(self, xs):
        # forward calculation

        h1 = [F.relu(self.l1(x)) for x in xs]

        _, _, h1 = self.encoder1(None, None, h1)

        h1 = [F.max_pooling_2d(x[None][None], ksize=(2, 1))[0][0] for x in h1]

        _, _, h1 = self.encoder2(None, None, h1)

        h1 = [F.max_pooling_2d(x[None][None], ksize=(2, 1))[0][0] for x in h1]

        _, _, h1 = self.encoder3(None, None, h1)

        h1 = [F.max_pooling_2d(x[None][None], ksize=(2, 1))[0][0] for x in h1]

        _, _, ys = self.encoder4(None, None, h1)

        _, _, ys = self.encoder5(None, None, h1)

        input_length = [len(y) for y in ys]
        ys = [self.output(y) for y in ys]

        ys = F.pad_sequence(ys)
        result = list(F.stack(ys, axis=1))

        return result, input_length
Code example #22
 def forward(self, equery, vmemory, ememory, mask, iteration=0):
     """Compute an attention over memory given the query."""
     # equery.shape == (..., E)
     # vmemory.shape == (..., Ms, M)
     # ememory.shape == (..., Ms, E)
     # mask.shape == (..., Ms)
     # Setup memory embedding
     eq = F.repeat(equery[..., None, :], vmemory.shape[-2],
                   -2)  # (..., Ms, E)
     # Compute content based attention
     merged = F.concat(
         [eq, ememory, eq * ememory,
          F.squared_difference(eq, ememory)], -1)  # (..., Ms, 4*E)
     inter = self.att_linear(merged, n_batch_axes=len(vmemory.shape) -
                             1)  # (..., Ms, E)
     inter = F.tanh(inter)  # (..., Ms, E)
     inter = F.dropout(inter, DROPOUT)  # (..., Ms, E)
     # Split into sentences
     lengths = np.sum(np.any((vmemory != 0), -1), -1)  # (...,)
     mems = [s[..., :l, :] for s, l in zip(F.separate(inter, 0), lengths)
             ]  # B x [(M1, E), (M2, E), ...]
     _, bimems = self.att_birnn(None,
                                mems)  # B x [(M1, 2*E), (M2, 2*E), ...]
     bimems = F.pad_sequence(bimems)  # (..., Ms, 2*E)
     att = self.att_score(bimems, n_batch_axes=len(vmemory.shape) -
                          1)  # (..., Ms, 1)
     att = F.squeeze(att, -1)  # (..., Ms)
     if mask is not None:
         att += mask * MINUS_INF  # (..., Ms)
     return att
Code example #23
    def forward(self, ys_pad, source, x_mask):
        """Forward decoder.

        :param ys_pad: batch of target token id sequences, int32; sos is prepended and
            eos padding is applied internally
        :param xp.array source: encoded memory, float32 (batch, maxlen_in, feat)
        :param xp.array x_mask: encoded memory mask, uint8 (batch, maxlen_in)
        :return e: decoded token score before softmax (batch, maxlen_out, token)
        :rtype: chainer.Variable
        """
        xp = self.xp
        sos = np.array([self.sos], np.int32)
        ys = [np.concatenate([sos, y], axis=0) for y in ys_pad]
        e = F.pad_sequence(ys, padding=self.eos).data
        e = xp.array(e)
        # mask preparation
        xy_mask = self.make_attention_mask(e, xp.array(x_mask))
        yy_mask = self.make_attention_mask(e, e)
        yy_mask *= make_history_mask(xp, e)

        e = self.pe(self.embed(e))
        batch, length, dims = e.shape
        e = e.reshape(-1, dims)
        source = source.reshape(-1, dims)
        for i in range(self.n_layers):
            e = self["decoders." + str(i)](e, source, xy_mask, yy_mask, batch)
        return self.output_layer(self.output_norm(e)).reshape(
            batch, length, -1)
Code example #24
def probability_loss(P, n_speakers):
    """Get cross-entropy loss for the probabilities reported

    Args:
     P: (B, n_speakers + 1, 1)
     n_speakers: B-length list

    Returns:
     loss_a: (1, )-shape mean cross entropy loss over mini-batch
    """
    # l: (B, n_speakers + 1, 1)
    #    loss = 0
    #    for p in P:
    #        p = p.T
    #        l = np.ones_like(p).astype(np.int32)
    #        l[0, -1] = 0
    #        loss += F.sigmoid_cross_entropy(p, l)
    #    return loss / len(P)

    # New Method
    P = F.swapaxes(F.pad_sequence(P, padding=1), 1, 2)
    L = np.ones_like(P).astype(np.int32)
    for i, n in enumerate(n_speakers):
        L[i, 0, n] = 0
    return F.sigmoid_cross_entropy(P, L)
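A hedged numeric sketch of the padding and label construction above, assuming a mini-batch with 2 and 3 active speakers (the probability values are made up):

import numpy as np
import chainer.functions as F

# Hypothetical attractor existence probabilities, one extra "stop" slot each.
P = [np.full((3, 1), 0.9, dtype=np.float32),
     np.full((4, 1), 0.9, dtype=np.float32)]
n_speakers = [2, 3]

P_pad = F.swapaxes(F.pad_sequence(P, padding=1), 1, 2)  # (B, 1, max_spk + 1)
L = np.ones_like(P_pad.array).astype(np.int32)
for i, n in enumerate(n_speakers):
    L[i, 0, n] = 0  # the slot after the last real speaker is labelled 0
loss = F.sigmoid_cross_entropy(P_pad, L)
print(P_pad.shape, float(loss.array))  # (2, 1, 4) and a scalar loss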
Code example #25
    def forward(self, xs, n_speakers, activation=None):
        ilens = [x.shape[0] for x in xs]
        # xs: (B, T, F)
        xs = F.pad_sequence(xs, padding=-1)
        pad_shape = xs.shape
        # emb: (B*T, E)
        emb = self.enc(xs)
        # emb: (B, T, F)
        emb = F.separate(emb.reshape(pad_shape[0], pad_shape[1], -1), axis=0)
        emb = [F.get_item(e, slice(0, ilen)) for e, ilen in zip(emb, ilens)]
        emb2 = [cp.random.permutation(e) for e in emb]

        # get name: main-                 num_speakers=n_speakers, to_train=1
        #           validation/main-      num_speakers=n_speaker,  to_train=0
        #           validation_1/main-    num_speakers=None,       to_train=0
        name = reporter.get_current_reporter()._observer_names[id(self)]
        num_speakers = None if name == "validation_1/main" else n_speakers
        to_train = 1 if name == 'main' else 0
        # h_0: (1, B, F)
        # c_0: (1, B, F)
        h_0, c_0 = self.encoder(emb2)
        # A: (B, n_spk, F)
        # P: (B, n_spk, 1)
        A, P = self.decoder(h_0,
                            c_0,
                            n_speakers=num_speakers,
                            to_train=to_train)
        # yhat: (B, T, n_spk)
        ys = [F.matmul(e, a.T) for a, e in zip(A, emb)]

        return ys, P
Code example #26
def seq_rnn_embed(vxs, exs, birnn, return_seqs=False):
    """Embed given sequences using rnn."""
    # vxs.shape == (..., S)
    # exs.shape == (..., S, E)
    assert vxs.shape == exs.shape[:
                                  -1], "Sequence embedding dimensions do not match."
    lengths = np.sum(vxs != 0, -1).flatten()  # (X,)
    seqs = F.reshape(exs, (-1, ) + exs.shape[-2:])  # (X, S, E)
    toembed = [
        s[..., :l, :] for s, l in zip(F.separate(seqs, 0), lengths) if l != 0
    ]  # Y x [(S1, E), (S2, E), ...]
    hs, ys = birnn(None, toembed)  # (2, Y, E), Y x [(S1, 2*E), (S2, 2*E), ...]
    if return_seqs:
        ys = F.pad_sequence(ys)  # (Y, S, 2*E)
        ys = F.reshape(ys, ys.shape[:-1] + (2, EMBED))  # (Y, S, 2, E)
        ys = F.mean(ys, -2)  # (Y, S, E)
        if ys.shape[0] == lengths.size:
            ys = F.reshape(ys, exs.shape)  # (..., S, E)
            return ys
        embeds = np.zeros((lengths.size, vxs.shape[-1], EMBED),
                          dtype=np.float32)  # (X, S, E)
        idxs = np.nonzero(lengths)  # (Y,)
        embeds = F.scatter_add(embeds, idxs, ys)  # (X, S, E)
        embeds = F.reshape(embeds, exs.shape)  # (..., S, E)
        return embeds  # (..., S, E)
    hs = F.mean(hs, 0)  # (Y, E)
    if hs.shape[0] == lengths.size:
        hs = F.reshape(hs, vxs.shape[:-1] + (EMBED, ))  # (..., E)
        return hs
    # Add zero values back to match original shape
    embeds = np.zeros((lengths.size, EMBED), dtype=np.float32)  # (X, E)
    idxs = np.nonzero(lengths)  # (Y,)
    embeds = F.scatter_add(embeds, idxs, hs)  # (X, E)
    embeds = F.reshape(embeds, vxs.shape[:-1] + (EMBED, ))  # (..., E)
    return embeds  # (..., E)
Code example #27
File: ctc.py  Project: zhuanaa/espnet
    def __call__(self, hs, ys):
        """Core function of the Warp-CTC layer.

        Args:
            hs (iterable of chainer.Variable | N-dimensional array): Input variable from encoder.
            ys (iterable of chainer.Variable | N-dimensional array): Input variable of decoder.

        Returns:
           chainer.Variable: A variable holding a scalar value of the CTC loss.

        """
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.transpose(y_hat, (1, 0, 2))  # batch x frames x hdim

        # get length info
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(ilens))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(olens))

        # get ctc loss
        from chainer_ctc.warpctc import ctc as warp_ctc
        self.loss = warp_ctc(y_hat, ilens, [cuda.to_cpu(l.data) for l in ys])[0]
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
Code example #28
    def calc_attention(self, xs, ys, genre_exs, gender_exs, attn_linear):

        concat_ys = F.concat(
            ys,
            axis=0)  # -> (total len of batched sentence, word embedding dim)
        attn_ys = attn_linear(F.tanh(concat_ys))
        cond_feature = self.proj_cond(F.concat(
            (genre_exs, gender_exs)))  # -> (batchsize, proj_cond dim)

        cumsum_ys = self.xp.cumsum(
            self.xp.array([len(x) for x in xs], dtype=self.xp.int32))
        split_attn_ys = F.split_axis(attn_ys, cumsum_ys[:-1].tolist(), axis=0)
        split_attn_ys_pad = F.pad_sequence(split_attn_ys, padding=-1024)

        bool_cond = split_attn_ys_pad.array == -1024
        split_attn_ys_pad = split_attn_ys_pad * F.expand_dims(F.broadcast_to(
            cond_feature, (split_attn_ys_pad.shape[:-1])),
                                                              axis=-1)

        padding_array = self.xp.full(split_attn_ys_pad.shape,
                                     -1024,
                                     dtype=self.xp.float32)

        split_attn_ys_pad = F.where(bool_cond, padding_array,
                                    split_attn_ys_pad)

        attn_softmax = F.softmax(split_attn_ys_pad, axis=1)

        return attn_softmax
Code example #29
def test_train_acc():
    n_out = 7
    _eos = n_out - 1
    n_batch = 3
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = numpy.random.rand(n_batch,
                                max(label_length) + 1,
                                n_out).astype(numpy.float32)
    # NOTE: 0 is only used for CTC, never appeared in attn target
    np_target = [
        numpy.random.randint(1, n_out - 1, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    eos = numpy.array([_eos], 'i')
    ys_out = [F.concat([y, eos], axis=0) for y in np_target]

    # padding for ys with -1
    # pys: utt x olen
    # NOTE: -1 is default ignore index for chainer
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)
    y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out))
    ch_acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1)

    # NOTE: this index 0 is only for CTC not attn. so it can be ignored
    # unfortunately, torch cross_entropy does not accept out-of-bound ids
    th_ignore = 0
    th_pred = torch.from_numpy(y_all.data)
    th_ys = [torch.from_numpy(numpy.append(t, eos)).long() for t in np_target]
    th_target = pad_list(th_ys, th_ignore)
    th_acc = th_accuracy(th_pred, th_target, th_ignore)

    numpy.testing.assert_allclose(ch_acc.data, th_acc)
Code example #30
File: e2e_asr.py  Project: zane678/espnet
    def __call__(self, hs, ys):
        '''CTC forward

        :param hs:
        :param ys:
        :return:
        '''
        self.loss = None
        ilens = [x.shape[0] for x in hs]
        olens = [x.shape[0] for x in ys]

        # zero padding for hs
        y_hat = linear_tensor(self.ctc_lo, F.dropout(
            F.pad_sequence(hs), ratio=self.dropout_rate))
        y_hat = F.transpose(y_hat, (1, 0, 2))  # batch x frames x hdim

        # get length info
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(ilens))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(olens))

        # get ctc loss
        self.loss = warp_ctc(y_hat, ilens, [cuda.to_cpu(l.data) for l in ys])[0]
        logging.info('ctc loss:' + str(self.loss.data))

        return self.loss
Code example #31
File: net.py  Project: souravsingh/models
 def norm_vec_sentence_level(d, nn_flag=False, include_norm_term=False):
     dim = d.shape[1]
     d_list = F.split_axis(d, np.cumsum(lengths)[:-1], axis=0)
     max_length = np.max(lengths)
     d_pad = F.pad_sequence(d_list, length=max_length, padding=0.0)
     d_flat = F.reshape(get_normalized_vector(d_pad, None), (-1, dim))
     split_size = np.cumsum(np.full(batchsize, max_length))[:-1]
     d_list = F.split_axis(d_flat, split_size, axis=0)
     d_list = [_d[:_length] for _d, _length in zip(d_list, lengths)]
     d = F.concat(d_list, axis=0)
     return d
Code example #32
File: test_pad_sequence.py  Project: fukatani/chainer
    def check_forward(self, xs):
        # Non-finite values does not work for integer values.
        if not numpy.isfinite(self.pad) and \
           numpy.dtype(self.dtype).kind != 'f':
            return

        with disable_debug_mode_if(self.can_include_nan):
            y = functions.pad_sequence(
                xs, length=self.length, padding=self.pad)

        self.assertEqual(y.shape, self.y_shape)
        for i, (length, x) in enumerate(six.moves.zip(self.lengths, self.xs)):
            testing.assert_allclose(y.data[i, 0:length], x)
            testing.assert_allclose(
                y.data[i, length:], self.dtype(self.pad))
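For quick reference, a minimal, hedged example of what check_forward verifies, with an explicit length and padding value (the inputs are made up):

import numpy
from chainer import functions

xs = [numpy.array([1, 2, 3], dtype=numpy.float32),
      numpy.array([4], dtype=numpy.float32)]

y = functions.pad_sequence(xs, length=4, padding=-1)
print(y.array)
# [[ 1.  2.  3. -1.]
#  [ 4. -1. -1. -1.]]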
Code example #33
File: test_pad_sequence.py  Project: fukatani/chainer
 def f(*xs):
     return functions.pad_sequence(
         xs, length=self.length, padding=self.pad)
Code example #34
    def forward(self,
                inputs,
                batch_lengths,
                initial_state=None):
        """
        Parameters
        ----------
        inputs : ``torch.FloatTensor``, required.
            A tensor of shape (batch_size, num_timesteps, input_size)
            to apply the LSTM over.
        batch_lengths : ``List[int]``, required.
            A list of length batch_size containing the lengths of the sequences in batch.
        initial_state : ``Tuple[torch.Tensor, torch.Tensor]``, optional, (default = None)
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM. The ``state`` has shape (1, batch_size, hidden_size) and the
            ``memory`` has shape (1, batch_size, cell_size).

        Returns
        -------
        output_accumulator : ``torch.FloatTensor``
            The outputs of the LSTM for each timestep. A tensor of shape
            (batch_size, max_timesteps, hidden_size) where for a given batch
            element, all outputs past the sequence length for that batch are
            zero tensors.
        final_state : ``Tuple[``torch.FloatTensor, torch.FloatTensor]``
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM. The ``state`` has shape (1, batch_size, hidden_size) and the
            ``memory`` has shape (1, batch_size, cell_size).
        """
        batch_size = inputs.shape[0]
        total_timesteps = inputs.shape[1]

        output_accumulator_list = []
        if initial_state is None:
            full_batch_previous_memory = chainer.Variable(
                self.xp.zeros((batch_size, self.cell_size), 'f'))
            full_batch_previous_state = chainer.Variable(
                self.xp.zeros((batch_size, self.hidden_size), 'f'))
        else:
            # first dimension is just (layer * (1 + is_bidirection)), i.e., 1.
            full_batch_previous_state = F.squeeze(initial_state[0], axis=0)
            full_batch_previous_memory = F.squeeze(initial_state[1], axis=0)

        current_length_index = batch_size - 1 if self.go_forward else 0
        if self.recurrent_dropout_probability > 0.0 and \
           (self.training or chainer.config.train):
            dropout_mask = get_dropout_mask(self.recurrent_dropout_probability,
                                            full_batch_previous_state)
        else:
            dropout_mask = None

        for timestep in range(total_timesteps):
            # The index depends on which end we start.
            index = timestep if self.go_forward else total_timesteps - timestep - 1

            # What we are doing here is finding the index into the batch dimension
            # which we need to use for this timestep, because the sequences have
            # variable length, so once the index is greater than the length of this
            # particular batch sequence, we no longer need to do the computation for
            # this sequence. The key thing to recognise here is that the batch inputs
            # must be _ordered_ by length from longest (first in batch) to shortest
            # (last) so initially, we are going forwards with every sequence and as we
            # pass the index at which the shortest elements of the batch finish,
            # we stop picking them up for the computation.

            if self.go_forward:
                while batch_lengths[current_length_index] <= index:
                    current_length_index -= 1
            # If we're going backwards, we are _picking up_ more indices.
            else:
                # First conditional: Are we already at the maximum number of elements in the batch?
                # Second conditional: Does the next shortest sequence beyond the current batch
                # index require computation use this timestep?
                while current_length_index < (len(batch_lengths) - 1) and \
                        batch_lengths[current_length_index + 1] > index:
                    current_length_index += 1

            # Actually get the slices of the batch which we
            # need for the computation at this timestep.
            # shape (batch_size, cell_size)
            previous_memory = full_batch_previous_memory[0: current_length_index + 1]
            # Shape (batch_size, hidden_size)
            previous_state = full_batch_previous_state[0: current_length_index + 1]
            # Shape (batch_size, input_size)
            timestep_input = inputs[0: current_length_index + 1, index]

            # Do the projections for all the gates all at once.
            # Both have shape (batch_size, 4 * cell_size)
            projected_input = self.input_linearity(timestep_input)
            projected_state = self.state_linearity(previous_state)

            # Main LSTM equations using relevant chunks of the big linear
            # projections of the hidden state and inputs.
            # TODO: split_axis
            # TODO: cuda kernel
            input_gate = F.sigmoid(projected_input[:, (0 * self.cell_size):(1 * self.cell_size)] +
                                   projected_state[:, (0 * self.cell_size):(1 * self.cell_size)])
            forget_gate = F.sigmoid(projected_input[:, (1 * self.cell_size):(2 * self.cell_size)] +
                                    projected_state[:, (1 * self.cell_size):(2 * self.cell_size)])
            memory_init = F.tanh(projected_input[:, (2 * self.cell_size):(3 * self.cell_size)] +
                                 projected_state[:, (2 * self.cell_size):(3 * self.cell_size)])
            output_gate = F.sigmoid(projected_input[:, (3 * self.cell_size):(4 * self.cell_size)] +
                                    projected_state[:, (3 * self.cell_size):(4 * self.cell_size)])
            memory = input_gate * memory_init + forget_gate * previous_memory

            # Here is the non-standard part of this LSTM cell; first, we clip the
            # memory cell, then we project the output of the timestep to a smaller size
            # and again clip it.

            if self.memory_cell_clip_value:
                memory = F.clip(memory, -self.memory_cell_clip_value,
                                self.memory_cell_clip_value)

            # shape (current_length_index, cell_size)
            pre_projection_timestep_output = output_gate * F.tanh(memory)

            # shape (current_length_index, hidden_size)
            timestep_output = self.state_projection(
                pre_projection_timestep_output)
            if self.state_projection_clip_value:
                timestep_output = F.clip(timestep_output,
                                         -self.state_projection_clip_value,
                                         self.state_projection_clip_value)

            # Only do dropout if the dropout prob is > 0.0 and we are in training mode.
            if dropout_mask is not None:
                timestep_output = timestep_output * \
                    dropout_mask[0: current_length_index + 1]

            # We've been doing computation with less than the full batch, so here we create a new
            # variable for the whole batch at this timestep and insert the result for the
            # relevant elements of the batch into it.
            full_batch_previous_memory = F.concat(
                [memory, full_batch_previous_memory[current_length_index + 1:]], axis=0)
            full_batch_previous_state = F.concat(
                [timestep_output, full_batch_previous_state[current_length_index + 1:]], axis=0)
            output_accumulator_list.append(timestep_output)

        # Mimic the pytorch API by returning state in the following shape:
        # (num_layers * num_directions, batch_size, ...). As this
        # LSTM cell cannot be stacked, the first dimension here is just 1.
        final_state = (F.expand_dims(full_batch_previous_state, 0),
                       F.expand_dims(full_batch_previous_memory, 0))
        if not self.go_forward:
            output_accumulator_list = output_accumulator_list[::-1]
        output_accumulator = F.pad_sequence(output_accumulator_list)
        output_accumulator = output_accumulator.transpose((1, 0, 2))
        # (batch_size, total_timesteps, self.hidden_size)

        return output_accumulator, final_state
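A hedged sketch of the final packing step above: the per-timestep outputs (each possibly covering fewer batch rows than the full batch) are padded back into a rectangular array and rearranged to (batch, time, hidden). The shapes are made up:

import numpy as np
import chainer.functions as F

hidden = 2
# Hypothetical per-timestep outputs; the second step only covers 1 of 2 rows.
steps = [np.ones((2, hidden), dtype=np.float32),
         np.ones((1, hidden), dtype=np.float32)]

acc = F.pad_sequence(steps)     # (time, batch, hidden); missing rows are zero
acc = acc.transpose((1, 0, 2))  # (batch, time, hidden)
print(acc.shape)  # (2, 2, 2)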
Code example #35
 def stack_and_to_gpu(data_list):
     sdata = F.pad_sequence(
         data_list, length=None, padding=0).array
     return chainer.dataset.to_device(gpu, sdata)
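A hedged sketch of how a helper like this is typically used as a batch converter (the feature shapes and the gpu value are made up; gpu = -1 keeps everything on the CPU):

import numpy as np
import chainer
import chainer.functions as F

gpu = -1

def stack_and_to_gpu(data_list):
    sdata = F.pad_sequence(data_list, length=None, padding=0).array
    return chainer.dataset.to_device(gpu, sdata)

# Two variable-length feature sequences -> one zero-padded (B, T_max, D) array.
feats = [np.ones((3, 2), dtype=np.float32), np.ones((1, 2), dtype=np.float32)]
batch = stack_and_to_gpu(feats)
print(batch.shape)  # (2, 3, 2)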
Code example #36
File: elmo_lstm.py  Project: souravsingh/models
    def _lstm_forward(self,
                      inputs,
                      batch_lengths,
                      initial_state=None):
        """
        Parameters
        ----------
        inputs : ``PackedSequence``, required.
            A batch first ``PackedSequence`` to run the stacked LSTM over.
        initial_state : ``Tuple[torch.Tensor, torch.Tensor]``, optional, (default = None)
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM, with shape (num_layers, batch_size, 2 * hidden_size) and
            (num_layers, batch_size, 2 * cell_size) respectively.

        Returns
        -------
        output_sequence : ``torch.FloatTensor``
            The encoded sequence of shape (num_layers, batch_size, sequence_length, hidden_size)
        final_states: ``Tuple[torch.FloatTensor, torch.FloatTensor]``
            The per-layer final (state, memory) states of the LSTM, with shape
            (num_layers, batch_size, 2 * hidden_size) and  (num_layers, batch_size, 2 * cell_size)
            respectively. The last dimension is duplicated because it contains the state/memory
            for both the forward and backward layers.
        """
        if initial_state is None:
            hidden_states = [None] * len(self.forward_layers)
        elif initial_state[0].shape[0] != len(self.forward_layers):
            raise ConfigurationError("Initial states were passed to forward() but the number of "
                                     "initial states does not match the number of layers.")
        else:
            hidden_states = list(zip(F.split_axis(initial_state[0], initial_state[0].shape[0], 0),
                                     F.split_axis(initial_state[1], initial_state[1].shape[0], 0)))

        inputs = F.pad_sequence(inputs)
        forward_output_sequence = inputs
        backward_output_sequence = inputs

        final_states = []
        sequence_outputs = []
        for layer_index, state in enumerate(hidden_states):
            forward_layer = getattr(
                self, 'forward_layer_{}'.format(layer_index))
            backward_layer = getattr(
                self, 'backward_layer_{}'.format(layer_index))

            forward_cache = forward_output_sequence
            backward_cache = backward_output_sequence

            if state is not None:
                forward_hidden_state, backward_hidden_state = F.split_axis(
                    state[0], 2, axis=2)
                forward_memory_state, backward_memory_state = F.split_axis(
                    state[1], 2, axis=2)
                forward_state = (forward_hidden_state, forward_memory_state)
                backward_state = (backward_hidden_state, backward_memory_state)
            else:
                forward_state = None
                backward_state = None

            forward_output_sequence, forward_state = forward_layer.forward(
                forward_output_sequence,
                batch_lengths,
                forward_state)

            backward_output_sequence, backward_state = backward_layer.forward(
                backward_output_sequence,
                batch_lengths,
                backward_state)
            # Skip connections, just adding the input to the output.
            if layer_index != 0:
                forward_output_sequence += forward_cache
                backward_output_sequence += backward_cache

            sequence_outputs.append(F.concat([forward_output_sequence,
                                              backward_output_sequence], -1))
            # Append the state tuples in a list, so that we can return
            # the final states for all the layers.
            final_states.append((F.concat([forward_state[0], backward_state[0]], -1),
                                 F.concat([forward_state[1], backward_state[1]], -1)))

        stacked_sequence_outputs = F.stack(sequence_outputs, axis=0)
        # Stack the hidden state and memory for each layer into 2 tensors of shape
        # (num_layers, batch_size, hidden_size) and (num_layers, batch_size, cell_size)
        # respectively.
        final_hidden_states, final_memory_states = zip(*final_states)
        final_state_tuple = (F.concat(final_hidden_states, 0),
                             F.concat(final_memory_states, 0))
        return stacked_sequence_outputs, final_state_tuple