def forward(self,
                x,
                src,
                src_length=None,
                tar_length=None,
                mask_src=None,
                mask_tar=None,
                vocab_attention_layer=None):
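        # Full forward pass over a complete target sequence.
        #   x:   batch * tar_seqlen * hidden target input
        #   src: one encoder output per decoder layer, each batch * src_seqlen * hidden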

        if mask_tar is None and tar_length is not None:
            # batch * tar_seqlen * tar_seqlen mask for decoder self-attention,
            # built from the target lengths
            mask_tar = generateMask(x.shape[1], tar_length, float, device=x).transpose(0, 1).\
                unsqueeze(1).expand(x.shape[0], x.shape[1], x.shape[1])
        if mask_src is None and src_length is not None:
            # batch * tar_seqlen * src_seqlen mask for attention over the encoder output
            mask_src = generateMask(src[0].shape[1], src_length, float, device=x).transpose(0, 1).\
                unsqueeze(1).expand(x.shape[0], x.shape[1], src[0].shape[1])

        x = self.dropout(x)
        xs = []
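        # Each decoder layer attends to its matching encoder output in src;
        # the output of every layer is collected and returned as a list.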
        for (layer, enc) in zip(self.layers, src):
            x = layer(x,
                      enc,
                      mask_src=mask_src,
                      mask_tar=mask_tar,
                      vocab_attention_layer=vocab_attention_layer)
            xs.append(x)
        return xs

    def init_forward(self,
                     src,
                     src_length=None,
                     mask_src=None,
                     vocab_attention_layer=None):

        assert self.attend_mode == "only_attend_front"
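        # Incremental decoding setup: each layer returns its own step function over
        # its encoder output, and the closure below runs one call through all of them.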

        if mask_src is None and src_length is not None:
            # batch * src_seqlen mask over the encoder output
            mask_src = generateMask(src[0].shape[1],
                                    src_length,
                                    float,
                                    device=src[0]).transpose(0, 1)
        layer_step = []
        for layer, enc in zip(self.layers, src):
            layer_step.append(
                layer.init_forward(
                    enc, vocab_attention_layer=vocab_attention_layer))

        def nextStep(x, flag=None, regroup=None):
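            # Run this step's input through every layer's step function in turn;
            # flag is unused here, and regroup is simply forwarded to each layer.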
            nonlocal mask_src, layer_step
            xs = []
            for step in layer_step:
                x = step(x, mask_src, regroup=regroup)
                xs.append(x)
            return xs

        return nextStep

Example 3

    def init_forward_3d(self, src, src_length, mask_src, top_k):

        assert self.attend_mode == "only_attend_front"

        # src: batch * seqlen * hidden

        batch_size = src.shape[0]
        seqlen = src.shape[1]
        if mask_src is None and src_length is not None:
            mask_src = generateMask(src.shape[1],
                                    src_length,
                                    float,
                                    device=src).transpose(0, 1)  # batch * seq
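        # Tile the mask and the encoder output so that each of the top_k candidates
        # of every batch element gets its own copy along the flattened batch axis.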
        if mask_src is not None:
            mask_src = mask_src.unsqueeze(1).expand(-1, top_k, -1).reshape(
                batch_size * top_k, -1)
        src = src.unsqueeze(1).expand(-1, top_k, -1,
                                      -1).reshape(batch_size * top_k, seqlen,
                                                  -1)

        step = self.init_forward(src, None, mask_src)

        def nextStep(x, flag=None, regroup=None):
            nonlocal step, batch_size, top_k
            # regroup: batch * top_k, index of the surviving candidate inside each
            # batch element; offset it into the flattened (batch * top_k) axis
            regroup = regroup + LongTensor(list(
                range(batch_size))).unsqueeze(1) * top_k
            regroup = regroup.reshape(-1)
            # Flatten the candidates, advance one decoding step, then restore
            # the batch * top_k * hidden shape.
            x = x.reshape(batch_size * top_k, -1)
            x = step(x, regroup=regroup)
            x = x.reshape(batch_size, top_k, -1)

            return x

        return nextStep
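
    # A small self-contained check (assuming PyTorch) of the regroup offsetting used
    # in nextStep above; it only illustrates the index arithmetic, not the decoder:
    #   import torch
    #   batch_size, top_k = 2, 3
    #   regroup = torch.tensor([[0, 0, 2], [1, 2, 2]])  # surviving candidate per slot
    #   flat = (regroup + torch.arange(batch_size).unsqueeze(1) * top_k).reshape(-1)
    #   flat.tolist()  # -> [0, 0, 2, 4, 5, 5]
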
    def forward(self, x, tar_length=None, mask_tar=None):
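        # Full-sequence forward pass using self-attention only (no encoder input);
        # the output of every layer is returned.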

        if mask_tar is None and tar_length is not None:
            # batch * seqlen * seqlen self-attention mask built from the target lengths
            mask_tar = generateMask(x.shape[1], tar_length, float, device=x).transpose(0, 1).\
                unsqueeze(1).expand(x.shape[0], x.shape[1], x.shape[1])

        x = self.dropout(x)
        xs = []
        for layer in self.layers:
            x = layer(x, mask_tar=mask_tar)
            xs.append(x)
        return xs