def forward(self, x, src, src_length=None, tar_length=None, mask_src=None,
            mask_tar=None, vocab_attention_layer=None):
    # Build the target-side mask (batch * tar_len * tar_len) from tar_length
    # if it was not supplied.
    if mask_tar is None and tar_length is not None:
        mask_tar = generateMask(x.shape[1], tar_length, float, device=x).transpose(0, 1).\
            unsqueeze(1).expand(x.shape[0], x.shape[1], x.shape[1])
    # Build the source-side mask (batch * tar_len * src_len) from src_length
    # if it was not supplied.
    if mask_src is None and src_length is not None:
        mask_src = generateMask(src[0].shape[1], src_length, float, device=x).transpose(0, 1).\
            unsqueeze(1).expand(x.shape[0], x.shape[1], src[0].shape[1])
    x = self.dropout(x)
    # Run each layer against its corresponding encoder output, keeping every
    # layer's output.
    xs = []
    for layer, enc in zip(self.layers, src):
        x = layer(x, enc, mask_src=mask_src, mask_tar=mask_tar,
                  vocab_attention_layer=vocab_attention_layer)
        xs.append(x)
    return xs
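# Illustration (not part of this module): the mask construction above expands a
# per-position length mask to batch * tar_len * src_len. Assuming generateMask
# returns a (seqlen * batch) float mask with 1.0 at valid positions, an
# equivalent expansion in plain torch would look like:
#
#     import torch
#     def length_mask(seqlen, lengths):
#         positions = torch.arange(seqlen).unsqueeze(0)            # 1 * seqlen
#         return (positions < lengths.unsqueeze(1)).float()        # batch * seqlen
#     src_lengths = torch.tensor([5, 3])
#     mask_src = length_mask(5, src_lengths)                       # batch * src_len
#     mask_src = mask_src.unsqueeze(1).expand(-1, 4, -1)           # batch * tar_len * src_len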
def init_forward(self, src, src_length=None, mask_src=None, vocab_attention_layer=None):
    # Step-by-step decoding is only supported in "only_attend_front" mode.
    assert self.attend_mode == "only_attend_front"
    # Build the source-side mask (batch * src_len) from src_length if it was
    # not supplied.
    if mask_src is None and src_length is not None:
        mask_src = generateMask(src[0].shape[1], src_length, float, device=src[0]).transpose(0, 1)
    # Initialize one step function per layer, each bound to its encoder output.
    layer_step = []
    for layer, enc in zip(self.layers, src):
        layer_step.append(layer.init_forward(enc, vocab_attention_layer=vocab_attention_layer))

    def nextStep(x, flag=None, regroup=None):
        nonlocal mask_src, layer_step
        # Feed the current step's input through every layer's step function,
        # collecting each layer's output.
        xs = []
        for step in layer_step:
            x = step(x, mask_src, regroup=regroup)
            xs.append(x)
        return xs
    return nextStep
def init_forward_3d(self, src, src_length, mask_src, top_k):
    # Step-by-step decoding over top_k hypotheses per batch entry.
    assert self.attend_mode == "only_attend_front"
    # src: batch * seq * hidden
    batch_size = src.shape[0]
    seqlen = src.shape[1]
    if mask_src is None and src_length is not None:
        mask_src = generateMask(src.shape[1], src_length, float, device=src).transpose(0, 1)
        # batch * seq
    # Repeat the source mask and source states top_k times so each hypothesis
    # gets its own copy: (batch * top_k) * seq.
    if mask_src is not None:
        mask_src = mask_src.unsqueeze(1).expand(-1, top_k, -1).reshape(batch_size * top_k, -1)
    src = src.unsqueeze(1).expand(-1, top_k, -1, -1).reshape(batch_size * top_k, seqlen, -1)
    step = self.init_forward(src, None, mask_src)

    def nextStep(x, flag=None, regroup=None):
        nonlocal step, batch_size, top_k
        # regroup: batch * top_k; add a per-batch offset and flatten so it
        # indexes into the (batch * top_k) dimension.
        regroup = regroup + LongTensor(list(range(batch_size))).unsqueeze(1) * top_k
        regroup = regroup.reshape(-1)
        x = x.reshape(batch_size * top_k, -1)
        x = step(x, regroup=regroup)
        x = x.reshape(batch_size, top_k, -1)
        return x
    return nextStep
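# Illustration (not part of this module) of the regroup index arithmetic in
# nextStep above, with made-up batch_size/top_k values: each batch entry's
# hypothesis indices are offset by batch_index * top_k before flattening, so
# they index into the (batch_size * top_k) flattened dimension.
#
#     import torch
#     batch_size, top_k = 2, 3
#     regroup = torch.tensor([[0, 0, 2],
#                             [1, 2, 2]])
#     offsets = torch.arange(batch_size).unsqueeze(1) * top_k      # [[0], [3]]
#     flat = (regroup + offsets).reshape(-1)                       # [0, 0, 2, 4, 5, 5]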
def forward(self, x, tar_length=None, mask_tar=None):
    # Build the target-side mask (batch * tar_len * tar_len) from tar_length
    # if it was not supplied.
    if mask_tar is None and tar_length is not None:
        mask_tar = generateMask(x.shape[1], tar_length, float, device=x).transpose(0, 1).\
            unsqueeze(1).expand(x.shape[0], x.shape[1], x.shape[1])
    x = self.dropout(x)
    # Run the input through every layer, keeping each layer's output.
    xs = []
    for layer in self.layers:
        x = layer(x, mask_tar=mask_tar)
        xs.append(x)
    return xs