def logit_fn(self, decoder_output):
    # Tie the output projection to the target embedding; optionally L2-normalize it (fix_norm).
    softmax_weight = (self.out_embedding if not self.config['fix_norm']
                      else ut.normalize(self.out_embedding, scale=True))
    logits = F.linear(decoder_output, softmax_weight, bias=self.out_bias)
    logits = logits.reshape(-1, logits.size()[-1])
    # Mask out words absent from the target vocabulary.
    logits[:, ~self.trg_vocab_mask] = -1e9
    return logits
def logit_fn(self, decoder_output):
    # Tie the output projection to the target embedding; optionally L2-normalize it (fix_norm).
    softmax_weight = (self.out_embedding if not self.config['fix_norm']
                      else ut.normalize(self.out_embedding, scale=True))
    logits = F.linear(decoder_output, softmax_weight, bias=self.out_bias)
    logits = logits.reshape(-1, logits.size()[-1])
    # Mask out words absent from the target vocabulary.
    #logits[:, ~self.trg_vocab_mask] = -1e9 # speed
    logits.masked_fill_(~self.trg_vocab_mask.unsqueeze(0), -3e38)  #-1e9)
    return logits
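# A minimal sketch (not part of the model) contrasting the two masking styles above:
# masked_fill_ writes in place and avoids the advanced-indexing copy, which is
# presumably the "# speed" motivation for the change. Only torch is assumed, and the
# toy mask below is a hypothetical stand-in for trg_vocab_mask.
import torch

logits = torch.zeros(2, 5)                                     # [bsz, vocab_size]
vocab_mask = torch.tensor([1, 1, 0, 1, 0], dtype=torch.bool)   # hypothetical trg_vocab_mask
a = logits.clone()
a[:, ~vocab_mask] = -1e9                                       # advanced indexing
b = logits.clone()
b.masked_fill_(~vocab_mask.unsqueeze(0), -1e9)                 # in-place fill
assert torch.equal(a, b)                                       # same result, different cost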
def get_trg_inp(ids, time_step):
    ids = ids.type(src_toks.type())
    word_embeds = self.trg_embedding(ids)
    if self.config['fix_norm']:
        word_embeds = ut.normalize(word_embeds, scale=False)
    else:
        word_embeds = word_embeds * self.embed_scale

    # Add only the positional embedding of the current decoding step.
    pos_embeds = self.pos_embedding[time_step, :].reshape(1, 1, -1)
    return word_embeds + pos_embeds
def get_input(self, toks, is_src=True):
    embeds = self.src_embedding if is_src else self.trg_embedding
    word_embeds = embeds(toks)  # [bsz, max_len, embed_dim]

    if self.config['fix_norm']:
        word_embeds = ut.normalize(word_embeds, scale=False)
    else:
        word_embeds = word_embeds * self.embed_scale

    pos_embeds = self.pos_embedding[:toks.size()[-1], :].unsqueeze(0)  # [1, max_len, embed_dim]
    return word_embeds + pos_embeds
def get_input(self, toks, is_src=True):
    embeds = self.src_embedding if is_src else self.trg_embedding
    word_embeds = embeds(toks)  # [bsz, max_len, embed_dim]

    if self.config['fix_norm']:
        word_embeds = ut.normalize(word_embeds, scale=False)
    else:
        word_embeds = word_embeds * self.embed_scale

    # Warn if the input is longer than the precomputed positional-embedding table.
    if toks.size()[-1] > self.pos_embedding.size()[-2]:
        ut.get_logger().error(
            "Sentence length ({}) is longer than max_pos_length ({}); please increase max_pos_length"
            .format(toks.size()[-1], self.pos_embedding.size()[0]))

    pos_embeds = self.pos_embedding[:toks.size()[-1], :].unsqueeze(0)  # [1, max_len, embed_dim]
    return word_embeds + pos_embeds
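# A minimal sketch of how a table like self.pos_embedding is commonly precomputed
# (the standard sinusoidal encoding from "Attention Is All You Need"); this is an
# assumption about the helper, not this repository's own implementation. Assumes
# embed_dim is even.
import math
import torch

def sinusoidal_pos_embedding(max_pos_length, embed_dim):
    # pe[pos, 2i]   = sin(pos / 10000^(2i / embed_dim))
    # pe[pos, 2i+1] = cos(pos / 10000^(2i / embed_dim))
    pos = torch.arange(max_pos_length, dtype=torch.float).unsqueeze(1)  # [max_pos_length, 1]
    div = torch.exp(torch.arange(0, embed_dim, 2, dtype=torch.float)
                    * (-math.log(10000.0) / embed_dim))                 # [embed_dim / 2]
    pe = torch.zeros(max_pos_length, embed_dim)
    pe[:, 0::2] = torch.sin(pos * div)
    pe[:, 1::2] = torch.cos(pos * div)
    return pe  # sliced as pos_embedding[:max_len, :] in get_input above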
def get_input(self, toks, structs=None, calc_reg=False):
    max_len = toks.size()[-1]
    embed_dim = self.config['embed_dim']
    # Source inputs come with structs; target inputs do not.
    embeds = self.src_embedding if structs is not None else self.trg_embedding
    word_embeds = embeds(toks)  # [bsz, max_len, embed_dim]

    embed_scale = self.trg_embed_scale if structs is None else self.src_embed_scale
    if self.config['fix_norm']:
        word_embeds = ut.normalize(word_embeds, scale=False)
    else:
        word_embeds = word_embeds * embed_scale

    pos_embeds = self.get_pos_embedding(max_len, structs)
    pe_scale = self.src_pos_embed_scale if structs is not None else self.trg_pos_embed_scale

    # Optional norm penalty on the structural positional embeddings (non-pad positions only).
    reg_penalty = 0.0
    if calc_reg:
        reg_penalty = self.struct.get_reg_penalty(
            pos_embeds, toks != ac.PAD_ID) * self.config['pos_norm_penalty']

    # Optionally add the plain sinusoidal positional encoding on the source side.
    sinusoidal_pe = (self.get_pos_embedding(max_len)
                     if structs is not None and self.config['add_sinusoidal_pe_src'] else 0)

    return word_embeds + sinusoidal_pe + pos_embeds * pe_scale, reg_penalty
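# A small sketch (an assumption, not taken from this repository) of how the scale
# factors used above are often set up: word embeddings are multiplied by
# sqrt(embed_dim) as in the original Transformer, while the positional-embedding
# scales are learnable scalars initialized to 1 so training can reweight pos_embeds.
import math
import torch
import torch.nn as nn

embed_dim = 512                                            # hypothetical config['embed_dim']
src_embed_scale = trg_embed_scale = math.sqrt(embed_dim)   # fixed scalars for word embeddings
src_pos_embed_scale = nn.Parameter(torch.tensor(1.0))      # learnable scalar for source pos embeddings
trg_pos_embed_scale = nn.Parameter(torch.tensor(1.0))      # learnable scalar for target pos embeddings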