def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, tag_embeddings=None):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.tag_embeddings = tag_embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Build TransformerDecoder.
    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
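# A minimal, hypothetical usage sketch for the variant above (not taken from the
# repo): the class name, constructor signature, and the optional tag_embeddings
# argument come from the code above; the vocabulary size and hyperparameter
# values below are purely illustrative.
import torch.nn as nn

tgt_embeddings = nn.Embedding(30522, 768, padding_idx=0)  # illustrative vocab size / dim
decoder = TransformerDecoder(
    num_layers=6, d_model=768, heads=8, d_ff=2048, dropout=0.1,
    embeddings=tgt_embeddings,
    tag_embeddings=None,  # optional tag embedding table this variant accepts
)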
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Graph-context attention block with its own feed-forward, dropout and norm.
    self.context_attn_graph = MultiHeadedAttention(
        heads, d_model, dropout=dropout)
    self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    self.drop_3 = nn.Dropout(dropout)
    self.layer_norm_3 = nn.LayerNorm(d_model, eps=1e-6)

    # Build TransformerDecoder.
    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    # Attention scoring projections (context, query, context-query) and the
    # graph-aware fusion layer that mixes them back into the embedding space.
    self.att_weight_c = nn.Linear(self.embeddings.embedding_dim, 1)
    self.att_weight_q = nn.Linear(self.embeddings.embedding_dim, 1)
    self.att_weight_cq = nn.Linear(self.embeddings.embedding_dim, 1)
    self.graph_act = gelu
    self.graph_aware = nn.Linear(self.embeddings.embedding_dim * 3,
                                 self.embeddings.embedding_dim)
    self.graph_drop = nn.Dropout(dropout)
    self.linear_filter = nn.Linear(d_model * 2, 1)

    # Learnable prior over source positions: each of the 8 heads gets a 512x512
    # matrix whose rows decay linearly from 1.0 down to 1/512.
    fix_top = (torch.arange(512, 0, -1).float() / 512)\
        .unsqueeze(0).unsqueeze(0).expand(8, 512, -1).contiguous()
    self.fix_top = nn.Parameter(fix_top.to(self.get_device()), requires_grad=True)
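# The fix_top parameter above starts from a linearly decaying score over the 512
# source positions (512/512 down to 1/512), broadcast to one 512x512 matrix per
# attention head. A self-contained sketch of just that initialization:
import torch

decay = torch.arange(512, 0, -1).float() / 512                      # shape (512,)
fix_top_init = decay.unsqueeze(0).unsqueeze(0).expand(8, 512, -1)   # shape (8, 512, 512)
print(fix_top_init.shape)       # torch.Size([8, 512, 512])
print(fix_top_init[0, 0, :3])   # tensor([1.0000, 0.9980, 0.9961])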
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, cond_type=''):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Build TransformerDecoder.
    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout, cond_type=cond_type)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    # Maps a pair of concatenated d_model-sized states to a scalar position score.
    self.position_generator = nn.Sequential(
        nn.Linear(d_model * 2, 1),
        # nn.Softmax(dim=0)
    )
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings=None, topic=False, topic_dim=300, split_noise=False):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    if embeddings is not None:
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Build TransformerDecoder.
    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout,
                                topic=topic, topic_dim=topic_dim, split_noise=split_noise)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, use_universal_transformer):
    super(TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Build TransformerDecoder.
    # When d_model differs from BERT's 768-dim hidden size, project the encoder
    # output into d_model and back.
    self.dim_mismatch = d_model != 768
    if self.dim_mismatch:
        self.linear_custom = nn.Linear(768, d_model)
        self.linear_custom_reverse = nn.Linear(d_model, 768)
        print("TransformerDecoder# Input dimension is 768 but d_model is {}; "
              "adding up-sampling and down-sampling layers.".format(d_model))

    # Universal-Transformer-style decoder: one position-wise feed-forward module
    # shared by every layer.
    self.common_ff = None
    if use_universal_transformer:
        print("Using Universal Transformer in Decoder")
        self.common_ff = PositionwiseFeedForward(d_model, d_ff, dropout)

    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout, self.common_ff)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
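# Hedged sketch of the Universal-Transformer branch above: passing one shared
# PositionwiseFeedForward instance to every TransformerDecoderLayer (both are
# repo-local classes, used with the same signature as in the constructor above)
# reuses the same feed-forward weights at every depth. Sizes are illustrative.
import torch.nn as nn

d_model, heads, d_ff, dropout = 512, 8, 2048, 0.1
shared_ff = PositionwiseFeedForward(d_model, d_ff, dropout)
layers = nn.ModuleList([
    TransformerDecoderLayer(d_model, heads, d_ff, dropout, shared_ff)
    for _ in range(6)
])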
def __init__(self, d_model, d_ff, heads, dropout, num_inter_layers=0, args=None):
    super(SentTransformerEncoder, self).__init__()
    self.d_model = d_model
    self.num_inter_layers = num_inter_layers

    sent_pos_emb = args.sent_pos_emb
    if sent_pos_emb:
        self.pos_emb = PositionalEncoding(dropout, d_model)
    else:
        self.pos_emb = None

    self.transformer_inter = nn.ModuleList([
        TransformerEncoderLayer(d_model, heads, d_ff, dropout)
        for _ in range(num_inter_layers)
    ])
    self.dropout = nn.Dropout(dropout)
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, args):
    super(SentTransformerEncoder, self).__init__()
    self.encoder_layers = args.encoder_layers

    # Whether to use positional embeddings for inter-sentence encoding.
    if args.sent_pos_emb_enc:
        self.pos_emb = PositionalEncoding(args.dropout, args.encoder_embed_dim)
    else:
        self.pos_emb = None

    self.layers = nn.ModuleList([])
    self.layers.extend([
        TransformerEncoderLayer(args=args)
        for _ in range(args.encoder_layers)
    ])
    self.dropout = nn.Dropout(args.dropout)

    self.normalize = args.encoder_normalize_before
    if self.normalize:
        self.layer_norm = LayerNorm(args.encoder_embed_dim)
def __init__(self, d_model, d_ff, heads, dropout, num_inter_layers=0, use_pos_emb=False):
    super(TransformerDecoder, self).__init__()
    self.use_pos_emb = use_pos_emb
    self.d_model = d_model
    self.num_inter_layers = num_inter_layers
    if self.use_pos_emb:
        self.pos_emb = PositionalEncoding(dropout, d_model)
    self.transformer_inter = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout)
        for _ in range(num_inter_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, vocab_size):
    super(Z_TransformerDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)
    self.vocab_size = vocab_size

    # Single-head attention used by the copy mechanism when COPY is enabled.
    if COPY:
        self.copy_attn = MultiHeadedAttention(
            1, d_model, dropout=dropout)

    # Build TransformerDecoder.
    self.transformer_layers = nn.ModuleList([
        Z_TransformerDecoderLayer(d_model, heads, d_ff, dropout)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, sep_dec=False):
    super(TransformerDecoder, self).__init__()
    # print("dmodel = ", d_model)
    d_sep_model = d_model  # (d_model * 2) // 3

    # Basic attributes.
    self.decoder_type = 'transformer'
    self.num_layers = num_layers
    self.embeddings = embeddings
    self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)

    # Build TransformerDecoder. Earlier variants, kept for reference, either
    # stacked plain layers or used sep_dec only in the second half of the stack:
    # self.transformer_layers = nn.ModuleList(
    #     [TransformerDecoderLayer(d_model, heads, d_ff, dropout)
    #      for _ in range(num_layers)])
    # self.transformer_layers = nn.ModuleList(
    #     [TransformerDecoderLayer(d_model, heads, d_ff, dropout, sep_dec=False)
    #      for _ in range(num_layers // 2)] +
    #     [TransformerDecoderLayer(d_sep_model, heads, d_ff, dropout, sep_dec=sep_dec)
    #      for _ in range(num_layers - num_layers // 2)])
    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_sep_model, heads, d_ff, dropout, sep_dec=sep_dec)
        for _ in range(num_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def __init__(self, args, device, vocab, checkpoint=None):
    super(RankAE, self).__init__()
    self.args = args
    self.device = device
    self.vocab = vocab
    self.vocab_size = len(vocab)
    self.beam_size = args.beam_size
    self.max_length = args.max_length
    self.min_length = args.min_length

    # Special-token ids.
    self.start_token = vocab['[unused1]']
    self.end_token = vocab['[unused2]']
    self.pad_token = vocab['[PAD]']
    self.mask_token = vocab['[MASK]']
    self.seg_token = vocab['[unused3]']
    self.cls_token = vocab['[CLS]']

    self.hidden_size = args.enc_hidden_size
    self.embeddings = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=0)

    if args.encoder == 'bert':
        self.encoder = Bert(args.bert_dir, args.finetune_bert)
        if args.max_pos > 512:
            # Extend BERT's 512 learned position embeddings to max_pos by copying
            # the trained table and repeating its last row for the extra positions.
            my_pos_embeddings = nn.Embedding(args.max_pos, self.encoder.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = \
                self.encoder.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = \
                self.encoder.model.embeddings.position_embeddings.weight.data[-1][None, :]\
                .repeat(args.max_pos - 512, 1)
            self.encoder.model.embeddings.position_embeddings = my_pos_embeddings
        tgt_embeddings = nn.Embedding(self.vocab_size,
                                      self.encoder.model.config.hidden_size, padding_idx=0)
    else:
        self.encoder = TransformerEncoder(self.hidden_size, args.enc_ff_size, args.enc_heads,
                                          args.enc_dropout, args.enc_layers)
        tgt_embeddings = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=0)

    # Hierarchical (utterance-level) encoder and a bilinear scorer over pairs of hidden states.
    self.hier_encoder = TransformerEncoder(self.hidden_size, args.hier_ff_size, args.hier_heads,
                                           args.hier_dropout, args.hier_layers)
    self.cup_bilinear = nn.Bilinear(self.hidden_size, self.hidden_size, 1)
    self.pos_emb = PositionalEncoding(0., self.hidden_size)

    self.decoder = TransformerDecoder(
        self.args.dec_layers,
        self.args.dec_hidden_size,
        heads=self.args.dec_heads,
        d_ff=self.args.dec_ff_size,
        dropout=self.args.dec_dropout,
        embeddings=tgt_embeddings)

    # The generator's output projection is tied to the decoder input embeddings.
    self.generator = Generator(self.vocab_size, self.args.dec_hidden_size, self.pad_token)
    self.generator.linear.weight = self.decoder.embeddings.weight

    if checkpoint is not None:
        self.load_state_dict(checkpoint['model'], strict=True)
    else:
        # Fresh model: initialize the non-BERT modules.
        if args.encoder == "transformer":
            for module in self.encoder.modules():
                self._set_parameter_tf(module)
        xavier_uniform_(self.embeddings.weight)
        for module in self.decoder.modules():
            self._set_parameter_tf(module)
        for module in self.hier_encoder.modules():
            self._set_parameter_tf(module)
        for p in self.generator.parameters():
            self._set_parameter_linear(p)
        for p in self.cup_bilinear.parameters():
            self._set_parameter_linear(p)
        if args.share_emb:
            # Share embeddings: reuse (or, for BERT, copy) the encoder word
            # embeddings for the decoder and generator.
            if args.encoder == 'bert':
                self.embeddings = self.encoder.model.embeddings.word_embeddings
                tgt_embeddings = nn.Embedding(self.vocab_size,
                                              self.encoder.model.config.hidden_size, padding_idx=0)
                tgt_embeddings.weight = copy.deepcopy(
                    self.encoder.model.embeddings.word_embeddings.weight)
            else:
                tgt_embeddings = self.embeddings
            self.decoder.embeddings = tgt_embeddings
            self.generator.linear.weight = self.decoder.embeddings.weight

    self.to(device)
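# The max_pos > 512 branch above grows BERT's learned position table by copying
# the trained rows and repeating the last row for every extra position. A
# self-contained sketch of that pattern on a plain nn.Embedding (the helper name
# and the sizes are hypothetical):
import torch.nn as nn

def extend_position_embeddings(old_pos_emb: nn.Embedding, max_pos: int) -> nn.Embedding:
    old_len, hidden = old_pos_emb.weight.shape
    new_pos_emb = nn.Embedding(max_pos, hidden)
    new_pos_emb.weight.data[:old_len] = old_pos_emb.weight.data
    # Positions past the trained range all reuse the last trained row.
    new_pos_emb.weight.data[old_len:] = \
        old_pos_emb.weight.data[-1][None, :].repeat(max_pos - old_len, 1)
    return new_pos_emb

extended = extend_position_embeddings(nn.Embedding(512, 768), 1024)  # 512 -> 1024 positions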