import math
from typing import List

from torch import nn
from torch.nn import LayerNorm

# NOTE: the imports below are an assumption — the BART building blocks used in
# this file (EncoderLayer, DecoderLayer, SelfAttention, LearnedPositionalEmbedding,
# SinusoidalPositionalEmbedding, ACT2FN) match an older transformers release
# (~2.11–3.0), where they live in transformers.modeling_bart. MeanPooling is
# assumed to be defined elsewhere in this repository.
from transformers import BartConfig
from transformers.activations import ACT2FN
from transformers.modeling_bart import (
    DecoderLayer,
    EncoderLayer,
    LearnedPositionalEmbedding,
    SelfAttention,
    SinusoidalPositionalEmbedding,
)


# Encoder __init__: token + positional embeddings followed by a stack of EncoderLayers.
def __init__(self, config: BartConfig, embed_tokens):
    super().__init__(config, embed_tokens)

    self.dropout = config.dropout
    self.layerdrop = config.encoder_layerdrop
    self.visual = None

    embed_dim = embed_tokens.embedding_dim
    self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
    self.padding_idx = embed_tokens.padding_idx
    self.max_source_positions = config.max_position_embeddings
    self.embed_tokens = embed_tokens

    if config.static_position_embeddings:
        self.embed_positions = SinusoidalPositionalEmbedding(
            config.max_position_embeddings, embed_dim, self.padding_idx
        )
    else:
        self.embed_positions = LearnedPositionalEmbedding(
            config.max_position_embeddings,
            embed_dim,
            self.padding_idx,
            config.extra_pos_embeddings,
        )

    self.layers = nn.ModuleList(
        [EncoderLayer(config) for _ in range(config.encoder_layers)]
    )
    self.layernorm_embedding = (
        LayerNorm(embed_dim) if config.normalize_embedding else nn.Identity()
    )
    # mbart has one extra layer_norm
    self.layer_norm = LayerNorm(config.d_model) if config.normalize_before else None
def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
    super().__init__()

    self.output_attentions = config.output_attentions
    self.output_hidden_states = config.output_hidden_states
    self.dropout = config.dropout
    self.layerdrop = config.decoder_layerdrop
    self.padding_idx = embed_tokens.padding_idx
    self.max_target_positions = config.max_position_embeddings
    self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
    self.embed_tokens = embed_tokens

    if config.static_position_embeddings:
        self.embed_positions = SinusoidalPositionalEmbedding(
            config.max_position_embeddings, config.d_model, config.pad_token_id
        )
    else:
        self.embed_positions = LearnedPositionalEmbedding(
            config.max_position_embeddings,
            config.d_model,
            self.padding_idx,
        )

    self.layers = nn.ModuleList(
        [DecoderLayer(config) for _ in range(config.decoder_layers)]
    )  # type: List[DecoderLayer]
    self.layernorm_embedding = (
        LayerNorm(config.d_model) if config.normalize_embedding else nn.Identity()
    )
    self.layer_norm = (
        LayerNorm(config.d_model) if config.add_final_layer_norm else None
    )
def __init__(self, config: BartConfig):
    super().__init__()
    self.embed_dim = config.d_model
    self.output_attentions = config.output_attentions
    self.self_attn = SelfAttention(
        embed_dim=self.embed_dim,
        num_heads=config.decoder_attention_heads,
        dropout=config.attention_dropout,
    )
    self.dropout = config.dropout
    self.activation_fn = ACT2FN[config.activation_function]
    self.activation_dropout = config.activation_dropout
    self.normalize_before = config.normalize_before

    self.self_attn_layer_norm = LayerNorm(self.embed_dim)
    self.encoder_attn = SelfAttention(
        self.embed_dim,
        config.decoder_attention_heads,
        dropout=config.attention_dropout,
        encoder_decoder_attention=True,
    )
    self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
    self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
    self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
    self.final_layer_norm = LayerNorm(self.embed_dim)
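# --- Illustrative sketch (not from the original source) ----------------------
# A minimal view of how the three sub-blocks defined above are typically
# chained in a post-norm BART decoder layer (normalize_before=False). The
# names x, encoder_hidden_states and the elided arguments are placeholders;
# the actual forward() of this repository may differ.
#
#   residual = x
#   x, _ = self.self_attn(query=x, key=x, ...)          # masked self-attention
#   x = residual + F.dropout(x, p=self.dropout)
#   x = self.self_attn_layer_norm(x)
#
#   residual = x
#   x, _ = self.encoder_attn(query=x, key=encoder_hidden_states, ...)  # cross-attention
#   x = residual + F.dropout(x, p=self.dropout)
#   x = self.encoder_attn_layer_norm(x)
#
#   residual = x
#   x = self.fc2(F.dropout(self.activation_fn(self.fc1(x)), p=self.activation_dropout))
#   x = residual + F.dropout(x, p=self.dropout)
#   x = self.final_layer_norm(x)
# ------------------------------------------------------------------------------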
# Encoder with a second, syntax-specific stream: in addition to the standard
# token encoder, it holds a 77-entry embedding table (embed_synt), a separate
# one-layer EncoderLayer stack (synt_layers), and a MeanPooling module that
# summarizes the token stream into a sentence embedding.
def __init__(self, config, embed_tokens):
    super().__init__()
    self.config = config
    self.dropout = config.dropout

    self.embed_tokens = embed_tokens
    self.embed_synt = nn.Embedding(77, config.d_model, config.pad_token_id)
    self.embed_synt.weight.data.normal_(mean=0.0, std=config.init_std)
    self.embed_synt.weight.data[config.pad_token_id].zero_()

    self.embed_positions = LearnedPositionalEmbedding(
        config.max_position_embeddings,
        config.d_model,
        config.pad_token_id,
        config.extra_pos_embeddings,
    )

    self.layers = nn.ModuleList(
        [EncoderLayer(config) for _ in range(config.encoder_layers)]
    )
    self.synt_layers = nn.ModuleList(
        [EncoderLayer(config) for _ in range(1)]
    )

    self.layernorm_embedding = LayerNorm(config.d_model)
    self.synt_layernorm_embedding = LayerNorm(config.d_model)

    self.pooling = MeanPooling(config)
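# --- Illustrative sketch (not from the original source) ----------------------
# How the two embedding streams defined above would typically be consumed in
# the encoder forward pass; input_ids and synt_ids are placeholder names and
# the elided arguments (padding masks, etc.) depend on this repository's code.
#
#   x = self.layernorm_embedding(self.embed_tokens(input_ids) + self.embed_positions(input_ids))
#   for layer in self.layers:                      # token (semantic) stream
#       x = layer(x, ...)
#
#   y = self.synt_layernorm_embedding(self.embed_synt(synt_ids) + self.embed_positions(synt_ids))
#   for layer in self.synt_layers:                 # syntactic stream (single layer)
#       y = layer(y, ...)
#
#   sent_embed = self.pooling(x, ...)              # mean-pooled sentence embedding
# ------------------------------------------------------------------------------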
def __init__(self, config, embed_tokens):
    super().__init__()
    self.dropout = config.dropout
    self.embed_tokens = embed_tokens
    self.embed_positions = LearnedPositionalEmbedding(
        config.max_position_embeddings,
        config.d_model,
        config.pad_token_id,
        config.extra_pos_embeddings,
    )
    self.layers = nn.ModuleList([DecoderLayer(config) for _ in range(1)])
    self.layernorm_embedding = LayerNorm(config.d_model)
# Adversarial head: a parameter-free LayerNorm over sentence embeddings
# followed by a linear classifier with 74 output classes.
def __init__(self, config):
    super().__init__()
    self.sent_layernorm_embedding = LayerNorm(
        config.d_model, elementwise_affine=False
    )
    self.adv = nn.Linear(config.d_model, 74)
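# --- Illustrative sketch (not from the original source) ----------------------
# How this head would typically be applied to a batch of pooled sentence
# embeddings of shape [batch, d_model]; sent_embed and labels are placeholder
# names, and how the loss enters training is specific to this repository.
#
#   logits = self.adv(self.sent_layernorm_embedding(sent_embed))   # [batch, 74]
#   loss = torch.nn.functional.cross_entropy(logits, labels)
# ------------------------------------------------------------------------------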