def __init__(self, embed_dim, n_att, dropout=0.5, normalize_before=True):
    super().__init__()
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.relu_dropout = dropout
    self.normalize_before = normalize_before
    num_layer_norm = 3
    # self-attention on generated recipe
    self.self_attn = MultiheadAttention(
        self.embed_dim, n_att, dropout=dropout,
    )
    # attention over the encoder output
    self.encoder_attn = MultiheadAttention(
        self.embed_dim, n_att, dropout=dropout,
    )
    # position-wise feed-forward block
    self.fc1 = Linear(self.embed_dim, self.embed_dim)
    self.fc2 = Linear(self.embed_dim, self.embed_dim)
    # one LayerNorm per sub-block: self-attention, encoder attention, feed-forward
    self.layer_norms = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(num_layer_norm)]
    )
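# A minimal forward sketch for the layer above, assuming the usual decoder-style
# ordering (self-attention, encoder attention, feed-forward) with the
# normalize_before flag toggling pre- vs. post-norm. The signature, mask argument,
# and use of F (torch.nn.functional) are illustrative assumptions, not taken from
# the original module.
def forward(self, x, encoder_out, self_attn_mask=None):
    # 1) self-attention over the generated sequence
    residual = x
    if self.normalize_before:
        x = self.layer_norms[0](x)
    x, _ = self.self_attn(query=x, key=x, value=x, attn_mask=self_attn_mask)
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = residual + x
    if not self.normalize_before:
        x = self.layer_norms[0](x)

    # 2) attention over the encoder output
    residual = x
    if self.normalize_before:
        x = self.layer_norms[1](x)
    x, _ = self.encoder_attn(query=x, key=encoder_out, value=encoder_out)
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = residual + x
    if not self.normalize_before:
        x = self.layer_norms[1](x)

    # 3) position-wise feed-forward
    residual = x
    if self.normalize_before:
        x = self.layer_norms[2](x)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, p=self.relu_dropout, training=self.training)
    x = self.fc2(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = residual + x
    if not self.normalize_before:
        x = self.layer_norms[2](x)
    return x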
def __init__(self, embed_dim, num_heads=4, src_attn_dropout=0.1, relu_dropout=0.1,
             res_dropout=0.1, tgt_attn_dropout=0.1, src_mask=True, tgt_mask=False):
    super().__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    # self-attention over the source stream
    self.self_attn = MultiheadAttention(
        embed_dim=self.embed_dim,
        num_heads=self.num_heads,
        attn_dropout=src_attn_dropout,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
    )
    # self.attn_mask = attn_mask
    # self.crossmodal = True
    # self.normalize = True
    self.src_mask = src_mask  # used as the last argument in the forward call
    self.tgt_mask = tgt_mask  # used as the last argument in the forward call
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    # self.normalize_before = True
    # attention over the target stream
    self.attn = MultiheadAttention(
        embed_dim=self.embed_dim,
        num_heads=self.num_heads,
        attn_dropout=tgt_attn_dropout,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
    )
    # position-wise feed-forward blocks for streams A and B
    # (the "Feed Forward" sub-layer in the paper)
    self.fc1_A = Linear(self.embed_dim, self.embed_dim)
    self.fc2_A = Linear(self.embed_dim, self.embed_dim)
    self.fc1_B = Linear(self.embed_dim, self.embed_dim)
    self.fc2_B = Linear(self.embed_dim, self.embed_dim)
    self.layer_norms_A = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(3)])
    self.layer_norms_B = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(3)])
def __init__(self, embed_dim, num_heads=4, attn_dropout=0.1, relu_dropout=0.1,
             res_dropout=0.1, attn_mask=False):
    super().__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.self_attn = MultiheadAttention(embed_dim=self.embed_dim,
                                        num_heads=self.num_heads,
                                        attn_dropout=attn_dropout)
    self.attn_mask = attn_mask
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.normalize_before = True
    # position-wise feed-forward block with 4x inner expansion
    # (the "Feed Forward" sub-layer in the paper)
    self.fc1 = Linear(self.embed_dim, 4 * self.embed_dim)
    self.fc2 = Linear(4 * self.embed_dim, self.embed_dim)
    # one LayerNorm for the attention sub-block, one for the feed-forward sub-block
    self.layer_norms = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(2)])
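# Sketch of the pre-/post-norm switch that the normalize_before flag implies,
# modelled on the fairseq-style maybe_layer_norm helper. This helper is an
# assumption for illustration; the original forward is not shown.
def maybe_layer_norm(self, i, x, before=False, after=False):
    # apply layer_norms[i] either before the sub-block (pre-norm) or after the
    # residual connection (post-norm), depending on normalize_before
    assert before ^ after
    if after ^ self.normalize_before:
        return self.layer_norms[i](x)
    return x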
def __init__(self, embed_dim, num_heads=4, attn_dropout=0.1, relu_dropout=0.1,
             res_dropout=0.1, attn_mask=False):
    super().__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.self_attn = MultiheadAttention(embed_dim=self.embed_dim,
                                        num_heads=self.num_heads,
                                        attn_dropout=attn_dropout,
                                        bias=True,
                                        add_bias_kv=True,
                                        add_zero_attn=True)
    self.attn_mask = attn_mask
    self.crossmodal = True
    self.normalize = True
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.normalize_before = True
    # complex-valued position-wise feed-forward block
    # (the "Feed Forward" sub-layer in the paper)
    self.fc1 = ComplexLinear(self.embed_dim, self.embed_dim)
    self.fc2 = ComplexLinear(self.embed_dim, self.embed_dim)
    # separate LayerNorm stacks for the two streams A and B
    self.layer_norms_A = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(2)])
    self.layer_norms_B = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(2)])
def get_incremental_state(
    module: MultiheadAttention,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
    key: str,
) -> Optional[Dict[str, Optional[Tensor]]]:
    """Helper for getting incremental state for an nn.Module."""
    return module.get_incremental_state(incremental_state, key)
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
    super(TransEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)
    self.norm = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
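# A plausible forward for TransEncoderLayer, following the standard post-norm
# encoder ordering. The signature, mask arguments, and the placement of the
# single LayerNorm (here after the self-attention residual) are assumptions;
# the original forward is not shown. F refers to torch.nn.functional.
def forward(self, src, src_mask=None, src_key_padding_mask=None):
    # self-attention block with residual connection
    src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                          key_padding_mask=src_key_padding_mask)[0]
    src = self.norm(src + self.dropout1(src2))
    # position-wise feed-forward block with residual connection
    src2 = self.linear2(self.dropout(F.relu(self.linear1(src))))
    src = src + self.dropout2(src2)
    return src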
def set_incremental_state(
    module: MultiheadAttention,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
    key: str,
    value: Dict[str, Optional[Tensor]],
) -> Optional[Dict[str, Dict[str, Optional[Tensor]]]]:
    """Helper for setting incremental state for an nn.Module."""
    if incremental_state is not None:
        result = module.set_incremental_state(incremental_state, key, value)
        if result is not None:
            incremental_state = result
    return incremental_state
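# Illustrative use of the two helpers above inside an incremental-decoding
# attention module: cached keys/values from previous steps are read with
# get_incremental_state, extended with the current step, and written back with
# set_incremental_state. The cache key "attn_state", the tensor names, the
# concatenation dimension, and the torch import are assumptions for this sketch.
def _append_to_cache(attn_module, incremental_state, k, v):
    saved = get_incremental_state(attn_module, incremental_state, "attn_state") or {}
    prev_k = saved.get("prev_key")
    prev_v = saved.get("prev_value")
    if prev_k is not None and prev_v is not None:
        # concatenate the new step onto the cached keys/values along time
        k = torch.cat([prev_k, k], dim=1)
        v = torch.cat([prev_v, v], dim=1)
    incremental_state = set_incremental_state(
        attn_module, incremental_state, "attn_state",
        {"prev_key": k, "prev_value": v},
    )
    return k, v, incremental_state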
def __init__(self, embed_dim, num_heads=4, src_attn_dropout=0.1, relu_dropout=0.1,
             res_dropout=0.1, tgt_attn_dropout=0.1, src_mask=True, tgt_mask=False):
    super().__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.self_attn = MultiheadAttention(
        embed_dim=self.embed_dim,
        num_heads=self.num_heads,
        attn_dropout=src_attn_dropout,
        bias=True,
        add_bias_kv=True,
        add_zero_attn=True,
    )
    self.src_mask = src_mask  # used as last arg in forward function call
    self.tgt_mask = tgt_mask  # used as last arg in forward function call
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.attn = MultiheadAttention(
        embed_dim=self.embed_dim,
        num_heads=self.num_heads,
        attn_dropout=tgt_attn_dropout,
        bias=True,
        add_bias_kv=True,
        add_zero_attn=True,
    )
    self.fc1 = nn.Linear(self.embed_dim, self.embed_dim)
    self.fc2 = nn.Linear(self.embed_dim, self.embed_dim)
    self.layer_norms = nn.ModuleList(
        [LayerNorm(self.embed_dim) for _ in range(3)])
def __init__(self, model_dim, num_head, inner_dim, dropout, attn_dropout=0.,
             head_dim=None, bias=True, activation=gelu):
    super().__init__()
    self.model_dim = model_dim
    self.num_head = num_head
    self.dropout = dropout
    self.bias = bias

    # Parameters
    self.layer_norm = LayerNorm(model_dim)
    self.constituent = ConstituentAttention(model_dim, model_dim, bias)
    self.multi_head = MultiheadAttention(model_dim, num_head, attn_dropout,
                                         bias, head_dim)
    position_wise = PositionWiseFeedForward(
        model_dim, inner_dim, bias=bias, dropout=dropout, activation=activation
    )
    self.position_wise = Sublayer(position_wise, model_dim, dropout)