def __init__(self, head_count, model_dim, p=0.1):
    """
    Args:
        head_count (int): number of parallel heads.
        model_dim (int): the dimension of keys/values/queries in this
            MultiHeadedAttention; must be divisible by head_count.
        p (float): dropout probability.
    """
    assert model_dim % head_count == 0
    self.dim_per_head = model_dim // head_count
    self.model_dim = model_dim

    super(MultiHeadedAttention, self).__init__()
    self.head_count = head_count

    self.linear_keys = BottleLinear(model_dim,
                                    head_count * self.dim_per_head,
                                    bias=False)
    self.linear_values = BottleLinear(model_dim,
                                      head_count * self.dim_per_head,
                                      bias=False)
    self.linear_query = BottleLinear(model_dim,
                                     head_count * self.dim_per_head,
                                     bias=False)
    self.sm = BottleSoftmax()
    self.activation = nn.ReLU()
    self.dropout = nn.Dropout(p)
    self.res_dropout = nn.Dropout(p)
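# A minimal sketch (not this class's actual forward) of how the per-head
# projections above are typically consumed: project, split into heads,
# apply scaled dot-product attention, then merge heads back. Tensor shapes
# and the function name are illustrative assumptions.
import torch
import torch.nn.functional as F

def scaled_dot_product_heads(query, key, value, head_count, dim_per_head,
                             dropout=None):
    # query/key/value: (batch, length, head_count * dim_per_head),
    # i.e. the outputs of linear_query / linear_keys / linear_values.
    batch, q_len, _ = query.size()

    def split_heads(x):
        # (batch, length, model_dim) -> (batch, head_count, length, dim_per_head)
        return x.view(batch, -1, head_count, dim_per_head).transpose(1, 2)

    q, k, v = split_heads(query), split_heads(key), split_heads(value)
    scores = torch.matmul(q, k.transpose(-2, -1)) / dim_per_head ** 0.5
    attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        attn = dropout(attn)
    context = torch.matmul(attn, v)  # (batch, head_count, q_len, dim_per_head)
    # merge heads back to (batch, q_len, head_count * dim_per_head)
    return (context.transpose(1, 2).contiguous()
                   .view(batch, q_len, head_count * dim_per_head))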
def __init__(self, dim, coverage=False, attn_type="dot", dropout=0.0): super(GlobalAttention, self).__init__() self.dim = dim self.attn_type = attn_type assert (self.attn_type in ["dot", "general", "mlp"]), ("Please select a valid attention type.") if self.attn_type == "general": self.linear_in = nn.Linear(dim, dim, bias=False) elif self.attn_type == "mlp": self.linear_context = BottleLinear(dim, dim, bias=False) self.linear_query = nn.Linear(dim, dim, bias=True) self.v = BottleLinear(dim, 1, bias=False) # mlp wants it with bias out_bias = self.attn_type == "mlp" self.linear_out = nn.Linear(dim * 2, dim, bias=out_bias) self.sm = nn.Softmax(dim=1) self.tanh = nn.Tanh() self.dropout = nn.Dropout(p=dropout) if coverage: self.linear_cover = nn.Linear(1, dim, bias=False)
def __init__(self, hidden_size, context_size, attn_type="dot"): super(MultiSizeAttention, self).__init__() self.hidden_size = hidden_size self.context_size = context_size self.attn_type = attn_type assert (self.attn_type in ['dot', 'general', 'mlp', 'mlp-conc']), ("Please select a valid attention type.") if self.attn_type == 'mlp-conc': # Maps hidden_size + context_size --> 1 self.mlp_conc = nn.Linear(hidden_size + context_size, 1, bias=False) elif self.attn_type == 'general': # self.linear_in = nn.Linear(hidden_size, hidden_size, bias=False) self.linear_in = nn.Linear(hidden_size, context_size, bias=False) elif self.attn_type == 'mlp': self.linear_context = BottleLinear(hidden_size, hidden_size, bias=False) self.linear_query = nn.Linear(hidden_size, hidden_size, bias=True) self.v = BottleLinear(hidden_size, 1, bias=False) # mlp wants it with bias out_bias = self.attn_type == 'mlp' # self.linear_out = nn.Linear(hidden_size*2, hidden_size, bias=out_bias) self.linear_out = nn.Linear(hidden_size + context_size, hidden_size, bias=out_bias) self.sm = nn.Softmax() self.tanh = nn.Tanh()
def __init__(self, dim, cuda, coverage=False, attn_type="dot"): super(GlobalAttention, self).__init__() self.dim = dim self.cuda = cuda self.tt = torch.cuda if cuda else torch self.attn_type = attn_type assert (self.attn_type in ["dot", "general", "mlp"]), ("Please select a valid attention type.") if self.attn_type == "general": self.linear_in = nn.Linear(dim, dim, bias=False) elif self.attn_type == "mlp": self.linear_context = BottleLinear(dim, dim, bias=False) self.linear_query = nn.Linear(dim, dim, bias=True) self.v = BottleLinear(dim, 1, bias=False) # mlp wants it with bias out_bias = self.attn_type == "mlp" self.linear_trans = nn.Linear(dim * 2, dim, bias=out_bias) self.sm = nn.Softmax() self.tanh = nn.Tanh() self.sigmoid = nn.Sigmoid() if coverage: self.linear_cover = nn.Linear(1, dim, bias=False)
def __init__(self, dim, coverage=False, attn_type="dot", affective_attention=None, affective_attn_strength=0.1, embedding_size=1027, local_weights=False): super(GlobalAttention, self).__init__() self.dim = dim self.attn_type = attn_type self.affective_attention = affective_attention self.affective_attn_strength = affective_attn_strength self.embedding_size = embedding_size self.local_weights = local_weights # weighted affective attention, local weights assert (self.attn_type in ["dot", "general", "mlp"]), ("Please select a valid attention type.") if self.attn_type == "general": self.linear_in = nn.Linear(dim, dim, bias=False) elif self.attn_type == "mlp": self.linear_context = BottleLinear(dim, dim, bias=False) self.linear_query = nn.Linear(dim, dim, bias=True) self.v = BottleLinear(dim, 1, bias=False) # mlp wants it with bias out_bias = self.attn_type == "mlp" self.linear_out = nn.Linear(dim * 2, dim, bias=out_bias) self.sm = nn.Softmax() self.tanh = nn.Tanh() if coverage: self.linear_cover = nn.Linear(1, dim, bias=False) # Add affective attention params if self.affective_attention == "matrix_norm": self.affect_linear = nn.Linear(dim, 3, bias=False) elif self.affective_attention == "bigram_norm": self.affect_linear = nn.Linear(embedding_size - 3, 3, bias=False) self.affect_linear1 = nn.Linear(3, 1, bias=False)
def __init__(self, head_count, model_dim, dropout=0.1):
    """
    Args:
        head_count (int): number of parallel heads.
        model_dim (int): the dimension of keys/values/queries in this
            MultiHeadedAttention; must be divisible by head_count.
        dropout (float): dropout probability.
    """
    assert model_dim % head_count == 0
    self.dim_per_head = model_dim // head_count
    self.model_dim = model_dim

    super(MultiHeadedAttention, self).__init__()
    self.head_count = head_count

    self.linear_keys = BottleLinear(model_dim,
                                    head_count * self.dim_per_head,
                                    bias=False)
    self.linear_values = BottleLinear(model_dim,
                                      head_count * self.dim_per_head,
                                      bias=False)
    self.linear_query = BottleLinear(model_dim,
                                     head_count * self.dim_per_head,
                                     bias=False)
    self.sm = BottleSoftmax()
    self.activation = nn.ReLU()
    self.dropout = nn.Dropout(dropout)
    self.res_dropout = nn.Dropout(dropout)
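# Quick illustration of the divisibility requirement the assertion above
# enforces: model_dim must split evenly across heads. The numbers are
# example values only.
model_dim, head_count = 512, 8
assert model_dim % head_count == 0
dim_per_head = model_dim // head_count  # 64 dimensions per head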