def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1):
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout)
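# Hedged sketch (not from the original repo): a plausible forward() companion for
# the DecoderLayer constructor above, following the standard Transformer decoder
# pattern it mirrors. It assumes MultiHeadAttention returns (output, attn_weights)
# and PositionwiseFeedForward returns a tensor; argument names are illustrative.
def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None):
    # Masked self-attention over the decoder sequence.
    dec_output, dec_slf_attn = self.slf_attn(dec_input, dec_input, dec_input, mask=slf_attn_mask)
    # Encoder-decoder attention: queries from the decoder, keys/values from the encoder.
    dec_output, dec_enc_attn = self.enc_attn(dec_output, enc_output, enc_output, mask=dec_enc_attn_mask)
    # Position-wise feed-forward transform.
    dec_output = self.pos_ffn(dec_output)
    return dec_output, dec_slf_attn, dec_enc_attn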
def initialize(self, params):
    self.params = params
    modelParts = zoo.selectModel(params)

    # Attach the components selected from the model zoo; warn about missing ones.
    flags = ['imgTransform', 'combine', 'embedder', 'postEmbedder']
    for flag in flags:
        if flag not in modelParts:
            print('Missing: %s' % flag)
        else:
            setattr(self, flag, modelParts[flag])

    # Define the word transform as the composition of embedder and postEmbedder.
    self.wordTransform = lambda x: self.postEmbedder(self.embedder(x))

    # Set pooling: single-head self-attention over the set, then summation.
    self.selfatt = MultiHeadAttention(1, params['hiddenSize'], params['hiddenSize'], params['hiddenSize'])
    self.pooler = torch.sum  # alternatives: torch.max, torch.mean

    # Initialize the parameters with Xavier initialization.
    modules = ['embedder', 'postEmbedder', 'imgTransform', 'combine']
    modules = [getattr(self, mod) for mod in modules if hasattr(self, mod)]
    initializeWeights(modules, 'xavier')
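# Hedged sketch (assumed usage, not from the original repo): how the pieces set up
# in initialize() would typically be combined — wordTransform embeds tokens, selfatt
# lets set elements attend to each other, and pooler collapses the set dimension.
# The method name, tensor names, and shapes are illustrative assumptions.
def poolSetFeatures(self, tokens):
    # tokens: (batch, setSize) token ids -> (batch, setSize, hiddenSize)
    feats = self.wordTransform(tokens)
    # Single-head self-attention over the set; assumes MultiHeadAttention
    # returns (output, attention_weights).
    feats, _ = self.selfatt(feats, feats, feats)
    # Collapse the set dimension with the configured pooler (torch.sum here).
    return self.pooler(feats, dim=1)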
def __init__(self, batch_size, out_dim):
    super(Pool_attn, self).__init__()
    self.batch_size = batch_size
    self.out_dim = out_dim
    # Learnable query vector. Registering it as a Parameter without calling
    # .cuda() on it keeps it in module.parameters(); calling .cuda() on a fresh
    # Parameter returns a plain tensor that the optimizer never sees. Device
    # placement is handled by moving the whole module (e.g. model.cuda()).
    self.k = nn.Parameter(torch.zeros(1, 1, out_dim))
    # Query expanded per batch element; re-doing this repeat in forward() would
    # let it track updates to self.k during training.
    self.kk = self.k.repeat([batch_size, 1, 1])
    self.attn = MultiHeadAttention(1, out_dim, out_dim, out_dim)
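# Hedged sketch (assumed forward, not from the original repo): Pool_attn pools a set
# by letting the learned query attend over the set elements, producing one out_dim
# vector per example. Assumes MultiHeadAttention returns (output, attn_weights) and
# that x has shape (batch_size, set_size, out_dim).
def forward(self, x):
    # Re-expand the query here so it reflects the current value of self.k.
    query = self.k.repeat([x.size(0), 1, 1])
    pooled, _ = self.attn(query, x, x)   # (batch, 1, out_dim)
    return pooled.squeeze(1)             # (batch, out_dim)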
def __init__(self, batch_size, in_dim, out_dim, num=1):
    super(PermEqui_attn_norm, self).__init__()
    self.batch_size = batch_size
    self.in_dim = in_dim
    self.out_dim = out_dim
    # Register the learnable query vectors in a ParameterList so they show up in
    # module.parameters(); a plain Python list of Parameters (and calling .cuda()
    # on each) would silently drop them from the optimizer.
    self.ks = nn.ParameterList(
        [nn.Parameter(torch.zeros(1, 1, in_dim)) for _ in range(num)]
    )
    # Per-batch expansions of the queries; recomputing these in forward() keeps
    # them in sync with the trained parameters.
    self.kks = [k.repeat([batch_size, 1, 1]) for k in self.ks]
    self.attn = MultiHeadAttention(1, in_dim, out_dim, in_dim)
    self.Gamma = nn.Linear(in_dim * num, out_dim)
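# Hedged sketch (assumed forward, not from the original repo): one plausible reading
# of PermEqui_attn_norm — each learned query pools the set via attention, the pooled
# context is subtracted from every element (the usual permutation-equivariant
# "x - pool(x)" pattern), the num results are concatenated, and Gamma maps them to
# out_dim. Assumes MultiHeadAttention returns (output, attn_weights) and that x has
# shape (batch_size, set_size, in_dim).
def forward(self, x):
    contexts = []
    for k in self.ks:
        query = k.repeat([x.size(0), 1, 1])           # (batch, 1, in_dim)
        pooled, _ = self.attn(query, x, x)            # (batch, 1, in_dim)
        contexts.append(x - pooled)                   # broadcasts over set_size
    return self.Gamma(torch.cat(contexts, dim=-1))    # (batch, set_size, out_dim)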
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.2):
    super().__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
    self.layer_norm = nn.LayerNorm(d_model)
    self.dropout = nn.Dropout(dropout)
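# Hedged sketch (assumed forward, not from the original repo): a standard
# encoder-style pass for the layer above — self-attention, dropout, residual
# connection with LayerNorm, then the position-wise feed-forward block. Assumes
# MultiHeadAttention returns (output, attn_weights).
def forward(self, x, mask=None):
    attn_out, attn_weights = self.slf_attn(x, x, x, mask=mask)
    x = self.layer_norm(x + self.dropout(attn_out))   # residual + normalization
    x = self.pos_ffn(x)                               # position-wise FFN
    return x, attn_weights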
def __init__(self, in_features, set_features=50):
    super(DeepSet, self).__init__()
    self.in_features = in_features
    self.out_features = set_features
    self.feature_extractor = nn.Sequential(
        nn.Linear(in_features, 50),
        nn.ELU(inplace=True),
        nn.Linear(50, 100),
        nn.ELU(inplace=True),
        nn.Linear(100, set_features),
    )
    self.regressor = nn.Sequential(
        nn.Linear(set_features * 2, 30),
        nn.ELU(inplace=True),
        nn.Linear(30, 30),
        nn.ELU(inplace=True),
        nn.Linear(30, 10),
        nn.ELU(inplace=True),
        nn.Linear(10, 1),
    )
    self.selfatt = MultiHeadAttention(1, 50, 50, 50)
    self.add_module('0', self.feature_extractor)
    self.add_module('1', self.regressor)
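# Hedged sketch (assumed forward, not from the original repo): a plausible DeepSet
# pass for the modules above — extract per-element features, run single-head
# self-attention over the set, then pool. Since regressor expects set_features * 2
# inputs, sum- and max-pooled features are concatenated here; that pooling choice,
# and the assumption that set_features matches the attention width of 50, are
# illustrative. x: (batch, set_size, in_features).
def forward(self, x):
    feats = self.feature_extractor(x)                 # (batch, set_size, set_features)
    feats, _ = self.selfatt(feats, feats, feats)      # attention over the set
    pooled = torch.cat([feats.sum(dim=1), feats.max(dim=1).values], dim=-1)
    return self.regressor(pooled)                     # (batch, 1)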
def __init__(self, in_dim, out_dim):
    super(PermEqui_max_attn_concat, self).__init__()
    self.Gamma = nn.Linear(in_dim * 2, out_dim)
    self.attn = MultiHeadAttention(1, in_dim, out_dim, in_dim)
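# Hedged sketch (assumed forward, not from the original repo): one plausible reading
# of PermEqui_max_attn_concat — the element-wise max over the set serves as a query
# so the set attends to its own maximum, and the resulting context is concatenated
# with every element before the linear map Gamma (hence in_dim * 2). Assumes
# MultiHeadAttention returns (output, attn_weights); x: (batch, set_size, in_dim).
def forward(self, x):
    query = x.max(dim=1, keepdim=True).values           # (batch, 1, in_dim)
    context, _ = self.attn(query, x, x)                 # (batch, 1, in_dim)
    context = context.expand(-1, x.size(1), -1)         # broadcast over the set
    return self.Gamma(torch.cat([x, context], dim=-1))  # (batch, set_size, out_dim)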