import torch
import torch.nn as nn
from torch.nn import init
from torch.nn.utils import weight_norm

# FCNet, WordEmbedding, QuestionEmbedding and StackedAttention are
# project-local helper modules assumed to be importable from elsewhere in
# this repository.


class QusSelfAttention(nn.Module):
    # Builds a two-layer scoring head (W -> tanh -> P) over question features
    # and stores the dropout layer and the gamma hyper-parameter for use in
    # the forward pass.
    def __init__(self, q_dim, gamma=0.3, dropout=0.5, wn=True):
        super(QusSelfAttention, self).__init__()
        self.W = FCNet([q_dim, q_dim], wn=wn)
        self.P = FCNet([q_dim, 1], wn=wn)
        self.activation = nn.Tanh()
        self.dropout = nn.Dropout(dropout)
        self.gamma = gamma
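
# A minimal, self-contained sketch (not this repository's forward pass) of
# how a W -> tanh -> P scoring head like the one above can attend over
# question tokens; plain linear modules stand in for FCNet and the softmax
# pooling is an assumption.
def question_self_attention_sketch(q, W, P, dropout, activation):
    # q: [batch, n_tokens, q_dim]
    logits = P(activation(W(dropout(q)))).squeeze(-1)    # [batch, n_tokens]
    att = torch.softmax(logits, dim=1)                   # one weight per token
    pooled = torch.bmm(att.unsqueeze(1), q).squeeze(1)   # [batch, q_dim]
    return pooled, att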

class BilinearAttentionLayer(nn.Module):
    # Projects visual and question features into a shared num_hid space and
    # learns a per-channel weight (h_mat) and scalar bias (h_bias) for
    # bilinear attention logits.
    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        super(BilinearAttentionLayer, self).__init__()
        self.v_proj = FCNet([v_dim, num_hid], dropout)
        self.q_proj = FCNet([q_dim, num_hid], dropout)
        self.h_mat = nn.Parameter(torch.Tensor(1, 1, num_hid).normal_())
        self.h_bias = nn.Parameter(torch.Tensor(1, 1, 1).normal_())
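
# Hedged sketch of low-rank bilinear attention logits using h_mat / h_bias in
# the spirit of BAN-style attention; the exact contraction used by this
# repository's forward pass is not shown above, so treat this as an
# illustration rather than the actual implementation.
def bilinear_logits_sketch(v_proj, q_proj, h_mat, h_bias):
    # v_proj: [batch, n_obj, num_hid], q_proj: [batch, n_tok, num_hid]
    # h_mat:  [1, 1, num_hid],         h_bias: [1, 1, 1]
    logits = torch.einsum('bvh,bqh->bvq', v_proj * h_mat, q_proj) + h_bias
    return logits  # [batch, n_obj, n_tok]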

class GraphAttNet(nn.Module):
    # Multi-head graph attention over concatenated (entity, relation)
    # features, guided by the question. With "concat" the per-head outputs
    # are concatenated, so out_dim must be divisible by n_att; otherwise each
    # head produces a full out_dim output.
    def __init__(self, q_dim, ent_dim, rel_dim, att_hid_dim, out_dim, n_att=1,
                 multi_head_type="concat", dropout=0.2, wn=True):
        super(GraphAttNet, self).__init__()
        self.out_dim = out_dim
        self.self_att = GATSelfAtt(n_att, ent_dim + rel_dim, q_dim, att_hid_dim, dropout, wn)
        self.n_att = n_att
        self.multi_head_type = multi_head_type
        if self.multi_head_type == "concat":
            assert out_dim % n_att == 0
            transform = [FCNet([ent_dim + rel_dim, out_dim // n_att], wn=wn)
                         for _ in range(n_att)]
        else:
            transform = [FCNet([ent_dim + rel_dim, out_dim], wn=wn)
                         for _ in range(n_att)]
        self.transform = nn.ModuleList(transform)

class NewAttention(nn.Module):
    # Bottom-up / top-down style attention: visual and question features are
    # projected to num_hid and scored by a single weight-normalised linear
    # layer. Note the scoring layer is sized by q_dim, which only matches a
    # num_hid-dimensional fused feature when num_hid == q_dim.
    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        super(NewAttention, self).__init__()
        self.v_proj = FCNet([v_dim, num_hid])
        self.q_proj = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.linear = weight_norm(nn.Linear(q_dim, 1), dim=None)
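
# Sketch of the forward pass this constructor supports: project both
# modalities, fuse by elementwise product, and score each region with the
# single linear layer. This mirrors the widely used reference implementation
# of this attention but is reproduced here only as an illustration (and it
# assumes num_hid == q_dim so the fused feature matches the scoring layer).
def new_attention_sketch(v, q, v_proj, q_proj, dropout, linear):
    # v: [batch, k, v_dim], q: [batch, q_dim]
    batch, k, _ = v.size()
    v_p = v_proj(v)                                # [batch, k, num_hid]
    q_p = q_proj(q).unsqueeze(1).repeat(1, k, 1)   # [batch, k, num_hid]
    joint = dropout(v_p * q_p)
    logits = linear(joint)                         # [batch, k, 1]
    return torch.softmax(logits, dim=1)            # attention over regions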

class SimpleClassifier(nn.Module):
    # Two-layer MLP classifier: hidden FC layer with ReLU, dropout, then a
    # linear output layer.
    def __init__(self, in_dim, hid_dim, out_dim, dropout, wn=True):
        super(SimpleClassifier, self).__init__()
        layers = [
            FCNet([in_dim, hid_dim], relu=True, wn=wn),
            nn.Dropout(dropout),
            FCNet([hid_dim, out_dim], relu=False, wn=wn),
        ]
        self.main = nn.Sequential(*layers)
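
# Hedged usage sketch for SimpleClassifier; the dimensions below are made-up
# examples, not values taken from this repository's configuration.
#   clf = SimpleClassifier(in_dim=1024, hid_dim=2048, out_dim=3000, dropout=0.5)
#   answer_logits = clf.main(fused_features)   # fused_features: [batch, 1024]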

class RelationAttention(nn.Module):
    # Question-guided attention over relation features: relation and question
    # vectors are projected into a shared h_dim space; P_mat / P_bias are
    # learned scoring parameters, and gamma is stored as a hyper-parameter
    # for the forward pass.
    def __init__(self, r_dim, q_dim, h_dim, gamma=0.3, dropout=0.5, wn=True):
        super(RelationAttention, self).__init__()
        self.r_proj = FCNet([r_dim, h_dim], wn=wn)
        self.q_proj = FCNet([q_dim, h_dim], wn=wn)
        self.P_mat = nn.Parameter(torch.Tensor(1, 1, 1, h_dim).normal_())
        self.P_bias = nn.Parameter(torch.Tensor(1, 1, 1, 1).normal_())
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.gamma = gamma
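
# Hedged sketch of relation attention consistent with the 4-D P_mat / P_bias
# shapes above: pairwise relation features of shape [batch, n, n, r_dim] and
# a broadcast question projection are fused, scored against P_mat, and
# normalised over all pairs. The fusion and normalisation choices are
# assumptions, not the repository's forward code.
def relation_attention_sketch(r, q, r_proj, q_proj, P_mat, P_bias, activation, dropout):
    # r: [batch, n, n, r_dim], q: [batch, q_dim]
    r_p = r_proj(r)                                    # [batch, n, n, h_dim]
    q_p = q_proj(q).unsqueeze(1).unsqueeze(1)          # [batch, 1, 1, h_dim]
    joint = dropout(activation(r_p * q_p))
    logits = (joint * P_mat).sum(-1, keepdim=True) + P_bias   # [batch, n, n, 1]
    return torch.softmax(logits.flatten(1), dim=1).view_as(logits)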

class RN(nn.Module):
    # Relation module: visual features (optionally with 4 extra
    # position-encoding channels) and question features are projected to a
    # shared subspace, and a stack of dilated 2-D convolutions (dilation 1,
    # 2, 4) produces relation_glimpse output channels.
    def __init__(self, v_dim, q_dim, subspace_dim, relation_glimpse, ksize=3,
                 pe_enable=True, dropout_ratio=.2):
        super(RN, self).__init__()
        self.pe_enable = pe_enable
        self.relation_glimpse = relation_glimpse
        conv_channels = subspace_dim
        if pe_enable:
            v_dim = v_dim + 4
        self.v_prj = FCNet([v_dim, conv_channels], dropout=dropout_ratio)
        self.q_prj = FCNet([q_dim, conv_channels], dropout=dropout_ratio)
        out_channel1 = conv_channels // 2
        out_channel2 = conv_channels // 4
        # Padding is dilation * (ksize - 1) / 2, so each dilated convolution
        # preserves the spatial size of its input.
        if ksize == 3:
            padding1, padding2, padding3 = 1, 2, 4
        if ksize == 5:
            padding1, padding2, padding3 = 2, 4, 8
        if ksize == 7:
            padding1, padding2, padding3 = 3, 6, 12
        # self.r_conv01 = nn.Conv2d(in_channels=conv_channels, out_channels=out_channel1, kernel_size=1)
        # self.r_conv02 = nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=1)
        # self.r_conv03 = nn.Conv2d(in_channels=out_channel2, out_channels=relation_glimpse, kernel_size=1)
        self.r_conv1 = nn.Conv2d(in_channels=conv_channels, out_channels=out_channel1,
                                 kernel_size=ksize, dilation=1, padding=padding1)
        self.r_conv2 = nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2,
                                 kernel_size=ksize, dilation=2, padding=padding2)
        self.r_conv3 = nn.Conv2d(in_channels=out_channel2, out_channels=relation_glimpse,
                                 kernel_size=ksize, dilation=4, padding=padding3)
        self.drop = nn.Dropout(dropout_ratio)
        self.relu = nn.ReLU()
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.zero_()
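
# Quick standalone check (not part of the model) that the kernel / dilation /
# padding combinations above are spatial-size preserving; the channel counts
# and input size are arbitrary.
def _dilated_padding_check(ksize=3, paddings=(1, 2, 4)):
    x = torch.randn(1, 8, 36, 36)
    for dilation, pad in zip((1, 2, 4), paddings):
        y = nn.Conv2d(8, 8, kernel_size=ksize, dilation=dilation, padding=pad)(x)
        assert y.shape[-2:] == x.shape[-2:]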

class RNModel(nn.Module):
    # Full model: word and question embeddings, an optional stacked-attention
    # branch over visual features, a relation network (RN) branch, and a
    # classifier over the fused representation.
    def __init__(self, cfg):
        super(RNModel, self).__init__()
        self.glimpse = 1
        q_dim = cfg['rnn_dim'] * 2 if cfg['rnn_bidirection'] else cfg['rnn_dim']
        self.w_emb = WordEmbedding(cfg['n_vocab'], cfg['word_embedding_dim'])
        self.w_emb.init_embedding(cfg['word_dic_file'], cfg['embedding_file'])
        self.q_emb = QuestionEmbedding(cfg['word_embedding_dim'], cfg['rnn_dim'],
                                       cfg['rnn_layer'], cfg['rnn_type'],
                                       keep_seq=False,
                                       bidirectional=cfg['rnn_bidirection'])
        self.v_att = StackedAttention(1, cfg['v_dim'], q_dim, cfg['fused_dim']) if cfg['att_enable'] else None
        self.rn = RN(cfg['v_dim'], q_dim, subspace_dim=cfg['rn_sub_dim'],
                     relation_glimpse=1, pe_enable=cfg['pe_enable'], ksize=cfg['ksize'])
        self.att_v_net = FCNet([cfg['v_dim'], cfg['fused_dim']]) if cfg['att_enable'] else None
        self.rn_v_net = FCNet([cfg['v_dim'], cfg['fused_dim']])
        self.q_net = FCNet([q_dim, cfg['fused_dim']])
        self.classifier = SimpleClassifier(cfg['fused_dim'], cfg['classifier_hid_dim'],
                                           cfg['classes'], 0.5)

class GATSelfAtt(nn.Module):
    # Multi-head graph-attention scoring head: entity and question features
    # are mapped (no bias, no ReLU) into a shared hid_dim space, and
    # transform_A projects to n_att attention logits; LeakyReLU is the usual
    # GAT nonlinearity. The dropout argument is accepted but not stored here.
    def __init__(self, n_att, ent_dim, q_dim, hid_dim, dropout=0.5, wn=True):
        super(GATSelfAtt, self).__init__()
        self.n_att = n_att
        self.h_dim = hid_dim
        self.transform_W = FCNet([ent_dim, self.h_dim], bias=False, relu=False, wn=wn)
        self.transform_A = FCNet([self.h_dim, n_att], bias=False, relu=False, wn=wn)
        self.transform_Q = FCNet([q_dim, self.h_dim], bias=False, relu=False, wn=wn)
        self.leakyReLU = nn.LeakyReLU()
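
# Self-contained sketch of GAT-style attention scoring in the shape set up by
# GATSelfAtt: entity and question projections are summed, passed through
# LeakyReLU, projected to n_att heads of logits, and normalised over the
# entities. This is an assumed reading of the constructor, not the
# repository's forward code.
def gat_self_att_sketch(ent, q, transform_W, transform_Q, transform_A, leaky_relu):
    # ent: [batch, n_ent, ent_dim], q: [batch, q_dim]
    h = transform_W(ent) + transform_Q(q).unsqueeze(1)   # [batch, n_ent, h_dim]
    logits = transform_A(leaky_relu(h))                  # [batch, n_ent, n_att]
    return torch.softmax(logits, dim=1)                  # attention per head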

class RN(nn.Module):
    # Variant relation module that produces r_dim-channel relation features
    # with two 1x1 convolutions instead of the dilated stack above. When
    # weight norm is disabled, Linear layers get Xavier initialisation and
    # Conv2d layers get Kaiming initialisation.
    def __init__(self, v_dim, q_dim, subspace_dim, r_dim, pe_enable=True, dropout=.2, wn=True):
        super(RN, self).__init__()
        self.pe_enable = pe_enable
        self.r_dim = r_dim
        conv_channels = subspace_dim
        if pe_enable:
            v_dim = v_dim + 4
        self.v_prj = FCNet([v_dim, conv_channels], dropout=dropout, wn=wn)
        self.q_prj = FCNet([q_dim, conv_channels], dropout=dropout, wn=wn)
        out_channel1 = r_dim
        out_channel2 = r_dim
        self.r_conv01 = nn.Conv2d(in_channels=conv_channels, out_channels=out_channel1, kernel_size=1)
        self.r_conv02 = nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=1)
        self.drop = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        if not wn:
            for m in self.modules():
                if isinstance(m, nn.Linear):
                    init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        m.bias.data.zero_()
                if isinstance(m, nn.Conv2d):
                    init.kaiming_uniform_(m.weight)
                    if m.bias is not None:
                        m.bias.data.zero_()

class SelectGate(nn.Module):
    # Gating module sized for a concatenated pair of v_dim feature vectors:
    # two FC layers map 2*v_dim -> 2*v_dim -> v_dim, with dropout applied.
    def __init__(self, v_dim, dropout=0.2, wn=True):
        super(SelectGate, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.fc1 = FCNet([2 * v_dim, 2 * v_dim], relu=False, wn=wn)
        self.fc2 = FCNet([2 * v_dim, v_dim], relu=False, wn=wn)
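
# Sketch of how a gate with this layer layout could combine two feature
# vectors: concatenate, transform, and squash to a per-dimension gate in
# [0, 1]. The sigmoid and the final blend are assumptions about the unseen
# forward pass, not the repository's implementation.
def select_gate_sketch(v_a, v_b, fc1, fc2, dropout):
    # v_a, v_b: [batch, v_dim]
    x = dropout(torch.cat([v_a, v_b], dim=-1))      # [batch, 2 * v_dim]
    gate = torch.sigmoid(fc2(torch.relu(fc1(x))))   # [batch, v_dim]
    return gate * v_a + (1.0 - gate) * v_b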