Example no. 1
    def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):  
        self.value_pad_token = -10000

        self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                 num_layers=num_layers, batch_first=True,
                                 dropout=0.3, bidirectional=True)

        self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                  num_layers=num_layers, batch_first=True,
                                  dropout=0.3, bidirectional=True)

        self.col_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                   num_layers=num_layers, batch_first=True,
                                   dropout=0.3, bidirectional=True)

        # Number of tokens
        self.col_q_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
        self.hs_q_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
        self.tokens_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, self.max_num_tokens)) # num of tokens: 1-6

        # tokens
        self.col_q = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
        self.hs_q = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
        self.W_value = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.value_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

        pos_weight = torch.tensor(3).double()
        if gpu: pos_weight = pos_weight.cuda()
        self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
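All of the encoders above set hidden_size to hidden_dim // 2 with bidirectional=True, so the forward and backward states concatenate back to hidden_dim per token. A minimal sketch with a plain nn.LSTM checks this dimension arithmetic; PackedLSTM is assumed here to be a thin wrapper around nn.LSTM for packed sequences, and the sizes below are illustrative only, not taken from the source:

import torch
import torch.nn as nn

# illustrative values, not from the source
N_word, hidden_dim, num_layers = 300, 120, 2

lstm = nn.LSTM(input_size=N_word, hidden_size=hidden_dim // 2,
               num_layers=num_layers, batch_first=True,
               dropout=0.3, bidirectional=True)

x = torch.randn(4, 25, N_word)   # (batch, seq_len, N_word)
out, _ = lstm(x)
print(out.shape)                 # torch.Size([4, 25, 120]) == hidden_dim (forward + backward halves)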
Example no. 2
 def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
     self.q_lstm = PackedLSTM(input_size=N_word,
                              hidden_size=hidden_dim // 2,
                              num_layers=num_layers,
                              batch_first=True,
                              dropout=0.3,
                              bidirectional=True)
     self.hs_lstm = PackedLSTM(input_size=N_word,
                               hidden_size=hidden_dim // 2,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=0.3,
                               bidirectional=True)
     self.col_lstm = PackedLSTM(input_size=N_word,
                                hidden_size=hidden_dim // 2,
                                num_layers=num_layers,
                                batch_first=True,
                                dropout=0.3,
                                bidirectional=True)
     self.q_cs = ConditionalAttention(hidden_dim=hidden_dim,
                                      use_bag_of_word=True)
     self.hs_cs = ConditionalAttention(hidden_dim=hidden_dim,
                                       use_bag_of_word=True)
     self.W_cs = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
     self.hv_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 2))
Example no. 3
 def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
     self.q_lstm = PackedLSTM(input_size=N_word,
                              hidden_size=hidden_dim // 2,
                              num_layers=num_layers,
                              batch_first=True,
                              dropout=0.3,
                              bidirectional=True)
     self.hs_lstm = PackedLSTM(input_size=N_word,
                               hidden_size=hidden_dim // 2,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=0.3,
                               bidirectional=True)
     self.kw_lstm = PackedLSTM(input_size=N_word,
                               hidden_size=hidden_dim // 2,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=0.3,
                               bidirectional=True)
     # preprocess the num
     self.q_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
     self.hs_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
     self.kw_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))
     # preprocess the value
     self.q_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
     self.hs_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
     self.W_kw = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
     self.kw_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))
     # move the loss weight to the GPU only when one is in use
     pos_weight = 3 * torch.tensor(3).double()
     if gpu:
         pos_weight = pos_weight.cuda()
     self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
Example no. 4
    def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
        self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                 num_layers=num_layers, batch_first=True,
                                 dropout=0.3, bidirectional=True)

        self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                  num_layers=num_layers, batch_first=True,
                                  dropout=0.3, bidirectional=True)

        self.bag_of_word = BagOfWord()
        self.W_q = nn.Linear(hidden_dim, hidden_dim)
        self.W_hs = nn.Linear(hidden_dim, hidden_dim)
        self.ao_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 2)) 
Example no. 5
 def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
     self.col_pad_token = -10000
     self.q_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                              num_layers=num_layers, batch_first=True,
                              dropout=0.3, bidirectional=True)
     self.hs_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                               num_layers=num_layers, batch_first=True,
                               dropout=0.3, bidirectional=True)
     self.col_lstm = PackedLSTM(input_size=N_word, hidden_size=hidden_dim//2,
                                num_layers=num_layers, batch_first=True,
                                dropout=0.3, bidirectional=True)
     # predicting the number of columns
     self.q_col_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
     self.hs_col_num = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=True)
     self.col_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, self.num))  # num of cols: 1-6
     # predicting how many times a column is repeated
     self.col_rep_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))  # num of repeats: 0-3
     # columns
     self.q_col = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
     self.hs_col = ConditionalAttention(hidden_dim=hidden_dim, use_bag_of_word=False)
     self.W_col = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
     self.col_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))
     pos_weight = torch.tensor(3).double()
     if gpu:
         pos_weight = pos_weight.cuda()
     self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
     self.cosine_loss = nn.CosineEmbeddingLoss()
Example no. 6
    def construct(self, N_word, hidden_dim, num_layers, gpu, use_hs):
        self.q_lstm = PackedLSTM(input_size=N_word,
                                 hidden_size=hidden_dim // 2,
                                 num_layers=num_layers,
                                 batch_first=True,
                                 dropout=0.3,
                                 bidirectional=True)

        self.hs_lstm = PackedLSTM(input_size=N_word,
                                  hidden_size=hidden_dim // 2,
                                  num_layers=num_layers,
                                  batch_first=True,
                                  dropout=0.3,
                                  bidirectional=True)

        self.kw_lstm = PackedLSTM(input_size=N_word,
                                  hidden_size=hidden_dim // 2,
                                  num_layers=num_layers,
                                  batch_first=True,
                                  dropout=0.3,
                                  bidirectional=True)

        self.q_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
        self.hs_kw_num = ConditionalAttention(hidden_dim, use_bag_of_word=True)
        self.kw_num_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 4))  # num of key words: 0-3

        self.q_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
        self.hs_kw = ConditionalAttention(hidden_dim, use_bag_of_word=False)
        self.W_kw = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.kw_out = nn.Sequential(nn.Tanh(), nn.Linear(hidden_dim, 1))

        # TODO: Where does the number 3 come from? Number of classes?
        # Answer: pos_weight balances positive against negative examples of a class.
        # E.g. for one class with 1 positive and 3 negative examples, set pos_weight to 3
        # so that the loss acts as if there were 3 positive examples.
        pos_weight = 3 * torch.tensor(3).double()
        if gpu:
            pos_weight = pos_weight.cuda()
        self.bce_logit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
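As a standalone check of the comment above, the snippet below compares an unweighted loss against one with pos_weight=3 on a toy 1-positive / 3-negative target vector; the logits and targets are made up for illustration and are not part of the source:

import torch
import torch.nn as nn

logits = torch.tensor([2.0, -1.0, -2.0, -3.0])    # made-up predictions
targets = torch.tensor([1.0, 0.0, 0.0, 0.0])      # 1 positive, 3 negatives

plain = nn.BCEWithLogitsLoss()(logits, targets)
weighted = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(3.0))(logits, targets)

# the weighted loss counts the positive term three times, offsetting the 1:3 imbalance
print(plain.item(), weighted.item())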