def __init__(self, num_answers, fn_type="softmax"):
    """Build the VQA head: LXRT encoder, a 4-way question-type predictor,
    four per-type feedforward branches, a fusion layer and the answer
    classifier.

    Args:
        num_answers: size of the answer vocabulary (width of the final logits).
        fn_type: activation applied to the type-predictor output;
            "tanh" or "softmax" (default); any other value falls back
            to sigmoid.
    """
    super().__init__()

    # Build LXRT encoder (`args` / `MAX_VQA_LENGTH` come from module scope).
    self.lxrt_encoder = LXRTEncoder(args, max_seq_length=MAX_VQA_LENGTH)
    hid_dim = self.lxrt_encoder.dim
    print("Size of Hidden Dimension:", hid_dim)
    fc_dim = int(hid_dim)
    # Fix: this print previously repeated the "Hidden Dimension" label.
    print("Size of FC Dimension:", fc_dim)

    # Type Predictor: pooled feature -> 4 question-type logits
    # (yesno / number / other / color, matching the branches below).
    self.type_fc = nn.Sequential(
        nn.Linear(hid_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, 4),
    )

    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()
    # Fix: nn.Softmax() without `dim` is deprecated and infers the axis at
    # call time; dim=-1 is explicit and equivalent for (batch, 4) logits.
    self.softmax = nn.Softmax(dim=-1)
    if fn_type == "tanh":
        self.fn = self.tanh
        print("FN: TANH")
    elif fn_type == "softmax":
        self.fn = self.softmax
        print("FN: SOFTMAX")
    else:
        self.fn = self.sigmoid
        print("FN: SIGMOID")

    def _branch_ff():
        # One per-type feedforward branch; identical architecture for all
        # four question types (previously copy-pasted four times).
        return nn.Sequential(
            nn.Linear(hid_dim, hid_dim * 2),
            GeLU(),
            BertLayerNorm(hid_dim * 2, eps=1e-12),
            nn.Linear(hid_dim * 2, fc_dim),
            GeLU(),
            BertLayerNorm(fc_dim, eps=1e-12),
        )

    self.yesno_fc = _branch_ff()   # YESNO feedforward
    self.number_fc = _branch_ff()  # NUMBER feedforward
    self.other_fc = _branch_ff()   # OTHER feedforward
    self.color_fc = _branch_ff()   # COLOR feedforward (comment previously said OTHER)

    # Fusion head: takes 5*fc_dim — presumably the concatenation of the four
    # branch outputs plus one more fc_dim-wide feature (verify in forward()).
    self.logit_fc1 = nn.Sequential(
        nn.Linear(5 * fc_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, hid_dim),
    )

    # Answering head: hid_dim -> num_answers logits.
    self.logit_fc = nn.Sequential(
        nn.Linear(hid_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, num_answers),
    )
    # (Removed a dead, commented-out alternate 3x-width logit_fc.)
    self.logit_fc.apply(self.lxrt_encoder.model.init_bert_weights)
def __init__(self, num_answers, fn_type="softmax"):
    """Build the yes/no-decomposition VQA head: LXRT encoder, a 4-way
    AND/OR/NOT/NONE predictor, four per-operator feedforward branches,
    a fusion layer and the answer classifier.

    Args:
        num_answers: size of the answer vocabulary (width of the final logits).
        fn_type: activation applied to the yn-predictor output;
            "tanh" or "softmax" (default); any other value falls back
            to sigmoid.
    """
    super().__init__()

    # Build LXRT encoder (`args` / `MAX_VQA_LENGTH` come from module scope).
    self.lxrt_encoder = LXRTEncoder(args, max_seq_length=MAX_VQA_LENGTH)
    hid_dim = self.lxrt_encoder.dim
    print("Size of Hidden Dimension:", hid_dim)
    fc_dim = int(hid_dim)
    # Fix: this print previously repeated the "Hidden Dimension" label.
    print("Size of FC Dimension:", fc_dim)

    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()
    # Fix: nn.Softmax() without `dim` is deprecated and infers the axis at
    # call time; dim=-1 is explicit and equivalent for (batch, 4) logits.
    self.softmax = nn.Softmax(dim=-1)
    if fn_type == "tanh":
        self.fn = self.tanh
        print("FN: TANH")
    elif fn_type == "softmax":
        self.fn = self.softmax
        print("FN: SOFTMAX")
    else:
        self.fn = self.sigmoid
        print("FN: SIGMOID")

    # YN type predictor: pooled feature -> 4 logits (AND / OR / NOT / NONE).
    self.yn_fc = nn.Sequential(
        nn.Linear(hid_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, 4),
    )

    def _branch_ff():
        # One per-operator feedforward branch; identical architecture for
        # all four operators (previously copy-pasted four times).
        return nn.Sequential(
            nn.Linear(hid_dim, hid_dim * 2),
            GeLU(),
            BertLayerNorm(hid_dim * 2, eps=1e-12),
            nn.Linear(2 * hid_dim, fc_dim),
            GeLU(),
            BertLayerNorm(fc_dim, eps=1e-12),
        )

    self.and_fc = _branch_ff()   # AND feedforward
    self.or_fc = _branch_ff()    # OR feedforward
    self.not_fc = _branch_ff()   # NOT feedforward
    self.none_fc = _branch_ff()  # NONE feedforward

    # Fusion head: takes 6*fc_dim — presumably the four branch outputs plus
    # two more fc_dim-wide features concatenated (verify in forward()).
    self.logit_fc1 = nn.Sequential(
        nn.Linear(6 * fc_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, hid_dim),
    )

    # Answering head: hid_dim -> num_answers logits.
    self.logit_fc = nn.Sequential(
        nn.Linear(hid_dim, hid_dim * 2),
        GeLU(),
        BertLayerNorm(hid_dim * 2, eps=1e-12),
        nn.Linear(hid_dim * 2, num_answers),
    )
    self.logit_fc.apply(self.lxrt_encoder.model.init_bert_weights)