import time

import torch
import torch.nn as nn

# Imports assumed for this excerpt: RobertaModel, BertPooler, and the
# pre-trained base class come from the transformers library (module paths
# vary by version); Trans_Encoder_self_attn / Trans_Encoder_layer are this
# repo's custom Transformer encoder stacks.
from transformers import BertPreTrainedModel, RobertaModel
from transformers.modeling_bert import BertPooler  # path assumed (pre-v4 layout)
from .transformer_encoder import Trans_Encoder_self_attn, Trans_Encoder_layer  # module path assumed


class RobertaForMultipleChoice_Fusion_Head_Dual_Attn(BertPreTrainedModel):  # base class assumed; declaration reconstructed from the super() call
    def __init__(self, config):
        print("[TIME] --- time: {} ---, init model fusion head self attn".format(
            time.ctime(time.time())))
        super(RobertaForMultipleChoice_Fusion_Head_Dual_Attn, self).__init__(config)
        # Shared RoBERTa encoder.
        self.roberta = RobertaModel(config)
        # 3-layer self-attention Transformer stack applied on top of the
        # RoBERTa sequence output (the fusion head).
        self.transformer_mrc = Trans_Encoder_self_attn(
            n_layers=3, n_head=12, d_k=64, d_v=64,
            d_model=768, d_inner=4096, dropout=0.1)
        self.pooler = BertPooler(config)
        self.bn = nn.BatchNorm1d(num_features=config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # One logit per choice.
        self.classifier = nn.Linear(config.hidden_size, 1)
        self.init_weights()
class RobertaForMultipleChoice_Fusion_Layer(BertPreTrainedModel):  # base class assumed, as above
    def __init__(self, config):
        print("[TIME] --- time: {} ---, init model fusion layer".format(
            time.ctime(time.time())))
        super(RobertaForMultipleChoice_Fusion_Layer, self).__init__(config)
        self.roberta = RobertaModel(config)
        # 3-layer Transformer encoder used for layer-wise fusion.
        self.transformer_mrc = Trans_Encoder_layer(
            n_layers=3, n_head=12, d_k=64, d_v=64,
            d_model=768, d_inner=4096, dropout=0.1)
        self.pooler = BertPooler(config)
        # Learnable 3x4 fusion weights plus a softmax to normalize them
        # over the last dimension.
        self.weight = nn.Parameter(torch.randn(3, 4))
        self.softmax = nn.Softmax(dim=-1)
        self.bn = nn.BatchNorm1d(num_features=config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)
        self.init_weights()
class Roberta_Encoder(BertPreTrainedModel):  # base class assumed, as above
    def __init__(self, config):
        super(Roberta_Encoder, self).__init__(config)
        # Plain RoBERTa encoder baseline, no fusion modules.
        self.roberta = RobertaModel(config)
        self.init_weights()
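
# A minimal instantiation sketch. Assumptions: the transformers version
# matches the imports above; d_model=768 in the fusion modules implies a
# roberta-base configuration; the checkpoint name is illustrative.
if __name__ == "__main__":
    from transformers import RobertaConfig

    config = RobertaConfig.from_pretrained("roberta-base")
    # from_pretrained loads the RoBERTa weights into self.roberta; the
    # fusion head, pooler, and classifier are freshly initialized by
    # init_weights().
    model = RobertaForMultipleChoice_Fusion_Head_Dual_Attn.from_pretrained(
        "roberta-base", config=config)
    print(model.classifier)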