def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
    super(BertQAYesnoHierarchicalTopK, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)

    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda

    self.freeze_predictor = freeze_predictor
    if self.freeze_predictor:
        logger.info('Freezing the parameters of the yes/no predictor')
        for param in self.yesno_predictor.parameters():
            param.requires_grad = False

    self.apply(self.init_bert_weights)
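
# Minimal usage sketch (an assumption, not from the repository): if this class
# extends pytorch-pretrained-bert's BertPreTrainedModel, from_pretrained
# forwards extra keyword arguments on to __init__, so the model could be
# loaded like this:
model = BertQAYesnoHierarchicalTopK.from_pretrained(
    'bert-base-uncased', evidence_lambda=0.8, freeze_predictor=True)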
def __init__(self, config, evidence_lambda=0.8, view_id=1):
    super(BertQAYesnoHierarchicalTwoViewTopKfp16, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    print(f'The view id is {view_id}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    # Halve the hidden size after building BERT: the downstream modules operate
    # on half-size views of the full-size BERT hidden states.
    config.hidden_size = int(config.hidden_size / 2)

    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.view_id = view_id

    self.apply(self.init_bert_weights)
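
# Hedged sketch of the two-view split implied by halving config.hidden_size
# above (an assumption about the forward pass, not the repository's code): the
# full-size BERT hidden states are chunked into two half-size views along the
# feature dimension, and one view is selected per model instance by view_id.
import torch

hidden = torch.randn(4, 128, 768)       # [batch, seq_len, hidden] from BertModel
views = torch.chunk(hidden, 2, dim=-1)  # two [batch, seq_len, 384] views
view = views[1]                         # e.g. view_id == 1 picks the second half (assumed indexing)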
def __init__(self, config, evidence_lambda=0.8, num_choices=4, sample_steps: int = 5,
             reward_func: int = 0, freeze_bert=False):
    super(BertQAYesnoHierarchicalReinforceRACE, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Currently the number of choices is {num_choices}')
    logger.info(f'Sample steps: {sample_steps}')
    logger.info(f'Reward function: {reward_func}')
    logger.info(f"Whether BERT's parameters are frozen: {freeze_bert}")

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    if freeze_bert:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.classifier = nn.Linear(config.hidden_size * 2, 1)
    self.evidence_lam = evidence_lambda
    self.sample_steps = sample_steps
    # Select the reward function by index: 0 -> reinforce_step, 1 -> reinforce_step_1.
    self.reward_func = [self.reinforce_step, self.reinforce_step_1][reward_func]
    self.num_choices = num_choices

    self.apply(self.init_bert_weights)
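
# Illustrative REINFORCE step (a sketch under assumptions; the repository's
# reinforce_step / reinforce_step_1 may differ): sample an evidence sentence
# from the sentence-level distribution, reward it by answer correctness, and
# scale the negative log-probability of the sample by that reward.
import torch
from torch.distributions import Categorical

sentence_logits = torch.randn(4, 10, requires_grad=True)  # [batch, num_sentences] evidence scores
dist = Categorical(logits=sentence_logits)
sampled = dist.sample()                                   # one evidence sentence id per example
reward = torch.tensor([1., 0., 1., 1.])                   # e.g. 1 if the answer was correct (assumed reward)
rl_loss = -(dist.log_prob(sampled) * reward).mean()
rl_loss.backward()                                        # gradients flow into the evidence scorer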
def __init__(self, config, evidence_lambda=0.8, num_choices=4, use_gumbel=True, freeze_bert=False):
    super(BertQAYesnoHierarchicalHardRACE, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'Currently the number of choices is {num_choices}')
    logger.info(f'Use gumbel: {use_gumbel}')
    logger.info(f"Whether BERT's parameters are frozen: {freeze_bert}")

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.bert = BertModel(config)
    if freeze_bert:
        for p in self.bert.parameters():
            p.requires_grad = False

    # self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    # self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    self.classifier = nn.Linear(config.hidden_size * 2, 1)
    self.evidence_lam = evidence_lambda
    self.use_gumbel = use_gumbel
    self.num_choices = num_choices

    self.apply(self.init_bert_weights)
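
# Hedged sketch of the hard evidence selection that use_gumbel presumably
# toggles (an assumption about the forward pass): a straight-through
# Gumbel-softmax yields a one-hot choice in the forward pass while remaining
# differentiable in the backward pass.
import torch
import torch.nn.functional as F

sentence_logits = torch.randn(4, 10, requires_grad=True)                 # [batch, num_sentences]
hard_attention = F.gumbel_softmax(sentence_logits, tau=1.0, hard=True)   # one-hot rows, soft gradients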
def __init__(self, config, evidence_lambda=0.8):
    super(RobertaQAYesnoHierarchicalTopK, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')

    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

    self.roberta = RobertaModel(config)

    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda

    self.init_weights()
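
# Minimal construction sketch (assumption: a transformers-style RobertaConfig
# supplies the hidden_size and hidden_dropout_prob this constructor reads).
# This builds the architecture with freshly initialized weights; pretrained
# weights would instead be loaded through from_pretrained.
from transformers import RobertaConfig

config = RobertaConfig.from_pretrained('roberta-base')
model = RobertaQAYesnoHierarchicalTopK(config, evidence_lambda=0.8)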