Example #1
    def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
        super(BertQAYesnoHierarchicalTopKfp32, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda

        if freeze_predictor:
            for param in self.yesno_predictor.parameters():
                param.requires_grad = False
        self.freeze_predictor = freeze_predictor

        self.apply(self.init_bert_weights)
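
A minimal instantiation sketch for the example above (assuming the class is importable from this codebase and that config is a BertConfig from pytorch_pretrained_bert, which these models appear to be built on; everything outside the example itself is illustrative):

# Illustrative usage only: assumes BertConfig from pytorch_pretrained_bert and that
# BertQAYesnoHierarchicalTopKfp32 is importable from this codebase.
from pytorch_pretrained_bert.modeling import BertConfig

config = BertConfig(vocab_size_or_config_json_file=30522)  # BERT-base defaults
model = BertQAYesnoHierarchicalTopKfp32(config, evidence_lambda=0.8, freeze_predictor=True)
# With freeze_predictor=True the yes/no head is excluded from gradient updates.
assert all(not p.requires_grad for p in model.yesno_predictor.parameters())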
Example #2
    def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0):
        super(BertQAYesnoHierarchicalNeg, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.negative_lam = negative_lambda

        self.apply(self.init_bert_weights)
Example #3
    def __init__(self, config, evidence_lambda=0.8, view_id=1, split_type=0):
        super(BertQAYesnoHierarchicalTwoViewTopK, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')
        print(f'The view id is {view_id}')

        total_dim = 768  # assumes the BERT-base hidden size
        rank_list = list(range(total_dim))
        self.split_type = split_type
        if split_type == 0:
            self.view_ranks = [
                rank_list[:int(.5 * total_dim)],
                rank_list[int(.5 * total_dim):]
            ]
        elif split_type == 1:
            self.view_ranks = [rank_list[::2], rank_list[1::2]]
        elif split_type == 2:
            random.seed(19970417)
            random.shuffle(rank_list)
            self.view_ranks = [
                rank_list[:int(.5 * total_dim)],
                rank_list[int(.5 * total_dim):]
            ]
        elif split_type == 3:
            random.seed(20190914)
            random.shuffle(rank_list)
            self.view_ranks = [
                rank_list[:int(.5 * total_dim)],
                rank_list[int(.5 * total_dim):]
            ]
        else:
            raise ValueError("split type should be 0/1/2/3, but found %d" %
                             (split_type))
        # Preview the first 20 hidden-dimension indices assigned to the selected view.
        print(self.view_ranks[view_id][:20])

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)
        # Each view uses half of BERT's hidden dimensions downstream.
        config.hidden_size = int(config.hidden_size / 2)

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.view_id = view_id

        self.apply(self.init_bert_weights)
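
The split_type branches above partition the 768 hidden dimensions into two views. A small illustration of the same logic with total_dim reduced to 8 for readability (illustrative only):

total_dim = 8
rank_list = list(range(total_dim))
# split_type == 0: contiguous halves
contiguous = [rank_list[:total_dim // 2], rank_list[total_dim // 2:]]   # [[0, 1, 2, 3], [4, 5, 6, 7]]
# split_type == 1: interleaved even/odd positions
interleaved = [rank_list[::2], rank_list[1::2]]                          # [[0, 2, 4, 6], [1, 3, 5, 7]]
# split_type == 2 or 3: a fixed-seed shuffle of rank_list, then contiguous halves
# of the shuffled order, so the two views are random but reproducible.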
Example #4
    def __init__(self,
                 config,
                 evidence_lambda=0.8,
                 negative_lambda=1.0,
                 add_entropy: bool = False,
                 split_num: int = 3,
                 split_index: int = 0):
        super(BertQAYesnoHierarchicalNegHalf, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')
        print(f'The coefficient of negative samples loss is {negative_lambda}')
        print(f'Add entropy loss: {add_entropy}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)
        self.split_num = split_num
        self.split_size = config.hidden_size // self.split_num
        self.split_index = split_index
        # (start, end) index ranges for each split; the last split absorbs any
        # remainder left over from the integer division.
        self.split_sizes = [
            ((i - 1) * self.split_size, i * self.split_size)
            for i in range(1, split_num)
        ] + [((split_num - 1) * self.split_size, config.hidden_size)]
        print(f'Split BERT output into {self.split_num} parts.')
        print(f'The current model uses the {self.split_index}th hidden state.')
        print(f'Reading hidden states in range {self.split_sizes[self.split_index]}.')

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(self.split_size)
        self.que_self_attn = layers.LinearSelfAttn(self.split_size)

        self.word_similarity = layers.AttentionScore(self.split_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(self.split_size,
                                                       250,
                                                       do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(self.split_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.negative_lam = negative_lambda
        self.add_entropy = add_entropy

        self.apply(self.init_bert_weights)
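
For reference, a short sketch reproducing the split_sizes computation above with the BERT-base hidden size of 768 and split_num=3 (values are illustrative of this configuration only):

hidden_size, split_num = 768, 3
split_size = hidden_size // split_num
split_sizes = [((i - 1) * split_size, i * split_size) for i in range(1, split_num)] \
    + [((split_num - 1) * split_size, hidden_size)]
print(split_sizes)  # [(0, 256), (256, 512), (512, 768)]; the last slice absorbs any remainder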
Example #5
    def __init__(self,
                 config,
                 evidence_lambda=0.8,
                 negative_lambda=1.0,
                 add_entropy: bool = False,
                 fix_bert: bool = False):
        super(BertQAYesnoHierarchicalSingleRNN, self).__init__(config)
        logger.info(f'The model {self.__class__.__name__} is loading...')
        logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
        logger.info(
            f'The coefficient of negative samples loss is {negative_lambda}')
        logger.info(f'Fix parameters of BERT: {fix_bert}')
        logger.info(f'Add entropy loss: {add_entropy}')
        # logger.info(f'Use bidirectional attention before summarizing vectors: {bi_attention}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)
        if fix_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.doc_sen_encoder = layers.StackedBRNN(config.hidden_size,
                                                  config.hidden_size // 2,
                                                  num_layers=1)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.negative_lam = negative_lambda
        self.add_entropy = add_entropy

        self.apply(self.init_bert_weights)
Example #6
    def __init__(self,
                 config,
                 evidence_lambda=0.8,
                 sample_steps: int = 5,
                 reward_func: int = 0,
                 freeze_bert=False):
        super(BertQAYesnoHierarchicalReinforce, self).__init__(config)
        logger.info(f'The model {self.__class__.__name__} is loading...')
        logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
        logger.info(f'Sample steps: {sample_steps}')
        logger.info(f'Reward function: {reward_func}')
        logger.info(f"Freeze BERT's parameters: {freeze_bert}")

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)

        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.sample_steps = sample_steps
        # Select the reward function implementation by index.
        self.reward_func = [self.reinforce_step,
                            self.reinforce_step_1][reward_func]

        self.apply(self.init_bert_weights)
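
The reward_func argument simply indexes into a list of bound methods, so the choice of reward function is fixed at construction time. A minimal, hypothetical illustration of the pattern (Demo is not part of this codebase):

class Demo:
    def __init__(self, reward_func: int = 0):
        # Select one of the two bound methods by index, as in the constructor above.
        self.reward = [self.reward_a, self.reward_b][reward_func]

    def reward_a(self):
        return 'a'

    def reward_b(self):
        return 'b'

assert Demo(reward_func=1).reward() == 'b'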
Example #7
    def __init__(self,
                 config,
                 evidence_lambda=0.8,
                 use_gumbel=True,
                 freeze_bert=False):
        super(BertQAYesnoHierarchicalHard, self).__init__(config)
        logger.info(f'The model {self.__class__.__name__} is loading...')
        logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
        logger.info(f'Use gumbel: {use_gumbel}')
        logger.info(f"Freeze BERT's parameters: {freeze_bert}")

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)

        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.use_gumbel = use_gumbel

        self.apply(self.init_bert_weights)
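
When freeze_bert=True, only the parameters outside the BERT encoder remain trainable. A common pattern for building the optimizer in that case, assuming model is an instance of the class above (the optimizer choice and learning rate are illustrative, not taken from this codebase):

import torch

# Pass only parameters with requires_grad=True to the optimizer, so the frozen
# BERT encoder is skipped entirely and only the attention/predictor layers are updated.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=5e-5)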