Code example #1
def get_multi_hop_model(rnn_dim, c2c: bool, q2c: bool, res_rnn: bool,
                        res_self_att: bool, post_merge: bool, encoder: str,
                        merge_type: str, num_c2c_hops: int):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim,
                                  rnn=res_rnn,
                                  self_att=res_self_att)
    context_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if c2c else None
    question_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if q2c else None

    if encoder == 'max':
        sequence_encoder = MaxPool(map_layer=None,
                                   min_val=0,
                                   regular_reshape=True)
    elif encoder == 'rnn':
        sequence_encoder = CudnnGruEncoder(rnn_dim,
                                           w_init=TruncatedNormal(stddev=0.05))
    else:
        raise NotImplementedError()

    if merge_type == 'max':
        attention_merger = MaxMerge(
            pre_map_layer=None,
            post_map_layer=(res_model if post_merge else None))
    else:
        attention_merger = WeightedMerge(
            pre_map_layer=None,
            post_map_layer=(res_model if post_merge else None),
            weight_type=merge_type)

    return MultiHopContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        question_to_context_attention=question_to_context,
        context_to_context_attention=context_to_context,
        c2c_hops=num_c2c_hops,
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        attention_merger=attention_merger,
        sequence_encoder=sequence_encoder,
        predictor=BinaryFixedPredictor())
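A minimal usage sketch for the factory above; the argument values are hypothetical and only illustrate the options the function actually branches on (encoder must be 'max' or 'rnn', and any merge_type other than 'max' is passed to WeightedMerge as its weight type):

# Hypothetical call; the hidden size and hop count are illustrative, not values from the source.
model = get_multi_hop_model(rnn_dim=150,
                            c2c=True,            # add context-to-context attention
                            q2c=False,           # skip question-to-context attention
                            res_rnn=True,        # residual post-mapper contains a GRU ...
                            res_self_att=False,  # ... but no self-attention
                            post_merge=True,     # apply the residual block after merging
                            encoder='max',       # or 'rnn' for a CudnnGruEncoder
                            merge_type='max',    # anything else selects WeightedMerge
                            num_c2c_hops=2)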
Code example #2
def get_model(rnn_dim):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    return ContextPairRelevanceModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            # VariationalDropoutLayer(0.8),  # fixme probably doesn't belong here
        ),
        question_to_context_attention=None,
        context_to_context_attention=None,
        context_to_question_attention=None,
        sequence_encoder=MaxPool(map_layer=None,
                                 min_val=0,
                                 regular_reshape=True),
        merger=MergeTwoContextsConcatQuestion(),
        predictor=BinaryFixedPredictor())
Code example #3
def get_reread_model(rnn_dim, use_elmo, encoder_keep_rate=0.8, reread_keep_rate=0.8,
                     two_phase_att=False, res_rnn=True, res_self_att=False,
                     multiply_iteration_probs=False, reformulate_by_context=False,
                     rank_first=False, rank_second=False, reread_rnn_dim=None,
                     first_rank_lambda=1.0, second_rank_lambda=1.0,
                     ranking_gamma=1.0):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder(group=rank_first or rank_second)

    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(encoder_keep_rate), recurrent_layer)

    if res_rnn or res_self_att:
        res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att,
                                      keep_rate=reread_keep_rate)
    else:
        res_model = FullyConnected(rnn_dim * 2, activation="relu")
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True),
                                        post_mapper=res_model)
    use_c2q = two_phase_att or not reformulate_by_context
    use_q2c = two_phase_att or reformulate_by_context

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper, lm_reduce=lm_reduce)

    return IterativeContextReReadModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True, paragraph_as_sentence=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reread_mapper=None if reread_rnn_dim is None else CudnnGru(reread_rnn_dim, w_init=TruncatedNormal(stddev=0.05)),
        pre_attention_mapper=None,  # VariationalDropoutLayer(reread_keep_rate),
        context_to_question_attention=attention if use_c2q else None,
        question_to_context_attention=attention if use_q2c else None,
        reformulate_by_context=reformulate_by_context,
        multiply_iteration_probs=multiply_iteration_probs,
        first_predictor=BinaryNullPredictor(rank_first, ranking_lambda=first_rank_lambda, gamma=ranking_gamma),
        second_predictor=BinaryNullPredictor(rank_second, ranking_lambda=second_rank_lambda, gamma=ranking_gamma),
        max_batch_size=512
    )
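A minimal call sketch for get_reread_model, again with hypothetical values; only rnn_dim and use_elmo are required, all other flags keep their defaults unless overridden:

# Hypothetical invocation; the dimension and flag values are assumptions for illustration.
model = get_reread_model(rnn_dim=150,
                         use_elmo=True,        # wraps the embed mapper in ElmoWrapper
                         two_phase_att=True,   # use both c2q and q2c attention
                         rank_first=True,      # grouped encoding + ranking loss on the first predictor
                         first_rank_lambda=0.5,
                         ranking_gamma=1.0)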
Code example #4
File: ablate_hotpot_qa.py  Project: sjliu0920/MUPPET
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        recurrent_layer,
        VariationalDropoutLayer(keep_rate),
    )

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper, lm_reduce=lm_reduce)

    answer_encoder = GroupedSpanAnswerEncoderWithYesNo(group=True)
    predictor = BoundsPredictor(
        ChainBiMapper(
            first_layer=recurrent_layer,
            second_layer=recurrent_layer
        ),
        span_predictor=IndependentBoundsGroupedWithYesNo()
    )

    return AttentionQAWithYesNo(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=False),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=NullBiMapper(),
        attention=BiAttention(TriLinear(bias=True), True),
        match_encoder=SequenceMapperSeq(FullyConnected(rnn_dim * 2, activation="relu"),
                                        ResidualLayer(SequenceMapperSeq(
                                            VariationalDropoutLayer(keep_rate),
                                            recurrent_layer,
                                            VariationalDropoutLayer(keep_rate),
                                            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                                            FullyConnected(rnn_dim * 2, activation="relu"),
                                        )),
                                        VariationalDropoutLayer(keep_rate)),
        predictor=predictor,
        yes_no_question_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        yes_no_context_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    )
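A minimal usage sketch (the hidden size is an assumption; passing use_elmo=True would additionally require the ELMo weights fetched by get_hotpot_elmo):

# Hypothetical call; 150 is an illustrative hidden size, not a value from the source.
model = get_model_with_yes_no(rnn_dim=150, use_elmo=False, keep_rate=0.8)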
Code example #5
def get_res_fc_seq_fc(model_rnn_dim, rnn: bool, self_att: bool, keep_rate=0.8):
    seq_mapper = []
    if not rnn and not self_att:
        raise NotImplementedError()
    if rnn:
        seq_mapper.extend([VariationalDropoutLayer(keep_rate),
                           CudnnGru(model_rnn_dim, w_init=TruncatedNormal(stddev=0.05))])
    if self_att:
        seq_mapper.extend([VariationalDropoutLayer(keep_rate),
                           StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct())])
    seq_mapper.append(FullyConnected(model_rnn_dim * 2, activation="relu"))
    return SequenceMapperSeq(FullyConnected(model_rnn_dim * 2, activation="relu"),
                             ResidualLayer(SequenceMapperSeq(
                                 *seq_mapper
                             )))
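Both residual variants used by the factories above come out of this helper; a short sketch with an assumed hidden size of 150 (at least one of rnn and self_att must be True, otherwise NotImplementedError is raised):

# Illustrative calls mirroring how the factories above use this helper.
res_rnn_block = get_res_fc_seq_fc(model_rnn_dim=150, rnn=True, self_att=False)
res_self_att_block = get_res_fc_seq_fc(model_rnn_dim=150, rnn=False, self_att=True)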
Code example #6
    def _apply_transposed(self, is_train, x, initial_states=None):
        w_init = TruncatedNormal(stddev=0.05)
        x_size = x.shape.as_list()[-1]
        if x_size is None:
            raise ValueError("Last dimension must be defined (have shape %s)" %
                             str(x.shape))

        cell = cudnn_rnn_ops.CudnnLSTM(1,
                                       self.n_out,
                                       x_size,
                                       input_mode="linear_input")

        # We need to know the mapping of weights/biases -> CudnnLSTM parameters, so just
        # build a `CudnnLSTM` and read its fields
        c = cudnn_layers.CudnnLSTM(1, self.n_out)
        c._input_size = x.shape.as_list()[-1]
        w_shapes = c.canonical_weight_shapes
        b_shapes = c.canonical_bias_shapes
        weights = [w_init(s, tf.float32) for s in w_shapes]
        biases = [tf.zeros(s, tf.float32) for s in b_shapes]
        # Split self.lstm_bias evenly across the input and recurrent bias halves of one gate
        biases[1] = tf.constant(self.lstm_bias / 2.0, tf.float32, b_shapes[1])
        biases[5] = tf.constant(self.lstm_bias / 2.0, tf.float32, b_shapes[5])

        opaque_params_t = cell.canonical_to_params(weights, biases)
        parameters = tf.get_variable("opaque_kernel",
                                     initializer=opaque_params_t,
                                     validate_shape=False)

        p = 1.0 - self.dropout  # keep probability

        if is_train and self.dropout > 0:
            mult_bias = [tf.ones_like(x) for x in biases]
            mult_w = [tf.ones_like(x) for x in weights]

            # Inverted-dropout mask over the hidden units: 1/p with probability p, else 0
            bias_mask = tf.floor(tf.random_uniform(
                (self.n_out, ), p, 1 + p)) / p

            # Scale the four recurrent (hidden-to-hidden) weight matrices by the mask
            for j in range(4, 8):
                mult_w[j] *= tf.expand_dims(bias_mask, 0)

            mult_mask = cell.canonical_to_params(mult_w, mult_bias)
            parameters = parameters * mult_mask

        initial_state_h, initial_state_c = initial_states
        out = cell(x, initial_state_h, initial_state_c, parameters, True)[0]

        return out
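The dropout applied above scales the recurrent weights by the mask tf.floor(tf.random_uniform((n,), p, 1 + p)) / p. A standalone NumPy sketch (hypothetical, not part of the source) of the same identity shows that each unit is kept with probability p and rescaled by 1/p, so the mask has expectation 1:

import numpy as np

p = 0.8                      # keep probability (1 - dropout)
mask = np.floor(np.random.uniform(p, 1 + p, size=100000)) / p
print(mask.mean())           # close to 1.0, since E[mask] = p * (1/p)
print(np.unique(mask))       # [0.   1.25]: dropped units are zeroed, kept units scaled by 1/p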
Code example #7
def get_reread_merge_model(rnn_dim, use_elmo, keep_rate=0.8, res_rnn=True, res_self_att=False,
                           multiply_iteration_probs=False):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder()

    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)

    if res_rnn or res_self_att:
        res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)
    else:
        res_model = FullyConnected(rnn_dim * 2, activation="relu")
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model)

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper, lm_reduce=lm_reduce)

    return IterativeContextReReadMergeModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=False, raw=True),
        context_to_question_attention=attention,
        question_to_context_attention=attention,
        reread_merger=ConcatWithProduct(),
        multiply_iteration_probs=multiply_iteration_probs,
        max_batch_size=128
    )
Code example #8
def get_model(rnn_dim, use_elmo, keep_rate=0.8):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder()

    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper, lm_reduce=lm_reduce)

    reformulation = ProjectMapEncodeReformulation(project_layer=None,
                                                  sequence_mapper=None,
                                                  encoder=CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05)))
    # reformulation = WeightedSumThenProjectReformulation(rnn_dim*2, activation='relu')
    # reformulation = ProjectThenWeightedSumReformulation(rnn_dim*2, activation='relu')

    return IterativeContextMaxSentenceModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=recurrent_layer,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reformulation_layer=reformulation,
        max_batch_size=128
    )
Code example #9
def get_contexts_to_question_model(rnn_dim, post_merge):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    if post_merge == 'res_rnn_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True),
                                        ConcatWithProduct()),
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    elif post_merge == 'res_rnn':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    elif post_merge == 'res_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True),
                                        ConcatWithProduct()),
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    else:
        raise NotImplementedError()

    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        attention_merger=MaxMerge(pre_map_layer=None,
                                  post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None,
                                 min_val=0,
                                 regular_reshape=True),
        predictor=BinaryFixedPredictor())
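A minimal call sketch; post_merge must be one of the three strings handled above ('res_rnn_self_att', 'res_rnn', 'res_self_att'), anything else raises NotImplementedError. The hidden size is an illustrative assumption:

# Hypothetical call; 150 is not a value taken from the source.
model = get_contexts_to_question_model(rnn_dim=150, post_merge='res_rnn_self_att')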