Exemple #1
0
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    """Assemble an ``AttentionQAWithYesNo`` model configuration.

    One ``CudnnGru`` instance is created and the same object is reused in the
    embedding mapper, in the match encoder and as both stages of the
    predictor's ``ChainBiMapper``.

    Args:
        rnn_dim: Size handed to ``CudnnGru``; the ``FullyConnected`` layers of
            the match encoder are sized ``rnn_dim * 2``.
        use_elmo: When truthy, ``get_hotpot_elmo()`` supplies the language
            model and the embedding mapper is wrapped in an ``ElmoWrapper``.
            Otherwise ``elmo_model`` is passed as ``None``.
        keep_rate: Value given to every ``VariationalDropoutLayer`` built here.

    Returns:
        The constructed ``AttentionQAWithYesNo`` instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    # Plain embedding mapper: dropout -> GRU -> dropout.
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        gru,
        VariationalDropoutLayer(keep_rate),
    )

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        # Wrap the plain mapper so the language-model output is appended to
        # its input (input_append=True) but not to its output.
        embed_mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=embed_mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    answer_encoder = GroupedSpanAnswerEncoderWithYesNo(group=True)
    bounds_predictor = BoundsPredictor(
        ChainBiMapper(first_layer=gru, second_layer=gru),
        span_predictor=IndependentBoundsGroupedWithYesNo(),
    )

    input_encoder = QuestionsAndParagraphsEncoder(
        answer_encoder, use_sentence_segments=False
    )
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    memory_builder = NullBiMapper()
    bi_attention = BiAttention(TriLinear(bias=True), True)

    # Match encoder: projection -> residual(GRU + self-attention) -> dropout.
    match_projection = FullyConnected(rnn_dim * 2, activation="relu")
    match_residual = ResidualLayer(
        SequenceMapperSeq(
            VariationalDropoutLayer(keep_rate),
            gru,
            VariationalDropoutLayer(keep_rate),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(rnn_dim * 2, activation="relu"),
        )
    )
    match_encoder = SequenceMapperSeq(
        match_projection,
        match_residual,
        VariationalDropoutLayer(keep_rate),
    )

    question_pool = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    context_pool = MaxPool(map_layer=None, min_val=0, regular_reshape=True)

    return AttentionQAWithYesNo(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=memory_builder,
        attention=bi_attention,
        match_encoder=match_encoder,
        predictor=bounds_predictor,
        yes_no_question_encoder=question_pool,
        yes_no_context_encoder=context_pool,
    )
def get_res_fc_seq_fc(model_rnn_dim, rnn: bool, self_att: bool, keep_rate=0.8):
    """Build a ``FullyConnected`` layer followed by a residual sequence block.

    The residual block contains, in this order:

    * when ``rnn`` is truthy: ``VariationalDropoutLayer`` then ``CudnnGru``;
    * when ``self_att`` is truthy: ``VariationalDropoutLayer`` then
      ``StaticAttentionSelf``;
    * always: a closing ``FullyConnected`` layer.

    Args:
        model_rnn_dim: Size handed to ``CudnnGru``; both ``FullyConnected``
            layers are sized ``model_rnn_dim * 2``.
        rnn: Include the dropout + GRU stage.
        self_att: Include the dropout + self-attention stage.
        keep_rate: Value given to each ``VariationalDropoutLayer``. The
            default of 0.8 reproduces the previously hard-coded setting.

    Returns:
        A ``SequenceMapperSeq`` of the leading ``FullyConnected`` layer and a
        ``ResidualLayer`` wrapping the stages described above.

    Raises:
        NotImplementedError: If neither ``rnn`` nor ``self_att`` is enabled.
    """
    # Reject the empty configuration before constructing any layer.
    if not (rnn or self_att):
        raise NotImplementedError(
            "get_res_fc_seq_fc requires at least one of 'rnn' or 'self_att' "
            "to be enabled"
        )

    residual_layers = []
    if rnn:
        residual_layers.extend([
            VariationalDropoutLayer(keep_rate),
            CudnnGru(model_rnn_dim, w_init=TruncatedNormal(stddev=0.05)),
        ])
    if self_att:
        residual_layers.extend([
            VariationalDropoutLayer(keep_rate),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
        ])
    residual_layers.append(FullyConnected(model_rnn_dim * 2, activation="relu"))

    return SequenceMapperSeq(
        FullyConnected(model_rnn_dim * 2, activation="relu"),
        ResidualLayer(SequenceMapperSeq(*residual_layers)),
    )
def get_contexts_to_question_model(rnn_dim, post_merge, keep_rate=0.8):
    """Assemble a ``ContextsToQuestionModel`` configuration.

    One ``CudnnGru`` instance is created and the same object is used in the
    embedding mapper and, for the RNN-based modes, in the post-merge block.

    Args:
        rnn_dim: Size handed to ``CudnnGru``; the ``FullyConnected`` layers of
            the post-merge block are sized ``rnn_dim * 2``.
        post_merge: Selects the residual block applied after ``MaxMerge``
            (each block is preceded by a ``FullyConnected`` layer):

            * ``'res_rnn_self_att'``: dropout, GRU, dropout, self-attention,
              fully connected;
            * ``'res_rnn'``: dropout, GRU, fully connected;
            * ``'res_self_att'``: dropout, self-attention, fully connected.
        keep_rate: Value given to every ``VariationalDropoutLayer`` built
            here. The default of 0.8 reproduces the previously hard-coded
            setting.

    Returns:
        The constructed ``ContextsToQuestionModel`` instance.

    Raises:
        NotImplementedError: If ``post_merge`` is not one of the three
            supported modes.
    """
    # Decode the mode into two flags and validate it up front, so an
    # unsupported value fails before any layer object is constructed.
    use_rnn = post_merge in ('res_rnn_self_att', 'res_rnn')
    use_self_att = post_merge in ('res_rnn_self_att', 'res_self_att')
    if not (use_rnn or use_self_att):
        raise NotImplementedError(
            "Unsupported post_merge %r; expected 'res_rnn_self_att', "
            "'res_rnn' or 'res_self_att'" % (post_merge,)
        )

    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    # Build the residual block once instead of repeating it per mode.
    post_projection = FullyConnected(rnn_dim * 2, activation="relu")
    residual_layers = []
    if use_rnn:
        residual_layers.extend([
            VariationalDropoutLayer(keep_rate),
            recurrent_layer,
        ])
    if use_self_att:
        residual_layers.extend([
            VariationalDropoutLayer(keep_rate),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
        ])
    residual_layers.append(FullyConnected(rnn_dim * 2, activation="relu"))
    post_map_layer = SequenceMapperSeq(
        post_projection,
        ResidualLayer(SequenceMapperSeq(*residual_layers)),
    )

    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(keep_rate),
            recurrent_layer,
            VariationalDropoutLayer(keep_rate),
        ),
        attention_merger=MaxMerge(pre_map_layer=None,
                                  post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None,
                                 min_val=0,
                                 regular_reshape=True),
        predictor=BinaryFixedPredictor())