def get_context_to_question_model(rnn_dim: int, q2c: bool, res_rnn: bool, res_self_att: bool):
    """Assemble a ``SingleContextToQuestionModel``.

    :param rnn_dim: size given to the ``CudnnGru`` used in the embedding mapper
        and passed to ``get_res_fc_seq_fc`` as ``model_rnn_dim``.
    :param q2c: if truthy, a question-to-context attention is built; otherwise
        ``None`` is passed in its place.
    :param res_rnn: forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
    :param res_self_att: forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
    :return: the configured ``SingleContextToQuestionModel`` instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    # The same post-mapper object is handed to every attention wrapper below.
    post_mapper = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    def make_attention():
        return AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=post_mapper)

    if q2c:
        q2c_attention = make_attention()
    else:
        q2c_attention = None
    c2q_attention = make_attention()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    # A dropout layer sits before and after the recurrent layer in the sequence.
    mapper = SequenceMapperSeq(VariationalDropoutLayer(0.8), gru, VariationalDropoutLayer(0.8))
    pooling = MaxPool(map_layer=None, min_val=0, regular_reshape=True)

    return SingleContextToQuestionModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        embed_mapper=mapper,
        question_to_context_attention=q2c_attention,
        context_to_question_attention=c2q_attention,
        sequence_encoder=pooling,
        predictor=BinaryFixedPredictor(),
    )
def get_multi_encode_softmax_weighting_model(rnn_dim, multi_rnn_dim, num_encodings, keep_rate=0.8, map_embed=True):
    """Assemble a ``SingleContextMultipleEncodingWeightedSoftmaxModel``.

    :param rnn_dim: size for the ``CudnnGru`` used by the embedding mapper.
    :param multi_rnn_dim: size for the ``CudnnGru`` used inside ``MultiMapThenEncode``.
    :param num_encodings: forwarded to ``MultiMapThenEncode`` as ``num_encodings``.
    :param keep_rate: argument of every ``VariationalDropoutLayer`` built here.
    :param map_embed: if falsy, ``None`` is passed as ``embed_mapper``.
    :return: the configured model instance.
    """
    embed_gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    multi_gru = CudnnGru(multi_rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    # The embedding mapper is optional; the GRU above is created either way.
    if map_embed:
        mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), embed_gru)
    else:
        mapper = None

    multi_encoder = MultiMapThenEncode(
        mapper=SequenceMapperSeq(VariationalDropoutLayer(keep_rate), multi_gru),
        encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        num_encodings=num_encodings,
    )

    return SingleContextMultipleEncodingWeightedSoftmaxModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        embed_mapper=mapper,
        sequence_multi_encoder=multi_encoder,
        weight_layer=MultiEncodingWeights(weight_mode='mlp'),
        merger=ConcatWithProduct(),
        post_merger=None,
        predictor=BinaryWeightedMultipleFixedPredictor(),
    )
def get_sentences_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Assemble a ``SingleContextMaxSentenceModel``.

    :param rnn_dim: size for the ``CudnnGru`` used by the embedding mapper.
    :param use_elmo: if truthy, the language model from ``get_wiki_elmo()`` is
        attached and the embedding mapper is wrapped in an ``ElmoWrapper``.
    :param keep_rate: argument of the ``VariationalDropoutLayer`` placed in
        front of the recurrent layer.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_wiki_elmo()
        # The plain mapper becomes the wrapper's rnn_layer.
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    input_encoder = QuestionsAndParagraphsEncoder(
        binary_answers, use_sentence_segments=True, paragraph_as_sentence=True)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return SingleContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        max_batch_size=256,
    )
def get_model(rnn_dim):
    """Assemble a ``ContextPairRelevanceModel`` with all attentions disabled.

    :param rnn_dim: size for the ``CudnnGru`` used by the embedding mapper.
    :return: the configured model instance; the question-to-context,
        context-to-context and context-to-question attentions are all ``None``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    # NOTE: a trailing VariationalDropoutLayer(0.8) after the recurrent layer
    # was commented out in the original with "fixme probably doesn't belong here".
    mapper = SequenceMapperSeq(VariationalDropoutLayer(0.8), gru)
    pooling = MaxPool(map_layer=None, min_val=0, regular_reshape=True)

    return ContextPairRelevanceModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        embed_mapper=mapper,
        question_to_context_attention=None,
        context_to_context_attention=None,
        context_to_question_attention=None,
        sequence_encoder=pooling,
        merger=MergeTwoContextsConcatQuestion(),
        predictor=BinaryFixedPredictor(),
    )
def get_multi_hop_model(rnn_dim, c2c: bool, q2c: bool, res_rnn: bool,
                        res_self_att: bool, post_merge: bool, encoder: str,
                        merge_type: str, num_c2c_hops: int):
    """Assemble a ``MultiHopContextsToQuestionModel``.

    :param rnn_dim: size for the ``CudnnGru`` layers and, as ``model_rnn_dim``,
        for ``get_res_fc_seq_fc``.
    :param c2c: if truthy, a context-to-context attention is built; else ``None``.
    :param q2c: if truthy, a question-to-context attention is built; else ``None``.
    :param res_rnn: forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
    :param res_self_att: forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
    :param post_merge: if truthy, the residual model is also used as the
        merger's ``post_map_layer``; otherwise that layer is ``None``.
    :param encoder: ``'max'`` selects a ``MaxPool`` sequence encoder, ``'rnn'``
        a ``CudnnGruEncoder``.
    :param merge_type: ``'max'`` selects ``MaxMerge``; any other value is passed
        to ``WeightedMerge`` as ``weight_type``.
    :param num_c2c_hops: forwarded to the model as ``c2c_hops``.
    :return: the configured model instance.
    :raises NotImplementedError: if ``encoder`` is neither ``'max'`` nor ``'rnn'``.
    """
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    # One residual model instance is shared by the attentions and the merger.
    res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim,
                                  rnn=res_rnn,
                                  self_att=res_self_att)
    context_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if c2c else None
    question_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if q2c else None

    if encoder == 'max':
        sequence_encoder = MaxPool(map_layer=None,
                                   min_val=0,
                                   regular_reshape=True)
    elif encoder == 'rnn':
        sequence_encoder = CudnnGruEncoder(rnn_dim,
                                           w_init=TruncatedNormal(stddev=0.05))
    else:
        # Name the offending value so a misconfigured caller can see what went wrong.
        raise NotImplementedError(
            "Unsupported encoder %r; expected 'max' or 'rnn'" % (encoder,))

    # Both merger variants take the same optional post-map layer.
    post_map_layer = res_model if post_merge else None
    if merge_type == 'max':
        attention_merger = MaxMerge(pre_map_layer=None,
                                    post_map_layer=post_map_layer)
    else:
        attention_merger = WeightedMerge(pre_map_layer=None,
                                         post_map_layer=post_map_layer,
                                         weight_type=merge_type)

    return MultiHopContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        question_to_context_attention=question_to_context,
        context_to_context_attention=context_to_context,
        c2c_hops=num_c2c_hops,
        # Unlike the two attentions above, this one is a bare BiAttention
        # without a post-mapper wrapper.
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        attention_merger=attention_merger,
        sequence_encoder=sequence_encoder,
        predictor=BinaryFixedPredictor())
def get_reread_model(rnn_dim, use_elmo, encoder_keep_rate=0.8, reread_keep_rate=0.8,
                     two_phase_att=False, res_rnn=True, res_self_att=False,
                     multiply_iteration_probs=False, reformulate_by_context=False,
                     rank_first=False, rank_second=False, reread_rnn_dim=None,
                     first_rank_lambda=1.0, second_rank_lambda=1.0,
                     ranking_gamma=1.0):
    """Assemble an ``IterativeContextReReadModel``.

    :param rnn_dim: size for the embedding ``CudnnGru``; also used for the
        attention post-mapper.
    :param use_elmo: if truthy, the language model from ``get_hotpot_elmo()``
        is attached and the embedding mapper is wrapped in an ``ElmoWrapper``.
    :param encoder_keep_rate: argument of the embedding mapper's
        ``VariationalDropoutLayer``.
    :param reread_keep_rate: forwarded to ``get_res_fc_seq_fc`` as ``keep_rate``.
    :param two_phase_att: if truthy, both attention directions are enabled.
    :param res_rnn: forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
    :param res_self_att: forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
    :param multiply_iteration_probs: forwarded to the model unchanged.
    :param reformulate_by_context: forwarded to the model; also decides which
        single attention direction is enabled when ``two_phase_att`` is falsy.
    :param rank_first: first positional argument of the first ``BinaryNullPredictor``.
    :param rank_second: first positional argument of the second ``BinaryNullPredictor``.
    :param reread_rnn_dim: if not ``None``, a ``CudnnGru`` of this size is
        passed as ``reread_mapper``.
    :param first_rank_lambda: ``ranking_lambda`` of the first predictor.
    :param second_rank_lambda: ``ranking_lambda`` of the second predictor.
    :param ranking_gamma: ``gamma`` of both predictors.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder(group=rank_first or rank_second)

    mapper = SequenceMapperSeq(VariationalDropoutLayer(encoder_keep_rate), gru)

    # With neither residual option requested, fall back to a plain dense layer.
    if not (res_rnn or res_self_att):
        post_mapper = FullyConnected(rnn_dim * 2, activation="relu")
    else:
        post_mapper = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att,
                                        keep_rate=reread_keep_rate)
    # A single attention object serves whichever directions end up enabled.
    shared_attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True),
                                               post_mapper=post_mapper)
    enable_c2q = two_phase_att or not reformulate_by_context
    enable_q2c = two_phase_att or reformulate_by_context

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_hotpot_elmo()
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    input_encoder = QuestionsAndParagraphsEncoder(
        iterative_answers, use_sentence_segments=True, paragraph_as_sentence=True)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    pooling = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
    sentence_pooling = SentenceMaxEncoder()
    merge_layer = WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True)

    if reread_rnn_dim is None:
        reread_gru = None
    else:
        reread_gru = CudnnGru(reread_rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    return IterativeContextReReadModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        sequence_encoder=pooling,
        sentences_encoder=sentence_pooling,
        sentence_mapper=None,
        post_merger=None,
        merger=merge_layer,
        reread_mapper=reread_gru,
        # A VariationalDropoutLayer(reread_keep_rate) was noted here as an alternative.
        pre_attention_mapper=None,
        context_to_question_attention=shared_attention if enable_c2q else None,
        question_to_context_attention=shared_attention if enable_q2c else None,
        reformulate_by_context=reformulate_by_context,
        multiply_iteration_probs=multiply_iteration_probs,
        first_predictor=BinaryNullPredictor(rank_first, ranking_lambda=first_rank_lambda, gamma=ranking_gamma),
        second_predictor=BinaryNullPredictor(rank_second, ranking_lambda=second_rank_lambda, gamma=ranking_gamma),
        max_batch_size=512,
    )
Example #7
0
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    """Assemble an ``AttentionQAWithYesNo`` model.

    :param rnn_dim: size for the ``CudnnGru``; the dense layers in the match
        encoder use ``rnn_dim * 2``.
    :param use_elmo: if truthy, the language model from ``get_hotpot_elmo()``
        is attached and the embedding mapper is wrapped in an ``ElmoWrapper``.
    :param keep_rate: argument of every ``VariationalDropoutLayer`` built here.
    :return: the configured model instance.
    """
    # This one GRU object is reused by the embedding mapper, the match encoder
    # and both layers of the predictor's ChainBiMapper.
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        gru,
        VariationalDropoutLayer(keep_rate),
    )

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_hotpot_elmo()
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    span_answers = GroupedSpanAnswerEncoderWithYesNo(group=True)
    bounds_predictor = BoundsPredictor(
        ChainBiMapper(first_layer=gru, second_layer=gru),
        span_predictor=IndependentBoundsGroupedWithYesNo(),
    )

    input_encoder = QuestionsAndParagraphsEncoder(span_answers, use_sentence_segments=False)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    memory = NullBiMapper()
    bi_attention = BiAttention(TriLinear(bias=True), True)

    # Dense projection, then a residual block, then a final dropout.
    match_projection = FullyConnected(rnn_dim * 2, activation="relu")
    match_residual = ResidualLayer(SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        gru,
        VariationalDropoutLayer(keep_rate),
        StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
        FullyConnected(rnn_dim * 2, activation="relu"),
    ))
    matcher = SequenceMapperSeq(match_projection, match_residual, VariationalDropoutLayer(keep_rate))

    return AttentionQAWithYesNo(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=memory,
        attention=bi_attention,
        match_encoder=matcher,
        predictor=bounds_predictor,
        yes_no_question_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        yes_no_context_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
    )
Example #8
0
 def __init__(
         self,
         encoder: QuestionsAndParagraphsEncoder,
         word_embed: Optional[WordEmbedder],
         char_embed: Optional[CharWordEmbedder],
         embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
         sequence_encoder: SequenceEncoder,
         sentences_encoder: SentencesEncoder,
         sentence_mapper: Optional[SequenceMapper],
         merger: FixedMergeLayer,
         post_merger: Optional[Mapper],
         reformulation_layer: ReformulationLayer,
         max_batch_size: Optional[int] = None,
         elmo_model: Optional[LanguageModel] = None):
     """Store the layers this model is composed of.

     ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
     ``elmo_model`` are handed to the parent constructor; every other
     argument is kept on the instance under its own name.  The predictor
     and the two pooling helpers are not configurable: they are always
     created here with fixed arguments.
     """
     super().__init__(
         encoder=encoder,
         word_embed=word_embed,
         char_embed=char_embed,
         max_batch_size=max_batch_size,
         elmo_model=elmo_model,
     )

     # Caller-supplied layers.
     self.embed_mapper = embed_mapper
     self.sequence_encoder = sequence_encoder
     self.sentences_encoder = sentences_encoder
     self.sentence_mapper = sentence_mapper
     self.merger = merger
     self.post_merger = post_merger
     self.reformulation_layer = reformulation_layer

     # Fixed components built with default / hard-coded arguments.
     self.predictor = BinaryNullPredictor()
     self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)
     self.mean_pool = MeanPool()
def get_reread_merge_model(rnn_dim, use_elmo, keep_rate=0.8, res_rnn=True, res_self_att=False,
                           multiply_iteration_probs=False):
    """Assemble an ``IterativeContextReReadMergeModel``.

    :param rnn_dim: size for the embedding ``CudnnGru``; also used for the
        attention post-mapper.
    :param use_elmo: if truthy, the language model from ``get_hotpot_elmo()``
        is attached and the embedding mapper is wrapped in an ``ElmoWrapper``.
    :param keep_rate: argument of the embedding mapper's ``VariationalDropoutLayer``.
    :param res_rnn: forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
    :param res_self_att: forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
    :param multiply_iteration_probs: forwarded to the model unchanged.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder()

    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    # With neither residual option requested, fall back to a plain dense layer.
    if not (res_rnn or res_self_att):
        post_mapper = FullyConnected(rnn_dim * 2, activation="relu")
    else:
        post_mapper = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)
    # The same attention object is passed for both directions.
    shared_attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=post_mapper)

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_hotpot_elmo()
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    input_encoder = QuestionsAndParagraphsEncoder(iterative_answers, use_sentence_segments=True)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return IterativeContextReReadMergeModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=False, raw=True),
        context_to_question_attention=shared_attention,
        question_to_context_attention=shared_attention,
        reread_merger=ConcatWithProduct(),
        multiply_iteration_probs=multiply_iteration_probs,
        max_batch_size=128,
    )
Example #10
0
    def __init__(self,
                 encoder: QuestionsAndParagraphsEncoder,
                 word_embed: Optional[WordEmbedder],
                 char_embed: Optional[CharWordEmbedder],
                 embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
                 sequence_encoder: SequenceEncoder,
                 sentences_encoder: SentencesEncoder,
                 sentence_mapper: Optional[SequenceMapper],
                 reread_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
                 pre_attention_mapper: Optional[SequenceMapper],
                 context_to_question_attention: Optional[AttentionWithPostMapper],
                 question_to_context_attention: Optional[AttentionWithPostMapper],
                 first_predictor: BinaryNullPredictor,
                 second_predictor: BinaryNullPredictor,
                 reformulate_by_context: bool,
                 max_batch_size: Optional[int] = None,
                 elmo_model: Optional[LanguageModel] = None):
        """Store the layers this model is composed of and validate the attentions.

        ``encoder``, ``word_embed``, ``char_embed``, ``max_batch_size`` and
        ``elmo_model`` are handed to the parent constructor; the remaining
        arguments are kept on the instance under their own names.

        :raises ValueError: if ``reformulate_by_context`` is truthy and
            ``question_to_context_attention`` is ``None``, or it is falsy and
            ``context_to_question_attention`` is ``None``.
        """
        super().__init__(
            encoder=encoder,
            word_embed=word_embed,
            char_embed=char_embed,
            max_batch_size=max_batch_size,
            elmo_model=elmo_model,
        )

        # Mappers and encoders.
        self.embed_mapper = embed_mapper
        self.sequence_encoder = sequence_encoder
        self.sentences_encoder = sentences_encoder
        self.sentence_mapper = sentence_mapper
        self.reread_mapper = reread_mapper
        self.pre_attention_mapper = pre_attention_mapper

        # Attention directions and the flag that selects the mandatory one.
        self.question_to_context_attention = question_to_context_attention
        self.context_to_question_attention = context_to_question_attention
        self.reformulate_by_context = reformulate_by_context

        # Predictors and the fixed pooling helper.
        self.first_predictor = first_predictor
        self.second_predictor = second_predictor
        self.max_pool = MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True)

        # Exactly one attention direction is mandatory, chosen by the flag.
        if self.reformulate_by_context:
            required_attention = question_to_context_attention
        else:
            required_attention = context_to_question_attention
        if required_attention is None:
            raise ValueError("The last attention must be defined")
def get_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Assemble an ``IterativeContextMaxSentenceModel``.

    :param rnn_dim: size for the ``CudnnGru`` (used both in the embedding
        mapper and as the sentence mapper) and for the reformulation's
        ``CudnnGruEncoder``.
    :param use_elmo: if truthy, the language model from ``get_hotpot_elmo()``
        is attached and the embedding mapper is wrapped in an ``ElmoWrapper``.
    :param keep_rate: argument of the embedding mapper's ``VariationalDropoutLayer``.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder()

    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_hotpot_elmo()
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    # Alternatives left disabled in the original:
    #   WeightedSumThenProjectReformulation(rnn_dim*2, activation='relu')
    #   ProjectThenWeightedSumReformulation(rnn_dim*2, activation='relu')
    reformulator = ProjectMapEncodeReformulation(
        project_layer=None,
        sequence_mapper=None,
        encoder=CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05)),
    )

    input_encoder = QuestionsAndParagraphsEncoder(iterative_answers, use_sentence_segments=True)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return IterativeContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        # The bare GRU (without dropout) doubles as the sentence mapper.
        sentence_mapper=gru,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reformulation_layer=reformulator,
        max_batch_size=128,
    )
def get_fixed_context_to_question(rnn_dim):
    """Assemble a ``SingleFixedContextToQuestionModel``.

    :param rnn_dim: size for the ``CudnnGru``; the dense layers in the
        post-merger use ``rnn_dim * 2``.
    :return: the configured model instance, with ``context_mapper`` and
        ``question_mapper`` both set to ``None``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    def dropout_then_gru():
        # Each call creates a new dropout layer but reuses the single GRU object.
        return SequenceMapperSeq(VariationalDropoutLayer(0.8), gru)

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    # One plain stage followed by two residual stages of the same shape.
    first_stage = dropout_then_gru()
    second_stage = ResidualLayer(dropout_then_gru())
    third_stage = ResidualLayer(dropout_then_gru())
    mapper = SequenceMapperSeq(first_stage, second_stage, third_stage)

    context_pooling = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    merge_layer = WithConcatOptions(dot=True, sub=True, hadamard=True, raw=True, project=False)

    merged_projection = FullyConnected(rnn_dim * 2, activation='relu')
    merged_residual = ResidualLayer(SequenceMapperSeq(
        VariationalDropoutLayer(0.8),
        gru,
        FullyConnected(rnn_dim * 2, activation='relu'),
    ))
    merged_mapper = SequenceMapperSeq(merged_projection, merged_residual)

    final_pooling = MaxPool(map_layer=None, min_val=0, regular_reshape=True)

    # Disabled in the original for both context_mapper and question_mapper:
    #   ResidualLayer(SequenceMapperSeq(
    #       VariationalDropoutLayer(0.8),
    #       StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
    #       FullyConnected(rnn_dim*2, activation=None)))
    return SingleFixedContextToQuestionModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        embed_mapper=mapper,
        context_mapper=None,
        context_encoder=context_pooling,
        question_mapper=None,
        merger=merge_layer,
        post_merger=merged_mapper,
        final_encoder=final_pooling,
        predictor=BinaryFixedPredictor(),
    )
def get_basic_model(rnn_dim, post_merger_params: Optional[dict] = None, use_elmo=False, keep_rate=0.8):
    """Assemble a ``BasicSingleContextAndQuestionIndependentModel``.

    :param rnn_dim: size for the ``CudnnGru`` used by the embedding mapper.
    :param post_merger_params: keyword arguments for ``get_mlp``; when ``None``
        no post-merger is built.
    :param use_elmo: if truthy, the language model from ``get_squad_elmo()``
        is attached and the embedding mapper is wrapped in an ``ElmoWrapper``
        (with both ``input_append`` and ``output_append`` enabled).
    :param keep_rate: argument of the embedding mapper's ``VariationalDropoutLayer``.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    # A three-stage variant (one plain plus two residual dropout+GRU stages)
    # was left disabled in the original in favour of this single stage.
    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    language_model = None
    if use_elmo:
        print("Using Elmo!")
        language_model = get_squad_elmo()
        mapper = ElmoWrapper(
            input_append=True,
            output_append=True,
            rnn_layer=mapper,
            lm_reduce=MapperSeq(
                ElmoLayer(0, layer_norm=False, top_layer_only=False),
                DropoutLayer(0.5),
            ),
        )

    if post_merger_params is None:
        mlp = None
    else:
        mlp = get_mlp(**post_merger_params)

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )

    return BasicSingleContextAndQuestionIndependentModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        elmo_model=language_model,
        embed_mapper=mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        merger=ConcatWithProductSub(),
        post_merger=mlp,
        predictor=BinaryFixedPredictor(sigmoid=True),
        max_batch_size=128,
    )
def get_bottleneck_to_seq_model(rnn_dim, q2c: bool, res_rnn: bool, res_self_att: bool, seq_len=50):
    """Assemble a ``SingleContextBottleneckToSeqQuestionModel``.

    :param rnn_dim: size for the ``CudnnGru`` and the ``LSTMCell``; the dense
        layers of the sequence generator use ``rnn_dim * 2``.
    :param q2c: if truthy, a question-to-context attention is built; otherwise
        ``None`` is passed in its place.
    :param res_rnn: forwarded to ``get_res_fc_seq_fc`` as ``rnn``.
    :param res_self_att: forwarded to ``get_res_fc_seq_fc`` as ``self_att``.
    :param seq_len: forwarded to ``GenerativeRNN`` as ``seq_len``.
    :return: the configured model instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()

    # The same post-mapper object is handed to every attention wrapper below.
    post_mapper = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)

    def make_attention():
        return AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=post_mapper)

    if q2c:
        q2c_attention = make_attention()
    else:
        q2c_attention = None
    c2q_attention = make_attention()

    lstm_cell = tf.contrib.rnn.LSTMCell(
        num_units=rnn_dim,
        initializer=tf.initializers.truncated_normal(stddev=0.05),
    )
    generator = GenerativeRNN(
        lstm_cell,
        output_layer=FullyConnected(rnn_dim * 2, activation='relu'),
        vec_to_in=FullyConnected(rnn_dim * 2, activation='relu'),
        seq_len=seq_len,
        include_original_vec=False,
    )

    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    glove_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True)
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    mapper = SequenceMapperSeq(VariationalDropoutLayer(0.8), gru)

    return SingleContextBottleneckToSeqQuestionModel(
        encoder=input_encoder,
        word_embed=glove_embedder,
        char_embed=char_embedder,
        embed_mapper=mapper,
        sequence_generator=generator,
        pre_attention=VariationalDropoutLayer(0.8),
        question_to_context_attention=q2c_attention,
        context_to_question_attention=c2q_attention,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor(),
    )
def get_contexts_to_question_model(rnn_dim, post_merge):
    """Assemble a ``ContextsToQuestionModel``.

    The merger's post-map layer is a dense projection followed by a residual
    block whose contents depend on ``post_merge``:

    * ``'res_rnn'``: dropout, recurrent layer, dense.
    * ``'res_self_att'``: dropout, self-attention, dense.
    * ``'res_rnn_self_att'``: dropout, recurrent layer, dropout,
      self-attention, dense.

    :param rnn_dim: size for the ``CudnnGru``; dense layers use ``rnn_dim * 2``.
    :param post_merge: one of the three variant names listed above.
    :return: the configured model instance.
    :raises NotImplementedError: if ``post_merge`` is not a known variant.
    """
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    rnn_variants = ('res_rnn_self_att', 'res_rnn')
    self_att_variants = ('res_rnn_self_att', 'res_self_att')
    use_rnn = post_merge in rnn_variants
    use_self_att = post_merge in self_att_variants
    if not (use_rnn or use_self_att):
        # Name the offending value so a misconfigured caller can see what went wrong.
        raise NotImplementedError(
            "Unsupported post_merge %r; expected 'res_rnn_self_att', "
            "'res_rnn' or 'res_self_att'" % (post_merge,))

    # Layers are created in the same order as in the explicit per-variant form:
    # outer projection first, then the residual block's members.
    projection = FullyConnected(rnn_dim * 2, activation="relu")
    residual_steps = [VariationalDropoutLayer(0.8)]
    if use_rnn:
        residual_steps.append(recurrent_layer)
    if use_rnn and use_self_att:
        # Only the combined variant has a second dropout between the RNN and
        # the self-attention.
        residual_steps.append(VariationalDropoutLayer(0.8))
    if use_self_att:
        residual_steps.append(StaticAttentionSelf(TriLinear(bias=True),
                                                  ConcatWithProduct()))
    residual_steps.append(FullyConnected(rnn_dim * 2, activation="relu"))
    post_map_layer = SequenceMapperSeq(
        projection,
        ResidualLayer(SequenceMapperSeq(*residual_steps)))

    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=50,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        attention_merger=MaxMerge(pre_map_layer=None,
                                  post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None,
                                 min_val=0,
                                 regular_reshape=True),
        predictor=BinaryFixedPredictor())
Example #16
0
class IterativeContextReReadSimpleScoreModel(MultipleContextModel):
    """
    Calculating the similarities by a simple dot product between question and paragraph representations.
    This is a more neat model which we should check to see if on par with the more complicated one above.

    The model works on a question and exactly two contexts, in two iterations:
      1. the sentences of the first context are scored against the encoded question;
      2. the question is "reformulated" using the first context (through the configured attention
         layers), and the sentences of the second context are scored against that reformulation.
    Each iteration max-pools its per-sentence logits and feeds them to its own predictor.
    """
    def __init__(
            self,
            encoder: QuestionsAndParagraphsEncoder,
            word_embed: Optional[WordEmbedder],
            char_embed: Optional[CharWordEmbedder],
            embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
            sequence_encoder: SequenceEncoder,
            sentences_encoder: SentencesEncoder,
            sentence_mapper: Optional[SequenceMapper],
            reread_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
            pre_attention_mapper: Optional[SequenceMapper],
            context_to_question_attention: Optional[AttentionWithPostMapper],
            question_to_context_attention: Optional[AttentionWithPostMapper],
            first_predictor: BinaryNullPredictor,
            second_predictor: BinaryNullPredictor,
            reformulate_by_context: bool,
            max_batch_size: Optional[int] = None,
            elmo_model: Optional[LanguageModel] = None):
        """
        Store the model's layers and validate the attention configuration.

        :param encoder: forwarded to the parent constructor
        :param word_embed: forwarded to the parent constructor
        :param char_embed: forwarded to the parent constructor
        :param embed_mapper: optional mapper applied (with shared variables) to both contexts and the question
        :param sequence_encoder: encodes the question, and later the reformulated representation
        :param sentences_encoder: builds per-sentence representations of each context
        :param sentence_mapper: optional mapper applied to the per-sentence representations
        :param reread_mapper: optional mapper applied to the raw embeddings of the first context and the
            question before reformulation, replacing the `embed_mapper` outputs for that step
        :param pre_attention_mapper: optional mapper applied before (and between) the attention layers
        :param context_to_question_attention: attention with the question as `x` and the first context as
            keys/memories; mandatory when `reformulate_by_context` is False
        :param question_to_context_attention: attention with the first context as `x` and the question as
            keys/memories; mandatory when `reformulate_by_context` is True
        :param first_predictor: predictor for the first context
        :param second_predictor: predictor for the second context
        :param reformulate_by_context: if True the reformulation is the encoding of the (question-aware)
            first context, otherwise it is the encoding of the (context-aware) question
        :param max_batch_size: forwarded to the parent constructor
        :param elmo_model: forwarded to the parent constructor
        :raises ValueError: if the attention layer that produces the final reformulation is None
        """
        super().__init__(encoder=encoder,
                         word_embed=word_embed,
                         char_embed=char_embed,
                         max_batch_size=max_batch_size,
                         elmo_model=elmo_model)
        self.embed_mapper = embed_mapper
        self.sequence_encoder = sequence_encoder
        self.sentences_encoder = sentences_encoder
        self.sentence_mapper = sentence_mapper
        self.reread_mapper = reread_mapper
        self.pre_attention_mapper = pre_attention_mapper
        self.question_to_context_attention = question_to_context_attention
        self.context_to_question_attention = context_to_question_attention
        self.reformulate_by_context = reformulate_by_context
        self.first_predictor = first_predictor
        self.second_predictor = second_predictor
        # Pools the per-sentence logits using the sentence mask.
        # NOTE(review): min_val=VERY_NEGATIVE_NUMBER presumably keeps masked sentences from winning
        # the max - confirm against MaxPool.
        self.max_pool = MaxPool(map_layer=None,
                                min_val=VERY_NEGATIVE_NUMBER,
                                regular_reshape=True)

        # The attention applied last in the reformulation step is not optional:
        # q2c when reformulating by context, c2q otherwise (see _get_predictions_for).
        if (self.reformulate_by_context and question_to_context_attention is None) or \
                (not self.reformulate_by_context and context_to_question_attention is None):
            raise ValueError("The last attention must be defined")

    def _get_predictions_for(self, is_train, question_embed, question_mask,
                             context_embed, context_mask, answer, question_lm,
                             context_lm, sentence_segments, sentence_mask):
        """
        Build the two-iteration prediction graph.

        `context_embed`, `context_mask`, `sentence_segments`, `sentence_mask` (and `context_lm` when ELMo
        is used) are unstacked into exactly two parts along axis 1, one per context.
        `answer` is indexed and sliced as a list: the first predictor receives `[answer[0]] + answer[2:]`
        and the second `[answer[1]] + answer[2:]`.

        :return: MultipleBinaryPredictions wrapping the first and second context predictions, in that order
        """
        question_rep, context_rep = question_embed, context_embed
        # Split every paired input into its first-context and second-context halves.
        context1_rep, context2_rep = tf.unstack(context_rep, axis=1, num=2)
        context1_mask, context2_mask = tf.unstack(context_mask, axis=1, num=2)
        context1_sentence_segments, context2_sentence_segments = tf.unstack(
            sentence_segments, axis=1, num=2)
        context1_sentence_mask, context2_sentence_mask = tf.unstack(
            sentence_mask, axis=1, num=2)
        # Language-model inputs are passed to the mappers as extra positional arguments,
        # so they stay empty lists unless ELMo is in use.
        q_lm_in, c1_lm_in, c2_lm_in = [], [], []
        if self.use_elmo:
            context1_lm, context2_lm = tf.unstack(context_lm, axis=1, num=2)
            q_lm_in = [question_lm]
            c1_lm_in = [context1_lm]
            c2_lm_in = [context2_lm]
        if self.embed_mapper is not None:
            # The scope is first entered for context 1, then re-entered with reuse=True so that
            # context 2 and the question go through the same "map_embed" variables.
            with tf.variable_scope("map_embed"):
                context1_rep = self.embed_mapper.apply(is_train, context1_rep,
                                                       context1_mask,
                                                       *c1_lm_in)
            with tf.variable_scope("map_embed", reuse=True):
                context2_rep = self.embed_mapper.apply(is_train, context2_rep,
                                                       context2_mask,
                                                       *c2_lm_in)
                question_rep = self.embed_mapper.apply(is_train, question_rep,
                                                       question_mask, *q_lm_in)

        with tf.variable_scope("seq_enc"):
            question_enc = self.sequence_encoder.apply(is_train, question_rep,
                                                       question_mask)
            # Give the encoding a stable name and register it in INTERMEDIATE_LAYER_COLLECTION.
            question_enc = tf.identity(question_enc, name='encode_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, question_enc)

        def encode_sentences(context, sentence_segs, sentence_mask, rep_name):
            """
            Turn a context representation into per-sentence representations, optionally map them,
            then name the result `rep_name` and add it to INTERMEDIATE_LAYER_COLLECTION.
            """
            context = self.sentences_encoder.apply(context, sentence_segs,
                                                   sentence_mask)
            if self.sentence_mapper is not None:
                with tf.variable_scope('sentence_mapper'):
                    context = self.sentence_mapper.apply(is_train,
                                                         context,
                                                         mask=sentence_mask)
            context = tf.identity(context, name=rep_name)
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, context)
            return context

        # Both contexts are encoded under the same 'sentences_enc' scope (second call reuses variables).
        with tf.variable_scope('sentences_enc'):
            context1_sent_rep = encode_sentences(context1_rep,
                                                 context1_sentence_segments,
                                                 context1_sentence_mask,
                                                 'encode_context1')
        with tf.variable_scope('sentences_enc', reuse=True):
            context2_sent_rep = encode_sentences(context2_rep,
                                                 context2_sentence_segments,
                                                 context2_sentence_mask,
                                                 'encode_context2')

        # First Iteration (same as in the single context model)
        with tf.variable_scope("context1_relevance"):
            # Dot product of each sentence representation with the question encoding; the question
            # gets a trailing axis so the matmul yields one logit per sentence.
            # (presumably sentence reps are (batch, sentences, dim) and the encoding (batch, dim) - TODO confirm)
            sentence_logits = tf.matmul(context1_sent_rep,
                                        tf.expand_dims(question_enc, axis=2))
            max_logits = self.max_pool.apply(is_train, sentence_logits,
                                             context1_sentence_mask)
            with tf.variable_scope("predictor"):
                # answer[1] is left out here; it is consumed by the second predictor below.
                context1_pred = self.first_predictor.apply(
                    is_train, max_logits, [answer[0]] + answer[2:])

        # Question Reformulation
        with tf.variable_scope("reformulation"):
            if self.reread_mapper is not None:
                # Re-read: discard the embed_mapper outputs and start again from the raw embeddings
                # of the question and the first context, mapping them with reread_mapper instead.
                question_rep, context_rep = question_embed, context_embed
                context1_rep, _ = tf.unstack(context_rep, axis=1, num=2)
                context1_mask, _ = tf.unstack(context_mask, axis=1, num=2)
                # Only an ElmoWrapper mapper is handed the language-model inputs.
                if not isinstance(self.reread_mapper, ElmoWrapper):
                    c1_lm_in, q_lm_in = [], []
                with tf.variable_scope("reread_map_embed"):
                    context1_rep = self.reread_mapper.apply(
                        is_train, context1_rep, context1_mask, *c1_lm_in)
                with tf.variable_scope("reread_map_embed", reuse=True):
                    question_rep = self.reread_mapper.apply(
                        is_train, question_rep, question_mask, *q_lm_in)
            if self.pre_attention_mapper is not None:
                # "pre_att" is created here for the question; every later use reopens it with reuse=True.
                with tf.variable_scope("pre_att"):
                    question_rep = self.pre_attention_mapper.apply(
                        is_train, question_rep, question_mask)
                with tf.variable_scope("pre_att", reuse=True):
                    context1_rep = self.pre_attention_mapper.apply(
                        is_train, context1_rep, context1_mask)
            if not self.reformulate_by_context:
                # Reformulation = encoded question after attending over the first context.
                # An optional q2c pass first makes the context question-aware.
                if self.question_to_context_attention is not None:
                    with tf.variable_scope('q2c'):
                        context1_rep = self.question_to_context_attention.apply(
                            is_train,
                            x=context1_rep,
                            keys=question_rep,
                            memories=question_rep,
                            x_mask=context1_mask,
                            memory_mask=question_mask)
                    if self.pre_attention_mapper is not None:
                        with tf.variable_scope("pre_att", reuse=True):
                            context1_rep = self.pre_attention_mapper.apply(
                                is_train, context1_rep, context1_mask)
                # Mandatory in this branch (checked in __init__).
                with tf.variable_scope('c2q'):
                    question_rep = self.context_to_question_attention.apply(
                        is_train,
                        x=question_rep,
                        keys=context1_rep,
                        memories=context1_rep,
                        x_mask=question_mask,
                        memory_mask=context1_mask)
                reformulated_q = self.sequence_encoder.apply(
                    is_train, question_rep, question_mask)
            else:
                # Reformulation = encoded first context after attending over the question.
                # An optional c2q pass first makes the question context-aware.
                if self.context_to_question_attention is not None:
                    with tf.variable_scope('c2q'):
                        question_rep = self.context_to_question_attention.apply(
                            is_train,
                            x=question_rep,
                            keys=context1_rep,
                            memories=context1_rep,
                            x_mask=question_mask,
                            memory_mask=context1_mask)
                    if self.pre_attention_mapper is not None:
                        with tf.variable_scope("pre_att", reuse=True):
                            question_rep = self.pre_attention_mapper.apply(
                                is_train, question_rep, question_mask)
                # Mandatory in this branch (checked in __init__).
                with tf.variable_scope('q2c'):
                    context1_rep = self.question_to_context_attention.apply(
                        is_train,
                        x=context1_rep,
                        keys=question_rep,
                        memories=question_rep,
                        x_mask=context1_mask,
                        memory_mask=question_mask)
                reformulated_q = self.sequence_encoder.apply(
                    is_train, context1_rep, context1_mask)
            reformulated_q = tf.identity(reformulated_q,
                                         name='reformulated_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, reformulated_q)

        # Second Iteration
        with tf.variable_scope("context2_relevance"):
            # Same scoring as the first iteration, with the reformulation in place of the question encoding.
            sentence_logits = tf.matmul(context2_sent_rep,
                                        tf.expand_dims(reformulated_q, axis=2))
            max_logits = self.max_pool.apply(is_train, sentence_logits,
                                             context2_sentence_mask)
            with tf.variable_scope("predictor"):
                context2_pred = self.second_predictor.apply(
                    is_train, max_logits, [answer[1]] + answer[2:])

        return MultipleBinaryPredictions([context1_pred, context2_pred])
Example #17
0
class SingleContextMaxSentenceModel(MultipleContextModel):
    """
    Sentence-aware model for a question paired with a single paragraph.

    Every sentence of the paragraph gets its own encoding, which is merged with the question encoding
    and projected by a fully connected layer to a sentence-level logit. The logits are max-pooled over
    the sentences and the pooled value is handed to the predictor.
    """

    def __init__(
            self,
            encoder: QuestionsAndParagraphsEncoder,
            word_embed: Optional[WordEmbedder],
            char_embed: Optional[CharWordEmbedder],
            embed_mapper: Optional[Union[SequenceMapper, ElmoWrapper]],
            sequence_encoder: SequenceEncoder,
            sentences_encoder: SentencesEncoder,
            merger: FixedMergeLayer,
            post_merger: Optional[Mapper],
            max_batch_size: Optional[int] = None,
            elmo_model: Optional[LanguageModel] = None):
        """
        Keep references to the configurable layers and create the fixed ones
        (null predictor, max pooling, mean pooling).
        """
        super().__init__(
            encoder=encoder,
            word_embed=word_embed,
            char_embed=char_embed,
            max_batch_size=max_batch_size,
            elmo_model=elmo_model,
        )
        # Layers supplied by the caller.
        self.embed_mapper = embed_mapper
        self.sequence_encoder = sequence_encoder
        self.sentences_encoder = sentences_encoder
        self.merger = merger
        self.post_merger = post_merger
        # Layers that are always built the same way.
        self.predictor = BinaryNullPredictor()
        self.max_pool = MaxPool(
            map_layer=None,
            min_val=VERY_NEGATIVE_NUMBER,
            regular_reshape=True,
        )
        self.mean_pool = MeanPool()

    def _get_predictions_for(self, is_train, question_embed, question_mask,
                             context_embed, context_mask, answer, question_lm,
                             context_lm, sentence_segments, sentence_mask):
        """
        Build the prediction graph for one question and its single paragraph.

        The context-shaped inputs are unstacked along axis 1 with num=1, i.e. they must hold
        exactly one paragraph. Returns whatever the predictor produces for the max-pooled
        sentence logits and `answer`.
        """
        # Drop the singleton context axis from each paragraph-level input.
        paragraph = tf.unstack(context_embed, axis=1, num=1)[0]
        paragraph_mask = tf.unstack(context_mask, axis=1, num=1)[0]
        segments = tf.unstack(sentence_segments, axis=1, num=1)[0]
        sent_mask = tf.unstack(sentence_mask, axis=1, num=1)[0]

        # Extra positional inputs for the mapper; populated only when ELMo is in use.
        question_lm_args, paragraph_lm_args = [], []
        if self.use_elmo:
            paragraph_lm = tf.unstack(context_lm, axis=1, num=1)[0]
            question_lm_args = [question_lm]
            paragraph_lm_args = [paragraph_lm]

        question = question_embed
        mapper = self.embed_mapper
        if mapper is not None:
            # Paragraph first creates the "map_embed" variables, the question then reuses them.
            with tf.variable_scope("map_embed"):
                paragraph = mapper.apply(is_train, paragraph, paragraph_mask, *paragraph_lm_args)
            with tf.variable_scope("map_embed", reuse=True):
                question = mapper.apply(is_train, question, question_mask, *question_lm_args)

        with tf.variable_scope("seq_enc"):
            question_vec = self.sequence_encoder.apply(is_train, question, question_mask)

        with tf.variable_scope("sentences_enc"):
            # Name the per-sentence encodings and expose them as an intermediate layer.
            sentence_reps = tf.identity(
                self.sentences_encoder.apply(paragraph, segments, sent_mask),
                name='encode_context')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, sentence_reps)

        with tf.variable_scope("merger"):
            merged = self.merger.apply(
                is_train,
                tensor=sentence_reps,
                fixed_tensor=question_vec,
                mask=sent_mask)

        if self.post_merger is not None:
            with tf.variable_scope("post_merger"):
                merged = self.post_merger.apply(is_train, merged, mask=sent_mask)

        with tf.variable_scope("sentence_level_predictions"):
            # Single-output linear projection of each merged sentence representation.
            per_sentence_logits = fully_connected(
                merged, 1,
                use_bias=True,
                activation=None,
                kernel_initializer=get_keras_initialization('glorot_uniform'))
            pooled_logits = self.max_pool.apply(is_train, per_sentence_logits, sent_mask)

        with tf.variable_scope("predictor"):
            prediction = self.predictor.apply(is_train, pooled_logits, answer)
        return prediction

    def __setstate__(self, state):
        """Restore state, defaulting `post_merger` to None when the saved state lacks that key."""
        state.setdefault("post_merger", None)
        super().__setstate__(state)