def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    # Recurrent encoder shared between the embedding mapper, the residual
    # match encoder, and the span-boundary predictor
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        recurrent_layer,
        VariationalDropoutLayer(keep_rate),
    )
    elmo_model = None
    if use_elmo:
        # Optionally append pre-trained ELMo representations to the input of the embedding mapper
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False,
                                   rnn_layer=embed_mapper, lm_reduce=lm_reduce)
    answer_encoder = GroupedSpanAnswerEncoderWithYesNo(group=True)
    # Start/end boundaries come from a chained bi-mapper; the grouped
    # predictor also produces the yes/no decision
    predictor = BoundsPredictor(
        ChainBiMapper(
            first_layer=recurrent_layer,
            second_layer=recurrent_layer
        ),
        span_predictor=IndependentBoundsGroupedWithYesNo()
    )
    return AttentionQAWithYesNo(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=False),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=NullBiMapper(),
        # BiDAF-style bi-attention between question and context
        attention=BiAttention(TriLinear(bias=True), True),
        # Post-attention mapper: fully connected layer plus a residual self-attention block
        match_encoder=SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(SequenceMapperSeq(
                VariationalDropoutLayer(keep_rate),
                recurrent_layer,
                VariationalDropoutLayer(keep_rate),
                StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                FullyConnected(rnn_dim * 2, activation="relu"),
            )),
            VariationalDropoutLayer(keep_rate)),
        predictor=predictor,
        yes_no_question_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        yes_no_context_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    )
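
# Usage sketch (illustrative only; the rnn_dim value below is an assumed example,
# not a setting taken from this file). The builder is expected to be called from
# a training script and returns an AttentionQAWithYesNo model that bundles the
# embedders, bi-attention, span predictor, and yes/no heads:
#
#   model = get_model_with_yes_no(rnn_dim=150, use_elmo=True, keep_rate=0.8)
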
def get_res_fc_seq_fc(model_rnn_dim, rnn: bool, self_att: bool):
    seq_mapper = []
    if not rnn and not self_att:
        raise NotImplementedError()
    if rnn:
        seq_mapper.extend([VariationalDropoutLayer(0.8),
                           CudnnGru(model_rnn_dim, w_init=TruncatedNormal(stddev=0.05))])
    if self_att:
        seq_mapper.extend([VariationalDropoutLayer(0.8),
                           StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct())])
    seq_mapper.append(FullyConnected(model_rnn_dim * 2, activation="relu"))
    return SequenceMapperSeq(FullyConnected(model_rnn_dim * 2, activation="relu"),
                             ResidualLayer(SequenceMapperSeq(*seq_mapper)))
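
# Usage sketch (illustrative, assumed parameters). With both flags enabled the
# residual branch is dropout -> GRU -> dropout -> self-attention -> fully connected;
# at least one of rnn / self_att must be set, otherwise NotImplementedError is raised:
#
#   match_encoder = get_res_fc_seq_fc(model_rnn_dim=150, rnn=True, self_att=True)
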
def get_contexts_to_question_model(rnn_dim, post_merge):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()
    # Choose the mapper applied after merging the per-context attention outputs
    if post_merge == 'res_rnn_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(SequenceMapperSeq(
                VariationalDropoutLayer(0.8),
                recurrent_layer,
                VariationalDropoutLayer(0.8),
                StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                FullyConnected(rnn_dim * 2, activation="relu"),
            )))
    elif post_merge == 'res_rnn':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(SequenceMapperSeq(
                VariationalDropoutLayer(0.8),
                recurrent_layer,
                FullyConnected(rnn_dim * 2, activation="relu"),
            )))
    elif post_merge == 'res_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(SequenceMapperSeq(
                VariationalDropoutLayer(0.8),
                StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                FullyConnected(rnn_dim * 2, activation="relu"),
            )))
    else:
        raise NotImplementedError()
    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        # Per-context attention outputs are combined with a max-pooling merge,
        # then passed through the post_map_layer selected above
        attention_merger=MaxMerge(pre_map_layer=None, post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor())
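
# Usage sketch (illustrative, assumed parameters). post_merge selects one of the
# three variants above ('res_rnn_self_att', 'res_rnn', or 'res_self_att'); any
# other value raises NotImplementedError:
#
#   model = get_contexts_to_question_model(rnn_dim=150, post_merge='res_rnn_self_att')
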