def get_sentences_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Assemble a ``SingleContextMaxSentenceModel``.

    Args:
        rnn_dim: Size handed to the ``CudnnGru`` used by the embedding mapper.
        use_elmo: When truthy, the model returned by ``get_wiki_elmo()`` is
            attached and the embedding mapper is wrapped in an
            ``ElmoWrapper``.
        keep_rate: Value passed to the ``VariationalDropoutLayer`` placed in
            front of the recurrent layer.

    Returns:
        The configured ``SingleContextMaxSentenceModel`` instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    elmo = None
    if use_elmo:
        print("Using Elmo!")
        elmo = get_wiki_elmo()
        elmo_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=elmo_reduce,
        )

    # Components are created in the same order the model receives them.
    input_encoder = QuestionsAndParagraphsEncoder(
        binary_answers,
        use_sentence_segments=True,
        paragraph_as_sentence=True,
    )
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    pooled_encoder = MaxPool(
        map_layer=None,
        min_val=VERY_NEGATIVE_NUMBER,
        regular_reshape=True,
    )
    sentence_encoder = SentenceMaxEncoder()
    concat_merger = WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True)

    return SingleContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo,
        embed_mapper=mapper,
        sequence_encoder=pooled_encoder,
        sentences_encoder=sentence_encoder,
        post_merger=None,
        merger=concat_merger,
        max_batch_size=256,
    )
def get_reread_model(rnn_dim, use_elmo, encoder_keep_rate=0.8, reread_keep_rate=0.8,
                     two_phase_att=False, res_rnn=True, res_self_att=False,
                     multiply_iteration_probs=False, reformulate_by_context=False,
                     rank_first=False, rank_second=False, reread_rnn_dim=None,
                     first_rank_lambda=1.0, second_rank_lambda=1.0, ranking_gamma=1.0):
    """Assemble an ``IterativeContextReReadModel``.

    Args:
        rnn_dim: Size of the embedding ``CudnnGru``; also forwarded to the
            residual post-mapper, or used as ``rnn_dim * 2`` for the plain
            ``FullyConnected`` fallback.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is attached
            and the embedding mapper is wrapped in an ``ElmoWrapper``.
        encoder_keep_rate: Passed to the ``VariationalDropoutLayer`` in front
            of the embedding GRU.
        reread_keep_rate: Forwarded as ``keep_rate`` to ``get_res_fc_seq_fc``.
        two_phase_att: When truthy, both attention directions are enabled.
        res_rnn: Forwarded as ``rnn`` to ``get_res_fc_seq_fc``.
        res_self_att: Forwarded as ``self_att`` to ``get_res_fc_seq_fc``.
        multiply_iteration_probs: Forwarded to the model unchanged.
        reformulate_by_context: Forwarded to the model; without
            ``two_phase_att`` it also picks which single attention direction
            is supplied.
        rank_first: First positional argument of the first
            ``BinaryNullPredictor``.
        rank_second: First positional argument of the second
            ``BinaryNullPredictor``.
        reread_rnn_dim: If not ``None``, a ``CudnnGru`` of this size is used
            as ``reread_mapper``.
        first_rank_lambda: ``ranking_lambda`` of the first predictor.
        second_rank_lambda: ``ranking_lambda`` of the second predictor.
        ranking_gamma: ``gamma`` of both predictors.

    Returns:
        The configured ``IterativeContextReReadModel`` instance.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder(group=rank_first or rank_second)
    mapper = SequenceMapperSeq(VariationalDropoutLayer(encoder_keep_rate), gru)

    # Post-attention mapper: residual stack when requested, otherwise a plain
    # fully connected projection.
    if not (res_rnn or res_self_att):
        post_mapper = FullyConnected(rnn_dim * 2, activation="relu")
    else:
        post_mapper = get_res_fc_seq_fc(
            model_rnn_dim=rnn_dim,
            rnn=res_rnn,
            self_att=res_self_att,
            keep_rate=reread_keep_rate,
        )
    shared_attention = AttentionWithPostMapper(
        BiAttention(TriLinear(bias=True), True),
        post_mapper=post_mapper,
    )

    # Direction switches: two-phase enables both, otherwise exactly one.
    use_c2q = two_phase_att or not reformulate_by_context
    use_q2c = two_phase_att or reformulate_by_context

    elmo = None
    if use_elmo:
        print("Using Elmo!")
        elmo = get_hotpot_elmo()
        elmo_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=elmo_reduce,
        )

    # Components are created in the same order the model receives them.
    input_encoder = QuestionsAndParagraphsEncoder(
        iterative_answers,
        use_sentence_segments=True,
        paragraph_as_sentence=True,
    )
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    pooled_encoder = MaxPool(
        map_layer=None,
        min_val=VERY_NEGATIVE_NUMBER,
        regular_reshape=True,
    )
    sentence_encoder = SentenceMaxEncoder()
    concat_merger = WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True)

    if reread_rnn_dim is None:
        reread_mapper = None
    else:
        reread_mapper = CudnnGru(reread_rnn_dim, w_init=TruncatedNormal(stddev=0.05))

    c2q_attention = shared_attention if use_c2q else None
    q2c_attention = shared_attention if use_q2c else None

    first_predictor = BinaryNullPredictor(
        rank_first, ranking_lambda=first_rank_lambda, gamma=ranking_gamma
    )
    second_predictor = BinaryNullPredictor(
        rank_second, ranking_lambda=second_rank_lambda, gamma=ranking_gamma
    )

    return IterativeContextReReadModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo,
        embed_mapper=mapper,
        sequence_encoder=pooled_encoder,
        sentences_encoder=sentence_encoder,
        sentence_mapper=None,
        post_merger=None,
        merger=concat_merger,
        reread_mapper=reread_mapper,
        # Disabled alternative: VariationalDropoutLayer(reread_keep_rate)
        pre_attention_mapper=None,
        context_to_question_attention=c2q_attention,
        question_to_context_attention=q2c_attention,
        reformulate_by_context=reformulate_by_context,
        multiply_iteration_probs=multiply_iteration_probs,
        first_predictor=first_predictor,
        second_predictor=second_predictor,
        max_batch_size=512,
    )
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    """Assemble an ``AttentionQAWithYesNo`` model.

    A single ``CudnnGru`` instance is reused in the embedding mapper, in both
    slots of the predictor's ``ChainBiMapper`` and inside the residual part of
    the match encoder.

    Args:
        rnn_dim: Size of the shared ``CudnnGru``; ``rnn_dim * 2`` is the size
            of the ``FullyConnected`` layers in the match encoder.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is attached
            and the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Passed to every ``VariationalDropoutLayer`` built here.

    Returns:
        The configured ``AttentionQAWithYesNo`` instance.
    """
    shared_gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        shared_gru,
        VariationalDropoutLayer(keep_rate),
    )

    elmo = None
    if use_elmo:
        print("Using Elmo!")
        elmo = get_hotpot_elmo()
        elmo_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=elmo_reduce,
        )

    span_answers = GroupedSpanAnswerEncoderWithYesNo(group=True)
    bounds_predictor = BoundsPredictor(
        ChainBiMapper(first_layer=shared_gru, second_layer=shared_gru),
        span_predictor=IndependentBoundsGroupedWithYesNo(),
    )

    # Components are created in the same order the model receives them.
    input_encoder = QuestionsAndParagraphsEncoder(
        span_answers, use_sentence_segments=False
    )
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    memory_builder = NullBiMapper()
    bi_attention = BiAttention(TriLinear(bias=True), True)

    # Match encoder: projection -> residual (GRU + self attention) -> dropout.
    match_projection = FullyConnected(rnn_dim * 2, activation="relu")
    residual_body = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        shared_gru,
        VariationalDropoutLayer(keep_rate),
        StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
        FullyConnected(rnn_dim * 2, activation="relu"),
    )
    match_encoder = SequenceMapperSeq(
        match_projection,
        ResidualLayer(residual_body),
        VariationalDropoutLayer(keep_rate),
    )

    question_pool = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    context_pool = MaxPool(map_layer=None, min_val=0, regular_reshape=True)

    return AttentionQAWithYesNo(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo,
        embed_mapper=mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=memory_builder,
        attention=bi_attention,
        match_encoder=match_encoder,
        predictor=bounds_predictor,
        yes_no_question_encoder=question_pool,
        yes_no_context_encoder=context_pool,
    )
def get_reread_merge_model(rnn_dim, use_elmo, keep_rate=0.8, res_rnn=True, res_self_att=False,
                           multiply_iteration_probs=False):
    """Assemble an ``IterativeContextReReadMergeModel``.

    Args:
        rnn_dim: Size of the embedding ``CudnnGru``; also forwarded to the
            residual post-mapper, or used as ``rnn_dim * 2`` for the plain
            ``FullyConnected`` fallback.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is attached
            and the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Passed to the ``VariationalDropoutLayer`` in front of the
            embedding GRU and forwarded as ``keep_rate`` to
            ``get_res_fc_seq_fc``.
        res_rnn: Forwarded as ``rnn`` to ``get_res_fc_seq_fc``.
        res_self_att: Forwarded as ``self_att`` to ``get_res_fc_seq_fc``.
        multiply_iteration_probs: Forwarded to the model unchanged.

    Returns:
        The configured ``IterativeContextReReadMergeModel`` instance.
    """
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder()
    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)

    if res_rnn or res_self_att:
        # Forward the caller's keep_rate so the residual block does not
        # silently fall back to the helper's own default; this matches how
        # get_reread_model calls the same helper.
        res_model = get_res_fc_seq_fc(
            model_rnn_dim=rnn_dim,
            rnn=res_rnn,
            self_att=res_self_att,
            keep_rate=keep_rate,
        )
    else:
        res_model = FullyConnected(rnn_dim * 2, activation="relu")

    # The same attention object is supplied for both directions.
    attention = AttentionWithPostMapper(
        BiAttention(TriLinear(bias=True), True),
        post_mapper=res_model,
    )

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=embed_mapper,
            lm_reduce=lm_reduce,
        )

    return IterativeContextReReadMergeModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True),
        word_embed=FixedWordEmbedder(
            vec_name="glove.840B.300d",
            word_vec_init_scale=0,
            learn_unk=False,
            cpu=True,
        ),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(
                word_size_th=14,
                char_th=50,
                char_dim=20,
                init_scale=0.05,
                force_cpu=True,
            ),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True,
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=False, raw=True),
        context_to_question_attention=attention,
        question_to_context_attention=attention,
        reread_merger=ConcatWithProduct(),
        multiply_iteration_probs=multiply_iteration_probs,
        max_batch_size=128,
    )
def get_model(rnn_dim, use_elmo, keep_rate=0.8):
    """Assemble an ``IterativeContextMaxSentenceModel``.

    The ``CudnnGru`` built for the embedding mapper is also handed to the
    model as ``sentence_mapper``.

    Args:
        rnn_dim: Size of the shared ``CudnnGru`` and of the
            ``CudnnGruEncoder`` used by the reformulation layer.
        use_elmo: When truthy, the model from ``get_hotpot_elmo()`` is attached
            and the embedding mapper is wrapped in an ``ElmoWrapper``.
        keep_rate: Passed to the ``VariationalDropoutLayer`` in front of the
            embedding GRU.

    Returns:
        The configured ``IterativeContextMaxSentenceModel`` instance.
    """
    shared_gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    iterative_answers = IterativeAnswerEncoder()
    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), shared_gru)

    elmo = None
    if use_elmo:
        print("Using Elmo!")
        elmo = get_hotpot_elmo()
        elmo_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        mapper = ElmoWrapper(
            input_append=True,
            output_append=False,
            rnn_layer=mapper,
            lm_reduce=elmo_reduce,
        )

    # Alternative reformulation layers that were left disabled:
    #   WeightedSumThenProjectReformulation(rnn_dim*2, activation='relu')
    #   ProjectThenWeightedSumReformulation(rnn_dim*2, activation='relu')
    reformulation_layer = ProjectMapEncodeReformulation(
        project_layer=None,
        sequence_mapper=None,
        encoder=CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05)),
    )

    # Components are created in the same order the model receives them.
    input_encoder = QuestionsAndParagraphsEncoder(
        iterative_answers, use_sentence_segments=True
    )
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    pooled_encoder = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    sentence_encoder = SentenceMaxEncoder()
    concat_merger = WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True)

    return IterativeContextMaxSentenceModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo,
        embed_mapper=mapper,
        sequence_encoder=pooled_encoder,
        sentences_encoder=sentence_encoder,
        sentence_mapper=shared_gru,
        post_merger=None,
        merger=concat_merger,
        reformulation_layer=reformulation_layer,
        max_batch_size=128,
    )
def get_basic_model(rnn_dim, post_merger_params: Optional[dict] = None, use_elmo=False, keep_rate=0.8):
    """Assemble a ``BasicSingleContextAndQuestionIndependentModel``.

    Args:
        rnn_dim: Size handed to the ``CudnnGru`` used by the embedding mapper.
        post_merger_params: Keyword arguments for ``get_mlp``; when ``None``
            the model gets no post-merger.
        use_elmo: When truthy, the model from ``get_squad_elmo()`` is attached
            and the embedding mapper is wrapped in an ``ElmoWrapper`` (built
            with ``output_append=True``).
        keep_rate: Passed to the ``VariationalDropoutLayer`` in front of the
            recurrent layer.

    Returns:
        The configured ``BasicSingleContextAndQuestionIndependentModel``.
    """
    gru = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    binary_answers = BinaryAnswerEncoder()
    # A deeper variant (the same dropout+GRU pair followed by two residual
    # copies of it) was left disabled in favour of this single mapper.
    mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), gru)

    elmo = None
    if use_elmo:
        print("Using Elmo!")
        elmo = get_squad_elmo()
        elmo_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        mapper = ElmoWrapper(
            input_append=True,
            output_append=True,
            rnn_layer=mapper,
            lm_reduce=elmo_reduce,
        )

    if post_merger_params is not None:
        post_merger = get_mlp(**post_merger_params)
    else:
        post_merger = None

    # Components are created in the same order the model receives them.
    input_encoder = QuestionsAndParagraphsEncoder(binary_answers)
    word_embedder = FixedWordEmbedder(
        vec_name="glove.840B.300d",
        word_vec_init_scale=0,
        learn_unk=False,
        cpu=True,
    )
    char_embedder = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=50,
            char_dim=20,
            init_scale=0.05,
            force_cpu=True,
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True,
    )
    pooled_encoder = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    merger = ConcatWithProductSub()
    predictor = BinaryFixedPredictor(sigmoid=True)

    return BasicSingleContextAndQuestionIndependentModel(
        encoder=input_encoder,
        word_embed=word_embedder,
        char_embed=char_embedder,
        elmo_model=elmo,
        embed_mapper=mapper,
        sequence_encoder=pooled_encoder,
        merger=merger,
        post_merger=post_merger,
        predictor=predictor,
        max_batch_size=128,
    )
def get_mlp(layer_sizes: List[int], dropout=0.5, activation='relu'):
    """Build a ``MapperSeq`` of fully connected layers, each followed by dropout.

    Args:
        layer_sizes: One ``FullyConnected`` layer is created per entry, in
            order, using the entry as its size argument.
        dropout: Value passed to the ``DropoutLayer`` placed after every
            fully connected layer, including the last one.
        activation: Activation name passed to each ``FullyConnected`` layer.

    Returns:
        A ``MapperSeq`` holding the interleaved layers.
    """
    stacked = [
        layer
        for size in layer_sizes
        for layer in (
            FullyConnected(size, activation=activation),
            DropoutLayer(dropout),
        )
    ]
    return MapperSeq(*stacked)