def get_multi_hop_model(rnn_dim, c2c: bool, q2c: bool, res_rnn: bool, res_self_att: bool,
                        post_merge: bool, encoder: str, merge_type: str, num_c2c_hops: int):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)
    context_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if c2c else None
    question_to_context = \
        AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model) if q2c else None

    if encoder == 'max':
        sequence_encoder = MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    elif encoder == 'rnn':
        sequence_encoder = CudnnGruEncoder(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    else:
        raise NotImplementedError()

    if merge_type == 'max':
        attention_merger = MaxMerge(pre_map_layer=None,
                                    post_map_layer=(res_model if post_merge else None))
    else:
        attention_merger = WeightedMerge(pre_map_layer=None,
                                         post_map_layer=(res_model if post_merge else None),
                                         weight_type=merge_type)

    return MultiHopContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                                        init_scale=0.05, force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        question_to_context_attention=question_to_context,
        context_to_context_attention=context_to_context,
        c2c_hops=num_c2c_hops,
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        attention_merger=attention_merger,
        sequence_encoder=sequence_encoder,
        predictor=BinaryFixedPredictor())
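
# Hedged usage sketch: the hyperparameter values below are illustrative assumptions,
# not settings taken from the original experiments. It shows one valid flag combination:
# two context-to-context hops, max-pool sequence encoding, and max merging with a
# residual post-merge block.
# multi_hop_model = get_multi_hop_model(rnn_dim=150, c2c=True, q2c=False,
#                                       res_rnn=True, res_self_att=False,
#                                       post_merge=True, encoder='max',
#                                       merge_type='max', num_c2c_hops=2)
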
def get_model(rnn_dim):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()
    return ContextPairRelevanceModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                                        init_scale=0.05, force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            # VariationalDropoutLayer(0.8),  # fixme probably doesn't belong here
        ),
        question_to_context_attention=None,
        context_to_context_attention=None,
        context_to_question_attention=None,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        merger=MergeTwoContextsConcatQuestion(),
        predictor=BinaryFixedPredictor())
def get_reread_model(rnn_dim, use_elmo, encoder_keep_rate=0.8, reread_keep_rate=0.8, two_phase_att=False,
                     res_rnn=True, res_self_att=False, multiply_iteration_probs=False,
                     reformulate_by_context=False, rank_first=False, rank_second=False, reread_rnn_dim=None,
                     first_rank_lambda=1.0, second_rank_lambda=1.0, ranking_gamma=1.0):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder(group=rank_first or rank_second)
    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(encoder_keep_rate), recurrent_layer)

    if res_rnn or res_self_att:
        res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att,
                                      keep_rate=reread_keep_rate)
    else:
        res_model = FullyConnected(rnn_dim * 2, activation="relu")
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model)

    use_c2q = two_phase_att or not reformulate_by_context
    use_q2c = two_phase_att or reformulate_by_context

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper,
                                   lm_reduce=lm_reduce)

    return IterativeContextReReadModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True,
                                              paragraph_as_sentence=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=VERY_NEGATIVE_NUMBER, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reread_mapper=None if reread_rnn_dim is None else CudnnGru(reread_rnn_dim,
                                                                   w_init=TruncatedNormal(stddev=0.05)),
        pre_attention_mapper=None,  # VariationalDropoutLayer(reread_keep_rate),
        context_to_question_attention=attention if use_c2q else None,
        question_to_context_attention=attention if use_q2c else None,
        reformulate_by_context=reformulate_by_context,
        multiply_iteration_probs=multiply_iteration_probs,
        first_predictor=BinaryNullPredictor(rank_first, ranking_lambda=first_rank_lambda, gamma=ranking_gamma),
        second_predictor=BinaryNullPredictor(rank_second, ranking_lambda=second_rank_lambda, gamma=ranking_gamma),
        max_batch_size=512
    )
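
# Hedged usage sketch: argument values are illustrative assumptions, not the original
# training configuration. It builds an iterative re-read model with ELMo, question
# reformulation by the question-to-context attention, and ranking losses on both
# iterations.
# reread_model = get_reread_model(rnn_dim=150, use_elmo=True,
#                                 reformulate_by_context=True,
#                                 rank_first=True, rank_second=True,
#                                 reread_rnn_dim=150)
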
def get_model_with_yes_no(rnn_dim: int, use_elmo, keep_rate=0.8):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(keep_rate),
        recurrent_layer,
        VariationalDropoutLayer(keep_rate),
    )

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper,
                                   lm_reduce=lm_reduce)

    answer_encoder = GroupedSpanAnswerEncoderWithYesNo(group=True)
    predictor = BoundsPredictor(
        ChainBiMapper(
            first_layer=recurrent_layer,
            second_layer=recurrent_layer
        ),
        span_predictor=IndependentBoundsGroupedWithYesNo()
    )

    return AttentionQAWithYesNo(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=False),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        question_mapper=None,
        context_mapper=None,
        memory_builder=NullBiMapper(),
        attention=BiAttention(TriLinear(bias=True), True),
        match_encoder=SequenceMapperSeq(FullyConnected(rnn_dim * 2, activation="relu"),
                                        ResidualLayer(SequenceMapperSeq(
                                            VariationalDropoutLayer(keep_rate),
                                            recurrent_layer,
                                            VariationalDropoutLayer(keep_rate),
                                            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                                            FullyConnected(rnn_dim * 2, activation="relu"),
                                        )),
                                        VariationalDropoutLayer(keep_rate)),
        predictor=predictor,
        yes_no_question_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        yes_no_context_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True)
    )
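
# Hedged usage sketch (values are illustrative assumptions): the span + yes/no
# answer model with ELMo enabled and the default dropout keep rate.
# yes_no_model = get_model_with_yes_no(rnn_dim=150, use_elmo=True, keep_rate=0.8)
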
def get_res_fc_seq_fc(model_rnn_dim, rnn: bool, self_att: bool, keep_rate=0.8):
    seq_mapper = []
    if not rnn and not self_att:
        raise NotImplementedError()
    if rnn:
        seq_mapper.extend([VariationalDropoutLayer(keep_rate),
                           CudnnGru(model_rnn_dim, w_init=TruncatedNormal(stddev=0.05))])
    if self_att:
        seq_mapper.extend([VariationalDropoutLayer(keep_rate),
                           StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct())])
    seq_mapper.append(FullyConnected(model_rnn_dim * 2, activation="relu"))
    return SequenceMapperSeq(FullyConnected(model_rnn_dim * 2, activation="relu"),
                             ResidualLayer(SequenceMapperSeq(
                                 *seq_mapper
                             )))
    def _apply_transposed(self, is_train, x, initial_states=None):
        w_init = TruncatedNormal(stddev=0.05)
        x_size = x.shape.as_list()[-1]
        if x_size is None:
            raise ValueError("Last dimension must be defined (have shape %s)" % str(x.shape))

        cell = cudnn_rnn_ops.CudnnLSTM(1, self.n_out, x_size, input_mode="linear_input")

        # We need to know the mapping of weights/biases -> CudnnLSTM parameters, so just
        # build a `CudnnLSTM` and read its fields
        c = cudnn_layers.CudnnLSTM(1, self.n_out)
        c._input_size = x.shape.as_list()[-1]
        w_shapes = c.canonical_weight_shapes
        b_shapes = c.canonical_bias_shapes

        weights = [w_init(s, tf.float32) for s in w_shapes]
        biases = [tf.zeros(s, tf.float32) for s in b_shapes]
        # Split the forget-gate bias (canonical indices 1 and 5) so the input-side and
        # recurrent-side halves sum to `lstm_bias`
        biases[1] = tf.constant(self.lstm_bias / 2.0, tf.float32, b_shapes[1])
        biases[5] = tf.constant(self.lstm_bias / 2.0, tf.float32, b_shapes[5])
        opaque_params_t = cell.canonical_to_params(weights, biases)
        parameters = tf.get_variable("opaque_kernel", initializer=opaque_params_t,
                                     validate_shape=False)

        p = 1.0 - self.dropout
        if is_train and self.dropout > 0:
            # Recurrent (variational) dropout: mask the hidden-to-hidden weight matrices
            # (canonical indices 4-7) with a single mask shared across time steps
            mult_bias = [tf.ones_like(b) for b in biases]
            mult_w = [tf.ones_like(w) for w in weights]
            bias_mask = tf.floor(tf.random_uniform((self.n_out,), p, 1 + p)) / p
            for j in range(4, 8):
                mult_w[j] *= tf.expand_dims(bias_mask, 0)
            mult_mask = cell.canonical_to_params(mult_w, mult_bias)
            parameters = parameters * mult_mask

        if initial_states is None:
            # Fall back to zero initial states ([num_layers, batch, n_out]; x is time-major)
            initial_state = tf.zeros((1, tf.shape(x)[1], self.n_out), tf.float32)
            initial_states = (initial_state, initial_state)
        initial_state_h, initial_state_c = initial_states
        out = cell(x, initial_state_h, initial_state_c, parameters, True)[0]
        return out
def get_reread_merge_model(rnn_dim, use_elmo, keep_rate=0.8, res_rnn=True, res_self_att=False,
                           multiply_iteration_probs=False):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder()
    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)

    if res_rnn or res_self_att:
        res_model = get_res_fc_seq_fc(model_rnn_dim=rnn_dim, rnn=res_rnn, self_att=res_self_att)
    else:
        res_model = FullyConnected(rnn_dim * 2, activation="relu")
    attention = AttentionWithPostMapper(BiAttention(TriLinear(bias=True), True), post_mapper=res_model)

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper,
                                   lm_reduce=lm_reduce)

    return IterativeContextReReadMergeModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=None,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=False, raw=True),
        context_to_question_attention=attention,
        question_to_context_attention=attention,
        reread_merger=ConcatWithProduct(),
        multiply_iteration_probs=multiply_iteration_probs,
        max_batch_size=128
    )
def get_model(rnn_dim, use_elmo, keep_rate=0.8):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = IterativeAnswerEncoder()
    embed_mapper = SequenceMapperSeq(VariationalDropoutLayer(keep_rate), recurrent_layer)

    elmo_model = None
    if use_elmo:
        print("Using Elmo!")
        elmo_model = get_hotpot_elmo()
        lm_reduce = MapperSeq(
            ElmoLayer(0, layer_norm=False, top_layer_only=False),
            DropoutLayer(0.5),
        )
        embed_mapper = ElmoWrapper(input_append=True, output_append=False, rnn_layer=embed_mapper,
                                   lm_reduce=lm_reduce)

    reformulation = ProjectMapEncodeReformulation(project_layer=None, sequence_mapper=None,
                                                  encoder=CudnnGruEncoder(rnn_dim,
                                                                          w_init=TruncatedNormal(stddev=0.05)))
    # reformulation = WeightedSumThenProjectReformulation(rnn_dim*2, activation='relu')
    # reformulation = ProjectThenWeightedSumReformulation(rnn_dim*2, activation='relu')

    return IterativeContextMaxSentenceModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder, use_sentence_segments=True),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        elmo_model=elmo_model,
        embed_mapper=embed_mapper,
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        sentences_encoder=SentenceMaxEncoder(),
        sentence_mapper=recurrent_layer,
        post_merger=None,
        merger=WithConcatOptions(sub=False, hadamard=True, dot=True, raw=True),
        reformulation_layer=reformulation,
        max_batch_size=128
    )
def get_contexts_to_question_model(rnn_dim, post_merge):
    recurrent_layer = CudnnGru(rnn_dim, w_init=TruncatedNormal(stddev=0.05))
    answer_encoder = BinaryAnswerEncoder()

    if post_merge == 'res_rnn_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    elif post_merge == 'res_rnn':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    elif post_merge == 'res_self_att':
        post_map_layer = SequenceMapperSeq(
            FullyConnected(rnn_dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                    FullyConnected(rnn_dim * 2, activation="relu"),
                )))
    else:
        raise NotImplementedError()

    return ContextsToQuestionModel(
        encoder=QuestionsAndParagraphsEncoder(answer_encoder),
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0,
                                     learn_unk=False, cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=50, char_dim=20,
                                                        init_scale=0.05, force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        attention_merger=MaxMerge(pre_map_layer=None, post_map_layer=post_map_layer),
        context_to_question_attention=BiAttention(TriLinear(bias=True), True),
        sequence_encoder=MaxPool(map_layer=None, min_val=0, regular_reshape=True),
        predictor=BinaryFixedPredictor())
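
# Hedged usage sketch (the rnn_dim value is an illustrative assumption): `post_merge`
# selects the residual block applied after the max-merge; the valid options are
# 'res_rnn_self_att', 'res_rnn' and 'res_self_att'.
# c2q_model = get_contexts_to_question_model(rnn_dim=150, post_merge='res_rnn_self_att')
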