Example #1
# NOTE: the import paths below are assumptions based on the layout of the
# allenai/document-qa (docqa) package, from which this snippet appears to
# originate; adjust them to match your checkout.
from typing import Optional

from docqa.doc_qa_models import Attention
from docqa.encoder import (DenseMultiSpanAnswerEncoder, DocumentAndQuestionEncoder,
                           GroupedSpanAnswerEncoder, MultiChoiceAnswerEncoder)
from docqa.nn.attention import AttentionEncoder, BiAttention, StaticAttentionSelf
from docqa.nn.embedder import CharWordEmbedder, FixedWordEmbedder, LearnedCharEmbedder
from docqa.nn.layers import (ChainBiMapper, ConcatWithProduct, Conv1d, FullyConnected,
                             MaxPool, NullBiMapper, ResidualLayer, SequenceMapperSeq,
                             VariationalDropoutLayer)
from docqa.nn.recurrent_layers import CudnnGru
from docqa.nn.similarity_layers import TriLinear
from docqa.nn.span_prediction import (BoundsPredictor, ConfidencePredictor,
                                      IndependentBoundsGrouped, IndependentBoundsSigmoidLoss,
                                      MultiChoicePredictor)
from docqa.text_preprocessor import TextPreprocessor
from tensorflow.contrib.keras.python.keras.initializers import TruncatedNormal


def get_model(char_th: int, dim: int, mode: str, preprocess: Optional[TextPreprocessor]):
    # One bidirectional cuDNN GRU instance, reused at several points in the network
    recurrent_layer = CudnnGru(dim, w_init=TruncatedNormal(stddev=0.05))
    if mode.startswith("shared-norm"):
        # Shared-norm objective: span scores are normalized jointly across
        # grouped paragraphs from the same document
        answer_encoder = GroupedSpanAnswerEncoder()
        predictor = BoundsPredictor(
            ChainBiMapper(
                first_layer=recurrent_layer,
                second_layer=recurrent_layer
            ),
            span_predictor=IndependentBoundsGrouped(aggregate="sum")
        )
    elif mode == "confidence":
        answer_encoder = DenseMultiSpanAnswerEncoder()
        predictor = ConfidencePredictor(
            ChainBiMapper(
                first_layer=recurrent_layer,
                second_layer=recurrent_layer,
            ),
            AttentionEncoder(),
            FullyConnected(80, activation="tanh"),
            aggregate="sum"
        )
    elif mode == "sigmoid":
        answer_encoder = DenseMultiSpanAnswerEncoder()
        predictor = BoundsPredictor(
            ChainBiMapper(
                first_layer=recurrent_layer,
                second_layer=recurrent_layer
            ),
            span_predictor=IndependentBoundsSigmoidLoss()
        )
    elif mode == "paragraph" or mode == "merge":
        answer_encoder = MultiChoiceAnswerEncoder()
        predictor = MultiChoicePredictor(4)
    else:
        raise NotImplementedError(mode)

    return Attention(
        encoder=DocumentAndQuestionEncoder(answer_encoder),
        # Fixed 300d GloVe vectors, kept on the CPU and not fine-tuned
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True),
        # Learned character embeddings, max-pooled over a width-5 convolution
        char_embed=CharWordEmbedder(
            LearnedCharEmbedder(word_size_th=14, char_th=char_th, char_dim=20, init_scale=0.05, force_cpu=True),
            MaxPool(Conv1d(100, 5, 0.8)),
            shared_parameters=True
        ),
        preprocess=preprocess,
        word_embed_layer=None,
        # Dropout -> shared GRU -> dropout applied to the embedded tokens
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        question_mapper=None,
        context_mapper=None,
        memory_builder=NullBiMapper(),
        # BiDAF-style bi-directional attention with a trilinear similarity function
        attention=BiAttention(TriLinear(bias=True), True),
        # Residual self-attention block over the matched representations
        match_encoder=SequenceMapperSeq(FullyConnected(dim * 2, activation="relu"),
                                        ResidualLayer(SequenceMapperSeq(
                                            VariationalDropoutLayer(0.8),
                                            recurrent_layer,
                                            VariationalDropoutLayer(0.8),
                                            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
                                            FullyConnected(dim * 2, activation="relu"),
                                        )),
                                        VariationalDropoutLayer(0.8)),
        predictor=predictor
    )
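
For reference, a minimal usage sketch (not part of the original snippet); the
argument values here are illustrative assumptions only, and "shared-norm"
selects the grouped span objective built above.

# Hedged example call; the char_th/dim values are illustrative
model = get_model(char_th=14, dim=100, mode="shared-norm", preprocess=None)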
Example #2
# NOTE: as above, the import paths are assumptions based on the docqa and
# cape-document-qa package layouts; adjust them to your checkout.
from typing import Optional

from cape_document_qa.cape_config import LM_OPTIONS, LM_TOKEN_WEIGHTS, LM_VOCAB, LM_WEIGHTS
from cape_document_qa.cape_models import CapeAttentionWithElmo
from docqa.elmo.lm_model import LanguageModel
from docqa.elmo.lm_qa_models import ElmoLayer
from docqa.encoder import DocumentAndQuestionEncoder, GroupedSpanAnswerEncoder
from docqa.nn.attention import BiAttention, StaticAttentionSelf
from docqa.nn.embedder import CharWordEmbedder, FixedWordEmbedder, LearnedCharEmbedder
from docqa.nn.layers import (ChainBiMapper, ConcatWithProduct, Conv1d, DropoutLayer,
                             FullyConnected, MapperSeq, MaxPool, NullBiMapper,
                             ResidualLayer, SequenceMapperSeq, VariationalDropoutLayer)
from docqa.nn.recurrent_layers import BiRecurrentMapper, CompatGruCellSpec, CudnnGru
from docqa.nn.similarity_layers import TriLinear
from docqa.nn.span_prediction import BoundsPredictor, IndependentBoundsGrouped
from docqa.text_preprocessor import TextPreprocessor
from tensorflow.contrib.keras.python.keras.initializers import TruncatedNormal


def build_model(preprocess: Optional[TextPreprocessor], train_config, use_cudnn=False):
    if use_cudnn:
        print('Using cuDNN GRU')
        recurrent_layer = CudnnGru(train_config.dim, w_init=TruncatedNormal(stddev=train_config.recurrent_stdev))
    else:
        # CPU-compatible fallback when the cuDNN kernels are unavailable
        recurrent_layer = BiRecurrentMapper(CompatGruCellSpec(train_config.dim))

    # Learned weighting of the ELMo layers, followed by dropout
    lm_reduce = MapperSeq(
        ElmoLayer(
            train_config.l2,
            layer_norm=train_config.lm_layernorm,
            top_layer_only=train_config.top_layer_only
        ),
        DropoutLayer(train_config.elmo_dropout),
    )

    # Shared-norm span objective: scores normalized across grouped paragraphs
    answer_encoder = GroupedSpanAnswerEncoder()
    predictor = BoundsPredictor(
        ChainBiMapper(
            first_layer=recurrent_layer,
            second_layer=recurrent_layer
        ),
        span_predictor=IndependentBoundsGrouped(aggregate="sum")
    )
    word_embed = FixedWordEmbedder(
        vec_name=train_config.word_vectors,
        word_vec_init_scale=0,
        learn_unk=train_config.learn_unk_vector,
        cpu=True
    )
    char_embed = CharWordEmbedder(
        LearnedCharEmbedder(
            word_size_th=14,
            char_th=train_config.char_th,
            char_dim=train_config.char_dim,
            init_scale=0.05,
            force_cpu=True
        ),
        MaxPool(Conv1d(100, 5, 0.8)),
        shared_parameters=True
    )
    embed_mapper = SequenceMapperSeq(
        VariationalDropoutLayer(train_config.var_dropout),
        recurrent_layer,
        VariationalDropoutLayer(train_config.var_dropout)
    )
    attention = BiAttention(TriLinear(bias=True), True)
    match_encoder = SequenceMapperSeq(
        FullyConnected(train_config.dim * 2, activation="relu"),
        ResidualLayer(SequenceMapperSeq(
            VariationalDropoutLayer(train_config.var_dropout),
            recurrent_layer,
            VariationalDropoutLayer(train_config.var_dropout),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(train_config.dim * 2, activation="relu"),
        )),
        VariationalDropoutLayer(train_config.var_dropout)
    )
    # Pretrained bidirectional LM (ELMo); the LM_* constants point at its
    # vocabulary, options, and weight files
    lm_model = LanguageModel(LM_VOCAB, LM_OPTIONS, LM_WEIGHTS, LM_TOKEN_WEIGHTS)
    model = CapeAttentionWithElmo(
        encoder=DocumentAndQuestionEncoder(answer_encoder),
        lm_model=lm_model,
        max_batch_size=train_config.max_batch_size,
        preprocess=preprocess,
        per_sentence=False,
        # "input" injects ELMo at the embedding layer, "output" just before
        # the bi-attention, and "both" does both
        append_embed=(train_config.elmo_mode in ("both", "input")),
        append_before_atten=(train_config.elmo_mode in ("both", "output")),
        word_embed=word_embed,
        char_embed=char_embed,
        embed_mapper=embed_mapper,
        # One ELMo reduction shared between question and context, rather than
        # separate per-input reductions
        lm_reduce=None,
        lm_reduce_shared=lm_reduce,
        memory_builder=NullBiMapper(),
        attention=attention,
        match_encoder=match_encoder,
        predictor=predictor
    )
    return model
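
Again for reference, a hedged usage sketch (not part of the original). No
concrete train_config type is shown in the snippet, so a SimpleNamespace
stands in below, carrying only the attributes build_model actually reads;
every value is an illustrative assumption.

from types import SimpleNamespace

# Hypothetical config object; each attribute is read somewhere in build_model
train_config = SimpleNamespace(
    dim=100, recurrent_stdev=0.05, l2=0.0, lm_layernorm=False,
    top_layer_only=False, elmo_dropout=0.5, char_th=14, char_dim=20,
    var_dropout=0.8, word_vectors="glove.840B.300d",
    learn_unk_vector=False, max_batch_size=128, elmo_mode="both",
)
model = build_model(preprocess=None, train_config=train_config, use_cudnn=True)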