Beispiel #1
0
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    """Compose the layer pipeline and return it as a single model.

    Layers are combined with the operators bound for the duration of the
    ``with`` block: ``|`` maps to ``concatenate``, ``>>`` to ``chain`` and
    ``**`` to ``clone``.

    Args:
        nr_class: Passed to the final ``Softmax`` layer.
        width: Size argument given to the embedding, ``Maxout`` and
            ``ParametricAttention`` layers.
        depth: Clone count for the ``Residual(LN(Maxout(width)))`` blocks.
        conv_depth: Clone count for the ``ExtractWindow`` residual block
            that follows the embedding.
        **kwargs: Accepted but not read.

    Returns:
        The composed model, with its ``lsuv`` attribute set to ``False``.
    """
    operators = {"|": concatenate, ">>": chain, "**": clone}
    with Model.define_operators(operators):
        # Four hashed embedding tables (columns 1-4) concatenated with the
        # static vectors (column 5), then mixed by a normalised Maxout.
        token_features = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors("spacy_pretrained_vectors", width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        )
        embed = token_features >> LN(Maxout(width))

        # `**` binds tighter than `>>`, so only the residual block is cloned.
        window_stack = (
            Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
        )
        encode_tokens = with_getitem(0, embed >> window_stack)
        sent2vec = (
            flatten_add_lengths
            >> encode_tokens
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        per_sentence = foreach(sent2vec, drop_factor=2.0)
        model = (
            per_sentence
            >> flatten_add_lengths
            # The block below would allow the model to learn some
            # cross-sentence features. It's not useful on this problem. It
            # might make more sense to use a BiLSTM here, following
            # Liang et al (2016).
            # >> with_getitem(0,
            #    Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            # )
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Beispiel #2
0
def build_text_classifier(nr_class, width=64, **cfg):
    """Build a text classifier that combines a linear model with a CNN model.

    Layers are combined with the operators bound for the duration of the
    ``with`` block (``>>`` chain, ``+`` add, ``|`` concatenate, ``**`` clone).

    Args:
        nr_class: Output size of the final ``Affine`` layers; also stored on
            the returned model as ``nO``.
        width: Size argument for the embedding and hidden layers.
        **cfg: Optional settings read with ``cfg.get``:
            ``nr_vector`` (default 5000) is passed to each ``HashEmbed``;
            ``pretrained_dims`` (default 0) enables the ``SpacyVectors``
            branch when truthy; ``low_data`` selects the short
            vectors-only pipeline when ``pretrained_dims`` is also truthy.

    Returns:
        The composed model. On the regular path ``nO`` and ``lsuv`` are set
        on it; the low-data pipeline is returned without those attributes.
    """
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    operators = {'>>': chain, '+': add, '|': concatenate, '**': clone}
    with Model.define_operators(operators):
        if cfg.get('low_data') and pretrained_dims:
            # Short pipeline built on the pretrained vectors only; returned
            # straight away, so `nO` / `lsuv` are not assigned here.
            return (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )

        # One hashed embedding table per feature column (1-4).
        lower_embed = HashEmbed(width, nr_vector, column=1)
        prefix_embed = HashEmbed(width // 2, nr_vector, column=2)
        suffix_embed = HashEmbed(width // 2, nr_vector, column=3)
        shape_embed = HashEmbed(width // 2, nr_vector, column=4)

        extract = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
        # One full-width table plus three half-width tables.
        concat_width = width + (width // 2) * 3
        mix_embeddings = (
            (lower_embed | prefix_embed | suffix_embed | shape_embed)
            >> LN(Maxout(width, concat_width))
        )
        trained_vectors = extract >> with_flatten(
            uniqued(mix_embeddings, column=0))

        # Start from the trained vectors alone; widen when static vectors
        # are configured.
        vectors = trained_vectors
        vectors_width = width
        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2

        window_encoder = with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))) ** 2,
            pad=2,
        )
        cnn_model = (
            vectors
            >> window_encoder
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = _preprocess_doc >> LinearModel(nr_class, drop_factor=0.)

        # Both sub-models emit `nr_class` values, hence `nr_class * 2` inputs.
        ensemble = linear_model | cnn_model
        output_layer = zero_init(
            Affine(nr_class, nr_class * 2, drop_factor=0.0))
        model = ensemble >> output_layer >> logistic
    model.nO = nr_class
    model.lsuv = False
    return model
Beispiel #3
0
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    """Assemble the sentence-then-document pipeline and return the model.

    The ``with`` block binds ``|`` to ``concatenate``, ``>>`` to ``chain``
    and ``**`` to ``clone``; every layer expression below relies on those
    bindings.

    Args:
        nr_class: Passed to the final ``Softmax`` layer.
        width: Size argument given to the embedding, ``Maxout`` and
            ``ParametricAttention`` layers.
        depth: Clone count for the ``Residual(LN(Maxout(width)))`` blocks.
        conv_depth: Clone count for the ``ExtractWindow`` residual block
            that follows the embedding.
        **kwargs: Accepted but not read.

    Returns:
        The composed model, with its ``lsuv`` attribute set to ``False``.
    """
    bindings = {'|': concatenate, '>>': chain, '**': clone}
    with Model.define_operators(bindings):
        # Hashed embeddings for columns 1-4 plus static vectors for column 5.
        features = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors('spacy_pretrained_vectors', width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        )
        embed = features >> LN(Maxout(width))

        # `**` binds tighter than `>>`: the residual block is cloned first,
        # then chained after `embed`.
        conv_layers = (
            Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
        )
        token_encoder = with_getitem(0, embed >> conv_layers)
        sent2vec = (
            flatten_add_lengths
            >> token_encoder
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        sentence_vectors = foreach(sent2vec, drop_factor=2.0)
        model = (
            sentence_vectors
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Beispiel #4
0
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    """Assemble the self-attention variant of the pipeline and return it.

    The ``with`` block binds ``|`` to ``concatenate``, ``>>`` to ``chain``
    and ``**`` to ``clone``; every layer expression below relies on those
    bindings.

    Args:
        nr_class: Passed to the final ``Softmax`` layer.
        width: Size argument given to the embedding, attention, ``Affine``
            and ``Maxout`` layers.
        depth: Not read by this function.
        conv_depth: Not read by this function.
        vectors_name: First argument of the ``StaticVectors`` layer.
        **kwargs: Accepted but not read.

    Returns:
        The composed model, with its ``lsuv`` attribute set to ``False``.
    """
    bindings = {"|": concatenate, ">>": chain, "**": clone}
    with Model.define_operators(bindings):
        # Hashed embeddings for columns 1-4 plus static vectors for column 5.
        features = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        )
        embed = features >> LN(Maxout(width))

        # Token level: residual multi-head attention over the embeddings.
        flat_embed = with_flatten(embed)
        token_attention = Residual(
            prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
            >> MultiHeadedAttention()
            >> with_flatten(Maxout(width, width, pieces=3))
        )
        sent2vec = (
            flat_embed
            >> token_attention
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        # Sentence level: the same attention pattern over `sent2vec` outputs.
        sentence_vectors = foreach(sent2vec, drop_factor=2.0)
        sentence_attention = Residual(
            prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
            >> MultiHeadedAttention()
            >> with_flatten(LN(Affine(width, width)))
        )
        model = (
            sentence_vectors
            >> sentence_attention
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Beispiel #5
0
def build_text_classifier(nr_class, width=64, **cfg):
    """Build a text classifier that combines a bag-of-words and a CNN model.

    Layers are combined with the operators bound for the duration of the
    ``with`` block (``>>`` chain, ``+`` add, ``|`` concatenate, ``**`` clone).

    Args:
        nr_class: Output size of the classification layers; also stored on
            the returned model as ``nO``.
        width: Size argument for the embedding and hidden layers.
        **cfg: Optional settings read with ``cfg.get``:
            ``depth`` (default 2) is the clone count of the window block and
            the ``pad`` value given to ``with_flatten``;
            ``nr_vector`` (default 5000) is passed to each ``HashEmbed``;
            ``pretrained_dims`` (default 0) enables the ``SpacyVectors``
            branch when truthy;
            ``low_data`` selects the short vectors-only pipeline when
            ``pretrained_dims`` is also truthy;
            ``ngram_size`` (default 1) is forwarded to
            ``build_bow_text_classifier``;
            ``exclusive_classes`` switches the output layer to ``Softmax``.

    Returns:
        The composed model. On the regular path ``tok2vec``, ``nO`` and
        ``lsuv`` are set on it; the low-data pipeline is returned without
        those attributes.
    """
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    operators = {">>": chain, "+": add, "|": concatenate, "**": clone}
    with Model.define_operators(operators):
        if cfg.get("low_data") and pretrained_dims:
            # Short pipeline built on the pretrained vectors only; returned
            # straight away, so `tok2vec` / `nO` / `lsuv` are not assigned.
            return (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )

        # One hashed embedding table per feature column (1-4).
        lower_embed = HashEmbed(width, nr_vector, column=1)
        prefix_embed = HashEmbed(width // 2, nr_vector, column=2)
        suffix_embed = HashEmbed(width // 2, nr_vector, column=3)
        shape_embed = HashEmbed(width // 2, nr_vector, column=4)

        extract = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
        # One full-width table plus three half-width tables.
        concat_width = width + (width // 2) * 3
        mix_embeddings = (
            (lower_embed | prefix_embed | suffix_embed | shape_embed)
            >> LN(Maxout(width, concat_width))
        )
        trained_vectors = extract >> with_flatten(
            uniqued(mix_embeddings, column=0))

        # Start from the trained vectors alone; widen when static vectors
        # are configured.
        vectors = trained_vectors
        vectors_width = width
        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2

        window_encoder = with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))
            ** depth,
            pad=depth,
        )
        tok2vec = vectors >> window_encoder
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        ngram_size = cfg.get("ngram_size", 1)
        linear_model = build_bow_text_classifier(
            nr_class, ngram_size=ngram_size, exclusive_classes=False)

        # Both sub-models emit `nr_class` values, hence `nr_class * 2` inputs.
        # The output layer is built before the concatenation below.
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = zero_init(
                Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model