Example #1
File: imdb_cnn.py Project: niedakh/thinc
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (HashEmbed(width, 5000, column=1)
                 | StaticVectors("spacy_pretrained_vectors", width, column=5)
                 | HashEmbed(width // 2, 750, column=2)
                 | HashEmbed(width // 2, 750, column=3)
                 | HashEmbed(width // 2, 750, column=4)) >> LN(Maxout(width))

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            # This block would allow the model to learn some cross-sentence
            # features. It's not useful on this problem. It might make more
            # sense to use a BiLSTM here, following Liang et al (2016).
            # >> with_getitem(0,
            #    Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            # )
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Example #2
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({
            '>>': chain,
            '+': add,
            '|': concatenate,
            '**': clone
    }):
        if cfg.get('low_data') and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0,
                )
            )
        )

        if pretrained_dims:
            static_vectors = (
                SpacyVectors >> with_flatten(Affine(width, pretrained_dims)))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))) ** 2,
                pad=2,
            )
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = (
            _preprocess_doc >> LinearModel(nr_class, drop_factor=0.))

        model = ((linear_model | cnn_model)
                 >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
                 >> logistic)
    model.nO = nr_class
    model.lsuv = False
    return model
Example #3
File: _ml.py Project: cs394-s20/Aqua
def build_nel_encoder(embed_width, hidden_width, ner_types, **cfg):
    if "entity_width" not in cfg:
        raise ValueError(Errors.E144.format(param="entity_width"))

    conv_depth = cfg.get("conv_depth", 2)
    cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
    pretrained_vectors = cfg.get("pretrained_vectors", None)
    context_width = cfg.get("entity_width")

    with Model.define_operators({">>": chain, "**": clone}):
        # context encoder
        tok2vec = Tok2Vec(
            width=hidden_width,
            embed_size=embed_width,
            pretrained_vectors=pretrained_vectors,
            cnn_maxout_pieces=cnn_maxout_pieces,
            subword_features=True,
            conv_depth=conv_depth,
            bilstm_depth=0,
        )

        model = (
            tok2vec
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> Residual(zero_init(Maxout(hidden_width, hidden_width)))
            >> zero_init(Affine(context_width, hidden_width, drop_factor=0.0))
        )

        model.tok2vec = tok2vec
        model.nO = context_width
    return model
Example #4
def build_model(nr_class, width, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        model = (FeatureExtracter([ORTH]) >> flatten_add_lengths >>
                 with_getitem(0, uniqued(HashEmbed(width, 10000, column=0))) >>
                 Pooling(mean_pool) >> Softmax(nr_class))
    model.lsuv = False
    return model
Example #5
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = ((HashEmbed(width, 5000, column=1)
                  | StaticVectors('spacy_pretrained_vectors', width, column=5)
                  | HashEmbed(width // 2, 750, column=2)
                  | HashEmbed(width // 2, 750, column=3)
                  | HashEmbed(width // 2, 750, column=4)) >> LN(Maxout(width)))

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0, embed >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth)
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        model = (foreach(sent2vec, drop_factor=2.0) >> flatten_add_lengths >>
                 ParametricAttention(width, hard=False) >> Pooling(sum_pool) >>
                 Residual(LN(Maxout(width)))**depth >> Softmax(nr_class))
    model.lsuv = False
    return model
Example #6
def softmax_last_hidden(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    mean-pool them, and softmax to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    width = cfg["token_vector_width"]
    return chain(get_pytt_last_hidden, flatten_add_lengths, Pooling(mean_pool),
                 Softmax(nr_class, width))
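The docstring above describes a generic flatten, mean-pool, softmax composition. Below is a minimal sketch of the same pattern with explicit imports, where a generic `tok2vec` layer stands in for the transformer-specific `get_pytt_last_hidden`; the function name and parameters are illustrative, not part of the original example.

# Sketch of the flatten -> mean-pool -> softmax pattern; `tok2vec` is an
# assumed stand-in for get_pytt_last_hidden.
from thinc.api import chain, flatten_add_lengths
from thinc.t2v import Pooling, mean_pool
from thinc.v2v import Softmax

def pooled_softmax_classifier(tok2vec, nr_class, width):
    # flatten_add_lengths: List[array] -> (stacked array, lengths)
    # Pooling(mean_pool): averages each document's rows back to one vector
    # Softmax: maps the pooled vector to nr_class probabilities
    return chain(tok2vec, flatten_add_lengths, Pooling(mean_pool),
                 Softmax(nr_class, width))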
Example #7
def sigmoid_last_hidden(nr_class, *, exclusive_classes=False, **cfg):
    width = cfg["token_vector_width"]
    return chain(
        get_last_hidden,
        flatten_add_lengths,
        Pooling(mean_pool),
        zero_init(Affine(nr_class, width, drop_factor=0.0)),
        logistic,
    )
Example #8
def build_textcat_model(tok2vec, nr_class, width):
    from thinc.v2v import Model, Softmax
    from thinc.api import flatten_add_lengths, chain
    from thinc.t2v import Pooling, mean_pool

    with Model.define_operators({">>": chain}):
        model = (tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >>
                 Softmax(nr_class, width))
    model.tok2vec = chain(tok2vec, flatten)
    return model
Example #9
def fine_tune_pooler_output(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    softmax them, and then mean-pool them to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    return chain(
        get_pytt_pooler_output,
        flatten_add_lengths,
        with_getitem(0, Softmax(nr_class, cfg["token_vector_width"])),
        Pooling(mean_pool),
    )
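The with_getitem(0, ...) wrapper used above matters because flatten_add_lengths produces a (flat array, lengths) pair: the softmax must apply to the array only, leaving the lengths intact for the pooling step. A purely illustrative numpy sketch of that data layout (shapes are made up):

# Illustration of the (flat array, lengths) convention behind with_getitem(0, ...).
import numpy

doc_vectors = [numpy.zeros((4, 8), dtype="f"), numpy.zeros((2, 8), dtype="f")]
flat = numpy.vstack(doc_vectors)              # (6, 8): rows from both docs stacked
lengths = numpy.asarray([4, 2], dtype="i")    # rows per document
# with_getitem(0, Softmax(...)) transforms `flat` only; `lengths` passes through
# unchanged so Pooling(mean_pool) can still split the rows per document.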
Example #10
def softmax_pooler_output(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the pooler output, (if necessary) mean-pool them
    to produce one vector per item, and then softmax them.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    return chain(
        get_pooler_output,
        flatten_add_lengths,
        with_getitem(0, Softmax(nr_class, cfg["token_vector_width"])),
        Pooling(mean_pool),
    )
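For context, a hypothetical call to this factory; the class count and token_vector_width below are illustrative values, not taken from the original project.

# Hypothetical usage; values are illustrative.
textcat_model = softmax_pooler_output(3, exclusive_classes=True,
                                      token_vector_width=768)
# The returned thinc model produces one 3-class probability vector per document
# and can be trained like any other chained model.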
Example #11
def build_textcat_model(tok2vec, nr_class, width):
    from thinc.v2v import Model, Softmax, Maxout
    from thinc.api import flatten_add_lengths, chain
    from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
    from thinc.misc import Residual, LayerNorm
    from spacy._ml import logistic, zero_init

    with Model.define_operators({">>": chain}):
        model = (tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >>
                 Softmax(nr_class, width))
    model.tok2vec = tok2vec
    return model
Example #12
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (HashEmbed(width, 5000, column=1)
                 | StaticVectors(vectors_name, width, column=5)
                 | HashEmbed(width // 2, 750, column=2)
                 | HashEmbed(width // 2, 750, column=3)
                 | HashEmbed(width // 2, 750, column=4)) >> LN(Maxout(width))

        sent2vec = (
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width))))
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Example #13
def softmax_tanh_class_vector(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    mean-pool them, apply a tanh-activated hidden layer, and then a
    softmax-activated output layer to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    width = cfg["token_vector_width"]
    return chain(
        get_class_tokens,
        flatten_add_lengths,
        with_getitem(0, chain(Affine(width, width), tanh)),
        Pooling(mean_pool),
        Softmax(nr_class, width),
    )
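The "tanh-activated hidden layer" in the docstring is an Affine projection followed by an elementwise tanh, applied to each class-token vector before pooling. A purely illustrative numpy sketch of that step (weights and sizes are made up):

# Illustration of the Affine >> tanh hidden layer described above; illustrative
# numpy only, not the thinc implementation.
import numpy

width = 4                                            # stands in for token_vector_width
W = numpy.random.uniform(-0.1, 0.1, (width, width)).astype("f")
b = numpy.zeros((width,), dtype="f")
class_vectors = numpy.ones((2, width), dtype="f")    # two documents' class tokens
hidden = numpy.tanh(class_vectors @ W + b)           # tanh-activated hidden layer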
Example #14
def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
            output_layer = Softmax(nr_class, tok2vec.nO)
        else:
            output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
        model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    return model
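A hypothetical call showing the non-exclusive output layer the docstring describes; the Tok2Vec settings and class count below are illustrative, not taken from the original project.

# Hypothetical usage; the tok2vec settings and nr_class are illustrative.
tok2vec = Tok2Vec(96, 2000)   # width=96, embed_size=2000
multilabel = build_simple_cnn_text_classifier(tok2vec, nr_class=4,
                                              exclusive_classes=False)
# exclusive_classes=False: each of the 4 outputs is an independent probability
# in [0, 1]; with exclusive_classes=True the outputs would sum to 1 instead.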
Example #15
File: _ml.py Project: dev-seasia/dev
def build_nel_encoder(embed_width, hidden_width, ner_types, **cfg):
    # TODO proper error
    if "entity_width" not in cfg:
        raise ValueError("entity_width not found")
    if "context_width" not in cfg:
        raise ValueError("context_width not found")

    conv_depth = cfg.get("conv_depth", 2)
    cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
    pretrained_vectors = cfg.get(
        "pretrained_vectors")  # self.nlp.vocab.vectors.name
    context_width = cfg.get("context_width")
    entity_width = cfg.get("entity_width")

    with Model.define_operators({">>": chain, "**": clone}):
        model = (
            Affine(entity_width, entity_width + context_width + 1 + ner_types)
            >> Affine(1, entity_width, drop_factor=0.0) >> logistic)

        # context encoder
        tok2vec = (
            Tok2Vec(
                width=hidden_width,
                embed_size=embed_width,
                pretrained_vectors=pretrained_vectors,
                cnn_maxout_pieces=cnn_maxout_pieces,
                subword_features=True,
                conv_depth=conv_depth,
                bilstm_depth=0,
            )
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> Residual(zero_init(Maxout(hidden_width, hidden_width)))
            >> zero_init(Affine(context_width, hidden_width))
        )

        model.tok2vec = tok2vec

    model.tok2vec = tok2vec
    model.tok2vec.nO = context_width
    model.nO = 1
    return model
Example #16
File: _ml.py Project: cs394-s20/Aqua
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({
            ">>": chain,
            "+": add,
            "|": concatenate,
            "**": clone
    }):
        if cfg.get("low_data") and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]) >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape) >> LN(
                        Maxout(width, width + (width // 2) * 3)),
                    column=0,
                ))

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec >> flatten_add_lengths >> ParametricAttention(width) >>
            Pooling(sum_pool) >> Residual(zero_init(Maxout(width, width))) >>
            zero_init(Affine(nr_class, width, drop_factor=0.0)))

        linear_model = build_bow_text_classifier(nr_class,
                                                 ngram_size=cfg.get(
                                                     "ngram_size", 1),
                                                 exclusive_classes=False)
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (zero_init(
                Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic)
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
Example #17
def main(dataset='quora',
         width=50,
         depth=2,
         min_batch_size=1,
         max_batch_size=512,
         dropout=0.2,
         dropout_decay=0.0,
         pooling="mean+max",
         nb_epoch=5,
         pieces=3,
         L2=0.0,
         use_gpu=False,
         out_loc=None,
         quiet=False,
         job_id=None,
         ws_api_url=None,
         rest_api_url=None):
    global CTX
    if job_id is not None:
        CTX = neptune.Context()
        width = CTX.params.width
        L2 = CTX.params.L2
        nb_epoch = CTX.params.nb_epoch
        depth = CTX.params.depth
        max_batch_size = CTX.params.max_batch_size
    cfg = dict(locals())

    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy('en')

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
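    # (A short standalone sketch of these three operators follows this function.)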
    Model.lsuv = True
    #Model.ops = CupyOps()
    with Model.define_operators({
            '>>': chain,
            '**': clone,
            '|': concatenate,
            '+': add
    }):
        mwe_encode = ExtractWindow(nW=1) >> BN(
            Maxout(width, drop_factor=0.0, pieces=pieces))

        sent2vec = (  # List[spacy.token.Doc]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0,
                #(StaticVectors('en', width)
                HashEmbed(width, 3000)
                #+ HashEmbed(width, 3000))
                #>> Residual(mwe_encode ** 2)
            )  # : word_ids{T}
            >> Pooling(mean_pool, max_pool)
            #>> Residual(BN(Maxout(width*2, pieces=pieces), nO=width*2)**2)
            >> Maxout(width * 2, pieces=pieces, drop_factor=0.0) >> logistic)
        model = Siamese(sent2vec, CauchySimilarity(width * 2))

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    elif dataset == 'stackxc':
        train, dev = datasets.stack_exchange()
    elif dataset in ('quora+snli', 'snli+quora'):
        train, dev = datasets.quora_questions()
        train2, dev2 = datasets.snli()
        train.extend(train2)
        dev.extend(dev2)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    get_ids = get_word_ids(Model.ops)
    train_X, train_y = preprocess(model.ops, nlp, train, get_ids)
    dev_X, dev_y = preprocess(model.ops, nlp, dev, get_ids)

    with model.begin_training(train_X[:10000], train_y[:10000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate_logloss(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        n_iter = 0

        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            assert yh.shape == y.shape, (yh.shape, y.shape)

            assert (yh >= 0.).all(), yh
            train_acc = ((yh >= 0.5) == (y >= 0.5)).sum()
            loss = model.ops.xp.abs(yh - y).mean()
            epoch_train_acc += train_acc
            backprop(yh - y, optimizer)
            n_iter += 1

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
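The operator bindings explained in the comments inside main() above (chain as >>, clone as **, concatenate as |) can be exercised in isolation. The following is a minimal sketch, assuming thinc 7.x with the same layers imported elsewhere in these examples; the layer widths are illustrative.

# Minimal sketch of the three bound operators; widths are illustrative.
from thinc.api import chain, clone, concatenate
from thinc.v2v import Model, Maxout, ReLu

with Model.define_operators({">>": chain, "**": clone, "|": concatenate}):
    f = ReLu(8, 8)
    g = Maxout(8, 8)
    composed = f >> g    # chain(f, g): feeds f's output into g
    stacked = f ** 3     # clone(f, 3): three separately-weighted copies, chained
    merged = f | g       # concatenate(f, g): hstack of f(x) and g(x), width 16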
Example #18
    def Model(cls, length):
        return Siamese(Pooling(max_pool, mean_pool), CauchySimilarity(length))