Example #1
def build_spancat_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    reducer: Model[Ragged, Floats2d],
    scorer: Model[Floats2d, Floats2d],
) -> Model[Tuple[List[Doc], Ragged], Floats2d]:
    """Build a span categorizer model, given a token-to-vector model, a
    reducer model to map the sequence of vectors for each span down to a single
    vector, and a scorer model to map the vectors to probabilities.

    tok2vec (Model[List[Doc], List[Floats2d]]): The tok2vec model.
    reducer (Model[Ragged, Floats2d]): The reducer model.
    scorer (Model[Floats2d, Floats2d]): The scorer model.
    """
    model = chain(
        cast(
            Model[Tuple[List[Doc], Ragged], Tuple[Ragged, Ragged]],
            with_getitem(
                0,
                chain(tok2vec,
                      cast(Model[List[Floats2d], Ragged], list2ragged()))),
        ),
        extract_spans(),
        reducer,
        scorer,
    )
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("reducer", reducer)
    model.set_ref("scorer", scorer)
    return model
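A note on the with_getitem step above: it applies its sublayer to one element of a tuple and passes the other elements through unchanged. A minimal sketch, assuming only thinc.api and numpy (the Linear layer stands in for the tok2vec pipeline):

import numpy
from thinc.api import Linear, with_getitem

layer = with_getitem(0, Linear(nO=2, nI=3))
X0 = numpy.zeros((4, 3), dtype="f")
X1 = numpy.ones((4, 1), dtype="f")
layer.initialize((X0, X1))
Y0, Y1 = layer.predict((X0, X1))
assert Y0.shape == (4, 2)  # element 0 was transformed by the sublayer
assert (Y1 == X1).all()    # element 1 passed through untouched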
Example #2
def test_issue208():
    """Test issue that was caused by trying to flatten nested chains."""
    layer1 = Linear(nO=9, nI=3)
    layer2 = Linear(nO=12, nI=9)
    layer3 = Linear(nO=5, nI=12)
    model = chain(layer1, chain(layer2, layer3)).initialize()
    assert model.get_dim("nO") == 5
Example #3
def test_wrap_non_child_references():
    relu = Relu(5)
    relu2 = Relu(5)
    chained = chain(relu, relu)
    chained2 = chain(relu2, chained)
    chained2.set_ref("relu", relu)
    # Fails in case non-child references cannot be set.
    wrap_model_recursive(chained2, with_debug)
Example #4
def test_multi_model_load_missing_dims():
    model = chain(Maxout(5, 10, pieces=2), Maxout(2, 3))
    model._layers[0].b += 1
    model._layers[1].b += 2
    data = model.to_bytes()

    model2 = chain(Maxout(5), Maxout())
    model2 = model2.from_bytes(data)
    assert model2._layers[0].b[0, 0] == 1
    assert model2._layers[1].b[0, 0] == 2
Example #5
def softmax_tanh_class_vector(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    mean-pool them, and softmax to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    width = cfg["token_vector_width"]
    return chain(get_pytt_class_tokens, flatten_add_lengths,
                 with_getitem(0, chain(Affine(width, width), tanh)),
                 Pooling(mean_pool), Softmax(2, width))
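The Pooling(mean_pool) step above uses the older thinc 7 API. As an illustration only (not the calls used by this example), the same mean-pooling idea in the current thinc API looks like:

import numpy
from thinc.api import chain, list2ragged, reduce_mean

pool = chain(list2ragged(), reduce_mean())
seqs = [numpy.ones((3, 4), dtype="f"), numpy.ones((5, 4), dtype="f")]
pool.initialize(seqs)
means = pool.predict(seqs)
assert means.shape == (2, 4)  # one mean-pooled vector per sequence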
Example #6
def Tok2Vec(config):
    doc2feats = make_layer(config["@doc2feats"])
    embed = make_layer(config["@embed"])
    encode = make_layer(config["@encode"])
    field_size = getattr(encode, "receptive_field", 0)
    tok2vec = chain(doc2feats, with_flatten(chain(embed, encode), pad=field_size))
    tok2vec.cfg = config
    tok2vec.nO = encode.nO
    tok2vec.embed = embed
    tok2vec.encode = encode
    return tok2vec
Example #7
def Tok2Vec(config):
    doc2feats = make_layer(config["@doc2feats"])
    embed = make_layer(config["@embed"])
    encode = make_layer(config["@encode"])
    depth = config["@encode"]["config"]["depth"]
    tok2vec = chain(doc2feats, with_flatten(chain(embed, encode), pad=depth))
    tok2vec.cfg = config
    tok2vec.nO = encode.nO
    tok2vec.embed = embed
    tok2vec.encode = encode
    return tok2vec
Example #8
def test_multi_model_load_missing_dims():
    model = chain(Maxout(5, 10, nP=2), Maxout(2, 3)).initialize()
    b = model.layers[0].get_param("b")
    b += 1
    b = model.layers[1].get_param("b")
    b += 2
    data = model.to_bytes()

    model2 = chain(Maxout(5, nP=None), Maxout(nP=None))
    model2 = model2.from_bytes(data)
    assert model2.layers[0].get_param("b")[0, 0] == 1
    assert model2.layers[1].get_param("b")[0, 0] == 2
Example #9
def test_walk_dfs():
    relu = Relu(5)
    relu2 = Relu(5)
    inner_chain = chain(relu, relu2)
    chained = chain(inner_chain, inner_chain)
    assert list(chained.walk(order="dfs_pre")) == [chained, inner_chain, relu, relu2]
    assert list(chained.walk(order="dfs_post")) == [
        relu,
        relu2,
        inner_chain,
        chained,
    ]
Example #10
def build_cloze_characters_multi_task_model(
    vocab: "Vocab", tok2vec: Model, maxout_pieces: int, hidden_size: int, nr_char: int
) -> Model:
    output_layer = chain(
        list2array(),
        Maxout(nO=hidden_size, nP=maxout_pieces),
        LayerNorm(nI=hidden_size),
        MultiSoftmax([256] * nr_char, nI=hidden_size),
    )
    model = build_masked_language_model(vocab, chain(tok2vec, output_layer))
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("output_layer", output_layer)
    return model
Example #11
def fine_tune_class_vector(nr_class, *, exclusive_classes=True, **cfg):
    """Select features from the class-vectors from the last hidden state,
    mean-pool them, and then softmax to produce one vector per document.
    The gradients of the class vectors are incremented in the backward pass,
    to allow fine-tuning.
    """
    return chain(
        get_pytt_class_tokens, flatten_add_lengths,
        with_getitem(
            0,
            chain(Affine(cfg["token_vector_width"], cfg["token_vector_width"]),
                  tanh)), Pooling(mean_pool),
        Affine(2, cfg["token_vector_width"], drop_factor=0), softmax)
Example #12
def test_serialize_model_shims_roundtrip_bytes():
    fwd = lambda model, X, is_train: (X, lambda dY: dY)
    test_shim = SerializableShim(None)
    shim_model = Model("shimmodel", fwd, shims=[test_shim])
    model = chain(Linear(2, 3), shim_model, Maxout(2, 3))
    model.initialize()
    assert model.layers[1].shims[0].value == "shimdata"
    model_bytes = model.to_bytes()
    with pytest.raises(ValueError):
        Linear(2, 3).from_bytes(model_bytes)
    test_shim = SerializableShim(None)
    shim_model = Model("shimmodel", fwd, shims=[test_shim])
    new_model = chain(Linear(2, 3), shim_model,
                      Maxout(2, 3)).from_bytes(model_bytes)
    assert new_model.layers[1].shims[0].value == "shimdata from bytes"
Example #13
def test_recursive_double_wrap():
    def dummy_model(name, layers):
        return Model(name, lambda model, X, is_train:..., layers=layers)

    relu = Relu(5)
    chained = chain(relu, relu)
    concat = concatenate(chained, chained, relu)
    concat_wrapped = wrap_model_recursive(
        concat, lambda model: dummy_model(f"dummy({model.name})", [model]))

    n_debug = 0
    for model in concat_wrapped.walk():
        if model.name.startswith("dummy"):
            n_debug += 1

    # There should be 3 unique dummy wrappers:
    # * Around concatenate.
    # * Around chain.
    # * Around relu.
    assert n_debug == 3

    assert concat_wrapped.layers[0].layers[0].layers[0].layers[0].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[0].layers[0].layers[1].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[1].layers[0].layers[0].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[1].layers[0].layers[1].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[2].name == "dummy(relu)"
Example #14
    def Model(cls, **cfg) -> Any:
        """Create an instance of `PyTT_Wrapper`, which holds the
        PyTorch-Transformers model.

        **cfg: Optional config parameters.
        RETURNS (thinc.neural.Model): The wrapped model.
        """
        name = cfg.get("pytt_name")
        if not name:
            raise ValueError(
                "Need pytt_name argument, e.g. 'bert-base-uncased'")
        if cfg.get("from_pretrained"):
            pytt_model = PyTT_Wrapper.from_pretrained(name)
        else:
            pytt_model = PyTT_Wrapper(name)
        nO = pytt_model.nO
        batch_by_length = cfg.get("words_per_batch", 3000)
        max_length = cfg.get("max_length", 512)
        model = foreach_sentence(
            chain(
                get_word_pieces,
                with_length_batching(
                    truncate_long_inputs(pytt_model, max_length),
                    batch_by_length),
            ))
        setattr(model, "nO", nO)
        setattr(model, "_model", pytt_model)
        return model
Example #15
def test_replace_node_with_indirect_node_ref():
    #  a
    # / \
    # x  b[y=y]
    # |  |
    # y  x
    #    |
    #    y

    def dummy_model(name, layers):
        return Model(name, lambda model, X, is_train:..., layers=layers)

    y = dummy_model("y", [])
    x = dummy_model("x", [y])

    y_debug = with_debug(y)

    b = dummy_model("b", [x])
    b.set_ref("y", y)

    a = chain(x, b)
    a.name = "a"

    a.replace_node(y, y_debug)

    assert a.layers[0].layers[0] == y_debug
    assert a.layers[1].layers[0].layers[0] == y_debug
    assert a.layers[1].get_ref("y") == y_debug
Example #16
def MaxoutWindowEncoder_v1(width: int, window_size: int, maxout_pieces: int,
                           depth: int) -> Model[Floats2d, Floats2d]:
    """Encode context using convolutions with maxout activation, layer
    normalization and residual connections.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token
        to construct the convolution. Recommended value is 1.
    maxout_pieces (int): The number of maxout pieces to use. Recommended
        values are 2 or 3.
    depth (int): The number of convolutional layers. Recommended value is 4.
    """
    cnn = chain(
        expand_window(window_size=window_size),
        Maxout(
            nO=width,
            nI=width * ((window_size * 2) + 1),
            nP=maxout_pieces,
            dropout=0.0,
            normalize=True,
        ),
    )
    model = clone(residual(cnn), depth)
    model.set_dim("nO", width)
    model.attrs["receptive_field"] = window_size * depth
    return model
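A hypothetical usage sketch for the encoder above; the sizes are arbitrary, and only numpy plus the function as defined here are assumed:

import numpy

encoder = MaxoutWindowEncoder_v1(width=64, window_size=1, maxout_pieces=3, depth=4)
X = numpy.zeros((10, 64), dtype="f")  # 10 tokens with 64-dimensional vectors
encoder.initialize(X=X)
Y = encoder.predict(X)
assert Y.shape == (10, 64)  # the residual connections keep the width fixed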
Example #17
def test_model_gpu():
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # making sure the data is on the right device
    train_X = model.ops.asarray(train_X)
    train_Y = model.ops.asarray(train_Y)
    dev_X = model.ops.asarray(dev_X)
    dev_Y = model.ops.asarray(dev_Y)

    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    batch_size = 128

    for i in range(2):
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for X, Y in batches:
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
Example #18
def FancyEmbed(width, rows, cols=(ORTH, SHAPE, PREFIX, SUFFIX)):
    from thinc.i2v import HashEmbed
    from thinc.v2v import Maxout
    from thinc.api import chain, concatenate
    tables = [HashEmbed(width, rows, column=i) for i in range(len(cols))]
    return chain(concatenate(*tables),
                 Maxout(width, width * len(tables), pieces=3))
Example #19
def transformer_tok2vec_v2(
    name: str,
    get_spans,
    tokenizer_config: dict,
    transformer_config: dict,
    pooling: Model[Ragged, Floats2d],
    grad_factor: float = 1.0,
) -> Model[List[Doc], List[Floats2d]]:
    """Use a transformer as a "Tok2Vec" layer directly. This does not allow
    multiple components to share the transformer weights, and does not allow
    the transformer to set annotations into the `Doc` object, but it's a
    simpler solution if you only need the transformer within one component.

    get_spans (Callable[[List[Doc]], List[List[Span]]]): A function to extract
        spans from the batch of Doc objects. See the "TransformerModel" layer
        for details.
    tokenizer_config (dict): Settings to pass to the transformers tokenizer.
    transformer_config (dict): Settings to pass to the forward pass of the
        transformer.
    pooling (Model[Ragged, Floats2d]): A reduction layer used to calculate
        the token vectors based on zero or more wordpiece vectors. If in doubt,
        mean pooling (see `thinc.layers.reduce_mean`) is usually a good choice.
    grad_factor (float): Reweight gradients from the component before passing
        them to the transformer. You can set this to 0 to "freeze" the transformer
        weights with respect to the component, or to make it learn more slowly.
        Leaving it at 1.0 is usually fine.
    """
    return chain(
        TransformerModel(name, get_spans, tokenizer_config,
                         transformer_config),
        split_trf_batch(),
        trfs2arrays(pooling, grad_factor),
    )
Example #20
def test_recursive_double_wrap():
    relu = Relu(5)
    chained = chain(relu, relu)
    concat = concatenate(chained, chained)
    concat_debug = wrap_model_recursive(concat, with_debug)

    n_debug = 0
    for model in concat_debug.walk():
        if model.name.startswith("debug"):
            n_debug += 1

    # There should be 5 unique debug wrappers:
    # * Around concatenate. (= 1)
    # * One around each chain in concatenate. (= 2)
    # * One around each relu in the chain. (= 2)
    assert n_debug == 5

    assert concat_debug.layers[0].layers[0].layers[0].layers[0].name == "debug(relu)"
    assert concat_debug.layers[0].layers[0].layers[0].layers[1].name == "debug(relu)"
    assert concat_debug.layers[0].layers[1].layers[0].layers[0].name == "debug(relu)"
    assert concat_debug.layers[0].layers[1].layers[0].layers[1].name == "debug(relu)"
Example #21
def test_with_debug():
    pytest.importorskip("ml_datasets")
    import ml_datasets

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()

    counts = Counter()

    def on_init(*_):
        counts["init"] += 1

    def on_forward(*_):
        counts["forward"] += 1

    def on_backprop(*_):
        counts["backprop"] += 1

    relu = Relu()
    relu2 = with_debug(Relu(),
                       on_init=on_init,
                       on_forward=on_forward,
                       on_backprop=on_backprop)
    chained = chain(relu, relu2, relu2)
    chained.initialize(X=train_X[:5], Y=train_Y[:5])
    _, backprop = chained(X=train_X[:5], is_train=False)

    # Not real loss gradients, but we don't care for testing.
    backprop(train_Y[:5])

    # Four times forward, because initialization also applies forward for
    # validation.
    assert counts == {"init": 2, "forward": 4, "backprop": 2}
Example #22
def transformer_listener_tok2vec_v1(
        pooling: Model[Ragged, Floats2d],
        grad_factor: float = 1.0,
        upstream: str = "*") -> Model[List[Doc], List[Floats2d]]:
    """Create a 'TransformerListener' layer, which will connect to a Transformer
    component earlier in the pipeline.

    The layer takes a list of Doc objects as input, and produces a list of
    2d arrays as output, with each array having one row per token. Most spaCy
    models expect a sublayer with this signature, making it easy to connect them
    to a transformer model via this sublayer.
    Transformer models usually operate over wordpieces, which usually don't align
    one-to-one against spaCy tokens. The layer therefore requires a reduction
    operation in order to calculate a single token vector given zero or more
    wordpiece vectors.

    pooling (Model[Ragged, Floats2d]): A reduction layer used to calculate
        the token vectors based on zero or more wordpiece vectors. If in doubt,
        mean pooling (see `thinc.layers.reduce_mean`) is usually a good choice.
    grad_factor (float): Reweight gradients from the component before passing
        them upstream. You can set this to 0 to "freeze" the transformer weights
        with respect to the component, or use it to make some components more
        significant than others. Leaving it at 1.0 is usually fine.
    upstream (str): A string to identify the 'upstream' Transformer
        to communicate with. The upstream name should either be the wildcard
        string '*', or the name of the `Transformer` component. You'll almost
        never have multiple upstream Transformer components, so the wildcard
        string will almost always be fine.
    """
    listener = TransformerListener(upstream_name=upstream)
    model = chain(listener, trfs2arrays(pooling, grad_factor))
    model.set_ref("listener", listener)
    return model
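The set_ref call above is what lets callers retrieve the listener from the composed model later. A tiny sketch of the mechanism with plain thinc layers (the ref name is arbitrary):

from thinc.api import chain, Relu

inner = Relu(5)
outer = chain(inner, Relu(5))
outer.set_ref("listener", inner)
assert outer.get_ref("listener") is inner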
Example #23
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        if drop is not None:
            drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
Example #24
def main(n_hidden: int = 256,
         dropout: float = 0.2,
         n_iter: int = 10,
         batch_size: int = 128):
    # Define the model
    model: Model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Load the data
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    # Set any missing shapes for the model.
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    train_data = model.ops.multibatch(batch_size,
                                      train_X,
                                      train_Y,
                                      shuffle=True)
    dev_data = model.ops.multibatch(batch_size, dev_X, dev_Y)
    # Create the optimizer.
    optimizer = Adam(0.001)
    for i in range(n_iter):
        for X, Y in tqdm(train_data, leave=False):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in dev_data:
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        msg.row((i, f"{score:.3f}"), widths=(3, 5))
Example #25
def transformer_listener_tok2vec_v1(
        pooling: Model[Ragged, Floats2d],
        grad_factor: float = 1.0) -> Model[List[Doc], List[Floats2d]]:
    """Create a 'TransformerListener' layer, which will connect to a Transformer
    component earlier in the pipeline.

    The layer takes a list of Doc objects as input, and produces a list of
    2d arrays as output, with each array having one row per token. Most spaCy
    models expect a sublayer with this signature, making it easy to connect them
    to a transformer model via this sublayer.
    Transformer models usually operate over wordpieces, which usually don't align
    one-to-one against spaCy tokens. The layer therefore requires a reduction
    operation in order to calculate a single token vector given zero or more
    wordpiece vectors.

    pooling (Model[Ragged, Floats2d]): A reduction layer used to calculate
        the token vectors based on zero or more wordpiece vectors. If in doubt,
        mean pooling (see `thinc.layers.reduce_mean`) is usually a good choice.
    grad_factor (float): Reweight gradients from the component before passing
        them upstream. You can set this to 0 to "freeze" the transformer weights
        with respect to the component, or use it to make some components more
        significant than others. Leaving it at 1.0 is usually fine.
    """
    return chain(
        TransformerListener("transformer"),
        trfs2arrays(pooling, grad_factor),
    )
Example #26
def test_replace_node():
    relu1 = Relu(5)
    relu2 = Relu(5)
    relu_chain = chain(relu1, relu2)
    relu1_debug = with_debug(relu1)
    debug = Model(
        "test",
        lambda model, X, is_train: (X, lambda dY: dY),
        layers=[relu1, relu2, relu1, relu_chain],
        refs={
            "relu1": relu1,
            "relu2": relu2,
            "relu3": relu1
        },
    )
    debug.replace_node(relu1, relu1_debug)
    assert debug.layers[0] == relu1_debug
    assert debug.layers[1] == relu2
    assert debug.layers[2] == relu1_debug
    assert debug.get_ref("relu1") == relu1_debug
    assert debug.get_ref("relu2") == relu2
    assert debug.get_ref("relu3") == relu1_debug

    # Check that nodes are replaced recursively
    assert debug.layers[3] == relu_chain
    assert debug.layers[3].layers[0] == relu1_debug
    assert debug.layers[3].layers[1] == relu2
Example #27
def TransformersTagger(
    starter: str, n_tags: int = 17
) -> Model[List[List[str]], List[Floats2d]]:
    return chain(
        TransformersTokenizer(starter),
        Transformer(starter),
        with_array(Softmax(nO=n_tags)),
    )
Example #28
def tok2vec_per_sentence(model_name, cfg):
    max_words = cfg.get("words_per_batch", 1000)
    name = cfg["trf_name"]

    model = foreach_sentence(
        chain(get_word_pieces(name),
              with_length_batching(model_name, max_words)))
    return model
Example #29
def test_validation_complex():
    good_model = chain(list2ragged(), reduce_sum(), Relu(12, dropout=0.5), Relu(1))
    X = [good_model.ops.xp.zeros((4, 75), dtype="f")]
    Y = good_model.ops.xp.zeros((1,), dtype="f")
    good_model.initialize(X, Y)
    good_model.predict(X)

    bad_model = chain(
        list2ragged(),
        reduce_sum(),
        Relu(12, dropout=0.5),
        # ERROR: Why can't I attach a Relu to an attention layer?
        ParametricAttention(12),
        Relu(1),
    )
    with pytest.raises(DataValidationError):
        bad_model.initialize(X, Y)
Example #30
def test_validation():
    model = chain(Relu(10), Relu(10), with_ragged(reduce_max()), Softmax())
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc2f(1, 10), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc3f(1, 10, 1), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=[model.ops.alloc2f(1, 10)], Y=model.ops.alloc2f(1, 10))
Example #31
def baseline_mwe(nO, nP, depth):
    from thinc.neural._classes.model import Model
    from thinc.neural._classes.resnet import Residual
    from thinc.neural._classes.convolution import ExtractWindow
    from thinc.neural._classes.layernorm import LayerNorm
    from thinc.api import chain, clone, with_flatten
    from thinc.v2v import Maxout  # needed by the Maxout call below
    maxout = Maxout(nO, nO*3, pieces=nP)
    normalize = LayerNorm(maxout)
    with Model.define_operators({'>>': chain, '**': clone}):
        model = Residual(ExtractWindow(nW=1) >> normalize)
        model = with_flatten(chain(*([model]*depth)))
    model.maxout = maxout
    model.normalize = normalize
    return model
Example #32
def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
    """
    Build a simple CNN text classifier, given a token-to-vector model as inputs.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
            output_layer = Softmax(nr_class, tok2vec.nO)
        else:
            output_layer = (
                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
            )
        model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    return model
Example #33
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    model = with_flatten(
                 chain(
                    HashEmbed(width, nr_vector),
                    ReLu(width, width),
                    ReLu(width, width),
                    Softmax(nr_tag, width)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(
            lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i]-y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
Example #34
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
Example #35
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
        if cfg.get("low_data") and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = build_bow_text_classifier(
            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
        )
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (
                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
            )
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model