Example #1
def test_use_ops():
    class_ops = get_current_ops()
    with use_ops("numpy"):
        new_ops = get_current_ops()
        assert new_ops.name == "numpy"
    with use_ops("cupy"):
        new_ops = get_current_ops()
        assert new_ops.name == "cupy"
    new_ops = get_current_ops()
    assert class_ops.name == new_ops.name
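
All of these snippets revolve around thinc's `use_ops` context manager, which temporarily swaps the active backend, and `get_current_ops`, which reads it back. A minimal sketch of the effect on allocation (assuming the NumPy backend is available):

from thinc.api import get_current_ops, use_ops

with use_ops("numpy"):
    ops = get_current_ops()
    W = ops.alloc2f(4, 3)      # a zeroed float32 numpy.ndarray on the CPU
    print(ops.name, type(W))   # numpy <class 'numpy.ndarray'>
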
Example #2
def _resize_upper(model, new_nO):
    upper = model.get_ref("upper")
    if upper.has_dim("nO") is None:
        upper.set_dim("nO", new_nO)
        return model
    elif new_nO == upper.get_dim("nO"):
        return model

    smaller = upper
    nI = smaller.maybe_get_dim("nI")
    with use_ops("cpu"):
        larger = _define_upper(nO=new_nO, nI=nI)
    # The model might not be initialized yet; if so, skip this bit.
    if smaller.has_param("W"):
        larger_W = larger.ops.alloc2f(new_nO, nI)
        larger_b = larger.ops.alloc1f(new_nO)
        smaller_W = smaller.get_param("W")
        smaller_b = smaller.get_param("b")
        # Weights are stored in (nr_out, nr_in) format, so we're basically
        # just adding rows here.
        if smaller.has_dim("nO"):
            old_nO = smaller.get_dim("nO")
            larger_W[:old_nO] = smaller_W
            larger_b[:old_nO] = smaller_b
            for i in range(old_nO, new_nO):
                model.attrs["unseen_classes"].add(i)

        larger.set_param("W", larger_W)
        larger.set_param("b", larger_b)
    model._layers[-1] = larger
    model.set_ref("upper", larger)
    return model
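
The core of `_resize_upper` is the copy in the middle: weights are stored as (nr_out, nr_in), so growing the output dimension just means allocating a taller matrix and copying the old rows in, leaving the new rows zeroed. The same move in plain NumPy, with made-up shapes:

import numpy as np

old_nO, nI, new_nO = 3, 5, 7
old_W = np.random.uniform(-1, 1, (old_nO, nI))
old_b = np.random.uniform(-1, 1, (old_nO,))

new_W = np.zeros((new_nO, nI), dtype=old_W.dtype)
new_b = np.zeros((new_nO,), dtype=old_b.dtype)
new_W[:old_nO] = old_W   # existing classes keep their learned rows
new_b[:old_nO] = old_b   # rows old_nO..new_nO-1 stay zero: the "unseen classes"
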
Example #3
def test_model_gpu():
    pytest.importorskip("ml_datasets")
    import ml_datasets

    ops = "cpu"
    if has_cupy and gpu_is_available():
        ops = "cupy"

    with use_ops(ops):
        n_hidden = 32
        dropout = 0.2
        (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
        model = chain(
            Relu(nO=n_hidden, dropout=dropout),
            Relu(nO=n_hidden, dropout=dropout),
            Softmax(),
        )
        # make sure the data is on the right device
        train_X = model.ops.asarray(train_X)
        train_Y = model.ops.asarray(train_Y)
        dev_X = model.ops.asarray(dev_X)
        dev_Y = model.ops.asarray(dev_Y)

        model.initialize(X=train_X[:5], Y=train_Y[:5])
        optimizer = Adam(0.001)
        batch_size = 128

        for i in range(2):
            batches = model.ops.multibatch(batch_size,
                                           train_X,
                                           train_Y,
                                           shuffle=True)
            for X, Y in batches:
                Yh, backprop = model.begin_update(X)
                backprop(Yh - Y)
                model.finish_update(optimizer)
            # Evaluate and print progress
            correct = 0
            total = 0
            for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
                Yh = model.predict(X)
                correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
                total += Yh.shape[0]
            print(f"Epoch {i}: dev accuracy {correct / total:.3f}")
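
The `model.ops.asarray` calls near the top are the device-placement step: they copy host data onto whatever backend the model was built under, so batches and parameters live in the same place. In isolation (a sketch using the default backend):

from thinc.api import Relu, Softmax, chain

model = chain(Relu(nO=8), Softmax())
X = model.ops.asarray2f([[0.1, 0.2], [0.3, 0.4]])  # float32, on model.ops's device
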
Example #4
def resize_output_v1(model, new_nO):
    Linear = registry.get("layers", "Linear.v1")

    lower = model.get_ref("lower")
    upper = model.get_ref("upper")
    if not model.attrs["has_upper"]:
        if lower.has_dim("nO") is None:
            lower.set_dim("nO", new_nO)
        return
    elif upper.has_dim("nO") is None:
        upper.set_dim("nO", new_nO)
        return
    elif new_nO == upper.get_dim("nO"):
        return
    smaller = upper
    nI = None
    if smaller.has_dim("nI"):
        nI = smaller.get_dim("nI")
    with use_ops("numpy"):
        larger = Linear(nO=new_nO, nI=nI)
        larger.init = smaller.init
    # The model might not be initialized yet; if so, skip this bit.
    if nI:
        larger_W = larger.ops.alloc2f(new_nO, nI)
        larger_b = larger.ops.alloc1f(new_nO)
        smaller_W = smaller.get_param("W")
        smaller_b = smaller.get_param("b")
        # Weights are stored in (nr_out, nr_in) format, so we're basically
        # just adding rows here.
        if smaller.has_dim("nO"):
            larger_W[: smaller.get_dim("nO")] = smaller_W
            larger_b[: smaller.get_dim("nO")] = smaller_b
            for i in range(smaller.get_dim("nO"), new_nO):
                model.attrs["unseen_classes"].add(i)

        larger.set_param("W", larger_W)
        larger.set_param("b", larger_b)
    model._layers[-1] = larger
    model.set_ref("upper", larger)
    return model
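
Unlike Example #2, this version fetches its layer by name from thinc's registry instead of importing it, so the concrete implementation can be swapped through configuration. The lookup on its own:

from thinc.api import registry

Linear = registry.get("layers", "Linear.v1")  # resolve the registered factory
layer = Linear(nO=4, nI=2)                    # then call it like the class
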
Example #5
def TransitionBasedParser_v1(
    tok2vec: Model[List[Doc], List[Floats2d]],
    state_type: Literal["parser", "ner"],
    extra_state_tokens: bool,
    hidden_width: int,
    maxout_pieces: int,
    use_upper: bool = True,
    nO: Optional[int] = None,
) -> Model:

    chain = registry.get("layers", "chain.v1")
    list2array = registry.get("layers", "list2array.v1")
    Linear = registry.get("layers", "Linear.v1")
    TransitionModel = registry.get("layers", "spacy.TransitionModel.v1")
    PrecomputableAffine = registry.get("layers", "spacy.PrecomputableAffine.v1")

    if state_type == "parser":
        nr_feature_tokens = 13 if extra_state_tokens else 8
    elif state_type == "ner":
        nr_feature_tokens = 6 if extra_state_tokens else 3
    else:
        raise ValueError(Errors.E917.format(value=state_type))
    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
    tok2vec = chain(tok2vec, list2array(), Linear(hidden_width, t2v_width))
    tok2vec.set_dim("nO", hidden_width)
    lower = PrecomputableAffine(
        nO=hidden_width if use_upper else nO,
        nF=nr_feature_tokens,
        nI=tok2vec.get_dim("nO"),
        nP=maxout_pieces,
    )
    if use_upper:
        with use_ops("numpy"):
            # Initialize weights at zero, as it's a classification layer.
            upper = Linear(nO=nO, init_W=zero_init)
    else:
        upper = None
    return TransitionModel(tok2vec, lower, upper, resize_output=resize_output_v1)
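
One detail worth isolating from Example #5: the output layer is built with `zero_init`, so every action starts with an identical score and nothing is favoured before training. The construction on its own:

import numpy
from thinc.api import Linear, zero_init

upper = Linear(nO=10, init_W=zero_init)
upper.initialize(X=numpy.zeros((2, 6), dtype="f"))  # nI inferred as 6
assert upper.get_param("W").sum() == 0.0            # a blank-slate classifier
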
Example #6
def build_tb_parser_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    state_type: Literal["parser", "ner"],
    extra_state_tokens: bool,
    hidden_width: int,
    maxout_pieces: int,
    use_upper: bool,
    nO: Optional[int] = None,
) -> Model:
    """
    Build a transition-based parser model. Can apply to NER or dependency-parsing.

    Transition-based parsing is an approach to structured prediction where the
    task of predicting the structure is mapped to a series of state transitions.
    You might find this tutorial helpful as background:
    https://explosion.ai/blog/parsing-english-in-python

    The neural network state prediction model consists of either two or three
    subnetworks:

    * tok2vec: Map each token into a vector representation. This subnetwork
        is run once for each batch.
    * lower: Construct a feature-specific vector for each (token, feature) pair.
        This is also run once for each batch. Constructing the state
        representation is then simply a matter of summing the component features
        and applying the non-linearity.
    * upper (optional): A feed-forward network that predicts scores from the
        state representation. If not present, the output from the lower model is
        used as action scores directly.

    tok2vec (Model[List[Doc], List[Floats2d]]):
        Subnetwork to map tokens into vector representations.
    state_type (str):
        String value denoting the type of parser model: "parser" or "ner".
    extra_state_tokens (bool): Whether to use additional tokens in the context
        to construct the state vector. Defaults to `False`, which means 3
        feature tokens for the NER and 8 for the parser. When set to `True`,
        this becomes 6 (NER) or 13 (parser).
    hidden_width (int): The width of the hidden layer.
    maxout_pieces (int): How many pieces to use in the state prediction layer.
        Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
        is replaced with a ReLU non-linearity if use_upper=True, and no
        non-linearity if use_upper=False.
    use_upper (bool): Whether to use an additional hidden layer after the state
        vector in order to predict the action scores. It is recommended to set
        this to False for large pretrained models such as transformers, and True
        for smaller networks. The upper layer is computed on CPU, which becomes
        a bottleneck on larger GPU-based models, where it's also less necessary.
    nO (int or None): The number of actions the model will choose between.
        Usually inferred from data at the beginning of training, or loaded from
        disk.
    """
    if state_type == "parser":
        nr_feature_tokens = 13 if extra_state_tokens else 8
    elif state_type == "ner":
        nr_feature_tokens = 6 if extra_state_tokens else 3
    else:
        raise ValueError(Errors.E917.format(value=state_type))
    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
    tok2vec = chain(
        tok2vec,
        list2array(),
        Linear(hidden_width, t2v_width),
    )
    tok2vec.set_dim("nO", hidden_width)
    lower = _define_lower(
        nO=hidden_width if use_upper else nO,
        nF=nr_feature_tokens,
        nI=tok2vec.get_dim("nO"),
        nP=maxout_pieces,
    )
    upper = None
    if use_upper:
        with use_ops("cpu"):
            # Initialize weights at zero, as it's a classification layer.
            upper = _define_upper(nO=nO, nI=None)
    return TransitionModel(tok2vec, lower, upper, resize_output)
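
Finally, note the `use_ops("cpu")` block around `_define_upper`: a thinc layer captures the Ops that are current when it is created, which is how the upper layer keeps CPU-allocated parameters even when the rest of the model runs on GPU. The pattern in isolation (a sketch, with Linear standing in for the private `_define_upper` helper):

from thinc.api import Linear, use_ops, zero_init

with use_ops("cpu"):
    # Created under the CPU backend, so its parameters are allocated there
    # no matter which backend is active outside this block.
    upper = Linear(nO=5, init_W=zero_init)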