Code example #1
def noop_models():
    return [
        with_padded(noop()),
        with_array(noop()),
        with_array2d(noop()),
        with_list(noop()),
        with_ragged(noop())
    ]
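Each wrapper above only adapts the input type for its child layer, so with `noop()` as the child every entry is an identity model. A minimal check of the array case, assuming thinc v8's `thinc.api` (the shape is arbitrary):

import numpy
from thinc.api import noop, with_array

model = with_array(noop())
model.initialize()
X = numpy.zeros((4, 3), dtype="f")
Y = model.predict(X)
assert numpy.array_equal(Y, X)  # noop passes the data through unchanged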
Code example #2
File: _ml.py Project: cs394-s20/Aqua
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        if drop is not None:
            drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
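A hedged usage sketch (thinc v7-era API, as in the snippet above; `embed_a` and `embed_b` are hypothetical layers that map a list of 2d arrays to a list of 2d arrays):

# Hypothetical layers of matching input type:
model = concatenate_lists(embed_a, embed_b)
ys, backprop = model.begin_update(Xs)  # Xs: list of variable-length 2d arrays
# Each ys[i] is the horizontal concatenation of the two layers' outputs.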
Code example #3
def TransitionModel(tok2vec,
                    lower,
                    upper,
                    resize_output,
                    dropout=0.2,
                    unseen_classes=set()):
    """Set up a stepwise transition-based model"""
    if upper is None:
        has_upper = False
        upper = noop()
    else:
        has_upper = True
    # don't define nO for this object, because we can't dynamically change it
    return Model(
        name="parser_model",
        forward=forward,
        dims={"nI": tok2vec.maybe_get_dim("nI")},
        layers=[tok2vec, lower, upper],
        refs={
            "tok2vec": tok2vec,
            "lower": lower,
            "upper": upper
        },
        init=init,
        attrs={
            "has_upper": has_upper,
            "unseen_classes": set(unseen_classes),
            "resize_output": resize_output,
        },
    )
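Because `noop()` is a real `Model`, code that consumes this model can call `upper` unconditionally: when `has_upper` is False the call is simply an identity. An illustrative sketch of that pattern (not spaCy's actual forward pass):

def forward_sketch(model, X, is_train):
    # With upper == noop(), the second call returns its input unchanged.
    lower = model.get_ref("lower")
    upper = model.get_ref("upper")
    hidden, bp_hidden = lower(X, is_train)
    scores, bp_scores = upper(hidden, is_train)

    def backprop(d_scores):
        return bp_hidden(bp_scores(d_scores))

    return scores, backprop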
Code example #4
def test_noop():
    data = numpy.asarray([1, 2, 3], dtype="f")
    model = noop(Linear(), Linear())
    model.initialize(data, data)
    Y, backprop = model(data, is_train=True)
    assert numpy.array_equal(Y, data)
    dX = backprop(Y)
    assert numpy.array_equal(dX, data)
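Note that `noop()` accepts child layers but never runs them: its forward returns the input unchanged, and the children are only stored as sublayers. A quick check, assuming thinc v8:

from thinc.api import Linear, noop

model = noop(Linear(), Linear())
assert len(model.layers) == 2  # children are kept, but forward ignores them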
Code example #5
def test_layerize_update_noop(model1, model2, nI):
    ones = numpy.ones((10, nI))
    model = layerize(noop(model1, model2))
    y, finish_update = model.begin_update(ones)
    assert_allclose(y, ones)
    grad_in = numpy.ones(y.shape) + 1.0
    grad_out = finish_update(grad_in)
    assert_allclose(grad_in, grad_out)
Code example #6
def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
    import torch.nn
    from thinc.api import with_square_sequences
    from thinc.extra.wrappers import PyTorchWrapperRNN

    if depth == 0:
        return layerize(noop())
    model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
    return with_square_sequences(PyTorchWrapperRNN(model))
Code example #7
File: _ml.py Project: Ernest-Macharia/chatbot-app
def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
    if depth == 0:
        return layerize(noop())
    model = torch.nn.LSTM(nI,
                          nO // 2,
                          depth,
                          bidirectional=True,
                          dropout=dropout)
    return with_square_sequences(PyTorchWrapperRNN(model))
Code example #8
def TorchBiLSTMEncoder(config):
    import torch.nn
    from thinc.extra.wrappers import PyTorchWrapperRNN

    width = config["width"]
    depth = config["depth"]
    if depth == 0:
        return layerize(noop())
    return with_square_sequences(
        PyTorchWrapperRNN(torch.nn.LSTM(width, width // 2, depth, bidirectional=True))
    )
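The config dict only needs the two keys read above. A hypothetical call (requires PyTorch unless depth is 0):

encoder = TorchBiLSTMEncoder({"width": 128, "depth": 2})
identity = TorchBiLSTMEncoder({"width": 128, "depth": 0})  # degrades to noop()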
Code example #9
File: tok2vec.py Project: xettrisomeman/spaCy
def BiLSTMEncoder(width: int, depth: int,
                  dropout: float) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context using bidirectonal LSTM layers. Requires PyTorch.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    depth (int): The number of recurrent layers.
    dropout (float): Creates a Dropout layer on the outputs of each LSTM layer
        except the last layer. Set to 0 to disable this functionality.
    """
    if depth == 0:
        return noop()
    return with_padded(
        PyTorchLSTM(width, width, bi=True, depth=depth, dropout=dropout))
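A hedged usage sketch (thinc v8; the widths are illustrative, and depth > 0 requires PyTorch):

encoder = BiLSTMEncoder(width=96, depth=2, dropout=0.2)
# With depth=0 the encoder degrades to an identity layer:
assert BiLSTMEncoder(width=96, depth=0, dropout=0.0).name == "noop"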
Code example #10
File: _ml.py Project: spacy-io/spaCy
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
Code example #11
File: _ml.py Project: spacy-io/spaCy
def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
    if depth == 0:
        return layerize(noop())
    model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
    return with_square_sequences(PyTorchWrapperRNN(model))
Code example #12
def pretrain(
    texts_loc,
    vectors_model,
    output_dir,
    width=128,
    depth=4,
    embed_rows=1000,
    dropout=0.2,
    nr_iter=10,
    seed=0,
):
    """
    Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
    using an approximate language-modelling objective. Specifically, we load
    pre-trained vectors, and train a component like a CNN, BiLSTM, etc. to predict
    vectors which match the pre-trained ones. The weights are saved to a directory
    after each epoch. You can then pass a path to one of these pre-trained weights
    files to the 'spacy train' command.

    This technique may be especially helpful if you have little labelled data.
    However, it's still quite experimental, so your mileage may vary.

    To load the weights back in during 'spacy train', you need to ensure
    all settings are the same between pretraining and training. The API and
    errors around this need some improvement.
    """
    config = dict(locals())
    output_dir = ensure_path(output_dir)
    random.seed(seed)
    numpy.random.seed(seed)
    if not output_dir.exists():
        output_dir.mkdir()
    with (output_dir / "config.json").open("w") as file_:
        file_.write(json.dumps(config))
    has_gpu = prefer_gpu()
    nlp = spacy.load(vectors_model)
    tok2vec = Tok2Vec_LSTM(width, embed_rows, depth, dropout)
    print(dir(tok2vec))
    model = create_pretraining_model(nlp, tok2vec)
    optimizer = create_default_optimizer(model.ops)
    tracker = ProgressTracker()
    print("Epoch", "#Words", "Loss", "L/W", "w/s")
    texts = stream_texts() if texts_loc == "-" else load_texts(texts_loc)
    for epoch in range(nr_iter):
        for batch in minibatch(texts, size=256):
            docs = make_docs(nlp, batch, heads=False)
            loss = make_update(model, docs, optimizer, drop=dropout)
            progress = tracker.update(epoch, loss, docs)
            if progress:
                print(*progress)
                if texts_loc == "-" and tracker.words_per_epoch[epoch] >= 10 ** 6:
                    break
        with (output_dir / ("model%d.bin" % epoch)).open("wb") as file_:
            # This is annoying -- work around how Parser expects this
            file_.write(chain(tok2vec, layerize(noop())).to_bytes())
        with (output_dir / "log.jsonl").open("a") as file_:
            file_.write(
                json.dumps(
                    {"nr_word": tracker.nr_word, "loss": tracker.loss, "epoch": epoch}
                )
            )
        if texts_loc != "-":
            texts = load_texts(texts_loc)
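A hypothetical invocation (the paths are illustrative; `en_core_web_md` stands in for any installed pipeline with vectors). It writes one weights file per epoch (model0.bin, model1.bin, ...) plus log.jsonl to the output directory:

pretrain("texts.jsonl", "en_core_web_md", "pretrain_output", width=128, depth=4, nr_iter=10)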
Code example #13
def test_tuplify_duplicates_input():
    model = tuplify(noop(), noop())
    ones = numpy.ones([10])
    out = model.predict(ones)
    assert out == (ones, ones)
Code example #14
def test_layerize_predict_noop(model1, model2, nI):
    ones = numpy.ones((10, nI))
    model = layerize(noop(model1, model2))
    y = model(ones)
    assert_allclose(y, ones)