def test_replace_node():
    relu1 = Relu(5)
    relu2 = Relu(5)
    relu_chain = chain(relu1, relu2)
    relu1_debug = with_debug(relu1)
    debug = Model(
        "test",
        lambda X: (X, lambda dY: dY),
        layers=[relu1, relu2, relu1, relu_chain],
        refs={"relu1": relu1, "relu2": relu2, "relu3": relu1},
    )
    debug.replace_node(relu1, relu1_debug)
    assert debug.layers[0] == relu1_debug
    assert debug.layers[1] == relu2
    assert debug.layers[2] == relu1_debug
    assert debug.get_ref("relu1") == relu1_debug
    assert debug.get_ref("relu2") == relu2
    assert debug.get_ref("relu3") == relu1_debug
    # Check that nodes are replaced recursively
    assert debug.layers[3] == relu_chain
    assert debug.layers[3].layers[0] == relu1_debug
    assert debug.layers[3].layers[1] == relu2
def cnn_tagger(width: int, vector_width: int, nr_classes: int = 17):
    with Model.define_operators({">>": chain}):
        model = strings2arrays() >> with_array(
            HashEmbed(nO=width, nV=vector_width, column=0)
            >> expand_window(window_size=1)
            >> Relu(nO=width, nI=width * 3)
            >> Relu(nO=width, nI=width)
            >> Softmax(nO=nr_classes, nI=width)
        )
    return model
def main(
    n_hidden: int = 256, dropout: float = 0.2, n_iter: int = 10, batch_size: int = 128
):
    # Define the model
    model: Model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Load the data
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    # Set any missing shapes for the model.
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    train_data = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
    dev_data = model.ops.multibatch(batch_size, dev_X, dev_Y)
    # Create the optimizer.
    optimizer = Adam(0.001)
    for i in range(n_iter):
        for X, Y in tqdm(train_data, leave=False):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in dev_data:
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        msg.row((i, f"{score:.3f}"), widths=(3, 5))
def test_model_gpu():
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Make sure the data is on the right device.
    train_X = model.ops.asarray(train_X)
    train_Y = model.ops.asarray(train_Y)
    dev_X = model.ops.asarray(dev_X)
    dev_Y = model.ops.asarray(dev_Y)
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    batch_size = 128
    for i in range(2):
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for X, Y in batches:
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
def test_with_debug():
    pytest.importorskip("ml_datasets")
    import ml_datasets

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()

    counts = Counter()

    def on_init(*_):
        counts["init"] += 1

    def on_forward(*_):
        counts["forward"] += 1

    def on_backprop(*_):
        counts["backprop"] += 1

    relu = Relu()
    relu2 = with_debug(
        Relu(), on_init=on_init, on_forward=on_forward, on_backprop=on_backprop
    )
    chained = chain(relu, relu2, relu2)
    chained.initialize(X=train_X[:5], Y=train_Y[:5])
    _, backprop = chained(X=train_X[:5], is_train=False)

    # Not real loss gradients, but we don't care for testing.
    backprop(train_Y[:5])

    # Four times forward, because initialization also applies forward for
    # validation.
    assert counts == {"init": 2, "forward": 4, "backprop": 2}
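# A minimal standalone sketch of the with_debug hooks exercised above. It assumes
# only the on_init/on_forward/on_backprop keyword arguments shown in the test;
# the callbacks take *args because the test itself ignores their arguments.
from thinc.api import Relu, chain, with_debug

def log(event):
    # Print a line whenever the wrapped layer fires the corresponding hook.
    def callback(*args):
        print(f"{event}: called with {len(args)} argument(s)")
    return callback

debug_relu = with_debug(
    Relu(nO=8),
    on_init=log("init"),
    on_forward=log("forward"),
    on_backprop=log("backprop"),
)
model = chain(Relu(nO=8), debug_relu)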
def test_wrap_non_child_references():
    relu = Relu(5)
    relu2 = Relu(5)
    chained = chain(relu, relu)
    chained2 = chain(relu2, chained)
    chained2.set_ref("relu", relu)
    # Fails in case non-child references cannot be set.
    wrap_model_recursive(chained2, with_debug)
def test_validation():
    model = chain(Relu(10), Relu(10), with_ragged(reduce_max()), Softmax())
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc2f(1, 10), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc3f(1, 10, 1), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=[model.ops.alloc2f(1, 10)], Y=model.ops.alloc2f(1, 10))
def test_walk_dfs():
    relu = Relu(5)
    relu2 = Relu(5)
    inner_chain = chain(relu, relu2)
    chained = chain(inner_chain, inner_chain)

    assert list(chained.walk(order="dfs_pre")) == [chained, inner_chain, relu, relu2]
    assert list(chained.walk(order="dfs_post")) == [
        relu,
        relu2,
        inner_chain,
        chained,
    ]
def test_recursive_double_wrap():
    def dummy_model(name, layers):
        return Model(name, lambda model, X, is_train: ..., layers=layers)

    relu = Relu(5)
    chained = chain(relu, relu)
    concat = concatenate(chained, chained, relu)
    concat_wrapped = wrap_model_recursive(
        concat, lambda model: dummy_model(f"dummy({model.name})", [model])
    )

    n_debug = 0
    for model in concat_wrapped.walk():
        if model.name.startswith("dummy"):
            n_debug += 1

    # There should be 3 unique dummy wrappers:
    # * Around concatenate.
    # * Around chain.
    # * Around relu.
    assert n_debug == 3

    assert concat_wrapped.layers[0].layers[0].layers[0].layers[0].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[0].layers[0].layers[1].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[1].layers[0].layers[0].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[1].layers[0].layers[1].name == "dummy(relu)"
    assert concat_wrapped.layers[0].layers[2].name == "dummy(relu)"
def test_recursive_double_wrap():
    relu = Relu(5)
    chained = chain(relu, relu)
    concat = concatenate(chained, chained)
    concat_debug = wrap_model_recursive(concat, with_debug)

    n_debug = 0
    for model in concat_debug.walk():
        if model.name.startswith("debug"):
            n_debug += 1

    # There should be 5 unique debug wrappers:
    # * Around concatenate. (= 1)
    # * One around each chain in concatenate. (= 2)
    # * One around each relu in the chain. (= 2)
    assert n_debug == 5

    assert concat_debug.layers[0].layers[0].layers[0].layers[0].name == "debug(relu)"
    assert concat_debug.layers[0].layers[0].layers[0].layers[1].name == "debug(relu)"
    assert concat_debug.layers[0].layers[1].layers[0].layers[0].name == "debug(relu)"
    assert concat_debug.layers[0].layers[1].layers[0].layers[1].name == "debug(relu)"
def test_validation_complex():
    good_model = chain(list2ragged(), reduce_sum(), Relu(12, dropout=0.5), Relu(1))
    X = [good_model.ops.xp.zeros((4, 75), dtype="f")]
    Y = good_model.ops.xp.zeros((1,), dtype="f")
    good_model.initialize(X, Y)
    good_model.predict(X)

    bad_model = chain(
        list2ragged(),
        reduce_sum(),
        Relu(12, dropout=0.5),
        # ERROR: Why can't I attach a Relu to an attention layer?
        ParametricAttention(12),
        Relu(1),
    )
    with pytest.raises(DataValidationError):
        bad_model.initialize(X, Y)
def build_text_classifier_lowdata(
    width: int, dropout: Optional[float], nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    # Don't document this yet, I'm not sure it's right.
    # Note, before v.3, this was the default if setting "low_data" and "pretrained_dims"
    with Model.define_operators({">>": chain, "**": clone}):
        model = (
            StaticVectors(width)
            >> list2ragged()
            >> ParametricAttention(width)
            >> reduce_sum()
            >> residual(Relu(width, width)) ** 2
            >> Linear(nO, width)
        )
        if dropout:
            model = model >> Dropout(dropout)
        model = model >> Logistic()
    return model
def test_infer_output_shape():
    model = Relu(dropout=0.2)
    X = model.ops.alloc2f(4, 5)
    Y = model.ops.alloc2f(4, 2)
    assert model.has_dim("nI") is None
    assert model.has_dim("nO") is None
    model.initialize(X=X, Y=Y)
    assert model.get_dim("nI") == 5
    assert model.get_dim("nO") == 2
def test_recursive_wrap():
    # Check:
    #
    # * Recursion: chain -> relu
    # * Multiple sublayers: chain -> [relu, relu]
    relu = Relu(5)
    chained = chain(relu, relu)
    chained_debug = wrap_model_recursive(chained, with_debug)

    assert chained_debug.name == "debug(relu>>relu)"
    assert chained_debug.layers[0] is chained
    assert chained_debug.layers[0].layers[0].name == "debug(relu)"
    assert chained_debug.layers[0].layers[0].layers[0] is relu
    assert chained_debug.layers[0].layers[1].name == "debug(relu)"
    assert chained_debug.layers[0].layers[1].layers[0] is relu
def test_spancat_model_forward_backward(nO=5):
    tok2vec = build_Tok2Vec_model(**get_tok2vec_kwargs())
    docs = get_docs()
    spans_list = []
    lengths = []
    for doc in docs:
        spans_list.append(doc[:2])
        spans_list.append(doc[1:4])
        lengths.append(2)
    spans = Ragged(
        tok2vec.ops.asarray([[s.start, s.end] for s in spans_list], dtype="i"),
        tok2vec.ops.asarray(lengths, dtype="i"),
    )
    model = build_spancat_model(
        tok2vec, reduce_mean(), chain(Relu(nO=nO), Logistic())
    ).initialize(X=(docs, spans))
    Y, backprop = model((docs, spans), is_train=True)

    assert Y.shape == (spans.dataXd.shape[0], nO)
    backprop(Y)
def test_recursive_wrap():
    def dummy_model(name, layers):
        return Model(name, lambda model, X, is_train: ..., layers=layers)

    # Check:
    #
    # * Recursion: chain -> relu
    # * Multiple sublayers: chain -> [relu, relu]
    relu = Relu(5)
    chained = chain(relu, relu)
    chained_debug = wrap_model_recursive(
        chained, lambda model: dummy_model(f"dummy({model.name})", [model])
    )

    assert chained_debug.name == "dummy(relu>>relu)"
    assert chained_debug.layers[0] is chained
    assert chained_debug.layers[0].layers[0].name == "dummy(relu)"
    assert chained_debug.layers[0].layers[0].layers[0] is relu
    assert chained_debug.layers[0].layers[1].name == "dummy(relu)"
    assert chained_debug.layers[0].layers[1].layers[0] is relu
def create_relu_softmax(width, dropout, nI, nO):
    return chain(clone(Relu(nO=width, dropout=dropout), 2), Softmax(10, width))
def test_walk_bfs_post_order_fails():
    relu = Relu(5)
    with pytest.raises(ValueError, match="Invalid order"):
        relu.walk(order="dfs_post_order")
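# Hedged sketch of the traversal orders exercised by these tests: "bfs" is the
# default for Model.walk, "dfs_pre" and "dfs_post" are the depth-first variants,
# and any other value (as in the test above) raises ValueError.
from thinc.api import Relu, chain

inner = chain(Relu(5), Relu(5))
outer = chain(inner, Relu(5))
for order in ("bfs", "dfs_pre", "dfs_post"):
    print(order, [node.name for node in outer.walk(order=order)])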
from thinc.api import chain, Relu, reduce_max, Softmax, add

good_model = chain(Relu(10), Relu(10), Softmax())
reveal_type(good_model)

good_model2 = add(Relu(10), Relu(10), Softmax())
reveal_type(good_model2)

bad_model_undetected = chain(Relu(10), Relu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected)

bad_model_undetected2 = add(Relu(10), Relu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected2)
def model1(nH, nI):
    model = Relu(nH, nI).initialize()
    return model
from thinc.api import chain, Relu, reduce_max, Softmax, add, concatenate

bad_model = chain(Relu(10), reduce_max(), Softmax())

bad_model2 = add(Relu(10), reduce_max(), Softmax())

bad_model_only_plugin = chain(
    Relu(10), Relu(10), Relu(10), Relu(10), reduce_max(), Softmax()
)

bad_model_only_plugin2 = add(
    Relu(10), Relu(10), Relu(10), Relu(10), reduce_max(), Softmax()
)
reveal_type(bad_model_only_plugin2)

bad_model_only_plugin3 = concatenate(
    Relu(10), Relu(10), Relu(10), Relu(10), reduce_max(), Softmax()
)
reveal_type(bad_model_only_plugin3)
def create_embed_relu_relu_softmax(depth, width, vector_length):
    with Model.define_operators({">>": chain}):
        model = strings2arrays() >> with_array(
            HashEmbed(width, vector_length)
            >> expand_window(window_size=1)
            >> Relu(width, width * 3)
            >> Relu(width, width)
            >> Softmax(17, width)
        )
    return model
from thinc.api import chain, Relu, reduce_max, Softmax, add

bad_model = chain(Relu(10), reduce_max(), Softmax())

bad_model2 = add(Relu(10), reduce_max(), Softmax())