def main(
    n_hidden: int = 256, dropout: float = 0.2, n_iter: int = 10, batch_size: int = 128
):
    """Train a ReLu -> ReLu -> Softmax network on MNIST, reporting dev accuracy.

    Args:
        n_hidden: ``nO`` passed to each of the two ReLu layers.
        dropout: ``dropout`` passed to each of the two ReLu layers.
        n_iter: Number of passes over the training batches.
        batch_size: Batch size handed to ``model.ops.multibatch``.

    After every pass, one row holding the epoch index and the dev-set
    accuracy (formatted to three decimals) is emitted through ``msg.row``.
    """
    # Assemble the network: two hidden layers followed by a Softmax output.
    first_hidden = ReLu(nO=n_hidden, dropout=dropout)
    second_hidden = ReLu(nO=n_hidden, dropout=dropout)
    model: Model = chain(first_hidden, second_hidden, Softmax())

    # Fetch the train/dev splits.
    train_split, dev_split = ml_datasets.mnist()
    train_X, train_Y = train_split
    dev_X, dev_Y = dev_split

    # A small sample is enough for the model to fill in any missing shapes.
    model.initialize(X=train_X[:5], Y=train_Y[:5])

    train_batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
    dev_batches = model.ops.multibatch(batch_size, dev_X, dev_Y)
    optimizer = Adam(0.001)

    def dev_accuracy():
        """Return the fraction of dev examples whose argmax matches the label."""
        hits = 0
        seen = 0
        for inputs, gold in dev_batches:
            guesses = model.predict(inputs)
            hits += (guesses.argmax(axis=1) == gold.argmax(axis=1)).sum()
            seen += guesses.shape[0]
        return hits / seen

    for epoch in range(n_iter):
        for inputs, gold in tqdm(train_batches, leave=False):
            guesses, backprop = model.begin_update(inputs)
            # Feed the prediction error back through the network.
            backprop(guesses - gold)
            model.finish_update(optimizer)
        # Evaluate and report progress for this epoch.
        score = dev_accuracy()
        msg.row((epoch, f"{score:.3f}"), widths=(3, 5))
def test_validation():
    """Each of three X inputs must make ``initialize`` raise DataValidationError.

    The inputs tried, in order, are ``alloc2f(1, 10)``, ``alloc3f(1, 10, 1)``
    and a one-element list holding ``alloc2f(1, 10)``; Y is always
    ``alloc2f(1, 10)``.
    """
    model = chain(ReLu(10), ReLu(10), with_ragged(reduce_max()), Softmax())

    # Factories rather than values, so each X is allocated right before use.
    rejected_inputs = (
        lambda: model.ops.alloc2f(1, 10),
        lambda: model.ops.alloc3f(1, 10, 1),
        lambda: [model.ops.alloc2f(1, 10)],
    )
    for make_X in rejected_inputs:
        with pytest.raises(DataValidationError):
            model.initialize(X=make_X(), Y=model.ops.alloc2f(1, 10))
def test_validation_complex():
    """A valid chain initializes and predicts; inserting ParametricAttention fails.

    The same X and Y are used for both models. The second model differs from
    the first only by a ``ParametricAttention(12)`` layer placed between the
    two ReLu layers, and its ``initialize`` call is expected to raise
    DataValidationError.
    """
    good_model = chain(list2ragged(), reduce_sum(), ReLu(12, dropout=0.5), ReLu(1))
    xp = good_model.ops.xp
    X = [xp.zeros((4, 75), dtype="f")]
    Y = xp.zeros((1,), dtype="f")

    # The well-formed model must get through both initialization and prediction.
    good_model.initialize(X, Y)
    good_model.predict(X)

    bad_layers = [
        list2ragged(),
        reduce_sum(),
        ReLu(12, dropout=0.5),
        # ERROR: Why can't I attach a ReLu to an attention layer?
        ParametricAttention(12),
        ReLu(1),
    ]
    bad_model = chain(*bad_layers)
    with pytest.raises(DataValidationError):
        bad_model.initialize(X, Y)
def test_infer_output_shape():
    """``initialize`` sets nI and nO of a ReLu layer from the sample X and Y."""
    expected_dims = {"nI": 5, "nO": 2}

    model = ReLu(dropout=0.2)
    sample_X = model.ops.alloc2f(4, expected_dims["nI"])
    sample_Y = model.ops.alloc2f(4, expected_dims["nO"])

    # Before initialization neither dimension is set.
    for dim_name in expected_dims:
        assert model.has_dim(dim_name) is None

    model.initialize(X=sample_X, Y=sample_Y)

    # Afterwards each dimension equals the second axis of the matching sample.
    for dim_name, size in expected_dims.items():
        assert model.get_dim(dim_name) == size
def test_model_gpu():
    """Smoke-test two training epochs on MNIST after calling ``prefer_gpu()``.

    NOTE(review): the dev-set counters are accumulated but never asserted on,
    so this test only checks that training and prediction run without error.
    """
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    batch_size = 128

    train_split, dev_split = ml_datasets.mnist()
    train_X, train_Y = train_split
    dev_X, dev_Y = dev_split

    model = chain(
        ReLu(nO=n_hidden, dropout=dropout),
        ReLu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )

    # Make sure the data is on the right device.
    train_X, train_Y, dev_X, dev_Y = (
        model.ops.asarray(array) for array in (train_X, train_Y, dev_X, dev_Y)
    )

    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)

    for _ in range(2):
        train_batches = model.ops.multibatch(
            batch_size, train_X, train_Y, shuffle=True
        )
        for inputs, gold in train_batches:
            guesses, backprop = model.begin_update(inputs)
            backprop(guesses - gold)
            model.finish_update(optimizer)

        # Evaluate on the dev set (results are not checked, see the note above).
        correct = 0
        total = 0
        for inputs, gold in model.ops.multibatch(batch_size, dev_X, dev_Y):
            guesses = model.predict(inputs)
            correct += (guesses.argmax(axis=1) == gold.argmax(axis=1)).sum()
            total += guesses.shape[0]
def create_relu_softmax(width, dropout, nI, nO):
    """Chain a twice-cloned ReLu layer with ``Softmax(10, width)``.

    Args:
        width: ``nO`` of the ReLu layer and second argument of the Softmax.
        dropout: ``dropout`` passed to the ReLu layer.
        nI: Accepted but not used by this function.
        nO: Accepted but not used by this function; the Softmax is always
            built with a first argument of 10.
    """
    hidden = ReLu(nO=width, dropout=dropout)
    hidden_stack = clone(hidden, 2)
    output = Softmax(10, width)
    return chain(hidden_stack, output)
def create_embed_relu_relu_softmax(depth, width, vector_length):
    """Compose strings2arrays with an embed -> window -> ReLu -> ReLu -> Softmax pipeline.

    Args:
        depth: Accepted but not used by this function.
        width: First argument of HashEmbed and both ReLu layers, and the
            basis for the other layer sizes (``width * 3``, ``width``).
        vector_length: Second argument of HashEmbed.

    The inner pipeline is wrapped in ``with_array`` and ends in
    ``Softmax(17, width)``.
    """
    # Inside this context ``>>`` is bound to ``chain``.
    with Model.define_operators({">>": chain}):
        to_arrays = strings2arrays()
        embed = HashEmbed(width, vector_length)
        window = expand_window(window_size=1)
        first_hidden = ReLu(width, width * 3)
        second_hidden = ReLu(width, width)
        output = Softmax(17, width)
        pipeline = embed >> window >> first_hidden >> second_hidden >> output
        model = to_arrays >> with_array(pipeline)
    return model
# NOTE(review): presumably a static type-checker fixture rather than runtime
# code -- the variable names mark both compositions as intentionally invalid.
# Confirm against the expected checker output before editing.
from thinc.api import chain, ReLu, reduce_max, Softmax, add

# reduce_max() placed directly after ReLu(10), composed with chain.
bad_model = chain(ReLu(10), reduce_max(), Softmax())
# The same three layers, composed with add instead of chain.
bad_model2 = add(ReLu(10), reduce_max(), Softmax())
# NOTE(review): presumably a static type-checker fixture -- reveal_type is
# used without being imported or defined here, so it relies on the checker
# providing it. Confirm against the expected checker output before editing.
from thinc.api import chain, ReLu, reduce_max, Softmax, add

# Compositions named as valid, built with chain and with add.
good_model = chain(ReLu(10), ReLu(10), Softmax())
reveal_type(good_model)
good_model2 = add(ReLu(10), ReLu(10), Softmax())
reveal_type(good_model2)

# Compositions named as invalid-but-undetected: reduce_max() is the third
# positional layer here. NOTE(review): why the checker would miss these is
# not visible in this snippet -- verify against the combinator signatures.
bad_model_undetected = chain(ReLu(10), ReLu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected)
bad_model_undetected2 = add(ReLu(10), ReLu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected2)
# NOTE(review): presumably a static type-checker fixture -- reveal_type is
# used without being imported or defined here. The "only_plugin" names suggest
# these cases are expected to be caught only when a checker plugin is active;
# confirm against the expected checker output before editing.
from thinc.api import chain, ReLu, reduce_max, Softmax, add, concatenate

# reduce_max() placed directly after a single ReLu(10).
bad_model = chain(ReLu(10), reduce_max(), Softmax())
bad_model2 = add(ReLu(10), reduce_max(), Softmax())

# reduce_max() placed after four ReLu(10) layers, once per combinator.
# Note that the chain variant has no reveal_type call, unlike the other two.
bad_model_only_plugin = chain(ReLu(10), ReLu(10), ReLu(10), ReLu(10), reduce_max(), Softmax())
bad_model_only_plugin2 = add(ReLu(10), ReLu(10), ReLu(10), ReLu(10), reduce_max(), Softmax())
reveal_type(bad_model_only_plugin2)
bad_model_only_plugin3 = concatenate(ReLu(10), ReLu(10), ReLu(10), ReLu(10), reduce_max(), Softmax())
reveal_type(bad_model_only_plugin3)
def model1(nH, nI):
    """Build ``ReLu(nH, nI)``, call ``initialize()`` on it with no arguments, and return the result."""
    return ReLu(nH, nI).initialize()