def test_model_init():
    """Exercise the public Model accessor API: params, grads, dims, refs, attrs.

    Walks one Model instance through every accessor family and checks both the
    happy paths and the error contracts (KeyError for unknown names, ValueError
    for declared-but-unset values or invalid mutations).
    """

    # Minimal Shim subclass; only the class-level name is needed for construction.
    class MyShim(Shim):
        name = "testshim"

    model_a = create_model("a")
    model = Model(
        "test",
        # Identity forward pass: returns input unchanged plus identity backprop.
        lambda X: (X, lambda dY: dY),
        # None marks a dim/param/ref as declared but not yet set.
        dims={"nI": 10, "nO": None},
        params={"W": numpy.zeros((10, )), "b": None},
        refs={"a": model_a, "b": None},
        attrs={"foo": "bar"},
        shims=[MyShim(None)],
        layers=[model_a, model_a],
    )

    # --- params ---
    assert model.has_param("W")
    assert model.get_param("W").shape == (10, )
    # NOTE(review): has_param appears to return None (not False) for a param
    # that is declared but unset ("b": None above) — tri-state contract.
    assert model.has_param("b") is None
    with pytest.raises(KeyError):
        model.get_param("b")
    with pytest.raises(KeyError):
        model.get_param("X")
    # Setting a previously-unknown param registers it.
    model.set_param("X", numpy.zeros((10, )))
    assert model.has_param("X")
    assert model.get_param("X").shape == (10, )
    # use_params swaps in replacement values inside the context only...
    with model.use_params({(model.id, "X"): numpy.ones((10, ))}):
        assert numpy.array_equal(model.get_param("X"), numpy.ones((10, )))
    # ...and restores the originals on exit.
    assert numpy.array_equal(model.get_param("X"), numpy.zeros((10, )))

    # --- grads ---
    assert not model.has_grad("W")
    assert not model.has_grad("xyz")
    with pytest.raises(KeyError):
        model.get_grad("b")
    model.set_param("W", model.ops.alloc1f(10))
    model.set_grad("W", model.ops.alloc1f(10))
    # Incrementing with a mismatched shape must be rejected.
    with pytest.raises(ValueError):
        model.inc_grad("W", numpy.zeros((5, 0)))

    # --- dims ---
    assert model.has_dim("nI")
    assert model.get_dim("nI") == 10
    with pytest.raises(KeyError):
        model.get_dim("xyz")
    # "nO" was declared as None: reading it is a ValueError, not a KeyError.
    with pytest.raises(ValueError):
        model.get_dim("nO")
    with pytest.raises(KeyError):
        model.set_dim("xyz", 20)
    # A dim that already has a value cannot be overwritten.
    with pytest.raises(ValueError):
        model.set_dim("nI", 20)

    # --- refs ---
    assert model.has_ref("a")
    assert model.get_ref("a").name == "a"
    assert not model.has_ref("xyz")
    with pytest.raises(KeyError):
        model.get_ref("xyz")
    # Same tri-state contract as has_param: declared-but-unset -> None.
    assert model.has_ref("b") is None
    with pytest.raises(ValueError):
        model.get_ref("b")
    # A ref may be set if it points at a node already in the model's tree...
    model.set_ref("c", model_a)
    assert model.has_ref("c")
    assert model.get_ref("c").name == "a"
    # ...but not at an unrelated model.
    with pytest.raises(ValueError):
        model.set_ref("c", create_model("c"))

    # --- attrs ---
    assert "foo" in model.attrs
    assert "bar" not in model.attrs
    assert model.attrs["foo"] == "bar"
    with pytest.raises(KeyError):
        model.attrs["bar"]
    model.attrs["bar"] = "baz"

    # --- copy ---
    model_copy = model.copy()
    assert model_copy.name == "test"
def train_model(
    model: Model,
    *,
    train: Sequence[Tuple[str, str]],
    test: Sequence[Tuple[str, str]],
    n_iter: int,
    batch_size: int | thinc.types.Generator = 32,
    learn_rate: float | List[float] | thinc.types.Generator = 0.001,
) -> Model:
    """
    Train ``model`` on (text, language) pairs, printing per-epoch loss/score
    and a final classification report on the test split.

    Args:
        model: Un-initialized thinc model. Its last layer gets a ``"classes"``
            attr holding the label classes learned from ``train``.
        train: Sequence of (text, language-label) pairs used for fitting.
        test: Sequence of (text, language-label) pairs used for evaluation.
        n_iter: Number of training epochs.
        batch_size: Fixed batch size, or a thinc schedule generator.
        learn_rate: Fixed learning rate, a list, or a thinc schedule generator.

    Returns:
        The trained model (the same object, mutated in place).
    """
    from contextlib import nullcontext

    # binarize language labels
    # NOTE: thinc seems to require type "float32" arrays for training labels
    # errors otherwise... :/
    lb = sklearn.preprocessing.LabelBinarizer()
    lb.fit([lang for _, lang in train])
    # THIS NEXT LINE IS CRITICAL: we need to save the training class labels
    # but don't want to keep this label binarizer around; so, add it to the model
    model.layers[-1].attrs["classes"] = lb.classes_
    Y_train = lb.transform([lang for _, lang in train]).astype("float32")
    Y_test = lb.transform([lang for _, lang in test])
    X_train = [text for text, _ in train]
    X_test = [text for text, _ in test]

    losser = thinc.api.CategoricalCrossentropy(normalize=True)
    optimizer = thinc.api.Adam(learn_rate)
    # Shape inference needs a small sample of real data.
    model.initialize(X=X_train[:10], Y=Y_train[:10])

    def _averaged_params():
        # Evaluate with the optimizer's averaged params when available;
        # otherwise a no-op context. Deduplicates the former if/else branches.
        if optimizer.averages:
            return model.use_params(optimizer.averages)
        return nullcontext()

    print(f"{'epoch':>5} {'loss':>8} {'score':>8}")
    # iterate over epochs
    for n in range(n_iter):
        loss = 0.0
        # iterate over batches
        batches = model.ops.multibatch(batch_size, X_train, Y_train, shuffle=True)
        for X, Y in tqdm(batches, leave=False):
            Yh, backprop = model.begin_update(X)
            dYh, loss_batch = losser(Yh, Y)
            loss += loss_batch
            backprop(dYh)
            model.finish_update(optimizer)
            optimizer.step_schedules()
        with _averaged_params():
            score = evaluate_model(model, X_test=X_test, Y_test=Y_test, batch_size=1000)
        print(f"{n:>5} {loss:>8.3f} {score:>8.3f}")

    # Final report on the held-out split, again under averaged params if any.
    with _averaged_params():
        pred_langs = models.get_model_preds(
            model, X_test, model.layers[-1].attrs["classes"]
        )
    true_langs = list(lb.inverse_transform(Y_test))
    print(sklearn.metrics.classification_report(true_langs, pred_langs))
    return model